File tree Expand file tree Collapse file tree 1 file changed +21
-7
lines changed Expand file tree Collapse file tree 1 file changed +21
-7
lines changed Original file line number Diff line number Diff line change @@ -83,6 +83,9 @@ def get_embeddings(
83
83
return [result ["embedding" ] for result in response .data ]
84
84
85
85
86
+ import asyncio
87
+ from litellm .exceptions import RateLimitError
88
+
86
89
async def get_async_chat_completion_response (
87
90
messages : List [Dict [str , Any ]],
88
91
model : Optional [str ] = None ,
@@ -116,13 +119,24 @@ async def get_async_chat_completion_response(
116
119
else :
117
120
litellm_kwargs [key ] = type_hints [key ](value )
118
121
119
- completion = await acompletion (
120
- model = model ,
121
- messages = messages ,
122
- deployment_id = deployment_id ,
123
- ** litellm_kwargs
124
- )
125
- return completion
122
+ max_retries = 5
123
+ for attempt in range (max_retries ):
124
+ try :
125
+ # Call the LLM provider; a RateLimitError is retried below with exponential backoff
126
+ completion = await acompletion (
127
+ model = model ,
128
+ messages = messages ,
129
+ deployment_id = deployment_id ,
130
+ ** litellm_kwargs
131
+ )
132
+ return completion
133
+ except RateLimitError as e :
134
+ if attempt < max_retries - 1 :
135
+ backoff_time = 2 ** attempt # Exponential backoff
136
+ await asyncio .sleep (backoff_time )
137
+ else :
138
+ raise HTTPException (status_code = 429 , detail = f"Rate limit exceeded: { e } " )
139
+
126
140
except Exception as e :
127
141
if "LLM Provider NOT provided" in e .args [0 ]:
128
142
logger .error (f"Error: error { model } is not currently supported" )
You can’t perform that action at this time.
0 commit comments