Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feat] litellm.acompletion() make Langfuse success handler non blocking #1519

Merged
merged 9 commits into from
Jan 19, 2024
5 changes: 3 additions & 2 deletions litellm/proxy/proxy_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,13 @@ model_list:
mode: embedding
litellm_settings:
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
success_callback: ['langfuse']
# cache: True
# setting callback class
# callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]

general_settings:
master_key: sk-1234
# general_settings:
# master_key: sk-1234
# database_type: "dynamo_db"
# database_args: { # 👈 all args - https://github.com/BerriAI/litellm/blob/befbcbb7ac8f59835ce47415c128decf37aac328/litellm/proxy/_types.py#L190
# "billing_mode": "PAY_PER_REQUEST",
Expand Down
10 changes: 4 additions & 6 deletions litellm/proxy/tests/load_test_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,10 @@ async def litellm_completion():
# Your existing code for litellm_completion goes here
try:
response = await litellm_client.chat.completions.create(
model="Azure OpenAI GPT-4 Canada-East (External)",
stream=True,
model="azure-gpt-3.5",
messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
)
async for chunk in response:
print(chunk)
print(response)
return response

except Exception as e:
Expand All @@ -27,9 +25,9 @@ async def litellm_completion():


async def main():
for i in range(1000000):
for i in range(150):
start = time.time()
n = 1000 # Number of concurrent tasks
n = 150 # Number of concurrent tasks
tasks = [litellm_completion() for _ in range(n)]

chat_completions = await asyncio.gather(*tasks)
Expand Down
49 changes: 49 additions & 0 deletions litellm/tests/test_async_callbacks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import json
import sys
import os
import io, asyncio

import logging

logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, os.path.abspath("../.."))

from litellm import completion
import litellm

litellm.num_retries = 3
import time
import pytest


async def custom_callback(
    kwargs,  # kwargs passed to completion
    completion_response,  # response object returned from completion
    start_time,
    end_time,  # start/end time of the call
    sleep_seconds=1.0,  # simulated handler latency; default preserves original timing
):
    """Async success callback that logs call details and simulates a slow handler.

    Prints the completion kwargs, response, and timing, then sleeps for
    ``sleep_seconds`` to emulate an expensive callback (e.g. a logging
    integration). Used to verify that the success handler does not block
    the caller.
    """
    print("LITELLM: in custom callback function")
    print("kwargs", kwargs)
    print("completion_response", completion_response)
    print("start_time", start_time)
    print("end_time", end_time)
    # BUG FIX: the original used time.sleep(1), which blocks the event loop
    # inside an async callback and defeats the purpose of a non-blocking
    # handler. asyncio.sleep yields control while "working".
    await asyncio.sleep(sleep_seconds)

    return


def test_time_to_run_10_completions():
    """Measure wall-clock time for 10 concurrent ``litellm.acompletion`` calls.

    With a non-blocking success callback the total time should be close to a
    single completion's latency; a blocking handler would serialize the
    per-call callback sleeps and inflate this measurement roughly 10x.
    """
    litellm.callbacks = [custom_callback]
    start = time.time()

    async def _run_ten():
        # Fire all completions concurrently so only callback blocking — not
        # request serialization — can stretch the measured duration.
        tasks = [
            litellm.acompletion(
                model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hello"}]
            )
            for _ in range(10)
        ]
        await asyncio.gather(*tasks)

    # BUG FIX: the original issued exactly one completion while the function
    # name and the printed message both claimed ten.
    asyncio.run(_run_ten())
    end = time.time()
    print(f"Time to run 10 completions: {end - start}")


test_time_to_run_10_completions()
10 changes: 7 additions & 3 deletions litellm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2357,12 +2357,16 @@ async def wrapper_async(*args, **kwargs):
print_verbose(
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
)
asyncio.create_task(
logging_obj.async_success_handler(result, start_time, end_time)
)
# asyncio.to_thread(
# logging_obj.async_success_handler(result, start_time, end_time)
# )
threading.Thread(
target=logging_obj.success_handler, args=(result, start_time, end_time)
).start()
threading.Thread(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ishaan-jaff how does switching from create_task to threads solve the issue?

i'm also concerned about creating too many threads here, which would cause issues in high-traffic

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@krrishdholakia I updated this PR's conversation with notes

target=logging_obj.async_success_handler,
args=(result, start_time, end_time),
).start()
# RETURN RESULT
if hasattr(result, "_hidden_params"):
result._hidden_params["model_id"] = kwargs.get("model_info", {}).get(
Expand Down