(Bug fix) - Using include_usage for /completions requests + unit testing #8484

Merged 3 commits on Feb 12, 2025
1 change: 1 addition & 0 deletions litellm/main.py
@@ -3947,6 +3947,7 @@ async def atext_completion(
            ),
            model=model,
            custom_llm_provider=custom_llm_provider,
            stream_options=kwargs.get('stream_options'),
        )
    else:
        ## OpenAI / Azure Text Completion Returns here
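The one-line change above forwards the caller's `stream_options` into the streaming wrapper for async text completions, so the final chunk can carry token counts when `include_usage` is requested. A minimal sketch of how a caller would exercise this path, assuming a configured OpenAI API key; the model name and prompt are illustrative, not taken from the PR:

import asyncio

import litellm


async def main():
    # Stream an async text completion and ask for a trailing usage chunk.
    response = await litellm.atext_completion(
        model="gpt-3.5-turbo",  # placeholder model name
        prompt="Say hello in one word.",
        stream=True,
        stream_options={"include_usage": True},
    )

    last_chunk = None
    async for chunk in response:
        last_chunk = chunk

    # With include_usage, the last streamed chunk should expose usage data.
    if last_chunk is not None and last_chunk.usage is not None:
        print(last_chunk.usage.total_tokens)


asyncio.run(main())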
35 changes: 35 additions & 0 deletions tests/llm_translation/test_text_completion.py
@@ -139,3 +139,38 @@ def test_convert_chat_to_text_completion_multiple_choices():
        completion_tokens_details=None,
        prompt_tokens_details=None,
    )


@pytest.mark.asyncio
@pytest.mark.parametrize("sync_mode", [True, False])
async def test_text_completion_include_usage(sync_mode):
    """Test text completion with include_usage"""
    last_chunk = None
    if sync_mode:
        response = litellm.text_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, world!",
            stream=True,
            stream_options={"include_usage": True},
        )

        for chunk in response:
            print(chunk)
            last_chunk = chunk
    else:
        response = await litellm.atext_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, world!",
            stream=True,
            stream_options={"include_usage": True},
        )

        async for chunk in response:
            print(chunk)
            last_chunk = chunk

    assert last_chunk is not None
    assert last_chunk.usage is not None
    assert last_chunk.usage.prompt_tokens > 0
    assert last_chunk.usage.completion_tokens > 0
    assert last_chunk.usage.total_tokens > 0
30 changes: 30 additions & 0 deletions tests/test_openai_endpoints.py
@@ -378,6 +378,36 @@ async def test_chat_completion_streaming():
    print(f"response_str: {response_str}")


@pytest.mark.asyncio
async def test_completion_streaming_usage_metrics():
    """
    [PROD Test] Ensures usage metrics are returned correctly when `include_usage` is set to `True`
    """
    client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

    response = await client.completions.create(
        model="gpt-instruct",
        prompt="hey",
        stream=True,
        stream_options={"include_usage": True},
        max_tokens=4,
        temperature=0.00000001,
    )

    last_chunk = None
    async for chunk in response:
        print("chunk", chunk)
        last_chunk = chunk

    assert last_chunk is not None, "No chunks were received"
    assert last_chunk.usage is not None, "Usage information was not received"
    assert last_chunk.usage.prompt_tokens > 0, "Prompt tokens should be greater than 0"
    assert (
        last_chunk.usage.completion_tokens > 0
    ), "Completion tokens should be greater than 0"
    assert last_chunk.usage.total_tokens > 0, "Total tokens should be greater than 0"


@pytest.mark.asyncio
async def test_chat_completion_anthropic_structured_output():
"""