
Commit becff36
fix(ollama_chat.py): use tiktoken as backup for prompt token counting
puffo committed Jan 18, 2024
1 parent 76af479 commit becff36
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions litellm/llms/ollama_chat.py
```diff
@@ -220,7 +220,7 @@ def get_ollama_response(
         model_response["choices"][0]["message"] = response_json["message"]
         model_response["created"] = int(time.time())
         model_response["model"] = "ollama/" + model
-        prompt_tokens = response_json["prompt_eval_count"]  # type: ignore
+        prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt)))  # type: ignore
         completion_tokens = response_json["eval_count"]
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
@@ -320,7 +320,7 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
         model_response["choices"][0]["message"] = response_json["message"]
         model_response["created"] = int(time.time())
         model_response["model"] = "ollama/" + data["model"]
-        prompt_tokens = response_json["prompt_eval_count"]  # type: ignore
+        prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt)))  # type: ignore
         completion_tokens = response_json["eval_count"]
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
```
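
The change is identical in both hunks: instead of indexing `response_json["prompt_eval_count"]` directly, which raises `KeyError` when Ollama omits the field, `.get()` falls back to tokenizing the prompt locally with the `encoding` object these functions receive. Below is a minimal sketch of that pattern, with tiktoken's `cl100k_base` standing in for litellm's encoding (the sample response and prompt are made up for illustration):

```python
# Minimal sketch of the fallback in this commit (not litellm's actual module):
# prefer Ollama's reported prompt_eval_count, otherwise count tokens locally.
import tiktoken

# Stand-in for the `encoding` object litellm passes into get_ollama_response /
# ollama_acompletion; cl100k_base is an assumption here.
encoding = tiktoken.get_encoding("cl100k_base")

def count_prompt_tokens(response_json: dict, prompt: str) -> int:
    # dict.get returns the second argument when the key is missing, so a
    # response without "prompt_eval_count" no longer raises KeyError.
    return response_json.get("prompt_eval_count", len(encoding.encode(prompt)))

print(count_prompt_tokens({"prompt_eval_count": 26, "eval_count": 290}, "Why is the sky blue?"))  # uses Ollama's count
print(count_prompt_tokens({"eval_count": 290}, "Why is the sky blue?"))                           # tiktoken fallback
```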

1 comment on commit becff36

@ALERTua


um... what's prompt here?
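
For context on the question: the async hunk's signature is `ollama_acompletion(url, data, model_response, encoding, logging_obj)` and the surrounding lines read the model name from `data["model"]`, so no `prompt` string is visible in this diff. Note also that the fallback expression is evaluated eagerly as an argument to `.get()`, so if no local `prompt` exists in that scope the line would raise `NameError` on every call, not only when `prompt_eval_count` is missing. A hedged sketch of one way a prompt string could be derived from the chat messages in `data` (the helper and message shape are assumptions, not litellm code):

```python
# Hypothetical adaptation for the async chat path, where only `data` (with
# OpenAI-style messages) is available; not part of this commit.
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")  # stand-in for litellm's encoding

def messages_to_prompt(data: dict) -> str:
    # Flatten [{"role": ..., "content": ...}, ...] into one string to tokenize.
    return "\n".join(str(m.get("content", "")) for m in data.get("messages", []))

def count_prompt_tokens(response_json: dict, data: dict) -> int:
    fallback = len(encoding.encode(messages_to_prompt(data)))
    return response_json.get("prompt_eval_count", fallback)

data = {"model": "llama2", "messages": [{"role": "user", "content": "Why is the sky blue?"}]}
print(count_prompt_tokens({"eval_count": 290}, data))
```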
