(Feat) - Add /bedrock/meta.llama3-3-70b-instruct-v1:0 tool calling support + cost tracking + base llm unit test for tool calling (#8545)

* Add support for bedrock meta.llama3-3-70b-instruct-v1:0 tool calling (#8512)

* fix(converse_transformation.py): fixing bedrock meta.llama3-3-70b tool calling

* test(test_bedrock_completion.py): adding llama3.3 tool compatibility check

* add TestBedrockTestSuite

* add bedrock llama 3.3 to base llm class

* us.meta.llama3-3-70b-instruct-v1:0

* test_basic_tool_calling

* TestAzureOpenAIO1

* test_basic_tool_calling

* test_basic_tool_calling

---------

Co-authored-by: miraclebakelaser <65143272+miraclebakelaser@users.noreply.github.com>
ishaan-jaff and miraclebakelaser authored Feb 14, 2025
1 parent ce2c618 commit 125f6ff
Showing 7 changed files with 154 additions and 2 deletions.
1 change: 1 addition & 0 deletions litellm/llms/bedrock/chat/converse_transformation.py
@@ -105,6 +105,7 @@ def get_supported_openai_params(self, model: str) -> List[str]:
or base_model.startswith("cohere")
or base_model.startswith("meta.llama3-1")
or base_model.startswith("meta.llama3-2")
or base_model.startswith("meta.llama3-3")
or base_model.startswith("amazon.nova")
):
supported_params.append("tools")
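
The branch added above puts meta.llama3-3 among the Bedrock Converse models whose supported OpenAI params include "tools". A minimal sketch (not part of this commit) of how that surfaces through the public helper, assuming litellm.get_supported_openai_params resolves this model id to the Converse config patched here:

import litellm

# Illustrative check only (assumption: the "bedrock/" prefix routes this
# model id through the Converse transformation shown above).
params = litellm.get_supported_openai_params(
    model="bedrock/meta.llama3-3-70b-instruct-v1:0"
)
print("tools" in (params or []))  # expected: True once the llama3-3 branch matches
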
15 changes: 14 additions & 1 deletion litellm/model_prices_and_context_window_backup.json
@@ -7095,7 +7095,9 @@
"input_cost_per_token": 0.00000072,
"output_cost_per_token": 0.00000072,
"litellm_provider": "bedrock_converse",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
},
"meta.llama2-13b-chat-v1": {
"max_tokens": 4096,
@@ -7435,6 +7437,17 @@
"supports_function_calling": true,
"supports_tool_choice": false
},
"us.meta.llama3-3-70b-instruct-v1:0": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000072,
"output_cost_per_token": 0.00000072,
"litellm_provider": "bedrock_converse",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
},
"512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
"max_tokens": 77,
"max_input_tokens": 77,
15 changes: 14 additions & 1 deletion model_prices_and_context_window.json
@@ -7095,7 +7095,9 @@
"input_cost_per_token": 0.00000072,
"output_cost_per_token": 0.00000072,
"litellm_provider": "bedrock_converse",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
},
"meta.llama2-13b-chat-v1": {
"max_tokens": 4096,
@@ -7435,6 +7437,17 @@
"supports_function_calling": true,
"supports_tool_choice": false
},
"us.meta.llama3-3-70b-instruct-v1:0": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000072,
"output_cost_per_token": 0.00000072,
"litellm_provider": "bedrock_converse",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
},
"512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
"max_tokens": 77,
"max_input_tokens": 77,
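
The two pricing files above register us.meta.llama3-3-70b-instruct-v1:0 for cost tracking. A hedged sketch of exercising the new entry, mirroring the local cost-map setup used in the new base unit test; the token counts are arbitrary illustration values:

import os
import litellm

# Load the local cost map so the new entry is available (same setup the
# new test_basic_tool_calling test uses).
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

# At 0.00000072 USD per token, 1000 tokens each way should cost ~0.00072 USD.
prompt_cost, completion_cost = litellm.cost_per_token(
    model="us.meta.llama3-3-70b-instruct-v1:0",
    prompt_tokens=1000,
    completion_tokens=1000,
)
print(prompt_cost, completion_cost)
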
101 changes: 101 additions & 0 deletions tests/llm_translation/base_llm_unit_tests.py
@@ -634,6 +634,107 @@ def pdf_messages(self):

return url

def test_basic_tool_calling(self):
try:
from litellm import completion, ModelResponse

litellm.set_verbose = True
litellm._turn_on_debug()
from litellm.utils import supports_function_calling

os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

base_completion_call_args = self.get_base_completion_call_args()
if not supports_function_calling(base_completion_call_args["model"], None):
print("Model does not support function calling")
pytest.skip("Model does not support function calling")

tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
},
},
"required": ["location"],
},
},
}
]
messages = [
{
"role": "user",
"content": "What's the weather like in Boston today in fahrenheit?",
}
]
request_args = {
"messages": messages,
"tools": tools,
}
request_args.update(self.get_base_completion_call_args())
response: ModelResponse = completion(**request_args) # type: ignore
print(f"response: {response}")

assert response is not None

# If the provider did not return any tool calls, do not make a subsequent LLM API call
if response.choices[0].message.tool_calls is None:
return
# Add any assertions here to check the response

assert isinstance(
response.choices[0].message.tool_calls[0].function.name, str
)
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)
messages.append(
response.choices[0].message.model_dump()
) # Add assistant tool invokes
tool_result = (
'{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
)
# Add user submitted tool results in the OpenAI format
messages.append(
{
"tool_call_id": response.choices[0].message.tool_calls[0].id,
"role": "tool",
"name": response.choices[0].message.tool_calls[0].function.name,
"content": tool_result,
}
)
# In the second response, the model should deduce the answer from the tool results
request_2_args = {
"messages": messages,
"tools": tools,
}
request_2_args.update(self.get_base_completion_call_args())
second_response: ModelResponse = completion(**request_2_args) # type: ignore
print(f"second response: {second_response}")
assert second_response is not None

# either content or tool calls should be present
assert (
second_response.choices[0].message.content is not None
or second_response.choices[0].message.tool_calls is not None
)
except litellm.RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")

@pytest.mark.asyncio
async def test_completion_cost(self):
from litellm import completion_cost
3 changes: 3 additions & 0 deletions tests/llm_translation/test_azure_o_series.py
@@ -39,6 +39,9 @@ def test_tool_call_no_arguments(self, tool_call_no_arguments):
"""Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
pass

def test_basic_tool_calling(self):
pass

def test_prompt_caching(self):
"""Temporary override. o1 prompt caching is not working."""
pass
1 change: 1 addition & 0 deletions tests/llm_translation/test_bedrock_completion.py
@@ -2092,6 +2092,7 @@ def test_bedrock_prompt_caching_message(messages, expected_cache_control):
("bedrock/mistral.mistral-7b-instruct-v0.1:0", True),
("bedrock/meta.llama3-1-8b-instruct:0", True),
("bedrock/meta.llama3-2-70b-instruct:0", True),
("bedrock/meta.llama3-3-70b-instruct-v1:0", True),
("bedrock/amazon.titan-embed-text-v1:0", False),
],
)
20 changes: 20 additions & 0 deletions tests/llm_translation/test_bedrock_llama.py
@@ -0,0 +1,20 @@
from base_llm_unit_tests import BaseLLMChatTest
import pytest
import sys
import os

sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm


class TestBedrockTestSuite(BaseLLMChatTest):
def test_tool_call_no_arguments(self, tool_call_no_arguments):
pass

def get_base_completion_call_args(self) -> dict:
litellm._turn_on_debug()
return {
"model": "bedrock/converse/us.meta.llama3-3-70b-instruct-v1:0",
}
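
A hedged sketch of running just the new suite locally, assuming Bedrock credentials are configured in the environment and that the test id below matches this commit's file layout:

import pytest

# Run only the new tool-calling test from the Bedrock Llama 3.3 suite added above.
pytest.main([
    "-s",
    "tests/llm_translation/test_bedrock_llama.py::TestBedrockTestSuite::test_basic_tool_calling",
])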
