Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AWS Bedrock models native support as LLM judge #1426

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions deepeval/key_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ class KeyValues(Enum):
LOCAL_EMBEDDING_BASE_URL = "LOCAL_EMBEDDING_BASE_URL"
LOCAL_EMBEDDING_API_KEY = "LOCAL_EMBEDDING_API_KEY"
USE_LOCAL_EMBEDDINGS = "USE_LOCAL_EMBEDDINGS"
# AWS Cloud support
AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID"
AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY"
AWS_SESSION_TOKEN = "AWS_SESSION_TOKEN"
AWS_REGION = "AWS_REGION"


class KeyFileHandler:
Expand Down
2 changes: 1 addition & 1 deletion deepeval/metrics/answer_relevancy/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,4 @@ def generate_reason(
{input}

JSON:
"""
"""
1 change: 1 addition & 0 deletions deepeval/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from deepeval.models.gpt_model import GPTModel, MultimodalGPTModel
from deepeval.models.gpt_model_schematic import SchematicGPTModel
from deepeval.models.openai_embedding_model import OpenAIEmbeddingModel
from deepeval.models.bedrock_model import BedrockModel, MultimodalBedrockModel

# TODO: uncomment out once fixed
# from deepeval.models.summac_model import SummaCModels
Expand Down
382 changes: 382 additions & 0 deletions deepeval/models/bedrock_model.py

Large diffs are not rendered by default.

92 changes: 92 additions & 0 deletions docs/aws_setup.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# AWS Bedrock Setup for DeepEval

To use AWS Bedrock models (like LLaMA, Claude, etc.) with DeepEval, you'll need to set up your AWS credentials. Here's how:

## Option 1: Environment Variables

Set the following environment variables:

```bash
export AWS_ACCESS_KEY_ID="your-access-key-id"
export AWS_SECRET_ACCESS_KEY="your-secret-access-key"
export AWS_SESSION_TOKEN="your-session-token" # Optional: If using temporary credentials
export AWS_REGION="your-region" # e.g., "us-west-2"
```

## Option 2: DeepEval Configuration

Use the DeepEval CLI to set your credentials:

```bash
deepeval config set AWS_ACCESS_KEY_ID "your-access-key-id"
deepeval config set AWS_SECRET_ACCESS_KEY "your-secret-access-key"
deepeval config set AWS_SESSION_TOKEN "your-session-token" # Optional
deepeval config set AWS_REGION "your-region"
```

## Option 3: Direct Initialization

You can also pass the credentials directly when initializing the models:

```python
from deepeval.models import BedrockModel

# Initialize Bedrock model with explicit credentials
model = BedrockModel(
model_id="your-model-id",
access_key_id="your-access-key-id",
secret_access_key="your-secret-access-key",
session_token="your-session-token", # Optional
region="your-region"
)
```

## Authentication

Make sure you have:

1. An AWS account with Bedrock service enabled
2. AWS credentials configured (either via environment variables, AWS credentials file, or direct initialization as shown above)

## Available Models

- claude-3-7-sonnet-20250219-v1:0
- claude-3-5-haiku-20241022-v1:0
- claude-3-5-sonnet-20241022-v2:0
- claude-3-5-sonnet-20240620-v1:0
- claude-3-opus-20240229-v1:0
- claude-3-sonnet-20240229-v1:0
- claude-3-haiku-20240307-v1:0

## Default Models

- Text-Only: claude-3-7-sonnet-20250219-v1:0
- Multimodal: claude-3-7-sonnet-20250219-v1:0

## Example Usage

```python
from deepeval.models import BedrockModel
from deepeval.test_case import LLMTestCase
from deepeval.metrics import AnswerRelevancyMetric

# Initialize the model
model = BedrockModel(
model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
access_key_id="your-access-key-id",
secret_access_key="your-secret-access-key",
region="us-west-2"
)

# Create a test case
test_case = LLMTestCase(
input="What is the capital of France?",
actual_output=model.generate("What is the capital of France?")
)

# Evaluate using DeepEval metrics
metric = AnswerRelevancyMetric(threshold=0.7)
metric.measure(test_case)
print(f"Score: {metric.score}")
```

2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
"nest-asyncio",
"datasets",
"ollama",
"boto3",
"pillow"
],
extras_require={
"dev": ["black"],
Expand Down
Binary file added tests/data/test.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
215 changes: 215 additions & 0 deletions tests/test_bedrock_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
"""Tests for Amazon Bedrock model implementations
"""

import pytest
from unittest.mock import patch, MagicMock
import base64
from botocore.response import StreamingBody

from deepeval.models import BedrockModel, MultimodalBedrockModel
from deepeval.test_case import MLLMImage
from deepeval.key_handler import KeyValues, KEY_FILE_HANDLER

# Mock credentials for testing
# Region the mock_key_handler fixture reports as stored configuration.
TEST_REGION = "us-east-1"
# Raw Anthropic-style response body that the stubbed invoke_model returns.
TEST_RESPONSE_JSON = '{"content": [{"type": "text", "text": "This is a test response"}]}'
# Plain-text payload expected after the model extracts it from the JSON above.
TEST_RESPONSE = "This is a test response"
# Remote image used for non-local multimodal inputs.
TEST_IMAGE_URL = "https://www.shutterstock.com/image-photo/funny-large-longhair-gray-kitten-600nw-1842198919.jpg"
# Local JPEG fixture used for base64-encoded multimodal inputs.
TEST_LOCAL_IMAGE = "tests/data/test.jpg"

@pytest.fixture
def mock_boto3_client():
    """Patch ``boto3.client`` with a stub Bedrock runtime client.

    The stub's ``invoke_model`` yields a dict whose ``body`` behaves like a
    ``StreamingBody`` and reads back ``TEST_RESPONSE_JSON`` as UTF-8 bytes.
    """
    with patch('boto3.client') as patched:
        stub_client = MagicMock()
        stub_body = MagicMock(
            spec=StreamingBody,
            read=MagicMock(return_value=TEST_RESPONSE_JSON.encode("utf-8")),
        )
        stub_client.invoke_model.return_value = {"body": stub_body}
        patched.return_value = stub_client
        yield patched

@pytest.fixture
def mock_key_handler():
    """Patch ``KEY_FILE_HANDLER.fetch_data`` to serve only the AWS region.

    Any key other than ``KeyValues.AWS_REGION`` resolves to ``None``, which
    mimics an otherwise-empty key file.
    """
    stored_keys = {KeyValues.AWS_REGION: TEST_REGION}
    with patch('deepeval.key_handler.KEY_FILE_HANDLER.fetch_data') as patched:
        patched.side_effect = stored_keys.get
        yield patched

class TestBedrockModel:
    """Test suite for the text-only Amazon Bedrock model."""

    def test_initialization(self, mock_boto3_client, mock_key_handler):
        """Defaults: Claude 3.7 Sonnet and the region stored in the key handler."""
        model = BedrockModel()

        assert model.model_id == "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
        assert model.region == TEST_REGION

        # Assert against TEST_REGION rather than a duplicated 'us-east-1'
        # literal so this stays in sync with the mock_key_handler fixture.
        mock_boto3_client.assert_called_once_with(
            'bedrock-runtime',
            region_name=TEST_REGION,
            aws_access_key_id=None,
            aws_secret_access_key=None,
            aws_session_token=None,
        )

    def test_initialization_with_custom_params(self, mock_boto3_client):
        """Explicit model_id and region override the defaults."""
        model = BedrockModel(
            model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0",
            region="us-west-2"
        )

        assert model.model_id == "us.anthropic.claude-3-5-haiku-20241022-v1:0"
        assert model.region == "us-west-2"

    def test_invalid_model_name(self):
        """An unsupported model id raises ValueError at construction time."""
        with pytest.raises(ValueError, match="Invalid model"):
            BedrockModel(model_id="invalid-model")

    def test_generate(self, mock_boto3_client, mock_key_handler):
        """generate() returns the text extracted from the stubbed invoke_model body."""
        model = BedrockModel()
        response = model.generate("Test prompt")

        assert response == TEST_RESPONSE
        mock_boto3_client.return_value.invoke_model.assert_called_once()

    @pytest.mark.asyncio
    async def test_a_generate(self, mock_boto3_client, mock_key_handler):
        """a_generate() mirrors generate() on the async path."""
        model = BedrockModel()
        response = await model.a_generate("Test prompt")

        assert response == TEST_RESPONSE
        mock_boto3_client.return_value.invoke_model.assert_called_once()


class TestBedrockMultimodalModel:
    """Test suite for the Bedrock multimodal model (Anthropic Claude 3.7 Sonnet)."""

    def test_initialization(self, mock_boto3_client, mock_key_handler):
        """Defaults: Claude 3.7 Sonnet and the region stored in the key handler."""
        model = MultimodalBedrockModel()

        assert model.model_id == "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
        assert model.region == TEST_REGION

        # Assert against TEST_REGION rather than a duplicated 'us-east-1'
        # literal so this stays in sync with the mock_key_handler fixture.
        mock_boto3_client.assert_called_once_with(
            'bedrock-runtime',
            region_name=TEST_REGION,
            aws_access_key_id=None,
            aws_secret_access_key=None,
            aws_session_token=None,
        )

    def test_initialization_with_custom_params(self, mock_boto3_client):
        """Explicit model_id and region override the defaults."""
        model = MultimodalBedrockModel(
            model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
            region="us-west-2"
        )

        assert model.model_id == "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
        assert model.region == "us-west-2"

    def test_invalid_model_name(self):
        """An unsupported model id raises ValueError at construction time."""
        with pytest.raises(ValueError, match="Invalid model"):
            MultimodalBedrockModel(model_id="invalid-model")

    def test_generate_prompt_local_image(self, mock_boto3_client, mock_key_handler):
        """A local image is inlined into the prompt as its exact base64 encoding."""
        model = MultimodalBedrockModel()

        # Encode the fixture ourselves so we can assert exact equality below
        # (the original test computed this but never used it).
        with open(TEST_LOCAL_IMAGE, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")

        multimodal_input = [
            "What's in these images?",
            MLLMImage(url=TEST_LOCAL_IMAGE, local=True)
        ]

        prompt = model.generate_prompt(multimodal_input)

        assert isinstance(prompt, list)
        assert len(prompt) == 2

        assert isinstance(prompt[0], dict)
        assert prompt[0]['content'][0]["type"] == "text"
        assert prompt[0]['content'][0]["text"] == "What's in these images?"

        assert isinstance(prompt[1], dict)
        assert prompt[1]['content'][0]["type"] == "image"
        assert "source" in prompt[1]['content'][0]
        assert prompt[1]['content'][0]["source"]["type"] == "base64"
        assert prompt[1]['content'][0]["source"]["media_type"] == "image/jpeg"
        # Exact match against our own encoding is stronger than the previous
        # startswith("/") check (base64 of a JPEG begins with "/9j/").
        assert prompt[1]['content'][0]["source"]["data"] == base64_image

    def test_generate_prompt_remote_image(self, mock_boto3_client, mock_key_handler):
        """A remote image URL is fetched and embedded as base64 JPEG data."""
        model = MultimodalBedrockModel()

        multimodal_input = [
            "Describe this image:",
            MLLMImage(url=TEST_IMAGE_URL, local=False)
        ]

        prompt = model.generate_prompt(multimodal_input)

        assert isinstance(prompt, list)
        assert len(prompt) == 2

        assert prompt[0]['content'][0]["type"] == "text"
        assert prompt[0]['content'][0]["text"] == "Describe this image:"

        assert isinstance(prompt[1], dict)
        assert prompt[1]['content'][0]["type"] == "image"
        assert "source" in prompt[1]['content'][0]
        assert prompt[1]['content'][0]["source"]["type"] == "base64"
        assert prompt[1]['content'][0]["source"]["media_type"] == "image/jpeg"
        assert isinstance(prompt[1]['content'][0]["source"]["data"], str)
        # base64 of JPEG magic bytes (FF D8 FF) always starts with "/9j/".
        assert prompt[1]['content'][0]["source"]["data"].startswith("/")

    def test_generate(self, mock_boto3_client, mock_key_handler):
        """generate() with image+text returns the stubbed response text."""
        model = MultimodalBedrockModel()

        multimodal_input = [
            "Describe this image:",
            MLLMImage(url=TEST_LOCAL_IMAGE, local=True)
        ]

        response = model.generate(multimodal_input)

        assert response == TEST_RESPONSE
        mock_boto3_client.return_value.invoke_model.assert_called_once()

    @pytest.mark.asyncio
    async def test_a_generate(self, mock_boto3_client, mock_key_handler):
        """a_generate() mirrors generate() on the async path."""
        model = MultimodalBedrockModel()

        multimodal_input = [
            "Describe this image:",
            MLLMImage(url=TEST_IMAGE_URL, local=False)
        ]

        response = await model.a_generate(multimodal_input)

        assert response == TEST_RESPONSE
        mock_boto3_client.return_value.invoke_model.assert_called_once()

    def test_invalid_input_type(self, mock_boto3_client, mock_key_handler):
        """A raw dict (not an MLLMImage) in the input raises ValueError."""
        model = MultimodalBedrockModel()

        multimodal_input = [
            "Describe this image:",
            {"url": TEST_IMAGE_URL}
        ]

        with pytest.raises(ValueError, match="Invalid input type"):
            model.generate_prompt(multimodal_input)
Loading
Loading