diff --git a/deepeval/key_handler.py b/deepeval/key_handler.py index 5395e4af2..aaf9582b7 100644 --- a/deepeval/key_handler.py +++ b/deepeval/key_handler.py @@ -27,6 +27,11 @@ class KeyValues(Enum): LOCAL_EMBEDDING_BASE_URL = "LOCAL_EMBEDDING_BASE_URL" LOCAL_EMBEDDING_API_KEY = "LOCAL_EMBEDDING_API_KEY" USE_LOCAL_EMBEDDINGS = "USE_LOCAL_EMBEDDINGS" + # AWS Cloud support + AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID" + AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY" + AWS_SESSION_TOKEN = "AWS_SESSION_TOKEN" + AWS_REGION = "AWS_REGION" class KeyFileHandler: diff --git a/deepeval/metrics/answer_relevancy/template.py b/deepeval/metrics/answer_relevancy/template.py index 18d61cad2..d66cddd72 100644 --- a/deepeval/metrics/answer_relevancy/template.py +++ b/deepeval/metrics/answer_relevancy/template.py @@ -121,4 +121,4 @@ def generate_reason( {input} JSON: -""" +""" \ No newline at end of file diff --git a/deepeval/models/__init__.py b/deepeval/models/__init__.py index ea2feb042..9b2931d68 100644 --- a/deepeval/models/__init__.py +++ b/deepeval/models/__init__.py @@ -7,6 +7,7 @@ from deepeval.models.gpt_model import GPTModel, MultimodalGPTModel from deepeval.models.gpt_model_schematic import SchematicGPTModel from deepeval.models.openai_embedding_model import OpenAIEmbeddingModel +from deepeval.models.bedrock_model import BedrockModel, MultimodalBedrockModel # TODO: uncomment out once fixed # from deepeval.models.summac_model import SummaCModels diff --git a/deepeval/models/bedrock_model.py b/deepeval/models/bedrock_model.py new file mode 100644 index 000000000..ee8b8049a --- /dev/null +++ b/deepeval/models/bedrock_model.py @@ -0,0 +1,382 @@ +import boto3 +from botocore.exceptions import NoCredentialsError, PartialCredentialsError +import json +from pydantic import BaseModel, ValidationError +import logging +from typing import List, Tuple, Union, Optional +import base64 +from io import BytesIO +import mimetypes +import requests +from PIL import Image as PILImage + +from 
class BedrockModel(DeepEvalBaseLLM):
    """Text-only LLM wrapper that runs Anthropic Claude models on AWS Bedrock.

    Each request is sent through the ``bedrock-runtime`` client's
    ``invoke_model`` call using the Anthropic Messages payload
    (``anthropic_version`` / ``messages`` / ``system``). The text of the first
    content block of the reply is returned, optionally parsed into a pydantic
    schema.

    Attributes:
        model_id: Bedrock model ID; must be listed in ``valid_bedrock_models``.
        system_prompt: Base system prompt sent with every request.
        access_key_id: AWS access key ID (argument, else key-file value).
        secret_access_key: AWS secret access key (argument, else key-file value).
        session_token: Optional AWS session token (argument, else key-file value).
        region: AWS region for the client (argument, else key-file value).
        client: The ``bedrock-runtime`` boto3 client used for inference.

    Example:
        ```python
        from deepeval.models import BedrockModel

        model = BedrockModel(
            model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
            region="us-west-2",
        )

        # Plain text answer
        text = model.generate("What is the capital of France?")

        # Structured answer validated against a pydantic model class
        answer = model.generate("What is the capital of France?", schema=MySchema)
        ```
    """

    def __init__(
        self,
        model_id: Optional[str] = None,
        system_prompt: Optional[str] = None,
        access_key_id: Optional[str] = None,
        secret_access_key: Optional[str] = None,
        session_token: Optional[str] = None,
        region: Optional[str] = None,
    ):
        """Validate the model ID, resolve credentials and create the client.

        Args:
            model_id: Bedrock model ID; defaults to ``default_bedrock_model``.
            system_prompt: System prompt; defaults to ``default_system_message``.
            access_key_id: AWS access key ID; falls back to the key file.
            secret_access_key: AWS secret access key; falls back to the key file.
            session_token: AWS session token; falls back to the key file.
            region: AWS region; falls back to the key file.

        Raises:
            ValueError: If ``model_id`` is not in ``valid_bedrock_models`` or
                the default boto3 session reports missing credentials.
        """
        self.model_id = model_id or default_bedrock_model
        if self.model_id not in valid_bedrock_models:
            raise ValueError(
                f"Invalid model: {self.model_id}. Available Bedrock models: "
                f"{', '.join(valid_bedrock_models)}"
            )

        self.system_prompt = system_prompt or default_system_message
        # Explicit arguments win; otherwise use whatever the key file stores.
        self.access_key_id = access_key_id or KEY_FILE_HANDLER.fetch_data(
            KeyValues.AWS_ACCESS_KEY_ID
        )
        self.secret_access_key = secret_access_key or KEY_FILE_HANDLER.fetch_data(
            KeyValues.AWS_SECRET_ACCESS_KEY
        )
        self.session_token = session_token or KEY_FILE_HANDLER.fetch_data(
            KeyValues.AWS_SESSION_TOKEN
        )
        self.region = region or KEY_FILE_HANDLER.fetch_data(KeyValues.AWS_REGION)

        if not (self.access_key_id and self.secret_access_key):
            # No explicit key pair: let boto3 use its own credential chain.
            # NOTE(review): boto3 appears to resolve credentials lazily, so
            # this guard may never fire here - confirm.
            try:
                boto3.setup_default_session(region_name=self.region)
            except (NoCredentialsError, PartialCredentialsError) as err:
                raise ValueError(
                    "AWS credentials are not found. Please provide valid access keys or ensure your AWS credentials file is configured."
                ) from err

        self.client = boto3.client(
            "bedrock-runtime",
            region_name=self.region,
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.secret_access_key,
            aws_session_token=self.session_token or None,
        )
        # Was an unconditional print(); libraries should not write to stdout.
        logger.debug("boto3.client called")

    def load_model(self):
        """Return the Bedrock runtime client created in ``__init__``."""
        return self.client

    def extract_json(self, text: str) -> dict:
        """Parse ``text`` as JSON, tolerating surrounding prose or code fences.

        The text is first parsed as-is. If that fails, the span from the first
        ``{`` to the last ``}`` is tried, which covers replies wrapped in a
        Markdown fence or a short preamble.

        Returns:
            The decoded JSON value, or ``{}`` (after logging an error) when
            nothing could be decoded.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass

        start = text.find("{")
        end = text.rfind("}")
        if start != -1 and end > start:
            try:
                return json.loads(text[start : end + 1])
            except json.JSONDecodeError:
                pass

        logger.error("Error decoding JSON")
        return {}

    def generate(
        self, prompt: str, schema: Optional[BaseModel] = None
    ) -> Optional[Union[BaseModel, str, dict]]:
        """Send ``prompt`` to the model and return its reply.

        Args:
            prompt: User message text.
            schema: Optional pydantic model class. When given, its JSON schema
                is appended to the system prompt for this request only and the
                reply is validated into an instance of it.

        Returns:
            * without ``schema``: the reply text (``""`` if the response has no
              usable ``content`` list), or ``{}`` if the request failed;
            * with ``schema``: a ``schema`` instance, or ``None`` if validation
              or the request failed.
        """
        # Build the effective prompt locally. Appending to self.system_prompt
        # would make it grow on every call and leak schemas between requests.
        system_prompt = self.system_prompt
        if schema:
            system_prompt += f"\nOutput JSON schema: {schema.model_json_schema()}"

        payload = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 1000,
            "messages": [{"role": "user", "content": prompt}],
            "system": system_prompt,
        }

        try:
            response = self.client.invoke_model(
                modelId=self.model_id,
                body=json.dumps(payload),
            )
            response_body = json.loads(response["body"].read().decode("utf-8"))

            content = response_body.get("content", [])
            if content and isinstance(content, list):
                generated_text = content[0].get("text", "")
            else:
                logger.error(
                    "Invalid response structure: 'content' not found or malformed"
                )
                generated_text = ""

            if not schema:
                return generated_text

            try:
                return schema(**self.extract_json(generated_text))
            except ValidationError as e:
                logger.error(f"Validation error: {e}")
                return None

        except Exception as e:
            # Best-effort contract kept from the original: log and hand back
            # an empty result instead of propagating the failure.
            logger.error(f"An error occurred while generating the result: {e}")
            return {} if schema is None else None

    async def a_generate(
        self, prompt: str, schema: Optional[BaseModel] = None
    ) -> Optional[Union[BaseModel, str, dict]]:
        """Async counterpart of ``generate``.

        The blocking Bedrock call runs in the event loop's default executor,
        so awaiting it does not stall other coroutines.
        """
        import asyncio

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.generate, prompt, schema)

    def get_model_name(self):
        """Return the Bedrock model ID in use."""
        return self.model_id
class MultimodalBedrockModel(DeepEvalBaseMLLM):
    """Multimodal (text + image) wrapper for Anthropic Claude on AWS Bedrock.

    Text items are sent as text blocks. ``MLLMImage`` items are loaded from a
    local path or downloaded over HTTP, re-encoded to base64 and sent as
    Anthropic ``image`` blocks. Each input item becomes its own ``user``
    message.

    Attributes:
        model_id: Bedrock model ID; must be listed in ``valid_bedrock_models``.
        system_prompt: Base system prompt sent with every request.
        access_key_id: AWS access key ID (argument, else key-file value).
        secret_access_key: AWS secret access key (argument, else key-file value).
        session_token: Optional AWS session token (argument, else key-file value).
        region: AWS region for the client (argument, else key-file value).
        client: The ``bedrock-runtime`` boto3 client used for inference.
        model: Value returned by ``load_model`` (the same client).

    Example:
        ```python
        from deepeval.models import MultimodalBedrockModel
        from deepeval.test_case import MLLMImage

        model = MultimodalBedrockModel(
            model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
            region="us-west-2",
        )
        response = model.generate([
            "Describe what you see in this image:",
            MLLMImage(url="path/to/image.jpg", local=True),
        ])
        ```
    """

    def __init__(
        self,
        model_id: Optional[str] = None,
        system_prompt: Optional[str] = None,
        access_key_id: Optional[str] = None,
        secret_access_key: Optional[str] = None,
        session_token: Optional[str] = None,
        region: Optional[str] = None,
        *args,
        **kwargs
    ):
        """Validate the model ID, resolve credentials and create the client.

        Args:
            model_id: Bedrock model ID; defaults to
                ``default_multimodal_bedrock_model``.
            system_prompt: System prompt; defaults to ``default_system_message``.
            access_key_id: AWS access key ID; falls back to the key file.
            secret_access_key: AWS secret access key; falls back to the key file.
            session_token: AWS session token; falls back to the key file.
            region: AWS region; falls back to the key file.
            *args, **kwargs: Forwarded to the base class and ``load_model``.

        Raises:
            ValueError: If ``model_id`` is not in ``valid_bedrock_models`` or
                the default boto3 session reports missing credentials.
        """
        self.model_id = model_id or default_multimodal_bedrock_model
        if self.model_id not in valid_bedrock_models:
            # Name the rejected ID, matching BedrockModel's message.
            raise ValueError(
                f"Invalid model: {self.model_id}. Available Bedrock models: "
                f"{', '.join(valid_bedrock_models)}"
            )

        self.system_prompt = system_prompt or default_system_message
        # Explicit arguments win; otherwise use whatever the key file stores.
        self.access_key_id = access_key_id or KEY_FILE_HANDLER.fetch_data(
            KeyValues.AWS_ACCESS_KEY_ID
        )
        self.secret_access_key = secret_access_key or KEY_FILE_HANDLER.fetch_data(
            KeyValues.AWS_SECRET_ACCESS_KEY
        )
        self.session_token = session_token or KEY_FILE_HANDLER.fetch_data(
            KeyValues.AWS_SESSION_TOKEN
        )
        self.region = region or KEY_FILE_HANDLER.fetch_data(KeyValues.AWS_REGION)

        if not (self.access_key_id and self.secret_access_key):
            # No explicit key pair: let boto3 use its own credential chain.
            # NOTE(review): boto3 appears to resolve credentials lazily, so
            # this guard may never fire here - confirm.
            try:
                boto3.setup_default_session(region_name=self.region)
            except (NoCredentialsError, PartialCredentialsError) as err:
                raise ValueError(
                    "AWS credentials are not found. Please provide valid access keys or ensure your AWS credentials file is configured."
                ) from err

        self.client = boto3.client(
            "bedrock-runtime",
            region_name=self.region,
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.secret_access_key,
            aws_session_token=self.session_token or None,
        )

        # Pass the resolved ID so the base class never sees None when the
        # default model is used.
        super().__init__(self.model_id, *args, **kwargs)
        self.model = self.load_model(*args, **kwargs)

    def load_model(self, *args, **kwargs):
        """Return the Bedrock runtime client created in ``__init__``."""
        return self.client

    def extract_json(self, text: str) -> dict:
        """Parse ``text`` as JSON, tolerating surrounding prose or code fences.

        The text is first parsed as-is. If that fails, the span from the first
        ``{`` to the last ``}`` is tried.

        Returns:
            The decoded JSON value, or ``{}`` (after logging an error) when
            nothing could be decoded.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass

        start = text.find("{")
        end = text.rfind("}")
        if start != -1 and end > start:
            try:
                return json.loads(text[start : end + 1])
            except json.JSONDecodeError:
                pass

        logger.error("Error decoding JSON")
        return {}

    def encode_pil_image(self, pil_image: PILImage.Image) -> Tuple[str, str]:
        """Re-encode a PIL image and return ``(base64_data, mime_type)``.

        The image is saved in its own format. Images without one (for example
        those built in memory, where ``format`` is ``None``) are saved as PNG
        instead of raising ``AttributeError``.
        """
        image_format = (pil_image.format or "PNG").lower()
        buffer = BytesIO()
        pil_image.save(buffer, format=image_format)
        encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
        return encoded, f"image/{image_format}"

    def download_image(self, url: str, timeout: float = 30.0) -> PILImage.Image:
        """Download ``url`` and open the response body as a PIL image.

        Args:
            url: HTTP(S) address of the image.
            timeout: Seconds to wait for the server, so a stalled host cannot
                hang the evaluation indefinitely.

        Raises:
            ValueError: If the request fails or returns an error status.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
        }
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            return PILImage.open(BytesIO(response.content))
        except requests.exceptions.RequestException as e:
            raise ValueError(
                f"Failed to download image from URL {url}: {str(e)}"
            ) from e

    def _build_image_block(self, item: MLLMImage) -> dict:
        """Turn one ``MLLMImage`` into an Anthropic base64 ``image`` block."""
        if not hasattr(item, "local") or not hasattr(item, "url"):
            raise ValueError(
                "Invalid MLLMImage object: Missing 'local' or 'url' attributes."
            )

        if item.local:
            try:
                # The context manager closes the file handle after encoding.
                with PILImage.open(item.url) as image:
                    image_data, mime_type = self.encode_pil_image(image)
            except FileNotFoundError as err:
                raise ValueError(
                    f"Local image file not found: {item.url}"
                ) from err
        else:
            if not isinstance(item.url, str) or not item.url.startswith("http"):
                raise ValueError("Invalid remote image URL.")
            image = self.download_image(item.url)
            image_data, mime_type = self.encode_pil_image(image)

        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": mime_type,
                "data": image_data,
            },
        }

    def generate_prompt(
        self, multimodal_input: List[Union[str, MLLMImage]]
    ) -> List[dict]:
        """Build the ``messages`` list for a mixed text/image input.

        Each non-blank string and each image becomes its own ``user`` message
        holding a single content block; blank strings are skipped.

        Raises:
            ValueError: For items that are neither ``str`` nor ``MLLMImage``,
                or for images that cannot be loaded.
        """
        prompt = []
        for item in multimodal_input:
            if isinstance(item, str):
                if not item.strip():
                    continue  # skip empty or whitespace-only text
                block = {"type": "text", "text": item}
            elif isinstance(item, MLLMImage):
                block = self._build_image_block(item)
            else:
                raise ValueError(
                    f"Invalid input type: Expected str or MLLMImage, got {type(item).__name__}"
                )
            prompt.append({"role": "user", "content": [block]})
        return prompt

    def generate(
        self,
        multimodal_input: List[Union[str, MLLMImage]],
        schema: Optional[BaseModel] = None,
    ) -> Optional[Union[BaseModel, str]]:
        """Send a text/image request to Bedrock and return the reply.

        Args:
            multimodal_input: Strings and ``MLLMImage`` objects, in order.
            schema: Optional pydantic model class. When given, its JSON schema
                is appended to the system prompt for this request only and the
                reply is validated into an instance of it.

        Returns:
            The reply text (``""`` if the response has no usable ``content``
            list), or a ``schema`` instance, or ``None`` if schema validation
            fails.

        Raises:
            Exception: Any error from building the prompt is raised directly;
                errors from the Bedrock call are logged and re-raised.
        """
        messages_list = self.generate_prompt(multimodal_input)

        # Build the effective prompt locally. Appending to self.system_prompt
        # would make it grow on every call and leak schemas between requests.
        system_prompt = self.system_prompt
        if schema:
            system_prompt += f"\nOutput JSON schema: {schema.model_json_schema()}"

        payload = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 1000,
            "messages": messages_list,
            "system": system_prompt,
        }

        try:
            response = self.client.invoke_model(
                modelId=self.model_id,
                body=json.dumps(payload),
            )
            response_body = json.loads(response["body"].read().decode("utf-8"))

            content = response_body.get("content", [])
            if content and isinstance(content, list):
                generated_text = content[0].get("text", "")
            else:
                logger.error(
                    "Invalid response structure: 'content' not found or malformed"
                )
                generated_text = ""

            if not schema:
                return generated_text

            try:
                return schema(**self.extract_json(generated_text))
            except ValidationError as e:
                logger.error(f"Validation error: {e}")
                return None

        except Exception as e:
            logger.error(f"Error during Bedrock model inference: {e}")
            raise

    async def a_generate(
        self,
        multimodal_input: List[Union[str, MLLMImage]],
        schema: Optional[BaseModel] = None,
    ) -> Optional[Union[BaseModel, str]]:
        """Async counterpart of ``generate``.

        Image loading and the blocking Bedrock call run in the event loop's
        default executor, so awaiting it does not stall other coroutines.
        """
        import asyncio

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, self.generate, multimodal_input, schema
        )

    def get_model_name(self) -> str:
        """Return the Bedrock model ID in use."""
        return self.model_id
AWS credentials configured (either via environment variables, AWS credentials file, or direct initialization as shown above) + +## Available Models + +- us.anthropic.claude-3-7-sonnet-20250219-v1:0 +- us.anthropic.claude-3-5-haiku-20241022-v1:0 +- us.anthropic.claude-3-5-sonnet-20241022-v2:0 +- us.anthropic.claude-3-5-sonnet-20240620-v1:0 +- anthropic.claude-3-opus-20240229-v1:0 +- anthropic.claude-3-sonnet-20240229-v1:0 +- anthropic.claude-3-haiku-20240307-v1:0 + +## Default Models + +- Text-Only: us.anthropic.claude-3-7-sonnet-20250219-v1:0 +- Multimodal: us.anthropic.claude-3-7-sonnet-20250219-v1:0 + +## Example Usage + +```python +from deepeval.models import BedrockModel +from deepeval.test_case import LLMTestCase +from deepeval.metrics import AnswerRelevancyMetric + +# Initialize the model +model = BedrockModel( + model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0", + access_key_id="your-access-key-id", + secret_access_key="your-secret-access-key", + region="us-west-2" +) + +# Create a test case +test_case = LLMTestCase( + input="What is the capital of France?", + actual_output=model.generate("What is the capital of France?") +) + +# Evaluate using DeepEval metrics, with the Bedrock model as the judge +metric = AnswerRelevancyMetric(model=model, threshold=0.7) +metric.measure(test_case) +print(f"Score: {metric.score}") +``` + diff --git a/setup.py b/setup.py index d62057abd..7d75dd875 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,8 @@ "nest-asyncio", "datasets", "ollama", + "boto3", + "pillow" ], extras_require={ "dev": ["black"], diff --git a/tests/data/test.jpg b/tests/data/test.jpg new file mode 100644 index 000000000..cd92bb4a1 Binary files /dev/null and b/tests/data/test.jpg differ diff --git a/tests/test_bedrock_model.py b/tests/test_bedrock_model.py new file mode 100644 index 000000000..f9c0dfbbb --- /dev/null +++ b/tests/test_bedrock_model.py @@ -0,0 +1,215 @@ +"""Tests for Amazon Bedrock model implementations +""" + +import pytest +from unittest.mock import patch, MagicMock +import base64 +from botocore.response import StreamingBody + +from deepeval.models import BedrockModel, 
# Shared constants for the mocked Bedrock tests
TEST_REGION = "us-east-1"
TEST_RESPONSE_JSON = '{"content": [{"type": "text", "text": "This is a test response"}]}'
TEST_RESPONSE = "This is a test response"
TEST_IMAGE_URL = "https://www.shutterstock.com/image-photo/funny-large-longhair-gray-kitten-600nw-1842198919.jpg"
TEST_LOCAL_IMAGE = "tests/data/test.jpg"
DEFAULT_MODEL_ID = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"


@pytest.fixture
def mock_boto3_client():
    """Patch ``boto3.client`` so it returns a client with a canned reply."""
    with patch("boto3.client") as mock:
        body = MagicMock(spec=StreamingBody)
        body.read.return_value = TEST_RESPONSE_JSON.encode("utf-8")
        client = MagicMock()
        client.invoke_model.return_value = {"body": body}
        mock.return_value = client
        yield mock


@pytest.fixture
def mock_key_handler():
    """Make the key file return only a region (no stored credentials)."""
    with patch("deepeval.key_handler.KEY_FILE_HANDLER.fetch_data") as mock:
        stored = {KeyValues.AWS_REGION: TEST_REGION}
        mock.side_effect = stored.get
        yield mock


@pytest.fixture
def mock_image_download():
    """Serve the local test image for any ``requests.get`` call.

    Keeps the remote-image tests hermetic: they still run the real
    ``download_image`` code path without contacting an external host.
    """
    with open(TEST_LOCAL_IMAGE, "rb") as image_file:
        image_bytes = image_file.read()
    with patch("requests.get") as mock_get:
        mock_get.return_value = MagicMock(content=image_bytes)
        yield mock_get


def _assert_text_message(message, expected_text):
    """Check that ``message`` holds a single text block with ``expected_text``."""
    assert isinstance(message, dict)
    block = message["content"][0]
    assert block["type"] == "text"
    assert block["text"] == expected_text


def _assert_jpeg_image_message(message):
    """Check that ``message`` holds a single base64-encoded JPEG image block."""
    assert isinstance(message, dict)
    block = message["content"][0]
    assert block["type"] == "image"
    assert "source" in block
    source = block["source"]
    assert source["type"] == "base64"
    assert source["media_type"] == "image/jpeg"
    assert isinstance(source["data"], str)
    # The tests expect the base64 payload of a JPEG to start with "/".
    assert source["data"].startswith("/")


class TestBedrockModel:
    """Test suite for Amazon Bedrock model"""

    def test_initialization(self, mock_boto3_client, mock_key_handler):
        """Test model initialization with default parameters"""
        model = BedrockModel()

        assert model.model_id == DEFAULT_MODEL_ID
        assert model.region == TEST_REGION

        mock_boto3_client.assert_called_once_with(
            "bedrock-runtime",
            region_name="us-east-1",
            aws_access_key_id=None,
            aws_secret_access_key=None,
            aws_session_token=None,
        )

    def test_initialization_with_custom_params(self, mock_boto3_client):
        """Test model initialization with custom parameters"""
        model = BedrockModel(
            model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0",
            region="us-west-2",
        )

        assert model.model_id == "us.anthropic.claude-3-5-haiku-20241022-v1:0"
        assert model.region == "us-west-2"

    def test_invalid_model_name(self):
        """Test initialization with invalid model name"""
        with pytest.raises(ValueError, match="Invalid model"):
            BedrockModel(model_id="invalid-model")

    def test_generate(self, mock_boto3_client, mock_key_handler):
        """Test text generation"""
        model = BedrockModel()

        response = model.generate("Test prompt")

        assert response == TEST_RESPONSE
        mock_boto3_client.return_value.invoke_model.assert_called_once()

    @pytest.mark.asyncio
    async def test_a_generate(self, mock_boto3_client, mock_key_handler):
        """Test async text generation"""
        model = BedrockModel()

        response = await model.a_generate("Test prompt")

        assert response == TEST_RESPONSE
        mock_boto3_client.return_value.invoke_model.assert_called_once()


class TestBedrockMultimodalModel:
    """Test suite for Bedrock multimodal model (Anthropic Claude 3.7 Sonnet)."""

    def test_initialization(self, mock_boto3_client, mock_key_handler):
        """Test model initialization with default parameters."""
        model = MultimodalBedrockModel()

        assert model.model_id == DEFAULT_MODEL_ID
        assert model.region == TEST_REGION

        mock_boto3_client.assert_called_once_with(
            "bedrock-runtime",
            region_name="us-east-1",
            aws_access_key_id=None,
            aws_secret_access_key=None,
            aws_session_token=None,
        )

    def test_initialization_with_custom_params(self, mock_boto3_client):
        """Test model initialization with custom parameters."""
        model = MultimodalBedrockModel(
            model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
            region="us-west-2",
        )

        assert model.model_id == "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
        assert model.region == "us-west-2"

    def test_invalid_model_name(self):
        """Test initialization with invalid model name."""
        with pytest.raises(ValueError, match="Invalid model"):
            MultimodalBedrockModel(model_id="invalid-model")

    def test_generate_prompt_local_image(self, mock_boto3_client, mock_key_handler):
        """Test multimodal prompt generation with a local image."""
        model = MultimodalBedrockModel()

        prompt = model.generate_prompt(
            [
                "What's in these images?",
                MLLMImage(url=TEST_LOCAL_IMAGE, local=True),
            ]
        )

        assert isinstance(prompt, list)
        assert len(prompt) == 2
        _assert_text_message(prompt[0], "What's in these images?")
        _assert_jpeg_image_message(prompt[1])

    def test_generate_prompt_remote_image(
        self, mock_boto3_client, mock_key_handler, mock_image_download
    ):
        """Test multimodal prompt generation with a remote image."""
        model = MultimodalBedrockModel()

        prompt = model.generate_prompt(
            [
                "Describe this image:",
                MLLMImage(url=TEST_IMAGE_URL, local=False),
            ]
        )

        assert isinstance(prompt, list)
        assert len(prompt) == 2
        _assert_text_message(prompt[0], "Describe this image:")
        _assert_jpeg_image_message(prompt[1])
        mock_image_download.assert_called_once()

    def test_generate(self, mock_boto3_client, mock_key_handler):
        """Test multimodal generation with image and text."""
        model = MultimodalBedrockModel()

        response = model.generate(
            [
                "Describe this image:",
                MLLMImage(url=TEST_LOCAL_IMAGE, local=True),
            ]
        )

        assert response == TEST_RESPONSE
        mock_boto3_client.return_value.invoke_model.assert_called_once()

    @pytest.mark.asyncio
    async def test_a_generate(
        self, mock_boto3_client, mock_key_handler, mock_image_download
    ):
        """Test async multimodal generation."""
        model = MultimodalBedrockModel()

        response = await model.a_generate(
            [
                "Describe this image:",
                MLLMImage(url=TEST_IMAGE_URL, local=False),
            ]
        )

        assert response == TEST_RESPONSE
        mock_boto3_client.return_value.invoke_model.assert_called_once()

    def test_invalid_input_type(self, mock_boto3_client, mock_key_handler):
        """Test handling of invalid input types."""
        model = MultimodalBedrockModel()

        multimodal_input = [
            "Describe this image:",
            {"url": TEST_IMAGE_URL},
        ]

        with pytest.raises(ValueError, match="Invalid input type"):
            model.generate_prompt(multimodal_input)
+) + +multimodal_test_case = MLLMTestCase( + input=["Tell me about some landmarks in France"], + actual_output=[ + "The Eiffel Tower is located in Paris, France.", + MLLMImage( + url="https://upload.wikimedia.org/wikipedia/commons/thumb/8/85/Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg/375px-Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg" + ), + "The Statue of Liberty was a gift from France to the United States.", + MLLMImage( + url="https://upload.wikimedia.org/wikipedia/commons/thumb/3/3d/Front_view_of_Statue_of_Liberty_with_pedestal_and_base_2024.jpg/375px-Front_view_of_Statue_of_Liberty_with_pedestal_and_base_2024.jpg" + ), + ], + expected_output=[ + "The Eiffel Tower is located in Paris, France.", + MLLMImage( + url="https://upload.wikimedia.org/wikipedia/commons/thumb/8/85/Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg/375px-Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg" + ), + "The Statue of Liberty was a gift from France to the United States.", + MLLMImage( + url="https://upload.wikimedia.org/wikipedia/commons/thumb/3/3d/Front_view_of_Statue_of_Liberty_with_pedestal_and_base_2024.jpg/375px-Front_view_of_Statue_of_Liberty_with_pedestal_and_base_2024.jpg" + ), + ], + context=[ + "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris.", + "It is named after the engineer Gustave Eiffel.", + MLLMImage( + url="https://upload.wikimedia.org/wikipedia/commons/thumb/8/85/Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg/375px-Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg" + ), + "The Statue of Liberty is a colossal neoclassical sculpture on Liberty Island in New York Harbor.", + MLLMImage( + url="https://upload.wikimedia.org/wikipedia/commons/thumb/0/03/Liberty-from-behind-2024.jpg/330px-Liberty-from-behind-2024.jpg" + ), + ], + retrieval_context=[ + "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris.", + "It is named after the engineer Gustave Eiffel.", + MLLMImage( + 
def _live_model_kwargs():
    """Collect the constructor arguments shared by every live test model."""
    return dict(
        model_id="us.anthropic.claude-3-5-sonnet-20241022-v2:0",
        access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        session_token=os.getenv("AWS_SESSION_TOKEN"),
        region=os.getenv("AWS_REGION"),
    )


@pytest.mark.skipif(SKIP_LIVE_TESTS, reason="AWS credentials not set")
class TestBedrockModelLive:
    """Live API tests for BedrockModel."""

    def test_structured_output_generation(self):
        """Ask for city facts and expect a validated ``CityInfo`` instance."""
        from pydantic import BaseModel

        class CityInfo(BaseModel):
            city: str
            country: str
            population: int

        judge = BedrockModel(**_live_model_kwargs())

        response = judge.generate(
            "Give me information about Paris, France",
            schema=CityInfo,
        )

        print(f"response: {response}")

        assert isinstance(response, CityInfo)
        assert response.city == "Paris"
        assert response.country == "France"
        assert response.population > 1000000

    def test_simple_evaluation(self):
        """Run AnswerRelevancyMetric with the Bedrock model as the judge."""
        judge = BedrockModel(**_live_model_kwargs())

        relevancy = AnswerRelevancyMetric(model=judge, threshold=0.8)
        assert_test(simple_test_case, [relevancy])


@pytest.mark.skipif(SKIP_LIVE_TESTS, reason="AWS credentials not set")
class TestMultimodalBedrockModelLive:
    """Live API tests for MultimodalBedrockModel."""

    def setup_method(self):
        """Create a fresh multimodal judge model before each test."""
        self.model = MultimodalBedrockModel(**_live_model_kwargs())

    def test_multimodal_evaluation(self):
        """Use MultimodalBedrockModel as the judge for two multimodal metrics."""
        precision = MultimodalContextualPrecisionMetric(model=self.model)
        relevancy = MultimodalAnswerRelevancyMetric(model=self.model)

        assert_test(
            multimodal_test_case,
            [precision, relevancy],
            run_async=True,
        )