diff --git a/deepeval/_version.py b/deepeval/_version.py
index 0048970f0..dcc942c42 100644
--- a/deepeval/_version.py
+++ b/deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "0.12.0"
+__version__: str = "0.12.1"
diff --git a/deepeval/api.py b/deepeval/api.py
index 65b1da6d4..c33fe7ba0 100644
--- a/deepeval/api.py
+++ b/deepeval/api.py
@@ -2,9 +2,9 @@
 import urllib.parse
 import os
 import requests
-from .constants import API_KEY_ENV
 from typing import Any, Optional
 from requests.adapters import HTTPAdapter, Response, Retry
+from .constants import API_KEY_ENV
 from .key_handler import KEY_FILE_HANDLER

 API_BASE_URL = "https://app.confident-ai.com/api"
diff --git a/deepeval/cli/test.py b/deepeval/cli/test.py
index 8c25fc01e..8fab19f9b 100644
--- a/deepeval/cli/test.py
+++ b/deepeval/cli/test.py
@@ -75,18 +75,29 @@ def check_if_legit_file(test_file: str):


 @app.command()
-def run(test_file_or_directory: str, exit_on_first_failure: bool = False):
+def run(
+    test_file_or_directory: str,
+    exit_on_first_failure: bool = False,
+    verbose: bool = False,
+    color: str = "yes",
+    durations: int = 10,
+    pdb: bool = False,
+):
     """Run a test"""
-    if test_file_or_directory == "sample":
-        sample()
-        print(
-            "You can generate a sample test using [bold]deepeval test generate[/bold]."
-        )
-        retcode = 0
+    pytest_args = ["-k", test_file_or_directory]
     if exit_on_first_failure:
-        retcode = pytest.main(["-x", "-k", test_file_or_directory])
-    else:
-        retcode = pytest.main(["-k", test_file_or_directory])
+        pytest_args.insert(0, "-x")
+    pytest_args.extend(
+        [
+            "--verbose" if verbose else "--quiet",
+            f"--color={color}",
+            f"--durations={durations}",
+            # f"--cov={cov}",
+            # f"--cov-report={cov_report}",
+        ]
+    )
+    # only add --pdb when it is requested
+    if pdb:
+        pytest_args.append("--pdb")
+    retcode = pytest.main(pytest_args)
     print("✅ Tests finished! View results on https://app.confident-ai.com/")
     return retcode
diff --git a/docs/docs/quickstart/quickstart.md b/docs/docs/quickstart/quickstart.md
index 25d263fd9..a8c5ff8cc 100644
--- a/docs/docs/quickstart/quickstart.md
+++ b/docs/docs/quickstart/quickstart.md
@@ -1,60 +1,64 @@
-# Write a simple test case
+# QuickStart
-If you are interested in running a quick Colab example, you can [click here](https://colab.research.google.com/drive/1Lfq5geYsvfVoquDqv84UkWS57SdAHm30?usp=sharing).
+Once you have installed DeepEval, run the login command. During this step, you will be asked to visit https://app.confident-ai.com to grab your API key.
-You can write a simple test case as simply as:
+Note: this step is entirely optional if you do not wish to track your results, but we highly recommend it so you can see how your results change over time.
 ```bash
-deepeval test generate test_sample.py
-```
+deepeval login
-```python
-import os
-import openai
-from deepeval.metrics.factual_consistency import assert_factual_consistency
-
-openai.api_key = "sk-XXX"
-
-# Write a sample ChatGPT function
-def generate_chatgpt_output(query: str):
-    response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "assistant", "content": "The customer success phone line is 1200-231-231 and the customer success state is in Austin."},
-            {"role": "user", "content": query}
-        ]
-    )
-    llm_output = response.choices[0].message.content
-    return llm_output
-
-def test_factual_consistency():
-    query = "What is the customer success phone line?"
-    context = "Our customer success phone line is 1200-231-231."
-    output = generate_chatgpt_output(query)
-    assert_factual_consistency(output, context)
-
-# Just run the following code in Python if required
-test_factual_consistency()
+# If you already have an API key and want to pass it in through the CLI
+deepeval login --api-key $API_KEY
 ```
-### Running it in Pytest
+Once you have logged in, you can generate a sample test file as shown below. This test file gives you a starting point that you can quickly modify with your own tests. (More on this later)
-To run this in Pytest, just run:
+```bash
+deepeval test generate --output-file test_sample.py
+```
-```python
-# sample.py
+Once you have generated the test file, you can then run tests as shown.
-def test_factual_consistency():
-    query = "What is the customer success phone line?"
-    context = "Our customer success phone line is 1200-231-231."
-    output = generate_chatgpt_output(query)
-    assert_factual_consistency(output, context)
+```bash
+deepeval test run test_sample.py
+# If you wish to exit on the first failure
+deepeval test run -x test_sample.py
+# If you want to run an interactive debugger when a test fails
+deepeval test run --pdb test_sample.py
 ```
-You can then run it in CLI using:
+Under the hood, `deepeval test run` invokes pytest and supports a number of pytest command-line options. You can also run these tests with `pytest` directly:
 ```bash
-deepeval test run sample.py
-# If you want to stay with pytest instead
+pytest test_sample.py
 ```
+
+Once you run the tests, you should be able to see a dashboard similar to the one below.
+
+![Dashboard Example](../../assets/dashboard-screenshot.png)
+
+## Diving Into The Example
+
+The example below shows what a sample test looks like. It uses `assert_overall_score` to ensure that the overall score exceeds a certain threshold. We recommend experimenting with different tests to ensure that your LLMs work as intended across domains such as Bias, Answer Relevancy and Factual Consistency.
+
+With the overall score, if you leave out `query` or `expected_output`, DeepEval will automatically run the relevant tests.
+
+For these tests, you will need a `test_` prefix for them to be picked up and run by pytest.
+
+```python
+from deepeval.metrics.overall_score import assert_overall_score
+
+
+def test_0():
+    query = "How does photosynthesis work?"
+    output = "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize foods with the help of chlorophyll pigment."
+    expected_output = "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize food with the help of chlorophyll pigment."
+    context = "Biology"
+
+    assert_overall_score(query, output, expected_output, context)
+```
+
+## What next?
+
+We recommend diving into [creating a dataset](dataset) to learn how to run tests in bulk, or [defining custom metrics](../quickstart/custom-metrics) so you can write custom tests and metrics for your own use cases.
+
diff --git a/docs/docs/quickstart/write_test_case.md b/docs/docs/quickstart/write_test_case.md
new file mode 100644
index 000000000..25d263fd9
--- /dev/null
+++ b/docs/docs/quickstart/write_test_case.md
@@ -0,0 +1,60 @@
+# Write a simple test case
+
+If you are interested in running a quick Colab example, you can [click here](https://colab.research.google.com/drive/1Lfq5geYsvfVoquDqv84UkWS57SdAHm30?usp=sharing).
+
+You can write a simple test case as simply as this:
+
+```bash
+deepeval test generate test_sample.py
+```
+
+```python
+import os
+import openai
+from deepeval.metrics.factual_consistency import assert_factual_consistency
+
+openai.api_key = "sk-XXX"
+
+# Write a sample ChatGPT function
+def generate_chatgpt_output(query: str):
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "assistant", "content": "The customer success phone line is 1200-231-231 and the customer success state is in Austin."},
+            {"role": "user", "content": query}
+        ]
+    )
+    llm_output = response.choices[0].message.content
+    return llm_output
+
+def test_factual_consistency():
+    query = "What is the customer success phone line?"
+    context = "Our customer success phone line is 1200-231-231."
+    output = generate_chatgpt_output(query)
+    assert_factual_consistency(output, context)
+
+# Just run the following code in Python if required
+test_factual_consistency()
+```
+
+### Running it in Pytest
+
+To run this in Pytest, save the test in a file such as `sample.py`:
+
+```python
+# sample.py
+
+def test_factual_consistency():
+    query = "What is the customer success phone line?"
+    context = "Our customer success phone line is 1200-231-231."
+    output = generate_chatgpt_output(query)
+    assert_factual_consistency(output, context)
+```
+
+You can then run it from the CLI using:
+
+```bash
+deepeval test run sample.py
+# If you want to stay with pytest instead
+pytest sample.py
+```
diff --git a/docs/sidebars.js b/docs/sidebars.js
index d214c4746..6ae654e91 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -26,6 +26,7 @@ const sidebars = {
       label: 'QuickStart',
       items: [
         'quickstart/quickstart',
+        'quickstart/write_test_case',
         'quickstart/dataset',
         'quickstart/synthetic-data-creation',
         'quickstart/dashboard-app'