Merge pull request #81 from confident-ai/feature/fix-quickstart
Feature/fix quickstart
Showing 6 changed files with 133 additions and 57 deletions.
@@ -1 +1 @@
-__version__: str = "0.12.0"
+__version__: str = "0.12.1"
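
The bump above only changes the version string. If you want to confirm which release is installed locally, a quick check like the following should work; it is a sketch that relies only on the standard library's `importlib.metadata`, not on anything introduced in this PR.

```python
# Print the installed deepeval version using the standard library.
# After this release you would expect it to show 0.12.1.
from importlib.metadata import version

print(version("deepeval"))
```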
@@ -1,60 +1,64 @@
-# Write a simple test case
+# QuickStart

-If you are interested in running a quick Colab example, you can [click here](https://colab.research.google.com/drive/1Lfq5geYsvfVoquDqv84UkWS57SdAHm30?usp=sharing).
+Once you have installed, run the login command. During this step, you will be asked to visit https://app.confident-ai.com to grab your API key.

-You can write a simple test case as simply as:
+Note: this step is entirely optional if you do not wish to track your results but we highly recommend it so you can view how results differ over time.

 ```bash
-deepeval test generate test_sample.py
-```
+deepeval login

-```python
-import os
-import openai
-from deepeval.metrics.factual_consistency import assert_factual_consistency
-
-openai.api_key = "sk-XXX"
-
-# Write a sample ChatGPT function
-def generate_chatgpt_output(query: str):
-    response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "assistant", "content": "The customer success phone line is 1200-231-231 and the customer success state is in Austin."},
-            {"role": "user", "content": query}
-        ]
-    )
-    llm_output = response.choices[0].message.content
-    return llm_output
-
-def test_factual_consistency():
-    query = "What is the customer success phone line?"
-    context = "Our customer success phone line is 1200-231-231."
-    output = generate_chatgpt_output(query)
-    assert_factual_consistency(output, context)
-
-# Just run the following code in Python if required
-test_factual_consistency()
+# If you already have an API key and want to feed it in through CLI
+deepeval login --api-key $API_KEY
 ```

-### Running it in Pytest
+Once you have logged in, you can generate a sample test file as shown below. This test file allows you to quickly get started modifying it with various tests. (More on this later)

-To run this in Pytest, just run:
+```bash
+deepeval test generate --output-file test_sample.py
+```

-```python
-# sample.py
+Once you have generated the test file, you can then run tests as shown.

-def test_factual_consistency():
-    query = "What is the customer success phone line?"
-    context = "Our customer success phone line is 1200-231-231."
-    output = generate_chatgpt_output(query)
-    assert_factual_consistency(output, context)
+```bash
+deepeval test run test_sample.py
+# if you wish to fail first
+deepeval test run -x test_sample.py
+# If you want to run an interactive debugger when a test fails
+deepeval test run --pdb test_sample.py
 ```

-You can then run it in CLI using:
+Under the hood, it triggers pytest and offers support for a number of pytest command line functionalities. Similarly, you may also trigger `pytest` natively for these tests such as

 ```bash
-deepeval test run sample.py
-# If you want to stay with pytest instead
+pytest test_sample.py
 ```

+Once you run the tests, you should be able to see a dashboard similar to the one below.
+
+
+
+## Diving Into The Example
+
+Diving into the example, it shows what a sample test looks like. It uses `assert_overall_score` to ensure that the overall score exceeds a certain threshold. We recommend experimenting with different tests to ensure that the LLMs work as intended across domains such as Bias, Answer Relevancy and Factual Consistency.
+
+With overall score, if you leave out `query` or `expected_output`, DeepEval will automatically run the relevant tests.
+
+For these tests, you will need a `test_` prefix for this to be ran in Python.
+
+```python
+from deepeval.metrics.overall_score import assert_overall_score
+
+
+def test_0():
+    query = "How does photosynthesis work?"
+    output = "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize foods with the help of chlorophyll pigment."
+    expected_output = "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize food with the help of chlorophyll pigment."
+    context = "Biology"
+
+    assert_overall_score(query, output, expected_output, context)
+```
+
+## What next?
+
+We recommend diving into [creating a dataset](dataset) to learn how to run tests in bulk or [defining custom metrics](../quickstart/custom-metrics) so you can support writing custom tests and metrics for your own use cases.
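The new QuickStart notes that `deepeval test run` drives pytest under the hood, so ordinary pytest features should compose with these assertions. Here is a hedged sketch using `pytest.mark.parametrize`; only the `assert_overall_score(query, output, expected_output, context)` signature shown in the diff above is assumed, and the test case reuses the example strings from that diff.

```python
# Sketch: driving the QuickStart example through pytest.mark.parametrize.
# Only the assert_overall_score signature shown in the diff above is assumed;
# the single test case is the illustrative example from the QuickStart.
import pytest

from deepeval.metrics.overall_score import assert_overall_score

CASES = [
    (
        "How does photosynthesis work?",
        "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize foods with the help of chlorophyll pigment.",
        "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize food with the help of chlorophyll pigment.",
        "Biology",
    ),
]


@pytest.mark.parametrize("query,output,expected_output,context", CASES)
def test_overall_score(query, output, expected_output, context):
    assert_overall_score(query, output, expected_output, context)
```

Because the runner wraps pytest, both `deepeval test run test_sample.py` and plain `pytest test_sample.py` should pick this up, as the QuickStart's own command examples suggest.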
@@ -0,0 +1,60 @@
# Write a simple test case

If you are interested in running a quick Colab example, you can [click here](https://colab.research.google.com/drive/1Lfq5geYsvfVoquDqv84UkWS57SdAHm30?usp=sharing).

You can write a simple test case as simply as:

```bash
deepeval test generate test_sample.py
```

```python
import os
import openai
from deepeval.metrics.factual_consistency import assert_factual_consistency

openai.api_key = "sk-XXX"

# Write a sample ChatGPT function
def generate_chatgpt_output(query: str):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "assistant", "content": "The customer success phone line is 1200-231-231 and the customer success state is in Austin."},
            {"role": "user", "content": query}
        ]
    )
    llm_output = response.choices[0].message.content
    return llm_output

def test_factual_consistency():
    query = "What is the customer success phone line?"
    context = "Our customer success phone line is 1200-231-231."
    output = generate_chatgpt_output(query)
    assert_factual_consistency(output, context)

# Just run the following code in Python if required
test_factual_consistency()
```

### Running it in Pytest

To run this in Pytest, just run:

```python
# sample.py

def test_factual_consistency():
    query = "What is the customer success phone line?"
    context = "Our customer success phone line is 1200-231-231."
    output = generate_chatgpt_output(query)
    assert_factual_consistency(output, context)
```

You can then run it in CLI using:

```bash
deepeval test run sample.py
# If you want to stay with pytest instead
```
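
The sample file above calls the OpenAI API from inside the test, so it needs a real `sk-...` key to run. For a quick offline smoke test of `assert_factual_consistency`, you could stub out the generation step; a minimal sketch, assuming only the `assert_factual_consistency(output, context)` signature shown above:

```python
# Sketch: exercising assert_factual_consistency without any OpenAI call.
# Only the assert_factual_consistency(output, context) signature from the
# sample file above is assumed; the stubbed output is illustrative.
from deepeval.metrics.factual_consistency import assert_factual_consistency


def fake_chatgpt_output(query: str) -> str:
    # Stand-in for generate_chatgpt_output so no API key is required.
    return "Our customer success phone line is 1200-231-231."


def test_factual_consistency_offline():
    context = "Our customer success phone line is 1200-231-231."
    output = fake_chatgpt_output("What is the customer success phone line?")
    assert_factual_consistency(output, context)
```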