Add gpt synthetic data #74

Merged · 10 commits · Sep 1, 2023

2 changes: 1 addition & 1 deletion deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "0.10.13"
+__version__: str = "0.11.0"

65 changes: 64 additions & 1 deletion deepeval/dataset.py
@@ -2,6 +2,7 @@
 """
 import json
 import random
+import time
 from tabulate import tabulate
 from datetime import datetime
 from typing import List, Callable
@@ -237,7 +238,6 @@ def create_evaluation_dataset_from_raw_text(text: str, output_fn: str = "output.
 
     # NOTE: loading this may take a while as the model used is quite big
     gen = BEIRQueryGenerator()
-    text = "Synthetic queries are useful for scenraios where there is no data."
     queries = gen.generate_queries(texts=[text], num_queries=2)
     test_cases = []
     with open(output_fn, "w") as f:
@@ -249,3 +249,66 @@ def create_evaluation_dataset_from_raw_text(text: str, output_fn: str = "output.
 
     dataset = EvaluationDataset(test_cases=test_cases)
     return dataset
+
+
+def make_chat_completion_request(prompt: str, openai_api_key: str):
+    import openai
+
+    openai.api_key = openai_api_key
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": prompt},
+        ],
+    )
+    return response.choices[0].message.content
+
+
+def generate_chatgpt_output(prompt: str, openai_api_key: str) -> str:
+    # Retry up to 3 times with exponential backoff (1s, then 2s between attempts).
+    max_retries = 3
+    retry_delay = 1
+    for attempt in range(max_retries):
+        try:
+            expected_output = make_chat_completion_request(
+                prompt=prompt, openai_api_key=openai_api_key
+            )
+            break
+        except Exception as e:
+            print(f"Error occurred: {e}")
+            if attempt < max_retries - 1:
+                print(f"Retrying in {retry_delay} seconds...")
+                time.sleep(retry_delay)
+                retry_delay *= 2
+            else:
+                raise
+
+    return expected_output
+
+
+def create_evaluation_query_output_pairs(
+    openai_api_key: str, context: str, n: int = 3, model: str = "openai/gpt-3.5-turbo"
+) -> EvaluationDataset:
+    """Utility function to create an evaluation dataset using GPT."""
+    # NOTE: `model` is currently unused; make_chat_completion_request hardcodes gpt-3.5-turbo.
+    prompt = f"""You are generating {n} sets of query-answer pairs to create an evaluation dataset based on the context below.
+Context: {context}
+
+Respond in JSON format on a single line, without whitespace, as an array of JSON objects with the keys `query` and `answer`.
+"""
+    for _ in range(3):
+        try:
+            responses = generate_chatgpt_output(prompt, openai_api_key=openai_api_key)
+            responses = json.loads(responses)
+            break
+        except Exception:
+            # Malformed JSON or API failure: try again, up to 3 attempts.
+            continue
+    else:
+        return EvaluationDataset(test_cases=[])
+
+    test_cases = []
+    for response in responses:
+        test_case = TestCase(
+            query=response["query"], expected_output=response["answer"]
+        )
+        test_cases.append(test_case)
+
+    dataset = EvaluationDataset(test_cases=test_cases)
+    return dataset
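
Taken together, the new helpers give a one-call path from a raw context string to an `EvaluationDataset`. A minimal usage sketch, where the context string, `n=2`, and the environment-variable lookup are illustrative rather than part of this diff:

```python
# Sketch: exercising the new create_evaluation_query_output_pairs helper.
# Assumes the `openai` package is installed and OPENAI_API_KEY is set.
import os

from deepeval.dataset import create_evaluation_query_output_pairs

dataset = create_evaluation_query_output_pairs(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    context="Python is a great language for mathematical expression and machine learning.",
    n=2,
)

# On success the model replied with a single-line JSON array such as
# [{"query": "...", "answer": "..."}], and each element became a TestCase.
print(len(dataset))  # 2 on success, 0 if all three parsing attempts failed
```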
32 changes: 22 additions & 10 deletions docs/docs/quickstart/synthetic-data-creation.md
@@ -1,29 +1,41 @@
-# Create Synthetic Data
+# Auto-Evaluation
 
-## Problem synthetic data creation solves
+## Introduction
 
-- When there isn't much data or any data to start with for evaluating langchain pipelines
-- When getting an eyeball check of current performance is done very quickly
+Auto-evaluation is useful:
 
-![Synthetic Queries](../../assets/synthetic-query-generation.png)
+- When there isn't much data, or any data at all, to start with for evaluating langchain pipelines
+- When you need a quick eyeball check of current performance
 
-Generating synthetic queries allows you to quickly evaluate the queries related to your prompts.
-We help developers get up and running with example queries from just raw text.
 
+We help developers get up and running with example queries from just raw text, using an OpenAI model to generate query-answer pairs from the text.
 
 ```python
-# Loads the synthetic query model to generate them based on data you get.
-# These automatically create synthetic queries and adds them to our online database
-from deepeval.dataset import create_evaluation_dataset_from_raw_text
+from deepeval.dataset import create_evaluation_query_output_pairs
+dataset = create_evaluation_query_output_pairs("Python is a great language for mathematical expression and machine learning.")
+```
 
+Once you have created your evaluation dataset, we recommend saving it.
+
-dataset = create_evaluation_dataset_from_raw_text("Python is a great language for mathematical expression and machine learning.")
+```python
+dataset.to_csv("sample.csv")
 ```
 
-## Running test cases.
+## Running tests/evaluation
 
-Once you have defined a number of test cases, you can easily run it in bulk if required.
 
+Once you have defined a number of test cases, you can easily run them in bulk if required.
+If there are errors, this function will raise them.
 
 ```python
 # test_bulk_runner.py
 
 def generate_llm_output(query: str) -> str:
     return "sample output"
 
+# Run an evaluation as you would any normal evaluation.
 dataset.run_evaluation(completion_fn=generate_llm_output)
 ```
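
The `generate_llm_output` stub in the doc above returns a canned string. To point `run_evaluation` at a real model instead, a sketch along these lines should work; the completion function is our own example, reusing the same pre-1.0 `openai.ChatCompletion` API this PR uses in `deepeval/dataset.py`:

```python
import os

import openai

openai.api_key = os.environ["OPENAI_API_KEY"]


def generate_llm_output(query: str) -> str:
    # Any callable mapping a query string to an answer string works here.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": query}],
    )
    return response.choices[0].message.content


dataset.run_evaluation(completion_fn=generate_llm_output)
```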
20 changes: 20 additions & 0 deletions tests/test_dataset.py
@@ -1,3 +1,7 @@
+import pytest
+import os
+
+
 def test_evaluation_dataset():
     from deepeval.dataset import EvaluationDataset
 
@@ -20,3 +24,19 @@ def test_evaluation_dataset():
         expected_output_column="expected_output",
         id_column="id",
     )
+    assert len(dataset) == 5
+
+
+@pytest.mark.skip(reason="OpenAI costs")
+def test_create_synthetic_dataset():
+    """
+    test for creating a synthetic dataset
+    """
+    from deepeval.dataset import create_evaluation_query_output_pairs
+
+    dataset = create_evaluation_query_output_pairs(
+        openai_api_key=os.environ["OPENAI_API_KEY"],
+        context="FastAPI is a modern, fast (high-performance), web framework for building APIs with Python 3.7+ based on standard Python type hints.",
+        n=1,
+    )
+    assert len(dataset) == 1
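
Since `@pytest.mark.skip` only attaches collection metadata, the skipped test's body can still be run directly when you are willing to pay for the API call. A sketch, assuming the repository root is on `sys.path` and `OPENAI_API_KEY` is exported:

```python
# Run the OpenAI-backed test outside pytest, bypassing the skip marker.
from tests.test_dataset import test_create_synthetic_dataset

test_create_synthetic_dataset()  # hits the OpenAI API; requires OPENAI_API_KEY
```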