Skip to content
This repository has been archived by the owner on Jun 9, 2024. It is now read-only.

Commit

Permalink
Just json, no test files (#77)
Browse files Browse the repository at this point in the history
  • Loading branch information
SilenNaihin authored Jul 10, 2023
1 parent 5731305 commit 3d43117
Show file tree
Hide file tree
Showing 28 changed files with 158 additions and 220 deletions.
13 changes: 11 additions & 2 deletions agbenchmark/RegressionManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,18 @@ def __init__(self, filename: str):
# Populate self.tests from the JSON file at self.filename; every fallback
# branch below assigns an empty dict instead of raising.
# NOTE(review): this span is a rendered diff with indentation stripped, so
# nesting is not visible here — confirm against the real file.
def load(self) -> None:
try:
with open(self.filename, "r") as f:
# NOTE(review): the next two lines look like the superseded pre-change
# version (direct json.load plus one combined except) — confirm.
self.tests = json.load(f)
except (FileNotFoundError, json.decoder.JSONDecodeError):
file_content = (
f.read().strip()
) # read the content and remove any leading/trailing whitespace
if file_content: # if file is not empty, load the json
self.tests = json.loads(file_content)
else: # if file is empty, assign an empty dictionary
self.tests = {}
# The file does not exist: start with no recorded tests.
except FileNotFoundError:
self.tests = {}
except json.decoder.JSONDecodeError: # If JSON is invalid
self.tests = {}
# save() reopens self.filename in "w" mode (see its definition below).
self.save()

def save(self) -> None:
with open(self.filename, "w") as f:
Expand Down
26 changes: 6 additions & 20 deletions agbenchmark/challenge.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import glob
import inspect
import os
import subprocess
import types
from abc import ABC, ABCMeta
from typing import Any, Dict, List, Tuple, Type, cast
from abc import ABC
from typing import Any, Dict, List

from dotenv import load_dotenv

Expand All @@ -16,24 +14,12 @@
MOCK_TEST = mock_test_str.lower() == "true" if mock_test_str else False


class ChallengeMeta(ABCMeta):
    """Metaclass that records where each challenge class was defined.

    When a class using this metaclass is created, ``CHALLENGE_LOCATION`` is
    set on that class to the directory of the source file containing the
    frame that triggered the class creation.
    """

    def __init__(self, name: str, bases: Tuple[Type, ...], dct: Dict[str, Any]) -> None:
        """Initialise the new class and attach ``CHALLENGE_LOCATION`` to it.

        Args:
            name: Name of the class being created.
            bases: Base classes of the class being created.
            dct: Namespace of the class being created.

        Raises:
            Exception: Whatever prevented the calling file from being
                resolved; it is reported on stdout and then re-raised.
        """
        super().__init__(name, bases, dct)
        try:
            frame = inspect.currentframe()
            # Only the immediate caller is inspected (one frame back). Use an
            # explicit check rather than ``assert`` so it survives ``-O``.
            caller = frame.f_back if frame is not None else None
            if caller is None:
                raise RuntimeError("no calling frame is available")
            self.CHALLENGE_LOCATION = os.path.dirname(inspect.getfile(caller))
        except Exception as e:
            print(f"Unable to get the file of the calling frame due to: {e}")
            # Bare raise keeps the original traceback intact.
            raise


class Challenge(ABC, metaclass=ChallengeMeta):
class Challenge(ABC):
"""The parent class to all specific challenges classes.
Defines helper methods for running a challenge"""

_data_cache: Dict[str, ChallengeData] = {}
CHALLENGE_LOCATION: str
CHALLENGE_LOCATION: str = ""

@property
def data(self) -> ChallengeData:
Expand All @@ -54,10 +40,10 @@ def setup_challenge(self, config: Dict[str, Any]) -> None:
from agbenchmark.agent_interface import copy_artifacts_into_workspace, run_agent

copy_artifacts_into_workspace(
config["workspace"], "artifacts_in", self.__class__.CHALLENGE_LOCATION
config["workspace"], "artifacts_in", self.CHALLENGE_LOCATION
)

run_agent(self.task, config, self.__class__.CHALLENGE_LOCATION)
run_agent(self.task, config, self.CHALLENGE_LOCATION)

def test_method(self, config: Dict[str, Any]) -> None:
raise NotImplementedError
Expand Down
1 change: 1 addition & 0 deletions agbenchmark/challenges/code/d1/data.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "TestDebugSimpleTypoWithGuidance",
"category": ["code"],
"task": "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
"dependencies": ["TestReadFile", "TestWriteFile"],
Expand Down

This file was deleted.

14 changes: 0 additions & 14 deletions agbenchmark/challenges/code/d2/d2_test.py

This file was deleted.

1 change: 1 addition & 0 deletions agbenchmark/challenges/code/d2/data.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "TestDebugSimpleTypoWithoutGuidance",
"category": ["code"],
"task": "Make test.py run without errors.",
"dependencies": ["TestDebugSimpleTypoWithGuidance"],
Expand Down
1 change: 1 addition & 0 deletions agbenchmark/challenges/define_task_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Ground(BaseModel):


class ChallengeData(BaseModel):
name: str
category: List[str]
task: str
dependencies: List[str]
Expand Down
2 changes: 1 addition & 1 deletion agbenchmark/challenges/interface/read_file/data.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "ReadFile",
"name": "TestReadFile",
"category": ["interface"],
"task": "Write the string 'random string' before any existing text to the file called file_to_check.txt",
"dependencies": ["TestWriteFile"],
Expand Down
12 changes: 0 additions & 12 deletions agbenchmark/challenges/interface/read_file/read_file_test.py

This file was deleted.

2 changes: 1 addition & 1 deletion agbenchmark/challenges/interface/write_file/data.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "WriteFile",
"name": "TestWriteFile",
"category": ["interface"],
"task": "Print the the capital of America to a .txt file",
"dependencies": [],
Expand Down
13 changes: 0 additions & 13 deletions agbenchmark/challenges/interface/write_file/write_file_test.py

This file was deleted.

1 change: 1 addition & 0 deletions agbenchmark/challenges/memory/m1/data.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "TestBasicMemory",
"category": ["memory"],
"task": "Follow the instructions in the instructions_1.txt file",
"dependencies": ["TestReadFile", "TestWriteFile"],
Expand Down
13 changes: 0 additions & 13 deletions agbenchmark/challenges/memory/m1/m1_test.py

This file was deleted.

1 change: 1 addition & 0 deletions agbenchmark/challenges/memory/m2/data.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "TestRememberMultipleIds",
"category": ["memory"],
"task": "Follow the instructions in the instructions_1.txt file",
"dependencies": ["TestBasicMemory"],
Expand Down
13 changes: 0 additions & 13 deletions agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py

This file was deleted.

1 change: 1 addition & 0 deletions agbenchmark/challenges/memory/m3/data.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "TestRememberMultipleIdsWithNoise",
"category": ["memory"],
"task": "Follow the instructions in the instructions_1.txt file",
"dependencies": ["TestRememberMultipleIds"],
Expand Down

This file was deleted.

1 change: 1 addition & 0 deletions agbenchmark/challenges/memory/m4/data.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "TestRememberMultiplePhrasesWithNoise",
"category": ["memory"],
"task": "Follow the instructions in the instructions_1.txt file",
"dependencies": ["TestRememberMultipleIdsWithNoise"],
Expand Down

This file was deleted.

1 change: 1 addition & 0 deletions agbenchmark/challenges/retrieval/r1/data.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "TestBasicRetrieval",
"category": ["retrieval"],
"task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file.",
"dependencies": ["TestWriteFile"],
Expand Down
13 changes: 0 additions & 13 deletions agbenchmark/challenges/retrieval/r1/r1_test.py

This file was deleted.

3 changes: 2 additions & 1 deletion agbenchmark/challenges/retrieval/r2/data.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{
"name": "TestRetrieval2",
"category": ["retrieval"],
"task": "Write tesla's revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 billion).",
"dependencies": ["TestRetrieval"],
"dependencies": ["TestBasicRetrieval"],
"ground": {
"answer": "81,462",
"should_contain": ["81,462"],
Expand Down
13 changes: 0 additions & 13 deletions agbenchmark/challenges/retrieval/r2/r2_test.py

This file was deleted.

1 change: 1 addition & 0 deletions agbenchmark/challenges/retrieval/r3/data.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "TestRetrieval3",
"category": ["retrieval"],
"task": "Write tesla's revenue every year since its creation into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 billion).",
"dependencies": ["TestRetrieval2"],
Expand Down
14 changes: 0 additions & 14 deletions agbenchmark/challenges/retrieval/r3/r3_test.py

This file was deleted.

78 changes: 78 additions & 0 deletions agbenchmark/challenges/test_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import glob
import importlib
import json
import os
import types
from pathlib import Path
from typing import Any, Dict

import pytest
from dotenv import load_dotenv

from agbenchmark.challenge import Challenge

# Called before the os.getenv lookup below.
# NOTE(review): presumably loads variables from a .env file into the
# environment (python-dotenv) — confirm.
load_dotenv()

# Raw string value of the IMPROVE environment variable; defaults to the
# string "False" (not a bool) when the variable is unset.
IMPROVE = os.getenv("IMPROVE", "False")


# Every data.json below agbenchmark/challenges. The pattern is relative, so
# it is resolved against the current working directory at import time.
json_files = glob.glob("agbenchmark/challenges/**/data.json", recursive=True)


def get_test_path(json_file: str) -> str:
    """Return the directory of *json_file* as a path starting at ``agbenchmark``.

    The file path is made absolute, its containing directory is taken, and
    every component before ``agbenchmark`` is dropped. For example
    ``/repo/agbenchmark/challenges/code/d1/data.json`` becomes
    ``agbenchmark/challenges/code/d1``.

    Args:
        json_file: Path (absolute, or relative to the current working
            directory) of a challenge ``data.json`` file.

    Returns:
        The containing directory, expressed from the ``agbenchmark``
        component onwards.

    Raises:
        ValueError: If no component of the path is named ``agbenchmark``.
    """
    parts = Path(os.path.abspath(json_file)).parent.parts

    # Anchor on the LAST "agbenchmark" component. The checkout may live under
    # a parent directory with the same name, and taking the first match would
    # leave that parent (and everything between) in the result.
    try:
        offset_from_end = parts[::-1].index("agbenchmark")
    except ValueError as err:
        raise ValueError("Invalid challenge location.") from err

    start = len(parts) - 1 - offset_from_end
    return str(Path(*parts[start:]))


def generate_tests() -> None:
    """Create one test class per challenge ``data.json`` and attach it to this module.

    For every path in ``json_files`` the JSON is loaded, a ``Challenge``
    subclass named after its ``"name"`` field is created, the class's
    ``CHALLENGE_LOCATION`` is set from the file's directory, and a
    ``test_method`` parametrized with the loaded data is attached. The class
    is then set as an attribute of this module so pytest can discover it.

    Data files without a non-empty ``"name"`` are skipped with a message
    instead of being registered under an empty attribute name.
    """
    print("Generating tests...")

    # The target module is the same for every file, so resolve it once.
    module = importlib.import_module(__name__)

    for json_file in json_files:
        with open(json_file, "r") as f:
            data = json.load(f)

        class_name = data.get("name", "")
        if not class_name:
            # Without a name the class would be stored under the attribute ""
            # and could not be referenced as a dependency by other challenges.
            print(f"Skipping {json_file}: missing 'name' field.")
            continue

        # Build the test class dynamically and point it at its challenge folder.
        challenge_class = types.new_class(class_name, (Challenge,))
        setattr(challenge_class, "CHALLENGE_LOCATION", get_test_path(json_file))

        # Define the function inside the loop on purpose: parametrize stores
        # its mark on the function object, so each class needs a fresh one.
        def test_method(self, config: Dict[str, Any]) -> None:  # type: ignore
            self.setup_challenge(config)

            scores = self.get_scores(config)
            assert 1 in scores

        # Hand this challenge's data to the test through indirect parametrization.
        test_method = pytest.mark.parametrize(
            "challenge_data",
            [data],
            indirect=True,
        )(test_method)

        setattr(challenge_class, "test_method", test_method)

        # Attach the new class to the module so it can be discovered by pytest.
        setattr(module, class_name, challenge_class)

        print(f"Generated test for {class_name}.")


# Runs at import time, so the generated classes are already attributes of
# this module once it has been imported.
generate_tests()
Loading

0 comments on commit 3d43117

Please sign in to comment.