Just json, no test files (#77)

Significant-Gravitas · Jul 10, 2023 · 3d43117 · 3d43117
1 parent 5731305
commit 3d43117
Show file tree

Hide file tree

Showing 28 changed files with 158 additions and 220 deletions.
diff --git a/agbenchmark/RegressionManager.py b/agbenchmark/RegressionManager.py
@@ -11,9 +11,18 @@ def __init__(self, filename: str):
     def load(self) -> None:
         try:
             with open(self.filename, "r") as f:
-                self.tests = json.load(f)
-        except (FileNotFoundError, json.decoder.JSONDecodeError):
+                file_content = (
+                    f.read().strip()
+                )  # read the content and remove any leading/trailing whitespace
+                if file_content:  # if file is not empty, load the json
+                    self.tests = json.loads(file_content)
+                else:  # if file is empty, assign an empty dictionary
+                    self.tests = {}
+        except FileNotFoundError:
             self.tests = {}
+        except json.decoder.JSONDecodeError:  # If JSON is invalid
+            self.tests = {}
+        self.save()
 
     def save(self) -> None:
         with open(self.filename, "w") as f:

diff --git a/agbenchmark/challenge.py b/agbenchmark/challenge.py
@@ -1,10 +1,8 @@
 import glob
-import inspect
 import os
 import subprocess
-import types
-from abc import ABC, ABCMeta
-from typing import Any, Dict, List, Tuple, Type, cast
+from abc import ABC
+from typing import Any, Dict, List
 
 from dotenv import load_dotenv
 
@@ -16,24 +14,12 @@
 MOCK_TEST = mock_test_str.lower() == "true" if mock_test_str else False
 
 
-class ChallengeMeta(ABCMeta):
-    def __init__(self, name: str, bases: Tuple[Type, ...], dct: Dict[str, Any]) -> None:
-        super().__init__(name, bases, dct)
-        try:
-            frame = cast(types.FrameType, inspect.currentframe())
-            assert frame.f_back is not None
-            self.CHALLENGE_LOCATION = os.path.dirname(inspect.getfile(frame.f_back))
-        except Exception as e:
-            print(f"Unable to get the file from 8 frames back due to: {str(e)}")
-            raise e
-
-
-class Challenge(ABC, metaclass=ChallengeMeta):
+class Challenge(ABC):
     """The parent class to all specific challenges classes.
     Defines helper methods for running a challenge"""
 
     _data_cache: Dict[str, ChallengeData] = {}
-    CHALLENGE_LOCATION: str
+    CHALLENGE_LOCATION: str = ""
 
     @property
     def data(self) -> ChallengeData:
@@ -54,10 +40,10 @@ def setup_challenge(self, config: Dict[str, Any]) -> None:
         from agbenchmark.agent_interface import copy_artifacts_into_workspace, run_agent
 
         copy_artifacts_into_workspace(
-            config["workspace"], "artifacts_in", self.__class__.CHALLENGE_LOCATION
+            config["workspace"], "artifacts_in", self.CHALLENGE_LOCATION
         )
 
-        run_agent(self.task, config, self.__class__.CHALLENGE_LOCATION)
+        run_agent(self.task, config, self.CHALLENGE_LOCATION)
 
     def test_method(self, config: Dict[str, Any]) -> None:
         raise NotImplementedError

diff --git a/agbenchmark/challenges/code/d1/data.json b/agbenchmark/challenges/code/d1/data.json
@@ -1,4 +1,5 @@
 {
+  "name": "TestDebugSimpleTypoWithGuidance",
   "category": ["code"],
   "task": "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
   "dependencies": ["TestReadFile", "TestWriteFile"],

diff --git a/agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_test.py b/agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_test.py
diff --git a/agbenchmark/challenges/code/d2/d2_test.py b/agbenchmark/challenges/code/d2/d2_test.py
diff --git a/agbenchmark/challenges/code/d2/data.json b/agbenchmark/challenges/code/d2/data.json
@@ -1,4 +1,5 @@
 {
+  "name": "TestDebugSimpleTypoWithoutGuidance",
   "category": ["code"],
   "task": "Make test.py run without errors.",
   "dependencies": ["TestDebugSimpleTypoWithGuidance"],

diff --git a/agbenchmark/challenges/define_task_types.py b/agbenchmark/challenges/define_task_types.py
@@ -19,6 +19,7 @@ class Ground(BaseModel):
 
 
 class ChallengeData(BaseModel):
+    name: str
     category: List[str]
     task: str
     dependencies: List[str]

diff --git a/agbenchmark/challenges/interface/read_file/data.json b/agbenchmark/challenges/interface/read_file/data.json
@@ -1,5 +1,5 @@
 {
-  "name": "ReadFile",
+  "name": "TestReadFile",
   "category": ["interface"],
   "task": "Write the string 'random string' before any existing text to the file called file_to_check.txt",
   "dependencies": ["TestWriteFile"],

diff --git a/agbenchmark/challenges/interface/read_file/read_file_test.py b/agbenchmark/challenges/interface/read_file/read_file_test.py
diff --git a/agbenchmark/challenges/interface/write_file/data.json b/agbenchmark/challenges/interface/write_file/data.json
@@ -1,5 +1,5 @@
 {
-  "name": "WriteFile",
+  "name": "TestWriteFile",
   "category": ["interface"],
   "task": "Print the the capital of America to a .txt file",
   "dependencies": [],

diff --git a/agbenchmark/challenges/interface/write_file/write_file_test.py b/agbenchmark/challenges/interface/write_file/write_file_test.py
diff --git a/agbenchmark/challenges/memory/m1/data.json b/agbenchmark/challenges/memory/m1/data.json
@@ -1,4 +1,5 @@
 {
+  "name": "TestBasicMemory",
   "category": ["memory"],
   "task": "Follow the instructions in the instructions_1.txt file",
   "dependencies": ["TestReadFile", "TestWriteFile"],

diff --git a/agbenchmark/challenges/memory/m1/m1_test.py b/agbenchmark/challenges/memory/m1/m1_test.py
diff --git a/agbenchmark/challenges/memory/m2/data.json b/agbenchmark/challenges/memory/m2/data.json
@@ -1,4 +1,5 @@
 {
+  "name": "TestRememberMultipleIds",
   "category": ["memory"],
   "task": "Follow the instructions in the instructions_1.txt file",
   "dependencies": ["TestBasicMemory"],

diff --git a/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py b/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py
diff --git a/agbenchmark/challenges/memory/m3/data.json b/agbenchmark/challenges/memory/m3/data.json
@@ -1,4 +1,5 @@
 {
+  "name": "TestRememberMultipleIdsWithNoise",
   "category": ["memory"],
   "task": "Follow the instructions in the instructions_1.txt file",
   "dependencies": ["TestRememberMultipleIds"],

diff --git a/agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py b/agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py
diff --git a/agbenchmark/challenges/memory/m4/data.json b/agbenchmark/challenges/memory/m4/data.json
@@ -1,4 +1,5 @@
 {
+  "name": "TestRememberMultiplePhrasesWithNoise",
   "category": ["memory"],
   "task": "Follow the instructions in the instructions_1.txt file",
   "dependencies": ["TestRememberMultipleIdsWithNoise"],

diff --git a/agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py b/agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py
diff --git a/agbenchmark/challenges/retrieval/r1/data.json b/agbenchmark/challenges/retrieval/r1/data.json
@@ -1,4 +1,5 @@
 {
+  "name": "TestBasicRetrieval",
   "category": ["retrieval"],
   "task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file.",
   "dependencies": ["TestWriteFile"],

diff --git a/agbenchmark/challenges/retrieval/r1/r1_test.py b/agbenchmark/challenges/retrieval/r1/r1_test.py
diff --git a/agbenchmark/challenges/retrieval/r2/data.json b/agbenchmark/challenges/retrieval/r2/data.json
@@ -1,7 +1,8 @@
 {
+  "name": "TestRetrieval2",
   "category": ["retrieval"],
   "task": "Write tesla's revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 billion).",
-  "dependencies": ["TestRetrieval"],
+  "dependencies": ["TestBasicRetrieval"],
   "ground": {
     "answer": "81,462",
     "should_contain": ["81,462"],

diff --git a/agbenchmark/challenges/retrieval/r2/r2_test.py b/agbenchmark/challenges/retrieval/r2/r2_test.py
diff --git a/agbenchmark/challenges/retrieval/r3/data.json b/agbenchmark/challenges/retrieval/r3/data.json
@@ -1,4 +1,5 @@
 {
+  "name": "TestRetrieval3",
   "category": ["retrieval"],
   "task": "Write tesla's revenue every year since its creation into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 billion).",
   "dependencies": ["TestRetrieval2"],

diff --git a/agbenchmark/challenges/retrieval/r3/r3_test.py b/agbenchmark/challenges/retrieval/r3/r3_test.py
diff --git a/agbenchmark/challenges/test_all.py b/agbenchmark/challenges/test_all.py
@@ -0,0 +1,78 @@
+import glob
+import importlib
+import json
+import os
+import types
+from pathlib import Path
+from typing import Any, Dict
+
+import pytest
+from dotenv import load_dotenv
+
+from agbenchmark.challenge import Challenge
+
+load_dotenv()  # runs before the os.getenv call below
+
+IMPROVE = os.getenv("IMPROVE", "False")  # NOTE(review): always a str, so the default "False" is truthy; compare explicitly before using it as a flag
+
+
+json_files = glob.glob("agbenchmark/challenges/**/data.json", recursive=True)  # relative pattern: resolved against the current working directory when this module is imported
+
+
+def get_test_path(json_file: str) -> str:  # Return the directory of json_file as a path that starts at its "agbenchmark" component
+    abs_location = os.path.dirname(os.path.abspath(json_file))
+
+    path = Path(abs_location)
+
+    # Find the index of the first "agbenchmark" component in the path parts
+    try:
+        agbenchmark_index = path.parts.index("agbenchmark")  # NOTE(review): first match wins; a checkout directory also named "agbenchmark" would be picked instead of the package
+    except ValueError:
+        raise ValueError("Invalid challenge location.")  # the absolute path has no "agbenchmark" component
+
+    # Rebuild the path from the "agbenchmark" component onwards
+    challenge_location = Path(*path.parts[agbenchmark_index:])
+
+    return str(challenge_location)
+
+
+def generate_tests() -> None:  # Create one Challenge subclass per entry in json_files and attach it to this module
+    print("Generating tests...")
+    # Dynamic class creation: one class per data.json path in the module-level json_files list
+    for json_file in json_files:
+        with open(json_file, "r") as f:
+            data = json.load(f)
+
+            class_name = data.get("name", "")  # NOTE(review): a data.json without "name" yields "" as the class name; confirm every file sets it
+
+        challenge_location = get_test_path(json_file)
+
+        # Define the test class dynamically as a subclass of Challenge
+        challenge_class = types.new_class(class_name, (Challenge,))
+
+        setattr(challenge_class, "CHALLENGE_LOCATION", challenge_location)
+
+        # Define the test method for the dynamically created class
+        def test_method(self, config: Dict[str, Any]) -> None:  # type: ignore
+            self.setup_challenge(config)
+
+            scores = self.get_scores(config)
+            assert 1 in scores  # passes when at least one score equals 1
+
+        # Parametrize the method with this file's parsed data; indirect=True hands it to a "challenge_data" fixture (presumably defined elsewhere, not shown here)
+        test_method = pytest.mark.parametrize(
+            "challenge_data",
+            [data],
+            indirect=True,
+        )(test_method)
+
+        setattr(challenge_class, "test_method", test_method)
+
+        # Attach the new class to this module under class_name so it can be discovered by pytest
+        module = importlib.import_module(__name__)
+        setattr(module, class_name, challenge_class)
+
+        print(f"Generated test for {class_name}.")
+
+
+generate_tests()  # executed at import time, so the generated classes are module attributes once this module has been imported