diff --git a/agbenchmark/RegressionManager.py b/agbenchmark/RegressionManager.py index a1379ecaee8..e289a478780 100644 --- a/agbenchmark/RegressionManager.py +++ b/agbenchmark/RegressionManager.py @@ -11,9 +11,18 @@ def __init__(self, filename: str): def load(self) -> None: try: with open(self.filename, "r") as f: - self.tests = json.load(f) - except (FileNotFoundError, json.decoder.JSONDecodeError): + file_content = ( + f.read().strip() + ) # read the content and remove any leading/trailing whitespace + if file_content: # if file is not empty, load the json + self.tests = json.loads(file_content) + else: # if file is empty, assign an empty dictionary + self.tests = {} + except FileNotFoundError: self.tests = {} + except json.decoder.JSONDecodeError: # If JSON is invalid + self.tests = {} + self.save() def save(self) -> None: with open(self.filename, "w") as f: diff --git a/agbenchmark/challenge.py b/agbenchmark/challenge.py index ddf69f42d3e..cf7ce104c57 100644 --- a/agbenchmark/challenge.py +++ b/agbenchmark/challenge.py @@ -1,10 +1,8 @@ import glob -import inspect import os import subprocess -import types -from abc import ABC, ABCMeta -from typing import Any, Dict, List, Tuple, Type, cast +from abc import ABC +from typing import Any, Dict, List from dotenv import load_dotenv @@ -16,24 +14,12 @@ MOCK_TEST = mock_test_str.lower() == "true" if mock_test_str else False -class ChallengeMeta(ABCMeta): - def __init__(self, name: str, bases: Tuple[Type, ...], dct: Dict[str, Any]) -> None: - super().__init__(name, bases, dct) - try: - frame = cast(types.FrameType, inspect.currentframe()) - assert frame.f_back is not None - self.CHALLENGE_LOCATION = os.path.dirname(inspect.getfile(frame.f_back)) - except Exception as e: - print(f"Unable to get the file from 8 frames back due to: {str(e)}") - raise e - - -class Challenge(ABC, metaclass=ChallengeMeta): +class Challenge(ABC): """The parent class to all specific challenges classes. 
Defines helper methods for running a challenge""" _data_cache: Dict[str, ChallengeData] = {} - CHALLENGE_LOCATION: str + CHALLENGE_LOCATION: str = "" @property def data(self) -> ChallengeData: @@ -54,10 +40,10 @@ def setup_challenge(self, config: Dict[str, Any]) -> None: from agbenchmark.agent_interface import copy_artifacts_into_workspace, run_agent copy_artifacts_into_workspace( - config["workspace"], "artifacts_in", self.__class__.CHALLENGE_LOCATION + config["workspace"], "artifacts_in", self.CHALLENGE_LOCATION ) - run_agent(self.task, config, self.__class__.CHALLENGE_LOCATION) + run_agent(self.task, config, self.CHALLENGE_LOCATION) def test_method(self, config: Dict[str, Any]) -> None: raise NotImplementedError diff --git a/agbenchmark/challenges/code/d1/data.json b/agbenchmark/challenges/code/d1/data.json index 6ac284b81f1..0c724600050 100644 --- a/agbenchmark/challenges/code/d1/data.json +++ b/agbenchmark/challenges/code/d1/data.json @@ -1,4 +1,5 @@ { + "name": "TestDebugSimpleTypoWithGuidance", "category": ["code"], "task": "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n", "dependencies": ["TestReadFile", "TestWriteFile"], diff --git a/agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_test.py b/agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_test.py deleted file mode 100644 index d104b337450..00000000000 --- a/agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_test.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestDebugSimpleTypoWithGuidance(Challenge): - """The first memory challenge""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - - scores = self.get_scores(config) - assert 1 in scores diff --git a/agbenchmark/challenges/code/d2/d2_test.py b/agbenchmark/challenges/code/d2/d2_test.py deleted file mode 100644 index 
b02114a753d..00000000000 --- a/agbenchmark/challenges/code/d2/d2_test.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestDebugSimpleTypoWithoutGuidance(Challenge): - """The first memory challenge""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - - scores = self.get_scores(config) - - assert 1 in scores diff --git a/agbenchmark/challenges/code/d2/data.json b/agbenchmark/challenges/code/d2/data.json index 3de5111f568..2923010949b 100644 --- a/agbenchmark/challenges/code/d2/data.json +++ b/agbenchmark/challenges/code/d2/data.json @@ -1,4 +1,5 @@ { + "name": "TestDebugSimpleTypoWithoutGuidance", "category": ["code"], "task": "Make test.py run without errors.", "dependencies": ["TestDebugSimpleTypoWithGuidance"], diff --git a/agbenchmark/challenges/define_task_types.py b/agbenchmark/challenges/define_task_types.py index 308cb5ea6f1..94cba5b724c 100644 --- a/agbenchmark/challenges/define_task_types.py +++ b/agbenchmark/challenges/define_task_types.py @@ -19,6 +19,7 @@ class Ground(BaseModel): class ChallengeData(BaseModel): + name: str category: List[str] task: str dependencies: List[str] diff --git a/agbenchmark/challenges/interface/read_file/data.json b/agbenchmark/challenges/interface/read_file/data.json index dd399fabf5e..c827581b6bf 100644 --- a/agbenchmark/challenges/interface/read_file/data.json +++ b/agbenchmark/challenges/interface/read_file/data.json @@ -1,5 +1,5 @@ { - "name": "ReadFile", + "name": "TestReadFile", "category": ["interface"], "task": "Write the string 'random string' before any existing text to the file called file_to_check.txt", "dependencies": ["TestWriteFile"], diff --git a/agbenchmark/challenges/interface/read_file/read_file_test.py b/agbenchmark/challenges/interface/read_file/read_file_test.py deleted file mode 100644 index 591d0a744a7..00000000000 --- a/agbenchmark/challenges/interface/read_file/read_file_test.py +++ 
/dev/null @@ -1,12 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestReadFile(Challenge): - """Testing if LLM can read a file""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - scores = self.get_scores(config) - assert 1 in scores diff --git a/agbenchmark/challenges/interface/write_file/data.json b/agbenchmark/challenges/interface/write_file/data.json index b3e4b6f0270..2be2d0dfe8d 100644 --- a/agbenchmark/challenges/interface/write_file/data.json +++ b/agbenchmark/challenges/interface/write_file/data.json @@ -1,5 +1,5 @@ { - "name": "WriteFile", + "name": "TestWriteFile", "category": ["interface"], "task": "Print the the capital of America to a .txt file", "dependencies": [], diff --git a/agbenchmark/challenges/interface/write_file/write_file_test.py b/agbenchmark/challenges/interface/write_file/write_file_test.py deleted file mode 100644 index 4a52b097965..00000000000 --- a/agbenchmark/challenges/interface/write_file/write_file_test.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestWriteFile(Challenge): - """Testing if LLM can write to a file""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - - scores = self.get_scores(config) - assert 1 in scores diff --git a/agbenchmark/challenges/memory/m1/data.json b/agbenchmark/challenges/memory/m1/data.json index f771a2669b4..506b246ad02 100644 --- a/agbenchmark/challenges/memory/m1/data.json +++ b/agbenchmark/challenges/memory/m1/data.json @@ -1,4 +1,5 @@ { + "name": "TestBasicMemory", "category": ["memory"], "task": "Follow the instructions in the instructions_1.txt file", "dependencies": ["TestReadFile", "TestWriteFile"], diff --git a/agbenchmark/challenges/memory/m1/m1_test.py b/agbenchmark/challenges/memory/m1/m1_test.py deleted file mode 100644 index 0fc537eeb43..00000000000 --- 
a/agbenchmark/challenges/memory/m1/m1_test.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestBasicMemory(Challenge): - """The first memory challenge""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - - scores = self.get_scores(config) - assert 1 in scores diff --git a/agbenchmark/challenges/memory/m2/data.json b/agbenchmark/challenges/memory/m2/data.json index 998e894b16c..7ef2552d1cc 100644 --- a/agbenchmark/challenges/memory/m2/data.json +++ b/agbenchmark/challenges/memory/m2/data.json @@ -1,4 +1,5 @@ { + "name": "TestRememberMultipleIds", "category": ["memory"], "task": "Follow the instructions in the instructions_1.txt file", "dependencies": ["TestBasicMemory"], diff --git a/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py b/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py deleted file mode 100644 index c88f288311a..00000000000 --- a/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestRememberMultipleIds(Challenge): - """The first memory challenge""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - - scores = self.get_scores(config) - assert 1 in scores diff --git a/agbenchmark/challenges/memory/m3/data.json b/agbenchmark/challenges/memory/m3/data.json index d5d95b1de26..720cce93c0c 100644 --- a/agbenchmark/challenges/memory/m3/data.json +++ b/agbenchmark/challenges/memory/m3/data.json @@ -1,4 +1,5 @@ { + "name": "TestRememberMultipleIdsWithNoise", "category": ["memory"], "task": "Follow the instructions in the instructions_1.txt file", "dependencies": ["TestRememberMultipleIds"], diff --git a/agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py b/agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py 
deleted file mode 100644 index 0e35dd2f47b..00000000000 --- a/agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestRememberMultipleIdsWithNoise(Challenge): - """The first memory challenge""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - - scores = self.get_scores(config) - assert 1 in scores diff --git a/agbenchmark/challenges/memory/m4/data.json b/agbenchmark/challenges/memory/m4/data.json index 49831537e5a..61965206bdb 100644 --- a/agbenchmark/challenges/memory/m4/data.json +++ b/agbenchmark/challenges/memory/m4/data.json @@ -1,4 +1,5 @@ { + "name": "TestRememberMultiplePhrasesWithNoise", "category": ["memory"], "task": "Follow the instructions in the instructions_1.txt file", "dependencies": ["TestRememberMultipleIdsWithNoise"], diff --git a/agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py b/agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py deleted file mode 100644 index 4c4bdce55e6..00000000000 --- a/agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestRememberMultiplePhrasesWithNoise(Challenge): - """The first memory challenge""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - - scores = self.get_scores(config) - assert 1 in scores diff --git a/agbenchmark/challenges/retrieval/r1/data.json b/agbenchmark/challenges/retrieval/r1/data.json index 6e1344b8bdc..7812c21dae7 100644 --- a/agbenchmark/challenges/retrieval/r1/data.json +++ b/agbenchmark/challenges/retrieval/r1/data.json @@ -1,4 +1,5 @@ { + "name": "TestBasicRetrieval", "category": ["retrieval"], "task": "Write the price of the book in this url 
'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file.", "dependencies": ["TestWriteFile"], diff --git a/agbenchmark/challenges/retrieval/r1/r1_test.py b/agbenchmark/challenges/retrieval/r1/r1_test.py deleted file mode 100644 index 9845a7b2a04..00000000000 --- a/agbenchmark/challenges/retrieval/r1/r1_test.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestRetrieval(Challenge): - """The first information-retrieval challenge""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - - scores = self.get_scores(config) - assert 1 in scores diff --git a/agbenchmark/challenges/retrieval/r2/data.json b/agbenchmark/challenges/retrieval/r2/data.json index 05846b9f37d..5bc2e96b4a5 100644 --- a/agbenchmark/challenges/retrieval/r2/data.json +++ b/agbenchmark/challenges/retrieval/r2/data.json @@ -1,7 +1,8 @@ { + "name": "TestRetrieval2", "category": ["retrieval"], "task": "Write tesla's revenue in 2022 into a .txt file. 
Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 billion).", - "dependencies": ["TestRetrieval"], + "dependencies": ["TestBasicRetrieval"], "ground": { "answer": "81,462", "should_contain": ["81,462"], diff --git a/agbenchmark/challenges/retrieval/r2/r2_test.py b/agbenchmark/challenges/retrieval/r2/r2_test.py deleted file mode 100644 index f0f13ffbf42..00000000000 --- a/agbenchmark/challenges/retrieval/r2/r2_test.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestRetrieval2(Challenge): - """The first information-retrieval challenge""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - - scores = self.get_scores(config) - assert 1 in scores diff --git a/agbenchmark/challenges/retrieval/r3/data.json b/agbenchmark/challenges/retrieval/r3/data.json index 763c963ec4a..b918d3d4e81 100644 --- a/agbenchmark/challenges/retrieval/r3/data.json +++ b/agbenchmark/challenges/retrieval/r3/data.json @@ -1,4 +1,5 @@ { + "name": "TestRetrieval3", "category": ["retrieval"], "task": "Write tesla's revenue every year since its creation into a .txt file. 
Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 billion).", "dependencies": ["TestRetrieval2"], diff --git a/agbenchmark/challenges/retrieval/r3/r3_test.py b/agbenchmark/challenges/retrieval/r3/r3_test.py deleted file mode 100644 index 5887c0b43a5..00000000000 --- a/agbenchmark/challenges/retrieval/r3/r3_test.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import Any, Dict - -from agbenchmark.challenge import Challenge - - -class TestRetrieval3(Challenge): - """The first information-retrieval challenge""" - - def test_method(self, config: Dict[str, Any]) -> None: - self.setup_challenge(config) - - scores = self.get_scores(config) - - assert 1 in scores diff --git a/agbenchmark/challenges/test_all.py b/agbenchmark/challenges/test_all.py new file mode 100644 index 00000000000..4f9e5b7f828 --- /dev/null +++ b/agbenchmark/challenges/test_all.py @@ -0,0 +1,78 @@ +import glob +import importlib +import json +import os +import types +from pathlib import Path +from typing import Any, Dict + +import pytest +from dotenv import load_dotenv + +from agbenchmark.challenge import Challenge + +load_dotenv() + +IMPROVE = os.getenv("IMPROVE", "False") + + +json_files = glob.glob("agbenchmark/challenges/**/data.json", recursive=True) + + +def get_test_path(json_file: str) -> str: + abs_location = os.path.dirname(os.path.abspath(json_file)) + + path = Path(abs_location) + + # Find the index of "agbenchmark" in the path parts + try: + agbenchmark_index = path.parts.index("agbenchmark") + except ValueError: + raise ValueError("Invalid challenge location.") + + # Create the path from "agbenchmark" onwards + challenge_location = Path(*path.parts[agbenchmark_index:]) + + return str(challenge_location) + + +def generate_tests() -> None: + print("Generating tests...") + # Dynamic class creation + for json_file in json_files: + with open(json_file, "r") as f: + data = json.load(f) + + class_name = data.get("name", "") + + challenge_location = 
get_test_path(json_file) + + # Define test class dynamically + challenge_class = types.new_class(class_name, (Challenge,)) + + setattr(challenge_class, "CHALLENGE_LOCATION", challenge_location) + + # Define test method within the dynamically created class + def test_method(self, config: Dict[str, Any]) -> None: # type: ignore + self.setup_challenge(config) + + scores = self.get_scores(config) + assert 1 in scores + + # Parametrize the method here + test_method = pytest.mark.parametrize( + "challenge_data", + [data], + indirect=True, + )(test_method) + + setattr(challenge_class, "test_method", test_method) + + # Attach the new class to a module so it can be discovered by pytest + module = importlib.import_module(__name__) + setattr(module, class_name, challenge_class) + + print(f"Generated test for {class_name}.") + + +generate_tests() diff --git a/agbenchmark/conftest.py b/agbenchmark/conftest.py index 7d3dd8ed310..e321f5a26c8 100644 --- a/agbenchmark/conftest.py +++ b/agbenchmark/conftest.py @@ -88,13 +88,16 @@ def check_regression(request: Any) -> None: test_name = request.node.parent.name data = get_regression_data() + # Get the true location of the test + challenge_location = getattr(request.node.parent.cls, "CHALLENGE_LOCATION", "") + + skip_string = f"Skipping {test_name} at {challenge_location}" + # Check if the test name exists in the regression tests if request.config.getoption("--improve") and data.get(test_name, None): - pytest.skip("Skipping test because it's a regression test and --improve is set") + pytest.skip(f"{skip_string} because it's a regression test") elif request.config.getoption("--maintain") and not data.get(test_name, None): - pytest.skip( - "Skipping test because it's not a regression test and --maintain is set" - ) + pytest.skip(f"{skip_string} because it's not a regression test") # this is to get the challenge_data from every test @@ -109,15 +112,19 @@ def challenge_data(request: Any) -> None: def pytest_runtest_makereport(item: Any, 
call: Any) -> None: if call.when == "call": challenge_data = item.funcargs.get("challenge_data", None) - difficulty = challenge_data.info.difficulty if challenge_data else "unknown" - dependencies = challenge_data.dependencies if challenge_data else [] - parts = item.nodeid.split("::")[0].split("/") - agbenchmark_index = parts.index("agbenchmark") - file_path = "/".join(parts[agbenchmark_index:]) + difficulty = ( + challenge_data["info"]["difficulty"] if challenge_data else "unknown" + ) + dependencies = ( + challenge_data["dependencies"] if challenge_data else [] + ) + # Extract the challenge_location from the class + challenge_location: str = getattr(item.cls, "CHALLENGE_LOCATION", "") + test_details = { "difficulty": difficulty, "dependencies": dependencies, - "test": file_path, + "test": challenge_location, } print("pytest_runtest_makereport", test_details) @@ -132,19 +139,6 @@ def pytest_sessionfinish() -> None: regression_manager.save() -# this is so that all tests can inherit from the Challenge class -def pytest_generate_tests(metafunc: Any) -> None: - if "challenge_data" in metafunc.fixturenames: - # Get the instance of the test class - test_class = metafunc.cls() - - # Generate the parameters - params = test_class.data - - # Add the parameters to the test function - metafunc.parametrize("challenge_data", [params], indirect=True) - - # this is adding the dependency marker and category markers automatically from the json def pytest_collection_modifyitems(items: Any, config: Any) -> None: data = get_regression_data() diff --git a/regression_tests.json b/regression_tests.json index 44334801e5d..6132079179d 100644 --- a/regression_tests.json +++ b/regression_tests.json @@ -1,59 +1,64 @@ { - "TestBasicMemory": { + "TestWriteFile": { "difficulty": "basic", "dependencies": [], - "test": "agbenchmark/challenges/memory/m1/m1_test.py" + "test": "agbenchmark\\challenges\\interface\\write_file" }, - "TestRememberMultipleIds": {
"difficulty": "basic", "dependencies": [ - "TestBasicMemory" + "TestWriteFile" ], - "test": "agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py" + "test": "agbenchmark\\challenges\\interface\\read_file" }, - "TestRememberMultipleIdsWithNoise": { - "difficulty": "medium", + "TestBasicMemory": { + "difficulty": "basic", "dependencies": [ - "TestRememberMultipleIds" + "TestReadFile", + "TestWriteFile" ], - "test": "agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py" + "test": "agbenchmark\\challenges\\memory\\m1" }, - "TestRememberMultiplePhrasesWithNoise": { - "difficulty": "medium", + "TestBasicRetrieval": { + "difficulty": "basic", "dependencies": [ - "TestRememberMultipleIdsWithNoise" + "TestWriteFile" ], - "test": "agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py" + "test": "agbenchmark\\challenges\\retrieval\\r1" }, - "TestRetrieval": { + "TestRememberMultipleIds": { "difficulty": "basic", - "dependencies": [], - "test": "agbenchmark/challenges/retrieval/r1/r1_test.py" + "dependencies": [ + "TestBasicMemory" + ], + "test": "agbenchmark\\challenges\\memory\\m2" }, "TestRetrieval2": { "difficulty": "basic", "dependencies": [ - "TestRetrieval" + "TestBasicRetrieval" + ], + "test": "agbenchmark\\challenges\\retrieval\\r2" + }, + "TestRememberMultipleIdsWithNoise": { + "difficulty": "medium", + "dependencies": [ + "TestRememberMultipleIds" ], - "test": "agbenchmark/challenges/retrieval/r2/r2_test.py" + "test": "agbenchmark\\challenges\\memory\\m3" }, "TestRetrieval3": { "difficulty": "basic", "dependencies": [ "TestRetrieval2" ], - "test": "agbenchmark/challenges/retrieval/r3/r3_test.py" + "test": "agbenchmark\\challenges\\retrieval\\r3" }, - "TestWriteFile": { - "difficulty": "basic", - "dependencies": [], - "test": "agbenchmark/challenges/interface/write_file/write_file_test.py" - }, - "TestReadFile": { - "difficulty": "basic", + "TestRememberMultiplePhrasesWithNoise": { + "difficulty": "medium", 
"dependencies": [ - "TestWriteFile" + "TestRememberMultipleIdsWithNoise" ], - "test": "agbenchmark/challenges/interface/read_file/read_file_test.py" + "test": "agbenchmark\\challenges\\memory\\m4" } } \ No newline at end of file