diff --git a/agbenchmark/README.md b/agbenchmark/README.md
index a478f83f319..01f602dc626 100644
--- a/agbenchmark/README.md
+++ b/agbenchmark/README.md
@@ -53,9 +53,6 @@ import os
 class TestWriteFile(BasicChallenge):
     """Testing if LLM can write to a file"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(os.path.dirname(__file__), "w_file_data.json")
-
     @pytest.mark.depends(on=[], name="basic_write_file")
     def test_method(self, workspace):
         # implement scoring logic by looking at workspace
diff --git a/agbenchmark/challenge.py b/agbenchmark/challenge.py
index 4c8e6984817..29bc3ff9175 100644
--- a/agbenchmark/challenge.py
+++ b/agbenchmark/challenge.py
@@ -3,7 +3,7 @@
 import os
 import subprocess
 import types
-from abc import ABC, ABCMeta, abstractmethod
+from abc import ABC, ABCMeta
 from typing import Any, Dict, List, Optional, Tuple, Type, cast
 
 import pytest
@@ -35,20 +35,15 @@ class Challenge(ABC, metaclass=ChallengeMeta):
     Defines helper methods for running a challenge"""
 
     _data_cache: Dict[str, ChallengeData] = {}
-
-    @abstractmethod
-    def get_file_path(self) -> str:
-        """This should be implemented by any class which inherits from BasicChallenge"""
-        pass
+    CHALLENGE_LOCATION: str  # directory that holds this challenge's data.json
 
     @property
     def data(self) -> ChallengeData:
         "Check if the data is already loaded, if not load it"
-        file_path = (
-            self.get_file_path()
-        )  # file_path serves as the key in the cache dictionary
+        # file_path serves as the key in the cache dictionary; the membership
+        # check below is kept so data.json is deserialized once per location.
+        file_path = f"{self.CHALLENGE_LOCATION}/data.json"
         if file_path not in Challenge._data_cache:
             Challenge._data_cache[file_path] = ChallengeData.deserialize(file_path)
         return Challenge._data_cache[file_path]
 
     @property
diff --git a/agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_data.json b/agbenchmark/challenges/code/d1/data.json
similarity index 100%
rename from agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_data.json
rename to agbenchmark/challenges/code/d1/data.json
diff --git a/agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_test.py b/agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_test.py
index e5f50c700b9..16a12ae414d 100644
--- a/agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_test.py
+++ b/agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,11 +8,6 @@
 class TestDebugSimpleTypoWithGuidance(CodeChallenge):
     """The first memory challenge"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(
-            os.path.dirname(__file__), "debug_simple_typo_with_guidance_data.json"
-        )
-
     @pytest.mark.depends(name="test_debug_simple_typo_with_guidance")
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)
diff --git a/agbenchmark/challenges/code/d2/d2_test.py b/agbenchmark/challenges/code/d2/d2_test.py
index d49f9dfe9de..7a5988b9468 100644
--- a/agbenchmark/challenges/code/d2/d2_test.py
+++ b/agbenchmark/challenges/code/d2/d2_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,9 +8,6 @@
 class TestDebugSimpleTypoWithoutGuidance(CodeChallenge):
     """The first memory challenge"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(os.path.dirname(__file__), "d2_data.json")
-
     @pytest.mark.depends(
         name="test_debug_simple_typo_without_guidance",
         depends=["test_debug_simple_typo_with_guidance"],
diff --git a/agbenchmark/challenges/code/d2/d2_data.json b/agbenchmark/challenges/code/d2/data.json
similarity index 100%
rename from agbenchmark/challenges/code/d2/d2_data.json
rename to agbenchmark/challenges/code/d2/data.json
diff --git a/agbenchmark/challenges/memory/m1/m1_data.json b/agbenchmark/challenges/memory/m1/data.json
similarity index 100%
rename from agbenchmark/challenges/memory/m1/m1_data.json
rename to agbenchmark/challenges/memory/m1/data.json
diff --git a/agbenchmark/challenges/memory/m1/m1_test.py b/agbenchmark/challenges/memory/m1/m1_test.py
index c1f3702447d..9e5e0a775a0 100644
--- a/agbenchmark/challenges/memory/m1/m1_test.py
+++ b/agbenchmark/challenges/memory/m1/m1_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,9 +8,6 @@
 class TestBasicMemory(MemoryChallenge):
     """The first memory challenge"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(os.path.dirname(__file__), "m1_data.json")
-
     @pytest.mark.depends(name="test_basic_memory")
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)
diff --git a/agbenchmark/challenges/memory/m2/remember_multiple_ids_data.json b/agbenchmark/challenges/memory/m2/data.json
similarity index 100%
rename from agbenchmark/challenges/memory/m2/remember_multiple_ids_data.json
rename to agbenchmark/challenges/memory/m2/data.json
diff --git a/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py b/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py
index f0f2b39712a..6ba38dad33d 100644
--- a/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py
+++ b/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,11 +8,6 @@
 class TestRememberMultipleIds(MemoryChallenge):
     """The first memory challenge"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(
-            os.path.dirname(__file__), "remember_multiple_ids_data.json"
-        )
-
     @pytest.mark.depends(
         name="test_remember_multiple_ids", depends=["test_basic_memory"]
     )
diff --git a/agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_data.json b/agbenchmark/challenges/memory/m3/data.json
similarity index 100%
rename from agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_data.json
rename to agbenchmark/challenges/memory/m3/data.json
diff --git a/agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py b/agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py
index 493ea3574cf..037a6929e9c 100644
--- a/agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py
+++ b/agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,11 +8,6 @@
 class TestRememberMultipleIdsWithNoise(MemoryChallenge):
     """The first memory challenge"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(
-            os.path.dirname(__file__), "remember_multiple_ids_with_noise_data.json"
-        )
-
     @pytest.mark.depends(
         name="test_remember_multiple_ids_with_noise",
         depends=["test_remember_multiple_ids"],
diff --git a/agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_data.json b/agbenchmark/challenges/memory/m4/data.json
similarity index 100%
rename from agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_data.json
rename to agbenchmark/challenges/memory/m4/data.json
diff --git a/agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py b/agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py
index e37e9a38503..2c931af8c8e 100644
--- a/agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py
+++ b/agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,11 +8,6 @@
 class TestRememberMultiplePhrasesWithNoise(MemoryChallenge):
     """The first memory challenge"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(
-            os.path.dirname(__file__), "remember_multiple_phrases_with_noise_data.json"
-        )
-
     @pytest.mark.depends(
         name="test_remember_multiple_phrases_with_noise",
         depends=["test_remember_multiple_ids_with_noise"],
diff --git a/agbenchmark/challenges/retrieval/r1/r1_data.json b/agbenchmark/challenges/retrieval/r1/data.json
similarity index 100%
rename from agbenchmark/challenges/retrieval/r1/r1_data.json
rename to agbenchmark/challenges/retrieval/r1/data.json
diff --git a/agbenchmark/challenges/retrieval/r1/r1_test.py b/agbenchmark/challenges/retrieval/r1/r1_test.py
index 285b8affc45..68d3de4e3ea 100644
--- a/agbenchmark/challenges/retrieval/r1/r1_test.py
+++ b/agbenchmark/challenges/retrieval/r1/r1_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,9 +8,6 @@
 class TestRetrieval(RetrievalChallenge):
     """The first information-retrieval challenge"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(os.path.dirname(__file__), "r1_data.json")
-
     @pytest.mark.depends(name="test_retrieval")
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)
diff --git a/agbenchmark/challenges/retrieval/r2/r2_data.json b/agbenchmark/challenges/retrieval/r2/data.json
similarity index 100%
rename from agbenchmark/challenges/retrieval/r2/r2_data.json
rename to agbenchmark/challenges/retrieval/r2/data.json
diff --git a/agbenchmark/challenges/retrieval/r2/r2_test.py b/agbenchmark/challenges/retrieval/r2/r2_test.py
index ba727b8ed22..5a1a2069097 100644
--- a/agbenchmark/challenges/retrieval/r2/r2_test.py
+++ b/agbenchmark/challenges/retrieval/r2/r2_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,9 +8,6 @@
 class TestRetrieval2(RetrievalChallenge):
     """The first information-retrieval challenge"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(os.path.dirname(__file__), "r2_data.json")
-
     @pytest.mark.depends(on=["test_retrieval"], name="test_retrieval_2")
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)
diff --git a/agbenchmark/challenges/retrieval/r3/r3_data.json b/agbenchmark/challenges/retrieval/r3/data.json
similarity index 100%
rename from agbenchmark/challenges/retrieval/r3/r3_data.json
rename to agbenchmark/challenges/retrieval/r3/data.json
diff --git a/agbenchmark/challenges/retrieval/r3/r3_test.py b/agbenchmark/challenges/retrieval/r3/r3_test.py
index b58f42672b6..c4b4bcf12ca 100644
--- a/agbenchmark/challenges/retrieval/r3/r3_test.py
+++ b/agbenchmark/challenges/retrieval/r3/r3_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,9 +8,6 @@
 class TestRetrieval3(RetrievalChallenge):
     """The first information-retrieval challenge"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(os.path.dirname(__file__), "r3_data.json")
-
     @pytest.mark.depends(on=["test_retrieval_2"], name="test_retrieval_3")
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)
diff --git a/agbenchmark/tests/basic_abilities/read_file/r_file_data.json b/agbenchmark/tests/basic_abilities/read_file/data.json
similarity index 100%
rename from agbenchmark/tests/basic_abilities/read_file/r_file_data.json
rename to agbenchmark/tests/basic_abilities/read_file/data.json
diff --git a/agbenchmark/tests/basic_abilities/read_file/read_file_test.py b/agbenchmark/tests/basic_abilities/read_file/read_file_test.py
index 7c38d2832b2..cf5dceb6958 100644
--- a/agbenchmark/tests/basic_abilities/read_file/read_file_test.py
+++ b/agbenchmark/tests/basic_abilities/read_file/read_file_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,9 +8,6 @@
 class TestReadFile(BasicChallenge):
     """Testing if LLM can read a file"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(os.path.dirname(__file__), "r_file_data.json")
-
     @pytest.mark.depends(on=["basic_write_file"], name="basic_read_file")
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)
diff --git a/agbenchmark/tests/basic_abilities/write_file/w_file_data.json b/agbenchmark/tests/basic_abilities/write_file/data.json
similarity index 100%
rename from agbenchmark/tests/basic_abilities/write_file/w_file_data.json
rename to agbenchmark/tests/basic_abilities/write_file/data.json
diff --git a/agbenchmark/tests/basic_abilities/write_file/write_file_test.py b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py
index 474d6712730..ba03951868b 100644
--- a/agbenchmark/tests/basic_abilities/write_file/write_file_test.py
+++ b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict
 
 import pytest
@@ -9,9 +8,6 @@
 class TestWriteFile(BasicChallenge):
     """Testing if LLM can write to a file"""
 
-    def get_file_path(self) -> str:  # all tests must implement this method
-        return os.path.join(os.path.dirname(__file__), "w_file_data.json")
-
     @pytest.mark.depends(name="basic_write_file")
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)