This repository has been archived by the owner on Jun 9, 2024. It is now read-only.

adding hook to integrate agnostically #42

Merged
merged 4 commits on Jun 30, 2023
2 changes: 1 addition & 1 deletion .env.example
@@ -1,3 +1,3 @@
AGENT_NAME=mini-agi
AGENT_TIMEOUT=60
ENVIRONMENT=local
MOCK_TEST=False
17 changes: 8 additions & 9 deletions agbenchmark/Challenge.py
@@ -4,7 +4,7 @@
from abc import ABC, abstractmethod
from agbenchmark.challenges.define_task_types import Ground
from agbenchmark.challenges.define_task_types import ChallengeData
from dotenv import load_dotenv, set_key
from dotenv import load_dotenv

load_dotenv()

@@ -23,6 +23,7 @@ def get_file_path(self) -> str:

@property
def data(self) -> ChallengeData:
# TODO: make it so that this is cached somewhere to just call self.deserialized_data
return ChallengeData.deserialize(self.get_file_path())

@property
@@ -37,25 +38,23 @@ def task(self):

@property
def dependencies(self) -> list:
print("self.data.dependencies", self.data.dependencies)
return self.data.dependencies

def setup_challenge(self, config):
from agbenchmark.agent_interface import run_agent

run_agent(self.task, self.mock, config)

@property
def name(self) -> str:
print("self.data.name", self.data.name)
return self.data.name

@pytest.mark.parametrize(
"run_agent",
[(task, mock)],
indirect=True,
)
@pytest.mark.parametrize(
"challenge_data",
[data],
indirect=True,
)
def test_method(self, workspace):
def test_method(self, config):
raise NotImplementedError

@staticmethod
70 changes: 70 additions & 0 deletions agbenchmark/agent_interface.py
@@ -0,0 +1,70 @@
import os
import importlib
import time
from agbenchmark.mocks.MockManager import MockManager
from multiprocessing import Process, Pipe

from dotenv import load_dotenv

load_dotenv()

MOCK_FLAG = os.getenv("MOCK_TEST")


def run_agent(task, mock_func, config):
"""Calling to get a response"""

if mock_func == None and MOCK_FLAG == "True":
print("No mock provided")
elif MOCK_FLAG == "True":
mock_manager = MockManager(
task
) # workspace doesn't need to be passed in, stays the same
print("Server unavailable, using mock", mock_func)
mock_manager.delegate(mock_func)
else:
timeout = config["cutoff"]
print(f"Running Python function '{config['func_path']}' with timeout {timeout}")

parent_conn, child_conn = Pipe()

# Import the specific agent dynamically
module_name = config["func_path"].replace("/", ".").rstrip(".py")
module = importlib.import_module(module_name)
run_specific_agent = getattr(module, "run_specific_agent")

process = Process(target=run_specific_agent, args=(task, child_conn))
process.start()
start_time = time.time()

while True:
if (
parent_conn.poll()
): # Check if there's a new message from the child process
response, cycle_count = parent_conn.recv()
print(f"Cycle {cycle_count}: {response}")

if cycle_count >= config["cutoff"]:
print(
f"Cycle count has reached the limit of {config['cutoff']}. Terminating."
)
child_conn.send("terminate")
break

if time.time() - start_time > timeout:
print(
"The Python function has exceeded the time limit and was terminated."
)
child_conn.send(
"terminate"
) # Send a termination signal to the child process
break

if not process.is_alive():
print("The Python function has finished running.")
break

process.join()


ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"
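One caveat in the hook above: `config["func_path"].replace("/", ".").rstrip(".py")` relies on `str.rstrip`, which strips a set of characters rather than a literal suffix, so a `func_path` whose stem ends in `p`, `y`, or `.` would be truncated too far. A minimal sketch of a suffix-safe version of that import step; this is an illustration, not part of the PR, and `load_run_function` is a hypothetical helper name:

```python
import importlib


def load_run_function(func_path: str):
    """Resolve e.g. "agent/benchmarks.py" to module "agent.benchmarks"
    and return its run_specific_agent callable."""
    module_name = func_path.replace("/", ".")
    if module_name.endswith(".py"):
        # Drop the suffix explicitly instead of rstrip(".py"),
        # which removes characters, not a trailing substring.
        module_name = module_name[: -len(".py")]
    module = importlib.import_module(module_name)
    return getattr(module, "run_specific_agent")
```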
7 changes: 3 additions & 4 deletions agbenchmark/challenges/retrieval/r1/r1_test.py
@@ -1,6 +1,4 @@
import pytest
from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
from agbenchmark.challenges.define_task_types import ChallengeData, Ground
import os


@@ -10,8 +8,9 @@ class TestRetrieval1(RetrievalChallenge):
def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "r1_data.json")

def test_method(self, workspace):
files_contents = self.open_files(workspace, self.data.ground.files)
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)

scores = []
for file_content in files_contents:
4 changes: 3 additions & 1 deletion agbenchmark/config.json
@@ -1,3 +1,5 @@
{
"hostname": "localhost"
"workspace": "C:\\Users\\silen\\miniagi",
"func_path": "agent/benchmarks.py",
"cutoff": 60
}
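For orientation, the three keys this file now carries are the whole integration surface the PR adds; a short sketch of how the benchmark side consumes them (illustrative only; the committed `workspace` value above is a machine-specific Windows path that an integrator would replace with their own directory):

```python
import json

# Load the benchmark config and show what each key is used for.
with open("agbenchmark/config.json") as f:
    config = json.load(f)

print(config["workspace"])  # directory challenges read/write files in via open_files
print(config["func_path"])  # Python file exposing run_specific_agent(task, conn)
print(config["cutoff"])     # hard limit (seconds and cycles) enforced by run_agent
```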
58 changes: 0 additions & 58 deletions agbenchmark/conftest.py
@@ -3,13 +3,6 @@
import pytest
import shutil
from agbenchmark.tests.regression.RegressionManager import RegressionManager
import requests
from agbenchmark.mocks.MockManager import MockManager
import subprocess
from agbenchmark.Challenge import Challenge
from dotenv import load_dotenv

load_dotenv()


@pytest.fixture(scope="module")
@@ -44,47 +37,6 @@ def pytest_addoption(parser):
parser.addoption("--mock", action="store_true", default=False)


AGENT_NAME = os.getenv("AGENT_NAME")
AGENT_TIMEOUT = os.getenv("AGENT_TIMEOUT")


@pytest.fixture(autouse=True)
def run_agent(request, config):
"""Calling to get a response"""
if isinstance(request.param, tuple):
task = request.param[0] # The task is passed in indirectly
mock_function_name = request.param[1] or None
else:
task = request.param
mock_function_name = None

if mock_function_name != None and (request.config.getoption("--mock")):
if mock_function_name:
mock_manager = MockManager(
task
) # workspace doesn't need to be passed in, stays the same
print("Server unavailable, using mock", mock_function_name)
mock_manager.delegate(mock_function_name)
else:
print("No mock provided")
else:
path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")

try:
timeout = int(AGENT_TIMEOUT) if AGENT_TIMEOUT is not None else 60

subprocess.run(
["python", "miniagi.py", task],
check=True,
cwd=path,
timeout=timeout
# text=True,
# capture_output=True
)
except subprocess.TimeoutExpired:
print("The subprocess has exceeded the time limit and was terminated.")


regression_json = "agbenchmark/tests/regression/regression_tests.json"

regression_manager = RegressionManager(regression_json)
@@ -141,13 +93,3 @@ def pytest_generate_tests(metafunc):

# Add the parameters to the test function
metafunc.parametrize("challenge_data", [params], indirect=True)

if "run_agent" in metafunc.fixturenames:
# Get the instance of the test class
test_class = metafunc.cls()

# Generate the parameters
params = [(test_class.task, test_class.mock)]

# Add the parameters to the test function
metafunc.parametrize("run_agent", params, indirect=True)
1 change: 1 addition & 0 deletions agbenchmark/mocks/workspace/file_to_check.txt
@@ -0,0 +1 @@
Washington DC is the capital of the United States of America
12 changes: 11 additions & 1 deletion agbenchmark/start_benchmark.py
@@ -29,7 +29,17 @@ def start(category, noreg, mock):

config["workspace"] = click.prompt(
"Please enter a new workspace path",
default=os.path.join(Path.home(), "miniagi"),
default=os.path.join(Path.home(), "workspace"),
)

config["func_path"] = click.prompt(
"Please enter a the path to your run_specific_agent function implementation",
default="/benchmarks.py",
)

config["cutoff"] = click.prompt(
"Please enter a hard cutoff runtime for your agent",
default="60",
)

with open(config_dir, "w") as f:
7 changes: 3 additions & 4 deletions agbenchmark/tests/basic_abilities/read_file/read_file_test.py
@@ -17,10 +17,9 @@ def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "r_file_data.json")

@pytest.mark.depends(on=["basic_write_file"], name="basic_read_file")
def test_method(
self, workspace
): # run_test is a common name that all tests must implement
files_contents = self.open_files(workspace, self.data.ground.files)
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)

scores = []
for file_content in files_contents:
agbenchmark/tests/basic_abilities/write_file/write_file_test.py
@@ -10,9 +10,9 @@ def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "w_file_data.json")

@pytest.mark.depends(on=[], name="basic_write_file")
def test_method(self, workspace):
print("my workspace is ", workspace)
files_contents = self.open_files(workspace, self.data.ground.files)
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)

scores = []
for file_content in files_contents:
8 changes: 1 addition & 7 deletions agbenchmark/tests/regression/regression_tests.json
@@ -1,7 +1 @@
{
"TestWriteFile": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0-run_agent0]"
}
}
{}
15 changes: 15 additions & 0 deletions agent/benchmarks.py
@@ -0,0 +1,15 @@
# import subprocess


def run_specific_agent(task, conn):
cycle_count = 0
while (
not conn.poll()
): # Check if there's a termination signal from the main process
response = run_agent(task) # run the agent and get the response and cycle count

if response:
cycle_count += 1

# Send response and cycle count back to the main process
conn.send((response, cycle_count))
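The template above references a `run_agent` helper that this file does not define (the `subprocess` import it would presumably use is commented out), so each agent has to supply its own implementation. A minimal hypothetical completion, where `my_agent_step` is a placeholder for whatever single-step entry point the real agent exposes; only the Pipe protocol, sending `(response, cycle_count)` tuples and stopping once the benchmark sends a message, is dictated by `agbenchmark/agent_interface.py`:

```python
# Hypothetical example only: my_agent_step is not part of this PR.
def my_agent_step(task: str) -> str:
    # Stand-in for one cycle of the real agent working on the task.
    return f"progress on: {task}"


def run_specific_agent(task, conn):
    cycle_count = 0
    while not conn.poll():  # exit once the benchmark sends "terminate"
        response = my_agent_step(task)
        if response:
            cycle_count += 1
            # Report each cycle's output back to the benchmark process.
            conn.send((response, cycle_count))
```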