This repository has been archived by the owner on Jun 9, 2024. It is now read-only.

adding hook to integrate agnostically #42

Merged
merged 4 commits on Jun 30, 2023
2 changes: 1 addition & 1 deletion .env.example
@@ -1,3 +1,3 @@
AGENT_NAME=mini-agi
AGENT_TIMEOUT=60
ENVIRONMENT=local
MOCK_TEST=False
17 changes: 8 additions & 9 deletions agbenchmark/Challenge.py
@@ -4,7 +4,7 @@
from abc import ABC, abstractmethod
from agbenchmark.challenges.define_task_types import Ground
from agbenchmark.challenges.define_task_types import ChallengeData
from dotenv import load_dotenv, set_key
from dotenv import load_dotenv

load_dotenv()

@@ -23,6 +23,7 @@ def get_file_path(self) -> str:

@property
def data(self) -> ChallengeData:
# TODO: make it so that this is cached somewhere to just call self.deserialized_data
return ChallengeData.deserialize(self.get_file_path())

@property
@@ -37,25 +38,23 @@ def task(self):

@property
def dependencies(self) -> list:
print("self.data.dependencies", self.data.dependencies)
return self.data.dependencies

def setup_challenge(self, config):
from agbenchmark.agent_interface import run_agent

run_agent(self.task, self.mock, config)

@property
def name(self) -> str:
print("self.data.name", self.data.name)
return self.data.name

@pytest.mark.parametrize(
"run_agent",
[(task, mock)],
indirect=True,
)
@pytest.mark.parametrize(
"challenge_data",
[data],
indirect=True,
)
def test_method(self, workspace):
def test_method(self, config):
raise NotImplementedError

@staticmethod
70 changes: 70 additions & 0 deletions agbenchmark/agent_interface.py
@@ -0,0 +1,70 @@
import os
import importlib
import time
from agbenchmark.mocks.MockManager import MockManager
from multiprocessing import Process, Pipe

from dotenv import load_dotenv

load_dotenv()

MOCK_FLAG = os.getenv("MOCK_TEST")


def run_agent(task, mock_func, config):
"""Calling to get a response"""

if mock_func == None and MOCK_FLAG == "True":
print("No mock provided")
elif MOCK_FLAG == "True":
mock_manager = MockManager(
task
) # workspace doesn't need to be passed in, stays the same
print("Server unavailable, using mock", mock_func)
mock_manager.delegate(mock_func)
else:
timeout = config["cutoff"]
print(f"Running Python function '{config['func_path']}' with timeout {timeout}")

parent_conn, child_conn = Pipe()

# Import the specific agent dynamically
module_name = config["func_path"].replace("/", ".").rstrip(".py")
module = importlib.import_module(module_name)
run_specific_agent = getattr(module, "run_specific_agent")

process = Process(target=run_specific_agent, args=(task, child_conn))
process.start()
start_time = time.time()

while True:
if (
parent_conn.poll()
): # Check if there's a new message from the child process
response, cycle_count = parent_conn.recv()
print(f"Cycle {cycle_count}: {response}")

if cycle_count >= config["cutoff"]:
print(
f"Cycle count has reached the limit of {config['cutoff']}. Terminating."
)
child_conn.send("terminate")
break

if time.time() - start_time > timeout:
print(
"The Python function has exceeded the time limit and was terminated."
)
child_conn.send(
"terminate"
) # Send a termination signal to the child process
break

if not process.is_alive():
print("The Python function has finished running.")
break

process.join()


ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"
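One caveat in the hook above: `config["func_path"].replace("/", ".").rstrip(".py")` relies on `str.rstrip`, which strips a set of characters rather than a literal suffix, so a `func_path` whose stem ends in `p`, `y`, or `.` would be truncated too far. A minimal sketch of a suffix-safe version of that import step; this is an illustration, not part of the PR, and `load_run_function` is a hypothetical helper name:

```python
import importlib


def load_run_function(func_path: str):
    """Resolve e.g. "agent/benchmarks.py" to module "agent.benchmarks"
    and return its run_specific_agent callable."""
    module_name = func_path.replace("/", ".")
    if module_name.endswith(".py"):
        # Drop the suffix explicitly instead of rstrip(".py"),
        # which removes characters, not a trailing substring.
        module_name = module_name[: -len(".py")]
    module = importlib.import_module(module_name)
    return getattr(module, "run_specific_agent")
```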
7 changes: 3 additions & 4 deletions agbenchmark/challenges/retrieval/r1/r1_test.py
@@ -1,6 +1,4 @@
import pytest
from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
from agbenchmark.challenges.define_task_types import ChallengeData, Ground
import os


@@ -10,8 +8,9 @@ class TestRetrieval1(RetrievalChallenge):
def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "r1_data.json")

def test_method(self, workspace):
files_contents = self.open_files(workspace, self.data.ground.files)
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)

scores = []
for file_content in files_contents:
4 changes: 3 additions & 1 deletion agbenchmark/config.json
@@ -1,3 +1,5 @@
{
"hostname": "localhost"
"workspace": "C:\\Users\\silen\\miniagi",
"func_path": "agent/benchmarks.py",
"cutoff": 60
}
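For orientation, the three keys this file now carries are the whole integration surface the PR adds; a short sketch of how the benchmark side consumes them (illustrative only; the committed `workspace` value above is a machine-specific Windows path that an integrator would replace with their own directory):

```python
import json

# Load the benchmark config and show what each key is used for.
with open("agbenchmark/config.json") as f:
    config = json.load(f)

print(config["workspace"])  # directory challenges read/write files in via open_files
print(config["func_path"])  # Python file exposing run_specific_agent(task, conn)
print(config["cutoff"])     # hard limit (seconds and cycles) enforced by run_agent
```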
58 changes: 0 additions & 58 deletions agbenchmark/conftest.py
@@ -3,13 +3,6 @@
import pytest
import shutil
from agbenchmark.tests.regression.RegressionManager import RegressionManager
import requests
from agbenchmark.mocks.MockManager import MockManager
import subprocess
from agbenchmark.Challenge import Challenge
from dotenv import load_dotenv

load_dotenv()


@pytest.fixture(scope="module")
@@ -44,47 +37,6 @@ def pytest_addoption(parser):
parser.addoption("--mock", action="store_true", default=False)


AGENT_NAME = os.getenv("AGENT_NAME")
AGENT_TIMEOUT = os.getenv("AGENT_TIMEOUT")


@pytest.fixture(autouse=True)
def run_agent(request, config):
"""Calling to get a response"""
if isinstance(request.param, tuple):
task = request.param[0] # The task is passed in indirectly
mock_function_name = request.param[1] or None
else:
task = request.param
mock_function_name = None

if mock_function_name != None and (request.config.getoption("--mock")):
if mock_function_name:
mock_manager = MockManager(
task
) # workspace doesn't need to be passed in, stays the same
print("Server unavailable, using mock", mock_function_name)
mock_manager.delegate(mock_function_name)
else:
print("No mock provided")
else:
path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")

try:
timeout = int(AGENT_TIMEOUT) if AGENT_TIMEOUT is not None else 60

subprocess.run(
["python", "miniagi.py", task],
check=True,
cwd=path,
timeout=timeout
# text=True,
# capture_output=True
)
except subprocess.TimeoutExpired:
print("The subprocess has exceeded the time limit and was terminated.")


regression_json = "agbenchmark/tests/regression/regression_tests.json"

regression_manager = RegressionManager(regression_json)
@@ -141,13 +93,3 @@ def pytest_generate_tests(metafunc):

# Add the parameters to the test function
metafunc.parametrize("challenge_data", [params], indirect=True)

if "run_agent" in metafunc.fixturenames:
# Get the instance of the test class
test_class = metafunc.cls()

# Generate the parameters
params = [(test_class.task, test_class.mock)]

# Add the parameters to the test function
metafunc.parametrize("run_agent", params, indirect=True)
1 change: 1 addition & 0 deletions agbenchmark/mocks/workspace/file_to_check.txt
@@ -0,0 +1 @@
Washington DC is the capital of the United States of America
12 changes: 11 additions & 1 deletion agbenchmark/start_benchmark.py
@@ -29,7 +29,17 @@ def start(category, noreg, mock):

config["workspace"] = click.prompt(
"Please enter a new workspace path",
default=os.path.join(Path.home(), "miniagi"),
default=os.path.join(Path.home(), "workspace"),
)

config["func_path"] = click.prompt(
"Please enter a the path to your run_specific_agent function implementation",
default="/benchmarks.py",
)

config["cutoff"] = click.prompt(
"Please enter a hard cutoff runtime for your agent",
default="60",
)

with open(config_dir, "w") as f:
7 changes: 3 additions & 4 deletions agbenchmark/tests/basic_abilities/read_file/read_file_test.py
@@ -17,10 +17,9 @@ def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "r_file_data.json")

@pytest.mark.depends(on=["basic_write_file"], name="basic_read_file")
def test_method(
self, workspace
): # run_test is a common name that all tests must implement
files_contents = self.open_files(workspace, self.data.ground.files)
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)

scores = []
for file_content in files_contents:
agbenchmark/tests/basic_abilities/write_file/write_file_test.py
@@ -10,9 +10,9 @@ def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "w_file_data.json")

@pytest.mark.depends(on=[], name="basic_write_file")
def test_method(self, workspace):
print("my workspace is ", workspace)
files_contents = self.open_files(workspace, self.data.ground.files)
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)

scores = []
for file_content in files_contents:
8 changes: 1 addition & 7 deletions agbenchmark/tests/regression/regression_tests.json
@@ -1,7 +1 @@
{
"TestWriteFile": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0-run_agent0]"
}
}
{}
15 changes: 15 additions & 0 deletions agent/benchmarks.py
@@ -0,0 +1,15 @@
# import subprocess


def run_specific_agent(task, conn):
cycle_count = 0
while (
not conn.poll()
): # Check if there's a termination signal from the main process
response = run_agent(task) # run the agent and get the response and cycle count

if response:
cycle_count += 1

# Send response and cycle count back to the main process
conn.send((response, cycle_count))
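The template above references a `run_agent` helper that this file does not define (the `subprocess` import it would presumably use is commented out), so each agent has to supply its own implementation. A minimal hypothetical completion, where `my_agent_step` is a placeholder for whatever single-step entry point the real agent exposes; only the Pipe protocol, sending `(response, cycle_count)` tuples and stopping once the benchmark sends a message, is dictated by `agbenchmark/agent_interface.py`:

```python
# Hypothetical example only: my_agent_step is not part of this PR.
def my_agent_step(task: str) -> str:
    # Stand-in for one cycle of the real agent working on the task.
    return f"progress on: {task}"


def run_specific_agent(task, conn):
    cycle_count = 0
    while not conn.poll():  # exit once the benchmark sends "terminate"
        response = my_agent_step(task)
        if response:
            cycle_count += 1
            # Report each cycle's output back to the benchmark process.
            conn.send((response, cycle_count))
```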