Skip to content

Commit

Permalink
Retry regression tests (Significant-Gravitas#4648)
Browse files (browse the repository at this point in the history)
  • Loading branch information
waynehamadi authored Jun 11, 2023
1 parent 4e62128 commit 6fb9b6d
Show file tree
Hide file tree
Showing 12 changed files with 63 additions and 83 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,4 @@ pytest-mock
vcrpy @ git+https://github.com/Significant-Gravitas/vcrpy.git@master
pytest-recording
pytest-xdist
flaky
5 changes: 1 addition & 4 deletions tests/challenges/basic_abilities/test_browse_website.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,11 @@
from autogpt.agent import Agent
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key

CYCLE_COUNT = 2


@requires_api_key("OPENAI_API_KEY")
@pytest.mark.vcr
@challenge
@challenge()
def test_browse_website(
browser_agent: Agent,
patched_api_requestor: None,
Expand Down
5 changes: 1 addition & 4 deletions tests/challenges/basic_abilities/test_write_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from autogpt.commands.file_operations import read_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.utils import requires_api_key

CYCLE_COUNT_PER_LEVEL = [1, 1]
EXPECTED_OUTPUTS_PER_LEVEL = [
Expand All @@ -15,9 +14,7 @@
]


@requires_api_key("OPENAI_API_KEY")
@pytest.mark.vcr
@challenge
@challenge()
def test_write_file(
file_system_agents: List[Agent],
patched_api_requestor: None,
Expand Down
93 changes: 52 additions & 41 deletions tests/challenges/challenge_decorator/challenge_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
from typing import Any, Callable, Optional

import pytest
from flaky import flaky # type: ignore

from tests.challenges.challenge_decorator.challenge import Challenge
from tests.challenges.challenge_decorator.challenge_utils import create_challenge
from tests.challenges.challenge_decorator.score_utils import (
get_scores,
update_new_score,
)
from tests.utils import requires_api_key

MAX_LEVEL_TO_IMPROVE_ON = (
1 # we will attempt to beat 1 level above the current level for now.
Expand All @@ -18,52 +20,61 @@
CHALLENGE_FAILED_MESSAGE = "Challenges can sometimes fail randomly, please run this test again and if it fails reach out to us on https://discord.gg/autogpt and reach out to us on the 'challenges' channel to let us know the challenge you're struggling with."


def challenge(func: Callable[..., Any]) -> Callable[..., None]:
@wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> None:
run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
original_error: Optional[Exception] = None
def challenge(
max_runs: int = 2, min_passes: int = 1, api_key: str = "OPENAI_API_KEY"
) -> Callable[[Callable[..., Any]], Callable[..., None]]:
def decorator(func: Callable[..., Any]) -> Callable[..., None]:
@requires_api_key(api_key)
@pytest.mark.vcr
@flaky(max_runs=max_runs, min_passes=min_passes)
@wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> None:
run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
original_error: Optional[Exception] = None

while run_remaining > 0:
current_score, new_score, new_score_location = get_scores()
level_to_run = kwargs["level_to_run"] if "level_to_run" in kwargs else None
challenge = create_challenge(
func, current_score, Challenge.BEAT_CHALLENGES, level_to_run
)
if challenge.level_to_run is not None:
kwargs["level_to_run"] = challenge.level_to_run
try:
func(*args, **kwargs)
challenge.succeeded = True
except AssertionError as err:
original_error = AssertionError(
f"{CHALLENGE_FAILED_MESSAGE}\n{err}"
)
challenge.succeeded = False
except Exception as err:
original_error = err
challenge.succeeded = False
else:
challenge.skipped = True
if os.environ.get("CI") == "true":
new_max_level_beaten = get_new_max_level_beaten(
challenge, Challenge.BEAT_CHALLENGES
while run_remaining > 0:
current_score, new_score, new_score_location = get_scores()
level_to_run = (
kwargs["level_to_run"] if "level_to_run" in kwargs else None
)
update_new_score(
new_score_location, new_score, challenge, new_max_level_beaten
challenge = create_challenge(
func, current_score, Challenge.BEAT_CHALLENGES, level_to_run
)
if challenge.level_to_run is None:
pytest.skip("This test has not been unlocked yet.")
if challenge.level_to_run is not None:
kwargs["level_to_run"] = challenge.level_to_run
try:
func(*args, **kwargs)
challenge.succeeded = True
except AssertionError as err:
original_error = AssertionError(
f"{CHALLENGE_FAILED_MESSAGE}\n{err}"
)
challenge.succeeded = False
except Exception as err:
original_error = err
challenge.succeeded = False
else:
challenge.skipped = True
if os.environ.get("CI") == "true":
new_max_level_beaten = get_new_max_level_beaten(
challenge, Challenge.BEAT_CHALLENGES
)
update_new_score(
new_score_location, new_score, challenge, new_max_level_beaten
)
if challenge.level_to_run is None:
pytest.skip("This test has not been unlocked yet.")

if not challenge.succeeded:
if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge:
pytest.xfail(str(original_error))
if original_error:
raise original_error
run_remaining -= 1

if not challenge.succeeded:
if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge:
# xfail
pytest.xfail(str(original_error))
if original_error:
raise original_error
run_remaining -= 1
return wrapper

return wrapper
return decorator


def get_new_max_level_beaten(
Expand Down
5 changes: 1 addition & 4 deletions tests/challenges/debug_code/test_debug_code_challenge_a.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
get_workspace_path,
run_interaction_loop,
)
from tests.utils import requires_api_key

CYCLE_COUNT = 5
EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"]
Expand All @@ -20,9 +19,7 @@
TEST_FILE_PATH = "test.py"


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
@challenge()
def test_debug_code_challenge_a(
debug_code_agents: Agent,
monkeypatch: pytest.MonkeyPatch,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from autogpt.commands.file_operations import read_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.utils import requires_api_key

CYCLE_COUNT = 3
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
Expand All @@ -13,9 +12,7 @@
OUTPUT_LOCATION = "output.txt"


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
@challenge()
def test_information_retrieval_challenge_a(
information_retrieval_agents: Agent,
monkeypatch: pytest.MonkeyPatch,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,12 @@
from autogpt.commands.file_operations import read_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.utils import requires_api_key

CYCLE_COUNT = 3
OUTPUT_LOCATION = "2010_nobel_prize_winners.txt"


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
@challenge()
def test_information_retrieval_challenge_b(
get_nobel_prize_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,12 @@
from autogpt.commands.file_operations import read_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.utils import requires_api_key

CYCLE_COUNT = 3
OUTPUT_LOCATION = "kube.yaml"


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
@challenge()
def test_kubernetes_template_challenge_a(
kubernetes_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
Expand Down
6 changes: 1 addition & 5 deletions tests/challenges/memory/test_memory_challenge_a.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,11 @@
from autogpt.commands.file_operations import read_file, write_to_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.utils import requires_api_key

OUTPUT_LOCATION = "output.txt"


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
@challenge()
def test_memory_challenge_a(
memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
Expand All @@ -28,7 +25,6 @@ def test_memory_challenge_a(
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
"""

task_id = "2314"
create_instructions_files(memory_management_agent, level_to_run, task_id)

Expand Down
5 changes: 1 addition & 4 deletions tests/challenges/memory/test_memory_challenge_b.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,12 @@
get_workspace_path,
run_interaction_loop,
)
from tests.utils import requires_api_key

NOISE = 1000
OUTPUT_LOCATION = "output.txt"


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
@challenge()
def test_memory_challenge_b(
memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
Expand Down
6 changes: 1 addition & 5 deletions tests/challenges/memory/test_memory_challenge_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,12 @@
get_workspace_path,
run_interaction_loop,
)
from tests.utils import requires_api_key

NOISE = 1000
OUTPUT_LOCATION = "output.txt"


# @pytest.mark.vcr
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
@challenge()
def test_memory_challenge_c(
memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
Expand Down
5 changes: 1 addition & 4 deletions tests/challenges/memory/test_memory_challenge_d.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,13 @@
from autogpt.commands.file_operations import read_file, write_to_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.utils import requires_api_key

LEVEL_CURRENTLY_BEATEN = 1
MAX_LEVEL = 5
OUTPUT_LOCATION = "output.txt"


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
@challenge()
def test_memory_challenge_d(
memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
Expand Down

0 comments on commit 6fb9b6d

Please sign in to comment.