Skip to content
This repository has been archived by the owner on Jun 9, 2024. It is now read-only.

Commit

Permalink
Merge branch 'master' into remove-graphql-logs
Browse files Browse the repository at this point in the history
  • Loading branch information
waynehamadi authored Aug 6, 2023
2 parents 5960d68 + 5a7ad44 commit 92ad915
Show file tree
Hide file tree
Showing 14 changed files with 1,802 additions and 3 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
848 changes: 848 additions & 0 deletions reports/beebot/folder31_08-06-17-15/report.json

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion reports/beebot/regression_tests.json
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
{}
{
"TestAgentProtocol_CreateAgentTask": {
"difficulty": "interface",
"data_path": "agbenchmark/challenges/interface/agent_protocol_suite/1_create_agent_task/data.json"
}
}
47 changes: 47 additions & 0 deletions reports/beebot/success_rate.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
false,
false,
false,
false,
false
],
"TestAdaptSimpleTypoWithGuidance": [
Expand All @@ -41,6 +42,7 @@
false,
false,
false,
false,
false
],
"TestAdaptTeslaRevenue": [
Expand All @@ -63,9 +65,11 @@
false,
false,
false,
false,
false
],
"TestAgentProtocol_CreateAgentTask": [
true,
true,
true
],
Expand All @@ -89,6 +93,7 @@
false,
false,
false,
false,
false
],
"TestBasicMemory": [
Expand All @@ -111,6 +116,7 @@
false,
false,
false,
false,
false
],
"TestBasicRetrieval": [
Expand All @@ -134,6 +140,7 @@
true,
false,
false,
false,
false
],
"TestDebugMultipleTypo": [
Expand All @@ -156,6 +163,7 @@
false,
false,
false,
false,
false
],
"TestDebugSimpleTypoWithGuidance": [
Expand All @@ -178,6 +186,7 @@
false,
false,
false,
false,
false
],
"TestDebugSimpleTypoWithoutGuidance": [
Expand All @@ -200,6 +209,7 @@
false,
false,
false,
false,
false
],
"TestFunctionCodeGeneration": [
Expand All @@ -222,6 +232,7 @@
true,
false,
false,
false,
false
],
"TestGoalDivergence": [
Expand All @@ -245,6 +256,7 @@
false,
false,
false,
false,
false
],
"TestGoalLoss_Hard": [
Expand All @@ -267,6 +279,7 @@
false,
false,
false,
false,
false
],
"TestGoalLoss_Medium": [
Expand All @@ -289,6 +302,7 @@
false,
false,
false,
false,
false
],
"TestGoalLoss_Simple": [
Expand All @@ -311,6 +325,7 @@
false,
false,
false,
false,
false
],
"TestGoalLoss_advanced": [
Expand All @@ -333,6 +348,7 @@
false,
false,
false,
false,
false
],
"TestInstructionFollowing": [
Expand All @@ -355,6 +371,7 @@
false,
false,
false,
false,
false
],
"TestPlanCreation": [
Expand All @@ -378,6 +395,7 @@
true,
false,
false,
false,
false
],
"TestReadFile": [
Expand All @@ -401,6 +419,7 @@
true,
false,
false,
false,
false
],
"TestRememberMultipleIds": [
Expand All @@ -423,6 +442,7 @@
false,
false,
false,
false,
false
],
"TestRememberMultiplePhrasesWithNoise": [
Expand All @@ -445,6 +465,7 @@
false,
false,
false,
false,
false
],
"TestRememberMultipleWithNoise": [
Expand All @@ -467,6 +488,7 @@
false,
false,
false,
false,
false
],
"TestRetrieval3": [
Expand All @@ -489,6 +511,7 @@
false,
false,
false,
false,
false
],
"TestReturnCode_Modify": [
Expand All @@ -511,6 +534,7 @@
true,
false,
false,
false,
false
],
"TestReturnCode_Simple": [
Expand All @@ -533,6 +557,7 @@
true,
false,
false,
false,
false
],
"TestReturnCode_Tests": [
Expand All @@ -555,6 +580,7 @@
false,
false,
false,
false,
false
],
"TestReturnCode_Write": [
Expand All @@ -577,6 +603,7 @@
true,
false,
false,
false,
false
],
"TestRevenueRetrieval_1.0": [
Expand All @@ -599,6 +626,7 @@
false,
false,
false,
false,
false
],
"TestRevenueRetrieval_1.1": [
Expand All @@ -621,6 +649,7 @@
false,
false,
false,
false,
false
],
"TestRevenueRetrieval_1.2": [
Expand All @@ -643,6 +672,7 @@
false,
false,
false,
false,
false
],
"TestSearch": [
Expand All @@ -666,6 +696,7 @@
true,
false,
false,
false,
false
],
"TestThreeSum": [
Expand All @@ -688,6 +719,7 @@
false,
false,
false,
false,
false
],
"TestWriteFile": [
Expand All @@ -712,14 +744,29 @@
true,
false,
false,
false,
false
],
"TestWritingCLI_Easy": [
false,
false,
false
],
"TestWritingCLI_FileOrganizer": [
false,
false,
false
],
"TestAgentProtocol_ListAgentTasksIds": [
true
],
"TestAgentProtocol_GetAgentTask": [
true
],
"TestAgentProtocol_ListAgentTaskSteps": [
true
],
"TestAgentProtocol_ExecuteAgentTaskStep": [
true
]
}
Binary file added reports/combined_charts/run26/bar_chart.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added reports/combined_charts/run26/radar_chart.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions reports/combined_charts/run26/run_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:07", "Auto-GPT": "2023-07-31-19:39", "smol-developer": "2023-07-31-19:05", "gpt-engineer": "2023-07-31-19:38"}
Binary file added reports/combined_charts/run27/bar_chart.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added reports/combined_charts/run27/radar_chart.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions reports/combined_charts/run27/run_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:07", "Auto-GPT": "2023-07-31-19:06", "smol-developer": "2023-07-31-19:05", "gpt-engineer": "2023-07-31-19:38"}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 92ad915

Please sign in to comment.