Significant-Gravitas · SilenNaihin · Jul 31, 2023 · Jul 31, 2023 · Jul 31, 2023 · Jul 31, 2023
diff --git a/agbenchmark/reports/processing/gen_combined_chart.py b/agbenchmark/reports/processing/gen_combined_chart.py
@@ -1,24 +1,43 @@
+import json
 import os
 from pathlib import Path
 
-from agbenchmark.reports.processing.graphs import save_combined_radar_chart
+from agbenchmark.reports.processing.graphs import (
+    save_combined_bar_chart,
+    save_combined_radar_chart,
+)
 from agbenchmark.reports.processing.process_report import (
     all_agent_categories,
     get_reports_data,
 )
-from agbenchmark.start_benchmark import REPORTS_PATH
 
 
 def generate_combined_chart() -> None:
-    reports_data = get_reports_data(REPORTS_PATH)
+    """Save combined radar/bar charts and run_info.json into a new run folder."""
+    all_agents_path = Path(__file__).parent.parent.parent.parent / "reports"
+    combined_charts_folder = all_agents_path / "combined_charts"
+
+    reports_data = get_reports_data(str(all_agents_path))
 
     categories = all_agent_categories(reports_data)
 
-    png_count = len([f for f in os.listdir(REPORTS_PATH) if f.endswith(".png")])
+    # Create the folder first: iterdir() raises FileNotFoundError if it is missing
+    os.makedirs(combined_charts_folder, exist_ok=True)
+    # Number the new run after the run folders that already exist
+    num_dirs = len([f for f in combined_charts_folder.iterdir() if f.is_dir()])
+    run_charts_folder = combined_charts_folder / f"run{num_dirs + 1}"
+    os.makedirs(run_charts_folder, exist_ok=True)
+
+    # Record each report's benchmark start time alongside the charts
+    info_data = {
+        report_name: data.benchmark_start_time
+        for report_name, data in reports_data.items()
+    }
+    with open(run_charts_folder / "run_info.json", "w") as f:
+        json.dump(info_data, f)
 
-    save_combined_radar_chart(
-        categories, Path(REPORTS_PATH) / f"run{png_count + 1}_radar_chart.png"
-    )
+    save_combined_radar_chart(categories, run_charts_folder / "radar_chart.png")
+    save_combined_bar_chart(categories, run_charts_folder / "bar_chart.png")
 
 
 if __name__ == "__main__":

diff --git a/agbenchmark/reports/processing/get_files.py b/agbenchmark/reports/processing/get_files.py
@@ -1,28 +1,34 @@
 import os
 
 
-def get_last_file_in_directory(directory_path: str) -> str | None:
-    # Get all files in the directory
-    files = [
-        f
-        for f in os.listdir(directory_path)
-        if os.path.isfile(os.path.join(directory_path, f)) and f.endswith(".json")
+def get_last_subdirectory(directory_path: str) -> str | None:
+    # Collect the full path of every subdirectory directly inside directory_path
+    subdirs = [
+        os.path.join(directory_path, name)
+        for name in os.listdir(directory_path)
+        if os.path.isdir(os.path.join(directory_path, name))
     ]
 
-    # Sort the files by modification time
-    files.sort(key=lambda x: os.path.getmtime(os.path.join(directory_path, x)))
+    # Sort ascending by ctime (creation time on Windows, metadata-change time on Unix)
+    subdirs.sort(key=os.path.getctime)
 
-    # Return the last file in the list
-    return files[-1] if files else None
+    # Return the newest path by ctime, or None if there are no subdirectories
+    return subdirs[-1] if subdirs else None
 
 
-def get_latest_files_in_subdirectories(
+def get_latest_report_from_agent_directories(
     directory_path: str,
-) -> list[tuple[str, str]] | None:
-    latest_files = []
+) -> list[tuple[os.DirEntry[str], str]]:
+    latest_reports = []
+
     for subdir in os.scandir(directory_path):
         if subdir.is_dir():
-            latest_file = get_last_file_in_directory(subdir.path)
-            if latest_file is not None:
-                latest_files.append((subdir.path, latest_file))
-    return latest_files
+            # Newest subdirectory of this agent's directory, as ordered by ctime
+            latest_subdir = get_last_subdirectory(subdir.path)
+            if latest_subdir is not None:
+                # Keep the agent only if that subdirectory contains a report.json file
+                report_file = os.path.join(latest_subdir, "report.json")
+                if os.path.isfile(report_file):
+                    latest_reports.append((subdir, report_file))
+
+    return latest_reports
diff --git a/agbenchmark/reports/processing/graphs.py b/agbenchmark/reports/processing/graphs.py
@@ -4,12 +4,15 @@
 import matplotlib.patches as mpatches
 import matplotlib.pyplot as plt
 import numpy as np
+import pandas as pd
 from matplotlib.colors import Normalize
 
 
 def save_combined_radar_chart(
     categories: dict[str, Any], save_path: str | Path
 ) -> None:
+    if not all(categories.values()):
+        raise Exception("No data to plot")
     labels = np.array(
         list(next(iter(categories.values())).keys())
     )  # We use the first category to get the keys
@@ -30,18 +33,9 @@ def save_combined_radar_chart(
         vmin=0, vmax=max([max(val.values()) for val in categories.values()])
     )  # We use the maximum of all categories for normalization
 
-    colors = [
-        "#40c463",
-        "#ff7f0e",
-        "#2ca02c",
-        "#d62728",
-        "#9467bd",
-        "#8c564b",
-        "#e377c2",
-        "#7f7f7f",
-        "#bcbd22",
-        "#17becf",
-    ]  # Define more colors for more categories
+    cmap = plt.cm.get_cmap("nipy_spectral", len(categories))  # type: ignore
+
+    colors = [cmap(i) for i in range(len(categories))]
 
     for i, (cat_name, cat_values) in enumerate(
         categories.items()
@@ -62,13 +56,18 @@ def save_combined_radar_chart(
         )  # Draw points
 
         # Draw legend
-        ax.legend(
+        legend = ax.legend(
             handles=[
                 mpatches.Patch(color=color, label=cat_name, alpha=0.25)
                 for cat_name, color in zip(categories.keys(), colors)
-            ]
+            ],
+            loc="upper left",
+            bbox_to_anchor=(0.7, 1.3),
         )
 
+        # Adjust layout to make room for the legend
+        plt.tight_layout()
+
     lines, labels = plt.thetagrids(
         np.degrees(angles[:-1]), (list(next(iter(categories.values())).keys()))
     )  # We use the first category to get the keys
@@ -178,3 +177,21 @@ def save_single_radar_chart(
 
     plt.savefig(save_path, dpi=300)  # Save the figure as a PNG file
     plt.close()  # Close the figure to free up memory
+
+
+def save_combined_bar_chart(categories: dict[str, Any], save_path: str | Path) -> None:
+    if not all(categories.values()):  # a single empty/falsy entry aborts the plot
+        raise Exception("No data to plot")
+
+    # One DataFrame column per top-level key (presumably one per agent - confirm)
+    df = pd.DataFrame(categories)
+
+    # Grouped bars: one group per DataFrame row, one bar per column
+    df.plot(kind="bar", figsize=(10, 7))
+
+    plt.title("Performance by Category for Each Agent")
+    plt.xlabel("Category")
+    plt.ylabel("Performance")
+
+    plt.savefig(save_path, dpi=300)  # Save the figure as a PNG file
+    plt.close()  # Close the figure to free up memory
diff --git a/agbenchmark/reports/processing/process_report.py b/agbenchmark/reports/processing/process_report.py
@@ -3,13 +3,15 @@
 from pathlib import Path
 from typing import Any
 
-from agbenchmark.reports.processing.get_files import get_latest_files_in_subdirectories
+from agbenchmark.reports.processing.get_files import (
+    get_latest_report_from_agent_directories,
+)
 from agbenchmark.reports.processing.report_types import Report, SuiteTest, Test
 from agbenchmark.utils.data_types import STRING_DIFFICULTY_MAP
 
 
 def get_reports_data(report_path: str) -> dict[str, Any]:
-    latest_files = get_latest_files_in_subdirectories(report_path)
+    latest_files = get_latest_report_from_agent_directories(report_path)
 
     reports_data = {}
 
@@ -19,7 +21,6 @@ def get_reports_data(report_path: str) -> dict[str, Any]:
     # This will print the latest file in each subdirectory and add to the files_data dictionary
     for subdir, file in latest_files:
         subdir_name = os.path.basename(os.path.normpath(subdir))
-        print(f"Subdirectory: {subdir}, Latest file: {file}")
         with open(Path(subdir) / file, "r") as f:
             # Load the JSON data from the file
             json_data = json.load(f)
@@ -37,9 +38,11 @@ def get_highest_category_difficulty(data: Test) -> None:
         for category in data.category:
             if category == "interface":
                 continue
-            num_dif = STRING_DIFFICULTY_MAP[data.metrics.difficulty]
-            if num_dif > categories.setdefault(category, 0):
-                categories[category] = num_dif
+            categories[category] = categories.get(category, 0)
+            if data.metrics.success:
+                num_dif = STRING_DIFFICULTY_MAP[data.metrics.difficulty]
+                if num_dif > categories[category]:
+                    categories[category] = num_dif
 
     for _, test_data in report.tests.items():
         if isinstance(test_data, SuiteTest):

diff --git a/notebooks/Visualization.ipynb b/notebooks/Visualization.ipynb
@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 95,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -404,13 +404,6 @@
     "plt.ylabel('Performance')\n",
     "plt.show()\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,6 +21,7 @@ pexpect = "^4.8.0"
 psutil = "^5.9.5"
 helicone = "^1.0.6"
 matplotlib = "^3.7.2"
+pandas = "^2.0.3"
 
 [tool.poetry.group.dev.dependencies]
 flake8 = "^3.9.2"

diff --git a/reports/Auto-GPT/folder1_07-31-02-07/radar_chart.png b/reports/Auto-GPT/folder1_07-31-02-07/radar_chart.png
diff --git a/reports/Auto-GPT/folder2_07-31-03-06/radar_chart.png b/reports/Auto-GPT/folder2_07-31-03-06/radar_chart.png
diff --git a/reports/Auto-GPT/folder3_07-31-04-35/report.json b/reports/Auto-GPT/folder3_07-31-04-35/report.json
diff --git a/reports/Auto-GPT/folder4_07-31-08-14/report.json b/reports/Auto-GPT/folder4_07-31-08-14/report.json
diff --git a/reports/BabyAGI/folder1_07-30-22-55/radar_chart.png b/reports/BabyAGI/folder1_07-30-22-55/radar_chart.png
diff --git a/reports/BabyAGI/folder2_07-31-02-10/radar_chart.png b/reports/BabyAGI/folder2_07-31-02-10/radar_chart.png
diff --git a/reports/BabyAGI/folder3_07-31-03-08/radar_chart.png b/reports/BabyAGI/folder3_07-31-03-08/radar_chart.png
diff --git a/reports/BabyAGI/folder4_07-31-04-37/report.json b/reports/BabyAGI/folder4_07-31-04-37/report.json
diff --git a/reports/BabyAGI/folder5_07-31-08-17/report.json b/reports/BabyAGI/folder5_07-31-08-17/report.json
diff --git a/reports/beebot/folder1_07-30-22-53/radar_chart.png b/reports/beebot/folder1_07-30-22-53/radar_chart.png
diff --git a/reports/beebot/folder2_07-31-02-07/radar_chart.png b/reports/beebot/folder2_07-31-02-07/radar_chart.png
diff --git a/reports/beebot/folder4_07-31-04-36/report.json b/reports/beebot/folder4_07-31-04-36/report.json
diff --git a/reports/beebot/folder5_07-31-08-14/report.json b/reports/beebot/folder5_07-31-08-14/report.json
diff --git a/reports/combined_charts/run1/bar_chart.png b/reports/combined_charts/run1/bar_chart.png
diff --git a/reports/combined_charts/run1/radar_chart.png b/reports/combined_charts/run1/radar_chart.png
diff --git a/reports/combined_charts/run1/run_info.json b/reports/combined_charts/run1/run_info.json
@@ -0,0 +1 @@
+{"Auto-GPT": "2023-07-31-03:06", "BabyAGI": "2023-07-31-03:08", "beebot": "2023-07-31-03:06", "gpt-engineer": "2023-07-31-02:07", "mini-agi": "2023-07-31-03:06", "smol-developer": "2023-07-31-03:06"}
diff --git a/reports/gpt-engineer/folder1_07-30-22-53/radar_chart.png b/reports/gpt-engineer/folder1_07-30-22-53/radar_chart.png
diff --git a/reports/gpt-engineer/folder2_07-31-02-07/radar_chart.png b/reports/gpt-engineer/folder2_07-31-02-07/radar_chart.png
diff --git a/reports/gpt-engineer/folder3_07-31-03-06/radar_chart.png b/reports/gpt-engineer/folder3_07-31-03-06/radar_chart.png
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"Auto-GPT": "2023-07-31-03:06", "BabyAGI": "2023-07-31-03:08", "beebot": "2023-07-31-03:06", "gpt-engineer": "2023-07-31-02:07", "mini-agi": "2023-07-31-03:06", "smol-developer": "2023-07-31-03:06"}