From e8d1c5846dea561f16eea97886379b3d07edf7d8 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Wed, 20 Nov 2024 01:48:00 +0000
Subject: [PATCH] Record PR time benchmark results in JSON format (#140493)

I'm trying to make these benchmark results available in the OSS benchmark database so that people can query them from outside. The first step is to also record the results in a JSON format compatible with the database schema defined in https://github.com/pytorch/test-infra/pull/5839.

Existing CSV files remain unchanged.

### Testing

The JSON results are uploaded to S3 as artifacts (see the upload step at https://github.com/pytorch/pytorch/actions/runs/11809725848/job/32901411180#step:26:13), for example: https://gha-artifacts.s3.amazonaws.com/pytorch/pytorch/11809725848/1/artifact/test-jsons-test-pr_time_benchmarks-1-1-linux.g4dn.metal.nvidia.gpu_32901411180.zip

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140493
Approved by: https://github.com/laithsakka
---
 .github/workflows/_linux-test.yml             |  8 ++
 .../pr_time_benchmarks/benchmark_base.py      | 77 +++++++++++++++++++
 .../pr_time_benchmarks/benchmarks/add_loop.py | 25 +++---
 .../benchmarks/aotdispatcher.py               | 19 ++---
 .../benchmarks/aotdispatcher_partitioner.py   | 11 ++-
 .../benchmarks/basic_modules_benchmarks.py    | 17 ++--
 .../benchmarks/sum_floordiv.py                |  5 +-
 .../benchmarks/symint_sum.py                  |  9 ++-
 .../benchmarks/update_hint_benchmark.py       |  9 ++-
 9 files changed, 151 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/_linux-test.yml b/.github/workflows/_linux-test.yml
index 22bf49145e140..aa7a2bdf8f144 100644
--- a/.github/workflows/_linux-test.yml
+++ b/.github/workflows/_linux-test.yml
@@ -330,6 +330,14 @@ jobs:
           test_config: ${{ matrix.config }}
           job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
 
+      - name: Upload the benchmark results
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: test/test-reports
+          dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Print remaining test logs
         shell: bash
         if: always() && steps.test.conclusion
diff --git a/benchmarks/dynamo/pr_time_benchmarks/benchmark_base.py b/benchmarks/dynamo/pr_time_benchmarks/benchmark_base.py
index 83145e0d5445f..fa41a5e49d858 100644
--- a/benchmarks/dynamo/pr_time_benchmarks/benchmark_base.py
+++ b/benchmarks/dynamo/pr_time_benchmarks/benchmark_base.py
@@ -1,5 +1,7 @@
 import csv
 import gc
+import json
+import os
 from abc import ABC, abstractmethod
 
 from fbscribelogger import make_scribe_logger
@@ -65,6 +67,22 @@ class BenchmarkBase(ABC):
     # number of iterations used to run when collecting instruction_count or compile_time_instruction_count.
     _num_iterations = 5
 
+    def __init__(
+        self,
+        category: str,
+        device: str,
+        backend: str = "",
+        mode: str = "",
+        dynamic=None,
+    ):
+        # These individual attributes are used to support different filters on the
+        # dashboard later
+        self._category = category
+        self._device = device
+        self._backend = backend
+        self._mode = mode  # Training or inference
+        self._dynamic = dynamic
+
     def with_iterations(self, value):
         self._num_iterations = value
         return self
@@ -80,6 +98,21 @@ def enable_compile_time_instruction_count(self):
     def name(self):
         return ""
 
+    def backend(self):
+        return self._backend
+
+    def mode(self):
+        return self._mode
+
+    def category(self):
+        return self._category
+
+    def device(self):
+        return self._device
+
+    def is_dynamic(self):
+        return self._dynamic
+
     def description(self):
         return ""
 
@@ -134,6 +167,46 @@ def _count_compile_time_instructions(self):
         finally:
             gc.enable()
 
+    def _write_to_json(self, output_dir: str):
+        """
+        Write the result into JSON format, so that it can be uploaded to the benchmark database
+        to be displayed on OSS dashboard. The JSON format is defined at
+        https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+        """
+        records = []
+        for entry in self.results:
+            metric_name = entry[1]
+            value = entry[2]
+
+            if not metric_name or value is None:
+                continue
+
+            records.append(
+                {
+                    "benchmark": {
+                        "name": "pr_time_benchmarks",
+                        "mode": self.mode(),
+                        "extra_info": {
+                            "is_dynamic": self.is_dynamic(),
+                            "device": self.device(),
+                            "description": self.description(),
+                        },
+                    },
+                    "model": {
+                        "name": self.name(),
+                        "type": self.category(),
+                        "backend": self.backend(),
+                    },
+                    "metric": {
+                        "name": metric_name,
+                        "benchmark_values": [value],
+                    },
+                }
+            )
+
+        with open(os.path.join(output_dir, f"{self.name()}.json"), "w") as f:
+            json.dump(records, f)
+
     def append_results(self, path):
         with open(path, "a", newline="") as csvfile:
             # Create a writer object
@@ -142,6 +215,10 @@ def append_results(self, path):
             for entry in self.results:
                 writer.writerow(entry)
 
+        # TODO (huydhn) This requires the path to write to, so it needs to be in the same place
+        # as the CSV writer for now
+        self._write_to_json(os.path.dirname(os.path.abspath(path)))
+
     def print(self):
         for entry in self.results:
             print(f"{entry[0]},{entry[1]},{entry[2]}")
diff --git a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/add_loop.py b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/add_loop.py
index f28d59f154ea2..e805b7ff6b380 100644
--- a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/add_loop.py
+++ b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/add_loop.py
@@ -8,15 +8,18 @@
 
 class Benchmark(BenchmarkBase):
     def __init__(self, backend, dynamic=False, is_gpu=False):
-        self._backend = backend
-        self._dynamic = dynamic
-        self._device = "cuda" if is_gpu else "cpu"
+        super().__init__(
+            category="add_loop",
+            backend=backend,
+            device="cuda" if is_gpu else "cpu",
+            dynamic=dynamic,
+        )
 
     def name(self):
-        prefix = f"add_loop_{self._backend}"
-        if self._dynamic:
+        prefix = f"{self.category()}_{self.backend()}"
+        if self.is_dynamic():
             prefix += "_dynamic"
-        if self._device == "cuda":
+        if self.device() == "cuda":
             prefix += "_gpu"
         return prefix
 
@@ -24,14 +27,18 @@ def description(self):
         return "a loop over 100 add node"
 
     def _prepare_once(self):
-        self.a = torch.ones(1000, device=self._device)
-        self.b = torch.torch.ones(1000, device=self._device)
+        self.a = torch.ones(1000, device=self.device())
+        self.b = torch.torch.ones(1000, device=self.device())
 
     def _prepare(self):
         torch._dynamo.reset()
 
     def _work(self):
-        @torch.compile(backend=self._backend, fullgraph=True, dynamic=self._dynamic)
+        @torch.compile(
+            backend=self.backend(),
+            fullgraph=True,
+            dynamic=self.is_dynamic(),
+        )
         def f(a, b):
             result = a.clone()
             for i in range(1000):
diff --git a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/aotdispatcher.py b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/aotdispatcher.py
index 53a8f20b06122..e2d57a3622886 100644
--- a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/aotdispatcher.py
+++ b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/aotdispatcher.py
@@ -10,19 +10,20 @@ class Benchmark(BenchmarkBase):
     def __init__(self, *, training, subclass):
         self._training = training
         self._subclass = subclass
-        self._device = "cpu"
+        super().__init__(
+            category="aotdispatcher",
+            backend="aot_eager_decomp_partition",
+            device="cpu",
+            mode="training" if self._training else "inference",
+        )
 
     def name(self):
-        prefix = "aotdispatcher"
-        if self._training:
-            prefix += "_training"
-        else:
-            prefix += "_inference"
+        prefix = f"{self.category()}_{self.mode()}"
         if self._subclass:
             prefix += "_subclass"
         else:
             prefix += "_nosubclass"
-        if self._device == "cpu":
+        if self.device() == "cpu":
             prefix += "_cpu"
         return prefix
 
@@ -31,7 +32,7 @@ def description(self):
 
     def _prepare_once(self):
         _args = [
-            torch.ones(100, requires_grad=self._training, device=self._device)
+            torch.ones(100, requires_grad=self._training, device=self.device())
             for _ in range(100)
         ]
         if self._subclass:
@@ -45,7 +46,7 @@ def _prepare(self):
         torch._dynamo.reset()
 
     def _work(self):
-        @torch.compile(backend="aot_eager_decomp_partition", fullgraph=True)
+        @torch.compile(backend=self.backend(), fullgraph=True)
         def f(*args):
             outs = [torch.add(x, x) for x in args]
             return outs
diff --git a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/aotdispatcher_partitioner.py b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/aotdispatcher_partitioner.py
index 30fa5fa386124..7f5b9aadf874c 100644
--- a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/aotdispatcher_partitioner.py
+++ b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/aotdispatcher_partitioner.py
@@ -6,8 +6,15 @@
 
 
 class Benchmark(BenchmarkBase):
+    def __init__(self):
+        super().__init__(
+            category="aotdispatcher_partitioner",
+            backend="aot_eager_decomp_partition",
+            device="cpu",
+        )
+
     def name(self):
-        return "aotdispatcher_partitioner_cpu"
+        return f"{self.category()}_{self.device()}"
 
     def description(self):
         return "partitioner benchmark 1 input and 100 weights, mix of recompute and non-recompute ops"
@@ -20,7 +27,7 @@ def _prepare(self):
         torch._dynamo.reset()
 
     def _work(self):
-        @torch.compile(backend="aot_eager_decomp_partition", fullgraph=True)
+        @torch.compile(backend=self.backend(), fullgraph=True)
         def f(inp, *weights):
             x = inp
             for w in weights:
diff --git a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/basic_modules_benchmarks.py b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/basic_modules_benchmarks.py
index 56398cfd12bd2..5a9e91da203df 100644
--- a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/basic_modules_benchmarks.py
+++ b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/basic_modules_benchmarks.py
@@ -24,15 +24,20 @@ def __init__(
         self, ModuleClass, backend, is_gpu=False, dynamic=False, force_shape_pad=False
     ):
         self.ModuleClass = ModuleClass
-        self.backend = backend
         self._name = ModuleClass.__name__
         self._is_gpu = is_gpu
-        self._dynamic = dynamic
         self._force_shape_pad = force_shape_pad
 
+        super().__init__(
+            category="basic_modules",
+            backend=backend,
+            device="cuda" if self._is_gpu else "cpu",
+            dynamic=dynamic,
+        )
+
     def name(self):
-        prefix = f"basic_modules_{self._name}_{self.backend}"
-        if self._dynamic:
+        prefix = f"{self.category()}_{self._name}_{self.backend()}"
+        if self.is_dynamic():
             prefix += "_dynamic"
         if self._is_gpu:
             prefix += "_gpu"
@@ -43,7 +48,7 @@ def name(self):
     def _prepare_once(self):
         self.m = self.ModuleClass()
         torch.set_float32_matmul_precision("high")
-        self.input = torch.ones(10, device="cuda" if self._is_gpu else "cpu")
+        self.input = torch.ones(10, device=self.device())
 
     def _prepare(self):
         torch._dynamo.reset()
@@ -52,7 +57,7 @@ def _work(self):
         with fresh_inductor_cache(), torch._inductor.config.patch(
             force_shape_pad=self._force_shape_pad
         ):
-            opt_m = torch.compile(backend=self.backend, dynamic=self._dynamic)(
+            opt_m = torch.compile(backend=self.backend(), dynamic=self.is_dynamic())(
                 self.m.cuda() if self._is_gpu else self.m
             )
             opt_m(self.input)
diff --git a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/sum_floordiv.py b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/sum_floordiv.py
index 3bd22d18bad00..f10eb47bd0aed 100644
--- a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/sum_floordiv.py
+++ b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/sum_floordiv.py
@@ -8,8 +8,11 @@
 class Benchmark(BenchmarkBase):
     N = 100
 
+    def __init__(self):
+        super().__init__(category="sum_floordiv", backend="export", device="cpu")
+
     def name(self):
-        return "sum_floordiv_regression"
+        return f"{self.category()}_regression"
 
     def description(self):
         return "information at https://github.com/pytorch/pytorch/issues/134133"
diff --git a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/symint_sum.py b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/symint_sum.py
index a70e4022fb41c..94a1c068d4211 100644
--- a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/symint_sum.py
+++ b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/symint_sum.py
@@ -8,8 +8,15 @@
 class Benchmark(BenchmarkBase):
     N = 200
 
+    def __init__(self):
+        super().__init__(
+            category="symint_sum",
+            backend="inductor",
+            device="cpu",
+        )
+
     def name(self):
-        return "symint_sum"
+        return self.category()
 
     def description(self):
         return "see https://docs.google.com/document/d/11xJXl1etSmefUxPiVyk885e0Dl-4o7QwxYcPiMIo2iY/edit"
diff --git a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/update_hint_benchmark.py b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/update_hint_benchmark.py
index 7957836b6a9d1..cc2edf660f9b1 100644
--- a/benchmarks/dynamo/pr_time_benchmarks/benchmarks/update_hint_benchmark.py
+++ b/benchmarks/dynamo/pr_time_benchmarks/benchmarks/update_hint_benchmark.py
@@ -8,8 +8,15 @@
 class Benchmark(BenchmarkBase):
     N = 20
 
+    def __init__(self):
+        super().__init__(
+            category="update_hint",
+            backend="inductor",
+            device="cpu",
+        )
+
     def name(self):
-        return "update_hint_regression"
+        return f"{self.category()}_regression"
 
     def description(self):
         return "information at https://github.com/pytorch/pytorch/pull/129893"