implement SJF & add result analysis ipynb
mchen644 committed Sep 5, 2024
1 parent f12596b commit 856b8bf
Showing 7 changed files with 1,012 additions and 46 deletions.
10 changes: 5 additions & 5 deletions benchmarks/1_serving_benchmark.sh
@@ -21,9 +21,9 @@ result_dir="/root/v1/vllm/benchmarks/result"
# swap_policies=(partial)
declare -a scheduler_swap_policies
# scheduler_swap_policies[0]="tfittradeoff partial"
-scheduler_swap_policies[1]="fcfs full"
+# scheduler_swap_policies[1]="fcfs full"
# scheduler_swap_policies[1]="tfittradeoff full"
-# scheduler_swap_policies[2]="las full"
+scheduler_swap_policies[2]="sjf full"
# scheduler_swap_policies[3]="sjmlfq full"
# scheduler_swap_policies[3]="infer partial"
# scheduler_swap_policies[4]="inferpreempt full"
@@ -38,15 +38,15 @@ iter_theshold=15

# request_rates[0]=0.5
# request_rates[1]=1.0
-# request_rates[2]=2.0
+request_rates[2]=2.0
# request_rates[3]=5.0
-request_rates[4]=10.0
+# request_rates[4]=10.0
# request_rates[5]=20.0

# request_rates=(2.0)
swap_out_partial_rates=(0.5)
waiting_iter_base=(0.1)
-gpu_devices=3
+gpu_devices=1
for i in {0..0}; do
for waiting_iter in "${waiting_iter_base[@]}"; do
for swap_out_partial_rate in "${swap_out_partial_rates[@]}"; do
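For context, shortest-job-first (sjf) serves the waiting request with the smallest known job size first. The scheduler change itself lives in vLLM source files whose diffs are not shown here; the following is a minimal, hypothetical sketch of the ordering idea only, not the repository's implementation.

import heapq

# Hypothetical SJF queue: entries are (known_output_len, request_id).
waiting = [(512, "req-a"), (128, "req-b"), (256, "req-c")]
heapq.heapify(waiting)  # min-heap keyed by job length

while waiting:
    length, req = heapq.heappop(waiting)  # shortest job first
    print(f"schedule {req} (len={length})")
# Schedules req-b (128), then req-c (256), then req-a (512).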
52 changes: 41 additions & 11 deletions benchmarks/backend_request_func.py
@@ -5,7 +5,7 @@
import traceback
from dataclasses import dataclass, field
from typing import List, Optional

+import fnmatch
import aiohttp
from tqdm.asyncio import tqdm

@@ -25,6 +25,7 @@ class RequestFuncInput:

@dataclass
class RequestFuncOutput:
+prompt: str = ""
generated_text: str = ""
success: bool = False
latency: float = 0.0
@@ -56,6 +57,7 @@ async def async_request_tgi(
"parameters": params,
}
output = RequestFuncOutput()
+output.prompt = request_func_input.prompt
output.prompt_len = request_func_input.prompt_len

ttft = 0.0
@@ -210,34 +212,62 @@ async def async_request_deepspeed_mii(
if pbar:
pbar.update(1)
return output


+def get_json_file():
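+    # Return the first *.json file in the current working directory
+    # (expected to be the prompt-to-output-length map from a prior run).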
+    for file_name in os.listdir('.'):
+        if fnmatch.fnmatch(file_name, '*.json'):
+            return file_name
+    return None

async def async_request_openai_completions(
+policy: str,
request_func_input: RequestFuncInput,
pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
api_url = request_func_input.api_url
assert api_url.endswith(
"v1/completions"
), "OpenAI Completions API URL must end with 'v1/completions'."

+    if policy == "sjf":
+        file_path = get_json_file()
+        if file_path:
+            with open(file_path, 'r', encoding="utf-8") as file:
+                data = json.load(file)
+            print(f"Loaded data from {file_path}")
+        else:
+            # Fail fast: `data` would otherwise be undefined where the sjf
+            # payload is built below.
+            raise FileNotFoundError("No JSON file found in the current directory.")
+
async with aiohttp.ClientSession(timeout=AIOHTTP_TIMEOUT) as session:
assert not request_func_input.use_beam_search
-        payload = {
-            "model": request_func_input.model,
-            "prompt": request_func_input.prompt,
-            "temperature": 0.0,
-            "best_of": request_func_input.best_of,
-            "min_tokens": request_func_input.output_len,
-            "max_tokens": request_func_input.output_len,
-            "stream": True,
-        }
+        if policy == "sjf":
+            payload = {
+                "model": request_func_input.model,
+                "prompt": request_func_input.prompt,
+                "temperature": 0.0,
+                "best_of": request_func_input.best_of,
+                "min_tokens": data[request_func_input.prompt],
+                "max_tokens": data[request_func_input.prompt],
+                "stream": True,
+            }
+        else:
+            payload = {
+                "model": request_func_input.model,
+                "prompt": request_func_input.prompt,
+                "temperature": 0.0,
+                "best_of": request_func_input.best_of,
+                "max_tokens": request_func_input.output_len,
+                "stream": True,
+            }


headers = {
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
}

output = RequestFuncOutput()
+output.prompt = request_func_input.prompt
output.prompt_len = request_func_input.prompt_len

generated_text = ""
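The sjf branch above replays previously measured output lengths: an earlier benchmark run dumps a prompt-to-output-length JSON map, get_json_file() picks it up, and both min_tokens and max_tokens are pinned to the recorded value, so every request's job size is known exactly in advance. A self-contained sketch of that payload logic follows; build_payload and its fallback behaviour are illustrative assumptions, not part of the diff.

import json
from typing import Dict, Optional

def build_payload(policy: str, model: str, prompt: str, best_of: int,
                  output_len: int,
                  length_map: Optional[Dict[str, int]] = None) -> dict:
    # Base payload, mirroring the non-sjf branch above.
    payload = {
        "model": model,
        "prompt": prompt,
        "temperature": 0.0,
        "best_of": best_of,
        "max_tokens": output_len,
        "stream": True,
    }
    if policy == "sjf" and length_map and prompt in length_map:
        # min_tokens == max_tokens forces exactly the length measured in a
        # prior run, making each request's job size known up front.
        payload["min_tokens"] = length_map[prompt]
        payload["max_tokens"] = length_map[prompt]
    return payload

# Example with a recorded length map:
lengths = json.loads('{"Tell me a joke.": 42}')
print(build_payload("sjf", "llama-2-7b", "Tell me a joke.", 1, 256, lengths))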
34 changes: 28 additions & 6 deletions benchmarks/benchmark_serving.py
@@ -343,10 +343,17 @@ async def benchmark(
best_of=best_of,
use_beam_search=use_beam_search,
)
-        tasks.append(
-            asyncio.create_task(
-                request_func(request_func_input=request_func_input,
-                             pbar=pbar)))
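+        # Only the vLLM request function accepts the scheduler policy
+        # argument; other backends keep the original call signature.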
+        if backend == "vllm":
+            tasks.append(
+                asyncio.create_task(
+                    request_func(args.scheduler_policy,
+                                 request_func_input=request_func_input,
+                                 pbar=pbar)))
+        else:
+            tasks.append(
+                asyncio.create_task(
+                    request_func(request_func_input=request_func_input,
+                                 pbar=pbar)))
outputs: List[RequestFuncOutput] = await asyncio.gather(*tasks)

if not disable_tqdm:
@@ -425,7 +432,7 @@ async def benchmark(
# "errors": [output.error for output in outputs],
"latencies": [output.latency for output in outputs],
}
-return result
+return result, outputs


def check_health(api_url: str) -> bool:
@@ -512,7 +519,7 @@ def main(args: argparse.Namespace):
else:
raise ValueError(f"Unknown dataset: {args.dataset_name}")

-benchmark_result = asyncio.run(
+benchmark_result, outputs = asyncio.run(
benchmark(
backend=backend,
api_url=api_url,
@@ -571,6 +578,21 @@ def main(args: argparse.Namespace):
file_name = os.path.join(args.result_dir, dir_name, file_name)
with open(file_name, "w") as outfile:
json.dump(result_json, outfile)

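+    # Persist each prompt's measured output length; a later sjf run loads
+    # this JSON (via get_json_file) to pin min_tokens/max_tokens per request.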
+    prompt_output_lens_json = {}
+    print(benchmark_result["output_lens"])
+    for i in range(len(outputs)):
+        prompt_output_lens_json[outputs[i].prompt] = benchmark_result["output_lens"][i]
+    prompt_output_lens_file_name = f"prompt_output_{backend}-{args.request_rate}qps-{base_model_id}-{seconds}-{args.scheduler_policy}.json"
+
+    if args.result_dir:
+        print("result_dir:", args.result_dir)
+        prompt_output_lens_file_name = os.path.join(args.result_dir, dir_name, prompt_output_lens_file_name)
+    with open(prompt_output_lens_file_name, "w") as prompt_output_lens_file_name_outfile:
+        json.dump(prompt_output_lens_json, prompt_output_lens_file_name_outfile)

if __name__ == "__main__":
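Together with the client-side change above, the benchmark becomes a two-phase workflow: a baseline run writes a prompt_output_*.json file mapping each prompt to its generated length, and a later sjf run consumes it. A minimal sketch of the round trip, with the filename simplified from the pattern in the diff:

import json

# Phase 1: a baseline (e.g. fcfs) run records each prompt's output length.
# Dummy values stand in for benchmark_result["output_lens"].
mapping = {"What is SJF?": 128, "Explain vLLM swapping.": 512}
with open("prompt_output_lens.json", "w") as f:
    json.dump(mapping, f)

# Phase 2: the sjf run locates the file via get_json_file() and pins each
# request's min_tokens/max_tokens to the recorded value.
with open("prompt_output_lens.json") as f:
    lengths = json.load(f)
assert lengths["What is SJF?"] == 128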
26 changes: 13 additions & 13 deletions benchmarks/result/analysis/result_analysis.py
@@ -1,6 +1,6 @@
import marimo

-__generated_with = "0.7.20"
+__generated_with = "0.8.9"
app = marimo.App(width="full")


@@ -39,8 +39,8 @@ def __(mo):

@app.cell
def __(base_dir, os):
-    _date = "20240902"
-    _counters = [401]
+    _date = "20240904"
+    _counters = [451]
e2e_result_dir_names = [
os.path.join(base_dir, _date, str(counter)) for counter in _counters
]
@@ -191,7 +191,7 @@ def e2e_result(
axes[1].set_ylabel("Throughput (Token/s)")
axes[1].set_xlabel("Request Rate (r/s)")
axes[1].grid(linestyle="--", alpha=0.5, axis="y")
-plt.show()
+plt.savefig("e2e_result.png")  # pyplot has no save(); output name is assumed
return axes, e2e_result, fig


@@ -247,11 +247,11 @@ def line_plot(_long_df):
line_styles = ["-", "--", "-.", ":"]
mark_styles = ["d", "o", "v"]
colors = ["r", "g", "b", "y"]
-show_legend = True
+save_legend = True
for _i, metric_type in enumerate(metric_types):
_ax = _axes[_i // 2][_i % 2]
if _i > 0:
-show_legend = False
+save_legend = False
data = _long_df[(_long_df["metric_type"] == metric_type)]

# Group by line_type
@@ -294,18 +294,18 @@ def barplot(_long_df, request_rate):

_long_df = _long_df[_long_df["request_rate"] == request_rate]

-show_legend = True
+save_legend = True
for metric_type in metric_types:
_i = metric_types.index(metric_type)
if _i > 0:
-show_legend = False
+save_legend = False
sns.barplot(
hue="scheduler_policy",
y="Ratio",
x="metric_name",
data=_long_df[_long_df["metric_type"] == metric_type],
ax=_ax[_i // 2][_i % 2],
-legend=show_legend,
+legend=save_legend,
)
_ax[_i // 2][_i % 2].set_xlabel(metric_type)
_ax[_i // 2][_i % 2].set_ylabel("")
@@ -365,16 +365,16 @@ def get_metric_ratio(df):
)
)
# _long_df = _long_df[_long_df["metric_name"] == "P99"]
-show_legend = True
+save_legend = True

# line_plot(_long_df)
# print(_long_df)
barplot(_long_df, 2) # Need to change
fig.tight_layout()
plt.subplots_adjust(wspace=0.2, hspace=0.4)

-plt.show()
-return get_metric_ratio, show_legend
+plt.savefig("metric_ratio.png")  # pyplot has no save(); output name is assumed
+return get_metric_ratio, save_legend


@app.cell
@@ -896,7 +896,7 @@ def __(add_num_annotation, detailed_mean_result, plt, sns):
plt.legend(title="")
add_num_annotation(ax)
plt.xticks(rotation=45)
-plt.show()
+plt.savefig("detailed_mean.png")  # pyplot has no save(); output name is assumed
return ax,


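Since the notebook now writes figures to disk instead of displaying them, the call to use is matplotlib's savefig (pyplot has no save method). A minimal headless example with an assumed output path:

import matplotlib
matplotlib.use("Agg")  # headless backend: render to files, no display needed
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0.5, 1.0, 2.0], [120, 210, 350], marker="o")  # illustrative numbers
ax.set_xlabel("Request Rate (r/s)")
ax.set_ylabel("Throughput (Token/s)")
ax.grid(linestyle="--", alpha=0.5, axis="y")
fig.tight_layout()
fig.savefig("e2e_throughput.png", dpi=300)  # path and dpi are arbitrary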
[Diffs for the remaining 3 changed files are not shown.]
