implement SJF & add result analysis ipynb
mchen644 committed Sep 5, 2024
1 parent f12596b commit 856b8bf
Showing 7 changed files with 1,012 additions and 46 deletions.
10 changes: 5 additions & 5 deletions benchmarks/1_serving_benchmark.sh
@@ -21,9 +21,9 @@ result_dir="/root/v1/vllm/benchmarks/result"
# swap_policies=(partial)
declare -a scheduler_swap_policies
# scheduler_swap_policies[0]="tfittradeoff partial"
-scheduler_swap_policies[1]="fcfs full"
+# scheduler_swap_policies[1]="fcfs full"
# scheduler_swap_policies[1]="tfittradeoff full"
-# scheduler_swap_policies[2]="las full"
+scheduler_swap_policies[2]="sjf full"
# scheduler_swap_policies[3]="sjmlfq full"
# scheduler_swap_policies[3]="infer partial"
# scheduler_swap_policies[4]="inferpreempt full"
@@ -38,15 +38,15 @@ iter_theshold=15

# request_rates[0]=0.5
# request_rates[1]=1.0
-# request_rates[2]=2.0
+request_rates[2]=2.0
# request_rates[3]=5.0
-request_rates[4]=10.0
+# request_rates[4]=10.0
# request_rates[5]=20.0

# request_rates=(2.0)
swap_out_partial_rates=(0.5)
waiting_iter_base=(0.1)
-gpu_devices=3
+gpu_devices=1
for i in {0..0}; do
for waiting_iter in "${waiting_iter_base[@]}"; do
for swap_out_partial_rate in "${swap_out_partial_rates[@]}"; do
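For context, shortest-job-first (sjf) serves the waiting request with the smallest known job size first. The scheduler change itself lives in vLLM source files whose diffs are not shown here; the following is a minimal, hypothetical sketch of the ordering idea only, not the repository's implementation.

import heapq

# Hypothetical SJF queue: entries are (known_output_len, request_id).
waiting = [(512, "req-a"), (128, "req-b"), (256, "req-c")]
heapq.heapify(waiting)  # min-heap keyed by job length

while waiting:
    length, req = heapq.heappop(waiting)  # shortest job first
    print(f"schedule {req} (len={length})")
# Schedules req-b (128), then req-c (256), then req-a (512).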
52 changes: 41 additions & 11 deletions benchmarks/backend_request_func.py
@@ -5,7 +5,7 @@
import traceback
from dataclasses import dataclass, field
from typing import List, Optional

+import fnmatch
import aiohttp
from tqdm.asyncio import tqdm

@@ -25,6 +25,7 @@ class RequestFuncInput:

@dataclass
class RequestFuncOutput:
+prompt: str = ""
generated_text: str = ""
success: bool = False
latency: float = 0.0
@@ -56,6 +57,7 @@ async def async_request_tgi(
"parameters": params,
}
output = RequestFuncOutput()
+output.prompt = request_func_input.prompt
output.prompt_len = request_func_input.prompt_len

ttft = 0.0
@@ -210,34 +212,62 @@ async def async_request_deepspeed_mii(
if pbar:
pbar.update(1)
return output


+def get_json_file():
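+    # Return the first *.json file in the current working directory
+    # (expected to be the prompt-to-output-length map from a prior run).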
+    for file_name in os.listdir('.'):
+        if fnmatch.fnmatch(file_name, '*.json'):
+            return file_name
+    return None

async def async_request_openai_completions(
+policy: str,
request_func_input: RequestFuncInput,
pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
api_url = request_func_input.api_url
assert api_url.endswith(
"v1/completions"
), "OpenAI Completions API URL must end with 'v1/completions'."

+    if policy == "sjf":
+        file_path = get_json_file()
+        if file_path:
+            with open(file_path, 'r', encoding="utf-8") as file:
+                data = json.load(file)
+            print(f"Loaded data from {file_path}")
+        else:
+            # Fail fast: `data` would otherwise be undefined where the sjf
+            # payload is built below.
+            raise FileNotFoundError("No JSON file found in the current directory.")
+
async with aiohttp.ClientSession(timeout=AIOHTTP_TIMEOUT) as session:
assert not request_func_input.use_beam_search
-        payload = {
-            "model": request_func_input.model,
-            "prompt": request_func_input.prompt,
-            "temperature": 0.0,
-            "best_of": request_func_input.best_of,
-            "min_tokens": request_func_input.output_len,
-            "max_tokens": request_func_input.output_len,
-            "stream": True,
-        }
+        if policy == "sjf":
+            payload = {
+                "model": request_func_input.model,
+                "prompt": request_func_input.prompt,
+                "temperature": 0.0,
+                "best_of": request_func_input.best_of,
+                "min_tokens": data[request_func_input.prompt],
+                "max_tokens": data[request_func_input.prompt],
+                "stream": True,
+            }
+        else:
+            payload = {
+                "model": request_func_input.model,
+                "prompt": request_func_input.prompt,
+                "temperature": 0.0,
+                "best_of": request_func_input.best_of,
+                "max_tokens": request_func_input.output_len,
+                "stream": True,
+            }


headers = {
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
}

output = RequestFuncOutput()
+output.prompt = request_func_input.prompt
output.prompt_len = request_func_input.prompt_len

generated_text = ""
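The sjf branch above replays previously measured output lengths: an earlier benchmark run dumps a prompt-to-output-length JSON map, get_json_file() picks it up, and both min_tokens and max_tokens are pinned to the recorded value, so every request's job size is known exactly in advance. A self-contained sketch of that payload logic follows; build_payload and its fallback behaviour are illustrative assumptions, not part of the diff.

import json
from typing import Dict, Optional

def build_payload(policy: str, model: str, prompt: str, best_of: int,
                  output_len: int,
                  length_map: Optional[Dict[str, int]] = None) -> dict:
    # Base payload, mirroring the non-sjf branch above.
    payload = {
        "model": model,
        "prompt": prompt,
        "temperature": 0.0,
        "best_of": best_of,
        "max_tokens": output_len,
        "stream": True,
    }
    if policy == "sjf" and length_map and prompt in length_map:
        # min_tokens == max_tokens forces exactly the length measured in a
        # prior run, making each request's job size known up front.
        payload["min_tokens"] = length_map[prompt]
        payload["max_tokens"] = length_map[prompt]
    return payload

# Example with a recorded length map:
lengths = json.loads('{"Tell me a joke.": 42}')
print(build_payload("sjf", "llama-2-7b", "Tell me a joke.", 1, 256, lengths))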
34 changes: 28 additions & 6 deletions benchmarks/benchmark_serving.py
@@ -343,10 +343,17 @@ async def benchmark(
best_of=best_of,
use_beam_search=use_beam_search,
)
-        tasks.append(
-            asyncio.create_task(
-                request_func(request_func_input=request_func_input,
-                             pbar=pbar)))
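+        # Only the vLLM request function accepts the scheduler policy
+        # argument; other backends keep the original call signature.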
+        if backend == "vllm":
+            tasks.append(
+                asyncio.create_task(
+                    request_func(args.scheduler_policy,
+                                 request_func_input=request_func_input,
+                                 pbar=pbar)))
+        else:
+            tasks.append(
+                asyncio.create_task(
+                    request_func(request_func_input=request_func_input,
+                                 pbar=pbar)))
outputs: List[RequestFuncOutput] = await asyncio.gather(*tasks)

if not disable_tqdm:
@@ -425,7 +432,7 @@ async def benchmark(
# "errors": [output.error for output in outputs],
"latencies": [output.latency for output in outputs],
}
-return result
+return result, outputs


def check_health(api_url: str) -> bool:
@@ -512,7 +519,7 @@ def main(args: argparse.Namespace):
else:
raise ValueError(f"Unknown dataset: {args.dataset_name}")

-benchmark_result = asyncio.run(
+benchmark_result, outputs = asyncio.run(
benchmark(
backend=backend,
api_url=api_url,
@@ -571,6 +578,21 @@ def main(args: argparse.Namespace):
file_name = os.path.join(args.result_dir, dir_name, file_name)
with open(file_name, "w") as outfile:
json.dump(result_json, outfile)

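+    # Persist each prompt's measured output length; a later sjf run loads
+    # this JSON (via get_json_file) to pin min_tokens/max_tokens per request.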
+    prompt_output_lens_json = {}
+    print(benchmark_result["output_lens"])
+    for i in range(len(outputs)):
+        prompt_output_lens_json[outputs[i].prompt] = benchmark_result["output_lens"][i]
+    prompt_output_lens_file_name = f"prompt_output_{backend}-{args.request_rate}qps-{base_model_id}-{seconds}-{args.scheduler_policy}.json"
+
+    if args.result_dir:
+        print("result_dir:", args.result_dir)
+        prompt_output_lens_file_name = os.path.join(args.result_dir, dir_name, prompt_output_lens_file_name)
+    with open(prompt_output_lens_file_name, "w") as prompt_output_lens_file_name_outfile:
+        json.dump(prompt_output_lens_json, prompt_output_lens_file_name_outfile)

if __name__ == "__main__":
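Together with the client-side change above, the benchmark becomes a two-phase workflow: a baseline run writes a prompt_output_*.json file mapping each prompt to its generated length, and a later sjf run consumes it. A minimal sketch of the round trip, with the filename simplified from the pattern in the diff:

import json

# Phase 1: a baseline (e.g. fcfs) run records each prompt's output length.
# Dummy values stand in for benchmark_result["output_lens"].
mapping = {"What is SJF?": 128, "Explain vLLM swapping.": 512}
with open("prompt_output_lens.json", "w") as f:
    json.dump(mapping, f)

# Phase 2: the sjf run locates the file via get_json_file() and pins each
# request's min_tokens/max_tokens to the recorded value.
with open("prompt_output_lens.json") as f:
    lengths = json.load(f)
assert lengths["What is SJF?"] == 128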
26 changes: 13 additions & 13 deletions benchmarks/result/analysis/result_analysis.py
@@ -1,6 +1,6 @@
import marimo

-__generated_with = "0.7.20"
+__generated_with = "0.8.9"
app = marimo.App(width="full")


@@ -39,8 +39,8 @@ def __(mo):

@app.cell
def __(base_dir, os):
-    _date = "20240902"
-    _counters = [401]
+    _date = "20240904"
+    _counters = [451]
e2e_result_dir_names = [
os.path.join(base_dir, _date, str(counter)) for counter in _counters
]
@@ -191,7 +191,7 @@ def e2e_result(
axes[1].set_ylabel("Throughput (Token/s)")
axes[1].set_xlabel("Request Rate (r/s)")
axes[1].grid(linestyle="--", alpha=0.5, axis="y")
-plt.show()
+plt.savefig("e2e_result.png")  # pyplot has no save(); output name is assumed
return axes, e2e_result, fig


@@ -247,11 +247,11 @@ def line_plot(_long_df):
line_styles = ["-", "--", "-.", ":"]
mark_styles = ["d", "o", "v"]
colors = ["r", "g", "b", "y"]
-show_legend = True
+save_legend = True
for _i, metric_type in enumerate(metric_types):
_ax = _axes[_i // 2][_i % 2]
if _i > 0:
-show_legend = False
+save_legend = False
data = _long_df[(_long_df["metric_type"] == metric_type)]

# Group by line_type
@@ -294,18 +294,18 @@ def barplot(_long_df, request_rate):

_long_df = _long_df[_long_df["request_rate"] == request_rate]

-show_legend = True
+save_legend = True
for metric_type in metric_types:
_i = metric_types.index(metric_type)
if _i > 0:
-show_legend = False
+save_legend = False
sns.barplot(
hue="scheduler_policy",
y="Ratio",
x="metric_name",
data=_long_df[_long_df["metric_type"] == metric_type],
ax=_ax[_i // 2][_i % 2],
-legend=show_legend,
+legend=save_legend,
)
_ax[_i // 2][_i % 2].set_xlabel(metric_type)
_ax[_i // 2][_i % 2].set_ylabel("")
@@ -365,16 +365,16 @@ def get_metric_ratio(df):
)
)
# _long_df = _long_df[_long_df["metric_name"] == "P99"]
-show_legend = True
+save_legend = True

# line_plot(_long_df)
# print(_long_df)
barplot(_long_df, 2) # Need to change
fig.tight_layout()
plt.subplots_adjust(wspace=0.2, hspace=0.4)

-plt.show()
-return get_metric_ratio, show_legend
+plt.savefig("metric_ratio.png")  # pyplot has no save(); output name is assumed
+return get_metric_ratio, save_legend


@app.cell
@@ -896,7 +896,7 @@ def __(add_num_annotation, detailed_mean_result, plt, sns):
plt.legend(title="")
add_num_annotation(ax)
plt.xticks(rotation=45)
-plt.show()
+plt.savefig("detailed_mean.png")  # pyplot has no save(); output name is assumed
return ax,


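Since the notebook now writes figures to disk instead of displaying them, the call to use is matplotlib's savefig (pyplot has no save method). A minimal headless example with an assumed output path:

import matplotlib
matplotlib.use("Agg")  # headless backend: render to files, no display needed
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0.5, 1.0, 2.0], [120, 210, 350], marker="o")  # illustrative numbers
ax.set_xlabel("Request Rate (r/s)")
ax.set_ylabel("Throughput (Token/s)")
ax.grid(linestyle="--", alpha=0.5, axis="y")
fig.tight_layout()
fig.savefig("e2e_throughput.png", dpi=300)  # path and dpi are arbitrary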
[Diffs for the remaining 3 changed files are not shown.]
