Rebase main
Gangmuk authored and nwangfw committed Jan 23, 2025
1 parent 86787ba commit e9c996b
Showing 9 changed files with 1,389 additions and 88 deletions.
268 changes: 260 additions & 8 deletions benchmarks/generator/client.py

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions benchmarks/generator/generate_workload_jsonl.sh
@@ -0,0 +1,11 @@
#!/bin/bash

SHAREGPT_FILE_PATH="sharedgpt/ShareGPT_V3_unfiltered_cleaned_split.json"

python workload_generator.py \
--prompt-file ${SHAREGPT_FILE_PATH} \
--trace-type synthetic-from-csv-file \
--model "Qwen/Qwen2.5-Coder-7B-Instruct" \
--output-dir "workload" \
--stats-csv "input_trace/maas/window_0-20241017_2102-20241017_2110.csv"
# --stats-csv "input_trace/maas/window_0-20241017_2102-20241017_2132.csv"
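# The generated workload JSONL is written under ./workload (per --output-dir above); the
# commented --stats-csv line is an alternative, longer trace window from the same capture.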
205 changes: 205 additions & 0 deletions benchmarks/generator/plot-output.py
@@ -0,0 +1,205 @@
#!/usr/bin/env python3
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import argparse
import sys
from datetime import datetime


def read_experiment_file(file_path):
try:
with open(file_path, 'r', encoding='utf-8') as f:
return f.readlines()
except FileNotFoundError:
raise FileNotFoundError(f"Could not find the file: {file_path}")
except Exception as e:
raise Exception(f"Error reading file: {e}")

def parse_experiment_output(lines):
results = []
for line in lines:
if not line.strip():
continue
try:
data = json.loads(line.strip())
required_fields = ['output', 'prompt_tokens', 'output_tokens', 'total_tokens',
'start_time', 'end_time', 'latency', 'throughput']
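            # Example of one input record (illustrative values only; only the field
            # names above are required):
            # {"output": "...", "prompt_tokens": 128, "output_tokens": 64, "total_tokens": 192,
            #  "start_time": 1729200120.5, "end_time": 1729200122.1, "latency": 1.6, "throughput": 40.0}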
if any(field not in data for field in required_fields):
continue
results.append(data)
except json.JSONDecodeError:
continue

if not results:
raise ValueError("No valid data entries found")

# Sort by start_time
results.sort(key=lambda x: x['start_time'])

df = pd.DataFrame(results)
base_time = df['start_time'].iloc[0]
    df['start_time'] = df['start_time'] - base_time
df['end_time'] = df['end_time'] - base_time
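    # Bucket requests by whole seconds elapsed since the first request; each row's
    # 'rps' value is the number of requests that started in the same one-second bucket.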
df['second_bucket'] = df['start_time'].astype(int)
rps_series = df.groupby('second_bucket').size()
df['rps'] = df['second_bucket'].map(rps_series)
df['timestamp'] = df['start_time']
df['input_length'] = df['prompt_tokens']
df['output_length'] = df['output_tokens']

return df


def analyze_performance(df):
try:
stats = {
'Sample Count': len(df),
'Average Latency': df['latency'].mean(),
'Latency P50': df['latency'].quantile(0.50),
'Latency P90': df['latency'].quantile(0.90),
'Latency P95': df['latency'].quantile(0.95),
'Latency P99': df['latency'].quantile(0.99),
'Latency Std Dev': df['latency'].std(),
'Min Latency': df['latency'].min(),
'Max Latency': df['latency'].max(),
'Average RPS': df['rps'].mean(),
'Total Tokens (avg)': df['total_tokens'].mean(),
'Prompt Tokens (avg)': df['prompt_tokens'].mean(),
'Output Tokens (avg)': df['output_tokens'].mean(),
}
return stats
except Exception as e:
raise Exception(f"Error analyzing performance metrics: {e}")
def plot_latency_cdf(df, output_dir):
latencies_sorted = np.sort(df['latency'].values)
p = np.arange(1, len(latencies_sorted) + 1) / len(latencies_sorted)
ax = plt.figure().gca()
ax.plot(latencies_sorted, p)
ax.set_title('Latency CDF')
ax.set_xlabel('Latency (seconds)')
ax.set_ylabel('Probability')
ax.set_xlim(left=0)
ax.set_ylim(bottom=0, top=1.01)
ax.grid(True)

output_path = f'{output_dir}/latency_cdf.pdf'
plt.savefig(output_path, bbox_inches='tight')
print(f"** Saved: {output_path}")


def plot_latency_trend(df, output_dir):
fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(5, 1, figsize=(10, 15))
fig.tight_layout(pad=3.0)

ax1.plot(df['start_time'], df['latency'])
ax1.set_title('Latency over Time')
ax1.set_xlabel('Time (seconds)')
ax1.set_ylabel('Latency (seconds)')
ax1.grid(True)

ax2.plot(df['start_time'], df['rps'])
ax2.set_ylabel('Requests per Second')
ax2.set_xlabel('Time (seconds)')
ax2.grid(True)

ax3.plot(df['start_time'], df['input_length'])
ax3.set_ylabel('Input Length (tokens)')
ax3.set_xlabel('Time (seconds)')
ax3.grid(True)

ax4.plot(df['start_time'], df['output_length'])
ax4.set_ylabel('Output Length (tokens)')
ax4.set_xlabel('Time (seconds)')
ax4.grid(True)

ax5.plot(df['start_time'], df['total_tokens'])
ax5.set_ylabel('Total Tokens')
ax5.set_xlabel('Time (seconds)')
ax5.grid(True)

output_path = f'{output_dir}/time_series.pdf'
fig.savefig(output_path, bbox_inches='tight')
print(f"** Saved: {output_path}")

def plot_correlation(df, output_dir):
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(9, 3))
fig.tight_layout(pad=3.0)

sns.scatterplot(data=df, x='input_length', y='latency', ax=ax1)
sns.scatterplot(data=df, x='output_length', y='latency', ax=ax2)
sns.scatterplot(data=df, x='total_tokens', y='latency', ax=ax3)

# ax1.set_title('Input Length vs Latency')
ax1.set_xlabel('Input Length (tokens)')
ax1.set_ylabel('Latency (seconds)')

# ax2.set_title('Output Length vs Latency')
ax2.set_xlabel('Output Length (tokens)')
ax2.set_ylabel('Latency (seconds)')

# ax3.set_title('Total Length vs Latency')
ax3.set_xlabel('Total Length (tokens)')
ax3.set_ylabel('Latency (seconds)')

output_path = f'{output_dir}/correlation.pdf'
fig.savefig(output_path, bbox_inches='tight')
print(f"** Saved: {output_path}")


def save_statistics(stats, output_dir):
try:
output_path = f'{output_dir}/performance_stats.txt'
print(f"** Saving statistics to: {output_path}")
with open(output_path, 'w') as f:
f.write("Performance Statistics:\n")
f.write("=====================\n\n")
for metric, value in stats.items():
if 'Count' in metric:
f.write(f"{metric}: {value:,.0f}\n")
elif 'Tokens' in metric:
f.write(f"{metric}: {value:,.1f}\n")
else:
f.write(f"{metric}: {value:.3f}\n")
return output_path
except Exception as e:
raise Exception(f"Error saving statistics: {e}")

def main():
parser = argparse.ArgumentParser(description='Analyze experiment performance metrics from JSONL file', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input_file', type=str, required=True, help='Path to the experiment output file (JSONL format)')
parser.add_argument('--output-dir', type=str, default='./output', help='Directory to save the analysis results')
args = parser.parse_args()
    try:
        # Non-default output directories are nested under ./output; the default is used as-is.
        output_dir = args.output_dir
        if output_dir != './output':
            output_dir = f"output/{output_dir}"
        os.makedirs(output_dir, exist_ok=True)
        print("Generating report...")
        print(f"** Output directory: {output_dir}")
print(f"Reading data from {args.input_file}...")
lines = read_experiment_file(args.input_file)
df = parse_experiment_output(lines)
print(f"Successfully parsed {len(df)} valid entries")
print("\nCalculating statistics...")
stats = analyze_performance(df)
stats_file = save_statistics(stats, output_dir)
print(f"Statistics saved to: {stats_file}")

print("\nGenerating plots...")
plot_latency_cdf(df, output_dir)
plot_latency_trend(df, output_dir)
plot_correlation(df, output_dir)

print("\nKey Statistics:")
print(f"Number of samples: {stats['Sample Count']:,.0f}")
print(f"Latency (p50/p90/p99): {stats['Latency P50']:.3f}/{stats['Latency P90']:.3f}/{stats['Latency P99']:.3f} seconds")
print(f"Average RPS: {stats['Average RPS']:.2f}")
print(f"Average total tokens: {stats['Total Tokens (avg)']:.1f}")
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
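
# Example usage (illustrative paths):
#   python3 plot-output.py --input_file workload/output/output-trace.jsonl --output-dir trace-analysis
# Non-default --output-dir values are nested under ./output/ (here: ./output/trace-analysis).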
83 changes: 83 additions & 0 deletions benchmarks/generator/run-test.sh
@@ -0,0 +1,83 @@
#!/bin/bash

api_key=""
if [ -z "$api_key" ]; then
echo "API key is not set. Please set the API key in the script"
exit 1
fi

input_workload_path=$1
if [ -z "$input_workload_path" ]; then
echo "Usage: $0 <workload-path>"
exit 1
fi

## Derive the workload name from the path: keep the .jsonl filename and strip everything after the first '.'.
workload_name=$(echo "$input_workload_path" | tr '/' '\n' | grep '\.jsonl' | cut -d '.' -f 1)
workload_dir=$(dirname "$input_workload_path")
output_dir="${workload_dir}/output"
output_file_path="${workload_dir}/output/output-${workload_name}.jsonl"
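# e.g. (illustrative) input_workload_path="workload/synthetic.jsonl" gives workload_name="synthetic",
# output_dir="workload/output", and output_file_path="workload/output/output-synthetic.jsonl".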

if [ ! -d "${output_dir}" ]; then
    echo "Output directory does not exist. Creating it."
    mkdir -p "${output_dir}"
fi

# if [ -f ${output_file_path} ]; then
# echo "output file exists. Create a new output file with timestamp"
# output_file_path="${output_file_path}-$(date +%s).jsonl"
# fi

echo "workload_name: $workload_name"
echo "workload_dir: $workload_dir"
echo "input_workload_path: $input_workload_path"
echo "output_dir: $output_dir"
echo "output_file_path: $output_file_path"
# exp 1: st1, et1
# exp 2: st2, et2

# prom

## run all the experiments

## apply config

## log the start time

## vke
python3 client.py --workload-path ${input_workload_path} \
--endpoint "http://101.126.74.79:8000" \
--model deepseek-coder-7b-instruct \
--api-key ${api_key} \
--output-file-path ${output_file_path}
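# client.py is expected to append one JSON record per request (JSONL) to ${output_file_path};
# plot-output.py below reads that file to compute latency/RPS statistics and plots.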


## log the end time

## Query the Prometheus server over the experiment's start/end window and write the result to a file, e.g.:
# curl -G 'http://your-prometheus-server:9090/api/v1/query_range' \
#      --data-urlencode 'query=<your_log_query>' \
#      --data-urlencode 'start=2024-01-16T00:00:00Z' \
#      --data-urlencode 'end=2024-01-17T00:00:00Z' \
#      --data-urlencode 'step=1h' > /path/to/your/log.json

## plot


## local setup
# python3 client.py --workload-path ${input_workload_path} \
# --endpoint "http://localhost:8000" \
# --model llama2-7b \
# --api-key sk-kFJ12nKsFVfVmGpj3QzX65s4RbN2xJqWzPYCjYu7wT3BlbLi \
# --output-file-path mock-output.jsonl

echo "----------------------------------------------"
python3 plot-output.py \
--input_file ${output_file_path} \
--output-dir "workload/${workload_name}/output"