diff --git a/tools/perf/accumulate_results.py b/tools/perf/accumulate_results.py
new file mode 100644
index 0000000000..1f56b41745
--- /dev/null
+++ b/tools/perf/accumulate_results.py
@@ -0,0 +1,22 @@
+import glob
+
+import pandas as pd
+
+
+def concat_all_results(outputs_filename="all_outputs.csv", files_regex="*_bs*.csv"):
+    """Concatenates all files matching the glob pattern to an output file"""
+    candidate_files = glob.glob(files_regex)
+
+    if candidate_files:
+        df = pd.concat(
+            [pd.read_csv(file_name, index_col=0) for file_name in candidate_files]
+        ).reset_index(drop=True)
+        df.to_csv(outputs_filename)
+        print(f"Saved concatenated outputs to {outputs_filename}")
+
+    else:
+        print(f"No outputs to generate {outputs_filename}")
+
+
+if __name__ == "__main__":
+    concat_all_results()
diff --git a/tools/perf/benchmark.sh b/tools/perf/benchmark.sh
index 524071eed2..4a7cd24487 100644
--- a/tools/perf/benchmark.sh
+++ b/tools/perf/benchmark.sh
@@ -7,115 +7,137 @@ python hub.py
 batch_sizes=(1 2 4 8 16 32 64 128 256)
 large_model_batch_sizes=(1 2 4 8 16 32 64)
+backends=("torch" "ts_trt" "dynamo" "torch_compile" "inductor")
+backends_no_torchscript=("torch" "dynamo" "torch_compile" "inductor")
 
 # Benchmark VGG16 model
 echo "Benchmarking VGG16 model"
 for bs in ${batch_sizes[@]}
 do
-  python perf_run.py --model ${MODELS_DIR}/vgg16_scripted.jit.pt \
-                     --model_torch vgg16 \
-                     --precision fp32,fp16 --inputs="(${bs}, 3, 224, 224)" \
-                     --batch_size ${bs} \
-                     --truncate \
-                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
-                     --report "vgg16_perf_bs${bs}.txt"
+  for backend in ${backends[@]}
+  do
+    python perf_run.py --model ${MODELS_DIR}/vgg16_scripted.jit.pt \
+                       --model_torch vgg16 \
+                       --precision fp16 --inputs="(${bs}, 3, 224, 224)" \
+                       --batch_size ${bs} \
+                       --truncate \
+                       --backends ${backend} \
+                       --report "vgg16_perf_bs${bs}_backend_${backend}.csv"
+  done
 done
 
 # Benchmark AlexNet model
 echo "Benchmarking AlexNet model"
 for bs in ${batch_sizes[@]}
 do
-  python perf_run.py --model ${MODELS_DIR}/alexnet_scripted.jit.pt \
-                     --model_torch alexnet \
-                     --precision fp32,fp16 --inputs="(${bs}, 3, 227, 227)" \
-                     --batch_size ${bs} \
-                     --truncate \
-                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
-                     --report "alexnet_perf_bs${bs}.txt"
+  for backend in ${backends[@]}
+  do
+    python perf_run.py --model ${MODELS_DIR}/alexnet_scripted.jit.pt \
+                       --model_torch alexnet \
+                       --precision fp16 --inputs="(${bs}, 3, 227, 227)" \
+                       --batch_size ${bs} \
+                       --truncate \
+                       --backends ${backend} \
+                       --report "alexnet_perf_bs${bs}_backend_${backend}.csv"
+  done
 done
 
 # Benchmark Resnet50 model
 echo "Benchmarking Resnet50 model"
 for bs in ${batch_sizes[@]}
 do
-  python perf_run.py --model ${MODELS_DIR}/resnet50_scripted.jit.pt \
-                     --model_torch resnet50 \
-                     --precision fp32,fp16 --inputs="(${bs}, 3, 224, 224)" \
-                     --batch_size ${bs} \
-                     --truncate \
-                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
-                     --report "resnet50_perf_bs${bs}.txt"
+  for backend in ${backends[@]}
+  do
+    python perf_run.py --model ${MODELS_DIR}/resnet50_scripted.jit.pt \
+                       --model_torch resnet50 \
+                       --precision fp16 --inputs="(${bs}, 3, 224, 224)" \
+                       --batch_size ${bs} \
+                       --truncate \
+                       --backends ${backend} \
+                       --report "resnet50_perf_bs${bs}_backend_${backend}.csv"
+  done
 done
 
 # Benchmark VIT model
 echo "Benchmarking VIT model"
 for bs in ${batch_sizes[@]}
 do
-  python perf_run.py --model ${MODELS_DIR}/vit_scripted.jit.pt \
-                     --model_torch vit \
-                     --precision fp32,fp16 --inputs="(${bs}, 3, 224, 224)" \
-                     --batch_size ${bs} \
-                     --truncate \
-                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
-                     --report "vit_perf_bs${bs}.txt"
+  for backend in ${backends[@]}
+  do
+    python perf_run.py --model ${MODELS_DIR}/vit_scripted.jit.pt \
+                       --model_torch vit \
+                       --precision fp16 --inputs="(${bs}, 3, 224, 224)" \
+                       --batch_size ${bs} \
+                       --truncate \
+                       --backends ${backend} \
+                       --report "vit_perf_bs${bs}_backend_${backend}.csv"
+  done
 done
 
 # Benchmark VIT Large model
 echo "Benchmarking VIT Large model"
 for bs in ${large_model_batch_sizes[@]}
 do
-  python perf_run.py --model ${MODELS_DIR}/vit_large_scripted.jit.pt \
-                     --model_torch vit_large \
-                     --precision fp32,fp16 --inputs="(${bs}, 3, 224, 224)" \
-                     --truncate \
-                     --batch_size ${bs} \
-                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
-                     --report "vit_large_perf_bs${bs}.txt"
+  for backend in ${backends[@]}
+  do
+    python perf_run.py --model ${MODELS_DIR}/vit_large_scripted.jit.pt \
+                       --model_torch vit_large \
+                       --precision fp16 --inputs="(${bs}, 3, 224, 224)" \
+                       --batch_size ${bs} \
+                       --truncate \
+                       --backends ${backend} \
+                       --report "vit_large_perf_bs${bs}_backend_${backend}.csv"
+  done
 done
 
 # Benchmark EfficientNet-B0 model
 echo "Benchmarking EfficientNet-B0 model"
 for bs in ${batch_sizes[@]}
 do
-  python perf_run.py --model ${MODELS_DIR}/efficientnet_b0_scripted.jit.pt \
-                     --model_torch efficientnet_b0 \
-                     --precision fp32,fp16 --inputs="(${bs}, 3, 224, 224)" \
-                     --batch_size ${bs} \
-                     --truncate \
-                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
-                     --report "efficientnet_b0_perf_bs${bs}.txt"
+  for backend in ${backends[@]}
+  do
+    python perf_run.py --model ${MODELS_DIR}/efficientnet_b0_scripted.jit.pt \
+                       --model_torch efficientnet_b0 \
+                       --precision fp16 --inputs="(${bs}, 3, 224, 224)" \
+                       --batch_size ${bs} \
+                       --truncate \
+                       --backends ${backend} \
+                       --report "efficientnet_b0_perf_bs${bs}_backend_${backend}.csv"
+  done
 done
 
 # Benchmark Stable Diffusion UNet model
 echo "Benchmarking SD UNet model"
 for bs in ${large_model_batch_sizes[@]}
 do
-  python perf_run.py --model_torch sd_unet \
-                     --precision fp32,fp16 --inputs="(${bs}, 4, 64, 64)@fp16;(${bs})@fp16;(${bs}, 1, 768)@fp16" \
-                     --batch_size ${bs} \
-                     --backends torch,dynamo,torch_compile,inductor \
-                     --truncate \
-                     --report "sd_unet_perf_bs${bs}.txt"
+  for backend in ${backends_no_torchscript[@]}
+  do
+    python perf_run.py --model_torch sd_unet \
+                       --precision fp16 --inputs="(${bs}, 4, 64, 64);(${bs});(${bs}, 1, 768)" \
+                       --batch_size ${bs} \
+                       --truncate \
+                       --backends ${backend} \
+                       --report "sd_unet_perf_bs${bs}_backend_${backend}.csv"
+  done
 done
 
 # Benchmark BERT model
 echo "Benchmarking Huggingface BERT base model"
 for bs in ${batch_sizes[@]}
 do
-  python perf_run.py --model ${MODELS_DIR}/bert_base_uncased_traced.jit.pt \
-                     --model_torch "bert_base_uncased" \
-                     --precision fp32 --inputs="(${bs}, 128)@int32;(${bs}, 128)@int32" \
-                     --batch_size ${bs} \
-                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
-                     --truncate \
-                     --report "bert_base_perf_bs${bs}.txt"
+  for backend in ${backends[@]}
+  do
+    python perf_run.py --model ${MODELS_DIR}/bert_base_uncased_traced.jit.pt \
+                       --model_torch "bert_base_uncased" \
+                       --precision fp16 --inputs="(${bs}, 128)@int32;(${bs}, 128)@int32" \
+                       --batch_size ${bs} \
+                       --truncate \
+                       --backends ${backend} \
+                       --report "bert_base_perf_bs${bs}_backend_${backend}.csv"
+  done
 done
 
 # Collect and concatenate all results
 echo "Concatenating all results"
-(echo "Output of All Model Runs"; echo) >> all_outputs.txt;
-
-for i in $(ls *_bs*.txt);
-  do (echo $i; cat $i; echo; echo) >> all_outputs.txt;
-done
+python accumulate_results.py
 
 
diff --git a/tools/perf/perf_run.py b/tools/perf/perf_run.py
index 58d03de4d1..5a91831fe9 100644
--- a/tools/perf/perf_run.py
+++ b/tools/perf/perf_run.py
@@ -578,23 +578,26 @@ def recordStats(backend, timings, precision, batch_size=1, compile_time_s=None):
         if model_torch is not None:
             model_torch = model_torch.half()
 
-    status = run(
-        model,
-        backends,
-        input_tensors,
-        params,
-        precision,
-        batch_size,
-        is_trt_engine,
-        model_torch=model_torch,
-    )
+    with torch.no_grad():
+        status = run(
+            model,
+            backends,
+            input_tensors,
+            params,
+            precision,
+            batch_size,
+            is_trt_engine,
+            model_torch=model_torch,
+        )
 
     # Generate report
     print("Model Summary: ", model_name)
     summary = pd.DataFrame(results)
+    summary.insert(
+        loc=0,
+        column="model_name",
+        value=(model_name_torch if model_name_torch is not None else model_name),
+    )
     print(summary)
     if args.report:
-        with open(args.report, "w") as file:
-            file.write("Model Summary: " + model_name + "\n")
-            file.write(summary.to_string())
-            file.close()
+        summary.to_csv(args.report)