From bbc24c77ea9a37106dc1ed70ae2178b5ef82a9da Mon Sep 17 00:00:00 2001
From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com>
Date: Thu, 4 Apr 2024 06:09:26 -0500
Subject: [PATCH] Update measure_ppl2_MC.py

Adding functionality to ingest scaling factors upon merge of the PR #3290
---
Review notes (this section is ignored by `git am`):

* The keyword argument was spelled `quantization-param-path`. Hyphens are
  not valid in a Python identifier, so the patched file would fail to parse
  with a SyntaxError. The hyphenated form is the command-line flag; the
  keyword passed to the engine constructor is `quantization_param_path`.
* An empty `--kv-cache-scales-path` is still mapped to None, so the engine
  sees "no scaling-factor file" rather than an empty path.
* NOTE(review): the leading whitespace of the hunk lines was lost in the
  copy this was reconstructed from; 14 spaces (aligned under `llm = LLM(`)
  is assumed. Confirm against the target file before applying, or apply
  with `git apply --ignore-whitespace`.
* NOTE(review): the post-image blob id on the `index` line predates the
  spelling fix and is kept only for reference.

 benchmarks/measure_ppl2_MC.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/benchmarks/measure_ppl2_MC.py b/benchmarks/measure_ppl2_MC.py
index 62ff37b5e2ae1..e3ea3e1a945bc 100755
--- a/benchmarks/measure_ppl2_MC.py
+++ b/benchmarks/measure_ppl2_MC.py
@@ -72,6 +72,8 @@ def vllm_init(args):
               kv_cache_dtype=args.kv_cache_dtype,
               #scales_path=args.kv_cache_scales_path
               # if args.kv_cache_scales_path!='' else None,
+              quantization_param_path=args.kv_cache_scales_path
+              if args.kv_cache_scales_path!='' else None,
               enforce_eager=args.enforce_eager)
 
     sampling_params = SamplingParams(n=1,