From 07f7e74b36b730e93c60a8fb241f24e9ddd2c3df Mon Sep 17 00:00:00 2001 From: Tyler Osterberg Date: Mon, 17 Jun 2024 16:28:21 -0700 Subject: [PATCH] [aot] Fix AOT quantization for weight-only quantization --- serving/docker/partition/partition.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/serving/docker/partition/partition.py b/serving/docker/partition/partition.py index d7ff0d966..4d36b6f53 100644 --- a/serving/docker/partition/partition.py +++ b/serving/docker/partition/partition.py @@ -35,6 +35,8 @@ ALLOW_PATTERNS = ["*.json", "*.pt", "*.bin", "*.txt"] +WEIGHT_ONLY_QUANTIZATION_TYPES = ["static_int8"] + class PartitionService(object): @@ -326,7 +328,9 @@ def main(): extract_python_jar(PYTHON_CACHE_DIR) service = PartitionService(properties_manager) - if properties_manager.properties.get('option.quantize'): + if properties_manager.properties.get( + 'option.quantize') and properties_manager.properties.get( + 'option.quantize') not in WEIGHT_ONLY_QUANTIZATION_TYPES: service.run_quantization() else: service.run_partition()