diff --git a/serving/docker/partition/partition.py b/serving/docker/partition/partition.py index d7ff0d966..4d36b6f53 100644 --- a/serving/docker/partition/partition.py +++ b/serving/docker/partition/partition.py @@ -35,6 +35,8 @@ ALLOW_PATTERNS = ["*.json", "*.pt", "*.bin", "*.txt"] +WEIGHT_ONLY_QUANTIZATION_TYPES = ["static_int8"] + class PartitionService(object): @@ -326,7 +328,9 @@ def main(): extract_python_jar(PYTHON_CACHE_DIR) service = PartitionService(properties_manager) - if properties_manager.properties.get('option.quantize'): + if properties_manager.properties.get( + 'option.quantize') and properties_manager.properties.get( + 'option.quantize') not in WEIGHT_ONLY_QUANTIZATION_TYPES: service.run_quantization() else: service.run_partition()