
Commit

Fix loading (ModelCloud#39)
* Fix model loader code that was using a bad default of float16

* Try to fix from_quantized error

* Set dtype to fp16 in inference mode
Qubitium authored Jun 17, 2024
1 parent b15688d commit 29560e5
Showing 2 changed files with 7 additions and 3 deletions.
4 changes: 2 additions & 2 deletions auto_gptq/modeling/_base.py
@@ -706,14 +706,14 @@ def from_quantized(
             disable_exllama = True
 
         # == step1: prepare configs and file names == #
-        config = AutoConfig.from_pretrained(
+        config: PretrainedConfig = AutoConfig.from_pretrained(
             model_name_or_path,
             trust_remote_code=trust_remote_code,
             **cached_file_kwargs,
         )
 
         if torch_dtype == "auto":
-            torch_dtype = auto_dtype_from_config(config)
+            torch_dtype = auto_dtype_from_config(config, quant_inference=True)
         elif not isinstance(torch_dtype, torch.dtype):
             raise ValueError(f"torch_dtype value of `{torch_dtype}` is not a torch.dtype instance.")
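
For context, a minimal usage sketch (not part of the commit) of how the torch_dtype="auto" path resolves after this change, assuming the top-level AutoGPTQForCausalLM.from_quantized entry point forwards torch_dtype to the patched method above; the model id and device below are placeholders:

import torch
from auto_gptq import AutoGPTQForCausalLM

# torch_dtype="auto" is now resolved via auto_dtype_from_config(config, quant_inference=True),
# so quantized inference always runs in torch.float16 (the only dtype the GPTQ kernels support).
model = AutoGPTQForCausalLM.from_quantized(
    "TheBloke/Llama-2-7B-GPTQ",  # placeholder model id; any GPTQ checkpoint
    device="cuda:0",
    torch_dtype="auto",
)

# Anything that is neither "auto" nor a torch.dtype still raises, per the unchanged elif branch:
# AutoGPTQForCausalLM.from_quantized(..., torch_dtype="float16")  # ValueError: not a torch.dtype instance
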
6 changes: 5 additions & 1 deletion auto_gptq/modeling/_utils.py
@@ -528,7 +528,11 @@ def get_checkpoints(
 
 
 # return the most stable tensor dtype for quantization while minimizing vram
-def auto_dtype_from_config(config: PretrainedConfig) -> torch.dtype:
+def auto_dtype_from_config(config: PretrainedConfig, quant_inference: bool = False) -> torch.dtype:
+    # all the gptq inference kernels are float16 only
+    if quant_inference:
+        return torch.float16
+
     dtype = getattr(config, "torch_dtype")
     if not dtype or not isinstance(dtype, torch.dtype):
         raise ValueError("Your model config.json does not have torch_dtype set. Please check for model " "corruption.")
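
A small sketch of how the updated helper behaves; the model id is a placeholder, and it assumes the checkpoint's config.json records a torch_dtype:

import torch
from transformers import AutoConfig

from auto_gptq.modeling._utils import auto_dtype_from_config

config = AutoConfig.from_pretrained("facebook/opt-125m")  # placeholder model id

# New quantized-inference path: always float16, ignoring config.torch_dtype.
assert auto_dtype_from_config(config, quant_inference=True) == torch.float16

# Default path is unchanged: return the dtype recorded in config.json,
# raising ValueError if torch_dtype is missing or not a torch.dtype.
dtype = auto_dtype_from_config(config)
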
