Add debug conditions
nikita-savelyevv committed Jan 23, 2025
1 parent d86cf54 commit f65b076
Showing 3 changed files with 25 additions and 13 deletions.
nncf/openvino/optimized_functions/functions.py (13 additions, 1 deletion)
@@ -66,7 +66,13 @@ def do_int_quantization(
         {"compressed_weight": compressed_weight_dtype, "zero_point": compressed_weight_dtype}
     )
 
-    model = get_compress_weight_model(
+    import os
+    recompile = bool(int(os.environ.get("RECOMPILE", "0")))
+    ov_model_params.dynamic_shapes = bool(int(os.environ.get("DYNAMIC_COMPRESSION", "0")))
+    ov_model_params.release_memory = bool(int(os.environ.get("RELEASE_MEMORY", "0")))
+    ov_model_params.share_outputs = bool(int(os.environ.get("SHARE_OUTPUTS", "0")))
+
+    get_model_fn = lambda: get_compress_weight_model(
         ov_model_params,
         config,
         weight_shape,
@@ -75,6 +81,12 @@ def do_int_quantization(
         reduction_axes,
     )
 
+    if recompile:
+        with disable_results_caching(OV_MODEL_CACHE):
+            model = get_model_fn()
+    else:
+        model = get_model_fn()
+
     if precomputed_scale is None:
         # weight -> compressed_weight, scale, (zero_point)
         results = model([weight])
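For reference, a usage sketch of the new debug conditions (not part of the commit). Each flag is a plain environment variable read as bool(int(os.environ.get(NAME, "0"))), so only integer strings such as "0" or "1" parse; a value like "true" would raise ValueError. The inline comments paraphrase the parameter names on ov_model_params, not documented semantics:

import os

# Hypothetical debugging session: force a fresh compile with dynamic shapes.
os.environ["RECOMPILE"] = "1"            # bypass OV_MODEL_CACHE and rebuild the model
os.environ["DYNAMIC_COMPRESSION"] = "1"  # ov_model_params.dynamic_shapes = True
os.environ["RELEASE_MEMORY"] = "0"       # ov_model_params.release_memory = False
os.environ["SHARE_OUTPUTS"] = "0"        # ov_model_params.share_outputs = False

# The same parsing the diff uses inside do_int_quantization:
recompile = bool(int(os.environ.get("RECOMPILE", "0")))
assert recompile is True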
Second changed file (filename not captured in this view):
@@ -8,6 +8,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 from typing import Dict, Iterable, List, Optional, Tuple
 
 import openvino as ov
@@ -239,14 +240,13 @@ def _create_compression_subgraph(
             raise nncf.ParameterNotSupportedError(f"{compression_config.mode.value} is not supported.")
 
         original_shape = weight.shape
-        with disable_results_caching(OV_MODEL_CACHE):
-            compressed_weight = compress_weight(
-                weight,
-                reduction_axes,
-                compression_config,
-                layer_scales,
-                layer_zero_points,
-            )
+        compressed_weight = compress_weight(
+            weight,
+            reduction_axes,
+            compression_config,
+            layer_scales,
+            layer_zero_points,
+        )
 
         compressed_const = create_ov_const_from_tensor(
             compressed_weight.tensor, compression_dtype, name=const_node_name
@@ -293,7 +293,7 @@ def transform_model(
             const_node = self.name_to_node_mapping[const_node_name]
             const_node_output = const_node.output(0)
             const_dtype = const_node_output.get_element_type()
-            weight = get_const_value(const_node, cast_bf16_to_fp32=False)
+            weight = get_const_value(const_node, cast_bf16_to_fp32=bool(int(os.environ.get("NUMPY_COMPRESSION", "0"))))
             # Creation of ov.Tensor is required for two reasons:
             # 1. To be able to process BF16 weight properly
             # 2. To indicate that it is allowed for the compressed constant to be returned as int4/uint4 if needed
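The cast_bf16_to_fp32 change above connects to the NUMPY_COMPRESSION flag used in the next file: when the NumPy compression path is forced, bf16 constants must be widened to fp32 on read, because NumPy has no native bf16 dtype. A self-contained sketch of that conditional cast, where read_const is a hypothetical stand-in for get_const_value:

import os

import numpy as np

def read_const(values, dtype_name: str, cast_bf16_to_fp32: bool) -> np.ndarray:
    # Hypothetical stand-in for get_const_value: widen a bf16 constant to fp32
    # only when the caller requests it.
    if dtype_name == "bf16" and cast_bf16_to_fp32:
        return np.asarray(values, dtype=np.float32)
    return np.asarray(values)

numpy_compression = bool(int(os.environ.get("NUMPY_COMPRESSION", "0")))
weight = read_const([0.5, 1.25], "bf16", cast_bf16_to_fp32=numpy_compression)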
Third changed file (filename not captured in this view):
@@ -8,7 +8,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import os
 from dataclasses import dataclass
 from typing import Optional, Tuple, Union
 
@@ -458,7 +458,7 @@ def do_int_quantization(
         weight, reduction_axes = reshape_weight_for_grouped_quantization(weight, reduction_axes, config.group_size)
 
     # Optimized implementation
-    if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]:
+    if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy] and not bool(int(os.environ.get("NUMPY_COMPRESSION", "0"))):
         from nncf.openvino.optimized_functions import do_int_quantization as do_int_quantization_ov
 
         return do_int_quantization_ov(
@@ -512,7 +512,7 @@ def quantize_dequantize_weight(
     (and zero point).
     """
     # Optimized implementation
-    if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]:
+    if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy] and not bool(int(os.environ.get("NUMPY_COMPRESSION", "0"))):
         from nncf.openvino.optimized_functions import quantize_dequantize_weight as quantize_dequantize_weight_ov
 
         return quantize_dequantize_weight_ov(
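Both guards follow the same pattern, so the routing can be condensed into one sketch (env_flag and pick_implementation are illustrative helpers, not NNCF API): with NUMPY_COMPRESSION=1, the optimized OpenVINO kernels are skipped and the reference NumPy implementation further down in each function runs instead.

import os

def env_flag(name: str) -> bool:
    # Same convention as the diff: unset or "0" means disabled.
    return bool(int(os.environ.get(name, "0")))

def pick_implementation(openvino_available: bool, backend_supported: bool) -> str:
    # Condensed form of the guard added to do_int_quantization and
    # quantize_dequantize_weight above.
    if openvino_available and backend_supported and not env_flag("NUMPY_COMPRESSION"):
        return "openvino"
    return "numpy"

assert pick_implementation(True, True) == "openvino"
os.environ["NUMPY_COMPRESSION"] = "1"
assert pick_implementation(True, True) == "numpy"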
