Add debug conditions
nikita-savelyevv committed Jan 23, 2025
1 parent d86cf54 commit f65b076
Showing 3 changed files with 25 additions and 13 deletions.
nncf/openvino/optimized_functions/functions.py (13 additions, 1 deletion)
@@ -66,7 +66,13 @@ def do_int_quantization(
         {"compressed_weight": compressed_weight_dtype, "zero_point": compressed_weight_dtype}
     )
 
-    model = get_compress_weight_model(
+    import os
+    recompile = bool(int(os.environ.get("RECOMPILE", "0")))
+    ov_model_params.dynamic_shapes = bool(int(os.environ.get("DYNAMIC_COMPRESSION", "0")))
+    ov_model_params.release_memory = bool(int(os.environ.get("RELEASE_MEMORY", "0")))
+    ov_model_params.share_outputs = bool(int(os.environ.get("SHARE_OUTPUTS", "0")))
+
+    get_model_fn = lambda: get_compress_weight_model(
         ov_model_params,
         config,
         weight_shape,
@@ -75,6 +81,12 @@ def do_int_quantization(
         reduction_axes,
     )
 
+    if recompile:
+        with disable_results_caching(OV_MODEL_CACHE):
+            model = get_model_fn()
+    else:
+        model = get_model_fn()
+
     if precomputed_scale is None:
         # weight -> compressed_weight, scale, (zero_point)
         results = model([weight])
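For reference, a usage sketch of the new debug conditions (not part of the commit). Each flag is a plain environment variable read as bool(int(os.environ.get(NAME, "0"))), so only integer strings such as "0" or "1" parse; a value like "true" would raise ValueError. The inline comments paraphrase the parameter names on ov_model_params, not documented semantics:

import os

# Hypothetical debugging session: force a fresh compile with dynamic shapes.
os.environ["RECOMPILE"] = "1"            # bypass OV_MODEL_CACHE and rebuild the model
os.environ["DYNAMIC_COMPRESSION"] = "1"  # ov_model_params.dynamic_shapes = True
os.environ["RELEASE_MEMORY"] = "0"       # ov_model_params.release_memory = False
os.environ["SHARE_OUTPUTS"] = "0"        # ov_model_params.share_outputs = False

# The same parsing the diff uses inside do_int_quantization:
recompile = bool(int(os.environ.get("RECOMPILE", "0")))
assert recompile is True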
Second changed file (filename not captured in this view):
@@ -8,6 +8,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 from typing import Dict, Iterable, List, Optional, Tuple
 
 import openvino as ov
@@ -239,14 +240,13 @@ def _create_compression_subgraph(
             raise nncf.ParameterNotSupportedError(f"{compression_config.mode.value} is not supported.")
 
         original_shape = weight.shape
-        with disable_results_caching(OV_MODEL_CACHE):
-            compressed_weight = compress_weight(
-                weight,
-                reduction_axes,
-                compression_config,
-                layer_scales,
-                layer_zero_points,
-            )
+        compressed_weight = compress_weight(
+            weight,
+            reduction_axes,
+            compression_config,
+            layer_scales,
+            layer_zero_points,
+        )
 
         compressed_const = create_ov_const_from_tensor(
             compressed_weight.tensor, compression_dtype, name=const_node_name
@@ -293,7 +293,7 @@ def transform_model(
             const_node = self.name_to_node_mapping[const_node_name]
             const_node_output = const_node.output(0)
             const_dtype = const_node_output.get_element_type()
-            weight = get_const_value(const_node, cast_bf16_to_fp32=False)
+            weight = get_const_value(const_node, cast_bf16_to_fp32=bool(int(os.environ.get("NUMPY_COMPRESSION", "0"))))
             # Creation of ov.Tensor is required for two reasons:
             # 1. To be able to process BF16 weight properly
             # 2. To indicate that it is allowed for the compressed constant to be returned as int4/uint4 if needed
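The cast_bf16_to_fp32 change above connects to the NUMPY_COMPRESSION flag used in the next file: when the NumPy compression path is forced, bf16 constants must be widened to fp32 on read, because NumPy has no native bf16 dtype. A self-contained sketch of that conditional cast, where read_const is a hypothetical stand-in for get_const_value:

import os

import numpy as np

def read_const(values, dtype_name: str, cast_bf16_to_fp32: bool) -> np.ndarray:
    # Hypothetical stand-in for get_const_value: widen a bf16 constant to fp32
    # only when the caller requests it.
    if dtype_name == "bf16" and cast_bf16_to_fp32:
        return np.asarray(values, dtype=np.float32)
    return np.asarray(values)

numpy_compression = bool(int(os.environ.get("NUMPY_COMPRESSION", "0")))
weight = read_const([0.5, 1.25], "bf16", cast_bf16_to_fp32=numpy_compression)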
Third changed file (filename not captured in this view):
@@ -8,7 +8,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import os
 from dataclasses import dataclass
 from typing import Optional, Tuple, Union
 
@@ -458,7 +458,7 @@ def do_int_quantization(
         weight, reduction_axes = reshape_weight_for_grouped_quantization(weight, reduction_axes, config.group_size)
 
     # Optimized implementation
-    if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]:
+    if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy] and not bool(int(os.environ.get("NUMPY_COMPRESSION", "0"))):
         from nncf.openvino.optimized_functions import do_int_quantization as do_int_quantization_ov
 
         return do_int_quantization_ov(
@@ -512,7 +512,7 @@ def quantize_dequantize_weight(
     (and zero point).
     """
     # Optimized implementation
-    if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]:
+    if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy] and not bool(int(os.environ.get("NUMPY_COMPRESSION", "0"))):
         from nncf.openvino.optimized_functions import quantize_dequantize_weight as quantize_dequantize_weight_ov
 
         return quantize_dequantize_weight_ov(
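Both guards follow the same pattern, so the routing can be condensed into one sketch (env_flag and pick_implementation are illustrative helpers, not NNCF API): with NUMPY_COMPRESSION=1, the optimized OpenVINO kernels are skipped and the reference NumPy implementation further down in each function runs instead.

import os

def env_flag(name: str) -> bool:
    # Same convention as the diff: unset or "0" means disabled.
    return bool(int(os.environ.get(name, "0")))

def pick_implementation(openvino_available: bool, backend_supported: bool) -> str:
    # Condensed form of the guard added to do_int_quantization and
    # quantize_dequantize_weight above.
    if openvino_available and backend_supported and not env_flag("NUMPY_COMPRESSION"):
        return "openvino"
    return "numpy"

assert pick_implementation(True, True) == "openvino"
os.environ["NUMPY_COMPRESSION"] = "1"
assert pick_implementation(True, True) == "numpy"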
