Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Dynamic element type instead of undefined. Earlier config finalize call #29127

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ struct weightless_cache_manager {

size_t bin_offset = SIZE_MAX;
size_t original_size = SIZE_MAX;
ov::element::Type original_dtype = ov::element::Type_t::undefined;
ov::element::Type curr_dtype = ov::element::Type_t::undefined;
ov::element::Type original_dtype = ov::element::Type_t::dynamic;
ov::element::Type curr_dtype = ov::element::Type_t::dynamic;
ov::Shape shape{};

bool should_run_reorder() const {
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ OV_CONFIG_RELEASE_OPTION(ov::device, id, "0", "ID of the current device")
OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty")
OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference")
OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast<int>(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism")
OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, "Model floating-point inference precision. Supported values: { f16, f32, undefined }", [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; })
OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, "Model floating-point inference precision. Supported values: { f16, f32, dynamic }", [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::dynamic; })
OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilation and inference as well as device queue settings")
OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc")
OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy")
Expand All @@ -26,7 +26,7 @@ OV_CONFIG_RELEASE_OPTION(ov::internal, query_model_ratio, 1.0f, "")
OV_CONFIG_RELEASE_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache")
OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, ov::EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model")
OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, "")
OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "")
OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::dynamic, "")
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "")
OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching")
OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, -1.0f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision")
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,13 @@ program::program(engine& engine_ref,
_is_body_program(is_body_program),
_compilation_context(compilation_context) {
init_primitives();
_config.finalize(_engine);
GPU_DEBUG_INFO << "Program config\n" << _config.to_string();
init_program();
prepare_nodes(topology);
program_node::reset_unique_id();
if (no_optimizations) {
init_graph();
_config.finalize(_engine);
} else {
build_program(is_internal);
if (_is_body_program) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ TEST_P(CheckWeightlessCacheAccuracyLowPrecision, MatmulWeightsDecompression) {
ov::element::f32,
model_dtype,
ov::element::f32,
ov::element::undefined,
ov::element::dynamic,
true,
ov::test::DecompressionType::full,
ov::test::DecompressionType::full,
Expand Down
Loading