Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Dynamic element type instead of undefined. Earlier config finalize call #29127

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ struct weightless_cache_manager {

size_t bin_offset = SIZE_MAX;
size_t original_size = SIZE_MAX;
ov::element::Type original_dtype = ov::element::Type_t::undefined;
ov::element::Type curr_dtype = ov::element::Type_t::undefined;
ov::element::Type original_dtype = ov::element::Type_t::dynamic;
ov::element::Type curr_dtype = ov::element::Type_t::dynamic;
ov::Shape shape{};

bool should_run_reorder() const {
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ OV_CONFIG_RELEASE_OPTION(ov::device, id, "0", "ID of the current device")
OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty")
OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference")
OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast<int>(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism")
OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, "Model floating-point inference precision. Supported values: { f16, f32, undefined }", [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; })
OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, "Model floating-point inference precision. Supported values: { f16, f32, dynamic }", [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::dynamic; })
OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilation and inference as well as device queue settings")
OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc")
OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy")
Expand All @@ -26,7 +26,7 @@ OV_CONFIG_RELEASE_OPTION(ov::internal, query_model_ratio, 1.0f, "")
OV_CONFIG_RELEASE_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache")
OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, ov::EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model")
OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, "")
OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "")
OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::dynamic, "")
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "")
OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching")
OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, -1.0f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision")
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,13 @@ program::program(engine& engine_ref,
_is_body_program(is_body_program),
_compilation_context(compilation_context) {
init_primitives();
_config.finalize(_engine);
GPU_DEBUG_INFO << "Program config\n" << _config.to_string();
init_program();
prepare_nodes(topology);
program_node::reset_unique_id();
if (no_optimizations) {
init_graph();
_config.finalize(_engine);
} else {
build_program(is_internal);
if (_is_body_program) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ TEST_P(CheckWeightlessCacheAccuracyLowPrecision, MatmulWeightsDecompression) {
ov::element::f32,
model_dtype,
ov::element::f32,
ov::element::undefined,
ov::element::dynamic,
true,
ov::test::DecompressionType::full,
ov::test::DecompressionType::full,
Expand Down
Loading