From af6c6778687d5dee98ca9963f6fbc544dfb53e00 Mon Sep 17 00:00:00 2001
From: Mingyu Kim
Date: Fri, 9 Feb 2024 17:25:02 +0900
Subject: [PATCH] [GPU] Exclude gemm from async compilation if the primitive's
 dynamic impl is an optimized kernel. (#22721)

### Details:
- Do not create spurious tasks: they leaked memory through the task_key and promise objects.
- Keys were generated before queueing and were never removed when the cache entry was not added to the LRU impl cache.
- This fixes most of the memory leak and reduces the permanent cache size.
- Remaining leak: futures are only appended and never freed.

### Tickets:
- 131417
---
 .../src/graph/compilation_context.cpp        |  2 +-
 .../intel_gpu/src/graph/primitive_inst.cpp   | 18 ++++++++++--------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/plugins/intel_gpu/src/graph/compilation_context.cpp b/src/plugins/intel_gpu/src/graph/compilation_context.cpp
index f811b84ec73c4f..f115cee970c777 100644
--- a/src/plugins/intel_gpu/src/graph/compilation_context.cpp
+++ b/src/plugins/intel_gpu/src/graph/compilation_context.cpp
@@ -28,8 +28,8 @@ class CompilationContext : public ICompilationContext {
         futures.emplace_back(promise->get_future());
 
         if (_task_keys.find(key) == _task_keys.end()) {
-            _task_keys.insert(key);
             if (_task_executor != nullptr) {
+                _task_keys.insert(key);
                 _task_executor->run([task, promise] {
                     task();
                     promise->set_value();
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index 21e1fdef6816b9..c752396de67a67 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -713,7 +713,15 @@ bool primitive_inst::use_async_compilation() {
         }
     }
 
-    return (_node->is_type<convolution>() || compile_fc_impls || _node->is_type<gemm>() ||
+    bool compile_gemm_impls = _node->is_type<gemm>();
+    if (compile_gemm_impls) {
+        // Do not async-compile if opt_gemm is chosen for iGPU
+        // Do async-compile if it is to be executed from onednn
+        compile_gemm_impls = _node->get_selected_impl() && _node->get_selected_impl()->get_kernel_name().find("gemm_ref") != std::string::npos;
+        compile_gemm_impls |= (_node->get_preferred_impl_type() == impl_types::onednn);
+    }
+
+    return (_node->is_type<convolution>() || compile_fc_impls || compile_gemm_impls ||
             (_node->is_type<softmax>() && _node->get_selected_impl() &&
              _node->get_selected_impl()->get_kernel_name().find("softmax_gpu_ref") != std::string::npos));
 }
@@ -830,13 +838,7 @@ bool primitive_inst::update_impl() {
         if (!can_be_optimized()) {
             auto impl = _node->type()->choose_impl(*_node, updated_params_no_dyn_pad);
-            // In the case of gemm, if current dynamic impl is not gemm_ref and newly chosen impl is gemm_ref,
-            // the newly chosen impl is not added to the impl cache for beffer performance.
-            if (_node->is_type<gemm>() &&
-                (_node->get_selected_impl() && _node->get_selected_impl()->get_kernel_name().find("gemm_ref") == std::string::npos) &&
-                impl->get_kernel_name().find("gemm_ref") != std::string::npos) {
-                return;
-            }
+
             if (impl->get_kernels_source().size() > 0) {
                 auto kernels = _program->get_kernels_cache().compile(updated_params_no_dyn_pad, impl->get_kernels_source());
                 impl->set_kernels(kernels);
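
For reviewers, a minimal self-contained sketch of the `push_task` pattern after the fix is shown below. It is illustrative only, not the plugin source: `MiniCompilationContext`, `TaskExecutor`, and the `size_t` key are assumptions standing in for the real `CompilationContext`, its task executor, and the `kernel_impl_params` key. The point it demonstrates is that the de-duplication key is recorded only when a task is actually handed to the executor, so a task that is never queued cannot leave a stale key (and leaked promise) behind.

```cpp
// Illustrative sketch only; names below are assumptions, not the plugin's classes.
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <unordered_set>
#include <vector>

using Task = std::function<void()>;

// Stand-in for the plugin's task executor; runs work inline for simplicity.
struct TaskExecutor {
    void run(Task task) { task(); }
};

class MiniCompilationContext {
public:
    explicit MiniCompilationContext(std::shared_ptr<TaskExecutor> executor)
        : _task_executor(std::move(executor)) {}

    void push_task(size_t key, Task task) {
        std::lock_guard<std::mutex> lock(_mutex);
        auto promise = std::make_shared<std::promise<void>>();
        // Futures are appended unconditionally; this mirrors the remaining
        // leak called out in the description above.
        _futures.emplace_back(promise->get_future());

        if (_task_keys.find(key) == _task_keys.end()) {
            if (_task_executor != nullptr) {
                // Record the key only once the task is really queued
                // (the reordering this patch makes), so a skipped task
                // no longer blocks future compilations for the same key.
                _task_keys.insert(key);
                _task_executor->run([task, promise] {
                    task();
                    promise->set_value();
                });
            }
        }
    }

private:
    std::shared_ptr<TaskExecutor> _task_executor;
    std::unordered_set<size_t> _task_keys;
    std::vector<std::future<void>> _futures;
    std::mutex _mutex;
};
```

In this shape, calling `push_task` twice with the same key queues the work once; before the reordering, the key was inserted even when `_task_executor` was null, so the same compilation could never be queued again and the stale key stayed resident.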