[GPU] Exclude gemm from async compilation if the primitive's dynamic…

… impl is an optimized kernel. (openvinotoolkit#22721) ### Details: - Do not create spurious tasks; they caused a memory leak of task_key and promise objects. - Keys were generated before queueing and were not removed when the cache entry was not added to the LRU impl cache. - This mostly fixes the memory leak and reduces the permanent cache size. - Remaining memory-leak issue: futures are only ever added and are never freed. ### Tickets: - 131417
akuporos · Feb 9, 2024 · af6c677 · af6c677
1 parent edc346f
commit af6c677
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 9 deletions.
diff --git a/src/plugins/intel_gpu/src/graph/compilation_context.cpp b/src/plugins/intel_gpu/src/graph/compilation_context.cpp
@@ -28,8 +28,8 @@ class CompilationContext : public ICompilationContext {
         futures.emplace_back(promise->get_future());
 
         if (_task_keys.find(key) == _task_keys.end()) {
-            _task_keys.insert(key);
             if (_task_executor != nullptr) {
+                _task_keys.insert(key);
                 _task_executor->run([task, promise] {
                     task();
                     promise->set_value();

diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -713,7 +713,15 @@ bool primitive_inst::use_async_compilation() {
         }
     }
 
-    return (_node->is_type<convolution>() || compile_fc_impls || _node->is_type<gemm>() ||
+    bool compile_gemm_impls = _node->is_type<gemm>();
+    if (compile_gemm_impls) {
+        // Do not async-compile if opt_gemm is chosen for iGPU
+        // Do async-compile if it is to be executed from onednn
+        compile_gemm_impls = _node->get_selected_impl() && _node->get_selected_impl()->get_kernel_name().find("gemm_ref") != std::string::npos;
+        compile_gemm_impls |= (_node->get_preferred_impl_type() == impl_types::onednn);
+    }
+
+    return (_node->is_type<convolution>() || compile_fc_impls || compile_gemm_impls ||
             (_node->is_type<softmax>() && _node->get_selected_impl() &&
              _node->get_selected_impl()->get_kernel_name().find("softmax_gpu_ref") != std::string::npos));
 }
@@ -830,13 +838,7 @@ bool primitive_inst::update_impl() {
 
                         if (!can_be_optimized()) {
                             auto impl = _node->type()->choose_impl(*_node, updated_params_no_dyn_pad);
-                            // In the case of gemm, if current dynamic impl is not gemm_ref and newly chosen impl is gemm_ref,
-                            // the newly chosen impl is not added to the impl cache for beffer performance.
-                            if (_node->is_type<gemm>() &&
-                                    (_node->get_selected_impl() && _node->get_selected_impl()->get_kernel_name().find("gemm_ref") == std::string::npos) &&
-                                    impl->get_kernel_name().find("gemm_ref") != std::string::npos) {
-                                return;
-                            }
+
                             if (impl->get_kernels_source().size() > 0) {
                                 auto kernels = _program->get_kernels_cache().compile(updated_params_no_dyn_pad, impl->get_kernels_source());
                                 impl->set_kernels(kernels);