diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp
index 54c6b4f9d868f5..d99eb64199797c 100644
--- a/src/core/src/any.cpp
+++ b/src/core/src/any.cpp
@@ -4,8 +4,12 @@
 
 #include "openvino/core/any.hpp"
 
+#include <array>
 #include <limits>
 #include <string>
+#include <string_view>
+
+#include "openvino/util/common_util.hpp"
 
 namespace {
 template <class Container>
 bool contains_type_index(Container&& types, const std::type_info& user_type) {
@@ -202,9 +206,14 @@ namespace util {
 void Read<bool>::operator()(std::istream& is, bool& value) const {
     std::string str;
     is >> str;
-    if (str == "YES") {
+
+    constexpr std::array<std::string_view, 4> off = {"0", "false", "off", "no"};
+    constexpr std::array<std::string_view, 4> on = {"1", "true", "on", "yes"};
+    str = util::to_lower(str);
+
+    if (std::find(on.begin(), on.end(), str) != on.end()) {
         value = true;
-    } else if (str == "NO") {
+    } else if (std::find(off.begin(), off.end(), str) != off.end()) {
         value = false;
     } else {
         OPENVINO_THROW("Could not convert to bool from string " + str);
diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt
index 22c28c2acde6e6..40528735f80071 100644
--- a/src/inference/CMakeLists.txt
+++ b/src/inference/CMakeLists.txt
@@ -76,6 +76,10 @@ target_compile_definitions(${TARGET_NAME}_obj PRIVATE
                            IMPLEMENT_OPENVINO_RUNTIME_API
                            $<$<BOOL:${ENABLE_PROXY}>:PROXY_PLUGIN_ENABLED>)
 
+if(ENABLE_DEBUG_CAPS)
+    target_compile_definitions(${TARGET_NAME}_obj PUBLIC ENABLE_DEBUG_CAPS)
+endif()
+
 target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $ $<$:$>)
@@ -87,7 +91,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE
         # for ov_plugins.hpp
         $<IF:$<AND:$<BOOL:${OV_GENERATOR_MULTI_CONFIG}>,$<VERSION_GREATER_EQUAL:${CMAKE_VERSION},3.20>>,${CMAKE_CURRENT_BINARY_DIR}/$<CONFIG>,${CMAKE_CURRENT_BINARY_DIR}>)
 
-target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev)
+target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev nlohmann_json::nlohmann_json)
 
 ov_mark_target_as_cc(${TARGET_NAME}_obj)
 
 # OpenVINO Runtime is public API => need to mark this library as important for ABI free
@@ -133,6 +137,10 @@ target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS}
 
 target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE)
 
+if(ENABLE_DEBUG_CAPS)
+    target_compile_definitions(${TARGET_NAME}_s PUBLIC ENABLE_DEBUG_CAPS)
+endif()
+
 set_target_properties(${TARGET_NAME}_s PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})  # LTO
diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp
index 9b2f08c17a7fe0..e26024622580e7 100644
--- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp
+++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp
@@ -90,5 +90,11 @@ static constexpr Property<std::string, PropertyMutability::RO> compiled_model_runtime_properties{
  */
 static constexpr Property<float, PropertyMutability::RW> query_model_ratio{"QUERY_MODEL_RATIO"};
 
+/**
+ * @brief Allow execution of low precision transformations in plugin's pipelines
+ * @ingroup ov_dev_api_plugin_api
+ */
+static constexpr Property<bool, PropertyMutability::RW> enable_lp_transformations{"LP_TRANSFORMS_MODE"};
+
 }  // namespace internal
 }  // namespace ov
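Note on the `any.cpp` hunk above: `util::Read<bool>` now accepts the case-insensitive forms `1/true/on/yes` and `0/false/off/no` instead of only `YES`/`NO`. An illustrative check of the resulting `ov::Any` behavior (not part of the patch):

```cpp
#include <cassert>

#include "openvino/core/any.hpp"

int main() {
    // String-typed Any values (e.g. coming from environment variables or a
    // JSON config file) are converted to bool through util::Read<bool> above.
    assert(ov::Any{"YES"}.as<bool>() == true);  // legacy spelling still works (lowercased to "yes")
    assert(ov::Any{"On"}.as<bool>() == true);   // case-insensitive
    assert(ov::Any{"0"}.as<bool>() == false);
    // ov::Any{"maybe"}.as<bool>() would throw: the token is in neither list.
    return 0;
}
```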
diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
new file mode 100644
index 00000000000000..109165b1cd01c6
--- /dev/null
+++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
@@ -0,0 +1,416 @@
+// Copyright (C) 2024-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <filesystem>
+#include <functional>
+#include <iostream>
+
+#include "openvino/core/attribute_visitor.hpp"
+#include "openvino/core/except.hpp"
+#include "openvino/runtime/iremote_context.hpp"
+#include "openvino/runtime/properties.hpp"
+
+#define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...)                           \
+public:                                                                                                     \
+    const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const {                 \
+        if (m_is_finalized) {                                                                               \
+            return m_##PropertyVar.value;                                                                   \
+        } else {                                                                                            \
+            if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \
+                return m_user_properties.at(PropertyNamespace::PropertyVar.name())                          \
+                    .as<decltype(PropertyNamespace::PropertyVar)::value_type>();                            \
+            } else {                                                                                        \
+                return m_##PropertyVar.value;                                                               \
+            }                                                                                               \
+        }                                                                                                   \
+    }                                                                                                       \
+                                                                                                            \
+private:                                                                                                    \
+    ConfigOption<decltype(PropertyNamespace::PropertyVar)::value_type, Visibility> m_##PropertyVar{         \
+        this,                                                                                               \
+        PropertyNamespace::PropertyVar.name(),                                                              \
+        #PropertyNamespace "::" #PropertyVar,                                                               \
+        __VA_ARGS__};
+
+#define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \
+    OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__)
+
+#define OV_CONFIG_RELEASE_INTERNAL_OPTION(PropertyNamespace, PropertyVar, ...) \
+    OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__)
+
+#ifdef ENABLE_DEBUG_CAPS
+# define OV_CONFIG_DEBUG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, ...)                                 \
+  public:                                                                                                   \
+    static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() {                \
+        static PluginConfig::GlobalOptionInitializer init_helper(PropertyNamespace::PropertyVar.name(),     \
+                                                                 m_allowed_env_prefix,                      \
+                                                                 m_##PropertyVar);                          \
+        return init_helper.m_option.value;                                                                  \
+    }                                                                                                       \
+                                                                                                            \
+  private:                                                                                                  \
+    static inline ConfigOption<decltype(PropertyNamespace::PropertyVar)::value_type,                        \
+                               OptionVisibility::DEBUG_GLOBAL>                                              \
+        m_##PropertyVar{nullptr,                                                                            \
+                        PropertyNamespace::PropertyVar.name(),                                              \
+                        #PropertyNamespace "::" #PropertyVar,                                               \
+                        __VA_ARGS__};                                                                       \
+    OptionRegistrationHelper m_##PropertyVar##_rh{this, PropertyNamespace::PropertyVar.name(), &m_##PropertyVar};
+
+# define OV_CONFIG_DEBUG_OPTION(PropertyNamespace, PropertyVar, ...) \
+    OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__)
+#else
+# define OV_CONFIG_DEBUG_GLOBAL_OPTION(...)
+# define OV_CONFIG_DEBUG_OPTION(...)
+#endif
+
+namespace ov {
+enum class OptionVisibility : uint8_t {
+    RELEASE = 1 << 0,           // Option can be set for any build type via public interface, environment and config file
+    RELEASE_INTERNAL = 1 << 1,  // Option can be set for any build type via environment and config file only
+    DEBUG = 1 << 2,             // Option can be set for debug builds only via environment and config file
+    DEBUG_GLOBAL = 1 << 3,      // Global option can be set for debug builds only via environment and config file
+    ANY = 0xFF,                 // Any visibility is valid
+};
+
+inline OptionVisibility operator&(OptionVisibility a, OptionVisibility b) {
+    using T = std::underlying_type_t<OptionVisibility>;
+    return static_cast<OptionVisibility>(static_cast<T>(a) & static_cast<T>(b));
+}
+
+inline OptionVisibility operator|(OptionVisibility a, OptionVisibility b) {
+    using T = std::underlying_type_t<OptionVisibility>;
+    return static_cast<OptionVisibility>(static_cast<T>(a) | static_cast<T>(b));
+}
+
+inline OptionVisibility operator~(OptionVisibility a) {
+    using T = std::underlying_type_t<OptionVisibility>;
+    return static_cast<OptionVisibility>(~static_cast<T>(a));
+}
+
+inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibility) {
+    switch (visibility) {
+    case OptionVisibility::RELEASE:
+        os << "RELEASE";
+        break;
+    case OptionVisibility::RELEASE_INTERNAL:
+        os << "RELEASE_INTERNAL";
+        break;
+    case OptionVisibility::DEBUG:
+        os << "DEBUG";
+        break;
+    case OptionVisibility::DEBUG_GLOBAL:
+        os << "DEBUG_GLOBAL";
+        break;
+    case OptionVisibility::ANY:
+        os << "ANY";
+        break;
+    default:
+        os << "UNKNOWN";
+        break;
+    }
+
+    return os;
+}
+
+struct ConfigOptionBase {
+    ConfigOptionBase(std::string_view prop_name, std::string_view desc) : property_name(prop_name), description(desc) {}
+    virtual ~ConfigOptionBase() = default;
+
+    virtual void set_any(const ov::Any& any) = 0;
+    virtual ov::Any get_any() const = 0;
+    virtual bool is_valid_value(const ov::Any& val) const = 0;
+    virtual OptionVisibility get_visibility() const = 0;
+
+    std::string_view property_name;
+    std::string_view description;
+};
+
+// Base class for configuration of plugins
+// Implementation should provide a list of properties with default values and validators (optional)
+// and prepare a map string property name -> ConfigOptionBase pointer
+// For the sake of efficiency, we expect that plugin properties are defined as class members of the derived class
+// and accessed directly in the plugin's code (i.e. w/o get_property()/set_property() calls)
+// get/set property members are provided to handle external property access
+// The class provides helpers to read the properties from a configuration file and from environment variables
+//
+// Expected order of properties resolution:
+// 1. Assign default value for each property per device
+// 2. Save user properties passed via Core::set_property() call to user_properties
+// 3. Save user properties passed via Core::compile_model() call to user_properties
+// 4. Apply RT info properties to user_properties if they were not set by user
+// 5. Read and apply properties from the config file as user_properties
+// 6. Read and apply properties from the environment variables as user_properties
+// 7. Apply user_properties to actual plugin properties
+// 8. Update dependent properties if they were not set by the user either way
+class OPENVINO_RUNTIME_API PluginConfig {
+public:
+    PluginConfig() {}
+    virtual ~PluginConfig() = default;
+
+    // Disable copy and move as we need to set up m_options_map properly and ensure that
+    // values are a part of the current config object
+    PluginConfig(const PluginConfig& other) = delete;
+    PluginConfig& operator=(const PluginConfig& other) = delete;
+    PluginConfig(PluginConfig&& other) = delete;
+    PluginConfig& operator=(PluginConfig&& other) = delete;
+
+    void set_property(const ov::AnyMap& properties);
+    void set_user_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY);
+    Any get_property(const std::string& name, OptionVisibility allowed_visibility = OptionVisibility::ANY) const;
+
+    template <typename... Properties>
+    util::EnableIfAllStringAny<void, Properties...> set_property(Properties&&... properties) {
+        set_property(ov::AnyMap{std::forward<Properties>(properties)...});
+    }
+    template <typename... Properties>
+    util::EnableIfAllStringAny<void, Properties...> set_user_property(Properties&&... properties) {
+        set_user_property(ov::AnyMap{std::forward<Properties>(properties)...});
+    }
+
+    std::string to_string() const;
+
+    void finalize(const IRemoteContext* context, const ov::Model* model);
+
+    bool visit_attributes(ov::AttributeVisitor& visitor);
+
+    void register_option(const std::string& name, ConfigOptionBase* ptr) {
+        m_options_map.emplace(name, ptr);
+    }
+
+protected:
+    template <typename OptionType>
+    class GlobalOptionInitializer {
+    public:
+        GlobalOptionInitializer(const std::string& name, std::string_view prefix, OptionType& option)
+            : m_option(option) {
+            auto val = PluginConfig::read_env(name, prefix, &option);
+            if (!val.empty()) {
+                std::cout << "Non default global config value for " << name << " = " << val.template as<std::string>()
+                          << std::endl;
+                option.set_any(val);
+            }
+        }
+
+        OptionType& m_option;
+    };
+
+    virtual void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {}
+    void apply_env_options();
+    void apply_config_options(std::string_view device_name, std::filesystem::path config_path = "");
+    virtual void finalize_impl(const IRemoteContext* context) {}
+
+    template <typename T, PropertyMutability mutability>
+    bool is_set_by_user(const ov::Property<T, mutability>& property) const {
+        return m_user_properties.find(property.name()) != m_user_properties.end();
+    }
+
+    ConfigOptionBase* get_option_ptr(const std::string& name) const {
+        auto it = m_options_map.find(name);
+        OPENVINO_ASSERT(it != m_options_map.end(), "Option not found: ", name);
+        OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name);
+
+        return it->second;
+    }
+
+    template <typename T, PropertyMutability mutability>
+    void apply_rt_info_property(const ov::Property<T, mutability>& property, const ov::RTMap& rt_info) {
+        if (!is_set_by_user(property)) {
+            auto rt_info_val = rt_info.find(property.name());
+            if (rt_info_val != rt_info.end()) {
+                set_user_property({property(rt_info_val->second.template as<T>())}, OptionVisibility::RELEASE);
+            }
+        }
+    }
+
+    ov::AnyMap read_config_file(std::filesystem::path filename, std::string_view target_device_name) const;
+    ov::AnyMap read_env() const;
+    static ov::Any read_env(const std::string& option_name, std::string_view prefix, const ConfigOptionBase* option);
+    void cleanup_unsupported(ov::AnyMap& config) const;
+
+    std::map<std::string, ConfigOptionBase*> m_options_map;
+    using OptionMapEntry = decltype(m_options_map)::value_type;
+
+    // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's
+    // runtime info
+    ov::AnyMap m_user_properties;
+
+    std::string_view get_help_message(const std::string& name = "") const;
+    void print_help() const;
+
+    bool m_is_finalized = false;
+
+    inline static const std::string_view m_allowed_env_prefix = "OV_";
+};
+
+class OptionRegistrationHelper {
+public:
+    OptionRegistrationHelper(PluginConfig* config, std::string_view name, ConfigOptionBase* option) {
+        if (config)
+            config->register_option(std::string{name}, option);
+    }
+};
+
+template <typename T>
+struct TypedOption : public ConfigOptionBase {
+    TypedOption(const T& default_val, std::string_view prop_name, std::string_view desc)
+        : ConfigOptionBase(prop_name, desc),
+          value(default_val) {}
+    T value;
+};
+
+template <typename T, OptionVisibility visibility_>
+struct ConfigOption : public TypedOption<T> {
+    ConfigOption(PluginConfig* config,
+                 std::string_view name,
+                 std::string_view prop_name,
+                 const T& default_val,
+                 std::string_view desc,
+                 std::function<bool(T)> validator = nullptr)
+        : TypedOption<T>(default_val, prop_name, desc),
+          validator(validator) {
+        OptionRegistrationHelper option(config, name, this);
+    }
+    constexpr static const auto visibility = visibility_;
+
+    void set_any(const ov::Any& any) override {
+        if (validator)
+            OPENVINO_ASSERT(validator(any.as<T>()), "Invalid value: ", any.as<T>());
+        this->value = any.as<T>();
+    }
+
+    ov::Any get_any() const override {
+        return ov::Any(this->value);
+    }
+
+    bool is_valid_value(const ov::Any& val) const override {
+        try {
+            auto v = val.as<T>();
+            return validator ? validator(v) : true;
+        } catch (std::exception&) {
+            return false;
+        }
+    }
+
+    OptionVisibility get_visibility() const override {
+        return visibility;
+    }
+
+    operator T() const {
+        return this->value;
+    }
+
+    ConfigOption& operator=(const T& val) {
+        this->value = val;
+        return *this;
+    }
+
+    template <typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
+    bool operator==(const U& val) const {
+        return this->value == static_cast<T>(val);
+    }
+
+    template <typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
+    bool operator!=(const U& val) const {
+        return !(*this == val);
+    }
+
+private:
+    std::function<bool(T)> validator;
+};
+
+template <>
+class OPENVINO_RUNTIME_API AttributeAdapter<ConfigOptionBase*> : public DirectValueAccessor<ConfigOptionBase*> {
+public:
+    AttributeAdapter(ConfigOptionBase*& value) : DirectValueAccessor<ConfigOptionBase*>(value) {}
+
+    OPENVINO_RTTI("AttributeAdapter<ConfigOptionBase*>");
+};
+
+template <>
+class OPENVINO_RUNTIME_API AttributeAdapter<ov::AnyMap> : public DirectValueAccessor<ov::AnyMap> {
+public:
+    AttributeAdapter(ov::AnyMap& value) : DirectValueAccessor<ov::AnyMap>(value) {}
+
+    OPENVINO_RTTI("AttributeAdapter<ov::AnyMap>");
+};
+
+template <typename OStreamType>
+class OstreamAttributeVisitor : public ov::AttributeVisitor {
+    OStreamType& os;
+
+public:
+    OstreamAttributeVisitor(OStreamType& os) : os(os) {}
+
+    void on_adapter(const std::string& name, ov::ValueAccessor<bool>& adapter) override {
+        os << adapter.get();
+    }
+
+    void on_adapter(const std::string& name, ov::ValueAccessor<void>& adapter) override {
+        if (auto a = ov::as_type<ov::AttributeAdapter<ConfigOptionBase*>>(&adapter)) {
+            return handle_option(a->get());
+        } else if (auto a = ov::as_type<ov::AttributeAdapter<ov::AnyMap>>(&adapter)) {
+            const auto& props = a->get();
+            os << props.size();
+            for (auto& kv : props) {
+                os << kv.first << kv.second.as<std::string>();
+            }
+        } else {
+            OPENVINO_THROW("Attribute ", name, " can't be processed\n");
+        }
+    }
+
+    void handle_option(ConfigOptionBase* option) {
+        if (option->get_visibility() == OptionVisibility::RELEASE ||
+            option->get_visibility() == OptionVisibility::RELEASE_INTERNAL)
+            os << option->get_any().as<std::string>();
+    }
+};
+
+template <typename IStreamType>
+class IstreamAttributeVisitor : public ov::AttributeVisitor {
+    IStreamType& is;
+
+public:
+    IstreamAttributeVisitor(IStreamType& is) : is(is) {}
+
+    void on_adapter(const std::string& name, ov::ValueAccessor<void>& adapter) override {
+        if (auto a = ov::as_type<ov::AttributeAdapter<ConfigOptionBase*>>(&adapter)) {
+            return handle_option(a->get());
+        } else if (auto a = ov::as_type<ov::AttributeAdapter<ov::AnyMap>>(&adapter)) {
+            size_t size;
+            is >> size;
+            ov::AnyMap props;
+            for (size_t i = 0; i < size; i++) {
+                std::string name, val;
+                is >> name;
+                is >> val;
+                props[name] = val;
+            }
+            a->set(props);
+        } else {
+            OPENVINO_THROW("Attribute ", name, " can't be processed\n");
+        }
+    }
+
+    void on_adapter(const std::string& name, ov::ValueAccessor<bool>& adapter) override {
+        bool val;
+        is >> val;
+        adapter.set(val);
+    }
+
+    void handle_option(ConfigOptionBase* option) {
+        if (option->get_visibility() == OptionVisibility::RELEASE ||
+            option->get_visibility() == OptionVisibility::RELEASE_INTERNAL) {
+            std::string s;
+            is >> s;
+            if (option->is_valid_value(s))
+                option->set_any(s);
+        }
+    }
+};
+
+}  // namespace ov
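To make the macro mechanics concrete: a minimal hypothetical plugin config built on the header above (class name and defaults are illustrative, not part of the patch). Each macro instantiates a typed `ConfigOption` member, registers it in `m_options_map` through `OptionRegistrationHelper`, and generates a `get_<name>()` accessor:

```cpp
#include "openvino/runtime/internal_properties.hpp"
#include "openvino/runtime/plugin_config.hpp"
#include "openvino/runtime/properties.hpp"

// Hypothetical example; a real plugin would add validators and many more options.
struct MyPluginConfig : public ov::PluginConfig {
    OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level performance hint")
    OV_CONFIG_RELEASE_OPTION(ov, enable_profiling, false, "Collect per-primitive profiling info")
    OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::internal, enable_lp_transformations, false, "Run LPT in the plugin pipeline")
};
```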
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
new file mode 100644
index 00000000000000..d4268da8d827ec
--- /dev/null
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -0,0 +1,319 @@
+// Copyright (C) 2024-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/runtime/plugin_config.hpp"
+
+#include <algorithm>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+#include "openvino/core/any.hpp"
+#include "openvino/core/except.hpp"
+#include "openvino/runtime/device_id_parser.hpp"
+#include "openvino/util/common_util.hpp"
+#include "openvino/util/env_util.hpp"
+
+#ifdef JSON_HEADER
+# include <json.hpp>
+#else
+# include <nlohmann/json.hpp>
+#endif
+
+#ifdef _WIN32
+# ifndef NOMINMAX
+#  define NOMINMAX
+# endif
+# include <windows.h>
+#else
+# include <sys/ioctl.h>
+# include <unistd.h>
+#endif
+
+namespace {
+size_t get_terminal_width() {
+    const size_t default_width = 120;
+#ifdef _WIN32
+    CONSOLE_SCREEN_BUFFER_INFO csbi;
+    if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) {
+        return csbi.srWindow.Right - csbi.srWindow.Left + 1;
+    } else {
+        return default_width;
+    }
+#elif defined(__linux__)
+    struct winsize w;
+    if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) {
+        return w.ws_col;
+    } else {
+        return default_width;
+    }
+#else
+    return default_width;
+#endif
+}
+}  // namespace
+
+namespace ov {
+
+ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility allowed_visibility) const {
+    if (m_user_properties.find(name) != m_user_properties.end()) {
+        return m_user_properties.at(name);
+    }
+
+    auto option = get_option_ptr(name);
+    OPENVINO_ASSERT((allowed_visibility & option->get_visibility()) == option->get_visibility(),
+                    "Couldn't get unknown property: ",
+                    name);
+
+    return option->get_any();
+}
+
+void PluginConfig::set_property(const ov::AnyMap& config) {
+    OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");
+
+    for (auto& [name, val] : config) {
+        get_option_ptr(name)->set_any(val);
+    }
+}
+
+void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility allowed_visibility) {
+    OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");
+
+    for (auto& [name, val] : config) {
+        auto option = get_option_ptr(name);
+        if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
+            OPENVINO_THROW("Couldn't set unknown property: ", name);
+        }
+        if (!option->is_valid_value(val)) {
+            OPENVINO_THROW("Invalid value: ",
+                           val.as<std::string>(),
+                           " for property: ",
+                           name,
+                           "\nProperty description: ",
+                           get_help_message(name));
+        }
+
+        m_user_properties[name] = val;
+    }
+}
+
+void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* model) {
+    if (m_is_finalized)
+        return;
+
+    if (model)
+        apply_model_specific_options(context, *model);
+
+    // Copy internal properties before applying hints to ensure that
+    // a property set by hint won't be overridden by a value in user config.
+    // E.g. num_streams=AUTO && hint=THROUGHPUT
+    // If we apply hints first and then copy all values from user config to internal one,
+    // then we'll get num_streams=AUTO in final config while some integer number is expected.
+    for (const auto& [name, value] : m_user_properties) {
+        auto& option = m_options_map.at(name);
+        option->set_any(value);
+    }
+
+    finalize_impl(context);
+
+#ifdef ENABLE_DEBUG_CAPS
+    apply_env_options();
+#endif
+
+    // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side
+    // finalization
+    m_user_properties.clear();
+
+    m_is_finalized = true;
+}
+
+bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) {
+    visitor.on_attribute("m_user_properties", m_user_properties);
+    for (auto& [name, option] : m_options_map) {
+        visitor.on_attribute(name + "__internal", option);
+    }
+
+    return true;
+}
+
+void PluginConfig::apply_env_options() {
+    ov::AnyMap env_properties = read_env();
+    cleanup_unsupported(env_properties);
+    for (auto& [name, val] : env_properties) {
+        std::cout << "Non default env value for " << name << " = " << val.as<std::string>() << std::endl;
+    }
+    set_property(env_properties);
+}
+
+void PluginConfig::apply_config_options(std::string_view device_name, std::filesystem::path config_path) {
+    if (!config_path.empty()) {
+        ov::AnyMap config_properties = read_config_file(config_path, device_name);
+        cleanup_unsupported(config_properties);
+        for (auto& [name, val] : config_properties) {
+            std::cout << "Non default config value for " << name << " = " << val.as<std::string>() << std::endl;
+        }
+        set_property(config_properties);
+    }
+}
+
+ov::AnyMap PluginConfig::read_config_file(std::filesystem::path filename, std::string_view target_device_name) const {
+    if (filename.empty())
+        return {};
+
+    ov::AnyMap config;
+
+    std::ifstream ifs(filename);
+    if (!ifs.is_open()) {
+        return config;
+    }
+
+    nlohmann::json json_config;
+    try {
+        ifs >> json_config;
+    } catch (const std::exception&) {
+        return config;
+    }
+
+    DeviceIDParser parser(std::string{target_device_name});
+    for (auto item = json_config.cbegin(), end = json_config.cend(); item != end; ++item) {
+        const std::string& device_name = item.key();
+        if (DeviceIDParser(device_name).get_device_name() != parser.get_device_name())
+            continue;
+
+        const auto& item_value = item.value();
+        for (auto option = item_value.cbegin(), item_value_end = item_value.cend(); option != item_value_end;
+             ++option) {
+            config[option.key()] = option.value().get<std::string>();
+        }
+    }
+
+    return config;
+}
+
+ov::Any PluginConfig::read_env(const std::string& option_name,
+                               std::string_view prefix,
+                               const ConfigOptionBase* option) {
+    auto var_name = std::string(prefix) + option_name;
+    const auto& val = ov::util::getenv_string(var_name.c_str());
+
+    if (!val.empty()) {
+        return val;
+    } else {
+        return ov::Any();
+    }
+}
+
+ov::AnyMap PluginConfig::read_env() const {
+    ov::AnyMap config;
+
+    for (auto& [name, option] : m_options_map) {
+        if (auto val = read_env(name, m_allowed_env_prefix, option); !val.empty()) {
+            config[name] = val;
+        }
+    }
+
+    return config;
+}
+
+void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const {
+    for (auto it = config.begin(); it != config.end();) {
+        auto& name = it->first;
+        auto opt_it = std::find_if(m_options_map.begin(), m_options_map.end(), [&](const OptionMapEntry& o) {
+            return o.first == name;
+        });
+        if (opt_it == m_options_map.end() || opt_it->second->get_visibility() == OptionVisibility::DEBUG_GLOBAL) {
+            it = config.erase(it);
+        } else {
+            ++it;
+        }
+    }
+}
+
+std::string PluginConfig::to_string() const {
+    std::stringstream ss;
+
+    ss << "-----------------------------------------\n";
+    ss << "PROPERTIES:\n";
+
+    for (const auto& [name, option] : m_options_map) {
+        ss << "\t" << name << ": " << option->get_any().as<std::string>() << std::endl;
+    }
+    ss << "USER PROPERTIES:\n";
+    for (const auto& [name, val] : m_user_properties) {
+        ss << "\t" << name << ": " << val.as<std::string>() << std::endl;
+    }
+
+    return ss.str();
+}
+
+void PluginConfig::print_help() const {
+    auto format_text = [](const std::string& cpp_name,
+                          std::string_view str_name,
+                          std::string_view desc,
+                          size_t max_name_width,
+                          size_t max_width) {
+        std::istringstream words(std::string{desc});
+        std::ostringstream formatted_text;
+        std::string word;
+        std::vector<std::string> words_vec;
+
+        while (words >> word) {
+            words_vec.push_back(word);
+        }
+
+        size_t j = 0;
+        size_t count_of_desc_lines = (desc.length() + max_width - 1) / max_width;
+        for (size_t i = 0; i < std::max<size_t>(2, count_of_desc_lines); i++) {
+            if (i == 0) {
+                formatted_text << std::left << std::setw(max_name_width) << cpp_name;
+            } else if (i == 1) {
+                formatted_text << std::left << std::setw(max_name_width) << str_name;
+            } else {
+                formatted_text << std::left << std::setw(max_name_width) << "";
+            }
+
+            formatted_text << " | ";
+
+            size_t line_length = max_name_width + 3;
+            for (; j < words_vec.size();) {
+                line_length += words_vec[j].size() + 1;
+                if (line_length > max_width) {
+                    break;
+                } else {
+                    formatted_text << words_vec[j] << " ";
+                }
+                j++;
+            }
+            formatted_text << "\n";
+        }
+        return formatted_text.str();
+    };
+
+    std::stringstream ss;
+    auto max_name_length_item = std::max_element(m_options_map.begin(),
+                                                 m_options_map.end(),
+                                                 [](const OptionMapEntry& a, const OptionMapEntry& b) {
+                                                     return std::get<0>(a).size() < std::get<0>(b).size();
+                                                 });
+
+    const size_t max_name_width =
+        std::max(max_name_length_item->first.size(), max_name_length_item->second->property_name.size()) + 4;
+    const size_t terminal_width = get_terminal_width();
+    // clang-format off
+    ss << std::left << std::setw(max_name_width) << "Option name" << " | Description\n";
+    ss << std::left << std::setw(terminal_width) << std::setfill('-') << "" << "\n";
+    // clang-format on
+    for (auto& [name, option] : m_options_map) {
+        ss << format_text(name, option->property_name, option->description, max_name_width, terminal_width) << "\n";
+    }
+
+    std::cout << ss.str();
+}
+
+std::string_view PluginConfig::get_help_message(const std::string& name) const {
+    return get_option_ptr(name)->description;
+}
+
+}  // namespace ov
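Before the unit tests below, a compressed sketch of the resolution flow that `finalize()` implements (reusing the hypothetical `MyPluginConfig` from the earlier note; not part of the patch):

```cpp
void resolution_flow_sketch(MyPluginConfig& cfg) {
    // 1) Staged: the value lands in m_user_properties; the typed member keeps its default.
    cfg.set_user_property(ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));

    // 2) finalize() validates staged values, copies them into the typed members,
    //    runs finalize_impl() for dependent options, applies env/config overrides
    //    (debug-caps builds only), then clears m_user_properties.
    cfg.finalize(/* context = */ nullptr, /* model = */ nullptr);

    // 3) Hot paths read the plain member through the generated accessor - no map lookup.
    auto mode = cfg.get_performance_mode();
    (void)mode;
}
```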
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
new file mode 100644
index 00000000000000..8e6fe94343a04f
--- /dev/null
+++ b/src/inference/tests/unit/config_test.cpp
@@ -0,0 +1,383 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "openvino/core/any.hpp"
+#include "openvino/core/except.hpp"
+#include "openvino/core/model.hpp"
+#include "openvino/core/node_vector.hpp"
+#include "openvino/op/parameter.hpp"
+#include "openvino/runtime/iremote_context.hpp"
+#include "openvino/runtime/plugin_config.hpp"
+
+#ifdef OV_CONFIG_DECLARE_OPTION
+# undef OV_CONFIG_DECLARE_OPTION
+#endif
+
+#ifdef OV_CONFIG_DEBUG_GLOBAL_OPTION
+# undef OV_CONFIG_DEBUG_GLOBAL_OPTION
+#endif
+
+// Same as defined in header, just make members public
+#define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...)                           \
+public:                                                                                                     \
+    const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const {                 \
+        if (m_is_finalized) {                                                                               \
+            return m_##PropertyVar.value;                                                                   \
+        } else {                                                                                            \
+            if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \
+                return m_user_properties.at(PropertyNamespace::PropertyVar.name())                          \
+                    .as<decltype(PropertyNamespace::PropertyVar)::value_type>();                            \
+            } else {                                                                                        \
+                return m_##PropertyVar.value;                                                               \
+            }                                                                                               \
+        }                                                                                                   \
+    }                                                                                                       \
+    ConfigOption<decltype(PropertyNamespace::PropertyVar)::value_type, Visibility> m_##PropertyVar{         \
+        this,                                                                                               \
+        PropertyNamespace::PropertyVar.name(),                                                              \
+        #PropertyNamespace "::" #PropertyVar,                                                               \
+        __VA_ARGS__};
+
+#ifdef ENABLE_DEBUG_CAPS
+# define OV_CONFIG_DEBUG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, ...)                                 \
+  public:                                                                                                   \
+    static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() {                \
+        static PluginConfig::GlobalOptionInitializer init_helper(PropertyNamespace::PropertyVar.name(),     \
+                                                                 m_allowed_env_prefix,                      \
+                                                                 m_##PropertyVar);                          \
+        return init_helper.m_option.value;                                                                  \
+    }                                                                                                       \
+    static inline ConfigOption<decltype(PropertyNamespace::PropertyVar)::value_type,                        \
+                               OptionVisibility::DEBUG_GLOBAL>                                              \
+        m_##PropertyVar{nullptr,                                                                            \
+                        PropertyNamespace::PropertyVar.name(),                                              \
+                        #PropertyNamespace "::" #PropertyVar,                                               \
+                        __VA_ARGS__};                                                                       \
+    OptionRegistrationHelper m_##PropertyVar##_rh{this, PropertyNamespace::PropertyVar.name(), &m_##PropertyVar};
+#else
+# define OV_CONFIG_DEBUG_GLOBAL_OPTION(...)
+#endif
+
+using namespace ::testing;
+using namespace ov;
+
+static constexpr Property<float> unsupported_property{"UNSUPPORTED_PROPERTY"};
+static constexpr Property<bool> bool_property{"BOOL_PROPERTY"};
+static constexpr Property<int32_t> int_property{"INT_PROPERTY"};
+static constexpr Property<std::string> high_level_property{"HIGH_LEVEL_PROPERTY"};
+static constexpr Property<std::string> low_level_property{"LOW_LEVEL_PROPERTY"};
+static constexpr Property<int32_t> release_internal_property{"RELEASE_INTERNAL_PROPERTY"};
+
+#ifdef ENABLE_DEBUG_CAPS
+static constexpr Property<int32_t> debug_property{"DEBUG_PROPERTY"};
+static constexpr Property<int32_t> debug_global_property{"DEBUG_GLOBAL_PROPERTY"};
+#endif
+
+namespace {
+const std::string test_config_path = "test_debug_config_path.json";
+const std::string device_name = "SOME_DEVICE";
+
+void dump_config(const std::string& filename, const std::string& config_content) {
+    std::ofstream ofs(filename);
+    if (!ofs.is_open()) {
+        throw std::runtime_error("Can't save config file \"" + filename + "\".");
+    }
+
+    ofs << config_content;
+}
+
+void set_env(const std::string& name, const std::string& value) {
+#ifdef _WIN32
+    _putenv_s(name.c_str(), value.c_str());
+#else
+    ::setenv(name.c_str(), value.c_str(), 1);
+#endif
+}
+
+void unset_env(const std::string& name) {
+#ifdef _WIN32
+    _putenv_s(name.c_str(), "");
+#else
+    ::unsetenv(name.c_str());
+#endif
+}
+
+}  // namespace
+
+struct EmptyTestConfig : public ov::PluginConfig {
+    std::vector<std::string> get_supported_properties() const {
+        std::vector<std::string> supported_properties;
+        for (const auto& [name, option] : m_options_map) {
+            supported_properties.push_back(name);
+        }
+        return supported_properties;
+    }
+};
+
+struct NotEmptyTestConfig;
+struct NotEmptyTestConfig : public ov::PluginConfig {
+    NotEmptyTestConfig() {}
+
+    NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() {
+        m_user_properties = other.m_user_properties;
+        for (const auto& [name, option] : other.m_options_map) {
+            m_options_map.at(name)->set_any(option->get_any());
+        }
+    }
+
+    std::vector<std::string> get_supported_properties() const {
+        std::vector<std::string> supported_properties;
+        for (const auto& [name, option] : m_options_map) {
+            supported_properties.push_back(name);
+        }
+        return supported_properties;
+    }
+
+    void finalize_impl(const IRemoteContext* context) override {
+        if (!is_set_by_user(low_level_property)) {
+            m_low_level_property.value = m_high_level_property.value;
+        }
+#ifdef ENABLE_DEBUG_CAPS
+        apply_config_options(device_name, test_config_path);
+#endif
+    }
+
+    void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) override {
+        apply_rt_info_property(high_level_property, model.get_rt_info<ov::AnyMap>("runtime_options"));
+    }
+
+    using ov::PluginConfig::get_option_ptr;
+    using ov::PluginConfig::is_set_by_user;
+
+    OV_CONFIG_RELEASE_OPTION(, bool_property, true, "")
+    OV_CONFIG_RELEASE_OPTION(, int_property, -1, "")
+    OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "")
+    OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "")
+    OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "")
+    OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "")
+    OV_CONFIG_DEBUG_GLOBAL_OPTION(, debug_global_property, 4, "")
+};
+
+TEST(plugin_config, can_create_empty_config) {
+    ASSERT_NO_THROW(EmptyTestConfig cfg; ASSERT_EQ(cfg.get_supported_properties().size(), 0););
+}
+
+TEST(plugin_config, can_create_not_empty_config) {
+#ifdef ENABLE_DEBUG_CAPS
+    size_t expected_options_num = 7;
+#else
+    size_t expected_options_num = 5;
+#endif
+    ASSERT_NO_THROW(NotEmptyTestConfig cfg; ASSERT_EQ(cfg.get_supported_properties().size(), expected_options_num););
+}
+
+TEST(plugin_config, can_set_get_property) {
+    NotEmptyTestConfig cfg;
+    ASSERT_NO_THROW(cfg.get_bool_property());
+    ASSERT_EQ(cfg.get_bool_property(), true);
+    ASSERT_NO_THROW(cfg.set_property(bool_property(false)));
+    ASSERT_EQ(cfg.get_bool_property(), false);
+}
+
+TEST(plugin_config, throw_for_unsupported_property) {
+    NotEmptyTestConfig cfg;
+    ASSERT_ANY_THROW(cfg.get_property(unsupported_property.name()));
+    ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f)));
+}
+
+TEST(plugin_config, can_direct_access_to_properties) {
+    NotEmptyTestConfig cfg;
+    ASSERT_EQ(cfg.m_int_property.value, cfg.get_int_property());
+    ASSERT_NO_THROW(cfg.set_user_property(int_property(1)));
+    ASSERT_EQ(cfg.m_int_property.value, -1);  // user property doesn't impact member value until finalize() is called
+
+    cfg.m_int_property.value = 2;
+    ASSERT_EQ(cfg.get_int_property(), 1);  // still 1, as the user property was set previously
+}
+
+TEST(plugin_config, finalization_updates_member) {
+    NotEmptyTestConfig cfg;
+    ASSERT_NO_THROW(cfg.set_user_property(bool_property(false)));
+    ASSERT_EQ(cfg.m_bool_property.value, true);  // user property doesn't impact member value until finalize() is called
+
+    cfg.finalize(nullptr, {});
+
+    ASSERT_EQ(cfg.m_bool_property.value, false);  // now the value has changed
+}
+
+TEST(plugin_config, get_property_before_finalization_returns_user_property_if_set) {
+    NotEmptyTestConfig cfg;
+
+    ASSERT_EQ(cfg.get_bool_property(), true);     // default value
+    ASSERT_EQ(cfg.m_bool_property.value, true);   // default value
+
+    cfg.m_bool_property.value = false;            // update member directly
+    ASSERT_EQ(cfg.get_bool_property(), false);    // OK, return the class member value as no user property was set
+
+    ASSERT_NO_THROW(cfg.set_user_property(bool_property(true)));
+    ASSERT_TRUE(cfg.is_set_by_user(bool_property));
+    ASSERT_EQ(cfg.get_bool_property(), true);     // now user property value is returned
+    ASSERT_EQ(cfg.m_bool_property.value, false);  // but class member is not updated
+
+    cfg.finalize(nullptr, {});
+    ASSERT_EQ(cfg.get_bool_property(), cfg.m_bool_property.value);  // equal after finalization
+    ASSERT_FALSE(cfg.is_set_by_user(bool_property));                // and user property is cleared
+}
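The environment-driven tests below wrap their bodies in try/catch and call `unset_env()` on every path so a failing assertion can't leak state between tests; a hypothetical RAII alternative (not used by the patch) would be:

```cpp
// Hypothetical helper built on the set_env()/unset_env() functions above:
// clears the variable on scope exit even if an assertion throws mid-test.
class ScopedEnv {
public:
    ScopedEnv(std::string name, const std::string& value) : m_name(std::move(name)) {
        set_env(m_name, value);
    }
    ~ScopedEnv() {
        unset_env(m_name);
    }

private:
    std::string m_name;
};
```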
+
+TEST(plugin_config, finalization_updates_dependant_properties) {
+    NotEmptyTestConfig cfg;
+
+    cfg.set_user_property(high_level_property("value1"));
+    ASSERT_TRUE(cfg.is_set_by_user(high_level_property));
+    ASSERT_FALSE(cfg.is_set_by_user(low_level_property));
+
+    cfg.finalize(nullptr, {});
+    ASSERT_EQ(cfg.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg.m_low_level_property.value, "value1");
+    ASSERT_FALSE(cfg.is_set_by_user(high_level_property));
+    ASSERT_FALSE(cfg.is_set_by_user(low_level_property));
+}
+
+TEST(plugin_config, can_set_property_from_rt_info) {
+    NotEmptyTestConfig cfg;
+
+    RTMap rt_info = {
+        {high_level_property.name(), "value1"},
+        {int_property.name(), 10}  // int_property is not applied from rt info
+    };
+
+    auto p1 = std::make_shared<ov::op::v0::Parameter>();
+    auto r1 = std::make_shared<ov::op::v0::Result>(p1);
+    ov::Model m(ov::OutputVector{r1}, ov::ParameterVector{p1});
+    m.set_rt_info(rt_info, {"runtime_options"});
+
+    // default values
+    ASSERT_EQ(cfg.m_high_level_property.value, "");
+    ASSERT_EQ(cfg.m_low_level_property.value, "");
+    ASSERT_EQ(cfg.m_int_property.value, -1);
+
+    cfg.finalize(nullptr, &m);
+
+    ASSERT_EQ(cfg.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg.m_low_level_property.value, "value1");  // dependent property is updated too
+    ASSERT_EQ(cfg.m_int_property.value, -1);              // still default
+}
+
+TEST(plugin_config, can_copy_config) {
+    NotEmptyTestConfig cfg1;
+
+    cfg1.m_high_level_property.value = "value1";
+    cfg1.m_low_level_property.value = "value2";
+    cfg1.m_int_property.value = 1;
+    cfg1.set_property(bool_property(false));
+
+    NotEmptyTestConfig cfg2 = cfg1;
+    ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg2.m_low_level_property.value, "value2");
+    ASSERT_EQ(cfg2.m_int_property.value, 1);
+    ASSERT_EQ(cfg2.get_bool_property(), false);  // ensure user properties are copied too
+
+    // check that cfg1 modification doesn't impact a copy
+    cfg1.set_property(high_level_property("value3"));
+    cfg1.m_int_property.value = 3;
+    ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg2.m_int_property.value, 1);
+}
+
+TEST(plugin_config, set_property_throw_for_non_release_options) {
+    NotEmptyTestConfig cfg;
+    ASSERT_ANY_THROW(cfg.set_user_property({release_internal_property(10)}, OptionVisibility::RELEASE));
+#ifdef ENABLE_DEBUG_CAPS
+    ASSERT_ANY_THROW(cfg.set_user_property({debug_property(10)}, OptionVisibility::RELEASE));
+#endif
+}
+
+TEST(plugin_config, visibility_is_correct) {
+    NotEmptyTestConfig cfg;
+    ASSERT_EQ(cfg.get_option_ptr(release_internal_property.name())->get_visibility(),
+              OptionVisibility::RELEASE_INTERNAL);
+    ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE);
+
+#ifdef ENABLE_DEBUG_CAPS
+    ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG);
+#endif
+}
+
+TEST(plugin_config, can_read_from_env_with_debug_caps) {
+    try {
+        NotEmptyTestConfig cfg;
+        ASSERT_EQ(cfg.get_int_property(), -1);
+        set_env("OV_INT_PROPERTY", "10");
+        ASSERT_EQ(cfg.get_int_property(), -1);  // env is applied after finalization only for build with debug caps
+
+#ifdef ENABLE_DEBUG_CAPS
+        set_env("OV_DEBUG_PROPERTY", "20");
+        ASSERT_EQ(cfg.get_debug_property(), 2);  // same for debug option
+#endif
+
+        cfg.finalize(nullptr, nullptr);
+
+#ifdef ENABLE_DEBUG_CAPS
+        ASSERT_EQ(cfg.get_int_property(), 10);
+        ASSERT_EQ(cfg.get_debug_property(), 20);
+#else
+        ASSERT_EQ(cfg.get_int_property(), -1);  // no effect
+#endif
+    } catch (std::exception&) {
+    }
+
+    unset_env("OV_INT_PROPERTY");
+#ifdef ENABLE_DEBUG_CAPS
+    unset_env("OV_DEBUG_PROPERTY");
+#endif
+}
+
+TEST(plugin_config, can_read_from_config) {
+    const std::filesystem::path filepath = test_config_path;
+    try {
+        NotEmptyTestConfig cfg;
+        std::string config = "{\"SOME_DEVICE\":{\"DEBUG_PROPERTY\":\"20\",\"INT_PROPERTY\":\"10\"}}";
+
+        dump_config(filepath.generic_string(), config);
+
+        ASSERT_EQ(cfg.get_int_property(), -1);  // config is applied after finalization only for build with debug caps
+#ifdef ENABLE_DEBUG_CAPS
+        ASSERT_EQ(cfg.get_debug_property(), 2);  // same for debug option
+#endif
+
+        cfg.finalize(nullptr, nullptr);
+#ifdef ENABLE_DEBUG_CAPS
+        ASSERT_EQ(cfg.get_int_property(), 10);
+        ASSERT_EQ(cfg.get_debug_property(), 20);
+#else
+        ASSERT_EQ(cfg.get_int_property(), -1);  // no effect
+#endif
+    } catch (std::exception&) {
+    }
+
+    std::filesystem::remove(filepath);
+}
+
+#ifdef ENABLE_DEBUG_CAPS
+
+TEST(plugin_config, global_property_read_env_on_first_call) {
+    try {
+        set_env("OV_DEBUG_GLOBAL_PROPERTY", "10");
+        ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10);
+
+        set_env("OV_DEBUG_GLOBAL_PROPERTY", "20");
+        ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10);  // cached value from the first read
+    } catch (std::exception&) {
+    }
+
+    unset_env("OV_DEBUG_GLOBAL_PROPERTY");
+}
+#endif
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index ef656322ef3288..b0a71005c6a16c 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -210,7 +210,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                       ov::internal::exclusive_async_requests.name(),
                       ". Expected only true/false");
         }
-    } else if (key == ov::intel_cpu::lp_transforms_mode.name()) {
+    } else if (key == ov::internal::enable_lp_transformations.name()) {
         try {
             lpTransformsMode = val.as<bool>() ? LPTransformsMode::On : LPTransformsMode::Off;
         } catch (ov::Exception&) {
diff --git a/src/plugins/intel_cpu/src/internal_properties.hpp b/src/plugins/intel_cpu/src/internal_properties.hpp
index 35f7729d1453d3..bfa584eb27bbf4 100644
--- a/src/plugins/intel_cpu/src/internal_properties.hpp
+++ b/src/plugins/intel_cpu/src/internal_properties.hpp
@@ -15,11 +15,6 @@ namespace ov::intel_cpu {
  */
 static constexpr Property<int, PropertyMutability::RW> cpu_runtime_cache_capacity{"CPU_RUNTIME_CACHE_CAPACITY"};
 
-/**
- * @brief Allow low precision transform.
- */
-static constexpr Property<bool, PropertyMutability::RW> lp_transforms_mode{"LP_TRANSFORMS_MODE"};
-
 /**
  * @brief Enum to define possible snippets mode hints.
 */
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp
index 6d288d9f5ede8b..fc98a2659f9127 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp
@@ -52,7 +52,7 @@ const std::vector<ov::AnyMap> cpu_inproperties = {
     {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
      {ov::hint::num_requests.name(), "should be int"}},
     {{ov::num_streams.name(), "OFF"}},
-    {{ov::hint::enable_cpu_pinning.name(), "OFF"}},
+    {{ov::hint::enable_cpu_pinning.name(), "NOT_BOOL"}},
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests,
diff --git a/src/plugins/intel_gpu/CMakeLists.txt b/src/plugins/intel_gpu/CMakeLists.txt
index 3a847c292c30ba..3322f2f3f1a875 100644
--- a/src/plugins/intel_gpu/CMakeLists.txt
+++ b/src/plugins/intel_gpu/CMakeLists.txt
@@ -51,6 +51,7 @@ endif()
 
 if(ENABLE_GPU_DEBUG_CAPS)
     add_definitions(-DGPU_DEBUG_CONFIG=1)
+    add_definitions(-DENABLE_DEBUG_CAPS=1)
 endif()
 
 set(INTEL_GPU_TARGET_OCL_VERSION "200" CACHE STRING "Target version of OpenCL which should be used by GPU plugin")
diff --git a/src/plugins/intel_gpu/docs/gpu_debug_utils.md b/src/plugins/intel_gpu/docs/gpu_debug_utils.md
index 0708d9dd3557b7..de0d51ba0a8ba0 100644
--- a/src/plugins/intel_gpu/docs/gpu_debug_utils.md
+++ b/src/plugins/intel_gpu/docs/gpu_debug_utils.md
@@ -3,113 +3,65 @@
 
 This document is a list of useful debug features / tricks that might be used to find root cause of performance / functional issues. Some of them are available by default, but some others might require plugin recompilation.
 
-## Debug Config
-
-`Debug_config` is an infrastructure that contains several easy-to-use debugging features. It has various control parameters, which you can check from the source code `cldnn::debug_configuration`.
-
 ### How to use it
 
-First, this feature should be enabled from cmake configuration `ENABLE_DEBUG_CAPS`. When OpenVINO is released, it is turned off by default.
+First, debug features should be enabled via the cmake configuration option `ENABLE_DEBUG_CAPS`. It is turned off by default in released OpenVINO builds.
 
 The parameters can be set from an environment variable when calling inference engine API.
+The environment variable name is the concatenation of the `OV_` prefix and the string identifier of the corresponding ov property (for instance, one of the properties in `src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp`)
 
 ```
-$ OV_GPU_Verbose=1 ./benchmark_app ...       # Run benchmark_app with OV_GPU_Verbose option
-$ OV_GPU_DumpLayersPath="dump/" ./benchmark_app ...       # Run benchmark_app and store intermediate buffers into dump/ directory.
+$ OV_VERBOSE=1 ./benchmark_app ...       # Run benchmark_app with OV_VERBOSE option
+$ OV_GPU_DUMP_TENSORS_PATH="dump/" ./benchmark_app ...       # Run benchmark_app and store intermediate buffers into dump/ directory.
 ```
 
 For Windows OS, use the following syntax:
 
 ```
 Windows Power Shell:
-> $env:OV_GPU_Verbose=1
-> .\benchmark_app.exe ...      # Run benchmark_app with OV_GPU_Verbose option
+> $env:OV_VERBOSE=1
+> .\benchmark_app.exe ...      # Run benchmark_app with OV_VERBOSE option
 
 Windows cmd.exe:
-> set "OV_GPU_Verbose=1"
-> benchmark_app.exe ...      # Run benchmark_app with OV_GPU_Verbose option
+> set "OV_VERBOSE=1"
+> benchmark_app.exe ...      # Run benchmark_app with OV_VERBOSE option
 ```
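The name mapping is mechanical (see `PluginConfig::read_env()` in `plugin_config.cpp` above): the variable consulted for a property is its string identifier with the `OV_` prefix prepended. As a sketch:

```cpp
#include <string>
#include <string_view>

// e.g. "VERBOSE" -> "OV_VERBOSE"
std::string env_var_for(std::string_view property_name) {
    return std::string{"OV_"} + std::string{property_name};
}
```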
 
-### Options syntax
+An alternative approach is to prepare a config file in JSON format and set the path to it via the `OV_DEBUG_CONFIG=path` option.
+NOTE:
+ 1. Options set via environment have higher priority than options from the config file.
+ 2. Global options can't be activated via the config file, to avoid the mess that would arise if the finalize() call changed the value of a globally visible variable.
+Config example:
+```json
+{"GPU.1":{"OV_VERBOSE":"ON","PERF_COUNT":"ON"}}
+```
 
-Plugin is able to parse different naming styles for debug options:
-1. `OV_GPU_SOME_OPTION`
-2. `OV_GPU_SomeOption`
+### Option types
+Plugin config supports 4 option types:
+1. `OV_CONFIG_RELEASE_OPTION` - options that are available via the public API for any build type.
+1. `OV_CONFIG_RELEASE_INTERNAL_OPTION` - available for any build type, but can't be set via the public API.
+1. `OV_CONFIG_DEBUG_OPTION` - these options are available only for builds with `ENABLE_DEBUG_CAPS`.
+1. `OV_CONFIG_DEBUG_GLOBAL_OPTION` - same as above, but with slightly different behavior (see below).
 
-Behavior when both versions are specified is not defined.
+The difference between "local" and "global" options is that the value of a local option is resolved during the "finalize" call for the config class, which typically happens somewhere after a `Core::{compile,import,query}_model()` call. That means that local options can be set on a per-model basis in multi-model pipelines, if the environment is modified from the code for each model.
 
-Some options also allow multiple prefixes: `OV` and `OV_GPU`. `OV` prefix is intended to be used for options common for all OpenVINO components. When an option is set twice with different prefixes, then `OV_GPU` has higher priority.
+The value of a global option is read from the environment on the first access to the option, or set to the default value if it is not present in the environment. Global option variables are static members of the config, which is needed to activate some basic debug capabilities (such as logging) in an arbitrary part of the project without passing an `ExecutionConfig` object to all the places where we need to log something.
 
 ### List of parameters
 
-This is a part of the full list. To get all parameters, see OV_GPU_Help result.
-
-* `OV_GPU_Help`: Shows help message of debug config.
-* `OV_GPU_Verbose`: Verbose execution. Currently, `Verbose=1` and `2` are supported.
-* `OV_GPU_PrintMultiKernelPerf`: Prints kernel latency for multi-kernel primitives. This is turned on by setting `1`. Execution time is printed.
-* `OV_GPU_DisableUsm`: Disables the usage of usm (unified shared memory). This is turned on by setting `1`.
-* `OV_GPU_DisableOnednn`: Disables oneDNN for the hardware with XMX (If GPU does not have XMX, it does not have any effect)
-* `OV_GPU_DumpGraphs`: Dumps optimized graphs into the path that this variable points. This is turned on by setting the destination path into this variable.
-* `OV_GPU_DumpSources`: Dumps openCL sources
-* `OV_GPU_DumpLayersPath`: Enables intermediate buffer dump and store the tensors. This is turned on by setting the destination path into this variable. You can check the exact layer name from `OV_GPU_ListLayers=1`.
-* `OV_GPU_DumpLayers`: Dumps intermediate buffers only for the layers that this variable specifies. Multiple layers can be specified with a space delimiter. Dump feature should be enabled through `OV_GPU_DumpLayersPath`.
-* `OV_GPU_DumpLayersResult`: Dumps output buffers of result layers only.
-* `OV_GPU_DumpLayersDstOnly`: When dumping intermediate buffer, dumps destination buffer only. This is turned on by setting `1`. -* `OV_GPU_DumpLayersLimitBatch`: Limits the size of a batch to dump. -* `OV_GPU_DryRunPath`: Dry runs and serializes the execution graph into the specified path. -* `OV_GPU_BaseBatchForMemEstimation`: Base batch size to be used in memory estimation. -* `OV_GPU_AfterProc`: Runs inference after the specified process PIDs are finished, separated by space. Supported only on Linux. -* `OV_GPU_SerialCompile`: Serializes creating primitives and compiling kernels. -* `OV_GPU_ForceImplType`: Forces implementation type of a target primitive or a layer. [primitive or layout_name]:[impl_type] For primitives, `fc:onednn`, `fc:ocl`, `do:cpu`, `do:ocl`, `reduce:ocl` and `reduce:oneDNN` are supported -* `OV_GPU_MaxKernelsPerBatch`: Maximum number of kernels in a batch during compiling kernels. +Full options list is defined in `src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl` file. This can also be printed to console by setting `OV_HELP=1` option -### How to check debug-config works -If you are uncertain whether debug-config is working or not, you can confirm that with OV_GPU_Help. OV_GPU_Help will just show the help message and terminate the current application. If the help message is properly printed, you can basically believe that this debug config is working correctly. Please note that it requires full execution of inference because the help message is printed from GPU plugin. If you just run `benchmark_app` without any option, it will not show the benchmark_app help message, not the debug-config help message. +### How to check debug-config works +All options that are found in environment or config file are printed to stdout: ``` -$ OV_GPU_Help=1 ./benchmark_app -m resnet_v1.5_50.xml -d GPU -[Step 1/11] Parsing and validating input arguments -[ INFO ] Parsing input parameters -[Step 2/11] Loading OpenVINO Runtime -[ INFO ] OpenVINO: -[ INFO ] Build ................................. 2024.2.0 -[ INFO ] -[ INFO ] Device info: -GPU_Debug: Config Help = 1 -GPU_Debug: Supported environment variables for debugging -GPU_Debug: - OV_GPU_Help Print help messages -GPU_Debug: - OV_GPU_Verbose Verbose execution -GPU_Debug: - OV_GPU_VerboseColor Print verbose color -GPU_Debug: - OV_GPU_ListLayers Print layers names -GPU_Debug: - OV_GPU_PrintMultiKernelPerf Print execution time of each kernel in multi-kernel primitimive -GPU_Debug: - OV_GPU_PrintInputDataShapes Print data_shapes of input layers for benchmark_app. -GPU_Debug: - OV_GPU_DisableUsm Disable usm usage -GPU_Debug: - OV_GPU_DisableOnednn Disable onednn for discrete GPU (no effect for integrated GPU) -GPU_Debug: - OV_GPU_DisableOnednnOptPostOps Disable onednn optimize post operators +$ OV_VERBOSE=1 ./benchmark_app ... - -``` - -You can also check the message from the debug-config parser. As shown below, if env variable is detected, it will print the variable name and configuration. -``` -$ OV_GPU_Verbose=1 OV_GPU_DumpGraphs=graph/ ./benchmark_app -m resnet.xml -d GPU -[Step 1/11] Parsing and validating input arguments -[ INFO ] Parsing input parameters -[Step 2/11] Loading OpenVINO Runtime -[ INFO ] OpenVINO: -[ INFO ] Build ................................. 2024.2.0 -[ INFO ] -[ INFO ] Device info: -GPU_Debug: Config Verbose = 1 # OV_GPU_Verbose is recognized -GPU_Debug: Config DumpGraphs = graph/ # OV_GPU_DumpGraphs is recognized -[ INFO ] GPU -[ INFO ] Build ................................. 
2024.2.0 -[ INFO ] -[Step 3/11] Setting device configuration +Non default env value for VERBOSE = 1 ... ``` - ## Dump execution graph The execution graph (also known as a runtime graph) is a device-specific graph after all transformations applied by the plugin. It is a very useful @@ -195,7 +147,7 @@ So it allows you to quickly check the execution time of some operation on the de *Intel_GPU* plugin allows you to dump some info about intermediate stages in the graph optimizer. -* You can dump graphs with `OV_GPU_DumpGraphs` of debug config. For the usage of debug config, see the [link](#debug-config). +* You can dump graphs with `OV_GPU_DUMP_GRAPHS_PATH` of debug config. For the usage of debug config, see the [link](#debug-config). For each stage, it dumps: @@ -213,7 +165,7 @@ The main graph usually has `program_id = 0`. Graphs with other `program_id` valu Since *Intel_GPU* source tree contains only *templates* of the OpenCLâ„¢ kernels, it is quite important to get full kernels source code. -* You can use `OV_GPU_DumpSources` of debug config. For the usage of debug config, see [link](#debug-config). +* You can use `OV_GPU_DUMP_SOURCES_PATH` of debug config. For the usage of debug config, see [link](#debug-config). When this key is enabled, the plugin dumps multiple files with the following names: @@ -245,17 +197,17 @@ When the source is dumped, it contains a huge amount of macros(`#define`). For r ## Layer in/out buffer dumps In some cases, you might want to get actual values in each layer execution to compare it with some reference blob. To do that, choose the -`OV_GPU_DumpLayersPath` option in debug config. For the usage of debug config, see [link](#debug-config). +`OV_GPU_DUMP_TENSORS_PATH` option in debug config. For the usage of debug config, see [link](#debug-config). As a prerequisite, enable `ENABLE_DEBUG_CAPS` from the cmake configuration. -Then, check the runtime layer name by executing *benchmark_app* with `OV_GPU_Verbose=1`. It is better to check it with `OV_GPU_Verbose=1` than through IR because this may be slightly different. `OV_GPU_Verbose=1` will show the log of execution of each layer. +Then, check the runtime layer name by executing *benchmark_app* with `OV_VERBOSE=1`. It is better to check it with `OV_VERBOSE=1` than through IR because this may be slightly different. `OV_VERBOSE=1` will show the log of execution of each layer. ``` # As a prerequisite, enable ENABLE_DEBUG_CAPS from cmake configuration. -export OV_GPU_DumpLayersPath=path/to/dir -export OV_GPU_DumpLayers="layer_name_to_dump1 layer_name_to_dump2" -export OV_GPU_DumpLayersDstOnly=1 # Set as 1 when you want to dump dest buff only +export OV_GPU_DUMP_TENSORS_PATH=path/to/dir +export OV_GPU_DUMP_LAYER_NAMES="layer_name_to_dump1 layer_name_to_dump2" +export OV_GPU_DUMP_TENSORS=out # only out tensors should be saved ``` Dump files are named in the following convention: @@ -271,17 +223,6 @@ shape: [b:1, f:1280, x:1, y:1, z:1, w:1, g:1] (count: 1280, original format: b_f For troubleshooting the accuracy, you may want to compare the results of GPU plugin and CPU plugin. For CPU dump, see [Blob dumping](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_cpu/docs/debug_capabilities/blob_dumping.md) -## Run int8 model on Gen9 HW - -As Gen9 HW does not have hardware acceleration, low-precision transformations are disabled by default. Therefore, quantized networks are executed in full precision (FP16 or FP32), with explicit execution of quantize operations. 
-If you do not have Gen12 HW, but want to debug the network's accuracy or performance of simple operations (which does not require dp4a support), then you can enable low precision pipeline on Gen9, with one of the following approaches: -1. Add `ov::intel_gpu::enable_lp_transformations(true)` option to the plugin config. -2. Enforce `supports_imad = true` [here](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/src/gpu/device_info.cpp#L226) -3. Enforce `conf.enableInt8 = true` [here](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/src/cldnn_engine/cldnn_engine.cpp#L366) - -After that, the plugin will run exactly the same scope of transformations as on Gen12 HW and generate similar kernels (a small difference is possible due to different EUs count). - - ## Checking OpenCL execution OpenVINO GPU plugin runs on top of opencl. [opencl-intercept-layer](https://github.com/intel/opencl-intercept-layer/) is a very handy tool to check opencl execution. diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 2a99b9dfef4b76..dabfe8ce09acd6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -197,7 +197,7 @@ struct network { void set_reuse_variable_mem(bool reuse = false); bool is_reuse_variable_mem() { return _reuse_variable_mem; } - const ExecutionConfig& get_config() const { return _config; } + const ExecutionConfig& get_config() const { return _program->get_config(); } std::shared_ptr get_shape_predictor() { return _shape_predictor; } void set_shape_predictor(std::shared_ptr shape_predictor) { _shape_predictor = shape_predictor; } @@ -210,7 +210,6 @@ struct network { using output_chains_map = std::map>; uint32_t net_id = 0; program::ptr _program; - ExecutionConfig _config; engine& _engine; stream::ptr _stream; std::unique_ptr _memory_pool; diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index c775537a514dde..96bfff48820aaa 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -309,12 +309,11 @@ struct program { std::vector allocating_order; std::unique_ptr pm; std::unique_ptr _layout_optimizer; - bool is_internal; - bool _is_body_program; + bool is_internal = false; + bool _is_body_program = false; // if subgraph can be optimized if it consists of only inputs and corresponding outputs bool _can_be_optimized; std::unique_ptr _impls_cache; - const size_t _impls_cache_capacity = 300; std::shared_ptr _compilation_context; bool _loaded_from_cache = false; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp index e4340bc167ccbb..3889833aca4fc9 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp @@ -34,9 +34,8 @@ enum class TensorType { #define TensorValue(val) static_cast(val) inline bool can_use_usm_host(cldnn::engine& engine, const uint64_t total_output_bytes) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->use_usm_host == 1) { return true; } - GPU_DEBUG_IF(debug_config->use_usm_host == 2) { return false; } + GPU_DEBUG_IF(ExecutionConfig::get_usm_policy() == 1) { return true; } + 
GPU_DEBUG_IF(ExecutionConfig::get_usm_policy() == 2) { return false; } auto can_use_usm = engine.use_unified_shared_memory(); // When output size is large, it is better not to write to usm_host directly diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 869d32825b8761..cec27660baa2b4 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -43,7 +43,6 @@ class Plugin : public ov::IPlugin { bool is_metric(const std::string& name) const; ov::Any get_metric(const std::string& name, const ov::AnyMap& arguments) const; - void set_cache_info(const std::shared_ptr& model, ExecutionConfig& properties) const; public: Plugin(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 51087405f09769..389c7f7e2b54a1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -79,7 +79,7 @@ struct PerfCounter { class ProgramBuilder final { public: - ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, bool partialBuild = false, + ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, std::shared_ptr task_executor = nullptr, std::shared_ptr compilation_context = nullptr, bool innerProgram = false); @@ -137,8 +137,7 @@ class ProgramBuilder final { void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {}); - bool use_new_shape_infer() const { return allow_new_shape_infer; } - bool requires_new_shape_infer(const std::shared_ptr& op) const; + bool use_new_shape_infer() const { return m_config.get_allow_new_shape_infer(); } bool is_inner_program() const { return m_is_inner_program; } bool is_query_mode() { return queryMode; } @@ -156,8 +155,6 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; - bool allow_new_shape_infer = false; - bool queryMode; std::shared_ptr m_task_executor; @@ -171,8 +168,7 @@ class ProgramBuilder final { void prepare_build(); void cleanup_build(); - // TODO(eunsoo): remove createTopolpgyOnly argument and add another method to create topology from ngraph function - std::shared_ptr build(const std::vector>& ops, bool partialBuild = false, bool innerProgram = false); + std::shared_ptr build(const std::vector>& ops, bool innerProgram = false); void CreateSingleLayerPrimitive(const std::shared_ptr& op); }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp index 8440bd6824ef9e..8f7e7ac45bca2e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp @@ -19,7 +19,6 @@ #include #include #include -#include namespace ov::intel_gpu { @@ -37,6 +36,7 @@ class RemoteContextImpl : public ov::IRemoteContext { ov::SoPtr create_tensor(const ov::element::Type& type, const ov::Shape& shape, const ov::AnyMap& params) override; cldnn::engine& get_engine() { return *m_engine; } + const cldnn::engine& get_engine() const { return *m_engine; } ov::intel_gpu::gpu_handle_param get_external_queue() const { return m_external_queue; } cldnn::memory::ptr try_get_cached_memory(size_t hash); diff --git 
a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 2ce1397c44bb68..4367d2062d7325 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -4,12 +4,10 @@ #pragma once #include -#include -#include -#include -#include #include +#include +#include "intel_gpu/runtime/execution_config.hpp" namespace ov::intel_gpu { // Verbose log levels: @@ -39,145 +37,65 @@ enum class LogLevel : int8_t { TRACE_DETAIL = 4 }; +std::ostream& get_verbose_stream(); } // namespace ov::intel_gpu #ifdef GPU_DEBUG_CONFIG -#if defined(_WIN32) -#define SEPARATE '\\' -#else -#define SEPARATE '/' -#endif -#define GPU_FILENAME (strrchr(__FILE__, SEPARATE) ? strrchr(__FILE__, SEPARATE) + 1 : __FILE__) + +namespace color { +static constexpr const char dark_gray[] = "\033[1;30m"; +static constexpr const char blue[] = "\033[1;34m"; +static constexpr const char purple[] = "\033[1;35m"; +static constexpr const char cyan[] = "\033[1;36m"; +static constexpr const char reset[] = "\033[0m"; +} // namespace color + +static constexpr const char prefix[] = "GPU_Debug: "; + #define GPU_DEBUG_IF(cond) if (cond) +#define GPU_DEBUG_VALUE_OR(debug_value, release_value) debug_value #define GPU_DEBUG_CODE(...) __VA_ARGS__ + #define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) \ - cldnn::instrumentation::mem_usage_logger mem_logger{stage, cldnn::debug_configuration::get_instance()->verbose >= 2}; -#define GPU_DEBUG_PROFILED_STAGE(stage) \ - auto stage_prof = cldnn::instrumentation::profiled_stage(\ - !cldnn::debug_configuration::get_instance()->dump_profiling_data.empty(), *this, stage) + cldnn::instrumentation::mem_usage_logger mem_logger{stage, ov::intel_gpu::ExecutionConfig::get_verbose() >= 2}; + +#define GPU_DEBUG_PROFILED_STAGE(stage) \ + auto stage_prof = cldnn::instrumentation::profiled_stage( \ + !get_config().get_dump_profiling_data_path().empty(), *this, stage) + #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) stage_prof.set_cache_hit(val) #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info) stage_prof.add_memalloc_info(info) -#define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (cldnn::debug_configuration::get_instance()->verbose >= min_verbose_level) \ - ((cldnn::debug_configuration::get_instance()->verbose_color == 0) ? 
GPU_DEBUG_LOG_PREFIX : GPU_DEBUG_LOG_COLOR_PREFIX) -#define GPU_DEBUG_LOG_RAW(min_verbose_level) GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level)) -#define GPU_DEBUG_LOG_PREFIX \ - *cldnn::debug_configuration::verbose_stream << cldnn::debug_configuration::prefix << GPU_FILENAME << ":" <<__LINE__ << ":" << __func__ << ": " -#define GPU_DEBUG_LOG_COLOR_PREFIX *cldnn::debug_configuration::verbose_stream << DARK_GRAY << cldnn::debug_configuration::prefix << \ - BLUE << GPU_FILENAME << ":" << PURPLE << __LINE__ << ":" << CYAN << __func__ << ": " << RESET -#define DARK_GRAY "\033[1;30m" -#define BLUE "\033[1;34m" -#define PURPLE "\033[1;35m" -#define CYAN "\033[1;36m" -#define RESET "\033[0m" +#define GPU_DEBUG_LOG_PREFIX ov::intel_gpu::get_verbose_stream() \ + << prefix \ + << std::filesystem::path(__FILE__).filename().generic_string() << ":" \ + << std::to_string(__LINE__) << ":" \ + << __func__ << ": " + +#define GPU_DEBUG_LOG_COLOR_PREFIX ov::intel_gpu::get_verbose_stream() \ + << color::dark_gray << std::string(prefix) \ + << color::blue << std::filesystem::path(__FILE__).filename().generic_string() << ":" \ + << color::purple << std::to_string(__LINE__) << ":" \ + << color::cyan << __func__ << ": " << color::reset + +#define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (ov::intel_gpu::ExecutionConfig::get_verbose() >= min_verbose_level) \ + (ov::intel_gpu::ExecutionConfig::get_verbose_color() ? GPU_DEBUG_LOG_COLOR_PREFIX : GPU_DEBUG_LOG_PREFIX) + +#define GPU_DEBUG_LOG_RAW(min_verbose_level) \ + GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level)) #else #define GPU_DEBUG_IF(cond) if (0) +#define GPU_DEBUG_VALUE_OR(debug_value, release_value) release_value #define GPU_DEBUG_CODE(...) #define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) #define GPU_DEBUG_PROFILED_STAGE(stage) #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info) -#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) *cldnn::debug_configuration::verbose_stream << cldnn::debug_configuration::prefix +#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) ov::intel_gpu::get_verbose_stream() #endif -// Macro below is inserted to avoid unused variable warning when GPU_DEBUG_CONFIG is OFF -#define GPU_DEBUG_GET_INSTANCE(name) auto name = cldnn::debug_configuration::get_instance(); (void)(name); - #define GPU_DEBUG_COUT GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::DISABLED) #define GPU_DEBUG_INFO GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::INFO) #define GPU_DEBUG_LOG GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::LOG) #define GPU_DEBUG_TRACE GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::TRACE) #define GPU_DEBUG_TRACE_DETAIL GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::TRACE_DETAIL) - -namespace cldnn { - -class debug_configuration { -private: - debug_configuration(); - -public: - static const char *prefix; - int help; // Print help messages - int verbose; // Verbose execution - int verbose_color; // Print verbose color - std::string verbose_file; // Verbose log to file - int list_layers; // Print list layers - int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitimive - int print_input_data_shapes; // Print the input data_shape for benchmark_app. 
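The logging macros above now gate on `ov::intel_gpu::ExecutionConfig::get_verbose()` instead of the `cldnn::debug_configuration` singleton whose fields are removed below. A minimal sketch of a call site under the new scheme; the function is hypothetical, while the macro semantics are as defined in this header:

```cpp
#include "intel_gpu/runtime/debug_configuration.hpp"

// Hypothetical call site: gate a debug-only path on the global verbose level.
void trace_if_verbose() {
    // In GPU_DEBUG_CONFIG builds the condition reads the typed global option;
    // otherwise GPU_DEBUG_IF discards its argument and expands to `if (0)`.
    GPU_DEBUG_IF(ov::intel_gpu::ExecutionConfig::get_verbose() >= 2) {
        GPU_DEBUG_LOG << "verbose-only diagnostics" << std::endl;
    }
}
```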
- int disable_usm; // Disable usm usage - int disable_onednn; // Disable onednn for discrete GPU (no effect for integrated GPU) - int disable_onednn_opt_post_ops; // Disable onednn optimize post operators - std::string dump_profiling_data; // Enables dump of extended performance profiling to specified dir - int dump_profiling_data_per_iter; // Enables dump of extended performance profiling to specified dir for each iteration - int host_time_profiling; // Enables measurement of scheduling time spend on the host - std::string dump_graphs; // Dump optimized graph - std::string dump_sources; // Dump opencl sources - std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path - std::vector dump_layers; // Dump intermediate buffers of specified layers only - std::string dry_run_path; // Dry run and serialize execution graph into the specified path - int dump_layers_dst_only; // Dump only output of layers - int dump_layers_result; // Dump result layers - int dump_layers_input; // Dump input layers - int dump_layers_limit_batch; // Limit the size of batch to dump - int dump_layers_raw; // Dump raw data. - int dump_layers_binary; // Dump binary data. - int dump_memory_pool; // Dump memory pool status at each iteration - std::set dump_memory_pool_iters; // List of iteration's memory pool status - std::string dump_memory_pool_path; // Enable dumping memory pool status to csv file and set the dest path - int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation - std::vector after_proc; // Start inference after the listed processes - int serialize_compile; // Serialize creating primitives and compiling kernels - std::vector forced_impl_types; // Force implementation type either ocl or onednn - int max_kernels_per_batch; // Maximum number of kernels in a batch during compiling kernels - int impls_cache_capacity; // The maximum number of entries in the kernel impl cache - int enable_sdpa; // Allows to control SDPA decomposition - int disable_async_compilation; // Disable async compilation - int disable_winograd_conv; // Disable Winograd conv - int disable_dynamic_impl; // Disable dynamic implementation - int disable_runtime_buffer_fusing; // Disable runtime buffer fusing - int disable_memory_reuse; // Disable memmory reuse among layers - int disable_build_time_weight_reorder_for_dynamic_nodes; // Disable build time weight reordering for dynamic nodes - int disable_runtime_skip_reorder; // Disable runtime skip reorder - int disable_primitive_fusing; // Disable primitive fusing - int disable_fake_alignment; // Disable fake alignment - int use_usm_host; // Set explicit usm_host usage for network input and output - std::vector dynamic_quantize_layers_without_onednn; // Specify Fully-connected layers which enable Dynamic quantization - int use_kv_cache_compression; // Enable KV-cache compression - int dynamic_quantize_group_size; // Enable Dynamic quantization for fully connected primitive by specified group size - int dynamic_quantize_asym; // Use asymmetric dynamic quantization - int disable_horizontal_fc_fusion; // Disable fc horizontal fusion - int disable_fc_swiglu_fusion; // Disable swiglu fusion to fc - std::set dump_iteration; // Dump n-th execution of network. 
- std::vector load_layers_raw_dump; // List of layers to load dumped raw binary and filenames - static const debug_configuration *get_instance(); - bool is_target_dump_prof_data_iteration(int64_t iteration) const; - std::vector get_filenames_for_matched_layer_loading_binaries(const std::string& id) const; - std::string get_name_for_dump(const std::string& file_name) const; - bool is_layer_for_dumping(const std::string& layerName, bool is_output = false, bool is_input = false) const; - bool is_target_iteration(int64_t iteration) const; - std::string get_matched_from_filelist(const std::vector& file_names, std::string pattern) const; - bool is_layer_name_matched(const std::string& layer_name, const std::string& pattern) const; - - struct memory_preallocation_params { - bool is_initialized = false; - - // Iterations mode preallocation - size_t next_iters_preallocation_count = 0; - size_t max_per_iter_size = 0; - size_t max_per_dim_diff = 0; - - // Percentage mode preallocation - float buffers_preallocation_ratio = 0.0f; - } mem_preallocation_params; - - struct dump_profiling_data_iter_params { - bool is_enabled = false; - int64_t start = 0; - int64_t end = 0; - } dump_prof_data_iter_params; - - static std::ostream* verbose_stream; - static const int DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET = -2; -}; - -} // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 96e09605eaa998..fe7f8547d433d0 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -1,177 +1,54 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once +#include "openvino/core/any.hpp" +#include "openvino/runtime/plugin_config.hpp" +#include "intel_gpu/runtime/device_info.hpp" #include "intel_gpu/runtime/internal_properties.hpp" -#include "intel_gpu/runtime/device.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include namespace ov::intel_gpu { -enum class PropertyVisibility { - INTERNAL = 0, - PUBLIC = 1 -}; - -inline std::ostream& operator<<(std::ostream& os, const PropertyVisibility& visibility) { - switch (visibility) { - case PropertyVisibility::PUBLIC: os << "PUBLIC"; break; - case PropertyVisibility::INTERNAL: os << "INTERNAL"; break; - default: os << "UNKNOWN"; break; - } - - return os; -} - -class BaseValidator { -public: - using Ptr = std::shared_ptr; - virtual ~BaseValidator() = default; - virtual bool is_valid(const ov::Any& v) const = 0; -}; - -class FuncValidator : public BaseValidator { -public: -explicit FuncValidator(std::function func) : m_func(func) { } - bool is_valid(const ov::Any& v) const override { - return m_func(v); - } - -private: - std::function m_func; -}; - -// PropertyTypeValidator ensures that value can be converted to given property type -template -class PropertyTypeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - try { - v.as(); - return true; - } catch (ov::Exception&) { - return false; - } - } -}; - -class ExecutionConfig { -public: +struct ExecutionConfig : public ov::PluginConfig { ExecutionConfig(); ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } explicit ExecutionConfig(const 
ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); }

-    void set_default();
-    void set_property(const ov::AnyMap& properties);
-    void set_user_property(const ov::AnyMap& properties);
-    Any get_property(const std::string& name) const;
-    bool is_set_by_user(const std::string& name) const;
-    bool is_supported(const std::string& name) const;
-    void register_property_impl(const std::pair& propertiy, PropertyVisibility visibility, BaseValidator::Ptr validator);
-
-    template ::type = true>
-    void register_property_impl() { }
-
-    template
-    void register_property_impl(const std::tuple, ValueT>& property, PropertyInitializer&&... properties) {
-        auto p = std::get<0>(property)(std::get<1>(property));
-        auto v = std::dynamic_pointer_cast(std::make_shared>());
-        register_property_impl(std::move(p), visibility, std::move(v));
-        register_property_impl(properties...);
-    }
+    // The default copy/assignment operators copy the config as is, including the state of the finalized flag.
+    // If the config needs updates after finalization, the clone() method shall be used instead, as it resets the finalized flag.
+    // That is needed to avoid unexpected option updates, because finalization is called twice: in the transformation pipeline
+    // and in the cldnn::program c-tor (the latter mainly to handle unit tests). The second call may cause unwanted side effects
+    // if the config is not marked as finalized, which could easily happen if the copy operator reset the finalization flag.
+    ExecutionConfig(const ExecutionConfig& other);
+    ExecutionConfig& operator=(const ExecutionConfig& other);
+    ExecutionConfig clone() const;

-    template
-    typename std::enable_if::value, void>::type
-    register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) {
-        auto p = std::get<0>(property)(std::get<1>(property));
-        auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property)));
-        register_property_impl(std::move(p), visibility, std::move(v));
-        register_property_impl(properties...);
-    }
+    void finalize(cldnn::engine& engine);
+    using ov::PluginConfig::finalize;

-    template
-    typename std::enable_if, ValidatorT>::value, void>::type
-    register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) {
-        auto p = std::get<0>(property)(std::get<1>(property));
-        auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property)));
-        register_property_impl(std::move(p), visibility, std::move(v));
-        register_property_impl(properties...);
-    }
+    const ov::AnyMap& get_user_properties() const { return m_user_properties; }

-    template
-    void register_property(PropertyInitializer&&... properties) {
-        register_property_impl(properties...);
-    }
-
-    template
-    util::EnableIfAllStringAny set_property(Properties&&... properties) {
-        set_property(ov::AnyMap{std::forward(properties)...});
-    }
-
-    template
-    util::EnableIfAllStringAny set_user_property(Properties&&...
properties) { - set_user_property(ov::AnyMap{std::forward(properties)...}); - } - - template - bool is_set_by_user(const ov::Property& property) const { - return is_set_by_user(property.name()); - } - - template - T get_property(const ov::Property& property) const { - return get_property(property.name()).template as(); - } +protected: + void finalize_impl(const IRemoteContext* context) override; + void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) override; + void apply_rt_info(const IRemoteContext* context, const ov::RTMap& rt_info, bool is_llm); void apply_user_properties(const cldnn::device_info& info); - - // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call - // So this method should be called after setting all user properties, but before apply_user_properties() call. - void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm); - - std::string to_string() const; - -protected: void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void apply_debug_options(const cldnn::device_info& info); - - template - void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { - if (!is_set_by_user(property)) { - auto rt_info_val = rt_info.find(property.name()); - if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); - } - } - } -private: - ov::AnyMap internal_properties; - ov::AnyMap user_properties; - - std::map supported_properties; - std::map property_validators; - - bool finalized = false; + #include "intel_gpu/runtime/options.inl" }; } // namespace ov::intel_gpu namespace cldnn { using ov::intel_gpu::ExecutionConfig; -} // namespace cldnn +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 765333e971842e..5ef5f00b636f8d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -4,6 +4,7 @@ #pragma once +#include "intel_gpu/runtime/shape_predictor.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" @@ -35,6 +36,83 @@ inline std::ostream& operator<<(std::ostream& os, const QueueTypes& val) { return os; } +inline std::istream& operator>>(std::istream& is, QueueTypes& val) { + std::string str; + is >> str; + if (str == "in-order") { + val = QueueTypes::in_order; + } else if (str == "out-of-order") { + val = QueueTypes::out_of_order; + } else { + OPENVINO_THROW("Unsupported QueueTypes value: ", str); + } + return is; +} + +enum class DumpFormat : uint8_t { + binary = 0, + text = 1, + text_raw = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpFormat& val) { + switch (val) { + case DumpFormat::binary: os << "binary"; break; + case DumpFormat::text: os << "text"; break; + case DumpFormat::text_raw: os << "text_raw"; break; + default: os << "unknown"; + } + + return os; +} + +inline std::istream& operator>>(std::istream& is, DumpFormat& val) { + std::string str; + is >> str; + if (str == "binary") { + val = DumpFormat::binary; + } else if (str == "text") { + val = DumpFormat::text; + } else if (str == "text_raw") { 
+ val = DumpFormat::text_raw; + } else { + OPENVINO_THROW("Unsupported DumpFormat value: ", str); + } + return is; +} + +enum class DumpTensors : uint8_t { + all = 0, + in = 1, + out = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpTensors& val) { + switch (val) { + case DumpTensors::all: os << "all"; break; + case DumpTensors::in: os << "in"; break; + case DumpTensors::out: os << "out"; break; + default: os << "unknown"; + } + + return os; +} + +inline std::istream& operator>>(std::istream& is, DumpTensors& val) { + std::string str; + is >> str; + if (str == "all") { + val = DumpTensors::all; + } else if (str == "in") { + val = DumpTensors::in; + } else if (str == "out") { + val = DumpTensors::out; + } else { + OPENVINO_THROW("Unsupported DumpTensors value: ", str); + } + return is; +} + /** * @brief Defines queue type that must be used for model execution */ @@ -45,17 +123,48 @@ static constexpr Property optimize_data{"GPU_OPTIM static constexpr Property allow_static_input_reorder{"GPU_ALLOW_STATIC_INPUT_REORDER"}; static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; -static constexpr Property use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"}; -static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; -static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; -static constexpr Property max_dynamic_batch{"DYN_BATCH_LIMIT"}; -static constexpr Property nv12_two_inputs{"GPU_NV12_TWO_INPUTS"}; static constexpr Property buffers_preallocation_ratio{"GPU_BUFFERS_PREALLOCATION_RATIO"}; static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; -static constexpr Property use_onednn{"USE_ONEDNN"}; +static constexpr Property use_onednn{"GPU_USE_ONEDNN"}; + +static constexpr Property help{"HELP"}; +static constexpr Property verbose{"VERBOSE"}; +static constexpr Property verbose_color{"VERBOSE_COLOR"}; +static constexpr Property debug_config{"GPU_DEBUG_CONFIG"}; +static constexpr Property log_to_file{"GPU_LOG_TO_FILE"}; +static constexpr Property disable_usm{"GPU_DISABLE_USM"}; +static constexpr Property disable_onednn_post_ops_opt{"GPU_DISABLE_ONEDNN_POST_OPS_OPT"}; +static constexpr Property dump_graphs_path{"GPU_DUMP_GRAPHS_PATH"}; +static constexpr Property dump_profiling_data_path{"GPU_DUMP_PROFILING_DATA_PATH"}; +static constexpr Property dump_profiling_data_per_iter{"GPU_DUMP_PROFILING_DATA_PER_ITER"}; +static constexpr Property dump_sources_path{"GPU_DUMP_SOURCES_PATH"}; +static constexpr Property dump_tensors_path{"GPU_DUMP_TENSORS_PATH"}; +static constexpr Property dry_run_path{"GPU_DRY_RUN_PATH"}; +static constexpr Property dump_tensors{"GPU_DUMP_TENSORS"}; +static constexpr Property, ov::PropertyMutability::RW> dump_layer_names{"GPU_DUMP_LAYER_NAMES"}; +static constexpr Property dump_tensors_format{"GPU_DUMP_TENSORS_FORMAT"}; +static constexpr Property dump_memory_pool_path{"GPU_DUMP_MEMORY_POOL_PATH"}; +static constexpr Property dump_memory_pool{"GPU_DUMP_MEMORY_POOL"}; +static constexpr Property dump_batch_limit{"GPU_DUMP_BATCH_LIMIT"}; +static constexpr Property, ov::PropertyMutability::RW> dump_iterations{"GPU_DUMP_ITERATIONS"}; +static constexpr Property 
host_time_profiling{"GPU_HOST_TIME_PROFILING"}; +static constexpr Property impls_cache_capacity{"GPU_IMPLS_CACHE_CAPACITY"}; +static constexpr Property disable_async_compilation{"GPU_DISABLE_ASYNC_COMPILATION"}; +static constexpr Property disable_runtime_buffer_fusing{"GPU_DISABLE_RUNTIME_BUFFER_FUSING"}; +static constexpr Property disable_memory_reuse{"GPU_DISABLE_MEMORY_REUSE"}; +static constexpr Property disable_post_ops_fusions{"GPU_DISABLE_POST_OPS_FUSIONS"}; +static constexpr Property disable_horizontal_fc_fusion{"GPU_DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property disable_fc_swiglu_fusion{"GPU_DISABLE_FC_SWIGLU_FUSION"}; +static constexpr Property disable_fake_alignment{"GPU_DISABLE_FAKE_ALIGNMENT"}; +static constexpr Property disable_runtime_skip_reorder{"GPU_DISABLE_RUNTIME_SKIP_REORDER"}; +static constexpr Property usm_policy{"GPU_USM_POLICY"}; +static constexpr Property asym_dynamic_quantization{"GPU_ASYM_DYNAMIC_QUANTIZATION"}; +static constexpr Property shape_predictor_settings{"GPU_SHAPE_PREDICTOR_SETTINGS"}; +static constexpr Property, ov::PropertyMutability::RW> load_dump_raw_binary{"GPU_LOAD_DUMP_RAW_BINARY"}; +static constexpr Property, ov::PropertyMutability::RW> start_after_processes{"GPU_START_AFTER_PROCESSES"}; } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp index 87e0b2990b7902..1d27eaf63efb86 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp @@ -4,6 +4,7 @@ #pragma once +#include "intel_gpu/runtime/execution_config.hpp" #include "layout.hpp" #include "memory_caps.hpp" #include "utils.hpp" @@ -110,8 +111,6 @@ struct padded_pool_comparer { // - Improve memory consumption class memory_pool { - memory_pool(); - memory_ptr alloc_memory(const layout& layout, allocation_type type, bool reset = true); static bool has_conflict(const memory_set&, const std::unordered_set&, uint32_t network_id); @@ -119,9 +118,10 @@ class memory_pool { std::map, padded_pool_comparer> _padded_pool; std::multimap _no_reusable_pool; engine* _engine; + const ExecutionConfig& _config; public: - explicit memory_pool(engine& engine); + explicit memory_pool(engine& engine, const ExecutionConfig& config); ~memory_pool(); memory_ptr get_memory(const layout& layout, const primitive_id& id, diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl new file mode 100644 index 00000000000000..6e89da62ebf1e9 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -0,0 +1,82 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Namespace, property name, default value, [validator], description +OV_CONFIG_RELEASE_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") +OV_CONFIG_RELEASE_OPTION(ov::device, id, "0", "ID of the current device") +OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. 
Caching is disabled if empty")
+OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference")
+OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism")
+OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, "Model floating-point inference precision. Supported values: { f16, f32, undefined }", [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; })
+OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact the number of threads used for model compilation and inference as well as device queue settings")
+OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines the target model inference mode. It may impact the number of streams, auto batching, etc.")
+OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy")
+OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Hint that provides the number of requests populated by the application")
+OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not")
+OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_reservation, false, "CPU reservation means reserving CPUs that will not be used by other plugins or compiled models")
+OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls the core types used for host tasks")
+OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level")
+OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls the queue priority property")
+OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution")
+OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling")
+OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable the Winograd convolution implementation if available")
+OV_CONFIG_RELEASE_OPTION(ov::internal, exclusive_async_requests, false, "")
+OV_CONFIG_RELEASE_OPTION(ov::internal, query_model_ratio, 1.0f, "")
+OV_CONFIG_RELEASE_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache")
+OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, ov::EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model")
+OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, "")
+OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "")
+OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "")
+OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching")
+OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, -1.0f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision")
+OV_CONFIG_RELEASE_OPTION(ov::internal, enable_lp_transformations, false, "Enable/Disable the set of low-precision transformations")
+OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file")
+
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, shape_predictor_settings, {10, 16 * 1024, 2, 1.1f}, "Preallocation settings")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. May be in-order or out-of-order")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_memory_pool, true, "Enable/Disable memory pool usage")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "Controls if weights tensors can be reordered during model compilation to a layout that is more friendly for a specific kernel")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, custom_outputs, std::vector{}, "List of output primitive names")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "Specifies the list of forced implementations for the primitives")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, partial_build_program, false, "Early exit from the model compilation process, which allows faster execution graph dumping")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "Switch between new and old shape inference flow. Shall be removed soon")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable oneDNN usage for a particular model/platform")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into a batch for more efficient OpenCL compilation")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls the capacity of the LRU implementations cache that is created for each program object for dynamic models")
+
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, help, false, "Print help message for all config options")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher the value, the more verbose the output. 0 - Disabled, 4 - Maximum verbosity")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose_color, true, "Enable coloring for verbose logs")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, usm_policy, 0, "0: default, 1: use usm_host, 2: do not use usm_host")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_profiling_data_per_iter, false, "Save profiling data without per-iteration aggregation")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to the specified file")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, debug_config, "", "Path to a debug config in JSON format")
+
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable the optimization pass for oneDNN post-ops")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data_path, "", "Save a csv file with per-stage and per-primitive profiling data to the specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs_path, "", "Save intermediate graph representations during the model compilation pipeline to the specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources_path, "", "Save generated sources for each kernel to the specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_path, "", "Save intermediate in/out tensors of each primitive to the specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, ov::intel_gpu::DumpTensors::all, "Tensor types to dump. Supported values: all, in, out")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_format, ov::intel_gpu::DumpFormat::text, "Format of the tensors dump. Supported values: binary, text, text_raw")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_layer_names, std::vector{}, "Activate dump for the specified layers only")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save a csv file with memory pool info to the specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, false, "Enable verbose output for the memory pool")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set{}, "Space-separated list of iterations where other dump options should be enabled")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, 0, "Measure and print the host time spent from the beginning of the inference until all host work is done and the plugin is ready to block the thread on the final clFinish() call")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable the feature that asynchronously prepares static-shaped implementations for primitives with shape-agnostic kernels selected during compilation")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime in-place optimizations for operations like concat and crop")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable the pass which merges QKV projections into a single MatMul")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fc_swiglu_fusion, false, "Disable the pass which merges FC and SwiGLU ops")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable the fake alignment feature which tries to keep GPU-friendly memory alignment for arbitrary tensor shapes")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "Disable memory reuse for activation tensors")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_skip_reorder, false, "Disable the skip-reorder optimization applied in runtime")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, std::vector{}, "List of layers to load raw binary for")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, start_after_processes, std::vector{}, "Start inference after the specified list of processes")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dry_run_path, "", "Enables a mode which partially compiles a model and stores the runtime model into the specified directory")
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp
index b3eca9a78fba89..7d0dba80e7c017 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include "intel_gpu/runtime/execution_config.hpp"
 #if defined(_WIN32)
 #ifndef NOMINMAX
@@ -157,7 +158,7 @@ class profiled_stage {
     , _obj(obj)
     , _stage(stage) {
     GPU_DEBUG_IF(profiling_enabled) {
-        _per_iter_mode = cldnn::debug_configuration::get_instance()->dump_profiling_data_per_iter != 0;
+        _per_iter_mode = GPU_DEBUG_VALUE_OR(ov::intel_gpu::ExecutionConfig::get_dump_profiling_data_per_iter(), false);
         _start = std::chrono::high_resolution_clock::now();
     }
 }
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp
index 469c676b4b0311..a246c6d1fdda8f 100644
---
a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp @@ -14,23 +14,20 @@ class engine; struct ShapePredictor { public: - using Ptr = std::shared_ptr; - ShapePredictor(const engine* engine, float buffers_preallocation_ratio) - : _engine(engine) - , _buffers_preallocation_ratio(buffers_preallocation_ratio) { - static_assert(_max_deque_size >= 2, "[GPU] Deque is supposed to contain at least 2 elements for prediction"); - } + struct Settings { + // Iterations mode preallocation + size_t next_iters_preallocation_count = 10; + size_t max_per_iter_size = 16 * 1024; + size_t max_per_dim_diff = 2; - ShapePredictor(const engine* engine, - size_t next_iters_preallocation_count, - size_t max_per_iter_size, - size_t max_per_dim_diff, - float buffers_preallocation_ratio) + // Percentage mode preallocation + float buffers_preallocation_ratio = 1.1f; + }; + + using Ptr = std::shared_ptr; + ShapePredictor(const engine* engine, const Settings& settings) : _engine(engine) - , _next_iters_preallocation_count(next_iters_preallocation_count) - , _max_per_iter_size(max_per_iter_size) - , _max_per_dim_diff(max_per_dim_diff) - , _buffers_preallocation_ratio(buffers_preallocation_ratio) { + , _settings(settings) { static_assert(_max_deque_size >= 2, "[GPU] Deque is supposed to contain at least 2 elements for prediction"); } @@ -73,13 +70,7 @@ struct ShapePredictor { std::map> _shapes_info; const engine* _engine; - // Iterations mode preallocation - const size_t _next_iters_preallocation_count = 10; - const size_t _max_per_iter_size = 16 * 1024; // 16KB => maximum preallocation size is 16KB * 10iters = 160KB - const size_t _max_per_dim_diff = 2; - - // Percentage mode preallocation - const float _buffers_preallocation_ratio = 1.0f; + const Settings _settings; }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp index 0f69379fa8e217..95d060ef0b760e 100644 --- a/src/plugins/intel_gpu/src/graph/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp @@ -149,7 +149,7 @@ void broadcast_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index d9f6ebd8b71872..fec36fb9ff5c81 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -278,7 +278,7 @@ void crop_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout()); diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.cpp b/src/plugins/intel_gpu/src/graph/debug_helper.cpp index b69d10e137010e..fb1a618c27cf60 100644 --- a/src/plugins/intel_gpu/src/graph/debug_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/debug_helper.cpp @@ -3,6 +3,9 @@ // #include "debug_helper.hpp" +#include +#include "intel_gpu/runtime/execution_config.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/util/file_util.hpp" #ifdef GPU_DEBUG_CONFIG @@ -44,8 +47,7 @@ template void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) { auto&& size = mem->get_layout().get_tensor(); - GPU_DEBUG_GET_INSTANCE(debug_config); - auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1); + auto batch_size = std::max(std::min(ExecutionConfig::get_dump_batch_limit(), size.batch[0]), 1); tensor tmp_size(size); tmp_size.batch[0] = batch_size; if (tmp_size == size) { @@ -121,8 +123,7 @@ void unpack(cldnn::data_types type, uint8_t input, int8_t &v0, int8_t &v1) { void dump_i4u4(cldnn::data_types type, memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) { auto&& size = mem->get_layout().get_tensor(); - GPU_DEBUG_GET_INSTANCE(debug_config); - auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1); + auto batch_size = std::max(std::min(ExecutionConfig::get_dump_batch_limit(), size.batch[0]), 1); tensor tmp_size(size); tmp_size.batch[0] = batch_size; if (tmp_size == size) { @@ -160,11 +161,16 @@ void dump_i4u4(cldnn::data_types type, memory::ptr mem, stream& stream, std::ofs file_stream << buffer.str(); } -void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string layerName, bool dump_raw) { - std::cout << "Dump " << (dump_raw ? 
"raw " : "") << layerName << std::endl; - GPU_DEBUG_GET_INSTANCE(debug_config); - std::string filename = debug_config->get_name_for_dump(layerName); - filename = debug_config->dump_layers_path + filename + ".txt"; +std::string get_name_for_dump(const std::string& file_name) { + std::string filename = file_name; + std::replace(filename.begin(), filename.end(), '\\', '_'); + std::replace(filename.begin(), filename.end(), '/', '_'); + std::replace(filename.begin(), filename.end(), ' ', '_'); + std::replace(filename.begin(), filename.end(), ':', '_'); + return filename; +} + +void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string filename, bool dump_raw) { std::ofstream file_stream(filename); if (!mem) { file_stream << "Empty" << std::endl; @@ -195,9 +201,7 @@ void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std std::cout << "Dump for this data type is not supported: " << dt_to_str(mem_dt) << std::endl; } -} // namespace - -static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::string name) { +std::string get_file_path_for_binary_dump(cldnn::layout layout, const std::string& name, const std::string& dump_layers_path) { std::string filename; std::string data_type = ov::element::Type(layout.data_type).get_type_name(); std::string format = layout.format.to_string(); @@ -207,64 +211,148 @@ static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::stri tensor += ("_" + to_string(dims[r])); } -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_GET_INSTANCE(debug_config); - std::string layer_name = debug_config->get_name_for_dump(name); - filename = debug_config->dump_layers_path + layer_name - + "__" + data_type + "_" + tensor + "__" + format + ".bin"; -#endif + std::string layer_name = get_name_for_dump(name); + filename = dump_layers_path + layer_name + "__" + data_type + "_" + tensor + "__" + format + ".bin"; return filename; } +bool is_target_iteration(int64_t iteration, const std::set dump_iteration) { + if (iteration < 0) + return true; + + if (dump_iteration.empty()) + return true; + + if (dump_iteration.find(iteration) == std::end(dump_iteration)) + return false; + + return true; +} + +std::string get_matched_from_filelist(const std::vector& file_names, std::string pattern) { + for (const auto& file : file_names) { + auto found = file.find(pattern); + if (found != std::string::npos) { + return file; + } + } + + return std::string(); +} + +bool is_layer_name_matched(const std::string& layer_name, const std::string& pattern) { + auto upper_layer_name = std::string(layer_name.length(), '\0'); + std::transform(layer_name.begin(), layer_name.end(), upper_layer_name.begin(), ::toupper); + auto upper_pattern = std::string(pattern.length(), '\0'); + std::transform(pattern.begin(), pattern.end(), upper_pattern.begin(), ::toupper); + + // Check pattern from exec_graph + size_t pos = upper_layer_name.find(':'); + auto upper_exec_graph_name = upper_layer_name.substr(pos + 1, upper_layer_name.size()); + if (upper_exec_graph_name.compare(upper_pattern) == 0) { + return true; + } + + // Check pattern with regular expression + std::regex re(upper_pattern); + return std::regex_match(upper_layer_name, re); +} + +bool is_layer_for_dumping(const ExecutionConfig& config, const std::string& layer_name) { + const auto& dump_layers = config.get_dump_layer_names(); + if (dump_layers.empty()) + return true; + + auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){ + return 
is_layer_name_matched(layer_name, dl); + }); + return (iter != dump_layers.end()); +} + +std::vector get_filenames_for_matched_layer_loading_binaries(const ExecutionConfig& config, const std::string& id) { + std::vector file_names; + if (config.get_load_dump_raw_binary().empty()) + return file_names; + + for (const auto& load_layer : config.get_load_dump_raw_binary()) { + size_t file = load_layer.rfind(":"); + if (file != std::string::npos) { + if (id == load_layer.substr(0, file)) { + auto file_name_str = load_layer.substr(file + 1); + size_t head = 0; + size_t found = 0; + do { + found = file_name_str.find(",", head); + if (found != std::string::npos) + file_names.push_back(file_name_str.substr(head, (found - head))); + else + file_names.push_back(file_name_str.substr(head)); + + head = found+1; + GPU_DEBUG_LOG << " Layer name loading raw dump : " << load_layer.substr(0, file) << " / the dump file : " + << file_names.back() << std::endl; + } while (found != std::string::npos); + + return file_names; + } + } + } + + return file_names; +} + +} // namespace + NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) : m_inst(inst) , m_stream(inst.get_network().get_stream()) , m_network(inst.get_network()) , m_program(inst.get_network().get_program().get()) , m_iter(m_network.iteration) { + const auto& config = m_network.get_config(); // Load binary dump for input layers - if (!debug_config->load_layers_raw_dump.empty()) { + if (!config.get_load_dump_raw_binary().empty()) { const std::string layer_name = m_inst.id(); - auto files = debug_config->get_filenames_for_matched_layer_loading_binaries(layer_name); + auto files = get_filenames_for_matched_layer_loading_binaries(config, layer_name); if (!files.empty()) { if (m_inst.is_input()) { // Loading binary dumps for output tensors of input-layers : only one output exists or index(dstN) exists - auto dump_file = debug_config->get_matched_from_filelist(files, "_dst0__"); + auto dump_file = get_matched_from_filelist(files, "_dst0__"); OPENVINO_ASSERT((files.size() == 1 || dump_file.length() != 0), "Unexpected binary dump for input layer"); - OPENVINO_ASSERT(files.size() == m_inst.outputs_memory_count(), "Mis-match dump file count"); + OPENVINO_ASSERT(files.size() == m_inst.outputs_memory_count(), "Mismatch dump file count"); for (size_t i = 0; i < m_inst.outputs_memory_count(); i++) { auto dump_file = files[0]; if (files.size() > 1 || m_inst.outputs_memory_count() != 1) { std::string pattern = "_dst" + std::to_string(i) + "__"; - dump_file = debug_config->get_matched_from_filelist(files, pattern); + dump_file = get_matched_from_filelist(files, pattern); } OPENVINO_ASSERT((dump_file.length() > 0), "Could not find expected pattern '_dst[N]__' for binary dump"); GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for " << layer_name << std::endl; std::vector bin = ov::util::load_binary(dump_file); - OPENVINO_ASSERT(!bin.empty(), "Failure loading binary from OV_GPU_LoadDumpRawBinary : " + dump_file); + OPENVINO_ASSERT(!bin.empty(), "Failure loading binary from OV_LOAD_DUMP_RAW_BINARY : " + dump_file); auto output_mem = m_inst.output_memory_ptr(i); - OPENVINO_ASSERT(output_mem->size() == bin.size(), "memory size mis-match for OV_GPU_LoadDumpRawBinary : " + layer_name + OPENVINO_ASSERT(output_mem->size() == bin.size(), "memory size mis-match for OV_LOAD_DUMP_RAW_BINARY : " + layer_name + "\n Expected size : " + to_string(output_mem->size()) + ", Binary : " + to_string(bin.size())); output_mem->copy_from(m_stream, static_cast(&bin[0]), true); } 
} else { - auto check_dst = debug_config->get_matched_from_filelist(files, "_dst0__"); + auto check_dst = get_matched_from_filelist(files, "_dst0__"); OPENVINO_ASSERT(check_dst.length() == 0, "Expected to load binaries for inputs of " + layer_name); // Loading input tensors for any layer - auto dump_file = debug_config->get_matched_from_filelist(files, "_src0__"); + auto dump_file = get_matched_from_filelist(files, "_src0__"); OPENVINO_ASSERT(dump_file.length() != 0, "Could not find expected pattern '_src[N]__' for binary dump input : " + layer_name); for (size_t i = 0; i < m_inst.dependencies().size(); i++) { auto dump_file = files[0]; if (files.size() > 1 || m_inst.dependencies().size() != 1) { std::string pattern = "_src" + std::to_string(i) + "__"; - dump_file = debug_config->get_matched_from_filelist(files, pattern); + dump_file = get_matched_from_filelist(files, pattern); } if (dump_file.length() == 0) { GPU_DEBUG_COUT << " Skip loading for input(" << i << ") of " << layer_name << std::endl; @@ -274,11 +362,11 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for input(" << i << ") of " << layer_name << std::endl; std::vector bin = ov::util::load_binary(dump_file); - OPENVINO_ASSERT(!bin.empty(), "Failure loading binary from OV_GPU_LoadDumpRawBinary : " + dump_file); + OPENVINO_ASSERT(!bin.empty(), "Failure loading binary from OV_LOAD_DUMP_RAW_BINARY : " + dump_file); auto input_mem = m_inst.dep_memory_ptr(i); if (input_mem->size() != bin.size()) { - std::cout << "WARNING: memory size mis-match for OV_GPU_LoadDumpRawBinary : " + layer_name + std::cout << "WARNING: memory size mis-match for OV_LOAD_DUMP_RAW_BINARY : " + layer_name << " " << input_mem->size() << " / " << bin.size() << std::endl; bin.resize(input_mem->size()); } @@ -290,12 +378,12 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) } // Dump input buffers of 'inst' - if (debug_config->dump_layers_path.length() > 0) { - const std::string layer_name = inst.id(); + if (config.get_dump_tensors_path().length() > 0) { + const std::string& layer_name = inst.id(); - if (debug_config->is_target_iteration(m_iter) && - debug_config->dump_layers_dst_only == 0 && debug_config->is_layer_for_dumping(layer_name)) { - std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":"; + if (is_target_iteration(m_iter, config.get_dump_iterations()) && + config.get_dump_tensors() != ov::intel_gpu::DumpTensors::out && is_layer_for_dumping(config, layer_name)) { + std::string debug_str_for_bin_load = " Command for loading : OV_LOAD_DUMP_RAW_BINARY=\"" + layer_name + ":"; for (size_t i = 0; i < m_inst.dependencies().size(); i++) { std::string name = get_file_prefix() + "_src" + std::to_string(i); auto input_mem = m_inst.dep_memory_ptr(i); @@ -306,24 +394,27 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) auto dep = m_inst.dependencies().at(i); auto input_layout = dep.first->get_output_layout(dep.second); - GPU_DEBUG_IF(debug_config->dump_layers_binary) { + if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary) { // Binary dump : raw - auto filename = get_file_path_for_binary_dump(input_layout, name); + auto filename = get_file_path_for_binary_dump(input_layout, name, config.get_dump_tensors_path()); mem_lock lock(input_mem, m_stream); ov::util::save_binary(filename, lock.data(), input_mem->size()); - GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << 
std::endl; + GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << std::endl; debug_str_for_bin_load += (filename + ","); } else { + const bool dump_raw = config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::text_raw; + GPU_DEBUG_COUT << " Dump " << (dump_raw ? "raw " : "") << name << std::endl; + auto filename = config.get_dump_tensors_path() + get_name_for_dump(name) + ".txt"; log_memory_to_file(input_mem, input_layout, m_stream, - name, - debug_config->dump_layers_raw); + filename, + dump_raw); } } - if (debug_config->dump_layers_binary && !inst.is_input()) { + if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary && !inst.is_input()) { debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"'; GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl; } @@ -333,14 +424,16 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) NodeDebugHelper::~NodeDebugHelper() { + const auto& config = m_network.get_config(); // Dump output buffers of 'inst' - if (debug_config->dump_layers_path.length() > 0) { + if (config.get_dump_tensors_path().length() > 0) { m_stream.finish(); const std::string layer_name = m_inst.id(); - GPU_DEBUG_IF(debug_config->is_target_iteration(m_iter) && - debug_config->is_layer_for_dumping(layer_name, m_inst.is_output(), m_inst.is_input())) { - std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + if (is_target_iteration(m_iter, config.get_dump_iterations()) && + config.get_dump_tensors() != ov::intel_gpu::DumpTensors::in && + is_layer_for_dumping(config, layer_name)) { + std::string debug_str_for_bin_load = " Command for loading : OV_LOAD_DUMP_RAW_BINARY=\"" + layer_name + ":"; for (size_t i = 0; i < m_inst.outputs_memory_count(); i++) { std::string name = get_file_prefix() + "_dst" + std::to_string(i); @@ -350,22 +443,29 @@ NodeDebugHelper::~NodeDebugHelper() { continue; } - GPU_DEBUG_IF(debug_config->dump_layers_binary) { + if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary) { // Binary dump : raw auto output_layout = m_inst.get_output_layout(i); - auto filename = get_file_path_for_binary_dump(output_layout, name); + auto filename = get_file_path_for_binary_dump(output_layout, name, config.get_dump_tensors_path()); mem_lock lock(output_mem, m_stream); ov::util::save_binary(filename, lock.data(), output_mem->size()); GPU_DEBUG_COUT << " Dump layer dst : " << layer_name << " to " << filename << std::endl; debug_str_for_bin_load += (filename + ","); } else { + const bool dump_raw = config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::text_raw; + GPU_DEBUG_COUT << " Dump " << (dump_raw ? 
"raw " : "") << name << std::endl; + auto filename = config.get_dump_tensors_path() + get_name_for_dump(name) + ".txt"; // Text dump - log_memory_to_file(output_mem, m_inst.get_output_layout(i), m_stream, name, debug_config->dump_layers_raw); + log_memory_to_file(output_mem, + m_inst.get_output_layout(i), + m_stream, + filename, + dump_raw); } } - GPU_DEBUG_IF(debug_config->dump_layers_binary && m_inst.is_input()) { + if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary && m_inst.is_input()) { debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"'; GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;; } @@ -377,13 +477,14 @@ NetworkDebugHelper::NetworkDebugHelper(const network& net) : m_network(net) , m_iter(net.iteration) { auto net_id = m_network.get_id(); - GPU_DEBUG_IF(debug_config->dump_memory_pool > 0) { - auto& iters = debug_config->dump_memory_pool_iters; + const auto& config = m_network.get_config(); + if (config.get_dump_memory_pool()) { + auto& iters = config.get_dump_iterations(); if (iters.empty() || iters.find(m_iter) != iters.end()) { GPU_DEBUG_COUT << "============================================================================" << std::endl; GPU_DEBUG_COUT << "Start network execution (net_id : " << net_id << ", iter :" << m_iter << ")" << std::endl; if (m_iter == 0 && net_id > 0) { - dump_memory_pool(debug_config->dump_memory_pool_path, m_iter); + dump_memory_pool(config.get_dump_memory_pool_path(), m_iter); GPU_DEBUG_COUT << "============================================================================" << std::endl; } } @@ -391,38 +492,14 @@ NetworkDebugHelper::NetworkDebugHelper(const network& net) GPU_DEBUG_TRACE << "============================================================================" << std::endl; GPU_DEBUG_TRACE << "Start network execution (net_id : " << net_id << ", iter :" << m_iter << ")" << std::endl; } - - if (debug_config->list_layers == 1) { - for (auto& inst : m_network._exec_order) { - GPU_DEBUG_COUT << inst->id() << std::endl; - if (inst->get_node().is_type()) { - auto& loop_node = inst->get_node().as(); - for (auto& prim : loop_node.get_body_program()->get_processing_order()) { - GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; - } - } else if (inst->get_node().is_type()) { - auto& cond_node = inst->get_node().as(); - GPU_DEBUG_COUT << "* Branch_True" << std::endl; - for (auto& prim : cond_node.get_branch_true().inner_program->get_processing_order()) { - GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; - } - GPU_DEBUG_COUT << "* Branch_False" << std::endl; - for (auto& prim : cond_node.get_branch_false().inner_program->get_processing_order()) { - GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; - } - } - } - - if (!m_network.is_internal()) - exit(0); - } } NetworkDebugHelper::~NetworkDebugHelper() { auto prog = m_network.get_program().get(); auto net_id = m_network.get_id(); + const auto& config = prog->get_config(); // print '-data_shape' option for benchmark_app - if (debug_config->print_input_data_shapes == 1) { + if (config.get_verbose() >= 4) { std::stringstream data_shape_str; auto add_string = [&data_shape_str](std::string str) { data_shape_str << ((data_shape_str.rdbuf()->in_avail() == 0) ? 
" -data_shape " : ",") << str; @@ -443,7 +520,7 @@ NetworkDebugHelper::~NetworkDebugHelper() { << data_shape_str.str() << std::endl; } - if (!debug_config->dump_graphs.empty() && debug_config->is_target_iteration(m_iter)) { + if (!config.get_dump_graphs_path().empty() && is_target_iteration(m_iter, config.get_dump_iterations())) { auto get_fixed_str = [](int value, int length = 2) -> std::string { std::ostringstream ss; ss << std::setw(length) << std::setfill('0') << std::to_string(value); @@ -459,10 +536,10 @@ NetworkDebugHelper::~NetworkDebugHelper() { } } - if (debug_config->dump_memory_pool > 0) { - auto& iters = debug_config->dump_memory_pool_iters; + if (config.get_dump_memory_pool()) { + auto& iters = config.get_dump_iterations(); if (iters.empty() || iters.find(m_iter) != iters.end()) { - dump_memory_pool(debug_config->dump_memory_pool_path, m_iter); + dump_memory_pool(config.get_dump_memory_pool_path(), m_iter); GPU_DEBUG_COUT << "============================================================================" << std::endl; } } diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.hpp b/src/plugins/intel_gpu/src/graph/debug_helper.hpp index 61572433cea494..e2137733cf73fc 100644 --- a/src/plugins/intel_gpu/src/graph/debug_helper.hpp +++ b/src/plugins/intel_gpu/src/graph/debug_helper.hpp @@ -39,8 +39,6 @@ class NodeDebugHelper { const network& m_network; const program* m_program; const size_t m_iter; - - const debug_configuration* debug_config = cldnn ::debug_configuration ::get_instance(); }; class NetworkDebugHelper { @@ -52,8 +50,6 @@ class NetworkDebugHelper { void dump_memory_pool(std::string dump_path, int64_t curr_iter) const; const network& m_network; const size_t m_iter; - - const debug_configuration* debug_config = cldnn ::debug_configuration ::get_instance(); }; #define NETWORK_DEBUG(net) NetworkDebugHelper __network_debug_helper(net) diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index 7805cb575aed9a..581f3f08dc120d 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -393,7 +393,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : ""); } } else { - bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool use_new_shape_infer = network.get_config().get_allow_new_shape_infer(); auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index d8324cfb2e047a..d9f88477a124d0 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -250,8 +250,7 @@ kernel_impl_params fully_connected_inst::get_fake_aligned_params(kernel_impl_par } } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_fake_alignment) { + GPU_DEBUG_IF(orig_impl_param.get_program().get_config().get_disable_fake_alignment()) { can_apply_fake_alignment = false; } diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index 2020f10015f916..7d503faf8a2255 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -150,7 +150,7 @@ void gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. 
In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 333afe18775e0b..cffb4457000380 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -161,7 +161,7 @@ bool add_required_reorders::test_format(cldnn::program_node& node, format reques } void add_required_reorders::run(program& p) { - bool optimize_data = p.get_config().get_property(ov::intel_gpu::optimize_data); + bool optimize_data = p.get_config().get_optimize_data(); auto usr_itr = p.get_processing_order().begin(); while (usr_itr != p.get_processing_order().end()) { auto& usr = *usr_itr++; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp index 4c1b1008434144..ef4300c33bfea1 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp @@ -11,7 +11,7 @@ using namespace cldnn; void build_implementations::run(program& p) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "pass::build_implementations"); - if (p.get_config().get_property(ov::intel_gpu::partial_build_program)) { + if (p.get_config().get_partial_build_program()) { return; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index eec55260e2ea4b..6c38bce8dd9e31 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -17,7 +17,7 @@ using namespace cldnn; namespace cldnn { void graph_initializations::set_outputs(program& p) { - auto custom_outputs = p.get_config().get_property(ov::intel_gpu::custom_outputs); + auto custom_outputs = p.get_config().get_custom_outputs(); if (!custom_outputs.empty()) { for (auto const& output : custom_outputs) { OPENVINO_ASSERT(p.has_node(output), "not found custom output node in current cldnn::program: ", output); @@ -37,7 +37,7 @@ void graph_initializations::set_outputs(program& p) { void graph_initializations::run(program& p) { set_outputs(p); - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().get_force_implementations(); for (auto& kv : forcing_map) { if (p.has_node(kv.first)) { p.get_node(kv.first).set_forced_impl_type(kv.second.impl_type); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp index 4d21869dfa3953..2786a9e8e85b99 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp @@ -43,10 +43,6 @@ void post_optimize_weights::optimize_weights(T& node, 
program& p) { return; if (impl->is_dynamic()) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_build_time_weight_reorder_for_dynamic_nodes) { - return; - } // TODO: To relax current limitation w.r.t the future optimization of weight reorder process // In dynamic shape, selected weight format can change in runtime. However reordering blocked format to blocked format is not fully verified yet. // So we need to enable other primitives such as convolution with verifying reorder b/w the possible layouts diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 03e4af4d16359b..1c79ab27101808 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -80,8 +80,7 @@ bool concat_in_place_optimization::match(const program_node& concat_node, if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph()) return false; bool do_runtime_buffer_fusing = true; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(concat_node.get_config().get_disable_runtime_buffer_fusing()) { do_runtime_buffer_fusing = false; } @@ -522,8 +521,7 @@ bool crop_in_place_optimization::match(const program_node& node, return false; if (node.get_users().size() > 0) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing && node.is_dynamic()) { + GPU_DEBUG_IF(node.get_config().get_disable_runtime_buffer_fusing() && node.is_dynamic()) { return false; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index ce5333f95a1b59..622b7cff4101ad 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -56,6 +56,9 @@ using namespace cldnn; void prepare_primitive_fusing::run(program& p) { + GPU_DEBUG_IF(p.get_config().get_disable_post_ops_fusions()) + return; + fuse_reorders(p); remove_redundant_reshape(p); fuse_swiglu(p); @@ -165,10 +168,7 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { } void prepare_primitive_fusing::fuse_swiglu(program &p) { - GPU_DEBUG_GET_INSTANCE(debug_config); - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_fc_swiglu_fusion = GPU_DEBUG_VALUE_OR(p.get_config().get_disable_fc_swiglu_fusion(), false); // Apply only for high performant GPU if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index f63f1bf4efbe21..33320126a9d910 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -16,6 +16,9 @@ using namespace cldnn; void prepare_primitive_fusing_through::run(program& p) { + GPU_DEBUG_IF(p.get_config().get_disable_post_ops_fusions()) + return; + auto try_fuse_through = [&](program_node& node) -> std::vector { 
// This function tries to fuse peer_node to first non reorder or reshape previous primitive. // It returns chain of primitives (reshapes and reorders) including potential fused_node (e.g. Conv, FC, etc) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index 34314155837197..435b67cafaf7e5 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "pass_manager.h" #include "program_node.h" #include "intel_gpu/runtime/engine.hpp" @@ -132,9 +133,10 @@ propagate_constants::calculate(engine& engine, if (!has_non_trivial_constants) return {}; - ExecutionConfig cf_config = config; + ExecutionConfig cf_config = config.clone(); cf_config.set_property(ov::intel_gpu::optimize_data(false)); cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); + cf_config.finalize(engine); network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); std::map weightless_cache_map; for (auto& cin : const_inputs) { diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 7b3cbdabe639a0..088afd84f5ff6a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -679,8 +679,6 @@ void insert_reorders(program& p, const std::map& fm } // namespace void reorder_inputs::run(program& p, reorder_factory& rf) { - GPU_DEBUG_GET_INSTANCE(debug_config); - auto& lo = p.get_layout_optimizer(); auto fmt_map = get_preferred_formats(p, lo); @@ -704,7 +702,7 @@ void reorder_inputs::run(program& p, reorder_factory& rf) { GPU_DEBUG_LOG_PASS << " " << node_ptr->id() << " " << fmt_to_str(fmt) << std::endl; } - GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_IF(p.get_config().get_verbose() >= 2) { reorder_cnt total_reorder_count = std::accumulate(p.get_processing_order().begin(), p.get_processing_order().end(), diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index a4e6d989543837..8b3a73c74f3aa6 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -74,7 +74,7 @@ void select_preferred_formats::run(program& p) { } #endif // ENABLE_ONEDNN_FOR_GPU - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().get_force_implementations(); for (auto n : p.get_processing_order()) { n->recalc_output_layout(); @@ -121,7 +121,7 @@ void select_preferred_formats::run(program& p) { optimize_conv_permute(*n); } } catch (std::exception& exception) { - GPU_DEBUG_INFO << "WARNING(select_preferred_formats): " << exception.what() << std::endl; + GPU_DEBUG_LOG << "WARNING(select_preferred_formats): " << exception.what() << std::endl; } print_selected_formats(*n); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index d7813c03d80f99..b33a391dadea4c 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -203,7 +203,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { params.quantization = kernel_selector::QuantizationType::NONE; } - params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_property(ov::hint::dynamic_quantization_group_size); + params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_dynamic_quantization_group_size(); return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 624db86e38342c..9b73d9711a5a32 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -1204,13 +1204,13 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p params.engineInfo.ip_version = device_info.ip_version; params.engineInfo.arch = kernel_selector::gpu_arch(static_cast::type>(device_info.arch)); - auto impl_forcing = config.get_property(ov::intel_gpu::force_implementations); + auto impl_forcing = config.get_force_implementations(); if (impl_forcing.count(param_info.desc->id) != 0) { params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name; } - params.allowStaticInputReordering = config.get_property(ov::intel_gpu::optimize_data) || config.get_property(ov::intel_gpu::allow_static_input_reorder); + params.allowStaticInputReordering = config.get_optimize_data() || config.get_allow_static_input_reorder(); params.allowInputReordering = false; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index cf7ac0f7b3f5a3..195a2443016dfd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -111,7 +111,7 @@ namespace cldnn { std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _config.get_property(ov::cache_dir); + auto path = _config.get_cache_dir(); if (path.empty()) { return {}; } @@ -123,20 +123,12 @@ std::string kernels_cache::get_cache_path() const { } bool kernels_cache::is_cache_enabled() const { - if (!_config.get_property(ov::intel_gpu::allow_new_shape_infer) && - (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SPEED)) { + if (!_config.get_allow_new_shape_infer() && + (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SPEED)) { return false; } - return !_config.get_property(ov::cache_dir).empty(); -} - -size_t kernels_cache::get_max_kernels_per_batch() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { - return static_cast(debug_config->max_kernels_per_batch); - } - return _config.get_property(ov::intel_gpu::max_kernels_per_batch); + return !_config.get_cache_dir().empty(); } void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { @@ -205,7 +197,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, // Create new kernels batch when the limit is reached // and current kernel's entry_point is duplicated in this kernels batch - if (current_bucket.back().kernels_counter >= get_max_kernels_per_batch() + if (current_bucket.back().kernels_counter >= 
_config.get_max_kernels_per_batch() || current_bucket.back().entry_point_to_id.find(entry_point) != current_bucket.back().entry_point_to_id.end() || need_separate_batch(entry_point)) { const auto& batch_id = static_cast(current_bucket.size()); @@ -246,11 +238,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, b.hash_value = std::hash()(full_code); - std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { - dump_sources_dir = debug_config->dump_sources; - } + std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(_config.get_dump_sources_path(), ""); // Add -g -s to build options to allow IGC assembly dumper to associate assembler sources with corresponding OpenCL kernel code lines // Should be used with the IGC_ShaderDump option @@ -306,11 +294,9 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co auto& cl_build_device = dynamic_cast(*_device); bool dump_sources = batch.dump_custom_program; - std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { + std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(_config.get_dump_sources_path(), ""); + GPU_DEBUG_IF(!dump_sources_dir.empty()) { dump_sources = true; - dump_sources_dir = debug_config->dump_sources; } std::string err_log; // accumulated build log from all program's parts (only contains messages from parts which @@ -385,7 +371,7 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co if (is_cache_enabled()) { // If kernels caching is enabled, then we save compiled bucket to binary file with name ${code_hash_value}.cl_cache // Note: Bin file contains full bucket, not separate kernels, so kernels reuse across different models is quite limited - // Bucket size can be changed in get_max_kernels_per_batch() method, but forcing it to 1 will lead to much longer + // Bucket size can be changed by max_kernels_per_batch config option, but forcing it to 1 will lead to much longer // compile time. 
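// [Editor's note] GPU_DEBUG_VALUE_OR is used in the kernels_cache hunks above but
// is not defined anywhere in this patch. A minimal sketch of the semantics the
// call sites rely on (assumed, not the actual header): debug-caps builds read the
// config-driven value, while release builds collapse to the compile-time fallback
// so the debug option has zero cost there.
#ifdef ENABLE_DEBUG_CAPS
#    define GPU_DEBUG_VALUE_OR(debug_value, release_value) (debug_value)
#else
#    define GPU_DEBUG_VALUE_OR(debug_value, release_value) (release_value)
#endif
// Usage, as in get_program_source() above:
//     std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(_config.get_dump_sources_path(), "");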
std::lock_guard lock(cacheAccessMutex); ov::intel_gpu::save_binary(cached_bin_name, getProgramBinaries(std::move(program))); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp index 79ccfd050f4a93..5eeb314339a613 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp @@ -114,7 +114,6 @@ class kernels_cache { std::string get_cache_path() const; bool is_cache_enabled() const; - size_t get_max_kernels_per_batch() const; bool _reuse_kernels = false; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp index 3e46b3bc2dfbd0..167f712a687ed6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp @@ -18,8 +18,9 @@ struct ConcatenationImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; static const std::vector supported_types = { ov::element::f16, ov::element::u8, ov::element::i8 }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index 6f8502423047b6..0f8d271bc3a6cd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -157,7 +157,6 @@ struct convolution_onednn : typed_primitive_onednn_impl { dnnl::memory::desc desc = onednn::layout_to_memory_desc(a_zp->get_layout(), dnnl::memory::format_tag::a, true); args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC, a_zp->get_onednn_memory(desc)}); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_TRACE_DETAIL << instance.id() << " activations_zero_points: " << " " << a_zp->get_layout().to_short_string() << std::endl; } @@ -167,7 +166,6 @@ struct convolution_onednn : typed_primitive_onednn_impl { dnnl::memory::desc desc = onednn::layout_to_memory_desc(w_zp->get_layout(), dnnl::memory::format_tag::a, true); args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS, w_zp->get_onednn_memory(desc)}); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_TRACE_DETAIL << instance.id() << " weights_zero_points: " << " " << w_zp->get_layout().to_short_string() << std::endl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp index c3f599fc5db9f6..430c42dee57f75 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp @@ -23,8 +23,9 @@ struct ConvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == 
gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& conv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp index 039cf36261caa0..238214f82dc6fb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp @@ -19,8 +19,9 @@ struct DeconvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& deconv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp index c4dc5f7faa6531..731a83372a9dfc 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp @@ -21,8 +21,9 @@ struct FullyConnectedImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& fc_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp index 6c576d177043ee..3d64d2009490c0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp @@ -18,8 +18,9 @@ struct GemmImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& gemm_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp index 6fd16a4dd04acf..4b2615c62e2747 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp @@ -22,10 +22,10 @@ struct LSTMSeqImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (info.arch == gpu_arch::unknown) + if (info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; - const auto& lstm_seq_node = node.as(); const auto& in_layout = lstm_seq_node.get_input_layout(0); const auto& out_layout = lstm_seq_node.get_output_layout(0); diff --git 
a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp index 4710b0c77b83c7..ced0316e13a08f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp @@ -19,8 +19,9 @@ struct PoolingImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& in_layout = node.get_input_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 05a3dc5b2a9e4b..fe5920355e29c7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -47,12 +47,11 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _attrs(attrs), _pd(pd) { - _enable_profiling = config.get_property(ov::enable_profiling); + _enable_profiling = config.get_enable_profiling(); _scratchpad_md = _pd.scratchpad_desc(); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 4) { + GPU_DEBUG_IF(config.get_verbose() >= 4) { if (_scratchpad_md.get_size() > 0) { static std::atomic_llong total{0}; int64_t size = _scratchpad_md.get_size() / 1048576; @@ -70,9 +69,8 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _pd(), _prim() { - _enable_profiling = config.get_property(ov::enable_profiling); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + _enable_profiling = config.get_enable_profiling(); + GPU_DEBUG_IF(!config.get_dump_profiling_data_path().empty()) { _enable_profiling = true; } } @@ -318,7 +316,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { private: std::string get_cache_directory(const ExecutionConfig& config) const { - auto path = config.get_property(ov::cache_dir); + auto path = config.get_cache_dir(); if (path.empty()) { return {}; } @@ -343,7 +341,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive(const ExecutionConfig& config) { auto cache_outpath = get_cache_directory(config); - if (!config.get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!config.get_allow_new_shape_infer()) { cache_outpath = ""; } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp index 68d963fd9e369f..4a4a4c60df032d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp @@ -48,8 +48,9 @@ struct ReduceImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; 
const auto& reduce_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp index c4117058da88e9..824069f56b9583 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp @@ -56,8 +56,9 @@ struct ReorderImplementationManager : public ImplementationManager { if (output_fmt == format::custom) return true; + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; if (!one_of(input_fmt.value, supported_formats) || !one_of(output_fmt.value, supported_formats)) diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp index fdb2f151de8986..0ce180380f14b5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp @@ -58,7 +58,7 @@ std::unique_ptr ImplementationManager::create(const program_node if (auto impl = create_impl(node, params)) { update_impl(*impl, params); impl->set_node_params(node); - impl->can_share_kernels = node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse); + impl->can_share_kernels = node.get_program().get_config().get_enable_kernels_reuse(); return impl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp index d974b78f8e6d14..ce461632631d15 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp @@ -59,7 +59,7 @@ const std::vector>& Registry(scores_layout.get_partial_shape()[0].get_length()); const size_t kClassNum = static_cast(scores_layout.get_partial_shape()[1].get_length()); const size_t kNStreams = - static_cast(node.get_program().get_config().get_property(ov::streams::num)); + static_cast(node.get_program().get_config().get_num_streams()); const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; return kKeyValue > 64; } diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 01286a1c6d04bc..619693f3b1a6fc 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -199,6 +199,8 @@ class primitive_inst { program_node const& get_node() const { return *_node; } network& get_network() const { return _network; } uint32_t get_network_id() const; + const ExecutionConfig& get_config() const { return get_network().get_config(); } + virtual event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0); void check_memory_to_set(const memory& mem, const layout& layout) const; const std::list& get_users() const { return _node->get_users(); } diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 269a3c30fd293c..229dec6a80c77e 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ 
b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -158,6 +158,7 @@ struct program_node { program& get_program() { return myprog; } program& get_program() const { return myprog; } + const ExecutionConfig& get_config() const { return myprog.get_config(); } primitive_impl* get_selected_impl() const { return selected_impl.get(); } void set_selected_impl(std::unique_ptr impl); diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 167b254a247637..dfd65cd9b58067 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -435,15 +435,10 @@ bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { - bool disable_winograd_conv = node.get_program().get_config().get_property(ov::intel_gpu::disable_winograd_convolution); + bool disable_winograd_conv = node.get_program().get_config().get_disable_winograd_convolution(); if (disable_winograd_conv) return false; - // cases when NOT to use winograd - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_winograd_conv == 1) - return false; - auto prim = node.get_primitive(); if (input_layout.data_type != data_types::f16 || (input_layout.is_static() && input_layout.feature() % 64 != 0) // current algorithm is effective for ifm to be multiply of 64 @@ -1134,73 +1129,12 @@ bool layout_optimizer::is_primitive_implemented_for_onednn(program_node& node) { return false; } -impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) { -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->forced_impl_types.empty()) { - // Forcing impl type of one primitive - for (const auto& forced_impl_type : debug_config->forced_impl_types) { - if (node.is_type()) { - if (forced_impl_type == "fc:ocl") - return impl_types::ocl; - else if (forced_impl_type == "fc:onednn") - return impl_types::onednn; - } else if (node.is_type()) { - if (forced_impl_type == "gemm:ocl") - return impl_types::ocl; - else if (forced_impl_type == "gemm:onednn") - return impl_types::onednn; - } else if (node.is_type()) { - if (forced_impl_type == "do:cpu") - return impl_types::cpu; - else if (forced_impl_type == "do:ocl") - return impl_types::ocl; - } else if (node.is_type()) { - if (forced_impl_type == "reduce:ocl") - return impl_types::ocl; - else if (forced_impl_type == "reduce:onednn") - return impl_types::onednn; - } else if (node.is_type()) { - if (forced_impl_type == "concat:ocl") - return impl_types::ocl; - else if (forced_impl_type == "concat:onednn") - return impl_types::onednn; - } - - // Forcing one layer - size_t found_type = forced_impl_type.rfind(":"); - if (found_type != std::string::npos) { - impl_types preferred_type = impl_types::any; - auto impl_type = forced_impl_type.substr(found_type + 1); - if (impl_type == "ocl") - preferred_type = impl_types::ocl; - else if (impl_type == "onednn") - preferred_type = impl_types::onednn; - else if (impl_type == "cpu") - preferred_type = impl_types::cpu; - - if (node.id() == forced_impl_type.substr(0, found_type)) { - GPU_DEBUG_LOG << " Forced implementation type : " << forced_impl_type.substr(0, found_type) << " : " - << forced_impl_type.substr(found_type + 1) << std::endl; - return preferred_type; - } - } - } - } -#endif - - return impl_types::any; -} - impl_types layout_optimizer::get_preferred_impl_type(program_node& 
node, format preferred_format) { if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) { auto forced_impl = _forcing_map.at(node.id()).second; if (forced_impl != impl_types::any) return forced_impl; } - auto forced_impl = get_forced_impl_type_by_config(node); - if (forced_impl != impl_types::any) - return forced_impl; auto shape_type = shape_types::any; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index aa7c9a55775e6d..1b310fd4542f86 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -60,7 +60,7 @@ namespace cldnn { namespace { #ifdef GPU_DEBUG_CONFIG -void dump_perf_data_raw(std::string dump_path, const std::list>& exec_order) { +void dump_perf_data_raw(std::string dump_path, bool per_iter_mode, const std::list>& exec_order) { auto layouts_to_str = [](const std::vector& layouts) -> std::string { std::stringstream s; for (size_t i = 0; i < layouts.size(); i++) { @@ -71,7 +71,6 @@ void dump_perf_data_raw(std::string dump_path, const std::listdump_profiling_data_per_iter != 0; const std::string perf_raw_csv_header = per_iter_mode ? "prim_id,prim_type,stage,net_in_shapes,in_shapes,out_shapes,impl,iter,time_usec\n" : "prim_id,prim_type,stage,net_in_shapes,in_shapes,out_shapes,impl,iters,time_usec\n"; std::ofstream of(dump_path); @@ -139,13 +138,12 @@ void dump_perf_data_raw(std::string dump_path, const std::list& pids) { bool need_to_wait; do { need_to_wait = false; struct stat buffer; - for (auto pid : debug_config->after_proc) { + for (auto pid : pids) { auto path = "/proc/" + pid; std::cout << "check " + path << std::endl; if (stat(path.c_str(), &buffer) == 0) { @@ -158,8 +156,7 @@ void wait_for_the_turn() { } #else -void dump_perf_data_raw(std::string, const std::list>&) {} -void wait_for_the_turn() {} +void dump_perf_data_raw(std::string, bool per_iter_mode, const std::list>&) {} #endif } // namespace @@ -174,33 +171,22 @@ opt pass). 
*/ network::network(program::ptr program, stream::ptr stream, bool is_internal, bool is_primary_stream) : _program(program) - , _config(program->get_config()) , _engine(program->get_engine()) , _stream(stream) - , _memory_pool(new memory_pool(program->get_engine())) + , _memory_pool(new memory_pool(program->get_engine(), program->get_config())) , _internal(is_internal) , _is_primary_stream(is_primary_stream) - , _enable_profiling(program->get_config().get_property(ov::enable_profiling)) + , _enable_profiling(program->get_config().get_enable_profiling()) , _reset_arguments(true) - , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) { + , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_shape_predictor_settings())) { if (!_internal) { net_id = get_unique_net_id(); } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->after_proc.size() != 0) { - wait_for_the_turn(); - } - - GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { - auto& mem_preallocation_params = debug_config->mem_preallocation_params; - _shape_predictor.reset(new ShapePredictor(&program->get_engine(), - mem_preallocation_params.next_iters_preallocation_count, - mem_preallocation_params.max_per_iter_size, - mem_preallocation_params.max_per_dim_diff, - mem_preallocation_params.buffers_preallocation_ratio)); - } - + GPU_DEBUG_CODE( + if (get_config().get_start_after_processes().size() != 0) { + wait_for_the_turn(get_config().get_start_after_processes()); + }); calculate_weights_cache_capacity(); allocate_primitives(); configure_primitives_second_output(); @@ -238,9 +224,9 @@ network::~network() { if (_program != nullptr) _program->cancel_compilation_context(); _memory_pool->clear_pool_for_network(net_id); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - dump_perf_data_raw(debug_config->dump_profiling_data + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order); + std::string dump_path = GPU_DEBUG_VALUE_OR(get_config().get_dump_profiling_data_path(), ""); + GPU_DEBUG_IF(!dump_path.empty()) { + dump_perf_data_raw(dump_path + "/perf_raw" + std::to_string(net_id) + ".csv", false, _exec_order); } } @@ -398,7 +384,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.get_property(ov::streams::num); + required_mem_size += weights_const_size * get_config().get_num_streams(); // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index 2732476a063f8f..df29ed36a7fd12 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -157,7 +157,7 @@ void non_max_suppression_gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[i]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[i].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[i] = {_network.get_engine().reinterpret_buffer(input_memory(i), _impl_params->get_output_layout(i))}; diff --git a/src/plugins/intel_gpu/src/graph/permute.cpp b/src/plugins/intel_gpu/src/graph/permute.cpp index bf87e78e4bbbc5..c4fddfde978c2d 100644 --- a/src/plugins/intel_gpu/src/graph/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/permute.cpp @@ -146,7 +146,7 @@ void permute_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 5774c828b2d59a..14a1072422f442 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -164,7 +164,7 @@ static memory::ptr get_memory_from_pool(engine& _engine, OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound"); // Use layout with max tensor for dynamic shape with upper bound - if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + if (_node.get_program().get_config().get_enable_memory_pool()) { if (curr_memory != nullptr) pool.release_memory(curr_memory, _node.get_unique_id(), _node.id(), net_id); return pool.get_memory(layout, @@ -558,7 +558,6 @@ void primitive_inst::clear_output_memory() { void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("realloc_if_needed: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::memory_allocation); const auto& users = get_user_insts(); @@ -837,11 +836,6 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { } int32_t tmp_prealloc_count = get_prealloc_iter_num(); - GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { - // If debug config is set, repsect the config most - tmp_prealloc_count = -1; - } - // If we allocated too large memory, reclaim the memory. 
for (size_t i = 0; i < updated_layouts.size(); ++i) { bool reclaim = 0; @@ -1083,8 +1077,7 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { } bool primitive_inst::use_async_compilation() { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_async_compilation) { + GPU_DEBUG_IF(get_config().get_disable_async_compilation()) { return false; } @@ -1276,8 +1269,7 @@ void primitive_inst::update_paddings() { void primitive_inst::do_runtime_skip_reorder() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_skip_reorder: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_skip_reorder) { + GPU_DEBUG_IF(get_config().get_disable_runtime_skip_reorder()) { return; } if (can_be_optimized()) @@ -1582,8 +1574,7 @@ void primitive_inst::do_runtime_in_place_concat() { return false; }; OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_concat: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } if (update_shape_done_by_other) { @@ -1692,8 +1683,7 @@ void primitive_inst::do_runtime_skip_scatter_update() { void primitive_inst::do_runtime_in_place_crop() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_crop: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } @@ -1986,8 +1976,7 @@ void primitive_inst::execute() { set_out_event(_impl->execute(_impl_params->dep_events, *this)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { auto ev = _impl_params->out_event; get_network().get_stream().wait_for_events({ev}); @@ -2043,7 +2032,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool : _network(network) , _node(&node) , _node_output_layout(node.get_output_layout()) - , _use_shared_kernels(node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)) + , _use_shared_kernels(node.get_program().get_config().get_enable_kernels_reuse()) , _impl_params(node.get_kernel_impl_params()) , _impl(node.get_selected_impl() ? 
node.get_selected_impl()->clone() : nullptr) , _runtime_memory_dependencies(node.get_memory_dependencies()) @@ -2324,8 +2313,7 @@ void primitive_inst::update_weights() { reorder_impl->set_arguments(*reorder_inst, args); add_dep_event(reorder_impl->execute({}, *reorder_inst)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { stream.wait_for_events(_impl_params->dep_events); } @@ -2600,8 +2588,8 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { ExecutionConfig subgraph_config{ ov::intel_gpu::allow_static_input_reorder(true), ov::intel_gpu::allow_new_shape_infer(true), - ov::enable_profiling(get_network().get_config().get_property(ov::enable_profiling)), - ov::intel_gpu::use_onednn(get_network().get_config().get_property(ov::intel_gpu::use_onednn)) + ov::enable_profiling(get_network().get_config().get_enable_profiling()), + ov::intel_gpu::use_onednn(get_network().get_config().get_use_onednn()) }; auto prog = program::build_program(get_network().get_engine(), t, @@ -2759,42 +2747,31 @@ bool primitive_inst::is_valid_fusion() const { } void primitive_inst::add_profiling_data(instrumentation::pipeline_stage stage, bool cache_hit, std::string memalloc_info, int64_t time, bool per_iter_mode) { - GPU_DEBUG_GET_INSTANCE(debug_config); -#ifdef GPU_DEBUG_CONFIG - int64_t curr_iter = -1; - GPU_DEBUG_IF(debug_config->dump_prof_data_iter_params.is_enabled) { - curr_iter = get_network().get_current_iteration_num(); - } - GPU_DEBUG_IF(curr_iter < 0 || debug_config->is_target_dump_prof_data_iteration(curr_iter)) { -#else - { -#endif - instrumentation::perf_counter_key key { - _network.get_input_layouts(), - _impl_params->input_layouts, - _impl_params->output_layouts, - get_implementation_name(), - stage, + instrumentation::perf_counter_key key { + _network.get_input_layouts(), + _impl_params->input_layouts, + _impl_params->output_layouts, + get_implementation_name(), + stage, #ifdef GPU_DEBUG_CONFIG - per_iter_mode ? get_network().get_current_iteration_num() : 0, + per_iter_mode ? 
get_network().get_current_iteration_num() : 0, #else - 0, + 0, #endif - cache_hit, - memalloc_info - }; - - auto hash = instrumentation::perf_counter_hash()(key); - auto& d = _profiling_data[hash]; - if (_profiling_info.find(hash) == _profiling_info.end()) { - _profiling_info.emplace(hash, key); - } + cache_hit, + memalloc_info + }; - auto& total_time = std::get<0>(d); - auto& total_iter = std::get<1>(d); - total_time += time; - total_iter++; + auto hash = instrumentation::perf_counter_hash()(key); + auto& d = _profiling_data[hash]; + if (_profiling_info.find(hash) == _profiling_info.end()) { + _profiling_info.emplace(hash, key); } + + auto& total_time = std::get<0>(d); + auto& total_iter = std::get<1>(d); + total_time += time; + total_iter++; } std::string primitive_inst::get_implementation_name() const { diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index c3de17f8a196d3..df03981a6057d5 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -16,7 +16,6 @@ #include "intel_gpu/runtime/compilation_context.hpp" #include "intel_gpu/graph/program.hpp" -#include "auto_tuner.h" #include "layout_optimizer.h" #include "pass_manager.h" #include "primitive_type.h" @@ -91,7 +90,6 @@ #include #include #include -#include #include #include #include @@ -107,8 +105,8 @@ using namespace cldnn; using namespace ov::intel_gpu; static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) { - int streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads); - auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority); + int streams = (num_streams > 0) ? 
num_streams : config.get_compilation_num_threads(); + auto priority = config.get_host_task_priority(); auto core_type = ov::hint::SchedulingCoreType::ANY_CORE; switch (priority) { case ov::hint::Priority::LOW: core_type = ov::hint::SchedulingCoreType::ECORE_ONLY; break; @@ -116,7 +114,7 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E case ov::hint::Priority::HIGH: core_type = ov::hint::SchedulingCoreType::PCORE_ONLY; break; default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); } - bool enable_cpu_pinning = config.get_property(ov::hint::enable_cpu_pinning); + bool enable_cpu_pinning = config.get_enable_cpu_pinning(); ov::threading::IStreamsExecutor::Config task_executor_config(tags, streams, @@ -162,7 +160,7 @@ program::program(engine& engine_ref, program_node::reset_unique_id(); if (no_optimizations) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + _config.finalize(_engine); } else { build_program(is_internal); if (_is_body_program) { @@ -198,7 +196,7 @@ program::program(engine& engine_ref, _task_executor(std::move(task_executor)), processing_order(), is_internal(is_internal) { - _config.apply_user_properties(_engine.get_device_info()); + _config.finalize(_engine); init_primitives(); init_program(); prepare_nodes(nodes); @@ -211,8 +209,8 @@ program::program(engine& engine, const ExecutionConfig& config) _config(config), processing_order() { init_primitives(); - _config.apply_user_properties(_engine.get_device_info()); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + _config.finalize(_engine); + new_shape_infer = _config.get_allow_new_shape_infer(); _layout_optimizer = std::make_unique(); } @@ -220,11 +218,10 @@ program::~program() { } void program::init_program() { - GPU_DEBUG_GET_INSTANCE(debug_config); set_options(); pm = std::unique_ptr(new pass_manager(*this)); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + new_shape_infer = _config.get_allow_new_shape_infer(); if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); @@ -232,19 +229,14 @@ void program::init_program() { kernel_selector::KernelBase::get_db().get_batch_headers(), kernel_selector::KernelBase::get_db().get_cm_batch_headers())); - _kernels_cache->set_kernels_reuse(get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)); + _kernels_cache->set_kernels_reuse(_config.get_enable_kernels_reuse()); if (!_compilation_context) _compilation_context = program::make_compilation_context(_config); _layout_optimizer = std::make_unique(); - size_t impls_cache_capacity = _impls_cache_capacity; - GPU_DEBUG_IF(debug_config->impls_cache_capacity >= 0) { - impls_cache_capacity = debug_config->impls_cache_capacity; - } - - _impls_cache = std::make_unique(impls_cache_capacity); + _impls_cache = std::make_unique(get_config().get_impls_cache_capacity()); // Remove items of compilation context's internal queue when some impl is popped in kernels_cache // compilation context's queue check duplication of inserted task _impls_cache->set_remove_item_callback([this](ImplementationsCache::ItemType& item) { @@ -486,26 +478,17 @@ void program::set_options() { static std::atomic id_gen{0}; prog_id = ++id_gen; assert(prog_id != 0); - if (!_config.get_property(ov::intel_gpu::force_implementations).empty()) { - _config.set_property(ov::intel_gpu::optimize_data(true)); - } - - GPU_DEBUG_GET_INSTANCE(debug_config); - 
GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - _config.set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - } } void program::build_program(bool is_internal) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + _config.finalize(_engine); { pre_optimize_graph(is_internal); } run_graph_compilation(); { post_optimize_graph(is_internal); } - GPU_DEBUG_GET_INSTANCE(debug_config); #ifdef GPU_DEBUG_CONFIG - if (debug_config->dry_run_path.empty() || is_internal) { + if (get_config().get_dry_run_path().empty() || is_internal) { #else { #endif @@ -528,10 +511,6 @@ void program::init_graph() { for (auto& node : processing_order) { if (!node->is_type()) node->get_output_layouts(); - if (node->is_type()) { - _config.set_property(ov::intel_gpu::use_onednn(true)); - _config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } } // Perform initial shape_of subgraphs markup apply_opt_pass(); @@ -549,7 +528,7 @@ void program::pre_optimize_graph(bool is_internal) { bool output_size_handling_enabled = analyze_output_size_handling_need(); - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.get_optimize_data(); if (optimize_data) { apply_opt_pass(); } @@ -559,26 +538,13 @@ void program::pre_optimize_graph(bool is_internal) { reorder_factory rf; if (optimize_data) { - GPU_DEBUG_GET_INSTANCE(debug_config); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); apply_opt_pass(); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); @@ -626,7 +592,7 @@ void program::post_optimize_graph(bool is_internal) { reorder_factory rf; - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.get_optimize_data(); if (!is_internal) { apply_opt_pass(rf); @@ -634,10 +600,9 @@ void program::post_optimize_graph(bool is_internal) { apply_opt_pass(false, true); // TODO: do we need it at this place also? 
- auto partial_build = _config.get_property(ov::intel_gpu::partial_build_program); + auto partial_build = _config.get_partial_build_program(); #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_GET_INSTANCE(debug_config); - if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { + if (!is_internal && (!partial_build || !_config.get_dry_run_path().empty())) { #else if (!is_internal && !partial_build) { #endif @@ -653,7 +618,7 @@ void program::post_optimize_graph(bool is_internal) { // Recalculate processing order after all graph transformation to keep optimal primitives ordering // for OOO queue - if (_config.get_property(ov::intel_gpu::queue_type) == QueueTypes::out_of_order) + if (_config.get_queue_type() == QueueTypes::out_of_order) get_processing_order().calculate_BFS_processing_order(); apply_opt_pass(); @@ -777,7 +742,7 @@ const std::vector& program::get_allocating_order(bool forced_updat } void program::prepare_memory_dependencies() { - if (!_config.get_property(ov::intel_gpu::enable_memory_pool)) + if (!_config.get_enable_memory_pool()) return; for (auto& node : get_processing_order()) { node->add_memory_dependency(node->get_unique_id()); @@ -1408,8 +1373,7 @@ program::primitives_info program::get_current_stage_info() const { } void program::save_pass_info(std::string pass_name) { - // TODO: Directory path here can be probably changed to some bool flag - if (!_config.get_property(ov::intel_gpu::dump_graphs).empty()) + GPU_DEBUG_IF(!_config.get_dump_graphs_path().empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } @@ -1437,7 +1401,7 @@ const program::primitives_info& program::get_primitives_info() const { return pr void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); } void program::set_layout_optimizer_attributes(layout_optimizer& lo) { - lo.set_implementation_forcing(_config.get_property(ov::intel_gpu::force_implementations)); + lo.set_implementation_forcing(_config.get_force_implementations()); // first pass to set layout optimization_attributes for topology @@ -1663,15 +1627,15 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); #ifdef ENABLE_ONEDNN_FOR_GPU - bool enable_onednn_for_tests = get_config().get_property(ov::intel_gpu::optimize_data) || is_internal_program(); + bool enable_onednn_for_tests = get_config().get_optimize_data() || is_internal_program(); auto& engine = get_engine(); if (engine.get_device_info().vendor_id == INTEL_VENDOR_ID && - get_config().get_property(ov::intel_gpu::queue_type) == QueueTypes::in_order && + get_config().get_queue_type() == QueueTypes::in_order && enable_onednn_for_tests) { if (engine.get_device_info().supports_immad) { lo.add_all_onednn_impls_optimization_attribute(); } else { - if (get_config().get_property(ov::intel_gpu::use_onednn)) { + if (get_config().get_use_onednn()) { lo.enable_onednn_for(); } } @@ -1681,7 +1645,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { std::pair program::get_estimated_device_mem_usage() { auto max_alloc_size = get_engine().get_device_info().max_alloc_mem_size; - memory_pool pool(get_engine()); + memory_pool pool(get_engine(), get_config()); int64_t const_sum = 0; #ifdef __unix__ @@ -1885,8 +1849,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) { init_program(); std::shared_ptr mapped_memory = nullptr; - std::string weights_path = _config.get_property(ov::weights_path); - if 
(_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && + std::string weights_path = _config.get_weights_path(); + if (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && ov::util::validate_weights_path(weights_path)) { mapped_memory = ov::load_mmap_object(weights_path); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 6a09fcd10eb513..eb5c152a361a16 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -3,6 +3,7 @@ // #include "program_dump_graph.h" +#include "intel_gpu/runtime/debug_configuration.hpp" #include "to_string_utils.h" #include "data_inst.h" #include "condition_inst.h" @@ -139,7 +140,6 @@ void close_stream(std::ofstream& graph) { graph.close(); } std::string get_node_id(const program_node* ptr) { return "node_" + std::to_string(reinterpret_cast(ptr)); } void dump_full_node(std::ofstream& out, const program_node* node) { - GPU_DEBUG_GET_INSTANCE(debug_config); try { out << node->type()->to_string(*node); } catch(const std::exception& e) { @@ -157,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_property(ov::intel_gpu::dump_graphs); + std::string path = GPU_DEBUG_VALUE_OR(config.get_dump_graphs_path(), ""); if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index b7767c14f9abff..65df228d6c733f 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -1851,8 +1851,7 @@ void program_node::create_onednn_primitive_attributes( // Trying to combine multiplications and additions which are placed one after another. // We do it in the cycle because some optimization cases can be simplified again from time to time do { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn_opt_post_ops) + GPU_DEBUG_IF(get_config().get_disable_onednn_post_ops_opt()) break; optimized_post_ops = try_optimize_post_ops(fused_ops, optimized_post_ops, attrs, optimization_is_finished); } while (!optimization_is_finished); diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 76b15ca54bcff8..75a4011eeefc25 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -287,7 +287,7 @@ void reorder_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index fc874e29f70ac4..b6e5b23a0f6476 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -320,7 +320,7 @@ void reshape_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. 
In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index 316acef0e492e8..a651baa50002fa 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -75,7 +75,7 @@ void scatter_elements_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index 2f109f83df428f..3dbec05dbbe3b3 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -86,7 +86,7 @@ void scatter_nd_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_update.cpp index 4291ee67caa3ef..947507533796e0 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_update.cpp @@ -66,7 +66,7 @@ void scatter_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 5435de5598bea0..a3b6ad9166c964 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -95,7 +95,7 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p 3, ""); - bool allow_new_shape_infer = network.get_program()->get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool allow_new_shape_infer = network.get_program()->get_config().get_allow_new_shape_infer(); // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true if (!allow_new_shape_infer) { if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index 007553b8a9d192..674e7649bc9820 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -208,7 +208,7 @@ void strided_slice_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp index a103a159faaf5d..72f3cc9120b9f1 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp @@ -70,7 +70,6 @@ JitConstants DynamicQuantizeKernelRef::GetJitConstants(const dynamic_quantize_pa } CommonDispatchData DynamicQuantizeKernelRef::SetDefault(const dynamic_quantize_params& params) const { - GPU_DEBUG_GET_INSTANCE(debug_config); CommonDispatchData dispatchData; OPENVINO_ASSERT(params.outputs[0].GetLayout() == DataLayout::bfyx, "It supports only 4d tensor"); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index d0d3f293fe8797..e2f740fde9600b 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -89,26 +89,6 @@ static bool is_per_token_dynamic_quantize(const fully_connected_params& params) static size_t get_dynamic_quantize_group_size(const fully_connected_params& params) { auto 
dynamic_quantization_group_size = params.dynamic_quantization_group_size; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - - // Specify which Fully-connected layer would be dynamic-quantized - GPU_DEBUG_IF(!debug_config->dynamic_quantize_layers_without_onednn.empty()) { - auto layers = debug_config->dynamic_quantize_layers_without_onednn; - auto iter = std::find_if(layers.begin(), layers.end(), [&](const std::string& pattern){ - return debug_config->is_layer_name_matched(params.layerID, pattern); - }); - - if (iter != layers.end()) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - GPU_DEBUG_COUT << "Found specified Fully-connected layer [" << params.layerID << "]. Enable Dynamic-quantize." << std::endl; - } else { - dynamic_quantization_group_size = 0; - } - } - } - size_t scale_group_size = get_scale_group_size(params); size_t zp_group_num = params.decompression_zero_point.Feature().v; size_t zp_group_size = 0; diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 42ba3e46f83d01..eb6952eaed28f4 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -5,6 +5,7 @@ #include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/util/weights_path.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" @@ -20,17 +21,17 @@ namespace ov::intel_gpu { namespace { std::shared_ptr create_task_executor(const std::shared_ptr& plugin, const ExecutionConfig& config) { - if (config.get_property(ov::internal::exclusive_async_requests)) { + if (config.get_exclusive_async_requests()) { // exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with // the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); - } else if (config.get_property(ov::hint::enable_cpu_pinning) || - config.get_property(ov::hint::enable_cpu_reservation)) { - bool enable_cpu_pinning = config.get_property(ov::hint::enable_cpu_pinning); - bool enable_cpu_reservation = config.get_property(ov::hint::enable_cpu_reservation); + } else if (config.get_enable_cpu_pinning() || + config.get_enable_cpu_reservation()) { + bool enable_cpu_pinning = config.get_enable_cpu_pinning(); + bool enable_cpu_reservation = config.get_enable_cpu_reservation(); return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.get_property(ov::num_streams), + config.get_num_streams(), 1, ov::hint::SchedulingCoreType::PCORE_ONLY, enable_cpu_reservation, @@ -38,7 +39,7 @@ std::shared_ptr create_task_executor(const std::sh } else { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.get_property(ov::num_streams), + config.get_num_streams(), 0, ov::hint::SchedulingCoreType::ANY_CORE, false, @@ -62,7 +63,7 @@ CompiledModel::CompiledModel(std::shared_ptr model, m_outputs(ov::ICompiledModel::outputs()), m_loaded_from_cache(false) { auto graph_base = std::make_shared(model, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < 
m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -157,7 +158,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, } auto graph_base = std::make_shared(ib, context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -179,8 +180,8 @@ std::shared_ptr CompiledModel::create_infer_request() co void CompiledModel::export_model(std::ostream& model) const { // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching // which requires the weights_path. - ov::CacheMode cache_mode = m_config.get_property(ov::cache_mode); - std::string weights_path = m_config.get_property(ov::weights_path); + ov::CacheMode cache_mode = m_config.get_cache_mode(); + std::string weights_path = m_config.get_weights_path(); if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) return; @@ -188,7 +189,7 @@ void CompiledModel::export_model(std::ostream& model) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks); + const ov::EncryptionCallbacks encryption_callbacks = m_config.get_cache_encryption_callbacks(); // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty. const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; @@ -291,15 +292,15 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::loaded_from_cache) { return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; } else if (name == ov::optimal_number_of_infer_requests) { - unsigned int nr = m_config.get_property(ov::num_streams); - if (m_config.get_property(ov::hint::performance_mode) != ov::hint::PerformanceMode::LATENCY) + unsigned int nr = m_config.get_num_streams(); + if (m_config.get_performance_mode() != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { return decltype(ov::execution_devices)::value_type{m_context->get_device_name()}; } - return m_config.get_property(name); + return m_config.get_property(name, OptionVisibility::RELEASE); } std::shared_ptr CompiledModel::create_sync_infer_request() const { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 6859728076fb6a..cba0d6aab9276d 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -2,7 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/graph/serialization/helpers.hpp" #include "intel_gpu/runtime/layout.hpp" +#include "openvino/core/any.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/runtime/exec_model_info.hpp" #include "openvino/pass/serialize.hpp" @@ -38,7 +41,7 @@ Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& con : m_context(context) , m_config(config) , m_stream_id(stream_id) { - auto program_builder = std::make_shared(model, 
get_engine(), config, false); + auto program_builder = std::make_shared(model, get_engine(), config); m_config = program_builder->get_config(); build(program_builder->get_compiled_program()); @@ -85,15 +88,11 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context ib >> perfEntry.parentPrimitive; } } - { - bool bool_prop_value; - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::partial_build_program(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::optimize_data(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(bool_prop_value)); - } + + IstreamAttributeVisitor visitor(ib); + m_config.visit_attributes(visitor); + m_config.set_user_property(config.get_user_properties()); // Copy user properties in case they were modified during the import call + m_config.finalize(context.get(), nullptr); auto imported_prog = std::make_shared(get_engine(), m_config); imported_prog->load(ib); @@ -114,9 +113,8 @@ Graph::Graph(std::shared_ptr graph, uint16_t stream_id) } Graph::~Graph() { - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { - const auto log_level = cldnn::debug_configuration::get_instance()->host_time_profiling; - + auto log_level = GPU_DEBUG_VALUE_OR(m_config.get_host_time_profiling(), 0); + GPU_DEBUG_IF(log_level) { auto get_time_str = [](int64_t time_mcs, int64_t iters_num = 1) { double time = static_cast(time_mcs); time /= iters_num; @@ -177,25 +175,26 @@ void Graph::build(std::shared_ptr program) { auto external_queue = m_context->get_external_queue(); if (external_queue) { - OPENVINO_ASSERT(m_config.get_property(ov::num_streams) == 1, "[GPU] Throughput streams can't be used with shared queue!"); + OPENVINO_ASSERT(m_config.get_num_streams() == 1, "[GPU] Throughput streams can't be used with shared queue!"); const auto &engine = program->get_engine(); m_network = std::make_shared(program, engine.create_stream(m_config, external_queue), m_stream_id); } else { m_network = std::make_shared(program, m_stream_id); } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dry_run_path.empty()) { - ov::pass::Serialize(debug_config->dry_run_path, "").run_on_model(get_runtime_model()); + std::string dry_run_path = GPU_DEBUG_VALUE_OR(m_config.get_dry_run_path(), ""); + std::string dump_graphs_path = GPU_DEBUG_VALUE_OR(m_config.get_dump_graphs_path(), ""); + GPU_DEBUG_IF(!dry_run_path.empty()) { + ov::pass::Serialize(dry_run_path, "").run_on_model(get_runtime_model()); exit(0); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty() && m_stream_id == 0) { + GPU_DEBUG_IF(!dump_graphs_path.empty() && m_stream_id == 0) { static int net_id = 0; auto steps_info = get_network()->get_optimizer_passes_info(); size_t step_idx = 0; for (auto& step : steps_info) { - auto xml_path = debug_config->dump_graphs + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; + auto xml_path = dump_graphs_path + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; ov::pass::Serialize(xml_path, "").run_on_model(get_runtime_model(step.second, true)); step_idx++; } @@ -209,7 +208,7 @@ bool Graph::use_external_queue() const { std::shared_ptr Graph::get_runtime_model(std::vector& primitives_info, bool filter_const_primitives) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::get_runtime_model"); - if (m_config.get_property(ov::enable_profiling)) { + if (m_config.get_enable_profiling()) { try {
// Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called before network execution @@ -520,11 +519,8 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { ob << perf_item.second.second.parentPrimitive; } } - { - ob << m_config.get_property(ov::intel_gpu::partial_build_program); - ob << m_config.get_property(ov::intel_gpu::optimize_data); - ob << m_config.get_property(ov::intel_gpu::allow_new_shape_infer); - } + OstreamAttributeVisitor visitor(ob); + m_config.visit_attributes(visitor); ob.set_stream(m_network->get_stream_ptr().get()); m_network->get_program()->save(ob); diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 5c797b622aa28b..da080544363d00 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/op/if.hpp" #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/primitives/condition.hpp" @@ -20,17 +21,12 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ << internal_body->get_friendly_name() << ", num inputs: " << op->get_input_size() << std::endl; - auto config = p.get_config(); - { - auto custom_outputs = config.get_property(ov::intel_gpu::custom_outputs); - if (!custom_outputs.empty()) { - config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); - } - } - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); + auto config = p.get_config().clone(); + config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); + config.finalize(p.get_engine()); - ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); + ProgramBuilder prog(internal_body, p.get_engine(), config, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); auto& input_map = branch.input_map; diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 3e052c134390ae..556738c5df52ea 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -297,13 +297,12 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr #include #include -#include #include #include #include @@ -24,22 +23,19 @@ #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/device_query.hpp" #include "intel_gpu/runtime/execution_config.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" #include "intel_gpu/runtime/itt.hpp" +#include "openvino/core/any.hpp" #include "openvino/core/deprecated.hpp" -#include "openvino/op/gather.hpp" -#include "openvino/op/concat.hpp" -#include "openvino/op/paged_attention.hpp" #include "openvino/pass/manager.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" -#include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/visualize_tree.hpp" #include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/make_tensor.hpp" #include 
"openvino/runtime/performance_heuristics.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" -#include "openvino/util/common_util.hpp" #include "openvino/util/weights_path.hpp" #include "transformations/common_optimizations/dimension_tracking.hpp" #include "transformations/init_node_info.hpp" @@ -66,33 +62,6 @@ namespace ov::intel_gpu { #include "intel_gpu/plugin/primitives_list.hpp" #undef REGISTER_FACTORY -const auto is_llm = [](const std::shared_ptr& model) -> bool { - using namespace ov::pass::pattern; - - auto past = wrap_type(); - auto convert_past = wrap_type({past}); - auto gather_input = std::make_shared(OutputVector{past, convert_past}); - auto beam_idx = wrap_type(); - auto gather_past = wrap_type({gather_input, beam_idx, wrap_type()}); - auto gather_convert = wrap_type({gather_past}); - auto concat_past_input = std::make_shared(OutputVector{past, convert_past, gather_past, gather_convert}); - auto concat = wrap_type({concat_past_input, any_input()}); - auto convert_present = wrap_type({concat}); - auto present_input = std::make_shared(OutputVector{concat, convert_present}); - auto present = wrap_type({present_input}); - - auto kvcache_matcher = std::make_shared(present, "KVCacheMatcher"); - - for (auto& op : model->get_ordered_ops()) { - if (kvcache_matcher->match(op) || - ov::is_type(op)) { - return true; - } - } - - return false; -}; - void Plugin::register_primitives() const { #define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name) #include "intel_gpu/plugin/primitives_list.hpp" @@ -128,18 +97,30 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p const ExecutionConfig& config, const std::shared_ptr& context) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::clone_and_transform_model"); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_DEFINE_MEM_LOGGER("Plugin::clone_and_transform_model"); auto cloned_model = model->clone(); OPENVINO_ASSERT(cloned_model != nullptr, "[GPU] Failed to clone model!"); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name(); + // Here we create a copy of the config to finalize it and ensure that transformation pipe can use correct options values + // This is manily needed to correctly update lower level properties when higher level option is set by user + // For example, transformation use inference_precision hint which may be updated by execution_mode property. + // Update itself will happen on finalization stage, so we must call it to have correct passes flow. + // The reason why we can't do finalization once and then just run all graph transformations is that + // part of the tranformations may actually impact some properties. For example, LSTMSequence op presense + // impacts value of use_onednn property. But in order to understand if there's an op of this type we have to run + // common optimizations which may do subgraph fusion to LSTMSequence op. So basically, final value of use_onednn + // property can be computed for transformed model only. 
+ auto config_copy = config.clone(); + config_copy.finalize(context.get(), model.get()); + + std::string dump_path = GPU_DEBUG_VALUE_OR(config_copy.get_dump_graphs_path(), ""); + GPU_DEBUG_IF(!dump_path.empty()) { + auto path_base = dump_path + "/" + cloned_model->get_name(); ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } - transform_model(cloned_model, config, context); + transform_model(cloned_model, config_copy, context); // Transformations for some reason may drop output tensor names, so here we copy those from the original model auto new_results = cloned_model->get_results(); @@ -154,8 +135,8 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p new_res->set_friendly_name(old_res->get_friendly_name()); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name() + "_" + "transformed_func"; + GPU_DEBUG_IF(!dump_path.empty()) { + auto path_base = dump_path + "/" + cloned_model->get_name() + "_" + "transformed_func"; ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } return cloned_model; @@ -194,22 +175,6 @@ Plugin::Plugin() { m_compiled_model_runtime_properties["OV_VERSION"] = ov_version.buildNumber; } -void Plugin::set_cache_info(const std::shared_ptr& model, ExecutionConfig& config) const { - // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with - // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not - // using that mechanism. - if (config.get_property(ov::cache_mode) != ov::CacheMode::OPTIMIZE_SIZE) { - return; - } - - const auto& rt_info = model->get_rt_info(); - auto weights_path = rt_info.find("__weights_path"); - if (weights_path != rt_info.end()) { - ov::AnyMap weights_path_property{{"WEIGHTS_PATH", weights_path->second}}; - config.set_property(weights_path_property); - } -} - std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model"); std::string device_id = get_device_id(orig_config); @@ -219,14 +184,11 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(context->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(context->get_engine().get_device_info()); - - set_cache_info(model, config); + config.set_user_property(orig_config, OptionVisibility::RELEASE); auto transformed_model = clone_and_transform_model(model, config, context); + + config.finalize(context.get(), transformed_model.get()); { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model::CreateCompiledModel"); return std::make_shared(transformed_model, shared_from_this(), context, config); @@ -242,14 +204,12 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - 
config.apply_rt_info(context_impl->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(context_impl->get_engine().get_device_info()); - - set_cache_info(model, config); + config.set_user_property(orig_config, OptionVisibility::RELEASE); auto transformed_model = clone_and_transform_model(model, config, context_impl); + + config.finalize(context_impl.get(), transformed_model.get()); + return std::make_shared(transformed_model, shared_from_this(), context_impl, config); } @@ -277,7 +237,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_user_property(user_config); + config.set_user_property(user_config, OptionVisibility::RELEASE); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -312,14 +272,12 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(ctx->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(ctx->get_engine().get_device_info()); + config.set_user_property(orig_config, OptionVisibility::RELEASE); + config.finalize(ctx.get(), model.get()); ProgramBuilder prog(ctx->get_engine(), config); - float query_model_ratio = config.get_property(ov::internal::query_model_ratio.name()).as(); + float query_model_ratio = config.get_query_model_ratio(); auto supported = ov::get_supported_nodes(model, [&config,&ctx,this](std::shared_ptr& model) { @@ -369,11 +327,10 @@ std::shared_ptr Plugin::import_model(std::istream& model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(_orig_config); - config.apply_user_properties(context_impl->get_engine().get_device_info()); + config.set_user_property(_orig_config, OptionVisibility::RELEASE); - ov::CacheMode cache_mode = config.get_property(ov::cache_mode); - ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks); + ov::CacheMode cache_mode = config.get_cache_mode(); + ov::EncryptionCallbacks encryption_callbacks = config.get_cache_encryption_callbacks(); const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; std::unique_ptr ib_ptr = @@ -390,9 +347,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, return nullptr; } - std::string weights_path = config.get_property(ov::weights_path); - if (config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && - !ov::util::validate_weights_path(weights_path)) { + std::string weights_path = config.get_weights_path(); + if (config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { return nullptr; } @@ -478,7 +434,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] get_property: Couldn't find config for GPU with id ", device_id); const auto& c = m_configs_map.at(device_id); - return c.get_property(name); + return c.get_property(name, OptionVisibility::RELEASE); } auto StringRightTrim = [](std::string 
string, std::string substring, bool case_sensitive = true) { @@ -512,8 +468,6 @@ bool Plugin::is_metric(const std::string& name) const { ov::Any Plugin::get_metric(const std::string& name, const ov::AnyMap& options) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::get_metric"); - GPU_DEBUG_GET_INSTANCE(debug_config); - auto device_id = get_property(ov::device::id.name(), options).as(); auto iter = m_device_map.find(std::to_string(cldnn::device_query::device_id)); @@ -687,12 +641,12 @@ std::vector Plugin::get_device_capabilities(const cldnn::device_inf } uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { - GPU_DEBUG_GET_INSTANCE(debug_config); auto device_id = get_property(ov::device::id.name(), options).as(); auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); - const auto& config = m_configs_map.at(device_id); - uint32_t n_streams = static_cast(config.get_property(ov::num_streams)); + auto config = m_configs_map.at(device_id); + config.set_property(ov::intel_gpu::partial_build_program(true)); + uint32_t n_streams = static_cast(config.get_num_streams()); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>(); auto occupied_usm_dev = statistic_result.find("usm_device_current"); @@ -744,17 +698,14 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { OPENVINO_THROW("[GPU_MAX_BATCH_SIZE] ov::hint::model should be std::shared_ptr type"); } + config.finalize(context.get(), model.get()); + size_t base_batch_size = 16; // empirically decided for DG1 auto& engine = get_default_context(device_id)->get_engine(); std::shared_ptr program; - GPU_DEBUG_IF(debug_config->base_batch_for_memory_estimation > 0) { - size_t user_specified_base_batch_size = debug_config->base_batch_for_memory_estimation; - base_batch_size = (user_specified_base_batch_size != base_batch_size) ? 
user_specified_base_batch_size : base_batch_size; - } - auto cloned_model = model->clone(); try { @@ -809,7 +760,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { TransformationsPipeline transformations(config, context); transformations.apply(cloned_model); - program = std::make_shared(cloned_model, engine, config, true); + program = std::make_shared(cloned_model, engine, config); std::pair device_memory_usage = program->get_compiled_program()->get_estimated_device_mem_usage(); if (device_memory_usage.first == static_cast(-1L) && device_memory_usage.second == static_cast(-1L)) { return static_cast(max_batch_size); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 2abc8bb65df6ac..66e4133a4dc4c5 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/split.hpp" @@ -11,7 +12,7 @@ #include "openvino/op/loop.hpp" #include "openvino/op/search_sorted.hpp" #include "openvino/op/stft.hpp" -#include "ov_ops/dynamic_quantize.hpp" +#include "openvino/runtime/properties.hpp" #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/program_builder.hpp" @@ -61,7 +62,6 @@ std::string layer_type_name_ID(const std::shared_ptr& op) { } ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, - bool partial_build, std::shared_ptr task_executor, std::shared_ptr compilation_context, bool is_inner_program) @@ -105,20 +105,11 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; CustomLayer::LoadFromFile(config_path, m_custom_layers, true); - auto custom_layers_config = m_config.get_property(ov::intel_gpu::config_file); + auto custom_layers_config = m_config.get_config_file(); CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (model->is_dynamic()) { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); - } else { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); - } - - m_program = build(ops, partial_build, is_inner_program); + m_program = build(ops, is_inner_program); } ProgramBuilder::ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config) @@ -148,24 +139,8 @@ void ProgramBuilder::cleanup_build() { #endif } -std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool partial_build, bool is_inner_program) { +std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool is_inner_program) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::build"); - // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. 
- // So, do not check allow_new_shape_infer for inner program build - for (const auto& op : ops) { - if (requires_new_shape_infer(op)) { - allow_new_shape_infer = true; - break; - } - } - - if (is_inner_program) { - allow_new_shape_infer = (m_config.get_property(ov::intel_gpu::allow_new_shape_infer) || allow_new_shape_infer); - } - - m_config.set_property(ov::intel_gpu::partial_build_program(partial_build)); - m_config.set_property(ov::intel_gpu::optimize_data(true)); - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); prepare_build(); { @@ -211,7 +186,6 @@ bool ProgramBuilder::is_op_supported(const std::shared_ptr& op) { if (!data_types_are_supported(op.get())) return false; - allow_new_shape_infer = requires_new_shape_infer(op); CreateSingleLayerPrimitive(op); cleanup_build(); DisableQueryMode(); @@ -268,7 +242,7 @@ std::vector ProgramBuilder::GetInputInfo(const std::shared_pt // Note: Currently Split/Variadic Split are divided to multiple crops // LSTMCell contains its own body network, and each output has a unique pid // But there is no need to maintain output port index for the next node e.g. Result - bool is_legacy_multiple_outputs = !allow_new_shape_infer + bool is_legacy_multiple_outputs = !use_new_shape_infer() || ov::is_type(prevOp) || ov::is_type(prevOp) || ov::is_type(prevOp); @@ -309,7 +283,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_name = op.get_friendly_name(); prim->origin_op_type_name = op.get_type_name(); - if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + if (this->m_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { if (auto data_prim = dynamic_cast(prim.get())) { auto rt_info = op.get_rt_info(); @@ -340,7 +314,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_type_name = prim->type_string(); } - if (this->m_config.get_property(ov::enable_profiling) && should_profile) { + if (this->m_config.get_enable_profiling() && should_profile) { profiling_ids.push_back(prim_id); init_profile_info(*prim); } @@ -352,51 +326,6 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptradd_primitive(prim); } -bool ProgramBuilder::requires_new_shape_infer(const std::shared_ptr& op) const { - if (op->is_dynamic()) { - return true; - } - - // HACK: SearchSorted has specific shape requirements. - // E.g. static input shapes: sorted:[8], values:[2,3,4] are prefectly fine, - // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. - // Similar case for STFT. - if (ov::is_type(op) || ov::is_type(op)) - return true; - - if (ov::is_type(op)) - return true; - - if (ov::is_type(op)) { - const auto body_function = std::static_pointer_cast(op)->get_function(); - if (body_function->is_dynamic()) - return true; - } - - if (ov::is_type(op) || ov::is_type(op)) { - return true; - } - // When input node has dynamic shape with 4 dimension, this function return false - // because op.is_dynamic() which only checks input shapes return false. - // So, in the case of input data, we need to check output shape. 
- for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - return true; - } - - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).size() > 6) - return true; - } - - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).size() > 6) - return true; - } - - return false; -} - int64_t ProgramBuilder::get_parameter_index(const std::shared_ptr& parameter) const { return m_model->get_parameter_index(parameter); } diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 2e6d2c21343977..bff069461d8685 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -91,20 +91,9 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c : ov::ISyncInferRequest(compiled_model) , m_graph(compiled_model->get_graph(0)) , m_context(std::static_pointer_cast(compiled_model->get_context_impl())) - , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) - , m_enable_profiling(m_graph->get_config().get_property(ov::enable_profiling)) + , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_shape_predictor_settings())) + , m_enable_profiling(m_graph->get_config().get_enable_profiling()) , m_use_external_queue(m_graph->use_external_queue()) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { - auto& mem_preallocation_params = debug_config->mem_preallocation_params; - m_shape_predictor.reset( - new cldnn::ShapePredictor(&m_graph->get_engine(), - mem_preallocation_params.next_iters_preallocation_count, - mem_preallocation_params.max_per_iter_size, - mem_preallocation_params.max_per_dim_diff, - mem_preallocation_params.buffers_preallocation_ratio)); - } - init_mappings(); allocate_inputs(); allocate_outputs(); @@ -295,15 +284,16 @@ void SyncInferRequest::enqueue() { m_internal_outputs = network->execute(dependencies); auto network_enqueue_end = std::chrono::high_resolution_clock::now(); + [[maybe_unused]] const auto& config = network->get_config(); + // If dump layers path is set, only runs first inference. - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0 && debug_config->dump_iteration.empty()) { + GPU_DEBUG_IF(!config.get_dump_tensors_path().empty() && config.get_dump_iterations().empty()) { GPU_DEBUG_INFO << "Only run first inference to dump layers." 
<< std::endl; exit(0); } auto enqueue_end = std::chrono::high_resolution_clock::now(); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { + GPU_DEBUG_IF(config.get_host_time_profiling()) { network_enqueue_time = std::chrono::duration_cast(network_enqueue_end - network_enqueue_start).count(); const uint64_t total_time = std::chrono::duration_cast(enqueue_end - enqueue_start).count(); @@ -400,7 +390,7 @@ void SyncInferRequest::wait() { auto mem_shape = output_layout.get_shape(); // In case of old shape infer we need to shrink out tensor shape to avoid redudnant dimensions that occur due to rank extension // For new shape infer this shouldn't happen, thus remove that WA once we migrate to ngraph-based shape infer for all cases - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().get_allow_new_shape_infer()) { OPENVINO_ASSERT(port.get_partial_shape().is_static(), "[GPU] Unexpected dynamic shape for legacy shape inference"); OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor"); mem_shape = port.get_shape(); @@ -481,7 +471,7 @@ void SyncInferRequest::wait() { } auto wait_end = std::chrono::high_resolution_clock::now(); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { + GPU_DEBUG_IF(m_graph->get_config().get_host_time_profiling()) { auto& exec_time_info = m_graph->host_exec_times.back(); const uint64_t total_time = std::chrono::duration_cast(wait_end - wait_start).count(); @@ -873,7 +863,7 @@ std::vector SyncInferRequest::prepare_input(const std::string auto memory = device_tensor->get_memory(); // WA to extend shape to ranks expected by legacy shape infer. Remove after full migration to new shape infer - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().get_allow_new_shape_infer()) { auto new_layout = memory->get_layout(); new_layout.set_partial_shape(m_graph->get_input_layouts().at(input_idx).get_shape()); memory = engine.reinterpret_buffer(*memory, new_layout); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp index 6c0d50be96e7ae..66fe9d9c9e0fc6 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp @@ -16,9 +16,8 @@ namespace ov::intel_gpu { -DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size) +DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric) : ov::pass::MatcherPass() { - GPU_DEBUG_GET_INSTANCE(debug_config); using namespace ov::pass::pattern; using QuantizationType = ov::op::internal::DynamicQuantize::QuantizationType; @@ -55,9 +54,7 @@ DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size config.scale_dt = element::f16; config.group_sizes = shape_group_size; - // AZP does not support grouped size dyn-quan - // XXX: This is currently wrapped as GPU_DEBUG_IF as dynamic_quantize_asym is not exposed through public API. 
- GPU_DEBUG_IF(debug_config->dynamic_quantize_asym && group_size == UINT64_MAX) { + if (asymmetric && group_size == UINT64_MAX) { config.quantization_type = QuantizationType::Asymmetric; config.quantization_dt = element::u8; config.zp_dt = element::u8; // it supports u8 only now diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp index 85d32fbfdcea84..f8b13685389f1d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp @@ -11,7 +11,7 @@ namespace ov::intel_gpu { class DynamicQuantizeFullyConnected: public ov::pass::MatcherPass { public: OPENVINO_MATCHER_PASS_RTTI("DynamicQuantizeFullyConnected"); - DynamicQuantizeFullyConnected(uint64_t group_size); + DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric = false); }; } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp index 8568c334682548..68e78de7a23f16 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp @@ -20,7 +20,6 @@ namespace ov::intel_gpu { FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion(bool fuse_mlp_swiglu) { using namespace ov::pass::pattern; - GPU_DEBUG_GET_INSTANCE(debug_config); // Three FCs connected to the same input size_t min_num_fcs_to_fuse = 3; // Note: diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 6fe6cb14cd54c4..261cff11586fa8 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -335,7 +335,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const ov::element::TypeVector supported_woq_types = {ov::element::u8, ov::element::i8, ov::element::u4, ov::element::i4}; bool enableInt8; ov::element::Type infer_precision = ov::element::undefined; - bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling); + bool unroll_loop = config.get_enable_loop_unrolling(); { ov::pass::Manager manager("Plugin:GPU"); auto pass_config = manager.get_pass_config(); @@ -348,7 +348,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; + enableInt8 = config.get_enable_lp_transformations() && is_model_quantized; manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, @@ -381,7 +381,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; // Add conversion from FP data types to infer precision if it's specified - infer_precision = config.get_property(ov::hint::inference_precision); + infer_precision = config.get_inference_precision(); if (infer_precision != ov::element::undefined) { if (!fp_precision_supported(infer_precision)) infer_precision = fallback_precision; @@ -459,11 +459,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); pass_config->set_callback([&](const std::shared_ptr 
node){ - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->enable_sdpa != -1) { - GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); - } - - if (!config.get_property(ov::intel_gpu::hint::enable_sdpa_optimization)) + if (!config.get_enable_sdpa_optimization()) return false; auto sdpa = ov::as_type_ptr(node); @@ -1024,7 +1020,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // This Validate is needed for proper data type propagation after applying IncreasePositionIdsPrecision pass manager.register_pass(); - float activations_scale_factor = config.get_property(ov::hint::activations_scale_factor); + float activations_scale_factor = config.get_activations_scale_factor(); if (activations_scale_factor > 0.f && infer_precision == ov::element::f16) { using namespace ov::pass::low_precision; @@ -1089,13 +1085,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - bool disable_horizontal_fc_fusion = false; - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_horizontal_fc_fusion == 1) - disable_horizontal_fc_fusion = true; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_horizontal_fc_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_horizontal_fc_fusion(), false); + bool disable_fc_swiglu_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_fc_swiglu_fusion(), false); + // MLP fusion is only supported for cldnn on high-performance GPUs bool fuse_mlp_swiglu = !device_info.supports_immad && device_info.execution_units_count >= 128 && @@ -1133,7 +1125,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - auto kv_cache_compression_dt = config.get_property(ov::hint::kv_cache_precision); + auto kv_cache_compression_dt = config.get_kv_cache_precision(); manager.register_pass(kv_cache_compression_dt, device_info.supports_immad); manager.register_pass(); @@ -1151,7 +1143,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size); + bool asymmetric_dyn_quant = GPU_DEBUG_VALUE_OR(config.get_asym_dynamic_quantization(), false); + auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size(); pass_config->set_callback([=](const_node_ptr& root) -> bool { for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { if (root->get_input_node_shared_ptr(0)->get_output_element_type(i) == ov::element::Type_t::f32) { @@ -1169,14 +1162,14 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // AZP does not support 8bit weight // XXX: This is currently wrapped as GPU_DEBUG_IF as dynamic_quantize_asym is not exposed through public API.
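GPU_DEBUG_VALUE_OR, used above, is not defined in this diff; its apparent contract is to evaluate the debug-config getter in builds with debug caps and to fall back to the given default otherwise. A plausible definition under that assumption (the guard macro name is a guess):

    // Sketch only: in release builds the debug expression compiles away
    // and the fallback constant is used instead.
    #ifdef ENABLE_DEBUG_CAPS
    #    define GPU_DEBUG_VALUE_OR(debug_value, release_value) (debug_value)
    #else
    #    define GPU_DEBUG_VALUE_OR(debug_value, release_value) (release_value)
    #endif

With the flags captured once up front this way, the GPU_DEBUG_IF checks in the callback below read local booleans instead of the removed debug_config singleton.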
- GPU_DEBUG_IF(debug_config->dynamic_quantize_asym + GPU_DEBUG_IF(asymmetric_dyn_quant && (root->get_input_element_type(1) == ov::element::i8 || root->get_input_element_type(1) == ov::element::u8)) { GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support 8bit weight" << std::endl; return true; } // AZP does not support grouped size dyn-quan - GPU_DEBUG_IF(debug_config->dynamic_quantize_asym && (dynamic_quantization_group_size != UINT64_MAX)) { + GPU_DEBUG_IF(asymmetric_dyn_quant && (dynamic_quantization_group_size != UINT64_MAX)) { GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support grouped quantization" << " ('DynamicQuantizeAsym' is enabled with grouped size dyn-quan)" << std::endl; return true; @@ -1193,7 +1186,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return false; }); - manager.register_pass(dynamic_quantization_group_size); + manager.register_pass(dynamic_quantization_group_size, asymmetric_dyn_quant); } // Remove Pad in front of MaxPool if both the pads_begin and pads_end are zero. @@ -1202,7 +1195,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // This is supposed to be the last pass to ensure that we don't have name collisions until // GPU plugin stops using friendly names for program creation manager.register_pass(true); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->verbose >= 1) { + GPU_DEBUG_IF(config.get_verbose() >= 1) { manager.register_pass(); } manager.run_passes(func); diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 550d740f772a16..cb36a8e0349457 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -3,604 +3,23 @@ // #include "intel_gpu/runtime/debug_configuration.hpp" -#include -#include +#include "intel_gpu/runtime/execution_config.hpp" #include -#include -#include -#include -#include #include -namespace cldnn { -const char *debug_configuration::prefix = "GPU_Debug: "; -std::ostream* debug_configuration::verbose_stream; - -// Default policy is that dump_configuration will override other configuration from IE. 
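Everything removed below is the old hand-rolled environment-variable machinery of debug_configuration. Its essential behavior, looking an option up under several prefixes with later (more specific) prefixes taking priority, boils down to this standalone sketch (the function name is illustrative):

    #include <cstdlib>
    #include <iostream>
    #include <string>
    #include <vector>

    // Returns true and fills `value` if any prefixed variant of `name` is set.
    // Prefixes are ordered from lowest to highest priority, so with
    // OV_Verbose=1 and OV_GPU_Verbose=2 both set, the result is "2".
    static bool read_prefixed_env(const std::string& name, std::string& value) {
        const std::vector<std::string> prefixes = {"OV_", "OV_GPU_"};
        bool found = false;
        for (const auto& prefix : prefixes) {
            if (const char* env = std::getenv((prefix + name).c_str())) {
                value = env;
                found = true;
            }
        }
        return found;
    }

    int main() {
        std::string verbose;
        if (read_prefixed_env("Verbose", verbose))
            std::cout << "Verbose = " << verbose << '\n';
    }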
- -#ifdef GPU_DEBUG_CONFIG - -#define GPU_DEBUG_COUT_ std::cout << cldnn::debug_configuration::prefix - -template -void print_option(std::string option_name, T option_value) { - GPU_DEBUG_COUT_ << "Config " << option_name << " = " << option_value << std::endl; -} - -static std::string to_upper_case(const std::string& var) { - std::stringstream s; - - for (size_t i = 0; i < var.size(); i++) { - if (std::isupper(var[i])) { - if (i != 0) { - s << "_"; - } - s << var[i]; - } else { - s << static_cast(std::toupper(var[i])); - } - } - - return s.str(); -} - -static std::vector get_possible_option_names(const std::string& var, std::vector allowed_option_prefixes) { - std::vector result; - - for (auto& prefix : allowed_option_prefixes) { - result.push_back(prefix + var); - result.push_back(prefix + to_upper_case(var)); - } - - return result; -} - -template -T convert_to(const std::string &str) { - std::istringstream ss(str); - T res; - ss >> res; - return res; -} - -template <> -std::string convert_to(const std::string &str) { - return str; -} - -static std::set parse_int_set(std::string& str) { - std::set int_array; - // eliminate '"' from string to avoid parsing error - str.erase(std::remove_if(str.begin(), str.end(), [](char c) { - return c == '\"'; }), str.end()); - if (str.size() > 0) { - str = " " + str + " "; - std::istringstream ss(str); - std::string token; - while (ss >> token) { - try { - int_array.insert(static_cast(std::stol(token))); - } catch(const std::exception &) { - int_array.clear(); - GPU_DEBUG_COUT << "Argument was ignored. It cannot be parsed to integer array: " << str << std::endl; - break; - } - } - } - return int_array; -} - -template -void get_debug_env_var(const std::string &var, T &val, std::vector allowed_option_prefixes) { - bool found = false; - for (auto o : get_possible_option_names(var, allowed_option_prefixes)) { - if (const auto env_var = std::getenv(o.c_str())) { - val = convert_to(env_var); - found = true; - } - } - - if (found) { - print_option(var, val); - } -} - -template -void get_gpu_debug_env_var(const std::string &var, T &val) { - return get_debug_env_var(var, val, {"OV_GPU_"}); -} - -template -void get_common_debug_env_var(const std::string &var, T &val) { - // The list below should be prioritized from lowest to highest prefix priority - // If an option is set several times with different prefixes, version with the highest priority will be actually used. 
- // This may allow to enable global option with some value and override this value for GPU plugin - // For example: OV_GPU_Verbose=2 OV_Verbose=1 ./my_app => this->verbose == 2 - // In that case we enable Verbose (with level = 1) for all OV components that support this option, but for GPU plugin we increase verbose level to 2 - std::vector allowed_option_prefixes = { - "OV_", - "OV_GPU_" - }; - - return get_debug_env_var(var, val, allowed_option_prefixes); -} - -static void print_help_messages() { - std::vector> message_list; - message_list.emplace_back("OV_GPU_Help", "Print help messages"); - message_list.emplace_back("OV_GPU_Verbose", "Verbose execution"); - message_list.emplace_back("OV_GPU_VerboseColor", "Print verbose color"); - message_list.emplace_back("OV_GPU_VerboseFile", "Filename to dump verbose log"); - message_list.emplace_back("OV_GPU_ListLayers", "Print layers names"); - message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive"); - message_list.emplace_back("OV_GPU_PrintInputDataShapes", "Print data_shapes of input layers for benchmark_app."); - message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage"); - message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)"); - message_list.emplace_back("OV_GPU_DisableOnednnOptPostOps", "Disable onednn optimize post operators"); - message_list.emplace_back("OV_GPU_DumpProfilingData", "Enables dump of extended profiling information to specified directory." - " Please use OV_GPU_DumpProfilingDataPerIter=1 env variable to collect performance per iteration." - " Note: Performance impact may be significant as this option enforces host side sync after each primitive"); - message_list.emplace_back("OV_GPU_DumpProfilingDataIteration", "Enable collecting profiling data only at iterations with requested range. " - "For example for dump profiling data only when iteration is from 10 to 20, you can use " - "OV_GPU_DumpProfilingDataIteration='10..20'. Additionally, you can dump profiling data only " - "from one specific iteration by giving the same values for the start and end, and the open " - "ended range is also available by range from given start to the last iteration as -1. e.g. " - "OV_GPU_DumpProfilingDataIteration='10..-1'"); - message_list.emplace_back("OV_GPU_HostTimeProfiling", "Enable collecting of model enqueue time spent on the host"); - message_list.emplace_back("OV_GPU_DumpGraphs", "1) dump ngraph before and after transformation. 2) dump graph in model compiling." - "3) dump graph in execution."); - message_list.emplace_back("OV_GPU_DumpSources", "Dump opencl sources"); - message_list.emplace_back("OV_GPU_DumpLayersPath", "Enable dumping intermediate buffers and set the dest path"); - message_list.emplace_back("OV_GPU_DumpLayers", "Dump intermediate buffers of specified layers only, separated by space." - " Support case-insensitive and regular expression. 
For example .*conv.*"); - message_list.emplace_back("OV_GPU_DumpLayersResult", "Dump output buffers of result layers only"); - message_list.emplace_back("OV_GPU_DumpLayersInput", "Dump intermediate buffers of input layers only"); - message_list.emplace_back("OV_GPU_DumpLayersDstOnly", "Dump only output of layers"); - message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump"); - message_list.emplace_back("OV_GPU_DumpLayersRaw", "If true, dump data is stored in raw memory format."); - message_list.emplace_back("OV_GPU_DumpLayersRawBinary", "If true, dump data is stored in binary format."); - message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path"); - message_list.emplace_back("OV_GPU_BaseBatchForMemEstimation", "Base batch size to be used in memory estimation"); - message_list.emplace_back("OV_GPU_AfterProc", "Run inference after the specified process PIDs are finished, separated by space." - " Supported on only on linux."); - message_list.emplace_back("OV_GPU_SerialCompile", "Serialize creating primitives and compiling kernels"); - message_list.emplace_back("OV_GPU_ForceImplTypes", "Force implementation type of a target primitive or layer. [primitive or layer_name]:[impl_type]" - " For example fc:onednn gemm:onednn reduce:ocl do:cpu" - " For primitives fc, gemm, do, reduce, concat are supported. Separated by space."); - message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels"); - message_list.emplace_back("OV_GPU_ImplsCacheCapacity", "The maximum number of entries in the kernel impl cache"); - message_list.emplace_back("OV_GPU_DisableAsyncCompilation", "Disable async compilation"); - message_list.emplace_back("OV_GPU_DisableWinogradConv", "Disable Winograd convolution"); - message_list.emplace_back("OV_GPU_DisableDynamicImpl", "Disable dynamic implementation"); - message_list.emplace_back("OV_GPU_DisableRuntimeBufferFusing", "Disable runtime buffer fusing"); - message_list.emplace_back("OV_GPU_DisableMemoryReuse", "Disable memory reuse"); - message_list.emplace_back("OV_GPU_EnableSDPA", "This allows the enforcement of SDPA decomposition logic: 0 completely disables SDPA kernel usage, " - "and 1 enables it for all the cases."); - message_list.emplace_back("OV_GPU_DumpMemoryPool", "Dump memory pool contents of each iteration"); - message_list.emplace_back("OV_GPU_DumpMemoryPoolIters", "List of iterations to dump memory pool status, separated by space."); - message_list.emplace_back("OV_GPU_DumpMemoryPoolPath", "Enable dumping memory pool status to csv file and set the dest path"); - message_list.emplace_back("OV_GPU_DisableBuildTimeWeightReorderForDynamicNodes", "Disable build time weight reorder for dynmaic nodes."); - message_list.emplace_back("OV_GPU_DisableRuntimeSkipReorder", "Disable runtime skip reorder."); - message_list.emplace_back("OV_GPU_DisablePrimitiveFusing", "Disable primitive fusing"); - message_list.emplace_back("OV_GPU_DisableFakeAlignment", "Disable fake alignment"); - message_list.emplace_back("OV_GPU_UseUsmHost", "Set explicit policy for usm host usage for network input/output. " - "0: default, 1: use usm_host, 2: do not use usm_host"); - message_list.emplace_back("OV_GPU_KVCacheCompression", "Enable/Disable KV-cache compression"); - message_list.emplace_back("OV_GPU_DynamicQuantizeLayersWithoutOnednn", "Enable Dynamic quantization for specified Fully connected layers only, " - "separated by space. 
Support case-insensitive and regular expression. For example .*fully_connected.*"); - message_list.emplace_back("OV_GPU_DynamicQuantizeGroupSize", "Specify a group size of dynamic quantization to enable " - "dynamic quantization for Fully-connected primitive."); - message_list.emplace_back("OV_GPU_DynamicQuantizeAsym", "Enable asymmetric dynamic quantization when set as 1."); - message_list.emplace_back("OV_GPU_DisableHorizontalFCFusion", "Disable horizontal fc fusion"); - message_list.emplace_back("OV_GPU_DisableFCSwigluFusion", "Disable fc + swiglu fusion"); - message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space."); - message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. Expects 4 values separated by space in " - "the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), " - "max per-dim allowed diff(int), unconditional buffers preallocation ratio(float). For example for disabling memory " - "preallocation at all, you can use OV_GPU_MemPreallocationOptions='0 0 0 1.0'"); - message_list.emplace_back("OV_GPU_LoadDumpRawBinary", - "Specified layers which are loading dumped binary files generated by OV_GPU_DumpLayersRawBinary debug-config." - " Currently, other layers except input-layer('parameter' type) are loading binaries for only input." - " Different input or output tensors are seperated by ','. Different layers are separated by space. For example, " - " \"[input_layer_name1]:[binary_dumped_file1],[binary_dump_file2] [input_layer_name2]:[binary_dump_1],[binary_dump_2]\""); - - auto max_name_length_item = std::max_element(message_list.begin(), message_list.end(), - [](std::pair& a, std::pair& b){ - return a.first.size() < b.first.size(); - }); - int name_width = static_cast(max_name_length_item->first.size()) + 2; - - GPU_DEBUG_COUT_ << "Supported environment variables for debugging" << std::endl; - for (auto& p : message_list) { - GPU_DEBUG_COUT_ << " - " << std::left << std::setw(name_width) << p.first + " " << p.second << std::endl; - } -} - -#endif - -debug_configuration::debug_configuration() - : help(0) - , verbose(0) - , verbose_color(0) - , verbose_file() - , list_layers(0) - , print_multi_kernel_perf(0) - , print_input_data_shapes(0) - , disable_usm(0) - , disable_onednn(0) - , disable_onednn_opt_post_ops(0) - , dump_profiling_data(std::string("")) - , dump_profiling_data_per_iter(0) - , host_time_profiling(0) - , dump_graphs(std::string()) - , dump_sources(std::string()) - , dump_layers_path(std::string()) - , dry_run_path(std::string()) - , dump_layers_dst_only(0) - , dump_layers_result(0) - , dump_layers_input(0) - , dump_layers_limit_batch(std::numeric_limits::max()) - , dump_layers_raw(0) - , dump_layers_binary(0) - , dump_memory_pool(0) - , dump_memory_pool_path(std::string()) - , base_batch_for_memory_estimation(-1) - , serialize_compile(0) - , max_kernels_per_batch(0) - , impls_cache_capacity(-1) - , enable_sdpa(-1) - , disable_async_compilation(0) - , disable_winograd_conv(0) - , disable_dynamic_impl(0) - , disable_runtime_buffer_fusing(0) - , disable_memory_reuse(0) - , disable_build_time_weight_reorder_for_dynamic_nodes(0) - , disable_runtime_skip_reorder(0) - , disable_primitive_fusing(0) - , disable_fake_alignment(0) - , use_usm_host(0) - , use_kv_cache_compression(-1) - , dynamic_quantize_group_size(DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) - , dynamic_quantize_asym(0) - , disable_horizontal_fc_fusion(0) - , 
disable_fc_swiglu_fusion(0) { +namespace ov::intel_gpu { +std::ostream& get_verbose_stream() { #ifdef GPU_DEBUG_CONFIG - get_gpu_debug_env_var("Help", help); - get_common_debug_env_var("Verbose", verbose); - get_gpu_debug_env_var("VerboseColor", verbose_color); - get_gpu_debug_env_var("VerboseFile", verbose_file); - get_gpu_debug_env_var("ListLayers", list_layers); - get_gpu_debug_env_var("PrintMultiKernelPerf", print_multi_kernel_perf); - get_gpu_debug_env_var("PrintInputDataShapes", print_input_data_shapes); - get_gpu_debug_env_var("DisableUsm", disable_usm); - get_gpu_debug_env_var("DumpGraphs", dump_graphs); - get_gpu_debug_env_var("DumpSources", dump_sources); - get_gpu_debug_env_var("DumpLayersPath", dump_layers_path); - get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch); - get_gpu_debug_env_var("DumpLayersRaw", dump_layers_raw); - get_gpu_debug_env_var("DumpLayersRawBinary", dump_layers_binary); - get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only); - get_gpu_debug_env_var("DumpLayersResult", dump_layers_result); - get_gpu_debug_env_var("DumpLayersInput", dump_layers_input); - get_gpu_debug_env_var("DisableOnednn", disable_onednn); - get_gpu_debug_env_var("DisableOnednnOptPostOps", disable_onednn_opt_post_ops); - get_gpu_debug_env_var("DumpProfilingData", dump_profiling_data); - get_gpu_debug_env_var("DumpProfilingDataPerIter", dump_profiling_data_per_iter); - get_gpu_debug_env_var("HostTimeProfiling", host_time_profiling); - std::string dump_prof_data_iter_str; - get_gpu_debug_env_var("DumpProfilingDataIteration", dump_prof_data_iter_str); - get_gpu_debug_env_var("DryRunPath", dry_run_path); - get_gpu_debug_env_var("DumpMemoryPool", dump_memory_pool); - std::string dump_runtime_memory_pool_iters_str; - get_gpu_debug_env_var("DumpMemoryPoolIters", dump_runtime_memory_pool_iters_str); - get_gpu_debug_env_var("DumpMemoryPoolPath", dump_memory_pool_path); - get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation); - std::string dump_layers_str; - get_gpu_debug_env_var("DumpLayers", dump_layers_str); - std::string after_proc_str; - get_gpu_debug_env_var("AfterProc", after_proc_str); - get_gpu_debug_env_var("SerialCompile", serialize_compile); - std::string forced_impl_types_str; - get_gpu_debug_env_var("ForceImplTypes", forced_impl_types_str); - get_gpu_debug_env_var("MaxKernelsPerBatch", max_kernels_per_batch); - get_gpu_debug_env_var("ImplsCacheCapacity", impls_cache_capacity); - get_gpu_debug_env_var("EnableSDPA", enable_sdpa); - get_gpu_debug_env_var("DisableAsyncCompilation", disable_async_compilation); - get_gpu_debug_env_var("DisableWinogradConv", disable_winograd_conv); - get_gpu_debug_env_var("DisableDynamicImpl", disable_dynamic_impl); - get_gpu_debug_env_var("DisableRuntimeBufferFusing", disable_runtime_buffer_fusing); - get_gpu_debug_env_var("DisableMemoryReuse", disable_memory_reuse); - get_gpu_debug_env_var("DisableBuildTimeWeightReorderForDynamicNodes", disable_build_time_weight_reorder_for_dynamic_nodes); - get_gpu_debug_env_var("DisableRuntimeSkipReorder", disable_runtime_skip_reorder); - get_gpu_debug_env_var("DisablePrimitiveFusing", disable_primitive_fusing); - get_gpu_debug_env_var("DisableFakeAlignment", disable_fake_alignment); - get_gpu_debug_env_var("UseUsmHost", use_usm_host); - get_gpu_debug_env_var("KVCacheCompression", use_kv_cache_compression); - get_gpu_debug_env_var("DynamicQuantizeGroupSize", dynamic_quantize_group_size); - get_gpu_debug_env_var("DynamicQuantizeAsym", dynamic_quantize_asym); - 
get_gpu_debug_env_var("DisableHorizontalFCFusion", disable_horizontal_fc_fusion); - get_gpu_debug_env_var("DisableFCSwigluFusion", disable_fc_swiglu_fusion); - std::string dump_iteration_str; - get_gpu_debug_env_var("DumpIteration", dump_iteration_str); - std::string mem_preallocation_params_str; - get_gpu_debug_env_var("MemPreallocationOptions", mem_preallocation_params_str); - std::string load_dump_raw_bin_str; - get_gpu_debug_env_var("LoadDumpRawBinary", load_dump_raw_bin_str); - std::string dynamic_quantize_layers_without_onednn_str; - get_gpu_debug_env_var("DynamicQuantizeLayersWithoutOnednn", dynamic_quantize_layers_without_onednn_str); - - if (help > 0) { - print_help_messages(); - exit(0); - } - - if (verbose_file.length() > 0) { + if (ExecutionConfig::get_log_to_file().length() > 0) { static std::ofstream fout; - fout.open(verbose_file); - verbose_stream = &fout; + if (!fout.is_open()) + fout.open(ExecutionConfig::get_log_to_file()); + return fout; } else { - verbose_stream = &std::cout; - } - - if (dump_prof_data_iter_str.length() > 0) { - dump_prof_data_iter_str = " " + dump_prof_data_iter_str + " "; - std::istringstream iss(dump_prof_data_iter_str); - char dot; - int64_t start, end; - bool is_valid_range = false; - if (iss >> start >> dot >> dot >> end) { - if (start <= end || end == -1) { - try { - is_valid_range = true; - dump_prof_data_iter_params.start = start; - dump_prof_data_iter_params.end = end; - } catch(const std::exception &) { - is_valid_range = false; - } - } - } - if (!is_valid_range) - std::cout << "OV_GPU_DumpProfilingDataIteration was ignored. It cannot be parsed to valid iteration range." << std::endl; - dump_prof_data_iter_params.is_enabled = is_valid_range; - } - - if (dump_layers_str.length() > 0) { - // Insert delimiter for easier parsing when used - dump_layers_str = " " + dump_layers_str + " "; - std::stringstream ss(dump_layers_str); - std::string layer; - while (ss >> layer) { - dump_layers.push_back(layer); - } - } - - if (dynamic_quantize_layers_without_onednn_str.length() > 0) { - // Insert delimiter for easier parsing when used - dynamic_quantize_layers_without_onednn_str = " " + dynamic_quantize_layers_without_onednn_str + " "; - std::stringstream ss(dynamic_quantize_layers_without_onednn_str); - std::string layer; - while (ss >> layer) { - dynamic_quantize_layers_without_onednn.push_back(layer); - } - } - - if (forced_impl_types_str.length() > 0) { - forced_impl_types_str = " " + forced_impl_types_str + " "; - std::stringstream ss(forced_impl_types_str); - std::string type; - while (ss >> type) { - forced_impl_types.push_back(type); - } - } - - // Parsing for loading binary files - if (load_dump_raw_bin_str.length() > 0) { - load_dump_raw_bin_str = " " + load_dump_raw_bin_str + " "; - std::stringstream ss(load_dump_raw_bin_str); - std::string type; - while (ss >> type) { - load_layers_raw_dump.push_back(type); - } - } - - if (dump_iteration_str.size() > 0) { - dump_iteration = parse_int_set(dump_iteration_str); - } - - if (dump_runtime_memory_pool_iters_str.size() > 0) { - dump_memory_pool_iters = parse_int_set(dump_runtime_memory_pool_iters_str); - } - - if (mem_preallocation_params_str.size() > 0) { - mem_preallocation_params_str = " " + mem_preallocation_params_str + " "; - std::istringstream ss(mem_preallocation_params_str); - std::vector params; - std::string param; - while (ss >> param) - params.push_back(param); - - bool correct_params = params.size() == 4; - if (correct_params) { - try { - 
mem_preallocation_params.next_iters_preallocation_count = std::stol(params[0]); - mem_preallocation_params.max_per_iter_size = std::stol(params[1]); - mem_preallocation_params.max_per_dim_diff = std::stol(params[2]); - mem_preallocation_params.buffers_preallocation_ratio = std::stof(params[3]); - } catch(const std::exception &) { - correct_params = false; - } - } - - if (!correct_params) - GPU_DEBUG_COUT_ << "OV_GPU_MemPreallocationOptions were ignored, because they cannot be parsed.\n"; - - mem_preallocation_params.is_initialized = correct_params; - } - - if (after_proc_str.length() > 0) { -#ifdef _WIN32 - GPU_DEBUG_COUT_ << "Warning: OV_GPU_AfterProc is supported only on linux" << std::endl; -#else - after_proc_str = " " + after_proc_str + " "; // Insert delimiter for easier parsing when used - std::stringstream ss(after_proc_str); - std::string pid; - while (ss >> pid) { - after_proc.push_back(pid); - } -#endif - } -#endif -} - -const debug_configuration *debug_configuration::get_instance() { - static std::unique_ptr instance(nullptr); -#ifdef GPU_DEBUG_CONFIG - static std::mutex _m; - std::lock_guard lock(_m); - if (nullptr == instance) - instance.reset(new debug_configuration()); - return instance.get(); -#else - return nullptr; -#endif -} - -bool debug_configuration::is_target_dump_prof_data_iteration(int64_t iteration) const { -#ifdef GPU_DEBUG_CONFIG - if (iteration < 0) - return true; - - if (dump_prof_data_iter_params.start > iteration) - return false; - - if (dump_prof_data_iter_params.start <= dump_prof_data_iter_params.end && - dump_prof_data_iter_params.end < iteration) - return false; - - return true; -#else - return false; -#endif -} - -std::vector debug_configuration::get_filenames_for_matched_layer_loading_binaries(const std::string& id) const { - std::vector file_names; -#ifdef GPU_DEBUG_CONFIG - if (load_layers_raw_dump.empty()) - return file_names; - - for (const auto& load_layer : load_layers_raw_dump) { - size_t file = load_layer.rfind(":"); - if (file != std::string::npos) { - if (id == load_layer.substr(0, file)) { - auto file_name_str = load_layer.substr(file + 1); - size_t head = 0; - size_t found = 0; - do { - found = file_name_str.find(",", head); - if (found != std::string::npos) - file_names.push_back(file_name_str.substr(head, (found - head))); - else - file_names.push_back(file_name_str.substr(head)); - - head = found+1; - GPU_DEBUG_LOG << " Layer name loading raw dump : " << load_layer.substr(0, file) << " / the dump file : " - << file_names.back() << std::endl; - } while (found != std::string::npos); - - return file_names; - } - } - } -#endif - - return file_names; -} - -std::string debug_configuration::get_matched_from_filelist(const std::vector& file_names, std::string pattern) const { -#ifdef GPU_DEBUG_CONFIG - for (const auto& file : file_names) { - auto found = file.find(pattern); - if (found != std::string::npos) { - return file; - } + return std::cout; } -#endif - return std::string(); -} - -std::string debug_configuration::get_name_for_dump(const std::string& file_name) const { - std::string filename = file_name; -#ifdef GPU_DEBUG_CONFIG - std::replace(filename.begin(), filename.end(), '\\', '_'); - std::replace(filename.begin(), filename.end(), '/', '_'); - std::replace(filename.begin(), filename.end(), ' ', '_'); - std::replace(filename.begin(), filename.end(), ':', '_'); -#endif - return filename; -} - -bool debug_configuration::is_layer_name_matched(const std::string& layer_name, const std::string& pattern) const { -#ifdef GPU_DEBUG_CONFIG - 
auto upper_layer_name = std::string(layer_name.length(), '\0'); - std::transform(layer_name.begin(), layer_name.end(), upper_layer_name.begin(), ::toupper); - auto upper_pattern = std::string(pattern.length(), '\0'); - std::transform(pattern.begin(), pattern.end(), upper_pattern.begin(), ::toupper); - - // Check pattern from exec_graph - size_t pos = upper_layer_name.find(':'); - auto upper_exec_graph_name = upper_layer_name.substr(pos + 1, upper_layer_name.size()); - if (upper_exec_graph_name.compare(upper_pattern) == 0) { - return true; - } - - // Check pattern with regular expression - std::regex re(upper_pattern); - return std::regex_match(upper_layer_name, re); -#else - return false; -#endif -} - -bool debug_configuration::is_layer_for_dumping(const std::string& layer_name, bool is_output, bool is_input) const { -#ifdef GPU_DEBUG_CONFIG - // Dump result layer - if (is_output == true && dump_layers_result == 1 && - (layer_name.find("constant:") == std::string::npos)) - return true; - // Dump all layers - if (dump_layers.empty() && dump_layers_result == 0 && dump_layers_input == 0) - return true; - - // Dump input layers - size_t pos = layer_name.find(':'); - auto type = layer_name.substr(0, pos); - if (is_input == true && type == "parameter" && dump_layers_input == 1) - return true; - - auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){ - return is_layer_name_matched(layer_name, dl); - }); - return (iter != dump_layers.end()); -#else - return false; -#endif -} - -bool debug_configuration::is_target_iteration(int64_t iteration) const { -#ifdef GPU_DEBUG_CONFIG - if (iteration < 0) - return true; - - if (dump_iteration.empty()) - return true; - - if (dump_iteration.find(iteration) == std::end(dump_iteration)) - return false; - - return true; #else - return false; + return std::cout; #endif } -} // namespace cldnn +} // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/device.cpp b/src/plugins/intel_gpu/src/runtime/device.cpp index fa027ebe9e2e33..428d18f6c51775 100644 --- a/src/plugins/intel_gpu/src/runtime/device.cpp +++ b/src/plugins/intel_gpu/src/runtime/device.cpp @@ -65,8 +65,7 @@ float device::get_gops(cldnn::data_types dt) const { } bool device::use_unified_shared_memory() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_usm) { + GPU_DEBUG_IF(ExecutionConfig::get_disable_usm()) { return false; } if (get_mem_caps().supports_usm()) { diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index b5ec7da3fab705..73bceb8bea8659 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -71,8 +71,7 @@ const device::ptr engine::get_device() const { } bool engine::use_unified_shared_memory() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_usm) { + GPU_DEBUG_IF(ExecutionConfig::get_disable_usm()) { return false; } if (_device->get_mem_caps().supports_usm()) { diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 7d2a9d5f90fc8b..bc52d991247492 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -1,308 +1,305 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "intel_gpu/runtime/execution_config.hpp" -#include 
"intel_gpu/runtime/debug_configuration.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/core/any.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/loop.hpp" +#include "openvino/op/lstm_sequence.hpp" +#include "openvino/op/paged_attention.hpp" +#include "openvino/op/search_sorted.hpp" +#include "openvino/op/stft.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/label.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/dynamic_quantize.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" -#include namespace ov::intel_gpu { -ExecutionConfig::ExecutionConfig() { - set_default(); -} +namespace { -class InferencePrecisionValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto precision = v.as(); - return precision == ov::element::f16 || precision == ov::element::f32 || precision == ov::element::undefined; - } -}; - -class PerformanceModeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto mode = v.as(); - return mode == ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT || - mode == ov::hint::PerformanceMode::THROUGHPUT || - mode == ov::hint::PerformanceMode::LATENCY; +ov::RTMap get_rt_info(const ov::Model& model) { + ov::RTMap rt_info; + if (model.has_rt_info("runtime_options")) + rt_info = model.get_rt_info("runtime_options"); + + if (model.has_rt_info("__weights_path")) { + rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path"); } -}; - -void ExecutionConfig::set_default() { - register_property( - std::make_tuple(ov::device::id, "0"), - std::make_tuple(ov::enable_profiling, false), - std::make_tuple(ov::cache_dir, ""), - std::make_tuple(ov::num_streams, 1), - std::make_tuple(ov::compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency()))), - std::make_tuple(ov::hint::inference_precision, ov::element::f16, InferencePrecisionValidator()), - std::make_tuple(ov::hint::model_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()), - std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE), - std::make_tuple(ov::hint::num_requests, 0), - std::make_tuple(ov::hint::enable_cpu_pinning, false), - std::make_tuple(ov::hint::enable_cpu_reservation, false), - - std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::enable_sdpa_optimization, true), - std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), - std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), - std::make_tuple(ov::internal::exclusive_async_requests, false), - std::make_tuple(ov::internal::query_model_ratio, 1.0f), - std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), - std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), - std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), - 
std::make_tuple(ov::hint::kv_cache_precision, ov::element::f16), - std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), - std::make_tuple(ov::weights_path, ""), - std::make_tuple(ov::hint::activations_scale_factor, -1.f), - - // Legacy API properties - std::make_tuple(ov::intel_gpu::nv12_two_inputs, false), - std::make_tuple(ov::intel_gpu::config_file, ""), - std::make_tuple(ov::intel_gpu::enable_lp_transformations, false)); - - register_property( - std::make_tuple(ov::intel_gpu::max_dynamic_batch, 1), - std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order), - std::make_tuple(ov::intel_gpu::optimize_data, false), - std::make_tuple(ov::intel_gpu::enable_memory_pool, true), - std::make_tuple(ov::intel_gpu::allow_static_input_reorder, false), - std::make_tuple(ov::intel_gpu::custom_outputs, std::vector{}), - std::make_tuple(ov::intel_gpu::dump_graphs, ""), - std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), - std::make_tuple(ov::intel_gpu::partial_build_program, false), - std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false), - std::make_tuple(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape, false), - std::make_tuple(ov::intel_gpu::buffers_preallocation_ratio, 1.1f), - std::make_tuple(ov::intel_gpu::max_kernels_per_batch, 8), - std::make_tuple(ov::intel_gpu::use_onednn, false)); + return rt_info; } -void ExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { - property_validators[property.first] = validator; - supported_properties[property.first] = visibility; - internal_properties[property.first] = property.second; -} -void ExecutionConfig::set_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - OPENVINO_ASSERT(is_supported(kv.first), "[GPU] Attempt to set property ", name, " (", val.as(), ") which was not registered!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": ", val.as()); - internal_properties[name] = val; +bool requires_new_shape_infer(const std::shared_ptr& op) { + if (op->is_dynamic()) { + return true; } -} + // HACK: SearchSorted has specific shape requirements. + // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine, + // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. + // Similar case for STFT.
+ if (ov::is_type(op) || ov::is_type(op)) + return true; - return supported && has_validator; -} + if (ov::is_type(op)) + return true; -bool ExecutionConfig::is_set_by_user(const std::string& name) const { - return user_properties.find(name) != user_properties.end(); -} + if (ov::is_type(op)) { + const auto body_function = std::static_pointer_cast(op)->get_function(); + if (body_function->is_dynamic()) + return true; + } -void ExecutionConfig::set_user_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - bool supported = is_supported(name) && supported_properties.at(name) == PropertyVisibility::PUBLIC; - OPENVINO_ASSERT(supported, "[GPU] Attempt to set user property ", name, " (", val.as(), ") which was not registered or internal!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": `", val.as(), "`"); + if (ov::is_type(op) || ov::is_type(op)) { + return true; + } + // When input node has dynamic shape with 4 dimension, this function return false + // because op.is_dynamic() which only checks input shapes return false. + // So, in the case of input data, we need to check output shape. + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).is_dynamic()) + return true; + } - user_properties[kv.first] = kv.second; + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).size() > 6) + return true; } -} -Any ExecutionConfig::get_property(const std::string& name) const { - if (user_properties.find(name) != user_properties.end()) { - return user_properties.at(name); + for (size_t i = 0; i < op->get_input_size(); i++) { + if (op->get_input_partial_shape(i).size() > 6) + return true; } - OPENVINO_ASSERT(internal_properties.find(name) != internal_properties.end(), "[GPU] Can't get internal property with name ", name); - return internal_properties.at(name); + return false; } -void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::execution_mode)) { - const auto mode = get_property(ov::hint::execution_mode); - if (!is_set_by_user(ov::hint::inference_precision)) { - if (mode == ov::hint::ExecutionMode::ACCURACY) { - set_property(ov::hint::inference_precision(ov::element::undefined)); - } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { - if (info.supports_fp16) - set_property(ov::hint::inference_precision(ov::element::f16)); - else - set_property(ov::hint::inference_precision(ov::element::f32)); - } +bool is_llm(const ov::Model& model) { + using namespace ov::pass::pattern; + + auto past = wrap_type(); + auto convert_past = wrap_type({past}); + auto gather_input = std::make_shared(OutputVector{past, convert_past}); + auto beam_idx = wrap_type(); + auto gather_past = wrap_type({gather_input, beam_idx, wrap_type()}); + auto gather_convert = wrap_type({gather_past}); + auto concat_past_input = std::make_shared(OutputVector{past, convert_past, gather_past, gather_convert}); + auto concat = wrap_type({concat_past_input, any_input()}); + auto convert_present = wrap_type({concat}); + auto present_input = std::make_shared(OutputVector{concat, convert_present}); + auto present = wrap_type({present_input}); + + auto kvcache_matcher = std::make_shared(present, "KVCacheMatcher"); + + for (auto& op : model.get_ordered_ops()) { + if (kvcache_matcher->match(op) || ov::is_type(op)) { + return true; } } + + return false; } -void 
ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::performance_mode)) { - const auto mode = get_property(ov::hint::performance_mode); - if (!is_set_by_user(ov::num_streams)) { - if (mode == ov::hint::PerformanceMode::LATENCY) { - set_property(ov::num_streams(1)); - } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { - set_property(ov::num_streams(ov::streams::AUTO)); - } - } - } +} // namespace - if (get_property(ov::num_streams) == ov::streams::AUTO) { - int32_t n_streams = std::max(info.num_ccs, 2); - set_property(ov::num_streams(n_streams)); - } +ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { } - if (get_property(ov::internal::exclusive_async_requests)) { - set_property(ov::num_streams(1)); +ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { + m_user_properties = other.m_user_properties; + m_is_finalized = other.m_is_finalized; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } +} - // Allow kernels reuse only for single-stream scenarios - if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { - if (get_property(ov::num_streams) != 1) { - set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); - } +ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { + m_user_properties = other.m_user_properties; + m_is_finalized = other.m_is_finalized; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } + return *this; } -void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::model_priority)) { - const auto priority = get_property(ov::hint::model_priority); - if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { - set_property(ov::intel_gpu::hint::queue_priority(priority)); - } - } +ExecutionConfig ExecutionConfig::clone() const { + ExecutionConfig new_config = *this; + new_config.m_is_finalized = false; + return new_config; } -void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - } +void ExecutionConfig::finalize(cldnn::engine& engine) { + auto ctx = std::make_shared("GPU", std::vector{engine.get_device()}); + PluginConfig::finalize(ctx.get(), nullptr); +} - GPU_DEBUG_IF(debug_config->serialize_compile == 1) { - set_property(ov::compilation_num_threads(1)); +void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTMap& rt_info, bool is_llm) { + const auto& info = dynamic_cast(context)->get_engine().get_device_info(); + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); } + if (!is_llm) + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; - set_property(ov::enable_profiling(true)); - } + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); - GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { - set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); + // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with + // ov::CacheMode::OPTIMIZE_SIZE setting. 
Not setting WEIGHTS_PATH will result in not + // using that mechanism. + if (get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { + apply_rt_info_property(ov::weights_path, rt_info); } +} - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - if (debug_config->dynamic_quantize_group_size == -1) - set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); - else - set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); - } +void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) { + apply_rt_info(context, get_rt_info(model), is_llm(model)); - GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { - GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } else { - set_property(ov::hint::kv_cache_precision(ov::element::undefined)); + const auto& ops = model.get_ops(); + + std::function)> process_op = [&, this](std::shared_ptr op) { + if (requires_new_shape_infer(op)) { + m_allow_new_shape_infer = true; + } + // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, + // smaller # of kernels are built compared to static models. + // So having smaller batch size is even better for dynamic model as we can do more parallel build. + if (op->is_dynamic()) { + m_max_kernels_per_batch = 4; + } + + // Allow using onednn for models with LSTMSequence op as it's much more performant than existing ocl impl + if (ov::is_type(op)) { + m_use_onednn = true; + } + + if (auto multi_subgraph_op = ov::as_type_ptr(op)) { + for (const auto& sub_graph : multi_subgraph_op->get_functions()) { + for (auto& sub_op : sub_graph->get_ops()) { + process_op(sub_op); + } + } } + }; + + for (const auto& op : ops) { + process_op(op); } -} -void ExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); - apply_debug_options(info); + m_optimize_data = true; } -void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { - if (finalized) - return; - - // Copy internal properties before applying hints to ensure that - // a property set by hint won't be overriden by a value in user config. - // E.g num_streams=AUTO && hint=THROUGHPUT - // If we apply hints first and then copy all values from user config to internal one, - // then we'll get num_streams=AUTO in final config while some integer number is expected. 
- for (auto& kv : user_properties) { - internal_properties[kv.first] = kv.second; +void ExecutionConfig::finalize_impl(const IRemoteContext* context) { + GPU_DEBUG_IF(get_help()) { + print_help(); + exit(-1); } + + const auto& info = dynamic_cast(context)->get_engine().get_device_info(); apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); + if (!is_set_by_user(ov::internal::enable_lp_transformations)) { + m_enable_lp_transformations = info.supports_imad || info.supports_immad; } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { + m_use_onednn = true; } - if (!is_set_by_user(ov::hint::enable_cpu_reservation)) { - if (get_property(ov::hint::enable_cpu_pinning)) { - set_property(ov::hint::enable_cpu_reservation(true)); - } - } - if (get_property(ov::hint::enable_cpu_reservation)) { - if (!is_set_by_user(ov::hint::enable_cpu_pinning)) { - set_property(ov::hint::enable_cpu_pinning(true)); - } + if (get_use_onednn()) { + m_queue_type = QueueTypes::in_order; } - if (!is_set_by_user(ov::hint::kv_cache_precision) || get_property(ov::hint::kv_cache_precision) == ov::element::undefined) { + if (!is_set_by_user(ov::hint::kv_cache_precision) || get_kv_cache_precision() == ov::element::undefined) { if (info.supports_immad) { // MFDNN-11755 - set_property(ov::hint::kv_cache_precision(get_property(ov::hint::inference_precision))); + m_kv_cache_precision = get_inference_precision(); } else { // Enable KV-cache compression by default for non-systolic platforms only - set_property(ov::hint::kv_cache_precision(ov::element::i8)); + m_kv_cache_precision = ov::element::i8; } } // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && - get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && get_dynamic_quantization_group_size() == 0 && !info.supports_immad) { + m_dynamic_quantization_group_size = 32; } - finalized = true; + if (!get_force_implementations().empty()) { + m_optimize_data = true; + } - user_properties.clear(); +#ifdef ENABLE_DEBUG_CAPS + // For now we apply env/config only for build with debug caps, but it can be updated in the future to allow + // reading release options for any build type + apply_config_options(context->get_device_name(), get_debug_config()); +#endif // ENABLE_DEBUG_CAPS } -void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm) { - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); +void ExecutionConfig::apply_hints(const cldnn::device_info& info) { + apply_execution_hints(info); + apply_performance_hints(info); + apply_priority_hints(info); +} + +void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::execution_mode)) { + const auto mode = get_execution_mode(); + if (!is_set_by_user(ov::hint::inference_precision)) { + if (mode == ov::hint::ExecutionMode::ACCURACY) { + m_inference_precision = ov::element::undefined; + } else if (mode == 
ov::hint::ExecutionMode::PERFORMANCE) { + if (info.supports_fp16) + m_inference_precision = ov::element::f16; + else + m_inference_precision = ov::element::f32; + } + } } - if (!is_llm) - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); } -std::string ExecutionConfig::to_string() const { - std::stringstream s; - s << "internal properties:\n"; - for (auto& kv : internal_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; +void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::performance_mode)) { + const auto mode = get_performance_mode(); + if (!is_set_by_user(ov::num_streams)) { + if (mode == ov::hint::PerformanceMode::LATENCY) { + m_num_streams = 1; + } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { + m_num_streams = ov::streams::AUTO; + } + } } - s << "user properties:\n"; - for (auto& kv : user_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; + + if (get_num_streams() == ov::streams::AUTO) { + int32_t n_streams = std::max(info.num_ccs, 2); + m_num_streams = n_streams; + } + + if (get_exclusive_async_requests()) { + m_num_streams = 1; + } + + // Allow kernels reuse only for single-stream scenarios + if (get_enable_kernels_reuse()) { + if (get_num_streams() != 1) { + m_enable_kernels_reuse = false; + } + } +} + +void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::model_priority)) { + const auto priority = get_model_priority(); + if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { + m_queue_priority = priority; + } } - return s.str(); } } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp index 1417680c985632..a899f78eb565d3 100644 --- a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp +++ b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp @@ -52,7 +52,6 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive auto type = mem->get_allocation_type(); const auto _layout_bytes_count = _layout.bytes_count(); - GPU_DEBUG_GET_INSTANCE(debug_config); { auto it = _non_padded_pool.lower_bound(_layout_bytes_count); @@ -67,7 +66,7 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } if (it->second._users.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = it->first; total_mem_size_non_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -104,7 +103,7 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } if (list_itr->_users.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = mem->size(); total_mem_size_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -128,14 +127,14 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } } #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto iter = std::find_if(_no_reusable_mems.begin(), _no_reusable_mems.end(), [&](const cldnn::memory_record& r) { return (network_id == r._network_id && type == r._type && mem->get_internal_params().mem == 
r._memory->get_internal_params().mem); }); if (iter != _no_reusable_mems.end()) { - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = iter->_users.begin()->_mem_size; total_mem_size_no_reusable -= released_mem_size; if (type == allocation_type::usm_host) @@ -183,8 +182,7 @@ memory::ptr memory_pool::get_from_non_padded_pool(const layout& layout, memory_record({{MEM_USER(unique_id, network_id, prim_id, layout_bytes_count)}}, mem, network_id, type)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { total_mem_size_non_padded_pool += layout_bytes_count; if (type == allocation_type::usm_host) mem_size_non_padded_pool_host += layout_bytes_count; @@ -225,8 +223,7 @@ memory::ptr memory_pool::get_from_padded_pool(const layout& layout, memory_record({{MEM_USER(unique_id, network_id, prim_id, mem->size())}}, mem, network_id, type)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { const auto allocated_mem_size = mem->size(); total_mem_size_padded_pool += allocated_mem_size; if (type == allocation_type::usm_host) @@ -242,8 +239,7 @@ memory::ptr memory_pool::get_from_padded_pool(const layout& layout, _padded_pool.emplace(layout, std::move(list)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { const auto allocated_mem_size = mem->size(); total_mem_size_padded_pool += allocated_mem_size; if (type == allocation_type::usm_host) @@ -300,8 +296,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, bool reset, bool is_dynamic) { bool do_reuse = reusable_across_network; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_memory_reuse) { + GPU_DEBUG_IF(_config.get_disable_memory_reuse()) { do_reuse = false; } if (do_reuse) { @@ -316,7 +311,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, // images (reuse not yet implemented) auto mem = alloc_memory(layout, type, reset); #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto allocated_mem_size = mem->size(); _no_reusable_mems.push_back( memory_record({{MEM_USER(unique_id, network_id, prim_id, allocated_mem_size)}}, mem, network_id, type)); @@ -330,7 +325,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, } else { auto mem = alloc_memory(layout, type, reset); #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto allocated_mem_size = mem->size(); _no_reusable_mems.push_back( memory_record({{MEM_USER(unique_id, network_id, prim_id, allocated_mem_size)}}, mem, network_id, type)); @@ -344,7 +339,6 @@ memory::ptr memory_pool::get_memory(const layout& layout, } void memory_pool::clear_pool_for_network(uint32_t network_id) { - GPU_DEBUG_GET_INSTANCE(debug_config); // free up _non_padded_pool for this network { auto itr = _non_padded_pool.begin(); @@ -354,7 +348,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { if (record._network_id == network_id) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = itr->first; total_mem_size_non_padded_pool -= 
                    total_mem_size_non_padded_pool -= released_mem_size;
                    if (record._type == allocation_type::usm_host)
@@ -388,7 +382,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) {
             if (list.empty()) {
 #ifdef GPU_DEBUG_CONFIG
-                GPU_DEBUG_IF(debug_config->dump_memory_pool) {
+                GPU_DEBUG_IF(_config.get_dump_memory_pool()) {
                     auto released_mem_size = itr->first.bytes_count();
                     total_mem_size_padded_pool -= released_mem_size;
                     if (type == allocation_type::usm_host)
@@ -404,12 +398,12 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) {
 #ifdef GPU_DEBUG_CONFIG
     // free up _no_reusable_mems for this network
-    GPU_DEBUG_IF(debug_config->dump_memory_pool) {
+    GPU_DEBUG_IF(_config.get_dump_memory_pool()) {
         auto itr = _no_reusable_mems.begin();
         while (itr != _no_reusable_mems.end()) {
             auto& record = *itr;
             if (itr->_network_id == network_id) {
-                GPU_DEBUG_IF(debug_config->dump_memory_pool) {
+                GPU_DEBUG_IF(_config.get_dump_memory_pool()) {
                     auto released_mem_size = itr->_users.begin()->_mem_size;
                     total_mem_size_no_reusable -= released_mem_size;
                     if (record._type == allocation_type::usm_host)
@@ -439,7 +433,9 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) {
     }
 }
-memory_pool::memory_pool(engine& engine) : _engine(&engine) { }
+memory_pool::memory_pool(engine& engine, const ExecutionConfig& config) : _engine(&engine), _config(config) {
+    (void)(_config); // Silence unused warning
+}
 #ifdef GPU_DEBUG_CONFIG
 inline std::string get_mb_size(size_t size) {
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp
index 6e2e15fadd43ff..f6a6ee00469427 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp
@@ -295,9 +295,6 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex
         GPU_DEBUG_INFO << "GPU version: " << static_cast<int>(info.gfx_ver.major) << "." << static_cast<int>(info.gfx_ver.minor)
                        << "." << static_cast<int>(info.gfx_ver.revision)
                        << (info.has_separate_cache ? " with separate cache" : "") << std::endl;
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->disable_onednn)
-            info.supports_immad = false;
     } else if (nv_device_attr_supported) {
         info.gfx_ver = {static_cast(device.getInfo()), static_cast(device.getInfo()),
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp
index 11fab0106bff83..d4c0e0e46b281d 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp
@@ -64,7 +64,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) {
     auto casted = std::dynamic_pointer_cast<ocl_device>(_device);
     OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine");
-    std::string cache_dir = config.get_property(ov::cache_dir);
+    const auto& cache_dir = config.get_cache_dir();
     if (cache_dir.empty()) {
         _onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
     } else {
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp
index dbf2a01aa4eadf..d722b5f3bc1bfd 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp
@@ -219,16 +219,6 @@ bool ocl_events::get_profiling_info_impl(std::list<instrumentation::profiling_interval>& info) {
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->print_multi_kernel_perf) {
-            if (period.stage == instrumentation::profiling_stage::executing) {
-                GPU_DEBUG_TRACE << "Multi-kernel time: ";
-                for (auto& duration : all_durations[period.stage])
-                    GPU_DEBUG_TRACE << " " << (duration.second - duration.first) / 1000;
-                GPU_DEBUG_TRACE << " Total " << sum / 1000 << std::endl;
-            }
-        }
-
         info.push_back(get_profiling_interval(period.stage, 0, sum));
     }
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp
index e227c94c7dc06d..bc01a8174292e4 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp
@@ -189,22 +189,22 @@ void set_arguments_impl(ocl_kernel_type& kernel,
 }  // namespace
 ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config)
-    : stream(config.get_property(ov::intel_gpu::queue_type), stream::get_expected_sync_method(config))
+    : stream(config.get_queue_type(), stream::get_expected_sync_method(config))
     , _engine(engine) {
     auto context = engine.get_cl_context();
     auto device = engine.get_cl_device();
     ocl::command_queues_builder queue_builder;
-    queue_builder.set_profiling(config.get_property(ov::enable_profiling));
+    queue_builder.set_profiling(config.get_enable_profiling());
     queue_builder.set_out_of_order(m_queue_type == QueueTypes::out_of_order);
     OPENVINO_ASSERT(m_sync_method != SyncMethods::none || m_queue_type == QueueTypes::in_order,
                     "[GPU] Unexpected sync method (none) is specified for out_of_order queue");
     bool priorty_extensions = engine.extension_supported("cl_khr_priority_hints") && engine.extension_supported("cl_khr_create_command_queue");
-    queue_builder.set_priority_mode(config.get_property(ov::intel_gpu::hint::queue_priority), priorty_extensions);
+    queue_builder.set_priority_mode(config.get_queue_priority(), priorty_extensions);
     bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue");
-    queue_builder.set_throttle_mode(config.get_property(ov::intel_gpu::hint::queue_throttle), throttle_extensions);
+    queue_builder.set_throttle_mode(config.get_queue_throttle(), throttle_extensions);
     bool queue_families_extension = engine.get_device_info().supports_queue_families;
     queue_builder.set_supports_queue_families(queue_families_extension);
diff --git a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp
index 3b925f07361fff..3a028218a3f62f 100644
--- a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp
+++ b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp
@@ -67,7 +67,7 @@ std::pair<bool, ov::Shape> ShapePredictor::predict_preallocation_shape(const std
                                                                        int32_t custom_prealloc_dim) {
     size_t next_iters_prealloc_count = custom_next_iters_prealloc_count > 0 ? static_cast<size_t>(custom_next_iters_prealloc_count)
-                                                                            : _next_iters_preallocation_count;
+                                                                            : _settings.next_iters_preallocation_count;
     const auto& current_shape = layout.get_shape();
     auto dt_bitwidth = ov::element::Type(layout.data_type).bitwidth();
@@ -122,7 +122,7 @@ std::pair<bool, ov::Shape> ShapePredictor::predict_preallocation_shape(const std
     // to avoid huge unexpected memory preallocations
     if (can_use_iterations_preallocation) {
         for (size_t i = 0; i < diffs[0].size(); ++i) {
-            if (diffs[0][i] > _max_per_dim_diff) {
+            if (diffs[0][i] > _settings.max_per_dim_diff) {
                 can_use_iterations_preallocation = false;
                 break;
             }
@@ -132,7 +132,7 @@ std::pair<bool, ov::Shape> ShapePredictor::predict_preallocation_shape(const std
         for (size_t i = 0; i < current_shape.size(); ++i)
             single_iter_shape.push_back(diffs[0][i] == 0 ? current_shape[i] : 1);
-        if (ceil_div(ov::shape_size(single_iter_shape) * dt_bitwidth, 8) > _max_per_iter_size)
+        if (ceil_div(ov::shape_size(single_iter_shape) * dt_bitwidth, 8) > _settings.max_per_iter_size)
             can_use_iterations_preallocation = false;
     }
@@ -142,13 +142,13 @@ std::pair<bool, ov::Shape> ShapePredictor::predict_preallocation_shape(const std
         auto preallocation_shape = diffs[0] * mul_shape;
         auto new_shape = current_shape + preallocation_shape;
         return {true, new_shape};
-    } else if (_buffers_preallocation_ratio > 1.0f) {
+    } else if (_settings.buffers_preallocation_ratio > 1.0f) {
         if (format::is_blocked(layout.format))
             return {false, {}};
         // Apply percentage buffer preallocation
         auto current_shape_size = ov::shape_size(current_shape);
         ov::Shape new_shape_size(current_shape.size(), 1);
-        new_shape_size[0] = static_cast<size_t>(current_shape_size * _buffers_preallocation_ratio);
+        new_shape_size[0] = static_cast<size_t>(current_shape_size * _settings.buffers_preallocation_ratio);
         return {true, new_shape_size};
     }
 }
diff --git a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp
index 0516a551f19177..913d84d8f476f5 100644
--- a/src/plugins/intel_gpu/src/runtime/stream.cpp
+++ b/src/plugins/intel_gpu/src/runtime/stream.cpp
@@ -20,8 +20,8 @@ QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handl
 }
 SyncMethods stream::get_expected_sync_method(const ExecutionConfig& config) {
-    auto profiling = config.get_property(ov::enable_profiling);
-    auto queue_type = config.get_property(ov::intel_gpu::queue_type);
+    auto profiling = config.get_enable_profiling();
+    auto queue_type = config.get_queue_type();
     return profiling ? SyncMethods::events : queue_type == QueueTypes::out_of_order ? SyncMethods::barriers : SyncMethods::none;
 }
diff --git a/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp b/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp
index fb9711e7605859..c78b472b9c54cb 100644
--- a/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp
+++ b/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp
@@ -28,6 +28,10 @@ TEST_F(GpuReservationTest, Mutiple_CompiledModel_Reservation) {
     models.emplace_back(ov::test::utils::make_multi_single_conv());
     auto core = ov::test::utils::PluginCache::get().core();
+
+    auto available_devices = core->get_available_devices();
+    if (std::find(available_devices.begin(), available_devices.end(), ov::test::utils::DEVICE_CPU) == available_devices.end())
+        GTEST_SKIP();
     core->set_property(target_devices[1], config);
     ov::AnyMap property_config = {{ov::num_streams.name(), 1},
@@ -54,4 +58,4 @@
         if (thread.joinable())
             thread.join();
     }
-}
\ No newline at end of file
+}
diff --git a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp
index 6d8f231b93576a..6fb20181eb1936 100644
--- a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp
@@ -318,7 +318,7 @@ class WeightsPrimitiveFusingTestOneDNN : public BaseFusingTest result;
     for (auto& shape : in_shapes)
@@ -74,7 +76,9 @@ TEST_P(shape_predictor_tests_b_fs_yx_fsv16, prediction) {
     auto& expected_predicted_shape = p.expected_predicted_shape;
     auto& engine = get_test_engine();
-    ShapePredictor sp(&engine, p.buffers_preallocation_ratio);
+    ShapePredictor::Settings settings;
+    settings.buffers_preallocation_ratio = p.buffers_preallocation_ratio;
+    ShapePredictor sp(&engine, settings);
     std::pair<bool, ov::Shape> result;
     for (auto& shape : in_shapes)
@@ -121,8 +125,10 @@ INSTANTIATE_TEST_SUITE_P(smoke, shape_predictor_tests_b_fs_yx_fsv16,
 TEST(shape_predictor_tests, check_max_buffer_size) {
     auto& engine = get_test_engine();
-    const auto& buffers_preallocation_ratio = 1.1;
-    ShapePredictor sp(&engine, buffers_preallocation_ratio);
+    const auto& buffers_preallocation_ratio = 1.1f;
+    ShapePredictor::Settings settings;
+    settings.buffers_preallocation_ratio = buffers_preallocation_ratio;
+    ShapePredictor sp(&engine, settings);
     const auto max_alloc_mem_size = engine.get_device_info().max_alloc_mem_size;
     auto layout = cldnn::layout({static_cast<int64_t>(max_alloc_mem_size)}, ov::element::u8, format::bfyx);
diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp
index cd400128a55234..3ef21c0288f0c8 100644
--- a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp
@@ -525,7 +525,7 @@ TEST(prepare_primitive_fusing, fuse_constant_transposes_removal_check) {
     auto prog = program::build_program(engine, topology, config, false, true);
-    prog->get_layout_optimizer().set_implementation_forcing(config.get_property(ov::intel_gpu::force_implementations));
+    prog->get_layout_optimizer().set_implementation_forcing(config.get_force_implementations());
     program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog);
     ASSERT_TRUE(!has_node(*prog, "permute"));
diff --git a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp
index 1fbeab7e67ac2d..b46033f15d77db 100644
--- a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp
+++ b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp
@@ -136,7 +136,7 @@ TEST(remove_redundant_reorders, skip_reorder_fusing_when_sibling_not_support_pad
     auto prog = program::build_program(engine, topology, config, false, true);
     config.set_property(ov::intel_gpu::optimize_data(true));
-    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool optimize_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass<remove_redundant_reorders>(*prog, optimize_data);
     ASSERT_NE(prog, nullptr);
@@ -165,7 +165,7 @@ TEST(remove_redundant_reorders, not_to_fuse_reshape_with_fused_prims) {
     auto prog = program::build_program(engine, topology, config, false, true);
     program_wrapper::apply_opt_pass(*prog);
-    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool optimize_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass<remove_redundant_reorders>(*prog, optimize_data);
     ASSERT_NE(prog, nullptr);
@@ -204,7 +204,7 @@ TEST(remove_redundant_reorders, not_to_fuse_permute) {
     auto prog = program::build_program(engine, topology, config, false, true);
     ASSERT_NE(prog, nullptr);
-    bool opt_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool opt_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass(*prog);
     program_wrapper::apply_opt_pass<remove_redundant_reorders>(*prog, opt_data);
@@ -266,7 +266,7 @@ TEST(remove_redundant_reorders, remove_fused) {
     auto prog = program::build_program(engine, topology, config, false, true);
     program_wrapper::apply_opt_pass(*prog);
-    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool optimize_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass<remove_redundant_reorders>(*prog, optimize_data);
     ASSERT_NE(prog, nullptr);
@@ -293,7 +293,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_mvn_dyn) {
     config.set_property(ov::intel_gpu::optimize_data(true));
     auto prog = program::build_program(engine, topology, config, false, true);
-    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool optimize_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass<remove_redundant_reorders>(*prog, optimize_data);
     ASSERT_NE(prog, nullptr);
@@ -336,7 +336,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_concat_dyn) {
     config.set_property(ov::intel_gpu::optimize_data(true));
     auto prog = program::build_program(engine, topology, config, false, true);
-    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool optimize_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass<remove_redundant_reorders>(*prog, optimize_data);
     ASSERT_NE(prog, nullptr);
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp
index ebcad325112310..f5f8a5a0ba2429 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp
@@ -1487,7 +1487,7 @@ struct concat_gpu_4d_implicit : public concat_gpu {
         }
         auto outputs = concat_network->execute();
-        bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
+        bool concat_opt_enabled = config.get_optimize_data();
         bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network->get_primitive("concat"))->can_be_optimized();
         EXPECT_EQ(concat_opt_enabled, concat_opt_result);
@@ -1707,7 +1707,7 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu {
         }
         auto outputs = concat_network.execute();
-        bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
+        bool concat_opt_enabled = config.get_optimize_data();
         bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network.get_primitive("concat"))->node->can_be_optimized();
         EXPECT_EQ(concat_opt_enabled, concat_opt_result);
@@ -1875,7 +1875,7 @@ struct concat_gpu_4d_implicit_mix_types_onednn : public concat_gpu {
         }
         auto outputs = concat_network.execute();
-        bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
+        bool concat_opt_enabled = config.get_optimize_data();
         bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network.get_primitive("concat_final"))->node->can_be_optimized();
         EXPECT_EQ(concat_opt_enabled, concat_opt_result);
@@ -2022,7 +2022,7 @@ struct concat_gpu_4d_explicit : public concat_gpu {
         }
         auto outputs = concat_network.execute();
-        bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
+        bool concat_opt_enabled = config.get_optimize_data();
         bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network.get_primitive("concat"))->node->can_be_optimized();
         // If sibling is using onednn impl and batch > 1, the onednn impl cannot process the implicit concat'ed buffer.
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp
index 385c6bf812cd41..3085ece407f3f5 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp
@@ -1316,7 +1316,6 @@ TEST_P(crop_gpu_dynamic, i32_in2x3x2x2_crop_offsets) {
             }
         }
     }
-    config2.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true));
     network network2(engine, topology, config2); // run with static kernel
     network2.set_input_data("input", input);
     auto outputs2 = network2.execute();
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp
index 6a96b694eea1c5..7c08fe85c07eeb 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp
@@ -10,7 +10,7 @@ using namespace ::tests;
 TEST(debug_config_test, check_debug_config_off_on_release) {
 #ifdef NDEBUG
-    GPU_DEBUG_GET_INSTANCE(debug_config);
+    auto config = get_test_default_config(get_test_engine());
     GPU_DEBUG_IF(1) {
         GTEST_FAIL(); /* This should be disabled in case of release build */
     }
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp
index 4939630fab3c57..fc20272b2dec6b 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp
@@ -55,7 +55,6 @@ static program::ptr build_program(engine& engine,
     ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::optimize_data(true));
     config.set_property(ov::intel_gpu::custom_outputs(output_names_vec));
-    config.set_property(ov::intel_gpu::max_dynamic_batch(1));
     config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer));
     return program::build_program(engine, body_topology, config, false, false, true);
@@ -837,7 +836,7 @@ static void test_loop_gpu_multiple_shapes(ov::PartialShape body_input_layout,
         permute("permute1", input_info("input_origin"), {0, 1, 2, 3}),
         concatenation("input1", {input_info("permute1"), input_info("input_origin")}, 0),
         loop("loop",
-            {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")},
+             {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")},
             body_program, trip_count_id, initial_condition_id, actual_iteration_count_id,
             input_primitive_maps, output_primitive_maps, back_edges,
             num_iterations, body_current_iteration_id, body_execution_condition_id, 2),
@@ -1105,7 +1104,6 @@ static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape bod
     auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true);
     auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx});
-
     std::vector<layout> body_input_layouts;
     for (size_t i = 0; i < body_input_layout.size(); i++) {
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
index 60fc52135f2068..1aaca9f68124fa 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
@@ -1913,7 +1913,7 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant)
     auto outputs = net.execute();
     auto executed_primitives = net.get_executed_primitives();
-    if (config.get_property(ov::intel_gpu::queue_type) != QueueTypes::out_of_order)
+    if (config.get_queue_type() != QueueTypes::out_of_order)
         GTEST_SKIP();
     ASSERT_TRUE(executed_primitives.count("in") == 1);
diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h b/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h
index 77b519d8e3cf5f..09c081abbb6fe6 100644
--- a/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h
+++ b/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h
@@ -38,7 +38,9 @@ namespace cldnn
             p.prepare_memory_dependencies();
         }
         static void update_configs_properties(program& p, const ov::AnyMap& properties) {
-            p._config.set_property(properties);
+            auto config_copy = p._config.clone();
+            config_copy.set_property(properties);
+            p._config = config_copy;
         }
     };
diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp
index 8536813a4481c6..1ef614715e4337 100644
--- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp
@@ -54,14 +54,14 @@ void generic_test::run_single_test(bool is_caching_test) {
             }
         }
         std::string input_name = "input" + std::to_string(i);
-        if ((i == 0) && generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) {
+        if ((i == 0) && generic_params->network_config.get_optimize_data()) {
             // Add reorder after the first input in case of optimize data flag since it might change the input layout.
             input_name = "input0_init";
         }
         // First input is provided to the network as input_layout.
         // Other inputs are provided as input_layout if optimize data flag is off. Otherwise they are provided as data.
-        if ((i == 0) || !generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) {
+        if ((i == 0) || !generic_params->network_config.get_optimize_data()) {
             topology.add(input_layout(input_name, input_mems[i]->get_layout()));
             input_layouts_names.push_back(input_name);
         } else {
@@ -74,7 +74,7 @@ void generic_test::run_single_test(bool is_caching_test) {
         }
     }
-    if (generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) {
+    if (generic_params->network_config.get_optimize_data()) {
         // Add reorder after the first input in case of optimize data flag since it might change the input layout.
         topology.add(reorder("input0", input_info("input0_init"), input_mems[0]->get_layout()));
     }
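
Taken together, the hunks above apply one pattern: code that used to pull options out of the property map via config.get_property(ov::...) or out of the process-wide GPU_DEBUG_GET_INSTANCE(debug_config) singleton now calls a typed getter on the ExecutionConfig instance it was constructed with. The sketch below models only that consumer-side change; ExecutionConfig, memory_pool, and get_dump_memory_pool() here are simplified stand-ins written for illustration, not the real OpenVINO declarations (in the real headers the getters are generated by the OV_CONFIG_*_OPTION macros shown earlier in this patch).

// Minimal sketch, assuming a trimmed-down ExecutionConfig with one option.
#include <cstdio>

struct ExecutionConfig {
    // Stand-in for the macro-generated typed getter.
    bool get_dump_memory_pool() const { return m_dump_memory_pool; }
    bool m_dump_memory_pool = false;  // stands in for set_property(...)
};

class memory_pool {
public:
    // Mirrors memory_pool(engine&, const ExecutionConfig&) from the patch:
    // the pool captures its owning config instead of a global singleton.
    explicit memory_pool(const ExecutionConfig& config) : _config(config) {}

    void release_memory(size_t released_bytes) {
        // Before: GPU_DEBUG_GET_INSTANCE(debug_config);
        //         GPU_DEBUG_IF(debug_config->dump_memory_pool) { ... }
        // After: the instance-owned config is queried directly.
        if (_config.get_dump_memory_pool()) {
            std::printf("[GPU] released %zu bytes from pool\n", released_bytes);
        }
    }

private:
    const ExecutionConfig& _config;
};

int main() {
    ExecutionConfig cfg;
    cfg.m_dump_memory_pool = true;
    memory_pool pool(cfg);
    pool.release_memory(1024);  // prints the dump line
    return 0;
}

One practical consequence of this design is that two compiled models in the same process can carry different debug settings, which the old global debug_config could not express; that appears to be why the config is now threaded through constructors such as memory_pool's.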