Skip to content

Commit

Permalink
[NPU] Optimize remote context path (#29070)
Browse files Browse the repository at this point in the history
### Details:
- *Global remote properties can be set through the context creation
method -> they will be used to create tensors from that context if no
other properties are shared when creating the tensors. If properties are
shared, then they will be merged with the global remote properties.*
 - *Get the device in the Remote context ctor*

---------

Signed-off-by: Bogdan Pereanu <bogdan.pereanu@intel.com>
  • Loading branch information
pereanub authored Feb 24, 2025
1 parent 5094c8f commit 69d4b3b
Show file tree
Hide file tree
Showing 10 changed files with 175 additions and 67 deletions.
10 changes: 6 additions & 4 deletions src/plugins/intel_npu/src/backend/include/zero_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,12 @@ class ZeroDevice : public IDevice {
ov::intel_npu::MemType mem_type = ov::intel_npu::MemType::L0_INTERNAL_BUF,
void* mem = nullptr) override;

ov::SoPtr<ov::ITensor> createHostTensor(std::shared_ptr<ov::IRemoteContext> context,
const ov::element::Type& element_type,
const ov::Shape& shape,
const Config& config) override;
ov::SoPtr<ov::ITensor> createHostTensor(
std::shared_ptr<ov::IRemoteContext> context,
const ov::element::Type& element_type,
const ov::Shape& shape,
const Config& config,
ov::intel_npu::TensorType tensor_type = ov::intel_npu::TensorType::BINDED) override;

ZeroDevice& operator=(const ZeroDevice&) = delete;
ZeroDevice(const ZeroDevice&) = delete;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ class ZeroHostTensor : public ov::ITensor {
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const ov::element::Type element_type,
const ov::Shape& shape,
const Config& config);
const Config& config,
ov::intel_npu::TensorType tensor_type = ov::intel_npu::TensorType::BINDED);

~ZeroHostTensor() override = default;

Expand Down
5 changes: 3 additions & 2 deletions src/plugins/intel_npu/src/backend/src/zero_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ ov::SoPtr<ov::IRemoteTensor> ZeroDevice::createRemoteTensor(std::shared_ptr<ov::
// Creates a host-visible tensor backed by a ZeroHostTensor.
// @param context      Remote context the tensor is bound to.
// @param element_type Element type of the tensor.
// @param shape        Shape of the tensor.
// @param config       Plugin configuration forwarded to the tensor.
// @param tensor_type  Requested tensor usage type (declared default: BINDED);
//                     forwarded so context-level remote properties take effect.
ov::SoPtr<ov::ITensor> ZeroDevice::createHostTensor(std::shared_ptr<ov::IRemoteContext> context,
                                                    const ov::element::Type& element_type,
                                                    const ov::Shape& shape,
                                                    const Config& config,
                                                    ov::intel_npu::TensorType tensor_type) {
    return {std::make_shared<ZeroHostTensor>(context, _initStructs, element_type, shape, config, tensor_type)};
}
5 changes: 3 additions & 2 deletions src/plugins/intel_npu/src/backend/src/zero_host_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ ZeroHostTensor::ZeroHostTensor(const std::shared_ptr<ov::IRemoteContext>& contex
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const ov::element::Type element_type,
const ov::Shape& shape,
const Config& config)
const Config& config,
ov::intel_npu::TensorType tensor_type)
: _impl(std::make_shared<ZeroRemoteTensor>(context,
init_structs,
element_type,
shape,
config,
ov::intel_npu::TensorType::BINDED,
tensor_type,
ov::intel_npu::MemType::L0_INTERNAL_BUF)) {}

void* ZeroHostTensor::data(const ov::element::Type&) const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,12 @@ class IDevice : public std::enable_shared_from_this<IDevice> {
ov::intel_npu::MemType mem_type = ov::intel_npu::MemType::L0_INTERNAL_BUF,
void* mem = nullptr);

virtual ov::SoPtr<ov::ITensor> createHostTensor(std::shared_ptr<ov::IRemoteContext> context,
const ov::element::Type& element_type,
const ov::Shape& shape,
const Config& config);
virtual ov::SoPtr<ov::ITensor> createHostTensor(
std::shared_ptr<ov::IRemoteContext> context,
const ov::element::Type& element_type,
const ov::Shape& shape,
const Config& config,
ov::intel_npu::TensorType tensor_type = ov::intel_npu::TensorType::BINDED);

protected:
virtual ~IDevice() = default;
Expand Down
3 changes: 2 additions & 1 deletion src/plugins/intel_npu/src/common/src/npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ ov::SoPtr<ov::IRemoteTensor> IDevice::createRemoteTensor(std::shared_ptr<ov::IRe
// Default IDevice implementation: host tensors are not supported unless a
// concrete backend (e.g. ZeroDevice) overrides this method.
// @throws ov::Exception always.
ov::SoPtr<ov::ITensor> IDevice::createHostTensor(std::shared_ptr<ov::IRemoteContext>,
                                                 const ov::element::Type&,
                                                 const ov::Shape&,
                                                 const Config&,
                                                 ov::intel_npu::TensorType) {
    OPENVINO_THROW("Create Host Tensor is not supported");
}

Expand Down
12 changes: 9 additions & 3 deletions src/plugins/intel_npu/src/plugin/include/remote_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@

#include "backends.hpp"
#include "intel_npu/config/config.hpp"
#include "openvino/runtime/intel_npu/remote_properties.hpp"
#include "openvino/runtime/iremote_context.hpp"

namespace intel_npu {

class RemoteContextImpl : public ov::IRemoteContext {
public:
RemoteContextImpl(const std::shared_ptr<const NPUBackends>& backends, const Config& config);
RemoteContextImpl(const std::shared_ptr<const NPUBackends>& backends,
const Config& config,
const ov::AnyMap& remote_properties = {});

/**
* @brief Returns name of a device on which underlying object is allocated.
Expand Down Expand Up @@ -54,11 +57,14 @@ class RemoteContextImpl : public ov::IRemoteContext {
private:
std::shared_ptr<ov::IRemoteContext> get_this_shared_ptr();

std::shared_ptr<const NPUBackends> _backends;

const Config _config;
std::shared_ptr<intel_npu::IDevice> _device;
ov::AnyMap _properties;
std::string _device_name;

std::optional<ov::intel_npu::MemType> _mem_type_object = std::nullopt;
std::optional<ov::intel_npu::TensorType> _tensor_type_object = std::nullopt;
std::optional<void*> _mem_handle_object = std::nullopt;
};

} // namespace intel_npu
4 changes: 2 additions & 2 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -786,8 +786,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
return compile_model(model, properties);
}

// Creates a new remote context. Unlike get_default_context (which ignores the
// properties), the supplied remote properties are stored in the context and
// act as global defaults when tensors are later created from it.
// @param remoteProperties Optional global remote properties (mem_type,
//                         tensor_type, mem_handle).
ov::SoPtr<ov::IRemoteContext> Plugin::create_context(const ov::AnyMap& remoteProperties) const {
    return std::make_shared<RemoteContextImpl>(_backends, _globalConfig, remoteProperties);
}

ov::SoPtr<ov::IRemoteContext> Plugin::get_default_context(const ov::AnyMap&) const {
Expand Down
108 changes: 60 additions & 48 deletions src/plugins/intel_npu/src/plugin/src/remote_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,15 @@
#include "remote_context.hpp"

#include "intel_npu/config/common.hpp"
#include "openvino/runtime/intel_npu/remote_properties.hpp"

using namespace ov::intel_npu;

namespace {

template <typename Type>
std::optional<Type> extract_object(const ov::AnyMap& params, const ov::Property<Type>& p, bool isMandatory = true) {
std::optional<Type> extract_object(const ov::AnyMap& params, const ov::Property<Type>& p) {
auto itrHandle = params.find(p.name());
if (itrHandle == params.end()) {
if (isMandatory) {
OPENVINO_THROW("No parameter ", p.name(), " found in parameters map");
}

return std::nullopt;
}

Expand All @@ -29,11 +24,23 @@ std::optional<Type> extract_object(const ov::AnyMap& params, const ov::Property<

namespace intel_npu {

// Constructs the remote context. The device is resolved once here (instead of
// on every tensor creation) and validated immediately, so later calls can use
// the cached _device without re-checking the backends.
// Any remote properties passed at context creation are parsed and stored as
// global defaults; they apply to tensors created without per-tensor
// properties and are merged with per-tensor ones otherwise.
// @throws ov::Exception if no device is available for the configured DEVICE_ID.
RemoteContextImpl::RemoteContextImpl(const std::shared_ptr<const NPUBackends>& backends,
                                     const Config& config,
                                     const ov::AnyMap& remote_properties)
    : _config(config),
      _device(backends->getDevice(_config.get<DEVICE_ID>())),
      _properties({l0_context(backends->getContext())}),
      _device_name("NPU") {
    if (_device == nullptr) {
        OPENVINO_THROW("Device is not available");
    }

    if (!remote_properties.empty()) {
        // Each property is optional; extract_object returns std::nullopt when absent.
        _mem_type_object = extract_object(remote_properties, mem_type);
        _tensor_type_object = extract_object(remote_properties, tensor_type);
        _mem_handle_object = extract_object(remote_properties, mem_handle);
    }
}

const ov::AnyMap& RemoteContextImpl::get_property() const {
return _properties;
Expand All @@ -42,55 +49,60 @@ const ov::AnyMap& RemoteContextImpl::get_property() const {
// Creates a remote tensor. Per-tensor remote properties (params) take
// precedence; missing ones fall back to the global properties captured at
// context creation. Validation rules:
//  - mem_type must be set whenever tensor_type or mem_handle is set;
//  - mem_handle must be set when mem_type is SHARED_BUF.
// @throws ov::Exception on any violated rule.
ov::SoPtr<ov::IRemoteTensor> RemoteContextImpl::create_tensor(const ov::element::Type& type,
                                                              const ov::Shape& shape,
                                                              const ov::AnyMap& params) {
    // Local (per-tensor) remote properties.
    std::optional<ov::intel_npu::MemType> mem_type_object = std::nullopt;
    std::optional<ov::intel_npu::TensorType> tensor_type_object = std::nullopt;
    std::optional<void*> mem_handle_object = std::nullopt;

    if (!params.empty()) {
        // Save local remote properties.
        mem_type_object = extract_object(params, mem_type);
        tensor_type_object = extract_object(params, tensor_type);
        mem_handle_object = extract_object(params, mem_handle);
    }

    // Merge local remote properties with global remote properties (local wins).
    if (!mem_type_object.has_value()) {
        mem_type_object = _mem_type_object;
    }
    if (!tensor_type_object.has_value()) {
        tensor_type_object = _tensor_type_object;
    }
    if (!mem_handle_object.has_value()) {
        mem_handle_object = _mem_handle_object;
    }

    // Mem_type shall be set if any other property is set.
    if (!mem_type_object.has_value() && (mem_handle_object.has_value() || tensor_type_object.has_value())) {
        OPENVINO_THROW("Parameter ", mem_type.name(), " must be set");
    }

    // No properties at all -> plain remote tensor with backend defaults.
    if (!mem_type_object.has_value()) {
        return _device->createRemoteTensor(get_this_shared_ptr(), type, shape, _config);
    }

    // Mem_handle shall be set if mem_type is a shared memory type.
    if (*mem_type_object == MemType::SHARED_BUF && !mem_handle_object.has_value()) {
        OPENVINO_THROW("No parameter ", mem_handle.name(), " found in parameters map");
    }

    // value_or avoids dereferencing an empty optional: for L0_INTERNAL_BUF the
    // handle is legitimately absent (plain *mem_handle_object would be UB).
    return _device->createRemoteTensor(get_this_shared_ptr(),
                                       type,
                                       shape,
                                       _config,
                                       tensor_type_object.value_or(ov::intel_npu::TensorType::BINDED),
                                       *mem_type_object,
                                       mem_handle_object.value_or(nullptr));
}

// Creates a host tensor from this context. Uses the device cached and
// validated in the constructor (no per-call lookup). Honors the context-level
// (global) tensor_type remote property when one was supplied at context
// creation; otherwise the tensor defaults to BINDED.
ov::SoPtr<ov::ITensor> RemoteContextImpl::create_host_tensor(const ov::element::Type type, const ov::Shape& shape) {
    return _device->createHostTensor(get_this_shared_ptr(),
                                     type,
                                     shape,
                                     _config,
                                     _tensor_type_object.value_or(ov::intel_npu::TensorType::BINDED));
}

const std::string& RemoteContextImpl::get_device_name() const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,88 @@ TEST_P(RemoteRunTests, CheckRemoteTensorInternalBuf) {
OV_ASSERT_NO_THROW(inference_request.infer());
}

// Verifies that remote properties (mem_type + tensor_type) supplied at context
// creation are applied to tensors created from that context WITHOUT
// per-tensor properties, and that such a tensor is usable for inference.
TEST_P(RemoteRunTests, CheckRemoteTensorInternalBufSetPropertyInContext) {
// Skip test according to plugin specific disabledTestPatterns() (if any)
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::InferRequest inference_request;

// Global remote properties set at context creation.
ov::AnyMap params = {{ov::intel_npu::mem_type.name(), ov::intel_npu::MemType::L0_INTERNAL_BUF},
{ov::intel_npu::tensor_type.name(), {ov::intel_npu::TensorType::INPUT}}};

auto context = core->create_context(target_device, params);
OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model, context, configuration));
OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request());

auto tensor = inference_request.get_input_tensor();
// No per-tensor properties: the context-level ones must be used.
auto remote_tensor = context.create_tensor(ov::element::f32, tensor.get_shape());
tensor = {};

ov::Tensor check_remote_tensor;
OV_ASSERT_NO_THROW(check_remote_tensor = remote_tensor);
// Remote tensors expose no host-accessible data pointer.
ASSERT_THROW(check_remote_tensor.data(), ov::Exception);

OV_ASSERT_NO_THROW(inference_request.set_input_tensor(check_remote_tensor));
OV_ASSERT_NO_THROW(inference_request.infer());
}

// Verifies the validation rule that mem_type is mandatory once any other
// remote property is set: a context carrying only tensor_type must make
// create_tensor throw.
TEST_P(RemoteRunTests, CheckRemoteTensorSetOnlyTensorType) {
// Skip test according to plugin specific disabledTestPatterns() (if any)
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::InferRequest inference_request;

// tensor_type alone, without mem_type, is an invalid combination.
ov::AnyMap params = {{ov::intel_npu::tensor_type.name(), {ov::intel_npu::TensorType::INPUT}}};

auto context = core->create_context(target_device, params);
OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model, context, configuration));
OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request());

auto tensor = inference_request.get_input_tensor();
ASSERT_THROW(auto remote_tensor = context.create_tensor(ov::element::f32, tensor.get_shape()), ov::Exception);
}

// Verifies the merge behavior: per-tensor properties override the
// context-level ones (tensor_type BINDED overrides the context's INPUT),
// while the context still supplies the mandatory mem_type.
TEST_P(RemoteRunTests, CheckRemoteTensorInternalBufSetPropertyInContextandChangedInTensor) {
// Skip test according to plugin specific disabledTestPatterns() (if any)
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::InferRequest inference_request;

// Context-level (global) remote properties.
ov::AnyMap paramsContext = {{ov::intel_npu::mem_type.name(), ov::intel_npu::MemType::L0_INTERNAL_BUF},
{ov::intel_npu::tensor_type.name(), {ov::intel_npu::TensorType::INPUT}}};

auto context = core->create_context(target_device, paramsContext);
OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model, context, configuration));
OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request());

// Per-tensor property overriding the context-level tensor_type.
ov::AnyMap paramsTensor = {{ov::intel_npu::tensor_type.name(), {ov::intel_npu::TensorType::BINDED}}};

auto tensor = inference_request.get_input_tensor();
auto remote_tensor = context.create_tensor(ov::element::f32, tensor.get_shape(), paramsTensor);
tensor = {};

ov::Tensor check_remote_tensor;
OV_ASSERT_NO_THROW(check_remote_tensor = remote_tensor);

OV_ASSERT_NO_THROW(inference_request.set_input_tensor(check_remote_tensor));
OV_ASSERT_NO_THROW(inference_request.infer());
}

// Verifies that the mem_type-is-mandatory rule also applies after merging:
// neither the context (tensor_type only) nor the per-tensor params provide a
// mem_type, so create_tensor must throw even though tensor_type is set twice.
TEST_P(RemoteRunTests, CheckRemoteTensorInternalBufSetPropertyInContextandChangedInTensorExpectToFail) {
// Skip test according to plugin specific disabledTestPatterns() (if any)
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::InferRequest inference_request;

// Context carries only tensor_type — mem_type is never supplied.
ov::AnyMap paramsContext = {{ov::intel_npu::tensor_type.name(), {ov::intel_npu::TensorType::INPUT}}};

auto context = core->create_context(target_device, paramsContext);
OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model, context, configuration));
OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request());

ov::AnyMap paramsTensor = {{ov::intel_npu::tensor_type.name(), {ov::intel_npu::TensorType::BINDED}}};

auto tensor = inference_request.get_input_tensor();
ASSERT_THROW(auto remote_tensor = context.create_tensor(ov::element::f32, tensor.get_shape(), paramsTensor),
ov::Exception);
}

TEST_P(RemoteRunTests, CheckImportModelPath) {
// Skip test according to plugin specific disabledTestPatterns() (if any)
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Expand Down

0 comments on commit 69d4b3b

Please sign in to comment.