From a84a058f9e0286e76efd5ad4d14d3634cfc9ed55 Mon Sep 17 00:00:00 2001
From: Maajid khan <n.maajidkhan@gmail.com>
Date: Thu, 12 Nov 2020 04:46:30 +0530
Subject: [PATCH] [OpenVINO-EP] Enabling Multi Device support (#5740)

* Enabling Multi Device support for UEP

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Minor fix added
* Added a simple fix to determine OpenVINO
version for Arm build as well

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>
---
 BUILD.md                                      |  8 ++--
 cmake/CMakeLists.txt                          |  9 +++-
 dockerfiles/README.md                         | 47 ++++++++++++++++---
 .../OpenVINO-ExecutionProvider.md             | 18 +++++--
 .../openvino/backends/backend_factory.cc      |  2 +-
 .../openvino/openvino_execution_provider.h    | 12 ++---
 .../openvino/ov_versions/capability_2021_1.cc | 29 ++++++++++++
 .../python/onnxruntime_pybind_state.cc        |  3 ++
 tools/ci_build/build.py                       | 14 ++++--
 9 files changed, 114 insertions(+), 28 deletions(-)
diff --git a/BUILD.md b/BUILD.md
index 81a2ff3d7d83c..f5f428a5507e8 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -405,16 +405,18 @@ See more information on the OpenVINO Execution Provider [here](./docs/execution_
 | <code>VAD-M_FP16</code> | Intel<sup>®</sup> Vision Accelerator Design based on 8 Movidius<sup>TM</sup> MyriadX VPUs |
 | <code>VAD-F_FP32</code> | Intel<sup>®</sup> Vision Accelerator Design with an Intel<sup>®</sup> Arria<sup>®</sup> 10 FPGA |
 | <code>HETERO:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>,<DEVICE_TYPE_3>...</code> | All Intel<sup>®</sup> silicons mentioned above |
+| <code>MULTI:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>,<DEVICE_TYPE_3>...</code> | All Intel<sup>®</sup> silicons mentioned above |
 
-Specifying Hardware Target for HETERO Build:
+Specifying Hardware Target for HETERO or Multi-Device Build:
 
 HETERO:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>,<DEVICE_TYPE_3>...
+MULTI:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>,<DEVICE_TYPE_3>...
 The <DEVICE_TYPE> can be any of these devices from this list ['CPU','GPU','MYRIAD','FPGA','HDDL']
 
-A minimum of two DEVICE_TYPE'S should be specified for a valid HETERO Build.
+At least two DEVICE_TYPEs must be specified for a valid HETERO or Multi-Device build.
 
 Example:
-HETERO:MYRIAD,CPU        HETERO:HDDL,GPU,CPU
+HETERO:MYRIAD,CPU  HETERO:HDDL,GPU,CPU  MULTI:MYRIAD,GPU,CPU
 
 For more information on OpenVINO Execution Provider&#39;s ONNX Layer support, Topology support, and Intel hardware enabled, please refer to the document [OpenVINO-ExecutionProvider.md](./docs/execution_providers/OpenVINO-ExecutionProvider.md) in <code>$onnxruntime_root/docs/execution_providers</code>
 
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 9b3b0b3b75b86..18f762e60559d 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -975,8 +975,8 @@ if(onnxruntime_USE_OPENVINO)
 
   add_definitions(-DUSE_OPENVINO=1)
 
-  if (EXISTS "$ENV{INTEL_OPENVINO_DIR}/inference_engine/version.txt")
-    file(READ $ENV{INTEL_OPENVINO_DIR}/inference_engine/version.txt VER)
+  if (EXISTS "$ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/version.txt")
+    file(READ "$ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/version.txt" VER)
   endif()
 
   if (NOT DEFINED ENV{INTEL_OPENVINO_DIR})
@@ -1037,6 +1037,11 @@ if(onnxruntime_USE_OPENVINO)
     add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")
   endif()
 
+  if(onnxruntime_USE_OPENVINO_MULTI)
+    add_definitions(-DOPENVINO_CONFIG_MULTI=1)
+    add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")
+  endif()
+
 endif()
 
 if (onnxruntime_USE_VITISAI)
diff --git a/dockerfiles/README.md b/dockerfiles/README.md
index 45cdb97931e32..ff4299340590d 100644
--- a/dockerfiles/README.md
+++ b/dockerfiles/README.md
@@ -127,13 +127,27 @@ Therefore, ONNX RT Execution Provider for **nGraph** will be deprecated starting
 
 2. DEVICE: Specifies the hardware target for building OpenVINO Execution Provider. Below are the options for different Intel target devices.
 
-	| Device Option | Target Device |
-	| --------- | -------- |
-	| <code>CPU_FP32</code> | Intel<sup></sup> CPUs |
-	| <code>GPU_FP32</code> |Intel<sup></sup> Integrated Graphics |
-	| <code>GPU_FP16</code> | Intel<sup></sup> Integrated Graphics |
-	| <code>MYRIAD_FP16</code> | Intel<sup></sup> Movidius<sup>TM</sup> USB sticks |
-	| <code>VAD-M_FP16</code> | Intel<sup></sup> Vision Accelerator Design based on Movidius<sup>TM</sup> MyriadX VPUs |
+  | Device Option | Target Device |
+  | --------- | -------- |
+  | <code>CPU_FP32</code> | Intel<sup></sup> CPUs |
+  | <code>GPU_FP32</code> |Intel<sup></sup> Integrated Graphics |
+  | <code>GPU_FP16</code> | Intel<sup></sup> Integrated Graphics |
+  | <code>MYRIAD_FP16</code> | Intel<sup></sup> Movidius<sup>TM</sup> USB sticks |
+  | <code>VAD-M_FP16</code> | Intel<sup></sup> Vision Accelerator Design based on Movidius<sup>TM</sup> MyriadX VPUs |
+  | <code>HETERO:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>,<DEVICE_TYPE_3>...</code> | All Intel<sup>®</sup> silicons mentioned above |
+  | <code>MULTI:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>,<DEVICE_TYPE_3>...</code> | All Intel<sup>®</sup> silicons mentioned above | 
+
+  Specifying Hardware Target for HETERO or Multi-Device Build:
+
+  HETERO:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>..
+  MULTI:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>..
+  The <DEVICE_TYPE> can be any of these devices from this list ['CPU','GPU','MYRIAD','FPGA','HDDL']
+
+  At least two DEVICE_TYPEs must be specified for a valid HETERO or Multi-Device build.
+
+  Example:
+  HETERO:MYRIAD,CPU  HETERO:HDDL,GPU,CPU  MULTI:MYRIAD,GPU,CPU
+
 *This is the hardware accelerator target that is enabled by **default** in the container image. After building the container image for one default target, the application may explicitly choose a different target at run time with the same container by using the [Dynamic device selction API](https://github.com/microsoft/onnxruntime/blob/master/docs/execution_providers/OpenVINO-ExecutionProvider.md#dynamic-device-selection).*
 
 
@@ -192,6 +206,25 @@ Therefore, ONNX RT Execution Provider for **nGraph** will be deprecated starting
     docker run -it --device --mount type=bind,source=/var/tmp,destination=/var/tmp --device /dev/ion:/dev/ion  onnxruntime-vadm:latest
 
     ```
+
+### OpenVINO on HETERO or Multi-Device Build
+
+1. Build the docker image from the DockerFile in this repository.
+
+     for HETERO:
+     ```
+      docker build --rm -t onnxruntime-HETERO --build-arg DEVICE=HETERO:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>,<DEVICE_TYPE_3>... --network host -f <Dockerfile> .
+     ```
+
+     for MULTI:
+     ```
+      docker build --rm -t onnxruntime-MULTI --build-arg DEVICE=MULTI:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>,<DEVICE_TYPE_3>... --network host -f <Dockerfile> .
+     ```
+
+2. For each DEVICE_TYPE included in the HETERO or MULTI build, install the rules, drivers and other packages required for that device, as described in the corresponding device sections above.
+
+3. Run the docker image as described in the steps above.
+
 ## ARM 32v7
 *Public Preview*
 
diff --git a/docs/execution_providers/OpenVINO-ExecutionProvider.md b/docs/execution_providers/OpenVINO-ExecutionProvider.md
index 6ffb29c49fb12..4359ba3639381 100644
--- a/docs/execution_providers/OpenVINO-ExecutionProvider.md
+++ b/docs/execution_providers/OpenVINO-ExecutionProvider.md
@@ -35,19 +35,20 @@ The following table lists all the available configuratoin optoins and the Key-Va
 
 | **Key** | **Key type** | **Allowable Values** | **Value type** | **Description** |
 | --- | --- | --- | --- | --- |
-| device_type | string | CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16, VAD-F_FP32, Any valid Hetero combination | string | Overrides the accelerator hardware type and precision with these values at runtime. If this option is not explicitly set, default hardware and precision specified during build time is used. |
+| device_type | string | CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16, VAD-F_FP32, Any valid Hetero combination, Any valid Multi-Device combination | string | Overrides the accelerator hardware type and precision with these values at runtime. If this option is not explicitly set, default hardware and precision specified during build time is used. |
 | device_id   | string | Any valid OpenVINO device ID | string | Selects a particular hardware device for inference. The list of valid OpenVINO device ID's available on a platform can be obtained either by Python API (`onnxruntime.capi._pybind_state.get_available_openvino_device_ids()`) or by [OpenVINO C/C++ API](https://docs.openvinotoolkit.org/latest/classInferenceEngine_1_1Core.html#acb212aa879e1234f51b845d2befae41c). If this option is not explicitly set, an arbitrary free device will be automatically selected by OpenVINO runtime.|
 | enable_vpu_fast_compile | string | True/False | boolean | This option is only available for MYRIAD_FP16 VPU devices. During initialization of the VPU device with compiled model, Fast-compile may be optionally enabled to speeds up the model's compilation to VPU device specific format. This in-turn speeds up model initialization time. However, enabling this option may slowdown inference due to some of the optimizations not being fully applied, so caution is to be exercised while enabling this option. |
 | num_of_threads | string | Any unsigned positive number other than 0 | size_t | Overrides the accelerator default value of number of threads with this value at runtime. If this option is not explicitly set, default value of 8 is used during build time. |
 
-Valid Hetero combination's:
+Valid Hetero or Multi-Device combinations:
 HETERO:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>,<DEVICE_TYPE_3>...
+MULTI:<DEVICE_TYPE_1>,<DEVICE_TYPE_2>,<DEVICE_TYPE_3>...
 The <DEVICE_TYPE> can be any of these devices from this list ['CPU','GPU','MYRIAD','FPGA','HDDL']
 
-A minimum of two DEVICE_TYPE'S should be specified for a valid HETERO Build.
+At least two DEVICE_TYPEs must be specified for a valid HETERO or Multi-Device build.
 
 Example:
-HETERO:MYRIAD,CPU         HETERO:HDDL,GPU,CPU
+HETERO:MYRIAD,CPU  HETERO:HDDL,GPU,CPU  MULTI:MYRIAD,GPU,CPU
 
 ## Other configuration settings
 ### Onnxruntime Graph Optimization level
@@ -271,3 +272,12 @@ To utilize all available hardware more efficiently during one inference
 For more information on Heterogeneous plugin of OpenVINO, please refer to the following
 [documentation](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_supported_plugins_HETERO.html).
 
+## Multi-Device Execution for OpenVINO EP
+
+The Multi-Device plugin automatically assigns inference requests to the available computational devices so that the requests execute in parallel. Potential gains are as follows:
+
+* Improved throughput that multiple devices can deliver (compared to single-device execution).
+* More consistent performance, since the devices can now share the inference burden (so that if one device is becoming too busy, another device can take more of the load).
+
+For more information on Multi-Device plugin of OpenVINO, please refer to the following
+[documentation](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_supported_plugins_MULTI.html#introducing_multi_device_execution).
\ No newline at end of file
diff --git a/onnxruntime/core/providers/openvino/backends/backend_factory.cc b/onnxruntime/core/providers/openvino/backends/backend_factory.cc
index f5be572b51254..aae3daafbb278 100644
--- a/onnxruntime/core/providers/openvino/backends/backend_factory.cc
+++ b/onnxruntime/core/providers/openvino/backends/backend_factory.cc
@@ -19,7 +19,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
   std::string type = global_context.device_type;
   if (type.find("HDDL") != std::string::npos) {
     return std::make_shared<VADMBackend>(model_proto, global_context, subgraph_context);
-  } else if (type == "CPU" || type == "GPU" || type == "MYRIAD" || type.find("HETERO") != std::string::npos) {
+  } else if (type == "CPU" || type == "GPU" || type == "MYRIAD" || type.find("HETERO") != std::string::npos || type.find("MULTI") != std::string::npos) {
     return std::make_shared<BasicBackend>(model_proto, global_context, subgraph_context);
   } else {
     ORT_THROW("[OpenVINO-EP] Backend factory error: Unknown backend type: " + type);
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index e9350f0886dfc..67ead849cb2ff 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -17,10 +17,10 @@ namespace onnxruntime {
 static void print_build_options()
 {
     std::cout << "[ERROR] INVALID DEVICE BUILD TYPE SPECIFIED" << std::endl;
-    std::cout << "Specify the keyword HETERO followed by the devices in the order of priority you want to build" << std::endl;
-    std::cout << "The different hardware devices that can be added with HETERO build ";
+    std::cout << "Specify the keyword HETERO (or) MULTI followed by the devices in the order of priority you want to build" << std::endl;
+    std::cout << "The different hardware devices that can be added with HETERO/MULTI build ";
     std::cout << "are ['CPU','GPU','MYRIAD','FPGA','HDDL']" << std::endl;
-    std::cout << "An example of how to specify the HETERO build type. Ex: HETERO:GPU,CPU" << std::endl;
+    std::cout << "An example of how to specify the HETERO or MULTI build type. Ex: HETERO:GPU,CPU  Ex: MULTI:MYRIAD,CPU" << std::endl;
 }
 
 static std::vector<std::string> split(const std::string &s, char delim) {
@@ -86,12 +86,12 @@ struct OpenVINOExecutionProviderInfo {
       #elif defined OPENVINO_CONFIG_VAD_F
       device_type_ = "HETERO:FPGA,CPU";
       precision_ = "FP32";
-      #elif defined OPENVINO_CONFIG_HETERO
+      #elif defined OPENVINO_CONFIG_HETERO || defined OPENVINO_CONFIG_MULTI
         #ifdef DEVICE_NAME
           #define DEVICE DEVICE_NAME
         #endif
       dev_type = DEVICE;
-      if (dev_type.find("HETERO") == 0) {
+      if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) {
           std::vector<std::string> devices = parseDevices(dev_type);
           precision_ = "FP16";
           if(devices[0] == "CPU" || devices[0] == "GPU") {
@@ -118,7 +118,7 @@ struct OpenVINOExecutionProviderInfo {
     } else if (dev_type == "VAD-F_FP32") {
       device_type_ = "HETERO:FPGA,CPU";
       precision_ = "FP32";
-    } else if (dev_type.find("HETERO") == 0) {
+    } else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) {
       std::vector<std::string> devices = parseDevices(dev_type);
       precision_ = "FP16";
       if(devices[0] == "CPU" || devices[0] == "GPU") {
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability_2021_1.cc b/onnxruntime/core/providers/openvino/ov_versions/capability_2021_1.cc
index e4f09ad1068d5..23111ad852608 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability_2021_1.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability_2021_1.cc
@@ -208,6 +208,35 @@ bool IsOpSupported(std::string name, std::string device) {
         supported_ops.insert(supported_ops_cpu.begin(), supported_ops_cpu.end());
      }
     }
+  } else if (device.find("MULTI") == 0) {
+    // Drop the "MULTI:" prefix before splitting so that the first token is a
+    // bare device name ("MULTI:MYRIAD,GPU" -> "MYRIAD", "GPU"). If there is no
+    // ':' then find() yields npos, npos + 1 wraps to 0 and the string is kept.
+    std::vector<std::string> devices;
+    std::stringstream s_stream(device.substr(device.find(':') + 1));
+    while(s_stream.good()) {
+      std::string substr;
+      getline(s_stream, substr, ',');
+      devices.push_back(substr);
+    }
+    // Ops in common_supported_ops are accepted without a per-device check.
+    if (common_supported_ops.count(name) != 0) {
+      return true;
+    }
+    // Any other op must be supported by every recognised device in the list;
+    // device names other than MYRIAD, HDDL, GPU and CPU are not checked.
+    for (auto& it : devices) {
+      if ((it == "MYRIAD" || it == "HDDL") && supported_ops_vpu.count(name) == 0) {
+        return false;
+      }
+      if (it == "GPU" && supported_ops_gpu.count(name) == 0) {
+        return false;
+      }
+      if (it == "CPU" && supported_ops_cpu.count(name) == 0) {
+        return false;
+      }
+    }
+    return true;
   }
   return supported_ops.find(name) != supported_ops.end();
 }
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 6bc48dafbab34..d8e71b45eac24 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -82,6 +82,9 @@ struct OrtStatus {
 #elif OPENVINO_CONFIG_VAD_F
 #define BACKEND_OPENVINO "-OPENVINO_VAD_F"
 
+#elif OPENVINO_CONFIG_MULTI
+#define BACKEND_OPENVINO "-OPENVINO_MULTI"
+
 #elif OPENVINO_CONFIG_HETERO
 #define BACKEND_OPENVINO "-OPENVINO_HETERO"
 #endif
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 6c93a0f3722c0..3bae668432605 100755
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -244,7 +244,7 @@ def verify_device_type(device_read):
         res = False
         if(device_read in choices):
             res = True
-        elif(device_read.startswith("HETERO:")):
+        elif(device_read.startswith("HETERO:") or device_read.startswith("MULTI:")):
             res = True
             comma_separated_devices = device_read.split(":")
             comma_separated_devices = comma_separated_devices[1].split(',')
@@ -258,18 +258,20 @@ def verify_device_type(device_read):
                     break
 
         def Invalid_Hetero_Build():
-            print("\n" + "If trying to build Hetero, specifiy the supported devices along with it")
-            print("specify the keyword HETERO followed by the devices in the order of priority you want to build")
-            print("The different hardware devices that can be added in HETERO ")
+            print("\n" + "If trying to build Hetero or Multi, specifiy the supported devices along with it." + "\n")
+            print("specify the keyword HETERO or MULTI followed by the devices ")
+            print("in the order of priority you want to build" + "\n")
+            print("The different hardware devices that can be added in HETERO or MULTI")
             print("are ['CPU','GPU','MYRIAD','FPGA','HDDL']" + "\n")
             print("An example of how to specify the hetero build type. Ex: HETERO:GPU,CPU" + "\n")
+            print("An example of how to specify the MULTI build type. Ex: MULTI:MYRIAD,CPU" + "\n")
             sys.exit("Wrong Build Type selected")
 
         if(res is False):
             print("\n" + "You have selcted wrong configuration for the build.")
             print("pick the build type for specific Hardware Device from following options: ", choices)
             print("\n")
-            if not device_read.startswith("HETERO:"):
+            if not (device_read.startswith("HETERO:") or device_read.startswith("MULTI:")):
                 Invalid_Hetero_Build()
             sys.exit("Wrong Build Type selected")
 
@@ -776,6 +778,8 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
                        "-Donnxruntime_USE_OPENVINO_HETERO=" + (
                            "ON" if args.use_openvino.startswith("HETERO") else "OFF"),
                        "-Donnxruntime_USE_OPENVINO_DEVICE=" + (args.use_openvino),
+                       "-Donnxruntime_USE_OPENVINO_MULTI=" + (
+                           "ON" if args.use_openvino.startswith("MULTI") else "OFF"),
                        "-Donnxruntime_USE_OPENVINO_BINARY=" + (
                            "ON" if args.use_openvino else "OFF")]
     # temp turn on only for linux gpu build