From c03dcb39c487e46d47722d7fe83144111c525acb Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 29 Sep 2025 09:40:06 -0700
Subject: [PATCH 01/15] Create new folder for plugin ep python example

---
 python/plugin_EP/README.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 python/plugin_EP/README.md

diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
new file mode 100644
index 000000000..e69de29bb

From 5c444bc43cb3707c3bad64eb1570956c69dd1011 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 10:04:41 -0700
Subject: [PATCH 02/15] Add README for running inference with Plugin EP

Added prerequisites for running inference with a Plugin EP.

---
 python/plugin_EP/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
index e69de29bb..31ed166b4 100644
--- a/python/plugin_EP/README.md
+++ b/python/plugin_EP/README.md
@@ -0,0 +1,4 @@
+# Running Inference with a Plugin EP
+## Prerequisites
+- A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
+- ONNX Runtime built as a shared library (e.g., `onnxruntime.dll` on Windows or `libonnxruntime.so` on Linux), since the EP library relies on the public ORT C API (which is ABI-stable) to interact with ONNX Runtime.

From d7d81448302228f0a0fa5277a41c0a731510a34e Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 29 Sep 2025 10:09:01 -0700
Subject: [PATCH 03/15] Add a python reference

---
 python/plugin_EP/plugin_ep_inference.py | 31 +++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 python/plugin_EP/plugin_ep_inference.py

diff --git a/python/plugin_EP/plugin_ep_inference.py b/python/plugin_EP/plugin_ep_inference.py
new file mode 100644
index 000000000..b8dccb5f1
--- /dev/null
+++ b/python/plugin_EP/plugin_ep_inference.py
@@ -0,0 +1,31 @@
+import onnxruntime as onnxrt
+import numpy as np
+
+ep_lib_path = "C:\\path\\to\\plugin_trt_ep\\TensorRTEp.dll"
+ep_name = "TensorRTEp"
+ep_registration_name = ep_name
+
+onnxrt.register_execution_provider_library(ep_registration_name, ep_lib_path)
+
+ep_devices = onnxrt.get_ep_devices()
+trt_ep_device = None
+for ep_device in ep_devices:
+    if ep_device.ep_name == ep_name:
+        trt_ep_device = ep_device
+
+assert trt_ep_device is not None
+sess_options = onnxrt.SessionOptions()
+sess_options.add_provider_for_devices([trt_ep_device], {'trt_engine_cache_enable': '1'})
+
+assert sess_options.has_providers()
+
+# Run sample model and check output
+sess = onnxrt.InferenceSession("C:\\models\\mul_1.onnx", sess_options=sess_options)
+
+x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
+input_name = sess.get_inputs()[0].name
+res = sess.run([], {input_name: x})
+output_expected = np.array([[1.0, 4.0], [9.0, 16.0], [25.0, 36.0]], dtype=np.float32)
+np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)
+
+onnxrt.unregister_execution_provider_library(ep_registration_name)
\ No newline at end of file

From fe88992eb088ad5b77af7c1baa41fa5d66ea2535 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 10:31:32 -0700
Subject: [PATCH 04/15] Update TensorRTEp plugin EP inference example

Refactor plugin EP registration and session creation for TensorRTEp.
---
 python/plugin_EP/plugin_ep_inference.py | 27 +++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/python/plugin_EP/plugin_ep_inference.py b/python/plugin_EP/plugin_ep_inference.py
index b8dccb5f1..4528ad295 100644
--- a/python/plugin_EP/plugin_ep_inference.py
+++ b/python/plugin_EP/plugin_ep_inference.py
@@ -1,31 +1,46 @@
 import onnxruntime as onnxrt
 import numpy as np
-
+
+# Path to the plugin EP library
 ep_lib_path = "C:\\path\\to\\plugin_trt_ep\\TensorRTEp.dll"
-ep_name = "TensorRTEp"
-ep_registration_name = ep_name
+# Registration name can be anything the application chooses
+ep_registration_name = "TensorRTEp"
+# EP name should match the name assigned by the EP factory when creating the EP (i.e., in the implementation of OrtEpFactory::CreateEp)
+ep_name = ep_registration_name
+# Register plugin EP library with ONNX Runtime
 onnxrt.register_execution_provider_library(ep_registration_name, ep_lib_path)
+
+#
+# Create ORT session with explicit OrtEpDevice(s)
+#
+
+# Find the OrtEpDevice for "TensorRTEp"
 ep_devices = onnxrt.get_ep_devices()
 trt_ep_device = None
 for ep_device in ep_devices:
     if ep_device.ep_name == ep_name:
         trt_ep_device = ep_device
-assert trt_ep_device is not None
+assert trt_ep_device is not None
+
 sess_options = onnxrt.SessionOptions()
+
+# Equivalent to the C API's SessionOptionsAppendExecutionProvider_V2 that appends "TensorRTEp" to the ORT session options
 sess_options.add_provider_for_devices([trt_ep_device], {'trt_engine_cache_enable': '1'})
 
 assert sess_options.has_providers()
 
-# Run sample model and check output
+# Create ORT session with "TensorRTEp" plugin EP
 sess = onnxrt.InferenceSession("C:\\models\\mul_1.onnx", sess_options=sess_options)
 
+# Run sample model and check output
 x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
 input_name = sess.get_inputs()[0].name
 res = sess.run([], {input_name: x})
 output_expected = np.array([[1.0, 4.0], [9.0, 16.0], [25.0, 36.0]], dtype=np.float32)
 np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)
 
-onnxrt.unregister_execution_provider_library(ep_registration_name)
\ No newline at end of file
+# Unregister the library using the application-specified registration name.
+# Must only unregister a library after all sessions that use the library have been released.
+onnxrt.unregister_execution_provider_library(ep_registration_name)

From a98aa8bb9b2143760d713030accf7a321e917443 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 11:15:41 -0700
Subject: [PATCH 05/15] Update README with inference instructions for Plugin EP

Added instructions for running inference with explicit and automatic EP selection.

---
 python/plugin_EP/README.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
index 31ed166b4..362e29e89 100644
--- a/python/plugin_EP/README.md
+++ b/python/plugin_EP/README.md
@@ -2,3 +2,27 @@
 ## Prerequisites
 - A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
 - ONNX Runtime built as a shared library (e.g., `onnxruntime.dll` on Windows or `libonnxruntime.so` on Linux), since the EP library relies on the public ORT C API (which is ABI-stable) to interact with ONNX Runtime.
+
+## Run Inference with explicit OrtEpDevice(s)
+
+Please see `plugin_ep_inference.py` for details
+1. Register plugin EP library with ONNX Runtime via `onnxruntime.register_execution_provider_library()`
+2. Find the OrtEpDevice for that ep name via `onnxruntime.get_ep_devices()`
+3. Append the ep to ORT session option via `sess_options.add_provider_for_devices`
+4. Create ORT session with the ep
+5. Run ORT session
+6. Unregister plugin EP library via `onnxruntime.unregister_execution_provider_library()`
+
+
+ ## Run Inference with automatic EP selection
+ The workflow is the same as above except #2 and #3 step and should be replaced with `sess_options.set_provider_selection_policy(policy)`,
+ "policy" could be:
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_DEFAULT`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_CPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_NPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
+
+

From 6f27473d599576cebab3b43df44a0cdd8ad100c6 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 11:19:21 -0700
Subject: [PATCH 06/15] Add note for mul_1.onnx file location

Added a note about the location of the mul_1.onnx file.

---
 python/plugin_EP/plugin_ep_inference.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/python/plugin_EP/plugin_ep_inference.py b/python/plugin_EP/plugin_ep_inference.py
index 4528ad295..92ae0f7d5 100644
--- a/python/plugin_EP/plugin_ep_inference.py
+++ b/python/plugin_EP/plugin_ep_inference.py
@@ -44,3 +44,8 @@
 # Unregister the library using the application-specified registration name.
 # Must only unregister a library after all sessions that use the library have been released.
 onnxrt.unregister_execution_provider_library(ep_registration_name)
+
+
+# Note:
+# The mul_1.onnx can be found here:
+# https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/testdata/mul_1.onnx

From d8861c2c1eaa9bfd714cb45760598740e8993ae8 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 11:42:27 -0700
Subject: [PATCH 07/15] Enhance README with code examples for Plugin EP usage

Updated the README to include code examples for running inference with a Plugin EP and clarified the steps for explicit and automatic EP selection.

---
 python/plugin_EP/README.md | 48 ++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
index 362e29e89..9bae00094 100644
--- a/python/plugin_EP/README.md
+++ b/python/plugin_EP/README.md
@@ -5,18 +5,43 @@
 
 ## Run Inference with explicit OrtEpDevice(s)
 
-Please see `plugin_ep_inference.py` for details
-1. Register plugin EP library with ONNX Runtime via `onnxruntime.register_execution_provider_library()`
-2. Find the OrtEpDevice for that ep name via `onnxruntime.get_ep_devices()`
-3. Append the ep to ORT session option via `sess_options.add_provider_for_devices`
-4. Create ORT session with the ep
-5. Run ORT session
-6. Unregister plugin EP library via `onnxruntime.unregister_execution_provider_library()`
+Please see `plugin_ep_inference.py` for a full example.
+1. Register plugin EP library with ONNX Runtime
+    ````python
+    ep_registration_name = "plugin_ep"
+    onnxruntime.register_execution_provider_library(ep_registration_name, "plugin_ep.so")
+    ````
+2. Find the OrtEpDevice for that EP
+    ````python
+    ep_devices = onnxruntime.get_ep_devices()
+    for ep_device in ep_devices:
+        if ep_device.ep_name == ep_name:
+            target_ep_device = ep_device
+    ````
+3. Append the EP to ORT session options
+    ````python
+    sess_options.add_provider_for_devices([target_ep_device], {})
+    ````
+4. Create ORT session with the EP
+    ````python
+    sess = onnxruntime.InferenceSession("/path/to/model", sess_options=sess_options)
+    ````
+5. Run ORT session
+    ````python
+    res = sess.run([], {input_name: x})
+    ````
+6. Unregister plugin EP library
+    ````python
+    onnxruntime.unregister_execution_provider_library(ep_registration_name)
+    ````
 
 
  ## Run Inference with automatic EP selection
- The workflow is the same as above except #2 and #3 step and should be replaced with `sess_options.set_provider_selection_policy(policy)`,
- "policy" could be:
+ The workflow is the same as above except for steps 2 and 3.
+ Instead, set the selection policy directly:
+ ````python
+ sess_options.set_provider_selection_policy(policy)
+ ````
+ Available policies:
  - `onnxruntime.OrtExecutionProviderDevicePolicy_DEFAULT`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_CPU`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_NPU`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
 
+ ## Note
+ For additional APIs and details on plugin EP usage, see the official documentation:
+ https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
+

From 5274e64cd4f12663aaa1951bddb1be02611e6723 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 12:00:23 -0700
Subject: [PATCH 08/15] Update README for Python API and prerequisites

---
 python/plugin_EP/README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
index 9bae00094..d5ac2a3ce 100644
--- a/python/plugin_EP/README.md
+++ b/python/plugin_EP/README.md
@@ -1,7 +1,8 @@
-# Running Inference with a Plugin EP
+# Running Inference with a Plugin EP using Python API
 ## Prerequisites
+- ONNX Runtime version >= 1.23.0
 - A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
-- ONNX Runtime built as a shared library (e.g., `onnxruntime.dll` on Windows or `libonnxruntime.so` on Linux), since the EP library relies on the public ORT C API (which is ABI-stable) to interact with ONNX Runtime.
+- ORT GPU python wheel installed.
 
 ## Run Inference with explicit OrtEpDevice(s)
 
@@ -54,3 +55,4 @@ Please see `plugin_ep_inference.py` for a full example.
 For additional APIs and details on plugin EP usage, see the official documentation:
 https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
+

From 67fec78ab0e2ebd8f0718f9b90b64397f4cd5095 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 29 Sep 2025 14:53:51 -0700
Subject: [PATCH 09/15] Add plugin ep c++ example

---
 c_cxx/plugin_EP/CMakeLists.txt | 30 +++++++++++
 c_cxx/plugin_EP/app.cc         | 99 ++++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+)
 create mode 100644 c_cxx/plugin_EP/CMakeLists.txt
 create mode 100644 c_cxx/plugin_EP/app.cc

diff --git a/c_cxx/plugin_EP/CMakeLists.txt b/c_cxx/plugin_EP/CMakeLists.txt
new file mode 100644
index 000000000..ce4780f4d
--- /dev/null
+++ b/c_cxx/plugin_EP/CMakeLists.txt
@@ -0,0 +1,30 @@
+# usage:
+# cd build/
+# cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug -DORT_HOME=/path/to/ort_package/onnxruntime-win-x64-gpu-1.23.0
+cmake_minimum_required(VERSION 3.26)
+project(plugin_ep_app VERSION 1.0)
+set(CMAKE_CXX_STANDARD 17)
+
+file(GLOB app_src "./*.cc")
+add_executable(app ${app_src})
+
+# Add dependencies
+include(FetchContent)
+
+# Add GSL
+FetchContent_Declare(
+  gsl
+  GIT_REPOSITORY https://github.com/microsoft/GSL.git
+  GIT_TAG v4.0.0 # Use a specific tag or commit
+)
+
+FetchContent_MakeAvailable(gsl)
+
+
+set(DEPS_PATH "${CMAKE_BINARY_DIR}/_deps")
+
+target_include_directories(app PUBLIC "${ORT_HOME}/include"
+                                      "${DEPS_PATH}/gsl-src/include" # GSL is header-only
+)
+
+# Link against the ONNX Runtime shared library shipped in the ORT package
+target_link_directories(app PUBLIC "${ORT_HOME}/lib")
+target_link_libraries(app PUBLIC onnxruntime)

diff --git a/c_cxx/plugin_EP/app.cc b/c_cxx/plugin_EP/app.cc
new file mode 100644
index 000000000..9132becd5
--- /dev/null
+++ b/c_cxx/plugin_EP/app.cc
@@ -0,0 +1,99 @@
+#include "onnxruntime_cxx_api.h"
+#include <gsl/util>
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+int RunInference() {
+  const OrtApi* ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
+  Ort::Env env;
+
+  // Registration name can be anything the application chooses
+  const char* lib_registration_name = "TensorRTEp";
+
+  // Register plugin EP library with ONNX Runtime.
+  env.RegisterExecutionProviderLibrary(
+      lib_registration_name,      // Registration name can be anything the application chooses.
+      ORT_TSTR("TensorRTEp.dll")  // Path to the plugin EP library.
+  );
+
+  // Unregister the library using the application-specified registration name.
+  // Must only unregister a library after all sessions that use the library have been released.
+  auto unregister_plugin_eps_at_scope_exit = gsl::finally([&]() {
+    env.UnregisterExecutionProviderLibrary(lib_registration_name);
+  });
+
+  {
+    std::vector<Ort::ConstEpDevice> ep_devices = env.GetEpDevices();
+    // EP name should match the name assigned by the EP factory when creating the EP (i.e., in the implementation of OrtEpFactory::CreateEp())
+    std::string ep_name = lib_registration_name;
+
+    // Find the Ort::EpDevice for "TensorRTEp".
+    std::vector<Ort::ConstEpDevice> selected_ep_devices = {};
+    for (Ort::ConstEpDevice ep_device : ep_devices) {
+      if (std::string(ep_device.EpName()) == ep_name) {
+        selected_ep_devices.push_back(ep_device);
+        break;
+      }
+    }
+
+    if (selected_ep_devices.empty()) {
+      // Did not find EP. Report application error ...
+      std::cerr << "Did not find EP: " << ep_name << std::endl;
+      return -1;
+    }
+
+    std::unordered_map<std::string, std::string> ep_options;  // Optional EP options, e.g., {{"trt_engine_cache_enable", "1"}} as in the Python example.
+    Ort::SessionOptions session_options;
+    session_options.AppendExecutionProvider_V2(env, selected_ep_devices, ep_options);
+
+    Ort::Session session(env, ORT_TSTR("mul_1.onnx"), session_options);
+
+    // Get default ORT allocator
+    Ort::AllocatorWithDefaultOptions allocator;
+
+    // Get input name
+    Ort::AllocatedStringPtr input_name_ptr = session.GetInputNameAllocated(0, allocator);  // Keep the smart pointer alive to avoid dangling pointer
+    const char* input_name = input_name_ptr.get();
+
+    // Input data
+    std::vector<float> input_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+
+    // Input shape: (3, 2)
+    std::vector<int64_t> input_shape{3, 2};
+
+    // Create tensor
+    Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+
+    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_values.data(), input_values.size(),
+                                                              input_shape.data(), input_shape.size());
+
+    // Get output name
+    Ort::AllocatedStringPtr output_name_ptr =
+        session.GetOutputNameAllocated(0, allocator);  // Keep the smart pointer alive to avoid dangling pointer
+    const char* output_name = output_name_ptr.get();
+
+    // Run session
+    std::vector<const char*> input_names{input_name};
+    std::vector<const char*> output_names{output_name};
+
+    auto output_tensors =
+        session.Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), 1);
+
+    // Extract output
+    float* output_data = output_tensors.front().GetTensorMutableData<float>();
+
+    std::cout << "Output:" << std::endl;
+    for (int i = 0; i < 6; i++) {
+      std::cout << output_data[i] << " ";
+    }
+    std::cout << std::endl;
+
+    // Expected output: [[1,4],[9,16],[25,36]]
+  }
+
+  return 0;
+}
+
+int main(int argc, char* argv[]) {
+  return RunInference();
+}
\ No newline at end of file

From 184dc35967b7c8e63770ca11e2dbe3df984752bf Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 29 Sep 2025 14:56:27 -0700
Subject: [PATCH 10/15] Add readme

---
 c_cxx/plugin_EP/README.md | 58 +++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 c_cxx/plugin_EP/README.md

diff --git a/c_cxx/plugin_EP/README.md b/c_cxx/plugin_EP/README.md
new file mode 100644
index 000000000..8535c6874
--- /dev/null
+++ b/c_cxx/plugin_EP/README.md
@@ -0,0 +1,58 @@
+# Running Inference with a Plugin EP using Python API
+## Prerequisites
+- ONNX Runtime version >= 1.23.0
+- A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
+- ORT GPU python wheel installed.
+
+## Run Inference with explicit OrtEpDevice(s)
+
+Please see `plugin_ep_inference.py` for a full example.
+1. Register plugin EP library with ONNX Runtime
+    ````python
+    ep_registration_name = "plugin_ep"
+    onnxruntime.register_execution_provider_library(ep_registration_name, "plugin_ep.so")
+    ````
+2. Find the OrtEpDevice for that EP
+    ````python
+    ep_devices = onnxruntime.get_ep_devices()
+    for ep_device in ep_devices:
+        if ep_device.ep_name == ep_name:
+            target_ep_device = ep_device
+    ````
+3. Append the EP to ORT session options
+    ````python
+    sess_options.add_provider_for_devices([target_ep_device], {})
+    ````
+4. Create ORT session with the EP
+    ````python
+    sess = onnxruntime.InferenceSession("/path/to/model", sess_options=sess_options)
+    ````
+5. Run ORT session
+    ````python
+    res = sess.run([], {input_name: x})
+    ````
+6. Unregister plugin EP library
+    ````python
+    onnxruntime.unregister_execution_provider_library(ep_registration_name)
+    ````
+
+
+ ## Run Inference with automatic EP selection
+ The workflow is the same as above except for steps 2 and 3.
+ Instead, set the selection policy directly:
+ ````python
+ sess_options.set_provider_selection_policy(policy)
+ ````
+ Available policies:
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_DEFAULT`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_CPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_NPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
+
+ ## Note
+ For additional APIs and details on plugin EP usage, see the official documentation:
+ https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
+

From f488dd846fea1bc982ee91e699c254bc67cb90b0 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 15:17:42 -0700
Subject: [PATCH 11/15] Change API reference from Python to C++ in README

Updated README to reflect C++ API usage instead of Python API and added additional prerequisites.

---
 c_cxx/plugin_EP/README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/c_cxx/plugin_EP/README.md b/c_cxx/plugin_EP/README.md
index 8535c6874..d432c53ea 100644
--- a/c_cxx/plugin_EP/README.md
+++ b/c_cxx/plugin_EP/README.md
@@ -1,8 +1,10 @@
-# Running Inference with a Plugin EP using Python API
+# Running Inference with a Plugin EP using C++ API
 ## Prerequisites
 - ONNX Runtime version >= 1.23.0
 - A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
-- ORT GPU python wheel installed.
+- ONNX Runtime built as a shared library (e.g., `onnxruntime.dll` on Windows or `libonnxruntime.so` on Linux), since the EP library relies on the public ORT C API (which is ABI-stable) to interact with ONNX Runtime.
+- The `onnxruntime_providers_shared.dll` (Windows) or `libonnxruntime_providers_shared.so` (Linux) library is also required. When a plugin EP is registered, ONNX Runtime internally calls `LoadPluginOrProviderBridge`, which depends on this shared library to determine whether the EP DLL is a plugin or a provider-bridge.
+- If you are using a pre-built ONNX Runtime package, all required libraries (e.g., `onnxruntime.dll`, `onnxruntime_providers_shared.dll`, etc.) are already included.
 
 ## Run Inference with explicit OrtEpDevice(s)
 

From b0ab382d637036cf1eccfd069a4d693231492165 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 29 Sep 2025 15:20:38 -0700
Subject: [PATCH 12/15] rename

---
 c_cxx/plugin_EP/{app.cc => plugin_ep_inference.cc} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename c_cxx/plugin_EP/{app.cc => plugin_ep_inference.cc} (100%)

diff --git a/c_cxx/plugin_EP/app.cc b/c_cxx/plugin_EP/plugin_ep_inference.cc
similarity index 100%
rename from c_cxx/plugin_EP/app.cc
rename to c_cxx/plugin_EP/plugin_ep_inference.cc

From 23b99a13daadea283b5d80359bce4101424c4c18 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 15:36:44 -0700
Subject: [PATCH 13/15] Update README for plugin EP with C++ examples

---
 c_cxx/plugin_EP/README.md | 59 ++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/c_cxx/plugin_EP/README.md b/c_cxx/plugin_EP/README.md
index d432c53ea..cbdf76652 100644
--- a/c_cxx/plugin_EP/README.md
+++ b/c_cxx/plugin_EP/README.md
@@ -8,33 +8,42 @@
-Please see `plugin_ep_inference.py` for a full example.
+Please see `plugin_ep_inference.cc` for a full example.
 1. Register plugin EP library with ONNX Runtime
-    ````python
-    ep_registration_name = "plugin_ep"
-    onnxruntime.register_execution_provider_library(ep_registration_name, "plugin_ep.so")
+    ````c++
+    env.RegisterExecutionProviderLibrary(
+        "plugin_ep",              // Registration name can be anything the application chooses.
+        ORT_TSTR("plugin_ep.so")  // Path to the plugin EP library.
+    );
     ````
-2. Find the OrtEpDevice for that EP
-    ````python
-    ep_devices = onnxruntime.get_ep_devices()
-    for ep_device in ep_devices:
-        if ep_device.ep_name == ep_name:
-            target_ep_device = ep_device
+2. Find the OrtEpDevice for that plugin EP
+    ````c++
+    // Find the Ort::EpDevice for ep_name
+    std::vector<Ort::ConstEpDevice> ep_devices = env.GetEpDevices();
+    std::vector<Ort::ConstEpDevice> selected_ep_devices = {};
+    for (Ort::ConstEpDevice ep_device : ep_devices) {
+      if (std::string(ep_device.EpName()) == ep_name) {
+        selected_ep_devices.push_back(ep_device);
+        break;
+      }
+    }
     ````
 3. Append the EP to ORT session options
-    ````python
-    sess_options.add_provider_for_devices([target_ep_device], {})
+    ````c++
+    std::unordered_map<std::string, std::string> ep_options;  // Optional EP options.
+    Ort::SessionOptions session_options;
+    session_options.AppendExecutionProvider_V2(env, selected_ep_devices, ep_options);
     ````
 4. Create ORT session with the EP
-    ````python
-    sess = onnxruntime.InferenceSession("/path/to/model", sess_options=sess_options)
+    ````c++
+    Ort::Session session(env, ORT_TSTR("path/to/model"), session_options);
     ````
 5. Run ORT session
-    ````python
-    res = sess.run([], {input_name: x})
+    ````c++
+    auto output_tensors =
+        session.Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), 1);
     ````
 6. Unregister plugin EP library
-    ````python
-    onnxruntime.unregister_execution_provider_library(ep_registration_name)
+    ````c++
+    env.UnregisterExecutionProviderLibrary(lib_registration_name);
     ````
 
 
  ## Run Inference with automatic EP selection
  The workflow is the same as above except for steps 2 and 3.
  Instead, set the selection policy directly:
- ````python
- sess_options.set_provider_selection_policy(policy)
+ ````c++
+ session_options.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU);
  ````
  Available policies:
- - `onnxruntime.OrtExecutionProviderDevicePolicy_DEFAULT`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_CPU`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_NPU`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
+ - `OrtExecutionProviderDevicePolicy_DEFAULT`
+ - `OrtExecutionProviderDevicePolicy_PREFER_CPU`
+ - `OrtExecutionProviderDevicePolicy_PREFER_NPU`
+ - `OrtExecutionProviderDevicePolicy_PREFER_GPU`
+ - `OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
+ - `OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
+ - `OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
 
  ## Note
  For additional APIs and details on plugin EP usage, see the official documentation:
  https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
 

From 5ca428446a73d1710c8357b8cc5613293b9bafe1 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 15:40:03 -0700
Subject: [PATCH 14/15] Add usage instructions for CMake build

---
 c_cxx/plugin_EP/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/c_cxx/plugin_EP/CMakeLists.txt b/c_cxx/plugin_EP/CMakeLists.txt
index ce4780f4d..b6f0c657c 100644
--- a/c_cxx/plugin_EP/CMakeLists.txt
+++ b/c_cxx/plugin_EP/CMakeLists.txt
@@ -1,6 +1,7 @@
 # usage:
 # cd build/
 # cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug -DORT_HOME=/path/to/ort_package/onnxruntime-win-x64-gpu-1.23.0
+# cmake --build ./ --config Debug
 cmake_minimum_required(VERSION 3.26)
 project(plugin_ep_app VERSION 1.0)
 set(CMAKE_CXX_STANDARD 17)

From 54fe49d16a070c7d83cc9726c6374572f685b117 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 15:51:06 -0700
Subject: [PATCH 15/15] Update

Added missing newline at end of file and included note about mul_1.onnx location.

---
 c_cxx/plugin_EP/plugin_ep_inference.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/c_cxx/plugin_EP/plugin_ep_inference.cc b/c_cxx/plugin_EP/plugin_ep_inference.cc
index 9132becd5..4713b56e4 100644
--- a/c_cxx/plugin_EP/plugin_ep_inference.cc
+++ b/c_cxx/plugin_EP/plugin_ep_inference.cc
@@ -96,4 +96,8 @@
 int main(int argc, char* argv[]) {
   return RunInference();
-}
\ No newline at end of file
+}
+
+// Note:
+// The mul_1.onnx can be found here:
+// https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/testdata/mul_1.onnx
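For quick experimentation, the automatic-EP-selection flow described in the READMEs above can also be combined into one small Python script. The snippet below is only an illustrative sketch: it assumes the same `TensorRTEp` plugin library and `mul_1.onnx` model used in `plugin_ep_inference.py`, and spells the policy constant exactly as listed in the READMEs.

````python
import numpy as np
import onnxruntime as onnxrt

# Register the plugin EP library under an application-chosen name.
ep_registration_name = "TensorRTEp"
onnxrt.register_execution_provider_library(ep_registration_name,
                                           "C:\\path\\to\\plugin_trt_ep\\TensorRTEp.dll")

# Let ONNX Runtime pick the EP/device instead of appending an explicit OrtEpDevice.
sess_options = onnxrt.SessionOptions()
sess_options.set_provider_selection_policy(onnxrt.OrtExecutionProviderDevicePolicy_PREFER_GPU)

sess = onnxrt.InferenceSession("C:\\models\\mul_1.onnx", sess_options=sess_options)
x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
print(sess.run([], {sess.get_inputs()[0].name: x})[0])

# Unregister only after all sessions that use the library have been released.
del sess
onnxrt.unregister_execution_provider_library(ep_registration_name)
````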