From c03dcb39c487e46d47722d7fe83144111c525acb Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 29 Sep 2025 09:40:06 -0700
Subject: [PATCH 01/15] Create new folder for plugin ep python example

---
 python/plugin_EP/README.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 python/plugin_EP/README.md

diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
new file mode 100644
index 000000000..e69de29bb

From 5c444bc43cb3707c3bad64eb1570956c69dd1011 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 10:04:41 -0700
Subject: [PATCH 02/15] Add README for running inference with Plugin EP

Added prerequisites for running inference with a Plugin EP.

---
 python/plugin_EP/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
index e69de29bb..31ed166b4 100644
--- a/python/plugin_EP/README.md
+++ b/python/plugin_EP/README.md
@@ -0,0 +1,4 @@
+# Running Inference with a Plugin EP
+## Prerequisites
+- A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
+- ONNX Runtime built as a shared library (e.g., `onnxruntime.dll` on Windows or `libonnxruntime.so` on Linux), since the EP library relies on the public ORT C API (which is ABI-stable) to interact with ONNX Runtime.

From d7d81448302228f0a0fa5277a41c0a731510a34e Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 29 Sep 2025 10:09:01 -0700
Subject: [PATCH 03/15] Add a python reference

---
 python/plugin_EP/plugin_ep_inference.py | 31 +++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 python/plugin_EP/plugin_ep_inference.py

diff --git a/python/plugin_EP/plugin_ep_inference.py b/python/plugin_EP/plugin_ep_inference.py
new file mode 100644
index 000000000..b8dccb5f1
--- /dev/null
+++ b/python/plugin_EP/plugin_ep_inference.py
@@ -0,0 +1,31 @@
+import onnxruntime as onnxrt
+import numpy as np
+
+ep_lib_path = "C:\\path\\to\\plugin_trt_ep\\TensorRTEp.dll"
+ep_name = "TensorRTEp"
+ep_registration_name = ep_name
+
+onnxrt.register_execution_provider_library(ep_registration_name, ep_lib_path)
+
+ep_devices = onnxrt.get_ep_devices()
+trt_ep_device = None
+for ep_device in ep_devices:
+    if ep_device.ep_name == ep_name:
+        trt_ep_device = ep_device
+
+assert trt_ep_device is not None
+sess_options = onnxrt.SessionOptions()
+sess_options.add_provider_for_devices([trt_ep_device], {'trt_engine_cache_enable': '1'})
+
+assert sess_options.has_providers()
+
+# Run sample model and check output
+sess = onnxrt.InferenceSession("C:\\models\\mul_1.onnx", sess_options=sess_options)
+
+x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
+input_name = sess.get_inputs()[0].name
+res = sess.run([], {input_name: x})
+output_expected = np.array([[1.0, 4.0], [9.0, 16.0], [25.0, 36.0]], dtype=np.float32)
+np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)
+
+onnxrt.unregister_execution_provider_library(ep_registration_name)
\ No newline at end of file

From fe88992eb088ad5b77af7c1baa41fa5d66ea2535 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 10:31:32 -0700
Subject: [PATCH 04/15] Update TensorRTEp plugin EP inference example

Refactor plugin EP registration and session creation for TensorRTEp.
---
 python/plugin_EP/plugin_ep_inference.py | 27 +++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/python/plugin_EP/plugin_ep_inference.py b/python/plugin_EP/plugin_ep_inference.py
index b8dccb5f1..4528ad295 100644
--- a/python/plugin_EP/plugin_ep_inference.py
+++ b/python/plugin_EP/plugin_ep_inference.py
@@ -1,31 +1,46 @@
 import onnxruntime as onnxrt
 import numpy as np
-
+
+# Path to the plugin EP library
 ep_lib_path = "C:\\path\\to\\plugin_trt_ep\\TensorRTEp.dll"
-ep_name = "TensorRTEp"
-ep_registration_name = ep_name
+# Registration name can be anything the application chooses
+ep_registration_name = "TensorRTEp"
+# EP name should match the name assigned by the EP factory when creating the EP (i.e., in the implementation of OrtEpFactory::CreateEp)
+ep_name = ep_registration_name
+# Register plugin EP library with ONNX Runtime
 onnxrt.register_execution_provider_library(ep_registration_name, ep_lib_path)
+
+#
+# Create ORT session with explicit OrtEpDevice(s)
+#
+
+# Find the OrtEpDevice for "TensorRTEp"
 ep_devices = onnxrt.get_ep_devices()
 trt_ep_device = None
 for ep_device in ep_devices:
     if ep_device.ep_name == ep_name:
         trt_ep_device = ep_device
-assert trt_ep_device is not None
+assert trt_ep_device is not None
+
 sess_options = onnxrt.SessionOptions()
+
+# Equivalent to the C API's SessionOptionsAppendExecutionProvider_V2 that appends "TensorRTEp" to the ORT session options
 sess_options.add_provider_for_devices([trt_ep_device], {'trt_engine_cache_enable': '1'})
 
 assert sess_options.has_providers()
 
-# Run sample model and check output
+# Create ORT session with "TensorRTEp" plugin EP
 sess = onnxrt.InferenceSession("C:\\models\\mul_1.onnx", sess_options=sess_options)
 
+# Run sample model and check output
 x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
 input_name = sess.get_inputs()[0].name
 res = sess.run([], {input_name: x})
 output_expected = np.array([[1.0, 4.0], [9.0, 16.0], [25.0, 36.0]], dtype=np.float32)
 np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)
 
-onnxrt.unregister_execution_provider_library(ep_registration_name)
\ No newline at end of file
+# Unregister the library using the application-specified registration name.
+# Must only unregister a library after all sessions that use the library have been released.
+onnxrt.unregister_execution_provider_library(ep_registration_name)

From a98aa8bb9b2143760d713030accf7a321e917443 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 11:15:41 -0700
Subject: [PATCH 05/15] Update README with inference instructions for Plugin EP

Added instructions for running inference with explicit and automatic EP selection.

---
 python/plugin_EP/README.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
index 31ed166b4..362e29e89 100644
--- a/python/plugin_EP/README.md
+++ b/python/plugin_EP/README.md
@@ -2,3 +2,27 @@
 ## Prerequisites
 - A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
 - ONNX Runtime built as a shared library (e.g., `onnxruntime.dll` on Windows or `libonnxruntime.so` on Linux), since the EP library relies on the public ORT C API (which is ABI-stable) to interact with ONNX Runtime.
+
+## Run Inference with explicit OrtEpDevice(s)
+
+Please see `plugin_ep_inference.py` for details
+1. Register plugin EP library with ONNX Runtime via `onnxruntime.register_execution_provider_library()`
+2. Find the OrtEpDevice for that ep name via `onnxruntime.get_ep_devices()`
+3. Append the ep to ORT session option via `sess_options.add_provider_for_devices`
+4. Create ORT session with the ep
+5. Run ORT session
+6. Unregister plugin EP library via `onnxruntime.unregister_execution_provider_library()`
+
+
+ ## Run Inference with automatic EP selection
+ The workflow is the same as above except #2 and #3 step and should be replaced with `sess_options.set_provider_selection_policy(policy)`,
+ "policy" could be:
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_DEFAULT`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_CPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_NPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
+
+

From 6f27473d599576cebab3b43df44a0cdd8ad100c6 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 11:19:21 -0700
Subject: [PATCH 06/15] Add note for mul_1.onnx file location

Added a note about the location of the mul_1.onnx file.

---
 python/plugin_EP/plugin_ep_inference.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/python/plugin_EP/plugin_ep_inference.py b/python/plugin_EP/plugin_ep_inference.py
index 4528ad295..92ae0f7d5 100644
--- a/python/plugin_EP/plugin_ep_inference.py
+++ b/python/plugin_EP/plugin_ep_inference.py
@@ -44,3 +44,8 @@
 # Unregister the library using the application-specified registration name.
 # Must only unregister a library after all sessions that use the library have been released.
 onnxrt.unregister_execution_provider_library(ep_registration_name)
+
+
+# Note:
+# The mul_1.onnx can be found here:
+# https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/testdata/mul_1.onnx

From d8861c2c1eaa9bfd714cb45760598740e8993ae8 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 11:42:27 -0700
Subject: [PATCH 07/15] Enhance README with code examples for Plugin EP usage

Updated the README to include code examples for running inference with a Plugin EP and clarified the steps for explicit and automatic EP selection.

---
 python/plugin_EP/README.md | 48 ++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
index 362e29e89..9bae00094 100644
--- a/python/plugin_EP/README.md
+++ b/python/plugin_EP/README.md
@@ -5,18 +5,43 @@
 
 ## Run Inference with explicit OrtEpDevice(s)
 
-Please see `plugin_ep_inference.py` for details
-1. Register plugin EP library with ONNX Runtime via `onnxruntime.register_execution_provider_library()`
-2. Find the OrtEpDevice for that ep name via `onnxruntime.get_ep_devices()`
-3. Append the ep to ORT session option via `sess_options.add_provider_for_devices`
-4. Create ORT session with the ep
-5. Run ORT session
-6. Unregister plugin EP library via `onnxruntime.unregister_execution_provider_library()`
+Please see `plugin_ep_inference.py` for a full example.
+1. Register plugin EP library with ONNX Runtime
+    ````python
+    ep_registration_name = "plugin_ep"
+    onnxruntime.register_execution_provider_library(ep_registration_name, "plugin_ep.so")
+    ````
+2. Find the OrtEpDevice for that EP
+    ````python
+    ep_devices = onnxruntime.get_ep_devices()
+    for ep_device in ep_devices:
+        if ep_device.ep_name == ep_name:
+            target_ep_device = ep_device
+    ````
+3. Append the EP to ORT session options
+    ````python
+    sess_options.add_provider_for_devices([target_ep_device], {})
+    ````
+4. Create ORT session with the EP
+    ````python
+    sess = onnxruntime.InferenceSession("/path/to/model", sess_options=sess_options)
+    ````
+5. Run ORT session
+    ````python
+    res = sess.run([], {input_name: x})
+    ````
+6. Unregister plugin EP library
+    ````python
+    onnxruntime.unregister_execution_provider_library(ep_registration_name)
+    ````
 
 
  ## Run Inference with automatic EP selection
- The workflow is the same as above except #2 and #3 step and should be replaced with `sess_options.set_provider_selection_policy(policy)`,
- "policy" could be:
+ The workflow is the same as above except for steps 2 and 3.
+ Instead, set the selection policy directly:
+ ````python
+ sess_options.set_provider_selection_policy(policy)
+ ````
+ Available policies:
  - `onnxruntime.OrtExecutionProviderDevicePolicy_DEFAULT`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_CPU`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_NPU`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
  - `onnxruntime.OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
 
+ ## Note
+ For additional APIs and details on plugin EP usage, see the official documentation:
+ https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
+

From 5274e64cd4f12663aaa1951bddb1be02611e6723 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 12:00:23 -0700
Subject: [PATCH 08/15] Update README for Python API and prerequisites

---
 python/plugin_EP/README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
index 9bae00094..d5ac2a3ce 100644
--- a/python/plugin_EP/README.md
+++ b/python/plugin_EP/README.md
@@ -1,7 +1,8 @@
-# Running Inference with a Plugin EP
+# Running Inference with a Plugin EP using Python API
 ## Prerequisites
+- ONNX Runtime version >= 1.23.0
 - A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
-- ONNX Runtime built as a shared library (e.g., `onnxruntime.dll` on Windows or `libonnxruntime.so` on Linux), since the EP library relies on the public ORT C API (which is ABI-stable) to interact with ONNX Runtime.
+- ORT GPU python wheel installed.
 
 ## Run Inference with explicit OrtEpDevice(s)
 
@@ -54,3 +55,4 @@ Please see `plugin_ep_inference.py` for a full example.
 For additional APIs and details on plugin EP usage, see the official documentation:
 https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
+

From 67fec78ab0e2ebd8f0718f9b90b64397f4cd5095 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 29 Sep 2025 14:53:51 -0700
Subject: [PATCH 09/15] Add plugin ep c++ example

---
 c_cxx/plugin_EP/CMakeLists.txt | 30 +++++++++++
 c_cxx/plugin_EP/app.cc         | 99 ++++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+)
 create mode 100644 c_cxx/plugin_EP/CMakeLists.txt
 create mode 100644 c_cxx/plugin_EP/app.cc

diff --git a/c_cxx/plugin_EP/CMakeLists.txt b/c_cxx/plugin_EP/CMakeLists.txt
new file mode 100644
index 000000000..ce4780f4d
--- /dev/null
+++ b/c_cxx/plugin_EP/CMakeLists.txt
@@ -0,0 +1,30 @@
+# usage:
+# cd build/
+# cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug -DORT_HOME=/path/to/ort_package/onnxruntime-win-x64-gpu-1.23.0
+cmake_minimum_required(VERSION 3.26)
+project(plugin_ep_app VERSION 1.0)
+set(CMAKE_CXX_STANDARD 17)
+
+file(GLOB app_src "./*.cc")
+add_executable(app ${app_src})
+
+# Add dependencies
+include(FetchContent)
+
+# Add GSL
+FetchContent_Declare(
+  gsl
+  GIT_REPOSITORY https://github.com/microsoft/GSL.git
+  GIT_TAG v4.0.0 # Use a specific tag or commit
+)
+
+FetchContent_MakeAvailable(gsl)
+
+
+set(DEPS_PATH "${CMAKE_BINARY_DIR}/_deps")
+
+target_include_directories(app PUBLIC "${ORT_HOME}/include"
+                                      "${DEPS_PATH}/gsl-src/include" # GSL is header-only
+)
+
+# Link against the ONNX Runtime shared library shipped in the ORT package
+target_link_directories(app PUBLIC "${ORT_HOME}/lib")
+target_link_libraries(app PUBLIC onnxruntime)

diff --git a/c_cxx/plugin_EP/app.cc b/c_cxx/plugin_EP/app.cc
new file mode 100644
index 000000000..9132becd5
--- /dev/null
+++ b/c_cxx/plugin_EP/app.cc
@@ -0,0 +1,99 @@
+#include "onnxruntime_cxx_api.h"
+#include <gsl/util>
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+int RunInference() {
+  const OrtApi* ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
+  Ort::Env env;
+
+  // Registration name can be anything the application chooses
+  const char* lib_registration_name = "TensorRTEp";
+
+  // Register plugin EP library with ONNX Runtime.
+  env.RegisterExecutionProviderLibrary(
+      lib_registration_name,      // Registration name can be anything the application chooses.
+      ORT_TSTR("TensorRTEp.dll")  // Path to the plugin EP library.
+  );
+
+  // Unregister the library using the application-specified registration name.
+  // Must only unregister a library after all sessions that use the library have been released.
+  auto unregister_plugin_eps_at_scope_exit = gsl::finally([&]() {
+    env.UnregisterExecutionProviderLibrary(lib_registration_name);
+  });
+
+  {
+    std::vector<Ort::ConstEpDevice> ep_devices = env.GetEpDevices();
+    // EP name should match the name assigned by the EP factory when creating the EP (i.e., in the implementation of OrtEpFactory::CreateEp())
+    std::string ep_name = lib_registration_name;
+
+    // Find the Ort::EpDevice for "TensorRTEp".
+    std::vector<Ort::ConstEpDevice> selected_ep_devices = {};
+    for (Ort::ConstEpDevice ep_device : ep_devices) {
+      if (std::string(ep_device.EpName()) == ep_name) {
+        selected_ep_devices.push_back(ep_device);
+        break;
+      }
+    }
+
+    if (selected_ep_devices.empty()) {
+      // Did not find EP. Report application error ...
+      std::cerr << "Did not find EP: " << ep_name << std::endl;
+      return -1;
+    }
+
+    std::unordered_map<std::string, std::string> ep_options;  // Optional EP options, e.g., {{"trt_engine_cache_enable", "1"}} as in the Python example.
+    Ort::SessionOptions session_options;
+    session_options.AppendExecutionProvider_V2(env, selected_ep_devices, ep_options);
+
+    Ort::Session session(env, ORT_TSTR("mul_1.onnx"), session_options);
+
+    // Get default ORT allocator
+    Ort::AllocatorWithDefaultOptions allocator;
+
+    // Get input name
+    Ort::AllocatedStringPtr input_name_ptr = session.GetInputNameAllocated(0, allocator);  // Keep the smart pointer alive to avoid dangling pointer
+    const char* input_name = input_name_ptr.get();
+
+    // Input data
+    std::vector<float> input_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+
+    // Input shape: (3, 2)
+    std::vector<int64_t> input_shape{3, 2};
+
+    // Create tensor
+    Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+
+    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_values.data(), input_values.size(),
+                                                              input_shape.data(), input_shape.size());
+
+    // Get output name
+    Ort::AllocatedStringPtr output_name_ptr =
+        session.GetOutputNameAllocated(0, allocator);  // Keep the smart pointer alive to avoid dangling pointer
+    const char* output_name = output_name_ptr.get();
+
+    // Run session
+    std::vector<const char*> input_names{input_name};
+    std::vector<const char*> output_names{output_name};
+
+    auto output_tensors =
+        session.Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), 1);
+
+    // Extract output
+    float* output_data = output_tensors.front().GetTensorMutableData<float>();
+
+    std::cout << "Output:" << std::endl;
+    for (int i = 0; i < 6; i++) {
+      std::cout << output_data[i] << " ";
+    }
+    std::cout << std::endl;
+
+    // Expected output: [[1,4],[9,16],[25,36]]
+  }
+
+  return 0;
+}
+
+int main(int argc, char* argv[]) {
+  return RunInference();
+}
\ No newline at end of file

From 184dc35967b7c8e63770ca11e2dbe3df984752bf Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 29 Sep 2025 14:56:27 -0700
Subject: [PATCH 10/15] Add readme

---
 c_cxx/plugin_EP/README.md | 58 +++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 c_cxx/plugin_EP/README.md

diff --git a/c_cxx/plugin_EP/README.md b/c_cxx/plugin_EP/README.md
new file mode 100644
index 000000000..8535c6874
--- /dev/null
+++ b/c_cxx/plugin_EP/README.md
@@ -0,0 +1,58 @@
+# Running Inference with a Plugin EP using Python API
+## Prerequisites
+- ONNX Runtime version >= 1.23.0
+- A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
+- ORT GPU python wheel installed.
+
+## Run Inference with explicit OrtEpDevice(s)
+
+Please see `plugin_ep_inference.py` for a full example.
+1. Register plugin EP library with ONNX Runtime
+    ````python
+    ep_registration_name = "plugin_ep"
+    onnxruntime.register_execution_provider_library(ep_registration_name, "plugin_ep.so")
+    ````
+2. Find the OrtEpDevice for that EP
+    ````python
+    ep_devices = onnxruntime.get_ep_devices()
+    for ep_device in ep_devices:
+        if ep_device.ep_name == ep_name:
+            target_ep_device = ep_device
+    ````
+3. Append the EP to ORT session options
+    ````python
+    sess_options.add_provider_for_devices([target_ep_device], {})
+    ````
+4. Create ORT session with the EP
+    ````python
+    sess = onnxruntime.InferenceSession("/path/to/model", sess_options=sess_options)
+    ````
+5. Run ORT session
+    ````python
+    res = sess.run([], {input_name: x})
+    ````
+6. Unregister plugin EP library
+    ````python
+    onnxruntime.unregister_execution_provider_library(ep_registration_name)
+    ````
+
+
+ ## Run Inference with automatic EP selection
+ The workflow is the same as above except for steps 2 and 3.
+ Instead, set the selection policy directly:
+ ````python
+ sess_options.set_provider_selection_policy(policy)
+ ````
+ Available policies:
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_DEFAULT`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_CPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_NPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
+ - `onnxruntime.OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
+
+ ## Note
+ For additional APIs and details on plugin EP usage, see the official documentation:
+ https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
+

From f488dd846fea1bc982ee91e699c254bc67cb90b0 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 15:17:42 -0700
Subject: [PATCH 11/15] Change API reference from Python to C++ in README

Updated README to reflect C++ API usage instead of Python API and added additional prerequisites.

---
 c_cxx/plugin_EP/README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/c_cxx/plugin_EP/README.md b/c_cxx/plugin_EP/README.md
index 8535c6874..d432c53ea 100644
--- a/c_cxx/plugin_EP/README.md
+++ b/c_cxx/plugin_EP/README.md
@@ -1,8 +1,10 @@
-# Running Inference with a Plugin EP using Python API
+# Running Inference with a Plugin EP using C++ API
 ## Prerequisites
 - ONNX Runtime version >= 1.23.0
 - A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
-- ORT GPU python wheel installed.
+- ONNX Runtime built as a shared library (e.g., `onnxruntime.dll` on Windows or `libonnxruntime.so` on Linux), since the EP library relies on the public ORT C API (which is ABI-stable) to interact with ONNX Runtime.
+- The `onnxruntime_providers_shared.dll` (Windows) or `libonnxruntime_providers_shared.so` (Linux) library is also required. When a plugin EP is registered, ONNX Runtime internally calls `LoadPluginOrProviderBridge`, which depends on this shared library to determine whether the EP DLL is a plugin or a provider-bridge.
+- If you are using a pre-built ONNX Runtime package, all required libraries (e.g., `onnxruntime.dll`, `onnxruntime_providers_shared.dll`, etc.) are already included.
 
 ## Run Inference with explicit OrtEpDevice(s)
 

From b0ab382d637036cf1eccfd069a4d693231492165 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 29 Sep 2025 15:20:38 -0700
Subject: [PATCH 12/15] rename

---
 c_cxx/plugin_EP/{app.cc => plugin_ep_inference.cc} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename c_cxx/plugin_EP/{app.cc => plugin_ep_inference.cc} (100%)

diff --git a/c_cxx/plugin_EP/app.cc b/c_cxx/plugin_EP/plugin_ep_inference.cc
similarity index 100%
rename from c_cxx/plugin_EP/app.cc
rename to c_cxx/plugin_EP/plugin_ep_inference.cc

From 23b99a13daadea283b5d80359bce4101424c4c18 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 15:36:44 -0700
Subject: [PATCH 13/15] Update README for plugin EP with C++ examples

---
 c_cxx/plugin_EP/README.md | 59 ++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/c_cxx/plugin_EP/README.md b/c_cxx/plugin_EP/README.md
index d432c53ea..cbdf76652 100644
--- a/c_cxx/plugin_EP/README.md
+++ b/c_cxx/plugin_EP/README.md
@@ -8,33 +8,42 @@
-Please see `plugin_ep_inference.py` for a full example.
+Please see `plugin_ep_inference.cc` for a full example.
 1. Register plugin EP library with ONNX Runtime
-    ````python
-    ep_registration_name = "plugin_ep"
-    onnxruntime.register_execution_provider_library(ep_registration_name, "plugin_ep.so")
+    ````c++
+    env.RegisterExecutionProviderLibrary(
+        "plugin_ep",              // Registration name can be anything the application chooses.
+        ORT_TSTR("plugin_ep.so")  // Path to the plugin EP library.
+    );
     ````
-2. Find the OrtEpDevice for that EP
-    ````python
-    ep_devices = onnxruntime.get_ep_devices()
-    for ep_device in ep_devices:
-        if ep_device.ep_name == ep_name:
-            target_ep_device = ep_device
+2. Find the OrtEpDevice for that plugin EP
+    ````c++
+    // Find the Ort::EpDevice for ep_name
+    std::vector<Ort::ConstEpDevice> ep_devices = env.GetEpDevices();
+    std::vector<Ort::ConstEpDevice> selected_ep_devices = {};
+    for (Ort::ConstEpDevice ep_device : ep_devices) {
+      if (std::string(ep_device.EpName()) == ep_name) {
+        selected_ep_devices.push_back(ep_device);
+        break;
+      }
+    }
     ````
 3. Append the EP to ORT session options
-    ````python
-    sess_options.add_provider_for_devices([target_ep_device], {})
+    ````c++
+    std::unordered_map<std::string, std::string> ep_options;  // Optional EP options.
+    Ort::SessionOptions session_options;
+    session_options.AppendExecutionProvider_V2(env, selected_ep_devices, ep_options);
     ````
 4. Create ORT session with the EP
-    ````python
-    sess = onnxruntime.InferenceSession("/path/to/model", sess_options=sess_options)
+    ````c++
+    Ort::Session session(env, ORT_TSTR("path/to/model"), session_options);
     ````
 5. Run ORT session
-    ````python
-    res = sess.run([], {input_name: x})
+    ````c++
+    auto output_tensors =
+        session.Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), 1);
     ````
 6. Unregister plugin EP library
-    ````python
-    onnxruntime.unregister_execution_provider_library(ep_registration_name)
+    ````c++
+    env.UnregisterExecutionProviderLibrary(lib_registration_name);
     ````
 
 
  ## Run Inference with automatic EP selection
  The workflow is the same as above except for steps 2 and 3.
  Instead, set the selection policy directly:
- ````python
- sess_options.set_provider_selection_policy(policy)
+ ````c++
+ session_options.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU);
  ````
  Available policies:
- - `onnxruntime.OrtExecutionProviderDevicePolicy_DEFAULT`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_CPU`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_NPU`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
- - `onnxruntime.OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
+ - `OrtExecutionProviderDevicePolicy_DEFAULT`
+ - `OrtExecutionProviderDevicePolicy_PREFER_CPU`
+ - `OrtExecutionProviderDevicePolicy_PREFER_NPU`
+ - `OrtExecutionProviderDevicePolicy_PREFER_GPU`
+ - `OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
+ - `OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
+ - `OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
 
  ## Note
  For additional APIs and details on plugin EP usage, see the official documentation:
  https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
 

From 5ca428446a73d1710c8357b8cc5613293b9bafe1 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 15:40:03 -0700
Subject: [PATCH 14/15] Add usage instructions for CMake build

---
 c_cxx/plugin_EP/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/c_cxx/plugin_EP/CMakeLists.txt b/c_cxx/plugin_EP/CMakeLists.txt
index ce4780f4d..b6f0c657c 100644
--- a/c_cxx/plugin_EP/CMakeLists.txt
+++ b/c_cxx/plugin_EP/CMakeLists.txt
@@ -1,6 +1,7 @@
 # usage:
 # cd build/
 # cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug -DORT_HOME=/path/to/ort_package/onnxruntime-win-x64-gpu-1.23.0
+# cmake --build ./ --config Debug
 cmake_minimum_required(VERSION 3.26)
 project(plugin_ep_app VERSION 1.0)
 set(CMAKE_CXX_STANDARD 17)

From 54fe49d16a070c7d83cc9726c6374572f685b117 Mon Sep 17 00:00:00 2001
From: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Date: Mon, 29 Sep 2025 15:51:06 -0700
Subject: [PATCH 15/15] Update

Added missing newline at end of file and included note about mul_1.onnx location.

---
 c_cxx/plugin_EP/plugin_ep_inference.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/c_cxx/plugin_EP/plugin_ep_inference.cc b/c_cxx/plugin_EP/plugin_ep_inference.cc
index 9132becd5..4713b56e4 100644
--- a/c_cxx/plugin_EP/plugin_ep_inference.cc
+++ b/c_cxx/plugin_EP/plugin_ep_inference.cc
@@ -96,4 +96,8 @@
 int main(int argc, char* argv[]) {
   return RunInference();
-}
\ No newline at end of file
+}
+
+// Note:
+// The mul_1.onnx can be found here:
+// https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/testdata/mul_1.onnx
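For quick experimentation, the automatic-EP-selection flow described in the READMEs above can also be combined into one small Python script. The snippet below is only an illustrative sketch: it assumes the same `TensorRTEp` plugin library and `mul_1.onnx` model used in `plugin_ep_inference.py`, and spells the policy constant exactly as listed in the READMEs.

````python
import numpy as np
import onnxruntime as onnxrt

# Register the plugin EP library under an application-chosen name.
ep_registration_name = "TensorRTEp"
onnxrt.register_execution_provider_library(ep_registration_name,
                                           "C:\\path\\to\\plugin_trt_ep\\TensorRTEp.dll")

# Let ONNX Runtime pick the EP/device instead of appending an explicit OrtEpDevice.
sess_options = onnxrt.SessionOptions()
sess_options.set_provider_selection_policy(onnxrt.OrtExecutionProviderDevicePolicy_PREFER_GPU)

sess = onnxrt.InferenceSession("C:\\models\\mul_1.onnx", sess_options=sess_options)
x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
print(sess.run([], {sess.get_inputs()[0].name: x})[0])

# Unregister only after all sessions that use the library have been released.
del sess
onnxrt.unregister_execution_provider_library(ep_registration_name)
````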