From 9838c5eb39ddafaba34d95708d31b29b8e4a071f Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Wed, 7 Jan 2026 22:14:33 -0800 Subject: [PATCH 1/4] update OpenVino Deprecation notice to 2025.3/ORT 1.23 and fix load_config parsing value error in documentation examples --- .../OpenVINO-ExecutionProvider.md | 40 ++++++------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/docs/execution-providers/OpenVINO-ExecutionProvider.md b/docs/execution-providers/OpenVINO-ExecutionProvider.md index 04b37aa2c516d..3c79bb77f6581 100644 --- a/docs/execution-providers/OpenVINO-ExecutionProvider.md +++ b/docs/execution-providers/OpenVINO-ExecutionProvider.md @@ -147,7 +147,7 @@ Runs the same model on multiple devices in parallel to improve device utilizatio --- ### `precision` -**DEPRECATED:** This option is deprecated and can be set via `load_config` using the `INFERENCE_PRECISION_HINT` property. +**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `INFERENCE_PRECISION_HINT` property. - Controls numerical precision during inference, balancing **performance** and **accuracy**. **Precision Support on Devices:** @@ -167,7 +167,7 @@ Runs the same model on multiple devices in parallel to improve device utilizatio --- ### `num_of_threads` & `num_streams` -**DEPRECATED:** These options are deprecated and can be set via `load_config` using the `INFERENCE_NUM_THREADS` and `NUM_STREAMS` properties respectively. +**DEPRECATED:** These options are deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `INFERENCE_NUM_THREADS` and `NUM_STREAMS` properties respectively. **Multi-Threading** @@ -185,7 +185,7 @@ Manages parallel inference streams for throughput optimization (default: `1` for ### `cache_dir` -**DEPRECATED:** This option is deprecated and can be set via `load_config` using the `CACHE_DIR` property. 
+**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `CACHE_DIR` property. Enables model caching to significantly reduce subsequent load times. Supports CPU, NPU, and GPU devices with kernel caching on iGPU/dGPU. @@ -327,7 +327,7 @@ Property keys used in `load_config` JSON must match the string literal defined i ### `enable_qdq_optimizer` -**DEPRECATED:** This option is deprecated and can be set via `load_config` using the `NPU_QDQ_OPTIMIZATION` property. +**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `NPU_QDQ_OPTIMIZATION` property. NPU-specific optimization for Quantize-Dequantize (QDQ) operations in the inference graph. This optimizer enhances ORT quantized models by: @@ -362,7 +362,7 @@ This configuration is required for optimal NPU memory allocation and management. ### `model_priority` -**DEPRECATED:** This option is deprecated and can be set via `load_config` using the `MODEL_PRIORITY` property. +**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `MODEL_PRIORITY` property. Configures resource allocation priority for multi-model deployment scenarios. @@ -404,15 +404,14 @@ Configures resource allocation priority for multi-model deployment scenarios. 
--- ## Examples - ### Python - -#### Using load_config with JSON file +#### Using load_config with JSON string ```python import onnxruntime as ort import json +import openvino -# Create config file +# Create config config = { "AUTO": { "PERFORMANCE_HINT": "THROUGHPUT", @@ -420,20 +419,16 @@ config = { "DEVICE_PROPERTIES": "{CPU:{INFERENCE_PRECISION_HINT:f32,NUM_STREAMS:3},GPU:{INFERENCE_PRECISION_HINT:f32,NUM_STREAMS:5}}" } } - -with open("ov_config.json", "w") as f: - json.dump(config, f) - # Use config with session -options = {"device_type": "AUTO", "load_config": "ov_config.json"} +options = {"device_type": "AUTO", "load_config": json.dumps(config)} session = ort.InferenceSession("model.onnx", providers=[("OpenVINOExecutionProvider", options)]) ``` - #### Using load_config for CPU ```python import onnxruntime as ort import json +import openvino # Create CPU config config = { @@ -443,19 +438,15 @@ config = { "INFERENCE_NUM_THREADS": "8" } } - -with open("cpu_config.json", "w") as f: - json.dump(config, f) - -options = {"device_type": "CPU", "load_config": "cpu_config.json"} +options = {"device_type": "CPU", "load_config": json.dumps(config)} session = ort.InferenceSession("model.onnx", providers=[("OpenVINOExecutionProvider", options)]) ``` - #### Using load_config for GPU ```python import onnxruntime as ort import json +import openvino # Create GPU config with caching config = { @@ -465,16 +456,11 @@ config = { "PERFORMANCE_HINT": "LATENCY" } } - -with open("gpu_config.json", "w") as f: - json.dump(config, f) - -options = {"device_type": "GPU", "load_config": "gpu_config.json"} +options = {"device_type": "GPU", "load_config": json.dumps(config)} session = ort.InferenceSession("model.onnx", providers=[("OpenVINOExecutionProvider", options)]) ``` - --- ### Python API Key-Value pairs for config options can be set using InferenceSession API as follow:- From 86742f7880c3dd541345a35487bf23e1b41d38a3 Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Tue, 13 Jan 2026 
20:58:35 -0800 Subject: [PATCH 2/4] update documentation --- .../OpenVINO-ExecutionProvider.md | 38 +++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/docs/execution-providers/OpenVINO-ExecutionProvider.md b/docs/execution-providers/OpenVINO-ExecutionProvider.md index 3c79bb77f6581..d7cb0c22ac192 100644 --- a/docs/execution-providers/OpenVINO-ExecutionProvider.md +++ b/docs/execution-providers/OpenVINO-ExecutionProvider.md @@ -30,9 +30,9 @@ ONNX Runtime OpenVINO™ Execution Provider is compatible with three latest rele |ONNX Runtime|OpenVINO™|Notes| |---|---|---| +|1.24.0|2025.4.1|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.9)| |1.23.0|2025.3|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.8)| |1.22.0|2025.1|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.7)| -|1.21.0|2025.0|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.6)| ## Build @@ -185,9 +185,10 @@ Manages parallel inference streams for throughput optimization (default: `1` for ### `cache_dir` -**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `CACHE_DIR` property. +**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `CACHE_DIR` property. `cache_dir` is configured **per-session** rather than globally. -Enables model caching to significantly reduce subsequent load times. Supports CPU, NPU, and GPU devices with kernel caching on iGPU/dGPU. + +Enables model caching to significantly reduce subsequent load times. Supports CPU, NPU, and GPU devices with kernel caching on iGPU/dGPU. **Benefits** - Saves compiled models and `cl_cache` files for dynamic shapes @@ -210,6 +211,8 @@ Enables model caching to significantly reduce subsequent load times. 
`load_config` supports nested JSON objects for complex device configurations.
`input_image[NCHW],output_tensor[NC]` + --- ## Examples From be08cb3863cedc8769b2dda423b57353b800a455 Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Tue, 10 Feb 2026 21:43:46 +0530 Subject: [PATCH 3/4] Address review: fix cache_dir benefits, load_config description, examples --- .../OpenVINO-ExecutionProvider.md | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/docs/execution-providers/OpenVINO-ExecutionProvider.md b/docs/execution-providers/OpenVINO-ExecutionProvider.md index d7cb0c22ac192..1cee20539dc9f 100644 --- a/docs/execution-providers/OpenVINO-ExecutionProvider.md +++ b/docs/execution-providers/OpenVINO-ExecutionProvider.md @@ -79,7 +79,7 @@ Runtime parameters set during OpenVINO Execution Provider initialization to cont | [**num_of_threads**](#num_of_threads--num_streams) | string | Any positive integer > 0 | size_t | Control number of inference threads | | [**num_streams**](#num_of_threads--num_streams) | string | Any positive integer > 0 | size_t | Set parallel execution streams for throughput | | [**cache_dir**](#cache_dir) | string | Valid filesystem path | string | Enable openvino model caching for improved latency | -| [**load_config**](#load_config) | string | JSON file path | string | Load and set custom/HW specific OpenVINO properties from JSON | +| [**load_config**](#load_config) | string | JSON string | string | Load and set custom/HW specific OpenVINO properties from JSON | | [**enable_qdq_optimizer**](#enable_qdq_optimizer) | string | True/False | boolean | Enable QDQ optimization for NPU | | [**disable_dynamic_shapes**](#disable_dynamic_shapes) | string | True/False | boolean | Convert dynamic models to static shapes | | [**reshape_input**](#reshape_input) | string | input_name[shape_bounds] | string | Specify upper and lower bound for dynamic shaped inputs for improved performance with NPU | @@ -87,7 +87,7 @@ Runtime parameters set during OpenVINO Execution Provider initialization to cont **Deprecation 
through a JSON string at runtime.
from a JSON string.
"INFERENCE_PRECISION_HINT": "f16", + "EXECUTION_MODE_HINT": "ACCURACY", "CACHE_DIR": "./model_cache", "PERFORMANCE_HINT": "LATENCY" } @@ -491,6 +485,7 @@ config = { options = {"device_type": "GPU", "load_config": json.dumps(config)} session = ort.InferenceSession("model.onnx", providers=[("OpenVINOExecutionProvider", options)]) + ``` --- From 0084458833d19bfe1421ed7d16374216734e1591 Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Mon, 2 Mar 2026 10:32:37 +0530 Subject: [PATCH 4/4] Update OpenVINO-ExecutionProvider.md --- docs/execution-providers/OpenVINO-ExecutionProvider.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/execution-providers/OpenVINO-ExecutionProvider.md b/docs/execution-providers/OpenVINO-ExecutionProvider.md index 1cee20539dc9f..eaf3a5c81b3ac 100644 --- a/docs/execution-providers/OpenVINO-ExecutionProvider.md +++ b/docs/execution-providers/OpenVINO-ExecutionProvider.md @@ -30,7 +30,7 @@ ONNX Runtime OpenVINO™ Execution Provider is compatible with three latest rele |ONNX Runtime|OpenVINO™|Notes| |---|---|---| -|1.24.0|2025.4.1|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.9)| +|1.24.1|2025.4.1|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.9)| |1.23.0|2025.3|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.8)| |1.22.0|2025.1|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.7)| @@ -832,4 +832,4 @@ In order to showcase what you can do with the OpenVINO™ Execution Provider for [Tutorial: Using OpenVINO™ Execution Provider for ONNX Runtime Python Wheel Packages](https://www.intel.com/content/www/us/en/artificial-intelligence/posts/openvino-execution-provider-for-onnx-runtime.html) ---- \ No newline at end of file +---