From 9838c5eb39ddafaba34d95708d31b29b8e4a071f Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Wed, 7 Jan 2026 22:14:33 -0800 Subject: [PATCH 1/4] update OpenVino Deprecation notice to 2025.3/ORT 1.23 and fix load_config parsing value error in documentation examples --- .../OpenVINO-ExecutionProvider.md | 40 ++++++------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/docs/execution-providers/OpenVINO-ExecutionProvider.md b/docs/execution-providers/OpenVINO-ExecutionProvider.md index 04b37aa2c516d..3c79bb77f6581 100644 --- a/docs/execution-providers/OpenVINO-ExecutionProvider.md +++ b/docs/execution-providers/OpenVINO-ExecutionProvider.md @@ -147,7 +147,7 @@ Runs the same model on multiple devices in parallel to improve device utilizatio --- ### `precision` -**DEPRECATED:** This option is deprecated and can be set via `load_config` using the `INFERENCE_PRECISION_HINT` property. +**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `INFERENCE_PRECISION_HINT` property. - Controls numerical precision during inference, balancing **performance** and **accuracy**. **Precision Support on Devices:** @@ -167,7 +167,7 @@ Runs the same model on multiple devices in parallel to improve device utilizatio --- ### `num_of_threads` & `num_streams` -**DEPRECATED:** These options are deprecated and can be set via `load_config` using the `INFERENCE_NUM_THREADS` and `NUM_STREAMS` properties respectively. +**DEPRECATED:** These options are deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `INFERENCE_NUM_THREADS` and `NUM_STREAMS` properties respectively. **Multi-Threading** @@ -185,7 +185,7 @@ Manages parallel inference streams for throughput optimization (default: `1` for ### `cache_dir` -**DEPRECATED:** This option is deprecated and can be set via `load_config` using the `CACHE_DIR` property. 
+**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `CACHE_DIR` property. Enables model caching to significantly reduce subsequent load times. Supports CPU, NPU, and GPU devices with kernel caching on iGPU/dGPU. @@ -327,7 +327,7 @@ Property keys used in `load_config` JSON must match the string literal defined i ### `enable_qdq_optimizer` -**DEPRECATED:** This option is deprecated and can be set via `load_config` using the `NPU_QDQ_OPTIMIZATION` property. +**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `NPU_QDQ_OPTIMIZATION` property. NPU-specific optimization for Quantize-Dequantize (QDQ) operations in the inference graph. This optimizer enhances ORT quantized models by: @@ -362,7 +362,7 @@ This configuration is required for optimal NPU memory allocation and management. ### `model_priority` -**DEPRECATED:** This option is deprecated and can be set via `load_config` using the `MODEL_PRIORITY` property. +**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `MODEL_PRIORITY` property. Configures resource allocation priority for multi-model deployment scenarios. @@ -404,15 +404,14 @@ Configures resource allocation priority for multi-model deployment scenarios. 
--- ## Examples - ### Python - -#### Using load_config with JSON file +#### Using load_config with JSON string ```python import onnxruntime as ort import json +import openvino -# Create config file +# Create config config = { "AUTO": { "PERFORMANCE_HINT": "THROUGHPUT", @@ -420,20 +419,16 @@ config = { "DEVICE_PROPERTIES": "{CPU:{INFERENCE_PRECISION_HINT:f32,NUM_STREAMS:3},GPU:{INFERENCE_PRECISION_HINT:f32,NUM_STREAMS:5}}" } } - -with open("ov_config.json", "w") as f: - json.dump(config, f) - # Use config with session -options = {"device_type": "AUTO", "load_config": "ov_config.json"} +options = {"device_type": "AUTO", "load_config": json.dumps(config)} session = ort.InferenceSession("model.onnx", providers=[("OpenVINOExecutionProvider", options)]) ``` - #### Using load_config for CPU ```python import onnxruntime as ort import json +import openvino # Create CPU config config = { @@ -443,19 +438,15 @@ config = { "INFERENCE_NUM_THREADS": "8" } } - -with open("cpu_config.json", "w") as f: - json.dump(config, f) - -options = {"device_type": "CPU", "load_config": "cpu_config.json"} +options = {"device_type": "CPU", "load_config": json.dumps(config)} session = ort.InferenceSession("model.onnx", providers=[("OpenVINOExecutionProvider", options)]) ``` - #### Using load_config for GPU ```python import onnxruntime as ort import json +import openvino # Create GPU config with caching config = { @@ -465,16 +456,11 @@ config = { "PERFORMANCE_HINT": "LATENCY" } } - -with open("gpu_config.json", "w") as f: - json.dump(config, f) - -options = {"device_type": "GPU", "load_config": "gpu_config.json"} +options = {"device_type": "GPU", "load_config": json.dumps(config)} session = ort.InferenceSession("model.onnx", providers=[("OpenVINOExecutionProvider", options)]) ``` - --- ### Python API Key-Value pairs for config options can be set using InferenceSession API as follow:- From 86742f7880c3dd541345a35487bf23e1b41d38a3 Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Tue, 13 Jan 2026 
20:58:35 -0800 Subject: [PATCH 2/4] update documentation --- .../OpenVINO-ExecutionProvider.md | 38 +++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/docs/execution-providers/OpenVINO-ExecutionProvider.md b/docs/execution-providers/OpenVINO-ExecutionProvider.md index 3c79bb77f6581..d7cb0c22ac192 100644 --- a/docs/execution-providers/OpenVINO-ExecutionProvider.md +++ b/docs/execution-providers/OpenVINO-ExecutionProvider.md @@ -30,9 +30,9 @@ ONNX Runtime OpenVINO™ Execution Provider is compatible with three latest rele |ONNX Runtime|OpenVINO™|Notes| |---|---|---| +|1.24.0|2025.4.1|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.9)| |1.23.0|2025.3|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.8)| |1.22.0|2025.1|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.7)| -|1.21.0|2025.0|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.6)| ## Build @@ -185,9 +185,10 @@ Manages parallel inference streams for throughput optimization (default: `1` for ### `cache_dir` -**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `CACHE_DIR` property. +**DEPRECATED:** This option is deprecated since OpenVINO 2025.3/ORT 1.23 and can be set via `load_config` using the `CACHE_DIR` property. `cache_dir` is configured **per-session** rather than globally. -Enables model caching to significantly reduce subsequent load times. Supports CPU, NPU, and GPU devices with kernel caching on iGPU/dGPU. + +Enables model caching to significantly reduce subsequent load times. Supports CPU, NPU, and GPU devices with kernel caching on iGPU/dGPU. **Benefits** - Saves compiled models and `cl_cache` files for dynamic shapes @@ -210,6 +211,8 @@ Enables model caching to significantly reduce subsequent load times. 
`load_config` supports nested JSON objects for complex device configurations.
`input_image[NCHW],output_tensor[NC]` + --- ## Examples From be08cb3863cedc8769b2dda423b57353b800a455 Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Tue, 10 Feb 2026 21:43:46 +0530 Subject: [PATCH 3/4] Address review: fix cache_dir benefits, load_config description, examples --- .../OpenVINO-ExecutionProvider.md | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/docs/execution-providers/OpenVINO-ExecutionProvider.md b/docs/execution-providers/OpenVINO-ExecutionProvider.md index d7cb0c22ac192..1cee20539dc9f 100644 --- a/docs/execution-providers/OpenVINO-ExecutionProvider.md +++ b/docs/execution-providers/OpenVINO-ExecutionProvider.md @@ -79,7 +79,7 @@ Runtime parameters set during OpenVINO Execution Provider initialization to cont | [**num_of_threads**](#num_of_threads--num_streams) | string | Any positive integer > 0 | size_t | Control number of inference threads | | [**num_streams**](#num_of_threads--num_streams) | string | Any positive integer > 0 | size_t | Set parallel execution streams for throughput | | [**cache_dir**](#cache_dir) | string | Valid filesystem path | string | Enable openvino model caching for improved latency | -| [**load_config**](#load_config) | string | JSON file path | string | Load and set custom/HW specific OpenVINO properties from JSON | +| [**load_config**](#load_config) | string | JSON string | string | Load and set custom/HW specific OpenVINO properties from JSON | | [**enable_qdq_optimizer**](#enable_qdq_optimizer) | string | True/False | boolean | Enable QDQ optimization for NPU | | [**disable_dynamic_shapes**](#disable_dynamic_shapes) | string | True/False | boolean | Convert dynamic models to static shapes | | [**reshape_input**](#reshape_input) | string | input_name[shape_bounds] | string | Specify upper and lower bound for dynamic shaped inputs for improved performance with NPU | @@ -87,7 +87,7 @@ Runtime parameters set during OpenVINO Execution Provider initialization to cont **Deprecation 
through a JSON string at runtime.
from a JSON string.
"INFERENCE_PRECISION_HINT": "f16", + "EXECUTION_MODE_HINT": "ACCURACY", "CACHE_DIR": "./model_cache", "PERFORMANCE_HINT": "LATENCY" } @@ -491,6 +485,7 @@ config = { options = {"device_type": "GPU", "load_config": json.dumps(config)} session = ort.InferenceSession("model.onnx", providers=[("OpenVINOExecutionProvider", options)]) + ``` --- From 0084458833d19bfe1421ed7d16374216734e1591 Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Mon, 2 Mar 2026 10:32:37 +0530 Subject: [PATCH 4/4] Update OpenVINO-ExecutionProvider.md --- docs/execution-providers/OpenVINO-ExecutionProvider.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/execution-providers/OpenVINO-ExecutionProvider.md b/docs/execution-providers/OpenVINO-ExecutionProvider.md index 1cee20539dc9f..eaf3a5c81b3ac 100644 --- a/docs/execution-providers/OpenVINO-ExecutionProvider.md +++ b/docs/execution-providers/OpenVINO-ExecutionProvider.md @@ -30,7 +30,7 @@ ONNX Runtime OpenVINO™ Execution Provider is compatible with three latest rele |ONNX Runtime|OpenVINO™|Notes| |---|---|---| -|1.24.0|2025.4.1|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.9)| +|1.24.1|2025.4.1|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.9)| |1.23.0|2025.3|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.8)| |1.22.0|2025.1|[Details](https://github.com/intel/onnxruntime/releases/tag/v5.7)| @@ -832,4 +832,4 @@ In order to showcase what you can do with the OpenVINO™ Execution Provider for [Tutorial: Using OpenVINO™ Execution Provider for ONNX Runtime Python Wheel Packages](https://www.intel.com/content/www/us/en/artificial-intelligence/posts/openvino-execution-provider-for-onnx-runtime.html) ---- \ No newline at end of file +---