From e383dcbe0137eb997e1700330ca11eb59f3c40b3 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 12:35:45 -0700
Subject: [PATCH 01/30] Create README.md

---
 IPEX_OOB_with_triton/README.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 IPEX_OOB_with_triton/README.md

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/IPEX_OOB_with_triton/README.md
@@ -0,0 +1 @@
+

From 416eab10397dc82e8d2cc8dc3597f1c7aa377b01 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 12:57:32 -0700
Subject: [PATCH 02/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 66 ++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 8b13789..34ddcae 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -1 +1,67 @@
+# Serving DenseNet models with IPEX (w/o python backend) and Triton Server
+
+## Description
+This sample provide code to integrate Intel® Extension for PyTorch (IPEX) with Triton Inference Server framework. This readme provides a methodology to run IPEX model with out writting python backend (model.py) script for triton server.
+
+## Preparation
+Make sure that Docker is installed on host instance.
+Sample images from ImageNet dataset.
+
+## Supported models
+Currently AI Inference samples support following Bert models finetuned on Squad dataset:
+- DenseNet121 - PyTorch+IPEX [DenseNet121](https://pytorch.org/hub/pytorch_vision_densenet/ "DenseNet121")
+
+## Possible run scenarios
+AI Inference samples allow user to run inference on localhost or on remote Triton Server Host.
+By default config.properties is filled with localhost run option.
+
+### Execution on localhost
+
+#### 1 Download the LibTorch .zip file for the PyTorch
+Here is the list of triton containers and their corresponding framework versions - https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023
+This example uses triton container 23.05 which uses PyTorch version 2.0.0. We will download the CPU only cxx 11 ABI package as follows
+wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip
+
+uzip libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip
+
+#### 2 Get IPEX .so files for triton
+Visit https://intel.github.io/intel-extension-for-pytorch/latest/tutorials/installation.html#install-via-source-compilation and copy the link for your correspinding cxx11 ABI PyTorch version (2.0.0) -
+
+wget https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run
+
+#### 3 Create and copy libintel-ext-pt-cpu.so files
+
+bash .run install
+
+#### 4 Create a docker container
+
+docker run -it -p8000:8000 -p8001:8001 -p8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3
+
+docker cp /lib/libintel-ext-pt-cpu.so :/opt/tritonserver/backends/pytorch/
+
+`$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images ` - sends requests to Triton Server Host for DenseNet model. This file uses ImageNet images for inference.
+
+## Additional info
+Downloading and loading models take some time, so please wait until you run client_imagenet.py.
+Model loading progress can be tracked by following Triton Server Host docker container logs.
+
+## Support
+Please submit your questions, feature requests, and bug reports on the [GitHub issues page](https://github.com/intel/intel-ai-inference-samples/issues).
+
+## License
+AI Inference samples project is licensed under Apache License Version 2.0. Refer to the [LICENSE](../LICENSE) file for the full license text and copyright notice.
+
+This distribution includes third party software governed by separate license terms.
+
+3-clause BSD license:
+- [model.py](./model_repository/densenet/1/model.py) - for PyTorch (IPEX)
+
+This third party software, even if included with the distribution of the Intel software, may be governed by separate license terms, including without limitation, third party license terms, other Intel software license terms, and open source software license terms. These separate license terms govern your use of the third party programs as set forth in the [THIRD-PARTY-PROGRAMS](./THIRD-PARTY-PROGRAMS) file.
+
+## Trademark Information
+Intel, the Intel logo, OpenVINO, the OpenVINO logo and Intel Xeon are trademarks of Intel Corporation or its subsidiaries.
+* Other names and brands may be claimed as the property of others.
+
+©Intel Corporation

From c370ce7590304bc7d1a0fd5428552e7a36212296 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 13:09:30 -0700
Subject: [PATCH 03/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 34ddcae..dec16a4 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -18,29 +18,34 @@ By default config.properties is filled with localhost run option.
 ### Execution on localhost
 
 #### 1 Download the LibTorch .zip file for the PyTorch
-Here is the list of triton containers and their corresponding framework versions - https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023
-This example uses triton container 23.05 which uses PyTorch version 2.0.0. We will download the CPU only cxx 11 ABI package as follows
-wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip
+[Here](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023) is the list of triton containers and their corresponding framework versions.
-uzip libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip
+This example uses triton container 23.05 which uses PyTorch version 2.0.0.
+We will download the CPU only cxx11 ABI package for PyTorch 2.0.0 as follows
-#### 2 Get IPEX .so files for triton
-Visit https://intel.github.io/intel-extension-for-pytorch/latest/tutorials/installation.html#install-via-source-compilation and copy the link for your correspinding cxx11 ABI PyTorch version (2.0.0) -
+`$ wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip'
-wget https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run
+`$ uzip libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip` - unpack the source
-#### 3 Create and copy libintel-ext-pt-cpu.so files
+#### 2 Create IPEX .so files for triton
+[Visit](https://intel.github.io/intel-extension-for-pytorch/latest/tutorials/installation.html#install-via-source-compilation) and copy the link for your correspinding cxx11 ABI PyTorch version (2.0.0) -
-bash .run install
+`$ wget https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run`
-#### 4 Create a docker container
+`$ bash libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run install libtorch/' - this will create libintel-ext-pt-cpu.so at libtorch/lib
-docker run -it -p8000:8000 -p8001:8001 -p8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3
+#### 3 Create a docker container and copy files
-docker cp /lib/libintel-ext-pt-cpu.so :/opt/tritonserver/backends/pytorch/
+`$ docker run -it -p8000:8000 -p8001:8001 -p8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3'
 
 `$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images ` - sends requests to Triton Server Host for DenseNet model. This file uses ImageNet images for inference.
+`$ docker cp /lib/libintel-ext-pt-cpu.so :/opt/tritonserver/backends/pytorch/`
 
 ## Additional info
 Downloading and loading models take some time, so please wait until you run client_imagenet.py.
+`$ cd backends/pyorch & LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models`
 Model loading progress can be tracked by following Triton Server Host docker container logs.
+#### 4 Run inference
+

From 198dd7362ddcd696d7ab7e2de1f2738211b1b056 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 13:10:08 -0700
Subject: [PATCH 04/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index dec16a4..b07ff0d 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -22,7 +22,7 @@ By default config.properties is filled with localhost run option.
 This example uses triton container 23.05 which uses PyTorch version 2.0.0.
We wi `$ wget https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run` -`$ bash libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run install libtorch/' - this will create libintel-ext-pt-cpu.so at libtorch/lib +`$ bash libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run install libtorch/` - this will create libintel-ext-pt-cpu.so at libtorch/lib #### 3 Create a docker container and copy files -`$ docker run -it -p8000:8000 -p8001:8001 -p8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3' +`$ docker run -it -p8000:8000 -p8001:8001 -p8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3` `$ docker cp /lib/libintel-ext-pt-cpu.so :/opt/tritonserver/backends/pytorch/` From 198dd7362ddcd696d7ab7e2de1f2738211b1b056 Mon Sep 17 00:00:00 2001 From: Swanand Mhalagi Date: Thu, 1 Jun 2023 13:14:25 -0700 Subject: [PATCH 05/30] Update README.md --- IPEX_OOB_with_triton/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md index b07ff0d..6682e1e 100644 --- a/IPEX_OOB_with_triton/README.md +++ b/IPEX_OOB_with_triton/README.md @@ -18,9 +18,9 @@ By default config.properties is filled with localhost run option. ### Execution on localhost #### 1 Download the LibTorch .zip file for the PyTorch -[Here](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023) is the list of triton containers and their corresponding framework versions. +This example uses triton container 23.05 which uses PyTorch version 2.0.0. [Here](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023) is the list of triton containers and their corresponding built-in framework versions. -This example uses triton container 23.05 which uses PyTorch version 2.0.0. 
-We will download the CPU only cxx11 ABI package for PyTorch 2.0.0 as follows
+We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
 
 `$ wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip`

From 2b01ece356c64ab81ce3e3af0e33409f8f961f69 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 13:23:58 -0700
Subject: [PATCH 06/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 6682e1e..d790189 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -37,7 +37,7 @@ We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
 
 `$ docker run -it -p8000:8000 -p8001:8001 -p8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3`
 
-`$ docker cp /lib/libintel-ext-pt-cpu.so :/opt/tritonserver/backends/pytorch/`
+`$ docker cp /lib/libintel-ext-pt-cpu.so :/opt/tritonserver/backends/pytorch/` - on a separate terminal
 
 `$ cd backends/pyorch & LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models`

From 96d4a72c4eeeb119d2b68f02770a7981d608c7db Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 13:29:48 -0700
Subject: [PATCH 07/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index d790189..954ab02 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -39,7 +39,7 @@ We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
 
 `$ docker cp /lib/libintel-ext-pt-cpu.so :/opt/tritonserver/backends/pytorch/` - on a separate terminal
 
-`$ cd backends/pyorch & LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models`
+`$ cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models`

From 6146a9ecfbbaa5e6314a53f648a18e05363505dd Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 13:34:08 -0700
Subject: [PATCH 08/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 954ab02..da93acf 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -35,9 +35,9 @@ We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
 
 #### 3 Create a docker container and copy files
 
-`$ docker run -it -p8000:8000 -p8001:8001 -p8002:8002 -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3`
+`$ docker run -it -p8000:8000 -p8001:8001 -p8002:8002 --name ipex_triton -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3`
 
-`$ docker cp /lib/libintel-ext-pt-cpu.so :/opt/tritonserver/backends/pytorch/` - on a separate terminal
+`$ docker cp libtorch/lib/libintel-ext-pt-cpu.so ipex_triton:/opt/tritonserver/backends/pytorch/` - on a separate terminal
 
 `$ cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models`

From 6083977c9ca4719c84c333805e3a553720518e17 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 13:35:22 -0700
Subject: [PATCH 09/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index da93acf..fcf79bc 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -39,7 +39,7 @@ We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
 
 `$ docker cp libtorch/lib/libintel-ext-pt-cpu.so ipex_triton:/opt/tritonserver/backends/pytorch/` - on a separate terminal
 
-`$ cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models`
+`$ cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models` - on the container
 
 #### 4 Run inference

From 6083977c9ca4719c84c333805e3a553720518e17 Mon Sep 17 00:00:00 2001
From: swanandmhalagi
Date: Thu, 1 Jun 2023 21:17:50 +0000
Subject: [PATCH 10/30] IPEX implementation w/o python backend

---
 IPEX_OOB_with_triton/THIRD-PARTY-PROGRAMS | 28 +++++++++
 IPEX_OOB_with_triton/client_imagenet.py | 70 +++++++++++++++++++++
 .../model_repository/densenet/1/README.md | 1 +
 .../model_repository/densenet/config.pbtxt | 44 +++++++++++++
 4 files changed, 143 insertions(+)
 create mode 100644 IPEX_OOB_with_triton/THIRD-PARTY-PROGRAMS
 create mode 100644 IPEX_OOB_with_triton/client_imagenet.py
 create mode 100644 IPEX_OOB_with_triton/model_repository/densenet/1/README.md
 create mode 100644 IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt

diff --git a/IPEX_OOB_with_triton/THIRD-PARTY-PROGRAMS b/IPEX_OOB_with_triton/THIRD-PARTY-PROGRAMS
new file mode 100644
index 0000000..dc1ff4f
--- /dev/null
+++ b/IPEX_OOB_with_triton/THIRD-PARTY-PROGRAMS
@@ -0,0 +1,28 @@
+1. model.py (triton/model_utils/bert_common/1/model.py)
+2. model.py (triton/model_utils/bert_common_ov/1/model.py)
+
+Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+ * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+ * Neither the name of NVIDIA CORPORATION nor the names of its
+   contributors may be used to endorse or promote products derived
+   from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/IPEX_OOB_with_triton/client_imagenet.py b/IPEX_OOB_with_triton/client_imagenet.py
new file mode 100644
index 0000000..9c9f6f5
--- /dev/null
+++ b/IPEX_OOB_with_triton/client_imagenet.py
@@ -0,0 +1,70 @@
+import argparse
+import numpy as np
+from torchvision import transforms
+from PIL import Image
+import tritonclient.http as httpclient
+from tritonclient.utils import triton_to_np_dtype
+import os
+import time
+
+# preprocessing function
+def image_preprocess(img_path="img2.jpg"):
+    img = Image.open(img_path)
+    preprocess = transforms.Compose([
+        transforms.Resize(224),
+        transforms.CenterCrop(224),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ])
+    return preprocess(img).numpy()
+
+# parse the --dataset flag that the README passes to this script
+parser = argparse.ArgumentParser()
+parser.add_argument("--dataset", default="/home/ubuntu/ImageNet/imagenet_images",
+                    help="directory containing sample ImageNet images")
+args = parser.parse_args()
+
+directory = args.dataset
+filelist = []
+
+for root, dirs, files in os.walk(directory):
+    for file in files:
+        # append the file name to the list
+        filelist.append(os.path.join(root, file))
+
+for x in range(0, 3):
+    iteration = 0
+    elapsed_time = 0
+    for name in filelist:
+        try:
+            elapsed = 0
+            start_epoch = time.time()
+            print(name)
+            transformed_img = image_preprocess(name)
+
+            # Setting up client
+            client = httpclient.InferenceServerClient(url="localhost:8000", concurrency=2)
+
+            # specify the names of the input and output layer(s) of our model
+            inputs = httpclient.InferInput("input__0", transformed_img.shape, datatype="FP32")
+            inputs.set_data_from_numpy(transformed_img, binary_data=True)
+
+            outputs = httpclient.InferRequestedOutput("OUTPUT__0", binary_data=True, class_count=1000)
+
+            # Querying the server
+            results = client.infer(model_name="densenet", inputs=[inputs], outputs=[outputs])
+            predictions = results.as_numpy('OUTPUT__0')
+            print(predictions[:5])
+            end_epoch = time.time()
+
+            #time.sleep(5)
+            iteration = iteration + 1
+            elapsed = end_epoch - start_epoch
+            elapsed_time = elapsed_time + elapsed
+            print("Per Sample Inference Latency in sec", elapsed)
+        except Exception as e:
+            print("Skipping", name, ":", e)
+
+print("Total Iteration", iteration)
+print("Total elapsed time", elapsed_time)
+print("Avg elapsed time per sample in sec", elapsed_time/iteration)
diff --git a/IPEX_OOB_with_triton/model_repository/densenet/1/README.md b/IPEX_OOB_with_triton/model_repository/densenet/1/README.md
new file mode 100644
index 0000000..3a9f087
--- /dev/null
+++ b/IPEX_OOB_with_triton/model_repository/densenet/1/README.md
@@ -0,0 +1 @@
+### Place model.pt file in this directory
diff --git a/IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt b/IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt
new file mode 100644
index 0000000..a137b03
--- /dev/null
+++ b/IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt
@@ -0,0 +1,44 @@
+max_batch_size: 0
+backend: "python"
+input[
+{
+    name: "input__0"
+    data_type: TYPE_FP32
+    dims: [ 3, 224, 224 ]
+    reshape { shape: [ 1, 3, 224, 224 ] }
+}
+]
+output:[
+{
+    name: "OUTPUT__0"
+    data_type: TYPE_FP32
+    dims: [ 1, 1000 ,1, 1]
+    reshape { shape: [ 1, 1000 ] }
+}
+]
+
+instance_group [
+  {
+    count: 1
+    kind: KIND_CPU
+  }
+]
+
+parameters [
+  {
+    key: "origin"
+    value: {string_value: 'densenet'}
+  },
+  {
+    # Batch sizes to split (e.g. "[1,2,4,8]"). Default: "[]" if "dynamic_shape" else "[1]"
+    # Set to "[]" if no split is needed.
+    key: "batches"
+    value: {string_value: "[1]"}
+  },
+  {
+    # Dynamic shape support. Default: "true"
+    # If set to "false" - INPUT0 shape to be defined
+    key: "dynamic_shape"
+    value: {string_value: "true"}
+  }
+]
\ No newline at end of file

From db271654505a9a9f801f284167f0163cc7d26520 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 14:24:09 -0700
Subject: [PATCH 11/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index fcf79bc..7cf7cd9 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -1,20 +1,15 @@
-# Serving DenseNet models with IPEX (w/o python backend) and Triton Server
+# Serving DenseNet models with IPEX (w/o python backend) on Triton Server
 
 ## Description
-This sample provide code to integrate Intel® Extension for PyTorch (IPEX) with Triton Inference Server framework. This readme provides a methodology to run IPEX model with out writting python backend (model.py) script for triton server.
+This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) optimized model with out writting python backend (model.py) script for triton server.
 
 ## Preparation
-Make sure that Docker is installed on host instance.
+Docker installed on host instance.
 Sample images from ImageNet dataset.
 
 ## Supported models
-Currently AI Inference samples support following Bert models finetuned on Squad dataset:
 - DenseNet121 - PyTorch+IPEX [DenseNet121](https://pytorch.org/hub/pytorch_vision_densenet/ "DenseNet121")
 
-## Possible run scenarios
-AI Inference samples allow user to run inference on localhost or on remote Triton Server Host.
-By default config.properties is filled with localhost run option.
-
 ### Execution on localhost
 
 #### 1 Download the LibTorch .zip file
@@ -34,15 +29,13 @@ We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
 `$ bash libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run install libtorch/` - this will create libintel-ext-pt-cpu.so at libtorch/lib
 
 #### 3 Create a docker container and copy files
-
 `$ docker run -it -p8000:8000 -p8001:8001 -p8002:8002 --name ipex_triton -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3`
 
 `$ docker cp libtorch/lib/libintel-ext-pt-cpu.so ipex_triton:/opt/tritonserver/backends/pytorch/` - on a separate terminal
 
 `$ cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models` - on the container
 
-#### 4 Run inference
-
+#### 4 Run inference
 `$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images ` - sends requests to Triton Server Host for DenseNet model. This file uses ImageNet images for inference.

From 9214ba7589c09a1305649af97d80bea51b9ba28d Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 14:24:56 -0700
Subject: [PATCH 12/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 7cf7cd9..a3e8259 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -1,4 +1,4 @@
-# Serving DenseNet models with IPEX (w/o python backend) on Triton Server
+## Serving DenseNet models with IPEX® (w/o python backend) on Triton Server
 
 ## Description
 This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) optimized model with out writting python backend (model.py) script for triton server.
From 46492ef1ac72935986951343458e09d1c02704e6 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 14:26:16 -0700
Subject: [PATCH 13/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index a3e8259..39dfdb9 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -6,6 +6,7 @@ This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) o
 ## Preparation
 Docker installed on host instance.
 Sample images from ImageNet dataset.
+Place IPEX optimized model at IPEX_OOB_with_triton/model_repository/densenet/1/
 
 ## Supported models
 - DenseNet121 - PyTorch+IPEX [DenseNet121](https://pytorch.org/hub/pytorch_vision_densenet/ "DenseNet121")

From dbb5a11781faaef029fad04c9f21c464059b207b Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 14:26:30 -0700
Subject: [PATCH 14/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 39dfdb9..c436a94 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -5,7 +5,9 @@ This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) o
 
 ## Preparation
 Docker installed on host instance.
+
 Sample images from ImageNet dataset.
+
 Place IPEX optimized model at IPEX_OOB_with_triton/model_repository/densenet/1/
 
 ## Supported models

From 68db6befa0baea4da66a4c09ebaa91848a2de2ba Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 14:27:06 -0700
Subject: [PATCH 15/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index c436a94..2248ffb 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -4,11 +4,9 @@
 This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) optimized model with out writting python backend (model.py) script for triton server.
 
 ## Preparation
-Docker installed on host instance.
-
-Sample images from ImageNet dataset.
-
-Place IPEX optimized model at IPEX_OOB_with_triton/model_repository/densenet/1/
+- Docker installed on host instance.
+- Sample images from ImageNet dataset.
+- Place IPEX optimized model at IPEX_OOB_with_triton/model_repository/densenet/1/
 
 ## Supported models
 - DenseNet121 - PyTorch+IPEX [DenseNet121](https://pytorch.org/hub/pytorch_vision_densenet/ "DenseNet121")
@@ -41,7 +39,6 @@ We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
 #### 4 Run inference
 `$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images ` - sends requests to Triton Server Host for DenseNet model. This file uses ImageNet images for inference.
-
 ## Additional info
 Downloading and loading models take some time, so please wait until you run client_imagenet.py.
 Model loading progress can be tracked by following Triton Server Host docker container logs.
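Before driving traffic with client_imagenet.py, it can help to confirm that the server and the densenet model actually finished loading. A small sketch using the same tritonclient.http package the client script already imports (the file name readiness_check.py is hypothetical):

```python
# readiness_check.py (hypothetical helper, not part of this patch series)
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")
print("server ready:", client.is_server_ready())           # GET /v2/health/ready
print("model ready:", client.is_model_ready("densenet"))   # GET /v2/models/densenet/ready
```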
From 0b4c17e4628b27964d179f0bf8ab41c06bbda15b Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 14:31:41 -0700
Subject: [PATCH 16/30] Update config.pbtxt

---
 .../model_repository/densenet/config.pbtxt | 29 ++-----------------
 1 file changed, 2 insertions(+), 27 deletions(-)

diff --git a/IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt b/IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt
index a137b03..cbfa98e 100644
--- a/IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt
+++ b/IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt
@@ -1,5 +1,6 @@
+name: "densenet"
+platform: "pytorch_libtorch"
 max_batch_size: 0
-backend: "python"
 input[
 {
     name: "input__0"
@@ -16,29 +17,3 @@ output:[
     reshape { shape: [ 1, 1000 ] }
 }
 ]
-
-instance_group [
-  {
-    count: 1
-    kind: KIND_CPU
-  }
-]
-
-parameters [
-  {
-    key: "origin"
-    value: {string_value: 'densenet'}
-  },
-  {
-    # Batch sizes to split (e.g. "[1,2,4,8]"). Default: "[]" if "dynamic_shape" else "[1]"
-    # Set to "[]" if no split is needed.
-    key: "batches"
-    value: {string_value: "[1]"}
-  },
-  {
-    # Dynamic shape support. Default: "true"
-    # If set to "false" - INPUT0 shape to be defined
-    key: "dynamic_shape"
-    value: {string_value: "true"}
-  }
-]
\ No newline at end of file

From 64ee650d0e06742ad7361e3ca3b7d9fca492988e Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 14:33:07 -0700
Subject: [PATCH 17/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 2248ffb..e8584b5 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -49,11 +49,6 @@ Please submit your questions, feature requests, and bug reports on the [GitHub
 ## License
 AI Inference samples project is licensed under Apache License Version 2.0. Refer to the [LICENSE](../LICENSE) file for the full license text and copyright notice.
 
-This distribution includes third party software governed by separate license terms.
-
-3-clause BSD license:
-- [model.py](./model_repository/densenet/1/model.py) - for PyTorch (IPEX)
-
 This third party software, even if included with the distribution of the Intel software, may be governed by separate license terms, including without limitation, third party license terms, other Intel software license terms, and open source software license terms. These separate license terms govern your use of the third party programs as set forth in the [THIRD-PARTY-PROGRAMS](./THIRD-PARTY-PROGRAMS) file.

From b5c152a65f2fa61bcb5fb8cfe0825ef5d574f134 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 14:38:59 -0700
Subject: [PATCH 18/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index e8584b5..5abd596 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -1,7 +1,7 @@
 ## Serving DenseNet models with IPEX® (w/o python backend) on Triton Server
 
 ## Description
-This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) optimized model with out writting python backend (model.py) script for triton server.
+This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) optimized model without writing python backend (model.py) script for triton server.
 
 ## Preparation
 - Docker installed on host instance.
@@ -23,7 +23,7 @@ We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
 `$ uzip libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip` - unpack the source
 
 #### 2 Create IPEX .so files for triton
-[Visit](https://intel.github.io/intel-extension-for-pytorch/latest/tutorials/installation.html#install-via-source-compilation) and copy the link for your correspinding cxx11 ABI PyTorch version (2.0.0) -
+[Visit](https://intel.github.io/intel-extension-for-pytorch/latest/tutorials/installation.html#install-via-source-compilation) and copy the link for your corresponding cxx11 ABI PyTorch version (2.0.0) -
 
 `$ wget https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run`

From 922bdd175b079793b9c7dcbc7afed4e0995ec49b Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Thu, 1 Jun 2023 14:43:10 -0700
Subject: [PATCH 19/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 5abd596..9d56480 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -13,7 +13,7 @@ This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) o
 
 ### Execution on localhost
 
-#### 1 Download the LibTorch .zip file for the PyTorch
+#### 1 Download the LibTorch .zip file
 This example uses triton container 23.05 which uses PyTorch version 2.0.0. [Here](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023) is the list of triton containers and their corresponding built-in framework versions.
 
 We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows

From 23699042051390f37ca98fc7d87fd5d12f09ec36 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Wed, 7 Jun 2023 09:55:44 -0700
Subject: [PATCH 20/30] Update README.md

Fixed Typo
---
 IPEX_OOB_with_triton/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 9d56480..e6eb3ce 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -20,7 +20,7 @@ We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
 
 `$ wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip`
 
-`$ uzip libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip` - unpack the source
+`$ unzip libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip` - unpack the source
 
 #### 2 Create IPEX .so files for triton

From b662a574f5245a6033586b35c2c27d6b88d585b0 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Wed, 7 Jun 2023 10:17:55 -0700
Subject: [PATCH 21/30] Update README.md

Added one story
---
 IPEX_OOB_with_triton/README.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index e6eb3ce..79a97c5 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -29,14 +29,17 @@ We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
 
 `$ bash libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run install libtorch/` - this will create libintel-ext-pt-cpu.so at libtorch/lib
 
-#### 3 Create a docker container and copy files
+#### 3 Copy the IPEX model at desired directory
+Place the ipex.optimize() saved model at the /model_repository
+
+#### 4 Create a docker container and copy files
 `$ docker run -it -p8000:8000 -p8001:8001 -p8002:8002 --name ipex_triton -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3`
 
 `$ docker cp libtorch/lib/libintel-ext-pt-cpu.so ipex_triton:/opt/tritonserver/backends/pytorch/` - on a separate terminal
 
 `$ cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models` - on the container
 
-#### 4 Run inference
+#### 5 Run inference
 `$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images ` - sends requests to Triton Server Host for DenseNet model. This file uses ImageNet images for inference.

From 514d7bffc00c96311e0e61a092ddfbb7312c7fe1 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Wed, 7 Jun 2023 17:09:02 -0700
Subject: [PATCH 22/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 79a97c5..5db5984 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -1,4 +1,4 @@
-## Serving DenseNet models with IPEX® (w/o python backend) on Triton Server
+## Serving DenseNet models with IPEX® (w/ PyTorch backend) on Triton Server
 
 ## Description
 This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) optimized model without writing python backend (model.py) script for triton server.
@@ -8,9 +8,6 @@ This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) o
 - Sample images from ImageNet dataset.
 - Place IPEX optimized model at IPEX_OOB_with_triton/model_repository/densenet/1/
 
-## Supported models
-- DenseNet121 - PyTorch+IPEX [DenseNet121](https://pytorch.org/hub/pytorch_vision_densenet/ "DenseNet121")
-
 ### Execution on localhost

From 269bff6b7b74bdc201a69f8667d091a1221bdb88 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Wed, 7 Jun 2023 17:18:25 -0700
Subject: [PATCH 23/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 5db5984..68fcb4a 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -1,7 +1,7 @@
-## Serving DenseNet models with IPEX® (w/ PyTorch backend) on Triton Server
+## Serving models with IPEX® and PyTorch backend on Triton Server
 
 ## Description
-This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) optimized model without writing python backend (model.py) script for triton server.
+This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) optimized model for triton server.
 
 ## Preparation
 - Docker installed on host instance.
@@ -10,6 +10,17 @@ This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) o
 
 ### Execution on localhost
 
+#### 1 Copy the IPEX model at desired directory
+Place the ipex optimized model at the /model_repository
+
+#### 2 Run docker file
+
+`$ docker build -t tritonserver_custom -f Dockerfile.ipex .`
+
+#### 5 Run inference
+`$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images ` - sends requests to Triton Server Host for DenseNet model. This file uses ImageNet images for inference.
+
+
 #### 1 Download the LibTorch .zip file
 This example uses triton container 23.05 which uses PyTorch version 2.0.0. [Here](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023) is the list of triton containers and their corresponding built-in framework versions.

From 3b109541755c714877874c113a42d6cbf1ae6ece Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Wed, 7 Jun 2023 17:19:13 -0700
Subject: [PATCH 24/30] Dockerfile for IPEX

---
 IPEX_OOB_with_triton/Dockerfile | 34 +++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 IPEX_OOB_with_triton/Dockerfile

diff --git a/IPEX_OOB_with_triton/Dockerfile b/IPEX_OOB_with_triton/Dockerfile
new file mode 100644
index 0000000..478d386
--- /dev/null
+++ b/IPEX_OOB_with_triton/Dockerfile
@@ -0,0 +1,34 @@
+# Copyright (c) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+
+FROM nvcr.io/nvidia/tritonserver:23.05-py3
+
+# this installs utils such as numactl and libjemalloc
+RUN sed -i '50d' /etc/apt/sources.list && \
+    apt-get update && \
+    apt-get install --no-install-recommends -y numactl \
+    libjemalloc-dev && \
+    apt-get clean
+
+# DOWNLOAD IPEX
+# The versions of the IPEX C++ library can be found here:
+# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/installation.html#install-c-sdk
+# The version of IPEX needs to align with the version of PyTorch on
+# the tritonserver Docker image that you're using. For example,
+# the Docker image nvcr.io/nvidia/tritonserver:23.05-py3 comes with PyTorch 2.0.0,
+# as found here:
+# https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023
+RUN BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" && \
+    curl https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip -o ${BASEDIR}/libtorch.zip && \
+    unzip -o libtorch.zip -d ${BASEDIR} && \
+    curl https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run -o ${BASEDIR}/ipex.run && \
+    bash ${BASEDIR}/ipex.run install ${BASEDIR}/libtorch && \
+    cp ${BASEDIR}/libtorch/lib/libintel-ext-pt-cpu.so /opt/tritonserver/backends/pytorch/
+# When in the Docker container, you can now run tritonserver like this:
+# LD_PRELOAD="/opt/tritonserver/backends/pytorch/libintel-ext-pt-cpu.so ${LD_PRELOAD}" tritonserver --model-repository=/models
+
+# you can append ", dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000" to malloc conf for optimal performance but these can sometimes cause OOM crash
+ENV MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto"
+ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libjemalloc.so /opt/tritonserver/backends/pytorch/libintel-ext-pt-cpu.so ${LD_PRELOAD}"
+ENV DNNL_PRIMITIVE_CACHE_CAPACITY=1024
+ENV DNNL_MAX_CPU_ISA="AVX512_CORE_AMX"

From 9c7b854de87d8bcfc0288474a1ed475cb29f7c40 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Wed, 7 Jun 2023 17:31:03 -0700
Subject: [PATCH 25/30] Update Dockerfile

---
 IPEX_OOB_with_triton/Dockerfile | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/IPEX_OOB_with_triton/Dockerfile b/IPEX_OOB_with_triton/Dockerfile
index 478d386..9f58b78 100644
--- a/IPEX_OOB_with_triton/Dockerfile
+++ b/IPEX_OOB_with_triton/Dockerfile
@@ -10,14 +10,26 @@ RUN sed -i '50d' /etc/apt/sources.list && \
     libjemalloc-dev && \
     apt-get clean
 
-# DOWNLOAD IPEX
-# The versions of the IPEX C++ library can be found here:
-# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/installation.html#install-c-sdk
+# Step 1 Download the LibTorch .zip file
+# This example uses triton container 23.05 which uses PyTorch version 2.0.0.
+# List of Triton server and corresponding FWs https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023
 # The version of IPEX needs to align with the version of PyTorch on
 # the tritonserver Docker image that you're using. For example,
 # the Docker image nvcr.io/nvidia/tritonserver:23.05-py3 comes with PyTorch 2.0.0,
-# as found here:
-# https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023
+# wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip
+# unzip libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip
+
+# Step 2 DOWNLOAD IPEX
+# The versions of the IPEX C++ library can be found here:
+# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/installation.html#install-c-sdk
+# wget https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run
+# bash libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run install libtorch/
+
+# step 3 Create a docker container and copy files
+# docker run -it -p8000:8000 -p8001:8001 -p8002:8002 --name ipex_triton -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3`
+# docker cp libtorch/lib/libintel-ext-pt-cpu.so ipex_triton:/opt/tritonserver/backends/pytorch/` - on a separate terminal
+# cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models` - on the container
 
 RUN BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" && \
     curl https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip -o ${BASEDIR}/libtorch.zip && \

From bf53d0da0be51bc146d1f44e05795bac021434e0 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Wed, 7 Jun 2023 17:32:16 -0700
Subject: [PATCH 26/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 34 ++++------------------------------
 1 file changed, 4 insertions(+), 30 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index 68fcb4a..e0f994c 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -11,42 +11,16 @@ This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) o
 ### Execution on localhost
 
 #### 1 Copy the IPEX model at desired directory
+
 Place the ipex optimized model at the /model_repository
 
 #### 2 Run docker file
 
-`$ docker build -t tritonserver_custom -f Dockerfile.ipex .`
-
-#### 5 Run inference
-`$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images ` - sends requests to Triton Server Host for DenseNet model. This file uses ImageNet images for inference.
-
-
-#### 1 Download the LibTorch .zip file
-This example uses triton container 23.05 which uses PyTorch version 2.0.0. [Here](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023) is the list of triton containers and their corresponding built-in framework versions.
-
-We will download the LibTorch 2.0.0 (C++\CPU cxx11 ABI) package as follows
-
-`$ wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip`
-
-`$ unzip libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip` - unpack the source
-
-#### 2 Create IPEX .so files for triton
-[Visit](https://intel.github.io/intel-extension-for-pytorch/latest/tutorials/installation.html#install-via-source-compilation) and copy the link for your corresponding cxx11 ABI PyTorch version (2.0.0) -
-
-`$ wget https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run`
-
-`$ bash libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run install libtorch/` - this will create libintel-ext-pt-cpu.so at libtorch/lib
-
-#### 3 Copy the IPEX model at desired directory
-Place the ipex optimized model at the /model_repository
-
-#### 4 Create a docker container and copy files
-`$ docker run -it -p8000:8000 -p8001:8001 -p8002:8002 --name ipex_triton -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3`
-
-`$ docker cp libtorch/lib/libintel-ext-pt-cpu.so ipex_triton:/opt/tritonserver/backends/pytorch/` - on a separate terminal
-
-`$ cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models` - on the container
+`$ docker build -t tritonserver_custom -f Dockerfile .`
 
+#### 3 Run inference
 
+`$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images ` - sends requests to Triton Server Host for sample model. This file uses ImageNet images for inference.
 
 ## Additional info
 Downloading and loading models take some time, so please wait until you run client_imagenet.py.

From cc1d67451d9ca82e48242be86fcbd4d5f3be30f8 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Wed, 7 Jun 2023 17:33:04 -0700
Subject: [PATCH 27/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index e0f994c..c6d2038 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -6,7 +6,6 @@ This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) o
 ## Preparation
 - Docker installed on host instance.
 - Sample images from ImageNet dataset.
-- Place IPEX optimized model at IPEX_OOB_with_triton/model_repository/densenet/1/
 
 ### Execution on localhost

From 0a14d11f4c1fabdc50d937ed582728ea88e054f7 Mon Sep 17 00:00:00 2001
From: Swanand Mhalagi
Date: Wed, 7 Jun 2023 22:18:04 -0700
Subject: [PATCH 28/30] Update README.md

---
 IPEX_OOB_with_triton/README.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
index c6d2038..0eea1f1 100644
--- a/IPEX_OOB_with_triton/README.md
+++ b/IPEX_OOB_with_triton/README.md
@@ -1,7 +1,7 @@
 ## Serving models with IPEX® and PyTorch backend on Triton Server
 
 ## Description
-This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) optimized model for triton server.
+This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) optimized model on triton server.
 
 ## Preparation
 - Docker installed on host instance.
@@ -13,14 +13,15 @@ This readme provides a methodology to run Intel® Extension for PyTorch (IPEX) o Place the ipex optimized model at the /model_repository -#### 2 Run docker file +#### 2 Create and Run Triton container -`$ docker build -t tritonserver_custom -f Dockerfile .` +`$ docker build -t tritonserver_ipex -f Dockerfile .` -#### 3 Run inference +`$ docker run -it --rm -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd)/model_repository:/models --name ai_inference_host tritonserver_ipex:latest tritonserver --model-repository=/models` -`$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images ` - sends requests to Triton Server Host for sample model. This file uses ImagesNet images for inference. +#### 3 Run inference with a client script +`$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images ` - sends requests to Triton Server Host for sample model. This file uses ImagesNet images for inference. ## Additional info Downloading and loading models take some time, so please wait until you run client_imagenet.py. From b7a3e781560fff67ea3272a59b023de563a935fd Mon Sep 17 00:00:00 2001 From: Swanand Mhalagi Date: Wed, 7 Jun 2023 22:21:50 -0700 Subject: [PATCH 29/30] Update Dockerfile Added procedure comments --- IPEX_OOB_with_triton/Dockerfile | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/IPEX_OOB_with_triton/Dockerfile b/IPEX_OOB_with_triton/Dockerfile index 9f58b78..e53632d 100644 --- a/IPEX_OOB_with_triton/Dockerfile +++ b/IPEX_OOB_with_triton/Dockerfile @@ -19,16 +19,16 @@ RUN sed -i '50d' /etc/apt/sources.list && \ # wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip # unzip libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip -# Step 2 DOWNLOAD IPEX +# Step 2 Download IPEX binary # The versions of the IPEX C++ library can be found here: -# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/installation.html#install-c-sdk +# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/installation.html#install-c-sdk # wget https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run # bash libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run install libtorch/ -# step 3 Create a docker container and copy files -# docker run -it -p8000:8000 -p8001:8001 -p8002:8002 --name ipex_triton -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3` -# docker cp libtorch/lib/libintel-ext-pt-cpu.so ipex_triton:/opt/tritonserver/backends/pytorch/` - on a separate terminal -# cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models` - on the container +# Step 3 Copy .so files to container image +# docker run -it -p8000:8000 -p8001:8001 -p8002:8002 --name ipex_triton -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3` +# docker cp libtorch/lib/libintel-ext-pt-cpu.so ipex_triton:/opt/tritonserver/backends/pytorch/` +# cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models` RUN BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" && \ curl https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip -o ${BASEDIR}/libtorch.zip && \ @@ -36,8 +36,9 @@ RUN BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" && \ curl https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run -o 
${BASEDIR}/ipex.run && \ bash ${BASEDIR}/ipex.run install ${BASEDIR}/libtorch && \ cp ${BASEDIR}/libtorch/lib/libintel-ext-pt-cpu.so /opt/tritonserver/backends/pytorch/ -# When in the Docker container, you can now run tritonserver like this: -# LD_PRELOAD="/opt/tritonserver/backends/pytorch/libintel-ext-pt-cpu.so ${LD_PRELOAD}" tritonserver --model-repository=/models + +# Step 4 When in the Docker container, you can now run tritonserver like this: +# LD_PRELOAD="/opt/tritonserver/backends/pytorch/libintel-ext-pt-cpu.so ${LD_PRELOAD}" tritonserver --model-repository=/models # you can append ", dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000" to malloc conf for optimal performance but these can sometimes cause OOM crash ENV MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto" From 7d8052eb804d2f27866d15fc21334a4e1cdd8fa6 Mon Sep 17 00:00:00 2001 From: Swanand Mhalagi Date: Tue, 20 Jun 2023 11:34:56 -0700 Subject: [PATCH 30/30] Update Dockerfile Removed env variables --- IPEX_OOB_with_triton/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/IPEX_OOB_with_triton/Dockerfile b/IPEX_OOB_with_triton/Dockerfile index e53632d..65d9cda 100644 --- a/IPEX_OOB_with_triton/Dockerfile +++ b/IPEX_OOB_with_triton/Dockerfile @@ -43,5 +43,3 @@ RUN BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" && \ # you can append ", dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000" to malloc conf for optimal performance but these can sometimes cause OOM crash ENV MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto" ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libjemalloc.so /opt/tritonserver/backends/pytorch/libintel-ext-pt-cpu.so ${LD_PRELOAD}" -ENV DNNL_PRIMITIVE_CACHE_CAPACITY=1024 -ENV DNNL_MAX_CPU_ISA="AVX512_CORE_AMX"
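With the image from the final Dockerfile built and the container serving /models, a quick smoke test can be run without any dataset by sending a random tensor of the shape declared in config.pbtxt. This is a sketch distilled from client_imagenet.py (the file name smoke_test.py is hypothetical); it only checks that the request path works, not prediction accuracy:

```python
# smoke_test.py (hypothetical helper): one inference request with random data
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")
data = np.random.rand(3, 224, 224).astype(np.float32)  # input__0 dims from config.pbtxt

inputs = httpclient.InferInput("input__0", data.shape, datatype="FP32")
inputs.set_data_from_numpy(data, binary_data=True)
outputs = httpclient.InferRequestedOutput("OUTPUT__0", binary_data=True, class_count=5)

result = client.infer(model_name="densenet", inputs=[inputs], outputs=[outputs])
print(result.as_numpy("OUTPUT__0"))  # top-5 "<score>:<class_index>" entries
```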