diff --git a/IPEX_OOB_with_triton/Dockerfile b/IPEX_OOB_with_triton/Dockerfile
new file mode 100644
index 0000000..65d9cda
--- /dev/null
+++ b/IPEX_OOB_with_triton/Dockerfile
@@ -0,0 +1,45 @@
+# Copyright (c) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+
+FROM nvcr.io/nvidia/tritonserver:23.05-py3
+
+# Install utilities such as numactl and libjemalloc
+RUN sed -i '50d' /etc/apt/sources.list && \
+    apt-get update && \
+    apt-get install --no-install-recommends -y numactl \
+    libjemalloc-dev && \
+    apt-get clean
+
+# Step 1 Download the LibTorch .zip file
+# This example uses Triton container 23.05, which ships PyTorch 2.0.0.
+# Triton Server releases and their framework versions: https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html#framework-matrix-2023
+# The version of IPEX needs to align with the version of PyTorch on
+# the tritonserver Docker image that you're using. For example,
+# the Docker image nvcr.io/nvidia/tritonserver:23.05-py3 comes with PyTorch 2.0.0:
+# wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip
+# unzip libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip
+
+# Step 2 Download the IPEX binary
+# The versions of the IPEX C++ library can be found here:
+# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/installation.html#install-c-sdk
+# wget https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run
+# bash libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run install libtorch/
+
+# Step 3 Copy the .so file into a running container (manual alternative to the RUN instruction below):
+# docker run -it -p8000:8000 -p8001:8001 -p8002:8002 --name ipex_triton -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.05-py3
+# docker cp libtorch/lib/libintel-ext-pt-cpu.so ipex_triton:/opt/tritonserver/backends/pytorch/
+# cd backends/pytorch/ ; LD_PRELOAD="$(pwd)/libintel-ext-pt-cpu.so" tritonserver --model-repository=/models
+
+RUN mkdir -p /tmp/ipex && cd /tmp/ipex && \
+    curl -L https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip -o libtorch.zip && \
+    unzip -o libtorch.zip && \
+    curl -L https://intel-extension-for-pytorch.s3.amazonaws.com/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.0%2Bcpu.run -o ipex.run && \
+    bash ipex.run install ./libtorch && \
+    cp libtorch/lib/libintel-ext-pt-cpu.so /opt/tritonserver/backends/pytorch/
+
+# Step 4 Inside the container, you can now run tritonserver like this:
+# LD_PRELOAD="/opt/tritonserver/backends/pytorch/libintel-ext-pt-cpu.so ${LD_PRELOAD}" tritonserver --model-repository=/models
+
+# You can append ",dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000" to MALLOC_CONF for additional performance, but these settings can sometimes cause OOM crashes
+ENV MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto"
+ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libjemalloc.so /opt/tritonserver/backends/pytorch/libintel-ext-pt-cpu.so ${LD_PRELOAD}"
diff --git a/IPEX_OOB_with_triton/README.md b/IPEX_OOB_with_triton/README.md
new file mode 100644
index 0000000..0eea1f1
--- /dev/null
+++ b/IPEX_OOB_with_triton/README.md
@@ -0,0 +1,73 @@
+## Serving models with Intel® Extension for PyTorch (IPEX) and the PyTorch backend on Triton Server
+
+## Description
+This README describes how to serve an Intel® Extension for PyTorch (IPEX) optimized model with the PyTorch (LibTorch) backend on Triton Inference Server.
+
+## Preparation
+- Docker installed on the host instance.
+- Sample images from the ImageNet dataset.
+
+### Execution on localhost
+
+#### 1 Copy the IPEX model into the model repository
+
+Place the IPEX-optimized TorchScript model at `model_repository/densenet/1/model.pt`.
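+
+If you do not already have an IPEX-optimized TorchScript model, the sketch below shows one possible way to create one from the pretrained torchvision DenseNet-121 (any TorchScript model whose shapes match `config.pbtxt` will work; the exact export flow may differ for your model):
+
+```python
+import torch
+import torchvision.models as models
+import intel_extension_for_pytorch as ipex
+
+# Load a pretrained DenseNet-121 and switch to inference mode
+model = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)
+model.eval()
+
+# Apply IPEX CPU inference optimizations
+model = ipex.optimize(model)
+
+# Trace to TorchScript with a dummy ImageNet-sized input and save it
+# where the Triton model repository expects it
+example_input = torch.randn(1, 3, 224, 224)
+with torch.no_grad():
+    traced = torch.jit.trace(model, example_input)
+    traced = torch.jit.freeze(traced)
+traced.save("model_repository/densenet/1/model.pt")
+```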
+
+#### 2 Create and run the Triton container
+
+`$ docker build -t tritonserver_ipex -f Dockerfile .`
+
+`$ docker run -it --rm -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd)/model_repository:/models --name ai_inference_host tritonserver_ipex:latest tritonserver --model-repository=/models`
+
+#### 3 Run inference with a client script
+
+`$ python3 client_imagenet.py --dataset /home/ubuntu/ImageNet/imagenet_images` - sends requests to the Triton Server host for the sample densenet model, running inference on the ImageNet images in the given directory.
+
+## Additional info
+Downloading and loading the model takes some time, so wait for the server to finish loading before running client_imagenet.py.
+Model loading progress can be tracked in the Triton Server docker container logs.
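+
+You can optionally check that the server and the model are ready with Triton's standard HTTP health endpoints (port 8000 is the HTTP port published by the `docker run` command above), for example:
+
+`$ curl -v localhost:8000/v2/health/ready`
+
+`$ curl -v localhost:8000/v2/models/densenet/ready`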
+
+## Support
+Please submit your questions, feature requests, and bug reports on the [GitHub issues page](https://github.com/intel/intel-ai-inference-samples/issues).
+
+## License
+The AI Inference samples project is licensed under the Apache License, Version 2.0. Refer to the [LICENSE](../LICENSE) file for the full license text and copyright notice.
+
+This third party software, even if included with the distribution of the Intel software, may be governed by separate license terms, including without limitation, third party license terms, other Intel software license terms, and open source software license terms. These separate license terms govern your use of the third party programs as set forth in the [THIRD-PARTY-PROGRAMS](./THIRD-PARTY-PROGRAMS) file.
+
+## Trademark Information
+Intel, the Intel logo, OpenVINO, the OpenVINO logo and Intel Xeon are trademarks of Intel Corporation or its subsidiaries.
+* Other names and brands may be claimed as the property of others.
+
+© Intel Corporation
+
+
diff --git a/IPEX_OOB_with_triton/THIRD-PARTY-PROGRAMS b/IPEX_OOB_with_triton/THIRD-PARTY-PROGRAMS
new file mode 100644
index 0000000..dc1ff4f
--- /dev/null
+++ b/IPEX_OOB_with_triton/THIRD-PARTY-PROGRAMS
@@ -0,0 +1,28 @@
+1. model.py (triton/model_utils/bert_common/1/model.py)
+2. model.py (triton/model_utils/bert_common_ov/1/model.py)
+
+Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+ * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+ * Neither the name of NVIDIA CORPORATION nor the names of its
+   contributors may be used to endorse or promote products derived
+   from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/IPEX_OOB_with_triton/client_imagenet.py b/IPEX_OOB_with_triton/client_imagenet.py
new file mode 100644
index 0000000..9c9f6f5
--- /dev/null
+++ b/IPEX_OOB_with_triton/client_imagenet.py
@@ -0,0 +1,64 @@
+import argparse
+import os
+import time
+
+from PIL import Image
+from torchvision import transforms
+import tritonclient.http as httpclient
+
+# preprocessing function: standard ImageNet resize, crop and normalization
+def image_preprocess(img_path):
+    img = Image.open(img_path).convert("RGB")
+    preprocess = transforms.Compose([
+        transforms.Resize(224),
+        transforms.CenterCrop(224),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ])
+    return preprocess(img).numpy()
+
+parser = argparse.ArgumentParser(description="Run ImageNet samples against the densenet model on Triton Server")
+parser.add_argument("--dataset", default="/home/ubuntu/ImageNet/imagenet_images", help="directory containing sample images")
+args = parser.parse_args()
+
+# collect all image paths under the dataset directory
+filelist = []
+for root, dirs, files in os.walk(args.dataset):
+    for file in files:
+        filelist.append(os.path.join(root, file))
+
+# set up the client once and reuse it for every request
+client = httpclient.InferenceServerClient(url="localhost:8000", concurrency=2)
+
+# run three passes over the dataset; the summary below reflects the last pass
+for x in range(0, 3):
+    iteration = 0
+    elapsed_time = 0
+    for name in filelist:
+        try:
+            start_epoch = time.time()
+            print(name)
+            transformed_img = image_preprocess(name)
+
+            # specify the names of the input and output layer(s) of our model
+            inputs = httpclient.InferInput("input__0", transformed_img.shape, datatype="FP32")
+            inputs.set_data_from_numpy(transformed_img, binary_data=True)
+
+            outputs = httpclient.InferRequestedOutput("OUTPUT__0", binary_data=True, class_count=1000)
+
+            # query the server
+            results = client.infer(model_name="densenet", inputs=[inputs], outputs=[outputs])
+            predictions = results.as_numpy('OUTPUT__0')
+            print(predictions[:5])
+            end_epoch = time.time()
+
+            iteration = iteration + 1
+            elapsed = end_epoch - start_epoch
+            elapsed_time = elapsed_time + elapsed
+            print("Per Sample Inference Latency in sec", elapsed)
+        except Exception as err:
+            print("Skipping", name, "-", err)
+
+print("Total Iteration", iteration)
+print("Total elapsed time", elapsed_time)
+print("Avg elapsed time per sample in sec", elapsed_time/iteration)
diff --git a/IPEX_OOB_with_triton/model_repository/densenet/1/README.md b/IPEX_OOB_with_triton/model_repository/densenet/1/README.md
new file mode 100644
index 0000000..3a9f087
--- /dev/null
+++ b/IPEX_OOB_with_triton/model_repository/densenet/1/README.md
@@ -0,0 +1 @@
+### Place the model.pt file in this directory
diff --git a/IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt b/IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt
new file mode 100644
index 0000000..cbfa98e
--- /dev/null
+++ b/IPEX_OOB_with_triton/model_repository/densenet/config.pbtxt
@@ -0,0 +1,33 @@
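+# Model configuration for the Triton PyTorch (LibTorch) backend.
+# With max_batch_size set to 0, the dims below describe the full tensor
+# shapes exposed to clients, and the reshape blocks give the shapes the
+# TorchScript model itself expects and produces. The input__0/OUTPUT__0
+# names follow the <name>__<index> convention of the PyTorch backend.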
+name: "densenet"
+platform: "pytorch_libtorch"
+max_batch_size: 0
+input [
+  {
+    name: "input__0"
+    data_type: TYPE_FP32
+    dims: [ 3, 224, 224 ]
+    reshape { shape: [ 1, 3, 224, 224 ] }
+  }
+]
+output [
+  {
+    name: "OUTPUT__0"
+    data_type: TYPE_FP32
+    dims: [ 1, 1000, 1, 1 ]
+    reshape { shape: [ 1, 1000 ] }
+  }
+]
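+
+# Optional (not part of the original sample): explicitly pin CPU model
+# instances; uncomment and tune the count for your host if needed.
+# instance_group [
+#   {
+#     count: 1
+#     kind: KIND_CPU
+#   }
+# ]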