From 5121da4f699ca772c81619c5384a0f0a5faf5cbb Mon Sep 17 00:00:00 2001
From: barfy007jb1
Date: Thu, 20 Feb 2025 09:37:47 +0000
Subject: [PATCH 1/2] Adding nvidias HPL benchmark

---
Review notes. They sit after the three-dash separator, so they are
commentary only and are not part of the commit message.

* This copy had every newline collapsed to a space. The record structure
  below was rebuilt from the hunk counts and the diffstat. Indentation run
  lengths could not be recovered (4 spaces assumed), so context lines and
  blob ids may not match byte-for-byte - TODO confirm against the repo.
* The author header carries no e-mail address in this copy; git am may
  reject it - TODO restore the address.
* Dockerfile.nvidia_hpc_base, "Install NGC CLI": no SHELL instruction is
  set, so RUN goes through /bin/sh, which is dash on ubuntu:20.04 and has
  no `source` builtin; the && chain stops at that command. Appending to
  ~/.bash_profile also does not change PATH for later RUN steps or for
  the non-login bash used by the ENTRYPOINT; an ENV PATH=... instruction
  would.
* Dockerfile.nvidia_hpc_base: the trailing `cd deepops/workloads/bit/hpl`
  only affects that single RUN; later instructions still start in /root.
* Dockerfile.nvidia_hpc_base: `RUN apt install unzip` has no -y and lives
  in its own layer; unzip could join the apt-get install list above it.
* Dockerfile.nvidia_hpl: ENTRYPOINT runs ./launch_hpl_experiment.py from
  /root, but nothing in this series places that script in /root (the git
  clone lands in /root/deepops) - TODO confirm the intended WORKDIR.
* Dockerfile.nvidia_hpl: "-s dgxa100_80GG" looks like a typo - TODO
  confirm against the system names the script accepts.
* Dockerfile.nvidia_hpl: comment typos ("spesify", "USe"), and the comment
  "Copy built HPL from the build image" sits above a WORKDIR; there is no
  COPY instruction in the file.

 .../nvidia_hpc_bench/Dockerfile.nvidia_hpl    | 11 +++++++++
 .../base_images/Dockerfile.nvidia_hpc_base    | 24 +++++++++++++++++++
 dockerfiles/build_images.sh                   | 15 ++++++++++--
 3 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 dockerfiles/app_images/nvidia_hpc_bench/Dockerfile.nvidia_hpl
 create mode 100644 dockerfiles/base_images/Dockerfile.nvidia_hpc_base

diff --git a/dockerfiles/app_images/nvidia_hpc_bench/Dockerfile.nvidia_hpl b/dockerfiles/app_images/nvidia_hpc_bench/Dockerfile.nvidia_hpl
new file mode 100644
index 0000000..3fa8ece
--- /dev/null
+++ b/dockerfiles/app_images/nvidia_hpc_bench/Dockerfile.nvidia_hpl
@@ -0,0 +1,11 @@
+#Use an ARG to spesify the base image
+ARG BASE_IMAGE=harbor.stfc.ac.uk/stfc-cloud-staging/iris-bench/nvidia_hpc_base:latest
+
+# USe the base image
+FROM ${BASE_IMAGE}
+
+# Copy built HPL from the build image
+WORKDIR /root
+
+# Run HPL automatically on container start
+ENTRYPOINT ["/bin/bash", "-c", "./launch_hpl_experiment.py -c 1 -s dgxa100_80GG --maxnodes 6 --cruntime enroot"]
diff --git a/dockerfiles/base_images/Dockerfile.nvidia_hpc_base b/dockerfiles/base_images/Dockerfile.nvidia_hpc_base
new file mode 100644
index 0000000..40c95d4
--- /dev/null
+++ b/dockerfiles/base_images/Dockerfile.nvidia_hpc_base
@@ -0,0 +1,24 @@
+# Use the official Ubuntu base image
+FROM ubuntu:20.04
+
+# Set the working directory
+WORKDIR /root
+
+# Set non-interactive frontend for apt-get
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install dependencies
+RUN apt-get update && \
+    apt-get install -y wget python3-pip git
+
+# Download Nvidia NGC HPC Benchmarks
+RUN git clone https://github.com/NVIDIA/deepops.git && \
+    cd deepops/workloads/bit/hpl
+
+RUN apt install unzip
+
+# Install NGC CLI
+RUN wget --content-disposition https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/3.60.1/files/ngccli_linux.zip -O ngccli_linux.zip && unzip ngccli_linux.zip && \
+    chmod u+x ngc-cli/ngc && \
+    echo "export PATH=\"\$PATH:$(pwd)/ngc-cli\"" >> ~/.bash_profile && source ~/.bash_profile && \
+    echo "auth.csv" | ngc config set --format_type csv
diff --git a/dockerfiles/build_images.sh b/dockerfiles/build_images.sh
index 9fd4f01..75a3be0 100755
--- a/dockerfiles/build_images.sh
+++ b/dockerfiles/build_images.sh
@@ -22,11 +22,12 @@ BASE_IMAGES_DIR="base_images"
 APP_IMAGES_DIR="app_images"
 
 # List of base images to build
-BASE_IMAGES=("mantid_base" "sciml_base")
+BASE_IMAGES=("mantid_base" "sciml_base" "nvidia_hpc_base")
 
 # List of app images to build
 SCIML_IMAGES=("mnist_tf_keras" "stemdl_classification" "synthetic_regression")
 MANTID_IMAGES=("mantid_run_1" "mantid_run_4" "mantid_run_5" "mantid_run_8")
+NVIDIA_IMAGES=("nvidia_hpl")
 
 # Build base images first
 echo "Building base images..."
@@ -55,6 +56,16 @@ for IMAGE in "${MANTID_IMAGES[@]}"; do
     docker build -f $DOCKERFILE -t ${IMAGE_TAG} --build-arg BASE_IMAGE=mantid_base:latest .
 done
 
+# Build nvidia_hpc_bench images
+echo "Building nvidia_hpc_bench images..."
+for IMAGE in "${NVIDIA_IMAGES[@]}"; do
+    DOCKERFILE="${APP_IMAGES_DIR}/nvidia_hpc_bench/Dockerfile.${IMAGE}"
+    IMAGE_TAG="${IMAGE}:latest"
+    echo "Building app image: ${IMAGE_TAG}..."
+    docker build -f $DOCKERFILE -t ${IMAGE_TAG} --build-arg BASE_IMAGE=nvidia_hpc_base:latest .
+done
+
+
 # Build dummy image
 echo "Building dummy image..."
 DOCKERFILE="${APP_IMAGES_DIR}/Dockerfile.dummy"
@@ -62,4 +73,4 @@ IMAGE_TAG="dummy:latest"
 echo "Building app image: ${IMAGE_TAG}..."
 docker build -f $DOCKERFILE -t ${IMAGE_TAG} .
 
-echo -e "Build process completed.\n"
\ No newline at end of file
+echo -e "Build process completed.\n"

From 31d216d2e2baa575a49ca0dd38acf1b11e8867e7 Mon Sep 17 00:00:00 2001
From: barfy007jb1
Date: Thu, 20 Feb 2025 11:01:52 +0000
Subject: [PATCH 2/2] Adding example Auth file

---
Review notes. They sit after the three-dash separator, so they are
commentary only and are not part of the commit message.

* auth.json holds three objects separated by commas with no enclosing
  [ ], so the file is not a single valid JSON document.
* Nothing in this series copies auth.json into the image, and
  `ngc config set` no longer receives any piped input - presumably it
  will prompt during the build; TODO confirm how credentials are meant to
  be supplied. A BuildKit secret mount would keep an API key out of the
  image layers.
* Indentation inside auth.json was reconstructed (4 spaces assumed).

 dockerfiles/base_images/Dockerfile.nvidia_hpc_base |  2 +-
 dockerfiles/base_images/auth.json                  | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 dockerfiles/base_images/auth.json

diff --git a/dockerfiles/base_images/Dockerfile.nvidia_hpc_base b/dockerfiles/base_images/Dockerfile.nvidia_hpc_base
index 40c95d4..1314056 100644
--- a/dockerfiles/base_images/Dockerfile.nvidia_hpc_base
+++ b/dockerfiles/base_images/Dockerfile.nvidia_hpc_base
@@ -21,4 +21,4 @@ RUN apt install unzip
 RUN wget --content-disposition https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/3.60.1/files/ngccli_linux.zip -O ngccli_linux.zip && unzip ngccli_linux.zip && \
     chmod u+x ngc-cli/ngc && \
     echo "export PATH=\"\$PATH:$(pwd)/ngc-cli\"" >> ~/.bash_profile && source ~/.bash_profile && \
-    echo "auth.csv" | ngc config set --format_type csv
+    ngc config set --format_type json
diff --git a/dockerfiles/base_images/auth.json b/dockerfiles/base_images/auth.json
new file mode 100644
index 0000000..6b3dabc
--- /dev/null
+++ b/dockerfiles/base_images/auth.json
@@ -0,0 +1,13 @@
+{
+    "key": "apikey",
+    "source": "user settings",
+    "value": ""
+},{
+    "key": "format_type",
+    "source": "global argument",
+    "value": "json"
+},{
+    "key": "org",
+    "source": "user settings",
+    "value": ""
+}