iSPIRT · kapilvgit · Sep 6, 2025 · Jun 9, 2025 · Jun 9, 2025 · Jul 19, 2025
diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml
@@ -43,18 +43,18 @@ jobs:
       - name: Install wheel
         run: pip install wheel 
 
-      - name: Build encrypted filesystem artifacta, contract ledger client & depa-training container
+      - name: Build encrypted filesystem artifacts, contract ledger client & depa-training container
         run:  ci/build.sh
 
       - name: Build container images
         run: cd ${{ github.workspace }}/scenarios/covid && ./ci/build.sh
 
       - name: Run pre-processing
-        run: cd ./scenarios/covid/deployment/docker && ./preprocess.sh
+        run: cd ./scenarios/covid/deployment/local && ./preprocess.sh
 
       - name: Run model saving
-        run: cd ./scenarios/covid/deployment/docker && ./save-model.sh
+        run: cd ./scenarios/covid/deployment/local && ./save-model.sh
 
       - name: Run training
-        run: cd ./scenarios/covid/deployment/docker && ./train.sh
+        run: cd ./scenarios/covid/deployment/local && ./train.sh
 
diff --git a/.github/workflows/ci-local.yml b/.github/workflows/ci-local.yml
@@ -42,10 +42,10 @@ jobs:
         run: cd ${{ github.workspace }}/ci && ./pull-containers.sh
 
       - name: Run pre-processing
-        run: cd ./scenarios/covid/deployment/docker && ./preprocess.sh
+        run: cd ./scenarios/covid/deployment/local && ./preprocess.sh
 
       - name: Run model saving
-        run: cd ./scenarios/covid/deployment/docker && ./save-model.sh
+        run: cd ./scenarios/covid/deployment/local && ./save-model.sh
 
       - name: Run training
-        run: cd ./scenarios/covid/deployment/docker && ./train.sh
+        run: cd ./scenarios/covid/deployment/local && ./train.sh
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -79,31 +79,34 @@ jobs:
         run: sudo usermod -aG docker $USER
 
       - name: Run pre-processing
-        run: cd ${{ github.workspace }}/scenarios/covid/deployment/docker && ./preprocess.sh
+        run: cd ${{ github.workspace }}/scenarios/covid/deployment/local && ./preprocess.sh
 
       - name: Run model saving
-        run: cd ${{ github.workspace }}/scenarios/covid/deployment/docker && ./save-model.sh
+        run: cd ${{ github.workspace }}/scenarios/covid/deployment/local && ./save-model.sh
 
       - name: Pull container images for generating policy
         run: cd ${{ github.workspace }}/ci && ./pull-containers.sh
 
+      - name: Consolidate pipeline configuration
+        run: cd ${{ github.workspace }}/scenarios/covid/ && ./config/consolidate_pipeline.sh
+
       - name: create storage and containers 
-        run: cd ${{ github.workspace }}/scenarios/covid/data && ./1-create-storage-containers.sh
+        run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./1-create-storage-containers.sh
 
       - name: create azure key vault
-        run: cd ${{ github.workspace }}/scenarios/covid/data && ./2-create-akv.sh 
+        run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./2-create-akv.sh 
 
       - name: Import data and model encryption keys with key release policies
-        run: cd ${{ github.workspace }}/scenarios/covid/data && ./3-import-keys.sh
+        run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./3-import-keys.sh
 
       - name: Encrypt data and models
-        run: cd ${{ github.workspace }}/scenarios/covid/data && ./4-encrypt-data.sh
+        run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./4-encrypt-data.sh
 
       - name: Upload data and model
-        run: cd ${{ github.workspace }}/scenarios/covid/data && ./5-upload-encrypted-data.sh
+        run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./5-upload-encrypted-data.sh
 
       - name: Run training
-        run: cd ${{ github.workspace }}/scenarios/covid/deployment/aci && ./deploy.sh -c ${{ github.event.inputs.contract }} -p ../../config/pipeline_config.json 
+        run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./deploy.sh -c ${{ github.event.inputs.contract }} -p ../../config/pipeline_config.json 
 
       - name: Dump training container logs
         run: sleep 200 && az container logs --name depa-training-covid --resource-group $AZURE_RESOURCE_GROUP --container-name depa-training
@@ -112,7 +115,7 @@ jobs:
         run: az container logs --name depa-training-covid --resource-group $AZURE_RESOURCE_GROUP --container-name encrypted-storage-sidecar
 
       - name: Download and decrypt model
-        run: cd ${{ github.workspace }}/scenarios/covid/data && ./6-download-decrypt-model.sh
+        run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./6-download-decrypt-model.sh
 
       - name: Clean up resource group and all resources 
         run: az group delete --yes --name $AZURE_RESOURCE_GROUP

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -34,7 +34,7 @@ jobs:
             context: ./scenarios/covid/src
             buildargs: |
           - dockerfile: ./scenarios/covid/ci/Dockerfile.modelsave
-            name: ccr-model-save
+            name: covid-model-save
             context: ./scenarios/covid/src
             buildargs: |
           - dockerfile: ./ci/Dockerfile.encfs

diff --git a/.gitignore b/.gitignore
@@ -1 +1,10 @@
 **/*.onnx
+**/*.pth
+**/*.pt
+**/*.img
+**/*.bin
+**/*.pem
+
+venv/
+
+**/__pycache__/
diff --git a/README.md b/README.md
@@ -6,28 +6,31 @@
 
 ## GitHub Codespaces
 
-The simplest way to setup a development environment is using [GitHub Codespaces](https://github.com/codespaces). The repository includes a [devcontainer.json](../../.devcontainer/devcontainer.json), which customizes your codespace to install all required dependencies. Please ensure you allocate at least 64GB disk space in your codespace. Also, run the following command in the codespace to update submodules.
+The simplest way to set up a development environment is using [GitHub Codespaces](https://github.com/codespaces). The repository includes a [devcontainer.json](.devcontainer/devcontainer.json), which customizes your codespace to install all required dependencies. Please ensure you allocate at least 8 vCPUs and 64GB disk space in your codespace. Also, run the following command in the codespace to update submodules.
 
 ```bash
 git submodule update --init --recursive
 ```
 
 ## Local Development Environment
 
-Alternatively, you can build and develop locally in a Linux environment (we have tested with Ubuntu 20.04 and 22.04), or Windows with WSL 2. Install the following dependencies. 
+Alternatively, you can build and develop locally in a Linux environment (we have tested with Ubuntu 20.04 and 22.04), or Windows with WSL 2. 
 
-- [docker](https://docs.docker.com/engine/install/ubuntu/) and docker-compose. After installing docker, add your user to the docker group using `sudo usermod -aG docker $USER`, and log back in to a shell. 
-- make (install using ```sudo apt-get install make```)
-- Python 3.6.9 and pip 
-- [Go](https://go.dev/doc/install). Follow the instructions to install Go. After installing, ensure that the PATH environment variable is set to include ```go``` runtime.
-- Python wheel package (install using ```pip install wheel```)
-
-Clone this repo as follows. 
+Clone this repo to your local machine / virtual machine as follows. 
 
 ```bash
 git clone --recursive http://github.com/iSPIRT/depa-training
+cd depa-training
 ```
 
+Install the required dependencies by running the [install-prerequisites.sh](./install-prerequisites.sh) script.
+
+```bash
+./install-prerequisites.sh
+```
+
+Note: You may need to restart your machine to ensure that the changes take effect.
+
 ## Build CCR containers
 
 To build your own CCR container images, use the following command from the root of the repository. 
@@ -44,16 +47,39 @@ This scripts build the following containers.
 Alternatively, you can use pre-built container images from the ispirt repository by setting the following environment variable. Docker hub has started throttling which may effect the upload/download time, especially when images are bigger size. So, It is advisable to use other container registries, we are using azure container registry as shown below
 ```bash
 export CONTAINER_REGISTRY=ispirt.azurecr.io
+./ci/pull-containers.sh
 ```
 
 # Scenarios
 
 This repository contains two samples that illustrate the kinds of scenarios DEPA for Training can support. 
 
-- [Training a differentially private COVID prediction model on private datasets](./scenarios/covid/README.md)
-- [Convolutional Neural Network training on MNIST dataset](./scenarios/mnist/README.md)
+Follow the links to build and deploy these scenarios. 
+
+| Scenario name | Scenario type | Task type | Privacy | No. of TDPs* | Data type (format) | Model type (format) | Join type (No. of datasets) | 
+|--------------|---------------|-----------------|--------------|-----------|------------|------------|------------|
+| [COVID-19](./scenarios/covid/README.md) | Training - Deep Learning | Binary Classification | Differentially Private | 3 | PII tabular data (CSV) | MLP (ONNX) | Horizontal (3)|
+| [BraTS](./scenarios/brats/README.md) | Training - Deep Learning | Image Segmentation | Differentially Private | 4 | MRI scan data (NIfTI/PNG) | UNet (Safetensors) | Vertical (4)|
+| [Credit Risk](./scenarios/credit-risk/README.md) | Training - Classical ML | Binary Classification | Differentially Private | 4 | PII tabular data (Parquet) | XGBoost (JSON) | Horizontal (4)|
+| [CIFAR-10](./scenarios/cifar10/README.md) | Training - Deep Learning | Multi-class Image Classification | NA | 1 | Non-PII image data (Safetensors) | CNN (Safetensors) | NA (1)|
+| [MNIST](./scenarios/mnist/README.md) | Training - Deep Learning | Multi-class Image Classification | NA | 1 | Non-PII image data (HDF5) | CNN (ONNX) | NA (1)|
+
+_NA: Not Applicable_ <br>
+_DL: Deep Learning, ML: Classical Machine Learning_ <br>
+_*Training Data Providers (TDPs) involved in the scenario._
+
+## Build your own Scenarios
+
+A guide to build your own scenarios is coming soon. Stay tuned!
+
+Currently, DEPA for Training supports the following training frameworks, libraries and file formats (more will be included soon):
+
+- Training frameworks: PyTorch, Scikit-learn, XGBoost
+- Libraries: Opacus, PySpark, Pandas
+- File formats (for models and datasets): ONNX, Safetensors, Parquet, CSV, HDF5, PNG
+
+Note: For security reasons, we do not support pickle-based file formats such as .pkl, .pt/.pth, .npy/.npz, .joblib, etc.
 
-Follow these links to build and deploy these scenarios. 
 
 # Contributing
 

diff --git a/ci/Dockerfile.train b/ci/Dockerfile.train
@@ -1,23 +1,33 @@
-FROM ubuntu:20.04
+FROM ubuntu:22.04
 
 ENV DEBIAN_FRONTEND="noninteractive"
 
 RUN apt-get update && apt-get -y upgrade \
     && apt-get install -y curl \
-    && apt-get install -y python3.9 python3.9-dev python3.9-distutils \
-    && apt-get install -y openjdk-8-jdk
+    && apt-get install -y python3 python3-dev python3-distutils \
+    && apt-get install -y openjdk-17-jdk
 
 ## Install pip
 RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
-RUN python3.9 get-pip.py
+RUN python3 get-pip.py
 
 ## Install dependencies
 RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
-RUN pip3 --default-timeout=1000 install pyspark pandas opacus onnx onnx2pytorch scikit-learn scipy matplotlib 
+RUN pip3 --default-timeout=1000 install pyspark pandas opacus==1.5.3 onnx onnx2pytorch scikit-learn scipy matplotlib 
+RUN pip3 install safetensors h5py pyarrow xgboost
+
+# For computer vision tasks
+RUN pip3 install --default-timeout=100 opencv-python pillow monai==1.4.0
+
+# # For natural language processing tasks
+# RUN pip3 install transformers datasets peft
 
 RUN apt-get install -y jq
 
-# Install contract ledger client
+# Point Spark/JVM tooling at the OpenJDK 17 install; ENV (unlike `RUN export`) persists into later layers and the running container.
+ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64/
+
+# Install pytrain package for training
 COPY train/dist/pytrain-0.0.1-py3-none-any.whl .
 RUN pip3 install pytrain-0.0.1-py3-none-any.whl
 

diff --git a/ci/pull-containers.sh b/ci/pull-containers.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-containers=("ccr-model-save:latest" "depa-training:latest" "depa-training-encfs:latest")
+containers=("depa-training:latest" "depa-training-encfs:latest")
 for container in "${containers[@]}"
 do
   docker pull $CONTAINER_REGISTRY"/"$container