Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
165acc2
Added LLM-finetune scenario local execution
saranggalada Jun 9, 2025
d96a870
Added MRI-Segmentation scenario local execution
saranggalada Jun 9, 2025
b0536cb
added aci deployment steps
saranggalada Jul 19, 2025
3249c7a
Update load_base_model.py
saranggalada Jul 25, 2025
1c8585d
build scripts
kapilvgit Jul 26, 2025
11f9a4f
script permissions
kapilvgit Jul 26, 2025
628710a
support PrivateTrainVision in policy
kapilvgit Jul 26, 2025
c2b9cf1
Update pipeline_config.json
saranggalada Jul 27, 2025
ffdbf07
standardized config param names
saranggalada Jul 27, 2025
c1fa213
naming changes
kapilvgit Jul 29, 2025
15db91a
epsilon
kapilvgit Jul 29, 2025
7652bcf
bug fixes and automation
saranggalada Aug 8, 2025
909c655
unified pytrain across scenarios and added bring your custom code fun…
saranggalada Aug 13, 2025
988ce63
fixed file naming and path handling
saranggalada Aug 15, 2025
4940e45
introduced training customization wrappers and configurations. Added …
saranggalada Aug 24, 2025
33aaa0a
Updated Readmes
saranggalada Aug 24, 2025
cd9aa20
Updated Readmes
saranggalada Aug 24, 2025
9fe320c
security fixes to prevent malicious code injection and unapproved mod…
saranggalada Aug 26, 2025
9df91e9
updated scenarios table
saranggalada Aug 26, 2025
742323a
Added XGBoost support and Credit-Risk scenario
saranggalada Sep 5, 2025
382de66
Update README.md
saranggalada Sep 5, 2025
0d3a90e
Update ci-build.yml
saranggalada Sep 5, 2025
22e0c2f
Update ci-local.yml
saranggalada Sep 5, 2025
853ba1d
updated ci
saranggalada Sep 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/ci-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,18 @@ jobs:
- name: Install wheel
run: pip install wheel

- name: Build encrypted filesystem artifacta, contract ledger client & depa-training container
- name: Build encrypted filesystem artifacts, contract ledger client & depa-training container
run: ci/build.sh

- name: Build container images
run: cd ${{ github.workspace }}/scenarios/covid && ./ci/build.sh

- name: Run pre-processing
run: cd ./scenarios/covid/deployment/docker && ./preprocess.sh
run: cd ./scenarios/covid/deployment/local && ./preprocess.sh

- name: Run model saving
run: cd ./scenarios/covid/deployment/docker && ./save-model.sh
run: cd ./scenarios/covid/deployment/local && ./save-model.sh

- name: Run training
run: cd ./scenarios/covid/deployment/docker && ./train.sh
run: cd ./scenarios/covid/deployment/local && ./train.sh

6 changes: 3 additions & 3 deletions .github/workflows/ci-local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ jobs:
run: cd ${{ github.workspace }}/ci && ./pull-containers.sh

- name: Run pre-processing
run: cd ./scenarios/covid/deployment/docker && ./preprocess.sh
run: cd ./scenarios/covid/deployment/local && ./preprocess.sh

- name: Run model saving
run: cd ./scenarios/covid/deployment/docker && ./save-model.sh
run: cd ./scenarios/covid/deployment/local && ./save-model.sh

- name: Run training
run: cd ./scenarios/covid/deployment/docker && ./train.sh
run: cd ./scenarios/covid/deployment/local && ./train.sh
21 changes: 12 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,31 +79,34 @@ jobs:
run: sudo usermod -aG docker $USER

- name: Run pre-processing
run: cd ${{ github.workspace }}/scenarios/covid/deployment/docker && ./preprocess.sh
run: cd ${{ github.workspace }}/scenarios/covid/deployment/local && ./preprocess.sh

- name: Run model saving
run: cd ${{ github.workspace }}/scenarios/covid/deployment/docker && ./save-model.sh
run: cd ${{ github.workspace }}/scenarios/covid/deployment/local && ./save-model.sh

- name: Pull container images for generating policy
run: cd ${{ github.workspace }}/ci && ./pull-containers.sh

- name: Consolidate pipeline configuration
run: cd ${{ github.workspace }}/scenarios/covid/ && ./config/consolidate_pipeline.sh

- name: create storage and containers
run: cd ${{ github.workspace }}/scenarios/covid/data && ./1-create-storage-containers.sh
run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./1-create-storage-containers.sh

- name: create azure key vault
run: cd ${{ github.workspace }}/scenarios/covid/data && ./2-create-akv.sh
run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./2-create-akv.sh

- name: Import data and model encryption keys with key release policies
run: cd ${{ github.workspace }}/scenarios/covid/data && ./3-import-keys.sh
run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./3-import-keys.sh

- name: Encrypt data and models
run: cd ${{ github.workspace }}/scenarios/covid/data && ./4-encrypt-data.sh
run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./4-encrypt-data.sh

- name: Upload data and model
run: cd ${{ github.workspace }}/scenarios/covid/data && ./5-upload-encrypted-data.sh
run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./5-upload-encrypted-data.sh

- name: Run training
run: cd ${{ github.workspace }}/scenarios/covid/deployment/aci && ./deploy.sh -c ${{ github.event.inputs.contract }} -p ../../config/pipeline_config.json
run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./deploy.sh -c ${{ github.event.inputs.contract }} -p ../../config/pipeline_config.json

- name: Dump training container logs
run: sleep 200 && az container logs --name depa-training-covid --resource-group $AZURE_RESOURCE_GROUP --container-name depa-training
Expand All @@ -112,7 +115,7 @@ jobs:
run: az container logs --name depa-training-covid --resource-group $AZURE_RESOURCE_GROUP --container-name encrypted-storage-sidecar

- name: Download and decrypt model
run: cd ${{ github.workspace }}/scenarios/covid/data && ./6-download-decrypt-model.sh
run: cd ${{ github.workspace }}/scenarios/covid/deployment/azure && ./6-download-decrypt-model.sh

- name: Clean up resource group and all resources
run: az group delete --yes --name $AZURE_RESOURCE_GROUP
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
context: ./scenarios/covid/src
buildargs: |
- dockerfile: ./scenarios/covid/ci/Dockerfile.modelsave
name: ccr-model-save
name: covid-model-save
context: ./scenarios/covid/src
buildargs: |
- dockerfile: ./ci/Dockerfile.encfs
Expand Down
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
**/*.onnx
**/*.pth
**/*.pt
**/*.img
**/*.bin
**/*.pem

venv/

**/__pycache__/
50 changes: 38 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,31 @@

## GitHub Codespaces

The simplest way to setup a development environment is using [GitHub Codespaces](https://github.com/codespaces). The repository includes a [devcontainer.json](../../.devcontainer/devcontainer.json), which customizes your codespace to install all required dependencies. Please ensure you allocate at least 64GB disk space in your codespace. Also, run the following command in the codespace to update submodules.
The simplest way to set up a development environment is using [GitHub Codespaces](https://github.com/codespaces). The repository includes a [devcontainer.json](.devcontainer/devcontainer.json), which customizes your codespace to install all required dependencies. Please ensure you allocate at least 8 vCPUs and 64GB disk space in your codespace. Also, run the following command in the codespace to update submodules.

```bash
git submodule update --init --recursive
```

## Local Development Environment

Alternatively, you can build and develop locally in a Linux environment (we have tested with Ubuntu 20.04 and 22.04), or Windows with WSL 2. Install the following dependencies.
Alternatively, you can build and develop locally in a Linux environment (we have tested with Ubuntu 20.04 and 22.04), or Windows with WSL 2.

- [docker](https://docs.docker.com/engine/install/ubuntu/) and docker-compose. After installing docker, add your user to the docker group using `sudo usermod -aG docker $USER`, and log back in to a shell.
- make (install using ```sudo apt-get install make```)
- Python 3.6.9 and pip
- [Go](https://go.dev/doc/install). Follow the instructions to install Go. After installing, ensure that the PATH environment variable is set to include ```go``` runtime.
- Python wheel package (install using ```pip install wheel```)

Clone this repo as follows.
Clone this repo to your local machine / virtual machine as follows.

```bash
git clone --recursive http://github.com/iSPIRT/depa-training
cd depa-training
```

Install the dependencies listed below by running the [install-prerequisites.sh](./install-prerequisites.sh) script.

```bash
./install-prerequisites.sh
```

Note: You may need to restart your machine to ensure that the changes take effect.

## Build CCR containers

To build your own CCR container images, use the following command from the root of the repository.
Expand All @@ -44,16 +47,39 @@ This scripts build the following containers.
Alternatively, you can use pre-built container images from the ispirt repository by setting the following environment variable. Docker Hub has started throttling, which may affect upload/download times, especially for larger images. It is therefore advisable to use another container registry; we use Azure Container Registry, as shown below.
```bash
export CONTAINER_REGISTRY=ispirt.azurecr.io
./ci/pull-containers.sh
```

# Scenarios

This repository contains several samples that illustrate the kinds of scenarios DEPA for Training can support.

- [Training a differentially private COVID prediction model on private datasets](./scenarios/covid/README.md)
- [Convolutional Neural Network training on MNIST dataset](./scenarios/mnist/README.md)
Follow the links to build and deploy these scenarios.

| Scenario name | Scenario type | Task type | Privacy | No. of TDPs* | Data type (format) | Model type (format) | Join type (No. of datasets) |
|--------------|---------------|-----------------|--------------|-----------|------------|------------|------------|
| [COVID-19](./scenarios/covid/README.md) | Training - Deep Learning | Binary Classification | Differentially Private | 3 | PII tabular data (CSV) | MLP (ONNX) | Horizontal (3)|
| [BraTS](./scenarios/brats/README.md) | Training - Deep Learning | Image Segmentation | Differentially Private | 4 | MRI scans data (NIfTI/PNG) | UNet (Safetensors) | Vertical (4)|
| [Credit Risk](./scenarios/credit-risk/README.md) | Training - Classical ML | Binary Classification | Differentially Private | 4 | PII tabular data (Parquet) | XGBoost (JSON) | Horizontal (4)|
| [CIFAR-10](./scenarios/cifar10/README.md) | Training - Deep Learning | Multi-class Image Classification | NA | 1 | Non-PII image data (Safetensors) | CNN (Safetensors) | NA (1)|
| [MNIST](./scenarios/mnist/README.md) | Training - Deep Learning | Multi-class Image Classification | NA | 1 | Non-PII image data (HDF5) | CNN (ONNX) | NA (1)|

_NA: Not Applicable_ <br>
_DL: Deep Learning, ML: Classical Machine Learning_ <br>
_*Training Data Providers (TDPs) involved in the scenario._

## Build your own Scenarios

A guide to build your own scenarios is coming soon. Stay tuned!

Currently, DEPA for Training supports the following training frameworks, libraries and file formats (more will be included soon):

- Training frameworks: PyTorch, Scikit-learn, XGBoost
- Libraries: Opacus, PySpark, Pandas
- File formats (for models and datasets): ONNX, Safetensors, Parquet, CSV, HDF5, PNG

Note: For security reasons, we do not support Pickle-based file formats such as .pkl, .pt/.pth, .npy/.npz, .joblib, etc.

Follow these links to build and deploy these scenarios.

# Contributing

Expand Down
22 changes: 16 additions & 6 deletions ci/Dockerfile.train
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
FROM ubuntu:20.04
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND="noninteractive"

RUN apt-get update && apt-get -y upgrade \
&& apt-get install -y curl \
&& apt-get install -y python3.9 python3.9-dev python3.9-distutils \
&& apt-get install -y openjdk-8-jdk
&& apt-get install -y python3 python3-dev python3-distutils \
&& apt-get install -y openjdk-17-jdk

## Install pip
RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
RUN python3.9 get-pip.py
RUN python3 get-pip.py

## Install dependencies
RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
RUN pip3 --default-timeout=1000 install pyspark pandas opacus onnx onnx2pytorch scikit-learn scipy matplotlib
RUN pip3 --default-timeout=1000 install pyspark pandas opacus==1.5.3 onnx onnx2pytorch scikit-learn scipy matplotlib
RUN pip3 install safetensors h5py pyarrow xgboost

# For computer vision tasks
RUN pip3 install --default-timeout=100 opencv-python pillow monai==1.4.0

# # For natural language processing tasks
# RUN pip3 install transformers datasets peft

RUN apt-get install -y jq

# Install contract ledger client
ENV JAVA_HOME /usr/lib/jvm/java-17-openjdk-amd64/
RUN export JAVA_HOME

# Install pytrain package for training
COPY train/dist/pytrain-0.0.1-py3-none-any.whl .
RUN pip3 install pytrain-0.0.1-py3-none-any.whl

Expand Down
2 changes: 1 addition & 1 deletion ci/pull-containers.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

containers=("ccr-model-save:latest" "depa-training:latest" "depa-training-encfs:latest")
containers=("depa-training:latest" "depa-training-encfs:latest")
for container in "${containers[@]}"
do
docker pull $CONTAINER_REGISTRY"/"$container
Expand Down
Loading
Loading