From d19e4261b9026d21b4e8a4f2e211393b4a365eb1 Mon Sep 17 00:00:00 2001 From: Falco Riccardo <83835195+falric05@users.noreply.github.com> Date: Thu, 24 Apr 2025 11:58:35 +0200 Subject: [PATCH 01/99] Enhance Airflow environment setup and PDF browsing functionality (#3) * Add setup script and update entrypoint for Airflow environment initialization * Updates documentation for creating .env file and removes setup script * Add an initialization DAG to run the initialization script every two hours and launch the main DAG * Add Dockerfiles and entrypoint scripts for Airflow and web GUI setup * Remove the 'heasarc' directory creation from the Dockerfiles and the web GUI entrypoint script to make the given folder be mounted as the 'heasarc' directory * Refactor file handling in DataPipeline to use shutil.move for better directory management * Add an interface to browse PDF files in DL0 directory * Update .gitignore to exclude all files in the given folder except explorer.js and index.html * Update .gitignore to exclude all files in the data directory except explorer.js and index.html * Update the initialization DAG to start immediately and improve task management * Remove unused timedelta import in cosipipe_cosipy.py * Update cosipipe_cosipy.py Removed commented line code. * Rename initialization DAG to 'cosipy_contactsimulator' for clarity * Update README.md to improve DAG build and testing instructions * Update UI text for clarity and consistency in explorer.js and index.html * Updates the instructions in README.md for building and running Docker on Mac and Linux, improving clarity and consistency. * Set the start date of the DAG 'cosipy_contactsimulator' to a specific time to avoid unexpected behavior --- .gitignore | 5 ++ README.md | 106 +++++++++++++++++-------- dags/cosipipe_cosipy.py | 60 +++++++++++++- env/{Dockerfile => Dockerfile.airflow} | 7 +- env/Dockerfile.webgui | 21 +++++ env/docker-compose.yaml | 38 +++++++-- env/entrypoint-airflow.sh | 52 ++++++++++++ env/entrypoint-webgui.sh | 7 ++ env/entrypoint.sh | 6 -- pipeline/data/explorer.js | 50 ++++++++++++ pipeline/data/index.html | 12 +++ 11 files changed, 310 insertions(+), 54 deletions(-) rename env/{Dockerfile => Dockerfile.airflow} (91%) create mode 100644 env/Dockerfile.webgui create mode 100644 env/entrypoint-airflow.sh create mode 100644 env/entrypoint-webgui.sh delete mode 100644 env/entrypoint.sh create mode 100644 pipeline/data/explorer.js create mode 100644 pipeline/data/index.html diff --git a/.gitignore b/.gitignore index 33c1d12..c41823d 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,8 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +# Data +data/ +!data/explorer.js +!data/index.html diff --git a/README.md b/README.md index 58ff8bc..5f2777b 100644 --- a/README.md +++ b/README.md @@ -10,20 +10,52 @@ We assume that the cosiflow repository is in your $HOME directory. cd $HOME/cosiflow/env ``` -Mac: +* **Mac**: change the following lines of `Dockerile.airflow` + + ```Dockerfile + # ARM processors (Mac) + # Definisci la variabile per il file Miniconda + ARG MINICONDA=Miniconda3-latest-Linux-aarch64.sh + # INTEL/AMD processors + ARG MINICONDA=Miniconda3-latest-Linux-x86_64.sh + ``` -```bash -docker build --platform linux/arm64 -t airflow:1.0.0 -f Dockerfile . -``` + in + + ```Dockerfile + # ARM processors (Mac) + # Definisci la variabile per il file Miniconda + ARG MINICONDA=Miniconda3-latest-Linux-aarch64.sh + # INTEL/AMD processors + # ARG MINICONDA=Miniconda3-latest-Linux-x86_64.sh + ``` -Linux: + Then run: -```bash -docker build -t airflow:1.1.0 -f Dockerfile . -``` + ```bash + docker compose build + ``` + +* **Linux**: + + ```bash + docker compose build + ``` ## Execute the docker compose to start containers +Before running the following command to start the containers, make sure to create a `.env` file with the following structure in `cosiflow/env`: + +```env +AIRFLOW_ADMIN_USERNAME=admin +AIRFLOW_ADMIN_EMAIL=admin@localhost +AIRFLOW_ADMIN_PASSWORD= +``` + +Replace `` with your desired password. + +Now you can start the containers. + ```bash docker compose up -d ``` @@ -34,7 +66,7 @@ If you want to enter into the postgre docker container: `docker compose exec air ## Connect to the web server using a browser -localhost:8080 +Connect to http://localhost:8080, with your browser. Note: if you use a remote server you can change the `docker-compose.yaml` file to use another port. @@ -51,17 +83,11 @@ then from your local pc you can forward the port in this way: ssh -N -L 28080:localhost:28080 [user]@[remote machine] ``` -and open the airflow webpace from your local pc at `localhost:28080` +and open the airflow webpace from your local pc at http://localhost:28080 Login with username: `admin` password: `` -To obtain the password `` execute this command after the initialization of the containers - -```bash -docker compose logs | grep pass -``` - -### Shutdown the dockers +## Shutdown the dockers ```bash docker compose down -v @@ -69,33 +95,43 @@ docker compose down -v ## Test the cosipy DAG -Enter in the docker airflow +* Manual pipeline initialization -```bash -docker compose exec airflow bash -``` + 1. Activate the DAG named `"cosipt_test_v0"` from the airflow website. -First download the data file from wasabi. + 2. Enter in the docker `airflow` -```bash -cd /shared_dir/pipeline -source activate cosipy -python initialize_pipeline.py -``` + ```bash + docker compose exec airflow bash + ``` -This script downloads the input file from wasabi and move it in `/home/gamma/workspace/data` + 3. Download the data file from wasabi, using `cosipy` library. -Now we must activate the DAG named `"cosipt_test_v0"` from the airflow website + ```bash + cd /shared_dir/pipeline + source activate cosipy + python initialize_pipeline.py + ``` -Then we have to copy the file in the input directory to trigger the DAG + This script downloads the input file from wasabi and move it in `/home/gamma/workspace/data` -```bash -cd /home/gamma/workspace/data -cp GalacticScan.inc1.id1.crab2hr.extracted.tra.gz input -``` + 4. Then we have to copy the file in the input directory to trigger the DAG + + ```bash + cd /home/gamma/workspace/data + cp GalacticScan.inc1.id1.crab2hr.extracted.tra.gz input + ``` + +* Automatic pipeline initialization (download a file every 2 hours): -We should see that the DAG started to process the data. + 1. Activate the DAG `"cosipy_contactsimulator"`, which will download and move the files downloaded from wasabi via `cosipy` + + 2. Activate the DAG `"cosipt_test_v0"` + +Finally, we should see that the DAG `"cosipt_test_v0"` started to process the data. This directory `/home/gamma/workspace/heasarc/dl0` contains several folders with this format `2025-01-24_14-31-56`. Inside the folder we have the results of the analysis. + +We can visualize the results at the following link http://localhost:8081. \ No newline at end of file diff --git a/dags/cosipipe_cosipy.py b/dags/cosipipe_cosipy.py index db87c80..f77e1b4 100644 --- a/dags/cosipipe_cosipy.py +++ b/dags/cosipipe_cosipy.py @@ -9,7 +9,7 @@ from inotify_simple import INotify, flags from airflow.exceptions import AirflowSkipException from airflow.operators.dagrun_operator import TriggerDagRunOperator - +import shutil # Import necessary Airflow classes and standard libraries # Define a data pipeline class for monitoring, ingesting, and storing DL0 files @@ -89,9 +89,13 @@ def ingest_and_store_dl0_sensor(self, **kwargs): os.makedirs(f'{self.heasarc_dir}/dl0', exist_ok=True) timestamp_utc = datetime.datetime.now(datetime.UTC).strftime('%Y-%m-%d_%H-%M-%S') new_dir = f'{self.heasarc_dir}/dl0/{timestamp_utc}' - os.makedirs(new_dir, exist_ok=True) - stored_file_path = f"{new_dir}/{os.path.basename(input_files)}" - os.rename(input_files, stored_file_path) + if os.path.isdir(input_files): + shutil.move(input_files, new_dir) + else: + os.makedirs(os.path.dirname(new_dir), exist_ok=True) + shutil.move(input_files, new_dir) + # List the files in the new directory and get the tar.gz file + stored_file_path = os.path.join(new_dir, os.listdir(new_dir)[0]) self.logger.info(f"Stored DL0 file: {stored_file_path}") # Push the new file path to XCom for further use ti.xcom_push(key='stored_dl0_file', value=stored_file_path) @@ -146,3 +150,51 @@ def ingest_and_store_dl0_sensor(self, **kwargs): ) wait_for_new_file_sensor_task >> ingest_and_store_dl0_task_sensor >> generate_plots >> trigger_next_run + +# Separate DAG that runs the initialization script every two hours and then triggers the main DAG +with DAG('cosipy_contactsimulator', + default_args={ + 'owner': 'airflow', + 'start_date': datetime.datetime(2025, 4, 23, 14, 0, 0), # Set the start date for the DAG to a specific time. + # NOTE: you cannot use datetime.datetime.now() since it is re-evaluated every time the file is imported (i.e. + # every DAG parse by Airflow). This makes the start_date unstable, and can cause weird behavior like + # no scheduled runs, or inconsistent triggers. + }, + schedule_interval=datetime.timedelta(hours=2), # Execute every 2 hours + catchup=False, # Do not run past scheduled runs + max_active_runs=1, # Only one instance of this DAG can run at a time + ) as init_dag: + + # Task to run the pipeline initialization script in the cosipy environment + initialize_pipeline_task = BashOperator( + task_id='initialize_pipeline_task', + bash_command=""" + cd /shared_dir/pipeline && + source activate cosipy && + python /shared_dir/pipeline/initialize_pipeline.py + """, + dag=init_dag + ) + + # Task to move a specific initial file to a timestamped folder in the input directory + copy_initfile_task = BashOperator( + task_id='copy_initfile_task', + bash_command=""" + FILE_NAME=GalacticScan.inc1.id1.crab2hr.extracted.tra.gz && + TIMESTAMP=$(date +"%Y-%m-%d_%H-%M-%S") && + DEST_DIR="/home/gamma/workspace/data/input/$TIMESTAMP" && + mkdir -p "$DEST_DIR" && + mv "/home/gamma/workspace/data/$FILE_NAME" "$DEST_DIR/" + """, + dag=init_dag + ) + + # Task to trigger the main DAG that handles monitoring and processing + trigger_main_dag = TriggerDagRunOperator( + task_id="trigger_cosipy_test_v0", + trigger_dag_id="cosipy_test_v0", + dag=init_dag + ) + + # Define task execution order + initialize_pipeline_task >> copy_initfile_task >> trigger_main_dag diff --git a/env/Dockerfile b/env/Dockerfile.airflow similarity index 91% rename from env/Dockerfile rename to env/Dockerfile.airflow index f6dac44..60284fb 100644 --- a/env/Dockerfile +++ b/env/Dockerfile.airflow @@ -38,7 +38,7 @@ RUN chown -R gamma:gamma /home/gamma USER gamma -COPY entrypoint.sh /home/gamma/entrypoint.sh +COPY entrypoint-airflow.sh /home/gamma/entrypoint-airflow.sh RUN export PATH=$PATH:/opt/conda/bin && conda config --append channels conda-forge && conda config --set channel_priority strict @@ -61,17 +61,16 @@ RUN mkdir /data01 RUN chown -R gamma:gamma /data01 RUN mkdir /data02 RUN chown -R gamma:gamma /data02 -RUN chmod +x /home/gamma/entrypoint.sh +RUN chmod +x /home/gamma/entrypoint-airflow.sh USER gamma RUN mkdir /home/gamma/workspace #dir to run pipeline RUN mkdir /home/gamma/workspace/data RUN mkdir /home/gamma/workspace/data/input -RUN mkdir /home/gamma/workspace/heasarc RUN mkdir /home/gamma/workspace/log ENV PATH="/opt/conda/bin:$PATH" -#ENTRYPOINT ["/home/gamma/entrypoint.sh"] +#ENTRYPOINT ["/home/gamma/entrypoint-airflow.sh"] diff --git a/env/Dockerfile.webgui b/env/Dockerfile.webgui new file mode 100644 index 0000000..82c7944 --- /dev/null +++ b/env/Dockerfile.webgui @@ -0,0 +1,21 @@ +# Dockerfile.webgui +FROM python:3.9-slim + +# Crea utente gamma +RUN useradd gamma +USER gamma +WORKDIR /home/gamma + +# Copia lo script di entrypoint +COPY entrypoint-webgui.sh /home/gamma/entrypoint-webgui.sh + +USER root +RUN chmod +x /home/gamma/entrypoint-webgui.sh + +# Crea le directory richieste (opzionale) +# RUN mkdir -p /home/gamma/workspace/heasarc + +# Installa eventuali pacchetti (opzionale) +# RUN pip install flask ... + +# ENTRYPOINT ["bash", "/home/gamma/entrypoint-webgui.sh"] \ No newline at end of file diff --git a/env/docker-compose.yaml b/env/docker-compose.yaml index 542c4ce..5901695 100644 --- a/env/docker-compose.yaml +++ b/env/docker-compose.yaml @@ -1,35 +1,63 @@ - - services: postgres: - image: postgres + image: postgres:15 container_name: cosi_postgres environment: - POSTGRES_USER=airflow_user - POSTGRES_PASSWORD=secure_password - POSTGRES_DB=airflow_db - #volumes: + healthcheck: + test: ["CMD", "pg_isready", "-U", "airflow_user"] + interval: 5s + timeout: 5s + retries: 10 + # volumes: # - ${HOME}/postgres_data:/var/lib/postgresql/data #restart: always airflow: image: airflow:1.1.0 + build: + context: . + dockerfile: Dockerfile.airflow container_name: cosi_airflow environment: - AIRFLOW_HOME=/home/gamma/airflow - DISPLAY=${DISPLAY} - AIRFLOW__CORE__LOAD_EXAMPLES=False + # env_file: + # - .env volumes: - ../dags:/home/gamma//airflow/dags - ./airflow.cfg.postgresql:/home/gamma/airflow/airflow.cfg - /tmp/.X11-unix:/tmp/.X11-unix:rw - ${HOME}/cosiflow:/shared_dir + - ${HOME}/cosiflow/pipeline/data:/home/gamma/workspace/heasarc ports: - "8080:8080" - "28888:28888" #jupyter notebook + depends_on: + postgres: + condition: + service_healthy + #restart: always - entrypoint: ["bash", "/home/gamma/entrypoint.sh"] + entrypoint: ["bash", "/home/gamma/entrypoint-airflow.sh"] #entrypoint: ["tail", "-f", "/dev/null"] + webgui: + build: + context: . + dockerfile: Dockerfile.webgui + container_name: cosi_webgui + environment: + - DISPLAY=${DISPLAY} + volumes: + - ${HOME}/cosiflow/pipeline/data:/home/gamma/workspace/heasarc + - ${HOME}/cosiflow:/shared_dir + ports: + - "8081:8081" + entrypoint: ["bash", "/home/gamma/entrypoint-webgui.sh"] + volumes: postgres_data: diff --git a/env/entrypoint-airflow.sh b/env/entrypoint-airflow.sh new file mode 100644 index 0000000..76345cc --- /dev/null +++ b/env/entrypoint-airflow.sh @@ -0,0 +1,52 @@ +#!/bin/bash +set -euo pipefail + +cd /home/gamma + +ENV_FILE="/shared_dir/env/.env" + +if [ ! -f "$ENV_FILE" ]; then + echo "❌ Missing .env file at $ENV_FILE" + echo "👉 Please create the file with the following structure:" + echo "" + echo "AIRFLOW_ADMIN_USERNAME=admin" + echo "AIRFLOW_ADMIN_EMAIL=admin@localhost" + echo "AIRFLOW_ADMIN_PASSWORD=yourpassword" + exit 1 +fi + +# Load environment variables +set -o allexport +source "$ENV_FILE" +set +o allexport + +# Check required variables +if [ -z "${AIRFLOW_ADMIN_USERNAME:-}" ] || [ -z "${AIRFLOW_ADMIN_EMAIL:-}" ] || [ -z "${AIRFLOW_ADMIN_PASSWORD:-}" ]; then + echo "❌ Missing one or more required environment variables in $ENV_FILE" + exit 1 +fi + +# Activate conda environment +source activate gamma +export PATH="$PATH:~/.local/bin" +echo "✅ Environment activated." + +# Initialize Airflow DB +airflow db init + +# Create admin user if not present +if ! airflow users list | grep -q "$AIRFLOW_ADMIN_USERNAME"; then + airflow users create \ + --username "$AIRFLOW_ADMIN_USERNAME" \ + --firstname COSI \ + --lastname Admin \ + --role Admin \ + --email "$AIRFLOW_ADMIN_EMAIL" \ + --password "$AIRFLOW_ADMIN_PASSWORD" + echo "✅ Admin user created." +else + echo "ℹ️ Admin user already exists. Skipping creation." +fi + +# Start Airflow +airflow standalone diff --git a/env/entrypoint-webgui.sh b/env/entrypoint-webgui.sh new file mode 100644 index 0000000..9b79338 --- /dev/null +++ b/env/entrypoint-webgui.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# Start the HEASARC HTTP server +cd /home/gamma/workspace/heasarc +# Start the HTTP server +echo "✅ Starting HEASARC HTTP server at localhost:8081." +python3 -m http.server 8081 \ No newline at end of file diff --git a/env/entrypoint.sh b/env/entrypoint.sh deleted file mode 100644 index 63ffe6e..0000000 --- a/env/entrypoint.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -cd /home/gamma -source activate gamma -export PATH="$PATH:~/.local/bin" -echo $PATH -airflow standalone diff --git a/pipeline/data/explorer.js b/pipeline/data/explorer.js new file mode 100644 index 0000000..751dae9 --- /dev/null +++ b/pipeline/data/explorer.js @@ -0,0 +1,50 @@ +document.addEventListener("DOMContentLoaded", function () { + const rootPath = "./dl0/"; + const container = document.getElementById("explorer"); + + async function loadFolders() { + try { + const res = await fetch(rootPath); + const text = await res.text(); + + // Estrae i nomi delle directory dall'elenco (funziona con server tipo Python http.server) + const parser = new DOMParser(); + const html = parser.parseFromString(text, "text/html"); + const links = Array.from(html.querySelectorAll("a")) + .map(a => a.getAttribute("href")) + .filter(href => href.endsWith("/") && href !== "../"); + + for (const folder of links) { + const folderPath = rootPath + folder; + const div = document.createElement("div"); + div.innerHTML = `

${folder.replace(/\/$/, "")}

    Loading...
`; + container.appendChild(div); + await loadFolderContent(folderPath, `${folder}-list`); + } + + } catch (err) { + container.innerHTML = `

Error loading folders: ${err}

`; + } + } + + async function loadFolderContent(folderPath, listId) { + try { + const res = await fetch(folderPath); + const text = await res.text(); + const parser = new DOMParser(); + const html = parser.parseFromString(text, "text/html"); + const files = Array.from(html.querySelectorAll("a")) + .map(a => a.getAttribute("href")) + .filter(name => name.endsWith(".pdf")); + + const ul = document.getElementById(listId); + ul.innerHTML = files.length + ? files.map(file => `
  • ${file}
  • `).join("") + : "
  • No PDF found
  • "; + } catch (err) { + document.getElementById(listId).innerHTML = `
  • Loading error: ${err}
  • `; + } + } + + loadFolders(); + }); \ No newline at end of file diff --git a/pipeline/data/index.html b/pipeline/data/index.html new file mode 100644 index 0000000..e1f1a8b --- /dev/null +++ b/pipeline/data/index.html @@ -0,0 +1,12 @@ + + + + + DL3 Browser + + +

    Explore DL3 Results

    +
    Loading...
    + + + \ No newline at end of file From ae6d3aa5ba69d16a7b0cefa53bce7854b11aa6f1 Mon Sep 17 00:00:00 2001 From: falric05 Date: Tue, 29 Apr 2025 10:58:34 +0200 Subject: [PATCH 02/99] Add bootstrap script and configuration for Airflow with UID and GID --- env/bootstrap.sh | 15 +++++++++++++++ env/docker-compose.override.yaml | 14 ++++++++++++++ 2 files changed, 29 insertions(+) create mode 100755 env/bootstrap.sh create mode 100644 env/docker-compose.override.yaml diff --git a/env/bootstrap.sh b/env/bootstrap.sh new file mode 100755 index 0000000..ed3557e --- /dev/null +++ b/env/bootstrap.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -euo pipefail + +if [ $# -ne 0 ]; then + printf "\n\033[31mUsage: bootstrap.sh (no arguments)\033[0m\n\n" + exit 1 +fi + +MY_UID="$(id -u)" +MY_GID="$(id -g)" + +echo -e "\n[INFO] Starting containers with UID=${MY_UID} and GID=${MY_GID}..." + +HOST_UID="${MY_UID}" HOST_GID="${MY_GID}" docker compose build +# docker compose up \ No newline at end of file diff --git a/env/docker-compose.override.yaml b/env/docker-compose.override.yaml new file mode 100644 index 0000000..7be7be0 --- /dev/null +++ b/env/docker-compose.override.yaml @@ -0,0 +1,14 @@ +version: '3.8' +services: + airflow: + entrypoint: + - /bin/bash + - -c + - | + echo "[INFO] Adjusting UID and GID of user gamma..." + usermod -u ${HOST_UID} gamma + groupmod -g ${HOST_GID} gamma + exec /home/gamma/entrypoint-airflow.sh + environment: + - HOST_UID=${HOST_UID} + - HOST_GID=${HOST_GID} \ No newline at end of file From 9be2f48a357484695207bc8c5a5321b6ffa95905 Mon Sep 17 00:00:00 2001 From: falric05 Date: Tue, 29 Apr 2025 11:00:41 +0200 Subject: [PATCH 03/99] Add DL3 Explorer plugin with web interface for viewing result pipeline files --- env/docker-compose.yaml | 3 +- env/entrypoint-airflow.sh | 5 +- plugins/__init__.py | 0 plugins/dl3_explorer/dl3_explorer_plugin.py | 66 +++++++++++++++++++ .../dl3_explorer/dl3_explorer_view_plugin.py | 20 ++++++ plugins/dl3_explorer/templates/explorer.html | 36 ++++++++++ 6 files changed, 127 insertions(+), 3 deletions(-) create mode 100644 plugins/__init__.py create mode 100644 plugins/dl3_explorer/dl3_explorer_plugin.py create mode 100644 plugins/dl3_explorer/dl3_explorer_view_plugin.py create mode 100644 plugins/dl3_explorer/templates/explorer.html diff --git a/env/docker-compose.yaml b/env/docker-compose.yaml index 5901695..3b09fe1 100644 --- a/env/docker-compose.yaml +++ b/env/docker-compose.yaml @@ -28,7 +28,8 @@ services: # env_file: # - .env volumes: - - ../dags:/home/gamma//airflow/dags + - ../dags:/home/gamma/airflow/dags + - ../plugins:/home/gamma/airflow/plugins - ./airflow.cfg.postgresql:/home/gamma/airflow/airflow.cfg - /tmp/.X11-unix:/tmp/.X11-unix:rw - ${HOME}/cosiflow:/shared_dir diff --git a/env/entrypoint-airflow.sh b/env/entrypoint-airflow.sh index 76345cc..2c75b8e 100644 --- a/env/entrypoint-airflow.sh +++ b/env/entrypoint-airflow.sh @@ -48,5 +48,6 @@ else echo "ℹ️ Admin user already exists. Skipping creation." fi -# Start Airflow -airflow standalone +# Start webserver (in background) and scheduler +airflow webserver --port 8080 & +airflow scheduler diff --git a/plugins/__init__.py b/plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plugins/dl3_explorer/dl3_explorer_plugin.py b/plugins/dl3_explorer/dl3_explorer_plugin.py new file mode 100644 index 0000000..dde12b1 --- /dev/null +++ b/plugins/dl3_explorer/dl3_explorer_plugin.py @@ -0,0 +1,66 @@ +import os +import traceback +from pathlib import Path +from airflow.plugins_manager import AirflowPlugin +from airflow.models import BaseOperator +from flask import Blueprint, render_template, send_from_directory, redirect, url_for, session +from flask_login import login_required, current_user + +DL0_FOLDER = os.path.join(os.path.expanduser("~"), "workspace", "heasarc", "dl0") + +# Definiamo il percorso assoluto alla cartella del plugin +plugin_folder = os.path.dirname(os.path.abspath(__file__)) + +# Blueprint con percorso assoluto a templates e static +dl3_explorer_bp = Blueprint( + "dl3_explorer_bp", + __name__, + template_folder=os.path.join(plugin_folder, "templates"), + static_folder=os.path.join(plugin_folder, "static"), + url_prefix='/dl3browser' +) + +@dl3_explorer_bp.route('/') +def explorer_home(): + if not current_user.is_authenticated: + return redirect('/login/?next=/dl3browser/') + try: + folders = sorted([f for f in os.listdir(DL0_FOLDER) if os.path.isdir(os.path.join(DL0_FOLDER, f))]) + return render_template("explorer.html", folders=folders) + except PermissionError: + abort(403) + except Exception as e: + error_traceback = traceback.format_exc() + return f"Error loading folders: {e}\n\nTraceback:\n{error_traceback}", 500 + +@dl3_explorer_bp.route('/folder/') +@login_required +def explorer_folder(foldername): + try: + folder_path = os.path.join(DL0_FOLDER, foldername) + if not os.path.commonpath([DL0_FOLDER, folder_path]).startswith(DL0_FOLDER): + abort(403) + + files = sorted([f for f in os.listdir(folder_path) if f.endswith(".pdf")]) + return render_template("explorer.html", folders=[], files=files, foldername=foldername) + except PermissionError: + abort(403) + except Exception as e: + error_traceback = traceback.format_exc() + return f"Error loading files: {e}\n\nTraceback:\n{error_traceback}", 500 + +@dl3_explorer_bp.route('/download/') +@login_required +def download_file(filepath): + abs_path = os.path.join(DL0_FOLDER, filepath) + folder, filename = os.path.split(abs_path) + return send_from_directory(folder, filename, as_attachment=True) + +class DummyOperator(BaseOperator): + def execute(self, context): + pass + +class DL3ExplorerPlugin(AirflowPlugin): + name = "dl3_explorer_plugin" + operators = [DummyOperator] + flask_blueprints = [dl3_explorer_bp] \ No newline at end of file diff --git a/plugins/dl3_explorer/dl3_explorer_view_plugin.py b/plugins/dl3_explorer/dl3_explorer_view_plugin.py new file mode 100644 index 0000000..ea03225 --- /dev/null +++ b/plugins/dl3_explorer/dl3_explorer_view_plugin.py @@ -0,0 +1,20 @@ +from airflow.plugins_manager import AirflowPlugin +from flask import redirect +from flask_appbuilder import BaseView, expose + +class DL3ExplorerView(BaseView): + default_view = "redirect_to_dl3" + + @expose("/") + def redirect_to_dl3(self): + return redirect("/dl3browser/") + +class DL3ExplorerViewPlugin(AirflowPlugin): + name = "dl3_explorer_view_plugin" + appbuilder_views = [ + { + "name": "DL3 Browser", + "category": "Results Browser", + "view": DL3ExplorerView() + } + ] diff --git a/plugins/dl3_explorer/templates/explorer.html b/plugins/dl3_explorer/templates/explorer.html new file mode 100644 index 0000000..413ccf3 --- /dev/null +++ b/plugins/dl3_explorer/templates/explorer.html @@ -0,0 +1,36 @@ + + + + + DL3 Explorer + + +

    DL3 File Browser

    + + {% if folders and folders|length > 0 %} +

    Folders:

    + + {% endif %} + + {% if files and files|length > 0 %} +

    Files in {{ foldername }}:

    + + {% endif %} + + \ No newline at end of file From 72ad43942810e3d843153d1a33f82a65ec2c9237 Mon Sep 17 00:00:00 2001 From: falric05 Date: Tue, 29 Apr 2025 11:12:20 +0200 Subject: [PATCH 04/99] Removed the service in the webgui compose, because it is integrated in the airflow service, and updated the data path in the docker-compose --- .gitignore | 4 +-- env/Dockerfile.webgui | 21 -------------- env/docker-compose.yaml | 16 +---------- env/entrypoint-webgui.sh | 7 ----- pipeline/data/explorer.js | 50 --------------------------------- pipeline/data/index.html | 12 -------- pipeline/generate_plot.py | 42 --------------------------- pipeline/initialize_pipeline.py | 18 ------------ 8 files changed, 2 insertions(+), 168 deletions(-) delete mode 100644 env/Dockerfile.webgui delete mode 100644 env/entrypoint-webgui.sh delete mode 100644 pipeline/data/explorer.js delete mode 100644 pipeline/data/index.html delete mode 100644 pipeline/generate_plot.py delete mode 100644 pipeline/initialize_pipeline.py diff --git a/.gitignore b/.gitignore index c41823d..0031ffd 100644 --- a/.gitignore +++ b/.gitignore @@ -161,7 +161,5 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ -# Data +# Data Folder data/ -!data/explorer.js -!data/index.html diff --git a/env/Dockerfile.webgui b/env/Dockerfile.webgui deleted file mode 100644 index 82c7944..0000000 --- a/env/Dockerfile.webgui +++ /dev/null @@ -1,21 +0,0 @@ -# Dockerfile.webgui -FROM python:3.9-slim - -# Crea utente gamma -RUN useradd gamma -USER gamma -WORKDIR /home/gamma - -# Copia lo script di entrypoint -COPY entrypoint-webgui.sh /home/gamma/entrypoint-webgui.sh - -USER root -RUN chmod +x /home/gamma/entrypoint-webgui.sh - -# Crea le directory richieste (opzionale) -# RUN mkdir -p /home/gamma/workspace/heasarc - -# Installa eventuali pacchetti (opzionale) -# RUN pip install flask ... - -# ENTRYPOINT ["bash", "/home/gamma/entrypoint-webgui.sh"] \ No newline at end of file diff --git a/env/docker-compose.yaml b/env/docker-compose.yaml index 3b09fe1..ab72167 100644 --- a/env/docker-compose.yaml +++ b/env/docker-compose.yaml @@ -33,7 +33,7 @@ services: - ./airflow.cfg.postgresql:/home/gamma/airflow/airflow.cfg - /tmp/.X11-unix:/tmp/.X11-unix:rw - ${HOME}/cosiflow:/shared_dir - - ${HOME}/cosiflow/pipeline/data:/home/gamma/workspace/heasarc + - ${HOME}/cosiflow/data:/home/gamma/workspace/heasarc ports: - "8080:8080" - "28888:28888" #jupyter notebook @@ -46,19 +46,5 @@ services: entrypoint: ["bash", "/home/gamma/entrypoint-airflow.sh"] #entrypoint: ["tail", "-f", "/dev/null"] - webgui: - build: - context: . - dockerfile: Dockerfile.webgui - container_name: cosi_webgui - environment: - - DISPLAY=${DISPLAY} - volumes: - - ${HOME}/cosiflow/pipeline/data:/home/gamma/workspace/heasarc - - ${HOME}/cosiflow:/shared_dir - ports: - - "8081:8081" - entrypoint: ["bash", "/home/gamma/entrypoint-webgui.sh"] - volumes: postgres_data: diff --git a/env/entrypoint-webgui.sh b/env/entrypoint-webgui.sh deleted file mode 100644 index 9b79338..0000000 --- a/env/entrypoint-webgui.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -# Start the HEASARC HTTP server -cd /home/gamma/workspace/heasarc -# Start the HTTP server -echo "✅ Starting HEASARC HTTP server at localhost:8081." -python3 -m http.server 8081 \ No newline at end of file diff --git a/pipeline/data/explorer.js b/pipeline/data/explorer.js deleted file mode 100644 index 751dae9..0000000 --- a/pipeline/data/explorer.js +++ /dev/null @@ -1,50 +0,0 @@ -document.addEventListener("DOMContentLoaded", function () { - const rootPath = "./dl0/"; - const container = document.getElementById("explorer"); - - async function loadFolders() { - try { - const res = await fetch(rootPath); - const text = await res.text(); - - // Estrae i nomi delle directory dall'elenco (funziona con server tipo Python http.server) - const parser = new DOMParser(); - const html = parser.parseFromString(text, "text/html"); - const links = Array.from(html.querySelectorAll("a")) - .map(a => a.getAttribute("href")) - .filter(href => href.endsWith("/") && href !== "../"); - - for (const folder of links) { - const folderPath = rootPath + folder; - const div = document.createElement("div"); - div.innerHTML = `

    ${folder.replace(/\/$/, "")}

      Loading...
    `; - container.appendChild(div); - await loadFolderContent(folderPath, `${folder}-list`); - } - - } catch (err) { - container.innerHTML = `

    Error loading folders: ${err}

    `; - } - } - - async function loadFolderContent(folderPath, listId) { - try { - const res = await fetch(folderPath); - const text = await res.text(); - const parser = new DOMParser(); - const html = parser.parseFromString(text, "text/html"); - const files = Array.from(html.querySelectorAll("a")) - .map(a => a.getAttribute("href")) - .filter(name => name.endsWith(".pdf")); - - const ul = document.getElementById(listId); - ul.innerHTML = files.length - ? files.map(file => `
  • ${file}
  • `).join("") - : "
  • No PDF found
  • "; - } catch (err) { - document.getElementById(listId).innerHTML = `
  • Loading error: ${err}
  • `; - } - } - - loadFolders(); - }); \ No newline at end of file diff --git a/pipeline/data/index.html b/pipeline/data/index.html deleted file mode 100644 index e1f1a8b..0000000 --- a/pipeline/data/index.html +++ /dev/null @@ -1,12 +0,0 @@ - - - - - DL3 Browser - - -

    Explore DL3 Results

    -
    Loading...
    - - - \ No newline at end of file diff --git a/pipeline/generate_plot.py b/pipeline/generate_plot.py deleted file mode 100644 index 5b2194f..0000000 --- a/pipeline/generate_plot.py +++ /dev/null @@ -1,42 +0,0 @@ -import sys,os -from cosipy.util import fetch_wasabi_file -from cosipy import BinnedData -from pathlib import Path - -#/home/gamma/workspace/heasarc/dl0/2025-01-24_14-16-50/GalacticScan.inc1.id1.crab2hr.extracted.tra.gz - -# create the inputs.yaml file to process the data. -print("test") -print(sys.argv[1]) -file_path = sys.argv[1] -dir_name = os.path.dirname(file_path) - -content_to_write = f"""#----------# -# Data I/O: - -# data files available on the COSI Sharepoint: https://drive.google.com/drive/folders/1UdLfuLp9Fyk4dNussn1wt7WEOsTWrlQ6 -data_file: {file_path} # full path -ori_file: "NA" # full path -unbinned_output: 'hdf5' # 'fits' or 'hdf5' -time_bins: 60 # time bin size in seconds. Takes int, float, or list of bin edges. -energy_bins: [100., 200., 500., 1000., 2000., 5000.] # Takes list. Needs to match response. -phi_pix_size: 6 # binning of Compton scattering anlge [deg] -nside: 8 # healpix binning of psi chi local -scheme: 'ring' # healpix binning of psi chi local -tmin: 1835478000.0 # Min time cut in seconds. -tmax: 1835485200.0 # Max time cut in seconds. -#----------# -""" - -dir_name_path = Path(dir_name) - -# Open the file in write mode and write the content -with open(dir_name_path / "inputs.yaml", "w") as file: - file.write(content_to_write) - - -analysis = BinnedData(dir_name_path / "inputs.yaml") -analysis.read_tra(output_name = dir_name_path / "unbinned_data") -analysis.get_binned_data() -analysis.get_raw_spectrum(output_name = file_path.replace(".crab2hr.extracted.tra.gz","")) -analysis.get_raw_lightcurve(output_name = file_path.replace(".crab2hr.extracted.tra.gz","")) diff --git a/pipeline/initialize_pipeline.py b/pipeline/initialize_pipeline.py deleted file mode 100644 index d2a15eb..0000000 --- a/pipeline/initialize_pipeline.py +++ /dev/null @@ -1,18 +0,0 @@ -from cosipy.util import fetch_wasabi_file -import os -import shutil -from pathlib import Path - -# This script must be executed the first time we install this airflow app to obtain a file used to test the DAG - -home_dir = Path(os.environ['HOME']) -new_path = os.path.join(home_dir, "workspace", "data", "GalacticScan.inc1.id1.crab2hr.extracted.tra.gz") - -# Check if the file already exists -if os.path.exists(new_path): - print(f"File {new_path} already exists. Removing it to fetch a new one.") - # If the file exists, remove it - os.remove(new_path) - -fetch_wasabi_file(file='ComptonSphere/mini-DC2/GalacticScan.inc1.id1.crab2hr.extracted.tra.gz', - output=new_path) From 14f8256408468141664e495049e4b6e843375f40 Mon Sep 17 00:00:00 2001 From: falric05 Date: Tue, 29 Apr 2025 11:33:25 +0200 Subject: [PATCH 05/99] Enhance DL3 Explorer UI with improved styling and navigation links --- plugins/dl3_explorer/templates/explorer.html | 61 ++++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/plugins/dl3_explorer/templates/explorer.html b/plugins/dl3_explorer/templates/explorer.html index 413ccf3..e237597 100644 --- a/plugins/dl3_explorer/templates/explorer.html +++ b/plugins/dl3_explorer/templates/explorer.html @@ -3,17 +3,70 @@ DL3 Explorer +

    DL3 File Browser

    + + {% if folders and folders|length > 0 %}

    Folders: