From b795e7d7118c9139e812973c5e4dbe68bf98fdae Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Tue, 3 Feb 2026 22:14:40 +0300 Subject: [PATCH 01/79] feat: add auth-service --- .gitmodules | 5 +- src/Dockerfile | 18 +++++ src/repos/swarm-cloud | 2 +- .../system/sp-svc-auth-service.service | 17 +++++ .../usr/local/bin/sp-svc-auth-service.sh | 22 ++++++ src/swarm-scripts/71.setup-auth-service.sh | 70 +++++++++++++++++++ 6 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 src/rootfs/files/configs/etc/systemd/system/sp-svc-auth-service.service create mode 100644 src/rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh create mode 100644 src/swarm-scripts/71.setup-auth-service.sh diff --git a/.gitmodules b/.gitmodules index df23a469..2149b005 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,4 +6,7 @@ url = git@github.com:Super-Protocol/swarm-cloud.git [submodule "swarm-db"] path = src/repos/swarm-db - url = git@github.com:Super-Protocol/swarm-db.git \ No newline at end of file + url = git@github.com:Super-Protocol/swarm-db.git +[submodule "src/repos/sp-swarm-services"] + path = src/repos/sp-swarm-services + url = git@github.com:Super-Protocol/sp-swarm-services.git diff --git a/src/Dockerfile b/src/Dockerfile index a886c489..1aa16e9b 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -160,6 +160,20 @@ RUN make RUN make build-linux-amd64 ### Swarm DB finish +### Auth service files start +FROM node:24-bookworm-slim AS auth_service_files_builder +WORKDIR /work/sp-swarm-services + +# ensure bash/find exist for the prepare script +RUN apt-get update \ + && apt-get install -y --no-install-recommends bash findutils ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# copy full repo as requested, then prepare a minimal auth-service bundle into /out +COPY repos/sp-swarm-services/ ./ +RUN bash ./scripts/prepare-auth-service-files.sh /out +### Auth service files finish + ### Start rootfs ### FROM ubuntu:noble-20250714 AS rootfs_builder RUN apt update && 
apt install -y wget gcc make build-essential debootstrap petname gettext @@ -335,6 +349,10 @@ ADD rootfs/files/scripts/build_swarm_cloud.sh /buildroot/files/scripts/ RUN chmod +x /buildroot/files/scripts/build_swarm_cloud.sh RUN --security=insecure /buildroot/files/scripts/build_swarm_cloud.sh +# Auth service (prepared in separate build stage) +RUN mkdir -p ${OUTPUTDIR}/etc/auth-service +COPY --from=auth_service_files_builder /out/ ${OUTPUTDIR}/etc/auth-service/ + # make /opt/swarm-cloud-api point to built swarm-cloud artifacts RUN mkdir -p ${OUTPUTDIR}/opt && ln -s /usr/local/lib/swarm-cloud-api ${OUTPUTDIR}/opt/swarm-cloud-api diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 25ce9fda..39b67728 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 25ce9fda182b1a148fe96a6d57573b807aea986a +Subproject commit 39b67728d3353142e3ed974c0c30f43b116208e5 diff --git a/src/rootfs/files/configs/etc/systemd/system/sp-svc-auth-service.service b/src/rootfs/files/configs/etc/systemd/system/sp-svc-auth-service.service new file mode 100644 index 00000000..1e8cee1c --- /dev/null +++ b/src/rootfs/files/configs/etc/systemd/system/sp-svc-auth-service.service @@ -0,0 +1,17 @@ +[Unit] +Description=SP Swarm Service - auth-service +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +ExecStartPre=/bin/chmod +x /usr/local/bin/sp-svc-auth-service.sh +ExecStart=/usr/local/bin/sp-svc-auth-service.sh +Environment=NODE_ENV=production CONFIG_FILE=/etc/auth-service/apps/auth-service/configuration.yaml +Restart=always +RestartSec=5 +StandardOutput=append:/var/log/sp-svc-auth-service.log +StandardError=append:/var/log/sp-svc-auth-service-err.log + +[Install] +WantedBy=multi-user.target diff --git a/src/rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh b/src/rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh new file mode 100644 index 00000000..d62314cf --- /dev/null +++ 
b/src/rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -euo pipefail + +BASE_DIR="/etc/auth-service" +APP_PATH="apps/auth-service" +APP_DIR="${BASE_DIR}/$APP_PATH" +SP_CONFIG="${APP_DIR}/configuration.yaml" + +# Prefer configuration supplied via provider disk; fall back to the example only if ALLOW_EXAMPLE_CONFIG=true or NODE_ENV is not production +if [[ -f "${SP_CONFIG}" ]]; then + export CONFIG_FILE="${SP_CONFIG}" +elif [[ -f "${APP_DIR}/configuration.example.yaml" ]] && [[ "${ALLOW_EXAMPLE_CONFIG:-}" == "true" || "${NODE_ENV:-production}" != "production" ]]; then + cp -f "${APP_DIR}/configuration.example.yaml" "${SP_CONFIG}" + export CONFIG_FILE="${SP_CONFIG}" +else + echo "ERROR: No configuration found for ${APP_PATH}. Expected one of: ${SP_CONFIG} or ${APP_DIR}/configuration.example.yaml" >&2 + exit 1 +fi + +export NODE_ENV="${NODE_ENV:-production}" +cd "${BASE_DIR}" +exec npm run start -w "${APP_PATH}" diff --git a/src/swarm-scripts/71.setup-auth-service.sh b/src/swarm-scripts/71.setup-auth-service.sh new file mode 100644 index 00000000..5fed5757 --- /dev/null +++ b/src/swarm-scripts/71.setup-auth-service.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +set -euo pipefail + +# This script bootstraps the auth-service into SwarmDB via swarm-cli. +# Run it INSIDE the container. Assumes python3 and swarm-cli.py are available. +# +# Notes: +# - The service manifest is expected to be available on all nodes at: +# ${LOCATION_PATH}/manifest.yaml +# If you don't have a manifest yet, set ALLOW_MISSING_MANIFEST=1 to still +# register the ClusterService (manifest will be stored as NULL). +# - auth-service dependencies (MongoDB/NATS/etc.) are expected to be expressed +# in the manifest (stateExpr/commands) and handled by provision workers.
+ +DB_HOST=${DB_HOST:-127.0.0.1} +DB_PORT=${DB_PORT:-3306} +DB_USER=${DB_USER:-root} +DB_NAME=${DB_NAME:-swarmdb} + +# Service descriptors +SERVICE_NAME=${SERVICE_NAME:-auth-service} +SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} +CLUSTER_POLICY=${CLUSTER_POLICY:-auth-service} +CLUSTER_ID=${CLUSTER_ID:-auth-service} + +# Location stored in ClusterServices; must exist on all nodes (baked into image) +# Default aligns with sp-vm image build which stages auth-service into /etc/auth-service. +LOCATION_PATH=${LOCATION_PATH:-/etc/auth-service} +MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} +SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" + +ALLOW_MISSING_MANIFEST=${ALLOW_MISSING_MANIFEST:-0} + +if [ ! -f "$MANIFEST_PATH" ]; then + if [ "$ALLOW_MISSING_MANIFEST" = "1" ] || [ "$ALLOW_MISSING_MANIFEST" = "true" ]; then + echo "Warning: manifest not found at: $MANIFEST_PATH (continuing due to ALLOW_MISSING_MANIFEST=1)" >&2 + else + echo "Manifest not found at: $MANIFEST_PATH" >&2 + echo "If you want to register the service without a manifest, set ALLOW_MISSING_MANIFEST=1" >&2 + exit 1 + fi +fi + +echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then + echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." +else + echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=1 --maxClusters=1 +fi + +echo "Ensuring ClusterService '$SERVICE_PK'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then + echo "ClusterService '$SERVICE_PK' already exists, skipping creation." 
+else + echo "Creating ClusterService '$SERVICE_PK'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" \ + --name="$SERVICE_NAME" \ + --cluster_policy="$CLUSTER_POLICY" \ + --version="$SERVICE_VERSION" \ + --location="$LOCATION_PATH" +fi + +echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." + From 7600d49d86ac0ec3fe96c08498d9d618bf127b78 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Tue, 3 Feb 2026 22:26:56 +0300 Subject: [PATCH 02/79] fix: correct submodule path for sp-swarm-services --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 2149b005..571d580c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,6 +7,6 @@ [submodule "swarm-db"] path = src/repos/swarm-db url = git@github.com:Super-Protocol/swarm-db.git -[submodule "src/repos/sp-swarm-services"] +[submodule "sp-swarm-services"] path = src/repos/sp-swarm-services url = git@github.com:Super-Protocol/sp-swarm-services.git From 6ba4aaf651981d3761924c77475e2cde74f2dccb Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Tue, 3 Feb 2026 22:31:52 +0300 Subject: [PATCH 03/79] feat: add sp-swarm-services submodule --- src/repos/sp-swarm-services | 1 + 1 file changed, 1 insertion(+) create mode 160000 src/repos/sp-swarm-services diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services new file mode 160000 index 00000000..2f62e590 --- /dev/null +++ b/src/repos/sp-swarm-services @@ -0,0 +1 @@ +Subproject commit 2f62e5900f2a3e54b17670145180a4eeb2e8fe71 From aeecfbaa75bcbabe4dada9a5557fb163aea855aa Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Feb 2026 01:51:25 +0300 Subject: [PATCH 04/79] fix: update LOCATION_PATH to align with service provisioner structure --- src/swarm-scripts/71.setup-auth-service.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git 
a/src/swarm-scripts/71.setup-auth-service.sh b/src/swarm-scripts/71.setup-auth-service.sh index 5fed5757..eaf24dff 100644 --- a/src/swarm-scripts/71.setup-auth-service.sh +++ b/src/swarm-scripts/71.setup-auth-service.sh @@ -24,9 +24,11 @@ SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} CLUSTER_POLICY=${CLUSTER_POLICY:-auth-service} CLUSTER_ID=${CLUSTER_ID:-auth-service} -# Location stored in ClusterServices; must exist on all nodes (baked into image) -# Default aligns with sp-vm image build which stages auth-service into /etc/auth-service. -LOCATION_PATH=${LOCATION_PATH:-/etc/auth-service} +# Location stored in ClusterServices; must exist on all nodes. +# The service provisioner (manifest.yaml + main.py) is baked into the image under +# /etc/swarm-cloud/services/${SERVICE_NAME}. The application payload lives under +# /etc/auth-service and is referenced by the provisioner. +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" From e350e6cca32a8577c9827b347d022fcc4dbaed9b Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Feb 2026 02:11:49 +0300 Subject: [PATCH 05/79] feat: add domain-initializer bootstrap script for SwarmDB --- .../72.setup-domain-initializer.sh | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 src/swarm-scripts/72.setup-domain-initializer.sh diff --git a/src/swarm-scripts/72.setup-domain-initializer.sh b/src/swarm-scripts/72.setup-domain-initializer.sh new file mode 100644 index 00000000..70ca54b6 --- /dev/null +++ b/src/swarm-scripts/72.setup-domain-initializer.sh @@ -0,0 +1,69 @@ +#!/bin/bash +set -euo pipefail + +# This script bootstraps the domain-initializer service into SwarmDB via swarm-cli. +# Run it INSIDE the container. Assumes python3 and swarm-cli.py are available. 
+# +# Notes: +# - The service manifest is expected to be available on all nodes at: +# ${LOCATION_PATH}/manifest.yaml +# If you don't have a manifest yet, set ALLOW_MISSING_MANIFEST=1 to still +# register the ClusterService (manifest will be stored as NULL). +# - domain-initializer dependencies are expected to be expressed in the manifest +# (stateExpr/commands) and handled by provision workers. + +DB_HOST=${DB_HOST:-127.0.0.1} +DB_PORT=${DB_PORT:-3306} +DB_USER=${DB_USER:-root} +DB_NAME=${DB_NAME:-swarmdb} + +# Service descriptors +SERVICE_NAME=${SERVICE_NAME:-domain-initializer} +SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} +CLUSTER_POLICY=${CLUSTER_POLICY:-domain-initializer} +CLUSTER_ID=${CLUSTER_ID:-domain-initializer} + +# Location stored in ClusterServices; must exist on all nodes. +# The service provisioner (manifest.yaml + main.py) is baked into the image under +# /etc/swarm-cloud/services/${SERVICE_NAME}. +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} +SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" + +ALLOW_MISSING_MANIFEST=${ALLOW_MISSING_MANIFEST:-0} + +if [ ! -f "$MANIFEST_PATH" ]; then + if [ "$ALLOW_MISSING_MANIFEST" = "1" ] || [ "$ALLOW_MISSING_MANIFEST" = "true" ]; then + echo "Warning: manifest not found at: $MANIFEST_PATH (continuing due to ALLOW_MISSING_MANIFEST=1)" >&2 + else + echo "Manifest not found at: $MANIFEST_PATH" >&2 + echo "If you want to register the service without a manifest, set ALLOW_MISSING_MANIFEST=1" >&2 + exit 1 + fi +fi + +echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then + echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." +else + echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." 
+ DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=1 --maxClusters=1 +fi + +echo "Ensuring ClusterService '$SERVICE_PK'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then + echo "ClusterService '$SERVICE_PK' already exists, skipping creation." +else + echo "Creating ClusterService '$SERVICE_PK'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" \ + --name="$SERVICE_NAME" \ + --cluster_policy="$CLUSTER_POLICY" \ + --version="$SERVICE_VERSION" \ + --location="$LOCATION_PATH" +fi + +echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." From a538b71bd803a77308d1a37ebce8c0650b2895b8 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Feb 2026 02:39:06 +0300 Subject: [PATCH 06/79] feat: add bootstrap script for redis-sentinel service in SwarmDB --- src/swarm-scripts/68.setup-redis-sentinel.sh | 56 ++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 src/swarm-scripts/68.setup-redis-sentinel.sh diff --git a/src/swarm-scripts/68.setup-redis-sentinel.sh b/src/swarm-scripts/68.setup-redis-sentinel.sh new file mode 100644 index 00000000..5bedc18c --- /dev/null +++ b/src/swarm-scripts/68.setup-redis-sentinel.sh @@ -0,0 +1,56 @@ +#!/bin/bash +set -euo pipefail + +# This script bootstraps the redis-sentinel service into SwarmDB via swarm-cli. +# Run it INSIDE the container. Assumes python3 and swarm-cli.py are available. +# +# Notes: +# - The redis-sentinel manifest and main.py are provided by the image at: +# /etc/swarm-cloud/services/redis-sentinel/{manifest.yaml, main.py} +# This script only registers ClusterPolicy and ClusterService in SwarmDB. 
+# - redis-sentinel depends on WireGuard + Redis clusters as expressed in its stateExpr. + +DB_HOST=${DB_HOST:-127.0.0.1} +DB_PORT=${DB_PORT:-3306} +DB_USER=${DB_USER:-root} +DB_NAME=${DB_NAME:-swarmdb} + +# Service descriptors +SERVICE_NAME=${SERVICE_NAME:-redis-sentinel} +SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} +CLUSTER_POLICY=${CLUSTER_POLICY:-redis-sentinel} +CLUSTER_ID=${CLUSTER_ID:-redis-sentinel} + +# Location and manifest inside the container. +# IMPORTANT: This script runs only on one node. All nodes must have the same location available already +# (baked into the image), so we point to /etc/swarm-cloud/services/${SERVICE_NAME}. +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} +SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" + +if [ ! -f "$MANIFEST_PATH" ]; then + echo "Manifest not found at: $MANIFEST_PATH" >&2 + exit 1 +fi + +echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then + echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." +else + echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=3 --maxClusters=1 +fi + +echo "Ensuring ClusterService '$SERVICE_PK'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then + echo "ClusterService '$SERVICE_PK' already exists, skipping creation." +else + echo "Creating ClusterService '$SERVICE_PK'..." 
+ DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" --omit-command-init +fi + +echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." From ff37e79b2df010402aeb98eb1bbe9b1d5ae3237d Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Feb 2026 11:12:31 +0300 Subject: [PATCH 07/79] Revert "feat: add bootstrap script for redis-sentinel service in SwarmDB" This reverts commit fabb02d176ac019a09af42f7415a0675f530566f. --- src/swarm-scripts/68.setup-redis-sentinel.sh | 56 -------------------- 1 file changed, 56 deletions(-) delete mode 100644 src/swarm-scripts/68.setup-redis-sentinel.sh diff --git a/src/swarm-scripts/68.setup-redis-sentinel.sh b/src/swarm-scripts/68.setup-redis-sentinel.sh deleted file mode 100644 index 5bedc18c..00000000 --- a/src/swarm-scripts/68.setup-redis-sentinel.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# This script bootstraps the redis-sentinel service into SwarmDB via swarm-cli. -# Run it INSIDE the container. Assumes python3 and swarm-cli.py are available. -# -# Notes: -# - The redis-sentinel manifest and main.py are provided by the image at: -# /etc/swarm-cloud/services/redis-sentinel/{manifest.yaml, main.py} -# This script only registers ClusterPolicy and ClusterService in SwarmDB. -# - redis-sentinel depends on WireGuard + Redis clusters as expressed in its stateExpr. - -DB_HOST=${DB_HOST:-127.0.0.1} -DB_PORT=${DB_PORT:-3306} -DB_USER=${DB_USER:-root} -DB_NAME=${DB_NAME:-swarmdb} - -# Service descriptors -SERVICE_NAME=${SERVICE_NAME:-redis-sentinel} -SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} -CLUSTER_POLICY=${CLUSTER_POLICY:-redis-sentinel} -CLUSTER_ID=${CLUSTER_ID:-redis-sentinel} - -# Location and manifest inside the container. 
-# IMPORTANT: This script runs only on one node. All nodes must have the same location available already -# (baked into the image), so we point to /etc/swarm-cloud/services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} -MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} -SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" - -if [ ! -f "$MANIFEST_PATH" ]; then - echo "Manifest not found at: $MANIFEST_PATH" >&2 - exit 1 -fi - -echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." -if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then - echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." -else - echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." - DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=3 --maxClusters=1 -fi - -echo "Ensuring ClusterService '$SERVICE_PK'..." -if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then - echo "ClusterService '$SERVICE_PK' already exists, skipping creation." -else - echo "Creating ClusterService '$SERVICE_PK'..." - DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" --omit-command-init -fi - -echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." 
From 2275e50d65d8f95cd1f790b2cabea06d6dcee028 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Feb 2026 16:28:52 +0300 Subject: [PATCH 08/79] fix: update submodules --- src/repos/sp-swarm-services | 2 +- src/repos/swarm-cloud | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index 2f62e590..bc934129 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit 2f62e5900f2a3e54b17670145180a4eeb2e8fe71 +Subproject commit bc934129e8568683981b64e9de764033393b7e14 diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 39b67728..ee32d600 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 39b67728d3353142e3ed974c0c30f43b116208e5 +Subproject commit ee32d600d30542a91613fc6a778158c701d55236 From 0b3b9d267a311614f20d43f15ad8753b18898a62 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Feb 2026 17:34:08 +0300 Subject: [PATCH 09/79] feat: add auth-service configuration and scripts to Dockerfile --- src/Dockerfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Dockerfile b/src/Dockerfile index 1aa16e9b..4ed76afd 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -305,6 +305,12 @@ RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-cloud-api.sh ${OUTPUTDIR}/usr/loca # run-state directories are prepared in state_disk_mount.sh; bind mounts via fstab ADD rootfs/files/configs/etc/securetty "${OUTPUTDIR}/etc/securetty" +# auth-service (runs as a swarm service) +ADD rootfs/files/configs/etc/systemd/system/sp-svc-auth-service.service ${OUTPUTDIR}/etc/systemd/system/sp-svc-auth-service.service +ADD rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh ${OUTPUTDIR}/usr/local/bin/sp-svc-auth-service.sh +RUN chmod +x ${OUTPUTDIR}/usr/local/bin/sp-svc-auth-service.sh +RUN ln -sf /etc/systemd/system/sp-svc-auth-service.service 
"${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/sp-svc-auth-service.service" + # swarm one-shot services runner ADD rootfs/files/configs/etc/systemd/system/swarm-services.service ${OUTPUTDIR}/etc/systemd/system/swarm-services.service ADD rootfs/files/configs/usr/local/bin/swarm-services.sh ${OUTPUTDIR}/usr/local/bin/swarm-services.sh From 1c54f8b8fbd7dc359248945999021dc7364fc1a2 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Feb 2026 17:36:54 +0300 Subject: [PATCH 10/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index ee32d600..819e4467 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit ee32d600d30542a91613fc6a778158c701d55236 +Subproject commit 819e4467b33a32cd5b58ffa83759052ab29b2bee From 142e9ef93461e752b4681e00f2ed1faedcce743a Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Feb 2026 19:10:17 +0300 Subject: [PATCH 11/79] fix: update submodule references for sp-swarm-services and swarm-cloud --- src/repos/sp-swarm-services | 2 +- src/repos/swarm-cloud | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index bc934129..82e2c793 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit bc934129e8568683981b64e9de764033393b7e14 +Subproject commit 82e2c793078f2f5e607669ed8b635b21636583f9 diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 819e4467..4a1d0351 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 819e4467b33a32cd5b58ffa83759052ab29b2bee +Subproject commit 4a1d03513e5aa5040be8061697e2bc118f05ff4e From 1104909875dc230a67938d301cdf8b4651e14200 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Feb 2026 23:23:56 +0300 Subject: [PATCH 12/79] feat: build and run 
swarm-cloud-ui --- src/repos/swarm-cloud | 2 +- src/rootfs/files/scripts/build_swarm_cloud.sh | 34 +++++++++++------- src/services/apps/swarm-cloud-ui.sh | 17 +++++---- src/swarm-scripts/64.setup-swarm-cloud-api.sh | 35 +++++++++++++++++++ 4 files changed, 68 insertions(+), 20 deletions(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 4a1d0351..af921fc1 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 4a1d03513e5aa5040be8061697e2bc118f05ff4e +Subproject commit af921fc19a05730c9af7eb92f60c626dd2e42db8 diff --git a/src/rootfs/files/scripts/build_swarm_cloud.sh b/src/rootfs/files/scripts/build_swarm_cloud.sh index e6cbaeee..6d1e6982 100644 --- a/src/rootfs/files/scripts/build_swarm_cloud.sh +++ b/src/rootfs/files/scripts/build_swarm_cloud.sh @@ -37,18 +37,28 @@ function build_swarm_cloud() { log_info "publishing built swarm-node artifacts to /usr/local/lib/swarm-cloud"; chroot "${OUTPUTDIR}" /bin/bash -lc 'set -e; mkdir -p /usr/local/lib/swarm-cloud/apps/swarm-node'; chroot "${OUTPUTDIR}" /bin/bash -lc 'cp -r /opt/swarm-cloud/apps/swarm-node/{dist,node_modules} /usr/local/lib/swarm-cloud/apps/swarm-node/'; -# -# # swarm-cloud-ui -# log_info "building swarm-cloud-ui"; -# chroot "${OUTPUTDIR}" /bin/bash -lc 'cd /opt/swarm-cloud && pnpm nx build swarm-cloud-ui --output-style=stream'; -# -# log_info "deploying swarm-cloud-ui via pnpm deploy to /usr/local/lib/swarm-cloud/apps/swarm-cloud-ui"; -# chroot "${OUTPUTDIR}" /bin/bash -lc 'set -e; mkdir -p /usr/local/lib/swarm-cloud/apps/swarm-cloud-ui'; -# chroot "${OUTPUTDIR}" /bin/bash -lc 'cp -r /opt/swarm-cloud/apps/swarm-cloud-ui/{.next,node_modules} /usr/local/lib/swarm-cloud/apps/swarm-cloud-ui/'; -# -# log_info "copying shared UI libraries to /usr/local/lib/swarm-cloud/libs"; -# chroot "${OUTPUTDIR}" /bin/bash -lc 'mkdir -p /usr/local/lib/swarm-cloud/libs'; -# chroot "${OUTPUTDIR}" /bin/bash -lc 'cp -r /opt/swarm-cloud/libs/ui 
/usr/local/lib/swarm-cloud/libs/ui'; + + # swarm-cloud-ui + log_info "building swarm-cloud-ui"; + chroot "${OUTPUTDIR}" /bin/bash -lc 'cd /opt/swarm-cloud && pnpm nx build swarm-cloud-ui --output-style=stream'; + + log_info "publishing built swarm-cloud-ui (Next standalone) to /usr/local/lib/swarm-cloud/apps/swarm-cloud-ui"; + chroot "${OUTPUTDIR}" /bin/bash -lc 'set -e; rm -rf /usr/local/lib/swarm-cloud/apps/swarm-cloud-ui; mkdir -p /usr/local/lib/swarm-cloud/apps/swarm-cloud-ui'; + chroot "${OUTPUTDIR}" /bin/bash -lc ' +set -euo pipefail; +UI_SRC=/opt/swarm-cloud/apps/swarm-cloud-ui; +UI_DST=/usr/local/lib/swarm-cloud/apps/swarm-cloud-ui; + +# copy standalone server bundle +cp -a "${UI_SRC}/.next/standalone/." "${UI_DST}/"; + +# next standalone expects static assets under apps/swarm-cloud-ui/.next/static +mkdir -p "${UI_DST}/apps/swarm-cloud-ui/.next"; +cp -a "${UI_SRC}/.next/static" "${UI_DST}/apps/swarm-cloud-ui/.next/static"; + +# public assets (optional) go next to server.js, same as .next/static above +cp -a "${UI_SRC}/public" "${UI_DST}/apps/swarm-cloud-ui/public" 2>/dev/null || true; +'; # # In the deployed UI lib, TypeScript sources live under libs/ui/src, but some imports # # reference sibling TS modules with a .js extension (e.g. "../lib/utils.js", "./button.js"). diff --git a/src/services/apps/swarm-cloud-ui.sh b/src/services/apps/swarm-cloud-ui.sh index 75e800b2..17924ad9 100644 --- a/src/services/apps/swarm-cloud-ui.sh +++ b/src/services/apps/swarm-cloud-ui.sh @@ -4,12 +4,12 @@ set -euo pipefail # This script starts the swarm-cloud-ui frontend in the same layout that the VM image uses. # According to build_swarm_cloud.sh and the Dockerfile, the built UI is published to: -# /usr/local/lib/swarm-cloud/dist/apps/swarm-cloud-ui +# /usr/local/lib/swarm-cloud/apps/swarm-cloud-ui # All dependencies are installed at image build time in build_swarm_cloud.sh; this script # MUST NOT run pnpm install or modify node_modules at runtime.
SWARM_CLOUD_ROOT="/usr/local/lib/swarm-cloud" -SWARM_CLOUD_UI_DIR="${SWARM_CLOUD_ROOT}/dist/apps/swarm-cloud-ui" +SWARM_CLOUD_UI_DIR="${SWARM_CLOUD_ROOT}/apps/swarm-cloud-ui" cd "${SWARM_CLOUD_UI_DIR}" @@ -21,11 +21,14 @@ fi LISTEN_INTERFACE="${LISTEN_INTERFACE:-0.0.0.0}" SWARM_CLOUD_UI_PORT="${SWARM_CLOUD_UI_PORT:-3000}" -echo "Starting swarm-cloud-ui in development mode with Next.js (pnpm deploy layout)..." +echo "Starting swarm-cloud-ui in production mode (Next standalone)..." echo " Host: ${LISTEN_INTERFACE}" echo " Port: ${SWARM_CLOUD_UI_PORT}" -NODE_ENV=development exec node \ - node_modules/next/dist/bin/next dev \ - --hostname "${LISTEN_INTERFACE}" \ - --port "${SWARM_CLOUD_UI_PORT}" +if [[ ! -f "apps/swarm-cloud-ui/server.js" ]]; then + echo "Expected standalone server entrypoint not found: ${SWARM_CLOUD_UI_DIR}/apps/swarm-cloud-ui/server.js" >&2 + exit 1 +fi + +NODE_ENV=production HOSTNAME="${LISTEN_INTERFACE}" PORT="${SWARM_CLOUD_UI_PORT}" exec node \ + apps/swarm-cloud-ui/server.js diff --git a/src/swarm-scripts/64.setup-swarm-cloud-api.sh b/src/swarm-scripts/64.setup-swarm-cloud-api.sh index c4a354c4..96fd9e5e 100644 --- a/src/swarm-scripts/64.setup-swarm-cloud-api.sh +++ b/src/swarm-scripts/64.setup-swarm-cloud-api.sh @@ -26,6 +26,12 @@ SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} CLUSTER_POLICY=${CLUSTER_POLICY:-swarm-cloud-api} CLUSTER_ID=${CLUSTER_ID:-swarm-cloud-api} +# swarm-cloud-ui descriptors +UI_SERVICE_NAME=${UI_SERVICE_NAME:-swarm-cloud-ui} +UI_SERVICE_VERSION=${UI_SERVICE_VERSION:-1.0.0} +UI_CLUSTER_POLICY=${UI_CLUSTER_POLICY:-swarm-cloud-ui} +UI_CLUSTER_ID=${UI_CLUSTER_ID:-swarm-cloud-ui} + # Location and manifest inside the container. # IMPORTANT: This script runs only on one node. All nodes must have the same location available already # (baked into the image), so we point to /etc/swarm-cloud/services/${SERVICE_NAME}. 
@@ -33,11 +39,20 @@ LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" +UI_LOCATION_PATH=${UI_LOCATION_PATH:-/etc/swarm-cloud/services/${UI_SERVICE_NAME}} +UI_MANIFEST_PATH=${UI_MANIFEST_PATH:-${UI_LOCATION_PATH}/manifest.yaml} +UI_SERVICE_PK="${UI_CLUSTER_POLICY}:${UI_SERVICE_NAME}" + if [ ! -f "$MANIFEST_PATH" ]; then echo "Manifest not found at: $MANIFEST_PATH" >&2 exit 1 fi +if [ ! -f "$UI_MANIFEST_PATH" ]; then + echo "Manifest not found at: $UI_MANIFEST_PATH" >&2 + exit 1 +fi + echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then @@ -58,4 +73,24 @@ else python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" fi +echo "Ensuring ClusterPolicy '$UI_CLUSTER_POLICY'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$UI_CLUSTER_POLICY" >/dev/null 2>&1; then + echo "ClusterPolicy '$UI_CLUSTER_POLICY' already exists, skipping creation." +else + echo "Creating ClusterPolicy '$UI_CLUSTER_POLICY'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$UI_CLUSTER_POLICY" --minSize=1 --maxSize=1 --maxClusters=1 +fi + +echo "Ensuring ClusterService '$UI_SERVICE_PK'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$UI_SERVICE_PK" >/dev/null 2>&1; then + echo "ClusterService '$UI_SERVICE_PK' already exists, skipping creation." 
+else + echo "Creating ClusterService '$UI_SERVICE_PK'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$UI_SERVICE_PK" --name="$UI_SERVICE_NAME" --cluster_policy="$UI_CLUSTER_POLICY" --version="$UI_SERVICE_VERSION" --location="$UI_LOCATION_PATH" +fi + echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." From 4198b728448d3f93b495e9c315c8a1429328cfaa Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Feb 2026 00:16:20 +0300 Subject: [PATCH 13/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index af921fc1..bf994b60 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit af921fc19a05730c9af7eb92f60c626dd2e42db8 +Subproject commit bf994b6039fa9225450199636852d8f0acc60f82 From 41b744a3a9a29cf0245219b2aff1b2314ce28938 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Feb 2026 00:38:15 +0300 Subject: [PATCH 14/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index bf994b60..91ba8fef 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit bf994b6039fa9225450199636852d8f0acc60f82 +Subproject commit 91ba8fefd32afd5209f7a4c2c687d01421285403 From d24ab61fd9d3247fc3b46b965b80e278a3308388 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Feb 2026 01:53:03 +0300 Subject: [PATCH 15/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 91ba8fef..a5e72813 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 
91ba8fefd32afd5209f7a4c2c687d01421285403 +Subproject commit a5e728133c0f222213d5cf34fb4b06eab19acee2 From 7449dbe026bf83a89a58e719230dcdd5315a21e2 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Feb 2026 02:13:19 +0300 Subject: [PATCH 16/79] feat: add script to start swarm-cloud-ui frontend --- .../files/configs/usr/local/bin}/swarm-cloud-ui.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/{services/apps => rootfs/files/configs/usr/local/bin}/swarm-cloud-ui.sh (93%) diff --git a/src/services/apps/swarm-cloud-ui.sh b/src/rootfs/files/configs/usr/local/bin/swarm-cloud-ui.sh similarity index 93% rename from src/services/apps/swarm-cloud-ui.sh rename to src/rootfs/files/configs/usr/local/bin/swarm-cloud-ui.sh index 17924ad9..c6599328 100644 --- a/src/services/apps/swarm-cloud-ui.sh +++ b/src/rootfs/files/configs/usr/local/bin/swarm-cloud-ui.sh @@ -14,7 +14,7 @@ SWARM_CLOUD_UI_DIR="${SWARM_CLOUD_ROOT}/apps/swarm-cloud-ui" cd "${SWARM_CLOUD_UI_DIR}" if ! command -v node >/dev/null 2>&1; then - echo "Node.js is not installed or not in PATH. Please install Node.js first." >&2 + echo "Node.js is not installed or not in PATH." 
>&2 exit 1 fi From c1e3eb5afc50ec505e3c8b745b5692007e011319 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Feb 2026 02:32:00 +0300 Subject: [PATCH 17/79] feat: add swarm-cloud-ui script to VM root filesystem --- src/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Dockerfile b/src/Dockerfile index 4ed76afd..9386770d 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -325,6 +325,8 @@ RUN ln -sf /dev/null "${OUTPUTDIR}/etc/systemd/system/getty@ttyS0.service" # swarm binaries into VM rootfs RUN mkdir -p "${OUTPUTDIR}/usr/local/lib/swarm-cloud" "${OUTPUTDIR}/usr/local/bin" +ADD rootfs/files/configs/usr/local/bin/swarm-cloud-ui.sh ${OUTPUTDIR}/usr/local/bin/swarm-cloud-ui.sh +RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-cloud-ui.sh COPY --from=swarm-db-build /app/swarm/swarm-db/swarm-db-linux-amd64 ${OUTPUTDIR}/usr/local/bin/swarm-db-linux-amd64 COPY repos/swarm-cloud ${OUTPUTDIR}/opt/swarm-cloud RUN mkdir -p ${OUTPUTDIR}/etc/swarm-cloud From abf51db77ab96c17b505c1a0cf524ac07ba274fe Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Feb 2026 03:03:04 +0300 Subject: [PATCH 18/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index a5e72813..09155d75 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit a5e728133c0f222213d5cf34fb4b06eab19acee2 +Subproject commit 09155d75eebd1bce504bf82318c066998495dbd5 From 15bb24b284ff143ae37265fa32abfad1fd023fc4 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Feb 2026 03:53:29 +0300 Subject: [PATCH 19/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 09155d75..793217fc 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 
09155d75eebd1bce504bf82318c066998495dbd5 +Subproject commit 793217fc419e9d5b005bbd4e297aa529ae8a1a96 From 1d44ec1f6ef8c68dccd211eb0e6335db27894305 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Feb 2026 04:39:44 +0300 Subject: [PATCH 20/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 793217fc..bdf5aa72 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 793217fc419e9d5b005bbd4e297aa529ae8a1a96 +Subproject commit bdf5aa725eec7748f8828c54f8b71a92c3a09c39 From 633402a6fda4ef98e3486f56270f9cb744f782a4 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Feb 2026 05:29:25 +0300 Subject: [PATCH 21/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index bdf5aa72..0a8b09e8 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit bdf5aa725eec7748f8828c54f8b71a92c3a09c39 +Subproject commit 0a8b09e87c81a0cc39180ad31d45249593c7064e From b13c809758d0aeadf2bd7d969c8dbcffff358520 Mon Sep 17 00:00:00 2001 From: Max Andreev Date: Mon, 5 Jan 2026 15:25:16 +0300 Subject: [PATCH 22/79] Add signature for mrenclave-005859454c42f7d2bf2143b24cbb8623ca1655d3b694bb69b7645cd4e568183a (#136) --- ...d2bf2143b24cbb8623ca1655d3b694bb69b7645cd4e568183a.json | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 signatures/tdx/pre-release/mrenclave-005859454c42f7d2bf2143b24cbb8623ca1655d3b694bb69b7645cd4e568183a.json diff --git a/signatures/tdx/pre-release/mrenclave-005859454c42f7d2bf2143b24cbb8623ca1655d3b694bb69b7645cd4e568183a.json b/signatures/tdx/pre-release/mrenclave-005859454c42f7d2bf2143b24cbb8623ca1655d3b694bb69b7645cd4e568183a.json new file mode 100644 index 00000000..c2fe6443 --- /dev/null +++ 
b/signatures/tdx/pre-release/mrenclave-005859454c42f7d2bf2143b24cbb8623ca1655d3b694bb69b7645cd4e568183a.json @@ -0,0 +1,7 @@ +{ + "mrenclave": "005859454c42f7d2bf2143b24cbb8623ca1655d3b694bb69b7645cd4e568183a", + "signature": "S2cBl3yovLVALEvV4YMMw0wDRLGJJ0baquerS4HQELCbMvDZ3oSz35Bo3v2L5x5+H6/zPjXmTyfg/Nu8duarSP9xRnSCog6+cKmAsCoT0jCqez7yphlbpjrO2imQw8Zp2Hq2cBbof+jwLTeOwzv2dk+1fMTe6oWxFCj45g0E41dSLWl5Ox2oofAEMzW/QFkhyh3/XlttgY1+dxHxbtpspmBg7ZFALTrBaK2U60mWJlsSUxhnlnpqpwVfhri1L4cUR9Cds+wmZ296iUWgFasIh6qTsx73BiFwTLCPEAApgDY+ETHUX6tStuIHX1FXpkzFFOWtWXW17KOD1Q49PjvSLcshZEtNZZD5zFU69oHuu1dfdicRXr2cBLLxZB4SYWRackxYToQ9sJdZ1vkxRjcad4zW2GY5HAhMYOSUgebfzydT0ixnEfsvaC+LnRC5d0bCf5NtVZA9F3KFTJ/gMbn7+nzjt5VV6ebCf/5mzf3EscBx52QE3h3kx7AliVn/9qai", + "build": "build-295", + "description": "test/debug/swarm", + "creationDate": "2026-01-05T12:00:10.685Z" +} From 1a8f951d55ccd632147d79e9e3aec2a5c2e249b9 Mon Sep 17 00:00:00 2001 From: Vasily Kraev Date: Fri, 23 Jan 2026 11:39:39 +0200 Subject: [PATCH 23/79] Add signature for mrenclave-dbf91a376403a78483473da4c78c170cdb30daf62ed7269b692c3f25d86d833f (#141) --- ...8483473da4c78c170cdb30daf62ed7269b692c3f25d86d833f.json | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 signatures/tdx/pre-release/mrenclave-dbf91a376403a78483473da4c78c170cdb30daf62ed7269b692c3f25d86d833f.json diff --git a/signatures/tdx/pre-release/mrenclave-dbf91a376403a78483473da4c78c170cdb30daf62ed7269b692c3f25d86d833f.json b/signatures/tdx/pre-release/mrenclave-dbf91a376403a78483473da4c78c170cdb30daf62ed7269b692c3f25d86d833f.json new file mode 100644 index 00000000..475c94f6 --- /dev/null +++ b/signatures/tdx/pre-release/mrenclave-dbf91a376403a78483473da4c78c170cdb30daf62ed7269b692c3f25d86d833f.json @@ -0,0 +1,7 @@ +{ + "mrenclave": "dbf91a376403a78483473da4c78c170cdb30daf62ed7269b692c3f25d86d833f", + "signature": 
"POPSfX3b6mbWSo7bekIG/hSqAX/5kK0BskADHh/7Sro6E4PO4mSum7CL9xhdv2V/mpmp7zu1zjlT/wpBvi8VnAhpTHBysm7XITS2vofvwo192ow/IQJAXRlNmammObVD+tRuLaBIG6jjLii0acaM2ukKKFC+Lv8mIVT3MV5vsrmxK6ojZhCX8hMM9hB1D0aq4t/SbJcMNNh/q2aKiVbaVFb7czJkjYUM/1DjtzjTzZn1DMeOXLb8ZSaoMKgatLKRrN4iRmg6hI7gXXhcL2iR0IqjuDEZAoEBq88eS83fJWCia/VjoKPB3jHue8ikD3EG4YaZADEp77xv/RY4IgM2Mq1vwuId6qsONS6uEO9BGOtc79gwZ9a9Au+CMoWUDw/nEGT7NRsfmKxNNHMaKzLUwixLu1HAC+/XZkAbRNCAI9j7KPX5zWvnXUBmfqXBrMeZEK6kmBJn38buM0hkkULtlOVTxyuWEnKqfGU5wOXnOyAemfGc2u+vAXWQ22dsgb4/", + "build": "build-286", + "description": "argo_branch=main argo_sp_env=develop sp-debug=true", + "creationDate": "2026-01-23T09:39:01.919Z" +} From a0e87a099a1131bb535aa7a63c319bf317868720 Mon Sep 17 00:00:00 2001 From: Vasily Kraev Date: Wed, 28 Jan 2026 22:37:13 +0300 Subject: [PATCH 24/79] Add LICENSE file --- LICENSE | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..14a02e89 --- /dev/null +++ b/LICENSE @@ -0,0 +1,66 @@ +Business Source License 1.1 + +License text copyright (c) 2024 Super Protocol, All Rights Reserved. +"Business Source License" is a trademark of MariaDB Corporation Ab. + +----------------------------------------------------------------------------- + +Parameters + +- Licensor: + Super Protocol + +- Licensed Work: + Super Protocol Software + The Licensed Work is (c) 2024 Super Protocol + +- Additional Use Grant: + None + +- Change Date: + 2028-01-01 + +- Change License: + GNU General Public License v2.0 or later + +----------------------------------------------------------------------------- + +Terms + +The Licensor hereby grants you the right to copy, modify, create derivative works, redistribute, and make non-production use of the Licensed Work. The Licensor may make an Additional Use Grant permitting limited production use. 
+ +Effective on the Change Date, or the fourth anniversary of the first publicly available distribution of a specific version of the Licensed Work under this License, whichever comes first, the Licensor hereby grants you rights under the terms of the Change License, and the rights granted in the paragraph above terminate. + +If your use of the Licensed Work does not comply with the requirements currently in effect as described in this License, you must purchase a commercial license from the Licensor, its affiliated entities, or authorized resellers, or you must refrain from using the Licensed Work. + +All copies of the original and modified Licensed Work, and derivative works of the Licensed Work, are subject to this License. This License applies separately for each version of the Licensed Work and the Change Date may vary for each version of the Licensed Work released by Licensor. + +You must conspicuously display this License on each original or modified copy of the Licensed Work. If you receive the Licensed Work in original or modified form from a third party, the terms and conditions set forth in this License apply to your use of that work. + +Any use of the Licensed Work in violation of this License will automatically terminate your rights under this License for the current and all other versions of the Licensed Work. + +This License does not grant you any right in any trademark or logo of Licensor or its affiliates (provided that you may use a trademark or logo of Licensor as expressly required by this License). + +TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND TITLE. 
+ +MariaDB hereby grants you permission to use this License’s text to license your works, and to refer to it using the trademark "Business Source License," as long as you comply with the Covenants of Licensor below. + +----------------------------------------------------------------------------- + +Covenants of Licensor + +In consideration of the right to use this License’s text and the "Business Source License" name and trademark, Licensor covenants to MariaDB, and to all other recipients of the licensed work to be provided by Licensor: + +1. To specify as the Change License the GPL Version 2.0 or any later version, or a license that is compatible with GPL Version 2.0 or a later version, where "compatible" means that software provided under the Change License can be included in a program with software provided under GPL Version 2.0 or a later version. Licensor may specify additional Change Licenses without limitation. + +2. To either: (a) specify an additional grant of rights to use that does not impose any additional restriction on the rights granted in this License, as the Additional Use Grant; or (b) insert the text "None." + +3. To specify a Change Date. + +4. Not to modify this License in any other way. + +----------------------------------------------------------------------------- + +Notice + +The Business Source License (this document, or the "License") is not an Open Source license. However, the Licensed Work will eventually be made available under an Open Source License, as stated in this License. 
\ No newline at end of file From 91cce3db05130321b93bcd8be648070bb4a267b3 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 13 Feb 2026 17:01:51 +0300 Subject: [PATCH 25/79] feat: add route-manager bootstrap script for SwarmDB --- src/repos/sp-swarm-services | 2 +- src/repos/swarm-cloud | 2 +- src/swarm-scripts/73.setup-route-manager.sh | 69 +++++++++++++++++++++ 3 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 src/swarm-scripts/73.setup-route-manager.sh diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index 82e2c793..8d72269f 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit 82e2c793078f2f5e607669ed8b635b21636583f9 +Subproject commit 8d72269f7765440bde02d46e3ee2e9fbcb306b2d diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 0a8b09e8..43e3c9e4 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 0a8b09e87c81a0cc39180ad31d45249593c7064e +Subproject commit 43e3c9e4c9325604ba67de82402ad105bb7180ae diff --git a/src/swarm-scripts/73.setup-route-manager.sh b/src/swarm-scripts/73.setup-route-manager.sh new file mode 100644 index 00000000..84b96084 --- /dev/null +++ b/src/swarm-scripts/73.setup-route-manager.sh @@ -0,0 +1,69 @@ +#!/bin/bash +set -euo pipefail + +# This script bootstraps the route-manager service into SwarmDB via swarm-cli. +# Run it INSIDE the container. Assumes python3 and swarm-cli.py are available. +# +# Notes: +# - The service manifest is expected to be available on all nodes at: +# ${LOCATION_PATH}/manifest.yaml +# If you don't have a manifest yet, set ALLOW_MISSING_MANIFEST=1 to still +# register the ClusterService (manifest will be stored as NULL). +# - route-manager dependencies are expected to be expressed in the manifest +# (stateExpr/commands) and handled by provision workers. 
+ +DB_HOST=${DB_HOST:-127.0.0.1} +DB_PORT=${DB_PORT:-3306} +DB_USER=${DB_USER:-root} +DB_NAME=${DB_NAME:-swarmdb} + +# Service descriptors +SERVICE_NAME=${SERVICE_NAME:-route-manager} +SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} +CLUSTER_POLICY=${CLUSTER_POLICY:-route-manager} +CLUSTER_ID=${CLUSTER_ID:-route-manager} + +# Location stored in ClusterServices; must exist on all nodes. +# The service provisioner (manifest.yaml + main.py) is baked into the image under +# /etc/swarm-cloud/services/${SERVICE_NAME}. +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} +SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" + +ALLOW_MISSING_MANIFEST=${ALLOW_MISSING_MANIFEST:-0} + +if [ ! -f "$MANIFEST_PATH" ]; then + if [ "$ALLOW_MISSING_MANIFEST" = "1" ] || [ "$ALLOW_MISSING_MANIFEST" = "true" ]; then + echo "Warning: manifest not found at: $MANIFEST_PATH (continuing due to ALLOW_MISSING_MANIFEST=1)" >&2 + else + echo "Manifest not found at: $MANIFEST_PATH" >&2 + echo "If you want to register the service without a manifest, set ALLOW_MISSING_MANIFEST=1" >&2 + exit 1 + fi +fi + +echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then + echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." +else + echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=1 --maxClusters=1 +fi + +echo "Ensuring ClusterService '$SERVICE_PK'..." 
+if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then + echo "ClusterService '$SERVICE_PK' already exists, skipping creation." +else + echo "Creating ClusterService '$SERVICE_PK'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" \ + --name="$SERVICE_NAME" \ + --cluster_policy="$CLUSTER_POLICY" \ + --version="$SERVICE_VERSION" \ + --location="$LOCATION_PATH" +fi + +echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." From 8cee3217a7ffda67192e80d549757cac27c3543b Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 13 Feb 2026 17:07:15 +0300 Subject: [PATCH 26/79] fix: update submodule reference for sp-swarm-services --- src/repos/sp-swarm-services | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index 8d72269f..0930a50b 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit 8d72269f7765440bde02d46e3ee2e9fbcb306b2d +Subproject commit 0930a50bf42e581ee7ff5d6f259edc733ea6319f From f563d184c070a9a5934964aaaac248339c139935 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 13 Feb 2026 17:29:17 +0300 Subject: [PATCH 27/79] fix: update submodule reference for sp-swarm-services --- src/repos/sp-swarm-services | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index 0930a50b..3dfe2a92 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit 0930a50bf42e581ee7ff5d6f259edc733ea6319f +Subproject commit 3dfe2a92571b1faa3b6c23a167e4cd46049ce885 From 47903f7124b9a14678c9233b46fb99da7d4c1b12 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 13 Feb 2026 19:02:48 +0300 
Subject: [PATCH 28/79] fix: remove unused search line from resolv.conf --- src/rootfs/files/configs/etc/resolv.conf | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rootfs/files/configs/etc/resolv.conf b/src/rootfs/files/configs/etc/resolv.conf index 861a2b7e..16ef5206 100644 --- a/src/rootfs/files/configs/etc/resolv.conf +++ b/src/rootfs/files/configs/etc/resolv.conf @@ -2,4 +2,3 @@ nameserver 127.0.0.53 nameserver 1.1.1.1 nameserver 8.8.8.8 options edns0 trust-ad -search From 9aee3e8bc50e8ba62c47b7d41f7926ed05e8e856 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 13 Feb 2026 19:15:17 +0300 Subject: [PATCH 29/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 43e3c9e4..cc7fa9f9 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 43e3c9e4c9325604ba67de82402ad105bb7180ae +Subproject commit cc7fa9f9194ab99fa2922e830a111cb67673339a From e6a2e4481450f254c58b8f9e5f0f4a52d786d727 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 13 Feb 2026 21:05:34 +0300 Subject: [PATCH 30/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index cc7fa9f9..f73734b7 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit cc7fa9f9194ab99fa2922e830a111cb67673339a +Subproject commit f73734b73f91bae461cf7cbb05608f85053127a7 From 093f13a1c7ebd7f0040a22d48d98ddc63745ffd6 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 13 Feb 2026 22:17:01 +0300 Subject: [PATCH 31/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index f73734b7..3a682648 160000 --- a/src/repos/swarm-cloud +++ 
b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit f73734b73f91bae461cf7cbb05608f85053127a7 +Subproject commit 3a682648b5fefdf92610927dc499f4e961f320ff From 7239b1b3adb5f44dc8d472a1586773d8d0e705da Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 13 Feb 2026 23:22:23 +0300 Subject: [PATCH 32/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 3a682648..57eb73cf 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 3a682648b5fefdf92610927dc499f4e961f320ff +Subproject commit 57eb73cf498e58d8b545b3da8c0a0e22aa80c3fa From fb90129f91b188bdc20b4bb029ecaa2206064916 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Sat, 14 Feb 2026 00:31:05 +0300 Subject: [PATCH 33/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 57eb73cf..a82a81a5 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 57eb73cf498e58d8b545b3da8c0a0e22aa80c3fa +Subproject commit a82a81a5f5e9973f83a1ac6368fad24d52efce91 From d23ccb9adbbddb7ea556e23615cc96d9bf3f0dc4 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Sat, 14 Feb 2026 01:03:34 +0300 Subject: [PATCH 34/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index a82a81a5..6c7055fa 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit a82a81a5f5e9973f83a1ac6368fad24d52efce91 +Subproject commit 6c7055fac5b2ac41bda7f294f09413b4def357c9 From 5a0510d4799b52e744c19a9f6152a77a7754f246 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Sun, 15 Feb 2026 08:19:17 +0300 Subject: [PATCH 35/79] fix: update submodule 
reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 6c7055fa..eb165aa5 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 6c7055fac5b2ac41bda7f294f09413b4def357c9 +Subproject commit eb165aa57cd4e3683eb265cab26f11571b6045c9 From 70d096d9f5d612b62954f5933712fbd650ce4733 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Sun, 15 Feb 2026 09:05:58 +0300 Subject: [PATCH 36/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index eb165aa5..96604452 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit eb165aa57cd4e3683eb265cab26f11571b6045c9 +Subproject commit 966044529c8bf41460c4ec01865ba543edd9502d From ca61964f974dfeec18c80f230255999cfac61e85 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Sun, 15 Feb 2026 11:00:59 +0300 Subject: [PATCH 37/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 96604452..93dfff49 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 966044529c8bf41460c4ec01865ba543edd9502d +Subproject commit 93dfff49b881c5ae961f7022311085fc785a5400 From d009f1a1151918758717c6a3a49849803e3ab875 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Mon, 16 Feb 2026 19:25:36 +0300 Subject: [PATCH 38/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 93dfff49..43cc19f4 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 93dfff49b881c5ae961f7022311085fc785a5400 
+Subproject commit 43cc19f4f5465d43d1fda39032c21f6b703c8373 From ea8beee6dcebf7e791a509bd6fcdfc132b34a75f Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Mon, 16 Feb 2026 20:28:14 +0300 Subject: [PATCH 39/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 43cc19f4..73be7cea 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 43cc19f4f5465d43d1fda39032c21f6b703c8373 +Subproject commit 73be7cead177122128c979c2adb2c88140829e20 From e0e78b9836a06ca1384742c48a219a449ab16a26 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Tue, 17 Feb 2026 03:33:03 +0300 Subject: [PATCH 40/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 73be7cea..2c33d94e 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 73be7cead177122128c979c2adb2c88140829e20 +Subproject commit 2c33d94ef91cb3423ae074c1ca39792615bba102 From 01407b2f3f356332822b4549111487a4b6a23454 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Tue, 17 Feb 2026 22:48:06 +0300 Subject: [PATCH 41/79] fix: update submodule references for sp-swarm-services and swarm-cloud --- src/repos/sp-swarm-services | 2 +- src/repos/swarm-cloud | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index 3dfe2a92..81710298 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit 3dfe2a92571b1faa3b6c23a167e4cd46049ce885 +Subproject commit 81710298b244dded2ce32d48459d145fa2601e82 diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 2c33d94e..0c4eafe2 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 
2c33d94ef91cb3423ae074c1ca39792615bba102 +Subproject commit 0c4eafe2f0da5906fc063c1efa496f9b3a4fd1b0 From c9a7f841dec2ed1c8b77f2e261f5422d8f0973fd Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 18 Feb 2026 22:34:52 +0300 Subject: [PATCH 42/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 0c4eafe2..a220237c 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 0c4eafe2f0da5906fc063c1efa496f9b3a4fd1b0 +Subproject commit a220237c0eec38f0a05d7e2d1aaf572982f8bdde From da8e6d8b64fd2f35aaea76955eddac1d9892ebf7 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 19 Feb 2026 11:54:01 +0300 Subject: [PATCH 43/79] fix: disable rk2 sha256 check --- src/Dockerfile | 3 ++- src/rootfs/files/scripts/install_rke2.sh | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Dockerfile b/src/Dockerfile index 9386770d..59a552bd 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -202,7 +202,8 @@ RUN --security=insecure /buildroot/files/scripts/install_gve.sh # RKE2: clean default installation inside rootfs (no legacy scripts) ARG RKE2_VERSION=v1.32.8+rke2r1 -ARG RKE2_INSTALL_SHA256=2d24db2184dd6b1a5e281fa45cc9a8234c889394721746f89b5fe953fdaaf40a +# TODO: RKE2 install script changes often. Need to create more robust mechanism to handle updates without breaking the build. Possible solutions: pinning to specific commit SHA, or hosting a static copy of the install script in this repo and updating it as needed. 
+# ARG RKE2_INSTALL_SHA256=2d24db2184dd6b1a5e281fa45cc9a8234c889394721746f89b5fe953fdaaf40a ENV INSTALL_RKE2_VERSION=${RKE2_VERSION} ENV RKE2_INSTALL_SHA256=${RKE2_INSTALL_SHA256} ADD rootfs/files/scripts/install_rke2.sh /buildroot/files/scripts/install_rke2.sh diff --git a/src/rootfs/files/scripts/install_rke2.sh b/src/rootfs/files/scripts/install_rke2.sh index 656df3ad..04b2a9c0 100755 --- a/src/rootfs/files/scripts/install_rke2.sh +++ b/src/rootfs/files/scripts/install_rke2.sh @@ -21,8 +21,9 @@ function install_rke2() { mkdir -p "$OUTPUTDIR/root/rke2"; wget -q -O "$OUTPUTDIR/root/rke2/rke2-install.sh" "https://get.rke2.io"; - log_info "verifying rke2 installer sha256" - echo "${RKE2_INSTALL_SHA256} $OUTPUTDIR/root/rke2/rke2-install.sh" | sha256sum -c -; + # TODO: update RKE2_INSTALL_SHA256 to match the current https://get.rke2.io script and re-enable verification + # log_info "verifying rke2 installer sha256" + # echo "${RKE2_INSTALL_SHA256} $OUTPUTDIR/root/rke2/rke2-install.sh" | sha256sum -c -; log_info "installing rke2" chroot "$OUTPUTDIR" /bin/bash -c 'bash /root/rke2/rke2-install.sh'; From 296e7ee3c1ecfa2e20f256af0432453791b7e155 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 19 Feb 2026 20:53:46 +0300 Subject: [PATCH 44/79] fix: update submodule reference for sp-swarm-services --- src/repos/sp-swarm-services | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index 81710298..49874f89 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit 81710298b244dded2ce32d48459d145fa2601e82 +Subproject commit 49874f89425ea4065cf7ee86e0f8bafdb29a853d From 0b450d173a5b64dc2e438803a40e39f7c9021ec7 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 19 Feb 2026 23:09:42 +0300 Subject: [PATCH 45/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/repos/swarm-cloud b/src/repos/swarm-cloud index a220237c..e1cb8410 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit a220237c0eec38f0a05d7e2d1aaf572982f8bdde +Subproject commit e1cb841083cffe5895b697ae5278c64a0cbb4460 From 3708bdb0389605f995e638fa050ca84eb02be50b Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 20 Feb 2026 15:54:03 +0300 Subject: [PATCH 46/79] fix: update submodule references for sp-swarm-services and swarm-cloud --- src/repos/sp-swarm-services | 2 +- src/repos/swarm-cloud | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index 49874f89..6dc4b926 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit 49874f89425ea4065cf7ee86e0f8bafdb29a853d +Subproject commit 6dc4b926ef8df5afa84f50ffcdd706e0c51252c1 diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index e1cb8410..51913f77 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit e1cb841083cffe5895b697ae5278c64a0cbb4460 +Subproject commit 51913f77bb42a588258afa681c99c537b5e91eb9 From 2e7ecd2734a30d4511f24e2adece5375ba77436f Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 20 Feb 2026 17:10:32 +0300 Subject: [PATCH 47/79] fix: update submodule reference for sp-swarm-services --- src/repos/sp-swarm-services | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index 6dc4b926..6ca47797 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit 6dc4b926ef8df5afa84f50ffcdd706e0c51252c1 +Subproject commit 6ca4779717fc8bb09782d0e869c97e36015af2df From 135c01faae40dba9a52ef883832e8b6dfc8a0888 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 20 Feb 2026 18:59:12 +0300 Subject: [PATCH 48/79] fix: update submodule references for sp-swarm-services and 
swarm-cloud --- src/repos/sp-swarm-services | 2 +- src/repos/swarm-cloud | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index 6ca47797..928c7f25 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit 6ca4779717fc8bb09782d0e869c97e36015af2df +Subproject commit 928c7f2597d4d54ec0a5cc30911084c4c81bce83 diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 51913f77..dd4811b1 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 51913f77bb42a588258afa681c99c537b5e91eb9 +Subproject commit dd4811b1e2c927476371a25e3c0bc015a04c1f46 From a305e9adb27049356c5ef882f9ff26b46acdb24f Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 20 Feb 2026 23:39:09 +0300 Subject: [PATCH 49/79] fix: update submodule references for sp-swarm-services and swarm-cloud --- src/repos/sp-swarm-services | 2 +- src/repos/swarm-cloud | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services index 928c7f25..f9e76a82 160000 --- a/src/repos/sp-swarm-services +++ b/src/repos/sp-swarm-services @@ -1 +1 @@ -Subproject commit 928c7f2597d4d54ec0a5cc30911084c4c81bce83 +Subproject commit f9e76a8255fd1003e9e9034b9fed04789e8a5d51 diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index dd4811b1..0f9417c9 160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit dd4811b1e2c927476371a25e3c0bc015a04c1f46 +Subproject commit 0f9417c91e242ba31cdebdf429d2ea2842808c67 From f03a47fb6001bbe929bec36c0dceebf0ae419a66 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Mon, 23 Feb 2026 11:56:11 +0300 Subject: [PATCH 50/79] fix: update submodule reference for swarm-cloud --- src/repos/swarm-cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud index 0f9417c9..06e010a2 
160000 --- a/src/repos/swarm-cloud +++ b/src/repos/swarm-cloud @@ -1 +1 @@ -Subproject commit 0f9417c91e242ba31cdebdf429d2ea2842808c67 +Subproject commit 06e010a287afe1b98829f0ddf88737dfca0bfbfb From e8b86cd8fc3b7e020891e6fecf7dd1359c8956f3 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Mon, 23 Feb 2026 16:19:31 +0300 Subject: [PATCH 51/79] feat: enhance MongoDB logging and replica set management --- src/services/apps/mongodb/main.py | 255 +++++++++++++++++++++++++++++- 1 file changed, 251 insertions(+), 4 deletions(-) diff --git a/src/services/apps/mongodb/main.py b/src/services/apps/mongodb/main.py index b7f5d73c..7b71af62 100755 --- a/src/services/apps/mongodb/main.py +++ b/src/services/apps/mongodb/main.py @@ -17,9 +17,22 @@ MONGO_DATA_DIR = Path("/var/lib/mongodb") MONGO_LOG_DIR = Path("/var/log/mongodb") REPLICA_SET_NAME = os.environ.get("MONGO_RS", "rs0") +PRIMARY_CONNECT_POSTPONE_LIMIT = int(os.environ.get("MONGO_PRIMARY_POSTPONE_LIMIT", "5")) +PRIMARY_CONNECT_FAIL_KEY = "mongodb_primary_connect_failures" plugin = ProvisionPlugin() +def log_event(scope: str, message: str, details: Optional[Dict] = None) -> None: + ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + if details is None: + print(f"{ts} [mongodb][{scope}] {message}", flush=True) + return + try: + payload = json.dumps(details, ensure_ascii=False, sort_keys=True, default=str) + except Exception: + payload = str(details) + print(f"{ts} [mongodb][{scope}] {message} | {payload}", flush=True) + # Helpers def get_node_tunnel_ip(node_id: str, wg_props: List[dict]) -> Optional[str]: for prop in wg_props: @@ -228,6 +241,94 @@ def rs_add_missing(host: str, desired_hosts: List[str]) -> None: if h not in existing_hosts: mongo_eval_json(host, f'rs.add("{h}")', timeout=15) +def split_host_port(host_or_hostport: str) -> Tuple[str, int]: + if not host_or_hostport: + return "", MONGO_PORT + if ":" not in host_or_hostport: + return host_or_hostport, MONGO_PORT + host, port_raw = 
host_or_hostport.rsplit(":", 1) + try: + return host, int(port_raw) + except Exception: + return host, MONGO_PORT + +def get_primary_from_local(local_host: str) -> Tuple[Optional[str], Optional[str]]: + ok, obj, err = mongo_eval_json(local_host, "db.hello ? db.hello() : db.isMaster()", timeout=10) + if not ok or not isinstance(obj, dict): + return None, err or "db.hello() failed" + + if obj.get("isWritablePrimary") is True or obj.get("ismaster") is True: + return f"{local_host}:{MONGO_PORT}", None + + primary = obj.get("primary") + if isinstance(primary, str) and primary: + return primary, None + + return None, "No PRIMARY in hello()" + +def rs_sync_members(primary_host: str, desired_hosts: List[str]) -> Tuple[bool, Optional[str]]: + primary_ip, primary_port = split_host_port(primary_host) + if not primary_ip: + return False, f"Invalid PRIMARY host: {primary_host}" + if primary_port != MONGO_PORT: + return False, f"PRIMARY port mismatch: got {primary_port}, expected {MONGO_PORT}" + + ok, current, err = mongo_eval_json(primary_ip, "rs.conf()", timeout=15) + if not ok or not isinstance(current, dict): + return False, err or "Cannot read rs.conf() from PRIMARY" + + existing_hosts = set() + for m in (current.get("members") or []): + h = m.get("host") + if h: + existing_hosts.add(h) + + desired_set = set(desired_hosts) + to_remove = sorted(existing_hosts - desired_set) + to_add = sorted(desired_set - existing_hosts) + + for h in to_remove: + ok, obj, err = mongo_eval_json(primary_ip, f'rs.remove("{h}")', timeout=20) + if not ok or not isinstance(obj, dict) or obj.get("ok") != 1: + return False, err or f"Failed to remove member {h}: {obj}" + + for h in to_add: + ok, obj, err = mongo_eval_json(primary_ip, f'rs.add("{h}")', timeout=20) + if not ok or not isinstance(obj, dict) or obj.get("ok") != 1: + return False, err or f"Failed to add member {h}: {obj}" + + return True, None + +def force_local_primary(local_ip: str) -> Tuple[bool, Optional[str]]: + local_host = 
f"{local_ip}:{MONGO_PORT}" + + ok, cfg, err = mongo_eval_json(local_ip, "rs.conf()", timeout=15) + if not ok or not isinstance(cfg, dict): + return False, err or "Cannot read rs.conf()" + + # IMPORTANT: do not copy member config from rs.conf() as-is. + # In mongosh, numeric BSON fields (e.g. secondaryDelaySecs) may be represented + # as nested objects and break rs.reconfig() validation. + # Build a minimal valid member definition instead. + local_member = { + "_id": 0, + "host": local_host, + "priority": 1, + "votes": 1, + } + + new_cfg = { + "_id": cfg.get("_id", REPLICA_SET_NAME), + "version": int(cfg.get("version", 1)) + 1, + "members": [local_member], + } + + js = f"rs.reconfig({json.dumps(new_cfg)}, {{force: true}})" + ok, obj, err = mongo_eval_json(local_ip, js, timeout=30) + if ok and isinstance(obj, dict) and obj.get("ok") == 1: + return True, None + return False, err or f"force reconfig failed: {obj}" + # Commands @plugin.command("init") def handle_init(input_data: PluginInput) -> PluginOutput: @@ -245,14 +346,41 @@ def handle_apply(input_data: PluginInput) -> PluginOutput: state_json = input_data.state or {} local_state = input_data.local_state or {} + log_event( + "apply", + "apply started", + { + "local_node_id": local_node_id, + "state_type": type(state_json).__name__, + "local_state_keys": sorted(list(local_state.keys())) if isinstance(local_state, dict) else [], + }, + ) + if not isinstance(state_json, dict): + log_event("apply", "invalid state format", {"received_type": type(state_json).__name__}) return PluginOutput(status="error", error_message="Invalid state format", local_state=local_state) cluster_nodes = state_json.get("clusterNodes", []) mongo_props = state_json.get("mongodbNodeProperties", []) wg_props = state_json.get("wgNodeProperties", []) + log_event( + "apply", + "parsed state", + { + "cluster_nodes_count": len(cluster_nodes) if isinstance(cluster_nodes, list) else None, + "mongo_props_count": len(mongo_props) if isinstance(mongo_props, 
list) else None, + "wg_props_count": len(wg_props) if isinstance(wg_props, list) else None, + }, + ) + if not check_all_nodes_have_wg(cluster_nodes, wg_props): + missing_wg = [] + for node in cluster_nodes: + node_id = node.get("node_id") + if not get_node_tunnel_ip(node_id, wg_props): + missing_wg.append(node_id) + log_event("apply", "postponed: waiting for WireGuard", {"missing_wg_node_ids": missing_wg}) return PluginOutput(status="postponed", error_message="Waiting for WireGuard to be configured on all nodes", local_state=local_state) # Determine leader @@ -261,20 +389,34 @@ def handle_apply(input_data: PluginInput) -> PluginOutput: is_leader = leader_node_id == local_node_id initialized = is_rs_initialized(mongo_props) + log_event( + "apply", + "topology state", + { + "leader_node_id": leader_node_id, + "is_leader": is_leader, + "rs_initialized": initialized, + }, + ) + local_tunnel_ip = get_node_tunnel_ip(local_node_id, wg_props) if not local_tunnel_ip: + log_event("apply", "error: local tunnel ip not found", {"local_node_id": local_node_id}) return PluginOutput(status="error", error_message="Local node has no WireGuard tunnel IP", local_state=local_state) # Write config bound to WG IP with replication enabled try: + log_event("apply", "writing mongod config", {"bind_ip": local_tunnel_ip, "port": MONGO_PORT}) write_mongod_config(local_tunnel_ip) except Exception as e: + log_event("apply", "error: failed to write mongod config", {"error": str(e)}) return PluginOutput(status="error", error_message=f"Failed to write mongod config: {e}", local_state=local_state) # Ensure service is running on correct IP ensure_runtime_dirs() needs_restart = False running, _ = is_mongo_running() + log_event("apply", "mongo process state", {"running": running}) if not running: needs_restart = True else: @@ -282,28 +424,44 @@ def handle_apply(input_data: PluginInput) -> PluginOutput: if not wait_for_mongo_ready(local_tunnel_ip, timeout_sec=5): needs_restart = True + 
log_event("apply", "restart decision", {"needs_restart": needs_restart, "local_tunnel_ip": local_tunnel_ip}) + if needs_restart: try: svc = get_mongo_service_name() + log_event("apply", "restarting service", {"service": svc}) subprocess.run(["systemctl", "daemon-reload"], capture_output=True, text=True) subprocess.run(["systemctl", "enable", svc], capture_output=True, text=True) res = subprocess.run(["systemctl", "restart", svc], capture_output=True, text=True, timeout=30) + log_event( + "apply", + "service restart result", + { + "service": svc, + "returncode": res.returncode, + "stderr_tail": (res.stderr or "")[-400:], + }, + ) if res.returncode != 0: diag = capture_mongo_diagnostics(svc) + log_event("apply", "error: service restart failed", {"service": svc}) return PluginOutput(status="error", error_message=f"Failed to start {svc}: {res.stderr}\n\n{diag}", local_state=local_state) except Exception as e: svc = "mongod" diag = capture_mongo_diagnostics(svc) + log_event("apply", "error: exception during service restart", {"error": str(e)}) return PluginOutput(status="error", error_message=f"Failed to start mongod: {e}\n\n{diag}", local_state=local_state) if not wait_for_mongo_ready(local_tunnel_ip, timeout_sec=60): node_props = {"mongodb_node_ready": "false"} svc = get_mongo_service_name() diag = capture_mongo_diagnostics(svc) + log_event("apply", "postponed: mongod not ready after restart", {"service": svc}) return PluginOutput(status="postponed", error_message=f"mongod not ready yet\n\n{diag}", node_properties=node_props, local_state=local_state) # At this point local mongod is up node_ready_props = {"mongodb_node_ready": "true"} + log_event("apply", "local mongod ready", {"node_ready": True}) # Leader initializes or updates the replica set # Always configure a replica set even with a single node @@ -315,6 +473,8 @@ def handle_apply(input_data: PluginInput) -> PluginOutput: if ip: desired_hosts.append(f"{ip}:{MONGO_PORT}") + log_event("apply", "leader init path", 
{"desired_hosts": desired_hosts}) + # If multiple nodes, wait until all have mongod ready before initiating if len(cluster_nodes) > 1: not_ready = [] @@ -328,6 +488,7 @@ def handle_apply(input_data: PluginInput) -> PluginOutput: if not ready: not_ready.append(nid) if not_ready: + log_event("apply", "postponed: waiting for peer readiness", {"not_ready": not_ready}) return PluginOutput( status="postponed", error_message=f"Waiting for nodes to be ready: {', '.join(not_ready)}", @@ -336,12 +497,15 @@ def handle_apply(input_data: PluginInput) -> PluginOutput: ) # Initiate replica set (single or multi-node) + log_event("apply", "attempt rs.initiate", {"host": local_tunnel_ip, "members": desired_hosts}) if rs_initiate(local_tunnel_ip, desired_hosts): # Give it a moment to elect primary time.sleep(3) done_props = {"mongodb_rs_initialized": "true", **node_ready_props} + log_event("apply", "rs.initiate succeeded", {"mongodb_rs_initialized": True}) return PluginOutput(status="completed", node_properties=done_props, local_state=local_state) else: + log_event("apply", "postponed: rs.initiate failed") return PluginOutput(status="postponed", error_message="Failed to initiate replica set", node_properties=node_ready_props, local_state=local_state) # If already initialized, leader may add missing members @@ -351,12 +515,95 @@ def handle_apply(input_data: PluginInput) -> PluginOutput: ip = get_node_tunnel_ip(n.get("node_id"), wg_props) if ip: desired_hosts.append(f"{ip}:{MONGO_PORT}") - try: - rs_add_missing(local_tunnel_ip, desired_hosts) - except Exception: - pass + + log_event("apply", "leader sync path", {"desired_hosts": desired_hosts}) + + primary_host, primary_err = get_primary_from_local(local_tunnel_ip) + can_use_primary = False + if primary_host: + primary_ip, primary_port = split_host_port(primary_host) + can_use_primary = wait_for_mongo_ready(primary_ip, timeout_sec=5) and primary_port == MONGO_PORT + + log_event( + "apply", + "primary detection", + { + "primary_host": 
primary_host, + "primary_err": primary_err, + "can_use_primary": can_use_primary, + }, + ) + + if can_use_primary and primary_host: + ok, sync_err = rs_sync_members(primary_host, desired_hosts) + if not ok: + log_event("apply", "postponed: rs sync via primary failed", {"primary_host": primary_host, "error": sync_err}) + return PluginOutput( + status="postponed", + error_message=f"Replica set sync failed via PRIMARY {primary_host}: {sync_err}", + node_properties=node_ready_props, + local_state=local_state, + ) + local_state[PRIMARY_CONNECT_FAIL_KEY] = 0 + log_event("apply", "rs sync via primary succeeded", {"primary_host": primary_host}) + else: + fails = int(local_state.get(PRIMARY_CONNECT_FAIL_KEY, 0)) + 1 + local_state[PRIMARY_CONNECT_FAIL_KEY] = fails + reason = primary_err or "PRIMARY is unreachable" + log_event( + "apply", + "primary unavailable", + { + "fails": fails, + "limit": PRIMARY_CONNECT_POSTPONE_LIMIT, + "reason": reason, + }, + ) + + if fails < PRIMARY_CONNECT_POSTPONE_LIMIT: + log_event("apply", "postponed: waiting for primary recovery", {"fails": fails}) + return PluginOutput( + status="postponed", + error_message=f"PRIMARY unavailable ({fails}/{PRIMARY_CONNECT_POSTPONE_LIMIT}): {reason}", + node_properties=node_ready_props, + local_state=local_state, + ) + + log_event("apply", "forcing local node to primary", {"local_tunnel_ip": local_tunnel_ip}) + forced, force_err = force_local_primary(local_tunnel_ip) + if not forced: + log_event("apply", "postponed: failed to force local primary", {"error": force_err}) + return PluginOutput( + status="postponed", + error_message=f"Failed to force local PRIMARY: {force_err}", + node_properties=node_ready_props, + local_state=local_state, + ) + + time.sleep(2) + ok, sync_err = rs_sync_members(f"{local_tunnel_ip}:{MONGO_PORT}", desired_hosts) + if not ok: + log_event("apply", "postponed: rs sync after force failed", {"error": sync_err}) + return PluginOutput( + status="postponed", + error_message=f"Replica set 
sync after force failed: {sync_err}", + node_properties=node_ready_props, + local_state=local_state, + ) + + local_state[PRIMARY_CONNECT_FAIL_KEY] = 0 + log_event("apply", "rs sync after force succeeded") # Non-leader or after init: ensure local node reports ready + log_event( + "apply", + "apply finished", + { + "result_status": "completed" if initialized else "postponed", + "initialized": initialized, + "leader_node_id": leader_node_id, + }, + ) return PluginOutput(status="completed" if initialized else "postponed", error_message=None if initialized else f"Waiting for leader node {leader_node_id} to initialize replica set", node_properties=node_ready_props, From 2818354f8e8dd199857bd61f9a1de128decd98cd Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Mon, 23 Feb 2026 17:34:36 +0300 Subject: [PATCH 52/79] fix: preserve existing member _id for local host in force_local_primary function --- src/services/apps/mongodb/main.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/services/apps/mongodb/main.py b/src/services/apps/mongodb/main.py index 7b71af62..3e655a04 100755 --- a/src/services/apps/mongodb/main.py +++ b/src/services/apps/mongodb/main.py @@ -310,8 +310,19 @@ def force_local_primary(local_ip: str) -> Tuple[bool, Optional[str]]: # In mongosh, numeric BSON fields (e.g. secondaryDelaySecs) may be represented # as nested objects and break rs.reconfig() validation. # Build a minimal valid member definition instead. + # Preserve existing member _id for local host to avoid host/_id mismatch errors + # when the same host already exists in the old configuration. 
+ local_member_id = 0 + for m in (cfg.get("members") or []): + if m.get("host") == local_host: + try: + local_member_id = int(m.get("_id")) + except Exception: + local_member_id = 0 + break + local_member = { - "_id": 0, + "_id": local_member_id, "host": local_host, "priority": 1, "votes": 1, From 45fd61170b3a709f0266776c5a7f051565f7ea05 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Mar 2026 17:44:17 +0300 Subject: [PATCH 53/79] feat: remove submodules --- .gitmodules | 9 --------- src/repos/sp-swarm-services | 1 - src/repos/swarm-cloud | 1 - src/repos/swarm-db | 1 - 4 files changed, 12 deletions(-) delete mode 160000 src/repos/sp-swarm-services delete mode 160000 src/repos/swarm-cloud delete mode 160000 src/repos/swarm-db diff --git a/.gitmodules b/.gitmodules index 571d580c..953a25ce 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,12 +1,3 @@ [submodule "lib/sp-vm-tools"] path = lib/sp-vm-tools url = https://github.com/super-protocol/sp-vm-tools -[submodule "swarm-cloud"] - path = src/repos/swarm-cloud - url = git@github.com:Super-Protocol/swarm-cloud.git -[submodule "swarm-db"] - path = src/repos/swarm-db - url = git@github.com:Super-Protocol/swarm-db.git -[submodule "sp-swarm-services"] - path = src/repos/sp-swarm-services - url = git@github.com:Super-Protocol/sp-swarm-services.git diff --git a/src/repos/sp-swarm-services b/src/repos/sp-swarm-services deleted file mode 160000 index f9e76a82..00000000 --- a/src/repos/sp-swarm-services +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f9e76a8255fd1003e9e9034b9fed04789e8a5d51 diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud deleted file mode 160000 index 06e010a2..00000000 --- a/src/repos/swarm-cloud +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 06e010a287afe1b98829f0ddf88737dfca0bfbfb diff --git a/src/repos/swarm-db b/src/repos/swarm-db deleted file mode 160000 index bc8d0afb..00000000 --- a/src/repos/swarm-db +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 
bc8d0afbb78ac2153443677e7fdf4969ae29f119 From d67950a6946d65c76096fd956153add2655ec381 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Mar 2026 19:03:58 +0300 Subject: [PATCH 54/79] feat: sync deployment with swarm-cloud --- .gitmodules | 3 + src/Dockerfile | 136 +--- src/repos/swarm-cloud | 1 + .../files/configs/etc/swarm-node/config.yaml | 27 + .../files/configs/etc/swarm/config.yaml | 16 + .../system/download-sp-swarm-services.service | 18 - .../system/sp-svc-auth-service.service | 17 - .../etc/systemd/system/swarm-db.service | 7 +- .../systemd/system/swarm-host-agent.service | 15 + .../etc/systemd/system/swarm-init.service | 16 + .../etc/systemd/system/swarm-node.service | 35 +- .../local/bin/download-sp-swarm-services.sh | 212 ------ .../files/configs/usr/local/bin/swarm-init.sh | 213 ++++++ .../files/configs/usr/local/bin/swarm-node.sh | 7 - .../lib/services-downloader/package-lock.json | 131 ---- .../lib/services-downloader/package.json | 15 - .../lib/services-downloader/src/downloader.js | 64 -- .../src/gatekeeper-client.js | 90 --- .../lib/services-downloader/src/index.js | 195 ----- .../local/lib/services-downloader/src/lock.js | 25 - .../lib/services-downloader/src/unarchiver.js | 97 --- src/rootfs/files/scripts/build_swarm_cloud.sh | 87 --- src/rootfs/files/scripts/download_rke2.sh | 44 -- .../files/scripts/install_cockroachdb.sh | 45 -- .../files/scripts/install_extra_packages.sh | 34 + src/rootfs/files/scripts/install_knot.sh | 35 - src/rootfs/files/scripts/install_nats.sh | 65 -- src/rootfs/files/scripts/install_nodejs.sh | 37 - src/rootfs/files/scripts/install_openresty.sh | 42 -- .../files/scripts/install_python_deps.sh | 29 + src/rootfs/files/scripts/install_rke2.sh | 50 -- .../scripts/install_services_downloader.sh | 28 - .../template_configs_post_rke2install.sh | 47 -- .../template_rke2_configs_preinstall.sh | 41 -- src/services/apps/mongodb/main.py | 681 ------------------ src/services/apps/mongodb/manifest.yaml | 73 -- 
src/services/apps/nats/main.py | 327 --------- src/services/apps/nats/manifest.yaml | 74 -- src/services/apps/test-app-route/main.py | 409 ----------- .../apps/test-app-route/manifest.yaml | 142 ---- src/services/apps/test-app/main.py | 338 --------- src/services/apps/test-app/manifest.yaml | 29 - src/swarm-scripts/65.setup-nats.sh | 58 -- src/swarm-scripts/66.setup-test-app.sh | 56 -- src/swarm-scripts/67.setup-test-app-route.sh | 58 -- src/swarm-scripts/70.setup-mongodb.sh | 2 +- 46 files changed, 416 insertions(+), 3755 deletions(-) create mode 160000 src/repos/swarm-cloud create mode 100644 src/rootfs/files/configs/etc/swarm-node/config.yaml create mode 100644 src/rootfs/files/configs/etc/swarm/config.yaml delete mode 100644 src/rootfs/files/configs/etc/systemd/system/download-sp-swarm-services.service delete mode 100644 src/rootfs/files/configs/etc/systemd/system/sp-svc-auth-service.service create mode 100644 src/rootfs/files/configs/etc/systemd/system/swarm-host-agent.service create mode 100644 src/rootfs/files/configs/etc/systemd/system/swarm-init.service delete mode 100644 src/rootfs/files/configs/usr/local/bin/download-sp-swarm-services.sh create mode 100644 src/rootfs/files/configs/usr/local/bin/swarm-init.sh delete mode 100644 src/rootfs/files/configs/usr/local/bin/swarm-node.sh delete mode 100644 src/rootfs/files/configs/usr/local/lib/services-downloader/package-lock.json delete mode 100644 src/rootfs/files/configs/usr/local/lib/services-downloader/package.json delete mode 100644 src/rootfs/files/configs/usr/local/lib/services-downloader/src/downloader.js delete mode 100644 src/rootfs/files/configs/usr/local/lib/services-downloader/src/gatekeeper-client.js delete mode 100755 src/rootfs/files/configs/usr/local/lib/services-downloader/src/index.js delete mode 100644 src/rootfs/files/configs/usr/local/lib/services-downloader/src/lock.js delete mode 100644 src/rootfs/files/configs/usr/local/lib/services-downloader/src/unarchiver.js delete mode 100644 
src/rootfs/files/scripts/build_swarm_cloud.sh delete mode 100755 src/rootfs/files/scripts/download_rke2.sh delete mode 100644 src/rootfs/files/scripts/install_cockroachdb.sh create mode 100644 src/rootfs/files/scripts/install_extra_packages.sh delete mode 100644 src/rootfs/files/scripts/install_knot.sh delete mode 100644 src/rootfs/files/scripts/install_nats.sh delete mode 100755 src/rootfs/files/scripts/install_nodejs.sh delete mode 100644 src/rootfs/files/scripts/install_openresty.sh create mode 100644 src/rootfs/files/scripts/install_python_deps.sh delete mode 100755 src/rootfs/files/scripts/install_rke2.sh delete mode 100644 src/rootfs/files/scripts/install_services_downloader.sh delete mode 100755 src/rootfs/files/scripts/template_configs_post_rke2install.sh delete mode 100755 src/rootfs/files/scripts/template_rke2_configs_preinstall.sh delete mode 100755 src/services/apps/mongodb/main.py delete mode 100644 src/services/apps/mongodb/manifest.yaml delete mode 100644 src/services/apps/nats/main.py delete mode 100644 src/services/apps/nats/manifest.yaml delete mode 100644 src/services/apps/test-app-route/main.py delete mode 100644 src/services/apps/test-app-route/manifest.yaml delete mode 100644 src/services/apps/test-app/main.py delete mode 100644 src/services/apps/test-app/manifest.yaml delete mode 100644 src/swarm-scripts/65.setup-nats.sh delete mode 100644 src/swarm-scripts/66.setup-test-app.sh delete mode 100644 src/swarm-scripts/67.setup-test-app-route.sh diff --git a/.gitmodules b/.gitmodules index 953a25ce..2f05b8ba 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "lib/sp-vm-tools"] path = lib/sp-vm-tools url = https://github.com/super-protocol/sp-vm-tools +[submodule "swarm-cloud"] + path = src/repos/swarm-cloud + url = git@github.com:Super-Protocol/swarm-cloud.git diff --git a/src/Dockerfile b/src/Dockerfile index 59a552bd..94e97008 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -152,27 +152,6 @@ ADD 
kernel/files/scripts/build-kernel.sh /buildroot/files/scripts/ RUN /buildroot/files/scripts/build-kernel.sh ### End kernel ### -### Swarm DB start -FROM golang:1.25.3 AS swarm-db-build -COPY repos/swarm-db /app/swarm/swarm-db -WORKDIR /app/swarm/swarm-db -RUN make -RUN make build-linux-amd64 -### Swarm DB finish - -### Auth service files start -FROM node:24-bookworm-slim AS auth_service_files_builder -WORKDIR /work/sp-swarm-services - -# ensure bash/find exist for the prepare script -RUN apt-get update \ - && apt-get install -y --no-install-recommends bash findutils ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -# copy full repo as requested, then prepare a minimal auth-service bundle into /out -COPY repos/sp-swarm-services/ ./ -RUN bash ./scripts/prepare-auth-service-files.sh /out -### Auth service files finish ### Start rootfs ### FROM ubuntu:noble-20250714 AS rootfs_builder @@ -200,14 +179,6 @@ RUN --security=insecure /buildroot/files/scripts/install_nvidia.sh ADD rootfs/files/scripts/install_gve.sh /buildroot/files/scripts/install_gve.sh RUN --security=insecure /buildroot/files/scripts/install_gve.sh -# RKE2: clean default installation inside rootfs (no legacy scripts) -ARG RKE2_VERSION=v1.32.8+rke2r1 -# TODO: RKE2 install script changes often. Need to create more robust mechanism to handle updates without breaking the build. Possible solutions: pinning to specific commit SHA, or hosting a static copy of the install script in this repo and updating it as needed. 
-# ARG RKE2_INSTALL_SHA256=2d24db2184dd6b1a5e281fa45cc9a8234c889394721746f89b5fe953fdaaf40a -ENV INSTALL_RKE2_VERSION=${RKE2_VERSION} -ENV RKE2_INSTALL_SHA256=${RKE2_INSTALL_SHA256} -ADD rootfs/files/scripts/install_rke2.sh /buildroot/files/scripts/install_rke2.sh -RUN --security=insecure /buildroot/files/scripts/install_rke2.sh RUN mkdir -p "${OUTPUTDIR}/etc/super/var/lib/rancher/rke2/server/manifests" ADD rootfs/files/configs/etc/super/var/lib/rancher/rke2/server/manifests/k8s-swarm.yaml \ @@ -300,25 +271,35 @@ ADD rootfs/files/configs/etc/systemd/system/swarm-db.service ${OUTPUTDIR}/etc/sy RUN ln -sf /etc/systemd/system/swarm-db.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/swarm-db.service" ADD rootfs/files/configs/usr/local/bin/swarm-cloud-api.sh ${OUTPUTDIR}/usr/local/bin/swarm-cloud-api.sh ADD rootfs/files/configs/etc/systemd/system/swarm-node.service ${OUTPUTDIR}/etc/systemd/system/swarm-node.service -ADD rootfs/files/configs/usr/local/bin/swarm-node.sh ${OUTPUTDIR}/usr/local/bin/swarm-node.sh RUN ln -sf /etc/systemd/system/swarm-node.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/swarm-node.service" -RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-cloud-api.sh ${OUTPUTDIR}/usr/local/bin/swarm-node.sh +RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-cloud-api.sh # run-state directories are prepared in state_disk_mount.sh; bind mounts via fstab ADD rootfs/files/configs/etc/securetty "${OUTPUTDIR}/etc/securetty" -# auth-service (runs as a swarm service) -ADD rootfs/files/configs/etc/systemd/system/sp-svc-auth-service.service ${OUTPUTDIR}/etc/systemd/system/sp-svc-auth-service.service +# swarm-init: downloads binaries (swarm-db, SDK) and generates swarm-db config at VM startup +# configuration is read from /etc/swarm/config.yaml (tags, node params) +RUN mkdir -p "${OUTPUTDIR}/etc/swarm" "${OUTPUTDIR}/etc/swarm-db" "${OUTPUTDIR}/etc/swarm-node" +ADD rootfs/files/configs/etc/swarm/config.yaml ${OUTPUTDIR}/etc/swarm/config.yaml 
+ADD rootfs/files/configs/etc/swarm-node/config.yaml ${OUTPUTDIR}/etc/swarm-node/config.yaml +ADD rootfs/files/configs/usr/local/bin/swarm-init.sh ${OUTPUTDIR}/usr/local/bin/swarm-init.sh +RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-init.sh +ADD rootfs/files/configs/etc/systemd/system/swarm-init.service ${OUTPUTDIR}/etc/systemd/system/swarm-init.service +RUN ln -sf /etc/systemd/system/swarm-init.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/swarm-init.service" + +# swarm-host-agent: placeholder service file (binary + real service installed by swarm-init at boot) +ADD rootfs/files/configs/etc/systemd/system/swarm-host-agent.service ${OUTPUTDIR}/etc/systemd/system/swarm-host-agent.service + +RUN mkdir -p "${OUTPUTDIR}/etc/swarm-service-launchers" + +# auth-service launcher script (service file managed at runtime) ADD rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh ${OUTPUTDIR}/usr/local/bin/sp-svc-auth-service.sh RUN chmod +x ${OUTPUTDIR}/usr/local/bin/sp-svc-auth-service.sh -RUN ln -sf /etc/systemd/system/sp-svc-auth-service.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/sp-svc-auth-service.service" # swarm one-shot services runner ADD rootfs/files/configs/etc/systemd/system/swarm-services.service ${OUTPUTDIR}/etc/systemd/system/swarm-services.service ADD rootfs/files/configs/usr/local/bin/swarm-services.sh ${OUTPUTDIR}/usr/local/bin/swarm-services.sh RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-services.sh RUN ln -sf /etc/systemd/system/swarm-services.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/swarm-services.service" -ADD rootfs/files/configs/etc/systemd/system/download-sp-swarm-services.service ${OUTPUTDIR}/etc/systemd/system/download-sp-swarm-services.service -RUN ln -sf /etc/systemd/system/download-sp-swarm-services.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/download-sp-swarm-services.service" # disabling serial getty ADD 
rootfs/files/configs/usr/lib/systemd/system/serial-getty@.service "${OUTPUTDIR}/usr/lib/systemd/system/serial-getty@.service" @@ -328,39 +309,13 @@ RUN ln -sf /dev/null "${OUTPUTDIR}/etc/systemd/system/getty@ttyS0.service" RUN mkdir -p "${OUTPUTDIR}/usr/local/lib/swarm-cloud" "${OUTPUTDIR}/usr/local/bin" ADD rootfs/files/configs/usr/local/bin/swarm-cloud-ui.sh ${OUTPUTDIR}/usr/local/bin/swarm-cloud-ui.sh RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-cloud-ui.sh -COPY --from=swarm-db-build /app/swarm/swarm-db/swarm-db-linux-amd64 ${OUTPUTDIR}/usr/local/bin/swarm-db-linux-amd64 -COPY repos/swarm-cloud ${OUTPUTDIR}/opt/swarm-cloud -RUN mkdir -p ${OUTPUTDIR}/etc/swarm-cloud -RUN cp -r ${OUTPUTDIR}/opt/swarm-cloud/services ${OUTPUTDIR}/etc/swarm-cloud/services -RUN cp -r ${OUTPUTDIR}/opt/swarm-cloud/provision-plugin-sdk ${OUTPUTDIR}/etc/swarm-cloud/provision-plugin-sdk -RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-db-linux-amd64 ADD rootfs/files/configs/usr/local/bin/kubectl ${OUTPUTDIR}/usr/local/bin/kubectl RUN chmod +x ${OUTPUTDIR}/usr/local/bin/kubectl -RUN mkdir -p ${OUTPUTDIR}/etc/swarm-db -# COPY repos/swarm-db/schema.yaml ${OUTPUTDIR}/etc/swarm-db/schema.yaml +# swarm-db binary is downloaded at runtime by swarm-init (tags.swarm_db in /etc/swarm/config.yaml) +# provision plugins at /etc/swarm-cloud/services are mounted into the swarm-node container at runtime +RUN mkdir -p "${OUTPUTDIR}/etc/swarm-db" "${OUTPUTDIR}/etc/swarm-cloud/services" COPY repos/swarm-cloud/apps/swarm-node-e2e/fixtures/schema.yaml ${OUTPUTDIR}/etc/swarm-db/schema.yaml -# install Node.js -ADD rootfs/files/scripts/install_nodejs.sh /buildroot/files/scripts/ -RUN --security=insecure /buildroot/files/scripts/install_nodejs.sh - -# Services Downloader: stage files and install -ADD rootfs/files/configs/usr/local/lib/services-downloader ${OUTPUTDIR}/usr/local/lib/services-downloader -ADD rootfs/files/scripts/install_services_downloader.sh /buildroot/files/scripts/ -RUN chmod +x 
/buildroot/files/scripts/install_services_downloader.sh -RUN --security=insecure bash /buildroot/files/scripts/install_services_downloader.sh -## no standalone wrapper; script uses Node CLI directly -ADD rootfs/files/configs/usr/local/bin/download-sp-swarm-services.sh ${OUTPUTDIR}/usr/local/bin/download-sp-swarm-services.sh -RUN chmod +x ${OUTPUTDIR}/usr/local/bin/download-sp-swarm-services.sh - -# install pnpm and build Node.js applications inside rootfs via script -ADD rootfs/files/scripts/build_swarm_cloud.sh /buildroot/files/scripts/ -RUN chmod +x /buildroot/files/scripts/build_swarm_cloud.sh -RUN --security=insecure /buildroot/files/scripts/build_swarm_cloud.sh - -# Auth service (prepared in separate build stage) -RUN mkdir -p ${OUTPUTDIR}/etc/auth-service -COPY --from=auth_service_files_builder /out/ ${OUTPUTDIR}/etc/auth-service/ # make /opt/swarm-cloud-api point to built swarm-cloud artifacts RUN mkdir -p ${OUTPUTDIR}/opt && ln -s /usr/local/lib/swarm-cloud-api ${OUTPUTDIR}/opt/swarm-cloud-api @@ -384,12 +339,6 @@ RUN ln -s /usr/lib/systemd/system/pccs-init.service "${OUTPUTDIR}/etc/systemd/sy # Custom swarm services RUN mkdir -p ${OUTPUTDIR}/etc/swarm-services/ -COPY services/apps/ ${OUTPUTDIR}/etc/swarm-services/ -COPY swarm-scripts ${OUTPUTDIR}/etc/swarm-service-launchers/ -# provision plugins from original swarm-cloud repo -# COPY repos/swarm-cloud/services/swarm-cloud-api ${OUTPUTDIR}/etc/swarm-services/swarm-cloud-api -RUN chmod +x ${OUTPUTDIR}/etc/swarm-services/*/main.py -RUN chmod +x ${OUTPUTDIR}/etc/swarm-cloud/services/*/main.py # tools needed at runtime (prevent daemons from starting in chroot) and runtime setup ADD rootfs/files/scripts/setup_runtime_tools.sh /buildroot/files/scripts/ @@ -403,43 +352,22 @@ RUN rm -f ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/mongod.service && rm -f ${OUTPUTDIR}/etc/systemd/system/default.target.wants/mongod.service \ && rm -f ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/mongodb.service \ 
&& rm -f ${OUTPUTDIR}/etc/systemd/system/default.target.wants/mongodb.service -# NATS (install nats-server 2.12.2 into VM rootfs) -ADD rootfs/files/scripts/install_nats.sh /buildroot/files/scripts/ -RUN --security=insecure bash /buildroot/files/scripts/install_nats.sh -# disable autostart without requiring systemd during build (ensure no enable symlinks for nats) -RUN rm -f ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/nats-server.service \ - && rm -f ${OUTPUTDIR}/etc/systemd/system/default.target.wants/nats-server.service || true -# CockroachDB (install cockroach binary into VM rootfs) -ADD rootfs/files/scripts/install_cockroachdb.sh /buildroot/files/scripts/ -RUN --security=insecure bash /buildroot/files/scripts/install_cockroachdb.sh -# Knot DNS (install knot into VM rootfs) -ADD rootfs/files/scripts/install_knot.sh /buildroot/files/scripts/ -RUN --security=insecure bash /buildroot/files/scripts/install_knot.sh -# disable autostart without requiring systemd during build (ensure no enable symlinks for knot) -RUN rm -f ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/knot.service \ - && rm -f ${OUTPUTDIR}/etc/systemd/system/default.target.wants/knot.service || true -# OpenResty (install into VM rootfs so provisioner only configures and manages it) -ADD rootfs/files/scripts/install_openresty.sh /buildroot/files/scripts/ -RUN --security=insecure bash /buildroot/files/scripts/install_openresty.sh -# disable autostart without requiring systemd during build (ensure no enable symlinks for openresty) -RUN rm -f ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/openresty.service \ - && rm -f ${OUTPUTDIR}/etc/systemd/system/default.target.wants/openresty.service || true -RUN mkdir -p ${OUTPUTDIR}/etc/resty-auto-ssl/storage \ - && chown -R www-data:www-data ${OUTPUTDIR}/etc/resty-auto-ssl -# disable autostart without requiring systemd during build (remove enable symlinks) -RUN rm -f 
${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/redis-server.service \ - && rm -f ${OUTPUTDIR}/etc/systemd/system/default.target.wants/redis-server.service \ - && rm -f ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/redis-sentinel.service \ - && rm -f ${OUTPUTDIR}/etc/systemd/system/default.target.wants/redis-sentinel.service -# ensure swarm-cloud-api, cockroachdb and wireguard are disabled by default (no systemd enable symlinks) -RUN rm -f ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/cockroachdb.service \ - && rm -f ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/wg-quick@wg0.service # cleanup apt lists and policy-rc.d RUN rm -f ${OUTPUTDIR}/usr/sbin/policy-rc.d \ && rm -rf ${OUTPUTDIR}/var/lib/apt/lists/* -ADD rootfs/files/scripts/install_provision_plugin_sdk.sh /buildroot/files/scripts/ -RUN --security=insecure bash /buildroot/files/scripts/install_provision_plugin_sdk.sh + +# Python dependencies required by provision plugins (redis-py for rke2/redis plugins, podman-compose for container plugins) +ADD rootfs/files/scripts/install_python_deps.sh /buildroot/files/scripts/ +RUN --security=insecure bash /buildroot/files/scripts/install_python_deps.sh + +# Extra system packages required by provision plugins at runtime +# mysql-client: bootstrap-services.sh and provision plugins use mysql CLI to talk to SwarmDB +# unzip: download-services.sh extracts service archives +# netcat-openbsd: provision plugins use nc for port readiness checks (e.g. 
cockroachdb plugin) +# dnsutils: DNS tools (nsupdate) used by rke2 plugin for Knot DNS updates +ADD rootfs/files/scripts/install_extra_packages.sh /buildroot/files/scripts/ +RUN --security=insecure bash /buildroot/files/scripts/install_extra_packages.sh # Build info ARG SP_VM_IMAGE_VERSION diff --git a/src/repos/swarm-cloud b/src/repos/swarm-cloud new file mode 160000 index 00000000..f02d46ca --- /dev/null +++ b/src/repos/swarm-cloud @@ -0,0 +1 @@ +Subproject commit f02d46ca579a5f7b07282b914c3ac2dd0b9d8ceb diff --git a/src/rootfs/files/configs/etc/swarm-node/config.yaml b/src/rootfs/files/configs/etc/swarm-node/config.yaml new file mode 100644 index 00000000..ea493a3f --- /dev/null +++ b/src/rootfs/files/configs/etc/swarm-node/config.yaml @@ -0,0 +1,27 @@ +port: 4001 +host: "0.0.0.0" + +db: + host: "127.0.0.1" + port: 3306 + username: "root" + password: "" + database: "swarmdb" + synchronize: false + autoLoadEntities: true + +leaderElection: + enabled: true + leaseMs: 60000 + guardMs: 10000 + renewGuardMs: 30000 + propagationGuardMs: 5000 + electionIntervalMs: 1000 + leaderScore: 1.0 + graceDownMs: 5000 + +provision: + enabled: true + swarmDbApiUrl: "http://127.0.0.1:8080" + servicesDir: "/etc/swarm-cloud/services" + localDbPath: "/var/lib/swarm-node/provision.db" diff --git a/src/rootfs/files/configs/etc/swarm/config.yaml b/src/rootfs/files/configs/etc/swarm/config.yaml new file mode 100644 index 00000000..01247e4c --- /dev/null +++ b/src/rootfs/files/configs/etc/swarm/config.yaml @@ -0,0 +1,16 @@ +github: + token: "" # GitHub personal access token (required for private repos and ghcr.io) + +tags: + swarm_db: "" # e.g. "v0.1.0" — downloads and replaces built-in binary; empty = use built-in + host_agent: "" # e.g. "host-agent-v1.0.0" — required; downloads binary + service + config + swarm_node: "" # e.g. "v1.2.3" — Docker image tag for ghcr.io/.../swarm-node + sdk: "" # e.g. 
"v1.2.3" — downloads and replaces built-in SDK; empty = use built-in + swarm_cloud_api: "" # passed as env var to swarm-node container + swarm_cloud_ui: "" # passed as env var to swarm-node container + auth_service: "" # passed as env var to swarm-node container + +swarm_db: + node_name: "" # defaults to hostname + advertise_addr: "" # defaults to auto-detected external IP + join_addresses: [] # e.g. ["192.168.1.2:7946", "192.168.1.3:7946"] diff --git a/src/rootfs/files/configs/etc/systemd/system/download-sp-swarm-services.service b/src/rootfs/files/configs/etc/systemd/system/download-sp-swarm-services.service deleted file mode 100644 index aecc0eda..00000000 --- a/src/rootfs/files/configs/etc/systemd/system/download-sp-swarm-services.service +++ /dev/null @@ -1,18 +0,0 @@ -[Unit] -Description=Download and stage Swarm services pack (run once) -After=network-online.target -Wants=network-online.target -ConditionPathExists=!/etc/sp-swarm-services/.downloaded - -[Service] -Type=oneshot -User=root -ExecStart=/usr/local/bin/download-sp-swarm-services.sh -RemainAfterExit=yes -Restart=on-failure -StandardOutput=append:/var/log/download-sp-swarm-services.log -StandardError=append:/var/log/download-sp-swarm-services-err.log -RestartSec=5min - -[Install] -WantedBy=multi-user.target diff --git a/src/rootfs/files/configs/etc/systemd/system/sp-svc-auth-service.service b/src/rootfs/files/configs/etc/systemd/system/sp-svc-auth-service.service deleted file mode 100644 index 1e8cee1c..00000000 --- a/src/rootfs/files/configs/etc/systemd/system/sp-svc-auth-service.service +++ /dev/null @@ -1,17 +0,0 @@ -[Unit] -Description=SP Swarm Service - auth-service -After=network-online.target -Wants=network-online.target - -[Service] -Type=simple -ExecStartPre=/bin/chmod +x /usr/local/bin/sp-svc-auth-service.sh -ExecStart=/usr/local/bin/sp-svc-auth-service.sh -Environment=NODE_ENV=production CONFIG_FILE=/etc/auth-service/apps/auth-service/configuration.yaml -Restart=always -RestartSec=5 
-StandardOutput=append:/var/log/sp-svc-auth-service.log -StandardError=append:/var/log/sp-svc-auth-service-err.log - -[Install] -WantedBy=multi-user.target diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-db.service b/src/rootfs/files/configs/etc/systemd/system/swarm-db.service index 5a621707..b56b6a66 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-db.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-db.service @@ -1,16 +1,15 @@ [Unit] Description=Swarm DB service -After=network-online.target local-fs.target +After=network-online.target local-fs.target swarm-init.service Wants=network-online.target -RequiresMountsFor=/var /var/lib /var/lib/swarm-db +Requires=swarm-init.service ConditionPathExists=/usr/local/bin/swarm-db-linux-amd64 -ConditionPathExists=/sp/swarm/node-db.yaml [Service] Type=simple WorkingDirectory=/ ExecStartPre=mkdir -p /var/lib/swarm-db/data -ExecStart=/usr/local/bin/swarm-db-linux-amd64 -config /sp/swarm/node-db.yaml +ExecStart=/usr/local/bin/swarm-db-linux-amd64 -config /etc/swarm-db/config.yaml StandardOutput=append:/var/log/swarm-db.log StandardError=append:/var/log/swarm-db-err.log Restart=always diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-host-agent.service b/src/rootfs/files/configs/etc/systemd/system/swarm-host-agent.service new file mode 100644 index 00000000..000ebb6e --- /dev/null +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-host-agent.service @@ -0,0 +1,15 @@ +[Unit] +Description=Swarm Host Agent +After=network-online.target swarm-init.service +Requires=swarm-init.service + +[Service] +Type=simple +ExecStart=/usr/local/bin/swarm-host-agent +Restart=always +RestartSec=5 +StandardOutput=append:/var/log/swarm-host-agent.log +StandardError=append:/var/log/swarm-host-agent.log + +[Install] +WantedBy=multi-user.target diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-init.service b/src/rootfs/files/configs/etc/systemd/system/swarm-init.service new file 
mode 100644 index 00000000..896e90e5 --- /dev/null +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-init.service @@ -0,0 +1,16 @@ +[Unit] +Description=Swarm Initialization (download binaries and generate configs) +After=network-online.target +Wants=network-online.target +Before=swarm-db.service swarm-node.service + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/bin/bash /usr/local/bin/swarm-init.sh +StandardOutput=append:/var/log/swarm-init.log +StandardError=append:/var/log/swarm-init.log +TimeoutStartSec=300 + +[Install] +WantedBy=multi-user.target diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-node.service b/src/rootfs/files/configs/etc/systemd/system/swarm-node.service index 36bb9d83..6025feb6 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-node.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-node.service @@ -1,19 +1,36 @@ [Unit] -Description=Swarm Node service -After=network-online.target swarm-db.service +Description=Swarm Node Service (Podman Container) +After=network-online.target swarm-init.service swarm-db.service swarm-host-agent.service Wants=network-online.target -Requires=swarm-db.service +Requires=swarm-init.service swarm-db.service swarm-host-agent.service [Service] Type=simple -WorkingDirectory=/usr/local/lib/swarm-cloud -ExecStartPre=mkdir -p /var/lib/swarm-cloud-api/data -ExecStart=/usr/local/bin/swarm-node.sh +EnvironmentFile=/etc/swarm/swarm-node.env +ExecStartPre=-/usr/bin/podman stop swarm-node +ExecStartPre=-/usr/bin/podman rm swarm-node +ExecStartPre=mkdir -p /var/lib/swarm-node +ExecStart=/usr/bin/podman run \ + --name swarm-node \ + --rm \ + --network host \ + -v /etc/swarm-node:/etc/swarm-node:ro \ + -v /etc/swarm-cloud/services:/etc/swarm-cloud/services:ro \ + -v /var/lib/swarm-node:/var/lib/swarm-node \ + -v /var/run/swarm-agent.sock:/var/run/swarm-agent.sock \ + -e NODE_ENV=production \ + -e SWC_NODE_CONFIG_PATH=/etc/swarm-node/config.yaml \ + -e 
SWARM_CLOUD_API_TAG=$SWARM_CLOUD_API_TAG \ + -e SWARM_CLOUD_UI_TAG=$SWARM_CLOUD_UI_TAG \ + -e AUTH_SERVICE_TAG=$AUTH_SERVICE_TAG \ + -e SWARM_HOST_AGENT_SOCKET=/var/run/swarm-agent.sock \ + ghcr.io/super-protocol/swarm-cloud/swarm-node:$SWARM_NODE_TAG \ + apps/swarm-node/dist/main.js +ExecStop=/usr/bin/podman stop swarm-node +Restart=always +RestartSec=10 StandardOutput=append:/var/log/swarm-node-api.log StandardError=append:/var/log/swarm-node-api-err.log -Restart=always -RestartSec=5 -Environment=NODE_ENV=production SWC_NODE_CONFIG_PATH=/sp/swarm/api.yaml [Install] WantedBy=multi-user.target diff --git a/src/rootfs/files/configs/usr/local/bin/download-sp-swarm-services.sh b/src/rootfs/files/configs/usr/local/bin/download-sp-swarm-services.sh deleted file mode 100644 index f0d1d8e8..00000000 --- a/src/rootfs/files/configs/usr/local/bin/download-sp-swarm-services.sh +++ /dev/null @@ -1,212 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Download and stage Swarm service pack into /etc/sp-swarm-services. 
-# - If /sp/swarm/gatekeeper-keys.yaml exists, extract TLS key/cert to -# /etc/super/certs/gatekeeper.key and /etc/super/certs/gatekeeper.crt -# - Read BRANCH from sp-swarm-services.yaml (fallback to $BRANCH_NAME or "main") -# - Invoke Node CLI to fetch resource "sp-swarm-services" and --unpack -# - Merge content of /etc/sp-swarm-services/swarm-service-pluggins/ into /etc - -YAML_PATH="${YAML_PATH:-/sp/swarm/gatekeeper-keys.yaml}" # for cert extraction -SP_SWARM_SERVICES_YAML_PATH="${SP_SWARM_SERVICES_YAML_PATH:-/sp/swarm/sp-swarm-services.yaml}" -SSL_CERT_PATH="${SSL_CERT_PATH:-/etc/super/certs/gatekeeper.crt}" -SSL_KEY_PATH="${SSL_KEY_PATH:-/etc/super/certs/gatekeeper.key}" -GK_ENV="${GATEKEEPER_ENV:-mainnet}" -TARGET_DIR="${TARGET_DIR:-/etc/sp-swarm-services}" -RESOURCE_NAME="sp-swarm-services" - -log() { - local ts - ts="$(date -u '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || date '+%Y-%m-%dT%H:%M:%S%z')" - printf "[%s] [download-sp-swarm-services] %s\n" "$ts" "$*" >&2; -} - -is_cloud_mode() { - # Cloud images are built with SP_VM_IMAGE_VERSION like: cloud-build- - if [[ -f /etc/sp-release ]] && grep -q "cloud-build" /etc/sp-release 2>/dev/null; then - return 0 - fi - # Also exposed via kernel cmdline as: build= - if [[ -r /proc/cmdline ]] && grep -q "build=cloud-build" /proc/cmdline 2>/dev/null; then - return 0 - fi - return 1 -} - -# Helpers: YAML block extraction and PEM normalization -list_top_keys() { - awk 'BEGIN{FS":"} /^[A-Za-z0-9_.-]+:[[:space:]]*/{print $1}' "$YAML_PATH" | sort -u || true -} - -extract_block_from_yaml() { - # $1: key name (e.g., key, cert) - local keyname="$1" - local awk_program - read -r -d '' awk_program <<'AWK' -function ltrim(s){ sub(/^\r?/, "", s); return s } -BEGIN{ inblk=0; found=0 } -{ - sub("\r$", "", $0) - if (inblk==0 && $0 ~ "^" KEY "[[:space:]]*:") { - idx = index($0, ":") - rest = substr($0, idx+1) - gsub(/^[[:space:]]+/, "", rest) - if (rest ~ /^\|[+-]?([[:space:]]*)?$/ || rest ~ /^$/) { - inblk=1; found=1; next - } else { - 
inline=rest - gsub(/[[:space:]]+$/, "", inline) - if (inline ~ /^".*"$/ || inline ~ /^'.*'$/) { inline=substr(inline,2,length(inline)-2); gsub(/\\n/, "\n", inline) } - print inline; exit - } - } else if (inblk==1) { - if ($0 ~ /^[A-Za-z0-9_.-]+:[[:space:]]*/) { exit } - print $0 - } -} -END { } -AWK - awk -v KEY="$keyname" "$awk_program" "$YAML_PATH" -} - -deindent_block() { - awk ' - { sub("\r$", "", $0); line=$0; match(line, /^[[:space:]]*/); ind=RLENGTH; if (min=="" || ind0) print substr(lines[i], min+1); else print lines[i] } } - ' -} - -trim_to_pem() { - awk ' - BEGIN{begin=0} - { sub("\r$", "", $0); if (begin==0) { if ($0 ~ /^-----BEGIN[[:space:]]/) { begin=1; print $0 } } else { print $0 } } - ' | awk 'NF>0' -} - - -## TODO: temporary solution. Need to use subroot cert and key -ensure_gatekeeper_certs_from_yaml() { - # If outputs already exist, skip - if [[ -f "$SSL_KEY_PATH" && -f "$SSL_CERT_PATH" ]]; then - log "TLS key/cert already present — skipping extraction"; - return 0 - fi - - if [[ ! -f "$YAML_PATH" ]]; then - log "YAML not found: $YAML_PATH — skipping cert extraction"; - return 0 - fi - - install -d "$(dirname "$SSL_CERT_PATH")" - - # Extract raw content for key and cert from YAML (supports block scalar and inline) - local key_content cert_content - key_content="$(extract_block_from_yaml key || true)" - cert_content="$(extract_block_from_yaml cert || true)" - - if [[ -z "${key_content//[[:space:]]/}" ]]; then - log "ERROR: key block not found in $YAML_PATH"; - log "Top-level keys: $(list_top_keys | tr '\n' ' ')"; - return 1 - fi - if [[ -z "${cert_content//[[:space:]]/}" ]]; then - log "ERROR: cert block not found in $YAML_PATH"; - log "Top-level keys: $(list_top_keys | tr '\n' ' ')"; - return 1 - fi - - # Normalize: deindent and trim strictly to PEM BEGIN..END - printf "%s\n" "$key_content" | deindent_block | trim_to_pem > "$SSL_KEY_PATH" - printf "%s\n" "$cert_content" | deindent_block | trim_to_pem > "$SSL_CERT_PATH" - - # Sanity checks - if ! 
grep -q "^-----BEGIN PRIVATE KEY" "$SSL_KEY_PATH"; then - log "ERROR: key PEM header not found after extraction"; return 1 - fi - if ! grep -q "^-----BEGIN CERTIFICATE" "$SSL_CERT_PATH"; then - log "ERROR: cert PEM header not found after extraction"; return 1 - fi - - # Optional openssl validation - if command -v openssl >/dev/null 2>&1; then - if ! openssl pkey -in "$SSL_KEY_PATH" -noout >/dev/null 2>&1; then - log "ERROR: openssl failed to parse key: $SSL_KEY_PATH"; return 1 - fi - if ! openssl x509 -in "$SSL_CERT_PATH" -noout >/dev/null 2>&1; then - log "ERROR: openssl failed to parse cert: $SSL_CERT_PATH"; return 1 - fi - fi - - chmod 600 "$SSL_KEY_PATH" || true - chmod 644 "$SSL_CERT_PATH" || true - if [[ $(id -u) -eq 0 ]]; then - chown root:root "$SSL_KEY_PATH" "$SSL_CERT_PATH" || true - fi - log "Wrote key to $SSL_KEY_PATH and cert to $SSL_CERT_PATH" -} - -parse_branch_name() { - local branch="" - if [[ -f "$SP_SWARM_SERVICES_YAML_PATH" ]]; then - # Read only 'branch' key (expected in sp-swarm-services.yaml) - branch=$(awk ' - BEGIN{br=""} - /^[[:space:]]*branch[[:space:]]*:/ { sub(/^[[:space:]]*branch[[:space:]]*:[[:space:]]*/, "", $0); br=$0; gsub(/[\r\n\t\f]+/, "", br); print br; exit } - ' "$SP_SWARM_SERVICES_YAML_PATH") - fi - - # Trim quotes and whitespace - branch="${branch//\"/}" - branch="${branch//\'/}" - branch="$(printf "%s" "$branch" | sed -E 's/^[[:space:]]+|[[:space:]]+$//g')" - - if [[ -z "$branch" ]]; then - branch="${BRANCH_NAME:-main}" - log "Branch not found in YAML; using: $branch" - else - log "Using branch from YAML: $branch" - fi - - printf "%s" "$branch" -} - -main() { - # In cloud mode this unit/script must be a no-op. - if is_cloud_mode; then - log "Cloud mode detected — exiting" - exit 0 - fi - - # If sp-swarm-services YAML is missing, do nothing and exit 0 - if [[ ! 
-f "$SP_SWARM_SERVICES_YAML_PATH" ]]; then - log "sp-swarm-services.yaml not found: $SP_SWARM_SERVICES_YAML_PATH — exiting" - exit 0 - fi - - ensure_gatekeeper_certs_from_yaml || exit 1 - - install -d "$TARGET_DIR" - local branch - branch="$(parse_branch_name)" - - log "Running Node services-downloader for $RESOURCE_NAME (branch=$branch)" - if ! /usr/bin/env node /usr/local/lib/services-downloader/src/index.js \ - --resource-name "$RESOURCE_NAME" \ - --branch-name "$branch" \ - --ssl-cert-path "$SSL_CERT_PATH" \ - --ssl-key-path "$SSL_KEY_PATH" \ - --environment "$GK_ENV" \ - --unpack-with-absolute-path; then - log "ERROR: services-downloader failed"; exit 1 - fi - - # Mark as completed to stop future retries - mkdir -p "$TARGET_DIR" - touch "$TARGET_DIR/.downloaded" - log "Marked as completed: $TARGET_DIR/.downloaded" - - log "Done" -} - -main "$@" diff --git a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh new file mode 100644 index 00000000..71ddab6a --- /dev/null +++ b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh @@ -0,0 +1,213 @@ +#!/bin/bash +set -euo pipefail + +CONFIG="/etc/swarm/config.yaml" + +log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] [swarm-init] $*"; } + +log "starting swarm initialization" + +# Read a scalar value from /etc/swarm/config.yaml via python3+pyyaml +cfg() { + python3 -c " +import yaml +c = yaml.safe_load(open('$CONFIG')) or {} +v = c +for k in '$1'.split('.'): + v = v.get(k) if isinstance(v, dict) else None +print('' if v is None else v)" +} + +GITHUB_TOKEN=$(cfg "github.token") +SWARM_DB_TAG=$(cfg "tags.swarm_db") +HOST_AGENT_TAG=$(cfg "tags.host_agent") +SWARM_NODE_TAG=$(cfg "tags.swarm_node") +SDK_TAG=$(cfg "tags.sdk") +SWARM_CLOUD_API_TAG=$(cfg "tags.swarm_cloud_api") +SWARM_CLOUD_UI_TAG=$(cfg "tags.swarm_cloud_ui") +AUTH_SERVICE_TAG=$(cfg "tags.auth_service") +NODE_NAME=$(cfg "swarm_db.node_name") +ADVERTISE_ADDR=$(cfg "swarm_db.advertise_addr") + +# Resolve node 
name +[ -z "$NODE_NAME" ] && NODE_NAME=$(hostname) + +# Auto-detect external IP if not configured +if [ -z "$ADVERTISE_ADDR" ]; then + log "auto-detecting external IP..." + ADVERTISE_ADDR=$(curl -sf --max-time 5 https://myip.wtf/json \ + | python3 -c "import sys,json; print(json.load(sys.stdin).get('YourFuckingIPAddress',''))" 2>/dev/null || true) + [ -z "$ADVERTISE_ADDR" ] && \ + ADVERTISE_ADDR=$(curl -sf --max-time 5 https://api.ipify.org 2>/dev/null || true) + if [ -z "$ADVERTISE_ADDR" ]; then + log "WARNING: could not detect external IP, using 127.0.0.1" + ADVERTISE_ADDR="127.0.0.1" + fi + log "detected advertise_addr: $ADVERTISE_ADDR" +fi + +# Download a GitHub release asset to a local file path +# Usage: download_github_asset +download_github_asset() { + local owner="$1" repo="$2" tag="$3" filename="$4" dest="$5" + local auth_args=() + [ -n "$GITHUB_TOKEN" ] && auth_args=(-H "Authorization: token $GITHUB_TOKEN") + + local rel_file; rel_file=$(mktemp) + if ! curl -sf "${auth_args[@]}" \ + "https://api.github.com/repos/$owner/$repo/releases/tags/$tag" \ + -o "$rel_file"; then + rm -f "$rel_file" + log "ERROR: failed to fetch release info for $owner/$repo@$tag" + return 1 + fi + + local asset_id + asset_id=$(python3 -c " +import json +with open('$rel_file') as f: + data = json.load(f) +for a in data.get('assets', []): + if a['name'] == '$filename': + print(a['id']); break +" 2>/dev/null || true) + rm -f "$rel_file" + + if [ -z "$asset_id" ]; then + log "ERROR: asset '$filename' not found in $owner/$repo@$tag" + return 1 + fi + + curl -sfL "${auth_args[@]}" \ + -H "Accept: application/octet-stream" \ + -o "$dest" \ + "https://api.github.com/repos/$owner/$repo/releases/assets/$asset_id" +} + +# Install swarm-db binary from GitHub Releases +if [ -n "$SWARM_DB_TAG" ]; then + log "installing swarm-db $SWARM_DB_TAG..." 
+ FILENAME="swarm-db-${SWARM_DB_TAG}-linux-amd64.tar.gz" + TMP=$(mktemp -d) + download_github_asset "Super-Protocol" "swarm-db" "$SWARM_DB_TAG" "$FILENAME" "$TMP/swarm-db.tar.gz" + tar xzf "$TMP/swarm-db.tar.gz" -C "$TMP" + install -m 755 "$TMP/swarm-db" /usr/local/bin/swarm-db-linux-amd64 + rm -rf "$TMP" + log "swarm-db $SWARM_DB_TAG installed" +else + log "tags.swarm_db not set, using built-in swarm-db binary" +fi + +# Install provision-plugin-sdk from GitHub Releases +if [ -n "$SDK_TAG" ]; then + log "installing provision-plugin-sdk $SDK_TAG..." + FILENAME="provision-plugin-sdk-${SDK_TAG}.tar.gz" + TMP=$(mktemp -d) + download_github_asset "Super-Protocol" "swarm-cloud" "$SDK_TAG" "$FILENAME" "$TMP/sdk.tar.gz" + tar xzf "$TMP/sdk.tar.gz" -C "$TMP" + pip3 install --break-system-packages --quiet "$TMP" + rm -rf "$TMP" + log "provision-plugin-sdk $SDK_TAG installed" +else + log "tags.sdk not set, using built-in provision-plugin-sdk" +fi + +# Install swarm-host-agent from GitHub Releases +# Tag format: "host-agent-vX.Y.Z" → release tag "release-vX.Y.Z" +if [ -n "$HOST_AGENT_TAG" ]; then + log "installing swarm-host-agent $HOST_AGENT_TAG..." 
+ if [[ "$HOST_AGENT_TAG" == release-* ]]; then + RELEASE_TAG="$HOST_AGENT_TAG" + elif [[ "$HOST_AGENT_TAG" == host-agent-* ]]; then + VERSION="${HOST_AGENT_TAG#host-agent-}" + RELEASE_TAG="release-$VERSION" + else + RELEASE_TAG="release-$HOST_AGENT_TAG" + fi + FILENAME="swarm-host-agent-${RELEASE_TAG}-linux-amd64.tar.gz" + TMP=$(mktemp -d) + download_github_asset "Super-Protocol" "swarm-cloud" "$RELEASE_TAG" "$FILENAME" "$TMP/host-agent.tar.gz" + tar xzf "$TMP/host-agent.tar.gz" -C "$TMP" + EXTRACT_DIR=$(tar -tzf "$TMP/host-agent.tar.gz" | head -1 | cut -f1 -d"/") + install -m 755 "$TMP/$EXTRACT_DIR/swarm-host-agent" /usr/local/bin/swarm-host-agent + mkdir -p /etc/swarm + cp "$TMP/$EXTRACT_DIR/host-agent.yaml" /etc/swarm/host-agent.yaml + cp "$TMP/$EXTRACT_DIR/swarm-host-agent.service" /etc/systemd/system/swarm-host-agent.service + rm -rf "$TMP" + log "swarm-host-agent $RELEASE_TAG installed" + systemctl daemon-reload + systemctl enable swarm-host-agent.service +else + log "ERROR: tags.host_agent is required" + exit 1 +fi + +# Authenticate to ghcr.io for pulling swarm-node container image +if [ -n "$GITHUB_TOKEN" ]; then + log "authenticating to ghcr.io..." + echo "$GITHUB_TOKEN" | podman login ghcr.io -u oauth2 --password-stdin + log "ghcr.io login successful" +else + log "WARNING: github.token not set, skipping ghcr.io login (image must be publicly accessible)" +fi + +# Generate /etc/swarm/swarm-node.env for swarm-node.service EnvironmentFile +log "generating /etc/swarm/swarm-node.env..." +mkdir -p /etc/swarm +cat > /etc/swarm/swarm-node.env << EOF +SWARM_NODE_TAG=${SWARM_NODE_TAG} +SWARM_CLOUD_API_TAG=${SWARM_CLOUD_API_TAG} +SWARM_CLOUD_UI_TAG=${SWARM_CLOUD_UI_TAG} +AUTH_SERVICE_TAG=${AUTH_SERVICE_TAG} +EOF + +# Generate /etc/swarm-db/config.yaml from /etc/swarm/config.yaml parameters +log "generating /etc/swarm-db/config.yaml (node=$NODE_NAME, advertise=$ADVERTISE_ADDR)..." 
+mkdir -p /etc/swarm-db /var/lib/swarm-db + +NODE_NAME_VAL="$NODE_NAME" ADVERTISE_ADDR_VAL="$ADVERTISE_ADDR" \ +python3 - << 'PYEOF' +import yaml, os + +with open('/etc/swarm/config.yaml') as f: + swarm_cfg = yaml.safe_load(f) or {} + +join_addresses = (swarm_cfg.get('swarm_db') or {}).get('join_addresses') or [] + +config = { + 'node': { + 'name': os.environ['NODE_NAME_VAL'], + 'host': '0.0.0.0', + 'port': 8001, + 'data_dir': '/var/lib/swarm-db', + 'schema_file': '/etc/swarm-db/schema.yaml', + }, + 'memberlist': { + 'bind_addr': '0.0.0.0', + 'bind_port': 7946, + 'advertise_addr': os.environ['ADVERTISE_ADDR_VAL'], + 'advertise_port': 7946, + 'join_addresses': join_addresses, + 'gossip_interval': '200ms', + 'probe_interval': '1s', + 'probe_timeout': '500ms', + 'suspicion_max_time_multiplier': 6, + }, + 'sql': { + 'enabled': True, + 'host': '0.0.0.0', + 'port': 3306, + 'system_database': 'swarmdb', + }, + 'jq': { + 'enabled': True, + 'host': '0.0.0.0', + 'port': 8080, + }, +} + +with open('/etc/swarm-db/config.yaml', 'w') as f: + yaml.dump(config, f, default_flow_style=False) +PYEOF + +log "swarm-init completed successfully" diff --git a/src/rootfs/files/configs/usr/local/bin/swarm-node.sh b/src/rootfs/files/configs/usr/local/bin/swarm-node.sh deleted file mode 100644 index df702e73..00000000 --- a/src/rootfs/files/configs/usr/local/bin/swarm-node.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -cd /usr/local/lib/swarm-cloud - -exec node ./apps/swarm-node/dist/main.js diff --git a/src/rootfs/files/configs/usr/local/lib/services-downloader/package-lock.json b/src/rootfs/files/configs/usr/local/lib/services-downloader/package-lock.json deleted file mode 100644 index 0dc13562..00000000 --- a/src/rootfs/files/configs/usr/local/lib/services-downloader/package-lock.json +++ /dev/null @@ -1,131 +0,0 @@ -{ - "name": "services-downloader", - "version": "1.0.0", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": 
"services-downloader", - "version": "1.0.0", - "license": "ISC", - "dependencies": { - "@super-protocol/sp-files-addon": "^0.13.2", - "proper-lockfile": "^4.1.2" - }, - "bin": { - "sp-services-downloader": "src/index.js" - } - }, - "node_modules/@super-protocol/sp-files-addon": { - "version": "0.13.2", - "resolved": "https://registry.npmjs.org/@super-protocol/sp-files-addon/-/sp-files-addon-0.13.2.tgz", - "integrity": "sha512-IZhxPu7TLlrlPS5bzVIvYnDDoGqtkm3fQjm/xuovXG3PM6ErGPRzRn0xpqCbkvXhRM04mTXoUmdT86TyhCh3qA==", - "license": "MIT", - "engines": { - "node": ">= 16" - }, - "optionalDependencies": { - "@super-protocol/sp-files-addon-darwin-arm64": "0.13.2", - "@super-protocol/sp-files-addon-darwin-x64": "0.13.2", - "@super-protocol/sp-files-addon-linux-x64-gnu": "0.13.2", - "@super-protocol/sp-files-addon-win32-x64-msvc": "0.13.2" - } - }, - "node_modules/@super-protocol/sp-files-addon-darwin-arm64": { - "version": "0.13.2", - "resolved": "https://registry.npmjs.org/@super-protocol/sp-files-addon-darwin-arm64/-/sp-files-addon-darwin-arm64-0.13.2.tgz", - "integrity": "sha512-lIZrgqG8fIVAo9ZmTUZaWWsmRsstGBKdjel7IkRA9uQIPxa4vsbItjQEtySj4nIxlvOjbfE1YAAOeOIxrXfAmw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@super-protocol/sp-files-addon-darwin-x64": { - "version": "0.13.2", - "resolved": "https://registry.npmjs.org/@super-protocol/sp-files-addon-darwin-x64/-/sp-files-addon-darwin-x64-0.13.2.tgz", - "integrity": "sha512-tMlsBcAZgfraen+J2cmgaqXrM6rRSxBwLjv9sc3bnnTd8yELT2zBDRLm6+lykQpiwHAS/oPZrMe52oDmPCuV+A==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">= 16" - } - }, - "node_modules/@super-protocol/sp-files-addon-linux-x64-gnu": { - "version": "0.13.2", - "resolved": "https://registry.npmjs.org/@super-protocol/sp-files-addon-linux-x64-gnu/-/sp-files-addon-linux-x64-gnu-0.13.2.tgz", - 
"integrity": "sha512-eJ22VQ9Irx3Oh7uCvQ/CYKDfYWi+buEju9XN4EXBUV6WTmenUSWpUd0WWoyPIBe+Gx3XtmpETQ2VQ8ZU+XeQcA==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 16" - } - }, - "node_modules/@super-protocol/sp-files-addon-win32-x64-msvc": { - "version": "0.13.2", - "resolved": "https://registry.npmjs.org/@super-protocol/sp-files-addon-win32-x64-msvc/-/sp-files-addon-win32-x64-msvc-0.13.2.tgz", - "integrity": "sha512-p13EjHkh+ipHIjyowLMj6+0LBB49C4DP7xNuXRWlmrPJjSuGhnMXqqq8DvRQWqz+h+EubxBK/Lyt0zEOmllewg==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">= 16" - } - }, - "node_modules/graceful-fs": { - "version": "4.2.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", - "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", - "license": "ISC" - }, - "node_modules/proper-lockfile": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/proper-lockfile/-/proper-lockfile-4.1.2.tgz", - "integrity": "sha512-TjNPblN4BwAWMXU8s9AEz4JmQxnD1NNL7bNOY/AKUzyamc379FWASUhc/K1pL2noVb+XmZKLL68cjzLsiOAMaA==", - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.4", - "retry": "^0.12.0", - "signal-exit": "^3.0.2" - } - }, - "node_modules/retry": { - "version": "0.12.0", - "resolved": "https://registry.npmjs.org/retry/-/retry-0.12.0.tgz", - "integrity": "sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow==", - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/signal-exit": { - "version": "3.0.7", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", - "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", - "license": "ISC" - } - } -} diff --git 
a/src/rootfs/files/configs/usr/local/lib/services-downloader/package.json b/src/rootfs/files/configs/usr/local/lib/services-downloader/package.json deleted file mode 100644 index 2271cdb6..00000000 --- a/src/rootfs/files/configs/usr/local/lib/services-downloader/package.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "name": "services-downloader", - "version": "1.0.0", - "main": "index.js", - "bin": { - "sp-services-downloader": "src/index.js" - }, - "author": "Super Protocol", - "license": "ISC", - "description": "", - "dependencies": { - "@super-protocol/sp-files-addon": "^0.13.2", - "proper-lockfile": "^4.1.2" - } -} diff --git a/src/rootfs/files/configs/usr/local/lib/services-downloader/src/downloader.js b/src/rootfs/files/configs/usr/local/lib/services-downloader/src/downloader.js deleted file mode 100644 index bdfb56da..00000000 --- a/src/rootfs/files/configs/usr/local/lib/services-downloader/src/downloader.js +++ /dev/null @@ -1,64 +0,0 @@ -const path = require('path'); -const fs = require('fs/promises'); -const { download } = require('@super-protocol/sp-files-addon'); - -async function ensureDir(dir) { - await fs.mkdir(dir, { recursive: true }); -} - -async function resourceExists(filePath) { - try { - const stat = await fs.stat(filePath); - if (stat.isFile()) return true; - if (stat.isDirectory()) { - const entries = await fs.readdir(filePath); - return entries.length > 0; - } - return false; - } catch { - // Path does not exist or not accessible - return false; - } -} - -// Resource helpers and plain download only; orchestration happens in CLI - -/** - * Download resource using sp-files-addon. - * Performs plain download; concurrency control handled by caller. 
- * - * @param {Object} params - * @param {string} params.resourceName - * @param {string} params.branchName - * @param {Object} params.resource - Resource definition for sp-files-addon - * @param {Object} [params.encryption] - Optional encryption { key, iv } - * @param {string} params.targetDir - Local directory where the resource will be downloaded - * @param {number} [params.threads] - Parallelism for sp-files-addon - * @returns {Promise<{ hash: string, size: number, targetDir: string }>} download result - */ -async function downloadResource(params) { - const { resourceName, branchName, targetDir, resource, encryption } = params; - if (!resourceName || !branchName) throw new Error('resourceName and branchName are required'); - if (!targetDir) throw new Error('targetDir is required'); - if (!resource) { - throw new Error('Resource is missing in parameters'); - } - - await ensureDir(targetDir); - - const result = await download(resource, targetDir, { - encryption, - threads: params.threads, - retry: { maxRetries: 5, initialDelayMs: 1000 }, - progressCallback: ({ key, current, total }) => { - const t = typeof total === 'number' ? total : 0; - const c = typeof current === 'number' ? current : 0; - const pct = t > 0 ? 
Math.floor((c / t) * 100) : 0; - console.log(`${key} ${c}/${t} (${pct}%)`); - }, - }); - - return { hash: result.hash, size: result.size, targetDir }; -} - -module.exports = { downloadResource, resourceExists }; diff --git a/src/rootfs/files/configs/usr/local/lib/services-downloader/src/gatekeeper-client.js b/src/rootfs/files/configs/usr/local/lib/services-downloader/src/gatekeeper-client.js deleted file mode 100644 index 2b4d08c8..00000000 --- a/src/rootfs/files/configs/usr/local/lib/services-downloader/src/gatekeeper-client.js +++ /dev/null @@ -1,90 +0,0 @@ -const https = require('https'); -const { URL } = require('url'); - -const getResourceFromGatekeeper = async (params) => { - const { resourceName, branchName, sslKeyPem, sslCertPem } = params; - const urlString = getUrl(resourceName, branchName, params.environment || 'mainnet'); - - const agent = new https.Agent({ - key: sslKeyPem, - cert: sslCertPem, - rejectUnauthorized: true, - }); - - const buf = await new Promise((resolve, reject) => { - try { - const urlObj = new URL(urlString); - - const req = https.request( - { - protocol: urlObj.protocol, - hostname: urlObj.hostname, - port: urlObj.port, - path: urlObj.pathname + urlObj.search, - method: 'GET', - headers: { Accept: 'application/json' }, - agent, - timeout: params.timeout || 30000, - }, - (res) => { - const chunks = []; - res.on('data', (chunk) => chunks.push(chunk)); - res.on('end', () => { - const body = Buffer.concat(chunks); - const ok = res.statusCode >= 200 && res.statusCode < 300; - if (ok) { - resolve(body); - } else { - const error = new Error( - `Gatekeeper request failed: ${res.statusCode} ${ - res.statusMessage - } - ${body.toString('utf8')}`, - ); - error.statusCode = res.statusCode; - error.headers = res.headers; - error.body = body; - reject(error); - } - }); - }, - ); - - req.on('error', reject); - req.on('timeout', () => req.destroy(new Error('Request timed out'))); - req.end(); - } catch (e) { - reject(e); - } - }); - - return 
parseGatekeeperResourceResponse(buf); -}; - -function parseGatekeeperResourceResponse(buf) { - let responseData; - try { - responseData = JSON.parse(buf.toString('utf8')); - } catch (e) { - const sample = buf.slice(0, 256).toString('utf8'); - throw new Error(`Invalid Gatekeeper response JSON: ${e.message}. Sample: ${sample}`); - } - - // { - // resource: { type: 'STORJ', filepath: '...' }, - // encryption: { key: 'hex', iv: 'hex' } - // } - const data = responseData.data; - if (!data?.resource || !data?.encryption) { - throw new Error('Gatekeeper response is invalid - missing resource or encryption field'); - } - - return data; -} - -const getUrl = (resourceName, branchName, environment) => { - const subdomain = `secrets-gatekeeper${environment === 'mainnet' ? '' : `-${environment}`}`; - - return `https://${subdomain}.superprotocol.io:44443/resources/${resourceName}/${branchName}`; -}; - -module.exports = { getResourceFromGatekeeper }; diff --git a/src/rootfs/files/configs/usr/local/lib/services-downloader/src/index.js b/src/rootfs/files/configs/usr/local/lib/services-downloader/src/index.js deleted file mode 100755 index b40d26d3..00000000 --- a/src/rootfs/files/configs/usr/local/lib/services-downloader/src/index.js +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env node -const fs = require('fs/promises'); -const os = require('os'); -const path = require('path'); -const { downloadResource, resourceExists } = require('./downloader'); -const { getResourceFromGatekeeper } = require('./gatekeeper-client'); -const { unpackTarGz, unpackTarGzAbsolute } = require('./unarchiver'); -const { acquireResourceLock } = require('./lock'); - -function printHelp() { - const text = ` -Services Downloader CLI - -Usage: - sp-services-downloader --resource-name --branch-name - --ssl-cert-path --ssl-key-path - [--environment ] [--threads ] [--timeout ] - <--download-to | --unpack-to | --unpack-with-absolute-path> - -Required arguments: - --resource-name Logical resource name (used for 
locking) - --branch-name Branch name (used for locking) - --ssl-cert-path Path to client SSL certificate (PEM) - --ssl-key-path Path to client SSL private key (PEM) - -Optional arguments: - --environment Gatekeeper environment (default: mainnet) - --threads Parallel threads for download - --timeout Request timeout to Gatekeeper in ms (default: 30000) - --download-to Download resource into the specified directory (no unpack) - --unpack-to Download to temp and unpack archive contents to the specified directory - --unpack-with-absolute-path - Download to temp and unpack archive entries with absolute paths directly to '/' - (when set, no directory is required) - --help Show this help - -Examples: - sp-services-downloader --resource-name svc --branch-name main - --ssl-cert-path /secrets/client.crt --ssl-key-path /secrets/client.key - --download-to /tmp/svc - - sp-services-downloader --resource-name svc --branch-name main - --ssl-cert-path /secrets/client.crt --ssl-key-path /secrets/client.key - --unpack-to /etc/sp-swarm-services - - sp-services-downloader --resource-name svc --branch-name main - --ssl-cert-path /secrets/client.crt --ssl-key-path /secrets/client.key - --unpack-with-absolute-path`; - process.stdout.write(text); -} - -function parseArgs(argv) { - const args = {}; - for (let i = 0; i < argv.length; i++) { - const a = argv[i]; - if (!a.startsWith('--')) continue; - const key = a.slice(2); - const next = argv[i + 1]; - if (next && !next.startsWith('--')) { - args[key] = next; - i++; - } else { - args[key] = true; - } - } - return args; -} - -async function main() { - const args = parseArgs(process.argv.slice(2)); - if (args.help) { - printHelp(); - return; - } - try { - const resourceName = args['resource-name']; - const branchName = args['branch-name']; - const downloadTo = args['download-to']; - const unpackTo = args['unpack-to']; - const sslCertPath = args['ssl-cert-path']; - const sslKeyPath = args['ssl-key-path']; - const environment = args.environment 
|| 'mainnet'; - const timeout = args.timeout ? Number(args.timeout) : 30000; - - const unpackWithAbs = !!args['unpack-with-absolute-path']; - if (!resourceName || !branchName || !sslCertPath || !sslKeyPath) { - throw new Error('Missing required arguments. See --help'); - } - // Mode selection: exactly one of the mode flags must be provided - const modeCount = [!!downloadTo, !!unpackTo, unpackWithAbs].filter(Boolean).length; - if (modeCount !== 1) { - throw new Error( - 'Specify exactly one mode: --download-to | --unpack-to | --unpack-with-absolute-path', - ); - } - if (downloadTo && typeof downloadTo !== 'string') { - throw new Error('Invalid --download-to value'); - } - if (unpackTo && typeof unpackTo !== 'string') { - throw new Error('Invalid --unpack-to value'); - } - - const [sslCertPem, sslKeyPem] = await Promise.all([ - fs.readFile(sslCertPath, 'utf8'), - fs.readFile(sslKeyPath, 'utf8'), - ]); - - console.info(`[INFO] fetching resource ${resourceName}@${branchName} env=${environment}`); - const { resource, encryption } = await getResourceFromGatekeeper({ - resourceName, - branchName, - sslKeyPem, - sslCertPem, - environment, - timeout, - }); - - const threads = args.threads ? 
Number(args.threads) : undefined; - - // Acquire per-resource lock - const release = await acquireResourceLock(resourceName, branchName); - console.info(`[INFO] lock acquired for ${resourceName}/${branchName}`); - - try { - // Skip if destination already populated (download-to or unpack-to) - if (downloadTo && (await resourceExists(downloadTo))) { - console.info(`[INFO] skip: target already populated -> ${downloadTo}`); - process.stdout.write( - JSON.stringify({ ok: true, hash: 'unknown', size: 0, targetDir: downloadTo }) + '\n', - ); - return; - } - if (unpackTo && (await resourceExists(unpackTo))) { - console.info(`[INFO] skip: target already populated -> ${unpackTo}`); - process.stdout.write( - JSON.stringify({ ok: true, hash: 'unknown', size: 0, targetDir: unpackTo }) + '\n', - ); - return; - } - - let downloadDir = downloadTo || unpackTo || '/'; - let tempDir; - try { - if (unpackTo || unpackWithAbs) { - const tempPrefix = path.join(os.tmpdir(), 'sp-services-downloader-'); - tempDir = await fs.mkdtemp(tempPrefix); - console.info(`[INFO] unpack enabled: downloading archive to temp -> ${tempDir}`); - downloadDir = tempDir; - } - - const result = await downloadResource({ - resourceName, - branchName, - targetDir: downloadDir, - resource, - encryption, - threads, - }); - - if (unpackWithAbs) { - console.info(`[INFO] unpack-with-absolute-path: extracting archive entries to /`); - await unpackTarGzAbsolute(downloadDir); - } else if (unpackTo) { - console.info(`[INFO] unpacking from temp to target -> ${unpackTo}`); - await unpackTarGz(downloadDir, unpackTo); - } - - const outTarget = unpackWithAbs ? 
'/' : downloadTo || unpackTo; - process.stdout.write( - JSON.stringify({ ok: true, hash: result.hash, size: result.size, targetDir: outTarget }) + - '\n', - ); - } finally { - if (tempDir) { - try { - await fs.rm(tempDir, { recursive: true, force: true }); - console.info(`[INFO] cleaned temp directory -> ${tempDir}`); - } catch (cleanupErr) { - console.warn(`[WARN] failed to clean temp directory ${tempDir}: ${cleanupErr.message}`); - } - } - } - } finally { - await release(); - console.info(`[INFO] lock released for ${resourceName}/${branchName}`); - } - } catch (e) { - process.stderr.write(`[ERROR] ${e.message}\n`); - process.exitCode = 1; - } -} - -if (require.main === module) { - main(); -} diff --git a/src/rootfs/files/configs/usr/local/lib/services-downloader/src/lock.js b/src/rootfs/files/configs/usr/local/lib/services-downloader/src/lock.js deleted file mode 100644 index d9e1e3ba..00000000 --- a/src/rootfs/files/configs/usr/local/lib/services-downloader/src/lock.js +++ /dev/null @@ -1,25 +0,0 @@ -const path = require('path'); -const os = require('os'); -const fs = require('fs/promises'); -const lockfile = require('proper-lockfile'); - -function buildLockName(resourceName, branchName) { - const safe = (s) => encodeURIComponent(String(s || '')); - return `${safe(resourceName)}__${safe(branchName)}`; -} - -async function acquireResourceLock(resourceName, branchName, options = {}) { - const baseDir = options.baseDir || path.join(os.tmpdir(), 'sp-services-downloader-locks'); - await fs.mkdir(baseDir, { recursive: true }); - const lockTarget = path.join(baseDir, buildLockName(resourceName, branchName)); - - const release = await lockfile.lock(lockTarget, { - stale: options.staleMs || 60_000, - retries: options.retries || { retries: 120, factor: 1, minTimeout: 500 }, - realpath: false, - }); - - return release; -} - -module.exports = { acquireResourceLock, buildLockName }; diff --git a/src/rootfs/files/configs/usr/local/lib/services-downloader/src/unarchiver.js 
b/src/rootfs/files/configs/usr/local/lib/services-downloader/src/unarchiver.js deleted file mode 100644 index 71b1c2b7..00000000 --- a/src/rootfs/files/configs/usr/local/lib/services-downloader/src/unarchiver.js +++ /dev/null @@ -1,97 +0,0 @@ -const path = require('path'); -const fs = require('fs/promises'); -const { execFile } = require('child_process'); - -async function hasFiles(dir) { - try { - const stat = await fs.stat(dir); - if (!stat.isDirectory()) return false; - const entries = await fs.readdir(dir); - return entries.length > 0; - } catch { - return false; - } -} - -async function ensureDir(dir) { - await fs.mkdir(dir, { recursive: true }); -} - -async function findTarLike(targetDir) { - const entries = await fs.readdir(targetDir, { withFileTypes: true }); - for (const e of entries) { - const full = path.join(targetDir, e.name); - if (e.isFile()) { - const name = e.name.toLowerCase(); - if (name.endsWith('.tar.gz') || name.endsWith('.tgz') || name.endsWith('.tar')) { - return full; - } - } else if (e.isDirectory()) { - try { - const nested = await findTarLike(full); - if (nested) return nested; - } catch {} - } - } - return null; -} - -function execTarExtract(tarFile, destDir) { - return new Promise((resolve, reject) => { - const lower = tarFile.toLowerCase(); - const args = - lower.endsWith('.tar.gz') || lower.endsWith('.tgz') - ? ['-xzf', tarFile, '-C', destDir] - : ['-xf', tarFile, '-C', destDir]; - execFile('tar', args, (err) => { - if (err) return reject(err); - resolve(); - }); - }); -} - -function execTarExtractAbsolute(tarFile) { - return new Promise((resolve, reject) => { - const lower = tarFile.toLowerCase(); - const args = - lower.endsWith('.tar.gz') || lower.endsWith('.tgz') - ? 
['-xzf', tarFile, '-C', '/', '-p', '-P', '-k'] - : ['-xf', tarFile, '-C', '/', '-p', '-P', '-k']; - execFile('tar', args, (err) => { - if (err) return reject(err); - resolve(); - }); - }); -} - -async function unpackTarGz(targetDir, unpackTarTo) { - await ensureDir(unpackTarTo); - const destHasFiles = await hasFiles(unpackTarTo); - if (destHasFiles) { - console.info(`[INFO] unpack skip: destination not empty: ${unpackTarTo}`); - return false; - } - - const tarFile = await findTarLike(targetDir); - if (!tarFile) { - console.info(`[INFO] unpack skip: no archive found under ${targetDir}`); - return false; - } - - await execTarExtract(tarFile, unpackTarTo); - console.info(`[INFO] unpacked ${path.basename(tarFile)} to ${unpackTarTo}`); - return true; -} - -async function unpackTarGzAbsolute(targetDir) { - const tarFile = await findTarLike(targetDir); - if (!tarFile) { - console.info(`[INFO] unpack-with-absolute-path skip: no archive found under ${targetDir}`); - return false; - } - await execTarExtractAbsolute(tarFile); - console.info(`[INFO] unpack-with-absolute-path: unpacked ${path.basename(tarFile)} to /`); - return true; -} - -module.exports = { unpackTarGz, unpackTarGzAbsolute }; diff --git a/src/rootfs/files/scripts/build_swarm_cloud.sh b/src/rootfs/files/scripts/build_swarm_cloud.sh deleted file mode 100644 index 6d1e6982..00000000 --- a/src/rootfs/files/scripts/build_swarm_cloud.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode; -set -euo pipefail; - -# public, required -# OUTPUTDIR - -# private -BUILDROOT="/buildroot"; - -# init logging -source "${BUILDROOT}/files/scripts/log.sh"; - -# chroot functions -source "${BUILDROOT}/files/scripts/chroot.sh"; - -function build_swarm_cloud() { - log_info "enabling corepack inside rootfs"; - chroot "${OUTPUTDIR}" /bin/bash -lc 'corepack enable'; - - log_info "installing Node.js dependencies with pnpm"; - chroot "${OUTPUTDIR}" /bin/bash -lc 'export NX_DAEMON=false NX_ADD_PLUGINS=false 
NX_NO_CLOUD=true; cd /opt/swarm-cloud && pnpm install --frozen-lockfile'; - - # swarm-cloud-api - log_info "building swarm-cloud-api"; - chroot "${OUTPUTDIR}" /bin/bash -lc 'cd /opt/swarm-cloud && pnpm nx build swarm-cloud-api --output-style=stream'; - - log_info "publishing built swarm-cloud-api artifacts to /usr/local/lib/swarm-cloud"; - chroot "${OUTPUTDIR}" /bin/bash -lc 'set -e; mkdir -p /usr/local/lib/swarm-cloud/apps/swarm-cloud-api'; - chroot "${OUTPUTDIR}" /bin/bash -lc 'cp -r /opt/swarm-cloud/apps/swarm-cloud-api/{dist,node_modules} /usr/local/lib/swarm-cloud/apps/swarm-cloud-api/'; - - # swarm-node - log_info "building swarm-node"; - chroot "${OUTPUTDIR}" /bin/bash -lc 'cd /opt/swarm-cloud && pnpm nx build swarm-node --output-style=stream'; - - log_info "publishing built swarm-node artifacts to /usr/local/lib/swarm-cloud"; - chroot "${OUTPUTDIR}" /bin/bash -lc 'set -e; mkdir -p /usr/local/lib/swarm-cloud/apps/swarm-node'; - chroot "${OUTPUTDIR}" /bin/bash -lc 'cp -r /opt/swarm-cloud/apps/swarm-node/{dist,node_modules} /usr/local/lib/swarm-cloud/apps/swarm-node/'; - - # swarm-cloud-ui - log_info "building swarm-cloud-ui"; - chroot "${OUTPUTDIR}" /bin/bash -lc 'cd /opt/swarm-cloud && pnpm nx build swarm-cloud-ui --output-style=stream'; - - log_info "publishing built swarm-cloud-ui (Next standalone) to /usr/local/lib/swarm-cloud/apps/swarm-cloud-ui"; - chroot "${OUTPUTDIR}" /bin/bash -lc 'set -e; rm -rf /usr/local/lib/swarm-cloud/apps/swarm-cloud-ui; mkdir -p /usr/local/lib/swarm-cloud/apps/swarm-cloud-ui'; - chroot "${OUTPUTDIR}" /bin/bash -lc ' -set -euo pipefail; -UI_SRC=/opt/swarm-cloud/apps/swarm-cloud-ui; -UI_DST=/usr/local/lib/swarm-cloud/apps/swarm-cloud-ui; - -# public assets (optional) -cp -a "${UI_SRC}/public" "${UI_DST}/public" 2>/dev/null || true; - -# copy standalone server bundle -cp -a "${UI_SRC}/.next/standalone/." 
"${UI_DST}/"; - -# next standalone expects static assets under apps/swarm-cloud-ui/.next/static -mkdir -p "${UI_DST}/apps/swarm-cloud-ui/.next"; -cp -a "${UI_SRC}/.next/static" "${UI_DST}/apps/swarm-cloud-ui/.next/static"; -'; - -# # In the deployed UI lib, TypeScript sources live under libs/ui/src, but some imports -# # reference sibling TS modules with a .js extension (e.g. "../lib/utils.js", "./button.js"). -# # Next + TS expect extension-less imports for TS modules. Adjust imports only in the -# # deployed copy (do not touch the original sources under src/repos). -# chroot "${OUTPUTDIR}" /bin/bash -lc "\ -# find /usr/local/lib/swarm-cloud/dist/libs/ui/src -type f \\( -name '*.ts' -o -name '*.tsx' \\) -print0 \ -# | xargs -0 sed -i 's/\\.js\\([\"'\"'\"']\\)/\\1/g'" - -# log_info "copying workspace-level Node.js dependencies and configs to /usr/local/lib/swarm-cloud"; -# chroot "${OUTPUTDIR}" /bin/bash -lc 'mkdir -p /usr/local/lib/swarm-cloud/node_modules'; -# # copy the *contents* of node_modules so that the .pnpm layout and symlink targets remain valid - - # common - chroot "${OUTPUTDIR}" /bin/bash -lc 'cp /opt/swarm-cloud/package.json /usr/local/lib/swarm-cloud/package.json'; - chroot "${OUTPUTDIR}" /bin/bash -lc 'cp /opt/swarm-cloud/pnpm-lock.yaml /usr/local/lib/swarm-cloud/pnpm-lock.yaml'; - chroot "${OUTPUTDIR}" /bin/bash -lc 'cp /opt/swarm-cloud/pnpm-workspace.yaml /usr/local/lib/swarm-cloud/pnpm-workspace.yaml'; - chroot "${OUTPUTDIR}" /bin/bash -lc 'cp -a /opt/swarm-cloud/node_modules/. 
/usr/local/lib/swarm-cloud/node_modules/'; - - log_info "removing sources from /opt/swarm-cloud"; - chroot "${OUTPUTDIR}" /bin/bash -lc 'rm -rf /opt/swarm-cloud || true'; -} - -chroot_init; -build_swarm_cloud; -chroot_deinit; diff --git a/src/rootfs/files/scripts/download_rke2.sh b/src/rootfs/files/scripts/download_rke2.sh deleted file mode 100755 index 83353f05..00000000 --- a/src/rootfs/files/scripts/download_rke2.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode; -set -euo pipefail; - -# public, required -# OUTPUTDIR - -# private -BUILDROOT="/buildroot"; -RKE2_VERSION="v1.30.3+rke2r1"; -SHA_CHECKSUMS_TXT="445ead9865914fa2e6d6a59affd00babc462480efebf438d207961f740ab83a2"; -SHA_INSTALL_SH="2d24db2184dd6b1a5e281fa45cc9a8234c889394721746f89b5fe953fdaaf40a"; - -# init loggggging; -source "$BUILDROOT/files/scripts/log.sh"; - -function download_rke2() { - log_info "downloading rke2 install scripts" - mkdir -p "$OUTPUTDIR/root/rke2"; - wget \ - "https://github.com/rancher/rke2/releases/download/${RKE2_VERSION}/rke2-images.linux-amd64.tar.zst" \ - -O "$OUTPUTDIR/root/rke2/rke2-images.linux-amd64.tar.zst"; - wget \ - "https://github.com/rancher/rke2/releases/download/${RKE2_VERSION}/rke2.linux-amd64.tar.gz" \ - -O "$OUTPUTDIR/root/rke2/rke2.linux-amd64.tar.gz"; - wget \ - "https://github.com/rancher/rke2/releases/download/${RKE2_VERSION}/sha256sum-amd64.txt" \ - -O "$OUTPUTDIR/root/rke2/sha256sum-amd64.txt"; - wget \ - "https://get.rke2.io" \ - -O "$OUTPUTDIR/root/rke2/rke2-install.sh"; -} - -function validate_checksum() { - log_info "validating checksums"; - pushd "$OUTPUTDIR/root/rke2"; - echo "$SHA_CHECKSUMS_TXT sha256sum-amd64.txt" | sha256sum --check - echo "$SHA_INSTALL_SH rke2-install.sh" | sha256sum --check - popd; -} - -download_rke2; -validate_checksum; diff --git a/src/rootfs/files/scripts/install_cockroachdb.sh b/src/rootfs/files/scripts/install_cockroachdb.sh deleted file mode 100644 index e20da789..00000000 --- 
a/src/rootfs/files/scripts/install_cockroachdb.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode -set -euo pipefail - -# private -BUILDROOT="/buildroot" - -# init logging -source "$BUILDROOT/files/scripts/log.sh" - -# chroot functions -source "$BUILDROOT/files/scripts/chroot.sh" - -function install_cockroachdb() { - log_info "installing CockroachDB binary inside VM rootfs" - - # Ensure required tools are present - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt-get update' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt-get install -y --no-install-recommends wget ca-certificates tar' - - # Download and install latest CockroachDB binary - chroot "$OUTPUTDIR" /bin/bash -lc ' - set -e; - arch=$(uname -m); - case "$arch" in - x86_64) cr_arch=amd64 ;; - aarch64|arm64) cr_arch=arm64 ;; - *) cr_arch=amd64 ;; - esac; - cd /tmp; - wget -q https://binaries.cockroachdb.com/cockroach-latest.linux-${cr_arch}.tgz -O cockroach.tgz; - tar -xzf cockroach.tgz; - dir=$(tar -tzf cockroach.tgz | head -1 | cut -d/ -f1); - cp "$dir/cockroach" /usr/local/bin/cockroach; - chmod 0755 /usr/local/bin/cockroach; - rm -rf "$dir" cockroach.tgz; - ' - - chroot "$OUTPUTDIR" /bin/bash -lc 'apt-get clean' -} - -chroot_init -install_cockroachdb -chroot_deinit diff --git a/src/rootfs/files/scripts/install_extra_packages.sh b/src/rootfs/files/scripts/install_extra_packages.sh new file mode 100644 index 00000000..3a7dde0a --- /dev/null +++ b/src/rootfs/files/scripts/install_extra_packages.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# bash unofficial strict mode +set -euo pipefail + +# private +BUILDROOT="/buildroot" + +# init logging +source "$BUILDROOT/files/scripts/log.sh" + +# chroot functions +source "$BUILDROOT/files/scripts/chroot.sh" + +function install_extra_packages() { + log_info "installing extra system packages for cloud-init compatibility" + + chroot "$OUTPUTDIR" /bin/bash -lc "apt-get update" + + # podman: container runtime used by cloud-init-style swarm services + # 
(cloud-init runs swarm-node as a Podman container; also needed by provision plugins) + # unzip: used to extract service archives (download-services.sh) + # NOTE: mysql-client, netcat-openbsd, dnsutils are already installed by setup_runtime_tools.sh + chroot "$OUTPUTDIR" /bin/bash -lc "apt-get install -y --no-install-recommends \ + podman \ + unzip" + + chroot "$OUTPUTDIR" /bin/bash -lc "apt-get clean" + log_info "extra packages installed successfully" +} + +chroot_init +install_extra_packages +chroot_deinit diff --git a/src/rootfs/files/scripts/install_knot.sh b/src/rootfs/files/scripts/install_knot.sh deleted file mode 100644 index 6d34ce6a..00000000 --- a/src/rootfs/files/scripts/install_knot.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode -set -euo pipefail - -# private -BUILDROOT="/buildroot" - -# init logging -source "$BUILDROOT/files/scripts/log.sh" - -# chroot functions -source "$BUILDROOT/files/scripts/chroot.sh" - -function install_knot() { - log_info "installing Knot DNS into VM rootfs" - - # Base tools and add-apt-repository support - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt-get update' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt-get install -y --no-install-recommends software-properties-common ca-certificates' - - # Add upstream PPA and install knot - chroot "$OUTPUTDIR" /bin/bash -lc ' - set -e; - add-apt-repository -y ppa:cz.nic-labs/knot-dns; - apt-get update; - apt-get install -y knot; - ' - - chroot "$OUTPUTDIR" /bin/bash -lc 'apt-get clean' -} - -chroot_init -install_knot -chroot_deinit diff --git a/src/rootfs/files/scripts/install_nats.sh b/src/rootfs/files/scripts/install_nats.sh deleted file mode 100644 index ced2cf2c..00000000 --- a/src/rootfs/files/scripts/install_nats.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode -set -euo pipefail - -# private -BUILDROOT="/buildroot" - -# init logging -source "$BUILDROOT/files/scripts/log.sh" - -# chroot functions -source 
"$BUILDROOT/files/scripts/chroot.sh" - -function install_nats() { - local NATS_VERSION="2.12.2" - local NATS_PKG="nats-server-v${NATS_VERSION}-linux-amd64" - local NATS_URL="https://github.com/nats-io/nats-server/releases/download/v${NATS_VERSION}/${NATS_PKG}.tar.gz" - - log_info "installing NATS (nats-server v${NATS_VERSION}) inside VM rootfs" - - # prerequisites - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt update' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt install -y --no-install-recommends curl ca-certificates tar' - - # download and install binary - chroot "$OUTPUTDIR" /bin/bash -lc "set -e; cd /tmp && curl -fsSL '${NATS_URL}' -o ${NATS_PKG}.tar.gz" - chroot "$OUTPUTDIR" /bin/bash -lc "set -e; cd /tmp && tar -xzf ${NATS_PKG}.tar.gz" - chroot "$OUTPUTDIR" /bin/bash -lc "set -e; install -m 0755 /tmp/${NATS_PKG}/nats-server /usr/local/bin/nats-server" - - # create user/group if absent - chroot "$OUTPUTDIR" /bin/bash -lc "getent group nats >/dev/null 2>&1 || groupadd --system nats" - chroot "$OUTPUTDIR" /bin/bash -lc "id -u nats >/dev/null 2>&1 || useradd --system --no-create-home --gid nats nats" - - # directories - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; mkdir -p /etc/nats /var/lib/nats /var/log/nats' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; chown -R nats:nats /etc/nats /var/lib/nats /var/log/nats || true' - - # systemd unit - cat > "${OUTPUTDIR}/usr/lib/systemd/system/nats-server.service" <<'UNIT' -[Unit] -Description=NATS Server -After=network-online.target wg-quick.target -Wants=network-online.target wg-quick.target - -[Service] -User=nats -Group=nats -ExecStart=/usr/local/bin/nats-server -c /etc/nats/nats-server.conf -Restart=always -RestartSec=2 -LimitNOFILE=100000 - -[Install] -WantedBy=multi-user.target -UNIT - - # cleanup - chroot "$OUTPUTDIR" /bin/bash -lc "rm -rf /tmp/${NATS_PKG} /tmp/${NATS_PKG}.tar.gz" - chroot "$OUTPUTDIR" /bin/bash -lc 'apt clean' -} - -chroot_init -install_nats -chroot_deinit diff --git 
a/src/rootfs/files/scripts/install_nodejs.sh b/src/rootfs/files/scripts/install_nodejs.sh deleted file mode 100755 index 2d66755d..00000000 --- a/src/rootfs/files/scripts/install_nodejs.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode; -set -euo pipefail; - -# public, required -# OUTPUTDIR - -# private -BUILDROOT="/buildroot"; - -# init loggggging; -source "${BUILDROOT}/files/scripts/log.sh"; - -# chroot functions -source "${BUILDROOT}/files/scripts/chroot.sh"; - -function install_nodejs() { - log_info "adding NodeSource repository"; - chroot "${OUTPUTDIR}" /bin/bash -c 'curl -sL https://deb.nodesource.com/setup_22.x | bash -'; - - log_info "installing Node.js"; - chroot "${OUTPUTDIR}" /bin/bash -c 'DEBIAN_FRONTEND=noninteractive apt install -y nodejs'; - - # Verify installation - local NODE_VERSION=$(chroot "${OUTPUTDIR}" /bin/bash -c 'node --version' 2>/dev/null || true); - if [ -z "${NODE_VERSION}" ]; then - log_fail "Node.js installation failed"; - return 1; - fi - - log_info "Node.js ${NODE_VERSION} installed successfully"; -} - -chroot_init; -install_nodejs; -chroot_deinit; diff --git a/src/rootfs/files/scripts/install_openresty.sh b/src/rootfs/files/scripts/install_openresty.sh deleted file mode 100644 index 3c228279..00000000 --- a/src/rootfs/files/scripts/install_openresty.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode -set -euo pipefail - -# private -BUILDROOT="/buildroot" - -# init logging -source "$BUILDROOT/files/scripts/log.sh" - -# chroot functions -source "$BUILDROOT/files/scripts/chroot.sh" - -function install_openresty() { - log_info "installing OpenResty and Lua tooling inside VM rootfs" - - # base prerequisites for adding GPG key and repo (per official docs) - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt-get update' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt-get install -y --no-install-recommends wget gnupg ca-certificates lsb-release' - - # import OpenResty GPG key and 
create keyring (Ubuntu 22+/24+ style) - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; wget -O - https://openresty.org/package/pubkey.gpg | gpg --dearmor -o /usr/share/keyrings/openresty.gpg' - - # add OpenResty APT repository with signed-by (per https://openresty.org/en/linux-packages.html) - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; codename=$(lsb_release -sc); arch=$(dpkg --print-architecture); echo "deb [arch=${arch} signed-by=/usr/share/keyrings/openresty.gpg] http://openresty.org/package/ubuntu ${codename} main" > /etc/apt/sources.list.d/openresty.list' - - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt-get update' - - # install OpenResty itself (no recommends to keep image small); luarocks separately - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt-get install -y --no-install-recommends openresty' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt-get install -y luarocks' - - # install required Lua modules (best effort – warnings only on failure) - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; for m in lua-resty-auto-ssl lua-resty-redis lua-resty-http; do echo "[*] installing $m via luarocks"; if ! luarocks install "$m"; then echo "[!] 
warning: failed to install $m" >&2; fi; done' - - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt-get clean' -} - -chroot_init -install_openresty -chroot_deinit diff --git a/src/rootfs/files/scripts/install_python_deps.sh b/src/rootfs/files/scripts/install_python_deps.sh new file mode 100644 index 00000000..c3c85ad3 --- /dev/null +++ b/src/rootfs/files/scripts/install_python_deps.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# bash unofficial strict mode +set -euo pipefail + +# private +BUILDROOT="/buildroot" + +# init logging +source "$BUILDROOT/files/scripts/log.sh" + +# chroot functions +source "$BUILDROOT/files/scripts/chroot.sh" + +function install_python_deps() { + log_info "installing Python dependencies for provision plugins" + # redis-py is already installed by setup_runtime_tools.sh, but ensure correct version range + # (setup_runtime_tools.sh installs latest; provision plugins require >=5.0.0,<6.0.0) + chroot "$OUTPUTDIR" /bin/bash -lc "pip3 install --break-system-packages 'redis>=5.0.0,<6.0.0'" + # podman-compose: required for provision plugins that orchestrate Podman containers + chroot "$OUTPUTDIR" /bin/bash -lc "pip3 install --break-system-packages podman-compose" + # pyyaml: required by swarm-init.sh to parse /etc/swarm/config.yaml at runtime + chroot "$OUTPUTDIR" /bin/bash -lc "pip3 install --break-system-packages pyyaml" + log_info "Python dependencies installed successfully" +} + +chroot_init +install_python_deps +chroot_deinit diff --git a/src/rootfs/files/scripts/install_rke2.sh b/src/rootfs/files/scripts/install_rke2.sh deleted file mode 100755 index 04b2a9c0..00000000 --- a/src/rootfs/files/scripts/install_rke2.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode; -set -euo pipefail; - -# public, required -# OUTPUTDIR -# RKE2_INSTALL_SHA256 - -# private -BUILDROOT="/buildroot"; - -# init logging; -source "$BUILDROOT/files/scripts/log.sh"; - -# chroot functions -source "$BUILDROOT/files/scripts/chroot.sh"; - -function 
install_rke2() { - log_info "staging rke2 installer into rootfs" - mkdir -p "$OUTPUTDIR/root/rke2"; - wget -q -O "$OUTPUTDIR/root/rke2/rke2-install.sh" "https://get.rke2.io"; - - # TODO: update RKE2_INSTALL_SHA256 to match the current https://get.rke2.io script and re-enable verification - # log_info "verifying rke2 installer sha256" - # echo "${RKE2_INSTALL_SHA256} $OUTPUTDIR/root/rke2/rke2-install.sh" | sha256sum -c -; - - log_info "installing rke2" - chroot "$OUTPUTDIR" /bin/bash -c 'bash /root/rke2/rke2-install.sh'; - rm -rf "$OUTPUTDIR/root/rke2"; -} - -function disable_rke2_service() { - log_info "disabling rke2 services" - chroot "$OUTPUTDIR" /bin/bash -c 'systemctl disable rke2-server.service || true'; - chroot "$OUTPUTDIR" /bin/bash -c 'systemctl disable rke2-agent.service || true'; -} - -function add_aliases() { - log_info "adding kubectl aliases" - echo "export KUBECONFIG=/etc/rancher/rke2/rke2.yaml" >> "$OUTPUTDIR/etc/profile"; - echo "alias k='/usr/local/bin/kubectl'" >> "$OUTPUTDIR/etc/profile"; - echo "alias kubectl='/usr/local/bin/kubectl'" >> "$OUTPUTDIR/etc/profile"; -} - -chroot_init; -install_rke2; -disable_rke2_service; -chroot_deinit; -add_aliases; diff --git a/src/rootfs/files/scripts/install_services_downloader.sh b/src/rootfs/files/scripts/install_services_downloader.sh deleted file mode 100644 index 7a0edc5c..00000000 --- a/src/rootfs/files/scripts/install_services_downloader.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode; -set -euo pipefail; - -# public, required -# OUTPUTDIR - -# private -BUILDROOT="/buildroot"; - -# init logging; -source "${BUILDROOT}/files/scripts/log.sh"; - -# chroot functions -source "${BUILDROOT}/files/scripts/chroot.sh"; - -function install_services_downloader() { - log_info "installing services-downloader dependencies (npm ci)"; - chroot "${OUTPUTDIR}" /bin/bash -c 'cd /usr/local/lib/services-downloader && npm ci'; - - # quick smoke test prints help via node directly - chroot 
"${OUTPUTDIR}" /bin/bash -c 'node /usr/local/lib/services-downloader/src/index.js --help >/dev/null || true'; -} - -chroot_init; -install_services_downloader; -chroot_deinit; diff --git a/src/rootfs/files/scripts/template_configs_post_rke2install.sh b/src/rootfs/files/scripts/template_configs_post_rke2install.sh deleted file mode 100755 index 801de545..00000000 --- a/src/rootfs/files/scripts/template_configs_post_rke2install.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode; -set -euo pipefail; - -# public, required -# OUTPUTDIR -# LOCAL_REGISTRY_HOST - -# private -BUILDROOT="/buildroot"; - -# init loggggging; -source "$BUILDROOT/files/scripts/log.sh"; - -function template_rke2_configs_postinstall() { - log_info "templating rke2 configs after install"; - mkdir -p "$OUTPUTDIR/etc/super/var/lib/rancher/rke2/agent/etc/containerd"; - envsubst \ - '$LOCAL_REGISTRY_HOST' \ - < "$BUILDROOT/files/configs/etc/super/var/lib/rancher/rke2/agent/etc/containerd/config.toml.tmpl.tmpl" \ - > "$OUTPUTDIR/etc/super/var/lib/rancher/rke2/agent/etc/containerd/config.toml.tmpl"; -} - -function append_to_files() { - log_info "appending to configs after rke2 install"; - cat \ - "$BUILDROOT/files/configs/usr/local/lib/systemd/system/rke2-server.env.append" \ - >> "$OUTPUTDIR/usr/local/lib/systemd/system/rke2-server.env"; - cat \ - "$BUILDROOT/files/configs/etc/multipath.conf.append" \ - >> "$OUTPUTDIR/etc/multipath.conf"; - cat \ - "$BUILDROOT/files/configs/etc/sysctl.conf.append" \ - >> "$OUTPUTDIR/etc/sysctl.conf"; -} - -function finalize_rke2() { - log_info "finalizing rke2 install"; - mkdir -p "$OUTPUTDIR/etc/kubernetes"; - mkdir -p "$OUTPUTDIR/etc/super/etc/iscsi"; - cp -a "$OUTPUTDIR/etc/iscsi/." 
"$OUTPUTDIR/etc/super/etc/iscsi/"; -} - -template_rke2_configs_postinstall; -append_to_files; -finalize_rke2; diff --git a/src/rootfs/files/scripts/template_rke2_configs_preinstall.sh b/src/rootfs/files/scripts/template_rke2_configs_preinstall.sh deleted file mode 100755 index 44472ba5..00000000 --- a/src/rootfs/files/scripts/template_rke2_configs_preinstall.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode; -set -euo pipefail; - -# public, required -# OUTPUTDIR -# LOCAL_REGISTRY_HOST -# SUPER_REGISTRY_HOST - -# private -BUILDROOT="/buildroot"; - -# init loggggging; -source "$BUILDROOT/files/scripts/log.sh"; - -function check_args() { - if [[ -z "${LOCAL_REGISTRY_HOST:-""}" ]]; then - log_fail "LOCAL_REGISTRY_HOST is required"; - fi - if [[ -z "${SUPER_REGISTRY_HOST:-""}" ]]; then - log_fail "SUPER_REGISTRY_HOST is required"; - fi -} - -function template_rke2_configs_preinstall() { - log_info "templating rke2 configs before install"; - mkdir -p "$OUTPUTDIR/etc/rancher/rke2"; - NODENAME="$(cat "$OUTPUTDIR/etc/hostname")" \ - envsubst \ - '$LOCAL_REGISTRY_HOST,$NODENAME' \ - < "$BUILDROOT/files/configs/etc/rancher/rke2/config.yaml.tmpl" \ - > "$OUTPUTDIR/etc/rancher/rke2/config.yaml"; - envsubst \ - '$SUPER_REGISTRY_HOST,$LOCAL_REGISTRY_HOST' \ - < "$BUILDROOT/files/configs/etc/rancher/rke2/registries.yaml.tmpl" \ - > "$OUTPUTDIR/etc/rancher/rke2/registries.yaml"; -} - -check_args; -template_rke2_configs_preinstall; diff --git a/src/services/apps/mongodb/main.py b/src/services/apps/mongodb/main.py deleted file mode 100755 index 3e655a04..00000000 --- a/src/services/apps/mongodb/main.py +++ /dev/null @@ -1,681 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import os -import shutil -import subprocess -import json -import time -from pathlib import Path -from typing import Optional, Tuple, List, Dict - -from provision_plugin_sdk import ProvisionPlugin, PluginInput, PluginOutput - -# Configuration -MONGO_PORT = 
int(os.environ.get("MONGO_PORT", "27017")) -MONGO_CONFIG_FILE = Path("/etc/mongod.conf") -MONGO_DATA_DIR = Path("/var/lib/mongodb") -MONGO_LOG_DIR = Path("/var/log/mongodb") -REPLICA_SET_NAME = os.environ.get("MONGO_RS", "rs0") -PRIMARY_CONNECT_POSTPONE_LIMIT = int(os.environ.get("MONGO_PRIMARY_POSTPONE_LIMIT", "5")) -PRIMARY_CONNECT_FAIL_KEY = "mongodb_primary_connect_failures" - -plugin = ProvisionPlugin() - -def log_event(scope: str, message: str, details: Optional[Dict] = None) -> None: - ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) - if details is None: - print(f"{ts} [mongodb][{scope}] {message}", flush=True) - return - try: - payload = json.dumps(details, ensure_ascii=False, sort_keys=True, default=str) - except Exception: - payload = str(details) - print(f"{ts} [mongodb][{scope}] {message} | {payload}", flush=True) - -# Helpers -def get_node_tunnel_ip(node_id: str, wg_props: List[dict]) -> Optional[str]: - for prop in wg_props: - if prop.get("node_id") == node_id and prop.get("name") == "tunnel_ip": - return prop.get("value") - return None - -def check_all_nodes_have_wg(cluster_nodes: List[dict], wg_props: List[dict]) -> bool: - for node in cluster_nodes: - if not get_node_tunnel_ip(node.get("node_id"), wg_props): - return False - return True - -def is_rs_initialized(mongo_props: List[dict]) -> bool: - for prop in mongo_props: - if prop.get("name") == "mongodb_rs_initialized" and prop.get("value") == "true": - return True - return False - -def get_mongo_service_name() -> str: - # Prefer "mongod", fallback to "mongodb" - try: - res = subprocess.run(["systemctl", "status", "mongod"], capture_output=True, text=True) - if res.returncode in (0, 3): # active or inactive - return "mongod" - except Exception: - pass - try: - res = subprocess.run(["systemctl", "status", "mongodb"], capture_output=True, text=True) - if res.returncode in (0, 3): - return "mongodb" - except Exception: - pass - return "mongod" - -def is_mongo_available() -> bool: - return 
shutil.which("mongod") is not None - -def install_mongodb(): - # Try installing via apt (best effort, Ubuntu expected) - if not os.path.exists("/etc/os-release"): - raise Exception("Cannot detect OS: /etc/os-release not found") - with open("/etc/os-release", "r") as f: - os_release = f.read().lower() - if "ubuntu" not in os_release: - raise Exception("Unsupported OS for MongoDB installation") - - # Update and try packages commonly available - res = subprocess.run(["apt-get", "update"], capture_output=True, text=True) - if res.returncode != 0: - raise Exception(f"apt-get update failed: {res.stderr}") - - # Prefer 'mongodb' first (may exist in Ubuntu repos), fallback to 'mongodb-org' (requires repo) - for pkg in (["mongodb"], ["mongodb-org"]): - res = subprocess.run(["apt-get", "install", "-y", *pkg], capture_output=True, text=True) - if res.returncode == 0: - return - raise Exception("Failed to install MongoDB via apt (mongodb, mongodb-org)") - -def write_mongod_config(bind_ip: str): - MONGO_DATA_DIR.mkdir(parents=True, exist_ok=True) - MONGO_LOG_DIR.mkdir(parents=True, exist_ok=True) - # Minimal YAML config - cfg = f"""# managed by provision plugin -storage: - dbPath: {str(MONGO_DATA_DIR)} -systemLog: - destination: file - logAppend: true - path: {str(MONGO_LOG_DIR)}/mongod.log -net: - bindIp: 127.0.0.1,{bind_ip} - port: {MONGO_PORT} -replication: - replSetName: {REPLICA_SET_NAME} -processManagement: - timeZoneInfo: /usr/share/zoneinfo -""" - MONGO_CONFIG_FILE.write_text(cfg) - -def ensure_runtime_dirs(): - try: - # Ensure data, log and runtime dirs exist and owned by mongodb - MONGO_DATA_DIR.mkdir(parents=True, exist_ok=True) - MONGO_LOG_DIR.mkdir(parents=True, exist_ok=True) - run_dir = Path("/run/mongodb") - run_dir.mkdir(parents=True, exist_ok=True) - try: - shutil.chown(str(MONGO_DATA_DIR), user="mongodb", group="mongodb") - shutil.chown(str(MONGO_LOG_DIR), user="mongodb", group="mongodb") - shutil.chown(str(run_dir), user="mongodb", group="mongodb") - except 
Exception: - # If user/group not present or chown fails, ignore; systemd tmpfiles may fix it - pass - except Exception: - pass - -def capture_mongo_diagnostics(svc: str) -> str: - parts: List[str] = [] - try: - res = subprocess.run(["systemctl", "status", svc, "--no-pager"], capture_output=True, text=True, timeout=10) - parts.append(f"systemctl status {svc}:\n{(res.stdout or '')}\n{(res.stderr or '')}") - except Exception as e: - parts.append(f"systemctl status {svc} error: {e}") - try: - res = subprocess.run(["journalctl", "-u", svc, "-n", "200", "--no-pager"], capture_output=True, text=True, timeout=10) - parts.append(f"journalctl -u {svc} -n 200:\n{(res.stdout or '')}\n{(res.stderr or '')}") - except Exception as e: - parts.append(f"journalctl fetch error: {e}") - try: - log_path = MONGO_LOG_DIR / "mongod.log" - if log_path.exists(): - with open(log_path, "r") as f: - lines = f.readlines()[-200:] - parts.append("tail -n 200 /var/log/mongodb/mongod.log:\n" + "".join(lines)) - else: - parts.append("mongod.log not found at /var/log/mongodb/mongod.log") - except Exception as e: - parts.append(f"read mongod.log error: {e}") - return "\n\n".join(parts) - -def mongo_shell_binary() -> Optional[str]: - for b in ("mongosh", "mongo"): - if shutil.which(b): - return b - return None - -def mongo_eval_json(host: str, js: str, timeout: int = 10) -> Tuple[bool, Optional[dict], Optional[str]]: - """ - Execute JS and try to parse JSON result. We wrap the expression to JSON.stringify(). - """ - bin_ = mongo_shell_binary() - if not bin_: - return False, None, "No mongo shell (mongosh or mongo) found" - cmd = [ - bin_, - f"mongodb://{host}:{MONGO_PORT}/admin", - "--quiet", - "--eval", - f"try {{ let r=({js}); r = (r===undefined)? 
{{ok:1}} : r; print(JSON.stringify(r)); }} catch(e) {{ print(JSON.stringify({{ok:0, error:''+e}})); }}" - ] - try: - res = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) - out = (res.stdout or "").strip().splitlines() - line = out[-1] if out else "" - try: - obj = json.loads(line) if line else None - except Exception: - obj = None - ok = res.returncode == 0 and isinstance(obj, dict) - return ok, obj, res.stderr - except Exception as e: - return False, None, str(e) - -def wait_for_mongo_ready(host: str, timeout_sec: int = 60) -> bool: - start = time.time() - # If mongo shell not available, fallback to checking TCP port open - if not mongo_shell_binary(): - import socket - while time.time() - start < timeout_sec: - try: - with socket.create_connection((host, MONGO_PORT), timeout=2): - return True - except Exception: - time.sleep(2) - return False - while time.time() - start < timeout_sec: - ok, obj, _ = mongo_eval_json(host, "db.runCommand({ping:1})", timeout=5) - if ok and obj and obj.get("ok") == 1: - return True - time.sleep(2) - return False - -def is_mongo_running() -> Tuple[bool, Optional[str]]: - try: - svc = get_mongo_service_name() - res = subprocess.run(["systemctl", "is-active", svc], capture_output=True, text=True) - active = res.stdout.strip() == "active" - return active, None if active else f"Service status: {res.stdout.strip()}" - except Exception as e: - return False, f"Failed to check service status: {str(e)}" - -def rs_status(host: str) -> Tuple[Optional[dict], Optional[str]]: - ok, obj, err = mongo_eval_json(host, "rs.status()", timeout=10) - if ok and obj: - return obj, None - return None, err - -def rs_initiate(host: str, members_hosts: List[str]) -> bool: - members = [{"_id": i, "host": h} for i, h in enumerate(members_hosts)] - js = f'rs.initiate({{ _id: "{REPLICA_SET_NAME}", members: {json.dumps(members)} }})' - ok, obj, _ = mongo_eval_json(host, js, timeout=20) - return bool(ok and obj and obj.get("ok") == 1) - -def 
rs_add_missing(host: str, desired_hosts: List[str]) -> None: - ok, current, _ = mongo_eval_json(host, "rs.conf()", timeout=10) - if not ok or not isinstance(current, dict): - return - cfg = current - existing_hosts = set() - for m in (cfg.get("members") or []): - h = m.get("host") - if h: - existing_hosts.add(h) - for h in desired_hosts: - if h not in existing_hosts: - mongo_eval_json(host, f'rs.add("{h}")', timeout=15) - -def split_host_port(host_or_hostport: str) -> Tuple[str, int]: - if not host_or_hostport: - return "", MONGO_PORT - if ":" not in host_or_hostport: - return host_or_hostport, MONGO_PORT - host, port_raw = host_or_hostport.rsplit(":", 1) - try: - return host, int(port_raw) - except Exception: - return host, MONGO_PORT - -def get_primary_from_local(local_host: str) -> Tuple[Optional[str], Optional[str]]: - ok, obj, err = mongo_eval_json(local_host, "db.hello ? db.hello() : db.isMaster()", timeout=10) - if not ok or not isinstance(obj, dict): - return None, err or "db.hello() failed" - - if obj.get("isWritablePrimary") is True or obj.get("ismaster") is True: - return f"{local_host}:{MONGO_PORT}", None - - primary = obj.get("primary") - if isinstance(primary, str) and primary: - return primary, None - - return None, "No PRIMARY in hello()" - -def rs_sync_members(primary_host: str, desired_hosts: List[str]) -> Tuple[bool, Optional[str]]: - primary_ip, primary_port = split_host_port(primary_host) - if not primary_ip: - return False, f"Invalid PRIMARY host: {primary_host}" - if primary_port != MONGO_PORT: - return False, f"PRIMARY port mismatch: got {primary_port}, expected {MONGO_PORT}" - - ok, current, err = mongo_eval_json(primary_ip, "rs.conf()", timeout=15) - if not ok or not isinstance(current, dict): - return False, err or "Cannot read rs.conf() from PRIMARY" - - existing_hosts = set() - for m in (current.get("members") or []): - h = m.get("host") - if h: - existing_hosts.add(h) - - desired_set = set(desired_hosts) - to_remove = 
sorted(existing_hosts - desired_set) - to_add = sorted(desired_set - existing_hosts) - - for h in to_remove: - ok, obj, err = mongo_eval_json(primary_ip, f'rs.remove("{h}")', timeout=20) - if not ok or not isinstance(obj, dict) or obj.get("ok") != 1: - return False, err or f"Failed to remove member {h}: {obj}" - - for h in to_add: - ok, obj, err = mongo_eval_json(primary_ip, f'rs.add("{h}")', timeout=20) - if not ok or not isinstance(obj, dict) or obj.get("ok") != 1: - return False, err or f"Failed to add member {h}: {obj}" - - return True, None - -def force_local_primary(local_ip: str) -> Tuple[bool, Optional[str]]: - local_host = f"{local_ip}:{MONGO_PORT}" - - ok, cfg, err = mongo_eval_json(local_ip, "rs.conf()", timeout=15) - if not ok or not isinstance(cfg, dict): - return False, err or "Cannot read rs.conf()" - - # IMPORTANT: do not copy member config from rs.conf() as-is. - # In mongosh, numeric BSON fields (e.g. secondaryDelaySecs) may be represented - # as nested objects and break rs.reconfig() validation. - # Build a minimal valid member definition instead. - # Preserve existing member _id for local host to avoid host/_id mismatch errors - # when the same host already exists in the old configuration. 
- local_member_id = 0 - for m in (cfg.get("members") or []): - if m.get("host") == local_host: - try: - local_member_id = int(m.get("_id")) - except Exception: - local_member_id = 0 - break - - local_member = { - "_id": local_member_id, - "host": local_host, - "priority": 1, - "votes": 1, - } - - new_cfg = { - "_id": cfg.get("_id", REPLICA_SET_NAME), - "version": int(cfg.get("version", 1)) + 1, - "members": [local_member], - } - - js = f"rs.reconfig({json.dumps(new_cfg)}, {{force: true}})" - ok, obj, err = mongo_eval_json(local_ip, js, timeout=30) - if ok and isinstance(obj, dict) and obj.get("ok") == 1: - return True, None - return False, err or f"force reconfig failed: {obj}" - -# Commands -@plugin.command("init") -def handle_init(input_data: PluginInput) -> PluginOutput: - try: - if not is_mongo_available(): - install_mongodb() - MONGO_LOG_DIR.mkdir(parents=True, exist_ok=True) - return PluginOutput(status="completed", local_state=input_data.local_state) - except Exception as e: - return PluginOutput(status="error", error_message=str(e), local_state=input_data.local_state) - -@plugin.command("apply") -def handle_apply(input_data: PluginInput) -> PluginOutput: - local_node_id = input_data.local_node_id - state_json = input_data.state or {} - local_state = input_data.local_state or {} - - log_event( - "apply", - "apply started", - { - "local_node_id": local_node_id, - "state_type": type(state_json).__name__, - "local_state_keys": sorted(list(local_state.keys())) if isinstance(local_state, dict) else [], - }, - ) - - if not isinstance(state_json, dict): - log_event("apply", "invalid state format", {"received_type": type(state_json).__name__}) - return PluginOutput(status="error", error_message="Invalid state format", local_state=local_state) - - cluster_nodes = state_json.get("clusterNodes", []) - mongo_props = state_json.get("mongodbNodeProperties", []) - wg_props = state_json.get("wgNodeProperties", []) - - log_event( - "apply", - "parsed state", - { - 
"cluster_nodes_count": len(cluster_nodes) if isinstance(cluster_nodes, list) else None, - "mongo_props_count": len(mongo_props) if isinstance(mongo_props, list) else None, - "wg_props_count": len(wg_props) if isinstance(wg_props, list) else None, - }, - ) - - if not check_all_nodes_have_wg(cluster_nodes, wg_props): - missing_wg = [] - for node in cluster_nodes: - node_id = node.get("node_id") - if not get_node_tunnel_ip(node_id, wg_props): - missing_wg.append(node_id) - log_event("apply", "postponed: waiting for WireGuard", {"missing_wg_node_ids": missing_wg}) - return PluginOutput(status="postponed", error_message="Waiting for WireGuard to be configured on all nodes", local_state=local_state) - - # Determine leader - cluster = state_json.get("cluster", {}) - leader_node_id = cluster.get("leader_node") - is_leader = leader_node_id == local_node_id - initialized = is_rs_initialized(mongo_props) - - log_event( - "apply", - "topology state", - { - "leader_node_id": leader_node_id, - "is_leader": is_leader, - "rs_initialized": initialized, - }, - ) - - local_tunnel_ip = get_node_tunnel_ip(local_node_id, wg_props) - if not local_tunnel_ip: - log_event("apply", "error: local tunnel ip not found", {"local_node_id": local_node_id}) - return PluginOutput(status="error", error_message="Local node has no WireGuard tunnel IP", local_state=local_state) - - # Write config bound to WG IP with replication enabled - try: - log_event("apply", "writing mongod config", {"bind_ip": local_tunnel_ip, "port": MONGO_PORT}) - write_mongod_config(local_tunnel_ip) - except Exception as e: - log_event("apply", "error: failed to write mongod config", {"error": str(e)}) - return PluginOutput(status="error", error_message=f"Failed to write mongod config: {e}", local_state=local_state) - - # Ensure service is running on correct IP - ensure_runtime_dirs() - needs_restart = False - running, _ = is_mongo_running() - log_event("apply", "mongo process state", {"running": running}) - if not running: - 
needs_restart = True - else: - # best-effort ping on WG IP - if not wait_for_mongo_ready(local_tunnel_ip, timeout_sec=5): - needs_restart = True - - log_event("apply", "restart decision", {"needs_restart": needs_restart, "local_tunnel_ip": local_tunnel_ip}) - - if needs_restart: - try: - svc = get_mongo_service_name() - log_event("apply", "restarting service", {"service": svc}) - subprocess.run(["systemctl", "daemon-reload"], capture_output=True, text=True) - subprocess.run(["systemctl", "enable", svc], capture_output=True, text=True) - res = subprocess.run(["systemctl", "restart", svc], capture_output=True, text=True, timeout=30) - log_event( - "apply", - "service restart result", - { - "service": svc, - "returncode": res.returncode, - "stderr_tail": (res.stderr or "")[-400:], - }, - ) - if res.returncode != 0: - diag = capture_mongo_diagnostics(svc) - log_event("apply", "error: service restart failed", {"service": svc}) - return PluginOutput(status="error", error_message=f"Failed to start {svc}: {res.stderr}\n\n{diag}", local_state=local_state) - except Exception as e: - svc = "mongod" - diag = capture_mongo_diagnostics(svc) - log_event("apply", "error: exception during service restart", {"error": str(e)}) - return PluginOutput(status="error", error_message=f"Failed to start mongod: {e}\n\n{diag}", local_state=local_state) - - if not wait_for_mongo_ready(local_tunnel_ip, timeout_sec=60): - node_props = {"mongodb_node_ready": "false"} - svc = get_mongo_service_name() - diag = capture_mongo_diagnostics(svc) - log_event("apply", "postponed: mongod not ready after restart", {"service": svc}) - return PluginOutput(status="postponed", error_message=f"mongod not ready yet\n\n{diag}", node_properties=node_props, local_state=local_state) - - # At this point local mongod is up - node_ready_props = {"mongodb_node_ready": "true"} - log_event("apply", "local mongod ready", {"node_ready": True}) - - # Leader initializes or updates the replica set - # Always configure a replica 
set even with a single node - if is_leader and not initialized: - # Build desired members from all cluster nodes (their WG IPs) - desired_hosts = [] - for n in cluster_nodes: - ip = get_node_tunnel_ip(n.get("node_id"), wg_props) - if ip: - desired_hosts.append(f"{ip}:{MONGO_PORT}") - - log_event("apply", "leader init path", {"desired_hosts": desired_hosts}) - - # If multiple nodes, wait until all have mongod ready before initiating - if len(cluster_nodes) > 1: - not_ready = [] - for n in cluster_nodes: - nid = n.get("node_id") - ready = False - for p in mongo_props: - if p.get("node_id") == nid and p.get("name") == "mongodb_node_ready" and p.get("value") == "true": - ready = True - break - if not ready: - not_ready.append(nid) - if not_ready: - log_event("apply", "postponed: waiting for peer readiness", {"not_ready": not_ready}) - return PluginOutput( - status="postponed", - error_message=f"Waiting for nodes to be ready: {', '.join(not_ready)}", - node_properties=node_ready_props, - local_state=local_state - ) - - # Initiate replica set (single or multi-node) - log_event("apply", "attempt rs.initiate", {"host": local_tunnel_ip, "members": desired_hosts}) - if rs_initiate(local_tunnel_ip, desired_hosts): - # Give it a moment to elect primary - time.sleep(3) - done_props = {"mongodb_rs_initialized": "true", **node_ready_props} - log_event("apply", "rs.initiate succeeded", {"mongodb_rs_initialized": True}) - return PluginOutput(status="completed", node_properties=done_props, local_state=local_state) - else: - log_event("apply", "postponed: rs.initiate failed") - return PluginOutput(status="postponed", error_message="Failed to initiate replica set", node_properties=node_ready_props, local_state=local_state) - - # If already initialized, leader may add missing members - if is_leader and initialized: - desired_hosts = [] - for n in cluster_nodes: - ip = get_node_tunnel_ip(n.get("node_id"), wg_props) - if ip: - desired_hosts.append(f"{ip}:{MONGO_PORT}") - - 
log_event("apply", "leader sync path", {"desired_hosts": desired_hosts}) - - primary_host, primary_err = get_primary_from_local(local_tunnel_ip) - can_use_primary = False - if primary_host: - primary_ip, primary_port = split_host_port(primary_host) - can_use_primary = wait_for_mongo_ready(primary_ip, timeout_sec=5) and primary_port == MONGO_PORT - - log_event( - "apply", - "primary detection", - { - "primary_host": primary_host, - "primary_err": primary_err, - "can_use_primary": can_use_primary, - }, - ) - - if can_use_primary and primary_host: - ok, sync_err = rs_sync_members(primary_host, desired_hosts) - if not ok: - log_event("apply", "postponed: rs sync via primary failed", {"primary_host": primary_host, "error": sync_err}) - return PluginOutput( - status="postponed", - error_message=f"Replica set sync failed via PRIMARY {primary_host}: {sync_err}", - node_properties=node_ready_props, - local_state=local_state, - ) - local_state[PRIMARY_CONNECT_FAIL_KEY] = 0 - log_event("apply", "rs sync via primary succeeded", {"primary_host": primary_host}) - else: - fails = int(local_state.get(PRIMARY_CONNECT_FAIL_KEY, 0)) + 1 - local_state[PRIMARY_CONNECT_FAIL_KEY] = fails - reason = primary_err or "PRIMARY is unreachable" - log_event( - "apply", - "primary unavailable", - { - "fails": fails, - "limit": PRIMARY_CONNECT_POSTPONE_LIMIT, - "reason": reason, - }, - ) - - if fails < PRIMARY_CONNECT_POSTPONE_LIMIT: - log_event("apply", "postponed: waiting for primary recovery", {"fails": fails}) - return PluginOutput( - status="postponed", - error_message=f"PRIMARY unavailable ({fails}/{PRIMARY_CONNECT_POSTPONE_LIMIT}): {reason}", - node_properties=node_ready_props, - local_state=local_state, - ) - - log_event("apply", "forcing local node to primary", {"local_tunnel_ip": local_tunnel_ip}) - forced, force_err = force_local_primary(local_tunnel_ip) - if not forced: - log_event("apply", "postponed: failed to force local primary", {"error": force_err}) - return PluginOutput( - 
status="postponed", - error_message=f"Failed to force local PRIMARY: {force_err}", - node_properties=node_ready_props, - local_state=local_state, - ) - - time.sleep(2) - ok, sync_err = rs_sync_members(f"{local_tunnel_ip}:{MONGO_PORT}", desired_hosts) - if not ok: - log_event("apply", "postponed: rs sync after force failed", {"error": sync_err}) - return PluginOutput( - status="postponed", - error_message=f"Replica set sync after force failed: {sync_err}", - node_properties=node_ready_props, - local_state=local_state, - ) - - local_state[PRIMARY_CONNECT_FAIL_KEY] = 0 - log_event("apply", "rs sync after force succeeded") - - # Non-leader or after init: ensure local node reports ready - log_event( - "apply", - "apply finished", - { - "result_status": "completed" if initialized else "postponed", - "initialized": initialized, - "leader_node_id": leader_node_id, - }, - ) - return PluginOutput(status="completed" if initialized else "postponed", - error_message=None if initialized else f"Waiting for leader node {leader_node_id} to initialize replica set", - node_properties=node_ready_props, - local_state=local_state) - -@plugin.command("health") -def handle_health(input_data: PluginInput) -> PluginOutput: - state_json = input_data.state or {} - local_state = input_data.local_state or {} - local_node_id = input_data.local_node_id - - running, err = is_mongo_running() - if not running: - if err and "Failed to" in err: - return PluginOutput(status="error", error_message=err, local_state=local_state) - return PluginOutput(status="postponed", error_message=err or "mongod not running", local_state=local_state) - - wg_props = state_json.get("wgNodeProperties", []) if isinstance(state_json, dict) else [] - ip = get_node_tunnel_ip(local_node_id, wg_props) - if not ip: - return PluginOutput(status="postponed", error_message="No tunnel IP available", local_state=local_state) - - if not wait_for_mongo_ready(ip, timeout_sec=5): - return PluginOutput(status="postponed", 
error_message="MongoDB ping failed", local_state=local_state) - - # Check rs.status() ok if initialized - st, _ = rs_status(ip) - if st and st.get("ok") == 1: - return PluginOutput(status="completed", local_state=local_state) - # If not initialized yet, still healthy if process is running - return PluginOutput(status="postponed", error_message="Replica set not healthy/initialized yet", local_state=local_state) - -@plugin.command("finalize") -def handle_finalize(input_data: PluginInput) -> PluginOutput: - # No-op for now; graceful removal could be implemented (step down, remove member, etc.) - return PluginOutput(status="completed", local_state=input_data.local_state or {}) - -@plugin.command("destroy") -def handle_destroy(input_data: PluginInput) -> PluginOutput: - try: - svc = get_mongo_service_name() - subprocess.run(["systemctl", "stop", svc], check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - subprocess.run(["systemctl", "disable", svc], check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - - if MONGO_CONFIG_FILE.exists(): - try: - MONGO_CONFIG_FILE.unlink() - except Exception: - pass - if MONGO_DATA_DIR.exists(): - shutil.rmtree(MONGO_DATA_DIR, ignore_errors=True) - if MONGO_LOG_DIR.exists(): - shutil.rmtree(MONGO_LOG_DIR, ignore_errors=True) - - node_properties = { - "mongodb_node_ready": None, - "mongodb_rs_initialized": None, - } - return PluginOutput(status="completed", node_properties=node_properties, local_state={}) - except Exception as e: - return PluginOutput(status="error", error_message=f"Failed to destroy MongoDB: {e}", local_state={}) - -if __name__ == "__main__": - plugin.run() diff --git a/src/services/apps/mongodb/manifest.yaml b/src/services/apps/mongodb/manifest.yaml deleted file mode 100644 index 5923c799..00000000 --- a/src/services/apps/mongodb/manifest.yaml +++ /dev/null @@ -1,73 +0,0 @@ -name: mongodb -version: 1.0.0 -commands: - - init - - apply - - health - - finalize - - destroy 
-healthcheckIntervalSecs: 60 -entrypoint: main.py -stateExpr: - engine: jq - query: | - ($swarmdb.clusters[] | select(.id == "{{ clusterId }}" and .deleted_ts == null)) as $cluster | - - ([$swarmdb.clusternodes[] | select(.cluster == "{{ clusterId }}" and .deleted_ts == null)]) as $mongoClusterNodes | - - ($mongoClusterNodes | map(.node)) as $mongoNodeIds | - - ( - $swarmdb.clusters[] | - select(.cluster_policy == "wireguard" and .deleted_ts == null) | - select( - ( - [$swarmdb.clusternodes[] | select(.deleted_ts == null and (.node | IN($mongoNodeIds[])))] | - length > 0 - ) - ) - ) as $wgCluster | - - { - cluster: { - id: $cluster.id, - cluster_policy: $cluster.cluster_policy, - leader_node: $cluster.leader_node - }, - - clusterNodes: [ - $mongoClusterNodes[] | - {id, node_id: .node, cluster} - ] | sort_by(.id, .node_id, .cluster), - - mongodbNodeProperties: [ - $swarmdb.clusternodeproperties[] | - select( - (.cluster_node | startswith("{{ clusterId }}:")) and - .deleted_ts == null and - (.name | startswith("mongodb_")) - ) | - {cluster_node, name, value, node_id: ( .cluster_node as $cn | $swarmdb.clusternodes[] | select(.id == $cn)) | .node} - ] | sort_by(.cluster_node, .name, .value, .node_id), - - wgCluster: { - id: $wgCluster.id - }, - - wgNodeProperties: [ - $swarmdb.clusternodeproperties[] | - select( - (.cluster_node | startswith($wgCluster.id)) and - .deleted_ts == null and - .name == "tunnel_ip" - ) | - {cluster_node, name, value, node_id: ( .cluster_node as $cn | $swarmdb.clusternodes[] | select(.id == $cn)) | .node} - ] | sort_by(.cluster_node, .name, .value, .node_id), - - nodeAddrs: [ - $swarmdb.nodes[] | - select(.node_id | IN($mongoNodeIds[])) | - {node_id: .node_id, addr: .addr, port: .port} - ] | sort_by(.node_id, .addr, .port) - } - diff --git a/src/services/apps/nats/main.py b/src/services/apps/nats/main.py deleted file mode 100644 index ef2bb628..00000000 --- a/src/services/apps/nats/main.py +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env 
python3 - -import sys -import os -import shutil -import subprocess -import socket -import time -from pathlib import Path -from typing import Optional - -from provision_plugin_sdk import ProvisionPlugin, PluginInput, PluginOutput -import pwd -import grp - -# Configuration -NATS_VERSION = os.environ.get("NATS_VERSION", "2") # informational -NATS_CLIENT_PORT = 4222 -NATS_CLUSTER_PORT = 6222 -NATS_MONITOR_PORT = 8222 -NATS_CONFIG_DIR = Path("/etc/nats") -NATS_CONFIG_FILE = NATS_CONFIG_DIR / "nats-server.conf" -NATS_DATA_DIR = Path("/var/lib/nats") -NATS_SERVICE_NAME = "nats-server" -NATS_BIN = "nats-server" -CLUSTER_NAME = os.environ.get("NATS_CLUSTER_NAME", "swarm-nats") - -# Plugin setup -plugin = ProvisionPlugin() - - -# Helpers -def _get_uid_gid(user: str, group: str) -> tuple[int, int]: - """Return uid,gid for user/group, fallback to 0 if not found.""" - try: - uid = pwd.getpwnam(user).pw_uid - except KeyError: - uid = 0 - try: - gid = grp.getgrnam(group).gr_gid - except KeyError: - gid = 0 - return uid, gid - - -def _ensure_dir_owned(path: Path, user: str, group: str, mode: int = 0o750) -> None: - """Ensure directory exists with given owner and mode.""" - path.mkdir(parents=True, exist_ok=True) - uid, gid = _get_uid_gid(user, group) - try: - os.chown(path.as_posix(), uid, gid) - except PermissionError: - # Not fatal; service may still have access if perms allow - pass - try: - os.chmod(path.as_posix(), mode) - except PermissionError: - pass - - -def get_node_tunnel_ip(node_id: str, wg_props: list) -> Optional[str]: - for prop in wg_props: - if prop.get("node_id") == node_id and prop.get("name") == "tunnel_ip": - return prop.get("value") - return None - - -def check_all_nodes_have_wg(cluster_nodes: list, wg_props: list) -> bool: - for node in cluster_nodes: - node_id = node.get("node_id") - if not get_node_tunnel_ip(node_id, wg_props): - return False - return True - - -def get_leader_node(state_json: dict) -> Optional[str]: - cluster = state_json.get("cluster", 
{}) - return cluster.get("leader_node") - - -def is_nats_available() -> bool: - return shutil.which(NATS_BIN) is not None - - -def install_nats(): - try: - if not os.path.exists("/etc/os-release"): - raise Exception("Cannot detect OS: /etc/os-release not found") - with open("/etc/os-release", "r") as f: - os_release = f.read() - if "ubuntu" in os_release.lower(): - r = subprocess.run(["apt-get", "update"], capture_output=True, text=True) - if r.returncode != 0: - raise Exception(f"apt-get update failed: {r.stderr}") - # Use distro package if available - r = subprocess.run(["apt-get", "install", "-y", "nats-server"], capture_output=True, text=True) - if r.returncode != 0: - raise Exception(f"nats-server installation failed: {r.stderr}") - return - raise Exception("Unsupported OS for NATS installation") - except Exception as e: - print(f"[!] Failed to install NATS: {e}", file=sys.stderr) - raise - - -def write_nats_config(local_node_id: str, local_tunnel_ip: str, cluster_nodes: list, wg_props: list): - NATS_CONFIG_DIR.mkdir(parents=True, exist_ok=True) - # Ensure data dir is owned by nats so JetStream can create subdirs - _ensure_dir_owned(NATS_DATA_DIR, user="nats", group="nats", mode=0o750) - - # Build routes for all peers except self - routes = [] - for node in cluster_nodes: - nid = node.get("node_id") - t_ip = get_node_tunnel_ip(nid, wg_props) - if not t_ip: - continue - if nid == local_node_id: - continue - routes.append(f"nats://{t_ip}:{NATS_CLUSTER_PORT}") - - cfg_lines = [ - f"port: {NATS_CLIENT_PORT}", - f"http: {NATS_MONITOR_PORT}", - f"host: {local_tunnel_ip}", - "", - "jetstream: {", - f" store_dir: \"{(NATS_DATA_DIR / 'jetstream').as_posix()}\"", - "}", - f"server_name: {local_node_id}", - ] - # Only enable clustering when there are peers to route to. 
- if routes: - cfg_lines += [ - "", - "cluster: {", - f" name: {CLUSTER_NAME},", - f" host: {local_tunnel_ip},", - f" port: {NATS_CLUSTER_PORT},", - " routes: [", - ] - for r in routes: - cfg_lines.append(f' "{r}",') - cfg_lines += [ - " ]", - "}", - ] - cfg_lines += [ - "", - "resolver: memory", - "no_auth_user: ''", - ] - - NATS_CONFIG_FILE.write_text("\n".join(cfg_lines) + "\n") - - -def wait_for_tcp(ip: str, port: int, timeout_sec: int = 60) -> bool: - start = time.time() - last_err = None - while time.time() - start < timeout_sec: - try: - with socket.create_connection((ip, port), timeout=3): - return True - except Exception as e: - last_err = str(e) - time.sleep(2) - print(f"[!] Port {ip}:{port} not reachable within {timeout_sec}s. Last error: {last_err}", file=sys.stderr) - return False - - -def is_service_active(service: str) -> tuple[bool, Optional[str]]: - try: - result = subprocess.run(["systemctl", "is-active", service], capture_output=True, text=True) - active = result.stdout.strip() == "active" - return active, None if active else f"Service status: {result.stdout.strip()}" - except Exception as e: - return False, f"Failed to check service status: {str(e)}" - - -def is_cluster_initialized(nats_props: list) -> bool: - for prop in nats_props: - if prop.get("name") == "nats_cluster_initialized" and prop.get("value") == "true": - return True - return False - - -def mark_node_ready() -> dict: - return {"nats_node_ready": "true"} - - -# Commands -@plugin.command("init") -def handle_init(input_data: PluginInput) -> PluginOutput: - try: - if not is_nats_available(): - install_nats() - # Ensure runtime dirs exist and owned by nats - _ensure_dir_owned(Path("/var/log/nats"), user="nats", group="nats", mode=0o750) - _ensure_dir_owned(NATS_DATA_DIR, user="nats", group="nats", mode=0o750) - return PluginOutput(status="completed", local_state=input_data.local_state) - except Exception as e: - return PluginOutput(status="error", error_message=str(e), 
local_state=input_data.local_state) - - -@plugin.command("apply") -def handle_apply(input_data: PluginInput) -> PluginOutput: - local_node_id = input_data.local_node_id - state_json = input_data.state or {} - local_state = input_data.local_state or {} - - if not isinstance(state_json, dict): - return PluginOutput(status="error", error_message="Invalid state format", local_state=local_state) - - cluster_nodes = state_json.get("clusterNodes", []) - wg_props = state_json.get("wgNodeProperties", []) - nats_props = state_json.get("natsNodeProperties", []) - - if not check_all_nodes_have_wg(cluster_nodes, wg_props): - return PluginOutput( - status="postponed", - error_message="Waiting for WireGuard to be configured on all nodes", - local_state=local_state, - ) - - leader_node_id = get_leader_node(state_json) - is_leader = (leader_node_id == local_node_id) - cluster_initialized = is_cluster_initialized(nats_props) - - local_tunnel_ip = get_node_tunnel_ip(local_node_id, wg_props) - if not local_tunnel_ip: - return PluginOutput(status="error", error_message="Local node has no WireGuard tunnel IP", local_state=local_state) - - # Write NATS config based on current cluster view - try: - write_nats_config(local_node_id, local_tunnel_ip, cluster_nodes, wg_props) - except Exception as e: - return PluginOutput(status="error", error_message=f"Failed to write NATS config: {e}", local_state=local_state) - - # Enable and (re)start service if needed - active, _ = is_service_active(NATS_SERVICE_NAME) - needs_restart = not active - - try: - subprocess.run(["systemctl", "enable", NATS_SERVICE_NAME], capture_output=True, text=True) - result = subprocess.run(["systemctl", "restart", NATS_SERVICE_NAME], capture_output=True, text=True) - if result.returncode != 0: - return PluginOutput(status="error", error_message=f"Failed to start NATS: {result.stderr}", local_state=local_state) - except Exception as e: - return PluginOutput(status="error", error_message=f"Failed to start NATS: {e}", 
local_state=local_state) - - # Wait for client port to be ready - if not wait_for_tcp(local_tunnel_ip, NATS_CLIENT_PORT, timeout_sec=60): - return PluginOutput( - status="postponed", - error_message="NATS did not become ready within timeout", - node_properties=mark_node_ready(), - local_state=local_state, - ) - - # Leader marks cluster initialized (NATS clustering forms via routes automatically) - if is_leader and not cluster_initialized: - node_properties = {"nats_cluster_initialized": "true", "nats_node_ready": "true"} - return PluginOutput(status="completed", node_properties=node_properties, local_state=local_state) - - if cluster_initialized: - # Already initialized — ensure this node is marked ready - return PluginOutput(status="completed", node_properties=mark_node_ready(), local_state=local_state) - - # Non-leader: mark ready and wait for leader - return PluginOutput( - status="postponed", - error_message=f"Waiting for leader node {leader_node_id} to mark cluster initialized", - node_properties=mark_node_ready(), - local_state=local_state, - ) - - -@plugin.command("health") -def handle_health(input_data: PluginInput) -> PluginOutput: - local_node_id = input_data.local_node_id - state_json = input_data.state or {} - local_state = input_data.local_state or {} - - active, err = is_service_active(NATS_SERVICE_NAME) - if not active: - if err and "Failed to" in err: - return PluginOutput(status="error", error_message=err, local_state=local_state) - return PluginOutput(status="postponed", error_message=err or "NATS service is not running", local_state=local_state) - - wg_props = state_json.get("wgNodeProperties", []) if isinstance(state_json, dict) else [] - local_tunnel_ip = get_node_tunnel_ip(local_node_id, wg_props) - if not local_tunnel_ip: - return PluginOutput(status="postponed", error_message="No tunnel IP available", local_state=local_state) - - # Check TCP connectivity to client port - if not wait_for_tcp(local_tunnel_ip, NATS_CLIENT_PORT, timeout_sec=5): 
- return PluginOutput(status="postponed", error_message="NATS not accepting connections yet", local_state=local_state) - - return PluginOutput(status="completed", local_state=local_state) - - -@plugin.command("finalize") -def handle_finalize(input_data: PluginInput) -> PluginOutput: - # No-op for now; could implement graceful cluster changes if needed - return PluginOutput(status="completed", local_state=input_data.local_state or {}) - - -@plugin.command("destroy") -def handle_destroy(input_data: PluginInput) -> PluginOutput: - try: - subprocess.run(["systemctl", "stop", NATS_SERVICE_NAME], check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - subprocess.run(["systemctl", "disable", NATS_SERVICE_NAME], check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - if NATS_CONFIG_DIR.exists(): - shutil.rmtree(NATS_CONFIG_DIR, ignore_errors=True) - if NATS_DATA_DIR.exists(): - shutil.rmtree(NATS_DATA_DIR, ignore_errors=True) - node_properties = { - "nats_node_ready": None, - "nats_cluster_initialized": None, - } - return PluginOutput(status="completed", node_properties=node_properties, local_state={}) - except Exception as e: - return PluginOutput(status="error", error_message=f"Failed to destroy NATS: {e}", local_state={}) - - -if __name__ == "__main__": - plugin.run() diff --git a/src/services/apps/nats/manifest.yaml b/src/services/apps/nats/manifest.yaml deleted file mode 100644 index cd12ea2d..00000000 --- a/src/services/apps/nats/manifest.yaml +++ /dev/null @@ -1,74 +0,0 @@ -name: nats -version: 1.0.0 -commands: - - init - - apply - - health - - finalize - - destroy -healthcheckIntervalSecs: 60 -entrypoint: main.py -stateExpr: - engine: jq - query: | - ($swarmdb.clusters[] | select(.id == "{{ clusterId }}" and .deleted_ts == null)) as $cluster | - - ([$swarmdb.clusternodes[] | select(.cluster == "{{ clusterId }}" and .deleted_ts == null)]) as $natsClusterNodes | - - ($natsClusterNodes | map(.node)) as $natsNodeIds | - - ( - 
$swarmdb.clusters[] | - select(.cluster_policy == "wireguard" and .deleted_ts == null) | - select( - ( - [$swarmdb.clusternodes[] | select(.deleted_ts == null and (.node | IN($natsNodeIds[])))] | - length > 0 - ) - ) - ) as $wgCluster | - - { - cluster: { - id: $cluster.id, - cluster_policy: $cluster.cluster_policy, - leader_node: $cluster.leader_node - }, - - clusterNodes: [ - $natsClusterNodes[] | - {id, node_id: .node, cluster} - ] | sort_by(.id, .node_id, .cluster), - - natsNodeProperties: [ - $swarmdb.clusternodeproperties[] | - select( - (.cluster_node | startswith("{{ clusterId }}:")) and - .deleted_ts == null and - (.name | startswith("nats_")) - ) | - {cluster_node, name, value, node_id: ( .cluster_node as $cn | $swarmdb.clusternodes[] | select(.id == $cn)) | .node} - ] | sort_by(.cluster_node, .name, .value, .node_id), - - wgCluster: { - id: $wgCluster.id - }, - - wgNodeProperties: [ - $swarmdb.clusternodeproperties[] | - select( - (.cluster_node | startswith($wgCluster.id)) and - .deleted_ts == null and - .name == "tunnel_ip" - ) | - {cluster_node, name, value, node_id: ( .cluster_node as $cn | $swarmdb.clusternodes[] | select(.id == $cn)) | .node} - ] | sort_by(.cluster_node, .name, .value, .node_id), - - nodeAddrs: [ - $swarmdb.nodes[] | - select(.node_id | IN($natsNodeIds[])) | - {node_id: .node_id, addr: .addr, port: .port} - ] | sort_by(.node_id, .addr, .port) - } - - diff --git a/src/services/apps/test-app-route/main.py b/src/services/apps/test-app-route/main.py deleted file mode 100644 index f7c2bddb..00000000 --- a/src/services/apps/test-app-route/main.py +++ /dev/null @@ -1,409 +0,0 @@ -#!/usr/bin/env python3 - -import json -import sys -import time -from typing import List, Tuple, Optional - -from provision_plugin_sdk import ProvisionPlugin, PluginInput, PluginOutput - - -ROUTE_DOMAIN = "test.test.oresty.superprotocol.io" -ROUTE_KEY = f"manual-routes:{ROUTE_DOMAIN}" -APP_PORT = 34567 - - -plugin = ProvisionPlugin() - - -def 
get_node_tunnel_ip(node_id: str, wg_props: list) -> Optional[str]: - """Get WireGuard tunnel IP for a node.""" - for prop in wg_props: - if prop.get("node_id") == node_id and prop.get("name") == "tunnel_ip": - return prop.get("value") - return None - - -def get_leader_node(state_json: dict) -> Optional[str]: - """Get leader node ID from cluster info.""" - cluster = state_json.get("cluster", {}) - return cluster.get("leader_node") - - -def is_local_node_leader(local_node_id: str, state_json: dict) -> bool: - """Check if the local node is the cluster leader.""" - return get_leader_node(state_json) == local_node_id - - -def get_sentinel_connection_info(state_json: dict) -> List[Tuple[str, int]]: - """Get Redis Sentinel connection endpoints.""" - sentinel_props = state_json.get("sentinelNodeProperties", []) - wg_props = state_json.get("sentinelWgNodeProperties", []) or state_json.get("wgNodeProperties", []) - - endpoints: List[Tuple[str, int]] = [] - for prop in sentinel_props: - if prop.get("name") != "redis_sentinel_node_ready" or prop.get("value") != "true": - continue - node_id = prop.get("node_id") - if not node_id: - continue - tunnel_ip = get_node_tunnel_ip(node_id, wg_props) - if tunnel_ip: - endpoints.append((tunnel_ip, 26379)) - - return sorted(set(endpoints)) - - -def get_redis_master_endpoint(state_json: dict) -> Tuple[Tuple[str, int] | None, str | None]: - """Resolve Redis master via Sentinel.""" - sentinel_endpoints = get_sentinel_connection_info(state_json) - if not sentinel_endpoints: - return None, "No Redis Sentinel endpoints available" - - try: - import redis - except ImportError: - return None, "redis-py library not installed" - - last_error: str | None = None - for host, port in sentinel_endpoints: - try: - r = redis.Redis( - host=host, - port=port, - decode_responses=True, - socket_connect_timeout=2, - ) - res = r.execute_command("SENTINEL", "get-master-addr-by-name", "redis-master") - if isinstance(res, (list, tuple)) and len(res) >= 2: - return 
(res[0], int(res[1])), None - except Exception as e: - last_error = f"Sentinel {host}:{port} error: {e}" - - return None, last_error or "Failed to resolve Redis master via Sentinel" - - -def ensure_route_in_redis(state_json: dict) -> Tuple[bool, str | None]: - """Create or update the OpenResty route in Redis Cluster. - - Targets are derived from Swarm state: - - For each node in the test-app cluster (clusterNodes), - we find its WireGuard tunnel IP via wgNodeProperties. - - Each such IP becomes a backend URL http://:APP_PORT. - """ - master_endpoint, err = get_redis_master_endpoint(state_json) - if not master_endpoint: - msg = err or "No Redis master endpoint available" - print(f"[!] {msg}", file=sys.stderr) - return False, msg - - cluster_nodes = state_json.get("clusterNodes", []) - wg_props = state_json.get("wgNodeProperties", []) - - # Collect tunnel IPs of all nodes that run test-app - tunnel_ips: List[str] = [] - for node in cluster_nodes: - node_id = node.get("node_id") - ip = get_node_tunnel_ip(node_id, wg_props) - if ip: - tunnel_ips.append(ip) - - if not tunnel_ips: - msg = "No WireGuard tunnel IPs available for test-app nodes" - print(f"[!] {msg}", file=sys.stderr) - return False, msg - - try: - import redis - except ImportError: - return False, "redis-py library not installed" - - # A few retries in case Sentinel/Redis is still converging or there are - # short-lived connectivity issues. 
- max_retries = 20 - retry_delay_sec = 5 - last_error: str | None = None - - for attempt in range(1, max_retries + 1): - try: - print( - f"[*] Attempt {attempt}/{max_retries} to set route {ROUTE_KEY} " - f"in Redis via master={master_endpoint}", - file=sys.stderr, - ) - host, port = master_endpoint - r = redis.Redis( - host=host, - port=port, - decode_responses=True, - socket_connect_timeout=5, - ) - r.ping() - - route_config = { - "targets": [ - {"url": f"http://{ip}:{APP_PORT}", "weight": 1} - for ip in tunnel_ips - ], - "policy": "rr", - "preserve_host": False, - } - - r.set(ROUTE_KEY, json.dumps(route_config)) - print( - f"[*] Successfully set route {ROUTE_KEY} -> {json.dumps(route_config)} " - f"in Redis on attempt {attempt}", - file=sys.stderr, - ) - return True, None - except Exception as e: - last_error = f"Failed to write route to Redis on attempt {attempt}: {e}" - print(f"[!] {last_error}", file=sys.stderr) - - if attempt < max_retries: - time.sleep(retry_delay_sec) - - # All attempts failed - return False, last_error or "Failed to write route to Redis after retries" - - -def delete_route_from_redis(state_json: dict) -> Tuple[bool, str | None]: - """Delete the OpenResty route from Redis Cluster.""" - master_endpoint, err = get_redis_master_endpoint(state_json) - if not master_endpoint: - return False, err or "No Redis master endpoint available" - - try: - import redis - except ImportError: - return False, "redis-py library not installed" - - try: - host, port = master_endpoint - r = redis.Redis( - host=host, - port=port, - decode_responses=True, - socket_connect_timeout=5, - ) - r.delete(ROUTE_KEY) - print(f"[*] Deleted route {ROUTE_KEY} from Redis", file=sys.stderr) - return True, None - except Exception as e: - error_msg = f"Failed to delete route from Redis: {str(e)}" - print(f"[!] 
{error_msg}", file=sys.stderr) - return False, error_msg - - -@plugin.command("init") -def handle_init(input_data: PluginInput) -> PluginOutput: - """Wait until Redis is reachable (for leader); non-leaders are no-op.""" - local_state = input_data.local_state or {} - state_json = input_data.state or {} - - if not isinstance(state_json, dict): - return PluginOutput( - status="error", - error_message="Invalid state format in init", - local_state=local_state, - ) - - local_node_id = input_data.local_node_id - - # Only leader needs to wait for Redis; other nodes can treat init as no-op. - if not is_local_node_leader(local_node_id, state_json): - return PluginOutput(status="completed", local_state=local_state) - - master_endpoint, err = get_redis_master_endpoint(state_json) - if not master_endpoint: - msg = err or "No Redis master endpoint available (init)" - print(f"[!] {msg}", file=sys.stderr) - return PluginOutput( - status="postponed", - error_message=msg, - local_state=local_state, - ) - - # Optionally verify that Redis cluster is reachable from at least one endpoint. - try: - import redis - except ImportError: - msg = "redis-py library not installed (init)" - print(f"[!] {msg}", file=sys.stderr) - return PluginOutput( - status="postponed", - error_message=msg, - local_state=local_state, - ) - - max_retries = 3 - retry_delay_sec = 5 - last_error: str | None = None - - for attempt in range(1, max_retries + 1): - try: - print( - f"[*] init: attempt {attempt}/{max_retries} to ping Redis master " - f"via {master_endpoint}", - file=sys.stderr, - ) - host, port = master_endpoint - r = redis.Redis( - host=host, - port=port, - decode_responses=True, - socket_connect_timeout=5, - ) - r.ping() - - print( - f"[*] init: Redis master is reachable on attempt {attempt}", - file=sys.stderr, - ) - return PluginOutput(status="completed", local_state=local_state) - except Exception as e: - last_error = f"init: failed to ping Redis master on attempt {attempt}: {e}" - print(f"[!] 
{last_error}", file=sys.stderr) - if attempt < max_retries: - time.sleep(retry_delay_sec) - - # If we reach here, Redis is still not reachable — postpone init. - return PluginOutput( - status="postponed", - error_message=last_error or "init: Redis master is not reachable", - local_state=local_state, - ) - - -@plugin.command("apply") -def handle_apply(input_data: PluginInput) -> PluginOutput: - """Create the Redis route for test-app on the leader node only.""" - local_node_id = input_data.local_node_id - state_json = input_data.state or {} - local_state = input_data.local_state or {} - - if not isinstance(state_json, dict): - return PluginOutput( - status="error", - error_message="Invalid state format", - local_state=local_state, - ) - - # Only leader node should write the route - if not is_local_node_leader(local_node_id, state_json): - return PluginOutput( - status="completed", - local_state=local_state, - ) - - success, error = ensure_route_in_redis(state_json) - if not success: - # Also log the error locally so it shows up in node logs even if - # the executor does not print error_message from PluginOutput. - if error: - print(f"[!] 
test-app-route apply: {error}", file=sys.stderr) - return PluginOutput( - status="postponed", - error_message=error or "Failed to configure route in Redis", - local_state=local_state, - ) - - node_properties = {"test_app_route_configured": "true"} - return PluginOutput( - status="completed", - node_properties=node_properties, - local_state=local_state, - ) - - -@plugin.command("health") -def handle_health(input_data: PluginInput) -> PluginOutput: - """Leader can optionally verify that the route exists; others are no-op.""" - local_node_id = input_data.local_node_id - state_json = input_data.state or {} - local_state = input_data.local_state or {} - - if not isinstance(state_json, dict): - return PluginOutput( - status="error", - error_message="Invalid state format", - local_state=local_state, - ) - - if not is_local_node_leader(local_node_id, state_json): - # Non-leader nodes do not manage this route - return PluginOutput(status="completed", local_state=local_state) - - try: - import redis - except ImportError: - # If library is missing, health is postponed rather than fatal - return PluginOutput( - status="postponed", - error_message="redis-py library not installed", - local_state=local_state, - ) - - master_endpoint, err = get_redis_master_endpoint(state_json) - if not master_endpoint: - return PluginOutput( - status="postponed", - error_message=err or "No Redis master endpoint available", - local_state=local_state, - ) - - try: - host, port = master_endpoint - r = redis.Redis( - host=host, - port=port, - decode_responses=True, - socket_connect_timeout=5, - ) - value = r.get(ROUTE_KEY) - if not value: - return PluginOutput( - status="postponed", - error_message=f"Route {ROUTE_KEY} not found in Redis", - local_state=local_state, - ) - except Exception as e: - return PluginOutput( - status="postponed", - error_message=f"Failed to verify route in Redis: {e}", - local_state=local_state, - ) - - return PluginOutput(status="completed", local_state=local_state) - - 
-@plugin.command("finalize") -def handle_finalize(input_data: PluginInput) -> PluginOutput: - """No special finalize logic required.""" - local_state = input_data.local_state or {} - return PluginOutput(status="completed", local_state=local_state) - - -@plugin.command("destroy") -def handle_destroy(input_data: PluginInput) -> PluginOutput: - """On leader node, remove the route from Redis.""" - local_node_id = input_data.local_node_id - state_json = input_data.state or {} - local_state = input_data.local_state or {} - - # Only leader attempts to clean up the route - if isinstance(state_json, dict) and is_local_node_leader(local_node_id, state_json): - delete_route_from_redis(state_json) - - node_properties = { - "test_app_route_configured": None, - } - return PluginOutput( - status="completed", - node_properties=node_properties, - local_state=local_state, - ) - - -if __name__ == "__main__": - plugin.run() diff --git a/src/services/apps/test-app-route/manifest.yaml b/src/services/apps/test-app-route/manifest.yaml deleted file mode 100644 index d1817f66..00000000 --- a/src/services/apps/test-app-route/manifest.yaml +++ /dev/null @@ -1,142 +0,0 @@ -name: test-app-route -version: 1.0.0 -commands: - - init - - apply - - health - - finalize - - destroy -healthcheckIntervalSecs: 60 -entrypoint: main.py -stateExpr: - engine: jq - query: | - ($swarmdb.clusters[] | select(.id == "{{ clusterId }}" and .deleted_ts == null)) as $cluster | - - ([$swarmdb.clusternodes[] | select(.cluster == "{{ clusterId }}" and .deleted_ts == null)]) as $appClusterNodes | - - ($appClusterNodes | map(.node)) as $appNodeIds | - - # Find Redis cluster with at least one ready node (not bound to app nodes) - ( - [ - $swarmdb.clusters[] | - select(.cluster_policy == "redis" and .deleted_ts == null) | - . 
as $c | - select( - ( - [$swarmdb.clusternodeproperties[] | - select( - (.cluster_node | startswith($c.id)) and - .deleted_ts == null and - .name == "redis_node_ready" and - .value == "true" - ) - ] | length > 0 - ) - ) - ] | .[0] - ) as $redisCluster | - - # Redis cluster nodes - ([$swarmdb.clusternodes[] | select(.cluster == $redisCluster.id and .deleted_ts == null)]) as $redisClusterNodes | - ($redisClusterNodes | map(.node)) as $redisNodeIds | - - # Find Redis Sentinel cluster - ( - $swarmdb.clusters[] | - select(.cluster_policy == "redis-sentinel" and .deleted_ts == null) - ) as $sentinelCluster | - - # Redis Sentinel cluster nodes - ([$swarmdb.clusternodes[] | select(.cluster == $sentinelCluster.id and .deleted_ts == null)]) as $sentinelClusterNodes | - ($sentinelClusterNodes | map(.node)) as $sentinelNodeIds | - - # Find WireGuard cluster that contains any Redis nodes - ( - $swarmdb.clusters[] | - select(.cluster_policy == "wireguard" and .deleted_ts == null) | - select( - ( - [$swarmdb.clusternodes[] | select(.deleted_ts == null and (.node | IN($redisNodeIds[])))] | - length > 0 - ) - ) - ) as $wgCluster | - - # Find WireGuard cluster that contains any Redis Sentinel nodes - ( - $swarmdb.clusters[] | - select(.cluster_policy == "wireguard" and .deleted_ts == null) | - select( - ( - [$swarmdb.clusternodes[] | select(.deleted_ts == null and (.node | IN($sentinelNodeIds[])))] | - length > 0 - ) - ) - ) as $sentinelWgCluster | - - { - cluster: { - id: $cluster.id, - cluster_policy: $cluster.cluster_policy, - leader_node: $cluster.leader_node - }, - - clusterNodes: [ - $appClusterNodes[] | - {id, node_id: .node, cluster} - ] | sort_by(.id, .node_id, .cluster), - - redisCluster: { - id: $redisCluster.id - }, - - redisNodeProperties: [ - $swarmdb.clusternodeproperties[] | - select( - (.cluster_node | startswith($redisCluster.id)) and - .deleted_ts == null and - (.name | startswith("redis_")) - ) | - {cluster_node, name, value, node_id: ( .cluster_node as $cn | 
$swarmdb.clusternodes[] | select(.id == $cn)) | .node} - ] | sort_by(.cluster_node, .name, .value, .node_id), - - sentinelCluster: { - id: $sentinelCluster.id - }, - - sentinelNodeProperties: [ - $swarmdb.clusternodeproperties[] | - select( - (.cluster_node | startswith($sentinelCluster.id)) and - .deleted_ts == null and - (.name | startswith("redis_sentinel_")) - ) | - {cluster_node, name, value, node_id: ( .cluster_node as $cn | $swarmdb.clusternodes[] | select(.id == $cn)) | .node} - ] | sort_by(.cluster_node, .name, .value, .node_id), - - wgCluster: { - id: $wgCluster.id - }, - - wgNodeProperties: [ - $swarmdb.clusternodeproperties[] | - select( - (.cluster_node | startswith($wgCluster.id)) and - .deleted_ts == null and - .name == "tunnel_ip" - ) | - {cluster_node, name, value, node_id: ( .cluster_node as $cn | $swarmdb.clusternodes[] | select(.id == $cn)) | .node} - ] | sort_by(.cluster_node, .name, .value, .node_id), - - sentinelWgNodeProperties: [ - $swarmdb.clusternodeproperties[] | - select( - (.cluster_node | startswith($sentinelWgCluster.id)) and - .deleted_ts == null and - .name == "tunnel_ip" - ) | - {cluster_node, name, value, node_id: ( .cluster_node as $cn | $swarmdb.clusternodes[] | select(.id == $cn)) | .node} - ] | sort_by(.cluster_node, .name, .value, .node_id) - } diff --git a/src/services/apps/test-app/main.py b/src/services/apps/test-app/main.py deleted file mode 100644 index 46f6433d..00000000 --- a/src/services/apps/test-app/main.py +++ /dev/null @@ -1,338 +0,0 @@ -#!/usr/bin/env python3 - -import os -import socket -import subprocess -import sys -import time -from http.server import BaseHTTPRequestHandler, HTTPServer -from pathlib import Path - -from provision_plugin_sdk import ProvisionPlugin, PluginInput, PluginOutput - - -APP_PORT = int(os.environ.get("TEST_APP_PORT", "34567")) -APP_SCRIPT_PATH = Path("/usr/local/bin/test-app-server.py") -SYSTEMD_SERVICE_NAME = "test-app" -SYSTEMD_SERVICE_PATH = 
Path(f"/etc/systemd/system/{SYSTEMD_SERVICE_NAME}.service") - - -plugin = ProvisionPlugin() - - -class HelloWorldHandler(BaseHTTPRequestHandler): - """Simple HTTP handler that responds with 'Hello World' to all methods.""" - - def _send_hello(self): - body = b"Hello World" - self.send_response(200) - self.send_header("Content-Type", "text/plain; charset=utf-8") - self.send_header("Content-Length", str(len(body))) - self.end_headers() - self.wfile.write(body) - - def log_message(self, format: str, *args): - # Log to stderr with a simple prefix to avoid polluting stdout used by the plugin - sys.stderr.write(f"[test-app] {self.address_string()} - {format % args}\n") - - def do_GET(self): - self._send_hello() - - def do_POST(self): - self._send_hello() - - def do_PUT(self): - self._send_hello() - - def do_DELETE(self): - self._send_hello() - - def do_PATCH(self): - self._send_hello() - - def do_HEAD(self): - # HEAD should not include body, but we still reuse status/headers - self.send_response(200) - self.send_header("Content-Type", "text/plain; charset=utf-8") - self.send_header("Content-Length", "0") - self.end_headers() - - def do_OPTIONS(self): - self.send_response(200) - self.send_header("Allow", "GET,POST,PUT,DELETE,PATCH,HEAD,OPTIONS") - self.end_headers() - - -def write_app_script(): - """Write the test-app HTTP server script to disk if it does not exist.""" - APP_SCRIPT_PATH.parent.mkdir(parents=True, exist_ok=True) - - script_content = f"""#!/usr/bin/env python3 -from http.server import BaseHTTPRequestHandler, HTTPServer -import sys - - -class HelloWorldHandler(BaseHTTPRequestHandler): - def _send_hello(self): - body = b"Hello World" - self.send_response(200) - self.send_header("Content-Type", "text/plain; charset=utf-8") - self.send_header("Content-Length", str(len(body))) - self.end_headers() - self.wfile.write(body) - - def log_message(self, format, *args): - sys.stderr.write(f"[test-app] {{self.address_string()}} - {{format % args}}\\n") - - def 
do_GET(self): - self._send_hello() - - def do_POST(self): - self._send_hello() - - def do_PUT(self): - self._send_hello() - - def do_DELETE(self): - self._send_hello() - - def do_PATCH(self): - self._send_hello() - - def do_HEAD(self): - self.send_response(200) - self.send_header("Content-Type", "text/plain; charset=utf-8") - self.send_header("Content-Length", "0") - self.end_headers() - - def do_OPTIONS(self): - self.send_response(200) - self.send_header("Allow", "GET,POST,PUT,DELETE,PATCH,HEAD,OPTIONS") - self.end_headers() - - -def run(): - server = HTTPServer(("0.0.0.0", {APP_PORT}), HelloWorldHandler) - sys.stderr.write(f"[test-app] Listening on 0.0.0.0:{APP_PORT}\\n") - server.serve_forever() - - -if __name__ == "__main__": - run() -""" - - APP_SCRIPT_PATH.write_text(script_content) - APP_SCRIPT_PATH.chmod(0o755) - - -def write_systemd_service(): - """Create or update systemd service unit for test-app.""" - service_content = f"""[Unit] -Description=Test App HTTP Server -After=network.target - -[Service] -Type=simple -ExecStart=/usr/bin/python3 {APP_SCRIPT_PATH} -Restart=always -RestartSec=5 -User=root - -[Install] -WantedBy=multi-user.target -""" - - SYSTEMD_SERVICE_PATH.parent.mkdir(parents=True, exist_ok=True) - SYSTEMD_SERVICE_PATH.write_text(service_content) - - # Reload systemd units - subprocess.run(["systemctl", "daemon-reload"], check=False) - - -def is_service_running() -> tuple[bool, str | None]: - """Check if test-app systemd service is running.""" - try: - result = subprocess.run( - ["systemctl", "is-active", SYSTEMD_SERVICE_NAME], - capture_output=True, - text=True, - ) - is_active = result.stdout.strip() == "active" - return is_active, None if is_active else f"Service status: {result.stdout.strip()}" - except Exception as e: - return False, f"Failed to check service status: {str(e)}" - - -def wait_for_port_ready(timeout_sec: int = 30) -> bool: - """Wait until APP_PORT is listening on localhost.""" - deadline = time.time() + timeout_sec - 
last_error = None - - while time.time() < deadline: - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.settimeout(2) - try: - sock.connect(("127.0.0.1", APP_PORT)) - sock.close() - return True - except Exception as e: - last_error = str(e) - time.sleep(1) - finally: - sock.close() - - print(f"[!] test-app did not open port {APP_PORT} within {timeout_sec}s: {last_error}", file=sys.stderr) - return False - - -@plugin.command("init") -def handle_init(input_data: PluginInput) -> PluginOutput: - """Init is a no-op for test-app (no packages to install).""" - local_state = input_data.local_state or {} - return PluginOutput(status="completed", local_state=local_state) - - -@plugin.command("apply") -def handle_apply(input_data: PluginInput) -> PluginOutput: - """Deploy and start the test-app HTTP server.""" - local_state = input_data.local_state or {} - - try: - write_app_script() - write_systemd_service() - except Exception as e: - return PluginOutput( - status="error", - error_message=f"Failed to write test-app files: {e}", - local_state=local_state, - ) - - # Enable and restart systemd service - try: - result = subprocess.run( - ["systemctl", "enable", SYSTEMD_SERVICE_NAME], - capture_output=True, - text=True, - ) - if result.returncode != 0: - return PluginOutput( - status="error", - error_message=f"Failed to enable {SYSTEMD_SERVICE_NAME}: {result.stderr}", - local_state=local_state, - ) - - result = subprocess.run( - ["systemctl", "restart", SYSTEMD_SERVICE_NAME], - capture_output=True, - text=True, - ) - if result.returncode != 0: - return PluginOutput( - status="error", - error_message=f"Failed to start {SYSTEMD_SERVICE_NAME}: {result.stderr}", - local_state=local_state, - ) - except Exception as e: - return PluginOutput( - status="error", - error_message=f"Failed to start {SYSTEMD_SERVICE_NAME}: {e}", - local_state=local_state, - ) - - # Wait for the port to become ready - if not wait_for_port_ready(timeout_sec=30): - return PluginOutput( - 
status="postponed", - error_message=f"{SYSTEMD_SERVICE_NAME} did not become ready on port {APP_PORT}", - local_state=local_state, - ) - - node_properties = {"test_app_ready": "true"} - return PluginOutput( - status="completed", - node_properties=node_properties, - local_state=local_state, - ) - - -@plugin.command("health") -def handle_health(input_data: PluginInput) -> PluginOutput: - """Check that test-app service is running.""" - local_state = input_data.local_state or {} - - running, error = is_service_running() - if not running: - if error and "Failed to" in error: - return PluginOutput(status="error", error_message=error, local_state=local_state) - return PluginOutput(status="postponed", error_message=error or "test-app service is not running", local_state=local_state) - - # Optionally verify port is still open - if not wait_for_port_ready(timeout_sec=5): - return PluginOutput( - status="postponed", - error_message=f"{SYSTEMD_SERVICE_NAME} port {APP_PORT} is not reachable", - local_state=local_state, - ) - - return PluginOutput(status="completed", local_state=local_state) - - -@plugin.command("finalize") -def handle_finalize(input_data: PluginInput) -> PluginOutput: - """Gracefully stop test-app before node removal.""" - local_state = input_data.local_state or {} - try: - subprocess.run( - ["systemctl", "stop", SYSTEMD_SERVICE_NAME], - check=False, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - except Exception as e: - print(f"[!] 
Failed to stop {SYSTEMD_SERVICE_NAME}: {e}", file=sys.stderr) - - return PluginOutput(status="completed", local_state=local_state) - - -@plugin.command("destroy") -def handle_destroy(input_data: PluginInput) -> PluginOutput: - """Completely remove test-app service and script.""" - try: - subprocess.run( - ["systemctl", "stop", SYSTEMD_SERVICE_NAME], - check=False, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - subprocess.run( - ["systemctl", "disable", SYSTEMD_SERVICE_NAME], - check=False, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - - if SYSTEMD_SERVICE_PATH.exists(): - SYSTEMD_SERVICE_PATH.unlink() - - if APP_SCRIPT_PATH.exists(): - APP_SCRIPT_PATH.unlink() - - node_properties = { - "test_app_ready": None, - } - - return PluginOutput( - status="completed", - node_properties=node_properties, - local_state={}, - ) - except Exception as e: - return PluginOutput( - status="error", - error_message=f"Failed to destroy {SYSTEMD_SERVICE_NAME}: {e}", - local_state={}, - ) - - -if __name__ == "__main__": - plugin.run() diff --git a/src/services/apps/test-app/manifest.yaml b/src/services/apps/test-app/manifest.yaml deleted file mode 100644 index b197e773..00000000 --- a/src/services/apps/test-app/manifest.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: test-app -version: 1.0.0 -commands: - - init - - apply - - health - - finalize - - destroy -healthcheckIntervalSecs: 60 -entrypoint: main.py -stateExpr: - engine: jq - query: | - ($swarmdb.clusters[] | select(.id == "{{ clusterId }}" and .deleted_ts == null)) as $cluster | - - ([$swarmdb.clusternodes[] | select(.cluster == "{{ clusterId }}" and .deleted_ts == null)]) as $appClusterNodes | - - { - cluster: { - id: $cluster.id, - cluster_policy: $cluster.cluster_policy, - leader_node: $cluster.leader_node - }, - - clusterNodes: [ - $appClusterNodes[] | - {id, node_id: .node, cluster} - ] | sort_by(.id, .node_id, .cluster) - } diff --git a/src/swarm-scripts/65.setup-nats.sh 
b/src/swarm-scripts/65.setup-nats.sh deleted file mode 100644 index 9d304044..00000000 --- a/src/swarm-scripts/65.setup-nats.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# This script bootstraps the nats service into SwarmDB via mysql client. -# Run it INSIDE the container. Assumes mysql client is available. -# -# Note: -# - The nats manifest and main.py are provided by the image at: -# /etc/swarm-services/nats/{manifest.yaml, main.py} -# This script only registers service records in SwarmDB. -# - nats depends on a WireGuard cluster existing and sharing nodes with it. -# When bootstrapping WireGuard, prefer ClusterPolicy id 'wireguard' to match nats's stateExpr. -# - -DB_HOST=${DB_HOST:-127.0.0.1} -DB_PORT=${DB_PORT:-3306} -DB_USER=${DB_USER:-root} -DB_NAME=${DB_NAME:-swarmdb} - -# Service descriptors -SERVICE_NAME=${SERVICE_NAME:-nats} -SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} -CLUSTER_POLICY=${CLUSTER_POLICY:-nats} -CLUSTER_ID=${CLUSTER_ID:-nats} - -# Location and manifest inside the container. -# IMPORTANT: This script runs only on one node. All nodes must have the same location available already -# (baked into the image), so we point to /etc/swarm-services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} -MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} -SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" - -if [ ! -f "$MANIFEST_PATH" ]; then - echo "Manifest not found at: $MANIFEST_PATH" >&2 - exit 1 -fi - -echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." -if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then - echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." -else - echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." 
- DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=3 --maxClusters=1 -fi - -echo "Ensuring ClusterService '$SERVICE_PK'..." -if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then - echo "ClusterService '$SERVICE_PK' already exists, skipping creation." -else - echo "Creating ClusterService '$SERVICE_PK'..." - DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" --omit-command-init -fi - -echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." diff --git a/src/swarm-scripts/66.setup-test-app.sh b/src/swarm-scripts/66.setup-test-app.sh deleted file mode 100644 index 18ba36d2..00000000 --- a/src/swarm-scripts/66.setup-test-app.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# This script bootstraps the test-app service into SwarmDB via swarm-cli. -# Run it INSIDE the container. Assumes mysql client and swarm-cli.py are available. -# -# Notes: -# - The test-app manifest and main.py are expected to be available at: -# /etc/swarm-services/test-app/{manifest.yaml, main.py} -# This script only registers ClusterPolicy and ClusterService. -# - -DB_HOST=${DB_HOST:-127.0.0.1} -DB_PORT=${DB_PORT:-3306} -DB_USER=${DB_USER:-root} -DB_NAME=${DB_NAME:-swarmdb} - -# Service descriptors -SERVICE_NAME=${SERVICE_NAME:-test-app} -SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} -CLUSTER_POLICY=${CLUSTER_POLICY:-test-app} -CLUSTER_ID=${CLUSTER_ID:-test-app} - -# Location and manifest inside the container. -# IMPORTANT: This script runs only on one node. 
All nodes must have the same location available already -# (baked into the image), so we point to /etc/swarm-services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} -MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} -SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" - -if [ ! -f "$MANIFEST_PATH" ]; then - echo "Manifest not found at: $MANIFEST_PATH" >&2 - exit 1 -fi - -echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." -if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then - echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." -else - echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." - DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=1 --maxClusters=1 -fi - -echo "Ensuring ClusterService '$SERVICE_PK'..." -if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then - echo "ClusterService '$SERVICE_PK' already exists, skipping creation." -else - echo "Creating ClusterService '$SERVICE_PK'..." - DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" -fi - -echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." 
diff --git a/src/swarm-scripts/67.setup-test-app-route.sh b/src/swarm-scripts/67.setup-test-app-route.sh deleted file mode 100644 index fc65886b..00000000 --- a/src/swarm-scripts/67.setup-test-app-route.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# This script bootstraps the test-app-route service into SwarmDB via swarm-cli. -# Run it INSIDE the container. Assumes mysql client and swarm-cli.py are available. -# -# Notes: -# - The test-app-route manifest and main.py are expected to be available at: -# /etc/swarm-services/test-app-route/{manifest.yaml, main.py} -# This script only registers ClusterPolicy and ClusterService. -# - The logic to ensure that the Redis route is written only on the leader node -# is implemented inside the service's provision plugin (main.py), not here. -# - -DB_HOST=${DB_HOST:-127.0.0.1} -DB_PORT=${DB_PORT:-3306} -DB_USER=${DB_USER:-root} -DB_NAME=${DB_NAME:-swarmdb} - -# Service descriptors -SERVICE_NAME=${SERVICE_NAME:-test-app-route} -SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} -CLUSTER_POLICY=${CLUSTER_POLICY:-test-app-route} -CLUSTER_ID=${CLUSTER_ID:-test-app-route} - -# Location and manifest inside the container. -# IMPORTANT: This script runs only on one node. All nodes must have the same location available already -# (baked into the image), so we point to /etc/swarm-services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} -MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} -SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" - -if [ ! -f "$MANIFEST_PATH" ]; then - echo "Manifest not found at: $MANIFEST_PATH" >&2 - exit 1 -fi - -echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." -if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then - echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." 
-else - echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." - DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=1 --maxClusters=1 -fi - -echo "Ensuring ClusterService '$SERVICE_PK'..." -if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then - echo "ClusterService '$SERVICE_PK' already exists, skipping creation." -else - echo "Creating ClusterService '$SERVICE_PK'..." - DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" -fi - -echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." diff --git a/src/swarm-scripts/70.setup-mongodb.sh b/src/swarm-scripts/70.setup-mongodb.sh index 12a173c6..92437238 100644 --- a/src/swarm-scripts/70.setup-mongodb.sh +++ b/src/swarm-scripts/70.setup-mongodb.sh @@ -26,7 +26,7 @@ CLUSTER_ID=${CLUSTER_ID:-mongodb} # Location and manifest inside the container. # IMPORTANT: This script runs only on one node. All nodes must have the same location available already # (baked into the image), so we point to /etc/swarm-services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" From b361f7678a273a13d43fd9a053ac4b8c2c993c9f Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Mar 2026 19:13:59 +0300 Subject: [PATCH 55/79] fix: nodejs is back! 
--- src/Dockerfile | 4 +++ src/rootfs/files/scripts/install_nodejs.sh | 37 ++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 src/rootfs/files/scripts/install_nodejs.sh diff --git a/src/Dockerfile b/src/Dockerfile index 94e97008..aeac5189 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -328,6 +328,10 @@ RUN chmod +x \ ${OUTPUTDIR}/usr/local/lib/swarm-cloud-api/swarm-cloud-api \ ${OUTPUTDIR}/usr/local/lib/swarm-cloud-api/schema-sync +# install Node.js (required by PCCS npm install and swarm-cloud-api) +ADD rootfs/files/scripts/install_nodejs.sh /buildroot/files/scripts/ +RUN --security=insecure bash /buildroot/files/scripts/install_nodejs.sh + # install PCCS ADD rootfs/files/scripts/install_pccs.sh /buildroot/files/scripts/ ADD rootfs/files/configs/pccs-init/ /buildroot/files/configs/pccs-init/ diff --git a/src/rootfs/files/scripts/install_nodejs.sh b/src/rootfs/files/scripts/install_nodejs.sh new file mode 100644 index 00000000..c7c8d0b9 --- /dev/null +++ b/src/rootfs/files/scripts/install_nodejs.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# bash unofficial strict mode; +set -euo pipefail; + +# public, required +# OUTPUTDIR + +# private +BUILDROOT="/buildroot"; + +# init loggggging; +source "${BUILDROOT}/files/scripts/log.sh"; + +# chroot functions +source "${BUILDROOT}/files/scripts/chroot.sh"; + +function install_nodejs() { + log_info "adding NodeSource repository"; + chroot "${OUTPUTDIR}" /bin/bash -c 'curl -sL https://deb.nodesource.com/setup_22.x | bash -'; + + log_info "installing Node.js"; + chroot "${OUTPUTDIR}" /bin/bash -c 'DEBIAN_FRONTEND=noninteractive apt install -y nodejs'; + + # Verify installation + local NODE_VERSION=$(chroot "${OUTPUTDIR}" /bin/bash -c 'node --version' 2>/dev/null || true); + if [ -z "${NODE_VERSION}" ]; then + log_fail "Node.js installation failed"; + return 1; + fi + + log_info "Node.js ${NODE_VERSION} installed successfully"; +} + +chroot_init; +install_nodejs; +chroot_deinit; From 
ad66112e8868263e834c68b2f01d4c8a552b9160 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Mar 2026 20:39:49 +0300 Subject: [PATCH 56/79] feat(swarm-init): powerDNS support, evidence key --- src/Dockerfile | 5 +- .../configs/{etc => sp}/swarm/config.yaml | 4 ++ .../usr/local/bin/sp-svc-auth-service.sh | 22 ------- .../files/configs/usr/local/bin/swarm-init.sh | 65 +++++++++++++++++-- 4 files changed, 67 insertions(+), 29 deletions(-) rename src/rootfs/files/configs/{etc => sp}/swarm/config.yaml (85%) delete mode 100644 src/rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh diff --git a/src/Dockerfile b/src/Dockerfile index aeac5189..835a5fe3 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -277,9 +277,8 @@ RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-cloud-api.sh ADD rootfs/files/configs/etc/securetty "${OUTPUTDIR}/etc/securetty" # swarm-init: downloads binaries (swarm-db, SDK) and generates swarm-db config at VM startup -# configuration is read from /etc/swarm/config.yaml (tags, node params) +# configuration is read from /sp/swarm/config.yaml (attached via provider config disk at boot) RUN mkdir -p "${OUTPUTDIR}/etc/swarm" "${OUTPUTDIR}/etc/swarm-db" "${OUTPUTDIR}/etc/swarm-node" -ADD rootfs/files/configs/etc/swarm/config.yaml ${OUTPUTDIR}/etc/swarm/config.yaml ADD rootfs/files/configs/etc/swarm-node/config.yaml ${OUTPUTDIR}/etc/swarm-node/config.yaml ADD rootfs/files/configs/usr/local/bin/swarm-init.sh ${OUTPUTDIR}/usr/local/bin/swarm-init.sh RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-init.sh @@ -311,7 +310,7 @@ ADD rootfs/files/configs/usr/local/bin/swarm-cloud-ui.sh ${OUTPUTDIR}/usr/local/ RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-cloud-ui.sh ADD rootfs/files/configs/usr/local/bin/kubectl ${OUTPUTDIR}/usr/local/bin/kubectl RUN chmod +x ${OUTPUTDIR}/usr/local/bin/kubectl -# swarm-db binary is downloaded at runtime by swarm-init (tags.swarm_db in /etc/swarm/config.yaml) +# swarm-db binary is downloaded at runtime by swarm-init 
(tags.swarm_db in /sp/swarm/config.yaml) # provision plugins at /etc/swarm-cloud/services are mounted into the swarm-node container at runtime RUN mkdir -p "${OUTPUTDIR}/etc/swarm-db" "${OUTPUTDIR}/etc/swarm-cloud/services" COPY repos/swarm-cloud/apps/swarm-node-e2e/fixtures/schema.yaml ${OUTPUTDIR}/etc/swarm-db/schema.yaml diff --git a/src/rootfs/files/configs/etc/swarm/config.yaml b/src/rootfs/files/configs/sp/swarm/config.yaml similarity index 85% rename from src/rootfs/files/configs/etc/swarm/config.yaml rename to src/rootfs/files/configs/sp/swarm/config.yaml index 01247e4c..0fc5ebb6 100644 --- a/src/rootfs/files/configs/etc/swarm/config.yaml +++ b/src/rootfs/files/configs/sp/swarm/config.yaml @@ -14,3 +14,7 @@ swarm_db: node_name: "" # defaults to hostname advertise_addr: "" # defaults to auto-detected external IP join_addresses: [] # e.g. ["192.168.1.2:7946", "192.168.1.3:7946"] + +powerdns_api_url: "" # e.g. "http://ns1.example.com:8081" +powerdns_api_key: "" # PowerDNS API key +base_domain: "" # e.g. "example.com" diff --git a/src/rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh b/src/rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh deleted file mode 100644 index d62314cf..00000000 --- a/src/rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -set -euo pipefail - -BASE_DIR="/etc/auth-service/" -APP_PATH="apps/auth-service" -APP_DIR="${BASE_DIR}/$APP_PATH" -SP_CONFIG="${APP_DIR}/configuration.yaml" - -# Prefer configuration supplied via provider disk; fallback to example -if [[ -f "${SP_CONFIG}" ]]; then - export CONFIG_FILE="${SP_CONFIG}" -elif [[ -f "${APP_DIR}/configuration.example.yaml" ]] && [[ "${ALLOW_EXAMPLE_CONFIG:-}" == "true" || "${NODE_ENV:-production}" != "production" ]]; then - cp -f "${APP_DIR}/configuration.example.yaml" "${SP_CONFIG}" - export CONFIG_FILE="${SP_CONFIG}" -else - echo "ERROR: No configuration found for ${APP_PATH}. 
Expected one of: ${SP_CONFIG} or ${APP_DIR}/configuration.example.yaml" >&2 - exit 1 -fi - -export NODE_ENV="${NODE_ENV:-production}" -cd "${BASE_DIR}" -exec npm run start -w $APP_PATH diff --git a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh index 71ddab6a..6db4ac48 100644 --- a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh +++ b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh @@ -1,13 +1,13 @@ #!/bin/bash set -euo pipefail -CONFIG="/etc/swarm/config.yaml" +CONFIG="/sp/swarm/config.yaml" log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] [swarm-init] $*"; } log "starting swarm initialization" -# Read a scalar value from /etc/swarm/config.yaml via python3+pyyaml +# Read a scalar value from /sp/swarm/config.yaml via python3+pyyaml cfg() { python3 -c " import yaml @@ -28,6 +28,9 @@ SWARM_CLOUD_UI_TAG=$(cfg "tags.swarm_cloud_ui") AUTH_SERVICE_TAG=$(cfg "tags.auth_service") NODE_NAME=$(cfg "swarm_db.node_name") ADVERTISE_ADDR=$(cfg "swarm_db.advertise_addr") +POWERDNS_API_URL=$(cfg "powerdns_api_url") +POWERDNS_API_KEY=$(cfg "powerdns_api_key") +BASE_DOMAIN=$(cfg "base_domain") # Resolve node name [ -z "$NODE_NAME" ] && NODE_NAME=$(hostname) @@ -161,7 +164,7 @@ SWARM_CLOUD_UI_TAG=${SWARM_CLOUD_UI_TAG} AUTH_SERVICE_TAG=${AUTH_SERVICE_TAG} EOF -# Generate /etc/swarm-db/config.yaml from /etc/swarm/config.yaml parameters +# Generate /etc/swarm-db/config.yaml from /sp/swarm/config.yaml parameters log "generating /etc/swarm-db/config.yaml (node=$NODE_NAME, advertise=$ADVERTISE_ADDR)..." 
mkdir -p /etc/swarm-db /var/lib/swarm-db @@ -169,7 +172,7 @@ NODE_NAME_VAL="$NODE_NAME" ADVERTISE_ADDR_VAL="$ADVERTISE_ADDR" \ python3 - << 'PYEOF' import yaml, os -with open('/etc/swarm/config.yaml') as f: +with open('/sp/swarm/config.yaml') as f: swarm_cfg = yaml.safe_load(f) or {} join_addresses = (swarm_cfg.get('swarm_db') or {}).get('join_addresses') or [] @@ -210,4 +213,58 @@ with open('/etc/swarm-db/config.yaml', 'w') as f: yaml.dump(config, f, default_flow_style=False) PYEOF +# Wait for swarm-db MySQL to become available, then insert SwarmSecrets +log "waiting for swarm-db MySQL to become available..." +mysql_host="127.0.0.1" +mysql_port="3306" +wait_timeout="120" +start_ts="$(date +%s)" +while true; do + if (exec 3<>/dev/tcp/"$mysql_host"/"$mysql_port") 2>/dev/null; then + exec 3>&- 3<&- + break + fi + elapsed=$(( $(date +%s) - start_ts )) + if [ "$elapsed" -ge "$wait_timeout" ]; then + log "WARNING: MySQL not available after ${wait_timeout}s, skipping SwarmSecrets insertion" + break + fi + sleep 1 +done + +log "inserting SwarmSecrets into swarm-db..." +AUTH_SERVICE_YAML="" +AUTH_SERVICE_YAML_PATH="/sp/swarm/auth-service.yaml" +[ -f "$AUTH_SERVICE_YAML_PATH" ] && AUTH_SERVICE_YAML=$(cat "$AUTH_SERVICE_YAML_PATH") + +# Generate RSA 4096 private key (PKCS8 PEM) for evidence signing. +# TODO: should we use subroot (intermediate CA) key hierarchy? +log "generating evidence signing key (RSA 4096)..." 
+EVIDENCE_SIGN_KEY=$(openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:4096 2>/dev/null) + +POWERDNS_API_URL="$POWERDNS_API_URL" \ +POWERDNS_API_KEY="$POWERDNS_API_KEY" \ +BASE_DOMAIN="$BASE_DOMAIN" \ +AUTH_SERVICE_YAML="$AUTH_SERVICE_YAML" \ +EVIDENCE_SIGN_KEY="$EVIDENCE_SIGN_KEY" \ +python3 - << 'PYEOF' +import subprocess, os + +def insert_secret(key, value): + if not value: + return + escaped = value.replace("'", "''") + sql = f"INSERT IGNORE INTO SwarmSecrets (id, value) VALUES ('{key}', '{escaped}');\n" + subprocess.run( + ["mysql", "-h", "127.0.0.1", "-P", "3306", "-u", "root", "swarmdb"], + input=sql, text=True, check=True, + ) + +insert_secret("powerdns_api_url", os.environ.get("POWERDNS_API_URL", "")) +insert_secret("powerdns_api_key", os.environ.get("POWERDNS_API_KEY", "")) +insert_secret("base_domain", os.environ.get("BASE_DOMAIN", "")) +insert_secret("auth_service_yaml", os.environ.get("AUTH_SERVICE_YAML", "")) +insert_secret("evidence_sign_key", os.environ.get("EVIDENCE_SIGN_KEY", "")) +PYEOF + log "swarm-init completed successfully" From 85983075a2eeae6615ed6e697c8ae0fc5fdf38be Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Mar 2026 20:43:10 +0300 Subject: [PATCH 57/79] refactor: remove auth-service launcher script from Dockerfile --- src/Dockerfile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Dockerfile b/src/Dockerfile index 835a5fe3..6dc9e7fe 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -290,10 +290,6 @@ ADD rootfs/files/configs/etc/systemd/system/swarm-host-agent.service ${OUTPUTDIR RUN mkdir -p "${OUTPUTDIR}/etc/swarm-service-launchers" -# auth-service launcher script (service file managed at runtime) -ADD rootfs/files/configs/usr/local/bin/sp-svc-auth-service.sh ${OUTPUTDIR}/usr/local/bin/sp-svc-auth-service.sh -RUN chmod +x ${OUTPUTDIR}/usr/local/bin/sp-svc-auth-service.sh - # swarm one-shot services runner ADD rootfs/files/configs/etc/systemd/system/swarm-services.service 
${OUTPUTDIR}/etc/systemd/system/swarm-services.service ADD rootfs/files/configs/usr/local/bin/swarm-services.sh ${OUTPUTDIR}/usr/local/bin/swarm-services.sh From 2b065b4e298aed7dcb6209f6aad2b11b31596079 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Mar 2026 20:44:12 +0300 Subject: [PATCH 58/79] refactor: delete lib/sp-vm-tools --- .gitmodules | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 2f05b8ba..e0709140 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "lib/sp-vm-tools"] - path = lib/sp-vm-tools - url = https://github.com/super-protocol/sp-vm-tools [submodule "swarm-cloud"] path = src/repos/swarm-cloud url = git@github.com:Super-Protocol/swarm-cloud.git From 4b210b8b963ba59fcb757f34b9146a6c7be81363 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Mar 2026 21:18:34 +0300 Subject: [PATCH 59/79] refactor: streamline package installation in setup_runtime_tools.sh --- src/rootfs/files/configs/usr/local/bin/swarm-init.sh | 2 +- src/rootfs/files/scripts/setup_runtime_tools.sh | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh index 6db4ac48..b36a0365 100644 --- a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh +++ b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh @@ -131,7 +131,7 @@ if [ -n "$HOST_AGENT_TAG" ]; then TMP=$(mktemp -d) download_github_asset "Super-Protocol" "swarm-cloud" "$RELEASE_TAG" "$FILENAME" "$TMP/host-agent.tar.gz" tar xzf "$TMP/host-agent.tar.gz" -C "$TMP" - EXTRACT_DIR=$(tar -tzf "$TMP/host-agent.tar.gz" | head -1 | cut -f1 -d"/") + EXTRACT_DIR=$(ls -1 "$TMP" | grep -v 'host-agent\.tar\.gz' | head -1) install -m 755 "$TMP/$EXTRACT_DIR/swarm-host-agent" /usr/local/bin/swarm-host-agent mkdir -p /etc/swarm cp "$TMP/$EXTRACT_DIR/host-agent.yaml" /etc/swarm/host-agent.yaml diff --git a/src/rootfs/files/scripts/setup_runtime_tools.sh 
b/src/rootfs/files/scripts/setup_runtime_tools.sh index 11f3a7a1..ab353cde 100644 --- a/src/rootfs/files/scripts/setup_runtime_tools.sh +++ b/src/rootfs/files/scripts/setup_runtime_tools.sh @@ -17,17 +17,15 @@ function setup_runtime_tools() { printf '#!/bin/sh\nexit 101\n' > "${OUTPUTDIR}/usr/sbin/policy-rc.d" chmod +x "${OUTPUTDIR}/usr/sbin/policy-rc.d" - log_info "installing runtime packages into rootfs (python3, redis, mysql client, openssl, netcat, dns tools)" + log_info "installing runtime packages into rootfs (python3, mysql client, openssl, netcat, dns tools)" chroot "${OUTPUTDIR}" /usr/bin/apt update chroot "${OUTPUTDIR}" /usr/bin/apt install -y --no-install-recommends \ - mysql-client python3 python3-pip redis-server redis-sentinel redis-tools openssl netcat-openbsd dnsutils + mysql-client python3 python3-pip openssl netcat-openbsd dnsutils chroot "${OUTPUTDIR}" /usr/bin/apt clean log_info "installing Python runtime dependencies" chroot "${OUTPUTDIR}" /bin/bash -lc 'python3 -m pip install --break-system-packages SQLAlchemy PyMySQL requests redis' - log_info "ensuring redis data/log directories exist with proper ownership" - chroot "${OUTPUTDIR}" /bin/bash -lc 'mkdir -p /var/lib/redis /var/log/redis && chown -R redis:redis /var/lib/redis /var/log/redis && chmod 0750 /var/lib/redis' } chroot_init From c62336c10d6e5edbaef9c720f1b3d76d42590069 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Mar 2026 21:38:19 +0300 Subject: [PATCH 60/79] refactor: remove unused local registry and MongoDB setup scripts from Dockerfile --- src/Dockerfile | 59 ------------------- .../etc/systemd/system/local-registry.service | 13 ---- .../configs/usr/local/bin/local-registry.sh | 24 -------- src/rootfs/files/scripts/install_mongodb.sh | 32 ---------- src/rootfs/files/scripts/refresh_ca_certs.sh | 22 ------- src/swarm-scripts/70.setup-mongodb.sh | 58 ------------------ 6 files changed, 208 deletions(-) delete mode 100644 
src/rootfs/files/configs/etc/systemd/system/local-registry.service delete mode 100755 src/rootfs/files/configs/usr/local/bin/local-registry.sh delete mode 100644 src/rootfs/files/scripts/install_mongodb.sh delete mode 100755 src/rootfs/files/scripts/refresh_ca_certs.sh delete mode 100644 src/swarm-scripts/70.setup-mongodb.sh diff --git a/src/Dockerfile b/src/Dockerfile index 6dc9e7fe..7dcf790e 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -99,37 +99,6 @@ ADD initramfs/files/init.sh /initramfs-root/sbin/init.sh RUN gen_init_cpio /initramfs-root/initramfs.list | gzip -9 -n > /initramfs-root/initramfs.cpio.gz ### End ### -### Start certs ### -FROM ubuntu:24.04 AS certs_builder -RUN apt update && apt install -y openssl - -WORKDIR /buildroot -ARG SUPER_REGISTRY_HOST=registry.superprotocol.local -RUN openssl genrsa \ - -out "/buildroot/${SUPER_REGISTRY_HOST}.ca.key" 2048; -RUN openssl req -x509 -new -nodes \ - -key "/buildroot/${SUPER_REGISTRY_HOST}.ca.key" \ - -sha256 -days 3650 \ - -out "/buildroot/${SUPER_REGISTRY_HOST}.ca.crt" \ - -subj "/ST=Milk Galaxy/L=Planet Earth/O=SuperProtocol/OU=MyUnit/CN=SuperProtocol.com" -RUN openssl genrsa \ - -out "/buildroot/${SUPER_REGISTRY_HOST}.key" 2048; -RUN printf "[req]\ndefault_bits = 2048\nprompt = no\ndistinguished_name = req_distinguished_name\nreq_extensions = req_ext\n[req_distinguished_name]\nC = US\nST = Milk Galaxy\nL = Planet Earth\nO = SuperProtocol\nOU = MyUnit\nCN = ${SUPER_REGISTRY_HOST}\n[req_ext]\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = ${SUPER_REGISTRY_HOST}\n[v3_ext]\nsubjectAltName = @alt_names\nbasicConstraints = CA:FALSE\nkeyUsage = digitalSignature,nonRepudiation,keyEncipherment,dataEncipherment\n" > "/buildroot/san.cnf" -RUN openssl req -new \ - -key "/buildroot/${SUPER_REGISTRY_HOST}.key" \ - -out "/buildroot/${SUPER_REGISTRY_HOST}.csr" \ - -config /buildroot/san.cnf -RUN openssl x509 -req \ - -in "/buildroot/${SUPER_REGISTRY_HOST}.csr" \ - -CA "/buildroot/${SUPER_REGISTRY_HOST}.ca.crt" \ - 
-CAkey "/buildroot/${SUPER_REGISTRY_HOST}.ca.key" \ - -CAcreateserial \ - -out "/buildroot/${SUPER_REGISTRY_HOST}.crt" \ - -days 3650 -sha256 \ - -extfile "/buildroot/san.cnf" \ - -extensions v3_ext -### End certs ### - ### Start kernel ### FROM ubuntu:24.04 AS kernel_builder RUN apt-get update && apt-get install -y wget gcc make build-essential curl libssl-dev bc elfutils libelf-dev bison flex cpio kmod rsync debhelper @@ -212,22 +181,6 @@ RUN sed -i '1 s|^.*$|-:root:ALL|' "${OUTPUTDIR}/etc/security/access.conf" RUN sed -i '1 s|^.*$|account required pam_access.so|' "${OUTPUTDIR}/etc/pam.d/login" ADD rootfs/files/configs/nvidia-persistenced.service ${OUTPUTDIR}/usr/lib/systemd/system/ -RUN mkdir -p "${OUTPUTDIR}/etc/super/certs" -COPY --from=certs_builder /buildroot/registry.superprotocol.local.ca.crt ${OUTPUTDIR}/usr/local/share/ca-certificates/registry.superprotocol.local.ca.crt -COPY --from=certs_builder /buildroot/registry.superprotocol.local.ca.crt ${OUTPUTDIR}/etc/super/certs/registry.superprotocol.local.ca.crt -COPY --from=certs_builder /buildroot/registry.superprotocol.local.key ${OUTPUTDIR}/etc/super/certs/registry.superprotocol.local.key -COPY --from=certs_builder /buildroot/registry.superprotocol.local.crt ${OUTPUTDIR}/etc/super/certs/registry.superprotocol.local.crt -# ADD rootfs/files/configs/cert/superprotocol-certs.sh ${OUTPUTDIR}/usr/local/bin/ -# ADD rootfs/files/configs/cert/superprotocol-certs.service ${OUTPUTDIR}/etc/systemd/system -# RUN ln -s /etc/systemd/system/superprotocol-certs.service ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/superprotocol-certs.service -ADD rootfs/files/scripts/refresh_ca_certs.sh /buildroot/files/scripts/refresh_ca_certs.sh -RUN --security=insecure /buildroot/files/scripts/refresh_ca_certs.sh - -# check for presence of trusted self-signed CA from Super Protocol -RUN awk -v cmd='openssl x509 -noout -subject' '/BEGIN/{close(cmd)};{print | cmd}' < ${OUTPUTDIR}/etc/ssl/certs/ca-certificates.crt | grep -i 
'super' - -ARG SUPER_REGISTRY_HOST=registry.superprotocol.local -RUN echo "127.0.0.1 $SUPER_REGISTRY_HOST $LOCAL_REGISTRY_HOST" >> "${OUTPUTDIR}/etc/hosts" RUN echo "sp-$(petname)" > "${OUTPUTDIR}/etc/hostname" ADD rootfs/files/configs/etc/sysctl.d/99-zzz-override_cilium.conf ${OUTPUTDIR}/etc/sysctl.d/99-zzz-override_cilium.conf ADD rootfs/files/configs/etc/resolv.conf ${OUTPUTDIR}/etc/resolv.conf @@ -258,10 +211,6 @@ ADD rootfs/files/configs/etc/multipath.conf.append /buildroot/files/configs/etc/ ADD rootfs/files/configs/etc/sysctl.conf.append /buildroot/files/configs/etc/sysctl.conf.append RUN mkdir -p "${OUTPUTDIR}/sp" -# Enable only the timer; service will be triggered by it -ADD rootfs/files/configs/etc/systemd/system/local-registry.service ${OUTPUTDIR}/etc/systemd/system/local-registry.service -ADD rootfs/files/configs/usr/local/bin/local-registry.sh ${OUTPUTDIR}/usr/local/bin/local-registry.sh -RUN ln -sf /etc/systemd/system/local-registry.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/local-registry.service" ADD rootfs/files/configs/etc/systemd/system/hardening-vm.service ${OUTPUTDIR}/etc/systemd/system/hardening-vm.service ADD rootfs/files/configs/usr/local/bin/hardening-vm.sh ${OUTPUTDIR}/usr/local/bin/hardening-vm.sh RUN ln -sf /etc/systemd/system/hardening-vm.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/hardening-vm.service" @@ -343,14 +292,6 @@ RUN mkdir -p ${OUTPUTDIR}/etc/swarm-services/ ADD rootfs/files/scripts/setup_runtime_tools.sh /buildroot/files/scripts/ RUN chmod +x /buildroot/files/scripts/setup_runtime_tools.sh RUN --security=insecure /buildroot/files/scripts/setup_runtime_tools.sh -# MongoDB (install official mongodb-org 7.0 via Jammy repository inside VM rootfs) -ADD rootfs/files/scripts/install_mongodb.sh /buildroot/files/scripts/ -RUN --security=insecure bash /buildroot/files/scripts/install_mongodb.sh -# disable autostart without requiring systemd during build (remove enable symlinks for MongoDB) 
-RUN rm -f ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/mongod.service \ - && rm -f ${OUTPUTDIR}/etc/systemd/system/default.target.wants/mongod.service \ - && rm -f ${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/mongodb.service \ - && rm -f ${OUTPUTDIR}/etc/systemd/system/default.target.wants/mongodb.service # cleanup apt lists and policy-rc.d RUN rm -f ${OUTPUTDIR}/usr/sbin/policy-rc.d \ && rm -rf ${OUTPUTDIR}/var/lib/apt/lists/* diff --git a/src/rootfs/files/configs/etc/systemd/system/local-registry.service b/src/rootfs/files/configs/etc/systemd/system/local-registry.service deleted file mode 100644 index b3a2f3d3..00000000 --- a/src/rootfs/files/configs/etc/systemd/system/local-registry.service +++ /dev/null @@ -1,13 +0,0 @@ -[Unit] -Description=Run local registry & fileserver by using Hauler -After=network.target -Before=rke2-server.service - -[Service] -Type=simple -ExecStart=/usr/local/bin/local-registry.sh -Restart=always -RestartSec=5 - -[Install] -WantedBy=multi-user.target diff --git a/src/rootfs/files/configs/usr/local/bin/local-registry.sh b/src/rootfs/files/configs/usr/local/bin/local-registry.sh deleted file mode 100755 index d325dc67..00000000 --- a/src/rootfs/files/configs/usr/local/bin/local-registry.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -set -euo pipefail; - -SUPER_REGISTRY_HOST="registry.superprotocol.local"; -SUPER_CERTS_DIR="/opt/super/certs"; -SUPER_CERT_FILEPATH="${SUPER_CERTS_DIR}/${SUPER_REGISTRY_HOST}"; - -pkill hauler || true; - -sleep 3; # enterprise delay - -mkdir -p "/opt/hauler/.hauler"; - -find /etc/super/opt/hauler -type f -name "*.zst" | xargs /usr/local/bin/hauler store load --store /opt/hauler/store; - -nohup /usr/local/bin/hauler store serve fileserver --store /opt/hauler/store --directory /opt/hauler/registry & - -/usr/local/bin/hauler \ - store serve registry \ - --store /opt/hauler/store \ - --directory /opt/hauler/registry \ - --tls-cert="${SUPER_CERT_FILEPATH}.crt" \ - 
--tls-key="${SUPER_CERT_FILEPATH}.key"; diff --git a/src/rootfs/files/scripts/install_mongodb.sh b/src/rootfs/files/scripts/install_mongodb.sh deleted file mode 100644 index 0f9d0ffc..00000000 --- a/src/rootfs/files/scripts/install_mongodb.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode -set -euo pipefail - -# private -BUILDROOT="/buildroot" - -# init logging -source "$BUILDROOT/files/scripts/log.sh" - -# chroot functions -source "$BUILDROOT/files/scripts/chroot.sh" - -function install_mongodb() { - log_info "installing MongoDB (mongodb-org 7.0) inside VM rootfs" - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt update' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt install -y --no-install-recommends gnupg curl' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; curl -fsSL https://www.mongodb.org/static/pgp/server-7.0.asc | gpg --dearmor -o /usr/share/keyrings/mongodb-server-7.0.gpg' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-7.0.gpg ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/7.0 multiverse" > /etc/apt/sources.list.d/mongodb-org-7.0.list' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt update' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt install -y --no-install-recommends mongodb-org mongodb-mongosh' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; apt clean' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; mkdir -p /var/lib/mongodb /var/log/mongodb' - chroot "$OUTPUTDIR" /bin/bash -lc 'set -e; chown -R mongodb:mongodb /var/lib/mongodb /var/log/mongodb || true' -} - -chroot_init -install_mongodb -chroot_deinit - - diff --git a/src/rootfs/files/scripts/refresh_ca_certs.sh b/src/rootfs/files/scripts/refresh_ca_certs.sh deleted file mode 100755 index 8d46e848..00000000 --- a/src/rootfs/files/scripts/refresh_ca_certs.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# bash unofficial strict mode; -set -euo pipefail; - -# private -BUILDROOT="/buildroot"; - -# 
init loggggging; -source "$BUILDROOT/files/scripts/log.sh"; - -# chroot functions -source "$BUILDROOT/files/scripts/chroot.sh"; - -function refresh_ca_certs() { - log_info "refreshing ca certs"; - chroot "$OUTPUTDIR" /bin/bash -c 'update-ca-certificates --fresh'; -} - -chroot_init; -refresh_ca_certs; -chroot_deinit; diff --git a/src/swarm-scripts/70.setup-mongodb.sh b/src/swarm-scripts/70.setup-mongodb.sh deleted file mode 100644 index 92437238..00000000 --- a/src/swarm-scripts/70.setup-mongodb.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# This script bootstraps the mongodb service into SwarmDB via mysql client. -# Run it INSIDE the container. Assumes mysql client is available. -# -# Note: -# - The mongodb manifest and main.py are provided by the image at: -# /etc/swarm-services/mongodb/{manifest.yaml, main.py} -# This script only registers service records in SwarmDB. -# - mongodb depends on a WireGuard cluster existing and sharing nodes with it. -# When bootstrapping WireGuard, prefer ClusterPolicy id 'wireguard' to match mongodb's stateExpr. -# - -DB_HOST=${DB_HOST:-127.0.0.1} -DB_PORT=${DB_PORT:-3306} -DB_USER=${DB_USER:-root} -DB_NAME=${DB_NAME:-swarmdb} - -# Service descriptors -SERVICE_NAME=${SERVICE_NAME:-mongodb} -SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} -CLUSTER_POLICY=${CLUSTER_POLICY:-mongodb} -CLUSTER_ID=${CLUSTER_ID:-mongodb} - -# Location and manifest inside the container. -# IMPORTANT: This script runs only on one node. All nodes must have the same location available already -# (baked into the image), so we point to /etc/swarm-services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} -MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} -SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" - -if [ ! -f "$MANIFEST_PATH" ]; then - echo "Manifest not found at: $MANIFEST_PATH" >&2 - exit 1 -fi - -echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." 
-if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then - echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." -else - echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." - DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=5 --maxClusters=1 -fi - -echo "Ensuring ClusterService '$SERVICE_PK'..." -if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then - echo "ClusterService '$SERVICE_PK' already exists, skipping creation." -else - echo "Creating ClusterService '$SERVICE_PK'..." - DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" --omit-command-init -fi - -echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." 
From 20fcea5565c15cfe8ff09e335b291ef420d6d74e Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Mar 2026 21:59:56 +0300 Subject: [PATCH 61/79] refactor: enhance swarm initialization and database configuration scripts --- src/Dockerfile | 7 +- .../etc/systemd/system/swarm-db.service | 4 +- .../etc/systemd/system/swarm-init.service | 13 +- .../etc/systemd/system/swarm-node.service | 4 +- .../usr/local/bin/generate-swarm-db-config.sh | 84 ++++++++++ .../files/configs/usr/local/bin/swarm-init.sh | 150 ++++++------------ 6 files changed, 145 insertions(+), 117 deletions(-) create mode 100644 src/rootfs/files/configs/usr/local/bin/generate-swarm-db-config.sh diff --git a/src/Dockerfile b/src/Dockerfile index 7dcf790e..9d806882 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -225,12 +225,15 @@ RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-cloud-api.sh # run-state directories are prepared in state_disk_mount.sh; bind mounts via fstab ADD rootfs/files/configs/etc/securetty "${OUTPUTDIR}/etc/securetty" -# swarm-init: downloads binaries (swarm-db, SDK) and generates swarm-db config at VM startup +# swarm-init: downloads binaries and seeds swarm-db (restarts until MySQL is available) +# generate-swarm-db-config: generates /etc/swarm-db/config.yaml at swarm-db start (ExecStartPre) # configuration is read from /sp/swarm/config.yaml (attached via provider config disk at boot) RUN mkdir -p "${OUTPUTDIR}/etc/swarm" "${OUTPUTDIR}/etc/swarm-db" "${OUTPUTDIR}/etc/swarm-node" ADD rootfs/files/configs/etc/swarm-node/config.yaml ${OUTPUTDIR}/etc/swarm-node/config.yaml ADD rootfs/files/configs/usr/local/bin/swarm-init.sh ${OUTPUTDIR}/usr/local/bin/swarm-init.sh -RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-init.sh +ADD rootfs/files/configs/usr/local/bin/generate-swarm-db-config.sh ${OUTPUTDIR}/usr/local/bin/generate-swarm-db-config.sh +RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-init.sh \ + ${OUTPUTDIR}/usr/local/bin/generate-swarm-db-config.sh ADD 
rootfs/files/configs/etc/systemd/system/swarm-init.service ${OUTPUTDIR}/etc/systemd/system/swarm-init.service RUN ln -sf /etc/systemd/system/swarm-init.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/swarm-init.service" diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-db.service b/src/rootfs/files/configs/etc/systemd/system/swarm-db.service index b56b6a66..1b4ed1c3 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-db.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-db.service @@ -1,14 +1,14 @@ [Unit] Description=Swarm DB service -After=network-online.target local-fs.target swarm-init.service +After=network-online.target local-fs.target Wants=network-online.target -Requires=swarm-init.service ConditionPathExists=/usr/local/bin/swarm-db-linux-amd64 [Service] Type=simple WorkingDirectory=/ ExecStartPre=mkdir -p /var/lib/swarm-db/data +ExecStartPre=/bin/bash /usr/local/bin/generate-swarm-db-config.sh ExecStart=/usr/local/bin/swarm-db-linux-amd64 -config /etc/swarm-db/config.yaml StandardOutput=append:/var/log/swarm-db.log StandardError=append:/var/log/swarm-db-err.log diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-init.service b/src/rootfs/files/configs/etc/systemd/system/swarm-init.service index 896e90e5..34368a26 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-init.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-init.service @@ -1,16 +1,15 @@ [Unit] -Description=Swarm Initialization (download binaries and generate configs) -After=network-online.target -Wants=network-online.target -Before=swarm-db.service swarm-node.service +Description=Swarm Initialization (download binaries, configure services, seed swarm-db) +After=network-online.target swarm-db.service +Wants=network-online.target swarm-db.service [Service] -Type=oneshot -RemainAfterExit=yes +Type=simple ExecStart=/bin/bash /usr/local/bin/swarm-init.sh +Restart=on-failure +RestartSec=10 
StandardOutput=append:/var/log/swarm-init.log StandardError=append:/var/log/swarm-init.log -TimeoutStartSec=300 [Install] WantedBy=multi-user.target diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-node.service b/src/rootfs/files/configs/etc/systemd/system/swarm-node.service index 6025feb6..65853833 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-node.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-node.service @@ -1,8 +1,8 @@ [Unit] Description=Swarm Node Service (Podman Container) -After=network-online.target swarm-init.service swarm-db.service swarm-host-agent.service +After=network-online.target swarm-db.service swarm-host-agent.service Wants=network-online.target -Requires=swarm-init.service swarm-db.service swarm-host-agent.service +Requires=swarm-db.service swarm-host-agent.service [Service] Type=simple diff --git a/src/rootfs/files/configs/usr/local/bin/generate-swarm-db-config.sh b/src/rootfs/files/configs/usr/local/bin/generate-swarm-db-config.sh new file mode 100644 index 00000000..f79b1a47 --- /dev/null +++ b/src/rootfs/files/configs/usr/local/bin/generate-swarm-db-config.sh @@ -0,0 +1,84 @@ +#!/bin/bash +set -euo pipefail + +CONFIG="/sp/swarm/config.yaml" + +log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] [generate-swarm-db-config] $*"; } + +cfg() { + python3 -c " +import yaml +c = yaml.safe_load(open('$CONFIG')) or {} +v = c +for k in '$1'.split('.'): + v = v.get(k) if isinstance(v, dict) else None +print('' if v is None else v)" +} + +NODE_NAME=$(cfg "swarm_db.node_name") +ADVERTISE_ADDR=$(cfg "swarm_db.advertise_addr") + +[ -z "$NODE_NAME" ] && NODE_NAME=$(hostname) + +if [ -z "$ADVERTISE_ADDR" ]; then + log "auto-detecting external IP..." 
+ ADVERTISE_ADDR=$(curl -sf --max-time 5 https://myip.wtf/json \ + | python3 -c "import sys,json; print(json.load(sys.stdin).get('YourFuckingIPAddress',''))" 2>/dev/null || true) + [ -z "$ADVERTISE_ADDR" ] && \ + ADVERTISE_ADDR=$(curl -sf --max-time 5 https://api.ipify.org 2>/dev/null || true) + if [ -z "$ADVERTISE_ADDR" ]; then + log "WARNING: could not detect external IP, using 127.0.0.1" + ADVERTISE_ADDR="127.0.0.1" + fi + log "detected advertise_addr: $ADVERTISE_ADDR" +fi + +log "generating /etc/swarm-db/config.yaml (node=$NODE_NAME, advertise=$ADVERTISE_ADDR)..." +mkdir -p /etc/swarm-db /var/lib/swarm-db + +NODE_NAME_VAL="$NODE_NAME" ADVERTISE_ADDR_VAL="$ADVERTISE_ADDR" \ +python3 - << 'PYEOF' +import yaml, os + +with open('/sp/swarm/config.yaml') as f: + swarm_cfg = yaml.safe_load(f) or {} + +join_addresses = (swarm_cfg.get('swarm_db') or {}).get('join_addresses') or [] + +config = { + 'node': { + 'name': os.environ['NODE_NAME_VAL'], + 'host': '0.0.0.0', + 'port': 8001, + 'data_dir': '/var/lib/swarm-db', + 'schema_file': '/etc/swarm-db/schema.yaml', + }, + 'memberlist': { + 'bind_addr': '0.0.0.0', + 'bind_port': 7946, + 'advertise_addr': os.environ['ADVERTISE_ADDR_VAL'], + 'advertise_port': 7946, + 'join_addresses': join_addresses, + 'gossip_interval': '200ms', + 'probe_interval': '1s', + 'probe_timeout': '500ms', + 'suspicion_max_time_multiplier': 6, + }, + 'sql': { + 'enabled': True, + 'host': '0.0.0.0', + 'port': 3306, + 'system_database': 'swarmdb', + }, + 'jq': { + 'enabled': True, + 'host': '0.0.0.0', + 'port': 8080, + }, +} + +with open('/etc/swarm-db/config.yaml', 'w') as f: + yaml.dump(config, f, default_flow_style=False) +PYEOF + +log "swarm-db config generated" diff --git a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh index b36a0365..b3a00ee8 100644 --- a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh +++ b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh @@ -26,29 +26,10 @@ 
SDK_TAG=$(cfg "tags.sdk") SWARM_CLOUD_API_TAG=$(cfg "tags.swarm_cloud_api") SWARM_CLOUD_UI_TAG=$(cfg "tags.swarm_cloud_ui") AUTH_SERVICE_TAG=$(cfg "tags.auth_service") -NODE_NAME=$(cfg "swarm_db.node_name") -ADVERTISE_ADDR=$(cfg "swarm_db.advertise_addr") POWERDNS_API_URL=$(cfg "powerdns_api_url") POWERDNS_API_KEY=$(cfg "powerdns_api_key") BASE_DOMAIN=$(cfg "base_domain") -# Resolve node name -[ -z "$NODE_NAME" ] && NODE_NAME=$(hostname) - -# Auto-detect external IP if not configured -if [ -z "$ADVERTISE_ADDR" ]; then - log "auto-detecting external IP..." - ADVERTISE_ADDR=$(curl -sf --max-time 5 https://myip.wtf/json \ - | python3 -c "import sys,json; print(json.load(sys.stdin).get('YourFuckingIPAddress',''))" 2>/dev/null || true) - [ -z "$ADVERTISE_ADDR" ] && \ - ADVERTISE_ADDR=$(curl -sf --max-time 5 https://api.ipify.org 2>/dev/null || true) - if [ -z "$ADVERTISE_ADDR" ]; then - log "WARNING: could not detect external IP, using 127.0.0.1" - ADVERTISE_ADDR="127.0.0.1" - fi - log "detected advertise_addr: $ADVERTISE_ADDR" -fi - # Download a GitHub release asset to a local file path # Usage: download_github_asset download_github_asset() { @@ -87,21 +68,25 @@ for a in data.get('assets', []): "https://api.github.com/repos/$owner/$repo/releases/assets/$asset_id" } -# Install swarm-db binary from GitHub Releases +# Install swarm-db binary from GitHub Releases (idempotent: skip if already installed) if [ -n "$SWARM_DB_TAG" ]; then - log "installing swarm-db $SWARM_DB_TAG..." - FILENAME="swarm-db-${SWARM_DB_TAG}-linux-amd64.tar.gz" - TMP=$(mktemp -d) - download_github_asset "Super-Protocol" "swarm-db" "$SWARM_DB_TAG" "$FILENAME" "$TMP/swarm-db.tar.gz" - tar xzf "$TMP/swarm-db.tar.gz" -C "$TMP" - install -m 755 "$TMP/swarm-db" /usr/local/bin/swarm-db-linux-amd64 - rm -rf "$TMP" - log "swarm-db $SWARM_DB_TAG installed" + if [ -f "/usr/local/bin/swarm-db-linux-amd64" ]; then + log "swarm-db already installed, skipping" + else + log "installing swarm-db $SWARM_DB_TAG..." 
+ FILENAME="swarm-db-${SWARM_DB_TAG}-linux-amd64.tar.gz" + TMP=$(mktemp -d) + download_github_asset "Super-Protocol" "swarm-db" "$SWARM_DB_TAG" "$FILENAME" "$TMP/swarm-db.tar.gz" + tar xzf "$TMP/swarm-db.tar.gz" -C "$TMP" + install -m 755 "$TMP/swarm-db" /usr/local/bin/swarm-db-linux-amd64 + rm -rf "$TMP" + log "swarm-db $SWARM_DB_TAG installed" + fi else log "tags.swarm_db not set, using built-in swarm-db binary" fi -# Install provision-plugin-sdk from GitHub Releases +# Install provision-plugin-sdk from GitHub Releases (pip install is idempotent) if [ -n "$SDK_TAG" ]; then log "installing provision-plugin-sdk $SDK_TAG..." FILENAME="provision-plugin-sdk-${SDK_TAG}.tar.gz" @@ -115,37 +100,41 @@ else log "tags.sdk not set, using built-in provision-plugin-sdk" fi -# Install swarm-host-agent from GitHub Releases +# Install swarm-host-agent from GitHub Releases (idempotent: skip if already installed) # Tag format: "host-agent-vX.Y.Z" → release tag "release-vX.Y.Z" if [ -n "$HOST_AGENT_TAG" ]; then - log "installing swarm-host-agent $HOST_AGENT_TAG..." - if [[ "$HOST_AGENT_TAG" == release-* ]]; then - RELEASE_TAG="$HOST_AGENT_TAG" - elif [[ "$HOST_AGENT_TAG" == host-agent-* ]]; then - VERSION="${HOST_AGENT_TAG#host-agent-}" - RELEASE_TAG="release-$VERSION" + if [ -f "/usr/local/bin/swarm-host-agent" ]; then + log "swarm-host-agent already installed, skipping" else - RELEASE_TAG="release-$HOST_AGENT_TAG" + log "installing swarm-host-agent $HOST_AGENT_TAG..." 
+ if [[ "$HOST_AGENT_TAG" == release-* ]]; then + RELEASE_TAG="$HOST_AGENT_TAG" + elif [[ "$HOST_AGENT_TAG" == host-agent-* ]]; then + VERSION="${HOST_AGENT_TAG#host-agent-}" + RELEASE_TAG="release-$VERSION" + else + RELEASE_TAG="release-$HOST_AGENT_TAG" + fi + FILENAME="swarm-host-agent-${RELEASE_TAG}-linux-amd64.tar.gz" + TMP=$(mktemp -d) + download_github_asset "Super-Protocol" "swarm-cloud" "$RELEASE_TAG" "$FILENAME" "$TMP/host-agent.tar.gz" + tar xzf "$TMP/host-agent.tar.gz" -C "$TMP" + EXTRACT_DIR=$(ls -1 "$TMP" | grep -v 'host-agent\.tar\.gz' | head -1) + install -m 755 "$TMP/$EXTRACT_DIR/swarm-host-agent" /usr/local/bin/swarm-host-agent + mkdir -p /etc/swarm + cp "$TMP/$EXTRACT_DIR/host-agent.yaml" /etc/swarm/host-agent.yaml + cp "$TMP/$EXTRACT_DIR/swarm-host-agent.service" /etc/systemd/system/swarm-host-agent.service + rm -rf "$TMP" + log "swarm-host-agent $RELEASE_TAG installed" + systemctl daemon-reload + systemctl enable swarm-host-agent.service fi - FILENAME="swarm-host-agent-${RELEASE_TAG}-linux-amd64.tar.gz" - TMP=$(mktemp -d) - download_github_asset "Super-Protocol" "swarm-cloud" "$RELEASE_TAG" "$FILENAME" "$TMP/host-agent.tar.gz" - tar xzf "$TMP/host-agent.tar.gz" -C "$TMP" - EXTRACT_DIR=$(ls -1 "$TMP" | grep -v 'host-agent\.tar\.gz' | head -1) - install -m 755 "$TMP/$EXTRACT_DIR/swarm-host-agent" /usr/local/bin/swarm-host-agent - mkdir -p /etc/swarm - cp "$TMP/$EXTRACT_DIR/host-agent.yaml" /etc/swarm/host-agent.yaml - cp "$TMP/$EXTRACT_DIR/swarm-host-agent.service" /etc/systemd/system/swarm-host-agent.service - rm -rf "$TMP" - log "swarm-host-agent $RELEASE_TAG installed" - systemctl daemon-reload - systemctl enable swarm-host-agent.service else log "ERROR: tags.host_agent is required" exit 1 fi -# Authenticate to ghcr.io for pulling swarm-node container image +# Authenticate to ghcr.io for pulling swarm-node container image (idempotent) if [ -n "$GITHUB_TOKEN" ]; then log "authenticating to ghcr.io..." 
echo "$GITHUB_TOKEN" | podman login ghcr.io -u oauth2 --password-stdin @@ -154,7 +143,7 @@ else log "WARNING: github.token not set, skipping ghcr.io login (image must be publicly accessible)" fi -# Generate /etc/swarm/swarm-node.env for swarm-node.service EnvironmentFile +# Generate /etc/swarm/swarm-node.env for swarm-node.service EnvironmentFile (idempotent) log "generating /etc/swarm/swarm-node.env..." mkdir -p /etc/swarm cat > /etc/swarm/swarm-node.env << EOF @@ -164,56 +153,7 @@ SWARM_CLOUD_UI_TAG=${SWARM_CLOUD_UI_TAG} AUTH_SERVICE_TAG=${AUTH_SERVICE_TAG} EOF -# Generate /etc/swarm-db/config.yaml from /sp/swarm/config.yaml parameters -log "generating /etc/swarm-db/config.yaml (node=$NODE_NAME, advertise=$ADVERTISE_ADDR)..." -mkdir -p /etc/swarm-db /var/lib/swarm-db - -NODE_NAME_VAL="$NODE_NAME" ADVERTISE_ADDR_VAL="$ADVERTISE_ADDR" \ -python3 - << 'PYEOF' -import yaml, os - -with open('/sp/swarm/config.yaml') as f: - swarm_cfg = yaml.safe_load(f) or {} - -join_addresses = (swarm_cfg.get('swarm_db') or {}).get('join_addresses') or [] - -config = { - 'node': { - 'name': os.environ['NODE_NAME_VAL'], - 'host': '0.0.0.0', - 'port': 8001, - 'data_dir': '/var/lib/swarm-db', - 'schema_file': '/etc/swarm-db/schema.yaml', - }, - 'memberlist': { - 'bind_addr': '0.0.0.0', - 'bind_port': 7946, - 'advertise_addr': os.environ['ADVERTISE_ADDR_VAL'], - 'advertise_port': 7946, - 'join_addresses': join_addresses, - 'gossip_interval': '200ms', - 'probe_interval': '1s', - 'probe_timeout': '500ms', - 'suspicion_max_time_multiplier': 6, - }, - 'sql': { - 'enabled': True, - 'host': '0.0.0.0', - 'port': 3306, - 'system_database': 'swarmdb', - }, - 'jq': { - 'enabled': True, - 'host': '0.0.0.0', - 'port': 8080, - }, -} - -with open('/etc/swarm-db/config.yaml', 'w') as f: - yaml.dump(config, f, default_flow_style=False) -PYEOF - -# Wait for swarm-db MySQL to become available, then insert SwarmSecrets +# Wait for swarm-db MySQL — fail (and let systemd restart us) if not available log 
"waiting for swarm-db MySQL to become available..." mysql_host="127.0.0.1" mysql_port="3306" @@ -226,18 +166,20 @@ while true; do fi elapsed=$(( $(date +%s) - start_ts )) if [ "$elapsed" -ge "$wait_timeout" ]; then - log "WARNING: MySQL not available after ${wait_timeout}s, skipping SwarmSecrets insertion" - break + log "ERROR: MySQL not available after ${wait_timeout}s, will retry" + exit 1 fi sleep 1 done +# Insert SwarmSecrets (idempotent: INSERT IGNORE skips existing keys) log "inserting SwarmSecrets into swarm-db..." AUTH_SERVICE_YAML="" AUTH_SERVICE_YAML_PATH="/sp/swarm/auth-service.yaml" [ -f "$AUTH_SERVICE_YAML_PATH" ] && AUTH_SERVICE_YAML=$(cat "$AUTH_SERVICE_YAML_PATH") # Generate RSA 4096 private key (PKCS8 PEM) for evidence signing. +# INSERT IGNORE ensures only the first run inserts it; subsequent runs are no-ops. # TODO: should we use subroot (intermediate CA) key hierarchy? log "generating evidence signing key (RSA 4096)..." EVIDENCE_SIGN_KEY=$(openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:4096 2>/dev/null) From 06e20be63d19123236e4a9b7d6ccc04365855f6a Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Wed, 4 Mar 2026 22:51:20 +0300 Subject: [PATCH 62/79] refactor: update swarm-node.service to use optional environment file and improve ExecStart command --- .../etc/systemd/system/swarm-node.service | 36 ++++++++++--------- .../files/scripts/setup_runtime_tools.sh | 2 +- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-node.service b/src/rootfs/files/configs/etc/systemd/system/swarm-node.service index 65853833..335ba07c 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-node.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-node.service @@ -6,26 +6,28 @@ Requires=swarm-db.service swarm-host-agent.service [Service] Type=simple -EnvironmentFile=/etc/swarm/swarm-node.env +EnvironmentFile=-/etc/swarm/swarm-node.env ExecStartPre=-/usr/bin/podman stop 
swarm-node ExecStartPre=-/usr/bin/podman rm swarm-node ExecStartPre=mkdir -p /var/lib/swarm-node -ExecStart=/usr/bin/podman run \ - --name swarm-node \ - --rm \ - --network host \ - -v /etc/swarm-node:/etc/swarm-node:ro \ - -v /etc/swarm-cloud/services:/etc/swarm-cloud/services:ro \ - -v /var/lib/swarm-node:/var/lib/swarm-node \ - -v /var/run/swarm-agent.sock:/var/run/swarm-agent.sock \ - -e NODE_ENV=production \ - -e SWC_NODE_CONFIG_PATH=/etc/swarm-node/config.yaml \ - -e SWARM_CLOUD_API_TAG=$SWARM_CLOUD_API_TAG \ - -e SWARM_CLOUD_UI_TAG=$SWARM_CLOUD_UI_TAG \ - -e AUTH_SERVICE_TAG=$AUTH_SERVICE_TAG \ - -e SWARM_HOST_AGENT_SOCKET=/var/run/swarm-agent.sock \ - ghcr.io/super-protocol/swarm-cloud/swarm-node:$SWARM_NODE_TAG \ - apps/swarm-node/dist/main.js +ExecStart=/bin/bash -c '\ + test -n "${SWARM_NODE_TAG}" || { echo "SWARM_NODE_TAG not set, waiting for swarm-init"; exit 1; }; \ + exec /usr/bin/podman run \ + --name swarm-node \ + --rm \ + --network host \ + -v /etc/swarm-node:/etc/swarm-node:ro \ + -v /etc/swarm-cloud/services:/etc/swarm-cloud/services:ro \ + -v /var/lib/swarm-node:/var/lib/swarm-node \ + -v /var/run/swarm-agent.sock:/var/run/swarm-agent.sock \ + -e NODE_ENV=production \ + -e SWC_NODE_CONFIG_PATH=/etc/swarm-node/config.yaml \ + -e SWARM_CLOUD_API_TAG="${SWARM_CLOUD_API_TAG}" \ + -e SWARM_CLOUD_UI_TAG="${SWARM_CLOUD_UI_TAG}" \ + -e AUTH_SERVICE_TAG="${AUTH_SERVICE_TAG}" \ + -e SWARM_HOST_AGENT_SOCKET=/var/run/swarm-agent.sock \ + "ghcr.io/super-protocol/swarm-cloud/swarm-node:${SWARM_NODE_TAG}" \ + apps/swarm-node/dist/main.js' ExecStop=/usr/bin/podman stop swarm-node Restart=always RestartSec=10 diff --git a/src/rootfs/files/scripts/setup_runtime_tools.sh b/src/rootfs/files/scripts/setup_runtime_tools.sh index ab353cde..06b2a1c7 100644 --- a/src/rootfs/files/scripts/setup_runtime_tools.sh +++ b/src/rootfs/files/scripts/setup_runtime_tools.sh @@ -20,7 +20,7 @@ function setup_runtime_tools() { log_info "installing runtime packages into rootfs 
(python3, mysql client, openssl, netcat, dns tools)" chroot "${OUTPUTDIR}" /usr/bin/apt update chroot "${OUTPUTDIR}" /usr/bin/apt install -y --no-install-recommends \ - mysql-client python3 python3-pip openssl netcat-openbsd dnsutils + mysql-client python3 python3-pip openssl netcat-openbsd dnsutils nano chroot "${OUTPUTDIR}" /usr/bin/apt clean log_info "installing Python runtime dependencies" From e03ebb076ec8124d2312cc091be6bbb560107372 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Mar 2026 10:51:22 +0300 Subject: [PATCH 63/79] refactor: update swarm services handling and configuration paths in Dockerfile and scripts --- src/Dockerfile | 3 +- .../files/configs/etc/swarm-node/config.yaml | 2 +- .../systemd/system/swarm-cloud-api.service | 18 ----- .../etc/systemd/system/swarm-node.service | 3 +- src/rootfs/files/configs/sp/swarm/config.yaml | 1 + .../files/configs/usr/local/bin/swarm-init.sh | 71 +++++++++++++++++++ 6 files changed, 77 insertions(+), 21 deletions(-) delete mode 100644 src/rootfs/files/configs/etc/systemd/system/swarm-cloud-api.service diff --git a/src/Dockerfile b/src/Dockerfile index 9d806882..4ce6b0f6 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -241,6 +241,7 @@ RUN ln -sf /etc/systemd/system/swarm-init.service "${OUTPUTDIR}/etc/systemd/syst ADD rootfs/files/configs/etc/systemd/system/swarm-host-agent.service ${OUTPUTDIR}/etc/systemd/system/swarm-host-agent.service RUN mkdir -p "${OUTPUTDIR}/etc/swarm-service-launchers" +COPY swarm-scripts ${OUTPUTDIR}/etc/swarm-service-launchers/ # swarm one-shot services runner ADD rootfs/files/configs/etc/systemd/system/swarm-services.service ${OUTPUTDIR}/etc/systemd/system/swarm-services.service @@ -260,7 +261,7 @@ ADD rootfs/files/configs/usr/local/bin/kubectl ${OUTPUTDIR}/usr/local/bin/kubect RUN chmod +x ${OUTPUTDIR}/usr/local/bin/kubectl # swarm-db binary is downloaded at runtime by swarm-init (tags.swarm_db in /sp/swarm/config.yaml) # provision plugins at /etc/swarm-cloud/services are 
mounted into the swarm-node container at runtime -RUN mkdir -p "${OUTPUTDIR}/etc/swarm-db" "${OUTPUTDIR}/etc/swarm-cloud/services" +RUN mkdir -p "${OUTPUTDIR}/etc/swarm-db" "${OUTPUTDIR}/etc/swarm-services" COPY repos/swarm-cloud/apps/swarm-node-e2e/fixtures/schema.yaml ${OUTPUTDIR}/etc/swarm-db/schema.yaml diff --git a/src/rootfs/files/configs/etc/swarm-node/config.yaml b/src/rootfs/files/configs/etc/swarm-node/config.yaml index ea493a3f..a132b1ee 100644 --- a/src/rootfs/files/configs/etc/swarm-node/config.yaml +++ b/src/rootfs/files/configs/etc/swarm-node/config.yaml @@ -23,5 +23,5 @@ leaderElection: provision: enabled: true swarmDbApiUrl: "http://127.0.0.1:8080" - servicesDir: "/etc/swarm-cloud/services" + servicesDir: "/etc/swarm-services" localDbPath: "/var/lib/swarm-node/provision.db" diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-cloud-api.service b/src/rootfs/files/configs/etc/systemd/system/swarm-cloud-api.service deleted file mode 100644 index 412fe3a5..00000000 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-cloud-api.service +++ /dev/null @@ -1,18 +0,0 @@ -[Unit] -Description=Swarm Cloud API service -After=network-online.target swarm-db.service -Wants=network-online.target -Requires=swarm-db.service - -[Service] -Type=simple -WorkingDirectory=/usr/local/lib/swarm-cloud -ExecStart=/usr/local/bin/swarm-cloud-api.sh -StandardOutput=append:/var/log/swarm-cloud-api.log -StandardError=append:/var/log/swarm-cloud-api-err.log -Restart=always -RestartSec=5 -Environment=NODE_ENV=production - -[Install] -WantedBy=multi-user.target diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-node.service b/src/rootfs/files/configs/etc/systemd/system/swarm-node.service index 335ba07c..bb1c58a4 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-node.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-node.service @@ -10,6 +10,7 @@ EnvironmentFile=-/etc/swarm/swarm-node.env ExecStartPre=-/usr/bin/podman stop swarm-node 
ExecStartPre=-/usr/bin/podman rm swarm-node ExecStartPre=mkdir -p /var/lib/swarm-node +ExecStartPre=chown -R 1001:1001 /var/lib/swarm-node ExecStart=/bin/bash -c '\ test -n "${SWARM_NODE_TAG}" || { echo "SWARM_NODE_TAG not set, waiting for swarm-init"; exit 1; }; \ exec /usr/bin/podman run \ @@ -17,7 +18,7 @@ ExecStart=/bin/bash -c '\ --rm \ --network host \ -v /etc/swarm-node:/etc/swarm-node:ro \ - -v /etc/swarm-cloud/services:/etc/swarm-cloud/services:ro \ + -v /etc/swarm-services:/etc/swarm-services:ro \ -v /var/lib/swarm-node:/var/lib/swarm-node \ -v /var/run/swarm-agent.sock:/var/run/swarm-agent.sock \ -e NODE_ENV=production \ diff --git a/src/rootfs/files/configs/sp/swarm/config.yaml b/src/rootfs/files/configs/sp/swarm/config.yaml index 0fc5ebb6..aa0f410b 100644 --- a/src/rootfs/files/configs/sp/swarm/config.yaml +++ b/src/rootfs/files/configs/sp/swarm/config.yaml @@ -6,6 +6,7 @@ tags: host_agent: "" # e.g. "host-agent-v1.0.0" — required; downloads binary + service + config swarm_node: "" # e.g. "v1.2.3" — Docker image tag for ghcr.io/.../swarm-node sdk: "" # e.g. "v1.2.3" — downloads and replaces built-in SDK; empty = use built-in + services: "" # e.g. 
"v1.2.3" — downloads all service .zip archives into /etc/swarm-services swarm_cloud_api: "" # passed as env var to swarm-node container swarm_cloud_ui: "" # passed as env var to swarm-node container auth_service: "" # passed as env var to swarm-node container diff --git a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh index b3a00ee8..61647183 100644 --- a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh +++ b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh @@ -23,6 +23,7 @@ SWARM_DB_TAG=$(cfg "tags.swarm_db") HOST_AGENT_TAG=$(cfg "tags.host_agent") SWARM_NODE_TAG=$(cfg "tags.swarm_node") SDK_TAG=$(cfg "tags.sdk") +SERVICES_TAG=$(cfg "tags.services") SWARM_CLOUD_API_TAG=$(cfg "tags.swarm_cloud_api") SWARM_CLOUD_UI_TAG=$(cfg "tags.swarm_cloud_ui") AUTH_SERVICE_TAG=$(cfg "tags.auth_service") @@ -100,6 +101,76 @@ else log "tags.sdk not set, using built-in provision-plugin-sdk" fi +# Download swarm-services from GitHub Release into /etc/swarm-services (always overwrite) +if [ -n "$SERVICES_TAG" ]; then + log "downloading swarm-services $SERVICES_TAG..." + TMP=$(mktemp -d) + REL_FILE=$(mktemp) + auth_curl_args=() + [ -n "$GITHUB_TOKEN" ] && auth_curl_args=(-H "Authorization: token $GITHUB_TOKEN") + + if ! 
curl -sf "${auth_curl_args[@]}" \ + "https://api.github.com/repos/Super-Protocol/swarm-cloud/releases/tags/$SERVICES_TAG" \ + -o "$REL_FILE"; then + rm -f "$REL_FILE" + log "ERROR: failed to fetch release info for swarm-services $SERVICES_TAG" + exit 1 + fi + + GITHUB_TOKEN="$GITHUB_TOKEN" REL_FILE="$REL_FILE" TMP_DIR="$TMP" \ + python3 - << 'PYEOF' +import json, os, subprocess, re, zipfile + +github_token = os.environ.get('GITHUB_TOKEN', '') +rel_file = os.environ['REL_FILE'] +tmp_dir = os.environ['TMP_DIR'] +services_dir = '/etc/swarm-services' + +with open(rel_file) as f: + data = json.load(f) +os.unlink(rel_file) + +os.makedirs(services_dir, exist_ok=True) +auth_headers = ['-H', f'Authorization: token {github_token}'] if github_token else [] + +for asset in data.get('assets', []): + name = asset['name'] + if not name.endswith('.zip'): + continue + asset_id = asset['id'] + service_name = re.sub(r'^(.+?)-v[\d][^/]*\.zip$', r'\1', name) + dest = os.path.join(tmp_dir, name) + + subprocess.run( + ['curl', '-sfL'] + auth_headers + [ + '-H', 'Accept: application/octet-stream', + '-o', dest, + f'https://api.github.com/repos/Super-Protocol/swarm-cloud/releases/assets/{asset_id}', + ], + check=True, + ) + + svc_dir = os.path.join(services_dir, service_name) + os.makedirs(svc_dir, exist_ok=True) + with zipfile.ZipFile(dest, 'r') as zf: + zf.extractall(svc_dir) + + if not os.path.exists(os.path.join(svc_dir, 'manifest.yaml')): + print(f'ERROR: manifest.yaml not found in {service_name}', flush=True) + raise SystemExit(1) + + main_py = os.path.join(svc_dir, 'main.py') + if os.path.exists(main_py): + os.chmod(main_py, 0o755) + + print(f'installed service: {service_name}', flush=True) +PYEOF + rm -rf "$TMP" + log "swarm-services $SERVICES_TAG installed" +else + log "tags.services not set, skipping swarm-services download" +fi + # Install swarm-host-agent from GitHub Releases (idempotent: skip if already installed) # Tag format: "host-agent-vX.Y.Z" → release tag 
"release-vX.Y.Z" if [ -n "$HOST_AGENT_TAG" ]; then From fc8817ad6ca077312216a05efe6e1b44835e7583 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Mar 2026 11:06:33 +0300 Subject: [PATCH 64/79] refactor: update service paths from /etc/swarm-cloud to /etc/swarm-services in multiple setup scripts --- src/swarm-scripts/10.setup-wireguard.sh | 4 ++-- src/swarm-scripts/20.setup-hw-measurement.sh | 6 +++--- src/swarm-scripts/30.setup-latency-measurement.sh | 6 +++--- src/swarm-scripts/40.setup-geo-ip-measurement.sh | 6 +++--- src/swarm-scripts/50.setup-rke2.sh | 6 +++--- src/swarm-scripts/60.setup-redis.sh | 6 +++--- src/swarm-scripts/61.setup-cockroachdb.sh | 6 +++--- src/swarm-scripts/62.setup-knot.sh | 6 +++--- src/swarm-scripts/63.setup-openresty.sh | 6 +++--- src/swarm-scripts/64.setup-swarm-cloud-api.sh | 8 ++++---- src/swarm-scripts/71.setup-auth-service.sh | 4 ++-- src/swarm-scripts/72.setup-domain-initializer.sh | 4 ++-- src/swarm-scripts/73.setup-route-manager.sh | 4 ++-- 13 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/swarm-scripts/10.setup-wireguard.sh b/src/swarm-scripts/10.setup-wireguard.sh index 990608d7..5c2fab3d 100644 --- a/src/swarm-scripts/10.setup-wireguard.sh +++ b/src/swarm-scripts/10.setup-wireguard.sh @@ -16,8 +16,8 @@ CLUSTER_POLICY=${CLUSTER_POLICY:-wireguard} CLUSTER_ID=${CLUSTER_ID:-wireguard} # Path to manifest file INSIDE the container (configs are mounted to /configs) -MANIFEST_PATH=${MANIFEST_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}/manifest.yaml} -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +MANIFEST_PATH=${MANIFEST_PATH:-/etc/swarm-services/${SERVICE_NAME}/manifest.yaml} +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} if [ ! 
-f "$MANIFEST_PATH" ]; then echo "Manifest not found at: $MANIFEST_PATH" >&2 diff --git a/src/swarm-scripts/20.setup-hw-measurement.sh b/src/swarm-scripts/20.setup-hw-measurement.sh index 00ebc94f..49cdbf08 100644 --- a/src/swarm-scripts/20.setup-hw-measurement.sh +++ b/src/swarm-scripts/20.setup-hw-measurement.sh @@ -6,7 +6,7 @@ set -euo pipefail # # Note: # - The hw-measurement manifest and main.py should be available inside the container at: -# /etc/swarm-cloud/services/hw-measurement/manifest.yaml and /etc/swarm-cloud/services/hw-measurement/main.py +# /etc/swarm-services/hw-measurement/manifest.yaml and /etc/swarm-services/hw-measurement/main.py # (mount or copy them similarly to the wireguard service) # # - hw-measurement depends on a WireGuard cluster existing and sharing nodes with it. @@ -24,8 +24,8 @@ CLUSTER_POLICY=${CLUSTER_POLICY:-hw-measurement} CLUSTER_ID=${CLUSTER_ID:-hw-measurement} # Path to manifest file INSIDE the container (configs are mounted to /configs) -MANIFEST_PATH=${MANIFEST_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}/manifest.yaml} -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +MANIFEST_PATH=${MANIFEST_PATH:-/etc/swarm-services/${SERVICE_NAME}/manifest.yaml} +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" if [ ! 
-f "$MANIFEST_PATH" ]; then diff --git a/src/swarm-scripts/30.setup-latency-measurement.sh b/src/swarm-scripts/30.setup-latency-measurement.sh index f7c0d93e..f0b1ff87 100644 --- a/src/swarm-scripts/30.setup-latency-measurement.sh +++ b/src/swarm-scripts/30.setup-latency-measurement.sh @@ -6,7 +6,7 @@ set -euo pipefail # # Note: # - The latency-measurement manifest and main.py should be available inside the container at: -# /etc/swarm-cloud/services/latency-measurement/manifest.yaml and /etc/swarm-cloud/services/latency-measurement/main.py +# /etc/swarm-services/latency-measurement/manifest.yaml and /etc/swarm-services/latency-measurement/main.py # (mount or copy them similarly to the wireguard service) # # - latency-measurement depends on a WireGuard cluster existing and sharing nodes with it. @@ -24,8 +24,8 @@ CLUSTER_POLICY=${CLUSTER_POLICY:-latency-measurement} CLUSTER_ID=${CLUSTER_ID:-latency-measurement} # Path to manifest file INSIDE the container (configs are mounted to /configs) -MANIFEST_PATH=${MANIFEST_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}/manifest.yaml} -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +MANIFEST_PATH=${MANIFEST_PATH:-/etc/swarm-services/${SERVICE_NAME}/manifest.yaml} +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" if [ ! 
-f "$MANIFEST_PATH" ]; then diff --git a/src/swarm-scripts/40.setup-geo-ip-measurement.sh b/src/swarm-scripts/40.setup-geo-ip-measurement.sh index 6dfe6cf5..b1741db0 100644 --- a/src/swarm-scripts/40.setup-geo-ip-measurement.sh +++ b/src/swarm-scripts/40.setup-geo-ip-measurement.sh @@ -6,7 +6,7 @@ set -euo pipefail # # Note: # - The geo-ip-measurement manifest and main.py should be available inside the container at: -# /etc/swarm-cloud/services/geo-ip-measurement/manifest.yaml and /etc/swarm-cloud/services/geo-ip-measurement/main.py +# /etc/swarm-services/geo-ip-measurement/manifest.yaml and /etc/swarm-services/geo-ip-measurement/main.py # (mount or copy them similarly to the wireguard service) # # - geo-ip-measurement depends on a WireGuard cluster existing and sharing nodes with it. @@ -24,8 +24,8 @@ CLUSTER_POLICY=${CLUSTER_POLICY:-geo-ip-measurement} CLUSTER_ID=${CLUSTER_ID:-geo-ip-measurement} # Path to manifest file INSIDE the container (configs are mounted to /configs) -MANIFEST_PATH=${MANIFEST_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}/manifest.yaml} -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +MANIFEST_PATH=${MANIFEST_PATH:-/etc/swarm-services/${SERVICE_NAME}/manifest.yaml} +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" if [ ! 
-f "$MANIFEST_PATH" ]; then diff --git a/src/swarm-scripts/50.setup-rke2.sh b/src/swarm-scripts/50.setup-rke2.sh index 477a45e3..2831ac85 100644 --- a/src/swarm-scripts/50.setup-rke2.sh +++ b/src/swarm-scripts/50.setup-rke2.sh @@ -6,7 +6,7 @@ set -euo pipefail # # Note: # - The rke2 manifest and main.py should be available inside the container at: -# /etc/swarm-cloud/services/rke2/manifest.yaml and /etc/swarm-cloud/services/rke2/main.py +# /etc/swarm-services/rke2/manifest.yaml and /etc/swarm-services/rke2/main.py # (mount or copy them similarly to the wireguard service) # # - rke2 depends on a WireGuard cluster existing and sharing nodes with it. @@ -24,8 +24,8 @@ CLUSTER_POLICY=${CLUSTER_POLICY:-rke2} CLUSTER_ID=${CLUSTER_ID:-rke2} # Path to manifest file INSIDE the container (configs are mounted to /configs) -MANIFEST_PATH=${MANIFEST_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}/manifest.yaml} -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +MANIFEST_PATH=${MANIFEST_PATH:-/etc/swarm-services/${SERVICE_NAME}/manifest.yaml} +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" if [ ! -f "$MANIFEST_PATH" ]; then diff --git a/src/swarm-scripts/60.setup-redis.sh b/src/swarm-scripts/60.setup-redis.sh index 352fe0c5..8824ffad 100644 --- a/src/swarm-scripts/60.setup-redis.sh +++ b/src/swarm-scripts/60.setup-redis.sh @@ -6,7 +6,7 @@ set -euo pipefail # # Note: # - The redis manifest and main.py are provided by the image at: -# /etc/swarm-cloud/services/redis/{manifest.yaml, main.py} +# /etc/swarm-services/redis/{manifest.yaml, main.py} # This script only registers service records in SwarmDB. # - redis depends on a WireGuard cluster existing and sharing nodes with it. # When bootstrapping WireGuard, prefer ClusterPolicy id 'wireguard' to match redis's stateExpr. 
@@ -28,9 +28,9 @@ SENTINEL_CLUSTER_POLICY=${SENTINEL_CLUSTER_POLICY:-redis-sentinel} SENTINEL_MAX_SIZE=${SENTINEL_MAX_SIZE:-3} # Location stored in ClusterServices; must exist on all nodes (baked into image) -REDIS_LOCATION_PATH=${REDIS_LOCATION_PATH:-/etc/swarm-cloud/services/${REDIS_SERVICE_NAME}} +REDIS_LOCATION_PATH=${REDIS_LOCATION_PATH:-/etc/swarm-services/${REDIS_SERVICE_NAME}} REDIS_MANIFEST_PATH=${REDIS_MANIFEST_PATH:-${REDIS_LOCATION_PATH}/manifest.yaml} -SENTINEL_LOCATION_PATH=${SENTINEL_LOCATION_PATH:-/etc/swarm-cloud/services/${SENTINEL_SERVICE_NAME}} +SENTINEL_LOCATION_PATH=${SENTINEL_LOCATION_PATH:-/etc/swarm-services/${SENTINEL_SERVICE_NAME}} SENTINEL_MANIFEST_PATH=${SENTINEL_MANIFEST_PATH:-${SENTINEL_LOCATION_PATH}/manifest.yaml} REDIS_SERVICE_PK="${REDIS_CLUSTER_POLICY}:${REDIS_SERVICE_NAME}" SENTINEL_SERVICE_PK="${SENTINEL_CLUSTER_POLICY}:${SENTINEL_SERVICE_NAME}" diff --git a/src/swarm-scripts/61.setup-cockroachdb.sh b/src/swarm-scripts/61.setup-cockroachdb.sh index 37cf2d3d..56a74c40 100644 --- a/src/swarm-scripts/61.setup-cockroachdb.sh +++ b/src/swarm-scripts/61.setup-cockroachdb.sh @@ -6,7 +6,7 @@ set -euo pipefail # # Notes: # - The cockroachdb manifest and main.py are provided by the image at: -# /etc/swarm-cloud/services/cockroachdb/{manifest.yaml, main.py} +# /etc/swarm-services/cockroachdb/{manifest.yaml, main.py} # We do not reimplement any logic here, only register ClusterPolicy and ClusterService. # - cockroachdb depends on WireGuard as expressed in its own manifest and provision plugin. # @@ -24,8 +24,8 @@ CLUSTER_ID=${CLUSTER_ID:-cockroachdb} # Location and manifest inside the container. # IMPORTANT: This script runs only on one node. All nodes must have the same location available already -# (baked into the image), so we point to /etc/swarm-cloud/services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +# (baked into the image), so we point to /etc/swarm-services/${SERVICE_NAME}. 
+LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" diff --git a/src/swarm-scripts/62.setup-knot.sh b/src/swarm-scripts/62.setup-knot.sh index b370b743..5b36f5d0 100644 --- a/src/swarm-scripts/62.setup-knot.sh +++ b/src/swarm-scripts/62.setup-knot.sh @@ -6,7 +6,7 @@ set -euo pipefail # # Notes: # - The knot manifest and main.py are provided by the image at: -# /etc/swarm-cloud/services/knot/{manifest.yaml, main.py} +# /etc/swarm-services/knot/{manifest.yaml, main.py} # We do not reimplement any logic here, only register ClusterPolicy and ClusterService. # - knot depends on WireGuard as expressed in its own manifest and provision plugin. # @@ -24,8 +24,8 @@ CLUSTER_ID=${CLUSTER_ID:-knot} # Location and manifest inside the container. # IMPORTANT: This script runs only on one node. All nodes must have the same location available already -# (baked into the image), so we point to /etc/swarm-cloud/services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +# (baked into the image), so we point to /etc/swarm-services/${SERVICE_NAME}. +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" diff --git a/src/swarm-scripts/63.setup-openresty.sh b/src/swarm-scripts/63.setup-openresty.sh index 47bdcb8b..a2bc0d87 100644 --- a/src/swarm-scripts/63.setup-openresty.sh +++ b/src/swarm-scripts/63.setup-openresty.sh @@ -6,7 +6,7 @@ set -euo pipefail # # Note: # - The openresty manifest and main.py are provided by the image at: -# /etc/swarm-cloud/services/openresty/{manifest.yaml, main.py} +# /etc/swarm-services/openresty/{manifest.yaml, main.py} # This script only registers service records in SwarmDB. # - openresty depends on Redis + WireGuard clusters (see its stateExpr). 
# @@ -24,8 +24,8 @@ CLUSTER_ID=${CLUSTER_ID:-openresty} # Location and manifest inside the container. # IMPORTANT: This script runs only on one node. All nodes must have the same location available already -# (baked into the image), so we point to /etc/swarm-cloud/services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +# (baked into the image), so we point to /etc/swarm-services/${SERVICE_NAME}. +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" diff --git a/src/swarm-scripts/64.setup-swarm-cloud-api.sh b/src/swarm-scripts/64.setup-swarm-cloud-api.sh index 96fd9e5e..4ad8aa72 100644 --- a/src/swarm-scripts/64.setup-swarm-cloud-api.sh +++ b/src/swarm-scripts/64.setup-swarm-cloud-api.sh @@ -7,7 +7,7 @@ set -euo pipefail # # Notes: # - The swarm-cloud-api manifest and main.py are provided by the image at: -# /etc/swarm-cloud/services/swarm-cloud-api/{manifest.yaml, main.py} +# /etc/swarm-services/swarm-cloud-api/{manifest.yaml, main.py} # We do not reimplement any logic here, only register ClusterPolicy and ClusterService. # - swarm-cloud-api depends on CockroachDB, Redis, WireGuard and Knot as expressed # in its own manifest and provision plugin. @@ -34,12 +34,12 @@ UI_CLUSTER_ID=${UI_CLUSTER_ID:-swarm-cloud-ui} # Location and manifest inside the container. # IMPORTANT: This script runs only on one node. All nodes must have the same location available already -# (baked into the image), so we point to /etc/swarm-cloud/services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +# (baked into the image), so we point to /etc/swarm-services/${SERVICE_NAME}. 
+LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" -UI_LOCATION_PATH=${UI_LOCATION_PATH:-/etc/swarm-cloud/services/${UI_SERVICE_NAME}} +UI_LOCATION_PATH=${UI_LOCATION_PATH:-/etc/swarm-services/${UI_SERVICE_NAME}} UI_MANIFEST_PATH=${UI_MANIFEST_PATH:-${UI_LOCATION_PATH}/manifest.yaml} UI_SERVICE_PK="${UI_CLUSTER_POLICY}:${UI_SERVICE_NAME}" diff --git a/src/swarm-scripts/71.setup-auth-service.sh b/src/swarm-scripts/71.setup-auth-service.sh index eaf24dff..816bedc2 100644 --- a/src/swarm-scripts/71.setup-auth-service.sh +++ b/src/swarm-scripts/71.setup-auth-service.sh @@ -26,9 +26,9 @@ CLUSTER_ID=${CLUSTER_ID:-auth-service} # Location stored in ClusterServices; must exist on all nodes. # The service provisioner (manifest.yaml + main.py) is baked into the image under -# /etc/swarm-cloud/services/${SERVICE_NAME}. The application payload lives under +# /etc/swarm-services/${SERVICE_NAME}. The application payload lives under # /etc/auth-service and is referenced by the provisioner. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" diff --git a/src/swarm-scripts/72.setup-domain-initializer.sh b/src/swarm-scripts/72.setup-domain-initializer.sh index 70ca54b6..b7bbe465 100644 --- a/src/swarm-scripts/72.setup-domain-initializer.sh +++ b/src/swarm-scripts/72.setup-domain-initializer.sh @@ -25,8 +25,8 @@ CLUSTER_ID=${CLUSTER_ID:-domain-initializer} # Location stored in ClusterServices; must exist on all nodes. # The service provisioner (manifest.yaml + main.py) is baked into the image under -# /etc/swarm-cloud/services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +# /etc/swarm-services/${SERVICE_NAME}. 
+LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" diff --git a/src/swarm-scripts/73.setup-route-manager.sh b/src/swarm-scripts/73.setup-route-manager.sh index 84b96084..696be333 100644 --- a/src/swarm-scripts/73.setup-route-manager.sh +++ b/src/swarm-scripts/73.setup-route-manager.sh @@ -25,8 +25,8 @@ CLUSTER_ID=${CLUSTER_ID:-route-manager} # Location stored in ClusterServices; must exist on all nodes. # The service provisioner (manifest.yaml + main.py) is baked into the image under -# /etc/swarm-cloud/services/${SERVICE_NAME}. -LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-cloud/services/${SERVICE_NAME}} +# /etc/swarm-services/${SERVICE_NAME}. +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" From 51628056c1bf2c445c1aa6c704a99567e8911abd Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Mar 2026 13:13:09 +0300 Subject: [PATCH 65/79] refactor: add ncurses-term package to runtime tools installation --- src/rootfs/files/scripts/setup_runtime_tools.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rootfs/files/scripts/setup_runtime_tools.sh b/src/rootfs/files/scripts/setup_runtime_tools.sh index 06b2a1c7..b256d09d 100644 --- a/src/rootfs/files/scripts/setup_runtime_tools.sh +++ b/src/rootfs/files/scripts/setup_runtime_tools.sh @@ -20,7 +20,7 @@ function setup_runtime_tools() { log_info "installing runtime packages into rootfs (python3, mysql client, openssl, netcat, dns tools)" chroot "${OUTPUTDIR}" /usr/bin/apt update chroot "${OUTPUTDIR}" /usr/bin/apt install -y --no-install-recommends \ - mysql-client python3 python3-pip openssl netcat-openbsd dnsutils nano + mysql-client python3 python3-pip openssl netcat-openbsd dnsutils nano ncurses-term chroot "${OUTPUTDIR}" /usr/bin/apt clean 
log_info "installing Python runtime dependencies" From caa45433edfbd3ce191acd26f49eab530bff12ef Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Mar 2026 13:29:17 +0300 Subject: [PATCH 66/79] refactor: update swarm-services.service to include swarm-init.service in dependencies --- .../files/configs/etc/systemd/system/swarm-services.service | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-services.service b/src/rootfs/files/configs/etc/systemd/system/swarm-services.service index 456995d2..11e37e10 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-services.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-services.service @@ -1,7 +1,7 @@ [Unit] Description=Run Swarm setup scripts from /etc/swarm-service-launchers -After=network-online.target swarm-db.service swarm-node.service download-sp-swarm-services.service -Wants=network-online.target +After=network-online.target swarm-db.service swarm-node.service swarm-init.service download-sp-swarm-services.service +Wants=network-online.target swarm-init.service Requires=swarm-db.service swarm-node.service ConditionPathExists=/etc/swarm-service-launchers From f5542f3ea68106d1519c8559a53f154f5e76c755 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Mar 2026 14:30:21 +0300 Subject: [PATCH 67/79] refactor: remove unnecessary dependency on download-sp-swarm-services.service in swarm-services.service --- .../files/configs/etc/systemd/system/swarm-services.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-services.service b/src/rootfs/files/configs/etc/systemd/system/swarm-services.service index 11e37e10..f9d589e9 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-services.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-services.service @@ -1,6 +1,6 @@ [Unit] Description=Run Swarm setup scripts from 
/etc/swarm-service-launchers -After=network-online.target swarm-db.service swarm-node.service swarm-init.service download-sp-swarm-services.service +After=network-online.target swarm-db.service swarm-node.service swarm-init.service Wants=network-online.target swarm-init.service Requires=swarm-db.service swarm-node.service ConditionPathExists=/etc/swarm-service-launchers From 097b497506d8c7f47a8cea84fd4afda840d47103 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Thu, 5 Mar 2026 14:51:57 +0300 Subject: [PATCH 68/79] refactor: change service type from simple to oneshot in swarm-init.service --- src/rootfs/files/configs/etc/systemd/system/swarm-init.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-init.service b/src/rootfs/files/configs/etc/systemd/system/swarm-init.service index 34368a26..4169fc1d 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-init.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-init.service @@ -4,7 +4,7 @@ After=network-online.target swarm-db.service Wants=network-online.target swarm-db.service [Service] -Type=simple +Type=oneshot ExecStart=/bin/bash /usr/local/bin/swarm-init.sh Restart=on-failure RestartSec=10 From 2fd0011813d9412acb26f3aa3aeef1769ec3a68a Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 6 Mar 2026 11:48:57 +0300 Subject: [PATCH 69/79] fi: use envs for swarm-host-agent --- .../configs/etc/systemd/system/swarm-host-agent.service | 1 + src/rootfs/files/configs/usr/local/bin/swarm-init.sh | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-host-agent.service b/src/rootfs/files/configs/etc/systemd/system/swarm-host-agent.service index 000ebb6e..445f12de 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-host-agent.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-host-agent.service @@ -5,6 +5,7 @@ Requires=swarm-init.service [Service] 
Type=simple +EnvironmentFile=/etc/swarm/swarm-host-agent.env ExecStart=/usr/local/bin/swarm-host-agent Restart=always RestartSec=5 diff --git a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh index 61647183..2411b290 100644 --- a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh +++ b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh @@ -219,6 +219,11 @@ log "generating /etc/swarm/swarm-node.env..." mkdir -p /etc/swarm cat > /etc/swarm/swarm-node.env << EOF SWARM_NODE_TAG=${SWARM_NODE_TAG} +EOF + +# Generate /etc/swarm/swarm-host-agent.env for swarm-host-agent.service EnvironmentFile (idempotent) +log "generating /etc/swarm/swarm-host-agent.env..." +cat > /etc/swarm/swarm-host-agent.env << EOF SWARM_CLOUD_API_TAG=${SWARM_CLOUD_API_TAG} SWARM_CLOUD_UI_TAG=${SWARM_CLOUD_UI_TAG} AUTH_SERVICE_TAG=${AUTH_SERVICE_TAG} From 98457be5b6332f5754cad60e0f30321bf249ec8b Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 6 Mar 2026 15:17:31 +0300 Subject: [PATCH 70/79] refactor: replace ADD with COPY for swarm-host-agent.service in Dockerfile --- src/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Dockerfile b/src/Dockerfile index 4ce6b0f6..c4f58355 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -177,8 +177,8 @@ ADD rootfs/files/configs/tdx-attest.conf ${OUTPUTDIR}/etc/ ADD rootfs/files/configs/chrony/chrony.conf ${OUTPUTDIR}/etc/chrony/ ADD rootfs/files/configs/chrony/chrony.service ${OUTPUTDIR}/lib/systemd/system/ -RUN sed -i '1 s|^.*$|-:root:ALL|' "${OUTPUTDIR}/etc/security/access.conf" -RUN sed -i '1 s|^.*$|account required pam_access.so|' "${OUTPUTDIR}/etc/pam.d/login" +# RUN sed -i '1 s|^.*$|-:root:ALL|' "${OUTPUTDIR}/etc/security/access.conf" +# RUN sed -i '1 s|^.*$|account required pam_access.so|' "${OUTPUTDIR}/etc/pam.d/login" ADD rootfs/files/configs/nvidia-persistenced.service ${OUTPUTDIR}/usr/lib/systemd/system/ RUN echo "sp-$(petname)" > 
"${OUTPUTDIR}/etc/hostname" @@ -238,7 +238,7 @@ ADD rootfs/files/configs/etc/systemd/system/swarm-init.service ${OUTPUTDIR}/etc/ RUN ln -sf /etc/systemd/system/swarm-init.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/swarm-init.service" # swarm-host-agent: placeholder service file (binary + real service installed by swarm-init at boot) -ADD rootfs/files/configs/etc/systemd/system/swarm-host-agent.service ${OUTPUTDIR}/etc/systemd/system/swarm-host-agent.service +COPY rootfs/files/configs/etc/systemd/system/swarm-host-agent.service ${OUTPUTDIR}/etc/systemd/system/swarm-host-agent.service RUN mkdir -p "${OUTPUTDIR}/etc/swarm-service-launchers" COPY swarm-scripts ${OUTPUTDIR}/etc/swarm-service-launchers/ From 3d56718ed758b05fe579ce7ffa8e14c6339e855e Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 6 Mar 2026 15:52:31 +0300 Subject: [PATCH 71/79] refactor: remove installation of swarm-host-agent.service in swarm-init.sh --- src/rootfs/files/configs/usr/local/bin/swarm-init.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh index 2411b290..d695da9e 100644 --- a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh +++ b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh @@ -194,7 +194,6 @@ if [ -n "$HOST_AGENT_TAG" ]; then install -m 755 "$TMP/$EXTRACT_DIR/swarm-host-agent" /usr/local/bin/swarm-host-agent mkdir -p /etc/swarm cp "$TMP/$EXTRACT_DIR/host-agent.yaml" /etc/swarm/host-agent.yaml - cp "$TMP/$EXTRACT_DIR/swarm-host-agent.service" /etc/systemd/system/swarm-host-agent.service rm -rf "$TMP" log "swarm-host-agent $RELEASE_TAG installed" systemctl daemon-reload From 587ec808f2f27ca72744fd67278dc80f83fbaf43 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 6 Mar 2026 16:19:08 +0300 Subject: [PATCH 72/79] refactor: remove redundant systemctl daemon-reload in swarm-init.sh --- src/rootfs/files/configs/usr/local/bin/swarm-init.sh | 3 
+-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh index d695da9e..78f923d9 100644 --- a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh +++ b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh @@ -196,7 +196,6 @@ if [ -n "$HOST_AGENT_TAG" ]; then cp "$TMP/$EXTRACT_DIR/host-agent.yaml" /etc/swarm/host-agent.yaml rm -rf "$TMP" log "swarm-host-agent $RELEASE_TAG installed" - systemctl daemon-reload systemctl enable swarm-host-agent.service fi else @@ -232,7 +231,7 @@ EOF log "waiting for swarm-db MySQL to become available..." mysql_host="127.0.0.1" mysql_port="3306" -wait_timeout="120" +wait_timeout="240" start_ts="$(date +%s)" while true; do if (exec 3<>/dev/tcp/"$mysql_host"/"$mysql_port") 2>/dev/null; then From 754126ffadbd0940159329a758b468b503a02550 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 6 Mar 2026 19:28:28 +0300 Subject: [PATCH 73/79] feat: add initialize-secrets migration --- .../etc/systemd/system/swarm-init.service | 4 +- .../etc/systemd/system/swarm-services.service | 4 +- .../files/configs/usr/local/bin/swarm-init.sh | 59 ------------------- src/swarm-scripts/00-initialize-secrets.sh | 44 ++++++++++++++ 4 files changed, 48 insertions(+), 63 deletions(-) create mode 100644 src/swarm-scripts/00-initialize-secrets.sh diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-init.service b/src/rootfs/files/configs/etc/systemd/system/swarm-init.service index 4169fc1d..ef368dec 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-init.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-init.service @@ -1,7 +1,7 @@ [Unit] Description=Swarm Initialization (download binaries, configure services, seed swarm-db) -After=network-online.target swarm-db.service -Wants=network-online.target swarm-db.service +After=network-online.target +Wants=network-online.target [Service] Type=oneshot diff --git 
a/src/rootfs/files/configs/etc/systemd/system/swarm-services.service b/src/rootfs/files/configs/etc/systemd/system/swarm-services.service index f9d589e9..88a27125 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-services.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-services.service @@ -1,8 +1,8 @@ [Unit] Description=Run Swarm setup scripts from /etc/swarm-service-launchers After=network-online.target swarm-db.service swarm-node.service swarm-init.service -Wants=network-online.target swarm-init.service -Requires=swarm-db.service swarm-node.service +Wants=network-online.target +Requires=swarm-db.service swarm-node.service swarm-init.service ConditionPathExists=/etc/swarm-service-launchers [Service] diff --git a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh index 78f923d9..8174d8c7 100644 --- a/src/rootfs/files/configs/usr/local/bin/swarm-init.sh +++ b/src/rootfs/files/configs/usr/local/bin/swarm-init.sh @@ -27,9 +27,6 @@ SERVICES_TAG=$(cfg "tags.services") SWARM_CLOUD_API_TAG=$(cfg "tags.swarm_cloud_api") SWARM_CLOUD_UI_TAG=$(cfg "tags.swarm_cloud_ui") AUTH_SERVICE_TAG=$(cfg "tags.auth_service") -POWERDNS_API_URL=$(cfg "powerdns_api_url") -POWERDNS_API_KEY=$(cfg "powerdns_api_key") -BASE_DOMAIN=$(cfg "base_domain") # Download a GitHub release asset to a local file path # Usage: download_github_asset @@ -227,60 +224,4 @@ SWARM_CLOUD_UI_TAG=${SWARM_CLOUD_UI_TAG} AUTH_SERVICE_TAG=${AUTH_SERVICE_TAG} EOF -# Wait for swarm-db MySQL — fail (and let systemd restart us) if not available -log "waiting for swarm-db MySQL to become available..." 
-mysql_host="127.0.0.1" -mysql_port="3306" -wait_timeout="240" -start_ts="$(date +%s)" -while true; do - if (exec 3<>/dev/tcp/"$mysql_host"/"$mysql_port") 2>/dev/null; then - exec 3>&- 3<&- - break - fi - elapsed=$(( $(date +%s) - start_ts )) - if [ "$elapsed" -ge "$wait_timeout" ]; then - log "ERROR: MySQL not available after ${wait_timeout}s, will retry" - exit 1 - fi - sleep 1 -done - -# Insert SwarmSecrets (idempotent: INSERT IGNORE skips existing keys) -log "inserting SwarmSecrets into swarm-db..." -AUTH_SERVICE_YAML="" -AUTH_SERVICE_YAML_PATH="/sp/swarm/auth-service.yaml" -[ -f "$AUTH_SERVICE_YAML_PATH" ] && AUTH_SERVICE_YAML=$(cat "$AUTH_SERVICE_YAML_PATH") - -# Generate RSA 4096 private key (PKCS8 PEM) for evidence signing. -# INSERT IGNORE ensures only the first run inserts it; subsequent runs are no-ops. -# TODO: should we use subroot (intermediate CA) key hierarchy? -log "generating evidence signing key (RSA 4096)..." -EVIDENCE_SIGN_KEY=$(openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:4096 2>/dev/null) - -POWERDNS_API_URL="$POWERDNS_API_URL" \ -POWERDNS_API_KEY="$POWERDNS_API_KEY" \ -BASE_DOMAIN="$BASE_DOMAIN" \ -AUTH_SERVICE_YAML="$AUTH_SERVICE_YAML" \ -EVIDENCE_SIGN_KEY="$EVIDENCE_SIGN_KEY" \ -python3 - << 'PYEOF' -import subprocess, os - -def insert_secret(key, value): - if not value: - return - escaped = value.replace("'", "''") - sql = f"INSERT IGNORE INTO SwarmSecrets (id, value) VALUES ('{key}', '{escaped}');\n" - subprocess.run( - ["mysql", "-h", "127.0.0.1", "-P", "3306", "-u", "root", "swarmdb"], - input=sql, text=True, check=True, - ) - -insert_secret("powerdns_api_url", os.environ.get("POWERDNS_API_URL", "")) -insert_secret("powerdns_api_key", os.environ.get("POWERDNS_API_KEY", "")) -insert_secret("base_domain", os.environ.get("BASE_DOMAIN", "")) -insert_secret("auth_service_yaml", os.environ.get("AUTH_SERVICE_YAML", "")) -insert_secret("evidence_sign_key", os.environ.get("EVIDENCE_SIGN_KEY", "")) -PYEOF - log "swarm-init completed 
successfully" diff --git a/src/swarm-scripts/00-initialize-secrets.sh b/src/swarm-scripts/00-initialize-secrets.sh new file mode 100644 index 00000000..a5bcb1c2 --- /dev/null +++ b/src/swarm-scripts/00-initialize-secrets.sh @@ -0,0 +1,44 @@ +#!/bin/bash +set -euo pipefail + +CONFIG="/sp/swarm/config.yaml" + +cfg() { + python3 -c " +import yaml +c = yaml.safe_load(open('$CONFIG')) or {} +v = c +for k in '$1'.split('.'): + v = v.get(k) if isinstance(v, dict) else None +print('' if v is None else v)" +} + +DB_HOST=${DB_HOST:-127.0.0.1} +DB_PORT=${DB_PORT:-3306} +DB_USER=${DB_USER:-root} +DB_NAME=${DB_NAME:-swarmdb} + +POWERDNS_API_URL=$(cfg "powerdns_api_url") +POWERDNS_API_KEY=$(cfg "powerdns_api_key") +BASE_DOMAIN=$(cfg "base_domain") + +AUTH_SERVICE_YAML="" +AUTH_SERVICE_YAML_PATH="/sp/swarm/auth-service.yaml" +[ -f "$AUTH_SERVICE_YAML_PATH" ] && AUTH_SERVICE_YAML=$(cat "$AUTH_SERVICE_YAML_PATH") + +EVIDENCE_SIGN_KEY=$(openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:4096 2>/dev/null) + +ensure_secret() { + local key="$1" + local value="$2" + [ -n "$value" ] || return 0 + + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create SwarmSecrets "$key" --value "$value" >/dev/null +} + +ensure_secret "powerdns_api_url" "$POWERDNS_API_URL" +ensure_secret "powerdns_api_key" "$POWERDNS_API_KEY" +ensure_secret "base_domain" "$BASE_DOMAIN" +ensure_secret "auth_service_yaml" "$AUTH_SERVICE_YAML" +ensure_secret "evidence_sign_key" "$EVIDENCE_SIGN_KEY" From 525f69c9fdaa6d0513c6c9b8e91d4c30b5c30f38 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 6 Mar 2026 19:31:31 +0300 Subject: [PATCH 74/79] refactor: clean up Dockerfile by removing redundant commands --- src/Dockerfile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Dockerfile b/src/Dockerfile index c4f58355..97571e90 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -247,7 +247,6 @@ COPY swarm-scripts 
${OUTPUTDIR}/etc/swarm-service-launchers/ ADD rootfs/files/configs/etc/systemd/system/swarm-services.service ${OUTPUTDIR}/etc/systemd/system/swarm-services.service ADD rootfs/files/configs/usr/local/bin/swarm-services.sh ${OUTPUTDIR}/usr/local/bin/swarm-services.sh RUN chmod +x ${OUTPUTDIR}/usr/local/bin/swarm-services.sh -RUN ln -sf /etc/systemd/system/swarm-services.service "${OUTPUTDIR}/etc/systemd/system/multi-user.target.wants/swarm-services.service" # disabling serial getty ADD rootfs/files/configs/usr/lib/systemd/system/serial-getty@.service "${OUTPUTDIR}/usr/lib/systemd/system/serial-getty@.service" @@ -296,9 +295,6 @@ RUN mkdir -p ${OUTPUTDIR}/etc/swarm-services/ ADD rootfs/files/scripts/setup_runtime_tools.sh /buildroot/files/scripts/ RUN chmod +x /buildroot/files/scripts/setup_runtime_tools.sh RUN --security=insecure /buildroot/files/scripts/setup_runtime_tools.sh -# cleanup apt lists and policy-rc.d -RUN rm -f ${OUTPUTDIR}/usr/sbin/policy-rc.d \ - && rm -rf ${OUTPUTDIR}/var/lib/apt/lists/* # Python dependencies required by provision plugins (redis-py for rke2/redis plugins, podman-compose for container plugins) @@ -318,6 +314,8 @@ ARG SP_VM_IMAGE_VERSION RUN bash -c '[[ -n "$SP_VM_IMAGE_VERSION" ]] && echo "$SP_VM_IMAGE_VERSION" > "${OUTPUTDIR}/etc/sp-release"' # after all! 
+# cleanup apt lists and policy-rc.d +RUN rm -f ${OUTPUTDIR}/usr/sbin/policy-rc.d ADD rootfs/files/scripts/cleanup_rootfs.sh /buildroot/files/scripts/ RUN --security=insecure /buildroot/files/scripts/cleanup_rootfs.sh ### End rootfs ### From 09dc475fd4a18f5e5591b390d54b28f7f62fffc9 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 6 Mar 2026 20:20:25 +0300 Subject: [PATCH 75/79] refactor: update service dependencies in swarm-db and swarm-services --- src/rootfs/files/configs/etc/systemd/system/swarm-db.service | 4 ++-- .../files/configs/etc/systemd/system/swarm-services.service | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-db.service b/src/rootfs/files/configs/etc/systemd/system/swarm-db.service index 1b4ed1c3..80b4edf6 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-db.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-db.service @@ -1,7 +1,7 @@ [Unit] Description=Swarm DB service -After=network-online.target local-fs.target -Wants=network-online.target +After=network-online.target local-fs.target swarm-init.service +Wants=network-online.target swarm-init.service swarm-services.service ConditionPathExists=/usr/local/bin/swarm-db-linux-amd64 [Service] diff --git a/src/rootfs/files/configs/etc/systemd/system/swarm-services.service b/src/rootfs/files/configs/etc/systemd/system/swarm-services.service index 88a27125..9382e712 100644 --- a/src/rootfs/files/configs/etc/systemd/system/swarm-services.service +++ b/src/rootfs/files/configs/etc/systemd/system/swarm-services.service @@ -1,8 +1,8 @@ [Unit] Description=Run Swarm setup scripts from /etc/swarm-service-launchers -After=network-online.target swarm-db.service swarm-node.service swarm-init.service +After=network-online.target swarm-db.service swarm-init.service Wants=network-online.target -Requires=swarm-db.service swarm-node.service swarm-init.service +Requires=swarm-db.service swarm-init.service 
ConditionPathExists=/etc/swarm-service-launchers [Service] From a96996e90653f6a7a2e729c90ad8f34d3209cf82 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 6 Mar 2026 21:17:17 +0300 Subject: [PATCH 76/79] refactor: remove --omit-command-init flag from ClusterServices creation in setup scripts --- src/swarm-scripts/50.setup-rke2.sh | 2 +- src/swarm-scripts/60.setup-redis.sh | 4 ++-- src/swarm-scripts/61.setup-cockroachdb.sh | 2 +- src/swarm-scripts/62.setup-knot.sh | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/swarm-scripts/50.setup-rke2.sh b/src/swarm-scripts/50.setup-rke2.sh index 2831ac85..e692e21d 100644 --- a/src/swarm-scripts/50.setup-rke2.sh +++ b/src/swarm-scripts/50.setup-rke2.sh @@ -50,7 +50,7 @@ if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ else echo "Creating ClusterService '$SERVICE_PK'..." DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" --omit-command-init + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" fi echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." diff --git a/src/swarm-scripts/60.setup-redis.sh b/src/swarm-scripts/60.setup-redis.sh index 8824ffad..1ab3dca7 100644 --- a/src/swarm-scripts/60.setup-redis.sh +++ b/src/swarm-scripts/60.setup-redis.sh @@ -72,7 +72,7 @@ if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ else echo "Creating ClusterService '$REDIS_SERVICE_PK'..." 
DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$REDIS_SERVICE_PK" --name="$REDIS_SERVICE_NAME" --cluster_policy="$REDIS_CLUSTER_POLICY" --version="$REDIS_SERVICE_VERSION" --location="$REDIS_LOCATION_PATH" --omit-command-init + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$REDIS_SERVICE_PK" --name="$REDIS_SERVICE_NAME" --cluster_policy="$REDIS_CLUSTER_POLICY" --version="$REDIS_SERVICE_VERSION" --location="$REDIS_LOCATION_PATH" fi echo "Ensuring ClusterService '$SENTINEL_SERVICE_PK'..." @@ -82,7 +82,7 @@ if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ else echo "Creating ClusterService '$SENTINEL_SERVICE_PK'..." DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SENTINEL_SERVICE_PK" --name="$SENTINEL_SERVICE_NAME" --cluster_policy="$SENTINEL_CLUSTER_POLICY" --version="$SENTINEL_SERVICE_VERSION" --location="$SENTINEL_LOCATION_PATH" --omit-command-init + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SENTINEL_SERVICE_PK" --name="$SENTINEL_SERVICE_NAME" --cluster_policy="$SENTINEL_CLUSTER_POLICY" --version="$SENTINEL_SERVICE_VERSION" --location="$SENTINEL_LOCATION_PATH" fi echo "Done. The provision worker will reconcile '$REDIS_SERVICE_NAME' shortly." diff --git a/src/swarm-scripts/61.setup-cockroachdb.sh b/src/swarm-scripts/61.setup-cockroachdb.sh index 56a74c40..b0f3f384 100644 --- a/src/swarm-scripts/61.setup-cockroachdb.sh +++ b/src/swarm-scripts/61.setup-cockroachdb.sh @@ -51,7 +51,7 @@ if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ else echo "Creating ClusterService '$SERVICE_PK'..." 
DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" --omit-command-init + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" fi echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." diff --git a/src/swarm-scripts/62.setup-knot.sh b/src/swarm-scripts/62.setup-knot.sh index 5b36f5d0..7651ebc1 100644 --- a/src/swarm-scripts/62.setup-knot.sh +++ b/src/swarm-scripts/62.setup-knot.sh @@ -51,7 +51,7 @@ if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ else echo "Creating ClusterService '$SERVICE_PK'..." DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ - python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" --omit-command-init + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" fi echo "Ensuring SwarmSecret 'base_domain' is present via swarm-cli..." 
From 5f4ecad63176c3d8ceb18c403cd4d7d4e776fa5c Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Fri, 6 Mar 2026 23:56:03 +0300 Subject: [PATCH 77/79] feat: add script to bootstrap mongodb service into SwarmDB --- src/swarm-scripts/65.setup-mongo.sh | 47 +++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 src/swarm-scripts/65.setup-mongo.sh diff --git a/src/swarm-scripts/65.setup-mongo.sh b/src/swarm-scripts/65.setup-mongo.sh new file mode 100644 index 00000000..da5137fa --- /dev/null +++ b/src/swarm-scripts/65.setup-mongo.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -euo pipefail + +# This script bootstraps the mongodb service into SwarmDB via swarm-cli. +# Run it INSIDE the container. Assumes mysql client and swarm-cli.py are available. + +DB_HOST=${DB_HOST:-127.0.0.1} +DB_PORT=${DB_PORT:-3306} +DB_USER=${DB_USER:-root} +DB_NAME=${DB_NAME:-swarmdb} + +# Service descriptors +SERVICE_NAME=${SERVICE_NAME:-mongodb} +SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} +CLUSTER_POLICY=${CLUSTER_POLICY:-mongodb} + +# Location and manifest inside the container. +LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} +MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} +SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" + +if [ ! -f "$MANIFEST_PATH" ]; then + echo "Manifest not found at: $MANIFEST_PATH" >&2 + exit 1 +fi + +echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then + echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." +else + echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=3 --maxClusters=1 +fi + +echo "Ensuring ClusterService '$SERVICE_PK'..." 
+if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then + echo "ClusterService '$SERVICE_PK' already exists, skipping creation." +else + echo "Creating ClusterService '$SERVICE_PK'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" +fi + +echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." From ce3a6764840b94e01a5be0539ba3dadd707e4a0c Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Sat, 7 Mar 2026 00:01:12 +0300 Subject: [PATCH 78/79] feat: add script to bootstrap swarm-cloud-ui service into SwarmDB --- src/swarm-scripts/66.setup-swarm-cloud-ui.sh | 47 ++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 src/swarm-scripts/66.setup-swarm-cloud-ui.sh diff --git a/src/swarm-scripts/66.setup-swarm-cloud-ui.sh b/src/swarm-scripts/66.setup-swarm-cloud-ui.sh new file mode 100644 index 00000000..8a91e218 --- /dev/null +++ b/src/swarm-scripts/66.setup-swarm-cloud-ui.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -euo pipefail + +# This script bootstraps the swarm-cloud-ui service into SwarmDB via swarm-cli. +# Run it INSIDE the container. Assumes mysql client and swarm-cli.py are available. + +DB_HOST=${DB_HOST:-127.0.0.1} +DB_PORT=${DB_PORT:-3306} +DB_USER=${DB_USER:-root} +DB_NAME=${DB_NAME:-swarmdb} + +# Service descriptors +SERVICE_NAME=${SERVICE_NAME:-swarm-cloud-ui} +SERVICE_VERSION=${SERVICE_VERSION:-1.0.0} +CLUSTER_POLICY=${CLUSTER_POLICY:-swarm-cloud-ui} + +# Location and manifest inside the container. 
+LOCATION_PATH=${LOCATION_PATH:-/etc/swarm-services/${SERVICE_NAME}} +MANIFEST_PATH=${MANIFEST_PATH:-${LOCATION_PATH}/manifest.yaml} +SERVICE_PK="${CLUSTER_POLICY}:${SERVICE_NAME}" + +if [ ! -f "$MANIFEST_PATH" ]; then + echo "Manifest not found at: $MANIFEST_PATH" >&2 + exit 1 +fi + +echo "Ensuring ClusterPolicy '$CLUSTER_POLICY'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterPolicies "$CLUSTER_POLICY" >/dev/null 2>&1; then + echo "ClusterPolicy '$CLUSTER_POLICY' already exists, skipping creation." +else + echo "Creating ClusterPolicy '$CLUSTER_POLICY'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterPolicies "$CLUSTER_POLICY" --minSize=1 --maxSize=1 --maxClusters=1 +fi + +echo "Ensuring ClusterService '$SERVICE_PK'..." +if DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" get ClusterServices "$SERVICE_PK" >/dev/null 2>&1; then + echo "ClusterService '$SERVICE_PK' already exists, skipping creation." +else + echo "Creating ClusterService '$SERVICE_PK'..." + DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" \ + python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" +fi + +echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly." 
From 3d95b4bceea337beeebb36309fbc3c76a2bf49a9 Mon Sep 17 00:00:00 2001 From: Vlad Marchuk Date: Sat, 7 Mar 2026 00:54:39 +0300 Subject: [PATCH 79/79] refactor: remove SwarmSecret creation for base_domain in setup-knot script --- src/swarm-scripts/62.setup-knot.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/swarm-scripts/62.setup-knot.sh b/src/swarm-scripts/62.setup-knot.sh index 7651ebc1..30e4fcf6 100644 --- a/src/swarm-scripts/62.setup-knot.sh +++ b/src/swarm-scripts/62.setup-knot.sh @@ -54,8 +54,4 @@ else python3 "$(dirname "$0")/swarm-cli.py" create ClusterServices "$SERVICE_PK" --name="$SERVICE_NAME" --cluster_policy="$CLUSTER_POLICY" --version="$SERVICE_VERSION" --location="$LOCATION_PATH" fi -echo "Ensuring SwarmSecret 'base_domain' is present via swarm-cli..." -DB_HOST="$DB_HOST" DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" DB_PASSWORD="${DB_PASSWORD-}" \ - python3 "$(dirname "$0")/swarm-cli.py" create SwarmSecrets base_domain --value="test.oresty.superprotocol.io" - echo "Done. The provision worker will reconcile '$SERVICE_NAME' shortly."