From 86d81dac5d8091aee09491f28e2824b9de9bc64c Mon Sep 17 00:00:00 2001
From: bxvtr
Date: Sat, 21 Feb 2026 15:37:28 +0000
Subject: [PATCH 1/3] CI: chain Argo build and backtest workflows

Replace .github/workflows/deploy_argo_template.yaml with a single
GitHub Actions workflow, argo-build-and-backtest.yaml, that:

  1. resolves the target namespace from the branch
     (main -> prod, anything else -> dev),
  2. ensures the namespace exists, applies the GHCR pull secret and the
     two Argo WorkflowTemplates into it,
  3. submits argo/run-build.yaml and polls the resulting Workflow's
     .status.phase every 10 seconds (up to 240 attempts),
  4. submits argo/run-backtest.yaml when the branch is main or the
     run_backtest dispatch input is "true".

The templates become namespaced WorkflowTemplates instead of
ClusterWorkflowTemplates, and the backtest template now takes
image_repo / image_tag parameters instead of deriving the image tag
from git_branch.

Both run-*.yaml manifests set only metadata.generateName, so they are
submitted with `kubectl create`. `kubectl apply` rejects manifests that
have no metadata.name ("cannot use generate name with apply").

---
 .../{bug_report.md => bug-report.md}          |   0
 ...{feature_request.md => feature-request.md} |   0
 .../workflows/argo-build-and-backtest.yaml    | 129 +++++++
 .github/workflows/deploy_argo_template.yaml   |  64 ----
 argo/run-backtest.yaml                        |  17 +
 argo/run-build.yaml                           |   8 +-
 argo/workflowtemplate-backtest-fanout.yaml    | 302 ++++++++++++++++
 argo/workflowtemplate-backtest.yaml           | 331 ------------------
 argo/workflowtemplate-build-push-ghcr.yaml    |   2 +-
 9 files changed, 454 insertions(+), 399 deletions(-)
 rename .github/ISSUE_TEMPLATE/{bug_report.md => bug-report.md} (100%)
 rename .github/ISSUE_TEMPLATE/{feature_request.md => feature-request.md} (100%)
 create mode 100644 .github/workflows/argo-build-and-backtest.yaml
 delete mode 100644 .github/workflows/deploy_argo_template.yaml
 create mode 100644 argo/run-backtest.yaml
 create mode 100644 argo/workflowtemplate-backtest-fanout.yaml
 delete mode 100644 argo/workflowtemplate-backtest.yaml

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug-report.md
similarity index 100%
rename from .github/ISSUE_TEMPLATE/bug_report.md
rename to .github/ISSUE_TEMPLATE/bug-report.md
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature-request.md
similarity index 100%
rename from .github/ISSUE_TEMPLATE/feature_request.md
rename to .github/ISSUE_TEMPLATE/feature-request.md
diff --git a/.github/workflows/argo-build-and-backtest.yaml b/.github/workflows/argo-build-and-backtest.yaml
new file mode 100644
index 0000000..e827782
--- /dev/null
+++ b/.github/workflows/argo-build-and-backtest.yaml
@@ -0,0 +1,129 @@
+name: Build Image and Run Argo Backtest
+
+on:
+  push:
+    branches:
+      - main
+      - "**"
+  workflow_dispatch:
+    inputs:
+      run_backtest:
+        description: "Run backtest after image build"
+        required: false
+        default: "false"
+        type: choice
+        options:
+          - "false"
+          - "true"
+
+jobs:
+  resolve-context:
+    runs-on: self-hosted
+    outputs:
+      namespace: ${{ steps.context.outputs.namespace }}
+      branch: ${{ steps.context.outputs.branch }}
+
+    steps:
+      - name: Resolve deployment context
+        id: context
+        run: |
+          BRANCH_NAME="${GITHUB_REF#refs/heads/}"
+
+          if [[ "$BRANCH_NAME" == "main" ]]; then
+            NAMESPACE="prod"
+          else
+            NAMESPACE="dev"
+          fi
+
+          echo "namespace=$NAMESPACE" >> "$GITHUB_OUTPUT"
+          echo "branch=$BRANCH_NAME" >> "$GITHUB_OUTPUT"
+
+  deploy-argo-resources:
+    runs-on: self-hosted
+    needs: resolve-context
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Ensure namespace exists
+        run: |
+          sudo microk8s kubectl get ns "${{ needs.resolve-context.outputs.namespace }}" || \
+          sudo microk8s kubectl create ns "${{ needs.resolve-context.outputs.namespace }}"
+
+      - name: Apply GHCR credentials
+        run: |
+          sudo microk8s kubectl -n "${{ needs.resolve-context.outputs.namespace }}" create secret docker-registry ghcr-secret \
+            --docker-server=ghcr.io \
+            --docker-username=git \
+            --docker-password=${{ secrets.GHCR_TOKEN }} \
+            --dry-run=client -o yaml | \
+          sudo microk8s kubectl -n "${{ needs.resolve-context.outputs.namespace }}" apply -f -
+
+      - name: Apply Argo WorkflowTemplates
+        run: |
+          sudo microk8s kubectl -n "${{ needs.resolve-context.outputs.namespace }}" apply \
+            -f argo/workflowtemplate-build-push-ghcr.yaml \
+            -f argo/workflowtemplate-backtest-fanout.yaml
+
+  build-and-backtest:
+    runs-on: self-hosted
+    needs:
+      - resolve-context
+      - deploy-argo-resources
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Submit Argo build workflow
+        id: submit_build
+        run: |
+          export RUNTIME_COMMIT="${GITHUB_SHA}"
+          export PLATFORM_COMMIT="${GITHUB_SHA}"
+          export GIT_BRANCH="${{ needs.resolve-context.outputs.branch }}"
+
+          BUILD_NAME=$(
+            envsubst < argo/run-build.yaml | \
+            sudo microk8s kubectl -n "${{ needs.resolve-context.outputs.namespace }}" create -f - -o jsonpath='{.metadata.name}'
+          )
+
+          echo "build_name=$BUILD_NAME" >> "$GITHUB_OUTPUT"
+          echo "Submitted build workflow: $BUILD_NAME"
+
+      - name: Wait for Argo build workflow to succeed
+        run: |
+          BUILD_NAME="${{ steps.submit_build.outputs.build_name }}"
+          NS="${{ needs.resolve-context.outputs.namespace }}"
+
+          echo "Waiting for workflow $BUILD_NAME in namespace $NS..."
+
+          for i in {1..240}; do
+            PHASE=$(sudo microk8s kubectl -n "$NS" get wf "$BUILD_NAME" -o jsonpath='{.status.phase}' 2>/dev/null || true)
+
+            if [[ "$PHASE" == "Succeeded" ]]; then
+              echo "Build succeeded."
+              exit 0
+            fi
+
+            if [[ "$PHASE" == "Failed" || "$PHASE" == "Error" ]]; then
+              echo "Build ended with phase: $PHASE"
+              sudo microk8s kubectl -n "$NS" get wf "$BUILD_NAME" -o yaml || true
+              exit 1
+            fi
+
+            sleep 10
+          done
+
+          echo "Timed out waiting for build workflow to finish."
+          exit 1
+
+      - name: Submit Argo backtest workflow
+        if: ${{ needs.resolve-context.outputs.branch == 'main' || github.event.inputs.run_backtest == 'true' }}
+        run: |
+          export IMAGE_TAG="${GITHUB_SHA}"
+
+          envsubst < argo/run-backtest.yaml | \
+          sudo microk8s kubectl -n "${{ needs.resolve-context.outputs.namespace }}" create -f -
+
+          echo "Submitted backtest workflow in namespace: ${{ needs.resolve-context.outputs.namespace }}"
diff --git a/.github/workflows/deploy_argo_template.yaml b/.github/workflows/deploy_argo_template.yaml
deleted file mode 100644
index 299d860..0000000
--- a/.github/workflows/deploy_argo_template.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-name: Deploy & Run Argo Build
-
-on:
-  push:
-    branches: [main]
-  workflow_dispatch:
-
-jobs:
-  deploy-argo-template:
-    runs-on: self-hosted
-    permissions:
-      contents: read
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set Kubernetes namespace (prod vs dev)
-        run: |
-          if [ "${GITHUB_REF}" = "refs/heads/main" ]; then
-            echo "K8S_NAMESPACE=prod" >> $GITHUB_ENV
-          else
-            echo "K8S_NAMESPACE=dev" >> $GITHUB_ENV
-          fi
-
-      - name: Set up kubectl
-        uses: azure/setup-kubectl@v4
-        with:
-          version: "v1.29.0"
-
-      - name: Ensure namespace exists
-        run: |
-          sudo microk8s kubectl get ns $K8S_NAMESPACE || sudo microk8s kubectl create ns $K8S_NAMESPACE
-
-      - name: Apply secrets
-        run: |
-          sudo microk8s kubectl -n $K8S_NAMESPACE create secret docker-registry ghcr-secret \
-            --docker-server=ghcr.io \
-            --docker-username=git \
-            --docker-password=${{ secrets.GHCR_TOKEN }} \
-            --dry-run=client -o yaml | sudo microk8s kubectl apply -f -
-
-      - name: Apply WorkflowTemplates
-        run: |
-          sudo microk8s kubectl apply \
-            -f argo/workflowtemplate-backtest.yaml \
-            -f argo/workflowtemplate-build-push-ghcr.yaml
-
-  trigger-build:
-    runs-on: self-hosted
-    needs: deploy-argo-template
-
-    steps:
-      - name: Set Kubernetes namespace (prod vs dev)
-        run: |
-          if [ "${GITHUB_REF}" = "refs/heads/main" ]; then
-            echo "K8S_NAMESPACE=prod" >> $GITHUB_ENV
-          else
-            echo "K8S_NAMESPACE=dev" >> $GITHUB_ENV
-          fi
-
-      - name: Trigger Argo Build Workflow
-        run: |
-          sudo microk8s kubectl -n $K8S_NAMESPACE create -f argo/run-build.yaml
diff --git a/argo/run-backtest.yaml b/argo/run-backtest.yaml
new file mode 100644
index 0000000..faa4121
--- /dev/null
+++ b/argo/run-backtest.yaml
@@ -0,0 +1,17 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: backtest-fanout-
+spec:
+  workflowTemplateRef:
+    name: backtest-fanout
+  arguments:
+    parameters:
+      - name: image_repo
+        value: "ghcr.io/trading-engineering/trading-runtime"
+      - name: image_tag
+        value: "${IMAGE_TAG}"
+      - name: experiment_config
+        value: "/usr/local/lib/python3.11/site-packages/trading_runtime/argo/argo.json"
+      - name: scratch_root
+        value: "/mnt/scratch"
diff --git a/argo/run-build.yaml b/argo/run-build.yaml
index 67fadc1..141ebb6 100644
--- a/argo/run-build.yaml
+++ b/argo/run-build.yaml
@@ -7,7 +7,9 @@ spec:
     name: build-push-ghcr
   arguments:
     parameters:
-      - name: git_commit
-        value: "{{GIT_COMMIT}}"
+      - name: trading_runtime_commit
+        value: "${RUNTIME_COMMIT}"
+      - name: trading_platform_commit
+        value: "${PLATFORM_COMMIT}"
       - name: git_branch
-        value: "{{GIT_BRANCH}}"
+        value: "${GIT_BRANCH}"
diff --git a/argo/workflowtemplate-backtest-fanout.yaml b/argo/workflowtemplate-backtest-fanout.yaml
new file mode 100644
index 0000000..24209a5
--- /dev/null
+++ b/argo/workflowtemplate-backtest-fanout.yaml
@@ -0,0 +1,302 @@
+apiVersion: argoproj.io/v1alpha1
+kind: WorkflowTemplate
+metadata:
+  name: backtest-fanout
+spec:
+  entrypoint: backtest
+
+  parallelism: 4
+
+  arguments:
+    parameters:
+      - name: image_repo
+        description: "Container image repo"
+        value: ghcr.io/trading-engineering/trading-runtime
+
+      - name: image_tag
+        description: "Container image tag to run (recommended: commit SHA)"
+        value: main
+
+      - name: experiment_config
+        description: "Path to experiment JSON inside the container"
+        value: /usr/local/lib/python3.11/site-packages/trading_runtime/argo/argo.json
+
+      - name: scratch_root
+        description: "Scratch root inside the container"
+        value: /mnt/scratch
+
+  templates:
+    - name: backtest
+      dag:
+        tasks:
+          - name: plan
+            template: plan
+
+          - name: run-sweeps
+            dependencies: [plan]
+            template: run-sweep
+            parallelism: 3
+            withParam: "{{tasks.plan.outputs.parameters.sweep-list}}"
+            arguments:
+              parameters:
+                - name: sweep-path
+                  value: "{{item}}"
+
+          - name: finalize-segments
+            dependencies: [run-sweeps]
+            template: segment-finalize
+            withParam: "{{tasks.plan.outputs.parameters.segment-list}}"
+            arguments:
+              parameters:
+                - name: experiment-id
+                  value: "{{tasks.plan.outputs.parameters.experiment-id}}"
+
+                - name: segment-id
+                  value: "{{item.segment_id}}"
+
+                - name: expected-sweeps
+                  value: "{{item.expected_sweeps}}"
+
+                - name: completed-sweeps
+                  value: "{{item.expected_sweeps}}"
+
+                - name: failed-sweeps
+                  value: "0"
+
+                - name: segment-started-at
+                  value: "{{workflow.creationTimestamp}}"
+
+                - name: scratch-root
+                  value: "{{workflow.parameters.scratch_root}}"
+
+          - name: finalize-experiment
+            dependencies: [finalize-segments]
+            template: experiment-finalize
+            arguments:
+              parameters:
+                - name: experiment-id
+                  value: "{{tasks.plan.outputs.parameters.experiment-id}}"
+
+                - name: expected-segments
+                  value: "{{tasks.plan.outputs.parameters.expected-segments}}"
+
+                - name: completed-segments
+                  value: "{{tasks.plan.outputs.parameters.expected-segments}}"
+
+                - name: failed-segments
+                  value: "0"
+
+                - name: experiment-started-at
+                  value: "{{workflow.creationTimestamp}}"
+
+                - name: scratch-root
+                  value: "{{workflow.parameters.scratch_root}}"
+
+    - name: plan
+      container:
+        image: "{{workflow.parameters.image_repo}}:{{workflow.parameters.image_tag}}"
+        imagePullPolicy: Always
+
+        command: [python, -m]
+        args:
+          - trading_platform.backtest.runtime.entrypoint
+          - --config
+          - "{{workflow.parameters.experiment_config}}"
+          - --run
+          - --emit-dir
+          - /mnt/scratch/sweeps/{{workflow.uid}}
+
+        resources:
+          requests:
+            cpu: "1"
+            memory: "2Gi"
+          limits:
+            cpu: "2"
+            memory: "4Gi"
+
+        volumeMounts:
+          - name: scratch
+            mountPath: /mnt/scratch
+
+      outputs:
+        parameters:
+          - name: experiment-id
+            valueFrom:
+              path: /mnt/scratch/sweeps/{{workflow.uid}}/experiment_id.txt
+
+          - name: sweep-list
+            valueFrom:
+              path: /mnt/scratch/sweeps/{{workflow.uid}}/index.json
+
+          - name: segment-list
+            valueFrom:
+              path: /mnt/scratch/sweeps/{{workflow.uid}}/segments.json
+
+          - name: expected-segments
+            valueFrom:
+              path: /mnt/scratch/sweeps/{{workflow.uid}}/expected_segments.txt
+
+    - name: run-sweep
+      inputs:
+        parameters:
+          - name: sweep-path
+
+      podMetadata:
+        labels:
+          sweep: "{{inputs.parameters.sweep-path | base }}"
+
+      container:
+        image: "{{workflow.parameters.image_repo}}:{{workflow.parameters.image_tag}}"
+        imagePullPolicy: Always
+
+        command: [python, -m]
+        args:
+          - trading_platform.backtest.runtime.run_sweep
+          - --context
+          - "{{inputs.parameters.sweep-path}}"
+          - --scratch-root
+          - "{{workflow.parameters.scratch_root}}"
+
+        resources:
+          requests:
+            cpu: "1"
+            memory: "2Gi"
+          limits:
+            cpu: "2"
+            memory: "4Gi"
+
+        volumeMounts:
+          - name: scratch
+            mountPath: /mnt/scratch
+
+    - name: segment-finalize
+      inputs:
+        parameters:
+          - name: experiment-id
+          - name: segment-id
+          - name: expected-sweeps
+          - name: completed-sweeps
+          - name: failed-sweeps
+          - name: segment-started-at
+          - name: scratch-root
+
+      container:
+        image: "{{workflow.parameters.image_repo}}:{{workflow.parameters.image_tag}}"
+        imagePullPolicy: Always
+
+        env:
+          - name: PROMETHEUS_PUSHGATEWAY_URL
+            value: http://prometheus-pushgateway.monitoring.svc.cluster.local:9091
+
+          - name: MLFLOW_TRACKING_URI
+            value: http://mlflow-svc.mlflow.svc.cluster.local:5000
+
+          - name: ARGO_WORKFLOW_UID
+            value: "{{workflow.uid}}"
+
+          - name: PROMETHEUS_PUSHGATEWAY_GROUPING_KEY_JSON
+            value: '{"workflow_uid":"{{workflow.uid}}","segment_id":"{{inputs.parameters.segment-id}}"}'
+
+        command: [python, -m]
+        args:
+          - trading_platform.backtest.runtime.segment_finalize_entrypoint
+
+          - "--experiment-id"
+          - "{{inputs.parameters.experiment-id}}"
+
+          - "--segment-id"
+          - "{{inputs.parameters.segment-id}}"
+
+          - "--expected-sweeps"
+          - "{{inputs.parameters.expected-sweeps}}"
+
+          - "--completed-sweeps"
+          - "{{inputs.parameters.completed-sweeps}}"
+
+          - "--failed-sweeps"
+          - "{{inputs.parameters.failed-sweeps}}"
+
+          - "--segment-started-at"
+          - "{{inputs.parameters.segment-started-at}}"
+
+          - "--scratch-root"
+          - "{{inputs.parameters.scratch-root}}"
+
+        resources:
+          requests:
+            cpu: "0.5"
+            memory: "512Mi"
+          limits:
+            cpu: "1"
+            memory: "1Gi"
+
+        volumeMounts:
+          - name: scratch
+            mountPath: /mnt/scratch
+
+    - name: experiment-finalize
+      inputs:
+        parameters:
+          - name: experiment-id
+          - name: expected-segments
+          - name: completed-segments
+          - name: failed-segments
+          - name: experiment-started-at
+          - name: scratch-root
+
+      container:
+        image: "{{workflow.parameters.image_repo}}:{{workflow.parameters.image_tag}}"
+        imagePullPolicy: Always
+
+        env:
+          - name: PROMETHEUS_PUSHGATEWAY_URL
+            value: http://prometheus-pushgateway.monitoring.svc.cluster.local:9091
+
+          - name: MLFLOW_TRACKING_URI
+            value: http://mlflow-svc.mlflow.svc.cluster.local:5000
+
+          - name: ARGO_WORKFLOW_UID
+            value: "{{workflow.uid}}"
+
+          - name: PROMETHEUS_PUSHGATEWAY_GROUPING_KEY_JSON
+            value: '{"workflow_uid":"{{workflow.uid}}","scope":"experiment"}'
+
+        command: [python, -m]
+        args:
+          - trading_platform.backtest.runtime.experiment_finalize_entrypoint
+
+          - "--experiment-id"
+          - "{{inputs.parameters.experiment-id}}"
+
+          - "--expected-segments"
+          - "{{inputs.parameters.expected-segments}}"
+
+          - "--completed-segments"
+          - "{{inputs.parameters.completed-segments}}"
+
+          - "--failed-segments"
+          - "{{inputs.parameters.failed-segments}}"
+
+          - "--experiment-started-at"
+          - "{{inputs.parameters.experiment-started-at}}"
+
+          - "--scratch-root"
+          - "{{inputs.parameters.scratch-root}}"
+
+        resources:
+          requests:
+            cpu: "0.5"
+            memory: "512Mi"
+          limits:
+            cpu: "1"
+            memory: "1Gi"
+
+        volumeMounts:
+          - name: scratch
+            mountPath: /mnt/scratch
+
+  volumes:
+    - name: scratch
+      hostPath:
+        path: /mnt/scratch
+        type: Directory
diff --git a/argo/workflowtemplate-backtest.yaml b/argo/workflowtemplate-backtest.yaml
deleted file mode 100644
index 1be4b99..0000000
--- a/argo/workflowtemplate-backtest.yaml
+++ /dev/null
@@ -1,331 +0,0 @@
-apiVersion: argoproj.io/v1alpha1
-kind: ClusterWorkflowTemplate
-metadata:
-  name: backtest-fanout
-
-spec:
-  entrypoint: backtest
-
-  imagePullSecrets:
-    - name: ghcr-secret
-
-  # global brake
-  parallelism: 4
-
-  arguments:
-    parameters:
-      - name: git_branch
-        description: "Git branch to build (must exist)"
-        value: main
-
-      - name: experiment-config
-        description: "Path to experiment JSON inside the container"
-        value: /usr/local/lib/python3.11/site-packages/trading_runtime/argo/argo.json
-
-      - name: scratch-root
-        description: "Scratch root inside the container"
-        value: /mnt/scratch
-
-  templates:
-
-    # --------------------------------------------------
-    # MAIN DAG
-    # --------------------------------------------------
-    - name: backtest
-      dag:
-        tasks:
-
-          - name: plan
-            template: plan
-
-          - name: run-sweeps
-            dependencies: [plan]
-            template: run-sweep
-            parallelism: 3
-            withParam: "{{tasks.plan.outputs.parameters.sweep-list}}"
-            arguments:
-              parameters:
-                - name: sweep-path
-                  value: "{{item}}"
-
-          # -------------------------------
-          # SEGMENT FINALIZATION (FAN-IN)
-          # -------------------------------
-          - name: finalize-segments
-            dependencies: [run-sweeps]
-            template: segment-finalize
-            withParam: "{{tasks.plan.outputs.parameters.segment-list}}"
-            arguments:
-              parameters:
-                - name: experiment-id
-                  value: "{{tasks.plan.outputs.parameters.experiment-id}}"
-
-                - name: segment-id
-                  value: "{{item.segment_id}}"
-
-                - name: expected-sweeps
-                  value: "{{item.expected_sweeps}}"
-
-                # NOTE: These counts are currently EXPERIMENT-wide, not segment-wide.
-                # For segment-accurate counts, a per-segment fan-out of sweeps (nested DAG) is needed.
-                - name: completed-sweeps
-                  value: "{{item.expected_sweeps}}"
-
-                - name: failed-sweeps
-                  value: "0"
-
-                - name: segment-started-at
-                  value: "{{workflow.creationTimestamp}}"
-
-                - name: scratch-root
-                  value: "{{workflow.parameters.scratch-root}}"
-
-          # -------------------------------
-          # EXPERIMENT FINALIZATION (FAN-IN)
-          # -------------------------------
-          - name: finalize-experiment
-            dependencies: [finalize-segments]
-            template: experiment-finalize
-            arguments:
-              parameters:
-                - name: experiment-id
-                  value: "{{tasks.plan.outputs.parameters.experiment-id}}"
-
-                - name: expected-segments
-                  value: "{{tasks.plan.outputs.parameters.expected-segments}}"
-
-                - name: completed-segments
-                  value: "{{tasks.plan.outputs.parameters.expected-segments}}"
-
-                - name: failed-segments
-                  value: "0"
-
-                - name: experiment-started-at
-                  value: "{{workflow.creationTimestamp}}"
-
-                - name: scratch-root
-                  value: "{{workflow.parameters.scratch-root}}"
-
-    # --------------------------------------------------
-    # PLANNING STEP (ORCHESTRATOR POD)
-    # --------------------------------------------------
-    - name: plan
-      container:
-        image: ghcr.io/trading-engineering/trading-runtime:{{= replace(lower(workflow.parameters.git_branch), "/", "-") }}
-        imagePullPolicy: Always
-
-        command: [python, -m]
-        args:
-          - trading_platform.backtest.runtime.entrypoint
-          - --config
-          - "{{workflow.parameters.experiment-config}}"
-          - --run
-          - --emit-dir
-          - /mnt/scratch/sweeps/{{workflow.uid}}
-
-        resources:
-          requests:
-            cpu: "1"
-            memory: "2Gi"
-          limits:
-            cpu: "2"
-            memory: "4Gi"
-
-        volumeMounts:
-          - name: scratch
-            mountPath: /mnt/scratch
-
-      outputs:
-        parameters:
-          - name: experiment-id
-            valueFrom:
-              path: /mnt/scratch/sweeps/{{workflow.uid}}/experiment_id.txt
-
-          - name: sweep-list
-            valueFrom:
-              path: /mnt/scratch/sweeps/{{workflow.uid}}/index.json
-
-          - name: segment-list
-            valueFrom:
-              path: /mnt/scratch/sweeps/{{workflow.uid}}/segments.json
-
-          - name: expected-segments
-            valueFrom:
-              path: /mnt/scratch/sweeps/{{workflow.uid}}/expected_segments.txt
-
-    # --------------------------------------------------
-    # SWEEP EXECUTION (WORKER PODS)
-    # --------------------------------------------------
-    - name: run-sweep
-      inputs:
-        parameters:
-          - name: sweep-path
-
-      podMetadata:
-        labels:
-          sweep: "{{inputs.parameters.sweep-path | base }}"
-
-      container:
-        image: ghcr.io/trading-engineering/trading-runtime:{{= replace(lower(workflow.parameters.git_branch), "/", "-") }}
-        imagePullPolicy: Always
-
-        command: [python, -m]
-        args:
-          - trading_platform.backtest.runtime.run_sweep
-          - --context
-          - "{{inputs.parameters.sweep-path}}"
-          - --scratch-root
-          - "{{workflow.parameters.scratch-root}}"
-
-        resources:
-          requests:
-            cpu: "1"
-            memory: "2Gi"
-          limits:
-            cpu: "2"
-            memory: "4Gi"
-
-        volumeMounts:
-          - name: scratch
-            mountPath: /mnt/scratch
-
-    # --------------------------------------------------
-    # SEGMENT FINALIZER POD
-    # --------------------------------------------------
-    - name: segment-finalize
-      inputs:
-        parameters:
-          - name: experiment-id
-          - name: segment-id
-          - name: expected-sweeps
-          - name: completed-sweeps
-          - name: failed-sweeps
-          - name: segment-started-at
-          - name: scratch-root
-
-      container:
-        image: ghcr.io/trading-engineering/trading-runtime:{{= replace(lower(workflow.parameters.git_branch), "/", "-") }}
-        imagePullPolicy: Always
-
-        env:
-          - name: PROMETHEUS_PUSHGATEWAY_URL
-            value: http://prometheus-pushgateway.monitoring.svc.cluster.local:9091
-
-          - name: MLFLOW_TRACKING_URI
-            value: http://mlflow-svc.mlflow.svc.cluster.local:5000
-
-          - name: ARGO_WORKFLOW_UID
-            value: "{{workflow.uid}}"
-
-          - name: PROMETHEUS_PUSHGATEWAY_GROUPING_KEY_JSON
-            value: '{"workflow_uid":"{{workflow.uid}}","segment_id":"{{inputs.parameters.segment-id}}"}'
-
-        command: [python, -m]
-        args:
-          - trading_platform.backtest.runtime.segment_finalize_entrypoint
-
-          - "--experiment-id"
-          - "{{inputs.parameters.experiment-id}}"
-
-          - "--segment-id"
-          - "{{inputs.parameters.segment-id}}"
-
-          - "--expected-sweeps"
-          - "{{inputs.parameters.expected-sweeps}}"
-
-          - "--completed-sweeps"
-          - "{{inputs.parameters.completed-sweeps}}"
-
-          - "--failed-sweeps"
-          - "{{inputs.parameters.failed-sweeps}}"
-
-          - "--segment-started-at"
-          - "{{inputs.parameters.segment-started-at}}"
-
-          - "--scratch-root"
-          - "{{inputs.parameters.scratch-root}}"
-
-        resources:
-          requests:
-            cpu: "0.5"
-            memory: "512Mi"
-          limits:
-            cpu: "1"
-            memory: "1Gi"
-
-        volumeMounts:
-          - name: scratch
-            mountPath: /mnt/scratch
-
-    # --------------------------------------------------
-    # EXPERIMENT FINALIZER POD
-    # --------------------------------------------------
-    - name: experiment-finalize
-      inputs:
-        parameters:
-          - name: experiment-id
-          - name: expected-segments
-          - name: completed-segments
-          - name: failed-segments
-          - name: experiment-started-at
-          - name: scratch-root
-
-      container:
-        image: ghcr.io/trading-engineering/trading-runtime:{{= replace(lower(workflow.parameters.git_branch), "/", "-") }}
-        imagePullPolicy: Always
-
-        env:
-          - name: PROMETHEUS_PUSHGATEWAY_URL
-            value: http://prometheus-pushgateway.monitoring.svc.cluster.local:9091
-
-          - name: MLFLOW_TRACKING_URI
-            value: http://mlflow-svc.mlflow.svc.cluster.local:5000
-
-          - name: ARGO_WORKFLOW_UID
-            value: "{{workflow.uid}}"
-
-          - name: PROMETHEUS_PUSHGATEWAY_GROUPING_KEY_JSON
-            value: '{"workflow_uid":"{{workflow.uid}}","scope":"experiment"}'
-
-        command: [python, -m]
-        args:
-          - trading_platform.backtest.runtime.experiment_finalize_entrypoint
-
-          - "--experiment-id"
-          - "{{inputs.parameters.experiment-id}}"
-
-          - "--expected-segments"
-          - "{{inputs.parameters.expected-segments}}"
-
-          - "--completed-segments"
-          - "{{inputs.parameters.completed-segments}}"
-
-          - "--failed-segments"
-          - "{{inputs.parameters.failed-segments}}"
-
-          - "--experiment-started-at"
-          - "{{inputs.parameters.experiment-started-at}}"
-
-          - "--scratch-root"
-          - "{{inputs.parameters.scratch-root}}"
-
-        resources:
-          requests:
-            cpu: "0.5"
-            memory: "512Mi"
-          limits:
-            cpu: "1"
-            memory: "1Gi"
-
-        volumeMounts:
-          - name: scratch
-            mountPath: /mnt/scratch
-
-  # --------------------------------------------------
-  # VOLUMES
-  # --------------------------------------------------
-  volumes:
-    - name: scratch
-      hostPath:
-        path: /mnt/scratch
-        type: Directory
\ No newline at end of file
diff --git a/argo/workflowtemplate-build-push-ghcr.yaml b/argo/workflowtemplate-build-push-ghcr.yaml
index d9ac5d3..e680c1a 100644
--- a/argo/workflowtemplate-build-push-ghcr.yaml
+++ b/argo/workflowtemplate-build-push-ghcr.yaml
@@ -1,5 +1,5 @@
 apiVersion: argoproj.io/v1alpha1
-kind: ClusterWorkflowTemplate
+kind: WorkflowTemplate
 metadata:
   name: build-push-ghcr
 spec:

From 6b1b4360dac7c22c71ec1b5429ac3b87dfb1779a Mon Sep 17 00:00:00 2001
From: bxvtr
Date: Sat, 21 Feb 2026 16:04:50 +0000
Subject: [PATCH 2/3] small workflow fix

---
 .github/workflows/argo-build-and-backtest.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/argo-build-and-backtest.yaml b/.github/workflows/argo-build-and-backtest.yaml
index e827782..bc9646f 100644
--- a/.github/workflows/argo-build-and-backtest.yaml
+++ b/.github/workflows/argo-build-and-backtest.yaml
@@ -4,7 +4,6 @@ on:
   push:
     branches:
       - main
-      - "**"
   workflow_dispatch:
     inputs:
       run_backtest:

From 46a0ce63571f153fb0024caa12164ada7903295c Mon Sep 17 00:00:00 2001
From: bxvtr
Date: Sat, 21 Feb 2026 21:32:52 +0000
Subject: [PATCH 3/3] fix

---
 .../workflows/argo-build-and-backtest.yaml    | 20 ++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/argo-build-and-backtest.yaml b/.github/workflows/argo-build-and-backtest.yaml
index bc9646f..39e8f7f 100644
--- a/.github/workflows/argo-build-and-backtest.yaml
+++ b/.github/workflows/argo-build-and-backtest.yaml
@@ -92,13 +92,23 @@ jobs:
 
       - name: Wait for Argo build workflow to succeed
         run: |
+          set -x
+          set -e
+
           BUILD_NAME="${{ steps.submit_build.outputs.build_name }}"
           NS="${{ needs.resolve-context.outputs.namespace }}"
 
           echo "Waiting for workflow $BUILD_NAME in namespace $NS..."
 
-          for i in {1..240}; do
-            PHASE=$(sudo microk8s kubectl -n "$NS" get wf "$BUILD_NAME" -o jsonpath='{.status.phase}' 2>/dev/null || true)
+          sudo microk8s kubectl -n "$NS" get wf "$BUILD_NAME" -o yaml
+
+          for i in {1..60}; do
+            echo "Checking phase..."
+            sudo microk8s kubectl -n "$NS" get wf "$BUILD_NAME" -o yaml
+
+            PHASE=$(sudo microk8s kubectl -n "$NS" get wf "$BUILD_NAME" -o jsonpath='{.status.phase}')
+
+            echo "Current phase: $PHASE"
 
             if [[ "$PHASE" == "Succeeded" ]]; then
               echo "Build succeeded."
@@ -106,15 +116,15 @@ jobs:
             fi
 
             if [[ "$PHASE" == "Failed" || "$PHASE" == "Error" ]]; then
-              echo "Build ended with phase: $PHASE"
-              sudo microk8s kubectl -n "$NS" get wf "$BUILD_NAME" -o yaml || true
+              echo "Build failed."
+              sudo microk8s kubectl -n "$NS" describe wf "$BUILD_NAME"
               exit 1
             fi
 
             sleep 10
           done
 
-          echo "Timed out waiting for build workflow to finish."
+          echo "Timeout reached."
           exit 1
 
       - name: Submit Argo backtest workflow