From a1282814ab739960cee53be0983ef55334eaf69a Mon Sep 17 00:00:00 2001 From: Shay Goldstein Date: Thu, 22 Jan 2026 18:46:36 +0200 Subject: [PATCH] [SeaweedFS] Migrate from MinIO to SeaweedFS as the S3-compatible storage backend --- README.md | 8 +- charts/mlrun-ce/Chart.yaml | 2 +- charts/mlrun-ce/README.md | 32 ++-- .../mlrun-ce/admin_installation_values.yaml | 2 +- ..._admin_cluster_ip_installation_values.yaml | 13 +- .../non_admin_installation_values.yaml | 13 +- charts/mlrun-ce/requirements.lock | 10 +- charts/mlrun-ce/requirements.yaml | 8 +- charts/mlrun-ce/templates/NOTES.txt | 37 ++--- charts/mlrun-ce/templates/_helpers.tpl | 58 ++++++- .../config/jupyter-env-configmap.yaml | 7 +- .../templates/config/mlrun-env-configmap.yaml | 7 +- .../config/s3-credentials-secret.yaml | 15 ++ .../configmaps/pipeline-install-config.yaml | 6 +- .../workflow-controller-configmap.yaml | 4 +- .../pipelines/deployments/ml-pipeline-ui.yaml | 4 +- .../pipelines/deployments/ml-pipeline.yaml | 7 +- .../secrets/mlpipeline-minio-artifact.yaml | 4 +- .../seaweedfs/s3-bucket-init-job.yaml | 51 ++++++ .../seaweedfs/seaweedfs-s3-config.yaml | 33 ++++ .../seaweedfs/seaweedfs-ui-ingress.yaml | 61 +++++++ .../seaweedfs/seaweedfs-ui-services.yaml | 35 ++++ charts/mlrun-ce/values.yaml | 151 +++++++++++++----- 23 files changed, 434 insertions(+), 134 deletions(-) create mode 100644 charts/mlrun-ce/templates/config/s3-credentials-secret.yaml create mode 100644 charts/mlrun-ce/templates/seaweedfs/s3-bucket-init-job.yaml create mode 100644 charts/mlrun-ce/templates/seaweedfs/seaweedfs-s3-config.yaml create mode 100644 charts/mlrun-ce/templates/seaweedfs/seaweedfs-ui-ingress.yaml create mode 100644 charts/mlrun-ce/templates/seaweedfs/seaweedfs-ui-services.yaml diff --git a/README.md b/README.md index 27430b3b..88267190 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,17 @@ This repo contains the Helm charts for the MLRun Community Edition (CE) - a full open source MLOps stack. 
-The Open source MLRun ce chart includes the following stack: +The Open source MLRun CE chart includes the following stack: * Nuclio - https://github.com/nuclio/nuclio * MLRun - https://github.com/mlrun/mlrun * Jupyter - https://github.com/jupyter/notebook (+MLRun integrated) * MPI Operator - https://github.com/kubeflow/mpi-operator - * Minio - https://github.com/minio/minio/tree/master/helm/minio + * SeaweedFS - https://github.com/seaweedfs/seaweedfs (S3-compatible storage) * Spark Operator - https://github.com/GoogleCloudPlatform/spark-on-k8s-operator * Pipelines - https://github.com/kubeflow/pipelines * Prometheus stack - https://github.com/prometheus-community/helm-charts +## Installation - ## Installation - Refer to the installation instructions in the [README](charts/mlrun-ce/README.md) of the `mlrun-ce` chart. +Refer to the installation instructions in the [README](charts/mlrun-ce/README.md) of the `mlrun-ce` chart. diff --git a/charts/mlrun-ce/Chart.yaml b/charts/mlrun-ce/Chart.yaml index 01ac4c48..92bc7cb0 100644 --- a/charts/mlrun-ce/Chart.yaml +++ b/charts/mlrun-ce/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v1 name: mlrun-ce -version: 0.11.0-rc8 +version: 0.11.0-rc9 description: MLRun Open Source Stack home: https://iguazio.com icon: https://www.iguazio.com/wp-content/uploads/2019/10/Iguazio-Logo.png diff --git a/charts/mlrun-ce/README.md b/charts/mlrun-ce/README.md index d367bc6a..00fb5425 100644 --- a/charts/mlrun-ce/README.md +++ b/charts/mlrun-ce/README.md @@ -10,7 +10,7 @@ The Open source MLRun ce chart includes the following stack: * MLRun - https://github.com/mlrun/mlrun * Jupyter - https://github.com/jupyter/notebook (+MLRun integrated) * MPI Operator - https://github.com/kubeflow/mpi-operator -* Minio - https://github.com/minio/minio/tree/master/helm/minio +* SeaweedFS - https://github.com/seaweedfs/seaweedfs (S3-compatible storage) * Spark Operator - https://github.com/GoogleCloudPlatform/spark-on-k8s-operator * Pipelines - 
https://github.com/kubeflow/pipelines * Prometheus stack - https://github.com/prometheus-community/helm-charts @@ -162,9 +162,9 @@ helm --namespace mlrun \ --wait \ ... other overrides ... \ --set global.registry.url=${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com \ + --set global.registry.secretName=ecr-registry-credentials \ --set nuclio.dashboard.kaniko.registryProviderSecretName=aws-credentials \ --set mlrun.defaultDockerRegistrySecretName=aws-credentials \ - --set global.registry.secretName=ecr-registry-credentials \ mlrun/mlrun-ce ``` @@ -173,19 +173,23 @@ helm --namespace mlrun \ ## Usage Your applications are now available in your local browser: -- jupyter-notebook - http://nodeipaddress:30040 -- nuclio - http://nodeipaddress:30050 -- mlrun UI - http://nodeipaddress:30060 -- mlrun API (external) - http://nodeipaddress:30070 -- minio API - http://nodeipaddress:30080 -- minio UI - http://nodeipaddress:30090 -- pipeline UI - http://nodeipaddress:30100 -- grafana UI - http://nodeipaddress:30110 - +- Jupyter Notebook - http://nodeipaddress:30040 +- Nuclio - http://nodeipaddress:30050 +- MLRun UI - http://nodeipaddress:30060 +- MLRun API (external) - http://nodeipaddress:30070 +- SeaweedFS Admin UI (user/policy management) - http://nodeipaddress:30093 +- Pipeline UI - http://nodeipaddress:30100 +- Grafana UI - http://nodeipaddress:30010 +- Prometheus UI - http://nodeipaddress:30020 + +**With Ingress enabled**, the SeaweedFS Admin UI is available at: +- `https://seaweedfs-admin.<namespace>.<cluster>.lab.iguazeng.com` > **Note:** > The above links assume your Kubernetes cluster is exposed on localhost. > If that's not the case, the different components will be available on `externalHostAddress` +> +> For production deployments, consider enabling ingress for each service instead of using NodePorts. 
## Start Working @@ -278,6 +282,6 @@ Refer to the [**Kubeflow documentation**](https://www.kubeflow.org/docs/started/ This table shows the versions of the main components in the MLRun CE chart: -| MLRun CE | MLRun | Nuclio | Jupyter lab | MPI Operator | Minio | Spark Operator | Pipelines | Kube-Prometheus-Stack | Prometheus | Grafana | -|------------|--------|----------|-------------|--------------|------------------------------|----------------|-----------|-----------------------|------------|---------| -| **0.10.0** | 1.10.0 | 1.15.9 | 4.5.0 | 0.2.3 | RELEASE.2024-04-18T19-09-19Z | 2.1.0 | 2.14.3 | 0.82.0 | 3.3.1 | 11.6.1 | +| MLRun CE | MLRun | Nuclio | Jupyter | MPI Operator | SeaweedFS | Spark Operator | Pipelines | Kube-Prometheus-Stack | +|------------|--------|--------|---------|--------------|-----------|----------------|-----------|-----------------------| +| **0.11.0** | 1.11.0 | 1.15.9 | 4.5.0 | 0.2.3 | 4.0.407 | 2.1.0 | 2.14.3 | 72.1.1 | diff --git a/charts/mlrun-ce/admin_installation_values.yaml b/charts/mlrun-ce/admin_installation_values.yaml index 0ef34729..c9b2bf23 100644 --- a/charts/mlrun-ce/admin_installation_values.yaml +++ b/charts/mlrun-ce/admin_installation_values.yaml @@ -36,7 +36,7 @@ mpi-operator: deployment: create: false -minio: +seaweedfs: enabled: false spark-operator: diff --git a/charts/mlrun-ce/non_admin_cluster_ip_installation_values.yaml b/charts/mlrun-ce/non_admin_cluster_ip_installation_values.yaml index fb1f4de3..a98463ad 100644 --- a/charts/mlrun-ce/non_admin_cluster_ip_installation_values.yaml +++ b/charts/mlrun-ce/non_admin_cluster_ip_installation_values.yaml @@ -35,17 +35,8 @@ mpi-operator: clusterResources: create: false -minio: - service: - type: ClusterIP - port: 9000 - nodePort: "" - consoleService: - type: ClusterIP - port: 9001 - nodePort: "" - mode: standalone - replicas: 1 +seaweedfs: + enabled: true timescaledb: service: diff --git a/charts/mlrun-ce/non_admin_installation_values.yaml 
b/charts/mlrun-ce/non_admin_installation_values.yaml index 460b1013..d84f02ee 100644 --- a/charts/mlrun-ce/non_admin_installation_values.yaml +++ b/charts/mlrun-ce/non_admin_installation_values.yaml @@ -34,17 +34,8 @@ mpi-operator: clusterResources: create: false -minio: - service: - type: NodePort - port: 9000 - nodePort: 30080 - consoleService: - type: NodePort - port: 9001 - nodePort: 30090 - mode: standalone - replicas: 1 +seaweedfs: + enabled: true spark-operator: enabled: false diff --git a/charts/mlrun-ce/requirements.lock b/charts/mlrun-ce/requirements.lock index 929d4e4b..fd36b055 100644 --- a/charts/mlrun-ce/requirements.lock +++ b/charts/mlrun-ce/requirements.lock @@ -8,9 +8,9 @@ dependencies: - name: mpi-operator repository: https://v3io.github.io/helm-charts/stable version: 0.6.0 -- name: minio - repository: https://charts.min.io/ - version: 5.2.0 +- name: seaweedfs + repository: https://seaweedfs.github.io/seaweedfs/helm + version: 4.0.407 - name: spark-operator repository: https://kubeflow.github.io/spark-operator version: 2.1.0 @@ -20,5 +20,5 @@ dependencies: - name: strimzi-kafka-operator repository: https://strimzi.io/charts/ version: 0.48.0 -digest: sha256:542955b13f2da791768eebe8e25092a8977fba987259ebe6ad85e4b977a7963b -generated: "2026-01-14T13:52:54.335415+02:00" +digest: sha256:f87ec580f73178cfc897d57e26f5d7b049900f1b7ef75bfe198ca327eb2ed06d +generated: "2026-02-12T23:52:46.490844+02:00" diff --git a/charts/mlrun-ce/requirements.yaml b/charts/mlrun-ce/requirements.yaml index 94669693..900754a0 100644 --- a/charts/mlrun-ce/requirements.yaml +++ b/charts/mlrun-ce/requirements.yaml @@ -9,10 +9,10 @@ dependencies: - name: mpi-operator version: "0.6.0" repository: "https://v3io.github.io/helm-charts/stable" - - name: minio - repository: "https://charts.min.io/" - version: "5.2.0" - condition: minio.enabled + - name: seaweedfs + repository: "https://seaweedfs.github.io/seaweedfs/helm" + version: "4.0.407" + condition: seaweedfs.enabled - name: 
spark-operator repository: "https://kubeflow.github.io/spark-operator" version: "2.1.0" diff --git a/charts/mlrun-ce/templates/NOTES.txt b/charts/mlrun-ce/templates/NOTES.txt index 90aee48c..980d3b54 100644 --- a/charts/mlrun-ce/templates/NOTES.txt +++ b/charts/mlrun-ce/templates/NOTES.txt @@ -1,5 +1,10 @@ -{{- if or (.Values.jupyterNotebook.enabled) (.Values.mlrun.enabled) (.Values.minio.enabled) (.Values.pipelines.enabled) (index .Values "kube-prometheus-stack" "enabled") }} +{{- if or (.Values.jupyterNotebook.enabled) (.Values.mlrun.enabled) (.Values.seaweedfs.enabled) (.Values.pipelines.enabled) (index .Values "kube-prometheus-stack" "enabled") }} You're up and running! +{{- if and .Values.pipelines.enabled (not .Values.seaweedfs.enabled) }} +⚠️ WARNING: seaweedfs.enabled must be set to true when using pipelines + for storing pipeline artifacts. Pipelines may not work correctly + without an S3-compatible storage backend! +{{- end }} {{- if .Values.jupyterNotebook.enabled }} {{- "\n" }} @@ -57,41 +62,29 @@ mlrun-api.{{ .Values.global.externalHostAddress }} {{- end }} {{- end }} -{{- if .Values.minio.enabled }} -{{- if ne .Values.global.infrastructure.kind "aws" }} -{{- "\n" }} -Minio UI is available at: -{{- if .Values.minio.ingress.enabled }} -{{ (index .Values.minio.consoleIngress.hosts 0) }} -{{- else }} -{{ .Values.global.externalHostAddress }}:{{ .Values.minio.consoleService.nodePort }} -{{- end }} -- username: {{ .Values.minio.rootUser }} -- password: {{ .Values.minio.rootPassword }} -{{- end }} -{{- end }} -{{- if .Values.minio.enabled }} +{{- if .Values.seaweedfs.enabled }} {{- if ne .Values.global.infrastructure.kind "aws" }} +{{- if and .Values.seaweedfs.admin .Values.seaweedfs.admin.enabled }} {{- "\n" }} -Minio API is available at: -{{- if .Values.minio.ingress.enabled }} -{{ (index .Values.minio.ingress.hosts 0) }} +SeaweedFS Admin UI is available at: +{{- if and .Values.seaweedfs.adminService .Values.seaweedfs.adminService.ingress 
.Values.seaweedfs.adminService.ingress.enabled }} +{{ .Values.seaweedfs.adminService.ingress.host }} {{- else }} -{{ .Values.global.externalHostAddress }}:{{ .Values.minio.service.nodePort }} +{{ .Values.global.externalHostAddress }}:{{ .Values.seaweedfs.adminService.nodePort | default 30093 }} +{{- end }} +- S3 credentials: {{ .Values.s3.accessKey }} / {{ .Values.s3.secretKey }} {{- end }} {{- end }} {{- end }} -{{- if and .Values.pipelines.enabled .Values.pipelines.ui.enabled -}} }} +{{- if and .Values.pipelines.enabled .Values.pipelines.ui.enabled }} {{- if ne .Values.global.infrastructure.kind "aws" }} -{{- if not .Values.minio.ingress.enabled }} {{- "\n" }} Pipelines UI is available at: {{ .Values.global.externalHostAddress }}:{{ .Values.pipelines.service.nodePort }} {{- end }} {{- end }} -{{- end }} {{- if index .Values "kube-prometheus-stack" "enabled" }} {{- "\n" }} diff --git a/charts/mlrun-ce/templates/_helpers.tpl b/charts/mlrun-ce/templates/_helpers.tpl index 94e0d5a9..27f76052 100644 --- a/charts/mlrun-ce/templates/_helpers.tpl +++ b/charts/mlrun-ce/templates/_helpers.tpl @@ -123,25 +123,75 @@ Create chart name and version as used by the chart label. 
{{- end -}} {{/* -Minio Service URL +============================================================================= +S3 Storage Backend Helpers +SeaweedFS is the S3-compatible backend these helpers resolve; the legacy mlrun-ce.minio.* names are aliases +============================================================================= +*/}} + +{{/* +S3 Service URL - returns the endpoint URL for SeaweedFS +*/}} +{{- define "mlrun-ce.s3.service.url" -}} +http://seaweedfs-s3.{{.Release.Namespace}}.svc.cluster.local:{{ .Values.seaweedfs.s3.port }} +{{- end -}} + +{{/* +S3 Service Host - returns just the hostname for pipeline config +*/}} +{{- define "mlrun-ce.s3.service.host" -}} +seaweedfs-s3.{{.Release.Namespace}}.svc.cluster.local +{{- end -}} + +{{/* +S3 Service Port - returns the port for pipeline config +*/}} +{{- define "mlrun-ce.s3.service.port" -}} +{{- .Values.seaweedfs.s3.port | toString -}} +{{- end -}} + +{{/* +S3 Access Key - uses top-level s3.accessKey for all components (MLRun, Jupyter, Pipelines) +*/}} +{{- define "mlrun-ce.s3.accessKey" -}} +{{- .Values.s3.accessKey -}} +{{- end -}} + +{{/* +S3 Secret Key - uses top-level s3.secretKey for all components (MLRun, Jupyter, Pipelines) +*/}} +{{- define "mlrun-ce.s3.secretKey" -}} +{{- .Values.s3.secretKey -}} +{{- end -}} + +{{/* +S3 Bucket - uses top-level s3.bucket for all components +*/}} +{{- define "mlrun-ce.s3.bucket" -}} +{{- .Values.s3.bucket -}} +{{- end -}} + +{{/* +Legacy Minio Service URL - kept for backward compatibility */}} {{- define "mlrun-ce.minio.service.url" -}} -http://minio.{{.Release.Namespace}}.svc.cluster.local:{{ .Values.minio.service.port }} +{{ include "mlrun-ce.s3.service.url" . }} {{- end -}} {{- define "mlrun-ce.minio-pipeline.service.url" -}} -minio.{{.Release.Namespace}}.svc.cluster.local +{{ include "mlrun-ce.s3.service.host" . 
}} {{- end -}} {{/* MLRun storage auto mount params Global toggle is for fast toggling between on-prem/standalone and s3 cases Can be overriden if params are explicitly specified +Uses SeaweedFS as the storage backend */}} {{- define "mlrun.storage.auto.mount.params" -}} {{- if hasKey .Values.mlrun "storageAutoMountParams" -}} {{ .Values.mlrun.storageAutoMountParams }} {{- else if not .Values.global.infrastructure.aws.s3NonAnonymous -}} - "aws_access_key={{ .Values.minio.rootUser }},aws_secret_key={{ .Values.minio.rootPassword }},endpoint_url={{ include "mlrun-ce.minio.service.url" . }}" + "aws_access_key={{ include "mlrun-ce.s3.accessKey" . }},aws_secret_key={{ include "mlrun-ce.s3.secretKey" . }},endpoint_url={{ include "mlrun-ce.s3.service.url" . }}" {{- else -}} "non_anonymous=True" {{- end -}} diff --git a/charts/mlrun-ce/templates/config/jupyter-env-configmap.yaml b/charts/mlrun-ce/templates/config/jupyter-env-configmap.yaml index f26b38a6..4c555c46 100644 --- a/charts/mlrun-ce/templates/config/jupyter-env-configmap.yaml +++ b/charts/mlrun-ce/templates/config/jupyter-env-configmap.yaml @@ -5,11 +5,8 @@ kind: ConfigMap metadata: name: jupyter-common-env data: -{{- if not .Values.global.infrastructure.aws.s3NonAnonymous }} - AWS_ENDPOINT_URL_S3: {{ include "mlrun-ce.minio.service.url" . 
}} - AWS_SECRET_ACCESS_KEY: {{ .Values.minio.rootPassword }} - AWS_ACCESS_KEY_ID: {{ .Values.minio.rootUser }} -{{- end }} + # S3 credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_ENDPOINT_URL_S3) + # are now loaded from the 's3-credentials' Secret via envFrom secretRef MLRUN_STORAGE__AUTO_MOUNT_TYPE: {{ default "" .Values.mlrun.storageAutoMountType }} S3_NON_ANONYMOUS: {{ .Values.global.infrastructure.aws.s3NonAnonymous | toString | title | quote | default "\"True\"" }} MLRUN_CE__MODE: {{ .Values.jupyterNotebook.ce.mode | default "full" }} diff --git a/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml b/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml index 2381993e..a284aa5c 100644 --- a/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml +++ b/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml @@ -11,11 +11,8 @@ data: MLRUN_STORAGE__AUTO_MOUNT_PARAMS: {{ include "mlrun.storage.auto.mount.params" . }} MLRUN_HTTPDB__PROJECTS__LEADER: mlrun MLRUN_HTTPDB__PROJECTS__FOLLOWERS: nuclio -{{- if not .Values.global.infrastructure.aws.s3NonAnonymous }} - AWS_SECRET_ACCESS_KEY: {{ .Values.minio.rootPassword }} - AWS_ACCESS_KEY_ID: {{ .Values.minio.rootUser }} - AWS_ENDPOINT_URL_S3: {{ include "mlrun-ce.minio.service.url" . 
}} -{{- end }} + # S3 credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_ENDPOINT_URL_S3) + # are now loaded from the 's3-credentials' Secret via envFrom secretRef MLRUN_FUNCTION__SPEC__SERVICE_ACCOUNT__DEFAULT: {{ .Values.mlrun.api.functionSpecServiceAccountDefault | default "" | quote }} MLRUN_HTTPDB__REAL_PATH: s3:// MLRUN_ARTIFACT_PATH: s3://{{ $bucket_name }}/projects/{{ `{{run.project}}` }}/artifacts diff --git a/charts/mlrun-ce/templates/config/s3-credentials-secret.yaml b/charts/mlrun-ce/templates/config/s3-credentials-secret.yaml new file mode 100644 index 00000000..da8ffbd7 --- /dev/null +++ b/charts/mlrun-ce/templates/config/s3-credentials-secret.yaml @@ -0,0 +1,15 @@ +{{- if not .Values.global.infrastructure.aws.s3NonAnonymous }} +# S3 credentials secret for MLRun, Jupyter, and other components +# Uses AWS SDK standard environment variable names; values are quoted so they always render as YAML strings +apiVersion: v1 +kind: Secret +metadata: + name: s3-credentials + labels: + {{- include "mlrun-ce.common.labels" . | nindent 4 }} +type: Opaque +stringData: + AWS_ACCESS_KEY_ID: {{ include "mlrun-ce.s3.accessKey" . | quote }} + AWS_SECRET_ACCESS_KEY: {{ include "mlrun-ce.s3.secretKey" . | quote }} + AWS_ENDPOINT_URL_S3: {{ include "mlrun-ce.s3.service.url" . | quote }} +{{- end }} diff --git a/charts/mlrun-ce/templates/pipelines/configmaps/pipeline-install-config.yaml b/charts/mlrun-ce/templates/pipelines/configmaps/pipeline-install-config.yaml index 872d2736..2a5ff2ef 100644 --- a/charts/mlrun-ce/templates/pipelines/configmaps/pipeline-install-config.yaml +++ b/charts/mlrun-ce/templates/pipelines/configmaps/pipeline-install-config.yaml @@ -1,13 +1,13 @@ {{- if .Values.pipelines.enabled -}} apiVersion: v1 data: - minioServiceHost: {{ include "mlrun-ce.minio-pipeline.service.url" . }} - minioServicePort: "{{ .Values.pipelines.minio.endpointPort }}" + objectStoreServiceHost: {{ include "mlrun-ce.s3.service.host" . 
}} + objectStoreServicePort: {{ include "mlrun-ce.s3.service.port" . 
| quote }} ConMaxLifeTime: 120s appName: pipeline appVersion: {{ .Values.pipelines.images.apiServer.tag }} autoUpdatePipelineDefaultVersion: "false" - bucketName: "{{ .Values.pipelines.minio.bucket }}" + bucketName: "{{ include "mlrun-ce.s3.bucket" . }}" cacheDb: cachedb cacheImage: {{ .Values.pipelines.images.cacheImage.repository }}:{{ .Values.pipelines.images.cacheImage.tag }} cacheNodeRestrictions: "false" diff --git a/charts/mlrun-ce/templates/pipelines/configmaps/workflow-controller-configmap.yaml b/charts/mlrun-ce/templates/pipelines/configmaps/workflow-controller-configmap.yaml index 63cca9dd..38d7b46b 100644 --- a/charts/mlrun-ce/templates/pipelines/configmaps/workflow-controller-configmap.yaml +++ b/charts/mlrun-ce/templates/pipelines/configmaps/workflow-controller-configmap.yaml @@ -9,8 +9,8 @@ data: artifactRepository: | archiveLogs: true s3: - endpoint: "{{ include "mlrun-ce.minio-pipeline.service.url" . }}:{{ .Values.pipelines.minio.endpointPort }}" - bucket: "{{ .Values.pipelines.minio.bucket }}" + endpoint: "{{ include "mlrun-ce.s3.service.host" . }}:{{ include "mlrun-ce.s3.service.port" . }}" + bucket: "{{ include "mlrun-ce.s3.bucket" . }}" insecure: true accessKeySecret: name: mlpipeline-minio-artifact diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml index 2218bc88..3d008149 100644 --- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml +++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml @@ -33,9 +33,9 @@ spec: - name: VIEWER_TENSORBOARD_POD_TEMPLATE_SPEC_PATH value: /etc/config/viewer-pod-template.json - name: MINIO_HOST - value: {{ include "mlrun-ce.minio-pipeline.service.url" . }} + value: {{ include "mlrun-ce.s3.service.host" . }} - name: MINIO_PORT - value: "{{ .Values.pipelines.minio.endpointPort }}" + value: {{ include "mlrun-ce.s3.service.port" . 
| quote }} - name: MINIO_NAMESPACE # This is required because otherwise the namespace is appended to the MinIO hostname used to fetch artifacts, causing the fetch to fail value: "" diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml index ddaa48c9..fb124aa8 100644 --- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml +++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml @@ -61,9 +61,12 @@ spec: valueFrom: configMapKeyRef: name: pipeline-install-config - key: minioServiceHost + key: objectStoreServiceHost - name: OBJECTSTORECONFIG_PORT - value: "9000" + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: objectStoreServicePort - name: OBJECTSTORECONFIG_SECURE value: "false" - name: OBJECTSTORECONFIG_BUCKETNAME diff --git a/charts/mlrun-ce/templates/pipelines/secrets/mlpipeline-minio-artifact.yaml b/charts/mlrun-ce/templates/pipelines/secrets/mlpipeline-minio-artifact.yaml index 0fc8a151..669c9232 100644 --- a/charts/mlrun-ce/templates/pipelines/secrets/mlpipeline-minio-artifact.yaml +++ b/charts/mlrun-ce/templates/pipelines/secrets/mlpipeline-minio-artifact.yaml @@ -1,8 +1,8 @@ {{- if .Values.pipelines.enabled -}} apiVersion: v1 data: - accesskey: {{ .Values.pipelines.minio.accessKey | b64enc | quote }} - secretkey: {{ .Values.pipelines.minio.secretKey | b64enc | quote }} + accesskey: {{ include "mlrun-ce.s3.accessKey" . | b64enc | quote }} + secretkey: {{ include "mlrun-ce.s3.secretKey" . 
| b64enc | quote }} kind: Secret metadata: annotations: diff --git a/charts/mlrun-ce/templates/seaweedfs/s3-bucket-init-job.yaml b/charts/mlrun-ce/templates/seaweedfs/s3-bucket-init-job.yaml new file mode 100644 index 00000000..3e037182 --- /dev/null +++ b/charts/mlrun-ce/templates/seaweedfs/s3-bucket-init-job.yaml @@ -0,0 +1,51 @@ +{{- if .Values.seaweedfs.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Release.Name }}-s3-bucket-init + labels: + {{- include "mlrun-ce.common.labels" . | nindent 4 }} + app.kubernetes.io/component: s3-bucket-init + annotations: + helm.sh/hook: post-install,post-upgrade + helm.sh/hook-weight: "10" + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded +spec: + ttlSecondsAfterFinished: 300 + backoffLimit: 5 + template: + metadata: + labels: + {{- include "mlrun-ce.common.labels" . | nindent 8 }} + app.kubernetes.io/component: s3-bucket-init + spec: + restartPolicy: OnFailure + initContainers: + - name: wait-for-seaweedfs + image: busybox:1.36 + env: + - name: S3_HOST + value: seaweedfs-s3.{{ .Release.Namespace }}.svc.cluster.local + - name: S3_PORT + value: {{ .Values.seaweedfs.s3.port | quote }} + command: + - /bin/sh + - -c + - "until nc -z $S3_HOST $S3_PORT; do sleep 5; done" + containers: + - name: create-bucket + image: amazon/aws-cli:2.15.0 + env: + - name: AWS_ACCESS_KEY_ID + value: {{ include "mlrun-ce.s3.accessKey" . | quote }} + - name: AWS_SECRET_ACCESS_KEY + value: {{ include "mlrun-ce.s3.secretKey" . | quote }} + - name: AWS_ENDPOINT_URL + value: {{ include "mlrun-ce.s3.service.url" . | quote }} + - name: BUCKET_NAME + value: {{ include "mlrun-ce.s3.bucket" . 
| quote }} + command: + - /bin/sh + - -c + - "aws s3 mb s3://$BUCKET_NAME || aws s3 ls s3://$BUCKET_NAME || exit 1" +{{- end }} diff --git a/charts/mlrun-ce/templates/seaweedfs/seaweedfs-s3-config.yaml b/charts/mlrun-ce/templates/seaweedfs/seaweedfs-s3-config.yaml new file mode 100644 index 00000000..3015df54 --- /dev/null +++ b/charts/mlrun-ce/templates/seaweedfs/seaweedfs-s3-config.yaml @@ -0,0 +1,33 @@ +{{- if .Values.seaweedfs.enabled }} +{{- if .Values.seaweedfs.s3.enableAuth }} +apiVersion: v1 +kind: Secret +metadata: + name: seaweedfs-s3-config + labels: + {{- include "mlrun-ce.common.labels" . | nindent 4 }} +type: Opaque +stringData: + seaweedfs_s3_config: | + { + "identities": [ + { + "name": "admin", + "credentials": [ + { + "accessKey": "{{ include "mlrun-ce.s3.accessKey" . }}", + "secretKey": "{{ include "mlrun-ce.s3.secretKey" . }}" + } + ], + "actions": [ + "Admin", + "Read", + "List", + "Tagging", + "Write" + ] + } + ] + } +{{- end }} +{{- end }} diff --git a/charts/mlrun-ce/templates/seaweedfs/seaweedfs-ui-ingress.yaml b/charts/mlrun-ce/templates/seaweedfs/seaweedfs-ui-ingress.yaml new file mode 100644 index 00000000..8cb3c701 --- /dev/null +++ b/charts/mlrun-ce/templates/seaweedfs/seaweedfs-ui-ingress.yaml @@ -0,0 +1,61 @@ +{{- if .Values.seaweedfs.enabled }} +# ============================================================================= +# SeaweedFS Admin UI Ingress +# This ingress exposes the Admin UI for external access via ingress controller +# ============================================================================= + +{{- if and .Values.seaweedfs.admin .Values.seaweedfs.admin.enabled .Values.seaweedfs.adminService .Values.seaweedfs.adminService.ingress .Values.seaweedfs.adminService.ingress.enabled }} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: ingress-seaweedfs-admin + namespace: {{ .Release.Namespace }} + labels: + {{- include "mlrun-ce.common.labels" . 
| nindent 4 }} + app.kubernetes.io/component: seaweedfs-admin-ui + {{- with .Values.seaweedfs.adminService.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ingressClassName: {{ .Values.seaweedfs.adminService.ingress.ingressClassName | default .Values.seaweedfs.adminService.ingress.className | default "nginx" }} + rules: + {{- $ingress := .Values.seaweedfs.adminService.ingress }} + {{- $defaultHost := printf "seaweedfs-adminservice.%s.%s" $.Release.Namespace ($.Values.global.externalHostAddress | default "localhost") }} + {{- /* Support both standard K8s format (hosts[0].host) and simple format (host) */ -}} + {{- $host := $defaultHost }} + {{- if and $ingress.hosts (index $ingress.hosts 0) }} + {{- $host = (index $ingress.hosts 0).host | default $defaultHost }} + {{- else if $ingress.host }} + {{- $host = $ingress.host }} + {{- end }} + - host: {{ $host }} + http: + paths: + {{- if and $ingress.hosts (index $ingress.hosts 0) (index $ingress.hosts 0).paths }} + {{- range (index $ingress.hosts 0).paths }} + - path: {{ .path | default "/" }} + pathType: {{ .pathType | default "Prefix" }} + backend: + service: + name: seaweedfs-admin-ui + port: + number: {{ $.Values.seaweedfs.adminService.port | default 23646 }} + {{- end }} + {{- else }} + - path: {{ $ingress.path | default "/" }} + pathType: {{ $ingress.pathType | default "Prefix" }} + backend: + service: + name: seaweedfs-admin-ui + port: + number: {{ $.Values.seaweedfs.adminService.port | default 23646 }} + {{- end }} + {{- if $ingress.tls }} + tls: + {{- toYaml $ingress.tls | nindent 4 }} + {{- end }} +{{- end }} + +{{- end }} diff --git a/charts/mlrun-ce/templates/seaweedfs/seaweedfs-ui-services.yaml b/charts/mlrun-ce/templates/seaweedfs/seaweedfs-ui-services.yaml new file mode 100644 index 00000000..c47c0b9b --- /dev/null +++ b/charts/mlrun-ce/templates/seaweedfs/seaweedfs-ui-services.yaml @@ -0,0 +1,35 @@ +{{- if .Values.seaweedfs.enabled }} +# 
============================================================================= +# SeaweedFS Admin UI Service +# This service exposes the SeaweedFS Admin UI for external access, providing +# user & policy management functionality similar to MinIO Console IAM +# ============================================================================= + +{{- if and .Values.seaweedfs.admin .Values.seaweedfs.admin.enabled .Values.seaweedfs.adminService }} +--- +# Admin UI Service - User & policy management +# Similar to MinIO's Identity/Access Management (IAM) +apiVersion: v1 +kind: Service +metadata: + name: seaweedfs-admin-ui + namespace: {{ .Release.Namespace }} + labels: + {{- include "mlrun-ce.common.labels" . | nindent 4 }} + app.kubernetes.io/component: seaweedfs-admin-ui +spec: + type: {{ .Values.seaweedfs.adminService.type | default "NodePort" }} + ports: + - name: admin-ui + port: {{ .Values.seaweedfs.adminService.port | default 23646 }} + targetPort: {{ .Values.seaweedfs.admin.port | default 23646 }} + {{- if .Values.seaweedfs.adminService.nodePort }} + nodePort: {{ .Values.seaweedfs.adminService.nodePort }} + {{- end }} + protocol: TCP + selector: + app.kubernetes.io/name: seaweedfs + app.kubernetes.io/component: admin +{{- end }} + +{{- end }} diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml index f403ec0b..bed3e5b0 100644 --- a/charts/mlrun-ce/values.yaml +++ b/charts/mlrun-ce/values.yaml @@ -2,7 +2,7 @@ global: # External host/ip to reach the k8s node. 
This might take various values if k8s is run in a VM or a cloud env externalHostAddress: localhost - registry: + registry: &userRegistry url: mustprovide secretName: nuclio: @@ -20,7 +20,19 @@ global: s3NonAnonymous: false domainNameCertificate: ~ +# ============================================================================= +# S3-compatible storage configuration +# These credentials are used by MLRun, Jupyter, and Kubeflow Pipelines +# to access the S3-compatible storage backend (SeaweedFS) +# ============================================================================= +s3: + accessKey: "minio" + secretKey: "minio123" + bucket: "mlrun" + nuclio: + global: + registry: *userRegistry # coupled with mlrun.nuclio.dashboardName template in mlrun chart fullnameOverride: nuclio controller: @@ -71,6 +83,8 @@ nuclio: mlrun: # set the type of filesystem to use: filesystem, s3 enabled: true + global: + registry: *userRegistry defaultFunctionPodResources: limits: cpu: "2" @@ -128,6 +142,9 @@ mlrun: envFrom: - configMapRef: name: mlrun-common-env + - secretRef: + name: s3-credentials + optional: true - configMapRef: name: mlrun-pipelines-config optional: true @@ -240,6 +257,9 @@ jupyterNotebook: - configMapRef: name: jupyter-common-env optional: true + - secretRef: + name: s3-credentials + optional: true persistence: enabled: true existingClaim: @@ -270,33 +290,102 @@ mpi-operator: deployment: create: true -minio: +seaweedfs: enabled: true - rootUser: minio - rootPassword: minio123 - ingress: - enabled: false - mode: standalone - replicas: 1 - resources: - requests: - memory: 0.5Gi - service: - type: NodePort - port: 9000 - nodePort: 30080 - consoleService: + global: + # Override parent chart's global.registry (which is a map) with empty string + # to prevent "map[secretName: url:mustprovide]" in image names + registry: "" + # Override default "001" to "000" for single-node setup (no replication) + replicationPlacement: "000" + + # Master server - metadata management + 
master: + port: 9333 + # Storage: use PVC instead of default hostPath + data: + type: "persistentVolumeClaim" + size: "1Gi" + logs: + type: "emptyDir" + resources: + requests: + memory: 128Mi + + # Volume server - actual data storage + volume: + port: 8080 + # Storage: use PVC instead of default hostPath + dataDirs: + - name: data + type: "persistentVolumeClaim" + size: "10Gi" + maxVolumes: 0 + logs: + type: "emptyDir" + resources: + requests: + memory: 256Mi + + # Filer server - file system interface + filer: + port: 8888 + # Storage: use PVC instead of default hostPath + data: + type: "persistentVolumeClaim" + size: "1Gi" + logs: + type: "emptyDir" + # Filer's embedded S3 gateway + s3: + enabled: true + port: 8333 + resources: + requests: + memory: 128Mi + + # S3 API gateway - MLRun connects to this endpoint + s3: + enabled: true # Default is false + port: 8333 + enableAuth: true # Default is false + # SeaweedFS S3 IAM configuration secret - defines authorized users/credentials + # and their permissions for S3 API access (Admin, Read, List, Write, etc.) 
+ # Created by templates/seaweedfs/seaweedfs-s3-config.yaml using s3.accessKey/secretKey + existingConfigSecret: "seaweedfs-s3-config" + resources: + requests: + memory: 128Mi + + # Admin server - user and policy management UI + admin: + enabled: true # Default is false + port: 23646 + secret: + adminUser: "minio" + adminPassword: "minio123" + dataDir: "/data" + # Storage: use PVC instead of default emptyDir + data: + type: "persistentVolumeClaim" + size: "1Gi" + resources: + requests: + memory: 64Mi + + # Custom NodePort service for Admin UI external access + adminService: type: NodePort - port: 9001 - nodePort: 30090 - persistence: - enabled: true - size: 1Gi - fullnameOverride: minio - buckets: - - name: mlrun - policy: none - purge: false + port: 23646 + nodePort: 30093 + ingress: + enabled: false + className: "" + host: "" + path: / + pathType: Prefix + annotations: {} + tls: [] spark-operator: enabled: true @@ -347,12 +436,6 @@ pipelines: runAsGroup: 1001 fsGroup: 1001 fsGroupChangePolicy: OnRootMismatch - minio: - enabled: true - accessKey: "minio" - secretKey: "minio123" - endpointPort: "9000" - bucket: "mlrun" images: driver: repository: ghcr.io/kubeflow/kfp-driver @@ -403,9 +486,6 @@ pipelines: cacheImage: repository: gcr.io/google-containers/busybox tag: latest - minio: - repository: minio/minio - tag: "RELEASE.2025-10-15T17-29-55Z" kube-prometheus-stack: fullnameOverride: monitoring @@ -455,7 +535,6 @@ kube-prometheus-stack: nodePort: 30020 kube-state-metrics: fullnameOverride: state-metrics - prometheus-node-exporter: fullnameOverride: node-exporter hostNetwork: false