diff --git a/cmd/root.go b/cmd/root.go index c39e3887fe..fa338b40c9 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -53,7 +53,7 @@ HPC deployments on the Google Cloud Platform.`, logging.Fatal("cmd.Help function failed: %s", err) } }, - Version: "v1.58.0", + Version: "v1.59.0", Annotations: annotation, } ) diff --git a/community/modules/compute/htcondor-execute-point/versions.tf b/community/modules/compute/htcondor-execute-point/versions.tf index 40014c4bfb..6792674837 100644 --- a/community/modules/compute/htcondor-execute-point/versions.tf +++ b/community/modules/compute/htcondor-execute-point/versions.tf @@ -29,6 +29,6 @@ terraform { } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:htcondor-execute-point/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:htcondor-execute-point/v1.59.0" } } diff --git a/community/modules/compute/mig/versions.tf b/community/modules/compute/mig/versions.tf index ed97d77150..ba0ec8771e 100644 --- a/community/modules/compute/mig/versions.tf +++ b/community/modules/compute/mig/versions.tf @@ -22,6 +22,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:mig/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:mig/v1.59.0" } } diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/versions.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/versions.tf index c3e2a4ac5f..58e3767763 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/versions.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/versions.tf @@ -24,6 +24,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-dynamic/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-dynamic/v1.59.0" } } diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/versions.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/versions.tf index da994918ae..d10bbac793 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/versions.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/versions.tf @@ -18,6 +18,6 @@ terraform { required_version = ">= 1.3" provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-tpu/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-tpu/v1.59.0" } } diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/versions.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/versions.tf index e3784c71d2..bf108c0bc5 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/versions.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/versions.tf @@ -24,6 +24,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset/v1.59.0" } } diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-partition/versions.tf b/community/modules/compute/schedmd-slurm-gcp-v6-partition/versions.tf index 22b8b5aaab..a807892b3d 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-partition/versions.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-partition/versions.tf @@ -18,6 +18,6 @@ terraform { required_version = ">= 1.3" provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-partition/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-partition/v1.59.0" } } diff --git a/community/modules/database/slurm-cloudsql-federation/versions.tf b/community/modules/database/slurm-cloudsql-federation/versions.tf index d210843592..e295299eb3 100644 --- a/community/modules/database/slurm-cloudsql-federation/versions.tf +++ b/community/modules/database/slurm-cloudsql-federation/versions.tf @@ -26,10 +26,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.59.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.59.0" } required_version = ">= 0.13.0" diff --git a/community/modules/file-system/cloud-storage-bucket/versions.tf b/community/modules/file-system/cloud-storage-bucket/versions.tf index 4e4591c750..47e10209cd 100644 --- a/community/modules/file-system/cloud-storage-bucket/versions.tf +++ b/community/modules/file-system/cloud-storage-bucket/versions.tf @@ -30,10 +30,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:cloud-storage-bucket/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:cloud-storage-bucket/v1.59.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:cloud-storage-bucket/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:cloud-storage-bucket/v1.59.0" } required_version = ">= 0.14.0" } diff --git a/community/modules/file-system/nfs-server/versions.tf b/community/modules/file-system/nfs-server/versions.tf index b747233d9c..4f1a5b7f11 100644 --- a/community/modules/file-system/nfs-server/versions.tf +++ b/community/modules/file-system/nfs-server/versions.tf @@ -30,7 +30,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.59.0" } required_version = ">= 0.14.0" diff --git a/community/modules/files/fsi-montecarlo-on-batch/versions.tf b/community/modules/files/fsi-montecarlo-on-batch/versions.tf index 5dc7905f10..88f64a13bb 100644 --- a/community/modules/files/fsi-montecarlo-on-batch/versions.tf +++ b/community/modules/files/fsi-montecarlo-on-batch/versions.tf @@ -35,9 +35,9 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:fsi-montecarlo-on-batch/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:fsi-montecarlo-on-batch/v1.59.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:fsi-montecarlo-on-batch/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:fsi-montecarlo-on-batch/v1.59.0" } } diff --git a/community/modules/internal/slurm-gcp/login/versions.tf b/community/modules/internal/slurm-gcp/login/versions.tf index c01a91173b..30b56170a8 100644 --- a/community/modules/internal/slurm-gcp/login/versions.tf +++ b/community/modules/internal/slurm-gcp/login/versions.tf @@ -24,6 +24,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-controller/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-controller/v1.59.0" } } diff --git a/community/modules/network/private-service-access/versions.tf b/community/modules/network/private-service-access/versions.tf index aaa1b4a4c1..534c00bd4a 100644 --- a/community/modules/network/private-service-access/versions.tf +++ b/community/modules/network/private-service-access/versions.tf @@ -30,11 +30,11 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:private-service-access/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:private-service-access/v1.59.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:private-service-access/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:private-service-access/v1.59.0" } required_version = ">= 1.2" diff --git a/community/modules/project/service-enablement/versions.tf b/community/modules/project/service-enablement/versions.tf index e508099479..af0ea91984 100644 --- a/community/modules/project/service-enablement/versions.tf +++ b/community/modules/project/service-enablement/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.59.0" } required_version = ">= 0.14.0" diff --git a/community/modules/pubsub/bigquery-sub/versions.tf b/community/modules/pubsub/bigquery-sub/versions.tf index 48aaaab724..46a2ce547f 100644 --- a/community/modules/pubsub/bigquery-sub/versions.tf +++ b/community/modules/pubsub/bigquery-sub/versions.tf @@ -26,10 +26,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:bigquery-sub/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:bigquery-sub/v1.59.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:bigquery-sub/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:bigquery-sub/v1.59.0" } required_version = ">= 1.0" } diff --git a/community/modules/pubsub/topic/versions.tf b/community/modules/pubsub/topic/versions.tf index 7a19611197..f447b1c6d2 100644 --- a/community/modules/pubsub/topic/versions.tf +++ b/community/modules/pubsub/topic/versions.tf @@ -27,6 +27,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:topic/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:topic/v1.59.0" } } diff --git a/community/modules/scheduler/htcondor-access-point/versions.tf b/community/modules/scheduler/htcondor-access-point/versions.tf index 560c17beae..c3cc8f819f 100644 --- a/community/modules/scheduler/htcondor-access-point/versions.tf +++ b/community/modules/scheduler/htcondor-access-point/versions.tf @@ -30,7 +30,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:htcondor-access-point/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:htcondor-access-point/v1.59.0" } required_version = ">= 1.1" diff --git a/community/modules/scheduler/htcondor-central-manager/versions.tf b/community/modules/scheduler/htcondor-central-manager/versions.tf index 657de15676..e21ff1b146 100644 --- a/community/modules/scheduler/htcondor-central-manager/versions.tf +++ b/community/modules/scheduler/htcondor-central-manager/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:htcondor-central-manager/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:htcondor-central-manager/v1.59.0" } required_version = ">= 1.1.0" diff --git a/community/modules/scheduler/htcondor-pool-secrets/versions.tf b/community/modules/scheduler/htcondor-pool-secrets/versions.tf index 1b12009cc4..8148449271 100644 --- a/community/modules/scheduler/htcondor-pool-secrets/versions.tf +++ b/community/modules/scheduler/htcondor-pool-secrets/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:htcondor-pool-secrets/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:htcondor-pool-secrets/v1.59.0" } required_version = ">= 1.3.0" diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/versions.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/versions.tf index 797a4c2bca..63b1202d93 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/versions.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/versions.tf @@ -28,6 +28,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-controller/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-controller/v1.59.0" } } diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/versions.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/versions.tf index 9246bf10b0..450699468c 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/versions.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/versions.tf @@ -24,6 +24,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-login/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-login/v1.59.0" } } diff --git a/community/modules/scripts/wait-for-startup/versions.tf b/community/modules/scripts/wait-for-startup/versions.tf index 4388f65349..94ee373c23 100644 --- a/community/modules/scripts/wait-for-startup/versions.tf +++ b/community/modules/scripts/wait-for-startup/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.59.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scripts/windows-startup-script/versions.tf b/community/modules/scripts/windows-startup-script/versions.tf index 8ef2163712..b42491a7e2 100644 --- a/community/modules/scripts/windows-startup-script/versions.tf +++ b/community/modules/scripts/windows-startup-script/versions.tf @@ -16,7 +16,7 @@ terraform { provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:windows-startup-script/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:windows-startup-script/v1.59.0" } required_version = ">= 0.14.0" diff --git a/examples/gke-consumption-options/dws-calendar.md b/examples/gke-consumption-options/dws-calendar.md index 179785ddf9..4b93700752 100644 --- a/examples/gke-consumption-options/dws-calendar.md +++ b/examples/gke-consumption-options/dws-calendar.md @@ -10,4 +10,4 @@ The [gke-a3-ultragpu](./examples/gke-a3-ultragpu) example can be used to create Refer to [Create an AI-optimized GKE cluster with default configuration](https://cloud.google.com/ai-hypercomputer/docs/create/gke-ai-hypercompute#use-cluster-toolkit) for instructions on creating the GKE-A3U cluster. -Refer to [Deploy and run NCCL test](https://cloud.google.com/ai-hypercomputer/docs/create/gke-ai-hypercompute#deploy-run-nccl-tas-test) for instructions on running a NCCL test on the GKE A3 Ultra cluster. +Refer to [Deploy and run NCCL test](https://cloud.google.com/ai-hypercomputer/docs/create/gke-ai-hypercompute#deploy-run-nccl-tas-test) for instructions on running a NCCL test on the GKE A3 Ultragpu cluster. diff --git a/modules/compute/gke-node-pool/versions.tf b/modules/compute/gke-node-pool/versions.tf index ca090feefb..96a103ef47 100644 --- a/modules/compute/gke-node-pool/versions.tf +++ b/modules/compute/gke-node-pool/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:gke-node-pool/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:gke-node-pool/v1.59.0" } provider_meta "google-beta" { module_name = "blueprints/terraform/hpc-toolkit:gke-node-pool/v1.45.0" diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index 4ab83d076f..11b3ba57c2 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -31,10 +31,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.59.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.59.0" } required_version = ">= 1.3.0" diff --git a/modules/file-system/filestore/versions.tf b/modules/file-system/filestore/versions.tf index 9ebef7d908..b4b156160a 100644 --- a/modules/file-system/filestore/versions.tf +++ b/modules/file-system/filestore/versions.tf @@ -26,10 +26,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.59.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.59.0" } required_version = ">= 1.3.0" diff --git a/modules/file-system/gke-persistent-volume/versions.tf b/modules/file-system/gke-persistent-volume/versions.tf index 7bed64f237..8105d17896 100644 --- a/modules/file-system/gke-persistent-volume/versions.tf +++ b/modules/file-system/gke-persistent-volume/versions.tf @@ -29,6 +29,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:gke-persistent-volume/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:gke-persistent-volume/v1.59.0" } } diff --git a/modules/file-system/gke-storage/versions.tf b/modules/file-system/gke-storage/versions.tf index 9c447d78ee..42e0fc29d6 100644 --- a/modules/file-system/gke-storage/versions.tf +++ b/modules/file-system/gke-storage/versions.tf @@ -16,6 +16,6 @@ terraform { required_version = ">= 1.5" provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:gke-storage/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:gke-storage/v1.59.0" } } diff --git a/modules/file-system/managed-lustre/versions.tf b/modules/file-system/managed-lustre/versions.tf index f01ee79664..0f72b7474d 100644 --- a/modules/file-system/managed-lustre/versions.tf +++ b/modules/file-system/managed-lustre/versions.tf @@ -26,10 +26,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:managed-lustre/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:managed-lustre/v1.59.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:managed-lustre/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:managed-lustre/v1.59.0" } required_version = ">= 1.3.0" diff --git a/modules/monitoring/dashboard/versions.tf b/modules/monitoring/dashboard/versions.tf index 1be5e2b924..0e590a3762 100644 --- a/modules/monitoring/dashboard/versions.tf +++ b/modules/monitoring/dashboard/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.59.0" } required_version = ">= 0.14.0" diff --git a/modules/network/firewall-rules/versions.tf b/modules/network/firewall-rules/versions.tf index b6cc8cb1f1..e0a4b09680 100644 --- a/modules/network/firewall-rules/versions.tf +++ b/modules/network/firewall-rules/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:firewall-rules/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:firewall-rules/v1.59.0" } required_version = ">= 1.5" diff --git a/modules/network/pre-existing-subnetwork/versions.tf b/modules/network/pre-existing-subnetwork/versions.tf index a0da152f75..b84098cde1 100644 --- a/modules/network/pre-existing-subnetwork/versions.tf +++ b/modules/network/pre-existing-subnetwork/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:pre-existing-subnetwork/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:pre-existing-subnetwork/v1.59.0" } required_version = ">= 1.5" diff --git a/modules/network/pre-existing-vpc/versions.tf b/modules/network/pre-existing-vpc/versions.tf index 01ed0ef2f1..b8a9a9a400 100644 --- a/modules/network/pre-existing-vpc/versions.tf +++ b/modules/network/pre-existing-vpc/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.59.0" } required_version = ">= 1.5" diff --git a/modules/scheduler/batch-login-node/versions.tf b/modules/scheduler/batch-login-node/versions.tf index 9ed48fca45..2477374e4f 100644 --- a/modules/scheduler/batch-login-node/versions.tf +++ b/modules/scheduler/batch-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:batch-login-node/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:batch-login-node/v1.59.0" } required_version = ">= 0.14.0" diff --git a/modules/scheduler/gke-cluster/versions.tf b/modules/scheduler/gke-cluster/versions.tf index 68063bc9df..51023d441b 100644 --- a/modules/scheduler/gke-cluster/versions.tf +++ b/modules/scheduler/gke-cluster/versions.tf @@ -30,7 +30,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:gke-cluster/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:gke-cluster/v1.59.0" } provider_meta "google-beta" { diff --git a/modules/scheduler/pre-existing-gke-cluster/versions.tf b/modules/scheduler/pre-existing-gke-cluster/versions.tf index c5be94d28d..419292ebbd 100644 --- a/modules/scheduler/pre-existing-gke-cluster/versions.tf +++ b/modules/scheduler/pre-existing-gke-cluster/versions.tf @@ -23,7 +23,7 @@ terraform { } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:pre-existing-gke-cluster/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:pre-existing-gke-cluster/v1.59.0" } required_version = ">= 1.3" diff --git a/modules/scripts/startup-script/versions.tf b/modules/scripts/startup-script/versions.tf index 7ec6cd59b6..135e621886 100644 --- a/modules/scripts/startup-script/versions.tf +++ b/modules/scripts/startup-script/versions.tf @@ -30,7 +30,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.58.0" + module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.59.0" } required_version = ">= 1.5" diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-a3u-dws-flex-start-queued-provisioning.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-a3u-dws-flex-start-queued-provisioning.yml deleted file mode 100644 index 7740764058..0000000000 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-a3u-dws-flex-start-queued-provisioning.yml +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -- name: Assert variables are defined - ansible.builtin.assert: - that: - - region is defined - - custom_vars.project is defined - -- name: Get cluster credentials for kubectl - delegate_to: localhost - ansible.builtin.command: gcloud container clusters get-credentials {{ deployment_name }} --region {{ region }} --project {{ custom_vars.project }} - -- name: Create a job - delegate_to: localhost - ansible.builtin.shell: | - array=({{ workspace }}/examples/gke-consumption-options/dws-flex-start-queued-provisioning/sample-job.yaml) - kubectl create -f ${array[0]} - echo ${array[0]} - args: - executable: /bin/bash - changed_when: False - -- name: Wait for job to complete - delegate_to: localhost - ansible.builtin.command: | - kubectl get job --field-selector status.successful=1 - register: job_completion - until: job_completion.stdout_lines | length > 1 - retries: 30 - delay: 60 - -- name: Print job_completion debug output - ansible.builtin.debug: - var: job_completion.stdout_lines - -- name: Clean up - delegate_to: localhost - ansible.builtin.shell: | - kubectl delete jobset --all - -- name: Run the NCCL test - delegate_to: localhost - ansible.builtin.shell: | - kubectl create -f {{ workspace }}/examples/gke-consumption-options/dws-flex-start-queued-provisioning/nccl-jobset-example.yaml - args: - executable: /bin/bash - -- name: Wait for Job to hit 2/2 completions - delegate_to: localhost - ansible.builtin.shell: | - kubectl get job --field-selector status.successful=2 - register: job_completion - until: job_completion.stdout_lines | length > 1 - retries: 60 - delay: 60 - -- name: Fetch logs from the 0-0 pod and save to pod_logs.txt - delegate_to: localhost - ansible.builtin.shell: | - pod_0_0_name="$(kubectl get pods --no-headers -o custom-columns="Name:.metadata.name" | awk '{if ($1 ~ "0-0") print $0}')" - kubectl logs ${pod_0_0_name} > pod_logs.txt - cat pod_logs.txt - register: nccl_test_logs - -- name: Print the NCCL test logs from 0-0 pod - debug: - msg: "{{nccl_test_logs.stdout}}" - -- name: Ensure average bus bandwidth is >= 100 GB/s - delegate_to: localhost - ansible.builtin.shell: | - grep -o '# Avg bus bandwidth : [0-9\.]*' pod_logs.txt | cut -d ':' -f 2 | awk '{$1=$1;print}' - register: avg_bus_bandwidth - failed_when: avg_bus_bandwidth.stdout | float < 100 - -- name: Clean up - delegate_to: localhost - ansible.builtin.shell: | - kubectl delete jobset --all diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-a3u-dws-flex-start.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-a3u-dws-flex-start.yml deleted file mode 100644 index 69e36bc224..0000000000 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-a3u-dws-flex-start.yml +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -- name: Assert variables are defined - ansible.builtin.assert: - that: - - region is defined - - custom_vars.project is defined - -- name: Get cluster credentials for kubectl - delegate_to: localhost - ansible.builtin.command: gcloud container clusters get-credentials {{ deployment_name }} --region {{ region }} --project {{ custom_vars.project }} - -- name: Create a job - delegate_to: localhost - ansible.builtin.shell: | - array=({{ workspace }}/examples/gke-consumption-options/dws-flex-start/dws-flex-start.yaml) - kubectl create -f ${array[0]} - echo ${array[0]} - args: - executable: /bin/bash - changed_when: False - -- name: Wait for job to complete - delegate_to: localhost - ansible.builtin.command: | - kubectl get job --field-selector status.successful=1 - register: job_completion - until: job_completion.stdout_lines | length > 1 - retries: 30 - delay: 60 - -- name: Print job_completion debug output - ansible.builtin.debug: - var: job_completion.stdout_lines - -- name: Clean up - delegate_to: localhost - ansible.builtin.shell: | - kubectl delete jobset --all - -- name: Run the NCCL test - delegate_to: localhost - ansible.builtin.shell: | - kubectl create -f {{ workspace }}/examples/gke-consumption-options/dws-flex-start/nccl-jobset-example.yaml - args: - executable: /bin/bash - -- name: Wait for NCCL Job to hit 2/2 completions - delegate_to: localhost - ansible.builtin.shell: | - kubectl get job --field-selector status.successful=2 - register: job_completion - until: job_completion.stdout_lines | length > 1 - retries: 60 - delay: 60 - -- name: Fetch logs from the 0-0 pod and save to pod_logs.txt - delegate_to: localhost - ansible.builtin.shell: | - pod_0_0_name="$(kubectl get pods --no-headers -o custom-columns="Name:.metadata.name" | awk '{if ($1 ~ "0-0") print $0}')" - kubectl logs ${pod_0_0_name} > pod_logs.txt - cat pod_logs.txt - register: nccl_test_logs - -- name: Print the NCCL test logs from 0-0 pod - debug: - msg: "{{nccl_test_logs.stdout}}" - -- name: Ensure average bus bandwidth is >= 100 GB/s - delegate_to: localhost - ansible.builtin.shell: | - grep -o '# Avg bus bandwidth : [0-9\.]*' pod_logs.txt | cut -d ':' -f 2 | awk '{$1=$1;print}' - register: avg_bus_bandwidth - failed_when: avg_bus_bandwidth.stdout | float < 100 - -- name: Clean up - delegate_to: localhost - ansible.builtin.shell: | - kubectl delete jobset --all diff --git a/tools/cloud-build/daily-tests/builds/gke-a3u-dws-flex-start-queued-provisioning.yaml b/tools/cloud-build/daily-tests/builds/gke-a3u-dws-flex-start-queued-provisioning.yaml deleted file mode 100644 index 017fa35ce4..0000000000 --- a/tools/cloud-build/daily-tests/builds/gke-a3u-dws-flex-start-queued-provisioning.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- gke -- m.gke-cluster -- m.gke-node-pool -- m.service-account -- m.gpu-rdma-vpc -- m.kubectl-apply -- m.vpc - -timeout: 14400s # 4hr -steps: -- id: gke-dws-flex-start - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - EXAMPLE_BP=examples/gke-consumption-options/dws-flex-start-queued-provisioning/gke-a3-ultragpu.yaml - - # adding vm to act as remote node - echo ' - id: remote-node' >> $${EXAMPLE_BP} - echo ' source: modules/compute/vm-instance' >> $${EXAMPLE_BP} - echo ' use: [gke-a3-ultra-net-0]' >> $${EXAMPLE_BP} - echo ' settings:' >> $${EXAMPLE_BP} - echo ' machine_type: e2-standard-2' >> $${EXAMPLE_BP} - echo ' name_prefix: remote-node' >> $${EXAMPLE_BP} - echo ' add_deployment_name_before_prefix: true' >> $${EXAMPLE_BP} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a3u-dws-flex-start-queued-provisioning.yml" diff --git a/tools/cloud-build/daily-tests/builds/gke-a3u-dws-flex-start.yaml b/tools/cloud-build/daily-tests/builds/gke-a3u-dws-flex-start.yaml deleted file mode 100644 index 916664818f..0000000000 --- a/tools/cloud-build/daily-tests/builds/gke-a3u-dws-flex-start.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- gke -- m.gke-cluster -- m.gke-node-pool -- m.service-account -- m.gpu-rdma-vpc -- m.kubectl-apply -- m.vpc - -timeout: 14400s # 4hr -steps: -- id: gke-dws-flex-start - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - EXAMPLE_BP=examples/gke-consumption-options/dws-flex-start/gke-a3-ultragpu.yaml - - # adding vm to act as remote node - echo ' - id: remote-node' >> $${EXAMPLE_BP} - echo ' source: modules/compute/vm-instance' >> $${EXAMPLE_BP} - echo ' use: [gke-a3-ultra-net-0]' >> $${EXAMPLE_BP} - echo ' settings:' >> $${EXAMPLE_BP} - echo ' machine_type: e2-standard-2' >> $${EXAMPLE_BP} - echo ' name_prefix: remote-node' >> $${EXAMPLE_BP} - echo ' add_deployment_name_before_prefix: true' >> $${EXAMPLE_BP} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a3u-dws-flex-start.yml" diff --git a/tools/cloud-build/daily-tests/tests/gke-a3u-dws-flex-start-queued-provisioning.yml b/tools/cloud-build/daily-tests/tests/gke-a3u-dws-flex-start-queued-provisioning.yml deleted file mode 100644 index b772be2bcd..0000000000 --- a/tools/cloud-build/daily-tests/tests/gke-a3u-dws-flex-start-queued-provisioning.yml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -# region, zone must be defined -# in build file with --extra-vars flag! -test_name: gke-a3u-dws-flex-start -deployment_name: gke-dws-fs-{{ build }} -workspace: /workspace -blueprint_yaml: "{{ workspace }}/examples/gke-consumption-options/dws-flex-start-queued-provisioning/gke-a3-ultragpu.yaml" -network: "{{ deployment_name }}-net-0" -region: europe-west4 -zone: europe-west4-a -remote_node: "{{ deployment_name }}-remote-node-0" -cli_deployment_vars: - region: "{{ region }}" - zone: "{{ zone }}" - authorized_cidr: "{{ build_ip.stdout }}/32" - gcp_public_cidrs_access_enabled: false -custom_vars: - project: "{{ project }}" -post_deploy_tests: -- test-validation/test-gke-a3u-dws-flex-start-queued-provisioning.yml diff --git a/tools/cloud-build/daily-tests/tests/gke-a3u-dws-flex-start.yml b/tools/cloud-build/daily-tests/tests/gke-a3u-dws-flex-start.yml deleted file mode 100644 index d7de07477a..0000000000 --- a/tools/cloud-build/daily-tests/tests/gke-a3u-dws-flex-start.yml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -# region, zone must be defined -# in build file with --extra-vars flag! -test_name: gke-a3u-dws-flex-start -deployment_name: gke-dws-fs-{{ build }} -workspace: /workspace -blueprint_yaml: "{{ workspace }}/examples/gke-consumption-options/dws-flex-start/gke-a3-ultragpu.yaml" -network: "{{ deployment_name }}-net-0" -region: europe-west4 -zone: europe-west4-a -remote_node: "{{ deployment_name }}-remote-node-0" -cli_deployment_vars: - region: "{{ region }}" - zone: "{{ zone }}" - authorized_cidr: "{{ build_ip.stdout }}/32" - gcp_public_cidrs_access_enabled: false -custom_vars: - project: "{{ project }}" -post_deploy_tests: -- test-validation/test-gke-a3u-dws-flex-start.yml