From 2f1267a5bd3301ebb5a4ba1ae0b543e4c245531c Mon Sep 17 00:00:00 2001 From: Akanksha Gupta Date: Thu, 18 Sep 2025 22:14:02 +0000 Subject: [PATCH 1/3] Not working - add pod annotations --- Makefile | 2 +- api/v1/pathwaysjob_types.go | 2 ++ ...ways-job.pathways.domain_pathwaysjobs.yaml | 35 +++++++++++++++++++ config/manager/kustomization.yaml | 4 +-- internal/controller/pathwaysjob_controller.go | 9 ++++- 5 files changed, 48 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 6f6fb95e..81126930 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ help: ## Display this help. .PHONY: manifests manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. - $(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases + $(CONTROLLER_GEN) rbac:roleName=manager-role crd:generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases .PHONY: generate generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index e5097fde..c7dbcb1a 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -172,6 +172,8 @@ type ControllerSpec struct { // https://pkg.go.dev/k8s.io/api/core/v1#PodTemplateSpec // +optional // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="userPodTemplate is immutable" + // +kubebuilder:pruning:PreserveUnknownFields + // +crd:generateEmbeddedObjectMeta=true UserPodTemplate *corev1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,6,opt,name=template"` // Enables elasticity and sets the maximum number of slices diff --git a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml index 7a8ac1f5..106f73bf 100644 --- a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml +++ b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml @@ -102,6 +102,23 @@ spec: description: |- Standard object's metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string type: object spec: description: |- @@ -6852,6 +6869,23 @@ spec: May contain labels and annotations that will be copied into the PVC when creating it. No other fields are allowed and will be rejected during validation. + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string type: object spec: description: |- @@ -8181,6 +8215,7 @@ spec: - containers type: object type: object + x-kubernetes-preserve-unknown-fields: true x-kubernetes-validations: - message: userPodTemplate is immutable rule: self == oldSelf diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 9b986dc0..1c9c2e5b 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -18,5 +18,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: us-docker.pkg.dev/cloud-tpu-v2-images/pathways-job/pathwaysjob-controller - newTag: v0.1.2 + newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/controller + newTag: akshu-ssi-test-2 diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 301564c2..7412f4f0 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -218,11 +218,14 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo workerJob, _ := MakeWorkerJob(ctx, pw) successPolicy := MakeSuccessPolicy(pw) + fmt.Println("findme1", pw) + fmt.Println("findme2", pw.GetObjectMeta()) mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ Name: pw.GetName(), Namespace: pw.GetNamespace(), Labels: pw.GetObjectMeta().GetLabels(), + Annotations: pw.GetObjectMeta().GetAnnotations(), }, Spec: jobsetv1alpha2.JobSetSpec{ StartupPolicy: &jobsetv1alpha2.StartupPolicy{ @@ -874,12 +877,13 @@ func injectJAXBackendTargetIntoMainContainer(pw *pathwaysjob.PathwaysJob, pathwa func MakePathwaysHeadReplicatedJob(pw *pathwaysjob.PathwaysJob, pathwaysHeadPodSpec corev1.PodSpec) jobsetv1alpha2.ReplicatedJob { var annotations map[string]string - annotations = nil + annotations = pw.GetObjectMeta().GetAnnotations() if pw.Spec.Controller.DeploymentMode == pathwaysjob.Default { annotations = map[string]string{ "alpha.jobset.sigs.k8s.io/exclusive-topology": "kubernetes.io/hostname", } // needed so that head pods are placed exclusively on CPU nodes. } + fmt.Println("findme3 pw.GetObjectMeta().GetAnnotations():", pw.GetObjectMeta().GetAnnotations()) pathwaysHeadJob := jobsetv1alpha2.ReplicatedJob{ Name: PathwaysHeadJobName, Replicas: 1, @@ -893,6 +897,9 @@ func MakePathwaysHeadReplicatedJob(pw *pathwaysjob.PathwaysJob, pathwaysHeadPodS Parallelism: ptr.To(int32(1)), Template: corev1.PodTemplateSpec{ Spec: pathwaysHeadPodSpec, + ObjectMeta: metav1.ObjectMeta{ // todo + Annotations: pw.GetObjectMeta().GetAnnotations(), + }, }, }, }, From 73ed7e702a70f3a58b04adb1d6e97bfb9f504130 Mon Sep 17 00:00:00 2001 From: Akanksha Gupta Date: Thu, 18 Sep 2025 23:00:05 +0000 Subject: [PATCH 2/3] update --- config/manager/kustomization.yaml | 2 +- internal/controller/pathwaysjob_controller.go | 38 +++++++++++++------ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 1c9c2e5b..95e2fd26 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -19,4 +19,4 @@ kind: Kustomization images: - name: controller newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/controller - newTag: akshu-ssi-test-2 + newTag: akshu-ssi-test-3 diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 7412f4f0..e0b3cb81 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -218,8 +218,8 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo workerJob, _ := MakeWorkerJob(ctx, pw) successPolicy := MakeSuccessPolicy(pw) - fmt.Println("findme1", pw) - fmt.Println("findme2", pw.GetObjectMeta()) + log.Info("findme1", pw) + log.Info("findme2", pw.GetObjectMeta()) mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ Name: pw.GetName(), @@ -851,6 +851,13 @@ func MakePathwaysHeadPodSpec(pw *pathwaysjob.PathwaysJob) *corev1.PodSpec { Containers: containerList, } // end PodSpec } + // The user pod template can have its own annotations. + // We should merge them with the annotations from the PathwaysJob. + if isUserPodProvided(pw) && pw.Spec.Controller.UserPodTemplate.Annotations != nil { + for k, v := range pw.GetObjectMeta().GetAnnotations() { + pw.Spec.Controller.UserPodTemplate.Annotations[k] = v + } + } return pathwaysHeadPodSpec } @@ -875,15 +882,21 @@ func injectJAXBackendTargetIntoMainContainer(pw *pathwaysjob.PathwaysJob, pathwa } -func MakePathwaysHeadReplicatedJob(pw *pathwaysjob.PathwaysJob, pathwaysHeadPodSpec corev1.PodSpec) jobsetv1alpha2.ReplicatedJob { +func MakePathwaysHeadReplicatedJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, pathwaysHeadPodSpec corev1.PodSpec) jobsetv1alpha2.ReplicatedJob { var annotations map[string]string - annotations = pw.GetObjectMeta().GetAnnotations() + log := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) + ctx = ctrl.LoggerInto(ctx, log) + log.Info("findme3", pw.GetObjectMeta().GetAnnotations()) + // Start with annotations from the PathwaysJob. + annotations = make(map[string]string) + for k, v := range pw.GetObjectMeta().GetAnnotations() { + annotations[k] = v + } + if pw.Spec.Controller.DeploymentMode == pathwaysjob.Default { - annotations = map[string]string{ - "alpha.jobset.sigs.k8s.io/exclusive-topology": "kubernetes.io/hostname", - } // needed so that head pods are placed exclusively on CPU nodes. + // needed so that head pods are placed exclusively on CPU nodes. + annotations["alpha.jobset.sigs.k8s.io/exclusive-topology"] = "kubernetes.io/hostname" } - fmt.Println("findme3 pw.GetObjectMeta().GetAnnotations():", pw.GetObjectMeta().GetAnnotations()) pathwaysHeadJob := jobsetv1alpha2.ReplicatedJob{ Name: PathwaysHeadJobName, Replicas: 1, @@ -897,8 +910,8 @@ func MakePathwaysHeadReplicatedJob(pw *pathwaysjob.PathwaysJob, pathwaysHeadPodS Parallelism: ptr.To(int32(1)), Template: corev1.PodTemplateSpec{ Spec: pathwaysHeadPodSpec, - ObjectMeta: metav1.ObjectMeta{ // todo - Annotations: pw.GetObjectMeta().GetAnnotations(), + ObjectMeta: metav1.ObjectMeta{ + Annotations: annotations, }, }, }, @@ -917,12 +930,13 @@ func MakePathwaysHeadJobForColocateHeadWithWorkersDeployment(ctx context.Context podSpec.Affinity = affinitySpec podSpec.Tolerations = tolerations - return MakePathwaysHeadReplicatedJob(pw, podSpec), nil + return MakePathwaysHeadReplicatedJob(ctx, pw, podSpec), nil } // Construct pathways-head replicated job containing Pathways RM, Pathways Proxy and the user job containers for the 'default' deployment mode. // In the default mode, the Pathways head pod is placed on CPU nodes. func MakePathwaysHeadJobForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJob) (jobsetv1alpha2.ReplicatedJob, error) { podSpec := *MakePathwaysHeadPodSpec(pw) - return MakePathwaysHeadReplicatedJob(pw, podSpec), nil + return MakePathwaysHeadReplicatedJob(ctx, pw, podSpec), nil } + From 02f522507e9471b4da1990c8b27b5965d702c341 Mon Sep 17 00:00:00 2001 From: Akanksha Gupta Date: Thu, 18 Sep 2025 23:25:23 +0000 Subject: [PATCH 3/3] update --- config/manager/kustomization.yaml | 2 +- internal/controller/pathwaysjob_controller.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 95e2fd26..53db6260 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -19,4 +19,4 @@ kind: Kustomization images: - name: controller newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/controller - newTag: akshu-ssi-test-3 + newTag: akshu-ssi-test-4 diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index e0b3cb81..fa3ed4b1 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -218,8 +218,8 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo workerJob, _ := MakeWorkerJob(ctx, pw) successPolicy := MakeSuccessPolicy(pw) - log.Info("findme1", pw) - log.Info("findme2", pw.GetObjectMeta()) + log.Info("findme1", "pw", pw) + log.Info("findme2", "meta", pw.GetObjectMeta()) mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ Name: pw.GetName(), @@ -886,7 +886,7 @@ func MakePathwaysHeadReplicatedJob(ctx context.Context, pw *pathwaysjob.Pathways var annotations map[string]string log := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) ctx = ctrl.LoggerInto(ctx, log) - log.Info("findme3", pw.GetObjectMeta().GetAnnotations()) + log.Info("findme3", "anno", pw.GetObjectMeta().GetAnnotations()) // Start with annotations from the PathwaysJob. annotations = make(map[string]string) for k, v := range pw.GetObjectMeta().GetAnnotations() {