From 304981ddaade1b73cb08844bc7d303c1be5619df Mon Sep 17 00:00:00 2001 From: majiayu000 <1835304752@qq.com> Date: Tue, 30 Dec 2025 20:35:12 +0800 Subject: [PATCH] feat(agent): add Kubernetes readiness and liveness probes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add health check endpoints to the agent's integration collector HTTP server: - /v1/health/ - liveness probe endpoint - /v1/ready/ - readiness probe endpoint Configure Kubernetes probes in the DaemonSet manifest: - readinessProbe: checks /v1/ready/ with 5s initial delay - livenessProbe: checks /v1/health/ with 15s initial delay This follows the standard Kubernetes pattern for health probes and helps with proper pod lifecycle management in Kubernetes environments. Closes #2311 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 Signed-off-by: majiayu000 <1835304752@qq.com> --- agent/docker/deepflow-agent-ds.yaml | 16 ++++++++++++++++ agent/src/integration_collector.rs | 12 ++++++++++++ 2 files changed, 28 insertions(+) diff --git a/agent/docker/deepflow-agent-ds.yaml b/agent/docker/deepflow-agent-ds.yaml index a1cdccc7fc5..5ca874b9912 100644 --- a/agent/docker/deepflow-agent-ds.yaml +++ b/agent/docker/deepflow-agent-ds.yaml @@ -24,6 +24,22 @@ spec: imagePullPolicy: Never securityContext: privileged: true + readinessProbe: + httpGet: + path: /v1/ready/ + port: 38086 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + livenessProbe: + httpGet: + path: /v1/health/ + port: 38086 + initialDelaySeconds: 15 + periodSeconds: 20 + timeoutSeconds: 5 + failureThreshold: 3 volumeMounts: - name: sys-kernel-debug mountPath: /sys/kernel/debug diff --git a/agent/src/integration_collector.rs b/agent/src/integration_collector.rs index be9d83f1ed8..ede10e6f476 100644 --- a/agent/src/integration_collector.rs +++ b/agent/src/integration_collector.rs @@ -637,6 +637,18 @@ async fn handler( .body(doc_bytes.as_slice().into()) .unwrap()) } + // Health check endpoint for liveness probe + (&Method::GET, "/v1/health/") => Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(r#"{"status":"healthy"}"#.into()) + .unwrap()), + // Readiness probe endpoint - returns 200 when the server is ready to accept traffic + (&Method::GET, "/v1/ready/") => Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(r#"{"status":"ready"}"#.into()) + .unwrap()), // OpenTelemetry trace integration (&Method::POST, "/api/v1/otel/trace") => { if external_trace_integration_disabled {