From ccc8fc65c724bff2ad606032706991aaf025044a Mon Sep 17 00:00:00 2001 From: Devin <505259926@qq.com> Date: Tue, 3 Mar 2026 11:27:06 +0800 Subject: [PATCH 1/2] Improve health check error handling; mark all devices unhealthy when starting the health check fails. Update the health check error path to log the failure and mark all devices as unhealthy. Signed-off-by: devincd <505259926@qq.com> --- internal/plugin/server.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/plugin/server.go b/internal/plugin/server.go index 8d089fca6..b9357f1fc 100644 --- a/internal/plugin/server.go +++ b/internal/plugin/server.go @@ -152,7 +152,10 @@ func (plugin *nvidiaDevicePlugin) Start(kubeletSocket string) error { // TODO: add MPS health check err := plugin.rm.CheckHealth(plugin.stop, plugin.health) if err != nil { - klog.Errorf("Failed to start health check: %v; continuing with health checks disabled", err) + klog.Errorf("Failed to start health check: %v; Marking all devices as unhealthy; continuing with health checks disabled", err) + for _, d := range plugin.Devices() { + plugin.health <- d + } } }() From 20df1c4fe97437100c5d69a317591128c210ed65 Mon Sep 17 00:00:00 2001 From: devincd <505259926@qq.com> Date: Tue, 3 Mar 2026 15:42:08 +0800 Subject: [PATCH 2/2] Lowercase "marking" in the health check failure log message Signed-off-by: devincd <505259926@qq.com> --- internal/plugin/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/plugin/server.go b/internal/plugin/server.go index b9357f1fc..61fd7006e 100644 --- a/internal/plugin/server.go +++ b/internal/plugin/server.go @@ -152,7 +152,7 @@ func (plugin *nvidiaDevicePlugin) Start(kubeletSocket string) error { // TODO: add MPS health check err := plugin.rm.CheckHealth(plugin.stop, plugin.health) if err != nil { - klog.Errorf("Failed to start health check: %v; Marking all devices as unhealthy; continuing with health checks disabled", err) + klog.Errorf("Failed to start health check: %v; marking all devices as unhealthy; continuing with health checks 
disabled", err) for _, d := range plugin.Devices() { plugin.health <- d }