diff --git a/cmd/metrics/metadata.go b/cmd/metrics/metadata.go index db65de7e..4b7c7017 100644 --- a/cmd/metrics/metadata.go +++ b/cmd/metrics/metadata.go @@ -427,7 +427,11 @@ func getCPUInfo(t target.Target) (cpuInfo []map[string]string, err error) { cmd := exec.Command("cat", "/proc/cpuinfo") stdout, stderr, exitcode, err := t.RunCommand(cmd) if err != nil { - err = fmt.Errorf("failed to get cpuinfo: %s, %d, %v", stderr, exitcode, err) + err = fmt.Errorf("failed to execute cat command: %v", err) + return + } + if exitcode != 0 { + err = fmt.Errorf("failed to get cpuinfo: %s, exit code %d", stderr, exitcode) return } oneCPUInfo := make(map[string]string) @@ -479,8 +483,12 @@ func getNumGPCounters(uarch string) (numGPCounters int, err error) { func getLscpu(t target.Target) (output string, err error) { cmd := exec.Command("lscpu") output, stderr, exitcode, err := t.RunCommand(cmd) - if err != nil || exitcode != 0 { - err = fmt.Errorf("failed to run lscpu: %s, %d, %v", stderr, exitcode, err) + if err != nil { + err = fmt.Errorf("failed to execute lscpu command: %v", err) + return + } + if exitcode != 0 { + err = fmt.Errorf("failed to run lscpu: %s, exit code %d", stderr, exitcode) return } return diff --git a/cmd/metrics/nmi_watchdog.go b/cmd/metrics/nmi_watchdog.go index 4ba0d501..c230d450 100644 --- a/cmd/metrics/nmi_watchdog.go +++ b/cmd/metrics/nmi_watchdog.go @@ -50,10 +50,14 @@ func getNMIWatchdog(myTarget target.Target) (setting string, err error) { return } cmd := exec.Command(sysctl, "kernel.nmi_watchdog") // #nosec G204 // nosemgrep - stdout, _, _, err := myTarget.RunCommand(cmd) + stdout, _, exitCode, err := myTarget.RunCommand(cmd) if err != nil { return } + if exitCode != 0 { + err = fmt.Errorf("sysctl command returned exit code %d", exitCode) + return + } out := stdout setting = out[len(out)-2 : len(out)-1] return @@ -89,8 +93,8 @@ func setNMIWatchdog(myTarget target.Target, setting string, localTempDir string) // findSysctl - gets a useable path to sysctl or error func findSysctl(myTarget target.Target) (path string, err error) { cmd := exec.Command("which", "sysctl") - stdout, _, _, err := myTarget.RunCommand(cmd) - if err == nil { + stdout, _, exitCode, err := myTarget.RunCommand(cmd) + if err == nil && exitCode == 0 { //found it path = strings.TrimSpace(stdout) return @@ -98,8 +102,8 @@ func findSysctl(myTarget target.Target) (path string, err error) { // didn't find it on the path, try being specific sbinPath := "/usr/sbin/sysctl" cmd = exec.Command("which", sbinPath) - _, _, _, err = myTarget.RunCommand(cmd) - if err == nil { + _, _, exitCode, err = myTarget.RunCommand(cmd) + if err == nil && exitCode == 0 { // found it path = sbinPath return diff --git a/cmd/metrics/process.go b/cmd/metrics/process.go index 2ee19b24..60c49e33 100644 --- a/cmd/metrics/process.go +++ b/cmd/metrics/process.go @@ -35,7 +35,12 @@ var psRegex = `^\s*(\d+)\s+(\d+)\s+([\w\d\(\)\:\/_\-\:\.]+)\s+(.*)` // set of running processes. func GetProcesses(myTarget target.Target, pids []string) (processes []Process, err error) { for _, pid := range pids { - if processExists(myTarget, pid) { + var exists bool + exists, err = processExists(myTarget, pid) + if err != nil { + return processes, err + } + if exists { var process Process if process, err = getProcess(myTarget, pid); err != nil { return @@ -71,7 +76,11 @@ func GetHotProcesses(myTarget target.Target, maxProcesses int, filter string) (p cmd := exec.Command("ps", "-a", "-x", "-h", "-o", "pid,ppid,comm,cmd", "--sort=-%cpu") stdout, stderr, exitcode, err := myTarget.RunCommand(cmd) if err != nil { - err = fmt.Errorf("failed to get hot processes: %s, %d, %v", stderr, exitcode, err) + err = fmt.Errorf("failed to execute ps command: %v", err) + return + } + if exitcode != 0 { + err = fmt.Errorf("failed to get hot processes: %s, exit code %d", stderr, exitcode) return } psOutput := stdout @@ -177,14 +186,16 @@ done | sort -nr | head -n %d return } -func processExists(myTarget target.Target, pid string) (exists bool) { +func processExists(myTarget target.Target, pid string) (exists bool, err error) { cmd := exec.Command("ps", "-p", pid) - _, _, _, err := myTarget.RunCommand(cmd) + var exitCode int + _, _, exitCode, err = myTarget.RunCommand(cmd) if err != nil { - exists = false + slog.Error("failed to check if process exists", slog.String("PID", pid), slog.String("error", err.Error())) return } - exists = true + // ps -p returns 0 if process exists, non-zero otherwise + exists = exitCode == 0 return } @@ -192,7 +203,11 @@ func getProcess(myTarget target.Target, pid string) (process Process, err error) cmd := exec.Command("ps", "-q", pid, "h", "-o", "pid,ppid,comm,cmd", "ww") stdout, stderr, exitcode, err := myTarget.RunCommand(cmd) if err != nil { - err = fmt.Errorf("failed to get process: %s, %d, %v", stderr, exitcode, err) + err = fmt.Errorf("failed to execute ps command: %v", err) + return + } + if exitcode != 0 { + err = fmt.Errorf("failed to get process: %s, exit code %d", stderr, exitcode) return } psOutput := stdout diff --git a/internal/script/script.go b/internal/script/script.go index 171741fb..4d8b78c5 100644 --- a/internal/script/script.go +++ b/internal/script/script.go @@ -133,9 +133,13 @@ func RunScripts(myTarget target.Target, scripts []ScriptDefinition, continueOnSc reuseSSHConnection := false // don't reuse ssh connection on long-running commands, makes it difficult to kill the command stdout, stderr, exitcode, err := myTarget.RunCommandEx(cmd, timeout, newProcessGroup, reuseSSHConnection) if err != nil { - slog.Error("error running controller script on target", slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitcode", exitcode), slog.String("error", err.Error())) + slog.Error("failed to execute controller script on target", slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitcode", exitcode), slog.String("error", err.Error())) return nil, err } + if exitcode != 0 { + slog.Error("controller script returned non-zero exit code", slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitcode", exitcode)) + return nil, fmt.Errorf("controller script returned exit code %d", exitcode) + } // parse output of controller script allScriptOutputs := parseControllerScriptOutput(stdout) for _, scriptOutput := range allScriptOutputs { diff --git a/internal/target/helpers.go b/internal/target/helpers.go index 4ac07067..461cd234 100644 --- a/internal/target/helpers.go +++ b/internal/target/helpers.go @@ -39,9 +39,13 @@ func installLkms(t Target, lkms []string) (installedLkms []string, err error) { } else { cmd = exec.Command("modprobe", "--first-time", lkm) } - _, _, _, err := t.RunCommandEx(cmd, 10, false, true) // #nosec G204 + _, _, exitCode, err := t.RunCommandEx(cmd, 10, false, true) // #nosec G204 if err != nil { - slog.Debug("kernel module already installed or problem installing", slog.String("lkm", lkm), slog.String("error", err.Error())) + slog.Error("failed to run modprobe command", slog.String("lkm", lkm), slog.String("error", err.Error())) + continue + } + if exitCode != 0 { + slog.Debug("kernel module already installed or problem installing", slog.String("lkm", lkm), slog.Int("exitCode", exitCode)) continue } slog.Debug("kernel module installed", slog.String("lkm", lkm)) @@ -73,9 +77,13 @@ func uninstallLkms(t Target, lkms []string) (err error) { } else { cmd = exec.Command("modprobe", "-r", lkm) } - _, _, _, err := t.RunCommandEx(cmd, 10, false, true) // #nosec G204 + _, _, exitCode, err := t.RunCommandEx(cmd, 10, false, true) // #nosec G204 if err != nil { - slog.Error("error uninstalling kernel module", slog.String("lkm", lkm), slog.String("error", err.Error())) + slog.Error("failed to run modprobe command", slog.String("lkm", lkm), slog.String("error", err.Error())) + continue + } + if exitCode != 0 { + slog.Error("error uninstalling kernel module", slog.String("lkm", lkm), slog.Int("exitCode", exitCode)) continue } slog.Debug("kernel module uninstalled", slog.String("lkm", lkm)) @@ -125,10 +133,14 @@ func runLocalCommandWithInputWithTimeout(cmd *exec.Cmd, input string, timeout in stdout = outbuf.String() stderr = errbuf.String() if err != nil { - exitError := &exec.ExitError{} + var exitError *exec.ExitError if errors.As(err, &exitError) { + // Command executed but returned non-zero exit code. + // This is not an execution failure, so return nil error. exitCode = exitError.ExitCode() + err = nil } + // Otherwise keep original error (actual execution failure) } return } @@ -197,11 +209,16 @@ func runLocalCommandWithInputWithTimeoutAsync(cmd *exec.Cmd, stdoutChannel chan }() err = cmd.Wait() if err != nil { - if exitError, ok := err.(*exec.ExitError); ok { + var exitError *exec.ExitError + if errors.As(err, &exitError) { + // Command executed but returned non-zero exit code. + // This is not an execution failure, so don't return error. exitcodeChannel <- exitError.ExitCode() } else { + // Actual execution failure slog.Error("unexpected error type while waiting for command to finish", slog.String("cmd", cmd.String()), slog.String("error", err.Error())) exitcodeChannel <- -1 + return err } } else { exitcodeChannel <- 0 @@ -221,10 +238,15 @@ func runLocalCommandWithInputWithTimeoutAsync(cmd *exec.Cmd, stdoutChannel chan // - err: An error if the command execution fails or if there is an issue retrieving the architecture. func getArchitecture(t Target) (arch string, err error) { cmd := exec.Command("uname", "-m") - arch, _, _, err = t.RunCommand(cmd) + var exitCode int + arch, _, exitCode, err = t.RunCommand(cmd) if err != nil { return } + if exitCode != 0 { + err = fmt.Errorf("uname command returned exit code %d", exitCode) + return + } arch = strings.TrimSpace(arch) return } diff --git a/internal/target/local_target.go b/internal/target/local_target.go index 74db072c..232a751d 100644 --- a/internal/target/local_target.go +++ b/internal/target/local_target.go @@ -182,15 +182,15 @@ func (t *LocalTarget) CanElevatePrivileges() bool { slog.Error("error writing sudo password", slog.String("error", err.Error())) } }() - _, _, _, err := t.RunCommand(cmd) - if err == nil { + _, _, exitCode, err := t.RunCommand(cmd) + if err == nil && exitCode == 0 { t.canElevate = 1 return true // sudo password works } } cmd := exec.Command("sudo", "-kS", "ls") - _, _, _, err := t.RunCommand(cmd) - if err == nil { // true - passwordless sudo works + _, _, exitCode, err := t.RunCommand(cmd) + if err == nil && exitCode == 0 { // true - passwordless sudo works t.canElevate = 1 return true } diff --git a/internal/target/remote_target.go b/internal/target/remote_target.go index f75313db..2355a042 100644 --- a/internal/target/remote_target.go +++ b/internal/target/remote_target.go @@ -87,10 +87,15 @@ func (t *RemoteTarget) CreateTempDirectory(rootDir string) (tempDir string, err root = fmt.Sprintf("--tmpdir=%s", rootDir) } cmd := exec.Command("mktemp", "-d", "-t", root, "perfspect.tmp.XXXXXXXXXX", "|", "xargs", "realpath") // #nosec G204 - tempDir, _, _, err = t.RunCommand(cmd) + var exitCode int + tempDir, _, exitCode, err = t.RunCommand(cmd) if err != nil { return } + if exitCode != 0 { + err = fmt.Errorf("mktemp command returned exit code %d", exitCode) + return + } tempDir = strings.TrimSpace(tempDir) t.tempDir = tempDir return @@ -127,7 +132,13 @@ func (t *RemoteTarget) GetTempDirectory() string { func (t *RemoteTarget) PushFile(srcPath string, dstDir string) error { stdout, stderr, exitCode, err := t.prepareAndRunSCPCommand(srcPath, dstDir, true) slog.Debug("push file", slog.String("srcPath", srcPath), slog.String("dstDir", dstDir), slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitCode", exitCode)) - return err + if err != nil { + return err + } + if exitCode != 0 { + return fmt.Errorf("scp command returned exit code %d: %s", exitCode, stderr) + } + return nil } // PullFile copies a file from a remote source path to a local destination directory @@ -143,20 +154,40 @@ func (t *RemoteTarget) PushFile(srcPath string, dstDir string) error { func (t *RemoteTarget) PullFile(srcPath string, dstDir string) error { stdout, stderr, exitCode, err := t.prepareAndRunSCPCommand(srcPath, dstDir, false) slog.Debug("pull file", slog.String("srcPath", srcPath), slog.String("dstDir", dstDir), slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitCode", exitCode)) - return err + if err != nil { + return err + } + if exitCode != 0 { + return fmt.Errorf("scp command returned exit code %d: %s", exitCode, stderr) + } + return nil } func (t *RemoteTarget) CreateDirectory(baseDir string, targetDir string) (dir string, err error) { dir = filepath.Join(baseDir, targetDir) cmd := exec.Command("mkdir", dir) - _, _, _, err = t.RunCommand(cmd) + var exitCode int + _, _, exitCode, err = t.RunCommand(cmd) + if err != nil { + return + } + if exitCode != 0 { + err = fmt.Errorf("mkdir command returned exit code %d", exitCode) + } return } func (t *RemoteTarget) RemoveDirectory(targetDir string) (err error) { if targetDir != "" { cmd := exec.Command("rm", "-rf", targetDir) - _, _, _, err = t.RunCommand(cmd) + var exitCode int + _, _, exitCode, err = t.RunCommand(cmd) + if err != nil { + return + } + if exitCode != 0 { + err = fmt.Errorf("rm command returned exit code %d", exitCode) + } } return } @@ -164,8 +195,8 @@ func (t *RemoteTarget) RemoveDirectory(targetDir string) (err error) { // CanConnect checks if the target is reachable. func (t *RemoteTarget) CanConnect() bool { cmd := exec.Command("exit", "0") - _, _, _, err := t.RunCommand(cmd) - return err == nil + _, _, exitCode, err := t.RunCommand(cmd) + return err == nil && exitCode == 0 } // CanElevatePrivileges (on RemoteTarget) checks if the user name is root or if sudo can be used to elevate privileges. @@ -179,8 +210,8 @@ func (t *RemoteTarget) CanElevatePrivileges() bool { return true } cmd := exec.Command("sudo", "-kS", "ls") - _, _, _, err := t.RunCommand(cmd) - if err == nil { // true - passwordless sudo works + _, _, exitCode, err := t.RunCommand(cmd) + if err == nil && exitCode == 0 { // true - passwordless sudo works t.canElevate = 1 return true } @@ -210,10 +241,13 @@ func (t *RemoteTarget) GetName() (host string) { func (t *RemoteTarget) GetUserPath() (string, error) { if t.userPath == "" { cmd := exec.Command("echo", "$PATH") - stdout, _, _, err := t.RunCommand(cmd) + stdout, _, exitCode, err := t.RunCommand(cmd) if err != nil { return "", err } + if exitCode != 0 { + return "", fmt.Errorf("echo command returned exit code %d", exitCode) + } t.userPath = strings.TrimSpace(stdout) } return t.userPath, nil diff --git a/internal/workflow/signals.go b/internal/workflow/signals.go index 186a1411..ac6cc1a3 100644 --- a/internal/workflow/signals.go +++ b/internal/workflow/signals.go @@ -5,7 +5,6 @@ package workflow import ( "context" - "errors" "fmt" "log/slog" "os" @@ -38,8 +37,14 @@ func signalProcessOnTarget(t target.Target, pidStr string, sigStr string) error // waitTime must exceed the controller script's kill_script graceful shutdown period (5s) // plus buffer for network latency when working with remote targets waitTime := 15 - _, _, _, err := t.RunCommandEx(cmd, waitTime, false, true) // #nosec G204 - return err + _, _, exitCode, err := t.RunCommandEx(cmd, waitTime, false, true) // #nosec G204 + if err != nil { + return err + } + if exitCode != 0 { + return fmt.Errorf("kill command returned exit code %d", exitCode) + } + return nil } // configureSignalHandler sets up a signal handler to catch SIGINT and SIGTERM @@ -72,18 +77,18 @@ func configureSignalHandler(myTargets []target.Target, statusFunc progress.Multi _ = statusFunc(t.GetName(), "Signal received, cleaning up...") } pidFilePath := filepath.Join(t.GetTempDirectory(), script.ControllerPIDFileName) - stdout, _, _, err := t.RunCommandEx(exec.Command("cat", pidFilePath), 5, false, true) // #nosec G204 - // if there's an error and the error type is exec.ExitError, the file likely doesn't exist - // so we can skip sending the signal to this target + stdout, _, exitCode, err := t.RunCommandEx(exec.Command("cat", pidFilePath), 5, false, true) // #nosec G204 + // if there's an execution error, log and skip if err != nil { - var exitErr *exec.ExitError - if errors.As(err, &exitErr) { - slog.Debug("target controller PID file not found, assuming script has already exited", slog.String("target", t.GetName())) - continue - } slog.Error("failed to retrieve target controller PID", slog.String("target", t.GetName()), slog.String("error", err.Error())) continue } + // if exit code is non-zero, the file likely doesn't exist + // so we can skip sending the signal to this target + if exitCode != 0 { + slog.Debug("target controller PID file not found, assuming script has already exited", slog.String("target", t.GetName())) + continue + } pid := strings.TrimSpace(stdout) // confirm pid is a valid integer if _, err := strconv.Atoi(pid); err != nil { @@ -108,16 +113,16 @@ func configureSignalHandler(myTargets []target.Target, statusFunc progress.Multi timedOut := false for { // determine if the process still exists - _, _, _, err := tgt.RunCommandEx(exec.Command("ps", "-p", pid), 5, false, true) // #nosec G204 + _, _, exitCode, err := tgt.RunCommandEx(exec.Command("ps", "-p", pid), 5, false, true) // #nosec G204 if err != nil { - var exitErr *exec.ExitError - if errors.As(err, &exitErr) { - slog.Debug("target controller process no longer exists", slog.String("target", tgt.GetName())) - break - } slog.Error("failed to check target controller process", slog.String("target", tgt.GetName()), slog.String("error", err.Error())) break } + // ps -p returns non-zero exit code if the process doesn't exist + if exitCode != 0 { + slog.Debug("target controller process no longer exists", slog.String("target", tgt.GetName())) + break + } // check for timeout select { case <-ctx.Done(): diff --git a/internal/workflow/targets.go b/internal/workflow/targets.go index f7134907..64a60375 100644 --- a/internal/workflow/targets.go +++ b/internal/workflow/targets.go @@ -480,10 +480,12 @@ func parseMountOutput(mountOutput string) ([]mountRecord, error) { // isDirNoExec checks if the target directory is on a file system that is mounted with noexec. func isDirNoExec(t target.Target, dir string) (bool, error) { dfCmd := exec.Command("df", "-P", dir) - dfOutput, _, _, err := t.RunCommand(dfCmd) + dfOutput, _, exitCode, err := t.RunCommand(dfCmd) if err != nil { - err = fmt.Errorf("failed to run df command: %w", err) - return false, err + return false, fmt.Errorf("failed to run df command: %w", err) + } + if exitCode != 0 { + return false, fmt.Errorf("df command returned exit code %d", exitCode) } filesystem, err := fieldFromDfpOutput(dfOutput, "Filesystem") if err != nil { @@ -494,10 +496,12 @@ func isDirNoExec(t target.Target, dir string) (bool, error) { return false, err } mountCmd := exec.Command("mount") - mountOutput, _, _, err := t.RunCommand(mountCmd) + mountOutput, _, exitCode, err := t.RunCommand(mountCmd) if err != nil { - err = fmt.Errorf("failed to run mount command: %w", err) - return false, err + return false, fmt.Errorf("failed to run mount command: %w", err) + } + if exitCode != 0 { + return false, fmt.Errorf("mount command returned exit code %d", exitCode) } mounts, err := parseMountOutput(mountOutput) if err != nil { @@ -539,11 +543,15 @@ func GetTargetVendor(t target.Target) (string, error) { vendor := t.GetVendor() if vendor == "" { cmd := exec.Command("bash", "-c", "lscpu | grep -i \"^Vendor ID:\" | awk '{print $NF}'") + var exitCode int var err error - vendor, _, _, err = t.RunCommand(cmd) + vendor, _, exitCode, err = t.RunCommand(cmd) if err != nil { return "", fmt.Errorf("failed to get target CPU vendor: %v", err) } + if exitCode != 0 { + return "", fmt.Errorf("failed to get target CPU vendor: command returned exit code %d", exitCode) + } vendor = strings.TrimSpace(vendor) t.SetVendor(vendor) } @@ -555,11 +563,15 @@ func GetTargetFamily(t target.Target) (string, error) { family := t.GetFamily() if family == "" { cmd := exec.Command("bash", "-c", "lscpu | grep -i \"^CPU family:\" | awk '{print $NF}'") + var exitCode int var err error - family, _, _, err = t.RunCommand(cmd) + family, _, exitCode, err = t.RunCommand(cmd) if err != nil { return "", fmt.Errorf("failed to get target CPU family: %v", err) } + if exitCode != 0 { + return "", fmt.Errorf("failed to get target CPU family: command returned exit code %d", exitCode) + } family = strings.TrimSpace(family) t.SetFamily(family) } @@ -571,11 +583,15 @@ func GetTargetModel(t target.Target) (string, error) { model := t.GetModel() if model == "" { cmd := exec.Command("bash", "-c", "lscpu | grep -i \"^Model:\" | awk '{print $NF}'") + var exitCode int var err error - model, _, _, err = t.RunCommand(cmd) + model, _, exitCode, err = t.RunCommand(cmd) if err != nil { return "", fmt.Errorf("failed to get target CPU model: %v", err) } + if exitCode != 0 { + return "", fmt.Errorf("failed to get target CPU model: command returned exit code %d", exitCode) + } model = strings.TrimSpace(model) t.SetModel(model) } @@ -587,11 +603,15 @@ func GetTargetStepping(t target.Target) (string, error) { stepping := t.GetStepping() if stepping == "" { cmd := exec.Command("bash", "-c", "lscpu | grep -i \"^Stepping:\" | awk '{print $NF}'") + var exitCode int var err error - stepping, _, _, err = t.RunCommand(cmd) + stepping, _, exitCode, err = t.RunCommand(cmd) if err != nil { return "", fmt.Errorf("failed to get target CPU stepping: %v", err) } + if exitCode != 0 { + return "", fmt.Errorf("failed to get target CPU stepping: command returned exit code %d", exitCode) + } stepping = strings.TrimSpace(stepping) t.SetStepping(stepping) }