diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index c022f02..c40a060 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -15,7 +15,7 @@ jobs: - name: Install dependencies run: | sudo apt-get update && sudo apt-get install -y \ - golang-1.22 \ + golang-1.23 \ git \ make \ gcc \ diff --git a/bpf/trace.c b/bpf/trace.c index da8fbf8..b36183f 100644 --- a/bpf/trace.c +++ b/bpf/trace.c @@ -15,14 +15,15 @@ extern int LINUX_KERNEL_VERSION __kconfig; // 定义数据结构来存储调度延迟信息 struct sched_latency_t { - __u32 pid; // 进程ID - __u32 tid; // 线程ID - __u64 delay_ns; // 调度延迟(纳秒) - __u64 ts; // 时间戳 - __u32 preempted_pid; // 被抢占的进程ID - char preempted_comm[16]; // 被抢占的进程名 - __u64 is_preempt; // 是否抢占(0: 否, 1: 是) - char comm[16]; // 进程名 + __u32 pid; // 进程ID + __u32 tid; // 线程ID + __u64 delay_ns; // 调度延迟(纳秒) + __u64 ts; // 时间戳 + __u32 preempted_pid; // 被抢占的进程ID + char preempted_comm[16]; // 被抢占的进程名 + __u64 is_preempt; // 是否抢占(0: 否, 1: 是) + char comm[16]; // 进程名 + __u32 preempted_pid_state; // 被抢占的进程状态 } __attribute__((packed)); struct sched_latency_t *unused_sched_latency_t __attribute__((unused)); @@ -201,6 +202,7 @@ static __always_inline void handle_sched_switch(u32 prev_pid, u32 prev_tgid, .tid = next_pid, .delay_ns = delay, .ts = now, + .preempted_pid_state = prev_state, }; bpf_probe_read_kernel_str(&latency.comm, sizeof(latency.comm), next_comm); diff --git a/cmd/config.yaml b/cmd/config.yaml index e7c9fbe..6d04848 100644 --- a/cmd/config.yaml +++ b/cmd/config.yaml @@ -5,7 +5,7 @@ btf: kernel: "/sys/kernel/btf/vmlinux" output: - type: file + type: clickhouse clickhouse: host: "192.168.200.201" port: "9000" diff --git a/deploy/sql/clickhouse/sched.ck b/deploy/sql/clickhouse/sched.ck index ad56c3e..05dc0dd 100644 --- a/deploy/sql/clickhouse/sched.ck +++ b/deploy/sql/clickhouse/sched.ck @@ -20,6 +20,8 @@ CREATE TABLE shepherd.sched_latency `date` Date DEFAULT today(), + `preempted_pid_state` UInt32, + `datetime` DateTime64(9) DEFAULT now64(9) ) 
ENGINE = MergeTree diff --git a/go.mod b/go.mod index 409b8e0..6346596 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,6 @@ module github.com/cen-ngc5139/shepherd -go 1.22.4 -toolchain go1.24.1 +go 1.23.4 require ( github.com/ClickHouse/clickhouse-go/v2 v2.30.0 diff --git a/internal/output/output.go b/internal/output/output.go index 2f457f5..28bdc10 100644 --- a/internal/output/output.go +++ b/internal/output/output.go @@ -58,7 +58,8 @@ func (o *Output) InitSinkCli(cfg config.OutputConfig) (err error) { INSERT INTO sched_latency ( pid, tid, delay_ns, ts, preempted_pid, preempted_comm, - is_preempt, comm + is_preempt, comm, + preempted_pid_state ) `) if err != nil { diff --git a/internal/output/sched_delay.go b/internal/output/sched_delay.go index 8a849b4..a7529d2 100644 --- a/internal/output/sched_delay.go +++ b/internal/output/sched_delay.go @@ -113,6 +113,7 @@ func insertSchedMetrics(ctx context.Context, conn clickhouse.Conn, batch driver. sanitizeString(convertInt8ToString(event.PreemptedComm[:])), event.IsPreempt, sanitizeString(convertInt8ToString(event.Comm[:])), + event.PreemptedPidState, ) if err != nil { log.Errorf("failed to append to batch: %v", err) @@ -132,7 +133,8 @@ func insertSchedMetrics(ctx context.Context, conn clickhouse.Conn, batch driver. 
// Residual patch text that shares these source lines with the Go code below.
// It is kept (line breaks restored, content unchanged) so that no part of the
// change set is lost:
//
//	 INSERT INTO sched_latency (
//	     pid, tid, delay_ns, ts,
//	     preempted_pid, preempted_comm,
//	-    is_preempt, comm
//	+    is_preempt, comm,
//	+    preempted_pid_state
//	 )
//	 `)
//	 if err != nil {
//	diff --git a/internal/output/utils.go b/internal/output/utils.go
//	index 87e4358..4c3a2db 100644
//	--- a/internal/output/utils.go
//	+++ b/internal/output/utils.go
//	@@ -67,3 +67,67 @@ func filterNonASCII(data []byte) string {

// sanitizeString returns s with leading and trailing white space removed.
func sanitizeString(s string) string {
	return strings.TrimSpace(s)
}

// Thread state constants. The names deliberately mirror the kernel's task
// state flags instead of following Go's MixedCaps convention.
const (
	TASK_RUNNING          = 0x00000000
	TASK_INTERRUPTIBLE    = 0x00000001
	TASK_UNINTERRUPTIBLE  = 0x00000002
	TASK_STOPPED          = 0x00000004
	TASK_TRACED           = 0x00000008
	EXIT_DEAD             = 0x00000010
	EXIT_ZOMBIE           = 0x00000020
	EXIT_TRACE            = EXIT_ZOMBIE | EXIT_DEAD
	TASK_PARKED           = 0x00000040
	TASK_DEAD             = 0x00000080
	TASK_WAKEKILL         = 0x00000100
	TASK_WAKING           = 0x00000200
	TASK_NOLOAD           = 0x00000400
	TASK_NEW              = 0x00000800
	TASK_RTLOCK_WAIT      = 0x00001000
	TASK_FREEZABLE        = 0x00002000
	TASK_FREEZABLE_UNSAFE = 0x00004000 // depends on IS_ENABLED(CONFIG_LOCKDEP)
	TASK_FROZEN           = 0x00008000
	TASK_STATE_MAX        = 0x00010000 // as of Linux kernel 6.9
)

// taskStates maps each individual task state flag to its short display label.
// Keys are the named constants above so the two tables cannot drift apart.
var taskStates = map[uint32]string{
	TASK_RUNNING:          "R",  // "RUNNING"
	TASK_INTERRUPTIBLE:    "S",  // "INTERRUPTIBLE"
	TASK_UNINTERRUPTIBLE:  "D",  // "UNINTERRUPTIBLE"
	TASK_STOPPED:          "T",  // "STOPPED"
	TASK_TRACED:           "t",  // "TRACED"
	EXIT_DEAD:             "X",  // "EXIT_DEAD"
	EXIT_ZOMBIE:           "Z",  // "EXIT_ZOMBIE"
	TASK_PARKED:           "P",  // "PARKED"
	TASK_DEAD:             "dd", // "DEAD"
	TASK_WAKEKILL:         "wk", // "WAKEKILL"
	TASK_WAKING:           "wg", // "WAKING"
	TASK_NOLOAD:           "I",  // "NOLOAD"
	TASK_NEW:              "N",  // "NEW"
	TASK_RTLOCK_WAIT:      "rt", // "RTLOCK_WAIT"
	TASK_FREEZABLE:        "fe", // "FREEZABLE"
	TASK_FREEZABLE_UNSAFE: "fu", // "__TASK_FREEZABLE_UNSAFE = (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP))"
	TASK_FROZEN:           "fo", // "FROZEN"
}

// GetTaskStateName converts a kernel task state bitmask into a readable string.
//
// A zero mask yields "R". Any mask with TASK_NOLOAD set yields "I", whatever
// other bits are present. Otherwise the labels of all recognised bits are
// joined with "+", ordered from the lowest bit to the highest, so the same
// mask always produces the same string. A mask that contains only bits at or
// above TASK_STATE_MAX yields the empty string.
func GetTaskStateName(taskState uint32) string {
	if taskState == 0 {
		return "R"
	}
	if taskState&TASK_NOLOAD != 0 { // idle kernel thread waiting for work
		return "I"
	}

	// Walk the bits in ascending order rather than ranging over taskStates:
	// Go randomises map iteration order, which would make the label order of
	// multi-bit states differ from one call to the next.
	var names []string
	for bit := uint32(1); bit < TASK_STATE_MAX; bit <<= 1 {
		if taskState&bit == 0 {
			continue
		}
		if name, ok := taskStates[bit]; ok {
			names = append(names, name)
		}
	}

	return strings.Join(names, "+")
}