diff --git a/cgroup/cgroup.go b/cgroup/cgroup.go index 48dff4e..04c95c8 100644 --- a/cgroup/cgroup.go +++ b/cgroup/cgroup.go @@ -162,7 +162,7 @@ func containerByCgroup(cgroupPath string) (ContainerType, string, error) { switch { case cgroupPath == "/init": return ContainerTypeTalosRuntime, "/talos/init", nil - case prefix == "user.slice" || prefix == "init.scope": + case prefix == "user.slice" || prefix == "init.scope" || prefix == "systemd": return ContainerTypeStandaloneProcess, "", nil case prefix == "docker" || (prefix == "system.slice" && len(parts) > 1 && strings.HasPrefix(parts[1], "docker-")): matches := dockerIdRegexp.FindStringSubmatch(cgroupPath) @@ -194,6 +194,9 @@ func containerByCgroup(cgroupPath string) (ContainerType, string, error) { } return ContainerTypeTalosRuntime, path.Join("/talos/", matches[2]), nil case prefix == "system.slice" || prefix == "runtime.slice" || prefix == "reserved.slice" || prefix == "kube.slice" || prefix == "azure.slice": + if strings.HasSuffix(cgroupPath, ".scope") { + return ContainerTypeStandaloneProcess, "", nil + } matches := systemSliceIdRegexp.FindStringSubmatch(cgroupPath) if matches == nil { return ContainerTypeUnknown, "", fmt.Errorf("invalid systemd cgroup %s", cgroupPath) diff --git a/cgroup/cgroup_test.go b/cgroup/cgroup_test.go index 888d7ac..7e8b7af 100644 --- a/cgroup/cgroup_test.go +++ b/cgroup/cgroup_test.go @@ -205,4 +205,24 @@ func TestContainerByCgroup(t *testing.T) { as.Equal(ContainerTypeStandaloneProcess, typ) as.Equal("", id) as.Nil(err) + + typ, id, err = containerByCgroup("/systemd/system.slice") + as.Equal(ContainerTypeStandaloneProcess, typ) + as.Equal("", id) + as.Nil(err) + + typ, id, err = containerByCgroup("/system.slice/cri-containerd-69e8ded3c33c9d5e2b93acd74787b17a8629f74d6707bc5bb9b2e095337d0263.scope") + as.Equal(ContainerTypeStandaloneProcess, typ) + as.Equal("", id) + as.Nil(err) + + typ, id, err = containerByCgroup("/system.slice/run-ra2ddf9594bbf4a1986439b594f89eb0f.scope") + as.Equal(ContainerTypeStandaloneProcess, typ) + as.Equal("", id) + as.Nil(err) + + typ, id, err = containerByCgroup("/system.slice/docker-ba7b10d15d16e10e3de7a2dcd408a3d971169ae303f46cfad4c5453c6326fee2.scope") + as.Equal(ContainerTypeDocker, typ) + as.Equal("ba7b10d15d16e10e3de7a2dcd408a3d971169ae303f46cfad4c5453c6326fee2", id) + as.Nil(err) } diff --git a/containers/container.go b/containers/container.go index 54c186f..8b12ecf 100644 --- a/containers/container.go +++ b/containers/container.go @@ -39,16 +39,16 @@ type ContainerNetwork struct { } type ContainerMetadata struct { - name string - labels map[string]string - volumes map[string]string - logPath string - image string - logDecoder logparser.Decoder - hostListens map[string][]netaddr.IPPort - networks map[string]ContainerNetwork - env map[string]string - systemdTriggeredBy string + name string + labels map[string]string + volumes map[string]string + logPath string + image string + logDecoder logparser.Decoder + hostListens map[string][]netaddr.IPPort + networks map[string]ContainerNetwork + env map[string]string + systemd SystemdProperties } type Delays struct { @@ -237,8 +237,8 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) { c.lock.Lock() defer c.lock.Unlock() - if c.metadata.image != "" || c.metadata.systemdTriggeredBy != "" { - ch <- gauge(metrics.ContainerInfo, 1, c.metadata.image, c.metadata.systemdTriggeredBy) + if c.metadata.image != "" || !c.metadata.systemd.IsEmpty() { + ch <- gauge(metrics.ContainerInfo, 1, c.metadata.image, c.metadata.systemd.TriggeredBy, c.metadata.systemd.Type) } ch <- counter(metrics.Restarts, float64(c.restarts)) diff --git a/containers/metrics.go b/containers/metrics.go index 4612314..0780261 100644 --- a/containers/metrics.go +++ b/containers/metrics.go @@ -61,7 +61,7 @@ var metrics = struct { Ip2Fqdn *prometheus.Desc }{ - ContainerInfo: metric("container_info", "Meta information about the container", "image", "systemd_triggered_by"), + ContainerInfo: metric("container_info", "Meta information about the container", "image", "systemd_triggered_by", "systemd_type"), Restarts: metric("container_restarts_total", "Number of times the container was restarted"), diff --git a/containers/registry.go b/containers/registry.go index a92225b..064b473 100644 --- a/containers/registry.go +++ b/containers/registry.go @@ -284,14 +284,10 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) { if c := r.getOrCreateContainer(e.Pid); c != nil { c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.ActualDstAddr, e.Timestamp, false, e.Duration) c.attachTlsUprobes(r.tracer, e.Pid) - } else { - klog.Infoln("TCP connection from unknown container", e) } case ebpftracer.EventTypeConnectionError: if c := r.getOrCreateContainer(e.Pid); c != nil { c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.ActualDstAddr, 0, true, e.Duration) - } else { - klog.Infoln("TCP connection error from unknown container", e) } case ebpftracer.EventTypeConnectionClose: if c := r.containersByPid[e.Pid]; c != nil { @@ -377,6 +373,14 @@ func (r *Registry) getOrCreateContainer(pid uint32) *Container { r.containersByPidIgnored[pid] = &t return nil } + if cg.ContainerType == cgroup.ContainerTypeSystemdService && *flags.SkipSystemdSystemServices { + if md.systemd.IsSystemService() { + klog.InfoS("skipping system service", "id", id, "unit", md.systemd.Unit, "type", md.systemd.Type, "triggered_by", md.systemd.TriggeredBy, "pid", pid) + t := time.Now() + r.containersByPidIgnored[pid] = &t + return nil + } + } if c := r.containersById[id]; c != nil { klog.Warningln("id conflict:", id) @@ -542,7 +546,7 @@ func getContainerMetadata(cg *cgroup.Cgroup) (*ContainerMetadata, error) { switch cg.ContainerType { case cgroup.ContainerTypeSystemdService: md := &ContainerMetadata{} - md.systemdTriggeredBy = SystemdTriggeredBy(cg.ContainerId) + md.systemd = getSystemdProperties(cg.Id) return md, nil case cgroup.ContainerTypeDocker, cgroup.ContainerTypeContainerd, cgroup.ContainerTypeSandbox, cgroup.ContainerTypeCrio: default: diff --git a/containers/systemd.go b/containers/systemd.go index cf5dfd2..cddcb8a 100644 --- a/containers/systemd.go +++ b/containers/systemd.go @@ -18,6 +18,23 @@ import ( var ( dbusConn *dbus.Conn dbusTimeout = time.Second + + systemServicePrefixes = []string{ + "systemd-", + "dbus", + "getty", + "system-serial", + "system-getty", + "serial-getty", + "snapd", + "packagekit", + "unattended-upgrades", + "multipathd", + "qemu-guest-agent", + "irqbalance", + "networkd-dispatcher", + "rpcbind", + } ) func init() { @@ -39,18 +56,54 @@ func init() { } } -func SystemdTriggeredBy(id string) string { +type SystemdProperties struct { + Unit string + TriggeredBy string + Type string +} + +func (sp SystemdProperties) IsEmpty() bool { + return sp.TriggeredBy == "" && sp.Type == "" +} + +func (sp SystemdProperties) IsSystemService() bool { + switch sp.Type { + case "oneshot", "dbus": + return true + } + if strings.HasSuffix(sp.TriggeredBy, ".timer") { + return true + } + for _, prefix := range systemServicePrefixes { + if strings.HasPrefix(sp.Unit, prefix) { + return true + } + } + return false +} + +func getSystemdProperties(id string) SystemdProperties { + props := SystemdProperties{} if dbusConn == nil { - return "" + return props } ctx, cancel := context.WithTimeout(context.Background(), dbusTimeout) defer cancel() parts := strings.Split(id, "/") unit := parts[len(parts)-1] - if prop, _ := dbusConn.GetUnitPropertyContext(ctx, unit, "TriggeredBy"); prop != nil { - if values, _ := prop.Value.Value().([]string); len(values) > 0 { - return values[0] + props.Unit = unit + properties, err := dbusConn.GetAllPropertiesContext(ctx, unit) + if err != nil { + klog.Warningln("failed to get systemd properties:", err) + return props + } + if v, ok := properties["TriggeredBy"]; ok { + if values, _ := v.([]string); len(values) > 0 { + props.TriggeredBy = values[0] } } - return "" + if v, ok := properties["Type"]; ok { + props.Type, _ = v.(string) + } + return props } diff --git a/flags/flags.go b/flags/flags.go index ffd457d..23a7dd8 100644 --- a/flags/flags.go +++ b/flags/flags.go @@ -18,6 +18,8 @@ var ( ContainerAllowlist = kingpin.Flag("container-allowlist", "List of allowed containers (regex patterns)").Envar("CONTAINER_ALLOWLIST").Strings() ContainerDenylist = kingpin.Flag("container-denylist", "List of denied containers (regex patterns)").Envar("CONTAINER_DENYLIST").Strings() + SkipSystemdSystemServices = kingpin.Flag("skip-systemd-system-services", "Skip well-known systemd system services (apt, motd, udev, etc.)").Default("true").Envar("SKIP_SYSTEMD_SYSTEM_SERVICES").Bool() + ExcludeHTTPMetricsByPath = kingpin.Flag("exclude-http-requests-by-path", "Skip HTTP metrics and traces by path").Envar("EXCLUDE_HTTP_REQUESTS_BY_PATH").Strings() ExternalNetworksWhitelist = kingpin. diff --git a/proc/fd.go b/proc/fd.go index 5b294ea..8896ded 100644 --- a/proc/fd.go +++ b/proc/fd.go @@ -34,7 +34,7 @@ func ReadFds(pid uint32) ([]Fd, error) { } dest, err := os.Readlink(path.Join(fdDir, entry.Name())) if err != nil { - if os.IsNotExist(err) { + if !os.IsNotExist(err) { klog.Warningf("failed to read link '%s': %s", entry.Name(), err) } continue