Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cgroup/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ func containerByCgroup(cgroupPath string) (ContainerType, string, error) {
switch {
case cgroupPath == "/init":
return ContainerTypeTalosRuntime, "/talos/init", nil
case prefix == "user.slice" || prefix == "init.scope":
case prefix == "user.slice" || prefix == "init.scope" || prefix == "systemd":
return ContainerTypeStandaloneProcess, "", nil
case prefix == "docker" || (prefix == "system.slice" && len(parts) > 1 && strings.HasPrefix(parts[1], "docker-")):
matches := dockerIdRegexp.FindStringSubmatch(cgroupPath)
Expand Down Expand Up @@ -194,6 +194,9 @@ func containerByCgroup(cgroupPath string) (ContainerType, string, error) {
}
return ContainerTypeTalosRuntime, path.Join("/talos/", matches[2]), nil
case prefix == "system.slice" || prefix == "runtime.slice" || prefix == "reserved.slice" || prefix == "kube.slice" || prefix == "azure.slice":
if strings.HasSuffix(cgroupPath, ".scope") {
return ContainerTypeStandaloneProcess, "", nil
}
matches := systemSliceIdRegexp.FindStringSubmatch(cgroupPath)
if matches == nil {
return ContainerTypeUnknown, "", fmt.Errorf("invalid systemd cgroup %s", cgroupPath)
Expand Down
20 changes: 20 additions & 0 deletions cgroup/cgroup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,24 @@ func TestContainerByCgroup(t *testing.T) {
as.Equal(ContainerTypeStandaloneProcess, typ)
as.Equal("", id)
as.Nil(err)

typ, id, err = containerByCgroup("/systemd/system.slice")
as.Equal(ContainerTypeStandaloneProcess, typ)
as.Equal("", id)
as.Nil(err)

typ, id, err = containerByCgroup("/system.slice/cri-containerd-69e8ded3c33c9d5e2b93acd74787b17a8629f74d6707bc5bb9b2e095337d0263.scope")
as.Equal(ContainerTypeStandaloneProcess, typ)
as.Equal("", id)
as.Nil(err)

typ, id, err = containerByCgroup("/system.slice/run-ra2ddf9594bbf4a1986439b594f89eb0f.scope")
as.Equal(ContainerTypeStandaloneProcess, typ)
as.Equal("", id)
as.Nil(err)

typ, id, err = containerByCgroup("/system.slice/docker-ba7b10d15d16e10e3de7a2dcd408a3d971169ae303f46cfad4c5453c6326fee2.scope")
as.Equal(ContainerTypeDocker, typ)
as.Equal("ba7b10d15d16e10e3de7a2dcd408a3d971169ae303f46cfad4c5453c6326fee2", id)
as.Nil(err)
}
24 changes: 12 additions & 12 deletions containers/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,16 @@ type ContainerNetwork struct {
}

type ContainerMetadata struct {
name string
labels map[string]string
volumes map[string]string
logPath string
image string
logDecoder logparser.Decoder
hostListens map[string][]netaddr.IPPort
networks map[string]ContainerNetwork
env map[string]string
systemdTriggeredBy string
name string
labels map[string]string
volumes map[string]string
logPath string
image string
logDecoder logparser.Decoder
hostListens map[string][]netaddr.IPPort
networks map[string]ContainerNetwork
env map[string]string
systemd SystemdProperties
}

type Delays struct {
Expand Down Expand Up @@ -237,8 +237,8 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
c.lock.Lock()
defer c.lock.Unlock()

if c.metadata.image != "" || c.metadata.systemdTriggeredBy != "" {
ch <- gauge(metrics.ContainerInfo, 1, c.metadata.image, c.metadata.systemdTriggeredBy)
if c.metadata.image != "" || !c.metadata.systemd.IsEmpty() {
ch <- gauge(metrics.ContainerInfo, 1, c.metadata.image, c.metadata.systemd.TriggeredBy, c.metadata.systemd.Type)
}

ch <- counter(metrics.Restarts, float64(c.restarts))
Expand Down
2 changes: 1 addition & 1 deletion containers/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ var metrics = struct {

Ip2Fqdn *prometheus.Desc
}{
ContainerInfo: metric("container_info", "Meta information about the container", "image", "systemd_triggered_by"),
ContainerInfo: metric("container_info", "Meta information about the container", "image", "systemd_triggered_by", "systemd_type"),

Restarts: metric("container_restarts_total", "Number of times the container was restarted"),

Expand Down
14 changes: 9 additions & 5 deletions containers/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,14 +284,10 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
if c := r.getOrCreateContainer(e.Pid); c != nil {
c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.ActualDstAddr, e.Timestamp, false, e.Duration)
c.attachTlsUprobes(r.tracer, e.Pid)
} else {
klog.Infoln("TCP connection from unknown container", e)
}
case ebpftracer.EventTypeConnectionError:
if c := r.getOrCreateContainer(e.Pid); c != nil {
c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.ActualDstAddr, 0, true, e.Duration)
} else {
klog.Infoln("TCP connection error from unknown container", e)
}
case ebpftracer.EventTypeConnectionClose:
if c := r.containersByPid[e.Pid]; c != nil {
Expand Down Expand Up @@ -377,6 +373,14 @@ func (r *Registry) getOrCreateContainer(pid uint32) *Container {
r.containersByPidIgnored[pid] = &t
return nil
}
if cg.ContainerType == cgroup.ContainerTypeSystemdService && *flags.SkipSystemdSystemServices {
if md.systemd.IsSystemService() {
klog.InfoS("skipping system service", "id", id, "unit", md.systemd.Unit, "type", md.systemd.Type, "triggered_by", md.systemd.TriggeredBy, "pid", pid)
t := time.Now()
r.containersByPidIgnored[pid] = &t
return nil
}
}

if c := r.containersById[id]; c != nil {
klog.Warningln("id conflict:", id)
Expand Down Expand Up @@ -542,7 +546,7 @@ func getContainerMetadata(cg *cgroup.Cgroup) (*ContainerMetadata, error) {
switch cg.ContainerType {
case cgroup.ContainerTypeSystemdService:
md := &ContainerMetadata{}
md.systemdTriggeredBy = SystemdTriggeredBy(cg.ContainerId)
md.systemd = getSystemdProperties(cg.Id)
return md, nil
case cgroup.ContainerTypeDocker, cgroup.ContainerTypeContainerd, cgroup.ContainerTypeSandbox, cgroup.ContainerTypeCrio:
default:
Expand Down
65 changes: 59 additions & 6 deletions containers/systemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,23 @@ import (
var (
// dbusConn is the D-Bus connection used to query systemd unit properties.
// getSystemdProperties returns empty properties while it is nil.
dbusConn *dbus.Conn
// dbusTimeout bounds each D-Bus property query made by getSystemdProperties.
dbusTimeout = time.Second

// systemServicePrefixes lists unit-name prefixes that
// SystemdProperties.IsSystemService treats as system services.
// Matching is a plain string-prefix test, so "dbus" also matches any
// unit whose name merely starts with "dbus".
systemServicePrefixes = []string{
"systemd-",
"dbus",
"getty",
"system-serial",
"system-getty",
"serial-getty",
"snapd",
"packagekit",
"unattended-upgrades",
"multipathd",
"qemu-guest-agent",
"irqbalance",
"networkd-dispatcher",
"rpcbind",
}
)

func init() {
Expand All @@ -39,18 +56,54 @@ func init() {
}
}

func SystemdTriggeredBy(id string) string {
// SystemdProperties holds the subset of systemd unit properties that is
// looked up for a systemd-managed cgroup.
type SystemdProperties struct {
	// Unit is the unit name (the last path element of the cgroup id).
	Unit string
	// TriggeredBy is the first entry of the unit's "TriggeredBy" property,
	// or "" when the unit has none.
	TriggeredBy string
	// Type is the unit's "Type" property, or "" when unavailable.
	Type string
}

// IsEmpty reports whether neither TriggeredBy nor Type is set.
// Unit is deliberately not part of the check.
func (p SystemdProperties) IsEmpty() bool {
	return !(p.TriggeredBy != "" || p.Type != "")
}

// IsSystemService reports whether the unit should be treated as a system
// service. That is the case when any of the following holds:
//   - its Type is "oneshot" or "dbus";
//   - it is triggered by a ".timer" unit;
//   - its Unit name starts with one of systemServicePrefixes.
func (p SystemdProperties) IsSystemService() bool {
	if p.Type == "oneshot" || p.Type == "dbus" {
		return true
	}
	if strings.HasSuffix(p.TriggeredBy, ".timer") {
		return true
	}
	for i := range systemServicePrefixes {
		if strings.HasPrefix(p.Unit, systemServicePrefixes[i]) {
			return true
		}
	}
	return false
}

func getSystemdProperties(id string) SystemdProperties {
props := SystemdProperties{}
if dbusConn == nil {
return ""
return props
}
ctx, cancel := context.WithTimeout(context.Background(), dbusTimeout)
defer cancel()
parts := strings.Split(id, "/")
unit := parts[len(parts)-1]
if prop, _ := dbusConn.GetUnitPropertyContext(ctx, unit, "TriggeredBy"); prop != nil {
if values, _ := prop.Value.Value().([]string); len(values) > 0 {
return values[0]
props.Unit = unit
properties, err := dbusConn.GetAllPropertiesContext(ctx, unit)
if err != nil {
klog.Warningln("failed to get systemd properties:", err)
return props
}
if v, ok := properties["TriggeredBy"]; ok {
if values, _ := v.([]string); len(values) > 0 {
props.TriggeredBy = values[0]
}
}
return ""
if v, ok := properties["Type"]; ok {
props.Type, _ = v.(string)
}
return props
}
2 changes: 2 additions & 0 deletions flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ var (
ContainerAllowlist = kingpin.Flag("container-allowlist", "List of allowed containers (regex patterns)").Envar("CONTAINER_ALLOWLIST").Strings()
ContainerDenylist = kingpin.Flag("container-denylist", "List of denied containers (regex patterns)").Envar("CONTAINER_DENYLIST").Strings()

SkipSystemdSystemServices = kingpin.Flag("skip-systemd-system-services", "Skip well-known systemd system services (apt, motd, udev, etc.)").Default("true").Envar("SKIP_SYSTEMD_SYSTEM_SERVICES").Bool()

ExcludeHTTPMetricsByPath = kingpin.Flag("exclude-http-requests-by-path", "Skip HTTP metrics and traces by path").Envar("EXCLUDE_HTTP_REQUESTS_BY_PATH").Strings()

ExternalNetworksWhitelist = kingpin.
Expand Down
2 changes: 1 addition & 1 deletion proc/fd.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func ReadFds(pid uint32) ([]Fd, error) {
}
dest, err := os.Readlink(path.Join(fdDir, entry.Name()))
if err != nil {
if os.IsNotExist(err) {
if !os.IsNotExist(err) {
klog.Warningf("failed to read link '%s': %s", entry.Name(), err)
}
continue
Expand Down
Loading