From 1b86700a796150631a3149426f198ec8fed6e34d Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Fri, 6 Feb 2026 15:31:20 +0000 Subject: [PATCH 01/10] feat: add observability tooling for flashbox images --- bob-common/mkosi.conf | 4 + .../etc/prometheus/process-exporter.yml | 15 ++ .../etc/prometheus/prometheus.yml.tmpl | 55 ++++++ .../etc/prometheus/recording_rules.yml | 49 +++++ .../etc/systemd/system/fetch-config.service | 0 .../etc/systemd/system/node-exporter.service | 55 ++++++ .../systemd/system/process-exporter.service | 19 ++ .../etc/systemd/system/prometheus.service | 25 +++ .../systemd/system/searcher-firewall.service | 4 +- .../mkosi.extra/usr/bin/fetch-config.sh | 174 ++++++++++++++++++ .../mkosi.extra/usr/bin/init-container.sh | 12 ++ .../mkosi.extra/usr/bin/init-firewall.sh | 3 + bob-l1/mkosi.extra/etc/bob/dynamic-config.sh | 49 +++++ bob-l1/mkosi.extra/etc/bob/firewall-config | 12 ++ bob-l2/mkosi.extra/etc/bob/dynamic-config.sh | 49 +++++ bob-l2/mkosi.extra/etc/bob/firewall-config | 12 ++ bob-l2/mkosi.extra/usr/bin/fetch-config.sh | 105 ----------- 17 files changed, 535 insertions(+), 107 deletions(-) create mode 100644 bob-common/mkosi.extra/etc/prometheus/process-exporter.yml create mode 100644 bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl create mode 100644 bob-common/mkosi.extra/etc/prometheus/recording_rules.yml rename {bob-l2 => bob-common}/mkosi.extra/etc/systemd/system/fetch-config.service (100%) create mode 100644 bob-common/mkosi.extra/etc/systemd/system/node-exporter.service create mode 100644 bob-common/mkosi.extra/etc/systemd/system/process-exporter.service create mode 100644 bob-common/mkosi.extra/etc/systemd/system/prometheus.service create mode 100755 bob-common/mkosi.extra/usr/bin/fetch-config.sh create mode 100755 bob-l1/mkosi.extra/etc/bob/dynamic-config.sh create mode 100755 bob-l2/mkosi.extra/etc/bob/dynamic-config.sh delete mode 100755 bob-l2/mkosi.extra/usr/bin/fetch-config.sh diff --git a/bob-common/mkosi.conf 
b/bob-common/mkosi.conf
index ebd8b785..896a5709 100644
--- a/bob-common/mkosi.conf
+++ b/bob-common/mkosi.conf
@@ -29,6 +29,10 @@ Packages=podman
          openssh-sftp-server
          udev
          libsnappy1v5
+         prometheus
+         prometheus-node-exporter
+         prometheus-process-exporter
+         gomplate
 
 BuildPackages=build-essential
               git
diff --git a/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml b/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml
new file mode 100644
index 00000000..25889bc9
--- /dev/null
+++ b/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml
@@ -0,0 +1,15 @@
+process_names:
+  # Monitor podman runtime processes under a fixed group name
+  - name: "podman"
+    cmdline:
+      - 'podman'
+
+  # Monitor the container init process
+  - name: "searcher-container"
+    cmdline:
+      - '/usr/bin/catatonit'
+
+  # NOTE: process-exporter selectors are limited to comm/exe/cmdline; there is
+  # no `cgroups:` selector, so the former "searcher-cgroup" entry could not be
+  # loaded. Cgroup-wide totals need a different source (e.g. systemd cgroup
+  # accounting); the catatonit group above is what the recording rules use.
diff --git a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl
new file mode 100644
index 00000000..931fb05c
--- /dev/null
+++ b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl
@@ -0,0 +1,55 @@
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+# Recording rules for aggregated metrics
+rule_files:
+  - /etc/prometheus/recording_rules.yml
+
+# Scrape configurations
+scrape_configs:
+  # Node exporter on localhost
+  - job_name: 'node'
+    static_configs:
+      - targets: ['localhost:9100']
+    metric_relabel_configs:
+      # Keep only the raw series that recording_rules.yml aggregates
+      - source_labels: [__name__]
+        regex: 'node_(cpu|memory|disk|filesystem|network)_.*'
+        action: keep
+
+  # Process exporter for container monitoring
+  - job_name: 'process'
+    static_configs:
+      - targets: ['localhost:9256']
+
+# Remote write configuration (rendered by gomplate from the "config" datasource)
+remote_write:
+{{ if (datasource "config").remote_write_flashbots_url }}
+  # Flashbots endpoint
+  - url: {{ (datasource "config").remote_write_flashbots_url | toJSON }}
+    write_relabel_configs:
+      # Only send aggregated metrics
+      - source_labels: [__name__]
+        regex: 'flashbox:.*'
+        action: keep
+    {{ if (datasource "config").remote_write_flashbots_auth }}
+    basic_auth:
+      username: {{ (datasource "config").remote_write_flashbots_username | toJSON }}
+      password: {{ (datasource "config").remote_write_flashbots_password | toJSON }}
+    {{ end }}
+{{ end }}
+
+{{ if (datasource "config" "remote_write_searcher_url") }}
+  # Searcher endpoint (optional)
+  - url: {{ datasource "config" "remote_write_searcher_url" }}
+    write_relabel_configs:
+      # Only send aggregated metrics
+      - source_labels: [__name__]
+        regex: 'flashbox:.*'
+        action: keep
+    {{ if (datasource "config" "remote_write_searcher_auth") }}
+    headers:
+      Authorization: {{ datasource "config" "remote_write_searcher_auth" }}
+    {{ end }}
+{{ end }}
diff --git a/bob-common/mkosi.extra/etc/prometheus/recording_rules.yml b/bob-common/mkosi.extra/etc/prometheus/recording_rules.yml
new file mode 100644
index 00000000..976510c2
--- /dev/null
+++ b/bob-common/mkosi.extra/etc/prometheus/recording_rules.yml
@@ -0,0 +1,49 @@
+groups:
+  - name: flashbox_aggregated_metrics
+    interval: 30s  # How often to evaluate rules
+    rules:
+      # CPU aggregated metrics
+      - record: flashbox:cpu_usage_percent
+        expr: 100 - (avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
+
+      - record: flashbox:cpu_usage_percent_by_mode
+        expr: avg(rate(node_cpu_seconds_total[5m])) by (mode) * 100
+
+      # Memory aggregated metrics
+      - record: flashbox:memory_usage_percent
+        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100
+
+      - record: flashbox:memory_available_gb
+        expr: node_memory_MemAvailable_bytes / 1024 / 1024 / 1024
+
+      # Disk aggregated metrics - both root and persistent
+      # Root filesystem (always available)
+      - record: flashbox:disk_usage_percent_root
+        expr: 100 - (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100)
+
+      # Persistent storage (available after
mount) - returns no data if not mounted
+      - record: flashbox:disk_usage_percent_persistent
+        expr: 100 - (node_filesystem_avail_bytes{mountpoint="/persistent"} / node_filesystem_size_bytes{mountpoint="/persistent"} * 100)
+
+      - record: flashbox:disk_io_read_mb_per_sec
+        expr: rate(node_disk_read_bytes_total[5m]) / 1024 / 1024
+
+      - record: flashbox:disk_io_write_mb_per_sec
+        expr: rate(node_disk_written_bytes_total[5m]) / 1024 / 1024
+
+      # Container health metrics (using process exporter)
+      - record: flashbox:container_alive
+        expr: up{job="process"} * on(instance) group_left(cgroup) namedprocess_namegroup_num_procs{groupname=~".*searcher-container.*"}
+
+      - record: flashbox:container_cpu_percent
+        expr: rate(namedprocess_namegroup_cpu_seconds_total{groupname=~".*searcher-container.*"}[5m]) * 100
+
+      - record: flashbox:container_memory_mb
+        expr: namedprocess_namegroup_memory_bytes{groupname=~".*searcher-container.*"} / 1024 / 1024
+
+      # Network metrics (only counters, no detailed info)
+      - record: flashbox:network_receive_mb_total
+        expr: sum(node_network_receive_bytes_total) / 1024 / 1024
+
+      - record: flashbox:network_transmit_mb_total
+        expr: sum(node_network_transmit_bytes_total) / 1024 / 1024
diff --git a/bob-l2/mkosi.extra/etc/systemd/system/fetch-config.service b/bob-common/mkosi.extra/etc/systemd/system/fetch-config.service
similarity index 100%
rename from bob-l2/mkosi.extra/etc/systemd/system/fetch-config.service
rename to bob-common/mkosi.extra/etc/systemd/system/fetch-config.service
diff --git a/bob-common/mkosi.extra/etc/systemd/system/node-exporter.service b/bob-common/mkosi.extra/etc/systemd/system/node-exporter.service
new file mode 100644
index 00000000..4e6b89a5
--- /dev/null
+++ b/bob-common/mkosi.extra/etc/systemd/system/node-exporter.service
@@ -0,0 +1,55 @@
+[Unit]
+Description=Prometheus Node Exporter
+Documentation=https://github.com/prometheus/node_exporter
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=simple
+User=prometheus
+Group=prometheus
+ExecStart=/usr/bin/prometheus-node-exporter \
+    --web.listen-address=127.0.0.1:9100 \
+    --collector.cpu \
+    --collector.meminfo \
+    --collector.diskstats \
+    --collector.filesystem \
+    --collector.netdev \
+    --collector.loadavg \
+    --no-collector.arp \
+    --no-collector.bcache \
+    --no-collector.bonding \
+    --no-collector.conntrack \
+    --no-collector.cpufreq \
+    --no-collector.edac \
+    --no-collector.entropy \
+    --no-collector.filefd \
+    --no-collector.hwmon \
+    --no-collector.infiniband \
+    --no-collector.ipvs \
+    --no-collector.mdadm \
+    --no-collector.netclass \
+    --no-collector.netstat \
+    --no-collector.nfs \
+    --no-collector.nfsd \
+    --no-collector.pressure \
+    --no-collector.rapl \
+    --no-collector.schedstat \
+    --no-collector.sockstat \
+    --no-collector.softnet \
+    --no-collector.stat \
+    --no-collector.textfile \
+    --no-collector.thermal_zone \
+    --no-collector.time \
+    --no-collector.timex \
+    --no-collector.udp_queues \
+    --no-collector.uname \
+    --no-collector.vmstat \
+    --no-collector.xfs \
+    --no-collector.zfs \
+    --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run|var/lib/docker)($$|/)
+Restart=on-failure
+RestartSec=5s
+
+[Install]
+WantedBy=multi-user.target
diff --git a/bob-common/mkosi.extra/etc/systemd/system/process-exporter.service b/bob-common/mkosi.extra/etc/systemd/system/process-exporter.service
new file mode 100644
index 00000000..741d1112
--- /dev/null
+++ b/bob-common/mkosi.extra/etc/systemd/system/process-exporter.service
@@ -0,0 +1,19 @@
+[Unit]
+Description=Prometheus Process Exporter
+Documentation=https://github.com/ncabatoff/process-exporter
+After=network-online.target searcher-container.service
+Wants=network-online.target
+
+[Service]
+Type=simple
+User=prometheus
+Group=prometheus
+ExecStart=/usr/bin/prometheus-process-exporter \
+    --web.listen-address=127.0.0.1:9256 \
+    --config.path=/etc/prometheus/process-exporter.yml \
+    --children
+Restart=on-failure
+RestartSec=5s
+
+[Install]
+WantedBy=multi-user.target
diff --git a/bob-common/mkosi.extra/etc/systemd/system/prometheus.service b/bob-common/mkosi.extra/etc/systemd/system/prometheus.service
new file mode 100644
index 00000000..5559530f
--- /dev/null
+++ b/bob-common/mkosi.extra/etc/systemd/system/prometheus.service
@@ -0,0 +1,25 @@
+[Unit]
+Description=Prometheus Monitoring System
+Documentation=https://prometheus.io/docs/introduction/overview/
+After=network-online.target fetch-config.service
+Wants=network-online.target
+Requires=fetch-config.service
+
+[Service]
+Type=simple
+User=prometheus
+Group=prometheus
+ExecStartPre=+/usr/bin/gomplate -f /etc/prometheus/prometheus.yml.tmpl -o /etc/prometheus/prometheus.yml -d config=/etc/flashbox/observability-config.json
+ExecStart=/usr/bin/prometheus \
+    --config.file=/etc/prometheus/prometheus.yml \
+    --storage.tsdb.path=/var/lib/prometheus/ \
+    --storage.tsdb.retention.time=24h \
+    --web.console.templates=/usr/share/prometheus/consoles \
+    --web.console.libraries=/usr/share/prometheus/console_libraries \
+    --web.listen-address=127.0.0.1:9090
+ExecReload=/bin/kill -HUP $MAINPID
+Restart=on-failure
+RestartSec=5s
+
+[Install]
+WantedBy=multi-user.target
diff --git a/bob-common/mkosi.extra/etc/systemd/system/searcher-firewall.service b/bob-common/mkosi.extra/etc/systemd/system/searcher-firewall.service
index 3812c1eb..93808d5b 100644
--- a/bob-common/mkosi.extra/etc/systemd/system/searcher-firewall.service
+++ b/bob-common/mkosi.extra/etc/systemd/system/searcher-firewall.service
@@ -1,7 +1,7 @@
 [Unit]
 Description=Searcher Network and Firewall Rules
-After=network.target network-setup.service
-Requires=network-setup.service
+After=network.target network-setup.service fetch-config.service
+Requires=network-setup.service fetch-config.service
 
 [Service]
 Type=oneshot
diff --git a/bob-common/mkosi.extra/usr/bin/fetch-config.sh b/bob-common/mkosi.extra/usr/bin/fetch-config.sh
new file mode 100755
index 00000000..f8c0ea52
--- /dev/null
+++
b/bob-common/mkosi.extra/usr/bin/fetch-config.sh
@@ -0,0 +1,174 @@
+#!/bin/sh
+set -eu -o pipefail
+
+# Common configuration fetching script for FlashBox (bob-l1 and bob-l2)
+# This script provides shared functionality for configuration management
+# Project-specific configuration should be done via /etc/bob/dynamic-config.sh
+
+CONFIG_PATH=/etc/bob/config.env
+OBSERVABILITY_CONFIG_PATH=/etc/flashbox/observability-config.json
+
+# Don't override if config already exists
+if [ -f "$CONFIG_PATH" ]; then
+    echo "Config already exists at $CONFIG_PATH, skipping"
+    exit 0
+fi
+
+# Helper functions (curl -f: fail on HTTP errors instead of returning the body)
+fetch_metadata_value() {
+    curl -fsS \
+        --header "Metadata-Flavor: Google" \
+        "http://metadata/computeMetadata/v1/instance/attributes/$1"
+}
+
+get_ips_from_uris() {
+    # Extract IPv4 addresses from URIs; prints an empty line when none is found
+    echo "$1" | grep -oE '[0-9]{1,3}(\.[0-9]{1,3}){3}' || echo ""
+}
+
+write_observability_config() {
+    local metrics_flashbots_url="$1"
+    local metrics_flashbots_username="$2"
+    local metrics_flashbots_password="$3"
+    local metrics_searcher_url="$4"
+    local metrics_searcher_auth="$5"
+
+    # Extract IPs for firewall rules
+    local metrics_endpoint_1=""
+    local metrics_endpoint_2=""
+
+    if [ -n "$metrics_flashbots_url" ]; then
+        metrics_endpoint_1=$(get_ips_from_uris "$metrics_flashbots_url" | head -1)
+    fi
+    if [ -n "$metrics_searcher_url" ]; then
+        metrics_endpoint_2=$(get_ips_from_uris "$metrics_searcher_url" | head -1)
+    fi
+
+    # Append observability config to main config
+    cat <<EOF >> "$CONFIG_PATH"
+CONFIG_METRICS_FLASHBOTS_URL='${metrics_flashbots_url}'
+CONFIG_METRICS_FLASHBOTS_USERNAME='${metrics_flashbots_username}'
+CONFIG_METRICS_FLASHBOTS_PASSWORD='${metrics_flashbots_password}'
+CONFIG_METRICS_SEARCHER_URL='${metrics_searcher_url}'
+CONFIG_METRICS_SEARCHER_AUTH='${metrics_searcher_auth}'
+METRICS_ENDPOINT_1='${metrics_endpoint_1}'
+METRICS_ENDPOINT_2='${metrics_endpoint_2}'
+EOF
+
+    # Create observability config for Prometheus if metrics are configured
+    if [ -n "$metrics_flashbots_url" ] || [ -n "$metrics_searcher_url" ]; then
+        mkdir -p /etc/flashbox
+        cat <<EOF > "$OBSERVABILITY_CONFIG_PATH"
+{
+  "remote_write_flashbots_url": "${metrics_flashbots_url}",
+  "remote_write_flashbots_username": "${metrics_flashbots_username}",
+  "remote_write_flashbots_password": "${metrics_flashbots_password}",
+  "remote_write_flashbots_auth": $([ -n "${metrics_flashbots_username}" ] && echo '"true"' || echo '""'),
+  "remote_write_searcher_url": "${metrics_searcher_url}",
+  "remote_write_searcher_auth": "${metrics_searcher_auth}"
+}
+EOF
+        echo "Observability configuration written to $OBSERVABILITY_CONFIG_PATH"
+    fi
+}
+
+# Check for local QEMU development environment
+if dmidecode -s system-manufacturer 2>/dev/null | grep -q "QEMU" && \
+   [ -f /etc/systemd/system/serial-console.service ]; then
+    echo "Running in local QEMU dev image, using default test values"
+
+    # Get default gateway (host in QEMU user-mode networking)
+    GATEWAY=$(ip route | awk '/default/ {print $3}')
+    if [ -z "$GATEWAY" ]; then
+        echo "Warning: Could not detect gateway, falling back to 10.0.2.2"
+        GATEWAY="10.0.2.2"
+    fi
+
+    # Export gateway for custom script
+    export GATEWAY
+
+    # Call project-specific configuration if it exists
+    if [ -x /etc/bob/dynamic-config.sh ]; then
+        echo "Running project-specific configuration..."
+        /etc/bob/dynamic-config.sh qemu "$CONFIG_PATH"
+    else
+        echo "Warning: No project-specific configuration found at /etc/bob/dynamic-config.sh"
+    fi
+
+    # Add empty observability config for local dev
+    write_observability_config "" "" "" "" ""
+
+    exit 0
+fi
+
+# Production configuration using Vault
+echo "Fetching configuration from Vault..."
+
+# Get instance metadata
+instance_name=$(fetch_metadata_value "name")
+vault_addr=$(fetch_metadata_value "vault_addr")
+vault_auth_mount=$(fetch_metadata_value "vault_auth_mount_gcp")
+vault_kv_path=$(fetch_metadata_value "vault_kv_path")
+vault_kv_common_suffix=$(fetch_metadata_value "vault_kv_common_suffix")
+
+# Authenticate with Vault using GCP identity (-f aborts on HTTP errors)
+gcp_token=$(curl -fsS \
+    --header "Metadata-Flavor: Google" \
+    --data-urlencode "audience=http://vault/$instance_name" \
+    --data-urlencode "format=full" \
+    "http://metadata/computeMetadata/v1/instance/service-accounts/default/identity")
+
+vault_token=$(curl -fsS \
+    --data "$(printf '{"role":"%s","jwt":"%s"}' "$instance_name" "$gcp_token")" \
+    "${vault_addr}/v1/${vault_auth_mount}/login" | \
+    jq -r .auth.client_token)
+
+# Fetch common and instance-specific data
+common_data=$(curl -fsS \
+    --header "X-Vault-Token: ${vault_token}" \
+    "${vault_addr}/v1/${vault_kv_path}/node/${vault_kv_common_suffix}" |
+    jq -c .data.data)
+
+secret_data=$(curl -fsS \
+    --header "X-Vault-Token: ${vault_token}" \
+    "${vault_addr}/v1/${vault_kv_path}/node/${instance_name}" |
+    jq -c .data.data)
+
+# Merge data objects
+data=$(echo "$common_data $secret_data" | jq -s 'add')
+
+# Helper to get values from merged data
+get_data_value() {
+    echo "$data" | jq -rc --arg key "$1" '.[$key] // ""'
+}
+
+# Export data for the project-specific script. That script is sourced (not
+# executed) below so get_data_value/get_ips_from_uris stay visible without
+# `export -f`, a bash-only builtin option that a POSIX /bin/sh rejects.
+export VAULT_DATA="$data"
+
+# Call project-specific configuration
+if [ -x /etc/bob/dynamic-config.sh ]; then
+    echo "Running project-specific configuration..."
+    set -- vault "$CONFIG_PATH" && . /etc/bob/dynamic-config.sh
+else
+    echo "Error: No project-specific configuration found at /etc/bob/dynamic-config.sh"
+    exit 1
+fi
+
+# Fetch observability configuration
+metrics_flashbots_url=$(get_data_value metrics_flashbots_url)
+metrics_flashbots_username=$(get_data_value metrics_flashbots_username)
+metrics_flashbots_password=$(get_data_value metrics_flashbots_password)
+metrics_searcher_url=$(get_data_value metrics_searcher_url)
+metrics_searcher_auth=$(get_data_value metrics_searcher_auth)
+
+# Write observability configuration
+write_observability_config \
+    "$metrics_flashbots_url" \
+    "$metrics_flashbots_username" \
+    "$metrics_flashbots_password" \
+    "$metrics_searcher_url" \
+    "$metrics_searcher_auth"
+
+echo "Configuration successfully fetched and written to $CONFIG_PATH"
diff --git a/bob-common/mkosi.extra/usr/bin/init-container.sh b/bob-common/mkosi.extra/usr/bin/init-container.sh
index b4251905..201fd5f7 100755
--- a/bob-common/mkosi.extra/usr/bin/init-container.sh
+++ b/bob-common/mkosi.extra/usr/bin/init-container.sh
@@ -86,6 +86,18 @@ ns_iptables -A OUTPUT -p udp --sport $SEARCHER_INPUT_UDP_PORT -j DROP
 ns_iptables -A OUTPUT -p tcp --sport $SEARCHER_INPUT_UDP_PORT -j DROP
 ns_iptables -A OUTPUT -p tcp --sport $SEARCHER_INPUT_TCP_PORT -j DROP
 
+# Block container from accessing metrics endpoints
+# Source config.env to get METRICS_ENDPOINT_1 and METRICS_ENDPOINT_2
+. /etc/bob/config.env
+if [ -n "${METRICS_ENDPOINT_1:-}" ]; then
+    echo "Blocking container access to metrics endpoint 1: $METRICS_ENDPOINT_1"
+    ns_iptables -A OUTPUT -d "$METRICS_ENDPOINT_1" -j DROP
+fi
+if [ -n "${METRICS_ENDPOINT_2:-}" ]; then
+    echo "Blocking container access to metrics endpoint 2: $METRICS_ENDPOINT_2"
+    ns_iptables -A OUTPUT -d "$METRICS_ENDPOINT_2" -j DROP
+fi
+
 # Helper, only used in sourced script below
 exec_in_container() {
     su -s /bin/sh searcher -c "podman exec $NAME /bin/sh -c '$1'"
diff --git a/bob-common/mkosi.extra/usr/bin/init-firewall.sh b/bob-common/mkosi.extra/usr/bin/init-firewall.sh
index 8701b55e..29cf9ca2 100755
--- a/bob-common/mkosi.extra/usr/bin/init-firewall.sh
+++ b/bob-common/mkosi.extra/usr/bin/init-firewall.sh
@@ -149,8 +149,11 @@ drop_dst_ip() {
 # (5) Load firewall rules in {MAINTENANCE,PRODUCTION}_{IN,OUT} chains.
 # Those are customized per image, see bob*/mkosi.extra/etc/bob/firewall-config
 #
+# Source config.env for dynamic configuration (metrics endpoints, etc.)
+# Then source firewall-config which uses those variables
 # `source` is not supported in dash
 ###########################################################################
+. /etc/bob/config.env
 .
/etc/bob/firewall-config
 
 ###########################################################################
diff --git a/bob-l1/mkosi.extra/etc/bob/dynamic-config.sh b/bob-l1/mkosi.extra/etc/bob/dynamic-config.sh
new file mode 100755
index 00000000..b5e457ab
--- /dev/null
+++ b/bob-l1/mkosi.extra/etc/bob/dynamic-config.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+set -eu
+
+# Project-specific dynamic configuration for bob-l1
+# Usage: dynamic-config.sh <qemu|vault> <config-path>  (invoked by fetch-config.sh)
+
+MODE="${1:?usage: dynamic-config.sh <qemu|vault> <config-path>}"
+CONFIG_PATH="${2:?usage: dynamic-config.sh <qemu|vault> <config-path>}"
+
+if [ "$MODE" = "qemu" ]; then
+    # Local QEMU development configuration
+    # GATEWAY is exported by the common fetch-config.sh
+    cat <<EOF >> "$CONFIG_PATH"
+CONFIG_NETWORK_ID='1'
+CONFIG_NETWORK_NAME='mainnet'
+CONFIG_JWT_SECRET='1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef'
+CONFIG_CL_STATIC_PEERS=''
+CONFIG_EL_STATIC_PEERS='enode://abc123@${GATEWAY}:30303'
+CONFIG_TITAN_IP='52.207.17.217'
+CONFIG_FLASHBOTS_BUNDLE_1='18.221.59.61'
+CONFIG_FLASHBOTS_BUNDLE_2='3.15.88.156'
+CONFIG_FLASHBOTS_TX_STREAM_1='3.136.107.142'
+CONFIG_FLASHBOTS_TX_STREAM_2='3.149.14.12'
+EOF
+
+elif [ "$MODE" = "vault" ]; then
+    # Production configuration from Vault
+    # get_data_value and get_ips_from_uris are provided by fetch-config.sh
+
+    # For bob-l1, we might not have Vault set up yet
+    # This is a placeholder for when Vault integration is added
+    echo "Warning: Vault configuration not yet implemented for bob-l1"
+    echo "Using environment variables or defaults..."
+
+    # You can add Vault-based configuration here when ready
+    # For now, we can use environment variables as fallback
+    cat <<EOF >> "$CONFIG_PATH"
+CONFIG_NETWORK_ID='${CONFIG_NETWORK_ID:-1}'
+CONFIG_NETWORK_NAME='${CONFIG_NETWORK_NAME:-mainnet}'
+CONFIG_JWT_SECRET='${CONFIG_JWT_SECRET:-}'
+CONFIG_CL_STATIC_PEERS='${CONFIG_CL_STATIC_PEERS:-}'
+CONFIG_EL_STATIC_PEERS='${CONFIG_EL_STATIC_PEERS:-}'
+CONFIG_TITAN_IP='${CONFIG_TITAN_IP:-52.207.17.217}'
+CONFIG_FLASHBOTS_BUNDLE_1='${CONFIG_FLASHBOTS_BUNDLE_1:-18.221.59.61}'
+CONFIG_FLASHBOTS_BUNDLE_2='${CONFIG_FLASHBOTS_BUNDLE_2:-3.15.88.156}'
+CONFIG_FLASHBOTS_TX_STREAM_1='${CONFIG_FLASHBOTS_TX_STREAM_1:-3.136.107.142}'
+CONFIG_FLASHBOTS_TX_STREAM_2='${CONFIG_FLASHBOTS_TX_STREAM_2:-3.149.14.12}'
+EOF
+fi
diff --git a/bob-l1/mkosi.extra/etc/bob/firewall-config b/bob-l1/mkosi.extra/etc/bob/firewall-config
index 6355d153..32f802f3 100644
--- a/bob-l1/mkosi.extra/etc/bob/firewall-config
+++ b/bob-l1/mkosi.extra/etc/bob/firewall-config
@@ -17,6 +17,9 @@ NTP_PORT=123
 CL_P2P_PORT=9000
 EL_P2P_PORT=30303
 
+# Observability endpoints are loaded from /etc/bob/config.env
+# METRICS_ENDPOINT_1 and METRICS_ENDPOINT_2 variables
+
 TITAN_IP="52.207.17.217"
 TITAN_BUNDLE_PORT=1338 # bundle submission (always on)
 TITAN_STATE_DIFF_PORT=42203 # state diff stream (production only)
@@ -59,6 +62,15 @@ accept_dst_port $CHAIN_ALWAYS_OUT udp $CL_P2P_PORT "CL P2P (UDP)"
 # Note: this is accessible only from host, searcher netns has DROP on those
 accept_dst_port $CHAIN_ALWAYS_OUT udp $NTP_PORT "NTP"
 
+# Observability metrics endpoints (always allowed, host-only)
+# These are configured dynamically and only from host namespace
+if [ -n "$METRICS_ENDPOINT_1" ]; then
+    accept_dst_ip_port $CHAIN_ALWAYS_OUT tcp "$METRICS_ENDPOINT_1" $HTTPS_PORT "Metrics endpoint 1 (Flashbots)"
+fi
+if [ -n "$METRICS_ENDPOINT_2" ]; then
+    accept_dst_ip_port $CHAIN_ALWAYS_OUT tcp "$METRICS_ENDPOINT_2" $HTTPS_PORT "Metrics endpoint 2 (Searcher)"
+fi
+
 # Titan builder bundle endpoints (always on)
 # Security note: This is a side channel.
 # While the operator will not be able to see the content of the packets,
diff --git a/bob-l2/mkosi.extra/etc/bob/dynamic-config.sh b/bob-l2/mkosi.extra/etc/bob/dynamic-config.sh
new file mode 100755
index 00000000..4f698fe9
--- /dev/null
+++ b/bob-l2/mkosi.extra/etc/bob/dynamic-config.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+set -eu
+
+# Project-specific dynamic configuration for bob-l2
+# Usage: dynamic-config.sh <qemu|vault> <config-path>  (invoked by fetch-config.sh)
+
+MODE="${1:?usage: dynamic-config.sh <qemu|vault> <config-path>}"
+CONFIG_PATH="${2:?usage: dynamic-config.sh <qemu|vault> <config-path>}"
+
+if [ "$MODE" = "qemu" ]; then
+    # Local QEMU development configuration
+    # GATEWAY is exported by the common fetch-config.sh
+    cat <<EOF >> "$CONFIG_PATH"
+CONFIG_NETWORK_ID='12345'
+CONFIG_NETWORK_NAME='local-testnet'
+CONFIG_JWT_SECRET='1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef'
+CONFIG_EL_STATIC_PEERS='enode://abc123@${GATEWAY}:30303'
+CONFIG_EL_PEERS_IPS='${GATEWAY}'
+CONFIG_SIMULATOR_RPC_URL='http://${GATEWAY}:8545'
+CONFIG_SIMULATOR_WS_URL='ws://${GATEWAY}:8546'
+CONFIG_SIMULATOR_IP='${GATEWAY}'
+EOF
+
+elif [ "$MODE" = "vault" ]; then
+    # Production configuration from Vault
+    # get_data_value and get_ips_from_uris are provided by fetch-config.sh
+
+    network_id=$(get_data_value network_id)
+    network_name=$(get_data_value network_name)
+    jwt_secret=$(get_data_value jwt_secret)
+
+    el_static_peers=$(get_data_value el_static_peers | jq -r 'join(",")')
+    el_peers_ips=$(get_ips_from_uris "$el_static_peers" | tr '\n' ',' | sed 's/,$//')
+
+    simulator_rpc_url=$(get_data_value simulator_rpc_url)
+    simulator_ws_url=$(get_data_value simulator_ws_url)
+    simulator_ip=$(get_ips_from_uris "$simulator_rpc_url" | head -1)
+
+    cat <<EOF >> "$CONFIG_PATH"
+CONFIG_NETWORK_ID='${network_id}'
+CONFIG_NETWORK_NAME='${network_name}'
+CONFIG_JWT_SECRET='${jwt_secret}'
+CONFIG_EL_STATIC_PEERS='${el_static_peers}'
+CONFIG_EL_PEERS_IPS='${el_peers_ips}'
+CONFIG_SIMULATOR_RPC_URL='${simulator_rpc_url}'
+CONFIG_SIMULATOR_WS_URL='${simulator_ws_url}'
+CONFIG_SIMULATOR_IP='${simulator_ip}'
+EOF
+fi
diff --git a/bob-l2/mkosi.extra/etc/bob/firewall-config b/bob-l2/mkosi.extra/etc/bob/firewall-config
index 85e7925f..3685ce47 100644
--- a/bob-l2/mkosi.extra/etc/bob/firewall-config
+++ b/bob-l2/mkosi.extra/etc/bob/firewall-config
@@ -18,6 +18,9 @@ OP_NODE_P2P_PORT=9222
 OP_GETH_P2P_PORT=40404
 ENGINE_API_PORT=8651
 
+# Observability endpoints are loaded from /etc/bob/config.env
+# METRICS_ENDPOINT_1 and METRICS_ENDPOINT_2 variables
+
 # Simulator ports (production only)
 SIMULATOR_RPC_PORT=8645
 SIMULATOR_WS_PORT=8646
@@ -47,6 +50,15 @@ accept_dst_port $CHAIN_ALWAYS_IN tcp $CVM_REVERSE_PROXY_PORT "CVM reverse-proxy"
 # See also init-container.sh
 accept_dst_port $CHAIN_ALWAYS_OUT udp $NTP_PORT "NTP"
 
+# Observability metrics endpoints (always allowed, host-only)
+# These are configured dynamically and only from host namespace
+if [ -n "$METRICS_ENDPOINT_1" ]; then
+    accept_dst_ip_port $CHAIN_ALWAYS_OUT tcp "$METRICS_ENDPOINT_1" $HTTPS_PORT "Metrics endpoint 1 (Flashbots)"
+fi
+if [ -n "$METRICS_ENDPOINT_2" ]; then
+    accept_dst_ip_port $CHAIN_ALWAYS_OUT tcp "$METRICS_ENDPOINT_2" $HTTPS_PORT "Metrics endpoint 2 (Searcher)"
+fi
+
 ###########################################################################
 # (3) MAINTENANCE_IN: Inbound rules for Maintenance Mode
 ###########################################################################
diff --git a/bob-l2/mkosi.extra/usr/bin/fetch-config.sh b/bob-l2/mkosi.extra/usr/bin/fetch-config.sh
deleted file mode 100755
index ddbdf7fb..00000000
--- a/bob-l2/mkosi.extra/usr/bin/fetch-config.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/sh
-set -eu -o pipefail
-
-# This script fetches couple of pre-defined keys from Vault
-# and writes them to /etc/bob/config.env as:
-# CONFIG_{KEY}='{VALUE}'
-
-CONFIG_PATH=/etc/bob/config.env
-
-# Don't override if config already exists
-if [ -f "$CONFIG_PATH" ]; then
-    echo "Config already exists at $CONFIG_PATH,
skipping" - exit 0 -fi - -if dmidecode -s system-manufacturer 2>/dev/null | grep -q "QEMU" && \ - [ -f /etc/systemd/system/serial-console.service ]; then - echo "Running in local QEMU dev image, using default test values" - - # Get default gateway (host in QEMU user-mode networking) - GATEWAY=$(ip route | awk '/default/ {print $3}') - if [ -z "$GATEWAY" ]; then - echo "Warning: Could not detect gateway, falling back to 10.0.2.2" - GATEWAY="10.0.2.2" - fi - - cat < "$CONFIG_PATH" -CONFIG_NETWORK_ID='12345' -CONFIG_NETWORK_NAME='local-testnet' -CONFIG_JWT_SECRET='1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef' -CONFIG_EL_STATIC_PEERS='enode://abc123@${GATEWAY}:30303' -CONFIG_EL_PEERS_IPS='${GATEWAY}' -CONFIG_SIMULATOR_RPC_URL='http://${GATEWAY}:8545' -CONFIG_SIMULATOR_WS_URL='ws://${GATEWAY}:8546' -CONFIG_SIMULATOR_IP='${GATEWAY}' -EOF - - exit 0 -fi - -fetch_metadata_value() { - curl -s \ - --header "Metadata-Flavor: Google" \ - "http://metadata/computeMetadata/v1/instance/attributes/$1" -} - -instance_name=$(fetch_metadata_value "name") -vault_addr=$(fetch_metadata_value "vault_addr") -vault_auth_mount=$(fetch_metadata_value "vault_auth_mount_gcp") -vault_kv_path=$(fetch_metadata_value "vault_kv_path") -vault_kv_common_suffix=$(fetch_metadata_value "vault_kv_common_suffix") - -gcp_token=$(curl \ - --header "Metadata-Flavor: Google" \ - --data-urlencode "audience=http://vault/$instance_name" \ - --data-urlencode "format=full" \ - "http://metadata/computeMetadata/v1/instance/service-accounts/default/identity") - -vault_token=$(curl \ - --data "$(printf '{"role":"%s","jwt":"%s"}' "$instance_name" "$gcp_token")" \ - "${vault_addr}/v1/${vault_auth_mount}/login" | \ - jq -r .auth.client_token) - -common_data=$(curl \ - --header "X-Vault-Token: ${vault_token}" \ - "${vault_addr}/v1/${vault_kv_path}/node/${vault_kv_common_suffix}" | - jq -c .data.data) -secret_data=$(curl \ - --header "X-Vault-Token: ${vault_token}" \ - 
"${vault_addr}/v1/${vault_kv_path}/node/${instance_name}" | - jq -c .data.data) - -# merge objects -data=$(echo "$common_data $secret_data" | jq -s 'add') - -get_data_value() { - echo "$data" | jq -rc --arg key "$1" '.[$key]' -} - -get_ips_from_uris() { - # eh, good enough for our usecase - echo "$1" | grep -oE '[0-9]{1,3}(\.[0-9]{1,3}){3}' -} - -network_id=$(get_data_value network_id) -network_name=$(get_data_value network_name) -jwt_secret=$(get_data_value jwt_secret) - -el_static_peers=$(get_data_value el_static_peers | jq -r 'join(",")') -el_peers_ips=$(get_ips_from_uris "$el_static_peers" | tr '\n' ',' | sed 's/,$//') - -simulator_rpc_url=$(get_data_value simulator_rpc_url) -simulator_ws_url=$(get_data_value simulator_ws_url) -simulator_ip=$(get_ips_from_uris "$simulator_rpc_url") - -cat <> "$CONFIG_PATH" -CONFIG_NETWORK_ID='${network_id}' -CONFIG_NETWORK_NAME='${network_name}' -CONFIG_JWT_SECRET='${jwt_secret}' -CONFIG_EL_STATIC_PEERS='${el_static_peers}' -CONFIG_EL_PEERS_IPS='${el_peers_ips}' -CONFIG_SIMULATOR_RPC_URL='${simulator_rpc_url}' -CONFIG_SIMULATOR_WS_URL='${simulator_ws_url}' -CONFIG_SIMULATOR_IP='${simulator_ip}' -EOF From b920e8554d818d0ecff40419d7d2abd79435d05f Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Thu, 12 Feb 2026 16:19:40 +0000 Subject: [PATCH 02/10] remove the extra metric endpoint --- .../etc/prometheus/prometheus.yml.tmpl | 13 ------- .../mkosi.extra/usr/bin/fetch-config.sh | 39 ++++++------------- .../mkosi.extra/usr/bin/init-container.sh | 14 +++---- bob-l1/mkosi.extra/etc/bob/firewall-config | 15 +++---- bob-l2/mkosi.extra/etc/bob/firewall-config | 15 +++---- 5 files changed, 29 insertions(+), 67 deletions(-) diff --git a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl index 931fb05c..6cdef828 100644 --- a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl +++ b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl @@ -40,16 +40,3 @@ remote_write: 
{{ end }} {{ end }} -{{ if (datasource "config" "remote_write_searcher_url") }} - # Searcher endpoint (optional) - - url: {{ datasource "config" "remote_write_searcher_url" }} - write_relabel_configs: - # Only send aggregated metrics - - source_labels: [__name__] - regex: 'flashbox:.*' - action: keep - {{ if (datasource "config" "remote_write_searcher_auth") }} - headers: - Authorization: {{ datasource "config" "remote_write_searcher_auth" }} - {{ end }} -{{ end }} diff --git a/bob-common/mkosi.extra/usr/bin/fetch-config.sh b/bob-common/mkosi.extra/usr/bin/fetch-config.sh index f8c0ea52..98c56b89 100755 --- a/bob-common/mkosi.extra/usr/bin/fetch-config.sh +++ b/bob-common/mkosi.extra/usr/bin/fetch-config.sh @@ -30,42 +30,31 @@ write_observability_config() { local metrics_flashbots_url="$1" local metrics_flashbots_username="$2" local metrics_flashbots_password="$3" - local metrics_searcher_url="$4" - local metrics_searcher_auth="$5" - - # Extract IPs for firewall rules - local metrics_endpoint_1="" - local metrics_endpoint_2="" - + + # Extract IP for firewall rules + local metrics_endpoint="" + if [ -n "$metrics_flashbots_url" ]; then - metrics_endpoint_1=$(get_ips_from_uris "$metrics_flashbots_url" | head -1) - fi - if [ -n "$metrics_searcher_url" ]; then - metrics_endpoint_2=$(get_ips_from_uris "$metrics_searcher_url" | head -1) + metrics_endpoint=$(get_ips_from_uris "$metrics_flashbots_url" | head -1) fi - + # Append observability config to main config cat <> "$CONFIG_PATH" CONFIG_METRICS_FLASHBOTS_URL='${metrics_flashbots_url}' CONFIG_METRICS_FLASHBOTS_USERNAME='${metrics_flashbots_username}' CONFIG_METRICS_FLASHBOTS_PASSWORD='${metrics_flashbots_password}' -CONFIG_METRICS_SEARCHER_URL='${metrics_searcher_url}' -CONFIG_METRICS_SEARCHER_AUTH='${metrics_searcher_auth}' -METRICS_ENDPOINT_1='${metrics_endpoint_1}' -METRICS_ENDPOINT_2='${metrics_endpoint_2}' +METRICS_ENDPOINT='${metrics_endpoint}' EOF - + # Create observability config for Prometheus if metrics are 
configured - if [ -n "$metrics_flashbots_url" ] || [ -n "$metrics_searcher_url" ]; then + if [ -n "$metrics_flashbots_url" ]; then mkdir -p /etc/flashbox cat < "$OBSERVABILITY_CONFIG_PATH" { "remote_write_flashbots_url": "${metrics_flashbots_url}", "remote_write_flashbots_username": "${metrics_flashbots_username}", "remote_write_flashbots_password": "${metrics_flashbots_password}", - "remote_write_flashbots_auth": $([ -n "${metrics_flashbots_username}" ] && echo '"true"' || echo '""'), - "remote_write_searcher_url": "${metrics_searcher_url}", - "remote_write_searcher_auth": "${metrics_searcher_auth}" + "remote_write_flashbots_auth": $([ -n "${metrics_flashbots_username}" ] && echo '"true"' || echo '""') } EOF echo "Observability configuration written to $OBSERVABILITY_CONFIG_PATH" @@ -96,7 +85,7 @@ if dmidecode -s system-manufacturer 2>/dev/null | grep -q "QEMU" && \ fi # Add empty observability config for local dev - write_observability_config "" "" "" "" "" + write_observability_config "" "" "" exit 0 fi @@ -160,15 +149,11 @@ fi metrics_flashbots_url=$(get_data_value metrics_flashbots_url) metrics_flashbots_username=$(get_data_value metrics_flashbots_username) metrics_flashbots_password=$(get_data_value metrics_flashbots_password) -metrics_searcher_url=$(get_data_value metrics_searcher_url) -metrics_searcher_auth=$(get_data_value metrics_searcher_auth) # Write observability configuration write_observability_config \ "$metrics_flashbots_url" \ "$metrics_flashbots_username" \ - "$metrics_flashbots_password" \ - "$metrics_searcher_url" \ - "$metrics_searcher_auth" + "$metrics_flashbots_password" echo "Configuration successfully fetched and written to $CONFIG_PATH" diff --git a/bob-common/mkosi.extra/usr/bin/init-container.sh b/bob-common/mkosi.extra/usr/bin/init-container.sh index 201fd5f7..8aaa554f 100755 --- a/bob-common/mkosi.extra/usr/bin/init-container.sh +++ b/bob-common/mkosi.extra/usr/bin/init-container.sh @@ -86,16 +86,12 @@ ns_iptables -A OUTPUT -p udp 
--sport $SEARCHER_INPUT_UDP_PORT -j DROP ns_iptables -A OUTPUT -p tcp --sport $SEARCHER_INPUT_UDP_PORT -j DROP ns_iptables -A OUTPUT -p tcp --sport $SEARCHER_INPUT_TCP_PORT -j DROP -# Block container from accessing metrics endpoints -# Source config.env to get METRICS_ENDPOINT_1 and METRICS_ENDPOINT_2 +# Block container from accessing metrics endpoint +# Source config.env to get METRICS_ENDPOINT . /etc/bob/config.env -if [ -n "${METRICS_ENDPOINT_1:-}" ]; then - echo "Blocking container access to metrics endpoint 1: $METRICS_ENDPOINT_1" - ns_iptables -A OUTPUT -d "$METRICS_ENDPOINT_1" -j DROP -fi -if [ -n "${METRICS_ENDPOINT_2:-}" ]; then - echo "Blocking container access to metrics endpoint 2: $METRICS_ENDPOINT_2" - ns_iptables -A OUTPUT -d "$METRICS_ENDPOINT_2" -j DROP +if [ -n "${METRICS_ENDPOINT:-}" ]; then + echo "Blocking container access to metrics endpoint: $METRICS_ENDPOINT" + ns_iptables -A OUTPUT -d "$METRICS_ENDPOINT" -j DROP fi # Helper, only used in sourced script below diff --git a/bob-l1/mkosi.extra/etc/bob/firewall-config b/bob-l1/mkosi.extra/etc/bob/firewall-config index 32f802f3..9b02c240 100644 --- a/bob-l1/mkosi.extra/etc/bob/firewall-config +++ b/bob-l1/mkosi.extra/etc/bob/firewall-config @@ -17,8 +17,8 @@ NTP_PORT=123 CL_P2P_PORT=9000 EL_P2P_PORT=30303 -# Observability endpoints are loaded from /etc/bob/config.env -# METRICS_ENDPOINT_1 and METRICS_ENDPOINT_2 variables +# Observability endpoint is loaded from /etc/bob/config.env +# METRICS_ENDPOINT variable TITAN_IP="52.207.17.217" TITAN_BUNDLE_PORT=1338 # bundle submission (always on) @@ -62,13 +62,10 @@ accept_dst_port $CHAIN_ALWAYS_OUT udp $CL_P2P_PORT "CL P2P (UDP)" # Note: this is accessible only from host, searcher netns has DROP on those accept_dst_port $CHAIN_ALWAYS_OUT udp $NTP_PORT "NTP" -# Observability metrics endpoints (always allowed, host-only) -# These are configured dynamically and only from host namespace -if [ -n "$METRICS_ENDPOINT_1" ]; then - accept_dst_ip_port 
$CHAIN_ALWAYS_OUT tcp "$METRICS_ENDPOINT_1" $HTTPS_PORT "Metrics endpoint 1 (Flashbots)" -fi -if [ -n "$METRICS_ENDPOINT_2" ]; then - accept_dst_ip_port $CHAIN_ALWAYS_OUT tcp "$METRICS_ENDPOINT_2" $HTTPS_PORT "Metrics endpoint 2 (Searcher)" +# Observability metrics endpoint (always allowed, host-only) +# Configured dynamically and only from host namespace +if [ -n "$METRICS_ENDPOINT" ]; then + accept_dst_ip_port $CHAIN_ALWAYS_OUT tcp "$METRICS_ENDPOINT" $HTTPS_PORT "Metrics endpoint (Flashbots)" fi # Titan builder bundle endpoints (always on) diff --git a/bob-l2/mkosi.extra/etc/bob/firewall-config b/bob-l2/mkosi.extra/etc/bob/firewall-config index 3685ce47..635c0b9a 100644 --- a/bob-l2/mkosi.extra/etc/bob/firewall-config +++ b/bob-l2/mkosi.extra/etc/bob/firewall-config @@ -18,8 +18,8 @@ OP_NODE_P2P_PORT=9222 OP_GETH_P2P_PORT=40404 ENGINE_API_PORT=8651 -# Observability endpoints are loaded from /etc/bob/config.env -# METRICS_ENDPOINT_1 and METRICS_ENDPOINT_2 variables +# Observability endpoint is loaded from /etc/bob/config.env +# METRICS_ENDPOINT variable # Simulator ports (production only) SIMULATOR_RPC_PORT=8645 @@ -50,13 +50,10 @@ accept_dst_port $CHAIN_ALWAYS_IN tcp $CVM_REVERSE_PROXY_PORT "CVM reverse-proxy" # See also init-container.sh accept_dst_port $CHAIN_ALWAYS_OUT udp $NTP_PORT "NTP" -# Observability metrics endpoints (always allowed, host-only) -# These are configured dynamically and only from host namespace -if [ -n "$METRICS_ENDPOINT_1" ]; then - accept_dst_ip_port $CHAIN_ALWAYS_OUT tcp "$METRICS_ENDPOINT_1" $HTTPS_PORT "Metrics endpoint 1 (Flashbots)" -fi -if [ -n "$METRICS_ENDPOINT_2" ]; then - accept_dst_ip_port $CHAIN_ALWAYS_OUT tcp "$METRICS_ENDPOINT_2" $HTTPS_PORT "Metrics endpoint 2 (Searcher)" +# Observability metrics endpoint (always allowed, host-only) +# Configured dynamically and only from host namespace +if [ -n "$METRICS_ENDPOINT" ]; then + accept_dst_ip_port $CHAIN_ALWAYS_OUT tcp "$METRICS_ENDPOINT" $HTTPS_PORT "Metrics endpoint 
(Flashbots)" fi ########################################################################### From b0557e300e506d5098eaf337a88da581ad104e53 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Thu, 12 Feb 2026 17:27:33 +0000 Subject: [PATCH 03/10] refinements and enable observability services --- .../mkosi.extra/etc/prometheus/prometheus.yml.tmpl | 2 +- bob-common/mkosi.extra/usr/bin/fetch-config.sh | 10 ++++------ bob-common/mkosi.postinst | 7 ++++++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl index 6cdef828..1d81854b 100644 --- a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl +++ b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl @@ -23,9 +23,9 @@ scrape_configs: static_configs: - targets: ['localhost:9256'] +{{ if (datasource "config" "remote_write_flashbots_url") }} # Remote write configuration (dynamically configured) remote_write: -{{ if (datasource "config" "remote_write_flashbots_url") }} # Flashbots endpoint - url: {{ datasource "config" "remote_write_flashbots_url" }} write_relabel_configs: diff --git a/bob-common/mkosi.extra/usr/bin/fetch-config.sh b/bob-common/mkosi.extra/usr/bin/fetch-config.sh index 98c56b89..51985be6 100755 --- a/bob-common/mkosi.extra/usr/bin/fetch-config.sh +++ b/bob-common/mkosi.extra/usr/bin/fetch-config.sh @@ -46,10 +46,9 @@ CONFIG_METRICS_FLASHBOTS_PASSWORD='${metrics_flashbots_password}' METRICS_ENDPOINT='${metrics_endpoint}' EOF - # Create observability config for Prometheus if metrics are configured - if [ -n "$metrics_flashbots_url" ]; then - mkdir -p /etc/flashbox - cat < "$OBSERVABILITY_CONFIG_PATH" + # Create observability config for Prometheus (always needed for gomplate templating) + mkdir -p /etc/flashbox + cat < "$OBSERVABILITY_CONFIG_PATH" { "remote_write_flashbots_url": "${metrics_flashbots_url}", "remote_write_flashbots_username": "${metrics_flashbots_username}", @@ 
-57,8 +56,7 @@ EOF "remote_write_flashbots_auth": $([ -n "${metrics_flashbots_username}" ] && echo '"true"' || echo '""') } EOF - echo "Observability configuration written to $OBSERVABILITY_CONFIG_PATH" - fi + echo "Observability configuration written to $OBSERVABILITY_CONFIG_PATH" } # Check for local QEMU development environment diff --git a/bob-common/mkosi.postinst b/bob-common/mkosi.postinst index 227beea5..56f618f0 100755 --- a/bob-common/mkosi.postinst +++ b/bob-common/mkosi.postinst @@ -22,14 +22,19 @@ mkdir "$BUILDROOT/etc/dropbear" mkdir -p "$BUILDROOT/etc/systemd/system/minimal.target.wants" for service in \ network-setup.service \ + fetch-config.service \ logrotate.timer \ delay-pipe.service \ wait-for-key.service \ searcher-firewall.service \ dropbear.service \ + input-only-proxy.service \ searcher-container.service \ ssh-pubkey-server.service \ - cvm-reverse-proxy.service + cvm-reverse-proxy.service \ + prometheus.service \ + node-exporter.service \ + process-exporter.service do mkosi-chroot systemctl enable "$service" ln -sf "/etc/systemd/system/$service" "$BUILDROOT/etc/systemd/system/minimal.target.wants/" From 0c9121cfe7e98650d268c4b989f4774984c68203 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Thu, 12 Feb 2026 19:08:04 +0000 Subject: [PATCH 04/10] fix gomplate build and prometheus permission issues --- bob-common/mkosi.build | 8 ++++++++ bob-common/mkosi.conf | 1 - .../mkosi.extra/etc/systemd/system/prometheus.service | 2 +- bob-common/mkosi.postinst | 3 +++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/bob-common/mkosi.build b/bob-common/mkosi.build index a277a877..d44cd227 100755 --- a/bob-common/mkosi.build +++ b/bob-common/mkosi.build @@ -52,3 +52,11 @@ build_rust_package \ "input-only-proxy" \ "v0.0.2" \ "https://github.com/flashbots/input-only-proxy" + +# Build gomplate (template engine for Prometheus config) +make_git_package \ + "gomplate" \ + "v4.3.3" \ + "https://github.com/hairyhenderson/gomplate" \ + 'go build 
-trimpath -ldflags "-s -w -buildid=" -o ./build/gomplate ./cmd/gomplate' \ + "build/gomplate:/usr/bin/gomplate" diff --git a/bob-common/mkosi.conf b/bob-common/mkosi.conf index 896a5709..8c571387 100644 --- a/bob-common/mkosi.conf +++ b/bob-common/mkosi.conf @@ -32,7 +32,6 @@ Packages=podman prometheus prometheus-node-exporter prometheus-process-exporter - gomplate BuildPackages=build-essential git diff --git a/bob-common/mkosi.extra/etc/systemd/system/prometheus.service b/bob-common/mkosi.extra/etc/systemd/system/prometheus.service index 5559530f..40ac4def 100644 --- a/bob-common/mkosi.extra/etc/systemd/system/prometheus.service +++ b/bob-common/mkosi.extra/etc/systemd/system/prometheus.service @@ -9,7 +9,7 @@ Requires=fetch-config.service Type=simple User=prometheus Group=prometheus -ExecStartPre=/usr/bin/gomplate -f /etc/prometheus/prometheus.yml.tmpl -o /etc/prometheus/prometheus.yml -d config=/etc/flashbox/observability-config.json +ExecStartPre=+/usr/bin/gomplate -f /etc/prometheus/prometheus.yml.tmpl -o /etc/prometheus/prometheus.yml -d config=/etc/flashbox/observability-config.json ExecStart=/usr/bin/prometheus \ --config.file=/etc/prometheus/prometheus.yml \ --storage.tsdb.path=/var/lib/prometheus/ \ diff --git a/bob-common/mkosi.postinst b/bob-common/mkosi.postinst index 56f618f0..d174cb41 100755 --- a/bob-common/mkosi.postinst +++ b/bob-common/mkosi.postinst @@ -40,6 +40,9 @@ do ln -sf "/etc/systemd/system/$service" "$BUILDROOT/etc/systemd/system/minimal.target.wants/" done +# Fix ownership for prometheus directories (mkosi ExtraTrees sets root:root) +mkosi-chroot chown -R prometheus:prometheus /var/lib/prometheus + # Don't reserve port 22 mkosi-chroot systemctl disable ssh.service ssh.socket mkosi-chroot systemctl mask ssh.service ssh.socket From 2468d13a5f8e21191311c08c7c49a64c7eba8d86 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Thu, 12 Feb 2026 19:56:49 +0000 Subject: [PATCH 05/10] fix gomplate syntax issues --- 
.../etc/prometheus/prometheus.yml.tmpl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl index 1d81854b..6a65dc9a 100644 --- a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl +++ b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl @@ -23,20 +23,21 @@ scrape_configs: static_configs: - targets: ['localhost:9256'] -{{ if (datasource "config" "remote_write_flashbots_url") }} +{{- $config := (datasource "config") }} +{{- if $config.remote_write_flashbots_url }} + # Remote write configuration (dynamically configured) remote_write: # Flashbots endpoint - - url: {{ datasource "config" "remote_write_flashbots_url" }} + - url: {{ $config.remote_write_flashbots_url }} write_relabel_configs: # Only send aggregated metrics - source_labels: [__name__] regex: 'flashbox:.*' action: keep - {{ if (datasource "config" "remote_write_flashbots_auth") }} + {{- if $config.remote_write_flashbots_auth }} basic_auth: - username: {{ datasource "config" "remote_write_flashbots_username" }} - password: {{ datasource "config" "remote_write_flashbots_password" }} - {{ end }} -{{ end }} - + username: {{ $config.remote_write_flashbots_username }} + password: {{ $config.remote_write_flashbots_password }} + {{- end }} +{{- end }} From 8d00e76da390e91def6f1bacfaa35f71468e98b4 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Thu, 12 Feb 2026 21:26:35 +0000 Subject: [PATCH 06/10] extra refinement and fix binary naming --- bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl | 2 +- bob-common/mkosi.extra/etc/systemd/system/node-exporter.service | 1 + .../mkosi.extra/etc/systemd/system/process-exporter.service | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl index 6a65dc9a..86671e16 100644 --- 
a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl +++ b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl @@ -15,7 +15,7 @@ scrape_configs: metric_relabel_configs: # Only keep aggregated metrics for remote write - source_labels: [__name__] - regex: 'node_(cpu|memory|disk)_.*' + regex: 'node_(cpu|memory|disk|filesystem)_.*' action: keep # Process exporter for container monitoring diff --git a/bob-common/mkosi.extra/etc/systemd/system/node-exporter.service b/bob-common/mkosi.extra/etc/systemd/system/node-exporter.service index 4e6b89a5..eee8ceb1 100644 --- a/bob-common/mkosi.extra/etc/systemd/system/node-exporter.service +++ b/bob-common/mkosi.extra/etc/systemd/system/node-exporter.service @@ -47,6 +47,7 @@ ExecStart=/usr/bin/prometheus-node-exporter \ --no-collector.vmstat \ --no-collector.xfs \ --no-collector.zfs \ + --no-collector.systemd \ --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run|var/lib/docker)($|/) Restart=on-failure RestartSec=5s diff --git a/bob-common/mkosi.extra/etc/systemd/system/process-exporter.service b/bob-common/mkosi.extra/etc/systemd/system/process-exporter.service index 741d1112..f77b9615 100644 --- a/bob-common/mkosi.extra/etc/systemd/system/process-exporter.service +++ b/bob-common/mkosi.extra/etc/systemd/system/process-exporter.service @@ -8,7 +8,7 @@ Wants=network-online.target Type=simple User=prometheus Group=prometheus -ExecStart=/usr/bin/process-exporter \ +ExecStart=/usr/bin/prometheus-process-exporter \ --web.listen-address=127.0.0.1:9256 \ --config.path=/etc/prometheus/process-exporter.yml \ --children From 5431bd683506156984883387b03327649e7edb87 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Fri, 13 Feb 2026 11:00:35 +0000 Subject: [PATCH 07/10] remove unsupported cgroup option --- bob-common/mkosi.extra/etc/prometheus/process-exporter.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml 
b/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml index 25889bc9..e279aac3 100644 --- a/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml +++ b/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml @@ -8,8 +8,3 @@ process_names: - name: "searcher-container" cmdline: - '/usr/bin/catatonit' - - # Monitor all processes in the searcher cgroup - - name: "searcher-cgroup" - cgroups: - - "machine.slice/machine-searcher*" From 0187cadf1cafc6ac812892e42a8aeb916fbb0386 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Fri, 13 Feb 2026 11:35:08 +0000 Subject: [PATCH 08/10] fix container init process path in process-exporter config --- bob-common/mkosi.extra/etc/prometheus/process-exporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml b/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml index e279aac3..a4333213 100644 --- a/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml +++ b/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml @@ -7,4 +7,4 @@ process_names: # Monitor the container init process - name: "searcher-container" cmdline: - - '/usr/bin/catatonit' + - 'catatonit' From 5ee980b2856f87d8de35a056182c148080f411a8 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Fri, 13 Feb 2026 12:37:21 +0000 Subject: [PATCH 09/10] remove the podman regex to capture the container metrics correctly --- .../mkosi.extra/etc/prometheus/process-exporter.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml b/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml index a4333213..033f901d 100644 --- a/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml +++ b/bob-common/mkosi.extra/etc/prometheus/process-exporter.yml @@ -1,10 +1,5 @@ process_names: - # Monitor the searcher container cgroup - - name: "{{.Matches}}" - cmdline: - - 'podman' - - # Monitor the container init 
process + # Monitor the searcher container (conmon + all children via --children flag) - name: "searcher-container" cmdline: - - 'catatonit' + - 'conmon.*searcher-container' From 0681f8dbf74878171a3a35463393f3dceaf5b9b8 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Fri, 13 Feb 2026 12:41:55 +0000 Subject: [PATCH 10/10] add network metrics to collect --- bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl index 86671e16..a9c1814a 100644 --- a/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl +++ b/bob-common/mkosi.extra/etc/prometheus/prometheus.yml.tmpl @@ -15,7 +15,7 @@ scrape_configs: metric_relabel_configs: # Only keep aggregated metrics for remote write - source_labels: [__name__] - regex: 'node_(cpu|memory|disk|filesystem)_.*' + regex: 'node_(cpu|memory|disk|filesystem|network)_.*' action: keep # Process exporter for container monitoring