From c8fc59de7cff7f5d868de756857ba489960d70e1 Mon Sep 17 00:00:00 2001 From: hbc Date: Wed, 11 Feb 2026 15:59:59 -0800 Subject: [PATCH 1/8] feat: add qemu vm helper --- .gitignore | 5 +- hack/qemu/user-data.yaml | 40 +++ hack/qemu/vm.sh | 521 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 565 insertions(+), 1 deletion(-) create mode 100644 hack/qemu/user-data.yaml create mode 100755 hack/qemu/vm.sh diff --git a/.gitignore b/.gitignore index b79aa92..c1fccb8 100644 --- a/.gitignore +++ b/.gitignore @@ -50,4 +50,7 @@ config.json # Build artifacts /build/ -/dist/ \ No newline at end of file +/dist/ + +# qemu vm state +.vm diff --git a/hack/qemu/user-data.yaml b/hack/qemu/user-data.yaml new file mode 100644 index 0000000..a8a856d --- /dev/null +++ b/hack/qemu/user-data.yaml @@ -0,0 +1,40 @@ +#cloud-config + +# User configuration +users: + - name: ubuntu + sudo: ALL=(ALL) NOPASSWD:ALL + shell: /bin/bash + groups: [sudo] + lock_passwd: false + ssh_authorized_keys: + - __SSH_PUBLIC_KEY__ + +# Package management +package_update: true +package_upgrade: false +packages: + - curl + - jq + - apt-transport-https + - ca-certificates + - net-tools + - vim + +# Mount host repo into guest via virtio-9p +mounts: + - ["flexnode", "/flex-node", "9p", "trans=virtio,version=9p2000.L,nofail", "0", "0"] + +# Run commands on first boot +runcmd: + - mkdir -p /flex-node + - mount -a + - echo "hello, world" + +# Write files +write_files: + - path: /etc/flexnode/provisioned + content: | + provisioned=true + timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + permissions: "0644" diff --git a/hack/qemu/vm.sh b/hack/qemu/vm.sh new file mode 100755 index 0000000..18e9462 --- /dev/null +++ b/hack/qemu/vm.sh @@ -0,0 +1,521 @@ +#!/usr/bin/env bash +# +# vm.sh - Manage QEMU-based Ubuntu VMs with cloud-init support +# +# Usage: +# ./hack/qemu/vm.sh [options] +# +# Commands: +# start Create and start a VM +# stop Stop a running VM +# logs Show VM serial console logs +# +# Start options: +# -n, --name 
VM name (default: flexnode-vm) +# -m, --memory Memory in MB (default: 2048) +# -c, --cpus Number of CPUs (default: 2) +# -d, --disk-size Disk size (default: 20G) +# -p, --ssh-port Host port forwarded to guest SSH (default: 2222) +# -i, --image Path to Ubuntu cloud image (downloaded if not present) +# -u, --user-data Path to cloud-init user-data file (default: hack/qemu/user-data.yaml) +# --no-snapshot Use the base image directly instead of creating a snapshot +# +# Stop options: +# -n, --name VM name (default: flexnode-vm) +# -f, --force Force kill (SIGKILL) instead of graceful shutdown (SIGTERM) +# --clean Also remove disk, seed ISO, and log files +# +# Logs options: +# -n, --name VM name (default: flexnode-vm) +# -f, --follow Follow log output (like tail -f) +# +# Examples: +# ./hack/qemu/vm.sh start +# ./hack/qemu/vm.sh start -n my-vm --memory 4096 --cpus 4 +# ./hack/qemu/vm.sh stop +# ./hack/qemu/vm.sh stop --force --clean +# ./hack/qemu/vm.sh logs +# ./hack/qemu/vm.sh logs --follow +# +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" + +# ------------------------------------------------------------------- +# Detect host architecture +# ------------------------------------------------------------------- +HOST_ARCH="$(uname -m)" +case "${HOST_ARCH}" in + x86_64) GUEST_ARCH="amd64" ;; + aarch64|arm64) GUEST_ARCH="arm64" ;; + *) echo "[ERROR] Unsupported host architecture: ${HOST_ARCH}" >&2; exit 1 ;; +esac + +# ------------------------------------------------------------------- +# Defaults +# ------------------------------------------------------------------- +VM_NAME="flexnode-vm" +MEMORY="2048" +CPUS="2" +DISK_SIZE="20G" +SSH_PORT="2222" +USE_SNAPSHOT=true +FORCE=false +CLEAN=false + +VM_DIR="${REPO_ROOT}/.vm" +IMAGE_BASE_URL="https://cloud-images.ubuntu.com/minimal/releases/noble/release" +IMAGE_URL="${IMAGE_BASE_URL}/ubuntu-24.04-minimal-cloudimg-${GUEST_ARCH}.img" +IMAGE_FILE="" +USER_DATA="${SCRIPT_DIR}/user-data.yaml" + +# ------------------------------------------------------------------- +# Helpers +# ------------------------------------------------------------------- +info() { echo "[INFO] $*"; } +warn() { echo "[WARN] $*" >&2; } +error() { echo "[ERROR] $*" >&2; exit 1; } + +usage() { + cat <<'EOF' +Usage: + ./hack/qemu/vm.sh [options] + +Commands: + start Create and start a VM + stop Stop a running VM + logs Show VM serial console logs + +Start options: + -n, --name VM name (default: flexnode-vm) + -m, --memory Memory in MB (default: 2048) + -c, --cpus Number of CPUs (default: 2) + -d, --disk-size Disk size (default: 20G) + -p, --ssh-port Host port forwarded to guest SSH (default: 2222) + -i, --image Path to Ubuntu cloud image (downloaded if not present) + -u, --user-data Path to cloud-init user-data file (default: hack/qemu/user-data.yaml) + --no-snapshot Use the base image directly instead of creating a snapshot + +Stop options: + -n, --name VM name (default: flexnode-vm) + -f, --force Force kill (SIGKILL) instead of graceful shutdown (SIGTERM) + --clean Also remove disk, 
seed ISO, and log files + +Logs options: + -n, --name VM name (default: flexnode-vm) + -f, --follow Follow log output (like tail -f) + +Examples: + ./hack/qemu/vm.sh start + ./hack/qemu/vm.sh start -n my-vm --memory 4096 --cpus 4 + ./hack/qemu/vm.sh stop + ./hack/qemu/vm.sh stop --force --clean + ./hack/qemu/vm.sh logs + ./hack/qemu/vm.sh logs --follow +EOF + exit 0 +} + +check_deps() { + local qemu_bin + if [[ "${GUEST_ARCH}" == "arm64" ]]; then + qemu_bin="qemu-system-aarch64" + else + qemu_bin="qemu-system-x86_64" + fi + + local missing=() + for cmd in "${qemu_bin}" qemu-img; do + if ! command -v "$cmd" &>/dev/null; then + missing+=("$cmd") + fi + done + + # We need at least one ISO generation tool + if ! command -v mkisofs &>/dev/null && ! command -v genisoimage &>/dev/null && ! command -v hdiutil &>/dev/null; then + missing+=("mkisofs (or genisoimage or hdiutil)") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + echo "" + echo "Missing required dependencies: ${missing[*]}" + echo "" + echo "Install on macOS:" + echo " brew install qemu cdrtools" + echo "" + echo "Install on Ubuntu/Debian:" + echo " sudo apt-get install qemu-system-x86 qemu-utils genisoimage" + echo "" + exit 1 + fi +} + +# Build a cloud-init NoCloud seed ISO without requiring cloud-localds. +# Uses mkisofs, genisoimage, or hdiutil (macOS) — whichever is available. 
+create_seed_iso() { + local iso_path="$1" + local user_data="$2" + local meta_data="$3" + + local staging + staging="$(mktemp -d)" + cp "${user_data}" "${staging}/user-data" + cp "${meta_data}" "${staging}/meta-data" + + if command -v mkisofs &>/dev/null; then + mkisofs -output "${iso_path}" -volid cidata -joliet -rock \ + "${staging}/user-data" "${staging}/meta-data" + elif command -v genisoimage &>/dev/null; then + genisoimage -output "${iso_path}" -volid cidata -joliet -rock \ + "${staging}/user-data" "${staging}/meta-data" + elif command -v hdiutil &>/dev/null; then + hdiutil makehybrid -o "${iso_path}" -joliet -iso \ + -default-volume-name cidata "${staging}" + else + rm -rf "${staging}" + error "No ISO generation tool found" + fi + + rm -rf "${staging}" +} + +# =================================================================== +# Command: start +# =================================================================== +cmd_start() { + while [[ $# -gt 0 ]]; do + case "$1" in + -n|--name) VM_NAME="$2"; shift 2 ;; + -m|--memory) MEMORY="$2"; shift 2 ;; + -c|--cpus) CPUS="$2"; shift 2 ;; + -d|--disk-size) DISK_SIZE="$2"; shift 2 ;; + -p|--ssh-port) SSH_PORT="$2"; shift 2 ;; + -i|--image) IMAGE_FILE="$2"; shift 2 ;; + -u|--user-data) USER_DATA="$2"; shift 2 ;; + --no-snapshot) USE_SNAPSHOT=false; shift ;; + -h|--help) usage ;; + *) error "Unknown option: $1" ;; + esac + done + + check_deps + mkdir -p "${VM_DIR}" + + # --------------------------------------------------------------- + # Download Ubuntu cloud image if needed + # --------------------------------------------------------------- + if [[ -z "${IMAGE_FILE}" ]]; then + IMAGE_FILE="${VM_DIR}/ubuntu-cloud.img" + fi + + if [[ ! -f "${IMAGE_FILE}" ]]; then + info "Downloading Ubuntu cloud image..." 
+ info "URL: ${IMAGE_URL}" + curl -L -o "${IMAGE_FILE}" "${IMAGE_URL}" + info "Download complete: ${IMAGE_FILE}" + else + info "Using existing image: ${IMAGE_FILE}" + fi + + # --------------------------------------------------------------- + # Create VM disk (snapshot backed by the cloud image) + # --------------------------------------------------------------- + VM_DISK="${VM_DIR}/${VM_NAME}.qcow2" + + if [[ "${USE_SNAPSHOT}" == true ]]; then + info "Creating snapshot disk: ${VM_DISK} (backed by base image)" + qemu-img create -f qcow2 -b "${IMAGE_FILE}" -F qcow2 "${VM_DISK}" "${DISK_SIZE}" + else + info "Copying base image to: ${VM_DISK}" + cp "${IMAGE_FILE}" "${VM_DISK}" + qemu-img resize "${VM_DISK}" "${DISK_SIZE}" + fi + + # --------------------------------------------------------------- + # Resolve local SSH public key + # --------------------------------------------------------------- + SSH_PUB_KEY="" + for key_file in "${HOME}/.ssh/id_ed25519.pub" "${HOME}/.ssh/id_rsa.pub" "${HOME}/.ssh/id_ecdsa.pub"; do + if [[ -f "${key_file}" ]]; then + SSH_PUB_KEY="$(cat "${key_file}")" + info "Using SSH public key: ${key_file}" + break + fi + done + + if [[ -z "${SSH_PUB_KEY}" ]]; then + warn "No SSH public key found in ~/.ssh/. The VM will not have key-based SSH access." + fi + + # --------------------------------------------------------------- + # Render user-data with SSH key into .vm/ + # --------------------------------------------------------------- + RENDERED_USER_DATA="${VM_DIR}/user-data.yaml" + + if [[ ! 
-f "${USER_DATA}" ]]; then + error "User-data template not found: ${USER_DATA}" + fi + + if [[ -n "${SSH_PUB_KEY}" ]]; then + sed "s|__SSH_PUBLIC_KEY__|${SSH_PUB_KEY}|g" "${USER_DATA}" > "${RENDERED_USER_DATA}" + else + # Remove the placeholder line entirely if no key is available + sed '/__SSH_PUBLIC_KEY__/d' "${USER_DATA}" > "${RENDERED_USER_DATA}" + fi + info "Rendered user-data: ${RENDERED_USER_DATA}" + + # --------------------------------------------------------------- + # Build cloud-init seed ISO + # --------------------------------------------------------------- + SEED_ISO="${VM_DIR}/${VM_NAME}-seed.iso" + META_DATA="${VM_DIR}/meta-data" + + # Create minimal meta-data + cat > "${META_DATA}" </dev/null | grep -q 1; then + ACCEL="-accel hvf" + fi + ;; + Linux) + if [[ -r /dev/kvm ]]; then + ACCEL="-accel kvm" + fi + ;; + esac + + # --------------------------------------------------------------- + # Launch VM in background + # --------------------------------------------------------------- + QEMU_PID_FILE="${VM_DIR}/${VM_NAME}.pid" + QEMU_LOG="${VM_DIR}/${VM_NAME}.log" + + info "============================================" + info " Launching VM: ${VM_NAME}" + info " Arch: ${GUEST_ARCH} (${HOST_ARCH})" + info " Memory: ${MEMORY} MB" + info " CPUs: ${CPUS}" + info " Disk: ${VM_DISK}" + info " SSH port: ${SSH_PORT} -> 22" + info " Mount: ${REPO_ROOT} -> /flex-node" + info " Log: ${QEMU_LOG}" + info " PID file: ${QEMU_PID_FILE}" + info "============================================" + + # shellcheck disable=SC2086 + "${QEMU_BIN}" \ + ${MACHINE_ARGS} \ + ${ACCEL} \ + -m "${MEMORY}" \ + -smp "${CPUS}" \ + -drive file="${VM_DISK}",format=qcow2,if=virtio \ + -drive file="${SEED_ISO}",format=raw,if=virtio \ + -netdev user,id=net0,hostfwd=tcp::"${SSH_PORT}"-:22 \ + -device virtio-net-pci,netdev=net0 \ + -virtfs local,path="${REPO_ROOT}",mount_tag=flexnode,security_model=mapped-xattr,id=flexnode0 \ + -daemonize \ + -pidfile "${QEMU_PID_FILE}" \ + -serial 
file:"${QEMU_LOG}" \ + -display none + + QEMU_PID="$(cat "${QEMU_PID_FILE}")" + info "VM started in background (PID: ${QEMU_PID})" + + # --------------------------------------------------------------- + # Wait for SSH to become available + # --------------------------------------------------------------- + info "Waiting for SSH to become available on localhost:${SSH_PORT}..." + + MAX_ATTEMPTS=60 + ATTEMPT=0 + while [[ ${ATTEMPT} -lt ${MAX_ATTEMPTS} ]]; do + ATTEMPT=$((ATTEMPT + 1)) + + # Check that the QEMU process is still alive + if ! kill -0 "${QEMU_PID}" 2>/dev/null; then + echo "" + error "QEMU process exited unexpectedly. Check log: ${QEMU_LOG}" + fi + + if ssh -o BatchMode=yes -o ConnectTimeout=2 -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null -p "${SSH_PORT}" ubuntu@localhost \ + "true" 2>/dev/null; then + break + fi + + printf "." + sleep 3 + done + echo "" + + if [[ ${ATTEMPT} -ge ${MAX_ATTEMPTS} ]]; then + warn "SSH did not become available after ${MAX_ATTEMPTS} attempts." + warn "The VM may still be booting. Check log: ${QEMU_LOG}" + echo "" + echo "You can try connecting manually:" + echo "" + echo " ssh -o StrictHostKeyChecking=no -p ${SSH_PORT} ubuntu@localhost" + echo "" + echo "To stop the VM:" + echo " ./hack/qemu/vm.sh stop -n ${VM_NAME}" + exit 1 + fi + + info "VM is ready!" + echo "" + echo " ssh -o StrictHostKeyChecking=no -p ${SSH_PORT} ubuntu@localhost" + echo "" + echo "To stop the VM:" + echo " ./hack/qemu/vm.sh stop -n ${VM_NAME}" + echo "" +} + +# =================================================================== +# Command: stop +# =================================================================== +cmd_stop() { + while [[ $# -gt 0 ]]; do + case "$1" in + -n|--name) VM_NAME="$2"; shift 2 ;; + -f|--force) FORCE=true; shift ;; + --clean) CLEAN=true; shift ;; + -h|--help) usage ;; + *) error "Unknown option: $1" ;; + esac + done + + local pid_file="${VM_DIR}/${VM_NAME}.pid" + + if [[ ! 
-f "${pid_file}" ]]; then + error "PID file not found: ${pid_file}. Is the VM running?" + fi + + local pid + pid="$(cat "${pid_file}")" + + if ! kill -0 "${pid}" 2>/dev/null; then + warn "Process ${pid} is not running. Cleaning up stale PID file." + rm -f "${pid_file}" + else + if [[ "${FORCE}" == true ]]; then + info "Force killing VM '${VM_NAME}' (PID: ${pid})..." + kill -9 "${pid}" + else + info "Stopping VM '${VM_NAME}' (PID: ${pid})..." + kill "${pid}" + + # Wait for process to exit + local timeout=15 + while kill -0 "${pid}" 2>/dev/null && [[ ${timeout} -gt 0 ]]; do + sleep 1 + timeout=$((timeout - 1)) + done + + if kill -0 "${pid}" 2>/dev/null; then + warn "VM did not stop gracefully, sending SIGKILL..." + kill -9 "${pid}" 2>/dev/null || true + fi + fi + + rm -f "${pid_file}" + info "VM '${VM_NAME}' stopped." + fi + + if [[ "${CLEAN}" == true ]]; then + info "Cleaning up VM artifacts..." + rm -f "${VM_DIR}/${VM_NAME}.qcow2" + rm -f "${VM_DIR}/${VM_NAME}-seed.iso" + rm -f "${VM_DIR}/${VM_NAME}.log" + rm -f "${VM_DIR}/user-data.yaml" + rm -f "${VM_DIR}/meta-data" + info "Cleanup complete." + fi +} + +# =================================================================== +# Command: logs +# =================================================================== +cmd_logs() { + local follow=false + + while [[ $# -gt 0 ]]; do + case "$1" in + -n|--name) VM_NAME="$2"; shift 2 ;; + -f|--follow) follow=true; shift ;; + -h|--help) usage ;; + *) error "Unknown option: $1" ;; + esac + done + + local log_file="${VM_DIR}/${VM_NAME}.log" + + if [[ ! -f "${log_file}" ]]; then + error "Log file not found: ${log_file}. Has the VM been started?" + fi + + if [[ "${follow}" == true ]]; then + info "Following logs for '${VM_NAME}' (Ctrl-C to stop)..." 
+ tail -f "${log_file}" + else + cat "${log_file}" + fi +} + +# =================================================================== +# Main: dispatch subcommand +# =================================================================== +if [[ $# -lt 1 ]]; then + usage +fi + +COMMAND="$1" +shift + +case "${COMMAND}" in + start) cmd_start "$@" ;; + stop) cmd_stop "$@" ;; + logs) cmd_logs "$@" ;; + -h|--help) usage ;; + *) error "Unknown command: ${COMMAND}. Use 'start', 'stop', or 'logs'." ;; +esac From b9860b2bca4d22b9e353eb44892bb9658ad2ff65 Mon Sep 17 00:00:00 2001 From: hbc Date: Wed, 11 Feb 2026 16:01:38 -0800 Subject: [PATCH 2/8] chore: simlify signal handling --- main.go | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/main.go b/main.go index 7edddd7..5eb1a8c 100644 --- a/main.go +++ b/main.go @@ -34,19 +34,9 @@ func main() { rootCmd.AddCommand(NewVersionCommand()) // Set up context with signal handling - ctx, cancel := context.WithCancel(context.Background()) + ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) defer cancel() - // Handle shutdown signals - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) - go func() { - <-sigCh - // Use a basic logger for shutdown signal since context may not be available - fmt.Println("Received shutdown signal, cancelling operations...") - cancel() - }() - // Set up persistent pre-run to initialize config and logger rootCmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error { // Skip config loading for version command From 232a1070ff2ed735a93fc0dd107a31ca0ed7ea93 Mon Sep 17 00:00:00 2001 From: hbc Date: Wed, 11 Feb 2026 16:49:09 -0800 Subject: [PATCH 3/8] fix: RuntimeConfig -> RuncConfig --- pkg/config/structs.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/config/structs.go b/pkg/config/structs.go index 3c1cc4a..a17e1f8 100644 --- a/pkg/config/structs.go +++ 
b/pkg/config/structs.go @@ -10,7 +10,7 @@ type Config struct { Containerd ContainerdConfig `json:"containerd"` Kubernetes KubernetesConfig `json:"kubernetes"` CNI CNIConfig `json:"cni"` - Runc RuntimeConfig `json:"runc"` + Runc RuncConfig `json:"runc"` Node NodeConfig `json:"node"` Paths PathsConfig `json:"paths"` Npd NPDConfig `json:"npd"` @@ -84,8 +84,8 @@ type KubernetesConfig struct { URLTemplate string `json:"urlTemplate"` } -// RuntimeConfig holds configuration settings for the container runtime (runc). -type RuntimeConfig struct { +// RuncConfig holds configuration settings for the container runtime (runc). +type RuncConfig struct { Version string `json:"version"` URL string `json:"url"` } From 4c1b2f6d29df9d95e219a3b29fea7ef14e6b5061 Mon Sep 17 00:00:00 2001 From: hbc Date: Wed, 11 Feb 2026 19:48:18 -0800 Subject: [PATCH 4/8] fix: add minimal bootstrap process --- commands.go | 52 +++++++++++++++++++ main.go | 8 +-- pkg/bootstrapper/minimal.go | 44 ++++++++++++++++ .../system_configuration_installer.go | 7 +-- pkg/config/config.go | 3 ++ 5 files changed, 108 insertions(+), 6 deletions(-) create mode 100644 pkg/bootstrapper/minimal.go diff --git a/commands.go b/commands.go index bf8d798..75bfb1a 100644 --- a/commands.go +++ b/commands.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "time" "github.com/sirupsen/logrus" @@ -25,6 +26,57 @@ var ( BuildTime = "unknown" ) +func NewApplyCommand() *cobra.Command { + var flagMode string + + cmd := &cobra.Command{ + Use: "apply", + Short: "Apply configuration to the node", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + logger := logger.GetLoggerFromContext(ctx) + + cfg, err := config.LoadConfig(configPath) + if err != nil { + return fmt.Errorf("failed to load config from %s: %w", configPath, err) + } + + var b interface { + Bootstrap(context.Context) (*bootstrapper.ExecutionResult, error) + } + + if strings.EqualFold(flagMode, "minimal") { + logger.Info("Using 
minimal bootstrapper mode") + b = bootstrapper.NewMinimal(cfg, logger) + } else { + logger.Info("Using full bootstrapper mode") + b = bootstrapper.New(cfg, logger) + } + + result, err := b.Bootstrap(ctx) + if err != nil { + return err + } + + fmt.Printf( + "Bootstrap completed with success: %t, duration: %v, steps: %d\n", + result.Success, result.Duration, result.StepCount, + ) + if !result.Success { + fmt.Printf("Bootstrap failed with error: %s\n", result.Error) + return fmt.Errorf("bootstrap failed: %s", result.Error) + } + + return nil + }, + } + + cmd.Flags().StringVar(&flagMode, "mode", "minimal", "minimal or full") + + return cmd +} + // NewAgentCommand creates a new agent command func NewAgentCommand() *cobra.Command { cmd := &cobra.Command{ diff --git a/main.go b/main.go index 5eb1a8c..d54579a 100644 --- a/main.go +++ b/main.go @@ -19,9 +19,10 @@ var ( func main() { rootCmd := &cobra.Command{ - Use: "aks-flex-node", - Short: "AKS Flex Node Agent", - Long: "Azure Kubernetes Service Flex Node Agent for edge computing scenarios", + Use: "aks-flex-node", + Short: "AKS Flex Node Agent", + Long: "Azure Kubernetes Service Flex Node Agent for edge computing scenarios", + SilenceUsage: true, } // Add global flags for configuration @@ -29,6 +30,7 @@ func main() { // Don't mark as required globally - we'll check in PersistentPreRunE for commands that need it // Add commands + rootCmd.AddCommand(NewApplyCommand()) rootCmd.AddCommand(NewAgentCommand()) rootCmd.AddCommand(NewUnbootstrapCommand()) rootCmd.AddCommand(NewVersionCommand()) diff --git a/pkg/bootstrapper/minimal.go b/pkg/bootstrapper/minimal.go new file mode 100644 index 0000000..c1ae49e --- /dev/null +++ b/pkg/bootstrapper/minimal.go @@ -0,0 +1,44 @@ +package bootstrapper + +import ( + "context" + + "github.com/sirupsen/logrus" + "go.goms.io/aks/AKSFlexNode/pkg/components/containerd" + "go.goms.io/aks/AKSFlexNode/pkg/components/kube_binaries" + "go.goms.io/aks/AKSFlexNode/pkg/components/kubelet" + 
"go.goms.io/aks/AKSFlexNode/pkg/components/npd" + "go.goms.io/aks/AKSFlexNode/pkg/components/runc" + "go.goms.io/aks/AKSFlexNode/pkg/components/services" + "go.goms.io/aks/AKSFlexNode/pkg/components/system_configuration" + "go.goms.io/aks/AKSFlexNode/pkg/config" +) + +type MinimalBootstrapper struct { + *BaseExecutor +} + +func NewMinimal(cfg *config.Config, logger *logrus.Logger) *MinimalBootstrapper { + return &MinimalBootstrapper{ + BaseExecutor: NewBaseExecutor(cfg, logger), + } +} + +func (b *MinimalBootstrapper) Bootstrap(ctx context.Context) (*ExecutionResult, error) { + // Define the bootstrap steps in order - using modules directly + steps := []Executor{ + system_configuration.NewInstaller(b.logger), + runc.NewInstaller(b.logger), + containerd.NewInstaller(b.logger), + kube_binaries.NewInstaller(b.logger), + kubelet.NewInstaller(b.logger), + npd.NewInstaller(b.logger), + services.NewInstaller(b.logger), + } + + return b.ExecuteSteps(ctx, steps, "bootstrap") +} + +func (b *MinimalBootstrapper) Unbootstrap(ctx context.Context) (*ExecutionResult, error) { + return nil, nil +} diff --git a/pkg/components/system_configuration/system_configuration_installer.go b/pkg/components/system_configuration/system_configuration_installer.go index 1eb669b..bdd3d87 100644 --- a/pkg/components/system_configuration/system_configuration_installer.go +++ b/pkg/components/system_configuration/system_configuration_installer.go @@ -33,9 +33,10 @@ func (i *Installer) Execute(ctx context.Context) error { } // Configure resolv.conf - if err := i.configureResolvConf(); err != nil { - return fmt.Errorf("failed to configure resolv.conf: %w", err) - } + // FIXME: this doesn't make sense to me, so disable for now + // if err := i.configureResolvConf(); err != nil { + // return fmt.Errorf("failed to configure resolv.conf: %w", err) + // } i.logger.Info("System configuration completed successfully") return nil diff --git a/pkg/config/config.go b/pkg/config/config.go index 498d91e..34e8685 
100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -259,6 +259,9 @@ func (c *Config) Validate() error { if c.Azure.TenantID == "" { return fmt.Errorf("azure.tenantId is required") } + if c.Azure.TargetCluster == nil { + return fmt.Errorf("azure.targetCluster configuration is required") + } if c.Azure.TargetCluster.Location == "" { return fmt.Errorf("azure.targetCluster.location is required") } From 530e677c72de6ac86c5d4818f4bd212112566aea Mon Sep 17 00:00:00 2001 From: hbc Date: Wed, 11 Feb 2026 23:16:12 -0800 Subject: [PATCH 5/8] doc: describe agent node host environment baseline --- docs/node-env.md | 288 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 288 insertions(+) create mode 100644 docs/node-env.md diff --git a/docs/node-env.md b/docs/node-env.md new file mode 100644 index 0000000..8dc3c15 --- /dev/null +++ b/docs/node-env.md @@ -0,0 +1,288 @@ +# Agent Node Host Environment + +## Overview + +Each Kubernetes agent (worker) node must be provisioned with the software and services +required to join a cluster and run workloads. Beyond this minimal baseline, certain +scenarios demand additional setup; for example, GPU-capable nodes may need NVIDIA +drivers and the appropriate device plugin to expose GPU resources to Kubernetes. + +At the same time, agent nodes are routinely restarted, patched, or replaced as part of +ongoing maintenance and upgrade cycles. The mechanisms for performing these lifecycle +operations vary across cloud and on-prem environments, depending on the available APIs +and underlying infrastructure. + +To ensure AKS flex nodes can function consistently across this broad range of environments, +this document defines the baseline runtime assumptions and requirements, and describes +the expected behaviors for key lifecycle operations. + +### Non-Goals + +- We will limit the support scope to Linux-based nodes and focus on Ubuntu distro for now. 
+ This is because Ubuntu is the widely and commonly available Linux distribution + across the target environments. + +## Baseline Environment Requirements + +### CPU Only Nodes + +- A Linux-based OS with `systemd` init system; +- Modern Linux kernel (currently LTS or supported release) enabled with cgroup v2; +- Swap disabled; +- Syslog with rotation configured; +- Time synchronization configured; +- Proper host level DNS setup; +- Outbound connectivity to cluster control plane endpoint; +- Container runtime: + * `containerd` w/ 2.0+ version; + * `runc` +- Kubernetes components: + * `kubelet` matching with the target worker node version; + * Control plane public CA certificate(s); + * TLS bootstrap configurations; + * Other cloud provider binaries; +- NFTables / IPtables installed for Kubernetes network policies; +- Network forward, IP masquerade and bridge settings configured for Kubernetes networking; +- Support tools / binaries (e.g., `curl`, `ping`, etc) for diagnostics and troubleshooting; + +### GPU-Capable Nodes + +- All of the above CPU node requirements; +- GPU drivers and runtime (e.g. NVIDIA drivers and CUDA toolkit for NVIDIA GPUs) + compatible with OS kernel; +- RDMA, SR-IOV and InfiniBand drivers and runtime for GPU direct communication (if applicable); +- Updated container runtime with GPU support; + +### Additional Requirements + +- Node identity for identifying and authenticating the node to cluster control plane; +- CNI plugin binaries and configurations; +- Node-problem-detector; +- Node local DNS caching; +- Background auto-repair agent; +- Pre-cached container images for critical system components; +- Support for adding optional feature layers & customizations during node image + baking or bootstrapping process; +- In some environments, pre-built VHD images might not be available. In such + cases, the node bootstrapping process should also handle the initial OS image baking and provisioning to ensure a consistent baseline environment. 
+- In some environments, the node might have limited outbound connectivity
+  (e.g., no direct access to public internet). In such cases, the node bootstrapping process should also handle pulling necessary components
+  through proxy or fallback endpoints.
+
+## Node Lifecycle Operations
+
+This section describes lifecycle activities across heterogeneous environments.
+Each operation defines:
+
+- **Inputs**: what information/config is required
+- **Actions**: what the platform does
+- **Expected behaviors**: node and cluster-level outcomes
+- **Failure handling**: what happens when things go wrong
+
+### Node VHD Image Baking
+
+**Purpose**: Produce a base node image (VHD or similar) that satisfies baseline
+requirements and can be instantiated consistently across environments.
+
+**Inputs**:
+
+- Base OS distribution and version (e.g., Ubuntu 24.04)
+- System configurations
+- Versions of container runtime, kubelet, and other components
+- Optional feature layers (e.g., GPU drivers)
+
+**Actions**:
+
+- Install and configure system settings & tunings
+- Install container runtime and Kubernetes components
+- **Leave out**: cluster-specific configurations / credentials
+
+**Expected behaviors**:
+
+- Produced image is **immutable** and **reproducible** given the same inputs.
+- Sources for all installed components **MUST** be pinned with qualified versions
+  and checksums for traceability and security.
+- Every baking step fully completes without partial failures.
+- Image is able to boot successfully and reach a "ready-to-bootstrap" state.
+- GPU images boot with drivers loaded.
+
+**Failure handling**:
+
+- Build pipeline produces actionable error messages
+- Failed builds do not produce or overwrite existing images
+
+### Node Bootstrapping
+
+**Purpose**: Turn a newly created machine instance into a functional Kubernetes
+node that can join the cluster and serve workloads.
+ +**Inputs**: + +- Cluster endpoint (API server URL, CA bundle) +- Kubelet bootstrap credentials +- Node configuration (e.g., kubelet config, runtime settings, node labels/taints) +- Environment-specific instance metadata (node name, region/zone). Can be + exposed later via cloud provider. + +**Actions**: + +- Ensure network & container runtime are ready +- Render kubelet configuration and start kubelet +- Kubelet performs TLS bootstrapping to obtain node credentials and join the + cluster +- Deploy and enable per node workloads + +**Expected behaviors**: + +- Node becomes `Ready` within a target SLA +- Node labels/taints are applied correctly +- Node reports correct capacity/allocatable resources, including GPU + if applicable +- Bootstrap process is **idempotent** and can be safely re-run on the same node + for transient failures + +**Failure handling**: + +- Ability to detect and report failure details and kind of failure + (i.e., transient vs terminal) for better troubleshooting and remediation + +### Node Bootstrapping w/ Baking + +**Purpose**: In environments without pre-baked images, the bootstrapping process should also handle the initial image baking and provisioning to ensure a consistent baseline environment. + +**Inputs**: + +- Same as Node VHD Image Baking and Node Bootstrapping +- (Optional) fallback/alternative endpoints for pulling necessary components in + environments with limited outbound connectivity + +**Actions**: + +- Perform image baking steps as described in Node VHD Image Baking +- Proceed with bootstrapping steps as described in Node Bootstrapping + +**Expected behaviors**: + +- All expected behaviors from both Node VHD Image Baking and Node Bootstrapping +- In addition, the process should be resilient to transient failures both phases + and support **idempotent** retries. 
+
+**Failure handling**:
+
+- All failure handling mechanisms from both Node VHD Image Baking and Node Bootstrapping
+
+### Node Rebooting & Repairing
+
+_TODO_: This part needs more work and discussions
+
+**Purpose**: Handle planned and unplanned node reboots and repairs while
+maintaining node health and minimizing disruption to workloads.
+
+**Node Rebooting**
+
+- Inputs: N/A
+- Expected behaviors:
+  * Node is cordoned/drained before planned reboot
+  * Node becomes `Ready` within a target SLA after reboot
+
+**Node Repairing**
+
+- Inputs: N/A
+- Expected behaviors:
+  * Monitoring components detect node issues and trigger repair actions
+  * Impacted services are restarted
+
+**Failure handling**:
+
+- If node fails to recover within a defined SLA, it should be marked as
+  unhealthy and trigger replacement workflow.
+- In case of repair failures, exponential backoff retries should be attempted;
+  errors should be exposed for troubleshooting and alerting.
+
+### Node Components Version Upgrades
+
+_TODO_: This part needs more details and breakdown designs
+
+**Purpose**: Upgrade node components to newer versions.
+ +**Inputs**: + +- Target versions for components +- Upgrade strategy (e.g., in-place vs replacement) + +**Actions**: + +- Cordon/drain node to evict workloads +- In-place upgrade: + * Install newer versions of components/configurations + * Restart necessary services + * Verify node health and functionality + * Uncordon node +- Replacement upgrade: + * Deprovision existing node and underlying resources + * Provision new node with updated image or configurations + * Join new node to cluster and verify health + +**Expected behaviors**: + +- Node is reporting expected versions for components after upgrade +- In-place upgrade process is idempotent and can be safely retried in case of + transient failures + +**Failure handling**: + +- Failures should be reported for troubleshooting and alerting +- In-place upgrade failures should not leave node open to scheduling. + Provide rollback if possible or recommend node replacement otherwise. + +### Node Re-imaging + +**Purpose**: Re-image a node to restore it to a known good state, either for +recovering from failures or applying updates. + +**Inputs**: + +- Same as Node Bootstrapping, plus: +- Target node and target node image + +**Actions**: + +- Cordon and drain the node to evict workloads +- Re-image the underlying machine instance with the target image +- Perform bootstrapping steps to rejoin the cluster + +**Expected behaviors**: + +- Re-image results in a clean, baseline-compliant host state. +- Node returns to `Ready` state within a target SLA. +- Node identity (e.g., name) is preserved after re-imaging. +- Re-image is idempotent and can be safely retried in case of transient failures. + +**Failure handling**: + +- Re-image failures should be reported for troubleshooting and alerting +- Re-image failed node should not be left in schedulable state. + +### Node Deletion + +**Purpose**: Remove a node from the cluster intentionally, either for +scaling down, decommissioning, or drifting replacement. 
+
+**Inputs**: node name
+
+**Actions**:
+
+- Cordon and drain the node to evict workloads if not forced deletion
+- Delete node object from cluster
+- Deprovision underlying compute / network resources if applicable
+
+**Expected behaviors**:
+
+- Node is gracefully removed from cluster and workloads are rescheduled
+- No orphaned resources are left behind
+
+**Failure handling**:
+
+- PDB violations or other issues preventing eviction should be reported clearly
+- Infrastructure resource cleanup failures should be retried and alerted
+  if not successful after SLA
\ No newline at end of file

From 17991a8cfe07d4599fecdb287019d188d669f22f Mon Sep 17 00:00:00 2001
From: hbc
Date: Wed, 11 Feb 2026 23:27:18 -0800
Subject: [PATCH 6/8] doc: refine wordings

---
 docs/node-env.md | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/docs/node-env.md b/docs/node-env.md
index 8dc3c15..89d3690 100644
--- a/docs/node-env.md
+++ b/docs/node-env.md
@@ -21,15 +21,22 @@ the expected behaviors for key lifecycle operations.
 - We will limit the support scope to Linux-based nodes and focus on Ubuntu
   distro for now. This is because Ubuntu is the widely and commonly available
   Linux distribution across the target environments.
+- Credential management (bootstrap token rotation, CA renewal, etc) is out of scope
+  for this document, but will be handled by the operations described below.
+- Extra security hardening and compliance requirements are out of scope for this document,
+  but can be added as optional layers on top of the baseline environment in the future.
+- Detailed GPU device plugin requirements and enablement strategies will be addressed in
+  a separate document.
## Baseline Environment Requirements ### CPU Only Nodes - A Linux-based OS with `systemd` init system; -- Modern Linux kernel (currently LTS or supported release) enabled with cgroup v2; +- Modern Linux kernel (currently LTS or supported release, minimum 5.19) enabled + with cgroup v2, namespaces, overlayfs, eBPF etc for container support. - Swap disabled; -- Syslog with rotation configured; +- System level logging enabled with rotation configured; - Time synchronization configured; - Proper host level DNS setup; - Outbound connectivity to cluster control plane endpoint; @@ -64,10 +71,11 @@ the expected behaviors for key lifecycle operations. - Support for adding optional feature layers & customizations during node image baking or bootstrapping process; - In some environments, pre-built VHD images might not be available. In such - cases, the node bootstrapping process should also handle the initial OS image baking and provisioning to ensure a consistent baseline environment. + cases, the node bootstrapping process should also handle the initial OS image + baking and provisioning to ensure a consistent baseline environment. - In some environments, the node might have limited outbound connectivity - (e.g., no direct access to public internet). In such cases, the node bootstrapping process should also handle pulling necessary components - through proxy or fallback endpoints. + (e.g., no direct access to public internet). In such cases, the node bootstrapping + process should also handle pulling necessary components through proxy or fallback endpoints. ## Node Lifecycle Operations @@ -148,7 +156,9 @@ node that can join the cluster and serve workloads. ### Node Bootstrapping w/ Baking -**Purpose**: In environments without pre-baked images, the bootstrapping process should also handle the initial image baking and provisioning to ensure a consistent baseline environment. 
+**Purpose**: In environments without pre-baked images, the bootstrapping process +should also handle the initial image baking and provisioning to ensure a +consistent baseline environment. **Inputs**: @@ -180,14 +190,14 @@ maintaining node health and minimizing disruption to workloads. **Node Rebooting** -- Inputs: N/A +- Inputs: node name and reboot type (planned vs unplanned) - Expected behaviors: * Node is cordoned/drained before planned reboot * Node becomes `Ready` within a target SLA after reboot **Node Repairing** -- Inputs: N/A +- Inputs: node name and repair category - Expected behaviors: * Monitoring components detect node issues and trigger repair actions * Impacted services are being restarted @@ -203,7 +213,8 @@ maintaining node health and minimizing disruption to workloads. _TODO_: This part needs more details and breakdown designs -**Purpose**: Upgrade on node components to newer versions. +**Purpose**: Upgrade on node components (kubelet, container runtime, +CNI plugins) to newer versions. **Inputs**: @@ -266,7 +277,7 @@ recovering from failures or applying updates. ### Node Deletion **Purpose**: Remove a node from the cluster intentionally, either for -scaling down, decommissioning, or drifting replacement. +scaling down, decommissioning, or drift replacement. **Inputs**: node name From 3e0cb10805ea1440a4ff05bc7fb7c52a9677955c Mon Sep 17 00:00:00 2001 From: hbc Date: Wed, 11 Feb 2026 23:45:57 -0800 Subject: [PATCH 7/8] doc: wording fix --- docs/node-env.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/node-env.md b/docs/node-env.md index 89d3690..484763c 100644 --- a/docs/node-env.md +++ b/docs/node-env.md @@ -21,8 +21,8 @@ the expected behaviors for key lifecycle operations. - We will limit the support scope to Linux-based nodes and focus on Ubuntu distro for now. This is because Ubuntu is the widely and commonly available Linux distribution across the target environments. 
-- Credential management (bootstrap token rotation, CA renewal, etc) is out of scope
-  for this document, but will be handled by the operations described below.
+- Credential management (bootstrap token distribution & rotation, CA renewal, etc)
+  is out of scope for this document, but will be handled by the operations described below.
 - Extra security hardening and compliance requirements are out of scope for this document,
   but can be added as optional layers on top of the baseline environment in the future.
 - Detailed GPU device plugin requirements and enablement strategies will be addressed in

From 5fb712b411be6c68bf466178957c22fa9e4d8da9 Mon Sep 17 00:00:00 2001
From: hbc
Date: Thu, 12 Feb 2026 10:33:39 -0800
Subject: [PATCH 8/8] doc: feedbacks

---
 docs/node-env.md | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/docs/node-env.md b/docs/node-env.md
index 484763c..bc1f1f9 100644
--- a/docs/node-env.md
+++ b/docs/node-env.md
@@ -40,17 +40,20 @@ the expected behaviors for key lifecycle operations.
- Time synchronization configured; - Proper host level DNS setup; - Outbound connectivity to cluster control plane endpoint; -- Container runtime: +- Container runtime components: * `containerd` w/ 2.0+ version; * `runc` - Kubernetes components: * `kubelet` matching with the target worker node version; - * Control plane public CA certificate(s); - * TLS bootstrap configurations; * Other cloud provider binaries; - NFTables / IPtables installed for Kubernetes network policies; - Network forward, IP masquerade and bridge settings configured for Kubernetes networking; - Support tools / binaries (e.g., `curl`, `ping`, etc) for diagnostics and troubleshooting; +- Configurations: + * Standard container runtime configurations layout on the host; + * Standard Kubernetes node configurations layout on the host; + * Control plane public CA certificate(s); + * TLS bootstrap configurations; ### GPU-Capable Nodes @@ -58,7 +61,8 @@ the expected behaviors for key lifecycle operations. - GPU drivers and runtime (e.g. NVIDIA drivers and CUDA toolkit for NVIDIA GPUs) compatible with OS kernel; - RDMA, SR-IOV and InfiniBand drivers and runtime for GPU direct communication (if applicable); -- Updated container runtime with GPU support; +- Configurations: + * Updated container runtime configurations with support for GPU drivers and runtimes; ### Additional Requirements @@ -66,6 +70,7 @@ the expected behaviors for key lifecycle operations. - CNI plugin binaries and configurations; - Node-problem-detector; - Node local DNS caching; +- VPN components for cross region/cloud connectivity; - Background auto-repair agent; - Pre-cached container images for critical system components; - Support for adding optional feature layers & customizations during node image @@ -107,13 +112,19 @@ requirements and can be instantiated consistently across environments. **Expected behaviors**: -- Produced image is **immutable** and **reproducible** giving the same inputs. 
+- Produced image is **immutable**[^1] and **reproducible**[^2] given the same inputs.
 - Sources for all installed components **MUST** be pinned with qualified versions
   and checksums for traceability and security.
 - Every baking step fully completes without partial failures.
 - Image is able to boot successfully and reach a "ready-to-bootstrap" state.
 - GPU image boot with drivers loaded.
 
+[^1]: Immutable means once the image is built and published, it should not be modified.
+  Any updates or changes should trigger a new image build with a new version/tag.
+
+[^2]: Reproducible means given the same inputs and build process, the output image
+  should be identical in its installed components and configuration setup.
+
 **Failure handling**:
 
 - Build pipeline produces actionable error messages
@@ -127,10 +138,11 @@ node that can join the cluster and serve workloads.
 
 **Inputs**:
 
 - Cluster endpoint (API server URL, CA bundle)
-- Kubelet bootstrap credentials
+- Kubelet bootstrap credentials (node identity credentials)
 - Node configuration (e.g., kubelet config, runtime settings, node labels/taints)
 - Environment-specific instance metadata (node name, region/zone). Can be
   exposed later via cloud provider.
+- VPN configurations for cross region/cloud connectivity if applicable
 
 **Actions**: