Skip to content
13 changes: 12 additions & 1 deletion spread.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ backends:
- ubuntu-20.04-64:
image: ubuntu-2004-64-virt-enabled
workers: 1
storage: 15G
storage: 20G
plan: n1-standard-4

qemu:
systems:
Expand Down Expand Up @@ -86,6 +87,16 @@ prepare: |
lxc image import tests/cache/lxd-ubuntu-16.04
fi
fi
# We need `mount fstype=devpts` for snapd to work properly within LXD
# containers.
#
# Without this change, snapd gets stuck in an infinite loop on Ubuntu 20.04
# and 22.04 container images while attempting to install the `lxd` snap as
# part of system initialization.
#
# https://github.com/canonical/lxd/blob/b13c4f528bf7c9703f673eaf15476c11e59a8074/lxd/apparmor/instance_lxc.go#L513
snap refresh --channel 5.21/stable lxd

# Start lxd instance
lxd init --auto

Expand Down
130 changes: 129 additions & 1 deletion spread/lxd.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,17 @@ func (p *lxdProvider) Allocate(ctx context.Context, system *System) (Server, err
if !p.options.Reuse {
args = append(args, "--ephemeral")
}
instanceType := p.lxdInstanceType(system)
if instanceType != "" {
args = append(args, "-t", instanceType)
}
vmRequested := p.backend.Vm || system.Vm
if vmRequested {
args = append(args, "--vm")
}
if system.Storage != Size(0) {
args = append(args, "-d", fmt.Sprintf("root,size=%d", system.Storage))
}
output, err := exec.Command("lxc", args...).CombinedOutput()
if err != nil {
err = outputErr(output, err)
Expand All @@ -129,7 +140,13 @@ func (p *lxdProvider) Allocate(ctx context.Context, system *System) (Server, err
}

printf("Waiting for lxd container %s to have an address...", name)
timeout := time.After(60 * time.Second)
maxTimeout := 30 * time.Second
if vmRequested {
// VM may take considerably longer to start
// TODO: should this be configurable?
maxTimeout = 180 * time.Second
}
timeout := time.After(maxTimeout)
retry := time.NewTicker(1 * time.Second)
defer retry.Stop()
for {
Expand All @@ -150,6 +167,34 @@ func (p *lxdProvider) Allocate(ctx context.Context, system *System) (Server, err
return nil, err
}
}
printf("Waiting for cloud-init in lxd instance %s to complete...", name)
timeout = time.After(60 * time.Second)
retry = time.NewTicker(1 * time.Second)
defer retry.Stop()
for {
err := p.cloudInitDone(name, 10)
if err != nil {
_, is_timeout := err.(*lxdCloudInitTimeoutError)
_, is_not_present := err.(*lxdCloudInitNotPresentError)
_, is_lxd_agent_not_ready := err.(*lxdAgentNotAvailableError)
if is_not_present {
printf(err.Error())
break
}
if !is_timeout && !is_lxd_agent_not_ready {
s.Discard(ctx)
return nil, err
}
} else {
break
}
select {
case <-retry.C:
case <-timeout:
s.Discard(ctx)
return nil, err
}
}

err = p.tuneSSH(name)
if err != nil {
Expand Down Expand Up @@ -220,6 +265,7 @@ func (p *lxdProvider) lxdImage(system *System) (string, error) {
}

type lxdImageInfo struct {
Type string `yaml:"Type"`
Properties struct {
OS string
Label string
Expand Down Expand Up @@ -330,6 +376,19 @@ NextImage:
return "", fmt.Errorf("cannot obtain info about lxd image %s: %v", fingerprint, err)
}

switch info.Type {
case "container":
if system.Vm {
continue
}
case "virtual-machine":
if !system.Vm {
continue
}
default:
logf("unknown image type %q: %q", fingerprint, info.Type)
}

props := info.Properties
aliases := strings.Split(props.Aliases, ",")

Expand Down Expand Up @@ -359,6 +418,17 @@ NextImage:
return "", errNoImage
}

// lxdInstanceType returns the instance type to request for system.
//
// A plan set on the system itself takes precedence; otherwise the plan
// configured on the backend is used. The empty string is returned when
// neither of them is set.
func (p *lxdProvider) lxdInstanceType(system *System) string {
	if plan := system.Plan; plan != "" {
		return plan
	}
	// Falls back to the backend-wide plan, which may itself be empty.
	return p.backend.Plan
}

func lxdName(system *System) (string, error) {
filename := os.ExpandEnv("$HOME/.spread/lxd-count")
file, err := os.OpenFile(filename, os.O_RDWR, 0644)
Expand Down Expand Up @@ -478,6 +548,64 @@ func (p *lxdProvider) serverJSON(name string) (*lxdServerJSON, error) {
return nil, &lxdNoServerError{name}
}

// lxdCloudInitTimeoutError reports that cloud-init inside the named
// instance had not finished when the status check stopped waiting.
type lxdCloudInitTimeoutError struct {
	name string
}

// Error implements the error interface.
func (e *lxdCloudInitTimeoutError) Error() string {
	const format = "cloud-init not ready yet %q"
	return fmt.Sprintf(format, e.name)
}

// lxdCloudInitNotPresentError reports that the cloud-init command could
// not be found inside the named instance. Its message states that the
// instance is then assumed to be ready.
type lxdCloudInitNotPresentError struct {
	name string
}

// Error implements the error interface.
func (e *lxdCloudInitNotPresentError) Error() string {
	const format = "cloud-init not present in %q, assuming ready state"
	return fmt.Sprintf(format, e.name)
}

// lxdAgentNotAvailableError reports that commands cannot be executed in
// the named instance yet, e.g. because its lxd-agent is not reachable.
type lxdAgentNotAvailableError struct {
	name string
}

// Error implements the error interface.
func (e *lxdAgentNotAvailableError) Error() string {
	const format = "instance not ready yet %q: Failed to connect to lxd-agent."
	return fmt.Sprintf(format, e.name)
}

// cloudInitDone runs "cloud-init status --long --wait" inside the instance
// called name via "lxc exec", bounded by timeout seconds using timeout(1).
//
// It returns nil once the command exits successfully. Otherwise it returns:
//
//   - *lxdCloudInitTimeoutError when the command exits with status 124,
//     i.e. timeout(1) gave up waiting;
//   - *lxdCloudInitNotPresentError when the command exits with status 127,
//     i.e. cloud-init could not be found;
//   - *lxdAgentNotAvailableError when lxc reports that the instance or its
//     agent is not running yet;
//   - a generic error for any other failure, including lxc itself not
//     being runnable.
func (p *lxdProvider) cloudInitDone(name string, timeout int) error {
	script := fmt.Sprintf("timeout %ds cloud-init status --long --wait", timeout)
	output, err := exec.Command("lxc", "exec", name, "--", "sh", "-c", script).CombinedOutput()
	if err == nil {
		logf("cloud-init done: %q: %q", name, output)
		return nil
	}

	// Only a command that started and exited non-zero yields an
	// *exec.ExitError. Anything else (e.g. lxc missing from PATH) must not
	// be asserted blindly, as that would panic.
	exitErr, ok := err.(*exec.ExitError)
	if !ok {
		return fmt.Errorf("cannot query cloud-init status in %q: %v", name, err)
	}

	exitCode := exitErr.ExitCode()
	switch exitCode {
	case 124:
		return &lxdCloudInitTimeoutError{name}
	case 127:
		return &lxdCloudInitNotPresentError{name}
	}

	// Messages lxc prints while the instance cannot run commands yet. They
	// are checked for every remaining exit code so that an unrecognized
	// failure can never fall through and be reported as success.
	notReadyPrefixes := [...]string{
		"Error: LXD VM agent isn't currently running",
		"Error: Failed to connect to lxd-agent",
		"Error: Instance is not running",
	}
	for _, prefix := range notReadyPrefixes {
		if bytes.HasPrefix(output, []byte(prefix)) {
			return &lxdAgentNotAvailableError{name}
		}
	}

	return fmt.Errorf("cloud-init failed %q: (%d) %v",
		name, exitCode, outputErr(output, err))
}

func (p *lxdProvider) tuneSSH(name string) error {
cmds := [][]string{
// Attempt to enable root login with a password through SSH, which is
Expand Down
2 changes: 2 additions & 0 deletions spread/project.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ type Backend struct {

Priority OptionalInt
Manual bool
Vm bool
}

func (b *Backend) String() string { return fmt.Sprintf("backend %q", b.Name) }
Expand Down Expand Up @@ -139,6 +140,7 @@ type System struct {

Priority OptionalInt
Manual bool
Vm bool
}

func (system *System) String() string { return system.Backend + ":" + system.Name }
Expand Down
11 changes: 10 additions & 1 deletion tests/lxd/checks/main/task.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
summary: Ensure it works.

execute: |
echo WORKS $SPREAD_SYSTEM
# Use presence of lxd-agent process as proof of being run in a VM.
#
# systemd-detect-virt(1) unfortunately does not work reliably in the CI
# environment.
#
# We also cannot rely on the fact that LXD populates manufacturer/product
# information for VMs because, for all we know, we are running in a container
# inside an LXD VM.
is_vm=$(pgrep lxd-agent &> /dev/null && echo 1 || echo 0)
echo WORKS NPROC=$(nproc) VM=$is_vm $SPREAD_SYSTEM
21 changes: 19 additions & 2 deletions tests/lxd/spread.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,30 @@
project: spread

backends:
lxd:
lxd-a:
type: lxd
systems:
- ubuntu-16.04
- ubuntu-18.04
lxd-b:
type: lxd
systems:
- ubuntu-20.04
- ubuntu-22.04
- ubuntu-24.04
lxd-c:
type: lxd
systems:
- ubuntu-24.04:
- ubuntu-24.04-with-plan:
image: ubuntu-24.04
plan: c1-m1
lxd-d:
type: lxd
systems:
- ubuntu-24.04-vm:
image: ubuntu-24.04
plan: c1-m1
vm: true

path: /home/test

Expand Down
45 changes: 41 additions & 4 deletions tests/lxd/task.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,49 @@ prepare: |
fi

execute: |
spread -vv -reuse -resend &> task.out
spread -vv -reuse -resend lxd-a 2>&1 | tee task.out
spread -vv -reuse -resend lxd-b 2>&1 | tee -a task.out
spread -vv -reuse -resend lxd-c 2>&1 | tee -a task.out
spread -vv -reuse -resend lxd-d 2>&1 | tee -a task.out

for system in ubuntu-16.04 ubuntu-18.04 ubuntu-20.04 ubuntu-22.04 ubuntu-24.04; do
grep "lxd:${system}:checks/main" task.out
grep "^WORKS ${system}\$" task.out
for system in ubuntu-16.04 ubuntu-18.04; do
grep "lxd-a:${system}:checks/main" task.out
grep "^WORKS NPROC=[[:digit:]]\+ VM=0 ${system}\$" task.out
done

for system in ubuntu-20.04 ubuntu-22.04; do
grep "lxd-b:${system}:checks/main" task.out
grep "^WORKS NPROC=[[:digit:]]\+ VM=0 ${system}\$" task.out
done

for system in ubuntu-24.04; do
grep "lxd-c:${system}:checks/main" task.out
grep "^WORKS NPROC=[[:digit:]]\+ VM=0 ${system}\$" task.out
done

for system in ubuntu-24.04-with-plan; do
grep "lxd-c:${system}:checks/main" task.out
grep "^WORKS NPROC=1 VM=0 ${system}\$" task.out
done

for system in ubuntu-24.04-vm; do
grep "lxd-d:${system}:checks/main" task.out
grep "^WORKS NPROC=1 VM=1 ${system}\$" task.out
done

debug: |
snap list || true
df -h || true
free -h || true
lxc image list || true
lxc network list || true
lxc list || true
journalctl -u snap.lxd.daemon | cat || true
cat /var/snap/lxd/common/lxd/logs/dnsmasq.* || true
for i in $(lxc list | awk '/RUNNING/{print$2}'); do
lxc exec $i -- cat /var/log/cloud-init*.log || true
lxc exec $i -- sh -c 'journalctl | cat' || true
done
cat /proc/cpuinfo || true
dmesg || true
cat task.out || true