Skip to content

lxd: system bringup appears to be britle #222

@fnordahl

Description

@fnordahl
2025-03-19T16:42:48.3650957Z 2025-03-19 16:42:48 Preparing google:ubuntu-20.04-64:tests/reboot (mar191639-896695)...
2025-03-19T16:42:48.6455223Z 2025-03-19 16:42:48 Executing google:ubuntu-20.04-64:tests/reboot (mar191639-896695) (1/10)...
2025-03-19T16:44:24.0828409Z 2025-03-19 16:44:24 Error executing google:ubuntu-20.04-64:tests/reboot (mar191639-896695) : + spread -vv -reuse -resend
2025-03-19T16:44:24.3632779Z 2025-03-19 16:44:24 Debug output for google:ubuntu-20.04-64:tests/reboot (mar191639-896695) : 
2025-03-19T16:44:24.3634356Z -----
2025-03-19T16:44:24.3635168Z + cat task.out
2025-03-19T16:44:24.3636206Z 2025-03-19 16:42:48 Trying to read /home/test/src/github.com/snapcore/spread/tests/reboot/spread.yaml...
2025-03-19T16:44:24.3637928Z 2025-03-19 16:42:48 Found /home/test/src/github.com/snapcore/spread/tests/reboot/spread.yaml.
2025-03-19T16:44:24.3639186Z 2025-03-19 16:42:48 Loaded project: &spread.Project{
2025-03-19T16:44:24.3639984Z     Name:     "spread",
2025-03-19T16:44:24.3640480Z     Backends: {
2025-03-19T16:44:24.3641160Z         "lxd": &spread.Backend{
2025-03-19T16:44:24.3641801Z             Name:     "lxd",
2025-03-19T16:44:24.3642462Z             Type:     "lxd",
2025-03-19T16:44:24.3643011Z             Key:      "",
2025-03-19T16:44:24.3643589Z             Allocate: "",
2025-03-19T16:44:24.3644235Z             Discard:  "",
2025-03-19T16:44:24.3644876Z             Memory:   0,
2025-03-19T16:44:24.3645602Z             Plan:     "",
2025-03-19T16:44:24.3646231Z             Location: "",
2025-03-19T16:44:24.3646865Z             Storage:  0,
2025-03-19T16:44:24.3648173Z             Systems:  {
2025-03-19T16:44:24.3648964Z                 "ubuntu-16.04": &spread.System{
2025-03-19T16:44:24.3649749Z                     Backend:     "lxd",
2025-03-19T16:44:24.3650867Z                     Name:        "ubuntu-16.04",
2025-03-19T16:44:24.3651602Z                     Image:       "ubuntu-16.04",
2025-03-19T16:44:24.3652208Z                     Kernel:      "",
2025-03-19T16:44:24.3652755Z                     Username:    "",
2025-03-19T16:44:24.3653541Z                     Password:    "",
2025-03-19T16:44:24.3654086Z                     Workers:     1,
2025-03-19T16:44:24.3654661Z                     Storage:     0,
2025-03-19T16:44:24.3655231Z                     SecureBoot:  false,
2025-03-19T16:44:24.3655793Z                     Bios:        "",
2025-03-19T16:44:24.3656331Z                     CPUFamily:   "",
2025-03-19T16:44:24.3656934Z                     Plan:        "",
2025-03-19T16:44:24.3657657Z                     Environment: &spread.Environment{
2025-03-19T16:44:24.3658377Z                         err:  nil,
2025-03-19T16:44:24.3659010Z                         keys: {},
2025-03-19T16:44:24.3660118Z                         vals: {},
2025-03-19T16:44:24.3660847Z                     },
2025-03-19T16:44:24.3661315Z                     Variants: nil,
2025-03-19T16:44:24.3661949Z                     Priority: spread.OptionalInt{},
2025-03-19T16:44:24.3663571Z                     Manual:   false,
2025-03-19T16:44:24.3664126Z                     Vm:       false,
2025-03-19T16:44:24.3664653Z                 },
2025-03-19T16:44:24.3665228Z             },
2025-03-19T16:44:24.3665735Z             Prepare:     "",
2025-03-19T16:44:24.3666249Z             Restore:     "",
2025-03-19T16:44:24.3666785Z             Debug:       "",
2025-03-19T16:44:24.3724880Z             PrepareEach: "",
2025-03-19T16:44:24.3725842Z             RestoreEach: "",
2025-03-19T16:44:24.3726482Z             DebugEach:   "",
2025-03-19T16:44:24.3727071Z             Environment: &spread.Environment{
2025-03-19T16:44:24.3727841Z                 err:  nil,
2025-03-19T16:44:24.3728424Z                 keys: {},
2025-03-19T16:44:24.3729083Z                 vals: {},
2025-03-19T16:44:24.3729766Z             },
2025-03-19T16:44:24.3730308Z             Variants:    nil,
2025-03-19T16:44:24.3731029Z             WarnTimeout: spread.Timeout{},
2025-03-19T16:44:24.3731667Z             KillTimeout: spread.Timeout{},
2025-03-19T16:44:24.3732405Z             HaltTimeout: spread.Timeout{},
2025-03-19T16:44:24.3733065Z             Priority:    spread.OptionalInt{},
2025-03-19T16:44:24.3733945Z             Manual:      false,
2025-03-19T16:44:24.3734692Z             Vm:          false,
2025-03-19T16:44:24.3735203Z         },
2025-03-19T16:44:24.3735699Z     },
2025-03-19T16:44:24.3736306Z     Environment: &spread.Environment{
2025-03-19T16:44:24.3736889Z         err:  nil,
2025-03-19T16:44:24.3737476Z         keys: {},
2025-03-19T16:44:24.3738002Z         vals: {},
2025-03-19T16:44:24.3738442Z     },
2025-03-19T16:44:24.3738858Z     Repack:      "",
2025-03-19T16:44:24.3739563Z     Reroot:      "",
2025-03-19T16:44:24.3740275Z     Prepare:     "",
2025-03-19T16:44:24.3741078Z     Restore:     "",
2025-03-19T16:44:24.3741671Z     Debug:       "",
2025-03-19T16:44:24.3742661Z     PrepareEach: "",
2025-03-19T16:44:24.3743171Z     RestoreEach: "",
2025-03-19T16:44:24.3743638Z     DebugEach:   "",
2025-03-19T16:44:24.3744103Z     Suites:      {
2025-03-19T16:44:24.3744606Z         "checks/": &spread.Suite{
2025-03-19T16:44:24.3745200Z             Summary:     "Verification tasks.",
2025-03-19T16:44:24.3745824Z             Systems:     nil,
2025-03-19T16:44:24.3746423Z             Backends:    nil,
2025-03-19T16:44:24.3746965Z             Variants:    nil,
2025-03-19T16:44:24.3747726Z             Environment: &spread.Environment{
2025-03-19T16:44:24.3748417Z                 err:  nil,
2025-03-19T16:44:24.3748989Z                 keys: {},
2025-03-19T16:44:24.3749512Z                 vals: {},
2025-03-19T16:44:24.3750711Z             },
2025-03-19T16:44:24.3751163Z             Prepare:     "",
2025-03-19T16:44:24.3751744Z             Restore:     "",
2025-03-19T16:44:24.3752406Z             Debug:       "",
2025-03-19T16:44:24.3753018Z             PrepareEach: "",
2025-03-19T16:44:24.3753629Z             RestoreEach: "",
2025-03-19T16:44:24.3754523Z             DebugEach:   "",
2025-03-19T16:44:24.3755153Z             Name:        "checks/",
2025-03-19T16:44:24.3756044Z             Path:        "/home/test/src/github.com/snapcore/spread/tests/reboot/checks",
2025-03-19T16:44:24.3757006Z             Tasks:       {
2025-03-19T16:44:24.3757538Z                 "main": &spread.Task{
2025-03-19T16:44:24.3758132Z                     Suite:       "checks/",
2025-03-19T16:44:24.3758884Z                     Summary:     "Ensure it works.",
2025-03-19T16:44:24.3759574Z                     Details:     "",
2025-03-19T16:44:24.3760154Z                     Systems:     nil,
2025-03-19T16:44:24.3760877Z                     Backends:    nil,
2025-03-19T16:44:24.3761594Z                     Variants:    nil,
2025-03-19T16:44:24.3762325Z                     Environment: &spread.Environment{
2025-03-19T16:44:24.3763467Z                         err:  nil,
2025-03-19T16:44:24.3764058Z                         keys: {},
2025-03-19T16:44:24.3764754Z                         vals: {},
2025-03-19T16:44:24.3765371Z                     },
2025-03-19T16:44:24.3765977Z                     Samples:     1,
2025-03-19T16:44:24.3766740Z                     Prepare:     "",
2025-03-19T16:44:24.3767417Z                     Restore:     "",
2025-03-19T16:44:24.3769639Z                     Execute:     "echo \"REBOOT=$SPREAD_REBOOT\"\ncase $SPREAD_REBOOT in\n0)\n    REBOOT\n    ;;\n1)\n    REBOOT\n    ;;\n2)\n    REBOOT foo\n    ;;\nfoo)\n    REBOOT bar\n    ;;\nbar)\n    echo WORKS\n    ;;\n*)\n    echo 'Unexpected $SPREAD_REBOOT value:' $SPREAD_REBOOT\n    exit 1\n    ;;\nesac\n",
2025-03-19T16:44:24.3771723Z                     Debug:       "",
2025-03-19T16:44:24.3772769Z                     Artifacts:   nil,
2025-03-19T16:44:24.3773501Z                     Name:        "checks/main",
2025-03-19T16:44:24.3774622Z                     Path:        "/home/test/src/github.com/snapcore/spread/tests/reboot/checks/main",
2025-03-19T16:44:24.3775587Z                     WarnTimeout: spread.Timeout{},
2025-03-19T16:44:24.3776405Z                     KillTimeout: spread.Timeout{},
2025-03-19T16:44:24.3777226Z                     Priority:    spread.OptionalInt{},
2025-03-19T16:44:24.3778101Z                     Manual:      false,
2025-03-19T16:44:24.3778813Z                 },
2025-03-19T16:44:24.3779330Z             },
2025-03-19T16:44:24.3779859Z             WarnTimeout: spread.Timeout{},
2025-03-19T16:44:24.3780630Z             KillTimeout: spread.Timeout{},
2025-03-19T16:44:24.3781334Z             Priority:    spread.OptionalInt{},
2025-03-19T16:44:24.3781935Z             Manual:      false,
2025-03-19T16:44:24.3782733Z         },
2025-03-19T16:44:24.3783200Z     },
2025-03-19T16:44:24.3783640Z     RemotePath:  "/home/test",
2025-03-19T16:44:24.3784124Z     Include:     nil,
2025-03-19T16:44:24.3784720Z     Exclude:     nil,
2025-03-19T16:44:24.3785263Z     Rename:      nil,
2025-03-19T16:44:24.3786028Z     Path:        "/home/test/src/github.com/snapcore/spread/tests/reboot",
2025-03-19T16:44:24.3786925Z     WarnTimeout: spread.Timeout{},
2025-03-19T16:44:24.3787634Z     KillTimeout: spread.Timeout{},
2025-03-19T16:44:24.3788275Z }
2025-03-19T16:44:24.3788797Z 2025-03-19 16:42:48 Project content is packed for delivery (1.54KB).
2025-03-19T16:44:24.3789843Z 2025-03-19 16:42:48 Starting 1 worker for the following jobs: 1
2025-03-19T16:44:24.3790600Z     - lxd:ubuntu-16.04:checks/main
2025-03-19T16:44:24.3791272Z 2025-03-19 16:42:48 Allocating lxd:ubuntu-16.04...
2025-03-19T16:44:24.3792141Z 2025-03-19 16:42:49 Cannot find cached LXD image for lxd:ubuntu-16.04.
2025-03-19T16:44:24.3794366Z 2025-03-19 16:43:18 Waiting for lxd container spread-1-ubuntu-16-04 to have an address...
2025-03-19T16:44:24.3796121Z 2025-03-19 16:43:18 lxd list output: []*spread.lxdServerJSON{
2025-03-19T16:44:24.3796877Z     &spread.lxdServerJSON{
2025-03-19T16:44:24.3797491Z         Name:  "spread-1-ubuntu-16-04",
2025-03-19T16:44:24.3798526Z         State: struct { Network map[string]spread.lxdDeviceJSON "json:\"network\"" }{
2025-03-19T16:44:24.3799406Z             Network: {
2025-03-19T16:44:24.3799910Z                 "eth0": {
2025-03-19T16:44:24.3800449Z                     State:     "up",
2025-03-19T16:44:24.3801045Z                     Addresses: {
2025-03-19T16:44:24.3810697Z                         {Family:"inet6", Address:"fe80::216:3eff:fe73:4f61"},
2025-03-19T16:44:24.3811467Z                     },
2025-03-19T16:44:24.3812057Z                 },
2025-03-19T16:44:24.3812533Z                 "lo": {
2025-03-19T16:44:24.3813143Z                     State:     "up",
2025-03-19T16:44:24.3813813Z                     Addresses: {
2025-03-19T16:44:24.3814447Z                         {Family:"inet", Address:"127.0.0.1"},
2025-03-19T16:44:24.3815392Z                         {Family:"inet6", Address:"::1"},
2025-03-19T16:44:24.3816025Z                     },
2025-03-19T16:44:24.3817029Z                 },
2025-03-19T16:44:24.3817444Z             },
2025-03-19T16:44:24.3817832Z         },
2025-03-19T16:44:24.3818311Z     },
2025-03-19T16:44:24.3818736Z }
2025-03-19T16:44:24.3819268Z 2025-03-19 16:43:19 lxd list output: []*spread.lxdServerJSON{
2025-03-19T16:44:24.3819981Z     &spread.lxdServerJSON{
2025-03-19T16:44:24.3820569Z         Name:  "spread-1-ubuntu-16-04",
2025-03-19T16:44:24.3821419Z         State: struct { Network map[string]spread.lxdDeviceJSON "json:\"network\"" }{
2025-03-19T16:44:24.3822165Z             Network: {
2025-03-19T16:44:24.3822898Z                 "lo": {
2025-03-19T16:44:24.3823346Z                     State:     "up",
2025-03-19T16:44:24.3823868Z                     Addresses: {
2025-03-19T16:44:24.3824481Z                         {Family:"inet", Address:"127.0.0.1"},
2025-03-19T16:44:24.3825246Z                         {Family:"inet6", Address:"::1"},
2025-03-19T16:44:24.3825794Z                     },
2025-03-19T16:44:24.3826264Z                 },
2025-03-19T16:44:24.3826741Z                 "eth0": {
2025-03-19T16:44:24.3827274Z                     State:     "up",
2025-03-19T16:44:24.3827865Z                     Addresses: {
2025-03-19T16:44:24.3828623Z                         {Family:"inet6", Address:"fe80::216:3eff:fe73:4f61"},
2025-03-19T16:44:24.3829320Z                     },
2025-03-19T16:44:24.3829768Z                 },
2025-03-19T16:44:24.3830198Z             },
2025-03-19T16:44:24.3830601Z         },
2025-03-19T16:44:24.3830983Z     },
2025-03-19T16:44:24.3831327Z }
2025-03-19T16:44:24.3831805Z 2025-03-19 16:43:20 lxd list output: []*spread.lxdServerJSON{
2025-03-19T16:44:24.3832477Z     &spread.lxdServerJSON{
2025-03-19T16:44:24.3833029Z         Name:  "spread-1-ubuntu-16-04",
2025-03-19T16:44:24.3834305Z         State: struct { Network map[string]spread.lxdDeviceJSON "json:\"network\"" }{
2025-03-19T16:44:24.3835140Z             Network: {
2025-03-19T16:44:24.3835607Z                 "lo": {
2025-03-19T16:44:24.3836090Z                     State:     "up",
2025-03-19T16:44:24.3836623Z                     Addresses: {
2025-03-19T16:44:24.3837150Z                         {Family:"inet", Address:"127.0.0.1"},
2025-03-19T16:44:24.3837672Z                         {Family:"inet6", Address:"::1"},
2025-03-19T16:44:24.3838046Z                     },
2025-03-19T16:44:24.3838313Z                 },
2025-03-19T16:44:24.3838592Z                 "eth0": {
2025-03-19T16:44:24.3838914Z                     State:     "up",
2025-03-19T16:44:24.3839268Z                     Addresses: {
2025-03-19T16:44:24.3839745Z                         {Family:"inet6", Address:"fd42:7f57:a74e:c67d:216:3eff:fe73:4f61"},
2025-03-19T16:44:24.3840316Z                         {Family:"inet6", Address:"fe80::216:3eff:fe73:4f61"},
2025-03-19T16:44:24.3840726Z                     },
2025-03-19T16:44:24.3841667Z                 },
2025-03-19T16:44:24.3841931Z             },
2025-03-19T16:44:24.3842176Z         },
2025-03-19T16:44:24.3842405Z     },
2025-03-19T16:44:24.3842630Z }
2025-03-19T16:44:24.3842990Z 2025-03-19 16:43:21 lxd list output: []*spread.lxdServerJSON{
2025-03-19T16:44:24.3843431Z     &spread.lxdServerJSON{
2025-03-19T16:44:24.3843785Z         Name:  "spread-1-ubuntu-16-04",
2025-03-19T16:44:24.3844456Z         State: struct { Network map[string]spread.lxdDeviceJSON "json:\"network\"" }{
2025-03-19T16:44:24.3845153Z             Network: {
2025-03-19T16:44:24.3845446Z                 "lo": {
2025-03-19T16:44:24.3845889Z                     State:     "up",
2025-03-19T16:44:24.3846297Z                     Addresses: {
2025-03-19T16:44:24.3846758Z                         {Family:"inet", Address:"127.0.0.1"},
2025-03-19T16:44:24.3847276Z                         {Family:"inet6", Address:"::1"},
2025-03-19T16:44:24.3847706Z                     },
2025-03-19T16:44:24.3847985Z                 },
2025-03-19T16:44:24.3848368Z                 "eth0": {
2025-03-19T16:44:24.3848683Z                     State:     "up",
2025-03-19T16:44:24.3849136Z                     Addresses: {
2025-03-19T16:44:24.3849807Z                         {Family:"inet", Address:"10.144.14.120"},
2025-03-19T16:44:24.3850424Z                         {Family:"inet6", Address:"fd42:7f57:a74e:c67d:216:3eff:fe73:4f61"},
2025-03-19T16:44:24.3851058Z                         {Family:"inet6", Address:"fe80::216:3eff:fe73:4f61"},
2025-03-19T16:44:24.3851516Z                     },
2025-03-19T16:44:24.3851842Z                 },
2025-03-19T16:44:24.3852258Z             },
2025-03-19T16:44:24.3852620Z         },
2025-03-19T16:44:24.3852897Z     },
2025-03-19T16:44:24.3853131Z }
2025-03-19T16:44:24.3853566Z 2025-03-19 16:43:22 Allocated lxd:ubuntu-16.04 (spread-1-ubuntu-16-04).
2025-03-19T16:44:24.3854288Z 2025-03-19 16:43:22 Connecting to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04)...
2025-03-19T16:44:24.3855764Z 2025-03-19 16:43:22 Cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): ssh: handshake failed: read tcp 10.144.14.1:54610->10.144.14.120:22: read: connection reset by peer
2025-03-19T16:44:24.3857975Z 2025-03-19 16:43:27 Cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): dial tcp 10.144.14.120:22: connect: connection refused
2025-03-19T16:44:24.3860042Z 2025-03-19 16:43:30 Cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): dial tcp 10.144.14.120:22: connect: connection refused
2025-03-19T16:44:24.3861964Z 2025-03-19 16:43:38 Cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): dial tcp 10.144.14.120:22: connect: connection refused
2025-03-19T16:44:24.3863888Z 2025-03-19 16:43:46 Cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): dial tcp 10.144.14.120:22: connect: connection refused
2025-03-19T16:44:24.3865363Z 2025-03-19 16:43:54 Cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): dial tcp 10.144.14.120:22: connect: connection refused
2025-03-19T16:44:24.3866808Z 2025-03-19 16:44:02 Cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): dial tcp 10.144.14.120:22: connect: connection refused
2025-03-19T16:44:24.3868249Z 2025-03-19 16:44:10 Cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): dial tcp 10.144.14.120:22: connect: connection refused
2025-03-19T16:44:24.3869976Z 2025-03-19 16:44:18 Cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): dial tcp 10.144.14.120:22: connect: connection refused
2025-03-19T16:44:24.3871812Z 2025-03-19 16:44:22 Discarding lxd:ubuntu-16.04 (spread-1-ubuntu-16-04), cannot connect: cannot connect to lxd:ubuntu-16.04 (spread-1-ubuntu-16-04): dial tcp 10.144.14.120:22: connect: connection refused
2025-03-19T16:44:24.3873336Z 2025-03-19 16:44:24 Worker terminated.
2025-03-19T16:44:24.3874283Z 2025-03-19 16:44:24 Pending jobs after workers returned: 1
2025-03-19T16:44:24.3874729Z     - lxd:ubuntu-16.04:checks/main
2025-03-19T16:44:24.3875170Z 2025-03-19 16:44:24 Successful tasks: 0
2025-03-19T16:44:24.3875529Z 2025-03-19 16:44:24 Aborted tasks: 1
2025-03-19T16:44:24.3875922Z error: unsuccessful run

Looking at the code there are no attempts made to ensure boot-up of the instance is actually done, we could potentially improve the situation by running cloud-init status --wait before returning from Allocate().

I'll propose this as part of #221, as that would also solve my need to wait for the LXD VM agent to start.

0: https://github.com/canonical/spread/actions/runs/13951932405/job/39053459252

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions