Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 119 additions & 25 deletions phd-tests/framework/src/test_vm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ use crate::{
TestCtx,
};

use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, bail, Context, Result};
use camino::Utf8PathBuf;
use core::result::Result as StdResult;
use futures::FutureExt;
use propolis_client::{
instance_spec::{
ComponentV0, InstanceProperties, InstanceSpecGetResponse,
Expand Down Expand Up @@ -152,6 +153,106 @@ enum VmState {
Ensured { serial: SerialConsole },
}

/// Description of the acceptable status codes from executing a command in a
/// [`TestVm::run_shell_command`].
// This could reasonably have a `Status(u16)` variant to check specific non-zero
// statuses, but specific codes are not terribly portable! In the few cases we
// can expect a specific status for errors, those specific codes change between
// f.ex illumos and Linux guests.
enum StatusCheck {
Ok,
NotOk,
}

pub struct ShellOutputExecutor<'ctx> {
vm: &'ctx TestVm,
cmd: &'ctx str,
status_check: Option<StatusCheck>,
}

impl<'a> ShellOutputExecutor<'a> {
pub fn ignore_status(mut self) -> ShellOutputExecutor<'a> {
self.status_check = None;
self
}

pub fn check_ok(mut self) -> ShellOutputExecutor<'a> {
self.status_check = Some(StatusCheck::Ok);
self
}

pub fn check_err(mut self) -> ShellOutputExecutor<'a> {
self.status_check = Some(StatusCheck::NotOk);
self
}
}

impl<'a> std::future::IntoFuture for ShellOutputExecutor<'a> {
type Output = Result<String>;
type IntoFuture = futures::future::BoxFuture<'a, Result<String>>;

fn into_future(self) -> Self::IntoFuture {
Box::pin(async move {
// Allow the guest OS to transform the input command into a
// guest-specific command sequence. This accounts for the guest's
// shell type (which affects e.g. affects how it displays multi-line
// commands) and serial console buffering discipline.
let command_sequence =
self.vm.guest_os.shell_command_sequence(self.cmd);
self.vm.run_command_sequence(command_sequence).await?;
Comment on lines +201 to +202
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, it occurs to me that this could maybe be jammed together into a method on self.vm, not that we are currently calling it anywhere else...


// `shell_command_sequence` promises that the generated command
// sequence clears buffer of everything up to and including the
// input command before actually issuing the final '\n' that issues
// the command. This ensures that the buffer contents returned by
// this call contain only the command's output.
let output = self
.vm
.wait_for_serial_output(
self.vm.guest_os.get_shell_prompt(),
Duration::from_secs(300),
)
.await?;

// Trim any leading newlines inserted when the command was issued
// and any trailing whitespace that isn't actually part of the
// command output. Any other embedded whitespace is the caller's
// problem.
let output = output.trim().to_string();

if let Some(check) = self.status_check {
let status_command_sequence =
self.vm.guest_os.shell_command_sequence("echo $?");
self.vm.run_command_sequence(status_command_sequence).await?;
let status = self
.vm
.wait_for_serial_output(
self.vm.guest_os.get_shell_prompt(),
Duration::from_secs(300),
)
.await?;
let status = status.trim().parse::<u16>()?;

match check {
StatusCheck::Ok => {
if status != 0 {
bail!("expected status 0, got {}", status);
}
}
StatusCheck::NotOk => {
if status == 0 {
bail!("expected non-zero status, got {}", status);
}
}
}
}

Ok(output)
})
.boxed()
}
}

/// A virtual machine running in a Propolis server. Test cases create these VMs
/// using the `factory::VmFactory` embedded in their test contexts.
///
Expand Down Expand Up @@ -885,30 +986,23 @@ impl TestVm {
/// waits for another shell prompt to appear using
/// [`Self::wait_for_serial_output`] and returns any text that was buffered
/// to the serial console after the command was sent.
pub async fn run_shell_command(&self, cmd: &str) -> Result<String> {
// Allow the guest OS to transform the input command into a
// guest-specific command sequence. This accounts for the guest's shell
// type (which affects e.g. affects how it displays multi-line commands)
// and serial console buffering discipline.
let command_sequence = self.guest_os.shell_command_sequence(cmd);
self.run_command_sequence(command_sequence).await?;

// `shell_command_sequence` promises that the generated command sequence
// clears buffer of everything up to and including the input command
// before actually issuing the final '\n' that issues the command.
// This ensures that the buffer contents returned by this call contain
// only the command's output.
let out = self
.wait_for_serial_output(
self.guest_os.get_shell_prompt(),
Duration::from_secs(300),
)
.await?;

// Trim any leading newlines inserted when the command was issued and
// any trailing whitespace that isn't actually part of the command
// output. Any other embedded whitespace is the caller's problem.
Ok(out.trim().to_string())
///
/// After running the shell command, sends `echo $?` to query and return the
/// command's return status as well.
///
/// This will return an error if the command returns a non-zero status by
/// default; to ignore the status or expect a non-zero as a positive
/// condition, see [`ShellOutputExecutor::ignore_status`] or
/// [`ShellOutputExecutor::check_err`].
pub fn run_shell_command<'a>(
&'a self,
cmd: &'a str,
) -> ShellOutputExecutor<'a> {
ShellOutputExecutor {
vm: self,
cmd,
status_check: Some(StatusCheck::Ok),
}
}

pub async fn graceful_reboot(&self) -> Result<()> {
Expand Down
18 changes: 12 additions & 6 deletions phd-tests/tests/src/boot_order.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,8 @@ async fn guest_can_adjust_boot_order(ctx: &TestCtx) {

// If the guest doesn't have an EFI partition then there's no way for boot
// order preferences to be persisted.
let mountline = vm.run_shell_command("mount | grep efivarfs").await?;
let mountline =
vm.run_shell_command("mount | grep efivarfs").ignore_status().await?;

if !mountline.starts_with("efivarfs on ") {
warn!(
Expand All @@ -268,11 +269,16 @@ async fn guest_can_adjust_boot_order(ctx: &TestCtx) {

// Try adding a few new boot options, then add them to the boot order,
// reboot, and make sure they're all as we set them.
if !vm
.run_shell_command(&format!("ls {}", efipath(&bootvar(0xffff))))
.await?
.is_empty()
{
let bootffff_path = efipath(&bootvar(0xffff));
let bootffff_res = vm
.run_shell_command(&format!("ls {bootffff_path}"))
.ignore_status()
.await?;
// `ls` just prints the file path if it exists, but the error text varies a
// bit depending on Alpine, Ubuntu, Busybox, etc. Notionally we could
// `check_err()` above, but having a `BootFFFF` entry already is merely
// weird; we can still replace it and continue with the test.
if bootffff_res == bootffff_path {
warn!(
"guest environment already has a BootFFFF entry; \
is this not a fresh image?"
Expand Down
1 change: 1 addition & 0 deletions phd-tests/tests/src/cpuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ impl<'a> LinuxGuestTopo<'a> {
"ls {}",
Self::cpu_stem(this.cpus().await)
))
.ignore_status()
.await
.expect("can run ls of a directory that doesn't exist");
assert!(out.contains("No such file or directory"));
Expand Down
3 changes: 2 additions & 1 deletion phd-tests/tests/src/crucible/migrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ async fn smoke_test(ctx: &TestCtx) {
source.launch().await?;
source.wait_to_boot().await?;

let lsout = source.run_shell_command("ls foo.bar 2> /dev/null").await?;
let lsout =
source.run_shell_command("ls foo.bar 2> /dev/null").check_err().await?;
assert_eq!(lsout, "");
source.run_shell_command("touch ./foo.bar").await?;
source.run_shell_command("sync ./foo.bar").await?;
Expand Down
3 changes: 2 additions & 1 deletion phd-tests/tests/src/crucible/smoke.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ async fn shutdown_persistence_test(ctx: &TestCtx) {

// Verify that the test file doesn't exist yet, then touch it, flush it, and
// shut down the VM.
let lsout = vm.run_shell_command("ls foo.bar 2> /dev/null").await?;
let lsout =
vm.run_shell_command("ls foo.bar 2> /dev/null").check_err().await?;
assert_eq!(lsout, "");
vm.run_shell_command("touch ./foo.bar").await?;
vm.run_shell_command("sync ./foo.bar").await?;
Expand Down
18 changes: 14 additions & 4 deletions phd-tests/tests/src/hw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,25 @@ async fn lspci_lifecycle_test(ctx: &TestCtx) {
vm.launch().await?;
vm.wait_to_boot().await?;

let lspci = vm.run_shell_command(LSPCI).await?;
let lshw = vm.run_shell_command(LSHW).await?;
// XXX: do not `ignore_status()` on these commands! They fail for any number
// of reasons on different guests:
// * sudo may not exist (some Alpine)
// * lshw may not exist (Debian)
// * we may not input a sudo password (Ubuntu)
//
// see also: https://github.com/oxidecomputer/propolis/issues/792

let lspci = vm.run_shell_command(LSPCI).ignore_status().await?;
let lshw = vm.run_shell_command(LSHW).ignore_status().await?;
ctx.lifecycle_test(vm, &[Action::StopAndStart], move |vm| {
let lspci = lspci.clone();
let lshw = lshw.clone();
Box::pin(async move {
let new_lspci = vm.run_shell_command(LSPCI).await.unwrap();
let new_lspci =
vm.run_shell_command(LSPCI).ignore_status().await.unwrap();
assert_eq!(new_lspci, lspci);
let new_lshw = vm.run_shell_command(LSHW).await.unwrap();
let new_lshw =
vm.run_shell_command(LSHW).ignore_status().await.unwrap();
assert_eq!(new_lshw, lshw);
})
})
Expand Down
42 changes: 28 additions & 14 deletions phd-tests/tests/src/hyperv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,28 @@ use uuid::Uuid;
/// startup and shutdown with Hyper-V emulation enabled.
async fn guest_detect_hyperv(vm: &TestVm) -> anyhow::Result<()> {
if vm.guest_os_kind().is_linux() {
// Many Linux distros come with systemd installed out of the box. On
// these distros, it's easiest to use `systemd-detect-virt` to determine
// whether the guest thinks it's running on a Hyper-V-compatible
// hypervisor. (Whether any actual enlightenments are enabled is another
// story, but those can often be detected by other means.)
let out = vm.run_shell_command("systemd-detect-virt").await?;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with this change, the shell command completes with status, so this change ends up being a motivator to detect the hypervisor in a more robust way. yay!

if out.contains("systemd-detect-virt: not found") {
warn!(
"guest doesn't support systemd-detect-virt, can't verify it \
detected Hyper-V support"
);
} else {
assert_eq!(out, "microsoft");
}
// One might imagine we could simply use `systemd-detect-virt` to check
// hypervisor information here, but it's not present out of the box on
// Alpine. In the interest of exercising Hyper-V in typical CI runs on a
// standard Alpine image, detect Hyper-V in a... worse but reliable way:
// looking for relevant logs in dmesg. This should work for all Linuxes
// from later than May-ish 2010 (>2.6.34 or so). If we don't see Hyper-V
// reported in dmesg either it's genuinely not detected, it's a very old
// Linux, or it's a new Linux and dmesg text has changed.
Comment on lines +33 to +34
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i suppose it could possibly also have fallen off the end of the dmesg ringbuffer if something is spewing out a ton of errors perhaps?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh god. file that under "conditions i never want to think about". but yes.

const HV_TEXT: &str = "Hypervisor detected: Microsoft Hyper-V";

// No "sudo" here because Alpine doesn't have sudo; for Linux tests we
// expect to run test commands as root anyway.
vm.run_shell_command(&format!("dmesg | grep \"{HV_TEXT}\"")).await?;
} else if vm.guest_os_kind().is_windows() {
// Windows is good about giving signals that it's running in a Hyper-V
// *root partition*, but offers no clear signal as to whether it has
// detected a Hyper-V host when it's running as a non-root guest. (There
// are methods for detecting whether Windows is running as a guest, but
// these don't identify the detected hypervisor type.)
warn!("running on Windows, can't verify it detected Hyper-V support");
} else {
warn!("unknown guest type, can't verify it detected Hyper-V support");
}

Ok(())
Expand All @@ -56,6 +57,16 @@ async fn hyperv_smoke_test(ctx: &TestCtx) {
cfg.guest_hv_interface(GuestHypervisorInterface::HyperV {
features: Default::default(),
});
// For reasons absolutely indecipherable to me, Alpine (3.16, kernel
// 5.15.41-0-virt) seems to lose some early dmesg lines if booted with less
// than four vCPUs. Among the early dmesg lines are `Hypervisor detected:
// Microsoft Hyper-V` that we look for as confirmation that Linux knows
// there's a Hyper-V-like hypervisor present.
Comment on lines +60 to +64
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. What

//
// I'd love to debug exactly why this is relevant to the contents of dmesg
// (the remaining log is identical and it doesn't seem that the ring buffer
// is full), but I really can't justify the time!
cfg.cpus(4);
let mut vm = ctx.spawn_vm(&cfg, None).await?;
vm.launch().await?;
vm.wait_to_boot().await?;
Expand All @@ -69,6 +80,9 @@ async fn hyperv_lifecycle_test(ctx: &TestCtx) {
cfg.guest_hv_interface(GuestHypervisorInterface::HyperV {
features: Default::default(),
});
// Spooky load-bearing vCPU count to preserve dmesg log lines. See the
// comment in `hyperv_smoke_test`.
cfg.cpus(4);
let mut vm = ctx.spawn_vm(&cfg, None).await?;
vm.launch().await?;
vm.wait_to_boot().await?;
Expand Down
19 changes: 14 additions & 5 deletions phd-tests/tests/src/migrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@ mod from_base {
spawn_base_vm(ctx, "migration_from_base_and_back").await?;
source.launch().await?;
source.wait_to_boot().await?;
let lsout = source.run_shell_command("ls foo.bar 2> /dev/null").await?;
// `ls` with no results exits non-zero, so expect an error here.
let lsout = source
.run_shell_command("ls foo.bar 2> /dev/null")
.check_err()
.await?;
assert_eq!(lsout, "");

// create an empty file on the source VM.
Expand All @@ -74,8 +78,9 @@ mod from_base {
// the file should still exist on the target VM after migration.
let lsout = target
.run_shell_command("ls foo.bar")
.ignore_status()
.await
.expect("`ls foo.bar` should succeed");
.expect("can try to run `ls foo.bar`");
assert_eq!(lsout, "foo.bar");
})
},
Expand Down Expand Up @@ -275,7 +280,9 @@ mod running_process {
))
.await?;
vm.run_shell_command("chmod +x dirt.sh").await?;
let run_dirt = vm.run_shell_command("./dirt.sh").await?;
// When dirt.sh suspends itself, the parent shell will report a non-zero
// status (one example is 148: 128 + SIGTSTP aka 20 on Linux).
let run_dirt = vm.run_shell_command("./dirt.sh").check_err().await?;
assert!(run_dirt.contains("made dirt"), "dirt.sh failed: {run_dirt:?}");
assert!(
run_dirt.contains("Stopped"),
Expand Down Expand Up @@ -314,7 +321,8 @@ async fn multiple_migrations(ctx: &TestCtx) {
async fn run_smoke_test(ctx: &TestCtx, mut source: TestVm) -> Result<()> {
source.launch().await?;
source.wait_to_boot().await?;
let lsout = source.run_shell_command("ls foo.bar 2> /dev/null").await?;
let lsout =
source.run_shell_command("ls foo.bar 2> /dev/null").check_err().await?;
assert_eq!(lsout, "");

// create an empty file on the source VM.
Expand All @@ -329,8 +337,9 @@ async fn run_smoke_test(ctx: &TestCtx, mut source: TestVm) -> Result<()> {
// the file should still exist on the target VM after migration.
let lsout = target
.run_shell_command("ls foo.bar")
.ignore_status()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since we're asserting on the output of ls below, it's mildly more interesting to try ls and get whatever it did print in the error w/ assert_eq!() instead of "ls exited with 1 but we don't know anything more about it".

this isn't super important but iirc i'd broken the test in a really confusing way where this helped explain what was happening, rather than just saying the right thing wasn't happening.

.await
.expect("`ls foo.bar` should succeed after migration");
.expect("can try to run `ls foo.bar`");
assert_eq!(lsout, "foo.bar");
})
},
Expand Down
Loading