From 5f60e9cf1b6647d742b29495ce4bb00830cfc16f Mon Sep 17 00:00:00 2001
From: iximeow
Date: Sat, 7 Feb 2026 02:34:46 +0000
Subject: [PATCH 1/2] viona: only reset queues that have been taken out of reset

45af0f7 was too eager to reset rings. By resetting all rings regardless
of previous SET_PAIRS, any RING_RESET beyond that point would EINVAL.
Propolis then tracks that ring state as VRingState::Fatal, at which
point operations like VqChange::Address immediately give up.

I don't actually understand what is different about the Ubuntu 24.04
I'm using versus the one we saw this issue on. By tracing viona
operations with viona.d it's quite clear that something in the image on
mb-0 is resetting the device before (during?) Linux booting, and that
is not happening locally at all. In both cases the kernels are 6.8, but
the patch levels are slightly different: the problematic image is
6.8.0-94-generic versus my 6.8.0-88-generic.

Without this tracking of which rings are eligible *for* reset, that
first device reset effectively bricks the NIC for the guest. None of my
Ubuntu 24.04, 22.04, or Debian 13 images have this behavior, which is
upsetting in its own right. For good measure, I've checked this change
on FreeBSD 14.1 and a relatively recent helios as well.

At least as far as my (apparently well-behaved??) images go, the viona
NIC functions at boot and after a reboot, can seemingly do TCP without
issue, etc. My test Windows image has not had functional networking
since the VirtIO/multiqueue work landed, and is at least not *less*
functional now. It's not clear whether that's a my-image problem or a
Propolis problem.
---
 lib/propolis/src/hw/virtio/queue.rs | 48 +++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/lib/propolis/src/hw/virtio/queue.rs b/lib/propolis/src/hw/virtio/queue.rs
index 3767e9710..a78b83d41 100644
--- a/lib/propolis/src/hw/virtio/queue.rs
+++ b/lib/propolis/src/hw/virtio/queue.rs
@@ -933,6 +933,7 @@ pub struct Info {
 
 pub struct VirtQueues {
     len: AtomicUsize,
+    peak: AtomicUsize,
     queues: Vec<Arc<VirtQueue>>,
 }
 
@@ -947,9 +948,11 @@ impl VirtQueues {
         Self::new_with_len(sizes.len(), sizes)
     }
 
-    pub fn new_with_len(len: usize, sizes: &[VqSize]) -> Self {
+    pub fn new_with_len(initial_len: usize, sizes: &[VqSize]) -> Self {
         assert!(
-            0 < len && len <= sizes.len() && sizes.len() <= MAX_QUEUES,
+            0 < initial_len
+                && initial_len <= sizes.len()
+                && sizes.len() <= MAX_QUEUES,
             "virtqueue size must be positive u16 and len must be smaller pos"
         );
         let queues = sizes
@@ -957,8 +960,9 @@
             .enumerate()
             .map(|(id, size)| Arc::new(VirtQueue::new(id as u16, *size)))
             .collect::<Vec<_>>();
-        let len = AtomicUsize::new(len);
-        Self { len, queues }
+        let len = AtomicUsize::new(initial_len);
+        let peak = AtomicUsize::new(initial_len);
+        Self { len, peak, queues }
     }
 
     pub fn set_len(&self, len: usize) -> Result<(), usize> {
@@ -966,6 +970,26 @@
             return Err(len);
         }
         self.len.store(len, Ordering::Release);
+        let mut peak = self.peak.load(Ordering::Acquire);
+        loop {
+            if len <= peak {
+                break;
+            }
+            match self.peak.compare_exchange(
+                peak,
+                len,
+                Ordering::SeqCst,
+                Ordering::SeqCst,
+            ) {
+                Ok(_) => {
+                    // We've updated the peak, all done
+                    break;
+                }
+                Err(next_peak) => {
+                    peak = next_peak;
+                }
+            }
+        }
         Ok(())
     }
 
@@ -978,6 +1002,10 @@
         self.len.load(Ordering::Relaxed)
     }
 
+    pub fn peak(&self) -> usize {
+        self.peak.load(Ordering::Relaxed)
+    }
+
     pub const fn max_capacity(&self) -> usize {
         self.queues.len()
     }
@@ -1005,13 +1033,13 @@ impl VirtQueues {
         self.queues[..len].iter().chain([self.get_control()])
     }
 
-    /// Iterate all queues, regardless of the device's current configuration
-    /// happening to use them or not.
-    ///
-    /// This is primarily useful for operations like device reset and teardown
-    /// where we need to manage all *possible* device state.
+    /// Iterate all queues the device may have used; the current number of
+    /// VirtQueues may be lower than a previous high watermark, but in cases
+    /// like device reset and teardown we must manage all viona rings
+    /// corresponding to ever-active VirtQueues.
     pub fn iter_all(&self) -> impl std::iter::Iterator<Item = &Arc<VirtQueue>> {
-        self.queues.iter()
+        let peak = self.peak() - 1;
+        self.queues[..peak].iter().chain([self.get_control()])
     }
 
     pub fn export(&self) -> migrate::VirtQueuesV1 {

From 612fb742f91be6605bf05a122dee7eb7389689d0 Mon Sep 17 00:00:00 2001
From: iximeow
Date: Sat, 7 Feb 2026 18:23:38 +0000
Subject: [PATCH 2/2] better to program computer when awake

---
 lib/propolis/src/hw/virtio/queue.rs | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/propolis/src/hw/virtio/queue.rs b/lib/propolis/src/hw/virtio/queue.rs
index a78b83d41..41ac7ddf7 100644
--- a/lib/propolis/src/hw/virtio/queue.rs
+++ b/lib/propolis/src/hw/virtio/queue.rs
@@ -971,15 +971,12 @@ impl VirtQueues {
         }
         self.len.store(len, Ordering::Release);
         let mut peak = self.peak.load(Ordering::Acquire);
-        loop {
-            if len <= peak {
-                break;
-            }
+        while len > peak {
             match self.peak.compare_exchange(
                 peak,
                 len,
-                Ordering::SeqCst,
-                Ordering::SeqCst,
+                Ordering::Relaxed,
+                Ordering::Relaxed,
             ) {
                 Ok(_) => {
                     // We've updated the peak, all done
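
For context on the set_len() change: it maintains a monotonic high
watermark over the configured queue count using a compare-exchange
retry loop, so a later, smaller SET_PAIRS never hides rings that were
once active. Below is a minimal standalone sketch of that pattern, not
Propolis code; the Watermark type and raise() name are invented for
illustration, and std's AtomicUsize::fetch_max would express the same
update in a single call.

    // Standalone sketch of the high-watermark update performed in
    // set_len(): raise `peak` to `len` if larger, never lower it.
    use std::sync::atomic::{AtomicUsize, Ordering};

    struct Watermark {
        peak: AtomicUsize,
    }

    impl Watermark {
        fn new(initial: usize) -> Self {
            Self { peak: AtomicUsize::new(initial) }
        }

        fn raise(&self, len: usize) {
            let mut peak = self.peak.load(Ordering::Acquire);
            while len > peak {
                match self.peak.compare_exchange(
                    peak,
                    len,
                    Ordering::Relaxed,
                    Ordering::Relaxed,
                ) {
                    // We published the new peak; done.
                    Ok(_) => break,
                    // Lost a race; retry against the value that won.
                    Err(observed) => peak = observed,
                }
            }
            // Equivalent single call from std:
            // self.peak.fetch_max(len, Ordering::Relaxed);
        }
    }

    fn main() {
        let w = Watermark::new(1);
        w.raise(4);
        w.raise(2);
        assert_eq!(w.peak.load(Ordering::Relaxed), 4);
    }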