diff --git a/src/lib.rs b/src/lib.rs index 6e21efea..3d59a56d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ pub mod log; pub mod manager; pub mod metrics; pub mod policy; +pub mod signer; pub mod state; pub mod tsig; pub mod units; diff --git a/src/signer/mod.rs b/src/signer/mod.rs new file mode 100644 index 00000000..31be3b79 --- /dev/null +++ b/src/signer/mod.rs @@ -0,0 +1,67 @@ +//! Signing zones. +// +// TODO: Move 'src/units/zone_signer.rs' here. + +use std::sync::Arc; + +use cascade_zonedata::SignedZoneBuilder; +use tracing::{debug, error}; + +use crate::{ + center::{Center, halt_zone}, + manager::record_zone_event, + zone::{HistoricalEvent, SigningTrigger, Zone}, +}; + +pub mod zone; + +//----------- sign() ----------------------------------------------------------- + +/// Sign or re-sign a zone. +/// +/// A new signed instance of the zone will be generated using `builder`. +/// `builder` provides access to the actual zone content, including previous +/// instances of the zone for incremental signing. +#[tracing::instrument( + level = "debug", + skip_all, + fields(zone = %zone.name), +)] +async fn sign( + center: Arc<Center>
, + zone: Arc<Zone>, + builder: SignedZoneBuilder, + trigger: SigningTrigger, +) { + match center + .signer + .join_sign_zone_queue(&center, &zone.name, !builder.have_next_loaded(), trigger) + .await + { + Ok(()) => {} + Err(error) if error.is_benign() => { + // Ignore this benign case. It was probably caused by dnst keyset + // cron triggering resigning before we even signed the first time, + // either because the zone was large and slow to load and sign, or + // because the unsigned zone was pending review. + debug!("Ignoring probably benign failure: {error}"); + } + Err(error) => { + error!("Signing failed: {error}"); + + // TODO: Inline these methods and use a single 'ZoneState' lock. + + halt_zone(&center, &zone.name, true, &error.to_string()); + + record_zone_event( + &center, + &zone.name, + HistoricalEvent::SigningFailed { + trigger, + reason: error.to_string(), + }, + None, // TODO + ); + } + } +} diff --git a/src/signer/zone.rs b/src/signer/zone.rs new file mode 100644 index 00000000..8e1bd922 --- /dev/null +++ b/src/signer/zone.rs @@ -0,0 +1,258 @@ +//! Zone-specific signing state. + +use std::{sync::Arc, time::SystemTime}; + +use cascade_zonedata::SignedZoneBuilder; + +use crate::{ + center::Center, + util::AbortOnDrop, + zone::{SigningTrigger, Zone, ZoneHandle, ZoneState}, +}; + +//----------- SignerZoneHandle ------------------------------------------------- + +/// A handle for signer-related operations on a [`Zone`]. +pub struct SignerZoneHandle<'a> { + /// The zone being operated on. + pub zone: &'a Arc<Zone>, + + /// The locked zone state. + pub state: &'a mut ZoneState, + + /// Cascade's global state. + pub center: &'a Arc<Center>
, +} + +impl SignerZoneHandle<'_> { + /// Access the generic [`ZoneHandle`]. + pub const fn zone(&mut self) -> ZoneHandle<'_> { + ZoneHandle { + zone: self.zone, + state: self.state, + center: self.center, + } + } + + /// Enqueue a signing operation for a newly loaded instance of the zone. + pub fn enqueue_sign(&mut self, builder: SignedZoneBuilder) { + // A zone can have at most one 'SignedZoneBuilder' at a time. Because + // we have 'builder', we are guaranteed that no other signing operations + // are ongoing right now. A re-signing operation may be enqueued, but it + // has lower priority than this (for now). + + assert!(self.state.signer.enqueued_sign.is_none()); + assert!(self.state.signer.ongoing.is_none()); + + // TODO: Keep state for a queue of pending (re-)signing operations, so + // that the number of simultaneous operations can be limited. At the + // moment, this queue is opaque and is handled within the asynchronous + // task. + + let handle = tokio::task::spawn(super::sign( + self.center.clone(), + self.zone.clone(), + builder, + SigningTrigger::ZoneChangesApproved, + )); + + self.state.signer.ongoing = Some(handle.into()); + } + + /// Enqueue a re-signing operation for the zone. + /// + /// ## Panics + /// + /// Panics if `keys_changed` and `sigs_need_refresh` are both `false`. + pub fn enqueue_resign(&mut self, keys_changed: bool, sigs_need_refresh: bool) { + assert!( + keys_changed || sigs_need_refresh, + "a reason for re-signing was not specified" + ); + + // If a re-signing operation has already been enqueued, add to it. + if let Some(resign) = &mut self.state.signer.enqueued_resign { + resign.keys_changed |= keys_changed; + resign.sigs_need_refresh |= sigs_need_refresh; + return; + } + + // Try to obtain a 'SignedZoneBuilder' so building can begin. + let builder = self.zone().storage().start_resign(); + + // TODO: Keep state for a queue of pending (re-)signing operations, so + // that the number of simultaneous operations can be limited. 
At the + // moment, this queue is opaque and is handled within the asynchronous + // task. + + // Try to initiate the re-sign immediately. + if let Some(builder) = builder { + // A zone can have at most one 'SignedZoneBuilder' at a time. + // Because we have 'builder', we are guaranteed that no other + // signing operations are ongoing right now. A re-signing operation + // may be enqueued, but it has lower priority than this (for now). + + assert!(self.state.signer.enqueued_sign.is_none()); + assert!(self.state.signer.ongoing.is_none()); + + // TODO: 'SigningTrigger' can't express multiple reasons. + let trigger = if keys_changed { + SigningTrigger::KeySetModifiedAfterCron + } else { + SigningTrigger::SignatureExpiration + }; + + let handle = tokio::task::spawn(super::sign( + self.center.clone(), + self.zone.clone(), + builder, + trigger, + )); + + self.state.signer.ongoing = Some(handle.into()); + } else { + // TODO: Track expiration time in 'SignerState'. + let expiration_time = self + .state + .next_min_expiration + .or(self.state.min_expiration) + .unwrap_or_else(|| panic!("re-sign enqueued but the zone has not been signed")) + .to_system_time(SystemTime::now()); + + self.state.signer.enqueued_resign = Some(EnqueuedResign { + builder: None, + keys_changed, + sigs_need_refresh, + expiration_time, + }); + } + } + + /// Start a pending enqueued re-sign. + /// + /// This should be called when the zone data storage is idle. If a re-sign + /// has been enqueued, it will be initiated, and `true` will be returned. + /// + /// This method cannot initiate enqueued signing operations; when a signing + /// operation is enqueued, it prevents the data storage from idling. + pub fn start_pending(&mut self) -> bool { + // An enqueued or ongoing signing operation holds a 'SignedZoneBuilder', + // which prevents the zone data storage from being idle. This method is + // only called if the zone data storage is idle. 
+ assert!(self.state.signer.enqueued_sign.is_none()); + assert!( + self.state + .signer + .enqueued_resign + .as_ref() + .is_none_or(|o| o.builder.is_none()) + ); + assert!(self.state.signer.ongoing.is_none()); + + // Load the one enqueued re-sign operation, if it exists. + let Some(resign) = self.state.signer.enqueued_resign.take() else { + // A re-sign is not enqueued, nothing to do. + return false; + }; + let EnqueuedResign { + builder: _, + keys_changed, + sigs_need_refresh: _, // TODO + expiration_time: _, // TODO + } = resign; + + let builder = self + .zone() + .storage() + .start_resign() + .expect("'start_pending()' is only called when the zone data storage is idle"); + + // TODO: Once an explicit queue of signing operations has been + // implemented (for limiting the number of simultaneous operations), + // add the operation to the queue before starting the re-sign. If the + // queue is too full to start the operation yet, leave it enqueued. + + // TODO: 'SigningTrigger' can't express multiple reasons. + let trigger = if keys_changed { + SigningTrigger::KeySetModifiedAfterCron + } else { + SigningTrigger::SignatureExpiration + }; + + let handle = tokio::task::spawn(super::sign( + self.center.clone(), + self.zone.clone(), + builder, + trigger, + )); + + self.state.signer.ongoing = Some(handle.into()); + + true + } +} + +//----------- SignerState ------------------------------------------------------ + +/// State for signing a zone. +#[derive(Debug, Default)] +pub struct SignerState { + /// A handle to an ongoing operation, if any. + pub ongoing: Option<AbortOnDrop>, + + /// An enqueued signing operation, if any. + pub enqueued_sign: Option<EnqueuedSign>, + + /// An enqueued re-signing operation, if any. + pub enqueued_resign: Option<EnqueuedResign>, +} + +//----------- EnqueuedSign ----------------------------------------------------- + +/// An enqueued sign of a zone. +#[derive(Debug)] +pub struct EnqueuedSign { + /// The zone builder. 
+ pub builder: SignedZoneBuilder, +} + +//----------- EnqueuedResign --------------------------------------------------- + +/// An enqueued re-sign of a zone. +#[derive(Debug)] +pub struct EnqueuedResign { + /// The zone builder, if obtained. + /// + /// The builder is necessary to begin re-signing. It is optional because + /// it might not be available when the re-sign operation is enqueued. + /// Even if the builder is obtained, the operation might not be ready + /// to start. + pub builder: Option<SignedZoneBuilder>, + + /// Whether zone signing keys have changed. + /// + /// This indicates the reason for re-signing; if it is `true`, re-signing + /// has been enqueued because the keys used to sign the zone have changed. + pub keys_changed: bool, + + /// Whether signatures need to be refreshed. + /// + /// This indicates the reason for re-signing; if it is `true`, re-signing + /// has been enqueued because signatures in the current instance of the zone + /// will expire soon. + pub sigs_need_refresh: bool, + + /// When signatures in the zone will expire. + /// + /// `self` represents an enqueued re-sign, which means that a current signed + /// instance of the zone exists. This field tracks the expiration time (not + /// the time to enqueue re-signing) for that instance, to ensure it will be + /// re-signed in time. + // + // TODO: Force loading to cancel if this gets too close? + pub expiration_time: SystemTime, + // + // TODO: + // - The ID of the signed instance to re-sign. + // Panic if the actual obtained instance does not match this. +} diff --git a/src/units/zone_signer.rs b/src/units/zone_signer.rs index c3ed83dc..9477f46d 100644 --- a/src/units/zone_signer.rs +++ b/src/units/zone_signer.rs @@ -331,7 +331,7 @@ impl ZoneSigner { /// be possible if the signable zone were definitely a ZoneApex zone /// rather than a LightWeightZone (and XFR-in zones are LightWeightZone /// instances). - async fn join_sign_zone_queue( + pub async fn join_sign_zone_queue( &self, center: &Arc<Center>
, zone_name: &StoredName, @@ -2011,7 +2011,7 @@ pub fn load_binary_file(path: &Path) -> Vec { bytes } -enum SignerError { +pub enum SignerError { SoaNotFound, CannotSignUnapprovedZone, CannotResignNonPublishedZone, @@ -2030,7 +2030,7 @@ enum SignerError { } impl SignerError { - fn is_benign(&self) -> bool { + pub fn is_benign(&self) -> bool { matches!( self, SignerError::CannotSignUnapprovedZone | SignerError::CannotResignNonPublishedZone diff --git a/src/zone/mod.rs b/src/zone/mod.rs index 0484416f..ce5c7596 100644 --- a/src/zone/mod.rs +++ b/src/zone/mod.rs @@ -22,6 +22,7 @@ use crate::{ config::Config, loader::zone::{LoaderState, LoaderZoneHandle}, policy::{Policy, PolicyVersion}, + signer::zone::{SignerState, SignerZoneHandle}, util::{deserialize_duration_from_secs, serialize_duration_as_secs}, }; @@ -70,6 +71,15 @@ impl ZoneHandle<'_> { } } + /// Consider signer-specific operations. + pub const fn signer(&mut self) -> SignerZoneHandle<'_> { + SignerZoneHandle { + zone: self.zone, + state: self.state, + center: self.center, + } + } + /// Consider storage-specific operations. pub const fn storage(&mut self) -> StorageZoneHandle<'_> { StorageZoneHandle { @@ -121,6 +131,9 @@ pub struct ZoneState { /// Loading new versions of the zone. pub loader: LoaderState, + /// Signing the zone. + pub signer: SignerState, + /// Data storage for the zone. pub storage: StorageState, // @@ -128,7 +141,6 @@ pub struct ZoneState { // - A log? // - Initialization? 
// - Key manager state - // - Signer state // - Server state } diff --git a/src/zone/storage.rs b/src/zone/storage.rs index b0b96855..984fed2e 100644 --- a/src/zone/storage.rs +++ b/src/zone/storage.rs @@ -8,7 +8,8 @@ use std::{fmt, sync::Arc}; use cascade_zonedata::{ LoadedZoneBuilder, LoadedZoneBuilt, LoadedZonePersister, LoadedZoneReader, LoadedZoneReviewer, - SignedZoneReviewer, ZoneCleaner, ZoneDataStorage, ZoneViewer, + SignedZoneBuilder, SignedZoneBuilt, SignedZoneReader, SignedZoneReviewer, ZoneCleaner, + ZoneDataStorage, ZoneViewer, }; use domain::zonetree; use tracing::{info, trace, trace_span, warn}; @@ -16,7 +17,7 @@ use tracing::{info, trace, trace_span, warn}; use crate::{ center::Center, util::{BackgroundTasks, force_future}, - zone::{HistoricalEvent, PipelineMode, Zone, ZoneHandle, ZoneState}, + zone::{HistoricalEvent, PipelineMode, SigningTrigger, Zone, ZoneHandle, ZoneState}, }; //----------- StorageZoneHandle ------------------------------------------------ @@ -311,6 +312,239 @@ impl StorageZoneHandle<'_> { } } +/// # Signer Operations +impl StorageZoneHandle<'_> { + /// Begin resigning the zone. + /// + /// If the zone data storage is not busy, a [`SignedZoneBuilder`] will be + /// returned through which the instance of the zone can be resigned. + /// Follow up by calling: + /// + /// - [`Self::finish_sign()`] when signing succeeds. + /// + /// - [`Self::give_up_sign()`] when signing fails. + /// + /// If the zone data storage is busy, [`None`] is returned; the signer + /// should enqueue the re-sign operation and wait for an idle notification. + pub fn start_resign(&mut self) -> Option<SignedZoneBuilder> { + // Examine the current state. + let machine = &mut self.state.storage.machine; + match machine.take() { + ZoneDataStorage::Passive(s) => { + // The zone storage is passive; no other operations are ongoing, + // and it is possible to begin re-signing. 
+ trace!( + zone = %self.zone.name, + "Obtaining a 'SignedZoneBuilder' for performing a re-sign" + ); + + let (s, builder) = s.resign(); + *machine = ZoneDataStorage::Signing(s); + Some(builder) + } + + other => { + // The zone storage is in the middle of another operation. + trace!( + zone = %self.zone.name, + "Deferring re-sign because data storage is busy" + ); + + *machine = other; + None + } + } + } + + /// Finish (re-)signing. + /// + /// The prepared signed instance of the zone is finalized, and passed on + /// to the signed zone reviewer. + pub fn finish_sign(&mut self, built: SignedZoneBuilt) { + // Examine the current state. + let machine = &mut self.state.storage.machine; + match machine.take() { + ZoneDataStorage::Signing(s) => { + trace!( + zone = %self.zone.name, + "Successfully finishing the ongoing (re-)sign" + ); + + let (s, signed_reviewer) = s.finish(built); + *machine = ZoneDataStorage::ReviewSignedPending(s); + + // TODO: Use the instance ID here, which will not require + // examining the zone contents. + let serial = signed_reviewer.read_signed().unwrap().soa().rdata.serial; + self.state.record_event( + // TODO: Get the right trigger. + HistoricalEvent::SigningSucceeded { + trigger: SigningTrigger::SignatureExpiration, + }, + Some(domain::base::Serial(serial.into())), + ); + + self.start_signed_review(signed_reviewer); + } + + _ => unreachable!( + "'ZoneDataStorage::Signing' is the only state where a 'SignedZoneBuilt' is available" + ), + } + } + + /// Give up on the ongoing signing operation. + /// + /// Intermediate artifacts in the signed instance, and the upcoming loaded + /// instance (if any), will be cleaned up automatically, in the background. + /// Once the zone storage is idle, a notification will be sent. + pub fn give_up_sign(&mut self, builder: SignedZoneBuilder) { + // Examine the current state. 
+ let machine = &mut self.state.storage.machine; + match machine.take() { + ZoneDataStorage::Signing(s) => { + trace!( + zone = %self.zone.name, + "Giving up on the ongoing (re-)sign" + ); + + let (s, loaded_reviewer) = s.give_up(builder); + // TODO: Communicate the new reviewer handle to the zone server. + let old_loaded_reviewer = + std::mem::replace(&mut self.state.storage.loaded_reviewer, loaded_reviewer); + let (s, cleaner) = s.stop_review(old_loaded_reviewer); + *machine = ZoneDataStorage::Cleaning(s); + self.start_cleanup(cleaner); + } + + _ => unreachable!( + "'ZoneDataStorage::Signing' is the only state where a 'SignedZoneBuilder' is available" + ), + } + } +} + +/// # Signer Review Operations +impl StorageZoneHandle<'_> { + /// Initiate review of a new signed instance of a zone. + fn start_signed_review(&mut self, signed_reviewer: SignedZoneReviewer) { + // NOTE: This function provides compatibility with 'zonetree's. + + let zone = self.zone.clone(); + let center = self.center.clone(); + let span = trace_span!("start_signed_review"); + self.state.storage.background_tasks.spawn_blocking(span, move || { + // Read the loaded instance. + let loaded_reader = signed_reviewer + .read_loaded() + .unwrap_or_else(|| unreachable!("The loader never returns an empty instance")); + + // Read the signed instance. + let signed_reader = signed_reviewer + .read_signed() + .unwrap_or_else(|| unreachable!("The signer never returns an empty instance")); + let serial = signed_reader.soa().rdata.serial; + + // Build a `zonetree` for the new instance. + let zonetree = Self::build_signed_zonetree(&zone, &loaded_reader, &signed_reader); + + // Insert the new `zonetree`. 
+ center.signed_zones.rcu(|tree| { + let mut tree = Arc::unwrap_or_clone(tree.clone()); + let _ = tree.remove_zone(&zone.name, domain::base::iana::Class::IN); + tree.insert_zone(zonetree.clone()).unwrap(); + tree + }); + + let mut state = zone.state.lock().unwrap(); + + // TODO: Pass on the reviewer to the zone server. + let old_signed_reviewer = + std::mem::replace(&mut state.storage.signed_reviewer, signed_reviewer); + + // Transition into the reviewing state. + tracing::debug!("Transitioning zone state..."); + match state.storage.machine.take() { + ZoneDataStorage::ReviewSignedPending(s) => { + // For now, transition all the way back to 'Passive' state. + let s = s.start(old_signed_reviewer); + let (s, persister) = s.mark_approved(); + let persisted = persister.persist(); + let (s, viewer) = s.mark_complete(persisted); + let old_viewer = std::mem::replace(&mut state.storage.viewer, viewer); + let (s, cleaner) = s.switch(old_viewer); + state.storage.machine = ZoneDataStorage::Cleaning(s); + ZoneHandle { + zone: &zone, + state: &mut state, + center: &center, + } + .storage() + .start_cleanup(cleaner); + } + + _ => unreachable!( + "'ZoneDataStorage::ReviewSignedPending' is the only state where a 'SignedZoneReviewer' is available" + ), + } + + info!("Initiating review of newly-signed instance"); + + // TODO: 'on_seek_approval_for_zone' tries to lock zone state. + std::mem::drop(state); + + center.signed_review_server.on_seek_approval_for_zone( + &center, + zone.name.clone(), + domain::base::Serial(serial.into()), + ); + + state = zone.state.lock().unwrap(); + + state.storage.background_tasks.finish() + }); + } + + /// Build a `zonetree` for a signed instance of a zone. 
+ fn build_signed_zonetree( + zone: &Arc<Zone>, + loaded_reader: &LoadedZoneReader<'_>, + signed_reader: &SignedZoneReader<'_>, + ) -> zonetree::Zone { + use zonetree::{types::ZoneUpdate, update::ZoneUpdater}; + + let zone = + zonetree::ZoneBuilder::new(zone.name.clone(), domain::base::iana::Class::IN).build(); + + let mut updater = force_future(ZoneUpdater::new(zone.clone())).unwrap(); + + // Clear all existing records. + force_future(updater.apply(ZoneUpdate::DeleteAllRecords)).unwrap(); + + // Add every record in turn. + for record in signed_reader.records() { + let record: cascade_zonedata::OldParsedRecord = record.clone().into(); + force_future(updater.apply(ZoneUpdate::AddRecord(record))).unwrap(); + } + + // Add every loaded record in turn (excluding SOA). + // + // TODO: Which other records to exclude? DNSKEY, RRSIGs? + for record in loaded_reader.records() { + let record: cascade_zonedata::OldParsedRecord = record.clone().into(); + force_future(updater.apply(ZoneUpdate::AddRecord(record))).unwrap(); + } + + // Commit the update with the SOA record. + let soa: cascade_zonedata::OldParsedRecord = signed_reader.soa().clone().into(); + force_future(updater.apply(ZoneUpdate::Finished(soa))).unwrap(); + + zone + } + + // TODO: approve_signed() +} + /// # Background Tasks impl StorageZoneHandle<'_> { /// Run a cleanup of zone data. @@ -447,7 +681,12 @@ impl StorageZoneHandle<'_> { if self.zone().loader().start_pending() { // The zone storage is no longer idle. - //return; + return; + } + + if self.zone().signer().start_pending() { + // The zone storage is no longer idle. + // return; } } }