Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub mod log;
pub mod manager;
pub mod metrics;
pub mod policy;
pub mod signer;
pub mod state;
pub mod tsig;
pub mod units;
Expand Down
67 changes: 67 additions & 0 deletions src/signer/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
//! Signing zones.
//
// TODO: Move 'src/units/zone_signer.rs' here.

use std::sync::Arc;

use cascade_zonedata::SignedZoneBuilder;
use tracing::{debug, error};

use crate::{
center::{Center, halt_zone},
manager::record_zone_event,
zone::{HistoricalEvent, SigningTrigger, Zone},
};

pub mod zone;

//----------- sign() -----------------------------------------------------------

/// Sign or re-sign a zone.
///
/// A new signed instance of the zone will be generated using `builder`.
/// `builder` provides access to the actual zone content, including previous
/// instances of the zone for incremental signing.
#[tracing::instrument(
    level = "debug",
    skip_all,
    fields(zone = %zone.name),
)]
async fn sign(
    center: Arc<Center>,
    zone: Arc<Zone>,
    builder: SignedZoneBuilder,
    trigger: SigningTrigger,
) {
    // A first-time sign is one where the next unsigned instance is loaded;
    // otherwise this is a re-sign of existing content.
    let resign = !builder.have_next_loaded();

    let result = center
        .signer
        .join_sign_zone_queue(&center, &zone.name, resign, trigger)
        .await;

    // Success: nothing further to do.
    let Err(error) = result else { return };

    if error.is_benign() {
        // Ignore this benign case. It was probably caused by dnst keyset
        // cron triggering resigning before we even signed the first time,
        // either because the zone was large and slow to load and sign, or
        // because the unsigned zone was pending review.
        debug!("Ignoring probably benign failure: {error}");
        return;
    }

    error!("Signing failed: {error}");

    // TODO: Inline these methods and use a single 'ZoneState' lock.

    halt_zone(&center, &zone.name, true, &error.to_string());

    record_zone_event(
        &center,
        &zone.name,
        HistoricalEvent::SigningFailed {
            trigger,
            reason: error.to_string(),
        },
        None, // TODO
    );
}
258 changes: 258 additions & 0 deletions src/signer/zone.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
//! Zone-specific signing state.

use std::{sync::Arc, time::SystemTime};

use cascade_zonedata::SignedZoneBuilder;

use crate::{
center::Center,
util::AbortOnDrop,
zone::{SigningTrigger, Zone, ZoneHandle, ZoneState},
};

//----------- SignerZoneHandle -------------------------------------------------

/// A handle for signer-related operations on a [`Zone`].
///
/// Bundles a zone, exclusive access to its state, and the global [`Center`],
/// so signer methods can operate on all three without re-acquiring them.
pub struct SignerZoneHandle<'a> {
    /// The zone being operated on.
    pub zone: &'a Arc<Zone>,

    /// The locked zone state.
    //
    // NOTE(review): the exclusive borrow implies the caller holds the zone
    // state lock for the lifetime of this handle — confirm against callers.
    pub state: &'a mut ZoneState,

    /// Cascade's global state.
    pub center: &'a Arc<Center>,
}

impl SignerZoneHandle<'_> {
/// Access the generic [`ZoneHandle`].
pub const fn zone(&mut self) -> ZoneHandle<'_> {
ZoneHandle {
zone: self.zone,
state: self.state,
center: self.center,
}
}

/// Enqueue a signing operation for a newly loaded instance of the zone.
pub fn enqueue_sign(&mut self, builder: SignedZoneBuilder) {
// A zone can have at most one 'SignedZoneBuilder' at a time. Because
// we have 'builder', we are guaranteed that no other signing operations
// are ongoing right now. A re-signing operation may be enqueued, but it
// has lower priority than this (for now).

assert!(self.state.signer.enqueued_sign.is_none());
assert!(self.state.signer.ongoing.is_none());

// TODO: Keep state for a queue of pending (re-)signing operations, so
// that the number of simultaneous operations can be limited. At the
// moment, this queue is opaque and is handled within the asynchronous
// task.

let handle = tokio::task::spawn(super::sign(
self.center.clone(),
self.zone.clone(),
builder,
SigningTrigger::ZoneChangesApproved,
));

self.state.signer.ongoing = Some(handle.into());
}

/// Enqueue a re-signing operation for the zone.
///
/// ## Panics
///
/// Panics if `keys_changed` and `sigs_need_refresh` are both `false`.
pub fn enqueue_resign(&mut self, keys_changed: bool, sigs_need_refresh: bool) {
assert!(
keys_changed || sigs_need_refresh,
"a reason for re-signing was not specified"
);

// If a re-signing operation has already been enqueued, add to it.
if let Some(resign) = &mut self.state.signer.enqueued_resign {
resign.keys_changed |= keys_changed;
resign.sigs_need_refresh |= sigs_need_refresh;
return;
}

// Try to obtain a 'SignedZoneBuilder' so building can begin.
let builder = self.zone().storage().start_resign();

// TODO: Keep state for a queue of pending (re-)signing operations, so
// that the number of simultaneous operations can be limited. At the
// moment, this queue is opaque and is handled within the asynchronous
// task.

// Try to initiate the re-sign immediately.
if let Some(builder) = builder {
// A zone can have at most one 'SignedZoneBuilder' at a time.
// Because we have 'builder', we are guaranteed that no other
// signing operations are ongoing right now. A re-signing operation
// may be enqueued, but it has lower priority than this (for now).

assert!(self.state.signer.enqueued_sign.is_none());
assert!(self.state.signer.ongoing.is_none());

// TODO: 'SigningTrigger' can't express multiple reasons.
let trigger = if keys_changed {
SigningTrigger::KeySetModifiedAfterCron
} else {
SigningTrigger::SignatureExpiration
};

let handle = tokio::task::spawn(super::sign(
self.center.clone(),
self.zone.clone(),
builder,
trigger,
));

self.state.signer.ongoing = Some(handle.into());
} else {
// TODO: Track expiration time in 'SignerState'.
let expiration_time = self
.state
.next_min_expiration
.or(self.state.min_expiration)
.unwrap_or_else(|| panic!("re-sign enqueued but the zone has not been signed"))
.to_system_time(SystemTime::now());

self.state.signer.enqueued_resign = Some(EnqueuedResign {
builder: None,
keys_changed,
sigs_need_refresh,
expiration_time,
});
}
}

/// Start a pending enqueued re-sign.
///
/// This should be called when the zone data storage is idle. If a re-sign
/// has been enqueued, it will be initiated, and `true` will be returned.
///
/// This method cannot initiate enqueued signing operations; when a signing
/// operation is enqueued, it prevents the data storage from idling.
pub fn start_pending(&mut self) -> bool {
// An enqueued or ongoing signing operation holds a 'SignedZoneBuilder',
// which prevents the zone data storage from being idle. This method is
// only called if the zone data storage is idle.
assert!(self.state.signer.enqueued_sign.is_none());
assert!(
self.state
.signer
.enqueued_resign
.as_ref()
.is_none_or(|o| o.builder.is_none())
);
assert!(self.state.signer.ongoing.is_none());

// Load the one enqueued re-sign operation, if it exists.
let Some(resign) = self.state.signer.enqueued_resign.take() else {
// A re-sign is not enqueued, nothing to do.
return false;
};
let EnqueuedResign {
builder: _,
keys_changed,
sigs_need_refresh: _, // TODO
expiration_time: _, // TODO
} = resign;

let builder = self
.zone()
.storage()
.start_resign()
.expect("'start_pending()' is only called when the zone data storage is idle");

// TODO: Once an explicit queue of signing operations has been
// implemented (for limiting the number of simultaneous operations),
// add the operation to the queue before starting the re-sign. If the
// queue is too full to start the operation yet, leave it enqueued.

// TODO: 'SigningTrigger' can't express multiple reasons.
let trigger = if keys_changed {
SigningTrigger::KeySetModifiedAfterCron
} else {
SigningTrigger::SignatureExpiration
};

let handle = tokio::task::spawn(super::sign(
self.center.clone(),
self.zone.clone(),
builder,
trigger,
));

self.state.signer.ongoing = Some(handle.into());

true
}
}

//----------- SignerState ------------------------------------------------------

/// State for signing a zone.
///
/// Tracks the single ongoing signing task (if any) plus at most one enqueued
/// sign and one enqueued re-sign awaiting initiation.
#[derive(Debug, Default)]
pub struct SignerState {
    /// A handle to an ongoing operation, if any.
    //
    // NOTE(review): the 'AbortOnDrop' wrapper suggests the task is aborted
    // when this handle is dropped — confirm against 'util::AbortOnDrop'.
    pub ongoing: Option<AbortOnDrop>,

    /// An enqueued signing operation, if any.
    pub enqueued_sign: Option<EnqueuedSign>,

    /// An enqueued re-signing operation, if any.
    pub enqueued_resign: Option<EnqueuedResign>,
}

//----------- EnqueuedSign -----------------------------------------------------

/// An enqueued sign of a zone.
///
/// Holding this keeps the zone's 'SignedZoneBuilder' reserved until the
/// signing operation is started.
#[derive(Debug)]
pub struct EnqueuedSign {
    /// The zone builder.
    ///
    /// Provides access to the zone content that will be signed.
    pub builder: SignedZoneBuilder,
}

//----------- EnqueuedResign ---------------------------------------------------

/// An enqueued re-sign of a zone.
///
/// At least one of `keys_changed` and `sigs_need_refresh` is expected to be
/// `true`; the enqueue path asserts this before constructing the value.
#[derive(Debug)]
pub struct EnqueuedResign {
    /// The zone builder, if obtained.
    ///
    /// The builder is necessary to begin re-signing. It is optional because
    /// it might not be available when the re-sign operation is enqueued.
    /// Even if the builder is obtained, the operation might not be ready
    /// to start.
    pub builder: Option<SignedZoneBuilder>,

    /// Whether zone signing keys have changed.
    ///
    /// This indicates the reason for re-signing; if it is `true`, re-signing
    /// has been enqueued because the keys used to sign the zone have changed.
    pub keys_changed: bool,

    /// Whether signatures need to be refreshed.
    ///
    /// This indicates the reason for re-signing; if it is `true`, re-signing
    /// has been enqueued because signatures in the current instance of the zone
    /// will expire soon.
    pub sigs_need_refresh: bool,

    /// When signatures in the zone will expire.
    ///
    /// `self` represents an enqueued re-sign, which means that a current signed
    /// instance of the zone exists. This field tracks the expiration time (not
    /// the time to enqueue re-signing) for that instance, to ensure it will be
    /// re-signed in time.
    //
    // TODO: Force loading to cancel if this gets too close?
    pub expiration_time: SystemTime,
    //
    // TODO:
    // - The ID of the signed instance to re-sign.
    //   Panic if the actual obtained instance does not match this.
}
6 changes: 3 additions & 3 deletions src/units/zone_signer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ impl ZoneSigner {
/// be possible if the signable zone were definitely a ZoneApex zone
/// rather than a LightWeightZone (and XFR-in zones are LightWeightZone
/// instances).
async fn join_sign_zone_queue(
pub async fn join_sign_zone_queue(
&self,
center: &Arc<Center>,
zone_name: &StoredName,
Expand Down Expand Up @@ -2011,7 +2011,7 @@ pub fn load_binary_file(path: &Path) -> Vec<u8> {
bytes
}

enum SignerError {
pub enum SignerError {
SoaNotFound,
CannotSignUnapprovedZone,
CannotResignNonPublishedZone,
Expand All @@ -2030,7 +2030,7 @@ enum SignerError {
}

impl SignerError {
fn is_benign(&self) -> bool {
pub fn is_benign(&self) -> bool {
matches!(
self,
SignerError::CannotSignUnapprovedZone | SignerError::CannotResignNonPublishedZone
Expand Down
Loading