Skip to content

Commit

Permalink
[RSS] Initial blueprint: only set address for Crucible datasets (#7310
Browse files Browse the repository at this point in the history
)

This should prevent #7299 from occurring on new invocations of RSS; I'll
add a followup PR (with more testing) that adds the `CHECK` constraint
mentioned in that issue and a migration that does cleanup of any bad
dataset address rows.

While I was in here, fixed #7115. It was smaller than I thought;
should've done that a while ago.
  • Loading branch information
jgallagher authored Jan 7, 2025
1 parent 390ac60 commit 0603f0b
Show file tree
Hide file tree
Showing 14 changed files with 312 additions and 44 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions common/src/disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,7 @@ impl DatasetName {
&self.pool_name
}

// TODO(https://github.com/oxidecomputer/omicron/issues/7115): Rename
// this to "kind?
pub fn dataset(&self) -> &DatasetKind {
pub fn kind(&self) -> &DatasetKind {
&self.kind
}

Expand Down
30 changes: 30 additions & 0 deletions nexus/reconfigurator/blippy/src/blippy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use omicron_uuid_kinds::SledUuid;
use omicron_uuid_kinds::ZpoolUuid;
use std::collections::BTreeSet;
use std::net::IpAddr;
use std::net::SocketAddrV6;

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Note {
Expand Down Expand Up @@ -169,6 +170,17 @@ pub enum SledKind {
OrphanedDataset { dataset: BlueprintDatasetConfig },
/// A dataset claims to be on a zpool that does not exist.
DatasetOnNonexistentZpool { dataset: BlueprintDatasetConfig },
/// A Crucible dataset does not have its `address` set to its corresponding
/// Crucible zone.
CrucibleDatasetWithIncorrectAddress {
dataset: BlueprintDatasetConfig,
expected_address: SocketAddrV6,
},
/// A non-Crucible dataset has an address.
NonCrucibleDatasetWithAddress {
dataset: BlueprintDatasetConfig,
address: SocketAddrV6,
},
}

impl fmt::Display for SledKind {
Expand Down Expand Up @@ -352,6 +364,24 @@ impl fmt::Display for SledKind {
dataset.kind, dataset.id, dataset.pool
)
}
SledKind::CrucibleDatasetWithIncorrectAddress {
dataset,
expected_address,
} => {
write!(
f,
"Crucible dataset {} has bad address {:?} (expected {})",
dataset.id, dataset.address, expected_address,
)
}
SledKind::NonCrucibleDatasetWithAddress { dataset, address } => {
write!(
f,
"non-Crucible dataset ({:?} {}) has an address: {} \
(only Crucible datasets should have addresses)",
dataset.kind, dataset.id, address,
)
}
}
}
}
Expand Down
175 changes: 172 additions & 3 deletions nexus/reconfigurator/blippy/src/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ use crate::blippy::Blippy;
use crate::blippy::Severity;
use crate::blippy::SledKind;
use nexus_sled_agent_shared::inventory::ZoneKind;
use nexus_types::deployment::blueprint_zone_type;
use nexus_types::deployment::BlueprintDatasetConfig;
use nexus_types::deployment::BlueprintDatasetFilter;
use nexus_types::deployment::BlueprintZoneConfig;
use nexus_types::deployment::BlueprintZoneFilter;
use nexus_types::deployment::BlueprintZoneType;
use nexus_types::deployment::OmicronZoneExternalIp;
use omicron_common::address::DnsSubnet;
use omicron_common::address::Ipv6Subnet;
Expand Down Expand Up @@ -413,6 +415,10 @@ fn check_datasets(blippy: &mut Blippy<'_>) {
}
}

// In a check below, we want to look up Crucible zones by zpool; build that
// map as we perform the next set of checks.
let mut crucible_zone_by_zpool = BTreeMap::new();

// There should be a dataset for every dataset referenced by a running zone
// (filesystem or durable).
for (sled_id, zone_config) in blippy
Expand All @@ -431,7 +437,7 @@ fn check_datasets(blippy: &mut Blippy<'_>) {
Some(dataset) => {
match sled_datasets
.get(&dataset.pool().id())
.and_then(|by_zpool| by_zpool.get(dataset.dataset()))
.and_then(|by_zpool| by_zpool.get(dataset.kind()))
{
Some(dataset) => {
expected_datasets.insert(dataset.id);
Expand Down Expand Up @@ -471,6 +477,21 @@ fn check_datasets(blippy: &mut Blippy<'_>) {
);
}
}

if dataset.kind == DatasetKind::Crucible {
match &zone_config.zone_type {
BlueprintZoneType::Crucible(crucible_zone_config) => {
crucible_zone_by_zpool.insert(
dataset.dataset.pool_name.id(),
crucible_zone_config,
);
}
_ => unreachable!(
"zone_type.durable_dataset() returned Crucible for \
non-Crucible zone type"
),
}
}
}
}

Expand Down Expand Up @@ -533,6 +554,50 @@ fn check_datasets(blippy: &mut Blippy<'_>) {
continue;
}
}

// All Crucible datasets should have their address set to the address of the
// Crucible zone on their same zpool, and all non-Crucible datasets should
// not have addresses.
for (sled_id, dataset) in blippy
.blueprint()
.all_omicron_datasets(BlueprintDatasetFilter::InService)
{
match dataset.kind {
DatasetKind::Crucible => {
let Some(blueprint_zone_type::Crucible { address, .. }) =
crucible_zone_by_zpool.get(&dataset.pool.id())
else {
// We already checked above that all datasets have
// corresponding zones, so a failure to find the zone for
// this dataset would have already produced a note; just
// skip it.
continue;
};
if dataset.address != Some(*address) {
blippy.push_sled_note(
sled_id,
Severity::Fatal,
SledKind::CrucibleDatasetWithIncorrectAddress {
dataset: dataset.clone(),
expected_address: *address,
},
);
}
}
_ => {
if let Some(address) = dataset.address {
blippy.push_sled_note(
sled_id,
Severity::Fatal,
SledKind::NonCrucibleDatasetWithAddress {
dataset: dataset.clone(),
address,
},
);
}
}
}
}
}

#[cfg(test)]
Expand Down Expand Up @@ -1294,7 +1359,7 @@ mod tests {
== durable_zone.zone_type.durable_dataset().unwrap().kind);
let root_dataset = root_zone.filesystem_dataset().unwrap();
let matches_root = (dataset.pool == *root_dataset.pool())
&& (dataset.kind == *root_dataset.dataset());
&& (dataset.kind == *root_dataset.kind());
!matches_durable && !matches_root
});

Expand Down Expand Up @@ -1465,7 +1530,7 @@ mod tests {
if (dataset.pool == durable_dataset.dataset.pool_name
&& dataset.kind == durable_dataset.kind)
|| (dataset.pool == *root_dataset.pool()
&& dataset.kind == *root_dataset.dataset())
&& dataset.kind == *root_dataset.kind())
{
Some(Note {
severity: Severity::Fatal,
Expand Down Expand Up @@ -1561,4 +1626,108 @@ mod tests {

logctx.cleanup_successful();
}

#[test]
fn test_dataset_with_bad_address() {
static TEST_NAME: &str = "test_dataset_with_bad_address";
let logctx = test_setup_log(TEST_NAME);
let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME);

let crucible_addr_by_zpool = blueprint
.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
.filter_map(|(_, z)| match z.zone_type {
BlueprintZoneType::Crucible(
blueprint_zone_type::Crucible { address, .. },
) => {
let zpool_id = z
.zone_type
.durable_zpool()
.expect("crucible zone has durable zpool")
.id();
Some((zpool_id, address))
}
_ => None,
})
.collect::<BTreeMap<_, _>>();

// We have three ways a dataset address can be wrong:
//
// * A Crucible dataset has no address
// * A Crucible dataset has an address but it doesn't match its zone
// * A non-Crucible dataset has an address
//
// Make all three kinds of modifications to three different datasets and
// ensure we see all three noted.
let mut cleared_crucible_addr = false;
let mut changed_crucible_addr = false;
let mut set_non_crucible_addr = false;
let mut expected_notes = Vec::new();

for (sled_id, datasets_config) in
blueprint.blueprint_datasets.iter_mut()
{
for dataset in datasets_config.datasets.values_mut() {
match dataset.kind {
DatasetKind::Crucible => {
let bad_address = if !cleared_crucible_addr {
cleared_crucible_addr = true;
None
} else if !changed_crucible_addr {
changed_crucible_addr = true;
Some("[1234:5678:9abc::]:0".parse().unwrap())
} else {
continue;
};

dataset.address = bad_address;
let expected_address = *crucible_addr_by_zpool
.get(&dataset.pool.id())
.expect("found crucible zone for zpool");
expected_notes.push(Note {
severity: Severity::Fatal,
kind: Kind::Sled {
sled_id: *sled_id,
kind: SledKind::CrucibleDatasetWithIncorrectAddress {
dataset: dataset.clone(),
expected_address,
},
},
});
}
_ => {
if !set_non_crucible_addr {
set_non_crucible_addr = true;
let address = "[::1]:0".parse().unwrap();
dataset.address = Some(address);
expected_notes.push(Note {
severity: Severity::Fatal,
kind: Kind::Sled {
sled_id: *sled_id,
kind: SledKind::NonCrucibleDatasetWithAddress {
dataset: dataset.clone(),
address,
},
},
});
}
}
}
}
}

// We should have modified 3 datasets.
assert_eq!(expected_notes.len(), 3);

let report =
Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind);
eprintln!("{}", report.display());
for note in expected_notes {
assert!(
report.notes().contains(&note),
"did not find expected note {note:?}"
);
}

logctx.cleanup_successful();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2790,7 +2790,7 @@ pub mod test {
{
for dataset in dataset_configs {
let id = dataset.id;
let kind = dataset.name.dataset();
let kind = dataset.name.kind();
let by_kind: &mut BTreeMap<_, _> =
input_dataset_ids.entry(*zpool_id).or_default();
let prev = by_kind.insert(kind.clone(), id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ impl ActiveSledEditor {
}

if let Some(dataset) = config.filesystem_dataset() {
self.datasets.expunge(&dataset.pool().id(), dataset.dataset())?;
self.datasets.expunge(&dataset.pool().id(), dataset.kind())?;
}
if let Some(dataset) = config.zone_type.durable_dataset() {
self.datasets
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ impl DatasetIdsBackfillFromDb {
let iter = resources.all_datasets(ZpoolFilter::InService).flat_map(
|(&zpool_id, configs)| {
configs.iter().map(move |config| {
(zpool_id, config.name.dataset().clone(), config.id)
(zpool_id, config.name.kind().clone(), config.id)
})
},
);
Expand Down Expand Up @@ -162,7 +162,7 @@ impl PartialDatasetConfig {

pub fn for_transient_zone(name: DatasetName) -> Self {
assert!(
matches!(name.dataset(), DatasetKind::TransientZone { .. }),
matches!(name.kind(), DatasetKind::TransientZone { .. }),
"for_transient_zone called with incorrect dataset kind: {name:?}"
);
Self {
Expand Down
1 change: 1 addition & 0 deletions sled-agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ guppy.workspace = true
hex-literal.workspace = true
http.workspace = true
hyper.workspace = true
nexus-reconfigurator-blippy.workspace = true
omicron-test-utils.workspace = true
pretty_assertions.workspace = true
rcgen.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion sled-agent/src/artifact_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ pub(crate) fn filter_dataset_mountpoints(
config
.datasets
.into_values()
.filter(|dataset| *dataset.name.dataset() == DatasetKind::Update)
.filter(|dataset| *dataset.name.kind() == DatasetKind::Update)
.map(|dataset| dataset.name.mountpoint(root))
}

Expand Down
2 changes: 1 addition & 1 deletion sled-agent/src/rack_setup/plan/sled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub struct Plan {
}

impl Plan {
pub async fn create(
pub fn create(
log: &Logger,
config: &Config,
bootstrap_addrs: BTreeSet<Ipv6Addr>,
Expand Down
Loading

0 comments on commit 0603f0b

Please sign in to comment.