Skip to content

Commit

Permalink
[sled-agent][sim] Use a shared support bundle implementation (#7264)
Browse files Browse the repository at this point in the history
PR 3 / ???

This PR aims to re-use the support bundle management logic in
sled-agent/src/support_bundle/storage.rs for both the real and simulated
sled agent.

It accomplishes this goal with the following:

1. It creates a trait, `LocalStorage`, that abstracts access to storage.
The "real" sled agent accesses real storage, the simulated sled agent
can access the simulated storage APIs.
2. Reduce the usage of unnecessary async mutexes to make lifetimes
slightly more manageable. This happens to align with our guidance in RFD
400 (https://rfd.shared.oxide.computer/rfd/400#no_mutex), but has a
fall-out impact in replacing `.await` calls throughout Omicron.

As an end result of this PR, tests in subsequent PRs (e.g.
#7063) can rely on the
simulated sled agent to respond realistically to support bundle
requests, rather than using a stub implementation.
  • Loading branch information
smklein authored Jan 13, 2025
1 parent 58a8886 commit 310519e
Show file tree
Hide file tree
Showing 20 changed files with 1,035 additions and 660 deletions.
11 changes: 4 additions & 7 deletions nexus/src/app/sagas/disk_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1034,15 +1034,12 @@ pub(crate) mod test {
true
}

async fn no_regions_ensured(
sled_agent: &SledAgent,
test: &DiskTest<'_>,
) -> bool {
fn no_regions_ensured(sled_agent: &SledAgent, test: &DiskTest<'_>) -> bool {
for zpool in test.zpools() {
for dataset in &zpool.datasets {
let crucible_dataset =
sled_agent.get_crucible_dataset(zpool.id, dataset.id).await;
if !crucible_dataset.is_empty().await {
sled_agent.get_crucible_dataset(zpool.id, dataset.id);
if !crucible_dataset.is_empty() {
return false;
}
}
Expand Down Expand Up @@ -1073,7 +1070,7 @@ pub(crate) mod test {
.await
);
assert!(no_region_allocations_exist(datastore, &test).await);
assert!(no_regions_ensured(&sled_agent, &test).await);
assert!(no_regions_ensured(&sled_agent, &test));

assert!(test.crucible_resources_deleted().await);
}
Expand Down
2 changes: 1 addition & 1 deletion nexus/src/app/sagas/instance_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1346,7 +1346,7 @@ pub mod test {
assert!(disk_is_detached(datastore).await);
assert!(no_instances_or_disks_on_sled(&sled_agent).await);

let v2p_mappings = &*sled_agent.v2p_mappings.lock().await;
let v2p_mappings = &*sled_agent.v2p_mappings.lock().unwrap();
assert!(v2p_mappings.is_empty());
}

Expand Down
20 changes: 11 additions & 9 deletions nexus/src/app/sagas/instance_ip_attach.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,14 +435,16 @@ pub(crate) mod test {
&instance_id,
)
.await;
let mut eips = sled_agent.external_ips.lock().await;
let my_eips = eips.entry(vmm_id).or_default();
assert!(my_eips
.iter()
.any(|v| matches!(v, InstanceExternalIpBody::Floating(_))));
assert!(my_eips
.iter()
.any(|v| matches!(v, InstanceExternalIpBody::Ephemeral(_))));
{
let mut eips = sled_agent.external_ips.lock().unwrap();
let my_eips = eips.entry(vmm_id).or_default();
assert!(my_eips
.iter()
.any(|v| matches!(v, InstanceExternalIpBody::Floating(_))));
assert!(my_eips
.iter()
.any(|v| matches!(v, InstanceExternalIpBody::Ephemeral(_))));
}

// DB has records for SNAT plus the new IPs.
let db_eips = datastore
Expand Down Expand Up @@ -497,7 +499,7 @@ pub(crate) mod test {
&instance_id,
)
.await;
let mut eips = sled_agent.external_ips.lock().await;
let mut eips = sled_agent.external_ips.lock().unwrap();
let my_eips = eips.entry(vmm_id).or_default();
assert!(my_eips.is_empty());
}
Expand Down
10 changes: 6 additions & 4 deletions nexus/src/app/sagas/instance_ip_detach.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,9 +405,11 @@ pub(crate) mod test {
&instance_id,
)
.await;
let mut eips = sled_agent.external_ips.lock().await;
let my_eips = eips.entry(vmm_id).or_default();
assert!(my_eips.is_empty());
{
let mut eips = sled_agent.external_ips.lock().unwrap();
let my_eips = eips.entry(vmm_id).or_default();
assert!(my_eips.is_empty());
}

// DB only has record for SNAT.
let db_eips = datastore
Expand Down Expand Up @@ -467,7 +469,7 @@ pub(crate) mod test {
assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat));

// No IP bindings remain on sled-agent.
let eips = &*sled_agent.external_ips.lock().await;
let eips = &*sled_agent.external_ips.lock().unwrap();
for (_nic_id, eip_set) in eips {
assert_eq!(eip_set.len(), 2);
}
Expand Down
10 changes: 4 additions & 6 deletions nexus/src/app/sagas/region_snapshot_replacement_start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1411,8 +1411,8 @@ pub(crate) mod test {
for zpool in test.zpools() {
for dataset in &zpool.datasets {
let crucible_dataset =
sled_agent.get_crucible_dataset(zpool.id, dataset.id).await;
for region in crucible_dataset.list().await {
sled_agent.get_crucible_dataset(zpool.id, dataset.id);
for region in crucible_dataset.list() {
match region.state {
crucible_agent_client::types::State::Tombstoned
| crucible_agent_client::types::State::Destroyed => {
Expand Down Expand Up @@ -1745,8 +1745,7 @@ pub(crate) mod test {
.as_ref()
.unwrap()
.pantry
.set_auto_activate_volumes()
.await;
.set_auto_activate_volumes();

// Create a disk and a snapshot
let client = &cptestctx.external_client;
Expand Down Expand Up @@ -1904,8 +1903,7 @@ pub(crate) mod test {
.as_ref()
.unwrap()
.pantry
.set_auto_activate_volumes()
.await;
.set_auto_activate_volumes();

// Create a disk and a snapshot
let client = &cptestctx.external_client;
Expand Down
65 changes: 28 additions & 37 deletions nexus/test-utils/src/resource_helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1282,26 +1282,24 @@ impl<'a, N: NexusServer> DiskTest<'a, N> {
// Tell the simulated sled agent to create the disk and zpool containing
// these datasets.

sled_agent
.create_external_physical_disk(
physical_disk_id,
disk_identity.clone(),
)
.await;
sled_agent
.create_zpool(zpool.id, physical_disk_id, zpool.size.to_bytes())
.await;
sled_agent.create_external_physical_disk(
physical_disk_id,
disk_identity.clone(),
);
sled_agent.create_zpool(
zpool.id,
physical_disk_id,
zpool.size.to_bytes(),
);

for dataset in &zpool.datasets {
// Sled Agent side: Create the Dataset, make sure regions can be
// created immediately if Nexus requests anything.
let address =
sled_agent.create_crucible_dataset(zpool.id, dataset.id).await;
sled_agent.create_crucible_dataset(zpool.id, dataset.id);
let crucible =
sled_agent.get_crucible_dataset(zpool.id, dataset.id).await;
crucible
.set_create_callback(Box::new(|_| RegionState::Created))
.await;
sled_agent.get_crucible_dataset(zpool.id, dataset.id);
crucible.set_create_callback(Box::new(|_| RegionState::Created));

// Nexus side: Notify Nexus of the physical disk/zpool/dataset
// combination that exists.
Expand Down Expand Up @@ -1381,23 +1379,19 @@ impl<'a, N: NexusServer> DiskTest<'a, N> {
for dataset in &zpool.datasets {
let crucible = self
.get_sled(*sled_id)
.get_crucible_dataset(zpool.id, dataset.id)
.await;
.get_crucible_dataset(zpool.id, dataset.id);
let called = std::sync::atomic::AtomicBool::new(false);
crucible
.set_create_callback(Box::new(move |_| {
if !called.load(std::sync::atomic::Ordering::SeqCst)
{
called.store(
true,
std::sync::atomic::Ordering::SeqCst,
);
RegionState::Requested
} else {
RegionState::Created
}
}))
.await;
crucible.set_create_callback(Box::new(move |_| {
if !called.load(std::sync::atomic::Ordering::SeqCst) {
called.store(
true,
std::sync::atomic::Ordering::SeqCst,
);
RegionState::Requested
} else {
RegionState::Created
}
}));
}
}
}
Expand All @@ -1409,11 +1403,9 @@ impl<'a, N: NexusServer> DiskTest<'a, N> {
for dataset in &zpool.datasets {
let crucible = self
.get_sled(*sled_id)
.get_crucible_dataset(zpool.id, dataset.id)
.await;
.get_crucible_dataset(zpool.id, dataset.id);
crucible
.set_create_callback(Box::new(|_| RegionState::Failed))
.await;
.set_create_callback(Box::new(|_| RegionState::Failed));
}
}
}
Expand All @@ -1430,9 +1422,8 @@ impl<'a, N: NexusServer> DiskTest<'a, N> {
for dataset in &zpool.datasets {
let crucible = self
.get_sled(*sled_id)
.get_crucible_dataset(zpool.id, dataset.id)
.await;
if !crucible.is_empty().await {
.get_crucible_dataset(zpool.id, dataset.id);
if !crucible.is_empty() {
return false;
}
}
Expand Down
10 changes: 3 additions & 7 deletions nexus/tests/integration_tests/crucible_replacements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,6 @@ mod region_replacement {
.activate_background_attachment(
region_replacement.volume_id.to_string(),
)
.await
.unwrap();
}

Expand Down Expand Up @@ -1816,8 +1815,7 @@ async fn test_replacement_sanity(cptestctx: &ControlPlaneTestContext) {
.as_ref()
.unwrap()
.pantry
.set_auto_activate_volumes()
.await;
.set_auto_activate_volumes();

// Now, run all replacement tasks to completion
let internal_client = &cptestctx.internal_client;
Expand Down Expand Up @@ -1854,8 +1852,7 @@ async fn test_region_replacement_triple_sanity(
.as_ref()
.unwrap()
.pantry
.set_auto_activate_volumes()
.await;
.set_auto_activate_volumes();

// Create a disk and a snapshot and a disk from that snapshot
let client = &cptestctx.external_client;
Expand Down Expand Up @@ -1966,8 +1963,7 @@ async fn test_region_replacement_triple_sanity_2(
.as_ref()
.unwrap()
.pantry
.set_auto_activate_volumes()
.await;
.set_auto_activate_volumes();

// Create a disk and a snapshot and a disk from that snapshot
let client = &cptestctx.external_client;
Expand Down
10 changes: 3 additions & 7 deletions nexus/tests/integration_tests/disks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1340,9 +1340,7 @@ async fn test_disk_virtual_provisioning_collection_failed_delete(
.sled_agent
.sled_agent
.get_crucible_dataset(zpool.id, dataset.id)
.await
.set_region_deletion_error(true)
.await;
.set_region_deletion_error(true);

// Delete the disk - expect this to fail
NexusRequest::new(
Expand Down Expand Up @@ -1378,9 +1376,7 @@ async fn test_disk_virtual_provisioning_collection_failed_delete(
.sled_agent
.sled_agent
.get_crucible_dataset(zpool.id, dataset.id)
.await
.set_region_deletion_error(false)
.await;
.set_region_deletion_error(false);

// Request disk delete again
NexusRequest::new(
Expand Down Expand Up @@ -2478,7 +2474,7 @@ async fn test_no_halt_disk_delete_one_region_on_expunged_agent(
let zpool = disk_test.zpools().next().expect("Expected at least one zpool");
let dataset = &zpool.datasets[0];

cptestctx.sled_agent.sled_agent.drop_dataset(zpool.id, dataset.id).await;
cptestctx.sled_agent.sled_agent.drop_dataset(zpool.id, dataset.id);

// Spawn a task that tries to delete the disk
let disk_url = get_disk_url(DISK_NAME);
Expand Down
8 changes: 4 additions & 4 deletions nexus/tests/integration_tests/instances.rs
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,7 @@ async fn test_instance_start_creates_networking_state(

sled_agents.push(&cptestctx.sled_agent.sled_agent);
for agent in &sled_agents {
agent.v2p_mappings.lock().await.clear();
agent.v2p_mappings.lock().unwrap().clear();
}

// Start the instance and make sure that it gets to Running.
Expand Down Expand Up @@ -6251,7 +6251,7 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) {
// Validate that every sled no longer has the V2P mapping for this instance
for sled_agent in &sled_agents {
let condition = || async {
let v2p_mappings = sled_agent.v2p_mappings.lock().await;
let v2p_mappings = sled_agent.v2p_mappings.lock().unwrap();
if v2p_mappings.is_empty() {
Ok(())
} else {
Expand Down Expand Up @@ -6508,7 +6508,7 @@ async fn assert_sled_v2p_mappings(
vni: Vni,
) {
let condition = || async {
let v2p_mappings = sled_agent.v2p_mappings.lock().await;
let v2p_mappings = sled_agent.v2p_mappings.lock().unwrap();
let mapping = v2p_mappings.iter().find(|mapping| {
mapping.virtual_ip == nic.ip
&& mapping.virtual_mac == nic.mac
Expand Down Expand Up @@ -6580,7 +6580,7 @@ pub async fn assert_sled_vpc_routes(
kind: RouterKind::Custom(db_subnet.ipv4_block.0.into()),
};

let vpc_routes = sled_agent.vpc_routes.lock().await;
let vpc_routes = sled_agent.vpc_routes.lock().unwrap();
let sys_routes_found = vpc_routes
.iter()
.any(|(id, set)| *id == sys_key && set.routes == system_routes);
Expand Down
4 changes: 1 addition & 3 deletions nexus/tests/integration_tests/snapshots.rs
Original file line number Diff line number Diff line change
Expand Up @@ -980,9 +980,7 @@ async fn test_snapshot_unwind(cptestctx: &ControlPlaneTestContext) {
.sled_agent
.sled_agent
.get_crucible_dataset(zpool.id, dataset.id)
.await
.set_creating_a_running_snapshot_should_fail()
.await;
.set_creating_a_running_snapshot_should_fail();

// Issue snapshot request, expecting it to fail
let snapshots_url = format!("/v1/snapshots?project={}", PROJECT_NAME);
Expand Down
12 changes: 2 additions & 10 deletions nexus/tests/integration_tests/volume_management.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2529,9 +2529,7 @@ async fn test_disk_create_saga_unwinds_correctly(
.sled_agent
.sled_agent
.get_crucible_dataset(zpool.id, dataset.id)
.await
.set_region_creation_error(true)
.await;
.set_region_creation_error(true);

let disk_size = ByteCount::from_gibibytes_u32(2);
let base_disk = params::DiskCreate {
Expand Down Expand Up @@ -2599,9 +2597,7 @@ async fn test_snapshot_create_saga_unwinds_correctly(
.sled_agent
.sled_agent
.get_crucible_dataset(zpool.id, dataset.id)
.await
.set_region_creation_error(true)
.await;
.set_region_creation_error(true);

// Create a snapshot
let snapshot_create = params::SnapshotCreate {
Expand Down Expand Up @@ -4227,11 +4223,9 @@ async fn test_read_only_region_reference_counting(
TypedUuid::from_untyped_uuid(db_read_only_dataset.pool_id),
db_read_only_dataset.id(),
)
.await
.get(crucible_agent_client::types::RegionId(
read_only_region.id().to_string()
))
.await
.unwrap()
.state,
crucible_agent_client::types::State::Created
Expand Down Expand Up @@ -4299,11 +4293,9 @@ async fn test_read_only_region_reference_counting(
TypedUuid::from_untyped_uuid(db_read_only_dataset.pool_id),
db_read_only_dataset.id(),
)
.await
.get(crucible_agent_client::types::RegionId(
read_only_region.id().to_string()
))
.await
.unwrap()
.state,
crucible_agent_client::types::State::Destroyed
Expand Down
Loading

0 comments on commit 310519e

Please sign in to comment.