Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[reconfigurator-cli] Fix blueprint-edit failing if there are multiple expunged zones with the same external IP #7307

Merged
merged 8 commits into from
Jan 9, 2025
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions dev-tools/reconfigurator-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ dropshot.workspace = true
humantime.workspace = true
indent_write.workspace = true
internal-dns-types.workspace = true
itertools.workspace = true
nexus-inventory.workspace = true
nexus-reconfigurator-planning.workspace = true
nexus-reconfigurator-simulation.workspace = true
Expand Down
52 changes: 44 additions & 8 deletions dev-tools/reconfigurator-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use clap::ValueEnum;
use clap::{Args, Parser, Subcommand};
use indent_write::fmt::IndentWriter;
use internal_dns_types::diff::DnsDiff;
use itertools::Itertools;
use nexus_inventory::CollectionBuilder;
use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder;
use nexus_reconfigurator_planning::example::ExampleSystemBuilder;
Expand Down Expand Up @@ -116,8 +117,8 @@ impl ReconfiguratorSim {
builder.set_internal_dns_version(parent_blueprint.internal_dns_version);
builder.set_external_dns_version(parent_blueprint.external_dns_version);

for (_, zone) in
parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All)
for (_, zone) in parent_blueprint
.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
{
if let Some((external_ip, nic)) =
zone.zone_type.external_networking()
Expand Down Expand Up @@ -396,7 +397,10 @@ struct BlueprintPlanArgs {
/// id of the blueprint on which this one will be based
parent_blueprint_id: Uuid,
/// id of the inventory collection to use in planning
collection_id: CollectionUuid,
///
/// Must be provided unless there is only one collection in the loaded
/// state.
collection_id: Option<CollectionUuid>,
}

#[derive(Debug, Args)]
Expand All @@ -423,7 +427,7 @@ enum BlueprintEditCommands {
/// add a CockroachDB instance to a particular sled
AddCockroach { sled_id: SledUuid },
/// expunge a particular zone from a particular sled
ExpungeZone { sled_id: SledUuid, zone_id: OmicronZoneUuid },
ExpungeZone { zone_id: OmicronZoneUuid },
}

#[derive(Debug, Args)]
Expand Down Expand Up @@ -754,10 +758,25 @@ fn cmd_blueprint_plan(
let parent_blueprint_id = args.parent_blueprint_id;
let collection_id = args.collection_id;
let parent_blueprint = system.get_blueprint(parent_blueprint_id)?;
let collection = system.get_collection(collection_id)?;
let collection = match collection_id {
Some(collection_id) => system.get_collection(collection_id)?,
None => {
let mut all_collections_iter = system.all_collections();
match all_collections_iter.len() {
0 => bail!("cannot plan blueprint with no loaded collections"),
1 => all_collections_iter.next().expect("iter length is 1"),
_ => bail!(
"blueprint-plan: must specify collection ID (one of {:?})",
all_collections_iter.map(|c| c.id).join(", ")
),
}
}
};

let creator = "reconfigurator-sim";
let planning_input = sim.planning_input(parent_blueprint)?;
let planning_input = sim
.planning_input(parent_blueprint)
.context("failed to construct planning input")?;
let planner = Planner::new_based_on(
sim.log.clone(),
parent_blueprint,
Expand Down Expand Up @@ -791,7 +810,9 @@ fn cmd_blueprint_edit(
let blueprint_id = args.blueprint_id;
let blueprint = system.get_blueprint(blueprint_id)?;
let creator = args.creator.as_deref().unwrap_or("reconfigurator-cli");
let planning_input = sim.planning_input(blueprint)?;
let planning_input = sim
.planning_input(blueprint)
.context("failed to create planning input")?;

// TODO: We may want to do something other than just using the latest
// collection -- add a way to specify which collection to use.
Expand Down Expand Up @@ -828,7 +849,20 @@ fn cmd_blueprint_edit(
.context("failed to add CockroachDB zone")?;
format!("added CockroachDB zone to sled {}", sled_id)
}
BlueprintEditCommands::ExpungeZone { sled_id, zone_id } => {
BlueprintEditCommands::ExpungeZone { zone_id } => {
let mut parent_sled_id = None;
for sled_id in builder.sled_ids_with_zones() {
if builder
.current_sled_zones(sled_id, BlueprintZoneFilter::All)
.any(|z| z.id == zone_id)
Comment on lines +863 to +865
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this match all zones or some subset?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think All is right - in this case the user passed in an explicit zone ID, and we're just trying to find it. This would allow someone to try to use reconfigurator-cli to expunge an already-expunged zones, but that seems fine, I think?

{
parent_sled_id = Some(sled_id);
break;
}
}
let Some(sled_id) = parent_sled_id else {
bail!("could not find parent sled for zone {zone_id}");
};
builder
.sled_expunge_zone(sled_id, zone_id)
.context("failed to expunge zone")?;
Expand Down Expand Up @@ -1271,6 +1305,8 @@ fn cmd_load_example(
.num_nexus()
.map_or(NEXUS_REDUNDANCY, |n| n.into()),
)
.external_dns_count(3)
.context("invalid external DNS zone count")?
.create_zones(!args.no_zones)
.create_disks_in_blueprint(!args.no_disks_in_blueprint)
.build();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
load-example

blueprint-show 3f00b694-1b16-4aaa-8f78-e6b3a527b434
blueprint-edit 3f00b694-1b16-4aaa-8f78-e6b3a527b434 expunge-zone 90d08710-3c0b-4fae-b01e-9a826fd3124f

blueprint-show 366b0b68-d80e-4bc1-abd3-dc69837847e0
blueprint-plan 366b0b68-d80e-4bc1-abd3-dc69837847e0

blueprint-show 9c998c1d-1a7b-440a-ae0c-40f781dea6e2
blueprint-edit 9c998c1d-1a7b-440a-ae0c-40f781dea6e2 expunge-zone a9bc9526-feb0-4b9f-86e5-f56789a314f6
63 changes: 33 additions & 30 deletions dev-tools/reconfigurator-cli/tests/output/cmd-example-stdout
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,16 @@ parent: 02697f74-b14a-4418-90f0-c28b2a3a6aa9
clickhouse fe79023f-c5d5-4be5-ad2c-da4e9e9237e4 in service fd00:1122:3344:102::23
crucible 054f64a5-182c-4c28-8994-d2e082550201 in service fd00:1122:3344:102::26
crucible 3b5bffea-e5ed-44df-8468-fd4fa69757d8 in service fd00:1122:3344:102::27
crucible 4f2eb088-7d28-4c4e-a27c-746400ec65ba in service fd00:1122:3344:102::2f
crucible 53dd7fa4-899e-49ed-9fc2-48222db3e20d in service fd00:1122:3344:102::2a
crucible 7db307d4-a6ed-4c47-bddf-6759161bf64a in service fd00:1122:3344:102::2c
crucible 95ad9a1d-4063-4874-974c-2fc92830be27 in service fd00:1122:3344:102::29
crucible bc095417-e2f0-4e95-b390-9cc3fc6e3c6d in service fd00:1122:3344:102::28
crucible d90401f1-fbc2-42cb-bf17-309ee0f922fe in service fd00:1122:3344:102::2b
crucible e8f994c0-0a1b-40e6-8db1-40a8ca89e503 in service fd00:1122:3344:102::2d
crucible e9bf481e-323e-466e-842f-8107078c7137 in service fd00:1122:3344:102::2e
crucible f97aa057-6485-45d0-9cb4-4af5b0831d48 in service fd00:1122:3344:102::25
crucible_pantry eaec16c0-0d44-4847-b2d6-31a5151bae52 in service fd00:1122:3344:102::24
crucible_pantry f97aa057-6485-45d0-9cb4-4af5b0831d48 in service fd00:1122:3344:102::25
external_dns eaec16c0-0d44-4847-b2d6-31a5151bae52 in service fd00:1122:3344:102::24
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like these changes were just due to adding external DNS shifting the generated UUIDs around?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes (and this is strongly related to your other comment, will follow up there)

internal_dns 8b8f7c02-7a18-4268-b045-2e286b464c5d in service fd00:1122:3344:1::1
internal_ntp c67dd9a4-0d6c-4e9f-b28d-20003f211f7d in service fd00:1122:3344:102::21
nexus 94b45ce9-d3d8-413a-a76b-865da1f67930 in service fd00:1122:3344:102::22
Expand Down Expand Up @@ -128,20 +129,21 @@ parent: 02697f74-b14a-4418-90f0-c28b2a3a6aa9
---------------------------------------------------------------------------------------------
zone type zone id disposition underlay IP
---------------------------------------------------------------------------------------------
crucible 09937ebb-bb6a-495b-bc97-b58076b70a78 in service fd00:1122:3344:103::2c
crucible a999e5fa-3edc-4dac-919a-d7b554cdae58 in service fd00:1122:3344:103::26
crucible b416f299-c23c-46c8-9820-be2b66ffea0a in service fd00:1122:3344:103::27
crucible b5d5491d-b3aa-4727-8b55-f66e0581ea4f in service fd00:1122:3344:103::2b
crucible cc1dc86d-bd6f-4929-aa4a-9619012e9393 in service fd00:1122:3344:103::24
crucible cd3bb540-e605-465f-8c62-177ac482d850 in service fd00:1122:3344:103::29
crucible e8971ab3-fb7d-4ad8-aae3-7f2fe87c51f3 in service fd00:1122:3344:103::25
crucible f3628f0a-2301-4fc8-bcbf-961199771731 in service fd00:1122:3344:103::2d
crucible f52aa245-7e1b-46c0-8a31-e09725f02caf in service fd00:1122:3344:103::2a
crucible fae49024-6cec-444d-a6c4-83658ab015a4 in service fd00:1122:3344:103::28
crucible_pantry 728db429-8621-4e1e-9915-282aadfa27d1 in service fd00:1122:3344:103::23
internal_dns e7dd3e98-7fe7-4827-be7f-395ff9a5f542 in service fd00:1122:3344:2::1
internal_ntp 4f2eb088-7d28-4c4e-a27c-746400ec65ba in service fd00:1122:3344:103::21
nexus c8aa84a5-a802-46c9-adcd-d61e9c8393c9 in service fd00:1122:3344:103::22
crucible 09937ebb-bb6a-495b-bc97-b58076b70a78 in service fd00:1122:3344:103::2b
crucible a999e5fa-3edc-4dac-919a-d7b554cdae58 in service fd00:1122:3344:103::25
crucible b416f299-c23c-46c8-9820-be2b66ffea0a in service fd00:1122:3344:103::26
crucible b43ce109-90d6-46f9-9df0-8c68bfe6d4a0 in service fd00:1122:3344:103::2e
crucible b5d5491d-b3aa-4727-8b55-f66e0581ea4f in service fd00:1122:3344:103::2a
crucible cbe91cdc-cbb6-4760-aece-6ce08b67e85a in service fd00:1122:3344:103::2d
crucible cd3bb540-e605-465f-8c62-177ac482d850 in service fd00:1122:3344:103::28
crucible f3628f0a-2301-4fc8-bcbf-961199771731 in service fd00:1122:3344:103::2c
crucible f52aa245-7e1b-46c0-8a31-e09725f02caf in service fd00:1122:3344:103::29
crucible fae49024-6cec-444d-a6c4-83658ab015a4 in service fd00:1122:3344:103::27
crucible_pantry e8971ab3-fb7d-4ad8-aae3-7f2fe87c51f3 in service fd00:1122:3344:103::24
external_dns cc1dc86d-bd6f-4929-aa4a-9619012e9393 in service fd00:1122:3344:103::23
internal_dns 728db429-8621-4e1e-9915-282aadfa27d1 in service fd00:1122:3344:2::1
internal_ntp c8aa84a5-a802-46c9-adcd-d61e9c8393c9 in service fd00:1122:3344:103::21
nexus e7dd3e98-7fe7-4827-be7f-395ff9a5f542 in service fd00:1122:3344:103::22
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not really clear why this block and the next one changed. It looks like the IPs shifted around? Is that just because of the new external DNS zone too?

Copy link
Contributor Author

@jgallagher jgallagher Jan 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's more that the UUIDs continued shifting around. The zone UUIDs are all coming out of a deterministic RNG, but that's for the entire blueprint. So the initial sled got a new zone (external DNS), which means it now grabbed one more zone UUID than it did previously. Going up, the "new" UUID it got went to one of the crucible zones (4f2eb088-7d28-4c4e-a27c-746400ec65ba) - that UUID used to belong to this sled's internal NTP zone (the first one ExampleSystemBuilder allocates for a sled). Following that, the UUIDs for every zone on this sled (and all subsequent sleds) shifted.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, part of the goal of RNG trees was to minimize this shifting. Can it be restructured to achieve that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the output, I think each sled should definitely get a separate RNG.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great call - after pulling in #7321, all three blocks have just a few lines shifted around: https://github.com/oxidecomputer/omicron/pull/7307/files#diff-18cea7c50c6bb09d57e37c8bf51e112a953dc2827d2fe4f300f9b6aac2f5b0fb




Expand All @@ -167,20 +169,21 @@ parent: 02697f74-b14a-4418-90f0-c28b2a3a6aa9
---------------------------------------------------------------------------------------------
zone type zone id disposition underlay IP
---------------------------------------------------------------------------------------------
crucible 413d3e02-e19f-400a-9718-a662347538f0 in service fd00:1122:3344:101::24
crucible 6cb330f9-4609-4d6c-98ad-b5cc34245813 in service fd00:1122:3344:101::29
crucible 6d725df0-0189-4429-b270-3eeb891d39c8 in service fd00:1122:3344:101::28
crucible b5443ebd-1f5b-448c-8edc-b4ca25c25db1 in service fd00:1122:3344:101::25
crucible bb55534c-1042-4af4-ad2f-9590803695ac in service fd00:1122:3344:101::27
crucible c4296f9f-f902-4fc7-b896-178e56e60732 in service fd00:1122:3344:101::2d
crucible d14c165f-6370-4cce-9dba-3c6deb762cfc in service fd00:1122:3344:101::2c
crucible de65f128-30f7-422b-a234-d1fc8dd6ef78 in service fd00:1122:3344:101::2b
crucible e135441d-637e-4de9-8023-5ea0096347f3 in service fd00:1122:3344:101::26
crucible fee71ee6-da42-4a7f-a00e-f56b6a3327ce in service fd00:1122:3344:101::2a
crucible_pantry 315a3670-d019-425c-b7a6-c9429428b671 in service fd00:1122:3344:101::23
internal_dns 8b47e1e8-0396-4e44-a4a5-ea891405c9f2 in service fd00:1122:3344:3::1
internal_ntp cbe91cdc-cbb6-4760-aece-6ce08b67e85a in service fd00:1122:3344:101::21
nexus b43ce109-90d6-46f9-9df0-8c68bfe6d4a0 in service fd00:1122:3344:101::22
crucible 51afbaff-e6d8-4408-afed-a2e2d74724e7 in service fd00:1122:3344:101::2e
crucible 6cb330f9-4609-4d6c-98ad-b5cc34245813 in service fd00:1122:3344:101::27
crucible 6d725df0-0189-4429-b270-3eeb891d39c8 in service fd00:1122:3344:101::26
crucible bb55534c-1042-4af4-ad2f-9590803695ac in service fd00:1122:3344:101::25
crucible c4296f9f-f902-4fc7-b896-178e56e60732 in service fd00:1122:3344:101::2b
crucible c54e90ae-bcdc-42b4-98ed-a6985037aba7 in service fd00:1122:3344:101::2d
crucible d14c165f-6370-4cce-9dba-3c6deb762cfc in service fd00:1122:3344:101::2a
crucible de65f128-30f7-422b-a234-d1fc8dd6ef78 in service fd00:1122:3344:101::29
crucible faa91ff2-039d-4567-ace9-be27abb34f92 in service fd00:1122:3344:101::2c
crucible fee71ee6-da42-4a7f-a00e-f56b6a3327ce in service fd00:1122:3344:101::28
crucible_pantry e135441d-637e-4de9-8023-5ea0096347f3 in service fd00:1122:3344:101::24
external_dns b5443ebd-1f5b-448c-8edc-b4ca25c25db1 in service fd00:1122:3344:101::23
internal_dns 413d3e02-e19f-400a-9718-a662347538f0 in service fd00:1122:3344:3::1
internal_ntp 8b47e1e8-0396-4e44-a4a5-ea891405c9f2 in service fd00:1122:3344:101::21
nexus 315a3670-d019-425c-b7a6-c9429428b671 in service fd00:1122:3344:101::22


COCKROACHDB SETTINGS:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Jan 06 21:08:52.995 INFO sufficient BoundaryNtp zones exist in plan, current_count: 0, desired_count: 0
Jan 06 21:08:52.995 INFO sufficient Clickhouse zones exist in plan, current_count: 1, desired_count: 1
Jan 06 21:08:52.995 INFO sufficient ClickhouseKeeper zones exist in plan, current_count: 0, desired_count: 0
Jan 06 21:08:52.995 INFO sufficient ClickhouseServer zones exist in plan, current_count: 0, desired_count: 0
Jan 06 21:08:52.995 INFO sufficient CockroachDb zones exist in plan, current_count: 0, desired_count: 0
Jan 06 21:08:52.995 INFO sufficient CruciblePantry zones exist in plan, current_count: 3, desired_count: 3
Jan 06 21:08:52.995 INFO sufficient InternalDns zones exist in plan, current_count: 3, desired_count: 3
Jan 06 21:08:52.995 INFO added zone to sled, kind: ExternalDns, sled_id: a88790de-5962-4871-8686-61c1fd5b7094
Jan 06 21:08:52.995 INFO sufficient Nexus zones exist in plan, current_count: 3, desired_count: 3
Jan 06 21:08:52.995 INFO sufficient Oximeter zones exist in plan, current_count: 0, desired_count: 0
Jan 06 21:08:52.996 INFO will ensure cockroachdb setting, value: DoNotModify, setting: cluster.preserve_downgrade_option
Loading
Loading