From cd3aabd75bdb937cecba1f99b306913adfd4bd2a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 7 Nov 2024 12:54:04 -0800 Subject: [PATCH 01/25] Add support bundle API skeleton to Nexus --- nexus/external-api/output/nexus_tags.txt | 5 + nexus/external-api/src/lib.rs | 59 +++++ nexus/src/external_api/http_entrypoints.rs | 99 +++++++ nexus/types/src/external_api/params.rs | 1 + nexus/types/src/external_api/shared.rs | 37 +++ openapi/nexus.json | 291 ++++++++++++++++++++- 6 files changed, 480 insertions(+), 12 deletions(-) diff --git a/nexus/external-api/output/nexus_tags.txt b/nexus/external-api/output/nexus_tags.txt index 2d64679ca0..6ac531cf61 100644 --- a/nexus/external-api/output/nexus_tags.txt +++ b/nexus/external-api/output/nexus_tags.txt @@ -30,6 +30,11 @@ probe_create POST /experimental/v1/probes probe_delete DELETE /experimental/v1/probes/{probe} probe_list GET /experimental/v1/probes probe_view GET /experimental/v1/probes/{probe} +support_bundle_create POST /experimental/v1/system/support-bundles +support_bundle_delete DELETE /experimental/v1/system/support-bundles/{support_bundle} +support_bundle_download GET /experimental/v1/system/support-bundles/{support_bundle}/download +support_bundle_list GET /experimental/v1/system/support-bundles +support_bundle_view GET /experimental/v1/system/support-bundles/{support_bundle} API operations found with tag "images" OPERATION ID METHOD URL PATH diff --git a/nexus/external-api/src/lib.rs b/nexus/external-api/src/lib.rs index b05366e0c5..d7f523a900 100644 --- a/nexus/external-api/src/lib.rs +++ b/nexus/external-api/src/lib.rs @@ -2760,6 +2760,65 @@ pub trait NexusExternalApi { path_params: Path, ) -> Result; + // Support bundles (experimental) + + /// List all support bundles + #[endpoint { + method = GET, + path = "/experimental/v1/system/support-bundles", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// View a single support bundle + #[endpoint { + method = GET, + path = "/experimental/v1/system/support-bundles/{support_bundle}", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Download the contents of a single support bundle + #[endpoint { + method = GET, + path = "/experimental/v1/system/support-bundles/{support_bundle}/download", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_download( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Create a new support bundle + #[endpoint { + method = POST, + path = "/experimental/v1/system/support-bundles", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_create( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Delete an existing support bundle + /// + /// May also be used to cancel a support bundle which is currently being + /// collected, or to remove metadata for a support bundle that has failed. 
+ #[endpoint { + method = DELETE, + path = "/experimental/v1/system/support-bundles/{support_bundle}", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + // Probes (experimental) /// List instrumentation probes diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index df8f1aa24e..655c56320e 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -5999,6 +5999,105 @@ impl NexusExternalApi for NexusExternalApiImpl { .await } + async fn support_bundle_list( + rqctx: RequestContext, + _query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus.unimplemented_todo(&opctx, crate::app::Unimpl::Public).await.into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn support_bundle_view( + rqctx: RequestContext, + _path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus.unimplemented_todo(&opctx, crate::app::Unimpl::Public).await.into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn support_bundle_download( + rqctx: RequestContext, + _path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus.unimplemented_todo(&opctx, crate::app::Unimpl::Public).await.into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn support_bundle_create( + rqctx: RequestContext, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus.unimplemented_todo(&opctx, crate::app::Unimpl::Public).await.into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn support_bundle_delete( + rqctx: RequestContext, + _path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus.unimplemented_todo(&opctx, crate::app::Unimpl::Public).await.into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + async fn probe_list( rqctx: RequestContext, query_params: Query>, diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 9af9d3be1e..a0bd36ed3c 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -89,6 +89,7 @@ path_param!(SshKeyPath, ssh_key, "SSH key"); path_param!(AddressLotPath, address_lot, "address lot"); path_param!(ProbePath, probe, "probe"); path_param!(CertificatePath, certificate, "certificate"); +path_param!(SupportBundlePath, 
support_bundle, "support bundle"); id_path_param!(GroupPath, group_id, "group"); diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs index 9bfa9c8358..35367400e0 100644 --- a/nexus/types/src/external_api/shared.rs +++ b/nexus/types/src/external_api/shared.rs @@ -6,6 +6,8 @@ use std::net::IpAddr; +use chrono::DateTime; +use chrono::Utc; use omicron_common::api::external::Name; use omicron_common::api::internal::shared::NetworkInterface; use parse_display::FromStr; @@ -414,6 +416,41 @@ mod test { } } +#[derive(Debug, Clone, Copy, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SupportBundleState { + /// Support Bundle still actively being collected. + /// + /// This is the initial state for a Support Bundle, and it will + /// automatically transition to either "Failed" or "Active". + /// + /// If a user no longer wants to access a Support Bundle, they can + /// request cancellation, which will transition to the "Cancelling" state. + Collecting, + + /// Support Bundle has been cancelled, and will be deleted shortly. + Cancelling, + + /// Support Bundle was not created successfully, or was created and has lost + /// backing storage. + /// + /// The record of the bundle still exists for readability, but the only + /// valid operation on these bundles is to delete them. + Failed, + + /// Support Bundle has been processed, and is ready for usage. + Active, +} + +#[derive(Debug, Clone, JsonSchema, Serialize, Deserialize)] +pub struct SupportBundleInfo { + // XXX: Strongly-typed Support Bundle UUID? + pub id: Uuid, + pub time_created: DateTime, + pub reason_for_creation: String, + pub state: SupportBundleState, +} + #[derive(Debug, Clone, JsonSchema, Serialize, Deserialize)] pub struct ProbeInfo { pub id: Uuid, diff --git a/openapi/nexus.json b/openapi/nexus.json index f12ff4730c..6cf143ac49 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -301,6 +301,195 @@ } } }, + "/experimental/v1/system/support-bundles": { + "get": { + "tags": [ + "hidden" + ], + "summary": "List all support bundles", + "operationId": "support_bundle_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/IdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleInfoResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + }, + "post": { + "tags": [ + "hidden" + ], + "summary": "Create a new support bundle", + "operationId": "support_bundle_create", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleInfo" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/experimental/v1/system/support-bundles/{support_bundle}": 
{ + "get": { + "tags": [ + "hidden" + ], + "summary": "View a single support bundle", + "operationId": "support_bundle_view", + "parameters": [ + { + "in": "path", + "name": "support_bundle", + "description": "Name or ID of the support bundle", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleInfo" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "hidden" + ], + "summary": "Delete an existing support bundle", + "description": "May also be used to cancel a support bundle which is currently being collected, or to remove metadata for a support bundle that has failed.", + "operationId": "support_bundle_delete", + "parameters": [ + { + "in": "path", + "name": "support_bundle", + "description": "Name or ID of the support bundle", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleInfo" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/experimental/v1/system/support-bundles/{support_bundle}/download": { + "get": { + "tags": [ + "hidden" + ], + "summary": "Download the contents of a single support bundle", + "operationId": "support_bundle_download", + "parameters": [ + { + "in": "path", + "name": "support_bundle", + "description": "Name or ID of the support bundle", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + } + }, "/login/{silo_name}/saml/{provider_name}": { "post": { "tags": [ @@ -19990,6 +20179,84 @@ "items" ] }, + "SupportBundleInfo": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "reason_for_creation": { + "type": "string" + }, + "state": { + "$ref": "#/components/schemas/SupportBundleState" + }, + "time_created": { + "type": "string", + "format": "date-time" + } + }, + "required": [ + "id", + "reason_for_creation", + "state", + "time_created" + ] + }, + "SupportBundleInfoResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/SupportBundleInfo" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "SupportBundleState": { + "oneOf": [ + { + "description": "Support Bundle still actively being collected.\n\nThis is the initial state for a Support Bundle, and it will automatically transition to either \"Failed\" or \"Active\".\n\nIf a user no longer wants to access a Support Bundle, they can request cancellation, which will transition to the \"Cancelling\" state.", + "type": "string", + "enum": [ + "collecting" + ] + }, + { + "description": "Support Bundle has been cancelled, and will be deleted shortly.", + "type": "string", + "enum": [ + "cancelling" + ] + }, + { + 
"description": "Support Bundle was not created successfully, or was created and has lost backing storage.\n\nThe record of the bundle still exists for readability, but the only valid operation on these bundles is to delete them.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "Support Bundle has been processed, and is ready for usage.", + "type": "string", + "enum": [ + "active" + ] + } + ] + }, "Switch": { "description": "An operator's view of a Switch.", "type": "object", @@ -22440,6 +22707,18 @@ } ] }, + "IdSortMode": { + "description": "Supported set of sort modes for scanning by id only.\n\nCurrently, we only support scanning in ascending order.", + "oneOf": [ + { + "description": "sort in increasing order of \"id\"", + "type": "string", + "enum": [ + "id_ascending" + ] + } + ] + }, "DiskMetricName": { "type": "string", "enum": [ @@ -22459,18 +22738,6 @@ "descending" ] }, - "IdSortMode": { - "description": "Supported set of sort modes for scanning by id only.\n\nCurrently, we only support scanning in ascending order.", - "oneOf": [ - { - "description": "sort in increasing order of \"id\"", - "type": "string", - "enum": [ - "id_ascending" - ] - } - ] - }, "SystemMetricName": { "type": "string", "enum": [ From 3774ecbe97839af6fba78c7a55d8ec5c21eb4d62 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 7 Nov 2024 12:57:50 -0800 Subject: [PATCH 02/25] fmt --- nexus/src/external_api/http_entrypoints.rs | 28 +++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 655c56320e..3f22b95811 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -6002,7 +6002,8 @@ impl NexusExternalApi for NexusExternalApiImpl { async fn support_bundle_list( rqctx: RequestContext, _query_params: Query, - ) -> Result>, HttpError> { + ) -> Result>, HttpError> + { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; @@ -6010,7 +6011,10 @@ impl NexusExternalApi for NexusExternalApiImpl { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - Err(nexus.unimplemented_todo(&opctx, crate::app::Unimpl::Public).await.into()) + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) }; apictx .context @@ -6030,7 +6034,10 @@ impl NexusExternalApi for NexusExternalApiImpl { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - Err(nexus.unimplemented_todo(&opctx, crate::app::Unimpl::Public).await.into()) + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) }; apictx .context @@ -6050,7 +6057,10 @@ impl NexusExternalApi for NexusExternalApiImpl { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - Err(nexus.unimplemented_todo(&opctx, crate::app::Unimpl::Public).await.into()) + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) }; apictx .context @@ -6069,7 +6079,10 @@ impl NexusExternalApi for NexusExternalApiImpl { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - Err(nexus.unimplemented_todo(&opctx, crate::app::Unimpl::Public).await.into()) + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) }; apictx .context @@ -6089,7 +6102,10 @@ impl NexusExternalApi for NexusExternalApiImpl { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - 
Err(nexus.unimplemented_todo(&opctx, crate::app::Unimpl::Public).await.into()) + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) }; apictx .context From 0b3fd29b97db396a3e937c39ad2d83c3ade22797 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 7 Nov 2024 14:54:57 -0800 Subject: [PATCH 03/25] EXPECTORATE --- nexus/reconfigurator/planning/src/system.rs | 6 +++--- nexus/tests/output/uncovered-authz-endpoints.txt | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 66a5393275..28b8bcf2ae 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -845,9 +845,9 @@ impl Sled { name: config.name.full_name(), available: ByteCount::from_gibibytes_u32(1), used: ByteCount::from_gibibytes_u32(0), - quota: config.quota, - reservation: config.reservation, - compression: config.compression.to_string(), + quota: config.inner.quota, + reservation: config.inner.reservation, + compression: config.inner.compression.to_string(), }); } diff --git a/nexus/tests/output/uncovered-authz-endpoints.txt b/nexus/tests/output/uncovered-authz-endpoints.txt index c5091c5a3b..ec4790c071 100644 --- a/nexus/tests/output/uncovered-authz-endpoints.txt +++ b/nexus/tests/output/uncovered-authz-endpoints.txt @@ -1,13 +1,18 @@ API endpoints with no coverage in authz tests: probe_delete (delete "/experimental/v1/probes/{probe}") +support_bundle_delete (delete "/experimental/v1/system/support-bundles/{support_bundle}") probe_list (get "/experimental/v1/probes") probe_view (get "/experimental/v1/probes/{probe}") +support_bundle_list (get "/experimental/v1/system/support-bundles") +support_bundle_view (get "/experimental/v1/system/support-bundles/{support_bundle}") +support_bundle_download (get "/experimental/v1/system/support-bundles/{support_bundle}/download") ping (get "/v1/ping") networking_switch_port_status (get "/v1/system/hardware/switch-port/{port}/status") device_auth_request (post "/device/auth") device_auth_confirm (post "/device/confirm") device_access_token (post "/device/token") probe_create (post "/experimental/v1/probes") +support_bundle_create (post "/experimental/v1/system/support-bundles") login_saml (post "/login/{silo_name}/saml/{provider_name}") login_local (post "/v1/login/{silo_name}/local") logout (post "/v1/logout") From ec897f77162dfe4884bf5acc480c019d7715e54e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 8 Nov 2024 15:30:20 -0800 Subject: [PATCH 04/25] Add database schema for support bundles --- nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/schema.rs | 14 +++++ nexus/db-model/src/support_bundle.rs | 77 ++++++++++++++++++++++++++++ schema/crdb/dbinit.sql | 28 ++++++++++ 4 files changed, 121 insertions(+) create mode 100644 nexus/db-model/src/support_bundle.rs diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 9137b0b3c3..546f53b1ac 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -96,6 +96,7 @@ mod sled_state; mod sled_underlay_subnet_allocation; mod snapshot; mod ssh_key; +mod support_bundle; mod switch; mod tuf_repo; mod typed_uuid; @@ -203,6 +204,7 @@ pub use sled_state::*; pub use sled_underlay_subnet_allocation::*; pub use snapshot::*; pub use ssh_key::*; +pub use support_bundle::*; pub use switch::*; pub use switch_interface::*; pub use switch_port::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 
8a97e43639..56f1e7e8ac 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1362,6 +1362,20 @@ joinable!(tuf_repo_artifact -> tuf_repo (tuf_repo_id)); // Can't specify joinable for a composite primary key (tuf_repo_artifact -> // tuf_artifact). +table! { + support_bundle { + id -> Uuid, + time_created -> Timestamptz, + reason_for_creation -> Text, + reason_for_failure -> Nullable, + state -> crate::SupportBundleStateEnum, + zpool_id -> Uuid, + dataset_id -> Uuid, + + assigned_nexus -> Nullable, + } +} + /* hardware inventory */ table! { diff --git a/nexus/db-model/src/support_bundle.rs b/nexus/db-model/src/support_bundle.rs new file mode 100644 index 0000000000..56ebbf2e2f --- /dev/null +++ b/nexus/db-model/src/support_bundle.rs @@ -0,0 +1,77 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::impl_enum_type; +use crate::schema::support_bundle; +use crate::typed_uuid::DbTypedUuid; + +use chrono::{DateTime, Utc}; +use nexus_types::external_api::shared::SupportBundleInfo as SupportBundleView; +use nexus_types::external_api::shared::SupportBundleState as SupportBundleStateView; +use omicron_uuid_kinds::DatasetKind; +use omicron_uuid_kinds::ZpoolKind; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +impl_enum_type!( + #[derive(SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "support_bundle_state", schema = "public"))] + pub struct SupportBundleStateEnum; + + #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[diesel(sql_type = SupportBundleStateEnum)] + pub enum SupportBundleState; + + // Enum values + Collecting => b"collecting" + Cancelling => b"cancelling" + Failed => b"failed" + Active => b"active" +); + +impl From for SupportBundleStateView { + fn from(state: SupportBundleState) -> Self { + use SupportBundleState::*; + + match state { + Collecting => SupportBundleStateView::Collecting, + Cancelling => SupportBundleStateView::Cancelling, + Failed => SupportBundleStateView::Failed, + Active => SupportBundleStateView::Active, + } + } +} + +#[derive( + Queryable, + Insertable, + Debug, + Clone, + Selectable, + Deserialize, + Serialize, + PartialEq, +)] +#[diesel(table_name = support_bundle)] +pub struct SupportBundle { + id: Uuid, + time_created: DateTime, + reason_for_creation: String, + reason_for_failure: Option, + state: SupportBundleState, + zpool_id: DbTypedUuid, + dataset_id: DbTypedUuid, + assigned_nexus: Option, +} + +impl From for SupportBundleView { + fn from(bundle: SupportBundle) -> Self { + Self { + id: bundle.id, + time_created: bundle.time_created, + reason_for_creation: bundle.reason_for_creation, + state: bundle.state.into(), + } + } +} diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index b095c4c89a..1288355463 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2384,6 +2384,34 @@ CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo_artifact ( /*******************************************************************/ +/* + * Support Bundles + */ + + +CREATE TYPE IF NOT EXISTS omicron.public.support_bundle_state AS ENUM ( + 'collecting', + 'cancelling', + 'failed', + 'active' +); + +CREATE TABLE IF NOT EXISTS omicron.public.support_bundle ( + id UUID NOT NULL, + time_created TIMESTAMPTZ NOT NULL, + reason_for_creation TEXT NOT NULL, + reason_for_failure TEXT, + state 
omicron.public.support_bundle_state NOT NULL, + zpool_id NOT NULL UUID, + dataset_id NOT NULL UUID, + + -- The Nexus which is in charge of collecting the support bundle, + -- and later managing its storage. + assigned_nexus UUID, +); + +/*******************************************************************/ + /* * DNS Propagation * From 38b6ea86de4452c0a831641a2c6bf8b7b5b59cff Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 8 Nov 2024 15:47:55 -0800 Subject: [PATCH 05/25] Add failure reason --- nexus/types/src/external_api/shared.rs | 1 + openapi/nexus.json | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs index 35367400e0..2928fab46b 100644 --- a/nexus/types/src/external_api/shared.rs +++ b/nexus/types/src/external_api/shared.rs @@ -448,6 +448,7 @@ pub struct SupportBundleInfo { pub id: Uuid, pub time_created: DateTime, pub reason_for_creation: String, + pub reason_for_failure: Option, pub state: SupportBundleState, } diff --git a/openapi/nexus.json b/openapi/nexus.json index 6cf143ac49..1aaba49e9e 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -20189,6 +20189,10 @@ "reason_for_creation": { "type": "string" }, + "reason_for_failure": { + "nullable": true, + "type": "string" + }, "state": { "$ref": "#/components/schemas/SupportBundleState" }, From 908f661a755cb7a1bf7ea4c159c0c1b830c1d809 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 8 Nov 2024 15:48:44 -0800 Subject: [PATCH 06/25] Merge reason for failure --- nexus/db-model/src/support_bundle.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/nexus/db-model/src/support_bundle.rs b/nexus/db-model/src/support_bundle.rs index 56ebbf2e2f..aedcdbf760 100644 --- a/nexus/db-model/src/support_bundle.rs +++ b/nexus/db-model/src/support_bundle.rs @@ -71,6 +71,7 @@ impl From for SupportBundleView { id: bundle.id, time_created: bundle.time_created, reason_for_creation: bundle.reason_for_creation, + reason_for_failure: bundle.reason_for_failure, state: bundle.state.into(), } } From aa3978f4cedac5ce787ce851f0c0c456e7f74021 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 12 Nov 2024 12:03:56 -0800 Subject: [PATCH 07/25] Fix SQL, moving forward on tests --- nexus/db-model/src/schema.rs | 1 + nexus/db-model/src/support_bundle.rs | 63 +- nexus/db-queries/src/db/datastore/dataset.rs | 25 +- nexus/db-queries/src/db/datastore/mod.rs | 30 + .../src/db/datastore/support_bundle.rs | 855 ++++++++++++++++++ nexus/types/src/external_api/shared.rs | 4 +- schema/crdb/dbinit.sql | 39 +- uuid-kinds/src/lib.rs | 1 + 8 files changed, 978 insertions(+), 40 deletions(-) create mode 100644 nexus/db-queries/src/db/datastore/support_bundle.rs diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 56f1e7e8ac..34a29e7297 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -2042,6 +2042,7 @@ allow_tables_to_appear_in_same_query!( console_session, sled, sled_resource, + support_bundle, router_route, vmm, volume, diff --git a/nexus/db-model/src/support_bundle.rs b/nexus/db-model/src/support_bundle.rs index aedcdbf760..f2450473f5 100644 --- a/nexus/db-model/src/support_bundle.rs +++ b/nexus/db-model/src/support_bundle.rs @@ -10,9 +10,14 @@ use chrono::{DateTime, Utc}; use nexus_types::external_api::shared::SupportBundleInfo as SupportBundleView; use nexus_types::external_api::shared::SupportBundleState as SupportBundleStateView; use omicron_uuid_kinds::DatasetKind; +use omicron_uuid_kinds::DatasetUuid; +use 
omicron_uuid_kinds::OmicronZoneKind; +use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::SupportBundleKind; +use omicron_uuid_kinds::SupportBundleUuid; use omicron_uuid_kinds::ZpoolKind; +use omicron_uuid_kinds::ZpoolUuid; use serde::{Deserialize, Serialize}; -use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Debug, QueryId)] @@ -25,17 +30,35 @@ impl_enum_type!( // Enum values Collecting => b"collecting" + Collected => b"collected" Cancelling => b"cancelling" Failed => b"failed" Active => b"active" ); +impl SupportBundleState { + pub fn might_have_dataset_storage(&self) -> bool { + use SupportBundleState::*; + + match self { + Collecting => false, + Collected => true, + Cancelling => true, + Failed => false, + Active => true, + } + } +} + impl From for SupportBundleStateView { fn from(state: SupportBundleState) -> Self { use SupportBundleState::*; match state { Collecting => SupportBundleStateView::Collecting, + // The distinction between "collected" and "collecting" is an + // internal detail that doesn't need to be exposed through the API. + Collected => SupportBundleStateView::Collecting, Cancelling => SupportBundleStateView::Cancelling, Failed => SupportBundleStateView::Failed, Active => SupportBundleStateView::Active, @@ -55,20 +78,40 @@ impl From for SupportBundleStateView { )] #[diesel(table_name = support_bundle)] pub struct SupportBundle { - id: Uuid, - time_created: DateTime, - reason_for_creation: String, - reason_for_failure: Option, - state: SupportBundleState, - zpool_id: DbTypedUuid, - dataset_id: DbTypedUuid, - assigned_nexus: Option, + pub id: DbTypedUuid, + pub time_created: DateTime, + pub reason_for_creation: String, + pub reason_for_failure: Option, + pub state: SupportBundleState, + pub zpool_id: DbTypedUuid, + pub dataset_id: DbTypedUuid, + pub assigned_nexus: Option>, +} + +impl SupportBundle { + pub fn new( + reason_for_creation: &'static str, + zpool_id: ZpoolUuid, + dataset_id: DatasetUuid, + nexus_id: OmicronZoneUuid, + ) -> Self { + Self { + id: SupportBundleUuid::new_v4().into(), + time_created: Utc::now(), + reason_for_creation: reason_for_creation.to_string(), + reason_for_failure: None, + state: SupportBundleState::Collecting, + zpool_id: zpool_id.into(), + dataset_id: dataset_id.into(), + assigned_nexus: Some(nexus_id.into()), + } + } } impl From for SupportBundleView { fn from(bundle: SupportBundle) -> Self { Self { - id: bundle.id, + id: bundle.id.into(), time_created: bundle.time_created, reason_for_creation: bundle.reason_for_creation, reason_for_failure: bundle.reason_for_failure, diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs index 996f105254..e5a72c5fb3 100644 --- a/nexus/db-queries/src/db/datastore/dataset.rs +++ b/nexus/db-queries/src/db/datastore/dataset.rs @@ -347,14 +347,13 @@ impl DataStore { #[cfg(test)] mod test { use super::*; + use crate::db::datastore::test::bp_insert_and_make_target; use crate::db::pub_test_utils::TestDatabase; use nexus_db_model::Generation; use nexus_db_model::SledBaseboard; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; - use nexus_types::deployment::Blueprint; - use nexus_types::deployment::BlueprintTarget; use omicron_common::api::internal::shared::DatasetKind as ApiDatasetKind; use omicron_test_utils::dev; use omicron_uuid_kinds::SledUuid; @@ -516,28 +515,6 @@ mod test { logctx.cleanup_successful(); } - async fn bp_insert_and_make_target( - opctx: 
&OpContext, - datastore: &DataStore, - bp: &Blueprint, - ) { - datastore - .blueprint_insert(opctx, bp) - .await - .expect("inserted blueprint"); - datastore - .blueprint_target_set_current( - opctx, - BlueprintTarget { - target_id: bp.id, - enabled: true, - time_made_target: Utc::now(), - }, - ) - .await - .expect("made blueprint the target"); - } - fn new_dataset_on(zpool_id: ZpoolUuid) -> Dataset { Dataset::new( Uuid::new_v4(), diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 5344ffc1d9..c0b31ed2ef 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -93,6 +93,7 @@ mod sled; mod sled_instance; mod snapshot; mod ssh_key; +mod support_bundle; mod switch; mod switch_interface; mod switch_port; @@ -465,6 +466,8 @@ mod test { use nexus_db_fixed_data::silo::DEFAULT_SILO; use nexus_db_model::IpAttachState; use nexus_db_model::{to_db_typed_uuid, Generation}; + use nexus_types::deployment::Blueprint; + use nexus_types::deployment::BlueprintTarget; use nexus_types::external_api::params; use nexus_types::silo::DEFAULT_SILO_ID; use omicron_common::api::external::{ @@ -503,6 +506,33 @@ mod test { } } + /// Inserts a blueprint in the DB and forcibly makes it the target + /// + /// WARNING: This makes no attempts to validate the blueprint relative to + /// parents -- this is just a test-only helper to make testing + /// blueprint-specific checks easier. + pub async fn bp_insert_and_make_target( + opctx: &OpContext, + datastore: &DataStore, + bp: &Blueprint, + ) { + datastore + .blueprint_insert(opctx, bp) + .await + .expect("inserted blueprint"); + datastore + .blueprint_target_set_current( + opctx, + BlueprintTarget { + target_id: bp.id, + enabled: true, + time_made_target: Utc::now(), + }, + ) + .await + .expect("made blueprint the target"); + } + #[tokio::test] async fn test_project_creation() { let logctx = dev::test_setup_log("test_project_creation"); diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs new file mode 100644 index 0000000000..d289127f4b --- /dev/null +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -0,0 +1,855 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! [`DataStore`] methods on [`SupportBundle`]s. 
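+//!
+//! Bundles are allocated onto "debug" datasets, at most one bundle per
+//! dataset, and each bundle is assigned to a Nexus zone responsible for
+//! collecting it and later managing its storage.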
+ +use super::DataStore; +use crate::authz; +use crate::context::OpContext; +use crate::db; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; +use crate::db::model::Dataset; +use crate::db::model::DatasetKind; +use crate::db::model::SupportBundle; +use crate::db::model::SupportBundleState; +use crate::db::pagination::paginated; +use crate::transaction_retry::OptionalError; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::prelude::*; +use futures::FutureExt; +use nexus_types::identity::Asset; +use omicron_common::api::external; +use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::LookupResult; +use omicron_uuid_kinds::BlueprintUuid; +use omicron_uuid_kinds::DatasetUuid; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::SupportBundleUuid; +use omicron_uuid_kinds::ZpoolUuid; +use uuid::Uuid; + +const CANNOT_ALLOCATE_ERR_MSG: &'static str = +"Current policy limits support bundle creation to 'one per external disk', and \ + no disks are available. Either delete old support bundles or add additional \ + disks"; + +/// Provides a report on how many bundle were expunged, and why. +#[derive(Debug, Clone)] +pub struct SupportBundleExpungementReport { + pub bundles_failed_missing_datasets: usize, + pub bundles_cancelled_missing_nexus: usize, + pub bundles_failed_missing_nexus: usize, +} + +impl DataStore { + /// Creates a new support bundle. + /// + /// Requires that the UUID of the calling Nexus be supplied as input - + /// this particular Zone is responsible for the collection process. + /// + /// Note that really any functioning Nexus would work as the "assignee", + /// but it's clear that our instance will work, because we're currently + /// running. + pub async fn support_bundle_create( + &self, + opctx: &OpContext, + reason_for_creation: &'static str, + this_nexus_id: OmicronZoneUuid, + ) -> CreateResult { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + let conn = self.pool_connection_authorized(opctx).await?; + + #[derive(Debug)] + enum SupportBundleError { + TooManyBundles, + } + + let err = OptionalError::new(); + self.transaction_retry_wrapper("support_bundle_create") + .transaction(&conn, |conn| { + let err = err.clone(); + + async move { + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::support_bundle::dsl as support_bundle_dsl; + + // Observe all "non-deleted, debug datasets". + // + // Return the first one we find that doesn't already + // have a support bundle allocated to it. + let free_dataset = dataset_dsl::dataset + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::kind.eq(DatasetKind::Debug)) + .left_join(support_bundle_dsl::support_bundle.on( + dataset_dsl::id.eq(support_bundle_dsl::dataset_id), + )) + .filter(support_bundle_dsl::dataset_id.is_null()) + .select(Dataset::as_select()) + .first_async(&conn) + .await + .optional()?; + + let Some(dataset) = free_dataset else { + return Err( + err.bail(SupportBundleError::TooManyBundles) + ); + }; + + // We could check that "this_nexus_id" is not expunged, but + // we have some evidence that it is valid: this Nexus is + // currently running! + // + // Besides, we COULD be expunged immediately after inserting + // the SupportBundle. 
In this case, we'd fall back to the + // case of "clean up a bundle which is managed by an + // expunged Nexus" anyway. + + let bundle = SupportBundle::new( + reason_for_creation, + ZpoolUuid::from_untyped_uuid(dataset.pool_id), + DatasetUuid::from_untyped_uuid(dataset.id()), + this_nexus_id, + ); + + diesel::insert_into(support_bundle_dsl::support_bundle) + .values(bundle.clone()) + .execute_async(&conn) + .await?; + + Ok(bundle) + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + match err { + SupportBundleError::TooManyBundles => { + return external::Error::insufficient_capacity( + CANNOT_ALLOCATE_ERR_MSG, + "Support Bundle storage exhausted", + ); + } + } + } + public_error_from_diesel(e, ErrorHandler::Server) + }) + } + + /// Looks up a single support bundle + pub async fn support_bundle_get( + &self, + opctx: &OpContext, + id: SupportBundleUuid, + ) -> LookupResult { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + use db::schema::support_bundle::dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + dsl::support_bundle + .filter(dsl::id.eq(id.into_untyped_uuid())) + .select(SupportBundle::as_select()) + .first_async::(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Lists one page of support bundles + pub async fn support_bundle_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + use db::schema::support_bundle::dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + paginated(dsl::support_bundle, dsl::id, pagparams) + .select(SupportBundle::as_select()) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Marks support bundles as failed if their assigned Nexus or backing + /// dataset has been destroyed. 
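+    ///
+    /// Bundles whose backing dataset was expunged are marked "failed"
+    /// directly, since their storage is already gone. Bundles whose
+    /// assigned Nexus was expunged are either re-assigned to a running
+    /// Nexus and marked "cancelling" (if they may still hold storage), or
+    /// marked "failed" otherwise.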
+ pub async fn support_bundle_fail_expunged( + &self, + opctx: &OpContext, + blueprint: &nexus_types::deployment::Blueprint, + ) -> Result { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + + // For this blueprint: The set of all expunged Nexus zones + let invalid_nexus_zones = blueprint + .all_omicron_zones( + nexus_types::deployment::BlueprintZoneFilter::Expunged, + ) + .filter_map(|(_sled, zone)| { + if matches!( + zone.zone_type, + nexus_types::deployment::BlueprintZoneType::Nexus(_) + ) { + Some(zone.id.into_untyped_uuid()) + } else { + None + } + }) + .collect::>(); + let valid_nexus_zones = blueprint + .all_omicron_zones( + nexus_types::deployment::BlueprintZoneFilter::ShouldBeRunning, + ) + .filter_map(|(_sled, zone)| { + if matches!( + zone.zone_type, + nexus_types::deployment::BlueprintZoneType::Nexus(_) + ) { + Some(zone.id.into_untyped_uuid()) + } else { + None + } + }) + .collect::>(); + + // For this blueprint: The set of expunged debug datasets + let invalid_datasets = blueprint + .all_omicron_datasets( + nexus_types::deployment::BlueprintDatasetFilter::Expunged, + ) + .filter_map(|dataset_config| { + if matches!( + dataset_config.kind, + omicron_common::api::internal::shared::DatasetKind::Debug + ) { + Some(dataset_config.id.into_untyped_uuid()) + } else { + None + } + }) + .collect::>(); + + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_if_current_blueprint_is( + &conn, + "support_bundle_fail_expunged", + opctx, + BlueprintUuid::from_untyped_uuid(blueprint.id), + |conn| { + let invalid_nexus_zones = invalid_nexus_zones.clone(); + let valid_nexus_zones = valid_nexus_zones.clone(); + let invalid_datasets = invalid_datasets.clone(); + async move { + use db::schema::support_bundle::dsl; + + // Find all bundles on datasets that no longer exist, and + // mark them "failed". + let bundles_with_bad_datasets = dsl::support_bundle + .filter(dsl::dataset_id.eq_any(invalid_datasets)) + .select(SupportBundle::as_select()) + .load_async(&*conn) + .await?; + + let bundles_failed_missing_datasets = + diesel::update(dsl::support_bundle) + .filter(dsl::state.ne(SupportBundleState::Failed)) + .filter( + dsl::id.eq_any( + bundles_with_bad_datasets + .iter() + .map(|b| b.id) + .collect::>(), + ), + ) + .set(( + dsl::state.eq(SupportBundleState::Failed), + dsl::reason_for_failure + .eq("Allocated dataset no longer exists"), + )) + .execute_async(&*conn) + .await?; + + let Some(arbitrary_valid_nexus) = + valid_nexus_zones.get(0).cloned() + else { + return Err(external::Error::internal_error( + "No valid Nexuses", + ) + .into()); + }; + + // Find all bundles on nexuses that no longer exist. + // + // If the bundle might have storage: + // - Mark it cancelled and re-assign the managing Nexus + // - Otherwise: mark it failed + let bundles_with_bad_nexuses = dsl::support_bundle + .filter(dsl::assigned_nexus.eq_any(invalid_nexus_zones)) + .select(SupportBundle::as_select()) + .load_async(&*conn) + .await?; + let (needs_cancellation, needs_failure): (Vec<_>, Vec<_>) = + bundles_with_bad_nexuses.into_iter().partition( + |bundle| bundle.state.might_have_dataset_storage(), + ); + + // Mark these support bundles as cancelled, and assign then + // to a nexus that should still exist. + // + // This should lead to their storage being freed, if it + // exists. 
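+                    //
+                    // The replacement Nexus is chosen arbitrarily from the
+                    // blueprint's in-service Nexus zones; any running Nexus
+                    // can perform the cleanup.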
+ let bundles_cancelled_missing_nexus = diesel::update( + dsl::support_bundle, + ) + .filter(dsl::state.ne(SupportBundleState::Cancelling)) + .filter(dsl::state.ne(SupportBundleState::Failed)) + .filter( + dsl::id.eq_any( + needs_cancellation + .iter() + .map(|b| b.id) + .collect::>(), + ), + ) + .set(( + dsl::assigned_nexus.eq(arbitrary_valid_nexus), + dsl::state.eq(SupportBundleState::Cancelling), + dsl::reason_for_failure + .eq("Nexus managing this bundle no longer exists"), + )) + .execute_async(&*conn) + .await?; + + // Mark these support bundles as failed. + // + // If they don't have storage (e.g., they never got that + // far, or their underlying dataset was expunged) there + // isn't anything left to free. This means we can skip the + // "Cancelling" state and jump to "Failed". + let bundles_failed_missing_nexus = diesel::update( + dsl::support_bundle, + ) + .filter(dsl::state.ne(SupportBundleState::Cancelling)) + .filter(dsl::state.ne(SupportBundleState::Failed)) + .filter(dsl::id.eq_any( + needs_failure.iter().map(|b| b.id).collect::>(), + )) + .set(( + dsl::state.eq(SupportBundleState::Failed), + dsl::reason_for_failure + .eq("Nexus managing this bundle no longer exists"), + )) + .execute_async(&*conn) + .await?; + + Ok(SupportBundleExpungementReport { + bundles_failed_missing_datasets, + bundles_cancelled_missing_nexus, + bundles_failed_missing_nexus, + }) + } + .boxed() + }, + ) + .await + } + + /// Cancels a single support bundle. + /// + /// Note that this does not delete the support bundle record, but starts the + /// process by which it may later be deleted. + pub async fn support_bundle_cancel( + &self, + opctx: &OpContext, + id: SupportBundleUuid, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + + use db::schema::support_bundle::dsl as support_bundle_dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + diesel::update(support_bundle_dsl::support_bundle) + .filter(support_bundle_dsl::id.eq(id.into_untyped_uuid())) + .set(support_bundle_dsl::state.eq(SupportBundleState::Cancelling)) + .execute_async(&*conn) + .await + .map(|_rows_modified| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } + + /// Deletes a support bundle. + /// + /// This should only be invoked after all storage for the support bundle has + /// been cleared. 
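+    ///
+    /// To begin releasing a bundle's storage instead, use
+    /// [`Self::support_bundle_cancel`], which starts the cleanup process
+    /// without removing the record.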
+ pub async fn support_bundle_delete( + &self, + opctx: &OpContext, + id: SupportBundleUuid, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + + use db::schema::support_bundle::dsl as support_bundle_dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + diesel::delete(support_bundle_dsl::support_bundle) + .filter(support_bundle_dsl::id.eq(id.into_untyped_uuid())) + .execute_async(&*conn) + .await + .map(|_rows_modified| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::db::datastore::test::bp_insert_and_make_target; + use crate::db::pub_test_utils::TestDatabase; + use nexus_db_model::Generation; + use nexus_db_model::SledBaseboard; + use nexus_db_model::SledSystemHardware; + use nexus_db_model::SledUpdate; + use nexus_db_model::Zpool; + use nexus_reconfigurator_planning::example::ExampleSystemBuilder; + use nexus_reconfigurator_planning::example::SimRngState; + use nexus_types::deployment::Blueprint; + use nexus_types::deployment::BlueprintDatasetDisposition; + use nexus_types::deployment::BlueprintDatasetFilter; + use nexus_types::deployment::BlueprintZoneFilter; + use nexus_types::deployment::BlueprintZoneType; + use omicron_common::api::internal::shared::DatasetKind::Debug as DebugDatasetKind; + use omicron_test_utils::dev; + use omicron_uuid_kinds::SledUuid; + use rand::Rng; + + // Pool/Dataset pairs, for debug datasets only. + struct TestPool { + pool: ZpoolUuid, + dataset: DatasetUuid, + } + + // Sleds and their pools, with a focus on debug datasets only. + struct TestSled { + sled: SledUuid, + pools: Vec, + } + + impl TestSled { + fn new_with_pool_count(pool_count: usize) -> Self { + Self { + sled: SledUuid::new_v4(), + pools: (0..pool_count) + .map(|_| TestPool { + pool: ZpoolUuid::new_v4(), + dataset: DatasetUuid::new_v4(), + }) + .collect(), + } + } + + fn new_from_blueprint(blueprint: &Blueprint) -> Vec { + let mut sleds = vec![]; + for (sled, datasets) in &blueprint.blueprint_datasets { + let pools = datasets + .datasets + .values() + .filter_map(|dataset| { + if !matches!(dataset.kind, DebugDatasetKind) + || !dataset + .disposition + .matches(BlueprintDatasetFilter::InService) + { + return None; + }; + + Some(TestPool { + pool: dataset.pool.id(), + dataset: dataset.id, + }) + }) + .collect(); + + sleds.push(TestSled { sled: *sled, pools }); + } + sleds + } + + async fn create_database_records( + &self, + datastore: &DataStore, + opctx: &OpContext, + ) { + let rack_id = Uuid::new_v4(); + let sled = SledUpdate::new( + *self.sled.as_untyped_uuid(), + "[::1]:0".parse().unwrap(), + SledBaseboard { + serial_number: format!( + "test-{}", + rand::thread_rng().gen::() + ), + part_number: "test-pn".to_string(), + revision: 0, + }, + SledSystemHardware { + is_scrimlet: false, + usable_hardware_threads: 128, + usable_physical_ram: (64 << 30).try_into().unwrap(), + reservoir_size: (16 << 30).try_into().unwrap(), + }, + rack_id, + Generation::new(), + ); + datastore.sled_upsert(sled).await.expect("failed to upsert sled"); + + // Create fake zpools that back our fake datasets. 
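+            //
+            // Each zpool gets exactly one debug dataset, which is the only
+            // dataset kind considered by the bundle allocation query.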
+ for pool in &self.pools { + let zpool = Zpool::new( + *pool.pool.as_untyped_uuid(), + *self.sled.as_untyped_uuid(), + Uuid::new_v4(), + ); + datastore + .zpool_insert(opctx, zpool) + .await + .expect("failed to upsert zpool"); + + let dataset = Dataset::new( + pool.dataset.into_untyped_uuid(), + pool.pool.into_untyped_uuid(), + None, + DebugDatasetKind, + ); + datastore + .dataset_upsert(dataset) + .await + .expect("failed to upsert dataset"); + } + } + } + + // Creates a fake sled with `pool_count` zpools, and a debug dataset on each + // zpool. + async fn create_sled_and_zpools( + datastore: &DataStore, + opctx: &OpContext, + pool_count: usize, + ) -> TestSled { + let sled = TestSled::new_with_pool_count(pool_count); + sled.create_database_records(&datastore, &opctx).await; + sled + } + + async fn support_bundle_create_expect_no_capacity( + datastore: &DataStore, + opctx: &OpContext, + this_nexus_id: OmicronZoneUuid, + ) { + let err = datastore + .support_bundle_create(&opctx, "for tests", this_nexus_id) + .await + .expect_err("Shouldn't provision bundle without datasets"); + let Error::InsufficientCapacity { message } = err else { + panic!("Unexpected error: {err:?} - we expected 'InsufficientCapacity'"); + }; + assert_eq!( + CANNOT_ALLOCATE_ERR_MSG, + message.external_message(), + "Unexpected error: {message:?}" + ); + } + + #[tokio::test] + async fn test_bundle_create_capacity_limits() { + let logctx = dev::test_setup_log("test_bundle_create_capacity_limits"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let this_nexus_id = OmicronZoneUuid::new_v4(); + + // No sleds, no datasets. Allocation should fail. + + support_bundle_create_expect_no_capacity( + &datastore, + &opctx, + this_nexus_id, + ) + .await; + + // Create a sled with a couple pools. Allocation should succeed. + + const POOL_COUNT: usize = 2; + let _test_sled = + create_sled_and_zpools(&datastore, &opctx, POOL_COUNT).await; + let mut bundles = vec![]; + for _ in 0..POOL_COUNT { + bundles.push( + datastore + .support_bundle_create( + &opctx, + "for the test", + this_nexus_id, + ) + .await + .expect("Should be able to create bundle"), + ); + } + + // If we try to allocate any more bundles, we'll run out of capacity. + + support_bundle_create_expect_no_capacity( + &datastore, + &opctx, + this_nexus_id, + ) + .await; + + // If we cancel a bundle, it isn't deleted (yet). + // This operation should signify that we can start to free up + // storage on the dataset, but that needs to happen outside the + // database. + // + // We should still expect to hit capacity limits. + + datastore + .support_bundle_cancel(&opctx, bundles[0].id.into()) + .await + .expect("Should be able to cancel this bundle"); + support_bundle_create_expect_no_capacity( + &datastore, + &opctx, + this_nexus_id, + ) + .await; + + // If we delete a bundle, it should be gone. This means we can + // re-allocate from that dataset which was just freed up. 
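+        //
+        // (Cancellation alone does not release the dataset; capacity is
+        // only reclaimed once the record itself is deleted.)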
+ + datastore + .support_bundle_delete(&opctx, bundles[0].id.into()) + .await + .expect("Should be able to cancel this bundle"); + datastore + .support_bundle_create(&opctx, "for the test", this_nexus_id) + .await + .expect("Should be able to create bundle"); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_crud_operations() { + let logctx = dev::test_setup_log("test_crud_operations"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let test_sled = create_sled_and_zpools(&datastore, &opctx, 1).await; + let reason = "Bundle for test"; + let this_nexus_id = OmicronZoneUuid::new_v4(); + + // Create the bundle, then observe it through the "getter" APIs + + let mut bundle = datastore + .support_bundle_create(&opctx, reason, this_nexus_id) + .await + .expect("Should be able to create bundle"); + assert_eq!(bundle.reason_for_creation, reason); + assert_eq!(bundle.reason_for_failure, None); + assert_eq!(bundle.assigned_nexus, Some(this_nexus_id.into())); + assert_eq!(bundle.state, SupportBundleState::Collecting); + assert_eq!(bundle.zpool_id, test_sled.pools[0].pool.into()); + assert_eq!(bundle.dataset_id, test_sled.pools[0].dataset.into()); + + let observed_bundle = datastore + .support_bundle_get(&opctx, bundle.id.into()) + .await + .expect("Should be able to get bundle we just created"); + // Overwrite this column; it is modified slightly upon database insertion. + bundle.time_created = observed_bundle.time_created; + assert_eq!(bundle, observed_bundle); + + let pagparams = DataPageParams::max_page(); + let observed_bundles = datastore + .support_bundle_list(&opctx, &pagparams) + .await + .expect("Should be able to get bundle we just created"); + assert_eq!(1, observed_bundles.len()); + assert_eq!(bundle, observed_bundles[0]); + + // Cancel the bundle, observe the new state + + datastore + .support_bundle_cancel(&opctx, bundle.id.into()) + .await + .expect("Should be able to cancel our bundle"); + let observed_bundle = datastore + .support_bundle_get(&opctx, bundle.id.into()) + .await + .expect("Should be able to get bundle we just created"); + assert_eq!(SupportBundleState::Cancelling, observed_bundle.state); + + // Delete the bundle, observe that it's gone + + datastore + .support_bundle_delete(&opctx, bundle.id.into()) + .await + .expect("Should be able to cancel our bundle"); + let observed_bundles = datastore + .support_bundle_list(&opctx, &pagparams) + .await + .expect("Should be able to get bundle we just created"); + assert!(observed_bundles.is_empty()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + fn get_nexuses_from_blueprint( + bp: &Blueprint, + filter: BlueprintZoneFilter, + ) -> Vec { + bp.blueprint_zones + .values() + .map(|zones_config| { + let mut nexus_zones = vec![]; + for zone in &zones_config.zones { + if matches!(zone.zone_type, BlueprintZoneType::Nexus(_)) + && zone.disposition.matches(filter) + { + nexus_zones.push(zone.id); + } + } + nexus_zones + }) + .flatten() + .collect() + } + + fn get_debug_datasets_from_blueprint( + bp: &Blueprint, + filter: BlueprintDatasetFilter, + ) -> Vec { + bp.blueprint_datasets + .values() + .map(|datasets_config| { + let mut debug_datasets = vec![]; + for dataset in datasets_config.datasets.values() { + if matches!(dataset.kind, DebugDatasetKind) + && dataset.disposition.matches(filter) + { + debug_datasets.push(dataset.id); + } + } + debug_datasets + }) + .flatten() + .collect() + } + + 
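+    // Creates a bundle on a blueprint-provisioned dataset, then expunges
+    // that dataset in a child blueprint and verifies that the bundle is
+    // marked "failed".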
#[tokio::test] + async fn test_bundle_expungement() { + static TEST_NAME: &str = "test_bundle_expungement"; + let logctx = dev::test_setup_log(TEST_NAME); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let mut rng = SimRngState::from_seed(TEST_NAME); + let (_example, mut bp1) = ExampleSystemBuilder::new_with_rng( + &logctx.log, + rng.next_system_rng(), + ) + .build(); + + // Weirdly, the "ExampleSystemBuilder" blueprint has a parent blueprint, + // but which isn't exposed through the API. Since we're only able to see + // the blueprint it emits, that means we can't actually make it the + // target because "the parent blueprint is not the current target". + // + // Instead of dealing with that, we lie: claim this is the primordial + // blueprint, with no parent. + // + // Regardless, make this starter blueprint our target. + bp1.parent_blueprint_id = None; + bp_insert_and_make_target(&opctx, &datastore, &bp1).await; + + // Manually perform the equivalent of blueprint execution to populate + // database records. + let sleds = TestSled::new_from_blueprint(&bp1); + for sled in &sleds { + sled.create_database_records(&datastore, &opctx).await; + } + + // Extract Nexus and Dataset information from the generated blueprint. + let this_nexus_id = get_nexuses_from_blueprint( + &bp1, + BlueprintZoneFilter::ShouldBeRunning, + ) + .get(0) + .map(|id| *id) + .expect("There should be a Nexus in the example blueprint"); + let debug_datasets = get_debug_datasets_from_blueprint( + &bp1, + BlueprintDatasetFilter::InService, + ); + assert!(!debug_datasets.is_empty()); + + // When we create a bundle, it should exist on a dataset provisioned by + // the blueprint. + let bundle = datastore + .support_bundle_create(&opctx, "for the test", this_nexus_id) + .await + .expect("Should be able to create bundle"); + assert_eq!(bundle.assigned_nexus, Some(this_nexus_id.into())); + + assert!( + debug_datasets.contains(&DatasetUuid::from(bundle.dataset_id)), + "Bundle should have been allocated from a blueprint dataset" + ); + + // Expunge the bundle's dataset. + let bp2 = { + let mut bp2 = bp1.clone(); + bp2.id = Uuid::new_v4(); + bp2.parent_blueprint_id = Some(bp1.id); + + for datasets in bp2.blueprint_datasets.values_mut() { + for dataset in datasets.datasets.values_mut() { + if dataset.id == bundle.dataset_id.into() { + dataset.disposition = + BlueprintDatasetDisposition::Expunged; + } + } + } + bp2 + }; + bp_insert_and_make_target(&opctx, &datastore, &bp2).await; + + // TODO: Call this on bp1, observe no changes? + let report = datastore + .support_bundle_fail_expunged(&opctx, &bp2) + .await + .expect("Should have been able to mark bundle state as failed"); + + assert_eq!(1, report.bundles_failed_missing_datasets); + assert_eq!(0, report.bundles_cancelled_missing_nexus); + assert_eq!(0, report.bundles_failed_missing_nexus); + + // TODO: Another test, maybe? 
+ // TODO: Find the nexus where we allocated the bundle
+ // TODO: expunge it
+
+ db.terminate().await;
+ logctx.cleanup_successful();
+ }
+}
diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs
index 2928fab46b..b53dd78cdb 100644
--- a/nexus/types/src/external_api/shared.rs
+++ b/nexus/types/src/external_api/shared.rs
@@ -10,6 +10,7 @@ use chrono::DateTime;
 use chrono::Utc;
 use omicron_common::api::external::Name;
 use omicron_common::api::internal::shared::NetworkInterface;
+use omicron_uuid_kinds::SupportBundleUuid;
 use parse_display::FromStr;
 use schemars::JsonSchema;
 use serde::de::Error as _;
@@ -444,8 +445,7 @@ pub enum SupportBundleState {
 
 #[derive(Debug, Clone, JsonSchema, Serialize, Deserialize)]
 pub struct SupportBundleInfo {
- // XXX: Strongly-typed Support Bundle UUID?
- pub id: Uuid,
+ pub id: SupportBundleUuid,
 pub time_created: DateTime,
 pub reason_for_creation: String,
 pub reason_for_failure: Option,
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 1288355463..fe29b2e65f 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -2390,24 +2390,55 @@ CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo_artifact (
 
 CREATE TYPE IF NOT EXISTS omicron.public.support_bundle_state AS ENUM (
+ -- The bundle is currently being created.
+ --
+ -- All storage for this bundle exists in Nexus's transient filesystem, as it
+ -- is being assembled.
 'collecting',
+
+ -- The bundle has been created, and is being transferred to a Sled.
+ --
+ -- This bundle may have been partially allocated to a Sled.
+ 'collected',
+
+ -- The bundle has been explicitly marked cancelled.
+ --
+ -- It may or may not have durable storage which needs to be freed.
 'cancelling',
+
+ -- The bundle, for whatever reason, has failed.
+ --
+ -- It should no longer have any managed durable storage.
 'failed',
+
+ -- The bundle has been collected successfully, and has storage on
+ -- a particular sled.
 'active'
 );
 
 CREATE TABLE IF NOT EXISTS omicron.public.support_bundle (
- id UUID NOT NULL,
+ id UUID PRIMARY KEY,
 time_created TIMESTAMPTZ NOT NULL,
 reason_for_creation TEXT NOT NULL,
 reason_for_failure TEXT,
 state omicron.public.support_bundle_state NOT NULL,
- zpool_id NOT NULL UUID,
- dataset_id NOT NULL UUID,
+ zpool_id UUID NOT NULL,
+ dataset_id UUID NOT NULL,
 
 -- The Nexus which is in charge of collecting the support bundle,
 -- and later managing its storage.
- assigned_nexus UUID,
+ assigned_nexus UUID
+);
+
+-- The "UNIQUE" part of this index helps enforce that we allow one support bundle
+-- per debug dataset. This constraint can be removed if the query responsible
+-- for allocation changes to allocate more intelligently.
+CREATE UNIQUE INDEX IF NOT EXISTS one_bundle_per_dataset ON omicron.public.support_bundle (
+ dataset_id
+);
+
+CREATE INDEX IF NOT EXISTS lookup_bundle_by_neuxs ON omicron.public.support_bundle (
+ assigned_nexus
 );
 
 /*******************************************************************/
diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs
index 7947062a82..50b263fa0b 100644
--- a/uuid-kinds/src/lib.rs
+++ b/uuid-kinds/src/lib.rs
@@ -65,6 +65,7 @@ impl_typed_uuid_kind!
{ RackReset => "rack_reset", Region => "region", Sled => "sled", + SupportBundle => "support_bundle", TufRepo => "tuf_repo", Upstairs => "upstairs", UpstairsRepair => "upstairs_repair", From d6d53c974e66e64a48d99f83e3319d6fcd4a6973 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 12 Nov 2024 17:46:47 -0800 Subject: [PATCH 08/25] Refactor state machine --- nexus/db-model/src/support_bundle.rs | 39 +- .../src/db/datastore/support_bundle.rs | 522 ++++++++++++++---- nexus/types/src/external_api/shared.rs | 12 +- schema/crdb/dbinit.sql | 33 +- 4 files changed, 455 insertions(+), 151 deletions(-) diff --git a/nexus/db-model/src/support_bundle.rs b/nexus/db-model/src/support_bundle.rs index f2450473f5..0e53070877 100644 --- a/nexus/db-model/src/support_bundle.rs +++ b/nexus/db-model/src/support_bundle.rs @@ -30,22 +30,28 @@ impl_enum_type!( // Enum values Collecting => b"collecting" - Collected => b"collected" - Cancelling => b"cancelling" - Failed => b"failed" Active => b"active" + Destroying => b"destroying" + Failing => b"failing" + Failed => b"failed" ); impl SupportBundleState { - pub fn might_have_dataset_storage(&self) -> bool { + /// Returns the list of valid prior states. + /// + /// This is used to confirm that state updates are performed legally, + /// and defines the possible state transitions. + pub fn valid_old_states(&self) -> Vec { use SupportBundleState::*; match self { - Collecting => false, - Collected => true, - Cancelling => true, - Failed => false, - Active => true, + Collecting => vec![], + Active => vec![Collecting], + // The "Destroying" state is terminal. + Destroying => vec![Active, Collecting, Failing], + Failing => vec![Collecting, Active], + // The "Failed" state is terminal. + Failed => vec![Collecting, Active, Failing], } } } @@ -56,12 +62,17 @@ impl From for SupportBundleStateView { match state { Collecting => SupportBundleStateView::Collecting, - // The distinction between "collected" and "collecting" is an - // internal detail that doesn't need to be exposed through the API. - Collected => SupportBundleStateView::Collecting, - Cancelling => SupportBundleStateView::Cancelling, - Failed => SupportBundleStateView::Failed, Active => SupportBundleStateView::Active, + Destroying => SupportBundleStateView::Destroying, + // The distinction between "failing" and "failed" should not be + // visible to end-users. This is internal book-keeping to decide + // whether or not the bundle record can be safely deleted. + // + // Either way, it should be possible to delete the bundle. + // - "Failing" bundles will become "Destroying" + // - "Failed" bundles will be destroyed immediately + Failing => SupportBundleStateView::Failed, + Failed => SupportBundleStateView::Failed, } } } diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index d289127f4b..4d4a6645c2 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -15,6 +15,7 @@ use crate::db::model::DatasetKind; use crate::db::model::SupportBundle; use crate::db::model::SupportBundleState; use crate::db::pagination::paginated; +use crate::db::update_and_check::{UpdateAndCheck, UpdateStatus}; use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; @@ -39,12 +40,29 @@ const CANNOT_ALLOCATE_ERR_MSG: &'static str = no disks are available. 
Either delete old support bundles or add additional \
 disks";
 
+const FAILURE_REASON_NO_DATASET: &'static str =
+ "Allocated dataset no longer exists";
+const FAILURE_REASON_NO_NEXUS: &'static str =
+ "Nexus managing this bundle no longer exists";
+
 /// Provides a report on how many bundles were expunged, and why.
-#[derive(Debug, Clone)]
+#[derive(Default, Debug, Clone, PartialEq)]
 pub struct SupportBundleExpungementReport {
+ /// Bundles marked "failed" because the datasets storing them have been
+ /// expunged.
 pub bundles_failed_missing_datasets: usize,
- pub bundles_cancelled_missing_nexus: usize,
- pub bundles_failed_missing_nexus: usize,
+
+ /// Bundles deleted because the datasets storing them have been
+ /// expunged.
+ pub bundles_deleted_missing_datasets: usize,
+
+ /// Bundles marked "failing" because the nexuses managing them have been
+ /// expunged.
+ ///
+ /// These bundles should be re-assigned to a different nexus for cleanup.
+ pub bundles_failing_missing_nexus: usize,
+
+ /// Bundles which had a new Nexus assigned to them.
+ pub bundles_reassigned: usize,
 }
 
 impl DataStore {
@@ -248,110 +266,108 @@ impl DataStore {
 async move {
 use db::schema::support_bundle::dsl;
 
- // Find all bundles on datasets that no longer exist, and
- // mark them "failed".
+ // Find all bundles without backing storage.
 let bundles_with_bad_datasets = dsl::support_bundle
 .filter(dsl::dataset_id.eq_any(invalid_datasets))
 .select(SupportBundle::as_select())
 .load_async(&*conn)
 .await?;
 
+ // Split these bundles into two categories:
+ // - Ones that are being destroyed anyway, and that can be
+ // fully deleted.
+ // - Ones that are NOT being destroyed, and should be marked
+ // failed so the end-user has visibility into their
+ // destruction.
+ let (bundles_to_delete, bundles_to_fail): (Vec<_>, Vec<_>) =
+ bundles_with_bad_datasets.into_iter().partition(
+ |bundle| bundle.state == SupportBundleState::Destroying
+ );
+ let bundles_to_delete = bundles_to_delete.into_iter().map(|b| b.id).collect::>();
+ let bundles_to_fail = bundles_to_fail.into_iter().map(|b| b.id).collect::>();
+
+ // Find all non-destroying bundles on datasets that no
+ // longer exist, and mark them "failed". They skip the
+ // "failing" state because there is no remaining storage to
+ // be cleaned up.
+ let state = SupportBundleState::Failed;
 let bundles_failed_missing_datasets =
 diesel::update(dsl::support_bundle)
- .filter(dsl::state.ne(SupportBundleState::Failed))
- .filter(
- dsl::id.eq_any(
- bundles_with_bad_datasets
- .iter()
- .map(|b| b.id)
- .collect::>(),
- ),
- )
+ .filter(dsl::state.eq_any(state.valid_old_states()))
+ .filter(dsl::id.eq_any(bundles_to_fail))
 .set((
- dsl::state.eq(SupportBundleState::Failed),
- dsl::reason_for_failure
- .eq("Allocated dataset no longer exists"),
+ dsl::state.eq(state),
+ dsl::reason_for_failure.eq(FAILURE_REASON_NO_DATASET),
 ))
 .execute_async(&*conn)
 .await?;
+
+ // For bundles that are in the process of being destroyed,
+ // the dataset expungement speeds up the process.
+ let bundles_deleted_missing_datasets =
+ diesel::delete(dsl::support_bundle)
+ .filter(dsl::state.eq_any(state.valid_old_states()))
+ .filter(dsl::id.eq_any(bundles_to_delete))
+ // This check should be redundant (we already
+ // partitioned above based on this state) but out of
+ // an abundance of caution we don't auto-delete a
+ // bundle in any other state.
+ .filter(dsl::state.eq(SupportBundleState::Destroying))
+ .execute_async(&*conn)
+ .await?;
 
 let Some(arbitrary_valid_nexus) =
 valid_nexus_zones.get(0).cloned()
 else {
 return Err(external::Error::internal_error(
- "No valid Nexuses",
+ "No valid Nexuses, we cannot re-assign this support bundle",
 )
 .into());
 };
 
 // Find all bundles on nexuses that no longer exist.
- //
- // If the bundle might have storage:
- // - Mark it cancelled and re-assign the managing Nexus
- // - Otherwise: mark it failed
 let bundles_with_bad_nexuses = dsl::support_bundle
 .filter(dsl::assigned_nexus.eq_any(invalid_nexus_zones))
 .select(SupportBundle::as_select())
 .load_async(&*conn)
 .await?;
 
- let (needs_cancellation, needs_failure): (Vec<_>, Vec<_>) =
- bundles_with_bad_nexuses.into_iter().partition(
- |bundle| bundle.state.might_have_dataset_storage(),
- );
 
- // Mark these support bundles as cancelled, and assign then
+ let bundles_to_mark_failing = bundles_with_bad_nexuses.iter()
+ .map(|b| b.id).collect::>();
+ let bundles_to_reassign = bundles_with_bad_nexuses.iter()
+ .filter_map(|bundle| {
+ if bundle.state != SupportBundleState::Failed {
+ Some(bundle.id)
+ } else {
+ None
+ }
+ }).collect::>();
+
+ // Mark these support bundles as failing, and assign them
 // to a nexus that should still exist.
 //
 // This should lead to their storage being freed, if it
 // exists.
- let bundles_cancelled_missing_nexus = diesel::update(
- dsl::support_bundle,
- )
- .filter(dsl::state.ne(SupportBundleState::Cancelling))
- .filter(dsl::state.ne(SupportBundleState::Failed))
- .filter(
- dsl::id.eq_any(
- needs_cancellation
- .iter()
- .map(|b| b.id)
- .collect::>(),
- ),
- )
- .set((
- dsl::assigned_nexus.eq(arbitrary_valid_nexus),
- dsl::state.eq(SupportBundleState::Cancelling),
- dsl::reason_for_failure
- .eq("Nexus managing this bundle no longer exists"),
- ))
- .execute_async(&*conn)
- .await?;
-
- // Mark these support bundles as failed.
- //
- // If they don't have storage (e.g., they never got that
- // far, or their underlying dataset was expunged) there
- // isn't anything left to free. This means we can skip the
- // "Cancelling" state and jump to "Failed".
- let bundles_failed_missing_nexus = diesel::update(
- dsl::support_bundle,
- )
- .filter(dsl::state.ne(SupportBundleState::Cancelling))
- .filter(dsl::state.ne(SupportBundleState::Failed))
- .filter(dsl::id.eq_any(
- needs_failure.iter().map(|b| b.id).collect::>(),
- ))
- .set((
- dsl::state.eq(SupportBundleState::Failed),
- dsl::reason_for_failure
- .eq("Nexus managing this bundle no longer exists"),
- ))
- .execute_async(&*conn)
- .await?;
+ let state = SupportBundleState::Failing;
+ let bundles_failing_missing_nexus = diesel::update(dsl::support_bundle)
+ .filter(dsl::state.eq_any(state.valid_old_states()))
+ .filter(dsl::id.eq_any(bundles_to_mark_failing))
+ .set((
+ dsl::state.eq(state),
+ dsl::reason_for_failure.eq(FAILURE_REASON_NO_NEXUS),
+ ))
+ .execute_async(&*conn)
+ .await?;
 
+ let bundles_reassigned = diesel::update(dsl::support_bundle)
+ .filter(dsl::id.eq_any(bundles_to_reassign))
+ .set(dsl::assigned_nexus.eq(arbitrary_valid_nexus))
+ .execute_async(&*conn)
+ .await?;
 
 Ok(SupportBundleExpungementReport {
 bundles_failed_missing_datasets,
- bundles_cancelled_missing_nexus,
- bundles_failed_missing_nexus,
+ bundles_deleted_missing_datasets,
+ bundles_failing_missing_nexus,
+ bundles_reassigned,
 })
 }
 .boxed()
@@ -360,29 +376,35 @@
 .await
 }
 
- /// Cancels a single support bundle.
- /// - /// Note that this does not delete the support bundle record, but starts the - /// process by which it may later be deleted. - pub async fn support_bundle_cancel( + /// Updates the state of a support bundle + pub async fn support_bundle_update( &self, opctx: &OpContext, id: SupportBundleUuid, + state: SupportBundleState, ) -> Result<(), Error> { opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - use db::schema::support_bundle::dsl as support_bundle_dsl; - + use db::schema::support_bundle::dsl; let conn = self.pool_connection_authorized(opctx).await?; - diesel::update(support_bundle_dsl::support_bundle) - .filter(support_bundle_dsl::id.eq(id.into_untyped_uuid())) - .set(support_bundle_dsl::state.eq(SupportBundleState::Cancelling)) - .execute_async(&*conn) + let result = diesel::update(dsl::support_bundle) + .filter(dsl::id.eq(id.into_untyped_uuid())) + .filter(dsl::state.eq_any(state.valid_old_states())) + .set(dsl::state.eq(state)) + .check_if_exists::(id.into_untyped_uuid()) + .execute_and_check(&*conn) .await - .map(|_rows_modified| ()) .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - Ok(()) + match result.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + Err(Error::invalid_request(format!( + "Cannot update support bundle state from {:?} to {:?}", + result.found.state, state + ))) + } + } } /// Deletes a support bundle. @@ -396,11 +418,16 @@ impl DataStore { ) -> Result<(), Error> { opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - use db::schema::support_bundle::dsl as support_bundle_dsl; + use db::schema::support_bundle::dsl; let conn = self.pool_connection_authorized(opctx).await?; - diesel::delete(support_bundle_dsl::support_bundle) - .filter(support_bundle_dsl::id.eq(id.into_untyped_uuid())) + diesel::delete(dsl::support_bundle) + .filter( + dsl::state + .eq(SupportBundleState::Destroying) + .or(dsl::state.eq(SupportBundleState::Failed)), + ) + .filter(dsl::id.eq(id.into_untyped_uuid())) .execute_async(&*conn) .await .map(|_rows_modified| ()) @@ -425,6 +452,7 @@ mod test { use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintDatasetDisposition; use nexus_types::deployment::BlueprintDatasetFilter; + use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; use omicron_common::api::internal::shared::DatasetKind::Debug as DebugDatasetKind; @@ -614,7 +642,7 @@ mod test { ) .await; - // If we cancel a bundle, it isn't deleted (yet). + // If we destroy a bundle, it isn't deleted (yet). // This operation should signify that we can start to free up // storage on the dataset, but that needs to happen outside the // database. @@ -622,9 +650,13 @@ mod test { // We should still expect to hit capacity limits. 
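+ // (Note: a bundle that is merely being destroyed still occupies its
+ // dataset; capacity is only released by the delete further below.)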
datastore - .support_bundle_cancel(&opctx, bundles[0].id.into()) + .support_bundle_update( + &opctx, + bundles[0].id.into(), + SupportBundleState::Destroying, + ) .await - .expect("Should be able to cancel this bundle"); + .expect("Should be able to destroy this bundle"); support_bundle_create_expect_no_capacity( &datastore, &opctx, @@ -638,7 +670,7 @@ mod test { datastore .support_bundle_delete(&opctx, bundles[0].id.into()) .await - .expect("Should be able to cancel this bundle"); + .expect("Should be able to destroy this bundle"); datastore .support_bundle_create(&opctx, "for the test", this_nexus_id) .await @@ -687,24 +719,28 @@ mod test { assert_eq!(1, observed_bundles.len()); assert_eq!(bundle, observed_bundles[0]); - // Cancel the bundle, observe the new state + // Destroy the bundle, observe the new state datastore - .support_bundle_cancel(&opctx, bundle.id.into()) + .support_bundle_update( + &opctx, + bundle.id.into(), + SupportBundleState::Destroying, + ) .await - .expect("Should be able to cancel our bundle"); + .expect("Should be able to destroy our bundle"); let observed_bundle = datastore .support_bundle_get(&opctx, bundle.id.into()) .await .expect("Should be able to get bundle we just created"); - assert_eq!(SupportBundleState::Cancelling, observed_bundle.state); + assert_eq!(SupportBundleState::Destroying, observed_bundle.state); // Delete the bundle, observe that it's gone datastore .support_bundle_delete(&opctx, bundle.id.into()) .await - .expect("Should be able to cancel our bundle"); + .expect("Should be able to destroy our bundle"); let observed_bundles = datastore .support_bundle_list(&opctx, &pagparams) .await @@ -757,9 +793,29 @@ mod test { .collect() } + fn expunge_dataset_for_bundle(bp: &mut Blueprint, bundle: &SupportBundle) { + for datasets in bp.blueprint_datasets.values_mut() { + for dataset in datasets.datasets.values_mut() { + if dataset.id == bundle.dataset_id.into() { + dataset.disposition = BlueprintDatasetDisposition::Expunged; + } + } + } + } + + fn expunge_nexus_for_bundle(bp: &mut Blueprint, bundle: &SupportBundle) { + for zones in bp.blueprint_zones.values_mut() { + for zone in &mut zones.zones { + if zone.id == bundle.assigned_nexus.unwrap().into() { + zone.disposition = BlueprintZoneDisposition::Expunged; + } + } + } + } + #[tokio::test] - async fn test_bundle_expungement() { - static TEST_NAME: &str = "test_bundle_expungement"; + async fn test_bundle_failed_from_expunged_dataset() { + static TEST_NAME: &str = "test_bundle_failed_from_expunged_dataset"; let logctx = dev::test_setup_log(TEST_NAME); let db = TestDatabase::new_with_datastore(&logctx.log).await; let (opctx, datastore) = (db.opctx(), db.datastore()); @@ -811,43 +867,277 @@ mod test { .await .expect("Should be able to create bundle"); assert_eq!(bundle.assigned_nexus, Some(this_nexus_id.into())); - assert!( debug_datasets.contains(&DatasetUuid::from(bundle.dataset_id)), "Bundle should have been allocated from a blueprint dataset" ); - // Expunge the bundle's dataset. + // If we try to "fail support bundles" from expunged datasets/nexuses, + // we should see a no-op. Nothing has been expunged yet! 
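+ // (Concretely: the report returned below should equal the all-zeroes
+ // default value.)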
+ let report = + datastore.support_bundle_fail_expunged(&opctx, &bp1).await.expect( + "Should have been able to perform no-op support bundle failure", + ); + assert_eq!(SupportBundleExpungementReport::default(), report); + + // Expunge the bundle's dataset (manually) let bp2 = { let mut bp2 = bp1.clone(); bp2.id = Uuid::new_v4(); bp2.parent_blueprint_id = Some(bp1.id); + expunge_dataset_for_bundle(&mut bp2, &bundle); + bp2 + }; + bp_insert_and_make_target(&opctx, &datastore, &bp2).await; - for datasets in bp2.blueprint_datasets.values_mut() { - for dataset in datasets.datasets.values_mut() { - if dataset.id == bundle.dataset_id.into() { - dataset.disposition = - BlueprintDatasetDisposition::Expunged; - } - } - } + datastore + .support_bundle_fail_expunged(&opctx, &bp1) + .await + .expect_err("bp1 is no longer the target; this should fail"); + let report = datastore + .support_bundle_fail_expunged(&opctx, &bp2) + .await + .expect("Should have been able to mark bundle state as failed"); + assert_eq!( + SupportBundleExpungementReport { + bundles_failed_missing_datasets: 1, + ..Default::default() + }, + report + ); + + let observed_bundle = datastore + .support_bundle_get(&opctx, bundle.id.into()) + .await + .expect("Should be able to get bundle we just failed"); + assert_eq!(SupportBundleState::Failed, observed_bundle.state); + assert!(observed_bundle + .reason_for_failure + .unwrap() + .contains(FAILURE_REASON_NO_DATASET)); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_bundle_failed_from_expunged_nexus_no_reassign() { + static TEST_NAME: &str = + "test_bundle_failed_from_expunged_nexus_no_reassign"; + let logctx = dev::test_setup_log(TEST_NAME); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let mut rng = SimRngState::from_seed(TEST_NAME); + let (_example, mut bp1) = ExampleSystemBuilder::new_with_rng( + &logctx.log, + rng.next_system_rng(), + ) + .build(); + + bp1.parent_blueprint_id = None; + bp_insert_and_make_target(&opctx, &datastore, &bp1).await; + + // Manually perform the equivalent of blueprint execution to populate + // database records. + let sleds = TestSled::new_from_blueprint(&bp1); + for sled in &sleds { + sled.create_database_records(&datastore, &opctx).await; + } + + // Extract Nexus and Dataset information from the generated blueprint. + let nexus_ids = get_nexuses_from_blueprint( + &bp1, + BlueprintZoneFilter::ShouldBeRunning, + ); + let debug_datasets = get_debug_datasets_from_blueprint( + &bp1, + BlueprintDatasetFilter::InService, + ); + assert!(!debug_datasets.is_empty()); + + // When we create a bundle, it should exist on a dataset provisioned by + // the blueprint. + let bundle = datastore + .support_bundle_create(&opctx, "for the test", nexus_ids[0]) + .await + .expect("Should be able to create bundle"); + + assert_eq!(bundle.state, SupportBundleState::Collecting); + assert_eq!(bundle.assigned_nexus, Some(nexus_ids[0].into())); + assert!( + debug_datasets.contains(&DatasetUuid::from(bundle.dataset_id)), + "Bundle should have been allocated from a blueprint dataset" + ); + + // Expunge the bundle's dataset. This marks it as "failed", and + // is a prerequisite for the bundle not later being re-assigned. 
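+ // (Re-assignment skips bundles that are already "failed"; see
+ // "bundles_to_reassign" in "support_bundle_fail_expunged".)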
+ let bp2 = { + let mut bp2 = bp1.clone(); + bp2.id = Uuid::new_v4(); + bp2.parent_blueprint_id = Some(bp1.id); + expunge_dataset_for_bundle(&mut bp2, &bundle); bp2 }; bp_insert_and_make_target(&opctx, &datastore, &bp2).await; - // TODO: Call this on bp1, observe no changes? let report = datastore .support_bundle_fail_expunged(&opctx, &bp2) .await .expect("Should have been able to mark bundle state as failed"); + assert_eq!( + SupportBundleExpungementReport { + bundles_failed_missing_datasets: 1, + ..Default::default() + }, + report + ); + + let observed_bundle = datastore + .support_bundle_get(&opctx, bundle.id.into()) + .await + .expect("Should be able to get bundle we just failed"); + assert_eq!(SupportBundleState::Failed, observed_bundle.state); + assert!(observed_bundle + .reason_for_failure + .unwrap() + .contains(FAILURE_REASON_NO_DATASET)); + + // Expunge the bundle's Nexus + let bp3 = { + let mut bp3 = bp2.clone(); + bp3.id = Uuid::new_v4(); + bp3.parent_blueprint_id = Some(bp2.id); + expunge_nexus_for_bundle(&mut bp3, &bundle); + bp3 + }; + bp_insert_and_make_target(&opctx, &datastore, &bp3).await; + + let report = datastore + .support_bundle_fail_expunged(&opctx, &bp3) + .await + .expect("Should have been able to mark bundle state as failed"); + + // Although the record for this bundle already exists, it is not + // re-assigned, and the original reason for it failing (dataset loss) is + // preserved. + assert_eq!(SupportBundleExpungementReport::default(), report); + + let observed_bundle = datastore + .support_bundle_get(&opctx, bundle.id.into()) + .await + .expect("Should be able to get bundle we just failed"); + assert_eq!(SupportBundleState::Failed, observed_bundle.state); + assert!(observed_bundle + .reason_for_failure + .unwrap() + .contains(FAILURE_REASON_NO_DATASET)); + + datastore + .support_bundle_delete(&opctx, bundle.id.into()) + .await + .expect("Should have been able to delete support bundle"); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_bundle_failed_from_expunged_nexus_with_reassign() { + static TEST_NAME: &str = + "test_bundle_failed_from_expunged_nexus_with_reassign"; + let logctx = dev::test_setup_log(TEST_NAME); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); - assert_eq!(1, report.bundles_failed_missing_datasets); - assert_eq!(0, report.bundles_cancelled_missing_nexus); - assert_eq!(0, report.bundles_failed_missing_nexus); + let mut rng = SimRngState::from_seed(TEST_NAME); + let (_example, mut bp1) = ExampleSystemBuilder::new_with_rng( + &logctx.log, + rng.next_system_rng(), + ) + .build(); + + bp1.parent_blueprint_id = None; + bp_insert_and_make_target(&opctx, &datastore, &bp1).await; - // TODO: Another test, maybe? - // TODO: Find the nexus where we allocated the bundle - // TODO: expunge it + // Manually perform the equivalent of blueprint execution to populate + // database records. + let sleds = TestSled::new_from_blueprint(&bp1); + for sled in &sleds { + sled.create_database_records(&datastore, &opctx).await; + } + + // Extract Nexus and Dataset information from the generated blueprint. + let nexus_ids = get_nexuses_from_blueprint( + &bp1, + BlueprintZoneFilter::ShouldBeRunning, + ); + let debug_datasets = get_debug_datasets_from_blueprint( + &bp1, + BlueprintDatasetFilter::InService, + ); + assert!(!debug_datasets.is_empty()); + + // When we create a bundle, it should exist on a dataset provisioned by + // the blueprint. 
+ let bundle = datastore + .support_bundle_create(&opctx, "for the test", nexus_ids[0]) + .await + .expect("Should be able to create bundle"); + + assert_eq!(bundle.state, SupportBundleState::Collecting); + assert_eq!(bundle.assigned_nexus, Some(nexus_ids[0].into())); + assert!( + debug_datasets.contains(&DatasetUuid::from(bundle.dataset_id)), + "Bundle should have been allocated from a blueprint dataset" + ); + + // Update the bundle's state. + // + // This is what we would do when we finish collecting, and + // provisioned storage on a sled. + datastore + .support_bundle_update( + &opctx, + bundle.id.into(), + SupportBundleState::Active, + ) + .await + .expect("Should have been able to update state"); + + // Expunge the bundle's Nexus (manually) + let bp2 = { + let mut bp2 = bp1.clone(); + bp2.id = Uuid::new_v4(); + bp2.parent_blueprint_id = Some(bp1.id); + expunge_nexus_for_bundle(&mut bp2, &bundle); + bp2 + }; + bp_insert_and_make_target(&opctx, &datastore, &bp2).await; + + let report = datastore + .support_bundle_fail_expunged(&opctx, &bp2) + .await + .expect("Should have been able to mark bundle state as destroying"); + + assert_eq!( + SupportBundleExpungementReport { + bundles_failing_missing_nexus: 1, + bundles_reassigned: 1, + ..Default::default() + }, + report + ); + + let observed_bundle = datastore + .support_bundle_get(&opctx, bundle.id.into()) + .await + .expect("Should be able to get bundle we just failed"); + assert_eq!(SupportBundleState::Failing, observed_bundle.state); + assert!(observed_bundle + .reason_for_failure + .unwrap() + .contains(FAILURE_REASON_NO_NEXUS)); db.terminate().await; logctx.cleanup_successful(); diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs index b53dd78cdb..9e9bc26ffe 100644 --- a/nexus/types/src/external_api/shared.rs +++ b/nexus/types/src/external_api/shared.rs @@ -423,20 +423,22 @@ pub enum SupportBundleState { /// Support Bundle still actively being collected. /// /// This is the initial state for a Support Bundle, and it will - /// automatically transition to either "Failed" or "Active". + /// automatically transition to either "Failing" or "Active". /// /// If a user no longer wants to access a Support Bundle, they can - /// request cancellation, which will transition to the "Cancelling" state. + /// request cancellation, which will transition to the "Destroying" state. Collecting, - /// Support Bundle has been cancelled, and will be deleted shortly. - Cancelling, + /// Support Bundle is being destroyed. + /// + /// Once backing storage has been freed, this bundle is destroyed. + Destroying, /// Support Bundle was not created successfully, or was created and has lost /// backing storage. /// /// The record of the bundle still exists for readability, but the only - /// valid operation on these bundles is to delete them. + /// valid operation on these bundles is to destroy them. Failed, /// Support Bundle has been processed, and is ready for usage. diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index fe29b2e65f..52af30612c 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2392,28 +2392,29 @@ CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo_artifact ( CREATE TYPE IF NOT EXISTS omicron.public.support_bundle_state AS ENUM ( -- The bundle is currently being created. -- - -- All storage for this bundle exists in Nexus's transient filesystem, as it - -- is being assembled. + -- It might have storage that is partially allocated on a sled. 
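+ -- (Transitions between these states are enforced by Nexus, via
+ -- "SupportBundleState::valid_old_states"; the database itself does not
+ -- constrain them.)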
'collecting',
 
- -- The bundle has been created, and is being transferred to a Sled.
- --
- -- This bundle may have been partially allocated to a Sled.
- 'collected',
+ -- The bundle has been collected successfully, and has storage on
+ -- a particular sled.
+ 'active',
 
- -- The bundle has been explicitly marked cancelled.
- --
- -- It may or may not have durable storage which needs to be freed.
- 'cancelling',
+ -- The user has explicitly requested that a bundle be destroyed.
+ -- We must ensure that storage backing that bundle is gone before
+ -- it is automatically deleted.
+ 'destroying',
 
- -- The bundle, for whatever reason, has failed.
+ -- The support bundle is failing.
+ -- This happens when Nexus is expunged partway through collection.
 --
- -- It should no longer have any managed durable storage.
- 'failed',
+ -- A different Nexus must ensure that storage is gone before the
+ -- bundle can be marked "failed".
+ 'failing',
 
- -- The bundle has been collected successfully, and has storage on
- -- a particular sled.
- 'active'
+ -- The bundle has finished failing.
+ --
+ -- The only action that can be taken on this bundle is to delete it.
+ 'failed'
 );
 
 CREATE TABLE IF NOT EXISTS omicron.public.support_bundle (
From 483801a9b0456ebc7d9352a9aaa24364491cb761 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Wed, 13 Nov 2024 11:33:30 -0800
Subject: [PATCH 09/25] Clippy, schema changes
---
 nexus/db-model/src/schema_versions.rs | 3 +-
 .../src/db/datastore/support_bundle.rs | 14 +++++-----
 openapi/nexus.json | 15 ++++++----
 schema/crdb/dbinit.sql | 2 +-
 schema/crdb/support-bundles/up01.sql | 28 +++++++++++++++++++
 schema/crdb/support-bundles/up02.sql | 14 ++++++++++
 schema/crdb/support-bundles/up03.sql | 4 +++
 schema/crdb/support-bundles/up04.sql | 4 +++
 8 files changed, 69 insertions(+), 15 deletions(-)
 create mode 100644 schema/crdb/support-bundles/up01.sql
 create mode 100644 schema/crdb/support-bundles/up02.sql
 create mode 100644 schema/crdb/support-bundles/up03.sql
 create mode 100644 schema/crdb/support-bundles/up04.sql

diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index 14eba1cb1c..580b654b75 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema. Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(113, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(114, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| {
 // | leaving the first copy as an example for the next person.
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(114, "support-bundles"), KnownVersion::new(113, "add-tx-eq"), KnownVersion::new(112, "blueprint-dataset"), KnownVersion::new(111, "drop-omicron-zone-underlay-address"), diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index 4d4a6645c2..71d1070071 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -270,7 +270,7 @@ impl DataStore { let bundles_with_bad_datasets = dsl::support_bundle .filter(dsl::dataset_id.eq_any(invalid_datasets)) .select(SupportBundle::as_select()) - .load_async(&*conn) + .load_async(conn) .await?; // Split these bundles into two categories: @@ -299,7 +299,7 @@ impl DataStore { dsl::state.eq(state), dsl::reason_for_failure.eq(FAILURE_REASON_NO_DATASET), )) - .execute_async(&*conn) + .execute_async(conn) .await?; // For bundles that are in the process of being destroyed, // the dataset expungement speeds up the process. @@ -312,7 +312,7 @@ impl DataStore { // an abundance of catuion we don't auto-delete a // bundle in any other state. .filter(dsl::state.eq(SupportBundleState::Destroying)) - .execute_async(&*conn) + .execute_async(conn) .await?; let Some(arbitrary_valid_nexus) = @@ -328,7 +328,7 @@ impl DataStore { let bundles_with_bad_nexuses = dsl::support_bundle .filter(dsl::assigned_nexus.eq_any(invalid_nexus_zones)) .select(SupportBundle::as_select()) - .load_async(&*conn) + .load_async(conn) .await?; let bundles_to_mark_failing = bundles_with_bad_nexuses.iter() @@ -355,12 +355,12 @@ impl DataStore { dsl::state.eq(state), dsl::reason_for_failure.eq(FAILURE_REASON_NO_NEXUS), )) - .execute_async(&*conn) + .execute_async(conn) .await?; let bundles_reassigned = diesel::update(dsl::support_bundle) .filter(dsl::id.eq_any(bundles_to_reassign)) .set(dsl::assigned_nexus.eq(arbitrary_valid_nexus)) - .execute_async(&*conn) + .execute_async(conn) .await?; Ok(SupportBundleExpungementReport { @@ -392,7 +392,7 @@ impl DataStore { .filter(dsl::state.eq_any(state.valid_old_states())) .set(dsl::state.eq(state)) .check_if_exists::(id.into_untyped_uuid()) - .execute_and_check(&*conn) + .execute_and_check(&conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; diff --git a/openapi/nexus.json b/openapi/nexus.json index 1aaba49e9e..18cb260fd0 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -20183,8 +20183,7 @@ "type": "object", "properties": { "id": { - "type": "string", - "format": "uuid" + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" }, "reason_for_creation": { "type": "string" @@ -20232,21 +20231,21 @@ "SupportBundleState": { "oneOf": [ { - "description": "Support Bundle still actively being collected.\n\nThis is the initial state for a Support Bundle, and it will automatically transition to either \"Failed\" or \"Active\".\n\nIf a user no longer wants to access a Support Bundle, they can request cancellation, which will transition to the \"Cancelling\" state.", + "description": "Support Bundle still actively being collected.\n\nThis is the initial state for a Support Bundle, and it will automatically transition to either \"Failing\" or \"Active\".\n\nIf a user no longer wants to access a Support Bundle, they can request cancellation, which will transition to the \"Destroying\" state.", "type": "string", "enum": [ "collecting" ] }, { - "description": "Support Bundle has been cancelled, and will be 
deleted shortly.", + "description": "Support Bundle is being destroyed.\n\nOnce backing storage has been freed, this bundle is destroyed.", "type": "string", "enum": [ - "cancelling" + "destroying" ] }, { - "description": "Support Bundle was not created successfully, or was created and has lost backing storage.\n\nThe record of the bundle still exists for readability, but the only valid operation on these bundles is to delete them.", + "description": "Support Bundle was not created successfully, or was created and has lost backing storage.\n\nThe record of the bundle still exists for readability, but the only valid operation on these bundles is to destroy them.", "type": "string", "enum": [ "failed" @@ -21275,6 +21274,10 @@ } } }, + "TypedUuidForSupportBundleKind": { + "type": "string", + "format": "uuid" + }, "UninitializedSled": { "description": "A sled that has not been added to an initialized rack yet", "type": "object", diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 52af30612c..0beb32c92a 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -4661,7 +4661,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '113.0.0', NULL) + (TRUE, NOW(), NOW(), '114.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/support-bundles/up01.sql b/schema/crdb/support-bundles/up01.sql new file mode 100644 index 0000000000..1260e79e18 --- /dev/null +++ b/schema/crdb/support-bundles/up01.sql @@ -0,0 +1,28 @@ +CREATE TYPE IF NOT EXISTS omicron.public.support_bundle_state AS ENUM ( + -- The bundle is currently being created. + -- + -- It might have storage that is partially allocated on a sled. + 'collecting', + + -- The bundle has been collected successfully, and has storage on + -- a particular sled. + 'active', + + -- The user has explicitly requested that a bundle be destroyed. + -- We must ensure that storage backing that bundle is gone before + -- it is automatically deleted. + 'destroying', + + -- The support bundle is failing. + -- This happens when Nexus is expunged partway through collection. + -- + -- A different Nexus must ensure that storage is gone before the + -- bundle can be marked "failed". + 'failing', + + -- The bundle has finished failing. + -- + -- The only action that can be taken on this bundle is to delete it. + 'failed' +); + diff --git a/schema/crdb/support-bundles/up02.sql b/schema/crdb/support-bundles/up02.sql new file mode 100644 index 0000000000..bc61e70480 --- /dev/null +++ b/schema/crdb/support-bundles/up02.sql @@ -0,0 +1,14 @@ +CREATE TABLE IF NOT EXISTS omicron.public.support_bundle ( + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + reason_for_creation TEXT NOT NULL, + reason_for_failure TEXT, + state omicron.public.support_bundle_state NOT NULL, + zpool_id UUID NOT NULL, + dataset_id UUID NOT NULL, + + -- The Nexus which is in charge of collecting the support bundle, + -- and later managing its storage. 
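+ -- If that Nexus is expunged, "support_bundle_fail_expunged" re-assigns
+ -- the bundle to a Nexus which still exists.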
+ assigned_nexus UUID +); + diff --git a/schema/crdb/support-bundles/up03.sql b/schema/crdb/support-bundles/up03.sql new file mode 100644 index 0000000000..8d4bdb47ca --- /dev/null +++ b/schema/crdb/support-bundles/up03.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS one_bundle_per_dataset ON omicron.public.support_bundle ( + dataset_id +); + diff --git a/schema/crdb/support-bundles/up04.sql b/schema/crdb/support-bundles/up04.sql new file mode 100644 index 0000000000..6df95640fd --- /dev/null +++ b/schema/crdb/support-bundles/up04.sql @@ -0,0 +1,4 @@ +CREATE INDEX IF NOT EXISTS lookup_bundle_by_neuxs ON omicron.public.support_bundle ( + assigned_nexus +); + From 5f78af9015371bff473b50651172cc98212611fc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 13 Nov 2024 11:34:56 -0800 Subject: [PATCH 10/25] more clippy --- nexus/db-queries/src/db/datastore/support_bundle.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index 71d1070071..eb099a1a48 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -757,7 +757,7 @@ mod test { ) -> Vec { bp.blueprint_zones .values() - .map(|zones_config| { + .flat_map(|zones_config| { let mut nexus_zones = vec![]; for zone in &zones_config.zones { if matches!(zone.zone_type, BlueprintZoneType::Nexus(_)) @@ -768,7 +768,6 @@ mod test { } nexus_zones }) - .flatten() .collect() } @@ -778,7 +777,7 @@ mod test { ) -> Vec { bp.blueprint_datasets .values() - .map(|datasets_config| { + .flat_map(|datasets_config| { let mut debug_datasets = vec![]; for dataset in datasets_config.datasets.values() { if matches!(dataset.kind, DebugDatasetKind) @@ -789,7 +788,6 @@ mod test { } debug_datasets }) - .flatten() .collect() } From 92b7d62aa28bb4e9ae9e2791ba889e6880968a7b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 13 Nov 2024 12:10:32 -0800 Subject: [PATCH 11/25] Strongly-typed UUID --- nexus/types/src/external_api/shared.rs | 16 +++++++++------- openapi/nexus.json | 15 +++++++++------ uuid-kinds/src/lib.rs | 1 + 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs index 2928fab46b..9e9bc26ffe 100644 --- a/nexus/types/src/external_api/shared.rs +++ b/nexus/types/src/external_api/shared.rs @@ -10,6 +10,7 @@ use chrono::DateTime; use chrono::Utc; use omicron_common::api::external::Name; use omicron_common::api::internal::shared::NetworkInterface; +use omicron_uuid_kinds::SupportBundleUuid; use parse_display::FromStr; use schemars::JsonSchema; use serde::de::Error as _; @@ -422,20 +423,22 @@ pub enum SupportBundleState { /// Support Bundle still actively being collected. /// /// This is the initial state for a Support Bundle, and it will - /// automatically transition to either "Failed" or "Active". + /// automatically transition to either "Failing" or "Active". /// /// If a user no longer wants to access a Support Bundle, they can - /// request cancellation, which will transition to the "Cancelling" state. + /// request cancellation, which will transition to the "Destroying" state. Collecting, - /// Support Bundle has been cancelled, and will be deleted shortly. - Cancelling, + /// Support Bundle is being destroyed. + /// + /// Once backing storage has been freed, this bundle is destroyed. 
+ Destroying, /// Support Bundle was not created successfully, or was created and has lost /// backing storage. /// /// The record of the bundle still exists for readability, but the only - /// valid operation on these bundles is to delete them. + /// valid operation on these bundles is to destroy them. Failed, /// Support Bundle has been processed, and is ready for usage. @@ -444,8 +447,7 @@ pub enum SupportBundleState { #[derive(Debug, Clone, JsonSchema, Serialize, Deserialize)] pub struct SupportBundleInfo { - // XXX: Strongly-typed Support Bundle UUID? - pub id: Uuid, + pub id: SupportBundleUuid, pub time_created: DateTime, pub reason_for_creation: String, pub reason_for_failure: Option, diff --git a/openapi/nexus.json b/openapi/nexus.json index 1aaba49e9e..18cb260fd0 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -20183,8 +20183,7 @@ "type": "object", "properties": { "id": { - "type": "string", - "format": "uuid" + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" }, "reason_for_creation": { "type": "string" @@ -20232,21 +20231,21 @@ "SupportBundleState": { "oneOf": [ { - "description": "Support Bundle still actively being collected.\n\nThis is the initial state for a Support Bundle, and it will automatically transition to either \"Failed\" or \"Active\".\n\nIf a user no longer wants to access a Support Bundle, they can request cancellation, which will transition to the \"Cancelling\" state.", + "description": "Support Bundle still actively being collected.\n\nThis is the initial state for a Support Bundle, and it will automatically transition to either \"Failing\" or \"Active\".\n\nIf a user no longer wants to access a Support Bundle, they can request cancellation, which will transition to the \"Destroying\" state.", "type": "string", "enum": [ "collecting" ] }, { - "description": "Support Bundle has been cancelled, and will be deleted shortly.", + "description": "Support Bundle is being destroyed.\n\nOnce backing storage has been freed, this bundle is destroyed.", "type": "string", "enum": [ - "cancelling" + "destroying" ] }, { - "description": "Support Bundle was not created successfully, or was created and has lost backing storage.\n\nThe record of the bundle still exists for readability, but the only valid operation on these bundles is to delete them.", + "description": "Support Bundle was not created successfully, or was created and has lost backing storage.\n\nThe record of the bundle still exists for readability, but the only valid operation on these bundles is to destroy them.", "type": "string", "enum": [ "failed" @@ -21275,6 +21274,10 @@ } } }, + "TypedUuidForSupportBundleKind": { + "type": "string", + "format": "uuid" + }, "UninitializedSled": { "description": "A sled that has not been added to an initialized rack yet", "type": "object", diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 7947062a82..50b263fa0b 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -65,6 +65,7 @@ impl_typed_uuid_kind! 
{ RackReset => "rack_reset", Region => "region", Sled => "sled", + SupportBundle => "support_bundle", TufRepo => "tuf_repo", Upstairs => "upstairs", UpstairsRepair => "upstairs_repair", From 6c529d1b8cdf0f5b7b6b536eb07aca636f9e27a7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 13 Nov 2024 16:30:15 -0800 Subject: [PATCH 12/25] Add list-by-nexus method, test it --- .../src/db/datastore/support_bundle.rs | 156 +++++++++++++++++- 1 file changed, 155 insertions(+), 1 deletion(-) diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index eb099a1a48..562b1ec2b1 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -194,6 +194,29 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Lists one page of support bundles in a particular state, assigned to + /// a particular Nexus. + pub async fn support_bundle_list_assigned_to_nexus( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + nexus_id: OmicronZoneUuid, + states: Vec, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + use db::schema::support_bundle::dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + paginated(dsl::support_bundle, dsl::id, pagparams) + .filter(dsl::assigned_nexus.eq(nexus_id.into_untyped_uuid())) + .filter(dsl::state.eq_any(states)) + .order(dsl::time_created.asc()) + .select(SupportBundle::as_select()) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Marks support bundles as failed if their assigned Nexus or backing /// dataset has been destroyed. pub async fn support_bundle_fail_expunged( @@ -376,7 +399,12 @@ impl DataStore { .await } - /// Updates the state of a support bundle + /// Updates the state of a support bundle. + /// + /// Returns: + /// - "Ok" if the bundle was updated successfully. + /// - "Err::InvalidRequest" if the bundle exists, but could not be updated + /// because the state transition is invalid. 
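+ ///
+ /// (Legal transitions are those permitted by
+ /// "SupportBundleState::valid_old_states".)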
pub async fn support_bundle_update(
 &self,
 opctx: &OpContext,
 id: SupportBundleUuid,
@@ -597,6 +625,132 @@ mod test {
 );
 }
 
+ #[tokio::test]
+ async fn test_bundle_list_filtering() {
+ let logctx = dev::test_setup_log("test_bundle_list_filtering");
+ let db = TestDatabase::new_with_datastore(&logctx.log).await;
+ let (opctx, datastore) = (db.opctx(), db.datastore());
+
+ let nexus_a = OmicronZoneUuid::new_v4();
+ let nexus_b = OmicronZoneUuid::new_v4();
+
+ let _test_sled = create_sled_and_zpools(&datastore, &opctx, 5).await;
+
+ let pagparams = DataPageParams::max_page();
+
+ // No bundles exist yet, so the list should be empty
+
+ assert_eq!(
+ datastore
+ .support_bundle_list_assigned_to_nexus(
+ &opctx,
+ &pagparams,
+ nexus_a,
+ vec![SupportBundleState::Collecting]
+ )
+ .await
+ .expect("Should always be able to list bundles"),
+ vec![]
+ );
+
+ // Create two bundles on "nexus A", one bundle on "nexus B"
+
+ let bundle_a1 = datastore
+ .support_bundle_create(&opctx, "for the test", nexus_a)
+ .await
+ .expect("Should be able to create bundle");
+ let bundle_a2 = datastore
+ .support_bundle_create(&opctx, "for the test", nexus_a)
+ .await
+ .expect("Should be able to create bundle");
+ let bundle_b1 = datastore
+ .support_bundle_create(&opctx, "for the test", nexus_b)
+ .await
+ .expect("Should be able to create bundle");
+
+ assert_eq!(
+ datastore
+ .support_bundle_list_assigned_to_nexus(
+ &opctx,
+ &pagparams,
+ nexus_a,
+ vec![SupportBundleState::Collecting]
+ )
+ .await
+ .expect("Should always be able to list bundles")
+ .iter()
+ .map(|b| b.id)
+ .collect::>(),
+ vec![bundle_a1.id, bundle_a2.id,]
+ );
+ assert_eq!(
+ datastore
+ .support_bundle_list_assigned_to_nexus(
+ &opctx,
+ &pagparams,
+ nexus_b,
+ vec![SupportBundleState::Collecting]
+ )
+ .await
+ .expect("Should always be able to list bundles")
+ .iter()
+ .map(|b| b.id)
+ .collect::>(),
+ vec![bundle_b1.id,]
+ );
+
+ // When we update the state of the bundles, the list results
+ // should also be filtered.
+ datastore
+ .support_bundle_update(
+ &opctx,
+ bundle_a1.id.into(),
+ SupportBundleState::Active,
+ )
+ .await
+ .expect("Should have been able to update state");
+
+ // "bundle_a1" is no longer collecting, so it won't appear here.
+ assert_eq!(
+ datastore
+ .support_bundle_list_assigned_to_nexus(
+ &opctx,
+ &pagparams,
+ nexus_a,
+ vec![SupportBundleState::Collecting]
+ )
+ .await
+ .expect("Should always be able to list bundles")
+ .iter()
+ .map(|b| b.id)
+ .collect::>(),
+ vec![bundle_a2.id,]
+ );
+
+ // ...
but if we ask for enough states, it'll show up + assert_eq!( + datastore + .support_bundle_list_assigned_to_nexus( + &opctx, + &pagparams, + nexus_a, + vec![ + SupportBundleState::Active, + SupportBundleState::Collecting + ] + ) + .await + .expect("Should always be able to list bundles") + .iter() + .map(|b| b.id) + .collect::>(), + vec![bundle_a1.id, bundle_a2.id,] + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_bundle_create_capacity_limits() { let logctx = dev::test_setup_log("test_bundle_create_capacity_limits"); From e61365a1c000f22b06500685c23c1cc7e07babb3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 25 Nov 2024 13:21:23 -0800 Subject: [PATCH 13/25] merge --- nexus/test-utils/src/resource_helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index ce1fb38f01..6f704fac00 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -1175,7 +1175,7 @@ impl<'a, N: NexusServer> DiskTest<'a, N> { for disk in &disks_config.disks { self.add_zpool_with_dataset_ext( *sled_id, - disk.id, + PhysicalDiskUuid::from_untyped_uuid(disk.id), disk.pool_id, DatasetUuid::new_v4(), Self::DEFAULT_ZPOOL_SIZE_GIB, From 81c7121fa3c13e42167386dd579aaf86e70e1c08 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 25 Nov 2024 13:23:41 -0800 Subject: [PATCH 14/25] Merge UUID changes --- nexus/db-queries/src/db/datastore/support_bundle.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index 562b1ec2b1..b89ce5cec0 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -28,7 +28,6 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_uuid_kinds::BlueprintUuid; -use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SupportBundleUuid; @@ -131,7 +130,7 @@ impl DataStore { let bundle = SupportBundle::new( reason_for_creation, ZpoolUuid::from_untyped_uuid(dataset.pool_id), - DatasetUuid::from_untyped_uuid(dataset.id()), + dataset.id(), this_nexus_id, ); @@ -485,6 +484,7 @@ mod test { use nexus_types::deployment::BlueprintZoneType; use omicron_common::api::internal::shared::DatasetKind::Debug as DebugDatasetKind; use omicron_test_utils::dev; + use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::SledUuid; use rand::Rng; @@ -581,7 +581,7 @@ mod test { .expect("failed to upsert zpool"); let dataset = Dataset::new( - pool.dataset.into_untyped_uuid(), + pool.dataset, pool.pool.into_untyped_uuid(), None, DebugDatasetKind, From 7c33f19af9504fba317ae3587602b8a47832b34b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 27 Nov 2024 11:11:48 -0800 Subject: [PATCH 15/25] fix typed uuid merge --- nexus/test-utils/src/resource_helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index ebf652b8b2..c5cb0231d1 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -1175,7 +1175,7 @@ impl<'a, N: NexusServer> DiskTest<'a, N> { for disk in &disks_config.disks { 
self.add_zpool_with_dataset_ext( *sled_id, - PhysicalDiskUuid::from_untyped_uuid(disk.id), + disk.id, disk.pool_id, DatasetUuid::new_v4(), Self::DEFAULT_ZPOOL_SIZE_GIB, From 10c3745349f5ef3c2890e208928b29dca7f74ef6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 27 Nov 2024 11:16:30 -0800 Subject: [PATCH 16/25] more typed UUID merges --- nexus/db-queries/src/db/datastore/support_bundle.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index b89ce5cec0..3f04cb1135 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -485,6 +485,7 @@ mod test { use omicron_common::api::internal::shared::DatasetKind::Debug as DebugDatasetKind; use omicron_test_utils::dev; use omicron_uuid_kinds::DatasetUuid; + use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use rand::Rng; @@ -573,7 +574,7 @@ mod test { let zpool = Zpool::new( *pool.pool.as_untyped_uuid(), *self.sled.as_untyped_uuid(), - Uuid::new_v4(), + PhysicalDiskUuid::new_v4(), ); datastore .zpool_insert(opctx, zpool) From 2b42d0e9eba874261cfe04e8dada79942d9b9633 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 27 Nov 2024 16:47:23 -0800 Subject: [PATCH 17/25] API updates pulled from 7187 as I built it --- clients/sled-agent-client/src/lib.rs | 3 + common/src/api/external/mod.rs | 18 +++ nexus/external-api/output/nexus_tags.txt | 1 + nexus/external-api/src/lib.rs | 17 ++- nexus/src/external_api/http_entrypoints.rs | 30 ++++- nexus/types/src/external_api/params.rs | 2 +- openapi/nexus.json | 145 ++++++++++++++++++--- sled-agent/api/src/lib.rs | 19 +-- 8 files changed, 196 insertions(+), 39 deletions(-) diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index f2eb957650..f818f241fa 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -75,12 +75,15 @@ progenitor::generate_api!( RouterVersion = omicron_common::api::internal::shared::RouterVersion, SledRole = nexus_sled_agent_shared::inventory::SledRole, SourceNatConfig = omicron_common::api::internal::shared::SourceNatConfig, + SupportBundleGetQueryParams = omicron_common::api::external::SupportBundleGetQueryParams, + SupportBundleQueryType = omicron_common::api::external::SupportBundleQueryType, SwitchLocation = omicron_common::api::external::SwitchLocation, TypedUuidForDatasetKind = omicron_uuid_kinds::DatasetUuid, TypedUuidForInstanceKind = omicron_uuid_kinds::InstanceUuid, TypedUuidForOmicronZoneKind = omicron_uuid_kinds::OmicronZoneUuid, TypedUuidForPropolisKind = omicron_uuid_kinds::PropolisUuid, TypedUuidForSledKind = omicron_uuid_kinds::SledUuid, + TypedUuidForSupportBundleKind = omicron_uuid_kinds::SupportBundleUuid, TypedUuidForZpoolKind = omicron_uuid_kinds::ZpoolUuid, Vni = omicron_common::api::external::Vni, ZpoolKind = omicron_common::zpool_name::ZpoolKind, diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 38a9de0564..6db4d5c189 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -3090,6 +3090,24 @@ pub enum ImportExportPolicy { Allow(Vec), } +/// Query parameters for reading the support bundle +#[derive(Deserialize, Serialize, JsonSchema)] +pub struct SupportBundleGetQueryParams { + pub query_type: SupportBundleQueryType, +} + +/// Describes the type of access to the support bundle +#[derive(Clone, Debug, Deserialize, 
Serialize, JsonSchema)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum SupportBundleQueryType { + /// Access the whole support bundle + Whole, + /// Access the names of all files within the support bundle + Index, + /// Access a specific file within the support bundle + Path { file_path: String }, +} + #[cfg(test)] mod test { use serde::Deserialize; diff --git a/nexus/external-api/output/nexus_tags.txt b/nexus/external-api/output/nexus_tags.txt index 0526a10b33..11e942496e 100644 --- a/nexus/external-api/output/nexus_tags.txt +++ b/nexus/external-api/output/nexus_tags.txt @@ -33,6 +33,7 @@ probe_view GET /experimental/v1/probes/{probe support_bundle_create POST /experimental/v1/system/support-bundles support_bundle_delete DELETE /experimental/v1/system/support-bundles/{support_bundle} support_bundle_download GET /experimental/v1/system/support-bundles/{support_bundle}/download +support_bundle_head HEAD /experimental/v1/system/support-bundles/{support_bundle}/download support_bundle_list GET /experimental/v1/system/support-bundles support_bundle_view GET /experimental/v1/system/support-bundles/{support_bundle} diff --git a/nexus/external-api/src/lib.rs b/nexus/external-api/src/lib.rs index 2860db740e..4c65825396 100644 --- a/nexus/external-api/src/lib.rs +++ b/nexus/external-api/src/lib.rs @@ -2793,6 +2793,19 @@ pub trait NexusExternalApi { async fn support_bundle_download( rqctx: RequestContext, path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; + + /// Download the metadata of a single support bundle + #[endpoint { + method = HEAD, + path = "/experimental/v1/system/support-bundles/{support_bundle}/download", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_head( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, ) -> Result, HttpError>; /// Create a new support bundle @@ -2803,7 +2816,7 @@ pub trait NexusExternalApi { }] async fn support_bundle_create( rqctx: RequestContext, - ) -> Result, HttpError>; + ) -> Result, HttpError>; /// Delete an existing support bundle /// @@ -2817,7 +2830,7 @@ pub trait NexusExternalApi { async fn support_bundle_delete( rqctx: RequestContext, path_params: Path, - ) -> Result, HttpError>; + ) -> Result; // Probes (experimental) diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 004a3482c5..bab84e8c38 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -81,6 +81,7 @@ use omicron_common::api::external::NameOrId; use omicron_common::api::external::Probe; use omicron_common::api::external::RouterRoute; use omicron_common::api::external::RouterRouteKind; +use omicron_common::api::external::SupportBundleGetQueryParams; use omicron_common::api::external::SwitchPort; use omicron_common::api::external::SwitchPortSettings; use omicron_common::api::external::SwitchPortSettingsView; @@ -6049,6 +6050,31 @@ impl NexusExternalApi for NexusExternalApiImpl { async fn support_bundle_download( rqctx: RequestContext, _path_params: Path, + _body: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn 
support_bundle_head( + rqctx: RequestContext, + _path_params: Path, + _body: TypedBody, ) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { @@ -6071,7 +6097,7 @@ impl NexusExternalApi for NexusExternalApiImpl { async fn support_bundle_create( rqctx: RequestContext, - ) -> Result, HttpError> { + ) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; @@ -6094,7 +6120,7 @@ impl NexusExternalApi for NexusExternalApiImpl { async fn support_bundle_delete( rqctx: RequestContext, _path_params: Path, - ) -> Result, HttpError> { + ) -> Result { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 0929b2c04b..4a4af8b052 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -89,8 +89,8 @@ path_param!(SshKeyPath, ssh_key, "SSH key"); path_param!(AddressLotPath, address_lot, "address lot"); path_param!(ProbePath, probe, "probe"); path_param!(CertificatePath, certificate, "certificate"); -path_param!(SupportBundlePath, support_bundle, "support bundle"); +id_path_param!(SupportBundlePath, support_bundle, "support bundle"); id_path_param!(GroupPath, group_id, "group"); // TODO: The hardware resources should be represented by its UUID or a hardware diff --git a/openapi/nexus.json b/openapi/nexus.json index c9da9dac5a..fe91ff98e3 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -366,8 +366,8 @@ "summary": "Create a new support bundle", "operationId": "support_bundle_create", "responses": { - "200": { - "description": "successful operation", + "201": { + "description": "successful creation", "content": { "application/json": { "schema": { @@ -396,10 +396,11 @@ { "in": "path", "name": "support_bundle", - "description": "Name or ID of the support bundle", + "description": "ID of the support bundle", "required": true, "schema": { - "$ref": "#/components/schemas/NameOrId" + "type": "string", + "format": "uuid" } } ], @@ -433,23 +434,17 @@ { "in": "path", "name": "support_bundle", - "description": "Name or ID of the support bundle", + "description": "ID of the support bundle", "required": true, "schema": { - "$ref": "#/components/schemas/NameOrId" + "type": "string", + "format": "uuid" } } ], "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SupportBundleInfo" - } - } - } + "204": { + "description": "successful deletion" }, "4XX": { "$ref": "#/components/responses/Error" @@ -471,13 +466,63 @@ { "in": "path", "name": "support_bundle", - "description": "Name or ID of the support bundle", + "description": "ID of the support bundle", "required": true, "schema": { - "$ref": "#/components/schemas/NameOrId" + "type": "string", + "format": "uuid" } } ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleGetQueryParams" + } + } + }, + "required": true + }, + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + }, + "head": { + "tags": [ + "hidden" + ], + "summary": "Download the metadata of a single support bundle", + "operationId": "support_bundle_head", + "parameters": [ + { + "in": "path", + "name": "support_bundle", + "description": "ID of the support bundle", + "required": true, + "schema": { + "type": "string", + "format": 
"uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleGetQueryParams" + } + } + }, + "required": true + }, "responses": { "default": { "description": "", @@ -20199,6 +20244,18 @@ "items" ] }, + "SupportBundleGetQueryParams": { + "description": "Query parameters for reading the support bundle", + "type": "object", + "properties": { + "query_type": { + "$ref": "#/components/schemas/SupportBundleQueryType" + } + }, + "required": [ + "query_type" + ] + }, "SupportBundleInfo": { "type": "object", "properties": { @@ -20248,6 +20305,60 @@ "items" ] }, + "SupportBundleQueryType": { + "description": "Describes the type of access to the support bundle", + "oneOf": [ + { + "description": "Access the whole support bundle", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "whole" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Access the names of all files within the support bundle", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "index" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Access a specific file within the support bundle", + "type": "object", + "properties": { + "file_path": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "path" + ] + } + }, + "required": [ + "file_path", + "type" + ] + } + ] + }, "SupportBundleState": { "oneOf": [ { diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index 634640079a..be0dbdf1d8 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -702,23 +702,8 @@ pub struct SupportBundleGetHeaders { range: String, } -/// Query parameters for reading the support bundle -#[derive(Deserialize, Serialize, JsonSchema)] -pub struct SupportBundleGetQueryParams { - pub query_type: SupportBundleQueryType, -} - -/// Describes the type of access to the support bundle -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum SupportBundleQueryType { - /// Access the whole support bundle - Whole, - /// Access the names of all files within the support bundle - Index, - /// Access a specific file within the support bundle - Path { file_path: String }, -} +pub use omicron_common::api::external::SupportBundleGetQueryParams; +pub use omicron_common::api::external::SupportBundleQueryType; #[derive(Deserialize, Debug, Serialize, JsonSchema, PartialEq)] #[serde(rename_all = "snake_case")] From 1fccea8089bf39ff8806a6c0f767da59a3b11aa3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 2 Dec 2024 10:33:20 -0800 Subject: [PATCH 18/25] expectorate --- nexus/tests/output/uncovered-authz-endpoints.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/nexus/tests/output/uncovered-authz-endpoints.txt b/nexus/tests/output/uncovered-authz-endpoints.txt index ec4790c071..13fb389d6e 100644 --- a/nexus/tests/output/uncovered-authz-endpoints.txt +++ b/nexus/tests/output/uncovered-authz-endpoints.txt @@ -8,6 +8,7 @@ support_bundle_view (get "/experimental/v1/system/suppor support_bundle_download (get "/experimental/v1/system/support-bundles/{support_bundle}/download") ping (get "/v1/ping") networking_switch_port_status (get "/v1/system/hardware/switch-port/{port}/status") +support_bundle_head (head "/experimental/v1/system/support-bundles/{support_bundle}/download") device_auth_request (post "/device/auth") device_auth_confirm (post "/device/confirm") device_access_token (post 
"/device/token") From 55e9c24a7d44ea9768041e6ce623ea86683e852c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 13 Dec 2024 13:14:38 -0800 Subject: [PATCH 19/25] use path params more --- nexus/external-api/output/nexus_tags.txt | 3 + nexus/external-api/src/lib.rs | 41 ++++- nexus/src/external_api/http_entrypoints.rs | 72 +++++++- nexus/types/src/external_api/params.rs | 9 + openapi/nexus.json | 193 ++++++++++++--------- 5 files changed, 225 insertions(+), 93 deletions(-) diff --git a/nexus/external-api/output/nexus_tags.txt b/nexus/external-api/output/nexus_tags.txt index 4c266f87b5..4fc92b18d8 100644 --- a/nexus/external-api/output/nexus_tags.txt +++ b/nexus/external-api/output/nexus_tags.txt @@ -33,7 +33,10 @@ probe_view GET /experimental/v1/probes/{probe support_bundle_create POST /experimental/v1/system/support-bundles support_bundle_delete DELETE /experimental/v1/system/support-bundles/{support_bundle} support_bundle_download GET /experimental/v1/system/support-bundles/{support_bundle}/download +support_bundle_download_file GET /experimental/v1/system/support-bundles/{support_bundle}/download/{file} support_bundle_head HEAD /experimental/v1/system/support-bundles/{support_bundle}/download +support_bundle_head_file HEAD /experimental/v1/system/support-bundles/{support_bundle}/download/{file} +support_bundle_index GET /experimental/v1/system/support-bundles/{support_bundle}/index support_bundle_list GET /experimental/v1/system/support-bundles support_bundle_view GET /experimental/v1/system/support-bundles/{support_bundle} timeseries_query POST /v1/timeseries/query diff --git a/nexus/external-api/src/lib.rs b/nexus/external-api/src/lib.rs index 6429cfd06b..54ba3ab34b 100644 --- a/nexus/external-api/src/lib.rs +++ b/nexus/external-api/src/lib.rs @@ -2793,7 +2793,7 @@ pub trait NexusExternalApi { query_params: Query, ) -> Result>, HttpError>; - /// View a single support bundle + /// View a support bundle #[endpoint { method = GET, path = "/experimental/v1/system/support-bundles/{support_bundle}", @@ -2804,7 +2804,18 @@ pub trait NexusExternalApi { path_params: Path, ) -> Result, HttpError>; - /// Download the contents of a single support bundle + /// Download the index of a support bundle + #[endpoint { + method = GET, + path = "/experimental/v1/system/support-bundles/{support_bundle}/index", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_index( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Download the contents of a support bundle #[endpoint { method = GET, path = "/experimental/v1/system/support-bundles/{support_bundle}/download", @@ -2813,10 +2824,20 @@ pub trait NexusExternalApi { async fn support_bundle_download( rqctx: RequestContext, path_params: Path, - body: TypedBody, ) -> Result, HttpError>; - /// Download the metadata of a single support bundle + /// Download a file within a support bundle + #[endpoint { + method = GET, + path = "/experimental/v1/system/support-bundles/{support_bundle}/download/{file}", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_download_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Download the metadata of a support bundle #[endpoint { method = HEAD, path = "/experimental/v1/system/support-bundles/{support_bundle}/download", @@ -2825,7 +2846,17 @@ pub trait NexusExternalApi { async fn support_bundle_head( rqctx: RequestContext, path_params: Path, - body: TypedBody, + 
) -> Result, HttpError>; + + /// Download the metadata of a file within the support bundle + #[endpoint { + method = HEAD, + path = "/experimental/v1/system/support-bundles/{support_bundle}/download/{file}", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_head_file( + rqctx: RequestContext, + path_params: Path, ) -> Result, HttpError>; /// Create a new support bundle diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index bc451e2a76..cfc9f99851 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -81,7 +81,6 @@ use omicron_common::api::external::NameOrId; use omicron_common::api::external::Probe; use omicron_common::api::external::RouterRoute; use omicron_common::api::external::RouterRouteKind; -use omicron_common::api::external::SupportBundleGetQueryParams; use omicron_common::api::external::SwitchPort; use omicron_common::api::external::SwitchPortSettings; use omicron_common::api::external::SwitchPortSettingsView; @@ -6074,10 +6073,55 @@ impl NexusExternalApi for NexusExternalApiImpl { .await } + async fn support_bundle_index( + rqctx: RequestContext, + _path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + async fn support_bundle_download( rqctx: RequestContext, _path_params: Path, - _body: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn support_bundle_download_file( + rqctx: RequestContext, + _path_params: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { @@ -6101,7 +6145,29 @@ impl NexusExternalApi for NexusExternalApiImpl { async fn support_bundle_head( rqctx: RequestContext, _path_params: Path, - _body: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn support_bundle_head_file( + rqctx: RequestContext, + _path_params: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 4a4af8b052..4e616e698f 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -143,6 +143,15 @@ impl From for SiloSelector { } } +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct SupportBundleFilePath { + #[serde(flatten)] + pub bundle: SupportBundlePath, + + /// The file within the bundle to download + pub file: String, +} + #[derive(Debug, Clone, 
Serialize, Deserialize, JsonSchema, PartialEq)] pub struct OptionalSiloSelector { /// Name or ID of the silo diff --git a/openapi/nexus.json b/openapi/nexus.json index fff479b9df..bc043059dd 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -390,7 +390,7 @@ "tags": [ "hidden" ], - "summary": "View a single support bundle", + "summary": "View a support bundle", "operationId": "support_bundle_view", "parameters": [ { @@ -460,7 +460,7 @@ "tags": [ "hidden" ], - "summary": "Download the contents of a single support bundle", + "summary": "Download the contents of a support bundle", "operationId": "support_bundle_download", "parameters": [ { @@ -474,16 +474,75 @@ } } ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SupportBundleGetQueryParams" + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + }, + "head": { + "tags": [ + "hidden" + ], + "summary": "Download the metadata of a support bundle", + "operationId": "support_bundle_head", + "parameters": [ + { + "in": "path", + "name": "support_bundle", + "description": "ID of the support bundle", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} } } + } + } + } + }, + "/experimental/v1/system/support-bundles/{support_bundle}/download/{file}": { + "get": { + "tags": [ + "hidden" + ], + "summary": "Download a file within a support bundle", + "operationId": "support_bundle_download_file", + "parameters": [ + { + "in": "path", + "name": "file", + "description": "The file within the bundle to download", + "required": true, + "schema": { + "type": "string" + } }, - "required": true - }, + { + "in": "path", + "name": "support_bundle", + "description": "ID of the support bundle", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], "responses": { "default": { "description": "", @@ -499,9 +558,18 @@ "tags": [ "hidden" ], - "summary": "Download the metadata of a single support bundle", - "operationId": "support_bundle_head", + "summary": "Download the metadata of a file within the support bundle", + "operationId": "support_bundle_head_file", "parameters": [ + { + "in": "path", + "name": "file", + "description": "The file within the bundle to download", + "required": true, + "schema": { + "type": "string" + } + }, { "in": "path", "name": "support_bundle", @@ -513,16 +581,37 @@ } } ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SupportBundleGetQueryParams" + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} } } - }, - "required": true - }, + } + } + } + }, + "/experimental/v1/system/support-bundles/{support_bundle}/index": { + "get": { + "tags": [ + "hidden" + ], + "summary": "Download the index of a support bundle", + "operationId": "support_bundle_index", + "parameters": [ + { + "in": "path", + "name": "support_bundle", + "description": "ID of the support bundle", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], "responses": { "default": { "description": "", @@ -20293,18 +20382,6 @@ "items" ] }, - "SupportBundleGetQueryParams": { - "description": "Query parameters for reading the support bundle", - "type": "object", - "properties": { - "query_type": { - "$ref": "#/components/schemas/SupportBundleQueryType" - } - }, - "required": [ - 
"query_type" - ] - }, "SupportBundleInfo": { "type": "object", "properties": { @@ -20354,60 +20431,6 @@ "items" ] }, - "SupportBundleQueryType": { - "description": "Describes the type of access to the support bundle", - "oneOf": [ - { - "description": "Access the whole support bundle", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "whole" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Access the names of all files within the support bundle", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "index" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Access a specific file within the support bundle", - "type": "object", - "properties": { - "file_path": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "path" - ] - } - }, - "required": [ - "file_path", - "type" - ] - } - ] - }, "SupportBundleState": { "oneOf": [ { From efca59637727022106bf19f7f9db0f4c28a3afdb Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 13 Dec 2024 14:02:55 -0800 Subject: [PATCH 20/25] Review feedback --- nexus/db-model/src/support_bundle.rs | 5 +- .../src/db/datastore/support_bundle.rs | 142 ++++++++++++++++-- schema/crdb/dbinit.sql | 2 +- schema/crdb/support-bundles/up04.sql | 2 +- 4 files changed, 133 insertions(+), 18 deletions(-) diff --git a/nexus/db-model/src/support_bundle.rs b/nexus/db-model/src/support_bundle.rs index 0e53070877..a4b14d363b 100644 --- a/nexus/db-model/src/support_bundle.rs +++ b/nexus/db-model/src/support_bundle.rs @@ -51,7 +51,7 @@ impl SupportBundleState { Destroying => vec![Active, Collecting, Failing], Failing => vec![Collecting, Active], // The "Failed" state is terminal. - Failed => vec![Collecting, Active, Failing], + Failed => vec![Active, Collecting, Failing], } } } @@ -69,8 +69,9 @@ impl From for SupportBundleStateView { // whether or not the bundle record can be safely deleted. // // Either way, it should be possible to delete the bundle. + // If a user requests that we delete a bundle in these states: // - "Failing" bundles will become "Destroying" - // - "Failed" bundles will be destroyed immediately + // - "Failed" bundles can be deleted immediately Failing => SupportBundleStateView::Failed, Failed => SupportBundleStateView::Failed, } diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index 3f04cb1135..8f96c4fd7c 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -36,8 +36,8 @@ use uuid::Uuid; const CANNOT_ALLOCATE_ERR_MSG: &'static str = "Current policy limits support bundle creation to 'one per external disk', and \ - no disks are available. Either delete old support bundles or add additional \ - disks"; + no disks are available. You must delete old support bundles before new ones \ + can be created"; const FAILURE_REASON_NO_DATASET: &'static str = "Allocated dataset no longer exists"; @@ -50,8 +50,8 @@ pub struct SupportBundleExpungementReport { /// Bundles marked "failed" because the datasets storing them have been /// expunged. pub bundles_failed_missing_datasets: usize, - /// Bundles deleted because the datasets storing them have been - /// expunged. + /// Bundles already in the "destroying" state that have been deleted because + /// the datasets storing them have been expunged. 
pub bundles_deleted_missing_datasets: usize, /// Bundles marked "destroying" because the nexuses managing them have been @@ -79,7 +79,7 @@ impl DataStore { reason_for_creation: &'static str, this_nexus_id: OmicronZoneUuid, ) -> CreateResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; let conn = self.pool_connection_authorized(opctx).await?; #[derive(Debug)] @@ -164,7 +164,7 @@ impl DataStore { opctx: &OpContext, id: SupportBundleUuid, ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; use db::schema::support_bundle::dsl; let conn = self.pool_connection_authorized(opctx).await?; @@ -182,7 +182,7 @@ impl DataStore { opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; use db::schema::support_bundle::dsl; let conn = self.pool_connection_authorized(opctx).await?; @@ -202,7 +202,7 @@ impl DataStore { nexus_id: OmicronZoneUuid, states: Vec, ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; use db::schema::support_bundle::dsl; let conn = self.pool_connection_authorized(opctx).await?; @@ -223,7 +223,7 @@ impl DataStore { opctx: &OpContext, blueprint: &nexus_types::deployment::Blueprint, ) -> Result { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; // For this blueprint: The set of all expunged Nexus zones let invalid_nexus_zones = blueprint @@ -327,11 +327,10 @@ impl DataStore { // the dataset expungement speeds up the process. let bundles_deleted_missing_datasets = diesel::delete(dsl::support_bundle) - .filter(dsl::state.eq_any(state.valid_old_states())) .filter(dsl::id.eq_any(bundles_to_delete)) // This check should be redundant (we already // partitioned above based on this state) but out of - // an abundance of catuion we don't auto-delete a + // an abundance of caution we don't auto-delete a // bundle in any other state. 
.filter(dsl::state.eq(SupportBundleState::Destroying)) .execute_async(conn) @@ -410,7 +409,7 @@ impl DataStore { id: SupportBundleUuid, state: SupportBundleState, ) -> Result<(), Error> { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; use db::schema::support_bundle::dsl; let conn = self.pool_connection_authorized(opctx).await?; @@ -443,7 +442,7 @@ impl DataStore { opctx: &OpContext, id: SupportBundleUuid, ) -> Result<(), Error> { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; use db::schema::support_bundle::dsl; @@ -899,7 +898,7 @@ mod test { let observed_bundles = datastore .support_bundle_list(&opctx, &pagparams) .await - .expect("Should be able to get bundle we just created"); + .expect("Should be able to query when no bundles exist"); assert!(observed_bundles.is_empty()); db.terminate().await; @@ -1073,6 +1072,121 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn test_bundle_deleted_from_expunged_dataset() { + static TEST_NAME: &str = "test_bundle_deleted_from_expunged_dataset"; + let logctx = dev::test_setup_log(TEST_NAME); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let mut rng = SimRngState::from_seed(TEST_NAME); + let (_example, mut bp1) = ExampleSystemBuilder::new_with_rng( + &logctx.log, + rng.next_system_rng(), + ) + .build(); + + // Weirdly, the "ExampleSystemBuilder" blueprint has a parent blueprint, + // but which isn't exposed through the API. Since we're only able to see + // the blueprint it emits, that means we can't actually make it the + // target because "the parent blueprint is not the current target". + // + // Instead of dealing with that, we lie: claim this is the primordial + // blueprint, with no parent. + // + // Regardless, make this starter blueprint our target. + bp1.parent_blueprint_id = None; + bp_insert_and_make_target(&opctx, &datastore, &bp1).await; + + // Manually perform the equivalent of blueprint execution to populate + // database records. + let sleds = TestSled::new_from_blueprint(&bp1); + for sled in &sleds { + sled.create_database_records(&datastore, &opctx).await; + } + + // Extract Nexus and Dataset information from the generated blueprint. + let this_nexus_id = get_nexuses_from_blueprint( + &bp1, + BlueprintZoneFilter::ShouldBeRunning, + ) + .get(0) + .map(|id| *id) + .expect("There should be a Nexus in the example blueprint"); + let debug_datasets = get_debug_datasets_from_blueprint( + &bp1, + BlueprintDatasetFilter::InService, + ); + assert!(!debug_datasets.is_empty()); + + // When we create a bundle, it should exist on a dataset provisioned by + // the blueprint. + let bundle = datastore + .support_bundle_create(&opctx, "for the test", this_nexus_id) + .await + .expect("Should be able to create bundle"); + assert_eq!(bundle.assigned_nexus, Some(this_nexus_id.into())); + assert!( + debug_datasets.contains(&DatasetUuid::from(bundle.dataset_id)), + "Bundle should have been allocated from a blueprint dataset" + ); + + // Start the deletion of this bundle + datastore + .support_bundle_update( + &opctx, + bundle.id.into(), + SupportBundleState::Destroying, + ) + .await + .expect("Should have been able to update state"); + + // If we try to "fail support bundles" from expunged datasets/nexuses, + // we should see a no-op. Nothing has been expunged yet! 
+ let report = + datastore.support_bundle_fail_expunged(&opctx, &bp1).await.expect( + "Should have been able to perform no-op support bundle failure", + ); + assert_eq!(SupportBundleExpungementReport::default(), report); + + // Expunge the bundle's dataset (manually) + let bp2 = { + let mut bp2 = bp1.clone(); + bp2.id = Uuid::new_v4(); + bp2.parent_blueprint_id = Some(bp1.id); + expunge_dataset_for_bundle(&mut bp2, &bundle); + bp2 + }; + bp_insert_and_make_target(&opctx, &datastore, &bp2).await; + + datastore + .support_bundle_fail_expunged(&opctx, &bp1) + .await + .expect_err("bp1 is no longer the target; this should fail"); + let report = datastore + .support_bundle_fail_expunged(&opctx, &bp2) + .await + .expect("Should have been able to mark bundle state as failed"); + assert_eq!( + SupportBundleExpungementReport { + bundles_deleted_missing_datasets: 1, + ..Default::default() + }, + report + ); + + // Should observe no bundles (it should have been deleted) + let pagparams = DataPageParams::max_page(); + let observed_bundles = datastore + .support_bundle_list(&opctx, &pagparams) + .await + .expect("Should be able to query when no bundles exist"); + assert!(observed_bundles.is_empty()); + + db.terminate().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_bundle_failed_from_expunged_nexus_no_reassign() { static TEST_NAME: &str = diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 399b06570a..37380f6e43 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2449,7 +2449,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS one_bundle_per_dataset ON omicron.public.suppo dataset_id ); -CREATE INDEX IF NOT EXISTS lookup_bundle_by_neuxs ON omicron.public.support_bundle ( +CREATE INDEX IF NOT EXISTS lookup_bundle_by_nexus ON omicron.public.support_bundle ( assigned_nexus ); diff --git a/schema/crdb/support-bundles/up04.sql b/schema/crdb/support-bundles/up04.sql index 6df95640fd..58b903e500 100644 --- a/schema/crdb/support-bundles/up04.sql +++ b/schema/crdb/support-bundles/up04.sql @@ -1,4 +1,4 @@ -CREATE INDEX IF NOT EXISTS lookup_bundle_by_neuxs ON omicron.public.support_bundle ( +CREATE INDEX IF NOT EXISTS lookup_bundle_by_nexus ON omicron.public.support_bundle ( assigned_nexus ); From e29733de837068a67fa8c1e367cb6291dc964db6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 13 Dec 2024 14:25:51 -0800 Subject: [PATCH 21/25] Add to uncovered endpoints, since these aren't impl'd yet --- nexus/tests/output/uncovered-authz-endpoints.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nexus/tests/output/uncovered-authz-endpoints.txt b/nexus/tests/output/uncovered-authz-endpoints.txt index 13fb389d6e..0a9e62707f 100644 --- a/nexus/tests/output/uncovered-authz-endpoints.txt +++ b/nexus/tests/output/uncovered-authz-endpoints.txt @@ -6,9 +6,12 @@ probe_view (get "/experimental/v1/probes/{probe support_bundle_list (get "/experimental/v1/system/support-bundles") support_bundle_view (get "/experimental/v1/system/support-bundles/{support_bundle}") support_bundle_download (get "/experimental/v1/system/support-bundles/{support_bundle}/download") +support_bundle_download_file (get "/experimental/v1/system/support-bundles/{support_bundle}/download/{file}") +support_bundle_index (get "/experimental/v1/system/support-bundles/{support_bundle}/index") ping (get "/v1/ping") networking_switch_port_status (get "/v1/system/hardware/switch-port/{port}/status") support_bundle_head (head "/experimental/v1/system/support-bundles/{support_bundle}/download") 
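An aside on the endpoints listed here: the support_bundle_head and support_bundle_head_file operations mirror their GET counterparts so a client can learn a bundle's length and range support before committing to a large download (the simulated implementation in patch 22 answers with Content-Length and Accept-Ranges headers). A client-side sketch, assuming reqwest and tokio are available; the URL and bundle ID are placeholders, not real endpoints:

    use reqwest::header::{ACCEPT_RANGES, CONTENT_LENGTH};

    #[tokio::main]
    async fn main() -> Result<(), reqwest::Error> {
        // Hypothetical Nexus address and bundle ID, for illustration only.
        let url = "http://nexus.example/experimental/v1/system/support-bundles/11111111-2222-3333-4444-555555555555/download";
        // HEAD returns the same headers as GET -- notably Content-Length
        // and Accept-Ranges -- without streaming the bundle body itself.
        let resp = reqwest::Client::new().head(url).send().await?;
        let len = resp.headers().get(CONTENT_LENGTH);
        let ranges = resp.headers().get(ACCEPT_RANGES);
        println!("length: {len:?}, range support: {ranges:?}");
        Ok(())
    }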
+support_bundle_head_file (head "/experimental/v1/system/support-bundles/{support_bundle}/download/{file}") device_auth_request (post "/device/auth") device_auth_confirm (post "/device/confirm") device_access_token (post "/device/token") From d16fda0a6203306ee9d22ea42b9b9366057370da Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 17 Dec 2024 13:48:22 -0800 Subject: [PATCH 22/25] Use a shared support bundle implementation for the simulated sled agent --- nexus/src/app/sagas/disk_create.rs | 11 +- nexus/src/app/sagas/instance_create.rs | 2 +- nexus/src/app/sagas/instance_ip_attach.rs | 20 +- nexus/src/app/sagas/instance_ip_detach.rs | 10 +- nexus/test-utils/src/resource_helpers.rs | 65 +- .../crucible_replacements.rs | 1 - nexus/tests/integration_tests/disks.rs | 10 +- nexus/tests/integration_tests/instances.rs | 8 +- nexus/tests/integration_tests/snapshots.rs | 4 +- .../integration_tests/volume_management.rs | 12 +- sled-agent/src/sim/artifact_store.rs | 9 +- sled-agent/src/sim/http_entrypoints.rs | 149 ++-- sled-agent/src/sim/http_entrypoints_pantry.rs | 27 +- .../src/sim/http_entrypoints_storage.rs | 34 +- sled-agent/src/sim/mod.rs | 1 + sled-agent/src/sim/server.rs | 45 +- sled-agent/src/sim/sled_agent.rs | 230 +++---- sled-agent/src/sim/storage.rs | 638 ++++++++++-------- sled-agent/src/support_bundle/storage.rs | 159 ++++- 19 files changed, 787 insertions(+), 648 deletions(-) diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 51f6d29de1..7e04dbe857 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -1034,15 +1034,12 @@ pub(crate) mod test { true } - async fn no_regions_ensured( - sled_agent: &SledAgent, - test: &DiskTest<'_>, - ) -> bool { + fn no_regions_ensured(sled_agent: &SledAgent, test: &DiskTest<'_>) -> bool { for zpool in test.zpools() { for dataset in &zpool.datasets { let crucible_dataset = - sled_agent.get_crucible_dataset(zpool.id, dataset.id).await; - if !crucible_dataset.is_empty().await { + sled_agent.get_crucible_dataset(zpool.id, dataset.id); + if !crucible_dataset.is_empty() { return false; } } @@ -1073,7 +1070,7 @@ pub(crate) mod test { .await ); assert!(no_region_allocations_exist(datastore, &test).await); - assert!(no_regions_ensured(&sled_agent, &test).await); + assert!(no_regions_ensured(&sled_agent, &test)); assert!(test.crucible_resources_deleted().await); } diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 07f7911ef5..aa181d7b79 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1346,7 +1346,7 @@ pub mod test { assert!(disk_is_detached(datastore).await); assert!(no_instances_or_disks_on_sled(&sled_agent).await); - let v2p_mappings = &*sled_agent.v2p_mappings.lock().await; + let v2p_mappings = &*sled_agent.v2p_mappings.lock().unwrap(); assert!(v2p_mappings.is_empty()); } diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs index e6fb8654ea..b0d51f7201 100644 --- a/nexus/src/app/sagas/instance_ip_attach.rs +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -435,14 +435,16 @@ pub(crate) mod test { &instance_id, ) .await; - let mut eips = sled_agent.external_ips.lock().await; - let my_eips = eips.entry(vmm_id).or_default(); - assert!(my_eips - .iter() - .any(|v| matches!(v, InstanceExternalIpBody::Floating(_)))); - assert!(my_eips - .iter() - .any(|v| matches!(v, InstanceExternalIpBody::Ephemeral(_)))); + { + let mut eips = 
sled_agent.external_ips.lock().unwrap(); + let my_eips = eips.entry(vmm_id).or_default(); + assert!(my_eips + .iter() + .any(|v| matches!(v, InstanceExternalIpBody::Floating(_)))); + assert!(my_eips + .iter() + .any(|v| matches!(v, InstanceExternalIpBody::Ephemeral(_)))); + } // DB has records for SNAT plus the new IPs. let db_eips = datastore @@ -497,7 +499,7 @@ pub(crate) mod test { &instance_id, ) .await; - let mut eips = sled_agent.external_ips.lock().await; + let mut eips = sled_agent.external_ips.lock().unwrap(); let my_eips = eips.entry(vmm_id).or_default(); assert!(my_eips.is_empty()); } diff --git a/nexus/src/app/sagas/instance_ip_detach.rs b/nexus/src/app/sagas/instance_ip_detach.rs index d9da9fc05c..bec46f0269 100644 --- a/nexus/src/app/sagas/instance_ip_detach.rs +++ b/nexus/src/app/sagas/instance_ip_detach.rs @@ -405,9 +405,11 @@ pub(crate) mod test { &instance_id, ) .await; - let mut eips = sled_agent.external_ips.lock().await; - let my_eips = eips.entry(vmm_id).or_default(); - assert!(my_eips.is_empty()); + { + let mut eips = sled_agent.external_ips.lock().unwrap(); + let my_eips = eips.entry(vmm_id).or_default(); + assert!(my_eips.is_empty()); + } // DB only has record for SNAT. let db_eips = datastore @@ -467,7 +469,7 @@ pub(crate) mod test { assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat)); // No IP bindings remain on sled-agent. - let eips = &*sled_agent.external_ips.lock().await; + let eips = &*sled_agent.external_ips.lock().unwrap(); for (_nic_id, eip_set) in eips { assert_eq!(eip_set.len(), 2); } diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index c5cb0231d1..89f453c873 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -1282,26 +1282,24 @@ impl<'a, N: NexusServer> DiskTest<'a, N> { // Tell the simulated sled agent to create the disk and zpool containing // these datasets. - sled_agent - .create_external_physical_disk( - physical_disk_id, - disk_identity.clone(), - ) - .await; - sled_agent - .create_zpool(zpool.id, physical_disk_id, zpool.size.to_bytes()) - .await; + sled_agent.create_external_physical_disk( + physical_disk_id, + disk_identity.clone(), + ); + sled_agent.create_zpool( + zpool.id, + physical_disk_id, + zpool.size.to_bytes(), + ); for dataset in &zpool.datasets { // Sled Agent side: Create the Dataset, make sure regions can be // created immediately if Nexus requests anything. let address = - sled_agent.create_crucible_dataset(zpool.id, dataset.id).await; + sled_agent.create_crucible_dataset(zpool.id, dataset.id); let crucible = - sled_agent.get_crucible_dataset(zpool.id, dataset.id).await; - crucible - .set_create_callback(Box::new(|_| RegionState::Created)) - .await; + sled_agent.get_crucible_dataset(zpool.id, dataset.id); + crucible.set_create_callback(Box::new(|_| RegionState::Created)); // Nexus side: Notify Nexus of the physical disk/zpool/dataset // combination that exists. 
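An aside on the conversion visible in these hunks: patch 22 swaps the simulated sled agent's futures::lock::Mutex for std::sync::Mutex, which is why call sites change from lock().await to lock().unwrap(). This is sound because the simulated-storage methods never hold the guard across an await point. A minimal sketch of the pattern, using illustrative names rather than the real Storage internals:

    use std::collections::HashMap;
    use std::sync::{Arc, Mutex};

    // Illustrative stand-in for the simulated storage; the real type is
    // `Storage` in sled-agent/src/sim/storage.rs, which is more involved.
    #[derive(Clone, Default)]
    struct SimStorage {
        datasets: Arc<Mutex<HashMap<String, u64>>>,
    }

    impl SimStorage {
        // Synchronous accessors: each guard is acquired and dropped within
        // the method body, so async callers never hold it across an await.
        fn insert_dataset(&self, id: &str, size: u64) {
            self.datasets.lock().unwrap().insert(id.to_string(), size);
        }
        fn dataset_size(&self, id: &str) -> Option<u64> {
            self.datasets.lock().unwrap().get(id).copied()
        }
    }

    #[tokio::main] // assumes a tokio runtime, as the sled agent uses
    async fn main() {
        let storage = SimStorage::default();
        storage.insert_dataset("debug", 100);
        // No `.await` occurs between lock acquisition and release, so a
        // plain std::sync::Mutex suffices and is cheaper than an async one.
        assert_eq!(storage.dataset_size("debug"), Some(100));
    }

The same reasoning applies to the external_ips, v2p_mappings, and vpc_routes maps touched in the surrounding test diffs.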
@@ -1381,23 +1379,19 @@ impl<'a, N: NexusServer> DiskTest<'a, N> { for dataset in &zpool.datasets { let crucible = self .get_sled(*sled_id) - .get_crucible_dataset(zpool.id, dataset.id) - .await; + .get_crucible_dataset(zpool.id, dataset.id); let called = std::sync::atomic::AtomicBool::new(false); - crucible - .set_create_callback(Box::new(move |_| { - if !called.load(std::sync::atomic::Ordering::SeqCst) - { - called.store( - true, - std::sync::atomic::Ordering::SeqCst, - ); - RegionState::Requested - } else { - RegionState::Created - } - })) - .await; + crucible.set_create_callback(Box::new(move |_| { + if !called.load(std::sync::atomic::Ordering::SeqCst) { + called.store( + true, + std::sync::atomic::Ordering::SeqCst, + ); + RegionState::Requested + } else { + RegionState::Created + } + })); } } } @@ -1409,11 +1403,9 @@ impl<'a, N: NexusServer> DiskTest<'a, N> { for dataset in &zpool.datasets { let crucible = self .get_sled(*sled_id) - .get_crucible_dataset(zpool.id, dataset.id) - .await; + .get_crucible_dataset(zpool.id, dataset.id); crucible - .set_create_callback(Box::new(|_| RegionState::Failed)) - .await; + .set_create_callback(Box::new(|_| RegionState::Failed)); } } } @@ -1430,9 +1422,8 @@ impl<'a, N: NexusServer> DiskTest<'a, N> { for dataset in &zpool.datasets { let crucible = self .get_sled(*sled_id) - .get_crucible_dataset(zpool.id, dataset.id) - .await; - if !crucible.is_empty().await { + .get_crucible_dataset(zpool.id, dataset.id); + if !crucible.is_empty() { return false; } } diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 5f7e92c393..621df47448 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -373,7 +373,6 @@ impl<'a> RegionReplacementDeletedVolumeTest<'a> { .activate_background_attachment( region_replacement.volume_id.to_string(), ) - .await .unwrap(); } diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index d9888f9ccd..5da4e49a3a 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -1341,9 +1341,7 @@ async fn test_disk_virtual_provisioning_collection_failed_delete( .sled_agent .sled_agent .get_crucible_dataset(zpool.id, dataset.id) - .await - .set_region_deletion_error(true) - .await; + .set_region_deletion_error(true); // Delete the disk - expect this to fail NexusRequest::new( @@ -1379,9 +1377,7 @@ async fn test_disk_virtual_provisioning_collection_failed_delete( .sled_agent .sled_agent .get_crucible_dataset(zpool.id, dataset.id) - .await - .set_region_deletion_error(false) - .await; + .set_region_deletion_error(false); // Request disk delete again NexusRequest::new( @@ -2479,7 +2475,7 @@ async fn test_no_halt_disk_delete_one_region_on_expunged_agent( let zpool = disk_test.zpools().next().expect("Expected at least one zpool"); let dataset = &zpool.datasets[0]; - cptestctx.sled_agent.sled_agent.drop_dataset(zpool.id, dataset.id).await; + cptestctx.sled_agent.sled_agent.drop_dataset(zpool.id, dataset.id); // Spawn a task that tries to delete the disk let disk_url = get_disk_url(DISK_NAME); diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 163869896f..a1ffd73020 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -646,7 +646,7 @@ async fn test_instance_start_creates_networking_state( 
sled_agents.push(&cptestctx.sled_agent.sled_agent); for agent in &sled_agents { - agent.v2p_mappings.lock().await.clear(); + agent.v2p_mappings.lock().unwrap().clear(); } // Start the instance and make sure that it gets to Running. @@ -6244,7 +6244,7 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { // Validate that every sled no longer has the V2P mapping for this instance for sled_agent in &sled_agents { let condition = || async { - let v2p_mappings = sled_agent.v2p_mappings.lock().await; + let v2p_mappings = sled_agent.v2p_mappings.lock().unwrap(); if v2p_mappings.is_empty() { Ok(()) } else { @@ -6501,7 +6501,7 @@ async fn assert_sled_v2p_mappings( vni: Vni, ) { let condition = || async { - let v2p_mappings = sled_agent.v2p_mappings.lock().await; + let v2p_mappings = sled_agent.v2p_mappings.lock().unwrap(); let mapping = v2p_mappings.iter().find(|mapping| { mapping.virtual_ip == nic.ip && mapping.virtual_mac == nic.mac @@ -6573,7 +6573,7 @@ pub async fn assert_sled_vpc_routes( kind: RouterKind::Custom(db_subnet.ipv4_block.0.into()), }; - let vpc_routes = sled_agent.vpc_routes.lock().await; + let vpc_routes = sled_agent.vpc_routes.lock().unwrap(); let sys_routes_found = vpc_routes .iter() .any(|(id, set)| *id == sys_key && set.routes == system_routes); diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index accb4470fb..dff07732c7 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -980,9 +980,7 @@ async fn test_snapshot_unwind(cptestctx: &ControlPlaneTestContext) { .sled_agent .sled_agent .get_crucible_dataset(zpool.id, dataset.id) - .await - .set_creating_a_running_snapshot_should_fail() - .await; + .set_creating_a_running_snapshot_should_fail(); // Issue snapshot request, expecting it to fail let snapshots_url = format!("/v1/snapshots?project={}", PROJECT_NAME); diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 6a9ce28389..543e66ee30 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -2528,9 +2528,7 @@ async fn test_disk_create_saga_unwinds_correctly( .sled_agent .sled_agent .get_crucible_dataset(zpool.id, dataset.id) - .await - .set_region_creation_error(true) - .await; + .set_region_creation_error(true); let disk_size = ByteCount::from_gibibytes_u32(2); let base_disk = params::DiskCreate { @@ -2598,9 +2596,7 @@ async fn test_snapshot_create_saga_unwinds_correctly( .sled_agent .sled_agent .get_crucible_dataset(zpool.id, dataset.id) - .await - .set_region_creation_error(true) - .await; + .set_region_creation_error(true); // Create a snapshot let snapshot_create = params::SnapshotCreate { @@ -4225,11 +4221,9 @@ async fn test_read_only_region_reference_counting( TypedUuid::from_untyped_uuid(db_read_only_dataset.pool_id), db_read_only_dataset.id(), ) - .await .get(crucible_agent_client::types::RegionId( read_only_region.id().to_string() )) - .await .unwrap() .state, crucible_agent_client::types::State::Created @@ -4297,11 +4291,9 @@ async fn test_read_only_region_reference_counting( TypedUuid::from_untyped_uuid(db_read_only_dataset.pool_id), db_read_only_dataset.id(), ) - .await .get(crucible_agent_client::types::RegionId( read_only_region.id().to_string() )) - .await .unwrap() .state, crucible_agent_client::types::State::Destroyed diff --git a/sled-agent/src/sim/artifact_store.rs 
b/sled-agent/src/sim/artifact_store.rs index d73f5a2880..efb4fa9f36 100644 --- a/sled-agent/src/sim/artifact_store.rs +++ b/sled-agent/src/sim/artifact_store.rs @@ -5,10 +5,7 @@ //! Implementation of `crate::artifact_store::StorageBackend` for our simulated //! storage. -use std::sync::Arc; - use camino_tempfile::Utf8TempDir; -use futures::lock::Mutex; use sled_storage::error::Error as StorageError; use super::storage::Storage; @@ -16,11 +13,11 @@ use crate::artifact_store::DatasetsManager; pub(super) struct SimArtifactStorage { root: Utf8TempDir, - backend: Arc>, + backend: Storage, } impl SimArtifactStorage { - pub(super) fn new(backend: Arc>) -> SimArtifactStorage { + pub(super) fn new(backend: Storage) -> SimArtifactStorage { SimArtifactStorage { root: camino_tempfile::tempdir().unwrap(), backend, @@ -36,9 +33,7 @@ impl DatasetsManager for SimArtifactStorage { let config = self .backend .lock() - .await .datasets_config_list() - .await .map_err(|_| StorageError::LedgerNotFound)?; Ok(crate::artifact_store::filter_dataset_mountpoints( config, diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 60dcb1be31..227270200d 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -5,6 +5,7 @@ //! HTTP entrypoint functions for the sled agent's exposed API use super::collection::PokeMode; +use crate::support_bundle::storage::SupportBundleQueryType; use camino::Utf8PathBuf; use dropshot::endpoint; use dropshot::ApiDescription; @@ -37,6 +38,7 @@ use omicron_common::disk::DatasetsManagementResult; use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_common::update::ArtifactHash; +use range_requests::RequestContextEx; use sled_agent_api::*; use sled_agent_types::boot_disk::BootDiskOsWriteStatus; use sled_agent_types::boot_disk::BootDiskPathParams; @@ -240,7 +242,6 @@ impl SledAgentApi for SledAgentSimImpl { path_params.disk_id, body.snapshot_id, ) - .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; Ok(HttpResponseOk(VmmIssueDiskSnapshotRequestResponse { @@ -268,7 +269,6 @@ impl SledAgentApi for SledAgentSimImpl { let body_args = body.into_inner(); sa.set_virtual_nic_host(&body_args) - .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; Ok(HttpResponseUpdatedNoContent()) @@ -282,7 +282,6 @@ impl SledAgentApi for SledAgentSimImpl { let body_args = body.into_inner(); sa.unset_virtual_nic_host(&body_args) - .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; Ok(HttpResponseUpdatedNoContent()) @@ -294,7 +293,7 @@ impl SledAgentApi for SledAgentSimImpl { { let sa = rqctx.context(); - let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; + let vnics = sa.list_virtual_nics().map_err(HttpError::from)?; Ok(HttpResponseOk(vnics)) } @@ -310,7 +309,7 @@ impl SledAgentApi for SledAgentSimImpl { rqctx: RequestContext, ) -> Result, HttpError> { let config = - rqctx.context().bootstore_network_config.lock().await.clone(); + rqctx.context().bootstore_network_config.lock().unwrap().clone(); Ok(HttpResponseOk(config)) } @@ -318,7 +317,8 @@ impl SledAgentApi for SledAgentSimImpl { rqctx: RequestContext, body: TypedBody, ) -> Result { - let mut config = rqctx.context().bootstore_network_config.lock().await; + let mut config = + rqctx.context().bootstore_network_config.lock().unwrap(); *config = body.into_inner(); Ok(HttpResponseUpdatedNoContent()) } @@ -329,7 +329,7 @@ impl SledAgentApi for SledAgentSimImpl { ) 
-> Result, HttpError> { let sa = rqctx.context(); Ok(HttpResponseOk( - sa.inventory(rqctx.server.local_addr).await.map_err(|e| { + sa.inventory(rqctx.server.local_addr).map_err(|e| { HttpError::for_internal_error(format!("{:#}", e)) })?, )) @@ -341,7 +341,7 @@ impl SledAgentApi for SledAgentSimImpl { ) -> Result, HttpError> { let sa = rqctx.context(); let body_args = body.into_inner(); - let result = sa.datasets_ensure(body_args).await?; + let result = sa.datasets_ensure(body_args)?; Ok(HttpResponseOk(result)) } @@ -349,7 +349,7 @@ impl SledAgentApi for SledAgentSimImpl { rqctx: RequestContext, ) -> Result, HttpError> { let sa = rqctx.context(); - Ok(HttpResponseOk(sa.datasets_config_list().await?)) + Ok(HttpResponseOk(sa.datasets_config_list()?)) } async fn omicron_physical_disks_put( @@ -358,7 +358,7 @@ impl SledAgentApi for SledAgentSimImpl { ) -> Result, HttpError> { let sa = rqctx.context(); let body_args = body.into_inner(); - let result = sa.omicron_physical_disks_ensure(body_args).await?; + let result = sa.omicron_physical_disks_ensure(body_args)?; Ok(HttpResponseOk(result)) } @@ -366,7 +366,7 @@ impl SledAgentApi for SledAgentSimImpl { rqctx: RequestContext, ) -> Result, HttpError> { let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + Ok(HttpResponseOk(sa.omicron_physical_disks_list()?)) } async fn omicron_zones_put( @@ -375,7 +375,7 @@ impl SledAgentApi for SledAgentSimImpl { ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); - sa.omicron_zones_ensure(body_args).await; + sa.omicron_zones_ensure(body_args); Ok(HttpResponseUpdatedNoContent()) } @@ -390,7 +390,7 @@ impl SledAgentApi for SledAgentSimImpl { rqctx: RequestContext, ) -> Result>, HttpError> { let sa = rqctx.context(); - Ok(HttpResponseOk(sa.list_vpc_routes().await)) + Ok(HttpResponseOk(sa.list_vpc_routes())) } async fn set_vpc_routes( @@ -398,7 +398,7 @@ impl SledAgentApi for SledAgentSimImpl { body: TypedBody>, ) -> Result { let sa = rqctx.context(); - sa.set_vpc_routes(body.into_inner()).await; + sa.set_vpc_routes(body.into_inner()); Ok(HttpResponseUpdatedNoContent()) } @@ -419,7 +419,7 @@ impl SledAgentApi for SledAgentSimImpl { rqctx: RequestContext, path_params: Path, query_params: Query, - _body: StreamingBody, + body: StreamingBody, ) -> Result, HttpError> { let sa = rqctx.context(); @@ -433,6 +433,7 @@ impl SledAgentApi for SledAgentSimImpl { dataset_id, support_bundle_id, hash, + body.into_stream(), ) .await?, )) @@ -446,15 +447,15 @@ impl SledAgentApi for SledAgentSimImpl { let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = path_params.into_inner(); - sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; - - Ok(http::Response::builder() - .status(http::StatusCode::OK) - .header(http::header::CONTENT_TYPE, "text/html") - .body(dropshot::Body::with_content( - "simulated support bundle; do not eat", - )) - .unwrap()) + let range = rqctx.range(); + sa.support_bundle_get( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Whole, + ) + .await } async fn support_bundle_download_file( @@ -465,18 +466,18 @@ impl SledAgentApi for SledAgentSimImpl { let SupportBundleFilePathParam { parent: SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id }, - file: _, + file, } = path_params.into_inner(); - sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; - - Ok(http::Response::builder() - .status(http::StatusCode::OK) - .header(http::header::CONTENT_TYPE, "text/html") - 
.body(dropshot::Body::with_content( - "simulated support bundle file; do not eat", - )) - .unwrap()) + let range = rqctx.range(); + sa.support_bundle_get( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Path { file_path: file }, + ) + .await } async fn support_bundle_index( @@ -487,15 +488,15 @@ impl SledAgentApi for SledAgentSimImpl { let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = path_params.into_inner(); - sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; - - Ok(http::Response::builder() - .status(http::StatusCode::OK) - .header(http::header::CONTENT_TYPE, "text/html") - .body(dropshot::Body::with_content( - "simulated support bundle index; do not eat", - )) - .unwrap()) + let range = rqctx.range(); + sa.support_bundle_get( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Index, + ) + .await } async fn support_bundle_head( @@ -506,17 +507,15 @@ impl SledAgentApi for SledAgentSimImpl { let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = path_params.into_inner(); - sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; - - let fictional_length = 10000; - - Ok(http::Response::builder() - .status(http::StatusCode::OK) - .header(http::header::CONTENT_TYPE, "text/html") - .header(hyper::header::ACCEPT_RANGES, "bytes") - .header(hyper::header::CONTENT_LENGTH, fictional_length) - .body(dropshot::Body::empty()) - .unwrap()) + let range = rqctx.range(); + sa.support_bundle_head( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Whole, + ) + .await } async fn support_bundle_head_file( @@ -527,20 +526,18 @@ impl SledAgentApi for SledAgentSimImpl { let SupportBundleFilePathParam { parent: SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id }, - file: _, + file, } = path_params.into_inner(); - sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; - - let fictional_length = 10000; - - Ok(http::Response::builder() - .status(http::StatusCode::OK) - .header(http::header::CONTENT_TYPE, "text/html") - .header(hyper::header::ACCEPT_RANGES, "bytes") - .header(hyper::header::CONTENT_LENGTH, fictional_length) - .body(dropshot::Body::empty()) - .unwrap()) + let range = rqctx.range(); + sa.support_bundle_get( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Path { file_path: file }, + ) + .await } async fn support_bundle_head_index( @@ -551,17 +548,15 @@ impl SledAgentApi for SledAgentSimImpl { let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = path_params.into_inner(); - sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; - - let fictional_length = 10000; - - Ok(http::Response::builder() - .status(http::StatusCode::OK) - .header(http::header::CONTENT_TYPE, "text/html") - .header(hyper::header::ACCEPT_RANGES, "bytes") - .header(hyper::header::CONTENT_LENGTH, fictional_length) - .body(dropshot::Body::empty()) - .unwrap()) + let range = rqctx.range(); + sa.support_bundle_head( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Index, + ) + .await } async fn support_bundle_delete( diff --git a/sled-agent/src/sim/http_entrypoints_pantry.rs b/sled-agent/src/sim/http_entrypoints_pantry.rs index c98c7db665..cbdc9329d4 100644 --- a/sled-agent/src/sim/http_entrypoints_pantry.rs +++ b/sled-agent/src/sim/http_entrypoints_pantry.rs @@ -69,7 +69,7 @@ async fn pantry_status( ) -> Result, HttpError> { let pantry = 
rc.context(); - let status = pantry.status().await?; + let status = pantry.status()?; Ok(HttpResponseOk(status)) } @@ -103,7 +103,7 @@ async fn volume_status( let path = path.into_inner(); let pantry = rc.context(); - let status = pantry.volume_status(path.id.clone()).await?; + let status = pantry.volume_status(path.id.clone())?; Ok(HttpResponseOk(status)) } @@ -134,7 +134,6 @@ async fn attach( pantry .attach(path.id.clone(), body.volume_construction_request) - .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; Ok(HttpResponseOk(AttachResult { id: path.id })) @@ -161,13 +160,11 @@ async fn attach_activate_background( let body = body.into_inner(); let pantry = rc.context(); - pantry - .attach_activate_background( - path.id.clone(), - body.job_id, - body.volume_construction_request, - ) - .await?; + pantry.attach_activate_background( + path.id.clone(), + body.job_id, + body.volume_construction_request, + )?; Ok(HttpResponseUpdatedNoContent()) } @@ -194,7 +191,7 @@ async fn is_job_finished( let path = path.into_inner(); let pantry = rc.context(); - let job_is_finished = pantry.is_job_finished(path.id).await?; + let job_is_finished = pantry.is_job_finished(path.id)?; Ok(HttpResponseOk(JobPollResponse { job_is_finished })) } @@ -217,7 +214,7 @@ async fn job_result_ok( let path = path.into_inner(); let pantry = rc.context(); - let job_result = pantry.get_job_result(path.id).await?; + let job_result = pantry.get_job_result(path.id)?; match job_result { Ok(job_result_ok) => { @@ -260,7 +257,6 @@ async fn import_from_url( let job_id = pantry .import_from_url(path.id.clone(), body.url, body.expected_digest) - .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; Ok(HttpResponseOk(ImportFromUrlResponse { job_id })) @@ -287,7 +283,6 @@ async fn snapshot( pantry .snapshot(path.id.clone(), body.snapshot_id) - .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; Ok(HttpResponseUpdatedNoContent()) @@ -320,7 +315,7 @@ async fn bulk_write( ) .map_err(|e| HttpError::for_bad_request(None, e.to_string()))?; - pantry.bulk_write(path.id.clone(), body.offset, data).await?; + pantry.bulk_write(path.id.clone(), body.offset, data)?; Ok(HttpResponseUpdatedNoContent()) } @@ -344,7 +339,6 @@ async fn scrub( let job_id = pantry .scrub(path.id.clone()) - .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; Ok(HttpResponseOk(ScrubResponse { job_id })) @@ -364,7 +358,6 @@ async fn detach( pantry .detach(path.id) - .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; Ok(HttpResponseDeleted()) diff --git a/sled-agent/src/sim/http_entrypoints_storage.rs b/sled-agent/src/sim/http_entrypoints_storage.rs index 34e26f5191..cb53d96fd1 100644 --- a/sled-agent/src/sim/http_entrypoints_storage.rs +++ b/sled-agent/src/sim/http_entrypoints_storage.rs @@ -63,7 +63,7 @@ async fn region_list( rc: RequestContext>, ) -> Result>, HttpError> { let crucible = rc.context(); - Ok(HttpResponseOk(crucible.list().await)) + Ok(HttpResponseOk(crucible.list())) } #[endpoint { @@ -77,7 +77,7 @@ async fn region_create( let params = body.into_inner(); let crucible = rc.context(); - let region = crucible.create(params).await.map_err(|e| { + let region = crucible.create(params).map_err(|e| { HttpError::for_internal_error( format!("region create failure: {:?}", e,), ) @@ -97,7 +97,7 @@ async fn region_get( let id = path.into_inner().id; let crucible = rc.context(); - match crucible.get(id).await { + match crucible.get(id) { Some(region) => Ok(HttpResponseOk(region)), None => { 
Err(HttpError::for_not_found(None, "Region not found".to_string())) @@ -118,7 +118,6 @@ async fn region_delete( crucible .delete(id) - .await .map_err(|e| HttpError::for_bad_request(None, e.to_string()))?; Ok(HttpResponseDeleted()) @@ -136,16 +135,16 @@ async fn region_get_snapshots( let crucible = rc.context(); - if crucible.get(id.clone()).await.is_none() { + if crucible.get(id.clone()).is_none() { return Err(HttpError::for_not_found( None, "Region not found".to_string(), )); } - let snapshots = crucible.snapshots_for_region(&id).await; + let snapshots = crucible.snapshots_for_region(&id); - let running_snapshots = crucible.running_snapshots_for_id(&id).await; + let running_snapshots = crucible.running_snapshots_for_id(&id); Ok(HttpResponseOk(GetSnapshotResponse { snapshots, running_snapshots })) } @@ -167,14 +166,14 @@ async fn region_get_snapshot( let p = path.into_inner(); let crucible = rc.context(); - if crucible.get(p.id.clone()).await.is_none() { + if crucible.get(p.id.clone()).is_none() { return Err(HttpError::for_not_found( None, "Region not found".to_string(), )); } - for snapshot in &crucible.snapshots_for_region(&p.id).await { + for snapshot in &crucible.snapshots_for_region(&p.id) { if snapshot.name == p.name { return Ok(HttpResponseOk(snapshot.clone())); } @@ -203,7 +202,7 @@ async fn region_delete_snapshot( let p = path.into_inner(); let crucible = rc.context(); - if crucible.get(p.id.clone()).await.is_none() { + if crucible.get(p.id.clone()).is_none() { return Err(HttpError::for_not_found( None, "Region not found".to_string(), @@ -212,7 +211,6 @@ async fn region_delete_snapshot( crucible .delete_snapshot(&p.id, &p.name) - .await .map_err(|e| HttpError::for_bad_request(None, e.to_string()))?; Ok(HttpResponseDeleted()) @@ -235,14 +233,14 @@ async fn region_run_snapshot( let p = path.into_inner(); let crucible = rc.context(); - if crucible.get(p.id.clone()).await.is_none() { + if crucible.get(p.id.clone()).is_none() { return Err(HttpError::for_not_found( None, "Region not found".to_string(), )); } - let snapshots = crucible.snapshots_for_region(&p.id).await; + let snapshots = crucible.snapshots_for_region(&p.id); if !snapshots.iter().any(|x| x.name == p.name) { return Err(HttpError::for_not_found( @@ -251,10 +249,8 @@ async fn region_run_snapshot( )); } - let running_snapshot = crucible - .create_running_snapshot(&p.id, &p.name) - .await - .map_err(|e| { + let running_snapshot = + crucible.create_running_snapshot(&p.id, &p.name).map_err(|e| { HttpError::for_internal_error(format!( "running snapshot create failure: {:?}", e, @@ -275,14 +271,14 @@ async fn region_delete_running_snapshot( let p = path.into_inner(); let crucible = rc.context(); - if crucible.get(p.id.clone()).await.is_none() { + if crucible.get(p.id.clone()).is_none() { return Err(HttpError::for_not_found( None, "Region not found".to_string(), )); } - crucible.delete_running_snapshot(&p.id, &p.name).await.map_err(|e| { + crucible.delete_running_snapshot(&p.id, &p.name).map_err(|e| { HttpError::for_internal_error(format!( "running snapshot create failure: {:?}", e, diff --git a/sled-agent/src/sim/mod.rs b/sled-agent/src/sim/mod.rs index ab3b155b36..c59af4ccce 100644 --- a/sled-agent/src/sim/mod.rs +++ b/sled-agent/src/sim/mod.rs @@ -24,3 +24,4 @@ pub use config::{ }; pub use server::{run_standalone_server, RssArgs, Server}; pub use sled_agent::SledAgent; +pub(crate) use storage::Storage; diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 3833d5ca7c..ef13cdfcdd 100644 --- 
a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -188,23 +188,19 @@ impl Server { let vendor = "synthetic-vendor".to_string(); let serial = format!("synthetic-serial-{zpool_id}"); let model = "synthetic-model".to_string(); - sled_agent - .create_external_physical_disk( - physical_disk_id, - DiskIdentity { - vendor: vendor.clone(), - serial: serial.clone(), - model: model.clone(), - }, - ) - .await; + sled_agent.create_external_physical_disk( + physical_disk_id, + DiskIdentity { + vendor: vendor.clone(), + serial: serial.clone(), + model: model.clone(), + }, + ); - sled_agent - .create_zpool(zpool_id, physical_disk_id, zpool.size) - .await; + sled_agent.create_zpool(zpool_id, physical_disk_id, zpool.size); let dataset_id = DatasetUuid::new_v4(); let address = - sled_agent.create_crucible_dataset(zpool_id, dataset_id).await; + sled_agent.create_crucible_dataset(zpool_id, dataset_id); datasets.push(NexusTypes::DatasetCreateRequest { zpool_id: zpool_id.into_untyped_uuid(), @@ -218,10 +214,8 @@ impl Server { // Whenever Nexus tries to allocate a region, it should complete // immediately. What efficiency! let crucible = - sled_agent.get_crucible_dataset(zpool_id, dataset_id).await; - crucible - .set_create_callback(Box::new(|_| RegionState::Created)) - .await; + sled_agent.get_crucible_dataset(zpool_id, dataset_id); + crucible.set_create_callback(Box::new(|_| RegionState::Created)) } Ok(Server { @@ -240,8 +234,7 @@ impl Server { self.log.new(o!("kind" => "pantry")), self.config.storage.ip, self.sled_agent.clone(), - ) - .await; + ); self.pantry_server = Some(pantry_server); self.pantry_server.as_ref().unwrap() } @@ -370,7 +363,7 @@ pub async fn run_standalone_server( dns.initialize_with_config(&log, &dns_config).await?; let internal_dns_version = dns_config.generation; - let all_u2_zpools = server.sled_agent.get_zpools().await; + let all_u2_zpools = server.sled_agent.get_zpools(); let get_random_zpool = || { use rand::seq::SliceRandom; let pool = all_u2_zpools @@ -516,12 +509,12 @@ pub async fn run_standalone_server( }; let mut datasets = vec![]; - let physical_disks = server.sled_agent.get_all_physical_disks().await; - let zpools = server.sled_agent.get_zpools().await; + let physical_disks = server.sled_agent.get_all_physical_disks(); + let zpools = server.sled_agent.get_zpools(); for zpool in &zpools { let zpool_id = ZpoolUuid::from_untyped_uuid(zpool.id); for (dataset_id, address) in - server.sled_agent.get_crucible_datasets(zpool_id).await + server.sled_agent.get_crucible_datasets(zpool_id) { datasets.push(NexusTypes::DatasetCreateRequest { zpool_id: zpool.id, @@ -540,7 +533,7 @@ pub async fn run_standalone_server( }; let omicron_physical_disks_config = - server.sled_agent.omicron_physical_disks_list().await?; + server.sled_agent.omicron_physical_disks_list()?; let mut sled_configs = BTreeMap::new(); sled_configs.insert( config.id, @@ -559,7 +552,7 @@ pub async fn run_standalone_server( }) .collect(), }, - datasets: server.sled_agent.datasets_config_list().await?, + datasets: server.sled_agent.datasets_config_list()?, zones, }, ); diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 1f099fc036..b0846c7216 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -14,11 +14,14 @@ use super::storage::Storage; use crate::artifact_store::ArtifactStore; use crate::nexus::NexusClient; use crate::sim::simulatable::Simulatable; +use crate::support_bundle::storage::SupportBundleQueryType; use 
crate::updates::UpdateManager; use anyhow::bail; use anyhow::Context; +use bytes::Bytes; +use dropshot::Body; use dropshot::HttpError; -use futures::lock::Mutex; +use futures::Stream; use nexus_sled_agent_shared::inventory::{ Inventory, InventoryDataset, InventoryDisk, InventoryZpool, OmicronZonesConfig, SledRole, @@ -47,8 +50,8 @@ use oxnet::Ipv6Net; use propolis_client::{ types::VolumeConstructionRequest, Client as PropolisClient, }; +use range_requests::PotentialRange; use sled_agent_api::SupportBundleMetadata; -use sled_agent_api::SupportBundleState; use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::{ EarlyNetworkConfig, EarlyNetworkConfigBody, @@ -62,6 +65,7 @@ use std::collections::{HashMap, HashSet, VecDeque}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::str::FromStr; use std::sync::Arc; +use std::sync::Mutex; use std::time::Duration; use uuid::Uuid; @@ -79,14 +83,15 @@ pub struct SledAgent { vmms: Arc>, /// collection of simulated disks, indexed by disk uuid disks: Arc>, - storage: Arc>, + storage: Storage, updates: UpdateManager, nexus_address: SocketAddr, pub nexus_client: Arc, disk_id_to_region_ids: Mutex>>, pub v2p_mappings: Mutex>, - mock_propolis: - Mutex>, + mock_propolis: futures::lock::Mutex< + Option<(propolis_mock_server::Server, PropolisClient)>, + >, /// lists of external IPs assigned to instances pub external_ips: Mutex>>, @@ -173,11 +178,11 @@ impl SledAgent { }, }); - let storage = Arc::new(Mutex::new(Storage::new( + let storage = Storage::new( id.into_untyped_uuid(), config.storage.ip, storage_log, - ))); + ); let artifacts = ArtifactStore::new(&log, SimArtifactStorage::new(storage.clone())); @@ -202,7 +207,7 @@ impl SledAgent { v2p_mappings: Mutex::new(HashSet::new()), external_ips: Mutex::new(HashMap::new()), vpc_routes: Mutex::new(HashMap::new()), - mock_propolis: Mutex::new(None), + mock_propolis: futures::lock::Mutex::new(None), config: config.clone(), fake_zones: Mutex::new(OmicronZonesConfig { generation: Generation::new(), @@ -222,7 +227,7 @@ impl SledAgent { /// three crucible regions). Extract the region addresses, lookup the region /// from the port and pair disk id with region ids. This map is referred to /// later when making snapshots. 
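// The struct above trades `futures::lock::Mutex` for `std::sync::Mutex` on
// every field except the mock Propolis server, whose lock is still held
// across await points. A minimal sketch of the cloneable-handle shape this
// enables; `Handle` and its u64 payload are illustrative, not from this
// patch:

use std::sync::{Arc, Mutex};

#[derive(Clone)]
struct Handle {
    inner: Arc<Mutex<u64>>, // cheap to clone; all clones share one state
}

impl Handle {
    fn bump(&self) -> u64 {
        // The guard lives only for this short synchronous section and is
        // never held across an await.
        let mut guard = self.inner.lock().unwrap();
        *guard += 1;
        *guard
    }
}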
- pub async fn map_disk_ids_to_region_ids( + pub fn map_disk_ids_to_region_ids( &self, volume_construction_request: &VolumeConstructionRequest, ) -> Result<(), Error> { @@ -243,11 +248,10 @@ impl SledAgent { let mut region_ids = Vec::new(); - let storage = self.storage.lock().await; + let storage = self.storage.lock(); for target in targets { let region = storage .get_region_for_port(target.port()) - .await .ok_or_else(|| { Error::internal_error(&format!( "no region for port {}", @@ -259,7 +263,8 @@ impl SledAgent { region_ids.push(region_id); } - let mut disk_id_to_region_ids = self.disk_id_to_region_ids.lock().await; + let mut disk_id_to_region_ids = + self.disk_id_to_region_ids.lock().unwrap(); disk_id_to_region_ids.insert(disk_id.to_string(), region_ids.clone()); Ok(()) @@ -398,10 +403,10 @@ impl SledAgent { for disk_request in &hardware.disks { let vcr = &disk_request.volume_construction_request; - self.map_disk_ids_to_region_ids(&vcr).await?; + self.map_disk_ids_to_region_ids(&vcr)?; } - let mut routes = self.vpc_routes.lock().await; + let mut routes = self.vpc_routes.lock().unwrap(); for nic in &hardware.nics { let my_routers = [ RouterId { vni: nic.vni, kind: RouterKind::System }, @@ -449,7 +454,8 @@ impl SledAgent { propolis_id: PropolisUuid, state: VmmStateRequested, ) -> Result { - if let Some(e) = self.instance_ensure_state_error.lock().await.as_ref() + if let Some(e) = + self.instance_ensure_state_error.lock().unwrap().as_ref() { return Err(e.clone().into()); } @@ -552,7 +558,7 @@ impl SledAgent { } pub async fn set_instance_ensure_state_error(&self, error: Option) { - *self.instance_ensure_state_error.lock().await = error; + *self.instance_ensure_state_error.lock().unwrap() = error; } /// Idempotently ensures that the given API Disk (described by `api_disk`) @@ -592,89 +598,58 @@ impl SledAgent { } /// Adds a Physical Disk to the simulated sled agent. - pub async fn create_external_physical_disk( + pub fn create_external_physical_disk( &self, id: PhysicalDiskUuid, identity: DiskIdentity, ) { let variant = DiskVariant::U2; - self.storage - .lock() - .await - .insert_physical_disk(id, identity, variant) - .await; + self.storage.lock().insert_physical_disk(id, identity, variant); } - pub async fn get_all_physical_disks( + pub fn get_all_physical_disks( &self, ) -> Vec { - self.storage.lock().await.get_all_physical_disks() + self.storage.lock().get_all_physical_disks() } - pub async fn get_zpools( - &self, - ) -> Vec { - self.storage.lock().await.get_all_zpools() + pub fn get_zpools(&self) -> Vec { + self.storage.lock().get_all_zpools() } - pub async fn get_crucible_datasets( + pub fn get_crucible_datasets( &self, zpool_id: ZpoolUuid, ) -> Vec<(DatasetUuid, SocketAddr)> { - self.storage.lock().await.get_all_crucible_datasets(zpool_id) + self.storage.lock().get_all_crucible_datasets(zpool_id) } /// Adds a Zpool to the simulated sled agent. - pub async fn create_zpool( + pub fn create_zpool( &self, id: ZpoolUuid, physical_disk_id: PhysicalDiskUuid, size: u64, ) { - self.storage - .lock() - .await - .insert_zpool(id, physical_disk_id, size) - .await; - } - - /// Adds a debug dataset within a zpool - pub async fn create_debug_dataset( - &self, - zpool_id: ZpoolUuid, - dataset_id: DatasetUuid, - ) { - self.storage - .lock() - .await - .insert_debug_dataset(zpool_id, dataset_id) - .await + self.storage.lock().insert_zpool(id, physical_disk_id, size); } /// Adds a Crucible Dataset within a zpool. 
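// These methods can drop `async` because every lock is scoped to a block
// containing no await. A sketch of that guard-scoping discipline for an
// async caller (`State` and `tick` are illustrative, assuming tokio):

use std::sync::Mutex;
use std::time::Duration;

struct State {
    counter: Mutex<u64>,
}

impl State {
    async fn tick(&self) -> u64 {
        let value = {
            let mut guard = self.counter.lock().unwrap();
            *guard += 1;
            *guard
        }; // guard dropped here, before the await, so the future stays Send
        tokio::time::sleep(Duration::from_millis(1)).await;
        value
    }
}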
- pub async fn create_crucible_dataset( + pub fn create_crucible_dataset( &self, zpool_id: ZpoolUuid, dataset_id: DatasetUuid, ) -> SocketAddr { - self.storage - .lock() - .await - .insert_crucible_dataset(zpool_id, dataset_id) - .await + self.storage.lock().insert_crucible_dataset(zpool_id, dataset_id) } /// Returns a crucible dataset within a particular zpool. - pub async fn get_crucible_dataset( + pub fn get_crucible_dataset( &self, zpool_id: ZpoolUuid, dataset_id: DatasetUuid, ) -> Arc { - self.storage - .lock() - .await - .get_crucible_dataset(zpool_id, dataset_id) - .await + self.storage.lock().get_crucible_dataset(zpool_id, dataset_id) } /// Issue a snapshot request for a Crucible disk attached to an instance. @@ -686,7 +661,7 @@ impl SledAgent { /// /// We're not simulating the propolis server, so directly create a /// snapshot here. - pub async fn instance_issue_disk_snapshot_request( + pub fn instance_issue_disk_snapshot_request( &self, _propolis_id: PropolisUuid, disk_id: Uuid, @@ -696,7 +671,7 @@ impl SledAgent { // for each region that makes up the disk. Use the disk_id_to_region_ids // map to perform lookup based on this function's disk id argument. - let disk_id_to_region_ids = self.disk_id_to_region_ids.lock().await; + let disk_id_to_region_ids = self.disk_id_to_region_ids.lock().unwrap(); let region_ids = disk_id_to_region_ids.get(&disk_id.to_string()); let region_ids = region_ids.ok_or_else(|| { @@ -705,16 +680,14 @@ impl SledAgent { info!(self.log, "disk id {} region ids are {:?}", disk_id, region_ids); - let storage = self.storage.lock().await; + let storage = self.storage.lock(); for region_id in region_ids { - let crucible_data = - storage.get_dataset_for_region(*region_id).await; + let crucible_data = storage.get_dataset_for_region(*region_id); if let Some(crucible_data) = crucible_data { crucible_data .create_snapshot(*region_id, snapshot_id) - .await .map_err(|e| Error::internal_error(&e.to_string()))?; } else { return Err(Error::not_found_by_id( @@ -727,28 +700,28 @@ impl SledAgent { Ok(()) } - pub async fn set_virtual_nic_host( + pub fn set_virtual_nic_host( &self, mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { - let mut v2p_mappings = self.v2p_mappings.lock().await; + let mut v2p_mappings = self.v2p_mappings.lock().unwrap(); v2p_mappings.insert(mapping.clone()); Ok(()) } - pub async fn unset_virtual_nic_host( + pub fn unset_virtual_nic_host( &self, mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { - let mut v2p_mappings = self.v2p_mappings.lock().await; + let mut v2p_mappings = self.v2p_mappings.lock().unwrap(); v2p_mappings.remove(mapping); Ok(()) } - pub async fn list_virtual_nics( + pub fn list_virtual_nics( &self, ) -> Result, Error> { - let v2p_mappings = self.v2p_mappings.lock().await; + let v2p_mappings = self.v2p_mappings.lock().unwrap(); Ok(Vec::from_iter(v2p_mappings.clone())) } @@ -763,7 +736,7 @@ impl SledAgent { )); } - let mut eips = self.external_ips.lock().await; + let mut eips = self.external_ips.lock().unwrap(); let my_eips = eips.entry(propolis_id).or_default(); // High-level behaviour: this should always succeed UNLESS @@ -796,7 +769,7 @@ impl SledAgent { )); } - let mut eips = self.external_ips.lock().await; + let mut eips = self.external_ips.lock().unwrap(); let my_eips = eips.entry(propolis_id).or_default(); my_eips.remove(&body_args); @@ -841,10 +814,7 @@ impl SledAgent { Ok(addr) } - pub async fn inventory( - &self, - addr: SocketAddr, - ) -> anyhow::Result { + pub fn inventory(&self, addr: SocketAddr) -> 
anyhow::Result { let sled_agent_address = match addr { SocketAddr::V4(_) => { bail!("sled_agent_ip must be v6 for inventory") @@ -852,7 +822,7 @@ impl SledAgent { SocketAddr::V6(v6) => v6, }; - let storage = self.storage.lock().await; + let storage = self.storage.lock(); Ok(Inventory { sled_id: self.id, sled_agent_address, @@ -867,7 +837,7 @@ impl SledAgent { self.config.hardware.reservoir_ram, ) .context("reservoir_size")?, - omicron_zones: self.fake_zones.lock().await.clone(), + omicron_zones: self.fake_zones.lock().unwrap().clone(), disks: storage .physical_disks() .values() @@ -899,7 +869,6 @@ impl SledAgent { // represent the "real" datasets the sled agent can observe. datasets: storage .datasets_config_list() - .await .map(|config| { config .datasets @@ -926,10 +895,10 @@ impl SledAgent { dataset_id: DatasetUuid, ) -> Result, HttpError> { self.storage - .lock() - .await - .support_bundle_list(zpool_id, dataset_id) + .as_support_bundle_storage(&self.log) + .list(zpool_id, dataset_id) .await + .map_err(|err| err.into()) } pub async fn support_bundle_create( @@ -938,35 +907,49 @@ impl SledAgent { dataset_id: DatasetUuid, support_bundle_id: SupportBundleUuid, expected_hash: ArtifactHash, + stream: impl Stream>, ) -> Result { self.storage - .lock() - .await - .support_bundle_create( + .as_support_bundle_storage(&self.log) + .create( zpool_id, dataset_id, support_bundle_id, expected_hash, + stream, ) - .await?; - - Ok(SupportBundleMetadata { - support_bundle_id, - state: SupportBundleState::Complete, - }) + .await + .map_err(|err| err.into()) } - pub async fn support_bundle_get( + pub(crate) async fn support_bundle_get( &self, zpool_id: ZpoolUuid, dataset_id: DatasetUuid, support_bundle_id: SupportBundleUuid, - ) -> Result<(), HttpError> { + range: Option, + query: SupportBundleQueryType, + ) -> Result, HttpError> { self.storage - .lock() + .as_support_bundle_storage(&self.log) + .get(zpool_id, dataset_id, support_bundle_id, range, query) .await - .support_bundle_exists(zpool_id, dataset_id, support_bundle_id) + .map_err(|err| err.into()) + } + + pub(crate) async fn support_bundle_head( + &self, + zpool_id: ZpoolUuid, + dataset_id: DatasetUuid, + support_bundle_id: SupportBundleUuid, + range: Option, + query: SupportBundleQueryType, + ) -> Result, HttpError> { + self.storage + .as_support_bundle_storage(&self.log) + .head(zpool_id, dataset_id, support_bundle_id, range, query) .await + .map_err(|err| err.into()) } pub async fn support_bundle_delete( @@ -976,67 +959,58 @@ impl SledAgent { support_bundle_id: SupportBundleUuid, ) -> Result<(), HttpError> { self.storage - .lock() - .await - .support_bundle_delete(zpool_id, dataset_id, support_bundle_id) + .as_support_bundle_storage(&self.log) + .delete(zpool_id, dataset_id, support_bundle_id) .await + .map_err(|err| err.into()) } - pub async fn datasets_ensure( + pub fn datasets_ensure( &self, config: DatasetsConfig, ) -> Result { - self.storage.lock().await.datasets_ensure(config).await + self.storage.lock().datasets_ensure(config) } - pub async fn datasets_config_list( - &self, - ) -> Result { - self.storage.lock().await.datasets_config_list().await + pub fn datasets_config_list(&self) -> Result { + self.storage.lock().datasets_config_list() } - pub async fn omicron_physical_disks_list( + pub fn omicron_physical_disks_list( &self, ) -> Result { - self.storage.lock().await.omicron_physical_disks_list().await + self.storage.lock().omicron_physical_disks_list() } - pub async fn omicron_physical_disks_ensure( + pub fn 
omicron_physical_disks_ensure( &self, config: OmicronPhysicalDisksConfig, ) -> Result { - self.storage.lock().await.omicron_physical_disks_ensure(config).await + self.storage.lock().omicron_physical_disks_ensure(config) } - pub async fn omicron_zones_list(&self) -> OmicronZonesConfig { - self.fake_zones.lock().await.clone() + pub fn omicron_zones_list(&self) -> OmicronZonesConfig { + self.fake_zones.lock().unwrap().clone() } - pub async fn omicron_zones_ensure( - &self, - requested_zones: OmicronZonesConfig, - ) { - *self.fake_zones.lock().await = requested_zones; + pub fn omicron_zones_ensure(&self, requested_zones: OmicronZonesConfig) { + *self.fake_zones.lock().unwrap() = requested_zones; } - pub async fn drop_dataset( - &self, - zpool_id: ZpoolUuid, - dataset_id: DatasetUuid, - ) { - self.storage.lock().await.drop_dataset(zpool_id, dataset_id) + pub fn drop_dataset(&self, zpool_id: ZpoolUuid, dataset_id: DatasetUuid) { + self.storage.lock().drop_dataset(zpool_id, dataset_id) } - pub async fn list_vpc_routes(&self) -> Vec { - let routes = self.vpc_routes.lock().await; + pub fn list_vpc_routes(&self) -> Vec { + let routes = self.vpc_routes.lock().unwrap(); routes .iter() .map(|(k, v)| ResolvedVpcRouteState { id: *k, version: v.version }) .collect() } - pub async fn set_vpc_routes(&self, new_routes: Vec) { - let mut routes = self.vpc_routes.lock().await; + pub fn set_vpc_routes(&self, new_routes: Vec) { + let mut routes = self.vpc_routes.lock().unwrap(); for new in new_routes { // Disregard any route information for a subnet we don't have. let Some(old) = routes.get(&new.id) else { diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index dc8cf63fe4..e6d999e7e8 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -12,15 +12,18 @@ use crate::sim::http_entrypoints_pantry::ExpectedDigest; use crate::sim::http_entrypoints_pantry::PantryStatus; use crate::sim::http_entrypoints_pantry::VolumeStatus; use crate::sim::SledAgent; +use crate::support_bundle::storage::SupportBundleManager; use anyhow::{self, bail, Result}; +use camino::Utf8Path; +use camino_tempfile::Utf8TempDir; use chrono::prelude::*; use crucible_agent_client::types::{ CreateRegion, Region, RegionId, RunningSnapshot, Snapshot, State, }; use dropshot::HandlerTaskMode; use dropshot::HttpError; -use futures::lock::Mutex; use omicron_common::disk::DatasetManagementStatus; +use omicron_common::disk::DatasetName; use omicron_common::disk::DatasetsConfig; use omicron_common::disk::DatasetsManagementResult; use omicron_common::disk::DiskIdentity; @@ -28,24 +31,26 @@ use omicron_common::disk::DiskManagementStatus; use omicron_common::disk::DiskVariant; use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; -use omicron_common::update::ArtifactHash; +use omicron_common::disk::SharedDatasetConfig; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::PropolisUuid; -use omicron_uuid_kinds::SupportBundleUuid; use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::VolumeConstructionRequest; use serde::Serialize; -use sled_agent_api::SupportBundleMetadata; -use sled_agent_api::SupportBundleState; +use sled_storage::manager::NestedDatasetConfig; +use sled_storage::manager::NestedDatasetListOptions; +use sled_storage::manager::NestedDatasetLocation; use slog::Logger; +use std::collections::BTreeMap; use 
std::collections::HashMap; use std::collections::HashSet; use std::net::{IpAddr, SocketAddr}; use std::str::FromStr; use std::sync::Arc; +use std::sync::Mutex; use uuid::Uuid; type CreateCallback = Box State + Send + 'static>; @@ -676,91 +681,90 @@ impl CrucibleData { } } - pub async fn set_create_callback(&self, callback: CreateCallback) { - self.inner.lock().await.set_create_callback(callback); + pub fn set_create_callback(&self, callback: CreateCallback) { + self.inner.lock().unwrap().set_create_callback(callback); } - pub async fn list(&self) -> Vec { - self.inner.lock().await.list() + pub fn list(&self) -> Vec { + self.inner.lock().unwrap().list() } - pub async fn create(&self, params: CreateRegion) -> Result { - self.inner.lock().await.create(params) + pub fn create(&self, params: CreateRegion) -> Result { + self.inner.lock().unwrap().create(params) } - pub async fn get(&self, id: RegionId) -> Option { - self.inner.lock().await.get(id) + pub fn get(&self, id: RegionId) -> Option { + self.inner.lock().unwrap().get(id) } - pub async fn delete(&self, id: RegionId) -> Result> { - self.inner.lock().await.delete(id) + pub fn delete(&self, id: RegionId) -> Result> { + self.inner.lock().unwrap().delete(id) } - pub async fn create_snapshot( + pub fn create_snapshot( &self, id: Uuid, snapshot_id: Uuid, ) -> Result { - self.inner.lock().await.create_snapshot(id, snapshot_id) + self.inner.lock().unwrap().create_snapshot(id, snapshot_id) } - pub async fn snapshots_for_region(&self, id: &RegionId) -> Vec { - self.inner.lock().await.snapshots_for_region(id) + pub fn snapshots_for_region(&self, id: &RegionId) -> Vec { + self.inner.lock().unwrap().snapshots_for_region(id) } - pub async fn get_snapshot_for_region( + pub fn get_snapshot_for_region( &self, id: &RegionId, snapshot_id: &str, ) -> Option { - self.inner.lock().await.get_snapshot_for_region(id, snapshot_id) + self.inner.lock().unwrap().get_snapshot_for_region(id, snapshot_id) } - pub async fn running_snapshots_for_id( + pub fn running_snapshots_for_id( &self, id: &RegionId, ) -> HashMap { - self.inner.lock().await.running_snapshots_for_id(id) + self.inner.lock().unwrap().running_snapshots_for_id(id) } - pub async fn delete_snapshot( - &self, - id: &RegionId, - name: &str, - ) -> Result<()> { - self.inner.lock().await.delete_snapshot(id, name) + pub fn delete_snapshot(&self, id: &RegionId, name: &str) -> Result<()> { + self.inner.lock().unwrap().delete_snapshot(id, name) } - pub async fn set_creating_a_running_snapshot_should_fail(&self) { - self.inner.lock().await.set_creating_a_running_snapshot_should_fail(); + pub fn set_creating_a_running_snapshot_should_fail(&self) { + self.inner + .lock() + .unwrap() + .set_creating_a_running_snapshot_should_fail(); } - pub async fn set_region_creation_error(&self, value: bool) { - self.inner.lock().await.set_region_creation_error(value); + pub fn set_region_creation_error(&self, value: bool) { + self.inner.lock().unwrap().set_region_creation_error(value); } - pub async fn set_region_deletion_error(&self, value: bool) { - self.inner.lock().await.set_region_deletion_error(value); + pub fn set_region_deletion_error(&self, value: bool) { + self.inner.lock().unwrap().set_region_deletion_error(value); } - pub async fn create_running_snapshot( + pub fn create_running_snapshot( &self, id: &RegionId, name: &str, ) -> Result { - self.inner.lock().await.create_running_snapshot(id, name) + self.inner.lock().unwrap().create_running_snapshot(id, name) } - pub async fn delete_running_snapshot( + pub fn 
delete_running_snapshot( &self, id: &RegionId, name: &str, ) -> Result<()> { - self.inner.lock().await.delete_running_snapshot(id, name) + self.inner.lock().unwrap().delete_running_snapshot(id, name) } - pub async fn is_empty(&self) -> bool { - self.inner.lock().await.is_empty() + pub fn is_empty(&self) -> bool { + self.inner.lock().unwrap().is_empty() } } @@ -814,11 +818,6 @@ impl CrucibleServer { } } -#[derive(Default)] -pub(crate) struct DebugData { - bundles: HashMap, -} - pub(crate) struct PhysicalDisk { pub(crate) identity: DiskIdentity, pub(crate) variant: DiskVariant, @@ -828,7 +827,6 @@ pub(crate) struct PhysicalDisk { /// Describes data being simulated within a dataset. pub(crate) enum DatasetContents { Crucible(CrucibleServer), - Debug(DebugData), } pub(crate) struct Zpool { @@ -847,10 +845,6 @@ impl Zpool { Zpool { id, physical_disk_id, total_size, datasets: HashMap::new() } } - fn insert_debug_dataset(&mut self, id: DatasetUuid) { - self.datasets.insert(id, DatasetContents::Debug(DebugData::default())); - } - fn insert_crucible_dataset( &mut self, log: &Logger, @@ -871,10 +865,7 @@ impl Zpool { let DatasetContents::Crucible(crucible) = self .datasets .get(&id) - .expect("Failed to get the dataset we just inserted") - else { - panic!("Should have just inserted Crucible dataset"); - }; + .expect("Failed to get the dataset we just inserted"); crucible } @@ -882,17 +873,16 @@ impl Zpool { self.total_size } - pub async fn get_dataset_for_region( + pub fn get_dataset_for_region( &self, region_id: Uuid, ) -> Option> { for dataset in self.datasets.values() { - if let DatasetContents::Crucible(dataset) = dataset { - for region in &dataset.data().list().await { - let id = Uuid::from_str(®ion.id.0).unwrap(); - if id == region_id { - return Some(dataset.data()); - } + let DatasetContents::Crucible(dataset) = dataset; + for region in &dataset.data().list() { + let id = Uuid::from_str(®ion.id.0).unwrap(); + if id == region_id { + return Some(dataset.data()); } } } @@ -900,19 +890,18 @@ impl Zpool { None } - pub async fn get_region_for_port(&self, port: u16) -> Option { + pub fn get_region_for_port(&self, port: u16) -> Option { let mut regions = vec![]; for dataset in self.datasets.values() { - if let DatasetContents::Crucible(dataset) = dataset { - for region in &dataset.data().list().await { - if region.state == State::Destroyed { - continue; - } + let DatasetContents::Crucible(dataset) = dataset; + for region in &dataset.data().list() { + if region.state == State::Destroyed { + continue; + } - if port == region.port_number { - regions.push(region.clone()); - } + if port == region.port_number { + regions.push(region.clone()); } } } @@ -928,12 +917,90 @@ impl Zpool { } } +/// Represents a nested dataset +pub struct NestedDatasetStorage { + config: NestedDatasetConfig, + // We intentionally store the children before the mountpoint, + // so they are deleted first. + children: BTreeMap, + // We store this directory as a temporary directory so it gets + // removed when this struct is dropped. + #[allow(dead_code)] + mountpoint: Utf8TempDir, +} + +impl NestedDatasetStorage { + fn new( + zpool_root: &Utf8Path, + dataset_root: DatasetName, + path: String, + shared_config: SharedDatasetConfig, + ) -> Self { + let name = NestedDatasetLocation { path, root: dataset_root }; + + // Create a mountpoint for the nested dataset storage that lasts + // as long as the nested dataset does. 
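// Both properties noted above are load-bearing: the tempdir ties the
// on-disk mountpoint to this value's lifetime, and Rust drops struct fields
// in declaration order, so `children` unwind before their parent directory
// disappears. A self-contained sketch (`Nested` is illustrative):

use camino_tempfile::Utf8TempDir;

struct Nested {
    children: Vec<Nested>,   // dropped first: declaration order
    mountpoint: Utf8TempDir, // dropped second: directory removed here
}

fn sketch() -> std::io::Result<()> {
    let mountpoint = Utf8TempDir::new()?;
    std::fs::write(mountpoint.path().join("bundle.zip"), b"...")?;
    let node = Nested { children: Vec::new(), mountpoint };
    drop(node); // children torn down, then the tempdir is deleted
    Ok(())
}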
+ let mountpoint = name.mountpoint(zpool_root); + println!("NestedDatasetStorage: Mountpoint {mountpoint}"); + let parent = mountpoint.as_path().parent().unwrap(); + println!("NestedDatasetStorage: Creating parent dir: {parent}"); + std::fs::create_dir_all(&parent).unwrap(); + + let new_dir_name = mountpoint.as_path().file_name().unwrap(); + println!("NestedDatasetStorage: New dir name: {new_dir_name}"); + let mountpoint = camino_tempfile::Builder::new() + .rand_bytes(0) + .prefix(new_dir_name) + .tempdir_in(parent) + .unwrap(); + + Self { + config: NestedDatasetConfig { name, inner: shared_config }, + children: BTreeMap::new(), + mountpoint, + } + } +} + /// Simulated representation of all storage on a sled. +#[derive(Clone)] pub struct Storage { - sled_id: Uuid, + inner: Arc>, +} + +impl Storage { + pub fn new(sled_id: Uuid, crucible_ip: IpAddr, log: Logger) -> Self { + Self { + inner: Arc::new(Mutex::new(StorageInner::new( + sled_id, + crucible_ip, + log, + ))), + } + } + + pub fn lock(&self) -> std::sync::MutexGuard { + self.inner.lock().unwrap() + } + + pub fn as_support_bundle_storage<'a>( + &'a self, + log: &'a Logger, + ) -> SupportBundleManager<'a> { + SupportBundleManager::new(log, self) + } +} + +/// Simulated representation of all storage on a sled. +/// +/// Guarded by a mutex from [Storage]. +pub struct StorageInner { log: Logger, + sled_id: Uuid, + root: Utf8TempDir, config: Option, dataset_config: Option, + nested_datasets: HashMap, physical_disks: HashMap, next_disk_slot: i64, zpools: HashMap, @@ -941,13 +1008,15 @@ pub struct Storage { next_crucible_port: u16, } -impl Storage { +impl StorageInner { pub fn new(sled_id: Uuid, crucible_ip: IpAddr, log: Logger) -> Self { Self { sled_id, log, + root: camino_tempfile::tempdir().unwrap(), config: None, dataset_config: None, + nested_datasets: HashMap::new(), physical_disks: HashMap::new(), next_disk_slot: 0, zpools: HashMap::new(), @@ -956,14 +1025,17 @@ impl Storage { } } + /// Returns a path to the "zpool root" for storage. + pub fn root(&self) -> &Utf8Path { + self.root.path() + } + /// Returns an immutable reference to all (currently known) physical disks pub fn physical_disks(&self) -> &HashMap { &self.physical_disks } - pub async fn datasets_config_list( - &self, - ) -> Result { + pub fn datasets_config_list(&self) -> Result { let Some(config) = self.dataset_config.as_ref() else { return Err(HttpError::for_not_found( None, @@ -973,7 +1045,7 @@ impl Storage { Ok(config.clone()) } - pub async fn datasets_ensure( + pub fn datasets_ensure( &mut self, config: DatasetsConfig, ) -> Result { @@ -997,6 +1069,27 @@ impl Storage { } self.dataset_config.replace(config.clone()); + // Add a "nested dataset" entry for all datasets that should exist, + // and remove it for all datasets that have been removed. 
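// The ensure pass below follows a common reconcile-to-desired-state shape:
// insert what is missing, then retain only what is still requested. A
// generic sketch with strings standing in for `DatasetName`:

use std::collections::{HashMap, HashSet};

fn reconcile(current: &mut HashMap<String, u32>, desired: &HashSet<String>) {
    // Create anything requested that does not exist yet...
    for name in desired {
        current.entry(name.clone()).or_insert(0);
    }
    // ...and drop anything no longer requested.
    current.retain(|name, _| desired.contains(name));
}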
+ let dataset_names: HashSet<_> = config + .datasets + .values() + .map(|config| config.name.clone()) + .collect(); + for dataset in &dataset_names { + let root = self.root().to_path_buf(); + self.nested_datasets.entry(dataset.clone()).or_insert_with(|| { + NestedDatasetStorage::new( + &root, + dataset.clone(), + String::new(), + SharedDatasetConfig::default(), + ) + }); + } + self.nested_datasets + .retain(|dataset, _| dataset_names.contains(&dataset)); + Ok(DatasetsManagementResult { status: config .datasets @@ -1009,7 +1102,149 @@ impl Storage { }) } - pub async fn omicron_physical_disks_list( + pub fn nested_dataset_list( + &self, + name: NestedDatasetLocation, + options: NestedDatasetListOptions, + ) -> Result, HttpError> { + let Some(mut nested_dataset) = self.nested_datasets.get(&name.root) + else { + return Err(HttpError::for_not_found( + None, + "Dataset not found".to_string(), + )); + }; + + for path_component in name.path.split('/') { + if path_component.is_empty() { + continue; + } + match nested_dataset.children.get(path_component) { + Some(dataset) => nested_dataset = dataset, + None => { + return Err(HttpError::for_not_found( + None, + "Dataset not found".to_string(), + )) + } + }; + } + + let mut children: Vec<_> = nested_dataset + .children + .values() + .map(|storage| storage.config.clone()) + .collect(); + + match options { + NestedDatasetListOptions::ChildrenOnly => return Ok(children), + NestedDatasetListOptions::SelfAndChildren => { + children.insert(0, nested_dataset.config.clone()); + return Ok(children); + } + } + } + + pub fn nested_dataset_ensure( + &mut self, + config: NestedDatasetConfig, + ) -> Result<(), HttpError> { + let name = &config.name; + let nested_path = name.path.to_string(); + let zpool_root = self.root().to_path_buf(); + let Some(mut nested_dataset) = self.nested_datasets.get_mut(&name.root) + else { + return Err(HttpError::for_not_found( + None, + "Dataset not found".to_string(), + )); + }; + + for path_component in nested_path.split('/') { + if path_component.is_empty() { + continue; + } + + // Final component of path -- insert it here if it doesn't exist + // already. + if !path_component.contains('/') { + let entry = + nested_dataset.children.entry(path_component.to_string()); + entry + .and_modify(|storage| { + storage.config = config.clone(); + }) + .or_insert_with(|| { + NestedDatasetStorage::new( + &zpool_root, + config.name.root, + nested_path, + config.inner, + ) + }); + return Ok(()); + } + + match nested_dataset.children.get_mut(path_component) { + Some(dataset) => nested_dataset = dataset, + None => { + return Err(HttpError::for_not_found( + None, + "Dataset not found".to_string(), + )) + } + }; + } + return Err(HttpError::for_not_found( + None, + "Nested Dataset not found".to_string(), + )); + } + + pub fn nested_dataset_destroy( + &mut self, + name: NestedDatasetLocation, + ) -> Result<(), HttpError> { + let Some(mut nested_dataset) = self.nested_datasets.get_mut(&name.root) + else { + return Err(HttpError::for_not_found( + None, + "Dataset not found".to_string(), + )); + }; + + for path_component in name.path.split('/') { + if path_component.is_empty() { + continue; + } + + // Final component of path -- remove it if it exists. 
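// One subtlety in the traversal below: components yielded by `split('/')`
// can never themselves contain '/', so the `contains('/')` test is always
// true and the first non-empty component is treated as final -- fine for
// the single-component bundle paths used here. A sketch of last-component
// detection with a peekable iterator instead (`Node` is illustrative):

use std::collections::BTreeMap;

#[derive(Default)]
struct Node {
    children: BTreeMap<String, Node>,
}

fn remove(root: &mut Node, path: &str) -> bool {
    let mut parts = path.split('/').filter(|p| !p.is_empty()).peekable();
    let mut node = root;
    while let Some(part) = parts.next() {
        if parts.peek().is_none() {
            // Final component: remove it here if it exists.
            return node.children.remove(part).is_some();
        }
        match node.children.get_mut(part) {
            Some(child) => node = child,
            None => return false,
        }
    }
    false
}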
+ if !path_component.contains('/') { + if nested_dataset.children.remove(path_component).is_none() { + return Err(HttpError::for_not_found( + None, + "Nested Dataset not found".to_string(), + )); + }; + return Ok(()); + } + match nested_dataset.children.get_mut(path_component) { + Some(dataset) => nested_dataset = dataset, + None => { + return Err(HttpError::for_not_found( + None, + "Dataset not found".to_string(), + )) + } + }; + } + return Err(HttpError::for_not_found( + None, + "Nested Dataset not found".to_string(), + )); + } + + pub fn omicron_physical_disks_list( &mut self, ) -> Result { let Some(config) = self.config.as_ref() else { @@ -1021,7 +1256,7 @@ impl Storage { Ok(config.clone()) } - pub async fn omicron_physical_disks_ensure( + pub fn omicron_physical_disks_ensure( &mut self, config: OmicronPhysicalDisksConfig, ) -> Result { @@ -1057,7 +1292,7 @@ impl Storage { }) } - pub async fn insert_physical_disk( + pub fn insert_physical_disk( &mut self, id: PhysicalDiskUuid, identity: DiskIdentity, @@ -1070,7 +1305,7 @@ impl Storage { } /// Adds a Zpool to the sled's simulated storage. - pub async fn insert_zpool( + pub fn insert_zpool( &mut self, zpool_id: ZpoolUuid, disk_id: PhysicalDiskUuid, @@ -1085,143 +1320,8 @@ impl Storage { &self.zpools } - fn get_debug_dataset( - &self, - zpool_id: ZpoolUuid, - dataset_id: DatasetUuid, - ) -> Result<&DebugData, HttpError> { - let Some(zpool) = self.zpools.get(&zpool_id) else { - return Err(HttpError::for_not_found( - None, - format!("zpool does not exist {zpool_id}"), - )); - }; - let Some(dataset) = zpool.datasets.get(&dataset_id) else { - return Err(HttpError::for_not_found( - None, - format!("dataset does not exist {dataset_id}"), - )); - }; - - let DatasetContents::Debug(debug) = dataset else { - return Err(HttpError::for_bad_request( - None, - format!("Not a debug dataset: {zpool_id} / {dataset_id}"), - )); - }; - - Ok(debug) - } - - fn get_debug_dataset_mut( - &mut self, - zpool_id: ZpoolUuid, - dataset_id: DatasetUuid, - ) -> Result<&mut DebugData, HttpError> { - let Some(zpool) = self.zpools.get_mut(&zpool_id) else { - return Err(HttpError::for_not_found( - None, - format!("zpool does not exist {zpool_id}"), - )); - }; - let Some(dataset) = zpool.datasets.get_mut(&dataset_id) else { - return Err(HttpError::for_not_found( - None, - format!("dataset does not exist {dataset_id}"), - )); - }; - - let DatasetContents::Debug(debug) = dataset else { - return Err(HttpError::for_bad_request( - None, - format!("Not a debug dataset: {zpool_id} / {dataset_id}"), - )); - }; - - Ok(debug) - } - - pub async fn support_bundle_list( - &self, - zpool_id: ZpoolUuid, - dataset_id: DatasetUuid, - ) -> Result, HttpError> { - let debug = self.get_debug_dataset(zpool_id, dataset_id)?; - - Ok(debug - .bundles - .keys() - .map(|id| SupportBundleMetadata { - support_bundle_id: *id, - state: SupportBundleState::Complete, - }) - .collect()) - } - - pub async fn support_bundle_create( - &mut self, - zpool_id: ZpoolUuid, - dataset_id: DatasetUuid, - support_bundle_id: SupportBundleUuid, - hash: ArtifactHash, - ) -> Result<(), HttpError> { - let debug = self.get_debug_dataset_mut(zpool_id, dataset_id)?; - - // This is for the simulated server, so we totally ignore the "contents" - // of the bundle and just accept that it should exist. 
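// With the in-memory `DebugData` map removed, a simulated bundle is a real
// zip file under the tempdir-backed zpool root, so list/get/delete exercise
// genuine filesystem paths. A sketch of where a bundle lands (layout mirrors
// the BUNDLE_FILE_NAME comment later in this patch; arguments illustrative):

use camino::{Utf8Path, Utf8PathBuf};

fn bundle_path(
    zpool_root: &Utf8Path,
    debug_dataset: &str, // e.g. "pool/ext/<zpool>/crypt/debug"
    bundle_id: &str,     // the per-bundle nested dataset
) -> Utf8PathBuf {
    zpool_root.join(debug_dataset).join(bundle_id).join("bundle.zip")
}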
- debug.bundles.insert(support_bundle_id, hash); - - Ok(()) - } - - pub async fn support_bundle_exists( - &self, - zpool_id: ZpoolUuid, - dataset_id: DatasetUuid, - support_bundle_id: SupportBundleUuid, - ) -> Result<(), HttpError> { - let debug = self.get_debug_dataset(zpool_id, dataset_id)?; - - if !debug.bundles.contains_key(&support_bundle_id) { - return Err(HttpError::for_not_found( - None, - format!("Support bundle not found {support_bundle_id}"), - )); - } - Ok(()) - } - - pub async fn support_bundle_delete( - &mut self, - zpool_id: ZpoolUuid, - dataset_id: DatasetUuid, - support_bundle_id: SupportBundleUuid, - ) -> Result<(), HttpError> { - let debug = self.get_debug_dataset_mut(zpool_id, dataset_id)?; - - if debug.bundles.remove(&support_bundle_id).is_none() { - return Err(HttpError::for_not_found( - None, - format!("Support bundle not found {support_bundle_id}"), - )); - } - Ok(()) - } - - /// Adds a debug dataset to the sled's simulated storage - pub async fn insert_debug_dataset( - &mut self, - zpool_id: ZpoolUuid, - dataset_id: DatasetUuid, - ) { - self.zpools - .get_mut(&zpool_id) - .expect("Zpool does not exist") - .insert_debug_dataset(dataset_id); - } - /// Adds a Crucible dataset to the sled's simulated storage. - pub async fn insert_crucible_dataset( + pub fn insert_crucible_dataset( &mut self, zpool_id: ZpoolUuid, dataset_id: DatasetUuid, @@ -1291,16 +1391,13 @@ impl Storage { zpool .datasets .iter() - .filter_map(|(id, dataset)| match dataset { - DatasetContents::Crucible(server) => { - Some((*id, server.address())) - } - _ => None, + .map(|(id, dataset)| match dataset { + DatasetContents::Crucible(server) => (*id, server.address()), }) .collect() } - pub async fn get_dataset( + pub fn get_dataset( &self, zpool_id: ZpoolUuid, dataset_id: DatasetUuid, @@ -1313,24 +1410,22 @@ impl Storage { .expect("Dataset does not exist") } - pub async fn get_crucible_dataset( + pub fn get_crucible_dataset( &self, zpool_id: ZpoolUuid, dataset_id: DatasetUuid, ) -> Arc { - match self.get_dataset(zpool_id, dataset_id).await { + match self.get_dataset(zpool_id, dataset_id) { DatasetContents::Crucible(crucible) => crucible.data.clone(), - _ => panic!("{zpool_id} / {dataset_id} is not a crucible dataset"), } } - pub async fn get_dataset_for_region( + pub fn get_dataset_for_region( &self, region_id: Uuid, ) -> Option> { for zpool in self.zpools.values() { - if let Some(dataset) = zpool.get_dataset_for_region(region_id).await - { + if let Some(dataset) = zpool.get_dataset_for_region(region_id) { return Some(dataset); } } @@ -1338,10 +1433,10 @@ impl Storage { None } - pub async fn get_region_for_port(&self, port: u16) -> Option { + pub fn get_region_for_port(&self, port: u16) -> Option { let mut regions = vec![]; for zpool in self.zpools.values() { - if let Some(region) = zpool.get_region_for_port(port).await { + if let Some(region) = zpool.get_region_for_port(port) { regions.push(region); } } @@ -1389,18 +1484,18 @@ impl Pantry { } } - pub async fn status(&self) -> Result { + pub fn status(&self) -> Result { Ok(PantryStatus { - volumes: self.volumes.lock().await.keys().cloned().collect(), - num_job_handles: self.jobs.lock().await.len(), + volumes: self.volumes.lock().unwrap().keys().cloned().collect(), + num_job_handles: self.jobs.lock().unwrap().len(), }) } - pub async fn entry( + pub fn entry( &self, volume_id: String, ) -> Result { - let volumes = self.volumes.lock().await; + let volumes = self.volumes.lock().unwrap(); match volumes.get(&volume_id) { Some(entry) => Ok(entry.vcr.clone()), 
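// The pantry's volume table is now a plain map behind a std mutex; a
// pared-down sketch of the attach/entry pair, with the volume construction
// request stubbed as a String:

use std::collections::HashMap;
use std::sync::Mutex;

struct MiniPantry {
    volumes: Mutex<HashMap<String, String>>, // volume id -> stubbed VCR
}

impl MiniPantry {
    fn attach(&self, id: String, vcr: String) {
        self.volumes.lock().unwrap().insert(id, vcr);
    }

    fn entry(&self, id: &str) -> Result<String, String> {
        self.volumes
            .lock()
            .unwrap()
            .get(id)
            .cloned()
            .ok_or_else(|| format!("volume {id} not attached"))
    }
}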
@@ -1408,12 +1503,12 @@ impl Pantry { } } - pub async fn attach( + pub fn attach( &self, volume_id: String, volume_construction_request: VolumeConstructionRequest, ) -> Result<()> { - let mut volumes = self.volumes.lock().await; + let mut volumes = self.volumes.lock().unwrap(); volumes.insert( volume_id, @@ -1431,14 +1526,14 @@ impl Pantry { Ok(()) } - pub async fn attach_activate_background( + pub fn attach_activate_background( &self, volume_id: String, activate_job_id: String, volume_construction_request: VolumeConstructionRequest, ) -> Result<(), HttpError> { - let mut volumes = self.volumes.lock().await; - let mut jobs = self.jobs.lock().await; + let mut volumes = self.volumes.lock().unwrap(); + let mut jobs = self.jobs.lock().unwrap(); volumes.insert( volume_id, @@ -1458,30 +1553,30 @@ impl Pantry { Ok(()) } - pub async fn activate_background_attachment( + pub fn activate_background_attachment( &self, volume_id: String, ) -> Result { let activate_job = { - let volumes = self.volumes.lock().await; + let volumes = self.volumes.lock().unwrap(); volumes.get(&volume_id).unwrap().activate_job.clone().unwrap() }; - let mut status = self.volume_status(volume_id.clone()).await?; + let mut status = self.volume_status(volume_id.clone())?; status.active = true; status.seen_active = true; - self.update_volume_status(volume_id, status).await?; + self.update_volume_status(volume_id, status)?; Ok(activate_job) } - pub async fn volume_status( + pub fn volume_status( &self, volume_id: String, ) -> Result { - let volumes = self.volumes.lock().await; + let volumes = self.volumes.lock().unwrap(); match volumes.get(&volume_id) { Some(pantry_volume) => Ok(pantry_volume.status.clone()), @@ -1490,12 +1585,12 @@ impl Pantry { } } - pub async fn update_volume_status( + pub fn update_volume_status( &self, volume_id: String, status: VolumeStatus, ) -> Result<(), HttpError> { - let mut volumes = self.volumes.lock().await; + let mut volumes = self.volumes.lock().unwrap(); match volumes.get_mut(&volume_id) { Some(pantry_volume) => { @@ -1507,22 +1602,19 @@ impl Pantry { } } - pub async fn is_job_finished( - &self, - job_id: String, - ) -> Result { - let jobs = self.jobs.lock().await; + pub fn is_job_finished(&self, job_id: String) -> Result { + let jobs = self.jobs.lock().unwrap(); if !jobs.contains(&job_id) { return Err(HttpError::for_not_found(None, job_id)); } Ok(true) } - pub async fn get_job_result( + pub fn get_job_result( &self, job_id: String, ) -> Result, HttpError> { - let mut jobs = self.jobs.lock().await; + let mut jobs = self.jobs.lock().unwrap(); if !jobs.contains(&job_id) { return Err(HttpError::for_not_found(None, job_id)); } @@ -1530,23 +1622,23 @@ impl Pantry { Ok(Ok(true)) } - pub async fn import_from_url( + pub fn import_from_url( &self, volume_id: String, _url: String, _expected_digest: Option, ) -> Result { - self.entry(volume_id).await?; + self.entry(volume_id)?; // Make up job - let mut jobs = self.jobs.lock().await; + let mut jobs = self.jobs.lock().unwrap(); let job_id = Uuid::new_v4().to_string(); jobs.insert(job_id.clone()); Ok(job_id) } - pub async fn snapshot( + pub fn snapshot( &self, volume_id: String, snapshot_id: String, @@ -1555,12 +1647,11 @@ impl Pantry { // the simulated instance ensure, then call // [`instance_issue_disk_snapshot_request`] as the snapshot logic is the // same. 
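// Simulated long-running operations just mint a job id and record it, and
// every job reports finished on first poll. A sketch of that idiom shared
// by import_from_url and scrub (`Jobs` is illustrative):

use std::collections::HashSet;
use std::sync::Mutex;
use uuid::Uuid;

struct Jobs(Mutex<HashSet<String>>);

impl Jobs {
    fn start(&self) -> String {
        let id = Uuid::new_v4().to_string();
        self.0.lock().unwrap().insert(id.clone());
        id
    }

    fn is_finished(&self, id: &str) -> Result<bool, String> {
        if !self.0.lock().unwrap().contains(id) {
            return Err(format!("no such job: {id}"));
        }
        Ok(true) // every simulated job completes immediately
    }
}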
- let volumes = self.volumes.lock().await; + let volumes = self.volumes.lock().unwrap(); let volume_construction_request = &volumes.get(&volume_id).unwrap().vcr; self.sled_agent - .map_disk_ids_to_region_ids(volume_construction_request) - .await?; + .map_disk_ids_to_region_ids(volume_construction_request)?; self.sled_agent .instance_issue_disk_snapshot_request( @@ -1568,17 +1659,16 @@ impl Pantry { volume_id.parse().unwrap(), snapshot_id.parse().unwrap(), ) - .await .map_err(|e| HttpError::for_internal_error(e.to_string())) } - pub async fn bulk_write( + pub fn bulk_write( &self, volume_id: String, offset: u64, data: Vec, ) -> Result<(), HttpError> { - let vcr = self.entry(volume_id).await?; + let vcr = self.entry(volume_id)?; // Currently, Nexus will only make volumes where the first subvolume is // a Region. This will change in the future! @@ -1632,19 +1722,19 @@ impl Pantry { Ok(()) } - pub async fn scrub(&self, volume_id: String) -> Result { - self.entry(volume_id).await?; + pub fn scrub(&self, volume_id: String) -> Result { + self.entry(volume_id)?; // Make up job - let mut jobs = self.jobs.lock().await; + let mut jobs = self.jobs.lock().unwrap(); let job_id = Uuid::new_v4().to_string(); jobs.insert(job_id.clone()); Ok(job_id) } - pub async fn detach(&self, volume_id: String) -> Result<()> { - let mut volumes = self.volumes.lock().await; + pub fn detach(&self, volume_id: String) -> Result<()> { + let mut volumes = self.volumes.lock().unwrap(); volumes.remove(&volume_id); Ok(()) } @@ -1656,11 +1746,7 @@ pub struct PantryServer { } impl PantryServer { - pub async fn new( - log: Logger, - ip: IpAddr, - sled_agent: Arc, - ) -> Self { + pub fn new(log: Logger, ip: IpAddr, sled_agent: Arc) -> Self { let pantry = Arc::new(Pantry::new(sled_agent)); let server = dropshot::ServerBuilder::new( diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index 97d345a8d2..c1b0cfe42f 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -4,6 +4,7 @@ //! Management of and access to Support Bundles +use async_trait::async_trait; use bytes::Bytes; use camino::Utf8Path; use dropshot::Body; @@ -13,6 +14,7 @@ use futures::StreamExt; use omicron_common::api::external::Error as ExternalError; use omicron_common::disk::CompressionAlgorithm; use omicron_common::disk::DatasetConfig; +use omicron_common::disk::DatasetsConfig; use omicron_common::disk::SharedDatasetConfig; use omicron_common::update::ArtifactHash; use omicron_uuid_kinds::DatasetUuid; @@ -30,6 +32,7 @@ use sled_storage::manager::NestedDatasetLocation; use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; +use std::borrow::Cow; use std::io::Write; use tokio::io::AsyncReadExt; use tokio::io::AsyncSeekExt; @@ -37,6 +40,16 @@ use tokio::io::AsyncWriteExt; use tokio_util::io::ReaderStream; use zip::result::ZipError; +// The final name of the bundle, as it is stored within the dedicated +// datasets. +// +// The full path is of the form: +// +// /pool/ext/$(POOL_UUID)/crypt/$(DATASET_TYPE)/$(BUNDLE_UUID)/bundle.zip +// | | This is a per-bundle nested dataset +// | This is a Debug dataset +const BUNDLE_FILE_NAME: &str = "bundle.zip"; + #[derive(thiserror::Error, Debug)] pub enum Error { #[error(transparent)] @@ -100,6 +113,116 @@ impl From for HttpError { } } +/// Abstracts the storage APIs for accessing datasets. +/// +/// Allows support bundle storage to work on both simulated and non-simulated +/// sled agents. 
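// The "dyn_" prefix on the trait methods below sidesteps a name-resolution
// trap: if a trait method shares its name with the inherent method it
// wraps, losing the inherent method makes the impl call itself through the
// trait. A minimal sketch of the hazard (types illustrative):

trait StorageApi {
    fn dyn_datasets(&self) -> Vec<String>;
}

struct Real;

impl Real {
    fn datasets(&self) -> Vec<String> {
        vec!["debug".to_string()]
    }
}

impl StorageApi for Real {
    fn dyn_datasets(&self) -> Vec<String> {
        // Were this trait method also named `datasets` and the inherent
        // method above missing, `self.datasets()` would resolve to the
        // trait method itself and recurse forever; distinct names turn
        // that mistake into a compile error.
        self.datasets()
    }
}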
+#[async_trait] +pub trait LocalStorage: Sync { + // These methods are all prefixed as "dyn_" to avoid duplicating the name + // with the real implementations. + // + // Dispatch is a little silly; if I use the same name as the real + // implementation, then a "missing function" dispatches to the trait instead + // and results in infinite recursion. + + /// Returns all configured datasets + async fn dyn_datasets_config_list(&self) -> Result; + + /// Returns all nested datasets within an existing dataset + async fn dyn_nested_dataset_list( + &self, + name: NestedDatasetLocation, + options: NestedDatasetListOptions, + ) -> Result, Error>; + + /// Ensures a nested dataset exists + async fn dyn_nested_dataset_ensure( + &self, + config: NestedDatasetConfig, + ) -> Result<(), Error>; + + /// Destroys a nested dataset + async fn dyn_nested_dataset_destroy( + &self, + name: NestedDatasetLocation, + ) -> Result<(), Error>; + + /// Returns the root filesystem path where datasets are mounted. + /// + /// This is typically "/" in prod, but can be a temporary directory + /// for tests to isolate storage that typically appears globally. + fn zpool_mountpoint_root(&self) -> Cow; +} + +/// This implementation is effectively a pass-through to the real methods +#[async_trait] +impl LocalStorage for StorageHandle { + async fn dyn_datasets_config_list(&self) -> Result { + self.datasets_config_list().await.map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_list( + &self, + name: NestedDatasetLocation, + options: NestedDatasetListOptions, + ) -> Result, Error> { + self.nested_dataset_list(name, options).await.map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_ensure( + &self, + config: NestedDatasetConfig, + ) -> Result<(), Error> { + self.nested_dataset_ensure(config).await.map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_destroy( + &self, + name: NestedDatasetLocation, + ) -> Result<(), Error> { + self.nested_dataset_destroy(name).await.map_err(|err| err.into()) + } + + fn zpool_mountpoint_root(&self) -> Cow { + Cow::Borrowed(illumos_utils::zpool::ZPOOL_MOUNTPOINT_ROOT.into()) + } +} + +/// This implementation allows storage bundles to be stored on simulated storage +#[async_trait] +impl LocalStorage for crate::sim::Storage { + async fn dyn_datasets_config_list(&self) -> Result { + self.lock().datasets_config_list().map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_list( + &self, + name: NestedDatasetLocation, + options: NestedDatasetListOptions, + ) -> Result, Error> { + self.lock().nested_dataset_list(name, options).map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_ensure( + &self, + config: NestedDatasetConfig, + ) -> Result<(), Error> { + self.lock().nested_dataset_ensure(config).map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_destroy( + &self, + name: NestedDatasetLocation, + ) -> Result<(), Error> { + self.lock().nested_dataset_destroy(name).map_err(|err| err.into()) + } + + fn zpool_mountpoint_root(&self) -> Cow { + Cow::Owned(self.lock().root().to_path_buf()) + } +} + /// Describes the type of access to the support bundle #[derive(Clone, Debug)] pub(crate) enum SupportBundleQueryType { @@ -237,7 +360,7 @@ fn stream_zip_entry( /// APIs to manage support bundle storage. pub struct SupportBundleManager<'a> { log: &'a Logger, - storage: &'a StorageHandle, + storage: &'a dyn LocalStorage, } impl<'a> SupportBundleManager<'a> { @@ -245,7 +368,7 @@ impl<'a> SupportBundleManager<'a> { /// to support bundle CRUD APIs. 
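// `Cow` lets the production impl hand back a borrowed constant while the
// simulator returns an owned per-test path; a condensed sketch of the two
// cases behind `zpool_mountpoint_root` (paths illustrative):

use camino::{Utf8Path, Utf8PathBuf};
use std::borrow::Cow;

fn prod_root() -> Cow<'static, Utf8Path> {
    Cow::Borrowed(Utf8Path::new("/")) // no allocation for the fixed root
}

fn sim_root(tempdir: &Utf8Path) -> Cow<'_, Utf8Path> {
    Cow::Owned(tempdir.join("zpools")) // owned, isolated per test
}

fn mountpoint(root: &Cow<'_, Utf8Path>, dataset: &str) -> Utf8PathBuf {
    root.join(dataset) // callers need not care which case they were given
}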
 /// Describes the type of access to the support bundle
 #[derive(Clone, Debug)]
 pub(crate) enum SupportBundleQueryType {
@@ -237,7 +360,7 @@ fn stream_zip_entry(
 /// APIs to manage support bundle storage.
 pub struct SupportBundleManager<'a> {
     log: &'a Logger,
-    storage: &'a StorageHandle,
+    storage: &'a dyn LocalStorage,
 }

 impl<'a> SupportBundleManager<'a> {
@@ -245,7 +368,7 @@ impl<'a> SupportBundleManager<'a> {
     /// to support bundle CRUD APIs.
     pub fn new(
         log: &'a Logger,
-        storage: &'a StorageHandle,
+        storage: &'a dyn LocalStorage,
     ) -> SupportBundleManager<'a> {
         Self { log, storage }
     }

@@ -256,7 +379,7 @@ impl<'a> SupportBundleManager<'a> {
         zpool_id: ZpoolUuid,
         dataset_id: DatasetUuid,
     ) -> Result<DatasetConfig, Error> {
-        let datasets_config = self.storage.datasets_config_list().await?;
+        let datasets_config = self.storage.dyn_datasets_config_list().await?;
         let dataset = datasets_config
             .datasets
             .get(&dataset_id)
@@ -290,7 +413,7 @@ impl<'a> SupportBundleManager<'a> {
             NestedDatasetLocation { path: String::from(""), root };
         let datasets = self
             .storage
-            .nested_dataset_list(
+            .dyn_nested_dataset_list(
                 dataset_location,
                 NestedDatasetListOptions::ChildrenOnly,
             )
@@ -309,8 +432,8 @@ impl<'a> SupportBundleManager<'a> {
             // The dataset for a support bundle exists.
             let support_bundle_path = dataset
                 .name
-                .mountpoint(illumos_utils::zpool::ZPOOL_MOUNTPOINT_ROOT.into())
-                .join("bundle");
+                .mountpoint(&self.storage.zpool_mountpoint_root())
+                .join(BUNDLE_FILE_NAME);

             // Identify whether or not the final "bundle" file exists.
             //
@@ -399,9 +522,9 @@ impl<'a> SupportBundleManager<'a> {
         let dataset =
             NestedDatasetLocation { path: support_bundle_id.to_string(), root };
         // The mounted root of the support bundle dataset
-        let support_bundle_dir = dataset
-            .mountpoint(illumos_utils::zpool::ZPOOL_MOUNTPOINT_ROOT.into());
-        let support_bundle_path = support_bundle_dir.join("bundle");
+        let support_bundle_dir =
+            dataset.mountpoint(&self.storage.zpool_mountpoint_root());
+        let support_bundle_path = support_bundle_dir.join(BUNDLE_FILE_NAME);
         let support_bundle_path_tmp = support_bundle_dir.join(format!(
             "bundle-{}.tmp",
             thread_rng()
                 .sample_iter(Alphanumeric)
                 .take(6)
                 .map(char::from)
                 .collect::<String>()
         ));

         // Ensure that the dataset exists.
         info!(log, "Ensuring dataset exists for bundle");
         self.storage
-            .nested_dataset_ensure(NestedDatasetConfig {
+            .dyn_nested_dataset_ensure(NestedDatasetConfig {
                 name: dataset,
                 inner: SharedDatasetConfig {
                     compression: CompressionAlgorithm::On,
                     quota: None,
                     reservation: None,
                 },
             })
             .await?;
+        info!(log, "Dataset does exist for bundle");

         // Exit early if the support bundle already exists
         if tokio::fs::try_exists(&support_bundle_path).await? {
@@ -446,9 +570,14 @@ impl<'a> SupportBundleManager<'a> {

         // Stream the file into the dataset, first as a temporary file,
         // and then renaming to the final location.
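+        //
+        // Writing to a temporary name and renaming is what keeps readers
+        // from ever observing a half-written bundle: a rename within the
+        // same filesystem is atomic, so a crash mid-write leaves behind
+        // only a "bundle-*.tmp" file, never a truncated "bundle.zip".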
- info!(log, "Streaming bundle to storage"); + info!( + log, + "Streaming bundle to storage"; + "path" => ?support_bundle_path_tmp, + ); let tmp_file = tokio::fs::File::create(&support_bundle_path_tmp).await?; + if let Err(err) = Self::write_and_finalize_bundle( tmp_file, &support_bundle_path_tmp, @@ -492,7 +621,7 @@ impl<'a> SupportBundleManager<'a> { let root = self.get_configured_dataset(zpool_id, dataset_id).await?.name; self.storage - .nested_dataset_destroy(NestedDatasetLocation { + .dyn_nested_dataset_destroy(NestedDatasetLocation { path: support_bundle_id.to_string(), root, }) @@ -512,9 +641,9 @@ impl<'a> SupportBundleManager<'a> { let dataset = NestedDatasetLocation { path: support_bundle_id.to_string(), root }; // The mounted root of the support bundle dataset - let support_bundle_dir = dataset - .mountpoint(illumos_utils::zpool::ZPOOL_MOUNTPOINT_ROOT.into()); - let path = support_bundle_dir.join("bundle"); + let support_bundle_dir = + dataset.mountpoint(&self.storage.zpool_mountpoint_root()); + let path = support_bundle_dir.join(BUNDLE_FILE_NAME); let f = tokio::fs::File::open(&path).await?; Ok(f) From 60c27d32e5daf1e454ecee6d1998aad565435471 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 17 Dec 2024 13:56:05 -0800 Subject: [PATCH 23/25] cleanup println --- sled-agent/src/sim/storage.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index e6d999e7e8..07cb4e1ccf 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -941,13 +941,10 @@ impl NestedDatasetStorage { // Create a mountpoint for the nested dataset storage that lasts // as long as the nested dataset does. let mountpoint = name.mountpoint(zpool_root); - println!("NestedDatasetStorage: Mountpoint {mountpoint}"); let parent = mountpoint.as_path().parent().unwrap(); - println!("NestedDatasetStorage: Creating parent dir: {parent}"); std::fs::create_dir_all(&parent).unwrap(); let new_dir_name = mountpoint.as_path().file_name().unwrap(); - println!("NestedDatasetStorage: New dir name: {new_dir_name}"); let mountpoint = camino_tempfile::Builder::new() .rand_bytes(0) .prefix(new_dir_name) From 1506c87e7036d7233b2840b6216c1d2db9bbcdd5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 6 Jan 2025 10:19:53 -0800 Subject: [PATCH 24/25] merging --- nexus/db-queries/src/db/datastore/support_bundle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index 8f96c4fd7c..d616b4356b 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -262,7 +262,7 @@ impl DataStore { .all_omicron_datasets( nexus_types::deployment::BlueprintDatasetFilter::Expunged, ) - .filter_map(|dataset_config| { + .filter_map(|(_sled_id, dataset_config)| { if matches!( dataset_config.kind, omicron_common::api::internal::shared::DatasetKind::Debug From 9d416b19d6765dc8d3c2ae2d28eecea2bfef991d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 6 Jan 2025 14:32:23 -0800 Subject: [PATCH 25/25] Fix simulated nested datasets, add tests --- sled-agent/src/sim/storage.rs | 252 +++++++++++++++++++++++++++++++++- 1 file changed, 248 insertions(+), 4 deletions(-) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index e0af1ce03d..e907aaffe1 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -401,6 +401,11 @@ impl CrucibleDataInner { 
#[cfg(test)] mod test { use super::*; + use omicron_common::api::external::Generation; + use omicron_common::disk::DatasetConfig; + use omicron_common::disk::DatasetKind; + use omicron_common::disk::DatasetName; + use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev::test_setup_log; /// Validate that the simulated Crucible agent reuses ports when regions are @@ -661,6 +666,243 @@ mod test { logctx.cleanup_successful(); } + + #[test] + fn nested_dataset_not_found_missing_dataset() { + let logctx = test_setup_log("nested_dataset_not_found_missing_dataset"); + + let storage = StorageInner::new( + Uuid::new_v4(), + std::net::Ipv4Addr::LOCALHOST.into(), + logctx.log.clone(), + ); + + let zpool_id = ZpoolUuid::new_v4(); + + let err = storage + .nested_dataset_list( + NestedDatasetLocation { + path: String::new(), + root: DatasetName::new( + ZpoolName::new_external(zpool_id), + DatasetKind::Debug, + ), + }, + NestedDatasetListOptions::SelfAndChildren, + ) + .expect_err("Nested dataset listing should fail on fake dataset"); + + assert_eq!(err.status_code, 404); + + logctx.cleanup_successful(); + } + + #[test] + fn nested_dataset() { + let logctx = test_setup_log("nested_dataset"); + + let mut storage = StorageInner::new( + Uuid::new_v4(), + std::net::Ipv4Addr::LOCALHOST.into(), + logctx.log.clone(), + ); + + let zpool_id = ZpoolUuid::new_v4(); + let zpool_name = ZpoolName::new_external(zpool_id); + let dataset_id = DatasetUuid::new_v4(); + let dataset_name = DatasetName::new(zpool_name, DatasetKind::Debug); + + let config = DatasetsConfig { + generation: Generation::new(), + datasets: BTreeMap::from([( + dataset_id, + DatasetConfig { + id: dataset_id, + name: dataset_name.clone(), + inner: SharedDatasetConfig::default(), + }, + )]), + }; + + // Create the debug dataset on which we'll store everything else. + let result = storage.datasets_ensure(config).unwrap(); + assert!(!result.has_error()); + + // The list of nested datasets should only contain the root dataset. + let nested_datasets = storage + .nested_dataset_list( + NestedDatasetLocation { + path: String::new(), + root: dataset_name.clone(), + }, + NestedDatasetListOptions::SelfAndChildren, + ) + .unwrap(); + assert_eq!( + nested_datasets, + vec![NestedDatasetConfig { + name: NestedDatasetLocation { + path: String::new(), + root: dataset_name.clone(), + }, + inner: SharedDatasetConfig::default(), + }] + ); + + // Or, if we're requesting children explicitly, it should be empty. + let nested_dataset_root = NestedDatasetLocation { + path: String::new(), + root: dataset_name.clone(), + }; + + let nested_datasets = storage + .nested_dataset_list( + nested_dataset_root.clone(), + NestedDatasetListOptions::ChildrenOnly, + ) + .unwrap(); + assert_eq!(nested_datasets, vec![]); + + // We can request a nested dataset explicitly. 
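+        // Nested datasets are addressed by a slash-separated path relative
+        // to the root (debug) dataset: "foo" below is a direct child, and
+        // "foo/bar" is nested beneath it.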
+ let foo_config = NestedDatasetConfig { + name: NestedDatasetLocation { + path: "foo".into(), + root: dataset_name.clone(), + }, + inner: SharedDatasetConfig::default(), + }; + storage.nested_dataset_ensure(foo_config.clone()).unwrap(); + let foobar_config = NestedDatasetConfig { + name: NestedDatasetLocation { + path: "foo/bar".into(), + root: dataset_name.clone(), + }, + inner: SharedDatasetConfig::default(), + }; + storage.nested_dataset_ensure(foobar_config.clone()).unwrap(); + + // We can observe the nested datasets we just created + let nested_datasets = storage + .nested_dataset_list( + nested_dataset_root.clone(), + NestedDatasetListOptions::ChildrenOnly, + ) + .unwrap(); + assert_eq!(nested_datasets, vec![foo_config.clone(),]); + + let nested_datasets = storage + .nested_dataset_list( + foo_config.name.clone(), + NestedDatasetListOptions::ChildrenOnly, + ) + .unwrap(); + assert_eq!(nested_datasets, vec![foobar_config.clone(),]); + + // We can destroy nested datasets too + storage.nested_dataset_destroy(foobar_config.name.clone()).unwrap(); + storage.nested_dataset_destroy(foo_config.name.clone()).unwrap(); + + logctx.cleanup_successful(); + } + + #[test] + fn nested_dataset_child_parent_relationship() { + let logctx = test_setup_log("nested_dataset_child_parent_relationship"); + + let mut storage = StorageInner::new( + Uuid::new_v4(), + std::net::Ipv4Addr::LOCALHOST.into(), + logctx.log.clone(), + ); + + let zpool_id = ZpoolUuid::new_v4(); + let zpool_name = ZpoolName::new_external(zpool_id); + let dataset_id = DatasetUuid::new_v4(); + let dataset_name = DatasetName::new(zpool_name, DatasetKind::Debug); + + let config = DatasetsConfig { + generation: Generation::new(), + datasets: BTreeMap::from([( + dataset_id, + DatasetConfig { + id: dataset_id, + name: dataset_name.clone(), + inner: SharedDatasetConfig::default(), + }, + )]), + }; + + // Create the debug dataset on which we'll store everything else. + let result = storage.datasets_ensure(config).unwrap(); + assert!(!result.has_error()); + let nested_dataset_root = NestedDatasetLocation { + path: String::new(), + root: dataset_name.clone(), + }; + let nested_datasets = storage + .nested_dataset_list( + nested_dataset_root.clone(), + NestedDatasetListOptions::ChildrenOnly, + ) + .unwrap(); + assert_eq!(nested_datasets, vec![]); + + // If we try to create a nested dataset "foo/bar" before the parent + // "foo", we expect an error. + + let foo_config = NestedDatasetConfig { + name: NestedDatasetLocation { + path: "foo".into(), + root: dataset_name.clone(), + }, + inner: SharedDatasetConfig::default(), + }; + let foobar_config = NestedDatasetConfig { + name: NestedDatasetLocation { + path: "foo/bar".into(), + root: dataset_name.clone(), + }, + inner: SharedDatasetConfig::default(), + }; + + let err = storage + .nested_dataset_ensure(foobar_config.clone()) + .expect_err("Should have failed to provision foo/bar before foo"); + assert_eq!(err.status_code, 404); + + // Try again, but creating them successfully this time. 
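+        // Creating the parent before the child satisfies the ordering
+        // constraint, mirroring real ZFS: "zfs create" fails without "-p"
+        // when the parent dataset does not exist.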
+ storage.nested_dataset_ensure(foo_config.clone()).unwrap(); + storage.nested_dataset_ensure(foobar_config.clone()).unwrap(); + + // We can observe the nested datasets we just created + let nested_datasets = storage + .nested_dataset_list( + nested_dataset_root.clone(), + NestedDatasetListOptions::ChildrenOnly, + ) + .unwrap(); + assert_eq!(nested_datasets, vec![foo_config.clone(),]); + let nested_datasets = storage + .nested_dataset_list( + foo_config.name.clone(), + NestedDatasetListOptions::ChildrenOnly, + ) + .unwrap(); + assert_eq!(nested_datasets, vec![foobar_config.clone(),]); + + // Destroying the nested dataset parent should destroy children. + storage.nested_dataset_destroy(foo_config.name.clone()).unwrap(); + + let nested_datasets = storage + .nested_dataset_list( + nested_dataset_root.clone(), + NestedDatasetListOptions::ChildrenOnly, + ) + .unwrap(); + assert_eq!(nested_datasets, vec![]); + + logctx.cleanup_successful(); + } } /// Represents a running Crucible Agent. Contains regions. @@ -1153,14 +1395,15 @@ impl StorageInner { )); }; - for path_component in nested_path.split('/') { + let mut path_components = nested_path.split('/').peekable(); + while let Some(path_component) = path_components.next() { if path_component.is_empty() { continue; } // Final component of path -- insert it here if it doesn't exist // already. - if !path_component.contains('/') { + if path_components.peek().is_none() { let entry = nested_dataset.children.entry(path_component.to_string()); entry @@ -1206,13 +1449,14 @@ impl StorageInner { )); }; - for path_component in name.path.split('/') { + let mut path_components = name.path.split('/').peekable(); + while let Some(path_component) = path_components.next() { if path_component.is_empty() { continue; } // Final component of path -- remove it if it exists. - if !path_component.contains('/') { + if path_components.peek().is_none() { if nested_dataset.children.remove(path_component).is_none() { return Err(HttpError::for_not_found( None,
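The two hunks above fix how the simulated storage walks a nested dataset path: `split('/')` never yields a component that still contains a '/', so the old `!path_component.contains('/')` test treated every component as the final one. Peeking at the iterator answers "is this the last component?" correctly. A standalone sketch of the corrected traversal, runnable with only the standard library:

    fn main() {
        let nested_path = "foo/bar/baz";
        let mut components = nested_path.split('/').peekable();
        let mut visited = Vec::new();
        while let Some(component) = components.next() {
            if component.is_empty() {
                continue; // tolerate leading or doubled slashes
            }
            // `peek()` is the reliable "last component" test.
            let is_last = components.peek().is_none();
            visited.push((component, is_last));
        }
        assert_eq!(visited, vec![("foo", false), ("bar", false), ("baz", true)]);
    }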