From 93b280ce4d259ea2449cdf498f7e7b843cb46cf8 Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Thu, 19 Oct 2023 08:36:39 -0700 Subject: [PATCH] plumb customer-configured DNS into instances (#4265) Currently [OPTE hardcodes DNS servers to 8.8.8.8](https://github.com/oxidecomputer/opte/issues/390), and we would prefer to not do this. When setting up the rack, customers configure DNS servers. Currently these servers are added to Nexus's deployment configuration, so that Nexus can resolve external domain names for SAML. This change plumbs these DNS servers through to sled-agent and OPTE. For non-instance OPTE ports, `DhcpCfg::default()` is used, which is equivalent to "no hostname, no domain name, no DNS servers, no search domains". This is not the final state of the DHCP work we want to do. In talking with @rmustacc and @rcgoodfellow, we agree that implementing this minimal DHCP option set is the correct thing to do urgently, and that we still need to design the broader inter-instance networking picture (whether we want an instance-facing recursive resolver on the rack and whether it should resolve domain names for other instances with VPC IPs, what those hostnames and search domains should be, and how customers can modify these settings). 
--- .github/buildomat/jobs/deploy.sh | 2 +- Cargo.lock | 13 ++++----- Cargo.toml | 4 +-- illumos-utils/src/opte/mod.rs | 1 + illumos-utils/src/opte/params.rs | 23 +++++++++++++++ illumos-utils/src/opte/port_manager.rs | 12 ++++++-- nexus/src/app/instance.rs | 6 ++++ nexus/src/app/mod.rs | 12 +++++++- openapi/sled-agent.json | 34 ++++++++++++++++++++++ sled-agent/Cargo.toml | 1 - sled-agent/src/instance.rs | 40 +++++++++++++++++++++++++- sled-agent/src/params.rs | 2 ++ sled-agent/src/services.rs | 10 +++---- tools/opte_version | 2 +- 14 files changed, 140 insertions(+), 22 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index c2579d98ea..bdc1a9cce8 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -2,7 +2,7 @@ #: #: name = "helios / deploy" #: variety = "basic" -#: target = "lab-2.0-opte-0.23" +#: target = "lab-2.0-opte-0.25" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/oxide-*.log*", diff --git a/Cargo.lock b/Cargo.lock index 835fab53cb..43ccc57a73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3406,7 +3406,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" +source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" [[package]] name = "illumos-utils" @@ -3828,7 +3828,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" +source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" dependencies = [ "quote", "syn 1.0.109", @@ -5311,7 
+5311,6 @@ dependencies = [ "openapi-lint", "openapiv3", "opte-ioctl", - "oxide-vpc", "oximeter 0.1.0", "oximeter-producer 0.1.0", "percent-encoding", @@ -5600,7 +5599,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" +source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" dependencies = [ "cfg-if 0.1.10", "dyn-clone", @@ -5617,7 +5616,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" +source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" dependencies = [ "cfg-if 0.1.10", "illumos-sys-hdrs", @@ -5630,7 +5629,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" +source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" dependencies = [ "libc", "libnet", @@ -5710,7 +5709,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" +source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" dependencies = [ "cfg-if 0.1.10", "illumos-sys-hdrs", diff --git a/Cargo.toml b/Cargo.toml index 72a7f6157e..7e6a2b3902 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -249,7 +249,7 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils 
= { path = "test-utils" } omicron-zone-package = "0.8.3" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "631c2017f19cafb1535f621e9e5aa9198ccad869", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "258a8b59902dd36fc7ee5425e6b1fb5fc80d4649", features = [ "api", "std" ] } once_cell = "1.18.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "1.0" @@ -257,7 +257,7 @@ openapiv3 = "1.0" openssl = "0.10" openssl-sys = "0.9" openssl-probe = "0.1.2" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "631c2017f19cafb1535f621e9e5aa9198ccad869" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" } oso = "0.26" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } diff --git a/illumos-utils/src/opte/mod.rs b/illumos-utils/src/opte/mod.rs index 10e2a45d83..710e783181 100644 --- a/illumos-utils/src/opte/mod.rs +++ b/illumos-utils/src/opte/mod.rs @@ -25,6 +25,7 @@ pub use port_manager::PortTicket; use ipnetwork::IpNetwork; use macaddr::MacAddr6; pub use oxide_vpc::api::BoundaryServices; +pub use oxide_vpc::api::DhcpCfg; pub use oxide_vpc::api::Vni; use std::net::IpAddr; diff --git a/illumos-utils/src/opte/params.rs b/illumos-utils/src/opte/params.rs index 4df437546c..df1f33cb92 100644 --- a/illumos-utils/src/opte/params.rs +++ b/illumos-utils/src/opte/params.rs @@ -50,3 +50,26 @@ pub struct DeleteVirtualNetworkInterfaceHost { /// be deleted. pub vni: external::Vni, } + +/// DHCP configuration for a port +/// +/// Not present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we +/// use `InstanceRuntimeState::hostname` for this value. 
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct DhcpConfig { + /// DNS servers to send to the instance + /// + /// (DHCPv4 option 6; DHCPv6 option 23) + pub dns_servers: Vec, + + /// DNS zone this instance's hostname belongs to (e.g. the `project.example` + /// part of `instance1.project.example`) + /// + /// (DHCPv4 option 15; used in DHCPv6 option 39) + pub host_domain: Option, + + /// DNS search domains + /// + /// (DHCPv4 option 119; DHCPv6 option 24) + pub search_domains: Vec, +} diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 893db9a6ed..f0a8d8d839 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -19,6 +19,7 @@ use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_common::api::internal::shared::SourceNatConfig; use oxide_vpc::api::AddRouterEntryReq; +use oxide_vpc::api::DhcpCfg; use oxide_vpc::api::IpCfg; use oxide_vpc::api::IpCidr; use oxide_vpc::api::Ipv4Cfg; @@ -100,6 +101,7 @@ impl PortManager { source_nat: Option, external_ips: &[IpAddr], firewall_rules: &[VpcFirewallRule], + dhcp_config: DhcpCfg, ) -> Result<(Port, PortTicket), Error> { let mac = *nic.mac; let vni = Vni::new(nic.vni).unwrap(); @@ -205,8 +207,6 @@ impl PortManager { vni, phys_ip: self.inner.underlay_ip.into(), boundary_services, - // TODO-completeness (#2153): Plumb domain search list - domain_list: vec![], }; // Create the xde device. 
@@ -227,11 +227,17 @@ impl PortManager { "Creating xde device"; "port_name" => &port_name, "vpc_cfg" => ?&vpc_cfg, + "dhcp_config" => ?&dhcp_config, ); #[cfg(target_os = "illumos")] let hdl = { let hdl = opte_ioctl::OpteHdl::open(opte_ioctl::OpteHdl::XDE_CTL)?; - hdl.create_xde(&port_name, vpc_cfg, /* passthru = */ false)?; + hdl.create_xde( + &port_name, + vpc_cfg, + dhcp_config, + /* passthru = */ false, + )?; hdl }; diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 592e1f0492..1adcd8f9c0 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -987,6 +987,12 @@ impl super::Nexus { source_nat, external_ips, firewall_rules, + dhcp_config: sled_agent_client::types::DhcpConfig { + dns_servers: self.external_dns_servers.clone(), + // TODO: finish designing instance DNS + host_domain: None, + search_domains: Vec::new(), + }, disks: disk_reqs, cloud_init_bytes: Some(base64::Engine::encode( &base64::engine::general_purpose::STANDARD, diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 45f69848e3..23ded83150 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -26,7 +26,7 @@ use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::nexus_config::RegionAllocationStrategy; use slog::Logger; use std::collections::HashMap; -use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv6Addr}; use std::sync::Arc; use uuid::Uuid; @@ -153,6 +153,12 @@ pub struct Nexus { /// DNS resolver Nexus uses to resolve an external host external_resolver: Arc, + /// DNS servers used in `external_resolver`, used to provide DNS servers to + /// instances via DHCP + // TODO: This needs to be moved to the database. 
+ // https://github.com/oxidecomputer/omicron/issues/3732 + external_dns_servers: Vec, + /// Mapping of SwitchLocations to their respective Dendrite Clients dpd_clients: HashMap>, @@ -332,6 +338,10 @@ impl Nexus { samael_max_issue_delay: std::sync::Mutex::new(None), internal_resolver: resolver, external_resolver, + external_dns_servers: config + .deployment + .external_dns_servers + .clone(), dpd_clients, background_tasks, default_region_allocation_strategy: config diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 56437ab283..7831193fc2 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -1194,6 +1194,36 @@ "vni" ] }, + "DhcpConfig": { + "description": "DHCP configuration for a port\n\nNot present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we use `InstanceRuntimeState::hostname` for this value.", + "type": "object", + "properties": { + "dns_servers": { + "description": "DNS servers to send to the instance\n\n(DHCPv4 option 6; DHCPv6 option 23)", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "host_domain": { + "nullable": true, + "description": "DNS zone this instance's hostname belongs to (e.g. 
the `project.example` part of `instance1.project.example`)\n\n(DHCPv4 option 15; used in DHCPv6 option 39)", + "type": "string" + }, + "search_domains": { + "description": "DNS search domains\n\n(DHCPv4 option 119; DHCPv6 option 24)", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "dns_servers", + "search_domains" + ] + }, "DiskEnsureBody": { "description": "Sent from to a sled agent to establish the runtime state of a Disk", "type": "object", @@ -1697,6 +1727,9 @@ "nullable": true, "type": "string" }, + "dhcp_config": { + "$ref": "#/components/schemas/DhcpConfig" + }, "disks": { "type": "array", "items": { @@ -1731,6 +1764,7 @@ } }, "required": [ + "dhcp_config", "disks", "external_ips", "firewall_rules", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 82d7411d1a..636c9665ef 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -44,7 +44,6 @@ macaddr.workspace = true nexus-client.workspace = true omicron-common.workspace = true once_cell.workspace = true -oxide-vpc.workspace = true oximeter.workspace = true oximeter-producer.workspace = true percent-encoding.workspace = true diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index ce1ef662dc..94614c2363 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -26,7 +26,7 @@ use chrono::Utc; use futures::lock::{Mutex, MutexGuard}; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; -use illumos_utils::opte::PortManager; +use illumos_utils::opte::{DhcpCfg, PortManager}; use illumos_utils::running_zone::{InstalledZone, RunningZone}; use illumos_utils::svc::wait_for_service; use illumos_utils::zone::Zones; @@ -91,6 +91,10 @@ pub enum Error { #[error(transparent)] Opte(#[from] illumos_utils::opte::Error), + /// Issued by `impl TryFrom<&[u8]> for oxide_vpc::api::DomainName` + #[error("Invalid hostname: {0}")] + InvalidHostname(&'static str), + #[error("Error resolving DNS name: {0}")] 
ResolveError(#[from] internal_dns::resolver::ResolveError), @@ -207,6 +211,7 @@ struct InstanceInner { source_nat: SourceNatConfig, external_ips: Vec, firewall_rules: Vec, + dhcp_config: DhcpCfg, // Disk related properties // TODO: replace `propolis_client::handmade::*` with properly-modeled local types @@ -610,6 +615,37 @@ impl Instance { zone_bundler, } = services; + let mut dhcp_config = DhcpCfg { + hostname: Some( + hardware + .properties + .hostname + .parse() + .map_err(Error::InvalidHostname)?, + ), + host_domain: hardware + .dhcp_config + .host_domain + .map(|domain| domain.parse()) + .transpose() + .map_err(Error::InvalidHostname)?, + domain_search_list: hardware + .dhcp_config + .search_domains + .into_iter() + .map(|domain| domain.parse()) + .collect::>() + .map_err(Error::InvalidHostname)?, + dns4_servers: Vec::new(), + dns6_servers: Vec::new(), + }; + for ip in hardware.dhcp_config.dns_servers { + match ip { + IpAddr::V4(ip) => dhcp_config.dns4_servers.push(ip.into()), + IpAddr::V6(ip) => dhcp_config.dns6_servers.push(ip.into()), + } + } + let instance = InstanceInner { log: log.new(o!("instance_id" => id.to_string())), // NOTE: Mostly lies. 
@@ -633,6 +669,7 @@ impl Instance { source_nat: hardware.source_nat, external_ips: hardware.external_ips, firewall_rules: hardware.firewall_rules, + dhcp_config, requested_disks: hardware.disks, cloud_init_bytes: hardware.cloud_init_bytes, state: InstanceStates::new( @@ -852,6 +889,7 @@ impl Instance { snat, external_ips, &inner.firewall_rules, + inner.dhcp_config.clone(), )?; opte_ports.push(port); } diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 84ec1ef0dc..e1c8b05cde 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -6,6 +6,7 @@ use crate::zone_bundle::PriorityOrder; pub use crate::zone_bundle::ZoneBundleCause; pub use crate::zone_bundle::ZoneBundleId; pub use crate::zone_bundle::ZoneBundleMetadata; +pub use illumos_utils::opte::params::DhcpConfig; pub use illumos_utils::opte::params::VpcFirewallRule; pub use illumos_utils::opte::params::VpcFirewallRulesEnsureBody; use omicron_common::api::internal::nexus::{ @@ -68,6 +69,7 @@ pub struct InstanceHardware { /// provided to an instance to allow inbound connectivity. pub external_ips: Vec, pub firewall_rules: Vec, + pub dhcp_config: DhcpConfig, // TODO: replace `propolis_client::handmade::*` with locally-modeled request type pub disks: Vec, pub cloud_init_bytes: Option, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index f91b5091e6..06d3ae1977 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -52,7 +52,7 @@ use illumos_utils::dladm::{ Dladm, Etherstub, EtherstubVnic, GetSimnetError, PhysicalLink, }; use illumos_utils::link::{Link, VnicAllocator}; -use illumos_utils::opte::{Port, PortManager, PortTicket}; +use illumos_utils::opte::{DhcpCfg, Port, PortManager, PortTicket}; use illumos_utils::running_zone::{ InstalledZone, RunCommandError, RunningZone, }; @@ -863,11 +863,11 @@ impl ServiceManager { // config allows outbound access which is enough for // Boundary NTP which needs to come up before Nexus. 
let port = port_manager - .create_port(nic, snat, external_ips, &[]) + .create_port(nic, snat, external_ips, &[], DhcpCfg::default()) .map_err(|err| Error::ServicePortCreation { - service: svc.details.to_string(), - err: Box::new(err), - })?; + service: svc.details.to_string(), + err: Box::new(err), + })?; // We also need to update the switch with the NAT mappings let (target_ip, first_port, last_port) = match snat { diff --git a/tools/opte_version b/tools/opte_version index 2dbaeb7154..0a79a6aba9 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.23.181 +0.25.183