From 4130d307b22dc1e24b2c163845f61dc7ec30a3ac Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 15 Apr 2025 19:57:57 +0000 Subject: [PATCH 01/17] make sleds report their CPU families to Nexus RFD 505 proposes that instances should be able to set a "minimum hardware platform" or "minimum CPU platform" that allows users to constrain an instance to run on sleds that have a specific set of CPU features available. This allows a user to opt a VM into advanced hardware features (e.g. AVX-512 support) by constraining it to run only on sleds that support those features. For this to work, Nexus needs to understand what CPUs are present in which sleds. Have sled-agent query CPUID to get CPU vendor and family information and report this to Nexus as part of the sled hardware manifest. --- Cargo.lock | 1 + nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-model/src/sled.rs | 12 ++ nexus/db-model/src/sled_cpu_family.rs | 57 ++++++++ .../src/db/datastore/crucible_dataset.rs | 2 + .../src/db/datastore/support_bundle.rs | 2 + .../src/db/pub_test_utils/helpers.rs | 9 ++ nexus/db-schema/src/enums.rs | 1 + nexus/db-schema/src/schema.rs | 1 + nexus/inventory/Cargo.toml | 1 + nexus/inventory/src/collector.rs | 2 + .../rendezvous/src/crucible_dataset.rs | 2 + .../background/tasks/blueprint_execution.rs | 4 +- .../background/tasks/inventory_collection.rs | 2 + nexus/src/app/sled.rs | 1 + nexus/test-utils/src/lib.rs | 12 ++ nexus/tests/integration_tests/rack.rs | 4 + nexus/tests/integration_tests/sleds.rs | 63 ++++++-- nexus/types/src/external_api/views.rs | 22 +++ nexus/types/src/internal_api/params.rs | 21 +++ openapi/nexus-internal.json | 35 +++++ openapi/nexus.json | 35 +++++ schema/crdb/dbinit.sql | 22 ++- schema/crdb/sled-cpu-family/up01.sql | 5 + schema/crdb/sled-cpu-family/up02.sql | 2 + schema/crdb/sled-cpu-family/up03.sql | 1 + sled-agent/src/bin/sled-agent-sim.rs | 3 + sled-agent/src/nexus.rs | 20 +++ sled-agent/src/sim/config.rs | 9 ++ 
sled-agent/src/sim/server.rs | 1 + sled-hardware/src/illumos/mod.rs | 7 +- sled-hardware/src/lib.rs | 135 ++++++++++++++++++ sled-hardware/src/non_illumos/mod.rs | 6 +- sled-hardware/types/src/lib.rs | 7 + 35 files changed, 495 insertions(+), 17 deletions(-) create mode 100644 nexus/db-model/src/sled_cpu_family.rs create mode 100644 schema/crdb/sled-cpu-family/up01.sql create mode 100644 schema/crdb/sled-cpu-family/up02.sql create mode 100644 schema/crdb/sled-cpu-family/up03.sql diff --git a/Cargo.lock b/Cargo.lock index c0351eb6571..ff5e1ac1815 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6555,6 +6555,7 @@ dependencies = [ "id-map", "iddqd", "itertools 0.14.0", + "nexus-client", "nexus-sled-agent-shared", "nexus-types", "ntp-admin-client", diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index d4c2df179d1..ba26c0c2309 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -103,6 +103,7 @@ mod silo_group; mod silo_user; mod silo_user_password_hash; mod sled; +mod sled_cpu_family; mod sled_instance; mod sled_policy; mod sled_resource_vmm; @@ -223,6 +224,7 @@ pub use silo_group::*; pub use silo_user::*; pub use silo_user_password_hash::*; pub use sled::*; +pub use sled_cpu_family::*; pub use sled_instance::*; pub use sled_policy::to_db_sled_policy; // Do not expose DbSledPolicy pub use sled_resource_vmm::*; diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 1507aeab46f..7a42e172291 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. 
-pub const SCHEMA_VERSION: Version = Version::new(173, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(174, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(174, "sled-cpu-family"), KnownVersion::new(173, "inv-internal-dns"), KnownVersion::new(172, "add-zones-with-mupdate-override"), KnownVersion::new(171, "inv-clear-mupdate-override"), diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 6ed06e20021..f4c8e62f9ae 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -6,6 +6,7 @@ use super::{ByteCount, Generation, SledState, SqlU16, SqlU32}; use crate::collection::DatastoreCollectionConfig; use crate::ipv6; use crate::sled::shared::Baseboard; +use crate::sled_cpu_family::SledCpuFamily; use crate::sled_policy::DbSledPolicy; use chrono::{DateTime, Utc}; use db_macros::Asset; @@ -40,6 +41,8 @@ pub struct SledSystemHardware { // current VMM reservoir size pub reservoir_size: ByteCount, + + pub cpu_family: SledCpuFamily, } /// Database representation of a Sled. @@ -84,6 +87,9 @@ pub struct Sled { // ServiceAddress (Repo Depot API). Uses `ip`. pub repo_depot_port: SqlU16, + + /// The family of this sled's CPU. 
+ pub cpu_family: SledCpuFamily, } impl Sled { @@ -141,6 +147,7 @@ impl From for views::Sled { state: sled.state.into(), usable_hardware_threads: sled.usable_hardware_threads.0, usable_physical_ram: *sled.usable_physical_ram, + cpu_family: sled.cpu_family.into(), } } } @@ -185,6 +192,7 @@ impl From for params::SledAgentInfo { usable_physical_ram: sled.usable_physical_ram.into(), reservoir_size: sled.reservoir_size.into(), generation: sled.sled_agent_gen.into(), + cpu_family: sled.cpu_family.into(), decommissioned, } } @@ -229,6 +237,8 @@ pub struct SledUpdate { // ServiceAddress (Repo Depot API). Uses `ip`. pub repo_depot_port: SqlU16, + pub cpu_family: SledCpuFamily, + // Generation number - owned and incremented by sled-agent. pub sled_agent_gen: Generation, } @@ -258,6 +268,7 @@ impl SledUpdate { ip: addr.ip().into(), port: addr.port().into(), repo_depot_port: repo_depot_port.into(), + cpu_family: hardware.cpu_family, sled_agent_gen, } } @@ -296,6 +307,7 @@ impl SledUpdate { repo_depot_port: self.repo_depot_port, last_used_address, sled_agent_gen: self.sled_agent_gen, + cpu_family: self.cpu_family, } } diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs new file mode 100644 index 00000000000..8247e1a2506 --- /dev/null +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -0,0 +1,57 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use super::impl_enum_type; +use serde::{Deserialize, Serialize}; + +impl_enum_type!( + SledCpuFamilyEnum: + + #[derive( + Copy, + Clone, + Debug, + PartialEq, + AsExpression, + FromSqlRow, + Serialize, + Deserialize + )] + pub enum SledCpuFamily; + + Unknown => b"unknown" + AmdMilan => b"amd_milan" + AmdTurin => b"amd_turin" +); + +impl From<nexus_types::internal_api::params::SledCpuFamily> for SledCpuFamily { + fn from(value: nexus_types::internal_api::params::SledCpuFamily) -> Self { + use nexus_types::internal_api::params::SledCpuFamily as InputFamily; + match value { + InputFamily::Unknown => Self::Unknown, + InputFamily::AmdMilan => Self::AmdMilan, + InputFamily::AmdTurin => Self::AmdTurin, + } + } +} + +impl From<SledCpuFamily> for nexus_types::internal_api::params::SledCpuFamily { + fn from(value: SledCpuFamily) -> Self { + match value { + SledCpuFamily::Unknown => Self::Unknown, + SledCpuFamily::AmdMilan => Self::AmdMilan, + SledCpuFamily::AmdTurin => Self::AmdTurin, + } + } +} + +impl From<SledCpuFamily> for nexus_types::external_api::views::SledCpuFamily { + fn from(value: SledCpuFamily) -> Self { + match value { + SledCpuFamily::Unknown => Self::Unknown, + SledCpuFamily::AmdMilan => Self::AmdMilan, + SledCpuFamily::AmdTurin => Self::AmdTurin, + } + } +} diff --git a/nexus/db-queries/src/db/datastore/crucible_dataset.rs b/nexus/db-queries/src/db/datastore/crucible_dataset.rs index 83b6cd6cb6a..fd9eee898bf 100644 --- a/nexus/db-queries/src/db/datastore/crucible_dataset.rs +++ b/nexus/db-queries/src/db/datastore/crucible_dataset.rs @@ -294,6 +294,7 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use nexus_db_model::Generation; use nexus_db_model::SledBaseboard; + use nexus_db_model::SledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use omicron_common::api::external::ByteCount; @@ -323,6 +324,7 @@ mod test { usable_hardware_threads: 128, usable_physical_ram: (64 << 30).try_into().unwrap(), reservoir_size: (16 << 30).try_into().unwrap(), + cpu_family: SledCpuFamily::AmdMilan, }, 
Uuid::new_v4(), Generation::new(), diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index b6aaf5b4661..05195def6df 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -515,6 +515,7 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use nexus_db_model::Generation; use nexus_db_model::SledBaseboard; + use nexus_db_model::SledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_db_model::Zpool; @@ -617,6 +618,7 @@ mod test { usable_hardware_threads: 128, usable_physical_ram: (64 << 30).try_into().unwrap(), reservoir_size: (16 << 30).try_into().unwrap(), + cpu_family: SledCpuFamily::AmdMilan, }, rack_id, Generation::new(), diff --git a/nexus/db-queries/src/db/pub_test_utils/helpers.rs b/nexus/db-queries/src/db/pub_test_utils/helpers.rs index c81f6440d0a..9369324e72a 100644 --- a/nexus/db-queries/src/db/pub_test_utils/helpers.rs +++ b/nexus/db-queries/src/db/pub_test_utils/helpers.rs @@ -25,6 +25,7 @@ use nexus_db_model::ProjectImage; use nexus_db_model::ProjectImageIdentity; use nexus_db_model::Resources; use nexus_db_model::SledBaseboard; +use nexus_db_model::SledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_db_model::Snapshot; @@ -77,6 +78,7 @@ pub struct SledSystemHardwareBuilder { usable_hardware_threads: u32, usable_physical_ram: i64, reservoir_size: i64, + cpu_family: SledCpuFamily, } impl Default for SledSystemHardwareBuilder { @@ -86,6 +88,7 @@ impl Default for SledSystemHardwareBuilder { usable_hardware_threads: 4, usable_physical_ram: 1 << 40, reservoir_size: 1 << 39, + cpu_family: SledCpuFamily::AmdMilan, } } } @@ -121,12 +124,18 @@ impl SledSystemHardwareBuilder { self } + pub fn cpu_family(&mut self, family: SledCpuFamily) -> &mut Self { + self.cpu_family = family; + self + } + pub fn build(&self) -> SledSystemHardware { 
SledSystemHardware { is_scrimlet: self.is_scrimlet, usable_hardware_threads: self.usable_hardware_threads, usable_physical_ram: self.usable_physical_ram.try_into().unwrap(), reservoir_size: self.reservoir_size.try_into().unwrap(), + cpu_family: self.cpu_family, } } } diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index ffee098d6c7..372d42dc40a 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -72,6 +72,7 @@ define_enums! { RouterRouteKindEnum => "router_route_kind", SagaStateEnum => "saga_state", ServiceKindEnum => "service_kind", + SledCpuFamilyEnum => "sled_cpu_family", SledPolicyEnum => "sled_policy", SledRoleEnum => "sled_role", SledStateEnum => "sled_state", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 8ef398e44d1..3f1fa67ca70 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -971,6 +971,7 @@ table! { sled_state -> crate::enums::SledStateEnum, sled_agent_gen -> Int8, repo_depot_port -> Int4, + cpu_family -> crate::enums::SledCpuFamilyEnum, } } diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index f6b90fb6f30..9d42aab4d0a 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -48,6 +48,7 @@ omicron-workspace-hack.workspace = true expectorate.workspace = true gateway-test-utils.workspace = true httpmock.workspace = true +nexus-client.workspace = true omicron-sled-agent.workspace = true regex.workspace = true tokio.workspace = true diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index 211f6d8ad83..fddde6f5c07 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -666,6 +666,7 @@ mod test { use crate::StaticSledAgentEnumerator; use gateway_messages::SpPort; use id_map::IdMap; + use nexus_client::types::SledCpuFamily; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use 
nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; @@ -913,6 +914,7 @@ mod test { None, None, sim::ZpoolConfig::None, + SledCpuFamily::AmdMilan, ); let agent = diff --git a/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs b/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs index 0d4fd8a8382..22d68157bf2 100644 --- a/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs +++ b/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs @@ -130,6 +130,7 @@ mod tests { use async_bb8_diesel::AsyncSimpleConnection; use nexus_db_model::Generation; use nexus_db_model::SledBaseboard; + use nexus_db_model::SledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_db_model::Zpool; @@ -201,6 +202,7 @@ mod tests { usable_hardware_threads: 128, usable_physical_ram: (64 << 30).try_into().unwrap(), reservoir_size: (16 << 30).try_into().unwrap(), + cpu_family: SledCpuFamily::Unknown, }, Uuid::new_v4(), Generation::new(), diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index 88fea70e7a1..3a2c6ff404d 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -180,7 +180,8 @@ mod test { use id_map::IdMap; use itertools::Itertools as _; use nexus_db_model::{ - ByteCount, SledBaseboard, SledSystemHardware, SledUpdate, Zpool, + ByteCount, SledBaseboard, SledCpuFamily, SledSystemHardware, + SledUpdate, Zpool, }; use nexus_db_queries::authn; use nexus_db_queries::context::OpContext; @@ -359,6 +360,7 @@ mod test { usable_hardware_threads: 4, usable_physical_ram: ByteCount(1000.into()), reservoir_size: ByteCount(999.into()), + cpu_family: SledCpuFamily::AmdMilan, }, rack_id, nexus_db_model::Generation::new(), diff --git a/nexus/src/app/background/tasks/inventory_collection.rs b/nexus/src/app/background/tasks/inventory_collection.rs 
index 87c13422bcc..a55d60124d2 100644 --- a/nexus/src/app/background/tasks/inventory_collection.rs +++ b/nexus/src/app/background/tasks/inventory_collection.rs @@ -267,6 +267,7 @@ mod test { use crate::app::background::BackgroundTask; use nexus_db_model::Generation; use nexus_db_model::SledBaseboard; + use nexus_db_model::SledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_db_queries::context::OpContext; @@ -443,6 +444,7 @@ mod test { usable_physical_ram: ByteCount::from_gibibytes_u32(16) .into(), reservoir_size: ByteCount::from_gibibytes_u32(8).into(), + cpu_family: SledCpuFamily::AmdMilan, }, rack_id, Generation::new(), diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 715a1504081..799cb4136f7 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -77,6 +77,7 @@ impl super::Nexus { usable_hardware_threads: info.usable_hardware_threads, usable_physical_ram: info.usable_physical_ram.into(), reservoir_size: info.reservoir_size.into(), + cpu_family: info.cpu_family.into(), }, self.rack_id, info.generation.into(), diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 9a76249fb12..ed42d80ef7c 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -27,6 +27,7 @@ use id_map::IdMap; use internal_dns_types::config::DnsConfigBuilder; use internal_dns_types::names::DNS_ZONE_EXTERNAL_TESTING; use internal_dns_types::names::ServiceName; +use nexus_client::types::SledCpuFamily; use nexus_config::Database; use nexus_config::DpdConfig; use nexus_config::InternalDns; @@ -1902,7 +1903,18 @@ pub async fn start_sled_agent( Some(nexus_address), Some(update_directory), sim::ZpoolConfig::None, + SledCpuFamily::AmdMilan, ); + start_sled_agent_with_config(log, &config, sled_index, simulated_upstairs) + .await +} + +pub async fn start_sled_agent_with_config( + log: Logger, + config: &sim::Config, + sled_index: u16, + simulated_upstairs: &Arc, +) -> Result { let server = 
sim::Server::start(&config, &log, true, simulated_upstairs, sled_index) .await diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index 9eebe3d2130..b5d63858908 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -7,6 +7,7 @@ use http::Method; use http::StatusCode; use nexus_client::types::SledId; use nexus_db_model::SledBaseboard; +use nexus_db_model::SledCpuFamily as DbSledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_sled_agent_shared::inventory::SledRole; @@ -20,6 +21,7 @@ use nexus_types::external_api::params; use nexus_types::external_api::shared::UninitializedSled; use nexus_types::external_api::views::Rack; use nexus_types::internal_api::params::SledAgentInfo; +use nexus_types::internal_api::params::SledCpuFamily; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_uuid_kinds::GenericUuid; @@ -135,6 +137,7 @@ async fn test_sled_list_uninitialized(cptestctx: &ControlPlaneTestContext) { usable_hardware_threads: 32, usable_physical_ram: ByteCount::from_gibibytes_u32(100), reservoir_size: ByteCount::from_mebibytes_u32(100), + cpu_family: SledCpuFamily::Unknown, generation: Generation::new(), decommissioned: false, }; @@ -240,6 +243,7 @@ async fn test_sled_add(cptestctx: &ControlPlaneTestContext) { usable_hardware_threads: 8, usable_physical_ram: (1 << 30).try_into().unwrap(), reservoir_size: (1 << 20).try_into().unwrap(), + cpu_family: DbSledCpuFamily::Unknown, }, nexus.rack_id(), Generation::new().into(), diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index 8735bd568e4..d4d5ee825fd 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -15,10 +15,10 @@ use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_instance; use 
nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::start_sled_agent; +use nexus_test_utils::{start_sled_agent, start_sled_agent_with_config}; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::views::SledInstance; -use nexus_types::external_api::views::{PhysicalDisk, Sled}; +use nexus_types::external_api::views::{PhysicalDisk, Sled, SledCpuFamily}; use omicron_sled_agent::sim; use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition}; use omicron_uuid_kinds::GenericUuid; @@ -60,34 +60,60 @@ async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) { assert_eq!(sleds_list(&client, &sleds_url).await.len(), 2); // Now start a few more sled agents. - let nsleds = 3; - let mut sas = Vec::with_capacity(nsleds); - for i in 0..nsleds { + let mut sas = Vec::new(); + let nexus_address = + cptestctx.server.get_http_server_internal_address().await; + let update_directory = Utf8Path::new("/should/not/be/used"); + let simulated_upstairs = &cptestctx.first_sled_agent().simulated_upstairs; + + for _ in 0..4 { let sa_id = SledUuid::new_v4(); let log = cptestctx.logctx.log.new(o!( "sled_id" => sa_id.to_string() )); - let addr = cptestctx.server.get_http_server_internal_address().await; - let update_directory = Utf8Path::new("/should/not/be/used"); sas.push( start_sled_agent( log, - addr, + nexus_address, sa_id, // Index starts at 2: the `nexus_test` macro already created two // sled agents as part of the ControlPlaneTestContext setup. 
- 2 + i as u16, + 2 + sas.len() as u16 + 1, &update_directory, sim::SimMode::Explicit, - &cptestctx.first_sled_agent().simulated_upstairs, + &simulated_upstairs, ) .await .unwrap(), ); } + let turin_sled_id = SledUuid::new_v4(); + let turin_sled_agent_log = + cptestctx.logctx.log.new(o!( "sled_id" => turin_sled_id.to_string() )); + + let turin_config = omicron_sled_agent::sim::Config::for_testing( + turin_sled_id, + omicron_sled_agent::sim::SimMode::Explicit, + Some(nexus_address), + Some(&update_directory), + omicron_sled_agent::sim::ZpoolConfig::None, + nexus_client::types::SledCpuFamily::AmdTurin, + ); + + sas.push( + start_sled_agent_with_config( + turin_sled_agent_log, + &turin_config, + 2 + sas.len() as u16 + 1, + &simulated_upstairs, + ) + .await + .unwrap(), + ); + // List sleds again. let sleds_found = sleds_list(&client, &sleds_url).await; - assert_eq!(sleds_found.len(), nsleds + 2); + assert_eq!(sleds_found.len(), sas.len() + 2); let sledids_found = sleds_found.iter().map(|sv| sv.identity.id).collect::>(); @@ -95,6 +121,21 @@ async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) { sledids_found_sorted.sort(); assert_eq!(sledids_found, sledids_found_sorted); + let milans_found = sleds_found + .iter() + .filter(|sv| sv.cpu_family == SledCpuFamily::AmdMilan) + .count(); + // Simulated sled-agents report Milan processors by default. The two fake + // sled-agents created by `#[nexus_test]` as well as the four manually + // created above should be counted here. + assert_eq!(milans_found, 2 + 4); + + let turins_found = sleds_found + .iter() + .filter(|sv| sv.cpu_family == SledCpuFamily::AmdTurin) + .count(); + assert_eq!(turins_found, 1); + // Tear down the agents. 
for sa in sas { sa.http_server.close().await.unwrap(); diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 9bd8ab5cc12..1fdd609b366 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -596,6 +596,8 @@ pub struct Sled { pub usable_hardware_threads: u32, /// Amount of RAM which may be used by the Sled's OS pub usable_physical_ram: ByteCount, + /// The family of the sled's CPU(s). + pub cpu_family: SledCpuFamily, } /// The operator-defined provision policy of a sled. @@ -765,6 +767,26 @@ impl fmt::Display for SledState { } } +/// Identifies the kind of CPU present on a sled, determined by reading CPUID. +/// This is the CPU family used in deciding if this sled can support an instance +/// with a particular required CPU platform. +// In lab and development environments in particular, the family reported here +// may differ from the real processor family. `sled-hardware::detect_cpu_family` +// tries to map various CPUs that we would not ship in a rack to their +// greatest-common-denominator family names here. +#[derive(Clone, Serialize, Deserialize, Debug, JsonSchema, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum SledCpuFamily { + /// The CPU vendor or its model/family numbers were not recognized. + Unknown, + + /// The sled has an AMD Milan (Zen 3) processor. + AmdMilan, + + /// The sled has an AMD Turin (Zen 5) processor. 
+ AmdTurin, +} + /// An operator's view of an instance running on a given sled #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct SledInstance { diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index a1a707d12a9..81663f787c0 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -30,6 +30,24 @@ use std::net::SocketAddr; use std::net::SocketAddrV6; use uuid::Uuid; +/// Identifies the kind of CPU present on a sled, determined by reading CPUID. +#[derive(Serialize, Deserialize, Debug, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum SledCpuFamily { + /// The CPU vendor or its family number don't correspond to any of the + /// known family variants. + Unknown, + + /// AMD Milan processors (or very close). Could be an actual Milan in a + /// Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is + /// the greatest common denominator). + AmdMilan, + + /// AMD Turin processors (or very close). Could be an actual Turin in a + /// Cosmo, or a close-to-Turin client Zen 5 part. + AmdTurin, +} + /// Sent by a sled agent to Nexus to inform about resources #[derive(Serialize, Deserialize, Debug, JsonSchema)] pub struct SledAgentInfo { @@ -56,6 +74,9 @@ pub struct SledAgentInfo { /// Must be smaller than "usable_physical_ram" pub reservoir_size: ByteCount, + /// The family of the sled's CPU. 
+ pub cpu_family: SledCpuFamily, + /// The generation number of this request from sled-agent pub generation: Generation, diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 7f927ff8e88..75b9e22e84b 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -7195,6 +7195,14 @@ } ] }, + "cpu_family": { + "description": "The family of the sled's CPU.", + "allOf": [ + { + "$ref": "#/components/schemas/SledCpuFamily" + } + ] + }, "decommissioned": { "description": "Whether the sled-agent has been decommissioned by nexus\n\nThis flag is only set to true by nexus. Setting it on an upsert from sled-agent has no effect.", "type": "boolean" @@ -7250,6 +7258,7 @@ }, "required": [ "baseboard", + "cpu_family", "decommissioned", "generation", "repo_depot_port", @@ -7260,6 +7269,32 @@ "usable_physical_ram" ] }, + "SledCpuFamily": { + "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID.", + "oneOf": [ + { + "description": "The CPU vendor or its family number don't correspond to any of the known family variants.", + "type": "string", + "enum": [ + "unknown" + ] + }, + { + "description": "AMD Milan processors (or very close). Could be an actual Milan in a Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is the greatest common denominator).", + "type": "string", + "enum": [ + "amd_milan" + ] + }, + { + "description": "AMD Turin processors (or very close). 
Could be an actual Turin in a Cosmo, or a close-to-Turin client Zen 5 part.", + "type": "string", + "enum": [ + "amd_turin" + ] + } + ] + }, "SledId": { "type": "object", "properties": { diff --git a/openapi/nexus.json b/openapi/nexus.json index bd91bcc6534..657d9f5c4d0 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -23719,6 +23719,14 @@ "baseboard": { "$ref": "#/components/schemas/Baseboard" }, + "cpu_family": { + "description": "The family of the sled's CPU(s).", + "allOf": [ + { + "$ref": "#/components/schemas/SledCpuFamily" + } + ] + }, "id": { "description": "unique, immutable, system-controlled identifier for each resource", "type": "string", @@ -23772,6 +23780,7 @@ }, "required": [ "baseboard", + "cpu_family", "id", "policy", "rack_id", @@ -23782,6 +23791,32 @@ "usable_physical_ram" ] }, + "SledCpuFamily": { + "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID. This is the CPU family used in deciding if this sled can support an instance with a particular required CPU platform.", + "oneOf": [ + { + "description": "The CPU vendor or its model/family numbers were not recognized.", + "type": "string", + "enum": [ + "unknown" + ] + }, + { + "description": "The sled has an AMD Milan (Zen 3) processor.", + "type": "string", + "enum": [ + "amd_milan" + ] + }, + { + "description": "The sled has an AMD Turin (Zen 5) processor.", + "type": "string", + "enum": [ + "amd_turin" + ] + } + ] + }, "SledId": { "description": "The unique ID of a sled.", "type": "object", diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 3da3de08a80..c4aa716e9f8 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -187,6 +187,21 @@ CREATE TYPE IF NOT EXISTS omicron.public.sled_state AS ENUM ( 'decommissioned' ); +-- The model of CPU installed in a particular sled, discovered by sled-agent +-- and reported to Nexus. 
This determines what VMs can run on a sled: instances +-- that require a specific minimum CPU platform can only run on sleds whose +-- CPUs support all the features of that platform. +CREATE TYPE IF NOT EXISTS omicron.public.sled_cpu_family AS ENUM ( + -- Sled-agent didn't recognize the sled's CPU. + 'unknown', + + -- AMD Milan, or lab CPU close enough that sled-agent reported it as one. + 'amd_milan', + + -- AMD Turin, or lab CPU close enough that sled-agent reported it as one. + 'amd_turin' +); + CREATE TABLE IF NOT EXISTS omicron.public.sled ( /* Identity metadata (asset) */ id UUID PRIMARY KEY, @@ -229,7 +244,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( /* The bound port of the Repo Depot API server, running on the same IP as the sled agent server. */ - repo_depot_port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL + repo_depot_port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, + + /* The sled's detected CPU family. */ + cpu_family omicron.public.sled_cpu_family NOT NULL ); -- Add an index that ensures a given physical sled (identified by serial and @@ -6342,7 +6360,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '173.0.0', NULL) + (TRUE, NOW(), NOW(), '174.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/sled-cpu-family/up01.sql b/schema/crdb/sled-cpu-family/up01.sql new file mode 100644 index 00000000000..9531cec6a7d --- /dev/null +++ b/schema/crdb/sled-cpu-family/up01.sql @@ -0,0 +1,5 @@ +CREATE TYPE IF NOT EXISTS omicron.public.sled_cpu_family AS ENUM ( + 'unknown', + 'amd_milan', + 'amd_turin' +); diff --git a/schema/crdb/sled-cpu-family/up02.sql b/schema/crdb/sled-cpu-family/up02.sql new file mode 100644 index 00000000000..1409e918dae --- /dev/null +++ b/schema/crdb/sled-cpu-family/up02.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.sled ADD COLUMN IF NOT EXISTS + cpu_family omicron.public.sled_cpu_family NOT NULL DEFAULT 'unknown'; diff --git 
a/schema/crdb/sled-cpu-family/up03.sql b/schema/crdb/sled-cpu-family/up03.sql new file mode 100644 index 00000000000..612de867e4f --- /dev/null +++ b/schema/crdb/sled-cpu-family/up03.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.sled ALTER COLUMN cpu_family DROP DEFAULT; diff --git a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs index ca96b2513e1..8378dc02a49 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -12,6 +12,7 @@ use clap::Parser; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; +use nexus_client::types::SledCpuFamily; use omicron_common::api::internal::nexus::Certificate; use omicron_common::cmd::CmdError; use omicron_common::cmd::fatal; @@ -110,6 +111,7 @@ async fn do_run() -> Result<(), CmdError> { hardware_threads: 32, physical_ram: 64 * (1 << 30), reservoir_ram: 32 * (1 << 30), + cpu_family: SledCpuFamily::AmdMilan, baseboard: Baseboard::Gimlet { identifier: format!("sim-{}", args.uuid), model: String::from("sim-gimlet"), @@ -122,6 +124,7 @@ async fn do_run() -> Result<(), CmdError> { Some(args.nexus_addr), Some(tmp.path()), ZpoolConfig::TenVirtualU2s, + SledCpuFamily::AmdMilan, ) }; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 3faeed749bb..52870fe1532 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -77,6 +77,24 @@ impl ConvertInto } } +impl ConvertInto<nexus_client::types::SledCpuFamily> + for sled_hardware_types::CpuFamily +{ + fn convert(self) -> nexus_client::types::SledCpuFamily { + match self { + sled_hardware_types::CpuFamily::Unknown => { + nexus_client::types::SledCpuFamily::Unknown + } + sled_hardware_types::CpuFamily::AmdMilan => { + nexus_client::types::SledCpuFamily::AmdMilan + } + sled_hardware_types::CpuFamily::AmdTurin => { + nexus_client::types::SledCpuFamily::AmdTurin + } + } + } +} + // Somewhat arbitrary bound size, large enough that we should never hit it. 
const QUEUE_SIZE: usize = 256; @@ -275,6 +293,7 @@ impl NexusNotifierTask { .usable_physical_ram_bytes() .into(), reservoir_size: vmm_reservoir_manager.reservoir_size().into(), + cpu_family: hardware.cpu_family().convert(), generation, decommissioned: false, } @@ -654,6 +673,7 @@ mod test { usable_physical_ram: ByteCount::from(1024 * 1024 * 1024u32) .into(), reservoir_size: ByteCount::from(0u32).into(), + cpu_family: nexus_client::types::SledCpuFamily::Unknown, generation: Generation::new(), decommissioned: false, })); diff --git a/sled-agent/src/sim/config.rs b/sled-agent/src/sim/config.rs index dbd9f00c22e..d77d08fc50b 100644 --- a/sled-agent/src/sim/config.rs +++ b/sled-agent/src/sim/config.rs @@ -7,6 +7,7 @@ use crate::updates::ConfigUpdates; use camino::Utf8Path; use dropshot::ConfigDropshot; +use nexus_client::types::SledCpuFamily; use omicron_uuid_kinds::SledUuid; use serde::Deserialize; use serde::Serialize; @@ -56,6 +57,12 @@ pub struct ConfigHardware { pub hardware_threads: u32, pub physical_ram: u64, pub reservoir_ram: u64, + /// The kind of CPU to report the simulated sled as. In reality this is + /// constrained by `baseboard`; a `Baseboard::Gimlet` will only have an + /// `SledCpuFamily::AmdMilan`. A future `Baseboard::Cosmo` will *never* have + /// a `SledCpuFamily::AmdMilan`. Because the baseboard does not imply a + /// specific individual CPU family, though, it's simpler to record here. + pub cpu_family: SledCpuFamily, pub baseboard: Baseboard, } @@ -93,6 +100,7 @@ impl Config { nexus_address: Option, update_directory: Option<&Utf8Path>, zpool_config: ZpoolConfig, + cpu_family: SledCpuFamily, ) -> Config { // This IP range is guaranteed by RFC 6666 to discard traffic. 
// For tests that don't use a Nexus, we use this address to simulate a @@ -133,6 +141,7 @@ impl Config { hardware_threads: TEST_HARDWARE_THREADS, physical_ram: TEST_PHYSICAL_RAM, reservoir_ram: TEST_RESERVOIR_RAM, + cpu_family, baseboard: Baseboard::Gimlet { identifier: format!("sim-{}", id), model: String::from("sim-gimlet"), diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 05c75e18c0e..690efdadfe3 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -166,6 +166,7 @@ impl Server { config.hardware.reservoir_ram, ) .unwrap(), + cpu_family: config.hardware.cpu_family, generation: Generation::new(), decommissioned: false, }, diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 057db6012b6..d51ede8a037 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -9,7 +9,7 @@ use gethostname::gethostname; use illumos_devinfo::{DevInfo, DevLinkType, DevLinks, Node, Property}; use libnvme::{Nvme, controller::Controller}; use omicron_common::disk::{DiskIdentity, DiskVariant}; -use sled_hardware_types::Baseboard; +use sled_hardware_types::{Baseboard, CpuFamily}; use slog::Logger; use slog::debug; use slog::error; @@ -797,6 +797,11 @@ impl HardwareManager { .unwrap_or_else(|| Baseboard::unknown()) } + pub fn cpu_family(&self) -> CpuFamily { + let log = self.log.new(slog::o!("component" => "detect_cpu_family")); + crate::detect_cpu_family(&log) + } + pub fn online_processor_count(&self) -> u32 { self.inner.lock().unwrap().online_processor_count } diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 18c6b4ba3a2..d778d619191 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -149,3 +149,138 @@ impl MemoryReservations { vmm_eligible } } + +/// Detects the current sled's CPU family using the CPUID instruction. 
+#[cfg(target_arch = "x86_64")] +pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { + use core::arch::x86_64::__cpuid_count; + use sled_hardware_types::CpuFamily; + + // Read leaf 0 to figure out the processor's vendor and whether leaf 1 + // (which contains family, model, and stepping information) is available. + let leaf_0 = unsafe { __cpuid_count(0, 0) }; + + info!(log, "read CPUID leaf 0 to detect CPU vendor"; "values" => ?leaf_0); + + // If leaf 1 is unavailable, there's no way to figure out what family this + // processor belongs to. + if leaf_0.eax < 1 { + return CpuFamily::Unknown; + } + + // Check the vendor ID string in ebx/ecx/edx. + match (leaf_0.ebx, leaf_0.ecx, leaf_0.edx) { + // "AuthenticAMD"; see AMD APM volume 3 (March 2024) section E.3.1. + (0x68747541, 0x444D4163, 0x69746E65) => {} + _ => return CpuFamily::Unknown, + } + + // Feature detection after this point is AMD-specific - if we find ourselves + // supporting other CPU vendors we'll want to split this out accordingly. + + // Per AMD APM volume 3 (March 2024) section E.3.2, the processor family + // number is computed as follows: + // + // - Read bits 11:8 of leaf 1 eax to get the "base" family value. If this + // value is less than 0xF, the family value is equal to the base family + // value. + // - If the base family value is 0xF, eax[27:20] contains the "extended" + // family value, and the actual family value is the sum of the base and + // the extended values. + let leaf_1 = unsafe { __cpuid_count(1, 0) }; + let mut family = (leaf_1.eax & 0x00000F00) >> 8; + if family == 0xF { + family += (leaf_1.eax & 0x0FF00000) >> 20; + } + + // Also from the APM volume 3 section E.3.2, the processor model number is + // computed as follows: + // + // - Read bits 7:4 of leaf 1 eax to get the "base" model value. + // - If the "base" family value is less than 0xF, the "base" model stands. + // Otherwise, four additional bits of the model come from eax[19:16]. 
+ // + // If the computed family number is 0xF or greater, that implies the "bsae" + // family was 0xF or greater as well. + let mut model = (leaf_1.eax & 0x000000F0) >> 4; + if family >= 0xF { + model |= (leaf_1.eax & 0x000F0000) >> 12; + } + + info!( + log, + "read CPUID leaf 1 to detect CPU family"; + "values" => ?leaf_1, + "family" => family, + "model" => model, + ); + + // Match on the family/model ranges we've detected. Notably client parts are + // reported as if they were their server counterparts; the feature parity is + // close enough that guests probably won't run into issues. This lowers + // friction for testing migrations where the control plane would need to + // tell what hosts could be compatible with a VMM's CPU platform. + // + // TODO(?): Exhaustively check that client parts support all CPU features of + // the corresponding Oxide CPU platform before doing this "as-if" reporting. + // Lab systems built out of client parts may have hardware which support all + // features in the corresponding instance CPU platform, but have individual + // features disabled in the BIOS or by client part microcode. This can + // result in funky situations, like an Oxide CPU platform advertising CPU + // features that lab systems don't support. This is unlikely, but take + // AVX512 as an example: users can often disable AVX512 entirely on Zen 5 + // BIOSes. In this case a VM on a 9000-series Ryzen will be told those + // instructions are available only for the guest to get #UD at runtime. + match family { + 0x19 if model <= 0x0F => { + // This covers both Milan and Zen 3-based Threadrippers. I don't + // have a 5000-series Threadripper on hand to test but I believe + // they are feature-compatible. + CpuFamily::AmdMilan + } + 0x19 if model >= 0x10 && model <= 0x1F => { + // This covers both Genoa and Zen 4-based Threadrippers. Again, + // don't have a comparable Threadripper to test here. 
+ // + // We intend to expose Turin and Milan as families a guest can + // choose, skipping the Zen 4 EPYC parts. So, round this down to + // Milan; if we're here it's a lab system and the alternative is + // "unknown". + CpuFamily::AmdMilan + } + 0x19 if model >= 0x20 && model <= 0x2F => { + // These are client Zen 3 parts aka Vermeer. Feature-wise, they are + // missing INVLPGB from Milan, but are otherwise close, and we don't + // expose INVLPGB to guests currently anyway. + CpuFamily::AmdMilan + } + 0x19 if model >= 0x60 && model <= 0x6F => { + // These are client Zen 4 parts aka Raphael. Similar to the above + // with Genoa and Vermeer, round these down to Milan in support of + // lab clusters instead of calling them unknown. + CpuFamily::AmdMilan + } + 0x1A if model <= 0x0F => CpuFamily::AmdTurin, + 0x1A if model >= 0x10 && model <= 0x1F => { + // These are Turin Dense, but from a CPU feature perspective they're + // equivalently capable to Turin, so for our purposes they're the + // same. + CpuFamily::AmdTurin + } + 0x1A if model >= 0x40 && model <= 0x4F => { + // These are client Zen 5 parts aka Granite Ridge. Won't be in a + // rack, but plausibly in a lab cluster. Like other non-server + // parts, these don't have INVLPGB, which we don't expose to guests. + // They should otherwise be a sufficient stand-in for Turin. + CpuFamily::AmdTurin + } + // Remaining family/model ranges in known families are likely mobile + // parts and intentionally rolled up into "Unknown." There, it's harder + // to predict what features out of the corresponding CPU platform would + // actually be present. It's also less likely that someone has a laptop + // or APU as part of a development cluster! + // + // Other families are, of course, unknown. 
+ _ => CpuFamily::Unknown, + } +} diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index c54afe87301..448dc59287c 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -6,7 +6,7 @@ use crate::SledMode; use crate::disk::{DiskPaths, Partition, PooledDiskError, UnparsedDisk}; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_uuid_kinds::ZpoolUuid; -use sled_hardware_types::Baseboard; +use sled_hardware_types::{Baseboard, CpuFamily}; use slog::Logger; use std::collections::HashMap; use tokio::sync::broadcast; @@ -41,6 +41,10 @@ impl HardwareManager { unimplemented!("Accessing hardware unsupported on non-illumos"); } + pub fn cpu_family(&self) -> CpuFamily { + unimplemented!("Accessing hardware unsupported on non-illumos"); + } + pub fn online_processor_count(&self) -> u32 { unimplemented!("Accessing hardware unsupported on non-illumos"); } diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index b34b5b1f422..1a7047fb076 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -95,3 +95,10 @@ impl std::fmt::Display for Baseboard { } } } + +#[derive(Clone, Copy, Debug)] +pub enum CpuFamily { + Unknown, + AmdMilan, + AmdTurin, +} From d316a2e1f322dff3f478d4447a196e34db441b04 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 30 Jul 2025 19:03:28 +0000 Subject: [PATCH 02/17] differentiate Turin and Turin Dense for the control plane --- nexus/db-model/src/sled_cpu_family.rs | 1 + nexus/types/src/external_api/views.rs | 3 +++ nexus/types/src/internal_api/params.rs | 4 ++++ schema/crdb/dbinit.sql | 5 ++++- sled-hardware/src/lib.rs | 8 ++++---- sled-hardware/types/src/lib.rs | 1 + 6 files changed, 17 insertions(+), 5 deletions(-) diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index 8247e1a2506..b69ca5f8c17 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ 
b/nexus/db-model/src/sled_cpu_family.rs @@ -23,6 +23,7 @@ impl_enum_type!( Unknown => b"unknown" AmdMilan => b"amd_milan" AmdTurin => b"amd_turin" + AmdTurinDense => b"amd_turin_dense" ); impl From for SledCpuFamily { diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 1fdd609b366..e9cdcbe97b6 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -785,6 +785,9 @@ pub enum SledCpuFamily { /// The sled has an AMD Turin (Zen 5) processor. AmdTurin, + + /// The sled has an AMD Turin Dense (Zen 5c) processor. + AmdTurinDense, } /// An operator's view of an instance running on a given sled diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 81663f787c0..45755187d39 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -46,6 +46,10 @@ pub enum SledCpuFamily { /// AMD Turin processors (or very close). Could be an actual Turin in a /// Cosmo, or a close-to-Turin client Zen 5 part. AmdTurin, + + /// AMD Turin Dense processors. There are no "Turin Dense-like" CPUs unlike + /// other cases, so this means a bona fide Zen 5c Turin Dense part. + AmdTurinDense, } /// Sent by a sled agent to Nexus to inform about resources diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index c4aa716e9f8..0a5587c5939 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -199,7 +199,10 @@ CREATE TYPE IF NOT EXISTS omicron.public.sled_cpu_family AS ENUM ( 'amd_milan', -- AMD Turin, or lab CPU close enough that sled-agent reported it as one. - 'amd_turin' + 'amd_turin', + + -- AMD Turin Dense. There are no "Turin Dense-likes", so this is precise. 
+ 'amd_turin_dense' ); CREATE TABLE IF NOT EXISTS omicron.public.sled ( diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index d778d619191..3a8c5227c3a 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -262,10 +262,10 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { } 0x1A if model <= 0x0F => CpuFamily::AmdTurin, 0x1A if model >= 0x10 && model <= 0x1F => { - // These are Turin Dense, but from a CPU feature perspective they're - // equivalently capable to Turin, so for our purposes they're the - // same. - CpuFamily::AmdTurin + // These are Turin Dense. From a CPU feature perspective they're + // equivalently capable to Turin, but they are physically distinct + // and sled operators should be able to see that. + CpuFamily::AmdTurinDense } 0x1A if model >= 0x40 && model <= 0x4F => { // These are client Zen 5 parts aka Granite Ridge. Won't be in a diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index 1a7047fb076..663ce8de323 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -101,4 +101,5 @@ pub enum CpuFamily { Unknown, AmdMilan, AmdTurin, + AmdTurinDense, } From 13672898a91ab79c3667416f8294f39b3d0757e0 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 30 Jul 2025 19:04:38 +0000 Subject: [PATCH 03/17] unwind CPU families from the public sled API --- nexus/db-model/src/sled.rs | 8 +++- nexus/db-model/src/sled_cpu_family.rs | 12 +---- nexus/tests/integration_tests/sleds.rs | 63 +++++--------------------- nexus/types/src/external_api/views.rs | 25 ---------- openapi/nexus-internal.json | 7 +++ openapi/nexus.json | 35 -------------- sled-agent/src/nexus.rs | 3 ++ 7 files changed, 30 insertions(+), 123 deletions(-) diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index f4c8e62f9ae..e9967569006 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -89,6 +89,13 @@ pub struct Sled { pub repo_depot_port: 
SqlU16, /// The family of this sled's CPU. + /// + /// This is primarily useful for questions about instance CPU platform + /// compatibility; it is too broad for topology-related sled selection + /// and more precise than a more general report of microarchitecture. We + /// likely should include much more about the sled's CPU alongside this for + /// those broader questions and reporting (see + /// https://github.com/oxidecomputer/omicron/issues/8730 for examples). pub cpu_family: SledCpuFamily, } @@ -147,7 +154,6 @@ impl From for views::Sled { state: sled.state.into(), usable_hardware_threads: sled.usable_hardware_threads.0, usable_physical_ram: *sled.usable_physical_ram, - cpu_family: sled.cpu_family.into(), } } } diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index b69ca5f8c17..12c8c4ba5c7 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -33,6 +33,7 @@ impl From for SledCpuFamily { InputFamily::Unknown => Self::Unknown, InputFamily::AmdMilan => Self::AmdMilan, InputFamily::AmdTurin => Self::AmdTurin, + InputFamily::AmdTurinDense => Self::AmdTurinDense, } } } @@ -43,16 +44,7 @@ impl From for nexus_types::internal_api::params::SledCpuFamily { SledCpuFamily::Unknown => Self::Unknown, SledCpuFamily::AmdMilan => Self::AmdMilan, SledCpuFamily::AmdTurin => Self::AmdTurin, - } - } -} - -impl From for nexus_types::external_api::views::SledCpuFamily { - fn from(value: SledCpuFamily) -> Self { - match value { - SledCpuFamily::Unknown => Self::Unknown, - SledCpuFamily::AmdMilan => Self::AmdMilan, - SledCpuFamily::AmdTurin => Self::AmdTurin, + SledCpuFamily::AmdTurinDense => Self::AmdTurinDense, } } } diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index d4d5ee825fd..8735bd568e4 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -15,10 +15,10 @@ use 
nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_instance; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::{start_sled_agent, start_sled_agent_with_config}; +use nexus_test_utils::start_sled_agent; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::views::SledInstance; -use nexus_types::external_api::views::{PhysicalDisk, Sled, SledCpuFamily}; +use nexus_types::external_api::views::{PhysicalDisk, Sled}; use omicron_sled_agent::sim; use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition}; use omicron_uuid_kinds::GenericUuid; @@ -60,60 +60,34 @@ async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) { assert_eq!(sleds_list(&client, &sleds_url).await.len(), 2); // Now start a few more sled agents. - let mut sas = Vec::new(); - let nexus_address = - cptestctx.server.get_http_server_internal_address().await; - let update_directory = Utf8Path::new("/should/not/be/used"); - let simulated_upstairs = &cptestctx.first_sled_agent().simulated_upstairs; - - for _ in 0..4 { + let nsleds = 3; + let mut sas = Vec::with_capacity(nsleds); + for i in 0..nsleds { let sa_id = SledUuid::new_v4(); let log = cptestctx.logctx.log.new(o!( "sled_id" => sa_id.to_string() )); + let addr = cptestctx.server.get_http_server_internal_address().await; + let update_directory = Utf8Path::new("/should/not/be/used"); sas.push( start_sled_agent( log, - nexus_address, + addr, sa_id, // Index starts at 2: the `nexus_test` macro already created two // sled agents as part of the ControlPlaneTestContext setup. 
- 2 + sas.len() as u16 + 1, + 2 + i as u16, &update_directory, sim::SimMode::Explicit, - &simulated_upstairs, + &cptestctx.first_sled_agent().simulated_upstairs, ) .await .unwrap(), ); } - let turin_sled_id = SledUuid::new_v4(); - let turin_sled_agent_log = - cptestctx.logctx.log.new(o!( "sled_id" => turin_sled_id.to_string() )); - - let turin_config = omicron_sled_agent::sim::Config::for_testing( - turin_sled_id, - omicron_sled_agent::sim::SimMode::Explicit, - Some(nexus_address), - Some(&update_directory), - omicron_sled_agent::sim::ZpoolConfig::None, - nexus_client::types::SledCpuFamily::AmdTurin, - ); - - sas.push( - start_sled_agent_with_config( - turin_sled_agent_log, - &turin_config, - 2 + sas.len() as u16 + 1, - &simulated_upstairs, - ) - .await - .unwrap(), - ); - // List sleds again. let sleds_found = sleds_list(&client, &sleds_url).await; - assert_eq!(sleds_found.len(), sas.len() + 2); + assert_eq!(sleds_found.len(), nsleds + 2); let sledids_found = sleds_found.iter().map(|sv| sv.identity.id).collect::>(); @@ -121,21 +95,6 @@ async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) { sledids_found_sorted.sort(); assert_eq!(sledids_found, sledids_found_sorted); - let milans_found = sleds_found - .iter() - .filter(|sv| sv.cpu_family == SledCpuFamily::AmdMilan) - .count(); - // Simulated sled-agents report Milan processors by default. The two fake - // sled-agents created by `#[nexus_test]` as well as the four manually - // created above should be counted here. - assert_eq!(milans_found, 2 + 4); - - let turins_found = sleds_found - .iter() - .filter(|sv| sv.cpu_family == SledCpuFamily::AmdTurin) - .count(); - assert_eq!(turins_found, 1); - // Tear down the agents. 
for sa in sas { sa.http_server.close().await.unwrap(); diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index e9cdcbe97b6..9bd8ab5cc12 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -596,8 +596,6 @@ pub struct Sled { pub usable_hardware_threads: u32, /// Amount of RAM which may be used by the Sled's OS pub usable_physical_ram: ByteCount, - /// The family of the sled's CPU(s). - pub cpu_family: SledCpuFamily, } /// The operator-defined provision policy of a sled. @@ -767,29 +765,6 @@ impl fmt::Display for SledState { } } -/// Identifies the kind of CPU present on a sled, determined by reading CPUID. -/// This is the CPU family used in deciding if this sled can support an instance -/// with a particular required CPU platform. -// In lab and development environments in particular, the family reported here -// may differ from the real processor family. `sled-hardware::detect_cpu_family` -// tries to map various CPUs that we would not ship in a rack to their -// greatest-common-denominator family names here. -#[derive(Clone, Serialize, Deserialize, Debug, JsonSchema, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum SledCpuFamily { - /// The CPU vendor or its model/family numbers were not recognized. - Unknown, - - /// The sled has an AMD Milan (Zen 3) processor. - AmdMilan, - - /// The sled has an AMD Turin (Zen 5) processor. - AmdTurin, - - /// The sled has an AMD Turin Dense (Zen 5c) processor. - AmdTurinDense, -} - /// An operator's view of an instance running on a given sled #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct SledInstance { diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 75b9e22e84b..ff039ee29a6 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -7292,6 +7292,13 @@ "enum": [ "amd_turin" ] + }, + { + "description": "AMD Turin Dense processors. 
There are no \"Turin Dense-like\" CPUs unlike other cases, so this means a bona fide Zen 5c Turin Dense part.", + "type": "string", + "enum": [ + "amd_turin_dense" + ] } ] }, diff --git a/openapi/nexus.json b/openapi/nexus.json index 657d9f5c4d0..bd91bcc6534 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -23719,14 +23719,6 @@ "baseboard": { "$ref": "#/components/schemas/Baseboard" }, - "cpu_family": { - "description": "The family of the sled's CPU(s).", - "allOf": [ - { - "$ref": "#/components/schemas/SledCpuFamily" - } - ] - }, "id": { "description": "unique, immutable, system-controlled identifier for each resource", "type": "string", @@ -23780,7 +23772,6 @@ }, "required": [ "baseboard", - "cpu_family", "id", "policy", "rack_id", @@ -23791,32 +23782,6 @@ "usable_physical_ram" ] }, - "SledCpuFamily": { - "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID. This is the CPU family used in deciding if this sled can support an instance with a particular required CPU platform.", - "oneOf": [ - { - "description": "The CPU vendor or its model/family numbers were not recognized.", - "type": "string", - "enum": [ - "unknown" - ] - }, - { - "description": "The sled has an AMD Milan (Zen 3) processor.", - "type": "string", - "enum": [ - "amd_milan" - ] - }, - { - "description": "The sled has an AMD Turin (Zen 5) processor.", - "type": "string", - "enum": [ - "amd_turin" - ] - } - ] - }, "SledId": { "description": "The unique ID of a sled.", "type": "object", diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 52870fe1532..9c335dc88e4 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -91,6 +91,9 @@ impl ConvertInto sled_hardware_types::CpuFamily::AmdTurin => { nexus_client::types::SledCpuFamily::AmdTurin } + sled_hardware_types::CpuFamily::AmdTurinDense => { + nexus_client::types::SledCpuFamily::AmdTurinDense + } } } } From b5eaf68865d1fc920d819e21084d066921d24fd8 Mon Sep 17 00:00:00 2001 
From: iximeow Date: Wed, 30 Jul 2025 21:12:02 +0000 Subject: [PATCH 04/17] review notes --- sled-hardware/src/lib.rs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 3a8c5227c3a..3f57745a81c 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -151,6 +151,15 @@ impl MemoryReservations { } /// Detects the current sled's CPU family using the CPUID instruction. +/// +/// TODO: Ideally we would call into libtopo and pass along the information +/// identified there. See https://github.com/oxidecomputer/omicron/issues/8732. +/// +/// Everything here is duplicative with CPU identification done by the kernel. +/// You'll even find a very similar (but much more comprehensive) AMD family +/// mapping at `amd_revmap` in `usr/src/uts/intel/os/cpuid_subr.c`. But +/// sled-agent does not yet know about libtopo, getting topo snapshots, walking +/// them, or any of that, so the parsing is performed again here. #[cfg(target_arch = "x86_64")] pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { use core::arch::x86_64::__cpuid_count; @@ -200,7 +209,7 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { // - If the "base" family value is less than 0xF, the "base" model stands. // Otherwise, four additional bits of the model come from eax[19:16]. // - // If the computed family number is 0xF or greater, that implies the "bsae" + // If the computed family number is 0xF or greater, that implies the "base" // family was 0xF or greater as well. 
let mut model = (leaf_1.eax & 0x000000F0) >> 4; if family >= 0xF { @@ -210,9 +219,12 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { info!( log, "read CPUID leaf 1 to detect CPU family"; - "values" => ?leaf_1, - "family" => family, - "model" => model, + "leaf1.eax" => format_args!("{:#08x}", leaf_1.eax), + "leaf1.ebx" => format_args!("{:#08x}", leaf_1.ebx), + "leaf1.ecx" => format_args!("{:#08x}", leaf_1.ecx), + "leaf1.edx" => format_args!("{:#08x}", leaf_1.edx), + "parsed family" => format_args!("{family:#x}"), + "parsed model" => format_args!("{model:#x}"), ); // Match on the family/model ranges we've detected. Notably client parts are From 114f383266fae89c4447bfac396356197c0a12bc Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 30 Jul 2025 21:51:21 +0000 Subject: [PATCH 05/17] fix links ugh --- nexus/db-model/src/sled.rs | 2 +- sled-hardware/src/lib.rs | 3 ++- sled-hardware/types/src/lib.rs | 10 ++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index e9967569006..631cc92de0a 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -95,7 +95,7 @@ pub struct Sled { /// and more precise than a more general report of microarchitecture. We /// likely should include much more about the sled's CPU alongside this for /// those broader questions and reporting (see - /// https://github.com/oxidecomputer/omicron/issues/8730 for examples). + /// <https://github.com/oxidecomputer/omicron/issues/8730> for examples). pub cpu_family: SledCpuFamily, } diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 3f57745a81c..bd5d00ac47b 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -153,7 +153,8 @@ impl MemoryReservations { /// Detects the current sled's CPU family using the CPUID instruction. /// /// TODO: Ideally we would call into libtopo and pass along the information -/// identified there. See https://github.com/oxidecomputer/omicron/issues/8732.
+/// identified there. See +/// <https://github.com/oxidecomputer/omicron/issues/8732>. /// /// Everything here is duplicative with CPU identification done by the kernel. /// You'll even find a very similar (but much more comprehensive) AMD family diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index 663ce8de323..5d6ea5c8d3b 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -96,6 +96,16 @@ impl std::fmt::Display for Baseboard { } } +/// A general description of the CPU family for processor(s) in this sled. +/// +/// This is intended to broadly support the control plane answering the question +/// "can I run this instance on that sled?" given an instance with either no or +/// some CPU platform requirement. It is not enough information for more precise +/// placement questions - for example, is a CPU a high-frequency part or +/// many-core part? We don't include Genoa here, but in that CPU family there +/// are high frequency parts, many-core parts, and large-cache parts. To support +/// those questions (or satisfactorily answer #8730) we would need to collect +/// additional information and send it along.
#[derive(Clone, Copy, Debug)] pub enum CpuFamily { Unknown, From 4c40d473943d335c079041238900a8c184d83213 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 30 Jul 2025 23:21:38 +0000 Subject: [PATCH 06/17] migration still needs to know about turin dense --- schema/crdb/sled-cpu-family/up01.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/schema/crdb/sled-cpu-family/up01.sql b/schema/crdb/sled-cpu-family/up01.sql index 9531cec6a7d..f1bb76f3389 100644 --- a/schema/crdb/sled-cpu-family/up01.sql +++ b/schema/crdb/sled-cpu-family/up01.sql @@ -1,5 +1,6 @@ CREATE TYPE IF NOT EXISTS omicron.public.sled_cpu_family AS ENUM ( 'unknown', 'amd_milan', - 'amd_turin' + 'amd_turin', + 'amd_turin_dense' ); From 5ec45d3352d0bc52f3a3e432594643dacd87b7c6 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 2 Aug 2025 00:14:35 +0000 Subject: [PATCH 07/17] sled-agent needs to expose cpu_family for inventory collections too the existing plumbing was sufficient for sled-agent to report the CPU family at startup, but did not provide the CPU family when Nexus calls later for inventory collections. when you've upgraded to this version, the database migration sets the sled CPU family to `unknown` expecting that the next inventory collection will figure things out. this doesn't happen, and the initial check-in doesn't update the CPU type either (presumably because the sled is already known and initialized from the control plane's perspective?) this does... most of the plumbing to report a sled's CPU family for inventory collection, but it doesn't actually work. `SledCpuFamily` being both in `omicron-common` and `nexus-client` is kind of unworkable. probably need a `ConvertInto` or something to transform the shared into the `nexus-client` when needed..? i've been trying to figure out what exactly is necessary and what is just building a mess for myself for two hours and this feels like it's going nowhere. 
--- common/src/api/internal/shared.rs | 33 +++++++++++++++++++ nexus-sled-agent-shared/src/inventory.rs | 3 +- nexus/db-model/src/inventory.rs | 3 ++ nexus/db-model/src/sled_cpu_family.rs | 8 ++--- .../db-queries/src/db/datastore/inventory.rs | 5 +++ nexus/db-schema/src/schema.rs | 1 + nexus/inventory/src/builder.rs | 1 + nexus/inventory/src/examples.rs | 2 ++ nexus/reconfigurator/planning/src/system.rs | 3 ++ nexus/types/src/internal_api/params.rs | 23 +------------ nexus/types/src/inventory.rs | 2 ++ nexus/types/src/inventory/display.rs | 2 ++ sled-agent/src/sim/config.rs | 2 +- sled-agent/src/sim/sled_agent.rs | 1 + sled-agent/src/sled_agent.rs | 3 ++ 15 files changed, 64 insertions(+), 28 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index ebc9f6a46ca..f912cba3f30 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -1101,6 +1101,39 @@ pub struct SledIdentifiers { pub serial: String, } +/// Identifies the kind of CPU present on a sled, determined by reading CPUID. +#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum SledCpuFamily { + /// The CPU vendor or its family number don't correspond to any of the + /// known family variants. + Unknown, + + /// AMD Milan processors (or very close). Could be an actual Milan in a + /// Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is + /// the greatest common denominator). + AmdMilan, + + /// AMD Turin processors (or very close). Could be an actual Turin in a + /// Cosmo, or a close-to-Turin client Zen 5 part. + AmdTurin, + + /// AMD Turin Dense processors. There are no "Turin Dense-like" CPUs unlike + /// other cases, so this means a bona fide Zen 5c Turin Dense part. 
+ AmdTurinDense, +} + +impl fmt::Display for SledCpuFamily { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SledCpuFamily::Unknown => write!(f, "unknown"), + SledCpuFamily::AmdMilan => write!(f, "milan"), + SledCpuFamily::AmdTurin => write!(f, "turin"), + SledCpuFamily::AmdTurinDense => write!(f, "turin_dense"), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index f5e5baa0aef..867512737e0 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -26,7 +26,7 @@ use omicron_common::update::OmicronZoneManifestSource; use omicron_common::{ api::{ external::{ByteCount, Generation}, - internal::shared::{NetworkInterface, SourceNatConfig}, + internal::shared::{NetworkInterface, SourceNatConfig, SledCpuFamily}, }, disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig}, update::ArtifactId, @@ -121,6 +121,7 @@ pub struct Inventory { pub baseboard: Baseboard, pub usable_hardware_threads: u32, pub usable_physical_ram: ByteCount, + pub cpu_family: SledCpuFamily, pub reservoir_size: ByteCount, pub disks: Vec, pub zpools: Vec, diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 12fdf5aad25..ebebf139805 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -8,6 +8,7 @@ use crate::ArtifactHash; use crate::Generation; use crate::PhysicalDiskKind; use crate::omicron_zone_config::{self, OmicronZoneNic}; +use crate::sled_cpu_family::SledCpuFamily; use crate::typed_uuid::DbTypedUuid; use crate::{ ByteCount, MacAddr, Name, ServiceKind, SqlU8, SqlU16, SqlU32, @@ -887,6 +888,7 @@ pub struct InvSledAgent { pub sled_role: SledRole, pub usable_hardware_threads: SqlU32, pub usable_physical_ram: ByteCount, + pub cpu_family: SledCpuFamily, pub reservoir_size: ByteCount, // Soft foreign key to an `InvOmicronSledConfig` pub ledgered_sled_config: 
Option>, @@ -1300,6 +1302,7 @@ impl InvSledAgent { usable_physical_ram: ByteCount::from( sled_agent.usable_physical_ram, ), + cpu_family: sled_agent.cpu_family.into(), reservoir_size: ByteCount::from(sled_agent.reservoir_size), ledgered_sled_config: ledgered_sled_config.map(From::from), reconciler_status, diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index 12c8c4ba5c7..13838b89fce 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -26,9 +26,9 @@ impl_enum_type!( AmdTurinDense => b"amd_turin_dense" ); -impl From for SledCpuFamily { - fn from(value: nexus_types::internal_api::params::SledCpuFamily) -> Self { - use nexus_types::internal_api::params::SledCpuFamily as InputFamily; +impl From for SledCpuFamily { + fn from(value: omicron_common::api::internal::shared::SledCpuFamily) -> Self { + use omicron_common::api::internal::shared::SledCpuFamily as InputFamily; match value { InputFamily::Unknown => Self::Unknown, InputFamily::AmdMilan => Self::AmdMilan, @@ -38,7 +38,7 @@ impl From for SledCpuFamily { } } -impl From for nexus_types::internal_api::params::SledCpuFamily { +impl From for omicron_common::api::internal::shared::SledCpuFamily { fn from(value: SledCpuFamily) -> Self { match value { SledCpuFamily::Unknown => Self::Unknown, diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 79dc4f443bf..71255dd9552 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -1385,6 +1385,8 @@ impl DataStore { sled_agent.usable_physical_ram, ) .into_sql::(), + nexus_db_model::SledCpuFamily::from(sled_agent.cpu_family) + .into_sql::(), nexus_db_model::ByteCount::from( sled_agent.reservoir_size, ) @@ -1439,6 +1441,7 @@ impl DataStore { sa_dsl::sled_role, sa_dsl::usable_hardware_threads, sa_dsl::usable_physical_ram, + sa_dsl::cpu_family, sa_dsl::reservoir_size, 
sa_dsl::ledgered_sled_config, sa_dsl::reconciler_status_kind, @@ -1470,6 +1473,7 @@ impl DataStore { _sled_role, _usable_hardware_threads, _usable_physical_ram, + _cpu_family, _reservoir_size, _ledgered_sled_config, _reconciler_status_kind, @@ -3846,6 +3850,7 @@ impl DataStore { sled_role: s.sled_role.into(), usable_hardware_threads: u32::from(s.usable_hardware_threads), usable_physical_ram: s.usable_physical_ram.into(), + cpu_family: s.cpu_family.into(), reservoir_size: s.reservoir_size.into(), // For disks, zpools, and datasets, the map for a sled ID is // only populated if there is at least one disk/zpool/dataset diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 3f1fa67ca70..fb04983dce4 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1609,6 +1609,7 @@ table! { sled_role -> crate::enums::SledRoleEnum, usable_hardware_threads -> Int8, usable_physical_ram -> Int8, + cpu_family -> crate::enums::SledCpuFamilyEnum, reservoir_size -> Int8, ledgered_sled_config -> Nullable, diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index c9eb4622df3..76af085af9a 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -595,6 +595,7 @@ impl CollectionBuilder { baseboard_id, usable_hardware_threads: inventory.usable_hardware_threads, usable_physical_ram: inventory.usable_physical_ram, + cpu_family: inventory.cpu_family, reservoir_size: inventory.reservoir_size, time_collected, sled_id, diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index fa0bf82d309..86c67a9c652 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -40,6 +40,7 @@ use nexus_types::inventory::ZpoolName; use omicron_cockroach_metrics::MetricValue; use omicron_cockroach_metrics::PrometheusMetrics; use omicron_common::api::external::ByteCount; +use omicron_common::api::internal::shared::SledCpuFamily; use 
omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetKind; use omicron_common::disk::DatasetName; @@ -957,6 +958,7 @@ pub fn sled_agent( sled_id, usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), + cpu_family: SledCpuFamily::AmdMilan, disks, zpools, datasets, diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 4f745953142..2ad075b4fc1 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -57,6 +57,7 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::address::get_sled_address; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::DiskIdentity; use omicron_common::disk::DiskVariant; use omicron_common::disk::M2Slot; @@ -1071,6 +1072,7 @@ impl Sled { sled_id, usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), + cpu_family: SledCpuFamily::AmdMilan, // Populate disks, appearing like a real device. 
disks: zpools .values() @@ -1267,6 +1269,7 @@ impl Sled { sled_id, usable_hardware_threads: inv_sled_agent.usable_hardware_threads, usable_physical_ram: inv_sled_agent.usable_physical_ram, + cpu_family: inv_sled_agent.cpu_family, disks: vec![], zpools: vec![], datasets: vec![], diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 45755187d39..3362853fe4f 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -18,6 +18,7 @@ use omicron_common::api::internal::nexus::Certificate; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::ExternalPortDiscovery; use omicron_common::api::internal::shared::RackNetworkConfig; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::api::internal::shared::SourceNatConfig; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -30,28 +31,6 @@ use std::net::SocketAddr; use std::net::SocketAddrV6; use uuid::Uuid; -/// Identifies the kind of CPU present on a sled, determined by reading CPUID. -#[derive(Serialize, Deserialize, Debug, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub enum SledCpuFamily { - /// The CPU vendor or its family number don't correspond to any of the - /// known family variants. - Unknown, - - /// AMD Milan processors (or very close). Could be an actual Milan in a - /// Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is - /// the greatest common denominator). - AmdMilan, - - /// AMD Turin processors (or very close). Could be an actual Turin in a - /// Cosmo, or a close-to-Turin client Zen 5 part. - AmdTurin, - - /// AMD Turin Dense processors. There are no "Turin Dense-like" CPUs unlike - /// other cases, so this means a bona fide Zen 5c Turin Dense part. 
- AmdTurinDense, -} - /// Sent by a sled agent to Nexus to inform about resources #[derive(Serialize, Deserialize, Debug, JsonSchema)] pub struct SledAgentInfo { diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 7acfe51cd6a..285a529b394 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -36,6 +36,7 @@ use omicron_common::api::external::ByteCount; pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; pub use omicron_common::api::internal::shared::SourceNatConfig; +pub use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::M2Slot; pub use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::CollectionUuid; @@ -638,6 +639,7 @@ pub struct SledAgent { pub sled_role: SledRole, pub usable_hardware_threads: u32, pub usable_physical_ram: ByteCount, + pub cpu_family: SledCpuFamily, pub reservoir_size: ByteCount, pub disks: Vec, pub zpools: Vec, diff --git a/nexus/types/src/inventory/display.rs b/nexus/types/src/inventory/display.rs index 2917b762623..cfee239a417 100644 --- a/nexus/types/src/inventory/display.rs +++ b/nexus/types/src/inventory/display.rs @@ -553,6 +553,7 @@ fn display_sleds( sled_role, usable_hardware_threads, usable_physical_ram, + cpu_family, reservoir_size, disks, zpools, @@ -585,6 +586,7 @@ fn display_sleds( )?; writeln!(indented, "address: {}", sled_agent_address)?; writeln!(indented, "usable hw threads: {}", usable_hardware_threads)?; + writeln!(indented, "CPU family: {}", cpu_family)?; writeln!( indented, "usable memory (GiB): {}", diff --git a/sled-agent/src/sim/config.rs b/sled-agent/src/sim/config.rs index d77d08fc50b..4f0851d0dd3 100644 --- a/sled-agent/src/sim/config.rs +++ b/sled-agent/src/sim/config.rs @@ -7,7 +7,7 @@ use crate::updates::ConfigUpdates; use camino::Utf8Path; use dropshot::ConfigDropshot; -use nexus_client::types::SledCpuFamily; +use 
omicron_common::api::internal::shared::SledCpuFamily; use omicron_uuid_kinds::SledUuid; use serde::Deserialize; use serde::Serialize; diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 43010c18014..c1609c89358 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -754,6 +754,7 @@ impl SledAgent { self.config.hardware.physical_ram, ) .context("usable_physical_ram")?, + cpu_family: self.config.hardware.cpu_family, reservoir_size: ByteCount::try_from( self.config.hardware.reservoir_ram, ) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 883140ed8d8..c76d102535f 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -1097,6 +1097,8 @@ impl SledAgent { self.inner.hardware.online_processor_count(); let usable_physical_ram = self.inner.hardware.usable_physical_ram_bytes(); + let cpu_family = + self.inner.hardware.cpu_family(); let reservoir_size = self.inner.instances.reservoir_size(); let sled_role = if is_scrimlet { SledRole::Scrimlet } else { SledRole::Gimlet }; @@ -1119,6 +1121,7 @@ impl SledAgent { baseboard, usable_hardware_threads, usable_physical_ram: ByteCount::try_from(usable_physical_ram)?, + cpu_family, reservoir_size, disks, zpools, From e9cbbdd7b3462c0410d801a69166651102368a6e Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 2 Aug 2025 01:28:12 +0000 Subject: [PATCH 08/17] it compiles (might work now?) 
--- common/src/api/internal/shared.rs | 13 +++++++++- nexus-sled-agent-shared/src/inventory.rs | 2 +- nexus/db-model/src/sled_cpu_family.rs | 12 ++++++--- .../src/db/datastore/physical_disk.rs | 2 ++ nexus/inventory/src/collector.rs | 2 +- nexus/test-utils/src/lib.rs | 2 +- nexus/tests/integration_tests/rack.rs | 2 +- nexus/types/src/inventory.rs | 2 +- sled-agent/src/bin/sled-agent-sim.rs | 2 +- sled-agent/src/nexus.rs | 11 ++++---- sled-agent/src/rack_setup/plan/service.rs | 2 ++ sled-agent/src/rack_setup/service.rs | 2 ++ sled-agent/src/sim/server.rs | 4 +-- sled-agent/src/sled_agent.rs | 3 +-- sled-hardware/src/illumos/mod.rs | 5 ++-- sled-hardware/src/lib.rs | 26 ++++++++++--------- sled-hardware/src/non_illumos/mod.rs | 5 ++-- sled-hardware/types/src/lib.rs | 18 ------------- 18 files changed, 62 insertions(+), 53 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index f912cba3f30..f23925318c3 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -1102,7 +1102,18 @@ pub struct SledIdentifiers { } /// Identifies the kind of CPU present on a sled, determined by reading CPUID. -#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema)] +/// +/// This is intended to broadly support the control plane answering the question +/// "can I run this instance on that sled?" given an instance with either no or +/// some CPU platform requirement. It is not enough information for more precise +/// placement questions - for example, is a CPU a high-frequency part or +/// many-core part? We don't include Genoa here, but in that CPU family there +/// are high frequency parts, many-core parts, and large-cache parts. To support +/// those questions (or satisfactorily answer #8730) we would need to collect +/// additional information and send it along. 
+#[derive( + Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema, +)] #[serde(rename_all = "snake_case")] pub enum SledCpuFamily { /// The CPU vendor or its family number don't correspond to any of the diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 867512737e0..ac7856695fd 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -26,7 +26,7 @@ use omicron_common::update::OmicronZoneManifestSource; use omicron_common::{ api::{ external::{ByteCount, Generation}, - internal::shared::{NetworkInterface, SourceNatConfig, SledCpuFamily}, + internal::shared::{NetworkInterface, SledCpuFamily, SourceNatConfig}, }, disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig}, update::ArtifactId, diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index 13838b89fce..700be75946d 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -26,8 +26,12 @@ impl_enum_type!( AmdTurinDense => b"amd_turin_dense" ); -impl From for SledCpuFamily { - fn from(value: omicron_common::api::internal::shared::SledCpuFamily) -> Self { +impl From + for SledCpuFamily +{ + fn from( + value: omicron_common::api::internal::shared::SledCpuFamily, + ) -> Self { use omicron_common::api::internal::shared::SledCpuFamily as InputFamily; match value { InputFamily::Unknown => Self::Unknown, @@ -38,7 +42,9 @@ impl From for SledCpuFamil } } -impl From for omicron_common::api::internal::shared::SledCpuFamily { +impl From + for omicron_common::api::internal::shared::SledCpuFamily +{ fn from(value: SledCpuFamily) -> Self { match value { SledCpuFamily::Unknown => Self::Unknown, diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 9409c6c9e1d..2cea054b29d 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ 
b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -344,6 +344,7 @@ mod test { }; use nexus_types::identity::Asset; use omicron_common::api::external::ByteCount; + use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_test_utils::dev; use std::num::NonZeroU32; @@ -693,6 +694,7 @@ mod test { sled_id: SledUuid::from_untyped_uuid(sled.id()), usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), + cpu_family: SledCpuFamily::AmdMilan, disks, zpools: vec![], datasets: vec![], diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index fddde6f5c07..ce1988b36de 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -666,7 +666,6 @@ mod test { use crate::StaticSledAgentEnumerator; use gateway_messages::SpPort; use id_map::IdMap; - use nexus_client::types::SledCpuFamily; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; @@ -676,6 +675,7 @@ mod test { use nexus_types::inventory::Collection; use omicron_cockroach_metrics::CockroachClusterAdminClient; use omicron_common::api::external::Generation; + use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::zpool_name::ZpoolName; use omicron_sled_agent::sim; use omicron_uuid_kinds::OmicronZoneUuid; diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index ed42d80ef7c..845d09b3a2f 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -27,7 +27,6 @@ use id_map::IdMap; use internal_dns_types::config::DnsConfigBuilder; use internal_dns_types::names::DNS_ZONE_EXTERNAL_TESTING; use internal_dns_types::names::ServiceName; -use nexus_client::types::SledCpuFamily; use nexus_config::Database; use nexus_config::DpdConfig; use nexus_config::InternalDns; @@ -75,6 +74,7 @@ use 
omicron_common::api::internal::nexus::ProducerKind; use omicron_common::api::internal::shared::DatasetKind; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::api::internal::shared::SourceNatConfig; use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::disk::CompressionAlgorithm; diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index b5d63858908..32610a3c043 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -21,9 +21,9 @@ use nexus_types::external_api::params; use nexus_types::external_api::shared::UninitializedSled; use nexus_types::external_api::views::Rack; use nexus_types::internal_api::params::SledAgentInfo; -use nexus_types::internal_api::params::SledCpuFamily; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_uuid_kinds::GenericUuid; use std::time::Duration; use uuid::Uuid; diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 285a529b394..70f2451d29b 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -35,8 +35,8 @@ use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use omicron_common::api::external::ByteCount; pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_common::api::internal::shared::SledCpuFamily; pub use omicron_common::api::internal::shared::SourceNatConfig; -pub use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::M2Slot; pub use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::CollectionUuid; diff --git a/sled-agent/src/bin/sled-agent-sim.rs 
b/sled-agent/src/bin/sled-agent-sim.rs index 8378dc02a49..ccb5e0eaf8d 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -12,8 +12,8 @@ use clap::Parser; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; -use nexus_client::types::SledCpuFamily; use omicron_common::api::internal::nexus::Certificate; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::cmd::CmdError; use omicron_common::cmd::fatal; use omicron_sled_agent::sim::RssArgs; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 9c335dc88e4..5ebb086da13 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -78,20 +78,21 @@ impl ConvertInto } impl ConvertInto - for sled_hardware_types::CpuFamily + for omicron_common::api::internal::shared::SledCpuFamily { fn convert(self) -> nexus_client::types::SledCpuFamily { + use omicron_common::api::internal::shared::SledCpuFamily as SharedSledCpuFamily; match self { - sled_hardware_types::CpuFamily::Unknown => { + SharedSledCpuFamily::Unknown => { nexus_client::types::SledCpuFamily::Unknown } - sled_hardware_types::CpuFamily::AmdMilan => { + SharedSledCpuFamily::AmdMilan => { nexus_client::types::SledCpuFamily::AmdMilan } - sled_hardware_types::CpuFamily::AmdTurin => { + SharedSledCpuFamily::AmdTurin => { nexus_client::types::SledCpuFamily::AmdTurin } - sled_hardware_types::CpuFamily::AmdTurinDense => { + SharedSledCpuFamily::AmdTurinDense => { nexus_client::types::SledCpuFamily::AmdTurinDense } } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 3732bca059a..bc6476f122a 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -1160,6 +1160,7 @@ mod tests { use omicron_common::api::external::ByteCount; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::RackNetworkConfig; + 
use omicron_common::api::internal::shared::SledCpuFamily; use oxnet::Ipv6Net; use sled_agent_types::rack_init::BootstrapAddressDiscovery; use sled_agent_types::rack_init::RecoverySiloConfig; @@ -1372,6 +1373,7 @@ mod tests { baseboard: Baseboard::Unknown, usable_hardware_threads: 32, usable_physical_ram: ByteCount::try_from(1_u64 << 40).unwrap(), + cpu_family: SledCpuFamily::AmdMilan, reservoir_size: ByteCount::try_from(1_u64 << 40).unwrap(), disks, zpools: vec![], diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 508733d4f2e..38684da7a57 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1754,6 +1754,7 @@ mod test { use omicron_common::{ address::{Ipv6Subnet, SLED_PREFIX, get_sled_address}, api::external::{ByteCount, Generation}, + api::internal::shared::SledCpuFamily, disk::{DiskIdentity, DiskVariant}, }; use omicron_uuid_kinds::SledUuid; @@ -1775,6 +1776,7 @@ mod test { baseboard: Baseboard::Unknown, usable_hardware_threads: 32, usable_physical_ram: ByteCount::from_gibibytes_u32(16), + cpu_family: SledCpuFamily::AmdMilan, reservoir_size: ByteCount::from_gibibytes_u32(0), disks: (0..u2_count) .map(|i| InventoryDisk { diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 690efdadfe3..f252e327834 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -8,7 +8,7 @@ use super::config::Config; use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; use super::storage::PantryServer; -use crate::nexus::NexusClient; +use crate::nexus::{ConvertInto, NexusClient}; use crate::rack_setup::SledConfig; use crate::rack_setup::service::build_initial_blueprint_from_sled_configs; use crate::rack_setup::{ @@ -166,7 +166,7 @@ impl Server { config.hardware.reservoir_ram, ) .unwrap(), - cpu_family: config.hardware.cpu_family, + cpu_family: config.hardware.cpu_family.convert(), generation: Generation::new(), decommissioned: 
false, }, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c76d102535f..7124938291c 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -1097,8 +1097,7 @@ impl SledAgent { self.inner.hardware.online_processor_count(); let usable_physical_ram = self.inner.hardware.usable_physical_ram_bytes(); - let cpu_family = - self.inner.hardware.cpu_family(); + let cpu_family = self.inner.hardware.cpu_family(); let reservoir_size = self.inner.instances.reservoir_size(); let sled_role = if is_scrimlet { SledRole::Scrimlet } else { SledRole::Gimlet }; diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index d51ede8a037..9e319d3cd8b 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -8,8 +8,9 @@ use camino::Utf8PathBuf; use gethostname::gethostname; use illumos_devinfo::{DevInfo, DevLinkType, DevLinks, Node, Property}; use libnvme::{Nvme, controller::Controller}; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; -use sled_hardware_types::{Baseboard, CpuFamily}; +use sled_hardware_types::Baseboard; use slog::Logger; use slog::debug; use slog::error; @@ -797,7 +798,7 @@ impl HardwareManager { .unwrap_or_else(|| Baseboard::unknown()) } - pub fn cpu_family(&self) -> CpuFamily { + pub fn cpu_family(&self) -> SledCpuFamily { let log = self.log.new(slog::o!("component" => "detect_cpu_family")); crate::detect_cpu_family(&log) } diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index bd5d00ac47b..97089091539 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -162,9 +162,11 @@ impl MemoryReservations { /// sled-agent does not yet know about libtopo, getting topo snapshots, walking /// them, or any of that, so the parsing is performed again here. 
#[cfg(target_arch = "x86_64")] -pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { +pub fn detect_cpu_family( + log: &Logger, +) -> omicron_common::api::internal::shared::SledCpuFamily { use core::arch::x86_64::__cpuid_count; - use sled_hardware_types::CpuFamily; + use omicron_common::api::internal::shared::SledCpuFamily; // Read leaf 0 to figure out the processor's vendor and whether leaf 1 // (which contains family, model, and stepping information) is available. @@ -175,14 +177,14 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { // If leaf 1 is unavailable, there's no way to figure out what family this // processor belongs to. if leaf_0.eax < 1 { - return CpuFamily::Unknown; + return SledCpuFamily::Unknown; } // Check the vendor ID string in ebx/ecx/edx. match (leaf_0.ebx, leaf_0.ecx, leaf_0.edx) { // "AuthenticAMD"; see AMD APM volume 3 (March 2024) section E.3.1. (0x68747541, 0x444D4163, 0x69746E65) => {} - _ => return CpuFamily::Unknown, + _ => return SledCpuFamily::Unknown, } // Feature detection after this point is AMD-specific - if we find ourselves @@ -249,7 +251,7 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { // This covers both Milan and Zen 3-based Threadrippers. I don't // have a 5000-series Threadripper on hand to test but I believe // they are feature-compatible. - CpuFamily::AmdMilan + SledCpuFamily::AmdMilan } 0x19 if model >= 0x10 && model <= 0x1F => { // This covers both Genoa and Zen 4-based Threadrippers. Again, @@ -259,33 +261,33 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { // choose, skipping the Zen 4 EPYC parts. So, round this down to // Milan; if we're here it's a lab system and the alternative is // "unknown". - CpuFamily::AmdMilan + SledCpuFamily::AmdMilan } 0x19 if model >= 0x20 && model <= 0x2F => { // These are client Zen 3 parts aka Vermeer. 
Feature-wise, they are // missing INVLPGB from Milan, but are otherwise close, and we don't // expose INVLPGB to guests currently anyway. - CpuFamily::AmdMilan + SledCpuFamily::AmdMilan } 0x19 if model >= 0x60 && model <= 0x6F => { // These are client Zen 4 parts aka Raphael. Similar to the above // with Genoa and Vermeer, round these down to Milan in support of // lab clusters instead of calling them unknown. - CpuFamily::AmdMilan + SledCpuFamily::AmdMilan } - 0x1A if model <= 0x0F => CpuFamily::AmdTurin, + 0x1A if model <= 0x0F => SledCpuFamily::AmdTurin, 0x1A if model >= 0x10 && model <= 0x1F => { // These are Turin Dense. From a CPU feature perspective they're // equivalently capable to Turin, but they are physically distinct // and sled operators should be able to see that. - CpuFamily::AmdTurinDense + SledCpuFamily::AmdTurinDense } 0x1A if model >= 0x40 && model <= 0x4F => { // These are client Zen 5 parts aka Granite Ridge. Won't be in a // rack, but plausibly in a lab cluster. Like other non-server // parts, these don't have INVLPGB, which we don't expose to guests. // They should otherwise be a sufficient stand-in for Turin. - CpuFamily::AmdTurin + SledCpuFamily::AmdTurin } // Remaining family/model ranges in known families are likely mobile // parts and intentionally rolled up into "Unknown." There, it's harder @@ -294,6 +296,6 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { // or APU as part of a development cluster! // // Other families are, of course, unknown. 
- _ => CpuFamily::Unknown, + _ => SledCpuFamily::Unknown, } } diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index 448dc59287c..c3dd03c61b5 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -4,9 +4,10 @@ use crate::SledMode; use crate::disk::{DiskPaths, Partition, PooledDiskError, UnparsedDisk}; +use omicron_common::api::internal::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_uuid_kinds::ZpoolUuid; -use sled_hardware_types::{Baseboard, CpuFamily}; +use sled_hardware_types::Baseboard; use slog::Logger; use std::collections::HashMap; use tokio::sync::broadcast; @@ -41,7 +42,7 @@ impl HardwareManager { unimplemented!("Accessing hardware unsupported on non-illumos"); } - pub fn cpu_family(&self) -> CpuFamily { + pub fn cpu_family(&self) -> SledCpuFamily { unimplemented!("Accessing hardware unsupported on non-illumos"); } diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index 5d6ea5c8d3b..b34b5b1f422 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -95,21 +95,3 @@ impl std::fmt::Display for Baseboard { } } } - -/// A general description of the CPU family for processor(s) in this sled. -/// -/// This is intended to broadly support the control plane answering the question -/// "can I run this instance on that sled?" given an instance with either no or -/// some CPU platform requirement. It is not enough information for more precise -/// placement questions - for example, is a CPU a high-frequency part or -/// many-core part? We don't include Genoa here, but in that CPU family there -/// are high frequency parts, many-core parts, and large-cache parts. To support -/// those questions (or satisfactorily answer #8730) we would need to collect -/// additional information and send it along. 
-#[derive(Clone, Copy, Debug)] -pub enum CpuFamily { - Unknown, - AmdMilan, - AmdTurin, - AmdTurinDense, -} From bf7ccae08a50b449b2bc7ff4f7960d8b767df9cc Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 2 Aug 2025 02:02:50 +0000 Subject: [PATCH 09/17] migrations need to be... right ... --- schema/crdb/dbinit.sql | 4 ++++ schema/crdb/sled-cpu-family/up04.sql | 2 ++ schema/crdb/sled-cpu-family/up05.sql | 1 + 3 files changed, 7 insertions(+) create mode 100644 schema/crdb/sled-cpu-family/up04.sql create mode 100644 schema/crdb/sled-cpu-family/up05.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 0a5587c5939..6eccc05b831 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3705,6 +3705,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( -- present. mupdate_override_boot_disk_error TEXT, + -- The sled's CPU family. This is also duplicated with the `sled` table, + -- similar to `usable_hardware_threads` and friends above. + cpu_family omicron.public.sled_cpu_family NOT NULL, + CONSTRAINT reconciler_status_sled_config_present_if_running CHECK ( (reconciler_status_kind = 'running' AND reconciler_status_sled_config IS NOT NULL) diff --git a/schema/crdb/sled-cpu-family/up04.sql b/schema/crdb/sled-cpu-family/up04.sql new file mode 100644 index 00000000000..b2fd0b97156 --- /dev/null +++ b/schema/crdb/sled-cpu-family/up04.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_sled_agent ADD COLUMN IF NOT EXISTS + cpu_family omicron.public.sled_cpu_family NOT NULL DEFAULT 'unknown'; diff --git a/schema/crdb/sled-cpu-family/up05.sql b/schema/crdb/sled-cpu-family/up05.sql new file mode 100644 index 00000000000..61db961a1b5 --- /dev/null +++ b/schema/crdb/sled-cpu-family/up05.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.inv_sled_agent ALTER COLUMN cpu_family DROP DEFAULT; From 0a79d5ed71ce6f07f40df68b4f88aac2bdba241c Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 2 Aug 2025 02:10:29 +0000 Subject: [PATCH 10/17] and that's the 
missing update of cpu_family. --- nexus/db-queries/src/db/datastore/sled.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 435a2e9d3b6..39ce5e08c36 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -309,6 +309,7 @@ impl DataStore { .eq(sled_update.usable_hardware_threads), dsl::usable_physical_ram.eq(sled_update.usable_physical_ram), dsl::reservoir_size.eq(sled_update.reservoir_size), + dsl::cpu_family.eq(sled_update.cpu_family), dsl::sled_agent_gen.eq(sled_update.sled_agent_gen), )) .filter(dsl::sled_agent_gen.lt(sled_update.sled_agent_gen)) From ea59a267a37069a688fbeb719a69d4d08d2ba0a1 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 2 Aug 2025 02:43:07 +0000 Subject: [PATCH 11/17] non-illumos has to build too ofc --- sled-hardware/src/non_illumos/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index c3dd03c61b5..caae6fcf6a1 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -4,7 +4,7 @@ use crate::SledMode; use crate::disk::{DiskPaths, Partition, PooledDiskError, UnparsedDisk}; -use omicron_common::api::internal::SledCpuFamily; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_uuid_kinds::ZpoolUuid; use sled_hardware_types::Baseboard; From 10cd3356e58774fe91ab48fbc9886338f3ec10a8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 3 Aug 2025 23:34:17 +0000 Subject: [PATCH 12/17] fix expectorated output and, oh, docs are in the openapi spec --- .../tests/output/cmds-example-stdout | 3 ++ .../output/cmds-mupdate-update-flow-stdout | 3 ++ openapi/nexus-internal.json | 2 +- openapi/sled-agent.json | 37 +++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git 
a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout index e6f97ae6104..a9b94800388 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout @@ -1101,6 +1101,7 @@ sled 2eb69596-f081-4e2d-9425-9994926e0832 (role = Gimlet, serial serial1) found at: from fake sled agent address: [fd00:1122:3344:102::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -1210,6 +1211,7 @@ sled 32d8d836-4d8a-4e54-8fa9-f31d79c42646 (role = Gimlet, serial serial2) found at: from fake sled agent address: [fd00:1122:3344:103::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -1319,6 +1321,7 @@ sled 89d02b1b-478c-401a-8e28-7a26f74fa41b (role = Gimlet, serial serial0) found at: from fake sled agent address: [fd00:1122:3344:101::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout index 77b502ab240..deeb6a9c044 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout @@ -88,6 +88,7 @@ sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c (role = Gimlet, serial serial1) found at: from fake sled agent address: [fd00:1122:3344:102::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -194,6 +195,7 @@ sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 (role = Gimlet, serial serial0) found at: from fake sled agent address: [fd00:1122:3344:101::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -302,6 
+304,7 @@ sled d81c6a84-79b8-4958-ae41-ea46c9b19763 (role = Gimlet, serial serial2) found at: from fake sled agent address: [fd00:1122:3344:103::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index ff039ee29a6..f3954ea20a2 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -7270,7 +7270,7 @@ ] }, "SledCpuFamily": { - "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID.", + "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID.\n\nThis is intended to broadly support the control plane answering the question \"can I run this instance on that sled?\" given an instance with either no or some CPU platform requirement. It is not enough information for more precise placement questions - for example, is a CPU a high-frequency part or many-core part? We don't include Genoa here, but in that CPU family there are high frequency parts, many-core parts, and large-cache parts. 
To support those questions (or satisfactorily answer #8730) we would need to collect additional information and send it along.", "oneOf": [ { "description": "The CPU vendor or its family number don't correspond to any of the known family variants.", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ac78c85e472..79fcdddbc6f 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -5059,6 +5059,9 @@ "baseboard": { "$ref": "#/components/schemas/Baseboard" }, + "cpu_family": { + "$ref": "#/components/schemas/SledCpuFamily" + }, "datasets": { "type": "array", "items": { @@ -5122,6 +5125,7 @@ }, "required": [ "baseboard", + "cpu_family", "datasets", "disks", "reconciler_status", @@ -6993,6 +6997,39 @@ "com4" ] }, + "SledCpuFamily": { + "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID.\n\nThis is intended to broadly support the control plane answering the question \"can I run this instance on that sled?\" given an instance with either no or some CPU platform requirement. It is not enough information for more precise placement questions - for example, is a CPU a high-frequency part or many-core part? We don't include Genoa here, but in that CPU family there are high frequency parts, many-core parts, and large-cache parts. To support those questions (or satisfactorily answer #8730) we would need to collect additional information and send it along.", + "oneOf": [ + { + "description": "The CPU vendor or its family number don't correspond to any of the known family variants.", + "type": "string", + "enum": [ + "unknown" + ] + }, + { + "description": "AMD Milan processors (or very close). Could be an actual Milan in a Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is the greatest common denominator).", + "type": "string", + "enum": [ + "amd_milan" + ] + }, + { + "description": "AMD Turin processors (or very close). 
Could be an actual Turin in a Cosmo, or a close-to-Turin client Zen 5 part.", + "type": "string", + "enum": [ + "amd_turin" + ] + }, + { + "description": "AMD Turin Dense processors. There are no \"Turin Dense-like\" CPUs unlike other cases, so this means a bona fide Zen 5c Turin Dense part.", + "type": "string", + "enum": [ + "amd_turin_dense" + ] + } + ] + }, "SledDiagnosticsQueryOutput": { "oneOf": [ { From 99a37f08ae4163f003b74776870f8be454643b11 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 6 Aug 2025 20:26:48 +0000 Subject: [PATCH 13/17] cleanup --- common/src/api/internal/shared.rs | 18 ++++++++++++------ sled-hardware/src/lib.rs | 4 ++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index f23925318c3..c87e1a81d56 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -1134,17 +1134,23 @@ pub enum SledCpuFamily { AmdTurinDense, } -impl fmt::Display for SledCpuFamily { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl SledCpuFamily { + fn as_str(&self) -> &'static str { match self { - SledCpuFamily::Unknown => write!(f, "unknown"), - SledCpuFamily::AmdMilan => write!(f, "milan"), - SledCpuFamily::AmdTurin => write!(f, "turin"), - SledCpuFamily::AmdTurinDense => write!(f, "turin_dense"), + SledCpuFamily::Unknown => "unknown", + SledCpuFamily::AmdMilan => "amd_milan", + SledCpuFamily::AmdTurin => "amd_turin", + SledCpuFamily::AmdTurinDense => "amd_turin_dense", } } } +impl fmt::Display for SledCpuFamily { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 97089091539..b475e2f28da 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -226,8 +226,8 @@ pub fn detect_cpu_family( "leaf1.ebx" => format_args!("{:#08x}", leaf_1.ebx), "leaf1.ecx" => 
format_args!("{:#08x}", leaf_1.ecx), "leaf1.edx" => format_args!("{:#08x}", leaf_1.edx), - "parsed family" => format_args!("{family:#x}"), - "parsed model" => format_args!("{model:#x}"), + "parsed_family" => format_args!("{family:#x}"), + "parsed_model" => format_args!("{model:#x}"), ); // Match on the family/model ranges we've detected. Notably client parts are From 6846a4ad9f9a47cbbb656e7a8e97bff750ec20a3 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 6 Aug 2025 22:43:09 +0000 Subject: [PATCH 14/17] move SledCpuFamily to a more fitting place --- common/src/api/internal/shared.rs | 50 ------------------- nexus-sled-agent-shared/src/inventory.rs | 6 +-- nexus/db-model/src/sled_cpu_family.rs | 14 ++---- .../src/db/datastore/physical_disk.rs | 3 +- nexus/inventory/src/collector.rs | 2 +- nexus/inventory/src/examples.rs | 2 +- nexus/reconfigurator/planning/src/system.rs | 2 +- nexus/test-utils/src/lib.rs | 2 +- nexus/tests/integration_tests/rack.rs | 2 +- nexus/types/src/internal_api/params.rs | 3 +- nexus/types/src/inventory.rs | 2 +- sled-agent/src/bin/sled-agent-sim.rs | 3 +- sled-agent/src/nexus.rs | 4 +- sled-agent/src/rack_setup/plan/service.rs | 2 +- sled-agent/src/rack_setup/service.rs | 3 +- sled-agent/src/sim/config.rs | 3 +- sled-hardware/src/illumos/mod.rs | 3 +- sled-hardware/src/lib.rs | 6 +-- sled-hardware/src/non_illumos/mod.rs | 3 +- sled-hardware/types/src/lib.rs | 50 +++++++++++++++++++ 20 files changed, 75 insertions(+), 90 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index c87e1a81d56..ebc9f6a46ca 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -1101,56 +1101,6 @@ pub struct SledIdentifiers { pub serial: String, } -/// Identifies the kind of CPU present on a sled, determined by reading CPUID. -/// -/// This is intended to broadly support the control plane answering the question -/// "can I run this instance on that sled?" 
given an instance with either no or -/// some CPU platform requirement. It is not enough information for more precise -/// placement questions - for example, is a CPU a high-frequency part or -/// many-core part? We don't include Genoa here, but in that CPU family there -/// are high frequency parts, many-core parts, and large-cache parts. To support -/// those questions (or satisfactorily answer #8730) we would need to collect -/// additional information and send it along. -#[derive( - Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum SledCpuFamily { - /// The CPU vendor or its family number don't correspond to any of the - /// known family variants. - Unknown, - - /// AMD Milan processors (or very close). Could be an actual Milan in a - /// Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is - /// the greatest common denominator). - AmdMilan, - - /// AMD Turin processors (or very close). Could be an actual Turin in a - /// Cosmo, or a close-to-Turin client Zen 5 part. - AmdTurin, - - /// AMD Turin Dense processors. There are no "Turin Dense-like" CPUs unlike - /// other cases, so this means a bona fide Zen 5c Turin Dense part. 
- AmdTurinDense, -} - -impl SledCpuFamily { - fn as_str(&self) -> &'static str { - match self { - SledCpuFamily::Unknown => "unknown", - SledCpuFamily::AmdMilan => "amd_milan", - SledCpuFamily::AmdTurin => "amd_turin", - SledCpuFamily::AmdTurinDense => "amd_turin_dense", - } - } -} - -impl fmt::Display for SledCpuFamily { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.as_str()) - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index ac7856695fd..d26d8ad29a1 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -26,7 +26,7 @@ use omicron_common::update::OmicronZoneManifestSource; use omicron_common::{ api::{ external::{ByteCount, Generation}, - internal::shared::{NetworkInterface, SledCpuFamily, SourceNatConfig}, + internal::shared::{NetworkInterface, SourceNatConfig}, }, disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig}, update::ArtifactId, @@ -40,9 +40,9 @@ use omicron_uuid_kinds::{SledUuid, ZpoolUuid}; use schemars::schema::{Schema, SchemaObject}; use schemars::{JsonSchema, SchemaGenerator}; use serde::{Deserialize, Serialize}; -// Export this type for convenience -- this way, dependents don't have to +// Export these types for convenience -- this way, dependents don't have to // depend on sled-hardware-types. 
-pub use sled_hardware_types::Baseboard; +pub use sled_hardware_types::{Baseboard, SledCpuFamily}; use strum::EnumIter; use tufaceous_artifact::{ArtifactHash, KnownArtifactKind}; diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index 700be75946d..703728eca1d 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -26,13 +26,9 @@ impl_enum_type!( AmdTurinDense => b"amd_turin_dense" ); -impl From - for SledCpuFamily -{ - fn from( - value: omicron_common::api::internal::shared::SledCpuFamily, - ) -> Self { - use omicron_common::api::internal::shared::SledCpuFamily as InputFamily; +impl From for SledCpuFamily { + fn from(value: nexus_sled_agent_shared::inventory::SledCpuFamily) -> Self { + use nexus_sled_agent_shared::inventory::SledCpuFamily as InputFamily; match value { InputFamily::Unknown => Self::Unknown, InputFamily::AmdMilan => Self::AmdMilan, @@ -42,9 +38,7 @@ impl From } } -impl From - for omicron_common::api::internal::shared::SledCpuFamily -{ +impl From for nexus_sled_agent_shared::inventory::SledCpuFamily { fn from(value: SledCpuFamily) -> Self { match value { SledCpuFamily::Unknown => Self::Unknown, diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 2cea054b29d..0012ee54f64 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -340,11 +340,10 @@ mod test { use nexus_db_lookup::LookupPath; use nexus_sled_agent_shared::inventory::{ Baseboard, ConfigReconcilerInventoryStatus, Inventory, InventoryDisk, - SledRole, ZoneImageResolverInventory, + SledCpuFamily, SledRole, ZoneImageResolverInventory, }; use nexus_types::identity::Asset; use omicron_common::api::external::ByteCount; - use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_test_utils::dev; use 
std::num::NonZeroU32; diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index ce1988b36de..ec02670684a 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -671,11 +671,11 @@ mod test { use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; + use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::OmicronZoneType; use nexus_types::inventory::Collection; use omicron_cockroach_metrics::CockroachClusterAdminClient; use omicron_common::api::external::Generation; - use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::zpool_name::ZpoolName; use omicron_sled_agent::sim; use omicron_uuid_kinds::OmicronZoneUuid; diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 86c67a9c652..81233de5a83 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -29,6 +29,7 @@ use nexus_sled_agent_shared::inventory::InventoryZpool; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_sled_agent_shared::inventory::OrphanedDataset; +use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use nexus_types::inventory::BaseboardId; @@ -40,7 +41,6 @@ use nexus_types::inventory::ZpoolName; use omicron_cockroach_metrics::MetricValue; use omicron_cockroach_metrics::PrometheusMetrics; use omicron_common::api::external::ByteCount; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetKind; use omicron_common::disk::DatasetName; diff --git a/nexus/reconfigurator/planning/src/system.rs 
b/nexus/reconfigurator/planning/src/system.rs index 2ad075b4fc1..172db99c67a 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -23,6 +23,7 @@ use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; use nexus_sled_agent_shared::inventory::MupdateOverrideBootInventory; use nexus_sled_agent_shared::inventory::OmicronSledConfig; +use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use nexus_sled_agent_shared::inventory::ZoneManifestBootInventory; @@ -57,7 +58,6 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::address::get_sled_address; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::DiskIdentity; use omicron_common::disk::DiskVariant; use omicron_common::disk::M2Slot; diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 845d09b3a2f..f68c5a96573 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -38,6 +38,7 @@ use nexus_db_queries::db::pub_test_utils::crdb; use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; +use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::recovery_silo::RecoverySiloConfig; use nexus_test_interface::NexusServer; use nexus_types::deployment::Blueprint; @@ -74,7 +75,6 @@ use omicron_common::api::internal::nexus::ProducerKind; use omicron_common::api::internal::shared::DatasetKind; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; -use omicron_common::api::internal::shared::SledCpuFamily; use 
omicron_common::api::internal::shared::SourceNatConfig; use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::disk::CompressionAlgorithm; diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index 32610a3c043..5b2fb969433 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -10,6 +10,7 @@ use nexus_db_model::SledBaseboard; use nexus_db_model::SledCpuFamily as DbSledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; +use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::SledRole; use nexus_test_utils::TEST_SUITE_PASSWORD; use nexus_test_utils::http_testing::AuthnMode; @@ -23,7 +24,6 @@ use nexus_types::external_api::views::Rack; use nexus_types::internal_api::params::SledAgentInfo; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_uuid_kinds::GenericUuid; use std::time::Duration; use uuid::Uuid; diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 3362853fe4f..fdb12c42a26 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -8,7 +8,7 @@ use crate::deployment::Blueprint; use crate::external_api::params::PhysicalDiskKind; use crate::external_api::shared::Baseboard; use crate::external_api::shared::IpRange; -use nexus_sled_agent_shared::inventory::SledRole; +use nexus_sled_agent_shared::inventory::{SledCpuFamily, SledRole}; use nexus_sled_agent_shared::recovery_silo::RecoverySiloConfig; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; @@ -18,7 +18,6 @@ use omicron_common::api::internal::nexus::Certificate; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::ExternalPortDiscovery; use 
omicron_common::api::internal::shared::RackNetworkConfig; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::api::internal::shared::SourceNatConfig; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::PhysicalDiskUuid; diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 70f2451d29b..9c1322d5821 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -30,12 +30,12 @@ use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; +use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use omicron_common::api::external::ByteCount; pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; -use omicron_common::api::internal::shared::SledCpuFamily; pub use omicron_common::api::internal::shared::SourceNatConfig; use omicron_common::disk::M2Slot; pub use omicron_common::zpool_name::ZpoolName; diff --git a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs index ccb5e0eaf8d..88ca421c555 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -13,7 +13,6 @@ use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; use omicron_common::api::internal::nexus::Certificate; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::cmd::CmdError; use omicron_common::cmd::fatal; use omicron_sled_agent::sim::RssArgs; @@ -22,7 +21,7 @@ use omicron_sled_agent::sim::{ run_standalone_server, }; use omicron_uuid_kinds::SledUuid; -use sled_hardware_types::Baseboard; +use sled_hardware_types::{Baseboard, SledCpuFamily}; use 
std::net::SocketAddr; use std::net::SocketAddrV6; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 5ebb086da13..e9e28b5c606 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -78,10 +78,10 @@ impl ConvertInto } impl ConvertInto - for omicron_common::api::internal::shared::SledCpuFamily + for sled_hardware_types::SledCpuFamily { fn convert(self) -> nexus_client::types::SledCpuFamily { - use omicron_common::api::internal::shared::SledCpuFamily as SharedSledCpuFamily; + use sled_hardware_types::SledCpuFamily as SharedSledCpuFamily; match self { SharedSledCpuFamily::Unknown => { nexus_client::types::SledCpuFamily::Unknown diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index bc6476f122a..37c74805c3c 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -1155,12 +1155,12 @@ impl ServicePortBuilder { mod tests { use super::*; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; + use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use omicron_common::address::IpRange; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::RackNetworkConfig; - use omicron_common::api::internal::shared::SledCpuFamily; use oxnet::Ipv6Net; use sled_agent_types::rack_init::BootstrapAddressDiscovery; use sled_agent_types::rack_init::RecoverySiloConfig; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 38684da7a57..5cf06ba0c32 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1749,12 +1749,11 @@ mod test { use nexus_reconfigurator_blippy::{Blippy, BlippyReportSortKey}; use nexus_sled_agent_shared::inventory::{ Baseboard, ConfigReconcilerInventoryStatus, Inventory, InventoryDisk, - 
OmicronZoneType, SledRole, ZoneImageResolverInventory, + OmicronZoneType, SledCpuFamily, SledRole, ZoneImageResolverInventory, }; use omicron_common::{ address::{Ipv6Subnet, SLED_PREFIX, get_sled_address}, api::external::{ByteCount, Generation}, - api::internal::shared::SledCpuFamily, disk::{DiskIdentity, DiskVariant}, }; use omicron_uuid_kinds::SledUuid; diff --git a/sled-agent/src/sim/config.rs b/sled-agent/src/sim/config.rs index 4f0851d0dd3..58454d2a507 100644 --- a/sled-agent/src/sim/config.rs +++ b/sled-agent/src/sim/config.rs @@ -7,11 +7,10 @@ use crate::updates::ConfigUpdates; use camino::Utf8Path; use dropshot::ConfigDropshot; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_uuid_kinds::SledUuid; use serde::Deserialize; use serde::Serialize; -pub use sled_hardware_types::Baseboard; +pub use sled_hardware_types::{Baseboard, SledCpuFamily}; use std::net::Ipv6Addr; use std::net::{IpAddr, SocketAddr}; diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 9e319d3cd8b..3f673e0b4ca 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -8,9 +8,8 @@ use camino::Utf8PathBuf; use gethostname::gethostname; use illumos_devinfo::{DevInfo, DevLinkType, DevLinks, Node, Property}; use libnvme::{Nvme, controller::Controller}; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; -use sled_hardware_types::Baseboard; +use sled_hardware_types::{Baseboard, SledCpuFamily}; use slog::Logger; use slog::debug; use slog::error; diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index b475e2f28da..582c13f4053 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -162,11 +162,9 @@ impl MemoryReservations { /// sled-agent does not yet know about libtopo, getting topo snapshots, walking /// them, or any of that, so the parsing is performed again here. 
#[cfg(target_arch = "x86_64")] -pub fn detect_cpu_family( - log: &Logger, -) -> omicron_common::api::internal::shared::SledCpuFamily { +pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::SledCpuFamily { use core::arch::x86_64::__cpuid_count; - use omicron_common::api::internal::shared::SledCpuFamily; + use sled_hardware_types::SledCpuFamily; // Read leaf 0 to figure out the processor's vendor and whether leaf 1 // (which contains family, model, and stepping information) is available. diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index caae6fcf6a1..314ea2ed4a4 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -4,10 +4,9 @@ use crate::SledMode; use crate::disk::{DiskPaths, Partition, PooledDiskError, UnparsedDisk}; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_uuid_kinds::ZpoolUuid; -use sled_hardware_types::Baseboard; +use sled_hardware_types::{SledCpuFamily, Baseboard}; use slog::Logger; use std::collections::HashMap; use tokio::sync::broadcast; diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index b34b5b1f422..ce4a29da4c0 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -95,3 +95,53 @@ impl std::fmt::Display for Baseboard { } } } + +/// Identifies the kind of CPU present on a sled, determined by reading CPUID. +/// +/// This is intended to broadly support the control plane answering the question +/// "can I run this instance on that sled?" given an instance with either no or +/// some CPU platform requirement. It is not enough information for more precise +/// placement questions - for example, is a CPU a high-frequency part or +/// many-core part? We don't include Genoa here, but in that CPU family there +/// are high frequency parts, many-core parts, and large-cache parts. 
To support +/// those questions (or satisfactorily answer #8730) we would need to collect +/// additional information and send it along. +#[derive( + Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum SledCpuFamily { + /// The CPU vendor or its family number don't correspond to any of the + /// known family variants. + Unknown, + + /// AMD Milan processors (or very close). Could be an actual Milan in a + /// Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is + /// the greatest common denominator). + AmdMilan, + + /// AMD Turin processors (or very close). Could be an actual Turin in a + /// Cosmo, or a close-to-Turin client Zen 5 part. + AmdTurin, + + /// AMD Turin Dense processors. There are no "Turin Dense-like" CPUs unlike + /// other cases, so this means a bona fide Zen 5c Turin Dense part. + AmdTurinDense, +} + +impl SledCpuFamily { + fn as_str(&self) -> &'static str { + match self { + SledCpuFamily::Unknown => "unknown", + SledCpuFamily::AmdMilan => "amd_milan", + SledCpuFamily::AmdTurin => "amd_turin", + SledCpuFamily::AmdTurinDense => "amd_turin_dense", + } + } +} + +impl std::fmt::Display for SledCpuFamily { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} From 5f94661d8a68c4b7a65f533af5337f3a18ed7838 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 6 Aug 2025 23:10:13 +0000 Subject: [PATCH 15/17] rustfmt AGH --- nexus/inventory/src/collector.rs | 2 +- sled-hardware/src/non_illumos/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index ec02670684a..7a4ac6ca959 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -671,8 +671,8 @@ mod test { use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use 
nexus_sled_agent_shared::inventory::OmicronZoneImageSource; - use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::OmicronZoneType; + use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_types::inventory::Collection; use omicron_cockroach_metrics::CockroachClusterAdminClient; use omicron_common::api::external::Generation; diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index 314ea2ed4a4..fa660ad0caa 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -6,7 +6,7 @@ use crate::SledMode; use crate::disk::{DiskPaths, Partition, PooledDiskError, UnparsedDisk}; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_uuid_kinds::ZpoolUuid; -use sled_hardware_types::{SledCpuFamily, Baseboard}; +use sled_hardware_types::{Baseboard, SledCpuFamily}; use slog::Logger; use std::collections::HashMap; use tokio::sync::broadcast; From 543bdc952de883f0fec1f866119e214f00c5613c Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 6 Aug 2025 23:29:10 +0000 Subject: [PATCH 16/17] and expectorate up the reconfigurator output --- .../reconfigurator-cli/tests/output/cmds-example-stdout | 6 +++--- .../tests/output/cmds-mupdate-update-flow-stdout | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout index a9b94800388..eafab73da16 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout @@ -1101,7 +1101,7 @@ sled 2eb69596-f081-4e2d-9425-9994926e0832 (role = Gimlet, serial serial1) found at: from fake sled agent address: [fd00:1122:3344:102::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -1211,7 +1211,7 @@ sled 
32d8d836-4d8a-4e54-8fa9-f31d79c42646 (role = Gimlet, serial serial2) found at: from fake sled agent address: [fd00:1122:3344:103::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -1321,7 +1321,7 @@ sled 89d02b1b-478c-401a-8e28-7a26f74fa41b (role = Gimlet, serial serial0) found at: from fake sled agent address: [fd00:1122:3344:101::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout index deeb6a9c044..ace1d720beb 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout @@ -88,7 +88,7 @@ sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c (role = Gimlet, serial serial1) found at: from fake sled agent address: [fd00:1122:3344:102::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -195,7 +195,7 @@ sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 (role = Gimlet, serial serial0) found at: from fake sled agent address: [fd00:1122:3344:101::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -304,7 +304,7 @@ sled d81c6a84-79b8-4958-ae41-ea46c9b19763 (role = Gimlet, serial serial2) found at: from fake sled agent address: [fd00:1122:3344:103::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: From 3e2ae248caf0d4bf456581c29df9ff3f3e600bf6 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 12 Aug 2025 20:44:43 +0000 Subject: [PATCH 17/17] fix bad merge and drop unused dep in nexus-inventory --- Cargo.lock | 1 - 
nexus/db-model/src/schema_versions.rs | 2 +- nexus/inventory/Cargo.toml | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d7941913c04..3250e918b68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6606,7 +6606,6 @@ dependencies = [ "id-map", "iddqd", "itertools 0.14.0", - "nexus-client", "nexus-sled-agent-shared", "nexus-types", "ntp-admin-client", diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index b8a81ca3ab3..24407614c53 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(179, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(180, 0, 0); /// List of all past database schema versions, in *reverse* order /// diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index 9d42aab4d0a..f6b90fb6f30 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -48,7 +48,6 @@ omicron-workspace-hack.workspace = true expectorate.workspace = true gateway-test-utils.workspace = true httpmock.workspace = true -nexus-client.workspace = true omicron-sled-agent.workspace = true regex.workspace = true tokio.workspace = true