Skip to content

Commit 91d9377

Browse files
committed
Collect timesync status in inventory
1 parent 356e0b3 commit 91d9377

File tree

17 files changed

+508
-51
lines changed

17 files changed

+508
-51
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dev-tools/ls-apis/tests/api_dependencies.out

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ Nexus Internal API (client: nexus-client)
7171
consumed by: propolis-server (propolis/bin/propolis-server) via 3 paths
7272

7373
NTP Admin (client: ntp-admin-client)
74+
consumed by: omicron-nexus (omicron/nexus) via 2 paths
7475
consumed by: omicron-sled-agent (omicron/sled-agent) via 2 paths
7576

7677
External API (client: oxide-client)

internal-dns/resolver/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ hickory-resolver.workspace = true
1313
hickory-proto.workspace = true
1414
internal-dns-types.workspace = true
1515
omicron-common.workspace = true
16+
omicron-uuid-kinds.workspace = true
1617
omicron-workspace-hack.workspace = true
1718
qorb.workspace = true
1819
reqwest = { workspace = true, features = ["rustls-tls", "stream"] }

internal-dns/resolver/src/resolver.rs

Lines changed: 225 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@ use hickory_resolver::config::{
99
};
1010
use hickory_resolver::lookup::SrvLookup;
1111
use hickory_resolver::name_server::TokioConnectionProvider;
12-
use internal_dns_types::names::ServiceName;
12+
use internal_dns_types::names::{DNS_ZONE, ServiceName};
1313
use omicron_common::address::{
1414
AZ_PREFIX, DNS_PORT, Ipv6Subnet, get_internal_dns_server_addresses,
1515
};
16+
use omicron_uuid_kinds::OmicronZoneUuid;
1617
use slog::{debug, error, info, trace};
1718
use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6};
1819

@@ -28,6 +29,37 @@ pub enum ResolveError {
2829
NotFoundByString(String),
2930
}
3031

32+
fn is_no_records_found(err: &hickory_resolver::ResolveError) -> bool {
33+
match err.kind() {
34+
hickory_resolver::ResolveErrorKind::Proto(proto_error) => {
35+
match proto_error.kind() {
36+
hickory_resolver::proto::ProtoErrorKind::NoRecordsFound {
37+
..
38+
} => true,
39+
_ => false,
40+
}
41+
}
42+
_ => false,
43+
}
44+
}
45+
46+
impl ResolveError {
47+
/// Returns "true" if this error indicates the record is not found.
48+
pub fn is_not_found(&self) -> bool {
49+
match self {
50+
ResolveError::NotFound(_) | ResolveError::NotFoundByString(_) => {
51+
true
52+
}
53+
ResolveError::Resolve(hickory_err)
54+
if is_no_records_found(&hickory_err) =>
55+
{
56+
true
57+
}
58+
_ => false,
59+
}
60+
}
61+
}
62+
3163
/// A wrapper around a set of bootstrap DNS addresses, providing a convenient
3264
/// way to construct a [`qorb::resolvers::dns::DnsResolver`] for specific
3365
/// services.
@@ -314,6 +346,38 @@ impl Resolver {
314346
}
315347
}
316348

349+
/// Returns the targets of the SRV records for a DNS name with their associated zone UUIDs.
350+
///
351+
/// Similar to [`Resolver::lookup_all_socket_v6`], but extracts the OmicronZoneUuid
352+
/// from DNS target names that follow the pattern `{uuid}.host.{DNS_ZONE}`.
353+
/// Returns a list of (OmicronZoneUuid, SocketAddrV6) pairs.
354+
///
355+
/// Returns an error if any target cannot be parsed as a zone UUID pattern.
356+
pub async fn lookup_all_socket_and_zone_v6(
357+
&self,
358+
service: ServiceName,
359+
) -> Result<Vec<(OmicronZoneUuid, SocketAddrV6)>, ResolveError> {
360+
let name = service.srv_name();
361+
trace!(self.log, "lookup_all_socket_and_zone_v6 srv"; "dns_name" => &name);
362+
let response = self.resolver.srv_lookup(&name).await?;
363+
debug!(
364+
self.log,
365+
"lookup_all_socket_and_zone_v6 srv";
366+
"dns_name" => &name,
367+
"response" => ?response
368+
);
369+
370+
let results = self
371+
.lookup_service_targets_with_zones(response)
372+
.await?
373+
.collect::<Vec<_>>();
374+
if !results.is_empty() {
375+
Ok(results)
376+
} else {
377+
Err(ResolveError::NotFound(service))
378+
}
379+
}
380+
317381
// Returns an iterator of SocketAddrs for the specified SRV name.
318382
//
319383
// Acts on a raw string for compatibility with the reqwest::dns::Resolve
@@ -399,6 +463,92 @@ impl Resolver {
399463
.flatten()
400464
}
401465

466+
/// Similar to [`Resolver::lookup_service_targets`], but extracts zone UUIDs from target names.
467+
///
468+
/// Returns an iterator of (OmicronZoneUuid, SocketAddrV6) pairs for targets that match
469+
/// the pattern `{uuid}.host.{DNS_ZONE}`. Returns an error if any target doesn't match
470+
/// this pattern.
471+
async fn lookup_service_targets_with_zones(
472+
&self,
473+
service_lookup: SrvLookup,
474+
) -> Result<
475+
impl Iterator<Item = (OmicronZoneUuid, SocketAddrV6)> + Send,
476+
ResolveError,
477+
> {
478+
let futures =
479+
std::iter::repeat((self.log.clone(), self.resolver.clone()))
480+
.zip(service_lookup.into_iter())
481+
.map(|((log, resolver), srv)| async move {
482+
let target = srv.target();
483+
let port = srv.port();
484+
let target_str = target.to_string();
485+
// Try to parse the zone UUID from the target name
486+
let zone_uuid = match Self::parse_zone_uuid_from_target(&target_str) {
487+
Some(uuid) => uuid,
488+
None => {
489+
error!(
490+
log,
491+
"lookup_service_targets_with_zones: target doesn't match zone pattern";
492+
"target" => ?target_str,
493+
);
494+
return Err((target.clone(), hickory_resolver::ResolveError::from(hickory_resolver::ResolveErrorKind::Message("target doesn't match zone pattern"))));
495+
}
496+
};
497+
trace!(
498+
log,
499+
"lookup_service_targets_with_zones: looking up SRV target";
500+
"name" => ?target,
501+
"zone_uuid" => ?zone_uuid,
502+
);
503+
resolver
504+
.ipv6_lookup(target.clone())
505+
.await
506+
.map(|ips| (ips, port, zone_uuid))
507+
.map_err(|err| (target.clone(), err))
508+
});
509+
let log = self.log.clone();
510+
let results = futures::future::join_all(futures).await;
511+
let mut socket_addrs = Vec::new();
512+
for result in results {
513+
match result {
514+
Ok((ips, port, zone_uuid)) => {
515+
// Add all IP addresses for this zone
516+
for aaaa in ips {
517+
socket_addrs.push((
518+
zone_uuid,
519+
SocketAddrV6::new(aaaa.into(), port, 0, 0),
520+
));
521+
}
522+
}
523+
Err((target, err)) => {
524+
error!(
525+
log,
526+
"lookup_service_targets_with_zones: failed looking up target";
527+
"name" => ?target,
528+
"error" => ?err,
529+
);
530+
return Err(ResolveError::Resolve(err));
531+
}
532+
}
533+
}
534+
Ok(socket_addrs.into_iter())
535+
}
536+
537+
/// Parse a zone UUID from a DNS target name following the pattern `{uuid}.host.{DNS_ZONE}`.
538+
fn parse_zone_uuid_from_target(target: &str) -> Option<OmicronZoneUuid> {
539+
// Remove trailing dot if present
540+
let target = target.strip_suffix('.').unwrap_or(target);
541+
542+
// Expected format: "{uuid}.host.{DNS_ZONE}"
543+
let expected_suffix = format!(".host.{}", DNS_ZONE);
544+
545+
if let Some(uuid_str) = target.strip_suffix(&expected_suffix) {
546+
uuid_str.parse::<OmicronZoneUuid>().ok()
547+
} else {
548+
None
549+
}
550+
}
551+
402552
/// Lookup a specific record's IPv6 address
403553
///
404554
/// In general, callers should _not_ be using this function, and instead
@@ -436,7 +586,7 @@ mod test {
436586
use internal_dns_types::names::DNS_ZONE;
437587
use internal_dns_types::names::ServiceName;
438588
use omicron_test_utils::dev::test_setup_log;
439-
use omicron_uuid_kinds::OmicronZoneUuid;
589+
use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid};
440590
use slog::{Logger, o};
441591
use std::collections::HashMap;
442592
use std::net::Ipv6Addr;
@@ -1131,4 +1281,77 @@ mod test {
11311281
dns_server.cleanup_successful();
11321282
logctx.cleanup_successful();
11331283
}
1284+
1285+
#[tokio::test]
1286+
async fn lookup_all_socket_and_zone_v6_success_and_failure() {
1287+
let logctx =
1288+
test_setup_log("lookup_all_socket_and_zone_v6_success_and_failure");
1289+
let dns_server = DnsServer::create(&logctx.log).await;
1290+
let resolver = dns_server.resolver().unwrap();
1291+
1292+
// Create DNS config with both zone and sled services
1293+
let mut dns_config = DnsConfigBuilder::new();
1294+
1295+
// Add a zone service (BoundaryNtp) that should succeed
1296+
let zone_uuid = OmicronZoneUuid::new_v4();
1297+
let zone_ip = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1);
1298+
let zone_port = 8080;
1299+
let zone_host = dns_config.host_zone(zone_uuid, zone_ip).unwrap();
1300+
dns_config
1301+
.service_backend_zone(
1302+
ServiceName::BoundaryNtp,
1303+
&zone_host,
1304+
zone_port,
1305+
)
1306+
.unwrap();
1307+
1308+
// Add a sled service (SledAgent) that should fail
1309+
let sled_uuid = SledUuid::new_v4();
1310+
let sled_ip = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x2);
1311+
let sled_port = 8081;
1312+
let sled_host = dns_config.host_sled(sled_uuid, sled_ip).unwrap();
1313+
dns_config
1314+
.service_backend_sled(
1315+
ServiceName::SledAgent(sled_uuid),
1316+
&sled_host,
1317+
sled_port,
1318+
)
1319+
.unwrap();
1320+
1321+
let dns_config = dns_config.build_full_config_for_initial_generation();
1322+
dns_server.update(&dns_config).await.unwrap();
1323+
1324+
// Test 1: Zone service should succeed
1325+
let zone_results = resolver
1326+
.lookup_all_socket_and_zone_v6(ServiceName::BoundaryNtp)
1327+
.await
1328+
.expect("Should have been able to look up zone service");
1329+
1330+
assert_eq!(zone_results.len(), 1);
1331+
let (returned_zone_uuid, returned_addr) = &zone_results[0];
1332+
assert_eq!(*returned_zone_uuid, zone_uuid);
1333+
assert_eq!(returned_addr.ip(), &zone_ip);
1334+
assert_eq!(returned_addr.port(), zone_port);
1335+
1336+
// Test 2: Sled service should fail (targets don't match zone pattern)
1337+
let sled_error = resolver
1338+
.lookup_all_socket_and_zone_v6(ServiceName::SledAgent(sled_uuid))
1339+
.await
1340+
.expect_err("Should have failed to look up sled service");
1341+
1342+
// The error should be a ResolveError indicating the target doesn't match the zone pattern
1343+
match sled_error {
1344+
ResolveError::Resolve(hickory_err) => {
1345+
assert!(
1346+
hickory_err
1347+
.to_string()
1348+
.contains("target doesn't match zone pattern")
1349+
);
1350+
}
1351+
_ => panic!("Expected ResolveError::Resolve, got {:?}", sled_error),
1352+
}
1353+
1354+
dns_server.cleanup_successful();
1355+
logctx.cleanup_successful();
1356+
}
11341357
}

nexus/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ nexus-mgs-updates.workspace = true
6464
nexus-networking.workspace = true
6565
nexus-saga-recovery.workspace = true
6666
nexus-test-interface.workspace = true
67+
ntp-admin-client.workspace = true
6768
num-integer.workspace = true
6869
omicron-cockroach-metrics.workspace = true
6970
openssl.workspace = true

nexus/db-model/src/inventory.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use nexus_db_schema::schema::{
3434
inv_last_reconciliation_disk_result,
3535
inv_last_reconciliation_orphaned_dataset,
3636
inv_last_reconciliation_zone_result, inv_mupdate_override_non_boot,
37-
inv_nvme_disk_firmware, inv_omicron_sled_config,
37+
inv_ntp_timesync, inv_nvme_disk_firmware, inv_omicron_sled_config,
3838
inv_omicron_sled_config_dataset, inv_omicron_sled_config_disk,
3939
inv_omicron_sled_config_zone, inv_omicron_sled_config_zone_nic,
4040
inv_physical_disk, inv_root_of_trust, inv_root_of_trust_page,
@@ -61,7 +61,7 @@ use nexus_sled_agent_shared::inventory::{
6161
};
6262
use nexus_types::inventory::{
6363
BaseboardId, Caboose, CockroachStatus, Collection, NvmeFirmware,
64-
PowerState, RotPage, RotSlot,
64+
PowerState, RotPage, RotSlot, TimeSync,
6565
};
6666
use omicron_common::api::external;
6767
use omicron_common::api::internal::shared::NetworkInterface;
@@ -2788,6 +2788,33 @@ impl TryFrom<InvCockroachStatus> for CockroachStatus {
27882788
}
27892789
}
27902790

2791+
#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
2792+
#[diesel(table_name = inv_ntp_timesync)]
2793+
pub struct InvNtpTimesync {
2794+
pub inv_collection_id: DbTypedUuid<CollectionKind>,
2795+
pub zone_id: DbTypedUuid<OmicronZoneKind>,
2796+
pub synced: bool,
2797+
}
2798+
2799+
impl InvNtpTimesync {
2800+
pub fn new(
2801+
inv_collection_id: CollectionUuid,
2802+
timesync: &TimeSync,
2803+
) -> Result<Self, anyhow::Error> {
2804+
Ok(Self {
2805+
inv_collection_id: inv_collection_id.into(),
2806+
zone_id: timesync.zone_id.into(),
2807+
synced: timesync.synced,
2808+
})
2809+
}
2810+
}
2811+
2812+
impl From<InvNtpTimesync> for nexus_types::inventory::TimeSync {
2813+
fn from(value: InvNtpTimesync) -> Self {
2814+
Self { zone_id: value.zone_id.into(), synced: value.synced }
2815+
}
2816+
}
2817+
27912818
#[cfg(test)]
27922819
mod test {
27932820
use nexus_types::inventory::NvmeFirmware;

nexus/db-model/src/schema_versions.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock};
1616
///
1717
/// This must be updated when you change the database schema. Refer to
1818
/// schema/crdb/README.adoc in the root of this repository for details.
19-
pub const SCHEMA_VERSION: Version = Version::new(165, 0, 0);
19+
pub const SCHEMA_VERSION: Version = Version::new(166, 0, 0);
2020

2121
/// List of all past database schema versions, in *reverse* order
2222
///
@@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
2828
// | leaving the first copy as an example for the next person.
2929
// v
3030
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
31+
KnownVersion::new(166, "inv-ntp-timesync"),
3132
KnownVersion::new(165, "route-config-rib-priority"),
3233
KnownVersion::new(164, "fix-leaked-bp-oximeter-read-policy-rows"),
3334
KnownVersion::new(163, "bp-desired-host-phase-2"),

0 commit comments

Comments
 (0)