Skip to content

Commit 4e9c4bf

Browse files
authored
[sled-diagnostics] want chrony logs in support bundles (#8530)
This fixes #8516 Note that this PR is on top of #8529
1 parent 3c6ed3a commit 4e9c4bf

File tree

3 files changed

+179
-37
lines changed

3 files changed

+179
-37
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sled-diagnostics/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ once_cell.workspace = true
1919
oxlog.workspace = true
2020
parallel-task-set.workspace = true
2121
rand.workspace = true
22+
regex.workspace = true
2223
schemars.workspace = true
2324
serde.workspace = true
2425
slog.workspace = true

sled-diagnostics/src/logs.rs

Lines changed: 177 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use std::{
88
collections::{HashMap, HashSet, hash_map::Entry},
99
io::{BufRead, BufReader, Seek, Write},
10+
sync::LazyLock,
1011
};
1112

1213
use camino::{Utf8Path, Utf8PathBuf};
@@ -18,6 +19,7 @@ use illumos_utils::zfs::{
1819
use oxlog::LogFile;
1920
use oxlog::SvcLogs;
2021
use rand::{Rng, distributions::Alphanumeric, thread_rng};
22+
use regex::Regex;
2123
use slog::Logger;
2224
use std::collections::BTreeMap;
2325
use zip::{result::ZipError, write::FullFileOptions};
@@ -565,8 +567,9 @@ impl LogsHandle {
565567
current: true,
566568
archived: true,
567569
extra: true,
568-
// This avoids calling stat on every log file found.
569-
show_empty: true,
570+
// This will cause oxlog to call stat on each file resulting
571+
// in a sorted order.
572+
show_empty: false,
570573
date_range: None,
571574
},
572575
);
@@ -650,36 +653,40 @@ impl LogsHandle {
650653

651654
// - Grab all of the service's extra logs -
652655

653-
// Currently we are only grabbing cockroachdb logs. If other
654-
// services contain valuable logs we should add them here.
655-
if service == "cockroachdb" {
656-
let cockroach_extra_logs =
657-
sort_cockroach_extra_logs(&service_logs.extra);
658-
for (_prefix, extra_logs) in cockroach_extra_logs {
659-
// We always want the most current log being written to.
660-
if let Some(log) = extra_logs.current {
661-
self.process_logs(
662-
&service,
663-
&mut zip,
664-
&mut log_snapshots,
665-
log,
666-
LogType::Extra,
667-
)
668-
.await?;
669-
}
656+
// Attempt to parse and sort any extra logs we find for a service.
657+
let extra_logs = match service.as_str() {
658+
// cockroach embeds its rotation status in the name:
659+
// "cockroach-health.log" vs
660+
// "oach-health.oxzcockroachdba3628a56-6f85-43b5-be50-71d8f0e04877.root.2025-01-31T21_43_26Z.011435.log"
661+
"cockroachdb" => sort_cockroach_extra_logs(&service_logs.extra),
662+
// fall back parser that matches "service.log",
663+
// "service.field.n.log", "service.log.1", or
664+
// "service.field.n.log.1"
665+
_ => sort_extra_logs(&self.log, &service_logs.extra),
666+
};
667+
for (_, logs) in extra_logs {
668+
// We always want the most current log being written to.
669+
if let Some(log) = logs.current {
670+
self.process_logs(
671+
&service,
672+
&mut zip,
673+
&mut log_snapshots,
674+
log,
675+
LogType::Extra,
676+
)
677+
.await?;
678+
}
670679

671-
// We clamp the number of rotated logs we grab to 5.
672-
for log in extra_logs.rotated.iter().rev().take(max_rotated)
673-
{
674-
self.process_logs(
675-
&service,
676-
&mut zip,
677-
&mut log_snapshots,
678-
log,
679-
LogType::Extra,
680-
)
681-
.await?;
682-
}
680+
// We clamp the number of rotated logs we grab to 5.
681+
for log in logs.rotated.iter().rev().take(max_rotated) {
682+
self.process_logs(
683+
&service,
684+
&mut zip,
685+
&mut log_snapshots,
686+
log,
687+
LogType::Extra,
688+
)
689+
.await?;
683690
}
684691
}
685692
}
@@ -733,14 +740,58 @@ fn write_log_to_zip<W: Write + Seek>(
733740
Ok(())
734741
}
735742

743+
/// A log file that is found in oxlog's "extra" bucket of service logs.
744+
#[derive(Debug, PartialEq)]
745+
enum ExtraLogKind<'a> {
746+
/// The current log being written to e.g. service-a.log
747+
Current { name: &'a str, log: &'a LogFile },
748+
/// A log file that has been rotated e.g. service-a.log.4
749+
Rotated { name: &'a str, log: &'a LogFile },
750+
}
751+
736752
#[derive(Debug, Default, PartialEq)]
737-
struct CockroachExtraLog<'a> {
753+
struct ExtraLogs<'a> {
738754
current: Option<&'a Utf8Path>,
739755
rotated: Vec<&'a Utf8Path>,
740756
}
741-
fn sort_cockroach_extra_logs(
742-
logs: &[LogFile],
743-
) -> HashMap<&str, CockroachExtraLog<'_>> {
757+
758+
fn sort_extra_logs<'a>(
759+
logger: &Logger,
760+
logs: &'a [LogFile],
761+
) -> HashMap<&'a str, ExtraLogs<'a>> {
762+
let mut res = HashMap::new();
763+
764+
for log in logs {
765+
if let Some(kind) = parse_extra_log(log) {
766+
match kind {
767+
ExtraLogKind::Current { name, log } => {
768+
let entry: &mut ExtraLogs<'_> =
769+
res.entry(name).or_default();
770+
// We don't expect to stumble upon this unless there's a
771+
// programmer error, in which case we should leave ourselves
772+
// a record of it.
773+
if let Some(old_path) = entry.current {
774+
warn!(
775+
logger,
776+
"found multiple current log files for {name}";
777+
"old" => %old_path,
778+
"new" => %log.path,
779+
);
780+
}
781+
entry.current = Some(&log.path);
782+
}
783+
ExtraLogKind::Rotated { name, log } => {
784+
let entry = res.entry(name).or_default();
785+
entry.rotated.push(&log.path);
786+
}
787+
}
788+
}
789+
}
790+
791+
res
792+
}
793+
794+
fn sort_cockroach_extra_logs(logs: &[LogFile]) -> HashMap<&str, ExtraLogs<'_>> {
744795
// Known logging paths for cockroachdb:
745796
// https://www.cockroachlabs.com/docs/stable/logging-overview#logging-destinations
746797
let cockroach_log_prefix = HashSet::from([
@@ -759,7 +810,7 @@ fn sort_cockroach_extra_logs(
759810
"cockroach-stderr",
760811
]);
761812

762-
let mut interested: HashMap<&str, CockroachExtraLog<'_>> = HashMap::new();
813+
let mut interested: HashMap<&str, ExtraLogs<'_>> = HashMap::new();
763814
for log in logs {
764815
let Some(file_name) = log.path.file_name() else {
765816
continue;
@@ -791,6 +842,52 @@ fn sort_cockroach_extra_logs(
791842
interested
792843
}
793844

845+
/// For a provided `LogFile` return an optional `ExtraLog` if it's in a well
846+
/// formed logging format consisting of any non whitespace character followed
847+
/// by any number of none whitespace characters followed by a literal "." that
848+
/// ends in a single ".log" or ".log.N" where N is a digit desginating log
849+
/// rotation.
850+
///
851+
/// Examples:
852+
/// - service-1.log
853+
/// - service-1.log.4
854+
/// - service-2.stderr.log
855+
/// - service-2.stderr.log.2
856+
fn parse_extra_log(logfile: &LogFile) -> Option<ExtraLogKind> {
857+
static RE: LazyLock<Regex> = LazyLock::new(|| {
858+
//Regex explanation:
859+
// ^ : start of the line
860+
// (?:([^.\s]+) : at least one character that is not whitespace or a
861+
// "." (capturing: log name)
862+
// (?:\.[^.\s]+)*) : a "." followed by at least one character that is
863+
// not whitespace or a "." 0 or more times
864+
// (non capturing)
865+
// \.log : .log
866+
// (\.\d+)? : an optional "." followed by one or more digits
867+
// (capturing: current | rotated)
868+
// $ : end of the line
869+
Regex::new(r"^(?:([^.\s]+)(?:\.[^.\s]+)*)\.log(\.\d+)?$").unwrap()
870+
});
871+
872+
let Some(file_name) = logfile.path.file_name() else { return None };
873+
RE.captures(file_name).and_then(|c| {
874+
// The first capture group is not optional and is the log files name
875+
c.get(1).map(|name| {
876+
match c.get(2).is_some() {
877+
// Capture group 2 means that we have a logfile that
878+
// ends in a number e.g. "sled-agent.log.2"
879+
true => {
880+
ExtraLogKind::Rotated { name: name.as_str(), log: logfile }
881+
}
882+
// Otherwise we have found the current log file
883+
false => {
884+
ExtraLogKind::Current { name: name.as_str(), log: logfile }
885+
}
886+
}
887+
})
888+
})
889+
}
890+
794891
#[cfg(test)]
795892
mod test {
796893
use std::collections::HashMap;
@@ -817,7 +914,7 @@ mod test {
817914
oxlog::LogFile { path: Utf8PathBuf::from(l), size: None, modified: None }
818915
}).collect();
819916

820-
let mut expected: HashMap<&str, CockroachExtraLog> = HashMap::new();
917+
let mut expected: HashMap<&str, ExtraLogs<'_>> = HashMap::new();
821918

822919
// cockroach
823920
expected.entry("cockroach").or_default().current =
@@ -1290,4 +1387,47 @@ mod illumos_tests {
12901387
harness.cleanup().await;
12911388
logctx.cleanup_successful();
12921389
}
1390+
1391+
#[test]
1392+
fn test_extra_log_file_regex() {
1393+
let current = [("foo.log", "foo"), ("foo.bar.baz.log", "foo")];
1394+
for (log, name) in current {
1395+
let logfile = LogFile {
1396+
path: log.parse().unwrap(),
1397+
size: None,
1398+
modified: None,
1399+
};
1400+
let res = parse_extra_log(&logfile);
1401+
assert_eq!(
1402+
res.unwrap(),
1403+
ExtraLogKind::Current { name, log: &logfile }
1404+
);
1405+
}
1406+
1407+
let rotated = [("foo.log.1", "foo"), ("foo.bar.baz.log.1", "foo")];
1408+
for (log, name) in rotated {
1409+
let logfile = LogFile {
1410+
path: log.parse().unwrap(),
1411+
size: None,
1412+
modified: None,
1413+
};
1414+
let res = parse_extra_log(&logfile);
1415+
assert_eq!(
1416+
res.unwrap(),
1417+
ExtraLogKind::Rotated { name, log: &logfile }
1418+
);
1419+
}
1420+
1421+
let invalid =
1422+
["foo bar.log.1", "some-cool-log", "log.foo.1", "log.foo"];
1423+
for log in invalid {
1424+
let logfile = LogFile {
1425+
path: log.parse().unwrap(),
1426+
size: None,
1427+
modified: None,
1428+
};
1429+
let res = parse_extra_log(&logfile);
1430+
assert!(res.is_none());
1431+
}
1432+
}
12931433
}

0 commit comments

Comments
 (0)