Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/buildomat/jobs/test-live-repair.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ $BINDIR/dsc start \
# This gives dsc time to fail, as it is known to happen. If we don't check,
# then the later test will just hang forever waiting for downstairs that
# will never show up.
sleep 5
sleep 60 # bonus time for region checking
dsc_pid=$(pgrep dsc);

if [[ "$dsc_pid" -eq 0 ]]; then
Expand Down
13 changes: 13 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ members = [
"workspace-hack",
"xtask",
"agent-antagonist",
"verify-raw",
]
resolver = "2"

Expand Down
6 changes: 3 additions & 3 deletions common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ pub enum CrucibleError {
#[error("Invalid downstairs replace {0}")]
ReplaceRequestInvalid(String),

#[error("missing context slot for block {0}")]
MissingContextSlot(u64),
#[error("missing context slot for block {block} in extent {extent}")]
MissingContextSlot { block: u64, extent: u32 },

#[error("metadata deserialization failed: {0}")]
BadMetadata(String),
Expand Down Expand Up @@ -458,7 +458,7 @@ impl From<CrucibleError> for dropshot::HttpError {
| CrucibleError::UpstairsActivateInProgress
| CrucibleError::UpstairsDeactivating
| CrucibleError::UuidMismatch
| CrucibleError::MissingContextSlot(..)
| CrucibleError::MissingContextSlot { .. }
| CrucibleError::BadMetadata(..)
| CrucibleError::BadContextSlot(..)
| CrucibleError::MissingBlockContext
Expand Down
58 changes: 41 additions & 17 deletions downstairs/src/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,52 @@ struct ExtInfo {
ei_hm: HashMap<u32, ExtentMeta>,
}

pub fn verify_region(region_dir: PathBuf, log: Logger) -> Result<()> {
pub fn verify_region(
region_dir: PathBuf,
thread_count: Option<usize>,
log: Logger,
) -> Result<()> {
let region = Region::open(region_dir, false, true, &log)?;
let errors: Vec<_> = region
.extents
.par_iter()
.filter_map(|e| {
let extent = match e {
extent::ExtentState::Opened(extent) => extent,
extent::ExtentState::Closed => panic!("dump on closed extent!"),
};

if let Err(err) = extent.validate() {
Some((extent.number, err))
} else {
None
}
})
.collect();
// Configure thread pool based on parameter
let pool = if let Some(count) = thread_count {
rayon::ThreadPoolBuilder::new()
.num_threads(count)
.build()
.expect("Failed to build thread pool")
} else {
rayon::ThreadPoolBuilder::new()
.build()
.expect("Failed to build thread pool")
};

let errors: Vec<_> = pool.install(|| {
region
.extents
.par_iter()
.filter_map(|e| {
let extent = match e {
extent::ExtentState::Opened(extent) => extent,
extent::ExtentState::Closed => {
panic!("dump on closed extent!")
}
};

if let Err(err) = extent.validate() {
Some((extent.number, err))
} else {
None
}
})
.collect()
});

if !errors.is_empty() {
for (number, err) in &errors {
println!("validation failed for extent {}: {:?}", number, err);
println!(
"validation failed for extent {} 0x{:03X} : {:?}",
number, number.0, err
);
}
bail!("Region failed to verify");
}
Expand Down
17 changes: 16 additions & 1 deletion downstairs/src/extent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,22 @@ pub fn extent_dir<P: AsRef<Path>>(dir: P, number: ExtentId) -> PathBuf {
* anchored under "dir".
*/
pub fn extent_path<P: AsRef<Path>>(dir: P, number: ExtentId) -> PathBuf {
extent_dir(dir, number).join(extent_file_name(number, ExtentType::Data))
let e = extent_file_name(number, ExtentType::Data);

// XXX terrible hack: if someone has already provided a full directory tree
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need this so we can verify the incoming copy of an extent?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, opening an extent takes the extent's root directory and number, then builds the extent file path internally. This is annoying if we want to open a specific raw file!

Since we're building a test image directly from this PR (and probably not merging it), I didn't bother to clean this up further.

// ending in `.copy`, then just append the extent file name. This lets us
// open individual extent files during live-repair.
if dir
.as_ref()
.iter()
.next_back()
.and_then(|s| s.to_str())
.is_some_and(|s| s.ends_with(".copy"))
{
dir.as_ref().join(e)
} else {
extent_dir(dir, number).join(e)
}
}

/**
Expand Down
Loading