Skip to content

Commit 5d41257

Browse files
committed
Do single-block validation after live-repair and when opening extent
1 parent 1f48224 commit 5d41257

File tree

4 files changed

+103
-4
lines changed

4 files changed

+103
-4
lines changed

downstairs/src/extent.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ pub(crate) trait ExtentInner: Send + Sync + Debug {
3535
fn flush_number(&self) -> Result<u64, CrucibleError>;
3636
fn dirty(&self) -> Result<bool, CrucibleError>;
3737
fn validate(&self) -> Result<(), CrucibleError>;
38+
fn validate_block(&self, i: u64) -> Result<(), CrucibleError>;
3839

3940
/// Performs any metadata updates needed before a flush
4041
fn pre_flush(
@@ -250,7 +251,22 @@ pub fn extent_dir<P: AsRef<Path>>(dir: P, number: ExtentId) -> PathBuf {
250251
* anchored under "dir".
251252
*/
252253
pub fn extent_path<P: AsRef<Path>>(dir: P, number: ExtentId) -> PathBuf {
253-
extent_dir(dir, number).join(extent_file_name(number, ExtentType::Data))
254+
let e = extent_file_name(number, ExtentType::Data);
255+
256+
// XXX terrible hack: if someone has already provided a full directory tree
257+
// ending in `.copy`, then just append the extent file name. This lets us
258+
// open individual extent files during live-repair.
259+
if dir
260+
.as_ref()
261+
.iter()
262+
.next_back()
263+
.and_then(|s| s.to_str())
264+
.is_some_and(|s| s.ends_with(".copy"))
265+
{
266+
dir.as_ref().join(e)
267+
} else {
268+
extent_dir(dir, number).join(e)
269+
}
254270
}
255271

256272
/**
@@ -488,6 +504,11 @@ impl Extent {
488504
}
489505
};
490506

507+
// XXX debug validation after opening the extent
508+
if let Err(e) = inner.validate_block(0) {
509+
panic!("could not validate extent {number}: {e:#?}");
510+
}
511+
491512
let extent = Extent {
492513
number,
493514
read_only,

downstairs/src/extent_inner_raw.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,65 @@ impl ExtentInner for RawInner {
572572
r
573573
}
574574

575+
fn validate_block(&self, block: u64) -> Result<(), CrucibleError> {
576+
let block_size = self.extent_size.block_size_in_bytes() as usize;
577+
578+
// Read context data to local arrays
579+
let ctx_a = self.layout.read_context_slots_contiguous(
580+
&self.file,
581+
block,
582+
1,
583+
ContextSlot::A,
584+
)?[0];
585+
let ctx_b = self.layout.read_context_slots_contiguous(
586+
&self.file,
587+
block,
588+
1,
589+
ContextSlot::B,
590+
)?[0];
591+
592+
let mut data = vec![0; block_size];
593+
pread_all(
594+
self.file.as_fd(),
595+
&mut data,
596+
block_size as i64 * block as i64,
597+
)
598+
.map_err(|e| {
599+
CrucibleError::IoError(format!(
600+
"extent {}: reading block {block} data failed: {e}",
601+
self.extent_number
602+
))
603+
})?;
604+
605+
let hash = integrity_hash(&[&data]);
606+
607+
// Pick out the active context slot
608+
let context = match self.active_context[block] {
609+
ContextSlot::A => ctx_a,
610+
ContextSlot::B => ctx_b,
611+
};
612+
613+
if let Some(context) = context {
614+
if context.on_disk_hash == hash {
615+
Ok(())
616+
} else {
617+
Err(CrucibleError::GenericError(format!(
618+
"block {block} has an active slot with mismatched hash"
619+
)))
620+
}
621+
} else {
622+
// context slot is empty, hopefully data is as well!
623+
if data.iter().all(|v| *v == 0u8) {
624+
Ok(())
625+
} else {
626+
Err(CrucibleError::GenericError(format!(
627+
"block {block} has an empty active slot, \
628+
but contains non-zero data",
629+
)))
630+
}
631+
}
632+
}
633+
575634
fn validate(&self) -> Result<(), CrucibleError> {
576635
let block_size = self.extent_size.block_size_in_bytes() as usize;
577636

downstairs/src/extent_inner_sqlite.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,13 @@ impl ExtentInner for SqliteInner {
103103
}
104104

105105
fn validate(&self) -> Result<(), CrucibleError> {
106-
Err(CrucibleError::GenericError(
107-
"`validate` is not implemented for Sqlite extent".to_owned(),
108-
))
106+
// We don't implement validation for SQLite extents
107+
Ok(())
108+
}
109+
110+
fn validate_block(&self, _i: u64) -> Result<(), CrucibleError> {
111+
// We don't implement validation for SQLite extents
112+
Ok(())
109113
}
110114

111115
#[cfg(test)]

downstairs/src/region.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,21 @@ impl Region {
682682
);
683683
}
684684

685+
// XXX debug code
686+
// Open the extent that we just received, which checks block 0. We do
687+
// this before copying it, to check what was received on the wire.
688+
if let Err(e) = Extent::open(
689+
&copy_dir,
690+
&self.def(),
691+
eid,
692+
true, // read-only
693+
&self.log.clone(),
694+
) {
695+
panic!(
696+
"Failed to open live-repair extent {eid} in {copy_dir:?}: {e:?}"
697+
);
698+
}
699+
685700
// After we have all files: move the repair dir.
686701
info!(
687702
self.log,

0 commit comments

Comments
 (0)