@@ -25,8 +25,8 @@ use crate::{
2525 DownstairsIO , DownstairsMend , DsState , DsStateData , ExtentFix ,
2626 ExtentRepairIDs , IOState , IOStateCount , IOop , ImpactedBlocks , JobId ,
2727 Message , NegotiationState , RawReadResponse , RawWrite , ReconcileIO ,
28- ReconciliationId , RegionDefinition , ReplaceResult , SnapshotDetails ,
29- WorkSummary ,
28+ ReconciliationId , RegionDefinition , RegionMetadata , ReplaceResult ,
29+ SnapshotDetails , WorkSummary ,
3030} ;
3131use crucible_common:: { BlockIndex , ExtentId , NegotiationError } ;
3232use crucible_protocol:: WriteHeader ;
@@ -165,6 +165,20 @@ pub(crate) enum LiveRepairState {
165165 } ,
166166}
167167
168+ #[ derive( Copy , Clone , Debug , PartialEq ) ]
169+ pub ( crate ) enum LiveRepairStart {
170+ /// Live-repair has started
171+ Started ,
172+ /// Live-repair is already running
173+ AlreadyRunning ,
174+ /// No downstairs is in `LiveRepairReady`
175+ NotNeeded ,
176+ /// All three downstairs need live-repair (oh no)
177+ AllNeedRepair ,
178+ /// There is no source downstairs available
179+ NoSource ,
180+ }
181+
168182impl LiveRepairState {
169183 fn dummy ( ) -> Self {
170184 LiveRepairState :: Noop {
@@ -874,7 +888,16 @@ impl Downstairs {
874888 /// Returns `true` if repair is needed, `false` otherwise
875889 pub ( crate ) fn collate ( & mut self ) -> Result < bool , NegotiationError > {
876890 let r = self . check_region_metadata ( ) ?;
877- Ok ( self . start_reconciliation ( r) )
891+ Ok ( self . start_reconciliation ( r, |data| {
892+ let DsStateData :: Connecting {
893+ state : NegotiationStateData :: WaitQuorum ( r) ,
894+ ..
895+ } = data
896+ else {
897+ panic ! ( "client is not in WaitQuorum" ) ;
898+ } ;
899+ r
900+ } ) )
878901 }
879902
880903 /// Checks that region metadata is valid
@@ -963,9 +986,46 @@ impl Downstairs {
963986 Ok ( CollateData { max_flush, max_gen } )
964987 }
965988
989+ /// Begins reconciliation from all downstairs in `LiveRepairReady`
990+ ///
991+ /// # Panics
992+ /// If any of the downstairs is not in `LiveRepairReady`
993+ #[ must_use]
994+ pub ( crate ) fn reconcile_from_live_repair_ready ( & mut self ) -> bool {
995+ let mut max_flush = 0 ;
996+ let mut max_gen = 0 ;
997+ for client in self . clients . iter_mut ( ) {
998+ let DsStateData :: Connecting {
999+ state : NegotiationStateData :: LiveRepairReady ( data) ,
1000+ ..
1001+ } = client. state_data ( )
1002+ else {
1003+ panic ! ( "got invalid client state" ) ;
1004+ } ;
1005+ for m in data. iter ( ) {
1006+ max_flush = max_flush. max ( m. flush + 1 ) ;
1007+ max_gen = max_gen. max ( m. gen + 1 ) ;
1008+ }
1009+ }
1010+ self . start_reconciliation ( CollateData { max_gen, max_flush } , |data| {
1011+ let DsStateData :: Connecting {
1012+ state : NegotiationStateData :: LiveRepairReady ( r) ,
1013+ ..
1014+ } = data
1015+ else {
1016+ panic ! ( "client is not in LiveRepairReady" ) ;
1017+ } ;
1018+ r
1019+ } )
1020+ }
1021+
9661022 /// Begins reconciliation, using the given collation data
9671023 #[ must_use]
968- fn start_reconciliation ( & mut self , data : CollateData ) -> bool {
1024+ fn start_reconciliation < G : Fn ( & DsStateData ) -> & RegionMetadata > (
1025+ & mut self ,
1026+ data : CollateData ,
1027+ getter : G ,
1028+ ) -> bool {
9691029 let CollateData { max_flush, max_gen } = data;
9701030
9711031 /*
@@ -978,7 +1038,7 @@ impl Downstairs {
9781038 * Determine what extents don't match and what to do
9791039 * about that
9801040 */
981- if let Some ( reconcile_list) = self . mismatch_list ( ) {
1041+ if let Some ( reconcile_list) = self . mismatch_list ( getter ) {
9821042 for c in self . clients . iter_mut ( ) {
9831043 c. begin_reconcile ( ) ;
9841044 }
@@ -1026,10 +1086,14 @@ impl Downstairs {
10261086 ///
10271087 /// This function is idempotent; it returns without doing anything if
10281088 /// live-repair either can't be started or is already running.
1029- pub ( crate ) fn check_live_repair_start ( & mut self , up_state : & UpstairsState ) {
1089+ #[ must_use]
1090+ pub ( crate ) fn check_live_repair_start (
1091+ & mut self ,
1092+ up_state : & UpstairsState ,
1093+ ) -> LiveRepairStart {
10301094 // If we're already doing live-repair, then we can't start live-repair
10311095 if self . live_repair_in_progress ( ) {
1032- return ;
1096+ return LiveRepairStart :: AlreadyRunning ;
10331097 }
10341098
10351099 // Begin setting up live-repair state
@@ -1054,16 +1118,15 @@ impl Downstairs {
10541118
10551119 // Can't start live-repair if no one is LiveRepairReady
10561120 if repair_downstairs. is_empty ( ) {
1057- return ;
1121+ return LiveRepairStart :: NotNeeded ;
1122+ } else if repair_downstairs. len ( ) == 3 {
1123+ warn ! ( self . log, "All three downstairs require repair" ) ;
1124+ return LiveRepairStart :: AllNeedRepair ;
10581125 }
10591126
10601127 // Can't start live-repair if we don't have a source downstairs
10611128 let Some ( source_downstairs) = source_downstairs else {
1062- warn ! ( self . log, "No source, no Live Repair possible" ) ;
1063- if repair_downstairs. len ( ) == 3 {
1064- warn ! ( self . log, "All three downstairs require repair" ) ;
1065- }
1066- return ;
1129+ return LiveRepairStart :: NoSource ;
10671130 } ;
10681131
10691132 // Move the upstairs that were LiveRepairReady to LiveRepair
@@ -1105,6 +1168,8 @@ impl Downstairs {
11051168
11061169 let repair = self . repair . as_ref ( ) . unwrap ( ) ;
11071170 self . notify_live_repair_start ( repair) ;
1171+
1172+ LiveRepairStart :: Started
11081173 }
11091174
11101175 /// Checks whether live-repair can continue
@@ -2008,18 +2073,14 @@ impl Downstairs {
20082073 }
20092074
20102075 /// Compares region metadata from all three clients and builds a mend list
2011- fn mismatch_list ( & self ) -> Option < DownstairsMend > {
2076+ fn mismatch_list < G : Fn ( & DsStateData ) -> & RegionMetadata > (
2077+ & self ,
2078+ getter : G ,
2079+ ) -> Option < DownstairsMend > {
20122080 let log = self . log . new ( o ! ( "" => "mend" . to_string( ) ) ) ;
20132081 let mut meta = ClientMap :: new ( ) ;
20142082 for i in ClientId :: iter ( ) {
2015- let DsStateData :: Connecting {
2016- state : NegotiationStateData :: WaitQuorum ( r) ,
2017- ..
2018- } = self . clients [ i] . state_data ( )
2019- else {
2020- panic ! ( "client {i} is not in WaitQuorum" ) ;
2021- } ;
2022- meta. insert ( i, r) ;
2083+ meta. insert ( i, getter ( self . clients [ i] . state_data ( ) ) ) ;
20232084 }
20242085 DownstairsMend :: new ( & meta, log)
20252086 }
@@ -4006,7 +4067,8 @@ struct DownstairsBackpressureConfig {
40064067pub ( crate ) mod test {
40074068 use super :: {
40084069 ClientFaultReason , ClientNegotiationFailed , ClientStopReason ,
4009- ConnectionMode , Downstairs , DsState , NegotiationStateData , PendingJob ,
4070+ ConnectionMode , Downstairs , DsState , LiveRepairStart ,
4071+ NegotiationStateData , PendingJob ,
40104072 } ;
40114073 use crate :: {
40124074 downstairs:: { LiveRepairData , LiveRepairState , ReconcileData } ,
@@ -4081,7 +4143,7 @@ pub(crate) mod test {
40814143 NegotiationStateData :: WaitForPromote ,
40824144 NegotiationStateData :: WaitForRegionInfo ,
40834145 NegotiationStateData :: GetExtentVersions ,
4084- NegotiationStateData :: LiveRepairReady ,
4146+ NegotiationStateData :: LiveRepairReady ( Default :: default ( ) ) ,
40854147 ] {
40864148 ds. clients [ to_repair] . checked_state_transition (
40874149 & UpstairsState :: Active ,
@@ -9613,7 +9675,8 @@ pub(crate) mod test {
96139675
96149676 // Start the repair normally. This enqueues the close & reopen jobs, and
96159677 // reserves Job IDs for the repair/noop
9616- ds. check_live_repair_start ( & UpstairsState :: Active ) ;
9678+ let r = ds. check_live_repair_start ( & UpstairsState :: Active ) ;
9679+ assert_eq ! ( r, LiveRepairStart :: Started ) ;
96179680 assert ! ( ds. live_repair_in_progress( ) ) ;
96189681
96199682 // Submit a write.
0 commit comments