Skip to content

Commit f5ea0bc

Browse files
committed
only hold back run when merge can't get too large
The invariants here are actually not quite true. In pathological cases, runs can get even more than one size too small.
1 parent bbe8b57 commit f5ea0bc

File tree

1 file changed

+27
-24
lines changed

1 file changed

+27
-24
lines changed

prototypes/ScheduledMerges.hs

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,9 @@ invariant = go 1
196196
-- but it is still represented as a 'MergingRun', using 'SingleRun'.
197197
MergePolicyLevelling -> assertST $ null rs
198198
-- Runs in tiering levels usually fit that size, but they can be one
199-
-- larger, if a run has been held back (creating a 5-way merge).
200-
MergePolicyTiering -> assertST $ all (\r -> tieringRunSizeToLevel r `elem` [ln, ln+1]) rs
199+
-- smaller due to compaction (if they have not been held back and
200+
-- merged again).
201+
MergePolicyTiering -> assertST $ all (\r -> tieringRunSizeToLevel r `elem` [ln-1, ln]) rs
201202

202203
-- Incoming runs being merged also need to be of the right size, but the
203204
-- conditions are more complicated.
@@ -222,17 +223,17 @@ invariant = go 1
222223
assertST $ levellingRunSizeToLevel r <= ln+1
223224

224225
-- An ongoing merge for levelling should have 4 incoming runs of
225-
-- the right size for the level below (or slightly larger due to
226-
-- holding back underfull runs), and 1 run from this level,
227-
-- but the run from this level can be of almost any size for the
228-
-- same reasons as above. Although if this is the first merge for
229-
-- a new level, it'll have only 4 runs.
226+
-- the right size for the level below (or slightly smaller),
227+
-- and 1 run from this level, but the run from this level can be of
228+
-- almost any size for the same reasons as above.
229+
-- Although if this is the first merge for a new level, it'll have
230+
-- only 4 runs.
230231
(_, OngoingMerge _ rs _) -> do
231232
let incoming = take 4 rs
232233
let resident = drop 4 rs
233234
assertST $ length incoming == 4
234235
assertST $ length resident <= 1
235-
assertST $ all (\r -> tieringRunSizeToLevel r `elem` [ln-1, ln]) incoming
236+
assertST $ all (\r -> tieringRunSizeToLevel r `elem` [ln-2, ln-1]) incoming
236237
assertST $ all (\r -> levellingRunSizeToLevel r <= ln+1) resident
237238

238239
MergePolicyTiering ->
@@ -250,22 +251,21 @@ invariant = go 1
250251
-- a single level only.
251252
(_, CompletedMerge r, MergeLastLevel) -> do
252253
assertST $ ln == 1
253-
assertST $ tieringRunSizeToLevel r <= ln+1
254+
assertST $ tieringRunSizeToLevel r <= ln
254255

255256
-- A completed mid level run is usually of the size for the
256257
-- level it is entering, but can also be one smaller (in which case
257-
-- it'll be held back and merged again) or one larger (because it
258-
-- includes a run that has been held back before).
258+
-- it'll be held back and merged again).
259259
(_, CompletedMerge r, MergeMidLevel) ->
260-
assertST $ tieringRunSizeToLevel r `elem` [ln-1, ln, ln+1]
260+
assertST $ tieringRunSizeToLevel r `elem` [ln-1, ln]
261261

262262
-- An ongoing merge for tiering should have 4 incoming runs of
263-
-- the right size for the level below, and at most 1 run held back
264-
-- due to being too small (which would thus also be of the size of
265-
-- the level below).
263+
-- the right size for the level below (or slightly smaller), and at
264+
-- most 1 run held back due to being too small (which would thus
265+
-- also be of the size of the level below).
266266
(_, OngoingMerge _ rs _, _) -> do
267267
assertST $ length rs == 4 || length rs == 5
268-
assertST $ all (\r -> tieringRunSizeToLevel r == ln-1) rs
268+
assertST $ all (\r -> tieringRunSizeToLevel r `elem` [ln-2, ln-1]) rs
269269

270270
-- 'callStack' just ensures that the 'HasCallStack' constraint is not redundant
271271
-- when compiling with debug assertions disabled.
@@ -303,7 +303,7 @@ newMerge tr level mergepolicy mergelast rs = do
303303
debt = newMergeDebt $ case mergepolicy of
304304
MergePolicyLevelling -> 4 * tieringRunSize (level-1)
305305
+ levellingRunSize level
306-
MergePolicyTiering -> length rs * tieringRunSize (level-1)
306+
MergePolicyTiering -> 4 * tieringRunSize (level-1)
307307
-- deliberately lazy:
308308
r = case mergelast of
309309
MergeMidLevel -> (mergek rs)
@@ -484,9 +484,9 @@ creditsForMerge SingleRun{} = 0
484484
-- It needs to be completed before another run comes in.
485485
creditsForMerge (MergingRun MergePolicyLevelling _ _) = (1 + 4) / 1
486486

487-
-- A tiering merge has 5 runs at most (once could be held back to merged again)
487+
-- A tiering merge has 4 runs at most (once could be held back to merged again)
488488
-- and must be completed before the level is full (once 4 more runs come in).
489-
creditsForMerge (MergingRun MergePolicyTiering _ _) = 5 / 4
489+
creditsForMerge (MergingRun MergePolicyTiering _ _) = 4 / 4
490490

491491
type Event = EventAt EventDetail
492492
data EventAt e = EventAt {
@@ -529,9 +529,9 @@ increment tr sc = \r ls -> do
529529
assertST $ tieringRunSizeToLevel r `elem` [ln, ln+1] -- +1 from levelling
530530
_ -> do
531531
assertST $ length incoming == 4
532-
-- because of overfull runs due to holding back
533-
assertST $ all (\r -> tieringRunSizeToLevel r `elem` [ln-1, ln]) incoming
534-
assertST $ tieringLevel (sum (map Map.size incoming)) `elem` [ln, ln+1]
532+
-- because of underfull runs
533+
assertST $ all (\r -> tieringRunSizeToLevel r `elem` [ln-2, ln-1]) incoming
534+
assertST $ tieringLevel (sum (map Map.size incoming)) `elem` [ln-1, ln]
535535
go' ln incoming ls
536536

537537
go' !ln incoming [] = do
@@ -548,8 +548,11 @@ increment tr sc = \r ls -> do
548548
case mergePolicyForLevel ln ls of
549549

550550
-- If r is still too small for this level then keep it and merge again
551-
-- with the incoming runs.
552-
MergePolicyTiering | tieringRunSizeToLevel r < ln -> do
551+
-- with the incoming runs, but only if the resulting run is guaranteed
552+
-- not to be too large for this level.
553+
MergePolicyTiering
554+
| tieringRunSizeToLevel r < ln
555+
, sum (map Map.size (r : incoming)) <= tieringRunSize ln -> do
553556
let mergelast = mergeLastForLevel ls
554557
mr' <- newMerge tr' ln MergePolicyTiering mergelast (incoming ++ [r])
555558
return (Level mr' rs : ls)

0 commit comments

Comments
 (0)