Skip to content

Commit 1adc6c7

Browse files
authored
Backport to branch(3.10) : Fix snapshot management issues (#2024)
1 parent c1896ce commit 1adc6c7

File tree

7 files changed

+768
-301
lines changed

7 files changed

+768
-301
lines changed

core/src/main/java/com/scalar/db/transaction/consensuscommit/CrudHandler.java

Lines changed: 53 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import com.scalar.db.api.Result;
1212
import com.scalar.db.api.Scan;
1313
import com.scalar.db.api.Scanner;
14+
import com.scalar.db.api.Selection;
1415
import com.scalar.db.api.TableMetadata;
1516
import com.scalar.db.exception.storage.ExecutionException;
1617
import com.scalar.db.exception.transaction.CrudException;
@@ -19,8 +20,11 @@
1920
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
2021
import java.io.IOException;
2122
import java.util.ArrayList;
23+
import java.util.LinkedHashMap;
2224
import java.util.LinkedHashSet;
2325
import java.util.List;
26+
import java.util.Map;
27+
import java.util.Map.Entry;
2428
import java.util.Optional;
2529
import java.util.stream.Collectors;
2630
import javax.annotation.concurrent.ThreadSafe;
@@ -63,30 +67,35 @@ public CrudHandler(
6367
this.mutationConditionsValidator = mutationConditionsValidator;
6468
}
6569

66-
public Optional<Result> get(Get get) throws CrudException {
67-
List<String> originalProjections = new ArrayList<>(get.getProjections());
70+
public Optional<Result> get(Get originalGet) throws CrudException {
71+
List<String> originalProjections = new ArrayList<>(originalGet.getProjections());
72+
Get get = (Get) prepareStorageSelection(originalGet);
6873

6974
Optional<TransactionResult> result;
7075
Snapshot.Key key = new Snapshot.Key(get);
7176

72-
if (snapshot.containsKeyInReadSet(key)) {
73-
return createGetResult(key, originalProjections);
77+
if (snapshot.containsKeyInGetSet(get)) {
78+
return createGetResult(key, get, originalProjections);
7479
}
7580

7681
result = getFromStorage(get);
7782
if (!result.isPresent() || result.get().isCommitted()) {
83+
// Keep the read set latest to create before image by using the latest record (result)
84+
// because another conflicting transaction might have updated the record after this
85+
// transaction read it first.
7886
snapshot.put(key, result);
79-
return createGetResult(key, originalProjections);
87+
snapshot.put(get, result); // for re-read and validation
88+
return createGetResult(key, get, originalProjections);
8089
}
8190
throw new UncommittedRecordException(
8291
result.get(), "this record needs recovery", snapshot.getId());
8392
}
8493

85-
private Optional<Result> createGetResult(Snapshot.Key key, List<String> projections)
94+
private Optional<Result> createGetResult(Snapshot.Key key, Get get, List<String> projections)
8695
throws CrudException {
8796
TableMetadata metadata = getTableMetadata(key.getNamespace(), key.getTable());
8897
return snapshot
89-
.get(key)
98+
.mergeResult(key, snapshot.get(get))
9099
.map(r -> new FilteredResult(r, projections, metadata, isIncludeMetadataEnabled));
91100
}
92101

@@ -104,20 +113,22 @@ public List<Result> scan(Scan scan) throws CrudException {
104113
return results;
105114
}
106115

107-
private List<Result> scanInternal(Scan scan) throws CrudException {
108-
List<String> originalProjections = new ArrayList<>(scan.getProjections());
116+
private List<Result> scanInternal(Scan originalScan) throws CrudException {
117+
List<String> originalProjections = new ArrayList<>(originalScan.getProjections());
118+
Scan scan = (Scan) prepareStorageSelection(originalScan);
109119

110-
List<Result> results = new ArrayList<>();
120+
Map<Snapshot.Key, TransactionResult> results = new LinkedHashMap<>();
111121

112-
Optional<List<Snapshot.Key>> keysInSnapshot = snapshot.get(scan);
113-
if (keysInSnapshot.isPresent()) {
114-
for (Snapshot.Key key : keysInSnapshot.get()) {
115-
snapshot.get(key).ifPresent(results::add);
122+
Optional<Map<Snapshot.Key, TransactionResult>> resultsInSnapshot = snapshot.get(scan);
123+
if (resultsInSnapshot.isPresent()) {
124+
for (Entry<Snapshot.Key, TransactionResult> entry : resultsInSnapshot.get().entrySet()) {
125+
snapshot
126+
.mergeResult(entry.getKey(), Optional.of(entry.getValue()))
127+
.ifPresent(result -> results.put(entry.getKey(), result));
116128
}
117129
return createScanResults(scan, originalProjections, results);
118130
}
119131

120-
List<Snapshot.Key> keys = new ArrayList<>();
121132
Scanner scanner = null;
122133
try {
123134
scanner = getFromStorage(scan);
@@ -130,12 +141,12 @@ private List<Result> scanInternal(Scan scan) throws CrudException {
130141

131142
Snapshot.Key key = new Snapshot.Key(scan, r);
132143

133-
if (!snapshot.containsKeyInReadSet(key)) {
134-
snapshot.put(key, Optional.of(result));
135-
}
144+
// We always update the read set to create before image by using the latest record (result)
145+
// because another conflicting transaction might have updated the record after this
146+
// transaction read it first.
147+
snapshot.put(key, Optional.of(result));
136148

137-
keys.add(key);
138-
snapshot.get(key).ifPresent(results::add);
149+
snapshot.mergeResult(key, Optional.of(result)).ifPresent(value -> results.put(key, value));
139150
}
140151
} finally {
141152
if (scanner != null) {
@@ -146,15 +157,16 @@ private List<Result> scanInternal(Scan scan) throws CrudException {
146157
}
147158
}
148159
}
149-
snapshot.put(scan, keys);
160+
snapshot.put(scan, results);
150161

151162
return createScanResults(scan, originalProjections, results);
152163
}
153164

154-
private List<Result> createScanResults(Scan scan, List<String> projections, List<Result> results)
165+
private List<Result> createScanResults(
166+
Scan scan, List<String> projections, Map<Snapshot.Key, TransactionResult> results)
155167
throws CrudException {
156168
TableMetadata metadata = getTableMetadata(scan.forNamespace().get(), scan.forTable().get());
157-
return results.stream()
169+
return results.values().stream()
158170
.map(r -> new FilteredResult(r, projections, metadata, isIncludeMetadataEnabled))
159171
.collect(Collectors.toList());
160172
}
@@ -171,37 +183,38 @@ public void delete(Delete delete) throws UnsatisfiedConditionException {
171183
snapshot.put(new Snapshot.Key(delete), delete);
172184
}
173185

174-
private Optional<TransactionResult> getFromStorage(Get get) throws CrudException {
186+
@VisibleForTesting
187+
Optional<TransactionResult> getFromStorage(Get get) throws CrudException {
175188
try {
176-
get.clearProjections();
177-
// Retrieve only the after images columns when including the metadata is disabled, otherwise
178-
// retrieve all the columns
179-
if (!isIncludeMetadataEnabled) {
180-
LinkedHashSet<String> afterImageColumnNames =
181-
tableMetadataManager.getTransactionTableMetadata(get).getAfterImageColumnNames();
182-
get.withProjections(afterImageColumnNames);
183-
}
184-
get.withConsistency(Consistency.LINEARIZABLE);
185189
return storage.get(get).map(TransactionResult::new);
186190
} catch (ExecutionException e) {
187191
throw new CrudException("get failed", e, snapshot.getId());
188192
}
189193
}
190194

191-
private Scanner getFromStorage(Scan scan) throws CrudException {
195+
@VisibleForTesting
196+
Scanner getFromStorage(Scan scan) throws CrudException {
197+
try {
198+
return storage.scan(scan);
199+
} catch (ExecutionException e) {
200+
throw new CrudException("scan failed", e, snapshot.getId());
201+
}
202+
}
203+
204+
private Selection prepareStorageSelection(Selection selection) throws CrudException {
192205
try {
193-
scan.clearProjections();
206+
selection.clearProjections();
194207
// Retrieve only the after images columns when including the metadata is disabled, otherwise
195208
// retrieve all the columns
196209
if (!isIncludeMetadataEnabled) {
197210
LinkedHashSet<String> afterImageColumnNames =
198-
tableMetadataManager.getTransactionTableMetadata(scan).getAfterImageColumnNames();
199-
scan.withProjections(afterImageColumnNames);
211+
tableMetadataManager.getTransactionTableMetadata(selection).getAfterImageColumnNames();
212+
selection.withProjections(afterImageColumnNames);
200213
}
201-
scan.withConsistency(Consistency.LINEARIZABLE);
202-
return storage.scan(scan);
214+
selection.withConsistency(Consistency.LINEARIZABLE);
215+
return selection;
203216
} catch (ExecutionException e) {
204-
throw new CrudException("scan failed", e, snapshot.getId());
217+
throw new CrudException("getting a table metadata failed", e, snapshot.getId());
205218
}
206219
}
207220

core/src/main/java/com/scalar/db/transaction/consensuscommit/Snapshot.java

Lines changed: 52 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ public class Snapshot {
4949
private final TransactionTableMetadataManager tableMetadataManager;
5050
private final ParallelExecutor parallelExecutor;
5151
private final Map<Key, Optional<TransactionResult>> readSet;
52-
private final Map<Scan, List<Key>> scanSet;
52+
private final Map<Get, Optional<TransactionResult>> getSet;
53+
private final Map<Scan, Map<Key, TransactionResult>> scanSet;
5354
private final Map<Key, Put> writeSet;
5455
private final Map<Key, Delete> deleteSet;
5556

@@ -65,6 +66,7 @@ public Snapshot(
6566
this.tableMetadataManager = tableMetadataManager;
6667
this.parallelExecutor = parallelExecutor;
6768
readSet = new HashMap<>();
69+
getSet = new HashMap<>();
6870
scanSet = new HashMap<>();
6971
writeSet = new HashMap<>();
7072
deleteSet = new HashMap<>();
@@ -78,7 +80,8 @@ public Snapshot(
7880
TransactionTableMetadataManager tableMetadataManager,
7981
ParallelExecutor parallelExecutor,
8082
Map<Key, Optional<TransactionResult>> readSet,
81-
Map<Scan, List<Key>> scanSet,
83+
Map<Get, Optional<TransactionResult>> getSet,
84+
Map<Scan, Map<Key, TransactionResult>> scanSet,
8285
Map<Key, Put> writeSet,
8386
Map<Key, Delete> deleteSet) {
8487
this.id = id;
@@ -87,6 +90,7 @@ public Snapshot(
8790
this.tableMetadataManager = tableMetadataManager;
8891
this.parallelExecutor = parallelExecutor;
8992
this.readSet = readSet;
93+
this.getSet = getSet;
9094
this.scanSet = scanSet;
9195
this.writeSet = writeSet;
9296
this.deleteSet = deleteSet;
@@ -107,8 +111,12 @@ public void put(Key key, Optional<TransactionResult> result) {
107111
readSet.put(key, result);
108112
}
109113

110-
public void put(Scan scan, List<Key> keys) {
111-
scanSet.put(scan, keys);
114+
public void put(Get get, Optional<TransactionResult> result) {
115+
getSet.put(get, result);
116+
}
117+
118+
public void put(Scan scan, Map<Key, TransactionResult> results) {
119+
scanSet.put(scan, results);
112120
}
113121

114122
public void put(Key key, Put put) {
@@ -137,21 +145,18 @@ public Optional<TransactionResult> getFromReadSet(Key key) {
137145
return readSet.containsKey(key) ? readSet.get(key) : Optional.empty();
138146
}
139147

140-
public Optional<TransactionResult> get(Key key) throws CrudException {
148+
public Optional<TransactionResult> mergeResult(Key key, Optional<TransactionResult> result)
149+
throws CrudException {
141150
if (deleteSet.containsKey(key)) {
142151
return Optional.empty();
143-
} else if (readSet.containsKey(key)) {
144-
if (writeSet.containsKey(key)) {
145-
// merge the result in the read set and the put in the write set
146-
return Optional.of(
147-
new TransactionResult(
148-
new MergedResult(readSet.get(key), writeSet.get(key), getTableMetadata(key))));
149-
} else {
150-
return readSet.get(key);
151-
}
152+
} else if (writeSet.containsKey(key)) {
153+
// merge the result in the read set and the put in the write set
154+
return Optional.of(
155+
new TransactionResult(
156+
new MergedResult(result, writeSet.get(key), getTableMetadata(key))));
157+
} else {
158+
return result;
152159
}
153-
throw new IllegalArgumentException(
154-
"getting data neither in the read set nor the delete set is not allowed");
155160
}
156161

157162
private TableMetadata getTableMetadata(Key key) throws CrudException {
@@ -185,7 +190,17 @@ private TableMetadata getTableMetadata(Scan scan) throws ExecutionException {
185190
}
186191
}
187192

188-
public Optional<List<Key>> get(Scan scan) {
193+
public boolean containsKeyInGetSet(Get get) {
194+
return getSet.containsKey(get);
195+
}
196+
197+
public Optional<TransactionResult> get(Get get) {
198+
// We expect this method is called after putting the result of the get operation in the get set.
199+
assert getSet.containsKey(get);
200+
return getSet.get(get);
201+
}
202+
203+
public Optional<Map<Key, TransactionResult>> get(Scan scan) {
189204
if (scanSet.containsKey(scan)) {
190205
return Optional.ofNullable(scanSet.get(scan));
191206
}
@@ -222,6 +237,10 @@ private boolean isWriteSetOverlappedWith(Scan scan) {
222237
}
223238

224239
for (Map.Entry<Key, Put> entry : writeSet.entrySet()) {
240+
if (scanSet.get(scan).containsKey(entry.getKey())) {
241+
return true;
242+
}
243+
225244
Put put = entry.getValue();
226245

227246
if (!put.forNamespace().equals(scan.forNamespace())
@@ -278,7 +297,7 @@ private boolean isWriteSetOverlappedWith(Scan scan) {
278297

279298
private boolean isWriteSetOverlappedWith(ScanWithIndex scan) {
280299
for (Map.Entry<Key, Put> entry : writeSet.entrySet()) {
281-
if (scanSet.get(scan).contains(entry.getKey())) {
300+
if (scanSet.get(scan).containsKey(entry.getKey())) {
282301
return true;
283302
}
284303

@@ -315,7 +334,7 @@ private boolean isWriteSetOverlappedWith(ScanAll scan) {
315334
// yet. Thus, we need to evaluate if the scan condition potentially matches put operations.
316335

317336
// Check for cases 1 and 2
318-
if (scanSet.get(scan).contains(entry.getKey())) {
337+
if (scanSet.get(scan).containsKey(entry.getKey())) {
319338
return true;
320339
}
321340

@@ -433,14 +452,14 @@ void toSerializableWithExtraRead(DistributedStorage storage)
433452
List<ParallelExecutorTask> tasks = new ArrayList<>();
434453

435454
// Read set by scan is re-validated to check if there is no anti-dependency
436-
for (Map.Entry<Scan, List<Key>> entry : scanSet.entrySet()) {
455+
for (Map.Entry<Scan, Map<Key, TransactionResult>> entry : scanSet.entrySet()) {
437456
tasks.add(
438457
() -> {
439458
Map<Key, TransactionResult> currentReadMap = new HashMap<>();
440459
Set<Key> validatedReadSet = new HashSet<>();
441460
Scanner scanner = null;
461+
Scan scan = entry.getKey();
442462
try {
443-
Scan scan = entry.getKey();
444463
// only get tx_id and tx_version columns because we use only them to compare
445464
scan.clearProjections();
446465
scan.withProjection(Attribute.ID).withProjection(Attribute.VERSION);
@@ -464,13 +483,15 @@ void toSerializableWithExtraRead(DistributedStorage storage)
464483
}
465484
}
466485

467-
for (Key key : entry.getValue()) {
486+
for (Map.Entry<Key, TransactionResult> e : entry.getValue().entrySet()) {
487+
Key key = e.getKey();
488+
TransactionResult result = e.getValue();
468489
if (writeSet.containsKey(key) || deleteSet.containsKey(key)) {
469490
continue;
470491
}
471492
// Check if read records are not changed
472493
TransactionResult latestResult = currentReadMap.get(key);
473-
if (isChanged(Optional.ofNullable(latestResult), readSet.get(key))) {
494+
if (isChanged(Optional.ofNullable(latestResult), Optional.of(result))) {
474495
throwExceptionDueToAntiDependency();
475496
}
476497
validatedReadSet.add(key);
@@ -483,35 +504,23 @@ void toSerializableWithExtraRead(DistributedStorage storage)
483504
});
484505
}
485506

486-
// Calculate read set validated by scan
487-
Set<Key> validatedReadSetByScan = new HashSet<>();
488-
for (List<Key> values : scanSet.values()) {
489-
validatedReadSetByScan.addAll(values);
490-
}
491-
492507
// Read set by get is re-validated to check if there is no anti-dependency
493-
for (Map.Entry<Key, Optional<TransactionResult>> entry : readSet.entrySet()) {
494-
Key key = entry.getKey();
495-
if (writeSet.containsKey(key)
496-
|| deleteSet.containsKey(key)
497-
|| validatedReadSetByScan.contains(key)) {
508+
for (Map.Entry<Get, Optional<TransactionResult>> entry : getSet.entrySet()) {
509+
Get get = entry.getKey();
510+
Key key = new Key(get);
511+
if (writeSet.containsKey(key) || deleteSet.containsKey(key)) {
498512
continue;
499513
}
500514

501515
tasks.add(
502516
() -> {
517+
Optional<TransactionResult> originalResult = getSet.get(get);
503518
// only get tx_id and tx_version columns because we use only them to compare
504-
Get get =
505-
new Get(key.getPartitionKey(), key.getClusteringKey().orElse(null))
506-
.withProjection(Attribute.ID)
507-
.withProjection(Attribute.VERSION)
508-
.withConsistency(Consistency.LINEARIZABLE)
509-
.forNamespace(key.getNamespace())
510-
.forTable(key.getTable());
511-
519+
get.clearProjections();
520+
get.withProjection(Attribute.ID).withProjection(Attribute.VERSION);
512521
Optional<TransactionResult> latestResult = storage.get(get).map(TransactionResult::new);
513522
// Check if a read record is not changed
514-
if (isChanged(latestResult, entry.getValue())) {
523+
if (isChanged(latestResult, originalResult)) {
515524
throwExceptionDueToAntiDependency();
516525
}
517526
});

0 commit comments

Comments
 (0)