Skip to content

Commit e657e81

Browse files
authored
Support SERIALIZABLE isolation for index-based operations with before-image index validation (#3463)
1 parent 37f9d68 commit e657e81

8 files changed

Lines changed: 671 additions & 222 deletions

File tree

core/src/integration-test/java/com/scalar/db/storage/objectstorage/ConsensusCommitSpecificIntegrationTestWithObjectStorage.java

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ protected Properties getProperties(String testName) {
8383

8484
@Override
8585
@Disabled("Object Storage does not support index-related operations")
86-
public void scan_ScanWithIndexGiven_WithSerializable_ShouldThrowIllegalArgumentException() {}
86+
public void scan_ScanWithIndexGiven_WithSerializable_ShouldScan() {}
8787

8888
@Override
8989
@Disabled("Object Storage does not support index-related operations")
90-
public void get_GetWithIndexGiven_WithSerializable_ShouldThrowIllegalArgumentException() {}
90+
public void get_GetWithIndexGiven_WithSerializable_ShouldGet() {}
9191

9292
@Override
9393
@Disabled("Object Storage does not support index-related operations")
@@ -274,4 +274,29 @@ public void get_GetWithIndexForDeletedWhenCoordinatorStateAborted_ShouldRollBack
274274
public void
275275
getScanner_ScanAllWithIndexConditionForDeletedWhenCoordinatorStateAborted_ShouldRollBackAndReturnAllRecords(
276276
Isolation isolation) {}
277+
278+
// Empty override marked @Disabled: per the annotation message, Object Storage does not support
// index-related operations, so this index-based Get case is not run for this storage.
@Override
279+
@Disabled("Object Storage does not support index-related operations")
280+
public void
281+
commit_GetWithIndexInSerializable_WhenBeforeIndexHasPreparedRecordFromOtherTransaction_ShouldThrowCommitConflictException() {}
282+
283+
// Empty override marked @Disabled: per the annotation message, Object Storage does not support
// index-related operations, so this index-based Scan case is not run for this storage.
@Override
284+
@Disabled("Object Storage does not support index-related operations")
285+
public void
286+
commit_ScanWithIndexInSerializable_WhenBeforeIndexHasPreparedRecordFromOtherTransaction_ShouldThrowCommitConflictException() {}
287+
288+
// Empty override marked @Disabled: per the annotation message, Object Storage does not support
// index-related operations, so this ScanAll-with-index-condition case is not run for this storage.
@Override
289+
@Disabled("Object Storage does not support index-related operations")
290+
public void
291+
commit_ScanAllWithIndexConditionInSerializable_WhenBeforeIndexHasPreparedRecordFromOtherTransaction_ShouldThrowCommitConflictException() {}
292+
293+
// Empty override marked @Disabled: per the annotation message, Object Storage does not support
// index-related operations, so this index-based getScanner case is not run for this storage.
@Override
294+
@Disabled("Object Storage does not support index-related operations")
295+
public void
296+
commit_GetScannerWithIndexInSerializable_WhenBeforeIndexHasPreparedRecordFromOtherTransaction_ShouldThrowCommitConflictException() {}
297+
298+
// Empty override marked @Disabled: per the annotation message, Object Storage does not support
// index-related operations, so this getScanner-with-ScanAll-index-condition case is not run for
// this storage.
@Override
299+
@Disabled("Object Storage does not support index-related operations")
300+
public void
301+
commit_GetScannerWithScanAllIndexConditionInSerializable_WhenBeforeIndexHasPreparedRecordFromOtherTransaction_ShouldThrowCommitConflictException() {}
277302
}

core/src/main/java/com/scalar/db/common/CoreError.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -914,19 +914,22 @@ public enum CoreError implements ScalarDbError {
914914
CONSENSUS_COMMIT_INDEX_GET_NOT_ALLOWED_IN_SERIALIZABLE(
915915
Category.USER_ERROR,
916916
"0260",
917-
"Get operations by using an index is not allowed in the SERIALIZABLE isolation level",
917+
"Get operations using a secondary index are not allowed in the SERIALIZABLE isolation level without before-image indexes. "
918+
+ "Run repairTable() to create before-image indexes for the table, which will enable index-based Get operations in the SERIALIZABLE isolation level",
918919
"",
919920
""),
920921
CONSENSUS_COMMIT_INDEX_SCAN_NOT_ALLOWED_IN_SERIALIZABLE(
921922
Category.USER_ERROR,
922923
"0261",
923-
"Scan operations by using an index is not allowed in the SERIALIZABLE isolation level",
924+
"Scan operations using a secondary index are not allowed in the SERIALIZABLE isolation level without before-image indexes. "
925+
+ "Run repairTable() to create before-image indexes for the table, which will enable index-based Scan operations in the SERIALIZABLE isolation level",
924926
"",
925927
""),
926928
CONSENSUS_COMMIT_CONDITION_ON_INDEXED_COLUMNS_NOT_ALLOWED_IN_CROSS_PARTITION_SCAN_IN_SERIALIZABLE(
927929
Category.USER_ERROR,
928930
"0262",
929-
"Conditions on indexed columns in cross-partition scan operations are not allowed in the SERIALIZABLE isolation level",
931+
"Conditions on indexed columns in cross-partition scan operations are not allowed in the SERIALIZABLE isolation level without before-image indexes. "
932+
+ "Run repairTable() to create before-image indexes for the table, which will enable conditions on indexed columns in cross-partition scan operations in the SERIALIZABLE isolation level",
930933
"",
931934
""),
932935
OBJECT_STORAGE_CLOUD_STORAGE_SERVICE_ACCOUNT_KEY_NOT_FOUND(

core/src/main/java/com/scalar/db/transaction/consensuscommit/ConsensusCommitOperationChecker.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ public void check(Get get, TransactionContext context) throws ExecutionException
9696
// If the index column is part of the primary key, it's allowed
9797
String indexKeyColumnName = get.getPartitionKey().getColumns().get(0).getName();
9898
if (!tableMetadata.getPartitionKeyNames().contains(indexKeyColumnName)
99-
&& !tableMetadata.getClusteringKeyNames().contains(indexKeyColumnName)) {
99+
&& !tableMetadata.getClusteringKeyNames().contains(indexKeyColumnName)
100+
&& !metadata.hasBeforeImageSecondaryIndex(indexKeyColumnName)) {
100101
throw new IllegalArgumentException(
101102
CoreError.CONSENSUS_COMMIT_INDEX_GET_NOT_ALLOWED_IN_SERIALIZABLE.buildMessage());
102103
}
@@ -163,7 +164,8 @@ public void check(Scan scan, TransactionContext context) throws ExecutionExcepti
163164
// If the index column is part of the primary key, it's allowed
164165
String indexKeyColumnName = scan.getPartitionKey().getColumns().get(0).getName();
165166
if (!tableMetadata.getPartitionKeyNames().contains(indexKeyColumnName)
166-
&& !tableMetadata.getClusteringKeyNames().contains(indexKeyColumnName)) {
167+
&& !tableMetadata.getClusteringKeyNames().contains(indexKeyColumnName)
168+
&& !metadata.hasBeforeImageSecondaryIndex(indexKeyColumnName)) {
167169
throw new IllegalArgumentException(
168170
CoreError.CONSENSUS_COMMIT_INDEX_SCAN_NOT_ALLOWED_IN_SERIALIZABLE.buildMessage());
169171
}
@@ -177,7 +179,8 @@ public void check(Scan scan, TransactionContext context) throws ExecutionExcepti
177179
// If the column is an indexed column but is part of the primary key, it's allowed
178180
if (tableMetadata.getSecondaryIndexNames().contains(column)
179181
&& !tableMetadata.getPartitionKeyNames().contains(column)
180-
&& !tableMetadata.getClusteringKeyNames().contains(column)) {
182+
&& !tableMetadata.getClusteringKeyNames().contains(column)
183+
&& !metadata.hasBeforeImageSecondaryIndex(column)) {
181184
throw new IllegalArgumentException(
182185
CoreError
183186
.CONSENSUS_COMMIT_CONDITION_ON_INDEXED_COLUMNS_NOT_ALLOWED_IN_CROSS_PARTITION_SCAN_IN_SERIALIZABLE

core/src/main/java/com/scalar/db/transaction/consensuscommit/CrudHandler.java

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -612,8 +612,20 @@ private TransactionTableMetadata getTransactionTableMetadata(
612612
}
613613

614614
/**
615-
* Returns whether the given selection requires a before-image index check. This is true when the
616-
* selection uses a secondary index that has a corresponding before-image secondary index.
615+
* Returns whether the given selection requires a before-image index check.
616+
*
617+
* <p>For index-based selections (Get with index, Scan with index), this returns true when the
618+
* index column has a corresponding before-image secondary index. For ScanAll, this returns true
619+
* when any conjunction condition is on a column that has both a secondary index and a
620+
* corresponding before-image secondary index.
621+
*
622+
* <p>If the before-image index does not exist (e.g., for tables created before the before-image
623+
* index check feature was introduced), the check is skipped. In SNAPSHOT and READ_COMMITTED
624+
* isolation, this means index-based reads may return eventually consistent results, which is a
625+
* known limitation (a warning is logged at startup via {@code
626+
* warnIfBeforeImageIndexesAreMissing}). In SERIALIZABLE isolation, this case does not occur
627+
* because {@link ConsensusCommitOperationChecker} rejects index-based operations on tables
628+
* without before-image indexes.
617629
*
618630
* @param selection the selection operation
619631
* @param metadata the transaction table metadata
@@ -626,11 +638,11 @@ boolean requiresBeforeIndexCheck(Selection selection, TransactionTableMetadata m
626638
}
627639

628640
if (selection instanceof ScanAll) {
629-
// For ScanAll, check if any conjunction condition is on a column that has a before-image
630-
// secondary index
631641
for (Selection.Conjunction conjunction : selection.getConjunctions()) {
632642
for (ConditionalExpression condition : conjunction.getConditions()) {
633-
if (metadata.hasBeforeImageSecondaryIndex(condition.getColumn().getName())) {
643+
String columnName = condition.getColumn().getName();
644+
if (metadata.getTableMetadata().getSecondaryIndexNames().contains(columnName)
645+
&& metadata.hasBeforeImageSecondaryIndex(columnName)) {
634646
return true;
635647
}
636648
}
@@ -710,6 +722,18 @@ boolean checkAndRecoverBeforeIndexRecords(
710722
}
711723
}
712724
}
725+
} catch (RuntimeException e) {
726+
Exception exception;
727+
if (e.getCause() instanceof ExecutionException) {
728+
exception = (ExecutionException) e.getCause();
729+
} else {
730+
exception = e;
731+
}
732+
throw new CrudException(
733+
CoreError.CONSENSUS_COMMIT_SCANNING_RECORDS_FROM_STORAGE_FAILED.buildMessage(
734+
exception.getMessage()),
735+
exception,
736+
context.transactionId);
713737
} catch (ExecutionException e) {
714738
throw new CrudException(
715739
CoreError.CONSENSUS_COMMIT_SCANNING_RECORDS_FROM_STORAGE_FAILED.buildMessage(

core/src/main/java/com/scalar/db/transaction/consensuscommit/Snapshot.java

Lines changed: 128 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22

33
import static com.scalar.db.transaction.consensuscommit.ConsensusCommitOperationAttributes.isImplicitPreReadEnabled;
44
import static com.scalar.db.transaction.consensuscommit.ConsensusCommitOperationAttributes.isInsertModeEnabled;
5-
import static com.scalar.db.transaction.consensuscommit.ConsensusCommitUtils.getTransactionTableMetadata;
65

76
import com.google.common.annotations.VisibleForTesting;
87
import com.google.common.base.MoreObjects;
98
import com.google.common.collect.ComparisonChain;
109
import com.google.common.collect.Iterators;
1110
import com.scalar.db.api.ConditionSetBuilder;
11+
import com.scalar.db.api.ConditionalExpression;
1212
import com.scalar.db.api.Delete;
1313
import com.scalar.db.api.DistributedStorage;
1414
import com.scalar.db.api.Get;
@@ -20,6 +20,7 @@
2020
import com.scalar.db.api.ScanAll;
2121
import com.scalar.db.api.ScanWithIndex;
2222
import com.scalar.db.api.Scanner;
23+
import com.scalar.db.api.Selection;
2324
import com.scalar.db.api.Selection.Conjunction;
2425
import com.scalar.db.api.TableMetadata;
2526
import com.scalar.db.common.CoreError;
@@ -560,22 +561,43 @@ void toSerializable(DistributedStorage storage)
560561

561562
// Scan set is re-validated to check if there is no anti-dependency
562563
for (Map.Entry<Scan, LinkedHashMap<Key, TransactionResult>> entry : scanSet.entrySet()) {
563-
tasks.add(() -> validateScanResults(storage, entry.getKey(), entry.getValue(), false));
564+
tasks.add(
565+
() -> {
566+
TransactionTableMetadata txMetadata = getTransactionTableMetadata(entry.getKey());
567+
validateScanResults(
568+
storage, entry.getKey(), entry.getValue(), false, txMetadata.getTableMetadata());
569+
validateBeforeIndex(storage, entry.getKey(), txMetadata);
570+
});
564571
}
565572

566573
// Scanner set is re-validated to check if there is no anti-dependency
567574
for (ScannerInfo scannerInfo : scannerSet) {
568-
tasks.add(() -> validateScanResults(storage, scannerInfo.scan, scannerInfo.results, true));
575+
tasks.add(
576+
() -> {
577+
TransactionTableMetadata txMetadata = getTransactionTableMetadata(scannerInfo.scan);
578+
validateScanResults(
579+
storage,
580+
scannerInfo.scan,
581+
scannerInfo.results,
582+
true,
583+
txMetadata.getTableMetadata());
584+
validateBeforeIndex(storage, scannerInfo.scan, txMetadata);
585+
});
569586
}
570587

571588
// Get set is re-validated to check if there is no anti-dependency
572589
for (Map.Entry<Get, Optional<TransactionResult>> entry : getSet.entrySet()) {
573590
Get get = entry.getKey();
574-
TableMetadata metadata = getTableMetadata(get);
591+
TransactionTableMetadata txMetadata = getTransactionTableMetadata(get);
592+
TableMetadata metadata = txMetadata.getTableMetadata();
575593

576594
if (ScalarDbUtils.isSecondaryIndexSpecified(get, metadata)) {
577595
// For Get with index
578-
tasks.add(() -> validateGetWithIndexResult(storage, get, entry.getValue(), metadata));
596+
tasks.add(
597+
() -> {
598+
validateGetWithIndexResult(storage, get, entry.getValue(), metadata);
599+
validateBeforeIndex(storage, get, txMetadata);
600+
});
579601
} else {
580602
// For other Get
581603

@@ -610,20 +632,19 @@ void toSerializable(DistributedStorage storage)
610632
* @param results the results of the scan
611633
* @param notFullyScannedScanner if this is a validation for a scanner that has not been fully
612634
* scanned
635+
* @param metadata the table metadata for the scanned table
613636
* @throws ExecutionException if a storage operation fails
614637
* @throws ValidationConflictException if the scan results are changed by another transaction
615638
*/
616639
private void validateScanResults(
617640
DistributedStorage storage,
618641
Scan scan,
619642
LinkedHashMap<Key, TransactionResult> results,
620-
boolean notFullyScannedScanner)
643+
boolean notFullyScannedScanner,
644+
TableMetadata metadata)
621645
throws ExecutionException, ValidationConflictException {
622-
Scanner scanner = null;
623-
try {
624-
TableMetadata metadata = getTableMetadata(scan);
625-
626-
scanner = storage.scan(ConsensusCommitUtils.prepareScanForStorage(scan, metadata));
646+
try (Scanner scanner =
647+
storage.scan(ConsensusCommitUtils.prepareScanForStorage(scan, metadata))) {
627648

628649
// Initialize the iterator for the latest scan results
629650
Optional<Result> latestResult = getNextResult(scanner, scan);
@@ -722,14 +743,8 @@ private void validateScanResults(
722743
throwExceptionDueToAntiDependency();
723744
}
724745
}
725-
} finally {
726-
if (scanner != null) {
727-
try {
728-
scanner.close();
729-
} catch (IOException e) {
730-
logger.warn("Failed to close the scanner. Transaction ID: {}", id, e);
731-
}
732-
}
746+
} catch (IOException e) {
747+
logger.warn("Failed to close the scanner. Transaction ID: {}", id, e);
733748
}
734749
}
735750

@@ -780,7 +795,7 @@ private void validateGetWithIndexResult(
780795
originalResult.ifPresent(r -> results.put(new Snapshot.Key(scanWithIndex, r, metadata), r));
781796

782797
// Validate the result to check if there is no anti-dependency
783-
validateScanResults(storage, scanWithIndex, results, false);
798+
validateScanResults(storage, scanWithIndex, results, false, metadata);
784799
}
785800

786801
private void validateGetResult(
@@ -810,10 +825,99 @@ private void validateGetResult(
810825
}
811826
}
812827

813-
private TableMetadata getTableMetadata(Operation operation) throws ExecutionException {
814-
TransactionTableMetadata transactionTableMetadata =
815-
getTransactionTableMetadata(tableMetadataManager, operation);
816-
return transactionTableMetadata.getTableMetadata();
828+
/**
829+
* Validates that there are no uncommitted records on the before-image index that could cause
830+
* phantom reads.
831+
*
832+
* <p>This is needed because when another transaction updates a record's indexed column (e.g.,
833+
* from 10 to 20) and is in PREPARED/DELETED state, the regular index scan (e.g., index_col=10)
834+
* won't find that record since its current value is 20. However, the record's committed
835+
* (before-image) value is still 10. Without this check, a phantom could go undetected: a record
836+
* committed with index_col=10 but updated to 20 by another PREPARED transaction would be
837+
* invisible to both the original scan and the validation re-scan.
838+
*
839+
* <p>This method is only called in the SERIALIZABLE extra-read validation phase. In SERIALIZABLE,
840+
* {@link ConsensusCommitOperationChecker} rejects index-based operations on tables without
841+
* before-image indexes, so the existence of before-image indexes is guaranteed when this method
842+
* is called. Therefore, this method only needs to check whether the selection is an index-based
843+
* operation (Get with index, Scan with index, or ScanAll with indexed column conditions), without
844+
* checking for the existence of before-image indexes.
845+
*
846+
* @param storage a distributed storage
847+
* @param selection the original selection operation (Get with index, ScanWithIndex, or ScanAll)
848+
* @throws ExecutionException if a storage operation fails
849+
* @throws ValidationConflictException if uncommitted records are found on the before-image index
850+
*/
851+
private void validateBeforeIndex(
852+
DistributedStorage storage, Selection selection, TransactionTableMetadata txMetadata)
853+
throws ExecutionException, ValidationConflictException {
854+
if (!isIndexBasedOperation(selection, txMetadata.getTableMetadata())) {
855+
return;
856+
}
857+
858+
Scan beforeIndexScan;
859+
if (selection instanceof ScanAll) {
860+
beforeIndexScan =
861+
ConsensusCommitUtils.createBeforeIndexScanAll(
862+
(ScanAll) selection, txMetadata.getTableMetadata());
863+
} else {
864+
beforeIndexScan = ConsensusCommitUtils.createBeforeIndexScan(selection);
865+
}
866+
867+
try (Scanner scanner = storage.scan(beforeIndexScan)) {
868+
for (Result result : scanner) {
869+
TransactionResult txResult = new TransactionResult(result);
870+
// Conservatively fail if any uncommitted record from another transaction is found on the
871+
// before-image index. This may cause false positives (e.g., when the record will be
872+
// rolled forward and its committed index value won't actually match the scan condition),
873+
// but it guarantees correctness. On retry, the record should be committed, so the retry
874+
// will succeed.
875+
if (!txResult.isCommitted() && !id.equals(txResult.getId())) {
876+
throwExceptionDueToAntiDependency();
877+
}
878+
}
879+
} catch (RuntimeException e) {
880+
if (e.getCause() instanceof ExecutionException) {
881+
throw (ExecutionException) e.getCause();
882+
}
883+
throw e;
884+
} catch (IOException e) {
885+
logger.warn("Failed to close the scanner. Transaction ID: {}", id, e);
886+
}
887+
}
888+
889+
/**
890+
* Checks if the given selection is an index-based operation that requires before-image index
891+
* validation. This includes Get with index, Scan with index, and ScanAll with conditions on
892+
* indexed columns.
893+
*
894+
* <p>For ScanAll, whether the underlying storage actually uses the index depends on the storage
895+
* implementation. However, this method considers ScanAll with conditions on indexed columns as an
896+
* index-based operation regardless.
897+
*
898+
* @param selection the selection operation to check
899+
* @param metadata the table metadata
900+
* @return true if the selection is an index-based operation
901+
*/
902+
private boolean isIndexBasedOperation(Selection selection, TableMetadata metadata) {
903+
if (ScalarDbUtils.isSecondaryIndexSpecified(selection, metadata)) {
904+
return true;
905+
}
906+
if (selection instanceof ScanAll) {
907+
for (Selection.Conjunction conjunction : selection.getConjunctions()) {
908+
for (ConditionalExpression condition : conjunction.getConditions()) {
909+
if (metadata.getSecondaryIndexNames().contains(condition.getColumn().getName())) {
910+
return true;
911+
}
912+
}
913+
}
914+
}
915+
return false;
916+
}
917+
918+
private TransactionTableMetadata getTransactionTableMetadata(Operation operation)
919+
throws ExecutionException {
920+
return ConsensusCommitUtils.getTransactionTableMetadata(tableMetadataManager, operation);
817921
}
818922

819923
private boolean isChanged(

0 commit comments

Comments
 (0)