diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index d6cc6c4622..f44a73bce4 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -33,7 +33,7 @@ Users performing online updates are encouraged to update from [4.0.559.4](#40559 * **Feature** Add enum column support to relational server [(Issue #3073)](https://github.com/FoundationDB/fdb-record-layer/issues/3073) * **Feature** Feature 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN) * **Feature** Feature 4 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN) -* **Feature** Feature 5 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN) +* **Feature** Support Lucene index scrubbing [(Issue #3008)](https://github.com/FoundationDB/fdb-record-layer/issues/3008) * **Breaking change** Change 1 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN) * **Breaking change** Change 2 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN) * **Breaking change** Change 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN) diff --git a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/logging/LogMessageKeys.java b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/logging/LogMessageKeys.java index 04ae9ae951..67a4c2cf35 100644 --- a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/logging/LogMessageKeys.java +++ b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/logging/LogMessageKeys.java @@ -126,6 +126,7 @@ public enum LogMessageKeys { PRIMARY_INDEX, VALUE_KEY, PRIMARY_KEY, + GROUPING_KEY, VALUE, INDEX_OPERATION("operation"), INITIAL_PREFIX, diff --git a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexScrubbing.java b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexScrubbing.java index e183c5fb5c..f95106536e 100644 --- 
a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexScrubbing.java +++ b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexScrubbing.java @@ -142,7 +142,6 @@ private CompletableFuture indexScrubRangeOnly(@Nonnull FDBRecordStore s throw new UnsupportedOperationException("This index does not support scrubbing type " + scrubbingType); } - return indexScrubRangeOnly(store, recordsScanned, index, tools, maintainer.isIdempotent()); } diff --git a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/indexes/ValueIndexScrubbingToolsMissing.java b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/indexes/ValueIndexScrubbingToolsMissing.java index 71c45bf6c1..665240c7c2 100644 --- a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/indexes/ValueIndexScrubbingToolsMissing.java +++ b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/indexes/ValueIndexScrubbingToolsMissing.java @@ -130,7 +130,7 @@ private CompletableFuture> getMissingIndexKeys(FDBRecordStore store, } @Nonnull - private RecordCursor indexEntriesForRecord(@Nonnull FDBRecordStore store, @Nonnull FDBStoredRecord rec) { + protected RecordCursor indexEntriesForRecord(@Nonnull FDBRecordStore store, @Nonnull FDBStoredRecord rec) { final IndexMaintainer maintainer = store.getIndexMaintainer(index); if (isSynthetic) { final RecordQueryPlanner queryPlanner = diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintainer.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintainer.java index 7fb25ac366..1900b7912a 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintainer.java +++ 
b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintainer.java @@ -58,6 +58,7 @@ import com.apple.foundationdb.record.provider.foundationdb.IndexOperation; import com.apple.foundationdb.record.provider.foundationdb.IndexOperationResult; import com.apple.foundationdb.record.provider.foundationdb.IndexScanBounds; +import com.apple.foundationdb.record.provider.foundationdb.IndexScrubbingTools; import com.apple.foundationdb.record.provider.foundationdb.indexes.InvalidIndexEntry; import com.apple.foundationdb.record.provider.foundationdb.indexes.StandardIndexMaintainer; import com.apple.foundationdb.record.query.QueryToKeyMatcher; @@ -111,6 +112,7 @@ public class LuceneIndexMaintainer extends StandardIndexMaintainer { private static final Logger LOG = LoggerFactory.getLogger(LuceneIndexMaintainer.class); + @Nonnull private final FDBDirectoryManager directoryManager; private final LuceneAnalyzerCombinationProvider indexAnalyzerSelector; private final LuceneAnalyzerCombinationProvider autoCompleteAnalyzerSelector; @@ -750,4 +752,20 @@ private void logSerializationError(String format, Object ... 
arguments) { } } } + + @Nullable + @Override + public IndexScrubbingTools getIndexScrubbingTools(final IndexScrubbingTools.ScrubbingType type) { + switch (type) { + case MISSING: + final Map options = state.index.getOptions(); + if (Boolean.parseBoolean(options.get(LuceneIndexOptions.PRIMARY_KEY_SEGMENT_INDEX_ENABLED)) || + Boolean.parseBoolean(options.get(LuceneIndexOptions.PRIMARY_KEY_SEGMENT_INDEX_V2_ENABLED))) { + return new LuceneIndexScrubbingToolsMissing(partitioner, directoryManager, indexAnalyzerSelector); + } + return null; + default: + return null; + } + } } diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexScrubbingToolsMissing.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexScrubbingToolsMissing.java new file mode 100644 index 0000000000..6f08509679 --- /dev/null +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexScrubbingToolsMissing.java @@ -0,0 +1,218 @@ +/* + * LuceneIndexScrubbingToolsMissing.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2015-2025 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.apple.foundationdb.record.lucene; + +import com.apple.foundationdb.async.AsyncUtil; +import com.apple.foundationdb.record.RecordCursor; +import com.apple.foundationdb.record.RecordCursorResult; +import com.apple.foundationdb.record.logging.KeyValueLogMessage; +import com.apple.foundationdb.record.logging.LogMessageKeys; +import com.apple.foundationdb.record.lucene.directory.FDBDirectoryManager; +import com.apple.foundationdb.record.metadata.Index; +import com.apple.foundationdb.record.metadata.RecordType; +import com.apple.foundationdb.record.metadata.expressions.KeyExpression; +import com.apple.foundationdb.record.provider.foundationdb.FDBIndexableRecord; +import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore; +import com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer; +import com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord; +import com.apple.foundationdb.record.provider.foundationdb.FDBSyntheticRecord; +import com.apple.foundationdb.record.provider.foundationdb.indexes.ValueIndexScrubbingToolsMissing; +import com.apple.foundationdb.record.query.plan.RecordQueryPlanner; +import com.apple.foundationdb.record.query.plan.synthetic.SyntheticRecordFromStoredRecordPlan; +import com.apple.foundationdb.record.query.plan.synthetic.SyntheticRecordPlanner; +import com.apple.foundationdb.record.util.pair.Pair; +import com.apple.foundationdb.tuple.Tuple; +import com.google.protobuf.Message; +import org.apache.lucene.index.DirectoryReader; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +/** + * Index Scrubbing Toolbox for a Lucene index maintainer. Scrub missing value index entries - i.e. 
detect record(s) that should + * have been indexed, but cannot be found in the segment index. + */ +public class LuceneIndexScrubbingToolsMissing extends ValueIndexScrubbingToolsMissing { + private Collection recordTypes = null; + private Index index; + private boolean isSynthetic; + + @Nonnull + private final LucenePartitioner partitioner; + @Nonnull + private final FDBDirectoryManager directoryManager; + @Nonnull + private final LuceneAnalyzerCombinationProvider indexAnalyzerSelector; + + public LuceneIndexScrubbingToolsMissing(@Nonnull LucenePartitioner partitioner, @Nonnull FDBDirectoryManager directoryManager, + @Nonnull LuceneAnalyzerCombinationProvider indexAnalyzerSelector) { + this.partitioner = partitioner; + this.directoryManager = directoryManager; + this.indexAnalyzerSelector = indexAnalyzerSelector; + } + + + @Override + public void presetCommonParams(Index index, boolean allowRepair, boolean isSynthetic, Collection types) { + this.recordTypes = types; + this.index = index; + this.isSynthetic = isSynthetic; + // call super, but force allowRepair as false + super.presetCommonParams(index, false, isSynthetic, types); + } + + /** + * Provide a lucene specific reason for detecting a "missing" index entry. + */ + public enum MissingIndexReason { + NOT_IN_PARTITION, + NOT_IN_PK_SEGMENT_INDEX, + EMPTY_RECORDS_FIELDS, + } + + @Override + @Nullable + public CompletableFuture handleOneItem(final FDBRecordStore store, final RecordCursorResult> result) { + if (recordTypes == null || index == null) { + throw new IllegalStateException("presetParams was not called appropriately for this scrubbing tool"); + } + + final FDBStoredRecord rec = result.get(); + if (rec == null || !recordTypes.contains(rec.getRecordType())) { + return CompletableFuture.completedFuture(null); + } + + return detectMissingIndexKeys(store, rec) + .thenApply(missingIndexesKeys -> { + if (missingIndexesKeys == null) { + return null; + } + // Here: Oh, No! an index entry is missing!! 
+ // (Maybe) report an error + return new Issue( + KeyValueLogMessage.build("Scrubber: missing index entry", + LogMessageKeys.KEY, rec.getPrimaryKey(), + LogMessageKeys.GROUPING_KEY, missingIndexesKeys.getValue(), + LogMessageKeys.REASON, missingIndexesKeys.getKey()), + FDBStoreTimer.Counts.INDEX_SCRUBBER_MISSING_ENTRIES, + null); + }); + } + + @SuppressWarnings("PMD.CloseResource") + private CompletableFuture> detectMissingIndexKeys(final FDBRecordStore store, FDBStoredRecord rec) { + // Generate synthetic record (if applicable) and return the first detected missing (if any). + final AtomicReference> issue = new AtomicReference<>(); + + if (!isSynthetic) { + return checkMissingIndexKey(rec, issue).thenApply(ignore -> issue.get()); + } + final RecordQueryPlanner queryPlanner = + new RecordQueryPlanner(store.getRecordMetaData(), store.getRecordStoreState().withWriteOnlyIndexes(Collections.singletonList(index.getName()))); + final SyntheticRecordPlanner syntheticPlanner = new SyntheticRecordPlanner(store, queryPlanner); + SyntheticRecordFromStoredRecordPlan syntheticPlan = syntheticPlanner.forIndex(index); + final RecordCursor recordCursor = syntheticPlan.execute(store, rec); + + return AsyncUtil.whenAll( + recordCursor.asStream().map(syntheticRecord -> checkMissingIndexKey(syntheticRecord, issue)) + .collect(Collectors.toList())) + .whenComplete((ret, e) -> recordCursor.close()) + .thenApply(ignore -> issue.get()); + + } + + private CompletableFuture checkMissingIndexKey(FDBIndexableRecord rec, + AtomicReference> issue) { + // Iterate grouping keys (if any) and detect missing index entry (if any) + final KeyExpression root = index.getRootExpression(); + final Map> recordFields = LuceneDocumentFromRecord.getRecordFields(root, rec); + if (recordFields.isEmpty()) { + // recordFields should not be an empty map + issue.compareAndSet(null, Pair.of(MissingIndexReason.EMPTY_RECORDS_FIELDS, null)); + return AsyncUtil.DONE; + } + if (recordFields.size() == 1) { + // A single 
grouping key, simple check. + return checkMissingIndexKey(rec, recordFields.keySet().iterator().next(), issue); + } + + // Here: more than one grouping key, declare an issue if at least one of them is missing + return AsyncUtil.whenAll( recordFields.keySet().stream().map(groupingKey -> + checkMissingIndexKey(rec, groupingKey, issue) + ).collect(Collectors.toList())) + .thenApply(ignore -> null); + } + + private CompletableFuture checkMissingIndexKey(FDBIndexableRecord rec, Tuple groupingKey, AtomicReference> issue) { + // Get partition (if applicable) and detect missing index entry (if any) + if (!partitioner.isPartitioningEnabled()) { + if (isMissingIndexKey(rec, null, groupingKey)) { + issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PK_SEGMENT_INDEX, groupingKey)); + } + return AsyncUtil.DONE; + } + return partitioner.tryGetPartitionInfo(rec, groupingKey).thenApply(partitionInfo -> { + if (partitionInfo == null) { + issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PARTITION, groupingKey)); + } else if (isMissingIndexKey(rec, partitionInfo.getId(), groupingKey)) { + issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PK_SEGMENT_INDEX, groupingKey)); + } + return null; + }); + } + + @SuppressWarnings("PMD.CloseResource") + private boolean isMissingIndexKey(FDBIndexableRecord rec, Integer partitionId, Tuple groupingKey) { + @Nullable final LucenePrimaryKeySegmentIndex segmentIndex = directoryManager.getDirectory(groupingKey, partitionId).getPrimaryKeySegmentIndex(); + if (segmentIndex == null) { + // Here: internal error, getIndexScrubbingTools should have indicated that scrub missing is not supported. + throw new IllegalStateException("LuceneIndexScrubbingToolsMissing without a LucenePrimaryKeySegmentIndex"); + } + + try { + // TODO: this is called to initialize the writer, else we get an exception at getDirectoryReader. Should it really be done for a RO operation? 
+ directoryManager.getIndexWriter(groupingKey, partitionId, indexAnalyzerSelector.provideIndexAnalyzer("")); + } catch (IOException e) { + throw LuceneExceptions.toRecordCoreException("failed getIndexWriter", e); + } + try { + DirectoryReader directoryReader = directoryManager.getDirectoryReader(groupingKey, partitionId); + final LucenePrimaryKeySegmentIndex.DocumentIndexEntry documentIndexEntry = segmentIndex.findDocument(directoryReader, rec.getPrimaryKey()); + if (documentIndexEntry == null) { + // Here: the document had not been found in the PK segment index + return true; + } + } catch (IOException ex) { + // Here: an unexpected exception. Unwrap and rethrow. + throw LuceneExceptions.toRecordCoreException("Error while finding document", ex); + } + return false; + } + +} diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexGetMetadataInfoTest.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexGetMetadataInfoTest.java index 2d92a8e271..12834f7e2c 100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexGetMetadataInfoTest.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexGetMetadataInfoTest.java @@ -24,7 +24,6 @@ import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore; import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreTestBase; import com.apple.foundationdb.record.provider.foundationdb.IndexOperationResult; -import com.apple.foundationdb.record.provider.foundationdb.OnlineIndexer; import com.apple.foundationdb.tuple.Tuple; import com.google.protobuf.ByteString; import org.hamcrest.Matchers; @@ -37,7 +36,6 @@ import javax.annotation.Nullable; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -102,7 +100,9 @@ void 
getMetadataPartitioned(boolean justPartitionInfo, boolean isGrouped) { dataModel.saveRecords(10, context, i / 3); commit(context); } - explicitMergeIndex(dataModel); + try (final FDBRecordContext context = openContext()) { + dataModel.explicitMergeIndex(context, timer); + } } final Set groupingKeys = isGrouped ? dataModel.groupingKeys() : Set.of(Tuple.from()); @@ -146,7 +146,9 @@ void getMetadataAfterDelete() { dataModel.saveRecords(10, context, i / 3); commit(context); } - explicitMergeIndex(dataModel); + try (final FDBRecordContext context = openContext()) { + dataModel.explicitMergeIndex(context, timer); + } } final Tuple groupingKey = Tuple.from(); @@ -225,17 +227,4 @@ private static void assertLessThan(final ByteString lesserOne, final ByteString private static int segmentCountToFileCount(final int segmentCount) { return segmentCount * 4 + 1; } - - private void explicitMergeIndex(LuceneIndexTestDataModel dataModel) { - try (FDBRecordContext context = openContext()) { - FDBRecordStore recordStore = Objects.requireNonNull(dataModel.schemaSetup.apply(context)); - try (OnlineIndexer indexBuilder = OnlineIndexer.newBuilder() - .setRecordStore(recordStore) - .setIndex(dataModel.index) - .setTimer(timer) - .build()) { - indexBuilder.mergeIndex(); - } - } - } } diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexScrubbingTest.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexScrubbingTest.java new file mode 100644 index 0000000000..b6229d28eb --- /dev/null +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexScrubbingTest.java @@ -0,0 +1,275 @@ +/* + * LuceneIndexScrubbingTest.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2015-2024 Apple Inc. 
and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.record.lucene; + +import com.apple.foundationdb.record.IndexState; +import com.apple.foundationdb.record.RecordMetaDataProvider; +import com.apple.foundationdb.record.lucene.directory.InjectedFailureRepository; +import com.apple.foundationdb.record.lucene.directory.MockedLuceneIndexMaintainerFactory; +import com.apple.foundationdb.record.lucene.directory.TestingIndexMaintainerRegistry; +import com.apple.foundationdb.record.metadata.Index; +import com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext; +import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore; +import com.apple.foundationdb.record.provider.foundationdb.OnlineIndexScrubber; +import com.apple.foundationdb.record.provider.foundationdb.keyspace.KeySpacePath; +import com.apple.foundationdb.record.query.plan.QueryPlanner; +import com.apple.foundationdb.record.util.pair.Pair; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import javax.annotation.Nonnull; +import java.util.Map; +import java.util.stream.Stream; + +import static com.apple.foundationdb.record.lucene.LuceneIndexTestUtils.SIMPLE_TEXT_SUFFIXES_WITH_PRIMARY_KEY_SEGMENT_INDEX; +import static 
com.apple.foundationdb.record.lucene.LuceneIndexTestUtils.createComplexDocument; +import static com.apple.foundationdb.record.lucene.LuceneIndexTestUtils.createSimpleDocument; +import static com.apple.foundationdb.record.lucene.directory.InjectedFailureRepository.Flags.LUCENE_MAINTAINER_SKIP_INDEX_UPDATE; +import static com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexTestUtils.SIMPLE_DOC; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class LuceneIndexScrubbingTest extends FDBLuceneTestBase { + + private TestingIndexMaintainerRegistry registry; + private boolean flipBoolean = false; + + @BeforeEach + public void beforeEach() { + registry = new TestingIndexMaintainerRegistry(); + } + + private void rebuildIndexMetaData(final FDBRecordContext context, final String document, final Index index) { + Pair pair = LuceneIndexTestUtils.rebuildIndexMetaData(context, path, document, index, isUseCascadesPlanner()); + this.recordStore = pair.getLeft(); + this.planner = pair.getRight(); + } + + private static Stream threeBooleanArgs() { + return Stream.of(false, true) + .flatMap(isSynthetic -> Stream.of(false, true) + .flatMap(isPartitioned -> Stream.of(false, true) + .map(isGrouped -> Arguments.of(isSynthetic, isGrouped, isPartitioned)))); + } + + @Nonnull + protected FDBRecordStore.Builder getStoreBuilderWithRegistry(@Nonnull FDBRecordContext context, + @Nonnull RecordMetaDataProvider metaData, + @Nonnull final KeySpacePath path) { + return super.getStoreBuilder(context, metaData, path).setIndexMaintainerRegistry(registry); + } + + @ParameterizedTest + @MethodSource("threeBooleanArgs") + void luceneIndexScrubMissingDataModelNoIssues(boolean isSynthetic, boolean isGrouped, boolean isPartitioned) { + // Scrub a valid index, expect zero issues + final long seed = 7L; + + final LuceneIndexTestDataModel dataModel = new LuceneIndexTestDataModel.Builder(seed, 
this::getStoreBuilderWithRegistry, pathManager) + .setIsGrouped(isGrouped) + .setIsSynthetic(isSynthetic) + .setPartitionHighWatermark(isPartitioned ? 5 : 0) + .build(); + + for (int i = 0; i < 14; i++) { + try (final FDBRecordContext context = openContext()) { + dataModel.saveRecords(7, context, i / 6); + context.commit(); + } + } + + try (final FDBRecordContext context = openContext()) { + dataModel.explicitMergeIndex(context, timer); + context.commit(); + } + try (final FDBRecordContext context = openContext()) { + FDBRecordStore store = dataModel.createOrOpenRecordStore(context); + boolean atLeastOnce = false; + for (Map.Entry entry : store.getAllIndexStates().entrySet()) { + Index index = entry.getKey(); + IndexState indexState = entry.getValue(); + if (index.getType().equalsIgnoreCase("lucene") && indexState.equals(IndexState.READABLE)) { + atLeastOnce = true; + try (OnlineIndexScrubber indexScrubber = OnlineIndexScrubber.newBuilder() + .setRecordStore(store) + .setIndex(index) + .build()) { + final long missingEntriesCount = indexScrubber.scrubMissingIndexEntries(); + assertEquals(0, missingEntriesCount); + } + } + } + assertTrue(atLeastOnce); + } + } + + @Test + void luceneIndexScrubMissingSimpleNoIssues() { + // Scrub a valid index, expect zero issues + Index index = SIMPLE_TEXT_SUFFIXES_WITH_PRIMARY_KEY_SEGMENT_INDEX; + try (final FDBRecordContext context = openContext()) { + // Write some records + rebuildIndexMetaData(context, SIMPLE_DOC, index); + recordStore.saveRecord(createSimpleDocument(1623L, ENGINEER_JOKE, 2)); + recordStore.saveRecord(createSimpleDocument(1547L, WAYLON, 1)); + recordStore.saveRecord(createSimpleDocument(2222L, WAYLON + " who?", 1)); + context.commit(); + } + try (final FDBRecordContext context = openContext()) { + // Overwrite + add records + rebuildIndexMetaData(context, SIMPLE_DOC, index); + recordStore.saveRecord(createSimpleDocument(1623L, ENGINEER_JOKE, 2)); + recordStore.saveRecord(createSimpleDocument(7771547L, WAYLON, 
1)); + recordStore.saveRecord(createSimpleDocument(7772222L, WAYLON + " who?", 1)); + context.commit(); + } + try (final FDBRecordContext context = openContext()) { + // Scrub issues, assert none + rebuildIndexMetaData(context, SIMPLE_DOC, index); + try (OnlineIndexScrubber indexScrubber = OnlineIndexScrubber.newBuilder() + .setRecordStore(recordStore) + .setIndex(index) + .build()) { + final long missingEntriesCount = indexScrubber.scrubMissingIndexEntries(); + assertEquals(0, missingEntriesCount); + } + } + } + + + @ParameterizedTest + @MethodSource("threeBooleanArgs") + void luceneIndexScrubMissingDataModel(boolean isSynthetic, boolean isGrouped, boolean isPartitioned) { + // Scrub an index with missing entries + final long seed = 207L; + + final LuceneIndexTestDataModel dataModel = new LuceneIndexTestDataModel.Builder(seed, this::getStoreBuilderWithRegistry, pathManager) + .setIsGrouped(isGrouped) + .setIsSynthetic(isSynthetic) + .setPartitionHighWatermark(isPartitioned ? 5 : 0) + .build(); + + final InjectedFailureRepository injectedFailures = new InjectedFailureRepository(); + registry.overrideFactory(new MockedLuceneIndexMaintainerFactory(injectedFailures)); + + try (final FDBRecordContext context = openContext()) { + // Write some documents + dataModel.saveRecordsToAllGroups(17, context); + context.commit(); + } + + try (final FDBRecordContext context = openContext()) { + dataModel.explicitMergeIndex(context, timer); + context.commit(); + } + + try (final FDBRecordContext context = openContext()) { + // By saving records with both setReverseSaveOrder true and false, we ensure that records + // are in the oldest and most-recent partitions (if there are partitions) + dataModel.setReverseSaveOrder(true); + dataModel.saveRecords(7, context, 1); + dataModel.setReverseSaveOrder(false); + dataModel.saveRecords(7, context, 2); + // Write few more records without updating + injectedFailures.setFlag(LUCENE_MAINTAINER_SKIP_INDEX_UPDATE); + dataModel.saveRecords(5, 
context, 4); + dataModel.setReverseSaveOrder(true); + dataModel.saveRecords(3, context, 1); + dataModel.setReverseSaveOrder(false); + dataModel.saveRecords(2, context, 3); + injectedFailures.setFlag(LUCENE_MAINTAINER_SKIP_INDEX_UPDATE, false); + context.commit(); + } + + try (final FDBRecordContext context = openContext()) { + dataModel.explicitMergeIndex(context, timer); + context.commit(); + } + + try (final FDBRecordContext context = openContext()) { + FDBRecordStore store = dataModel.createOrOpenRecordStore(context); + boolean atLeastOnce = false; + for (Map.Entry entry : store.getAllIndexStates().entrySet()) { + Index index = entry.getKey(); + IndexState indexState = entry.getValue(); + if (index.getType().equalsIgnoreCase("lucene") && indexState.equals(IndexState.READABLE)) { + atLeastOnce = true; + try (OnlineIndexScrubber indexScrubber = OnlineIndexScrubber.newBuilder() + .setRecordStore(store) + .setIndex(index) + .build()) { + final long missingEntriesCount = indexScrubber.scrubMissingIndexEntries(); + assertEquals(10, missingEntriesCount); + } + } + } + assertTrue(atLeastOnce); + } + } + + @Test + void luceneIndexScrubMissingSimple() { + // Scrub an index with missing entries + Index index = SIMPLE_TEXT_SUFFIXES_WITH_PRIMARY_KEY_SEGMENT_INDEX; + + long startTime = System.currentTimeMillis(); + try (final FDBRecordContext context = openContext()) { + // Write some records + rebuildIndexMetaData(context, SIMPLE_DOC, index); + recordStore.saveRecord(createComplexDocument(1623L, WAYLON, 1, startTime)); + recordStore.saveRecord(createComplexDocument(1547L, WAYLON, 1, startTime + 1000)); + recordStore.saveRecord(createComplexDocument(2222L, WAYLON + " who?", 1, startTime + 2000)); + recordStore.saveRecord(createComplexDocument(899L, ENGINEER_JOKE, 1, startTime + 3000)); + context.commit(); + } + + final InjectedFailureRepository injectedFailures = new InjectedFailureRepository(); + registry.overrideFactory(new 
MockedLuceneIndexMaintainerFactory(injectedFailures)); + + try (final FDBRecordContext context = openContext()) { + // Overwrite + add records without updating the index + Pair pair = LuceneIndexTestUtils.rebuildIndexMetaData(context, path, SIMPLE_DOC, index, isUseCascadesPlanner(), registry); + this.recordStore = pair.getLeft(); + this.planner = pair.getRight(); + injectedFailures.setFlag(LUCENE_MAINTAINER_SKIP_INDEX_UPDATE); + recordStore.saveRecord(createSimpleDocument(1623L, ENGINEER_JOKE, 2)); + recordStore.saveRecord(createSimpleDocument(7771547L, WAYLON, 1)); + recordStore.saveRecord(createSimpleDocument(7772222L, WAYLON + " who?", 1)); + injectedFailures.setFlag(LUCENE_MAINTAINER_SKIP_INDEX_UPDATE, false); + context.commit(); + } + + try (final FDBRecordContext context = openContext()) { + // Scrub issues, assert the number of issues found + rebuildIndexMetaData(context, SIMPLE_DOC, index); + try (OnlineIndexScrubber indexScrubber = OnlineIndexScrubber.newBuilder() + .setRecordStore(recordStore) + .setIndex(index) + .build()) { + final long missingEntriesCount = indexScrubber.scrubMissingIndexEntries(); + assertEquals(3, missingEntriesCount); + } + } + } +} diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestDataModel.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestDataModel.java index f136d9f7ee..46e91a5382 100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestDataModel.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestDataModel.java @@ -32,6 +32,7 @@ import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore; import com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer; import com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord; +import com.apple.foundationdb.record.provider.foundationdb.OnlineIndexer; import 
com.apple.foundationdb.record.provider.foundationdb.keyspace.KeySpacePath; import com.apple.foundationdb.record.test.TestKeySpace; import com.apple.foundationdb.record.test.TestKeySpacePathManagerExtension; @@ -387,6 +388,18 @@ static RecordMetaDataBuilder createBaseMetaDataBuilder() { return metaDataBuilder; } + public void explicitMergeIndex(final FDBRecordContext context, @Nullable FDBStoreTimer timer) { + FDBRecordStore recordStore = Objects.requireNonNull(schemaSetup.apply(context)); + try (OnlineIndexer indexBuilder = OnlineIndexer.newBuilder() + .setRecordStore(recordStore) + .setIndex(index) + .setTimer(timer) + .build()) { + indexBuilder.mergeIndex(); + } + } + + public Integer nextInt(final int bound) { return random.nextInt(bound); } diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/directory/InjectedFailureRepository.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/directory/InjectedFailureRepository.java index 93639e3b85..78a971a8e9 100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/directory/InjectedFailureRepository.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/directory/InjectedFailureRepository.java @@ -25,6 +25,7 @@ import javax.annotation.Nonnull; import java.io.IOException; import java.util.EnumMap; +import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; /** @@ -47,9 +48,14 @@ public enum Methods { LUCENE_GET_ALL_FIELDS_INFO_STREAM } + public enum Flags { + LUCENE_MAINTAINER_SKIP_INDEX_UPDATE, + } + // The injected failure state private EnumMap failureDescriptions = new EnumMap<>(Methods.class); private EnumMap invocationCounts = new EnumMap<>(Methods.class); + private EnumMap flagsMap = new EnumMap<>(Flags.class); public void addFailure(@Nonnull Methods method, @Nonnull Exception exception, long count) { failureDescriptions.put(method, new FailureDescription(method, exception, 
count)); @@ -65,6 +71,18 @@ public void clear() { invocationCounts.clear(); } + public void setFlag(@Nonnull Flags flag) { + setFlag(flag, true); + } + + public void setFlag(@Nonnull Flags flag, Boolean value) { + flagsMap.put(flag, value); + } + + public boolean hasFlag(@Nonnull Flags flag) { + return Optional.ofNullable(flagsMap.get(flag)).orElse(false); + } + public void checkFailureForIoException(@Nonnull final Methods method) throws IOException { try { checkFailure(method); diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/directory/MockedLuceneIndexMaintainer.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/directory/MockedLuceneIndexMaintainer.java index c9bf400588..039103674b 100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/directory/MockedLuceneIndexMaintainer.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/directory/MockedLuceneIndexMaintainer.java @@ -20,10 +20,15 @@ package com.apple.foundationdb.record.lucene.directory; +import com.apple.foundationdb.async.AsyncUtil; import com.apple.foundationdb.record.lucene.LuceneIndexMaintainer; +import com.apple.foundationdb.record.provider.foundationdb.FDBIndexableRecord; import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainerState; +import com.google.protobuf.Message; import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; /** @@ -32,13 +37,25 @@ * the test execution. 
*/ public class MockedLuceneIndexMaintainer extends LuceneIndexMaintainer { + final InjectedFailureRepository injectedFailures; + public MockedLuceneIndexMaintainer(@Nonnull final IndexMaintainerState state, @Nonnull final Executor executor, final InjectedFailureRepository injectedFailures) { super(state, executor); + this.injectedFailures = injectedFailures; // Setting failures has to be done here rather than via a constructor param since createDirectoryManager is called // from the super constructor before we can set local state ((MockedFDBDirectoryManager)getDirectoryManager()).setInjectedFailures(injectedFailures); } + @Nonnull + @Override + public CompletableFuture update(@Nullable final FDBIndexableRecord oldRecord, @Nullable final FDBIndexableRecord newRecord) { + if (injectedFailures.hasFlag(InjectedFailureRepository.Flags.LUCENE_MAINTAINER_SKIP_INDEX_UPDATE)) { + return AsyncUtil.DONE; + } + return super.update(oldRecord, newRecord); + } + @Nonnull @Override protected FDBDirectoryManager createDirectoryManager(@Nonnull final IndexMaintainerState state) {