diff --git a/src/__tests__/regression.ts b/src/__tests__/regression.ts index 09286ea..a2507e9 100644 --- a/src/__tests__/regression.ts +++ b/src/__tests__/regression.ts @@ -133,23 +133,6 @@ describe("Order of occurrence", () => { } }); - it("suggests the correct n-gram occurrence", () => { - // later occurrences were being preferred over earlier ones. - const map = new WordMap(); - map.appendAlignmentMemoryString("עִם", "with the"); - const source = "וְ⁠הָ⁠עִבְרִ֗ים הָי֤וּ לַ⁠פְּלִשְׁתִּים֙ כְּ⁠אֶתְמ֣וֹל שִׁלְשׁ֔וֹם אֲשֶׁ֨ר עָל֥וּ עִמָּ֛⁠ם בַּֽ⁠מַּחֲנֶ֖ה סָבִ֑יב וְ⁠גַם־ הֵ֗מָּה לִֽ⁠הְיוֹת֙ עִם־ יִשְׂרָאֵ֔ל אֲשֶׁ֥ר עִם־ שָׁא֖וּל וְ⁠יוֹנָתָֽן׃"; - const target = "Before that, some of the Hebrew men had deserted their army and gone to join with the Philistine army. But now those men revolted and joined with the Saul and Jonathan and the other Israelite soldiers."; - const suggestions = map.predict(source, target); - const predictions = suggestions[0].getPredictions() - .filter((p) => p.confidence >= 1); - expect(predictions[0].alignment.key).toEqual(predictions[1].alignment.key); - // expect target tokens to be used in order - expect(predictions[0].alignment.targetNgram.getTokens()[0].occurrence) - .toEqual(1); - expect(predictions[1].alignment.targetNgram.getTokens()[0].occurrence) - .toEqual(2); - }); - it("suggest the correct word occurrence", () => { // later occurrences were being preferred over earlier ones. const map = new WordMap(); diff --git a/src/core/Engine.ts b/src/core/Engine.ts index 8c86b3a..2631e18 100644 --- a/src/core/Engine.ts +++ b/src/core/Engine.ts @@ -244,22 +244,33 @@ export class Engine { return prediction.confidence >= minConfidence || prediction.target.isNull(); }; + const MAX_DISCARDS = 1000; const suggestionKeys: string[] = []; const suggestions: Suggestion[] = []; - const [isOccurrenceValid, addOccurrence, resetOccurrences] = useSequentialOccurrence(); + const [isOccurrenceValid, addOccurrence, resetOccurrences, reviewOccurrences] = useSequentialOccurrence(); const validPredictions = predictions.filter(isPredictionValid); - // build suggestions + // require occurrences to appear in order let forceOccurrence = forceOccurrenceOrder; + // require occurrences to begin at one and have no gaps + let strictOccurrence = forceOccurrenceOrder; let numDiscards = 0; let i = -1; while (suggestions.length < maxSuggestions) { i++; + // TRICKY: disable strict occurrence order if we exceed half of the maximum discards, + // and start at the beginning. + if (strictOccurrence && numDiscards >= MAX_DISCARDS / 2) { + console.warn("Exceeded maximum discards while searching for strict occurrence order. Strict occurrence checking disabled."); + strictOccurrence = false; + i = 1; + } + // TRICKY: disable forced occurrence order if we exceed the maximum discards, // and start at the beginning. - if (forceOccurrence && numDiscards >= 1000) { - console.warn("Exceeded maximum discards while searching for valid occurrence order."); + if (forceOccurrence && numDiscards >= MAX_DISCARDS) { + console.warn("Exceeded maximum discards while searching for valid occurrence order. Occurrence checking disabled."); forceOccurrence = false; i = 1; } @@ -283,10 +294,11 @@ export class Engine { getSequentialOccurrenceProps(best).forEach(addOccurrence); } - try { - utils.fillSuggestion(filtered, forceOccurrence, isOccurrenceValid, addOccurrence, suggestion); - } catch { - numDiscards ++; + utils.fillSuggestion(filtered, forceOccurrence, isOccurrenceValid, addOccurrence, suggestion); + + // make sure all occurrences begin at 1 and have no gaps + if (strictOccurrence && !reviewOccurrences()) { + numDiscards++; continue; } @@ -507,7 +519,6 @@ export class Engine { /** * Fill the suggestion with predictions. - * This may throw an exception if the suggestion becomes invalid. * @param predictions an array of sorted predictions * @param forceOccurrenceOrder * @param isOccurrenceValid @@ -555,6 +566,7 @@ function fillSuggestion(predictions: Prediction[], forceOccurrenceOrder: boolean if (isPredictionOccurrenceValid(nextBest)) { addPredictionOccurrences(nextBest); } else { + // skip the prediction since it would invalidate the occurrence order continue; } } @@ -564,6 +576,7 @@ function fillSuggestion(predictions: Prediction[], forceOccurrenceOrder: boolean }); suggestion.addPrediction(nextBest); + // TODO: we should break out of this loop once we have accounted for all of the words } }