Skip to content

Commit

Permalink
added strict checking occurrence order with fallback after max attemp…
Browse files Browse the repository at this point in the history
…ts fails
  • Loading branch information
da1nerd committed Jan 21, 2020
1 parent ffbc9b8 commit f6a0b50
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 26 deletions.
17 changes: 0 additions & 17 deletions src/__tests__/regression.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,23 +133,6 @@ describe("Order of occurrence", () => {
}
});

it("suggests the correct n-gram occurrence", () => {
// later occurrences were being preferred over earlier ones.
const map = new WordMap();
map.appendAlignmentMemoryString("עִם", "with the");
const source = "וְ⁠הָ⁠עִבְרִ֗ים הָי֤וּ לַ⁠פְּלִשְׁתִּים֙ כְּ⁠אֶתְמ֣וֹל שִׁלְשׁ֔וֹם אֲשֶׁ֨ר עָל֥וּ עִמָּ֛⁠ם בַּֽ⁠מַּחֲנֶ֖ה סָבִ֑יב וְ⁠גַם־ הֵ֗מָּה לִֽ⁠הְיוֹת֙ עִם־ יִשְׂרָאֵ֔ל אֲשֶׁ֥ר עִם־ שָׁא֖וּל וְ⁠יוֹנָתָֽן׃";
const target = "Before that, some of the Hebrew men had deserted their army and gone to join with the Philistine army. But now those men revolted and joined with the Saul and Jonathan and the other Israelite soldiers.";
const suggestions = map.predict(source, target);
const predictions = suggestions[0].getPredictions()
.filter((p) => p.confidence >= 1);
expect(predictions[0].alignment.key).toEqual(predictions[1].alignment.key);
// expect target tokens to be used in order
expect(predictions[0].alignment.targetNgram.getTokens()[0].occurrence)
.toEqual(1);
expect(predictions[1].alignment.targetNgram.getTokens()[0].occurrence)
.toEqual(2);
});

it("suggest the correct word occurrence", () => {
// later occurrences were being preferred over earlier ones.
const map = new WordMap();
Expand Down
31 changes: 22 additions & 9 deletions src/core/Engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -244,22 +244,33 @@ export class Engine {
return prediction.confidence >= minConfidence || prediction.target.isNull();
};

const MAX_DISCARDS = 1000;
const suggestionKeys: string[] = [];
const suggestions: Suggestion[] = [];
const [isOccurrenceValid, addOccurrence, resetOccurrences] = useSequentialOccurrence();
const [isOccurrenceValid, addOccurrence, resetOccurrences, reviewOccurrences] = useSequentialOccurrence();
const validPredictions = predictions.filter(isPredictionValid);

// build suggestions
// require occurrences to appear in order
let forceOccurrence = forceOccurrenceOrder;
// require occurrences to begin at one and have no gaps
let strictOccurrence = forceOccurrenceOrder;
let numDiscards = 0;
let i = -1;
while (suggestions.length < maxSuggestions) {
i++;

// TRICKY: disable strict occurrence order if we exceed half of the maximum discards,
// and start at the beginning.
if (strictOccurrence && numDiscards >= MAX_DISCARDS / 2) {
console.warn("Exceeded maximum discards while searching for strict occurrence order. Strict occurrence checking disabled.");
strictOccurrence = false;
i = 1;
}

// TRICKY: disable forced occurrence order if we exceed the maximum discards,
// and start at the beginning.
if (forceOccurrence && numDiscards >= 1000) {
console.warn("Exceeded maximum discards while searching for valid occurrence order.");
if (forceOccurrence && numDiscards >= MAX_DISCARDS) {
console.warn("Exceeded maximum discards while searching for valid occurrence order. Occurrence checking disabled.");
forceOccurrence = false;
i = 1;
}
Expand All @@ -283,10 +294,11 @@ export class Engine {
getSequentialOccurrenceProps(best).forEach(addOccurrence);
}

try {
utils.fillSuggestion(filtered, forceOccurrence, isOccurrenceValid, addOccurrence, suggestion);
} catch {
numDiscards ++;
utils.fillSuggestion(filtered, forceOccurrence, isOccurrenceValid, addOccurrence, suggestion);

// make sure all occurrences begin at 1 and have no gaps
if (strictOccurrence && !reviewOccurrences()) {
numDiscards++;
continue;
}

Expand Down Expand Up @@ -507,7 +519,6 @@ export class Engine {

/**
* Fill the suggestion with predictions.
* This may throw an exception if the suggestion becomes invalid.
* @param predictions an array of sorted predictions
* @param forceOccurrenceOrder
* @param isOccurrenceValid
Expand Down Expand Up @@ -555,6 +566,7 @@ function fillSuggestion(predictions: Prediction[], forceOccurrenceOrder: boolean
if (isPredictionOccurrenceValid(nextBest)) {
addPredictionOccurrences(nextBest);
} else {
// skip the prediction since it would invalidate the occurrence order
continue;
}
}
Expand All @@ -564,6 +576,7 @@ function fillSuggestion(predictions: Prediction[], forceOccurrenceOrder: boolean
});

suggestion.addPrediction(nextBest);
// TODO: we should break out of this loop once we have accounted for all of the words
}
}

Expand Down

0 comments on commit f6a0b50

Please sign in to comment.