Skip to content

Commit

Permalink
renamed saved alignments to alignment memory
Browse files Browse the repository at this point in the history
  • Loading branch information
da1nerd committed Sep 12, 2018
1 parent 1bd0b42 commit b820799
Show file tree
Hide file tree
Showing 12 changed files with 100 additions and 99 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "wordmap",
"version": "0.3.1",
"version": "0.4.0",
"description": "Multi-Lingual Word Alignment Prediction",
"main": "dist/index.js",
"types": "dist/index.d.ts",
Expand Down
4 changes: 2 additions & 2 deletions src/Algorithm.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import AlignmentMemoryIndex from "./index/AlignmentMemoryIndex";
import CorpusIndex from "./index/CorpusIndex";
import SavedAlignmentsIndex from "./index/SavedAlignmentsIndex";
import UnalignedSentenceIndex from "./index/UnalignedSentenceIndex";
import Prediction from "./structures/Prediction";

Expand All @@ -12,6 +12,6 @@ export default interface Algorithm {
/**
* Executes the algorithm
*/
execute(predictions: Prediction[], cIndex: CorpusIndex, saIndex: SavedAlignmentsIndex, usIndex: UnalignedSentenceIndex): Prediction[];
execute(predictions: Prediction[], cIndex: CorpusIndex, saIndex: AlignmentMemoryIndex, usIndex: UnalignedSentenceIndex): Prediction[];

}
42 changes: 21 additions & 21 deletions src/Engine.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import {Token} from "wordmap-lexer";
import Algorithm from "./Algorithm";
import AlignmentMemoryIndex from "./index/AlignmentMemoryIndex";
import CorpusIndex from "./index/CorpusIndex";
import NumberObject from "./index/NumberObject";
import SavedAlignmentsIndex from "./index/SavedAlignmentsIndex";
import UnalignedSentenceIndex from "./index/UnalignedSentenceIndex";
import Parser from "./Parser";
import Alignment from "./structures/Alignment";
Expand Down Expand Up @@ -98,7 +98,7 @@ export default class Engine {
* @param predictions
* @param saIndex
*/
public static calculateConfidence(predictions: Prediction[], saIndex: SavedAlignmentsIndex): Prediction[] {
public static calculateConfidence(predictions: Prediction[], saIndex: AlignmentMemoryIndex): Prediction[] {
const finalPredictions: Prediction[] = [];
const weights: NumberObject = {
"alignmentPosition": 0.7,
Expand All @@ -110,12 +110,12 @@ export default class Engine {
"sourceCorpusPermutationsFrequencyRatio": 0.7,
"targetCorpusPermutationsFrequencyRatio": 0.7,

"sourceSavedAlignmentsFrequencyRatio": 0.7,
"targetSavedAlignmentsFrequencyRatio": 0.7
"sourceAlignmentMemoryFrequencyRatio": 0.7,
"targetAlignmentMemoryFrequencyRatio": 0.7
};

for (const p of predictions) {
const isSavedAlignment = saIndex.alignmentFrequency.read(p.alignment);
const isAlignmentMemory = saIndex.alignmentFrequency.read(p.alignment);

// confidence based on corpus
const corpusWeightedKeys = [
Expand All @@ -134,9 +134,9 @@ export default class Engine {
);

// confidence based on alignment memory
const savedAlignmentsWeightedKeys = [
"sourceSavedAlignmentsFrequencyRatio",
"targetSavedAlignmentsFrequencyRatio",
const alignmentMemoryWeightedKeys = [
"sourceAlignmentMemoryFrequencyRatio",
"targetAlignmentMemoryFrequencyRatio",
"alignmentPosition",
"ngramLength",
"characterLength",
Expand All @@ -145,18 +145,18 @@ export default class Engine {
];
let confidence = Engine.calculateWeightedConfidence(
p,
savedAlignmentsWeightedKeys,
alignmentMemoryWeightedKeys,
weights
);

// prefer to use the alignment memory confidence
if (!isSavedAlignment) {
if (!isAlignmentMemory) {
confidence = corpusConfidence;
confidence *= p.getScore("phrasePlausibility");
}

// boost confidence for alignments found in the alignment memory
if (isSavedAlignment) {
if (isAlignmentMemory) {
confidence++;
}

Expand Down Expand Up @@ -250,7 +250,7 @@ export default class Engine {
private maxSourceNgramLength: number;
private registeredAlgorithms: Algorithm[] = [];
private corpusIndex: CorpusIndex;
private savedAlignmentsIndex: SavedAlignmentsIndex;
private alignmentMemoryIndex: AlignmentMemoryIndex;

/**
* Returns a list of algorithms that are registered in the engine
Expand All @@ -267,7 +267,7 @@ export default class Engine {
this.maxSourceNgramLength = sourceNgramLength;
this.maxTargetNgramLength = targetNgramLength;
this.corpusIndex = new CorpusIndex();
this.savedAlignmentsIndex = new SavedAlignmentsIndex();
this.alignmentMemoryIndex = new AlignmentMemoryIndex();
}

/**
Expand All @@ -277,11 +277,11 @@ export default class Engine {
* @param {Token[]} sourceSentence - the source sentence tokens.
* @param {Token[]} targetSentence - the target sentence tokens.
* @param {CorpusIndex} cIndex
* @param {SavedAlignmentsIndex} saIndex
* @param {AlignmentMemoryIndex} saIndex
* @param {Algorithm[]} algorithms
* @return {Prediction[]}
*/
public performPrediction(sourceSentence: Token[], targetSentence: Token[], cIndex: CorpusIndex, saIndex: SavedAlignmentsIndex, algorithms: Algorithm[]) {
public performPrediction(sourceSentence: Token[], targetSentence: Token[], cIndex: CorpusIndex, saIndex: AlignmentMemoryIndex, algorithms: Algorithm[]) {
const sourceNgrams = Parser.ngrams(
sourceSentence,
this.maxSourceNgramLength
Expand Down Expand Up @@ -320,7 +320,7 @@ export default class Engine {
public score(predictions: Prediction[]): Prediction[] {
const results = Engine.calculateConfidence(
predictions,
this.savedAlignmentsIndex
this.alignmentMemoryIndex
);
return Engine.sortPredictions(results);
}
Expand All @@ -347,19 +347,19 @@ export default class Engine {
* Adding alignment memory improves the quality of predictions.
* @param {Array<Alignment>} alignmentMemory - a list of alignments
*/
public addSavedAlignments(alignmentMemory: Alignment[]) {
public addAlignmentMemory(alignmentMemory: Alignment[]) {
for (let i = alignmentMemory.length - 1; i >= 0; i--) {
const target = alignmentMemory[i].target;
if (target.tokenLength > this.maxTargetNgramLength) {
console.warn(`Alignment memory "${target.key}" exceeds maximum n-gram length of ${this.maxTargetNgramLength} and may be ignored.`);
console.warn(`Target Alignment Memory "${target.key}" exceeds maximum n-gram length of ${this.maxTargetNgramLength} and may be ignored.`);
}
const source = alignmentMemory[i].source;
if (source.tokenLength > this.maxSourceNgramLength) {
console.warn(`Alignment memory "${source.key}" exceeds maximum n-gram length of ${this.maxSourceNgramLength} and may be ignored.`);
console.warn(`Source Alignment Memory "${source.key}" exceeds maximum n-gram length of ${this.maxSourceNgramLength} and may be ignored.`);
}
}

this.savedAlignmentsIndex.append(alignmentMemory);
this.alignmentMemoryIndex.append(alignmentMemory);
}

/**
Expand All @@ -373,7 +373,7 @@ export default class Engine {
sourceSentence,
targetSentence,
this.corpusIndex,
this.savedAlignmentsIndex,
this.alignmentMemoryIndex,
this.registeredAlgorithms
);
}
Expand Down
8 changes: 4 additions & 4 deletions src/WordMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ export default class WordMap {
this.engine.addCorpus(sourceTokens, targetTokens);
}

public appendSavedAlignments(alignments: Alignment[]) {
this.engine.addSavedAlignments(alignments);
public appendAlignmentMemory(alignments: Alignment[]) {
this.engine.addAlignmentMemory(alignments);
}

/**
Expand All @@ -74,7 +74,7 @@ export default class WordMap {
* @param {string} target - a string of target phrases separated by new lines
* @return {Alignment[]} an array of alignment objects (as a convenience)
*/
public appendSavedAlignmentsString(source: string, target: string): Alignment[] {
public appendAlignmentMemoryString(source: string, target: string): Alignment[] {
const alignments: Alignment[] = [];
const sourceLines = source.split("\n");
const targetLines = target.split("\n");
Expand All @@ -90,7 +90,7 @@ export default class WordMap {
new Ngram(targetTokens)
));
}
this.appendSavedAlignments(alignments);
this.appendAlignmentMemory(alignments);
return alignments;
}

Expand Down
12 changes: 6 additions & 6 deletions src/__tests__/engine.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
jest.mock("../index/PermutationIndex");
import {Token} from "wordmap-lexer";
import Engine from "../Engine";
import AlignmentMemoryIndex from "../index/AlignmentMemoryIndex";
import CorpusIndex from "../index/CorpusIndex";
// @ts-ignore
import {mockAddAlignments, mockAddSentencePair} from "../index/PermutationIndex";
import SavedAlignmentsIndex from "../index/SavedAlignmentsIndex";
import Ngram from "../structures/Ngram";
import Prediction from "../structures/Prediction";
import {
Expand All @@ -30,7 +30,7 @@ it("registers an algorithm", () => {
it("adds the alignment to the index", () => {
const sentence = alignMockSentence("Once upon a time");
const engine = new Engine();
engine.addSavedAlignments(sentence);
engine.addAlignmentMemory(sentence);
expect(mockAddAlignments).toBeCalledWith(sentence);
});

Expand Down Expand Up @@ -153,7 +153,7 @@ it("runs all the algorithms", () => {
source,
target,
new CorpusIndex(),
new SavedAlignmentsIndex(),
new AlignmentMemoryIndex(),
algorithms
);

Expand Down Expand Up @@ -197,10 +197,10 @@ describe("scoring", () => {
prediction.setScores({
alignmentPosition: 1,
ngramLength: 2,
sourceSavedAlignmentsFrequencyRatio: 3,
sourceAlignmentMemoryFrequencyRatio: 3,
sourceCorpusPermutationsFrequencyRatio: 5,
targetCorpusPermutationsFrequencyRatio: 2,
targetSavedAlignmentsFrequencyRatio: 1,
targetAlignmentMemoryFrequencyRatio: 1,
phrasePlausibility: 2,
sourceNgramLength: 1,
characterLength: 3,
Expand All @@ -209,7 +209,7 @@ describe("scoring", () => {
});
const result = Engine.calculateConfidence(
[prediction],
new SavedAlignmentsIndex()
new AlignmentMemoryIndex()
);
expect(result[0].getScore("confidence")).toEqual(4.558139534883723);
});
Expand Down
28 changes: 14 additions & 14 deletions src/__tests__/map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,17 @@ describe("MAP", () => {
it("predicts from saved alignments", () => {
const map = new WordMap();
// append alignment memory
const sourceSavedAlignments = fs.readFileSync(path.join(
const sourceAlignmentMemory = fs.readFileSync(path.join(
__dirname,
"fixtures/corrections/greek.txt"
));
const targetSavedAlignments = fs.readFileSync(path.join(
const targetAlignmentMemory = fs.readFileSync(path.join(
__dirname,
"fixtures/corrections/english.txt"
));
map.appendSavedAlignmentsString(
sourceSavedAlignments.toString("utf-8"),
targetSavedAlignments.toString("utf-8")
map.appendAlignmentMemoryString(
sourceAlignmentMemory.toString("utf-8"),
targetAlignmentMemory.toString("utf-8")
);

const unalignedPair = [
Expand Down Expand Up @@ -162,8 +162,8 @@ describe("MAP", () => {
it("excludes alignment memory that exceeds the max ngram length", () => {
const map = new WordMap({targetNgramLength: 3});

map.appendSavedAlignmentsString("φιλοτέκνους", "and children");
map.appendSavedAlignmentsString("φιλάνδρους", "love their own husbands");
map.appendAlignmentMemoryString("φιλοτέκνους", "and children");
map.appendAlignmentMemoryString("φιλάνδρους", "love their own husbands");
const suggestions = map.predict(
"ἵνα σωφρονίζωσι τὰς νέας, φιλάνδρους εἶναι, φιλοτέκνους",
"In this way they may train the younger women to love their own husbands and children"
Expand All @@ -178,8 +178,8 @@ describe("MAP", () => {
it("uses alignment memory that falls within expanded ngram length", () => {
const map = new WordMap({targetNgramLength: 4});

map.appendSavedAlignmentsString("φιλοτέκνους", "and children");
map.appendSavedAlignmentsString("φιλάνδρους", "love their own husbands");
map.appendAlignmentMemoryString("φιλοτέκνους", "and children");
map.appendAlignmentMemoryString("φιλάνδρους", "love their own husbands");
const suggestions = map.predict(
"ἵνα σωφρονίζωσι τὰς νέας, φιλάνδρους εἶναι, φιλοτέκνους",
"In this way they may train the younger women to love their own husbands and children"
Expand Down Expand Up @@ -210,17 +210,17 @@ describe("MAP", () => {
);

// append alignment memory
const sourceSavedAlignments = fs.readFileSync(path.join(
const sourceAlignmentMemory = fs.readFileSync(path.join(
__dirname,
"fixtures/corrections/greek.txt"
));
const targetSavedAlignments = fs.readFileSync(path.join(
const targetAlignmentMemory = fs.readFileSync(path.join(
__dirname,
"fixtures/corrections/english.txt"
));
map.appendSavedAlignmentsString(
sourceSavedAlignments.toString("utf-8"),
targetSavedAlignments.toString("utf-8")
map.appendAlignmentMemoryString(
sourceAlignmentMemory.toString("utf-8"),
targetAlignmentMemory.toString("utf-8")
);

const unalignedPair = [
Expand Down
3 changes: 2 additions & 1 deletion src/__tests__/titus_map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ describe("MAP predictions in Titus", () => {
);
console.log(
"benchmarks\n",
map.predictWithBenchmark(secondUnalignedPair[0],
map.predictWithBenchmark(
secondUnalignedPair[0],
secondUnalignedPair[1],
benchmark,
2
Expand Down
4 changes: 2 additions & 2 deletions src/algorithms/AlignmentOccurrences.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import Algorithm from "../Algorithm";
import AlignmentMemoryIndex from "../index/AlignmentMemoryIndex";
import CorpusIndex from "../index/CorpusIndex";
import SavedAlignmentsIndex from "../index/SavedAlignmentsIndex";
import UnalignedSentenceIndex from "../index/UnalignedSentenceIndex";
import Prediction from "../structures/Prediction";

Expand All @@ -11,7 +11,7 @@ import Prediction from "../structures/Prediction";
export default class AlignmentOccurrences implements Algorithm {
public name = "alignment occurrences";

public execute(predictions: Prediction[], cIndex: CorpusIndex, saIndex: SavedAlignmentsIndex, usIndex: UnalignedSentenceIndex): Prediction[] {
public execute(predictions: Prediction[], cIndex: CorpusIndex, saIndex: AlignmentMemoryIndex, usIndex: UnalignedSentenceIndex): Prediction[] {
for (const p of predictions) {
let weight = 0;

Expand Down
Loading

0 comments on commit b820799

Please sign in to comment.