-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from na2na-p/develop
除外設定を利用できるようにした
- Loading branch information
Showing
8 changed files
with
223 additions
and
105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,8 @@ | |
"deno.lint": true, | ||
"deno.unstable": false, | ||
"cSpell.words": [ | ||
"Deno" | ||
"Deno", | ||
"ipadic", | ||
"neologd" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
export { analyse } from "./src/index.ts"; | ||
export { Gomamayo } from "./src/index.ts"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,22 @@ | ||
import { analyse } from "./index.ts"; | ||
const inputString:string = Deno.args[0]; | ||
import { Gomamayo } from "./index.ts"; | ||
const mode = Deno.args[0]; // "analyse" or "addIgnore" | ||
const inputString: string = Deno.args[1]; | ||
|
||
console.log(await analyse(inputString)); | ||
// "./data/ignoreWords.json"に設定ファイルがあると想定しています。 | ||
// なければ作成してください。 | ||
const gomamayo = new Gomamayo("./data/ignoreWords.json"); | ||
|
||
switch (mode) { | ||
case "addIgnore": | ||
console.log("addIgnore"); | ||
console.log(await gomamayo.addIgnoreWord(inputString)); | ||
break; | ||
|
||
case "analyse": | ||
console.log(await gomamayo.analyse(inputString)); | ||
break; | ||
|
||
default: | ||
console.log("第一引数で、実行モード(analyse/addIgnore)の指定をしてください。"); | ||
break; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,24 @@ | ||
import * as gomamayo from "https://deno.land/x/gomamayo_deno/mod.ts"; | ||
import { Gomamayo } from "https://deno.land/x/gomamayo_deno/mod.ts"; | ||
|
||
const inputString:string = Deno.args[0]; | ||
const ignoreSettingsPath = "./data/ignoreWords.json"; // 除外設定ファイルのパスを書いてください。除外設定を使わない場合はnullを指定するか、引数なしで new Gomamayo() としてください。 | ||
const gomamayo = new Gomamayo(ignoreSettingsPath); | ||
const mode = Deno.args[0]; // "analyse" or "addIgnore" | ||
const inputString: string = Deno.args[1]; | ||
|
||
console.log(await gomamayo.analyse(inputString)); | ||
// deno run --allow-run --allow-read https://deno.land/x/gomamayo_deno/src/cli.ts analyse 株式公開買付 | ||
// deno run --allow-run --allow-read --allow-write https://deno.land/x/gomamayo_deno/src/cli.ts addIgnore 株式公開買付 | ||
|
||
switch (mode) { | ||
case "addIgnore": | ||
console.log("addIgnore"); | ||
console.log(await gomamayo.addIgnoreWord(inputString)); | ||
break; | ||
|
||
case "analyse": | ||
console.log(await gomamayo.analyse(inputString)); | ||
break; | ||
|
||
default: | ||
console.log("第一引数で、実行モード(analyse/addIgnore)の指定をしてください。"); | ||
break; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,114 +1,190 @@ | ||
import { MeCab } from "https://deno.land/x/[email protected]/mod.ts"; | ||
// assets/vowel_define.jsonを読み込む | ||
const vowelDefine = await Deno.readTextFile("./assets/vowel_define.json"); | ||
const mecab = new MeCab(["mecab"]); | ||
import { Database } from "https://deno.land/x/[email protected]/mod.ts"; | ||
|
||
type gomamayoResult = { | ||
interface ParsedWord { | ||
// 0 | ||
surface: string; | ||
// 1 | ||
feature: string; | ||
// 2..4 | ||
featureDetails: string[]; | ||
// 5..6 | ||
conjugationForms: string[]; | ||
// 7 | ||
originalForm: string; | ||
// 8 | ||
reading?: string; | ||
// 9 | ||
pronunciation?: string; | ||
} | ||
|
||
interface gomamayoResult { | ||
isGomamayo: boolean; | ||
combo: number; // inputString中にあるゴママヨの総数 | ||
detail: gomamayoDetail[]; | ||
}; | ||
} | ||
|
||
type gomamayoDetail = { | ||
interface gomamayoDetail { | ||
surface: string; // 該当の2語を入れる | ||
dimension: number; // n次ゴママヨのn | ||
rawResult1: any; // mecab.parseの結果 気持ち的にはMeCabのParsedWordって型を使いたい。 | ||
rawResult2: any; // mecab.parseの結果 気持ち的にはMeCabのParsedWordって型を使いたい。 | ||
}; | ||
rawResult1: ParsedWord; // mecab.parseの結果 気持ち的にはMeCabのParsedWordって型を使いたい。 | ||
rawResult2: ParsedWord; // mecab.parseの結果 気持ち的にはMeCabのParsedWordって型を使いたい。 | ||
} | ||
|
||
/** | ||
* @param {string} inputString | ||
* @return {MeCab.ParsedWord[]} | ||
*/ | ||
async function parse(inputString: string) { | ||
const rawResult = await mecab.parse(inputString); | ||
interface ignoreWord { | ||
surface: string; | ||
} | ||
|
||
// rawResult.pronunciationがundefinedの場合、rawResult.pronunciation = rawResult.surfaceとなるようにする | ||
const parseResult = rawResult.map((raw) => { | ||
if (raw.pronunciation === undefined) { | ||
raw.pronunciation = raw.surface; | ||
} | ||
if (raw.reading === undefined) { | ||
raw.reading = raw.surface; | ||
class Gomamayo { | ||
private vowelDefine: string; | ||
private mecab = new MeCab(["mecab"]); | ||
private db: Database<ignoreWord> | null = null; | ||
|
||
constructor(dbPath: string | null = null) { | ||
this.vowelDefine = Deno.readTextFileSync("./assets/vowel_define.json"); | ||
if (dbPath) { | ||
this.db = new Database<ignoreWord>(dbPath); | ||
console.log(`${dbPath} を読み込みました。`); | ||
} else { | ||
this.db = null; | ||
} | ||
return raw; | ||
}); | ||
return parseResult; | ||
} | ||
} | ||
|
||
/** | ||
* @param {string} inputString | ||
* @return {ParsedWord[]} | ||
*/ | ||
public async parse(inputString: string): Promise<ParsedWord[]> { | ||
const rawResult = await this.mecab.parse(inputString); | ||
|
||
/** | ||
* @param {string} rawReading | ||
* @return {string} | ||
*/ | ||
function prolongedSoundMarkVowelize(rawReading: string): string { | ||
const vowelDefineJSON = JSON.parse(vowelDefine); | ||
// readingに長音が含まれている場合はすべてカタカナに変換する | ||
let returnReading = ""; | ||
rawReading.replace(/[ぁ-ゖ]/g, (s) => { | ||
return String.fromCharCode(s.charCodeAt(0) + 0x60); | ||
}); | ||
for (let i = 0; i < rawReading.length; i++) { | ||
const prev = rawReading[i - 1]; | ||
const current = rawReading[i]; | ||
returnReading += (current === "ー") ? vowelDefineJSON[prev] : current; | ||
// rawResult.pronunciationがundefinedの場合、rawResult.pronunciation = rawResult.surfaceとなるようにする | ||
const parseResult = rawResult.map((raw) => { | ||
if (raw.pronunciation === undefined) { | ||
raw.pronunciation = raw.surface; | ||
} | ||
if (raw.reading === undefined) { | ||
raw.reading = raw.surface; | ||
} | ||
return raw; | ||
}); | ||
return parseResult; | ||
} | ||
return returnReading; | ||
} | ||
|
||
/** | ||
* @param {string} 判定したい文字列 | ||
* @return 分析結果 | ||
*/ | ||
async function analyse(inputString: string): Promise<gomamayoResult> { | ||
const gomamayoResult: gomamayoResult = { | ||
isGomamayo: false, | ||
combo: 0, | ||
detail: [], | ||
}; | ||
const rawParseResult = await parse(inputString); | ||
/** | ||
* @param {string} rawReading | ||
* @return {string} | ||
*/ | ||
public prolongedSoundMarkVowelize(rawReading: string): string { | ||
const vowelDefineJSON = JSON.parse(this.vowelDefine); | ||
// readingに長音が含まれている場合はすべてカタカナに変換する | ||
let returnReading = ""; | ||
rawReading.replace(/[ぁ-ゖ]/g, (s) => { | ||
return String.fromCharCode(s.charCodeAt(0) + 0x60); | ||
}); | ||
for (let i = 0; i < rawReading.length; i++) { | ||
const prev = rawReading[i - 1]; | ||
const current = rawReading[i]; | ||
returnReading += (current === "ー") ? vowelDefineJSON[prev] : current; | ||
} | ||
return returnReading; | ||
} | ||
|
||
/** | ||
* @param {string} inputString 判定したい文字列 | ||
* @param isIgnored 除外設定を使うかどうか。指定した文字列を除外する場合はtrue。デフォルトはtrue。 | ||
* @return 分析結果 | ||
*/ | ||
public async analyse( | ||
inputString: string, | ||
isIgnored = true, | ||
): Promise<gomamayoResult> { | ||
const gomamayoResult: gomamayoResult = { | ||
isGomamayo: false, | ||
combo: 0, | ||
detail: [], | ||
}; | ||
const rawParseResult = await this.parse(inputString); | ||
|
||
// rawParseResult[i].readingに「ー」が含まれていたらprolongedSoundMarkVowelizeを実行し、それに置き換える | ||
rawParseResult.map((raw) => { | ||
if (typeof raw.reading !== "undefined") { | ||
if (raw.reading.includes("ー")) { | ||
raw.reading = prolongedSoundMarkVowelize(raw.reading); | ||
if (isIgnored) { | ||
console.log("除外設定を使用します。"); | ||
if (this.db) { | ||
const ignoreWords = await this.db.findMany(); | ||
// ignoreWords[i].surfaceが、inputStringに含まれているかどうかを判定する | ||
for (let i = 0; i < ignoreWords.length; i++) { | ||
if (inputString.includes(ignoreWords[i].surface)) { | ||
console.log(`除外ワード\n${ignoreWords[i].surface}\nが含まれていたため、判定を中断します。`); | ||
return gomamayoResult; | ||
} | ||
} | ||
} | ||
} | ||
return raw; | ||
}); | ||
|
||
for (let i = 0; i < rawParseResult.length - 1; i++) { | ||
const first = rawParseResult[i]; | ||
const second = rawParseResult[i + 1]; | ||
if ( | ||
first.feature !== "名詞" && first.feature !== "数詞" || | ||
second.surface === first.surface | ||
) { | ||
continue; | ||
} | ||
// first.readingを後ろから1文字ずつ見ていく | ||
// 同時に、second.readingを先頭から1文字ずつ見ていく | ||
// 一致したら、gomamayoResultにpushする | ||
if (first.reading && second.reading) { | ||
// firstとsecondのreading.lengthのうち、短い方を | ||
const minLength = Math.min(first.reading.length, second.reading.length); | ||
for (let j = 1; j < minLength; j++) { | ||
const firstReading = first.reading.slice(first.reading.length - j); | ||
const secondReading = second.reading.slice(0, j); | ||
if (firstReading === secondReading) { | ||
gomamayoResult.isGomamayo = true; | ||
gomamayoResult.detail.push({ | ||
surface: first.surface + "|" + second.surface, | ||
dimension: j, | ||
rawResult1: first, | ||
rawResult2: second, | ||
}); | ||
gomamayoResult.combo++; | ||
// rawParseResult[i].readingに「ー」が含まれていたらprolongedSoundMarkVowelizeを実行し、それに置き換える | ||
rawParseResult.map((raw) => { | ||
if (typeof raw.reading !== "undefined") { | ||
if (raw.reading.includes("ー")) { | ||
raw.reading = this.prolongedSoundMarkVowelize(raw.reading); | ||
} | ||
} | ||
return raw; | ||
}); | ||
|
||
for (let i = 0; i < rawParseResult.length - 1; i++) { | ||
const first = rawParseResult[i]; | ||
const second = rawParseResult[i + 1]; | ||
if ( | ||
first.feature !== "名詞" && first.feature !== "数詞" || | ||
second.surface === first.surface | ||
) { | ||
continue; | ||
} | ||
// first.readingを後ろから1文字ずつ見ていく | ||
// 同時に、second.readingを先頭から1文字ずつ見ていく | ||
// 一致したら、gomamayoResultにpushする | ||
if (first.reading && second.reading) { | ||
// firstとsecondのreading.lengthのうち、短い方を | ||
const minLength = Math.min(first.reading.length, second.reading.length); | ||
for (let j = 1; j < minLength; j++) { | ||
const firstReading = first.reading.slice(first.reading.length - j); | ||
const secondReading = second.reading.slice(0, j); | ||
if (firstReading === secondReading) { | ||
gomamayoResult.isGomamayo = true; | ||
gomamayoResult.detail.push({ | ||
surface: first.surface + "|" + second.surface, | ||
dimension: j, | ||
rawResult1: first, | ||
rawResult2: second, | ||
}); | ||
gomamayoResult.combo++; | ||
} | ||
} | ||
} | ||
} | ||
return gomamayoResult; | ||
} | ||
|
||
/** | ||
* ゴママヨではない語を設定する。設定ファイルが必要。 | ||
* @param word | ||
* @returns | ||
*/ | ||
public addIgnoreWord(word: string): Promise<boolean> { | ||
if (this.db) { | ||
this.db.insertOne({ | ||
surface: word, | ||
}) | ||
.then(() => { | ||
console.log(`${word} を除外設定に追加しました。`); | ||
}) | ||
.catch((err) => { | ||
console.error(err); | ||
return false; | ||
}); | ||
return Promise.resolve(true); | ||
} else { | ||
return Promise.resolve(false); | ||
} | ||
} | ||
return gomamayoResult; | ||
} | ||
|
||
export { analyse }; | ||
export { Gomamayo }; |