Skip to content

Commit

Permalink
Merge pull request #7 from na2na-p/develop
Browse files Browse the repository at this point in the history
除外設定を利用できるようにした
  • Loading branch information
na2na-p authored Mar 21, 2022
2 parents 1009f6f + 2cbca3f commit c6ebecc
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 105 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data
4 changes: 3 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
"deno.lint": true,
"deno.unstable": false,
"cSpell.words": [
"Deno"
"Deno",
"ipadic",
"neologd"
]
}
8 changes: 4 additions & 4 deletions README.en.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# What

This is an attempt to detect [gomamayo](https://thinaticsystem.com/glossary/gomamayo/), a play on words, using MeCab.
Only work on Japanese.
This is an attempt to detect [gomamayo](https://thinaticsystem.com/glossary/gomamayo/), a play on words, using MeCab.
It only works on Japanese text.

# Getting Started

## Dependencies

- deno
- MeCab
- MeCab dictionary
We recommend the use of mecab-ipadic-neologd. (https://github.com/neologd/mecab-ipadic-neologd)
- MeCab dictionary
Use of mecab-ipadic-neologd is recommended (https://github.com/neologd/mecab-ipadic-neologd)

## Example

Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@

- Deno
- MeCab
- MeCab 辞書
[mecab-ipadic-neologd](https://github.com/neologd/mecab-ipadic-neologd)がおすすめです。
- MeCab 辞書
- [mecab-ipadic-neologd](https://github.com/neologd/mecab-ipadic-neologd)がおすすめです。
- 除外設定用設定ファイル
- 必須ではありません。作成する場合は空ファイルのjsonを作成してください。AloeDBを使用しています。そちらの設定を確認してもらうのもいいかもしれません。

## 実行例

`deno run --allow-run --allow-read https://deno.land/x/gomamayo_deno/src/cli.ts 株式公開買付`
`deno run --allow-run --allow-read https://deno.land/x/gomamayo_deno/src/cli.ts analyse 株式公開買付`
あるいは、`https://deno.land/x/gomamayo_deno/src/example.ts`を参考にしてください。

## 実行結果
Expand Down
2 changes: 1 addition & 1 deletion mod.ts
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export { analyse } from "./src/index.ts";
export { Gomamayo } from "./src/index.ts";
24 changes: 21 additions & 3 deletions src/cli.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,22 @@
import { analyse } from "./index.ts";
const inputString:string = Deno.args[0];
import { Gomamayo } from "./index.ts";
const mode = Deno.args[0]; // "analyse" or "addIgnore"
const inputString: string = Deno.args[1];

console.log(await analyse(inputString));
// The ignore-word settings file is expected at "./data/ignoreWords.json" (the path passed below).
// Create the file beforehand if it does not exist.
const gomamayo = new Gomamayo("./data/ignoreWords.json");

if (
  (mode === "addIgnore" || mode === "analyse") && inputString === undefined
) {
  // Deno.args[1] is undefined when only the mode was given; report it here
  // instead of forwarding `undefined` to the library as if it were a string.
  console.log("第二引数で、対象の文字列を指定してください。");
} else {
  // Dispatch on the execution mode given as the first CLI argument.
  switch (mode) {
    case "addIgnore":
      console.log("addIgnore");
      console.log(await gomamayo.addIgnoreWord(inputString));
      break;

    case "analyse":
      console.log(await gomamayo.analyse(inputString));
      break;

    default:
      // Unknown or missing mode: print the usage hint.
      console.log("第一引数で、実行モード(analyse/addIgnore)の指定をしてください。");
      break;
  }
}
25 changes: 22 additions & 3 deletions src/example.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
import * as gomamayo from "https://deno.land/x/gomamayo_deno/mod.ts";
import { Gomamayo } from "https://deno.land/x/gomamayo_deno/mod.ts";

const inputString:string = Deno.args[0];
// Path to the ignore-word settings file. To run without ignore settings,
// pass null to the constructor or call `new Gomamayo()` with no argument.
const ignoreSettingsPath = "./data/ignoreWords.json";
const gomamayo = new Gomamayo(ignoreSettingsPath);
// Execution mode, taken from the first CLI argument: "analyse" or "addIgnore".
const mode = Deno.args[0];
// String handed to analyse()/addIgnoreWord(), taken from the second CLI argument.
const inputString: string = Deno.args[1];

console.log(await gomamayo.analyse(inputString));
// deno run --allow-run --allow-read https://deno.land/x/gomamayo_deno/src/cli.ts analyse 株式公開買付
// deno run --allow-run --allow-read https://deno.land/x/gomamayo_deno/src/cli.ts addIgnore 株式公開買付

// Dispatch on the execution mode given as the first CLI argument.
switch (mode) {
  case "addIgnore":
  case "analyse":
    if (inputString === undefined) {
      // The second CLI argument is missing; do not pass `undefined` on to
      // the library as if it were a string.
      console.log("第二引数で、対象の文字列を指定してください。");
    } else if (mode === "addIgnore") {
      console.log("addIgnore");
      console.log(await gomamayo.addIgnoreWord(inputString));
    } else {
      console.log(await gomamayo.analyse(inputString));
    }
    break;

  default:
    // Unknown or missing mode: print the usage hint.
    console.log("第一引数で、実行モード(analyse/addIgnore)の指定をしてください。");
    break;
}
256 changes: 166 additions & 90 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,114 +1,190 @@
import { MeCab } from "https://deno.land/x/[email protected]/mod.ts";
// assets/vowel_define.jsonを読み込む
const vowelDefine = await Deno.readTextFile("./assets/vowel_define.json");
const mecab = new MeCab(["mecab"]);
import { Database } from "https://deno.land/x/[email protected]/mod.ts";

type gomamayoResult = {
/**
 * One token as returned by the MeCab wrapper's `parse` call and consumed by
 * this module.
 *
 * NOTE(review): the numbers in the comments below look like column indices
 * of MeCab's output line for a token — confirm against the deno_mecab typings.
 */
interface ParsedWord {
// Column 0: the token text as it appears in the input.
surface: string;
// Column 1: top-level feature; compared against "名詞" / "数詞" in analyse().
feature: string;
// Columns 2..4: finer-grained feature details.
featureDetails: string[];
// Columns 5..6: conjugation forms.
conjugationForms: string[];
// Column 7: original (dictionary) form.
originalForm: string;
// Column 8: reading; optional because the parser may leave it undefined.
reading?: string;
// Column 9: pronunciation; optional because the parser may leave it undefined.
pronunciation?: string;
}

interface gomamayoResult {
isGomamayo: boolean;
combo: number; // inputString中にあるゴママヨの総数
detail: gomamayoDetail[];
};
}

type gomamayoDetail = {
interface gomamayoDetail {
surface: string; // 該当の2語を入れる
dimension: number; // n次ゴママヨのn
rawResult1: any; // mecab.parseの結果 気持ち的にはMeCabのParsedWordって型を使いたい。
rawResult2: any; // mecab.parseの結果 気持ち的にはMeCabのParsedWordって型を使いたい。
};
rawResult1: ParsedWord; // mecab.parseの結果 気持ち的にはMeCabのParsedWordって型を使いたい。
rawResult2: ParsedWord; // mecab.parseの結果 気持ち的にはMeCabのParsedWordって型を使いたい。
}

/**
* @param {string} inputString
* @return {MeCab.ParsedWord[]}
*/
async function parse(inputString: string) {
const rawResult = await mecab.parse(inputString);
/** Record shape stored in the ignore-word database (`Database<ignoreWord>`). */
interface ignoreWord {
// The word itself; analyse() stops early when the input string contains it.
surface: string;
}

// rawResult.pronunciationがundefinedの場合、rawResult.pronunciation = rawResult.surfaceとなるようにする
const parseResult = rawResult.map((raw) => {
if (raw.pronunciation === undefined) {
raw.pronunciation = raw.surface;
}
if (raw.reading === undefined) {
raw.reading = raw.surface;
class Gomamayo {
private vowelDefine: string;
private mecab = new MeCab(["mecab"]);
private db: Database<ignoreWord> | null = null;

constructor(dbPath: string | null = null) {
this.vowelDefine = Deno.readTextFileSync("./assets/vowel_define.json");
if (dbPath) {
this.db = new Database<ignoreWord>(dbPath);
console.log(`${dbPath} を読み込みました。`);
} else {
this.db = null;
}
return raw;
});
return parseResult;
}
}

/**
* @param {string} inputString
* @return {ParsedWord[]}
*/
public async parse(inputString: string): Promise<ParsedWord[]> {
const rawResult = await this.mecab.parse(inputString);

/**
* @param {string} rawReading
* @return {string}
*/
function prolongedSoundMarkVowelize(rawReading: string): string {
const vowelDefineJSON = JSON.parse(vowelDefine);
// readingに長音が含まれている場合はすべてカタカナに変換する
let returnReading = "";
rawReading.replace(/[-]/g, (s) => {
return String.fromCharCode(s.charCodeAt(0) + 0x60);
});
for (let i = 0; i < rawReading.length; i++) {
const prev = rawReading[i - 1];
const current = rawReading[i];
returnReading += (current === "ー") ? vowelDefineJSON[prev] : current;
// rawResult.pronunciationがundefinedの場合、rawResult.pronunciation = rawResult.surfaceとなるようにする
const parseResult = rawResult.map((raw) => {
if (raw.pronunciation === undefined) {
raw.pronunciation = raw.surface;
}
if (raw.reading === undefined) {
raw.reading = raw.surface;
}
return raw;
});
return parseResult;
}
return returnReading;
}

/**
* @param {string} 判定したい文字列
* @return 分析結果
*/
async function analyse(inputString: string): Promise<gomamayoResult> {
const gomamayoResult: gomamayoResult = {
isGomamayo: false,
combo: 0,
detail: [],
};
const rawParseResult = await parse(inputString);
/**
 * Replaces every prolonged sound mark ("ー") in a reading with the vowel
 * defined for the character that precedes it.
 *
 * The reading is first normalised from hiragana to katakana. The original
 * code attempted this but discarded the result of `String.replace`, so the
 * conversion never took effect.
 * NOTE(review): assumes the keys of vowel_define.json are katakana — confirm.
 *
 * A "ー" that has no preceding character, or whose preceding character has
 * no entry in the vowel table, is kept as-is instead of being replaced by
 * the literal text "undefined".
 *
 * @param rawReading Reading of a single token.
 * @returns The reading with hiragana converted to katakana and "ー" expanded
 *          where a vowel is known.
 */
public prolongedSoundMarkVowelize(rawReading: string): string {
  const vowelDefineJSON: Record<string, string | undefined> = JSON.parse(
    this.vowelDefine,
  );
  // U+3041..U+3096 (hiragana) sit exactly 0x60 below their katakana
  // counterparts U+30A1..U+30F6.
  const katakanaReading = rawReading.replace(
    /[\u3041-\u3096]/g,
    (s) => String.fromCharCode(s.charCodeAt(0) + 0x60),
  );
  let returnReading = "";
  for (let i = 0; i < katakanaReading.length; i++) {
    const current = katakanaReading[i];
    if (current !== "ー") {
      returnReading += current;
      continue;
    }
    const vowel = i > 0 ? vowelDefineJSON[katakanaReading[i - 1]] : undefined;
    // Fall back to the mark itself when no vowel can be determined.
    returnReading += typeof vowel === "string" ? vowel : current;
  }
  return returnReading;
}

/**
* @param {string} inputString 判定したい文字列
* @param isIgnored 除外設定を使うかどうか。指定した文字列を除外する場合はtrue。デフォルトはtrue。
* @return 分析結果
*/
public async analyse(
inputString: string,
isIgnored = true,
): Promise<gomamayoResult> {
const gomamayoResult: gomamayoResult = {
isGomamayo: false,
combo: 0,
detail: [],
};
const rawParseResult = await this.parse(inputString);

// rawParseResult[i].readingに「ー」が含まれていたらprolongedSoundMarkVowelizeを実行し、それに置き換える
rawParseResult.map((raw) => {
if (typeof raw.reading !== "undefined") {
if (raw.reading.includes("ー")) {
raw.reading = prolongedSoundMarkVowelize(raw.reading);
if (isIgnored) {
console.log("除外設定を使用します。");
if (this.db) {
const ignoreWords = await this.db.findMany();
// ignoreWords[i].surfaceが、inputStringに含まれているかどうかを判定する
for (let i = 0; i < ignoreWords.length; i++) {
if (inputString.includes(ignoreWords[i].surface)) {
console.log(`除外ワード\n${ignoreWords[i].surface}\nが含まれていたため、判定を中断します。`);
return gomamayoResult;
}
}
}
}
return raw;
});

for (let i = 0; i < rawParseResult.length - 1; i++) {
const first = rawParseResult[i];
const second = rawParseResult[i + 1];
if (
first.feature !== "名詞" && first.feature !== "数詞" ||
second.surface === first.surface
) {
continue;
}
// first.readingを後ろから1文字ずつ見ていく
// 同時に、second.readingを先頭から1文字ずつ見ていく
// 一致したら、gomamayoResultにpushする
if (first.reading && second.reading) {
// firstとsecondのreading.lengthのうち、短い方を
const minLength = Math.min(first.reading.length, second.reading.length);
for (let j = 1; j < minLength; j++) {
const firstReading = first.reading.slice(first.reading.length - j);
const secondReading = second.reading.slice(0, j);
if (firstReading === secondReading) {
gomamayoResult.isGomamayo = true;
gomamayoResult.detail.push({
surface: first.surface + "|" + second.surface,
dimension: j,
rawResult1: first,
rawResult2: second,
});
gomamayoResult.combo++;
// rawParseResult[i].readingに「ー」が含まれていたらprolongedSoundMarkVowelizeを実行し、それに置き換える
rawParseResult.map((raw) => {
if (typeof raw.reading !== "undefined") {
if (raw.reading.includes("ー")) {
raw.reading = this.prolongedSoundMarkVowelize(raw.reading);
}
}
return raw;
});

for (let i = 0; i < rawParseResult.length - 1; i++) {
const first = rawParseResult[i];
const second = rawParseResult[i + 1];
if (
first.feature !== "名詞" && first.feature !== "数詞" ||
second.surface === first.surface
) {
continue;
}
// first.readingを後ろから1文字ずつ見ていく
// 同時に、second.readingを先頭から1文字ずつ見ていく
// 一致したら、gomamayoResultにpushする
if (first.reading && second.reading) {
// firstとsecondのreading.lengthのうち、短い方を
const minLength = Math.min(first.reading.length, second.reading.length);
for (let j = 1; j < minLength; j++) {
const firstReading = first.reading.slice(first.reading.length - j);
const secondReading = second.reading.slice(0, j);
if (firstReading === secondReading) {
gomamayoResult.isGomamayo = true;
gomamayoResult.detail.push({
surface: first.surface + "|" + second.surface,
dimension: j,
rawResult1: first,
rawResult2: second,
});
gomamayoResult.combo++;
}
}
}
}
return gomamayoResult;
}

/**
 * Registers a word that must not be judged as gomamayo. Requires the
 * instance to have been constructed with a settings-file path.
 *
 * The insertion is awaited before reporting the outcome. The original
 * returned `true` immediately and dropped the `false` produced inside its
 * `.catch` handler, so a failed insertion was still reported as success.
 *
 * @param word Surface string to store in the ignore list.
 * @returns Resolves to `true` once the word has been stored; resolves to
 *          `false` when no database is configured or the insertion failed
 *          (the error is logged with `console.error`).
 */
public async addIgnoreWord(word: string): Promise<boolean> {
  if (!this.db) {
    return false;
  }
  try {
    await this.db.insertOne({
      surface: word,
    });
  } catch (err) {
    console.error(err);
    return false;
  }
  console.log(`${word} を除外設定に追加しました。`);
  return true;
}
return gomamayoResult;
}

export { analyse };
export { Gomamayo };

0 comments on commit c6ebecc

Please sign in to comment.