diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fed0d32e..9535112d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -326,3 +326,21 @@ jobs: asset_path: artifacts/kiwi-java.jar asset_name: kiwi-java-${{ steps.get_release.outputs.tag_name }}-lnx-${{ matrix.arch }}.jar asset_content_type: application/octet-stream + + build-emscripten: + name: Emscripten + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + lfs: true + - uses: mymindstorm/setup-emsdk@v14 + - name: Build + run: | + cd bindings/wasm + ./build.sh + - uses: JS-DevTools/npm-publish@v3 + with: + token: ${{ secrets.NPM_TOKEN }} + package: bindings/wasm/package diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d686015..51c6dd02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,9 @@ set ( CMAKE_VERBOSE_MAKEFILE true ) option(KIWI_USE_MIMALLOC "Use mimalloc for faster memory allocation" ON) option(KIWI_USE_CPUINFO "Use cpuinfo for dynamic CPU dispatching" ON) option(KIWI_STATIC_WITHOUT_MT "Use /MT Option in building kiwi_static" OFF) +option(KIWI_BUILD_CLI "Build CLI tool" ON) +option(KIWI_BUILD_EVALUATOR "Build Evaluator" ON) +option(KIWI_BUILD_MODEL_BUILDER "Build Model Builder" ON) option(KIWI_BUILD_TEST "Build Test sets" ON) option(KIWI_JAVA_BINDING "Build Java binding" OFF) set(KIWI_CPU_ARCH "" CACHE STRING "Set architecture type for macOS") @@ -23,7 +26,9 @@ if (NOT CMAKE_BUILD_TYPE) endif() if(NOT KIWI_CPU_ARCH) - if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") + if (EMSCRIPTEN) + set(KIWI_CPU_ARCH "wasm") + elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") set(KIWI_CPU_ARCH "x86_64") elseif (HOST_ARCHITECTURE MATCHES "^arm64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)") set(KIWI_CPU_ARCH "arm64") @@ -174,6 +179,8 @@ elseif (KIWI_CPU_ARCH MATCHES "arm64") src/archImpl/neon.cpp ) set_source_files_properties(src/archImpl/neon.cpp PROPERTIES 
COMPILE_FLAGS "-march=armv8-a") +elseif (KIWI_CPU_ARCH MATCHES "wasm") + message("Compiling for wasm") else() message("Compiling for other") endif() @@ -208,30 +215,36 @@ target_compile_options("${PROJECT_NAME}" PRIVATE "${ADDITIONAL_FLAGS}") #target_link_libraries("${PROJECT_NAME}_static" cpuinfo_internals) #target_link_libraries("${PROJECT_NAME}" cpuinfo) -add_executable( "${PROJECT_NAME}-cli-${PROJECT_VERSION}" - tools/runner.cpp -) +if (KIWI_BUILD_CLI) + add_executable( "${PROJECT_NAME}-cli-${PROJECT_VERSION}" + tools/runner.cpp + ) -target_link_libraries( "${PROJECT_NAME}-cli-${PROJECT_VERSION}" - "${PROJECT_NAME}_static" -) + target_link_libraries( "${PROJECT_NAME}-cli-${PROJECT_VERSION}" + "${PROJECT_NAME}_static" + ) +endif() -add_executable( "${PROJECT_NAME}-evaluator" - tools/Evaluator.cpp - tools/evaluator_main.cpp -) +if (KIWI_BUILD_EVALUATOR) + add_executable( "${PROJECT_NAME}-evaluator" + tools/Evaluator.cpp + tools/evaluator_main.cpp + ) -target_link_libraries( "${PROJECT_NAME}-evaluator" - "${PROJECT_NAME}_static" -) + target_link_libraries( "${PROJECT_NAME}-evaluator" + "${PROJECT_NAME}_static" + ) +endif() -add_executable( "${PROJECT_NAME}-model-builder" - tools/model_builder.cpp -) +if (KIWI_BUILD_MODEL_BUILDER) + add_executable( "${PROJECT_NAME}-model-builder" + tools/model_builder.cpp + ) -target_link_libraries( "${PROJECT_NAME}-model-builder" - "${PROJECT_NAME}_static" -) + target_link_libraries( "${PROJECT_NAME}-model-builder" + "${PROJECT_NAME}_static" + ) +endif() if(MSVC) if(KIWI_STATIC_WITHOUT_MT) @@ -262,13 +275,17 @@ if(UNIX AND NOT APPLE) rt ) - target_link_libraries( "${PROJECT_NAME}-cli-${PROJECT_VERSION}" - rt - ) + if (KIWI_BUILD_CLI) + target_link_libraries( "${PROJECT_NAME}-cli-${PROJECT_VERSION}" + rt + ) + endif() - target_link_libraries( "${PROJECT_NAME}-evaluator" - rt - ) + if (KIWI_BUILD_EVALUATOR) + target_link_libraries( "${PROJECT_NAME}-evaluator" + rt + ) + endif() endif() target_compile_definitions("${PROJECT_NAME}" @@ 
-291,3 +308,7 @@ endif() if(KIWI_JAVA_BINDING) add_subdirectory( bindings/java ) endif() + +if(EMSCRIPTEN) + add_subdirectory( bindings/wasm ) +endif() diff --git a/bindings/wasm/CMakeLists.txt b/bindings/wasm/CMakeLists.txt new file mode 100644 index 00000000..5311cd98 --- /dev/null +++ b/bindings/wasm/CMakeLists.txt @@ -0,0 +1,11 @@ +add_executable( "${PROJECT_NAME}-wasm" + kiwi_wasm.cpp +) + +target_link_libraries( "${PROJECT_NAME}-wasm" + "${PROJECT_NAME}_static" +) + +set_target_properties("${PROJECT_NAME}-wasm" PROPERTIES + LINK_FLAGS "--bind -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -s EXPORT_ES6=1 -s MODULARIZE=1 -s EXPORT_NAME=kiwi -s 'EXPORTED_RUNTIME_METHODS=[\"FS\"]'" +) diff --git a/bindings/wasm/README.md b/bindings/wasm/README.md new file mode 100644 index 00000000..04cf4761 --- /dev/null +++ b/bindings/wasm/README.md @@ -0,0 +1,79 @@ +# kiwi-nlp, 한국어 형태소 분석기 Kiwi의 TypeScript/JavaScript 바인딩 + +## Building + +In addition to the requirements of the main project, you need to install [Emscripten](https://emscripten.org/docs/getting_started/downloads.html) and [npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm). + +To build the package, simply run `./build.sh`. + +This is currently only supported on Linux and macOS. You can run the build script on Windows by using [WSL](https://learn.microsoft.com/en-us/windows/wsl/install). + +You can pass the `--demo` flag to build the demo in `package-demo` as well. +If you pass `--demo-dev`, a development server for the demo will be started. + +Running the build script also automatically updates the package version if it doesn't match the version in the main project. + +## Documentation + +The documentation for the package can be generated by running `npm run doc` inside the `package` directory. + +The main entry point for the API is `KiwiBuilder`, which is used to create instances of `Kiwi`. 
+ +## Example Usage + +```javascript +import { KiwiBuilder, Match } from 'kiwi-nlp'; + +async function example() { + const builder = await KiwiBuilder.create('path to kiwi-wasm.wasm'); + + const kiwi = await builder.build({ + modelFiles: { + 'combiningRule.txt': '/path/to/model/combiningRule.txt', + 'default.dict': '/path/to/model/default.dict', + 'extract.mdl': '/path/to/model/extract.mdl', + 'multi.dict': '/path/to/model/multi.dict', + 'sj.knlm': '/path/to/model/sj.knlm', + 'sj.morph': '/path/to/model/sj.morph', + 'skipbigram.mdl': '/path/to/model/skipbigram.mdl', + 'typo.dict': '/path/to/model/typo.dict', + } + }); + + const tokens = kiwi.analyze('다음은 예시 텍스트입니다.', Match.allWithNormalizing); + /* Output: { + "score": -39.772212982177734, + "tokens": [ + { + "length": 2, + "lineNumber": 0, + "pairedToken": 4294967295, + "position": 0, + "score": -6.5904083251953125, + "sentPosition": 0, + "str": "다음", + "subSentPosition": 0, + "tag": "NNG", + "typoCost": 0, + "typoFormId": 0, + "wordPosition": 0 + }, + { + "length": 1, + "lineNumber": 0, + "pairedToken": 4294967295, + "position": 2, + "score": -1.844599723815918, + "sentPosition": 0, + "str": "은", + "subSentPosition": 0, + "tag": "JX", + "typoCost": 0, + "typoFormId": 0, + "wordPosition": 0 + }, + ... + ] + } */ +} +``` diff --git a/bindings/wasm/build.sh b/bindings/wasm/build.sh new file mode 100755 index 00000000..8581dd99 --- /dev/null +++ b/bindings/wasm/build.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +set -e + +# Change to the script directory +cd "$(dirname "$0")" + +# Get root directory +REPO_ROOT_DIR=$(git rev-parse --show-toplevel) + +# Check if emscripten is installed +if ! command -v emcmake &> /dev/null; then + echo "Emscripten is not installed. Please install it and make sure it is in your PATH." + exit 1 +fi + +# Generate the package structure +mkdir -p package/src/build +mkdir -p package/dist + +# Find core count for make. 
Prefer nproc, then sysctl, then default to 1 +if command -v nproc &> /dev/null; then + CORE_COUNT=$(nproc) +elif command -v sysctl &> /dev/null; then + CORE_COUNT=$(sysctl -n hw.logicalcpu 2> /dev/null || echo 1) +else + CORE_COUNT=1 +fi + +# Build the wasm module and read the project version +mkdir -p build +cd build +emcmake cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DKIWI_USE_CPUINFO=OFF \ + -DKIWI_USE_MIMALLOC=OFF \ + -DKIWI_BUILD_TEST=OFF \ + -DKIWI_BUILD_CLI=OFF \ + -DKIWI_BUILD_EVALUATOR=OFF \ + -DKIWI_BUILD_MODEL_BUILDER=OFF \ + "$REPO_ROOT_DIR" +make -j "$CORE_COUNT" +PROJECT_VERSION=$(grep -m 1 CMAKE_PROJECT_VERSION:STATIC CMakeCache.txt | cut -d'=' -f2) +if [ -z "$PROJECT_VERSION" ]; then + echo "Failed to read project version from CMakeCache.txt" + exit 1 +fi +cd .. + +# Copy the generated files to the package +cp build/bindings/wasm/kiwi-wasm.js package/src/build/kiwi-wasm.js +cp build/bindings/wasm/kiwi-wasm.wasm package/dist/kiwi-wasm.wasm + +# Build typescript wrapper package and update the version +cd package +npm install +npm run build +npm version --no-git-tag-version --allow-same-version "$PROJECT_VERSION" +cd .. + +# Build the demo package if --demo or --demo-dev is passed +# --demo will create a static build +# --demo-dev will start a development server +if [ "$1" == "--demo" ] || [ "$1" == "--demo-dev" ]; then + cd package-demo + npm install + if [ "$1" == "--demo-dev" ]; then + npm run dev + else + npm run build + fi + cd .. 
+fi diff --git a/bindings/wasm/kiwi_wasm.cpp b/bindings/wasm/kiwi_wasm.cpp new file mode 100644 index 00000000..7652989d --- /dev/null +++ b/bindings/wasm/kiwi_wasm.cpp @@ -0,0 +1,585 @@ +#include + +#include +#include + +#include +#include +#include + +using namespace kiwi; +using namespace nlohmann; + + +static std::map instances; + +int nextInstanceId() { + static int id = 0; + return id++; +} + + +static std::map> morphemeSets; + +int nextMorphemeSetId() { + static int id = 0; + return id++; +} + + +template +inline T getAtOrDefault(const json& args, size_t index, const T& defaultValue) { + return args.size() > index ? args.at(index).get() : defaultValue; +} + + +inline std::unordered_set parseMorphemeSet(const Kiwi& kiwi, const json& morphs) { + std::unordered_set set; + + for (const auto& morph : morphs) { + const std::string form8 = morph["form"]; + const std::u16string form = utf8To16(form8); + + POSTag tag = POSTag::unknown; + if (morph.contains("tag")) { + const std::string tagStr8 = morph["tag"]; + const std::u16string tagStr = utf8To16(tagStr8); + tag = toPOSTag(tagStr); + } + + auto matches = kiwi.findMorpheme(form, tag); + set.insert(matches.begin(), matches.end()); + } + + return set; +} + + +class BlockListArg { + std::unordered_set tempSet; + int blockListId; + +public: + BlockListArg(const Kiwi& kiwi, const json& args, size_t index) : blockListId(-1) { + if (args.size() <= index) { + return; + } + const auto& arg = args.at(index); + if (arg.is_number_integer()) { + blockListId = arg.get(); + } else if (arg.is_array()) { + tempSet = parseMorphemeSet(kiwi, arg); + } + } + + const std::unordered_set* setPtr() const { + if (blockListId >= 0) { + return &morphemeSets[blockListId]; + } + if (!tempSet.empty()) { + return &tempSet; + } + return nullptr; + } +}; + + +std::vector parsePretokenizedArg(const json& args, size_t index) { + std::vector spans; + + if (args.size() <= index) { + return spans; + } + + const json& arg = args.at(index); + + if 
(!arg.is_array()) { + return spans; + } + + for (const auto& span : arg) { + const uint32_t start = span["start"]; + const uint32_t end = span["end"]; + + std::vector tokenization; + + for (const auto& token : span["tokenization"]) { + const std::string form8 = token["form"]; + const std::u16string form = utf8To16(form8); + + const uint32_t start = token["start"]; + const uint32_t end = token["end"]; + POSTag tag = POSTag::unknown; + if (token.contains("tag")) { + const std::string tagStr8 = token["tag"]; + const std::u16string tagStr = utf8To16(tagStr8); + tag = toPOSTag(tagStr); + } + + tokenization.push_back(BasicToken{ form, start, end, tag }); + } + + spans.push_back(PretokenizedSpan{ start, end, tokenization }); + } + + return spans; +} + + +inline json serializeTokenInfo(const Kiwi& kiwi, const TokenInfo& tokenInfo) { + return { + { "str", utf16To8(tokenInfo.str) }, + { "position", tokenInfo.position }, + { "wordPosition", tokenInfo.wordPosition }, + { "sentPosition", tokenInfo.sentPosition }, + { "lineNumber", tokenInfo.lineNumber }, + { "length", tokenInfo.length }, + { "tag", tagToString(tokenInfo.tag) }, + { "score", tokenInfo.score }, + { "typoCost", tokenInfo.typoCost }, + { "typoFormId", tokenInfo.typoFormId }, + { "pairedToken", tokenInfo.pairedToken }, + { "subSentPosition", tokenInfo.subSentPosition }, + { "morphId", kiwi.morphToId(tokenInfo.morph) }, + }; +} + +inline json serializeTokenInfoVec(const Kiwi& kiwi, const std::vector& tokenInfoVec) { + json result = json::array(); + for (const TokenInfo& tokenInfo : tokenInfoVec) { + result.push_back(serializeTokenInfo(kiwi, tokenInfo)); + } + return result; +} + +inline json serializeTokenResult(const Kiwi& kiwi, const TokenResult& tokenResult) { + return { + { "tokens", serializeTokenInfoVec(kiwi, tokenResult.first) }, + { "score", tokenResult.second }, + }; +} + +inline json serializeTokenResultVec(const Kiwi& kiwi, const std::vector& tokenResultVec) { + json result = json::array(); + for (const 
TokenResult& tokenResult : tokenResultVec) { + result.push_back(serializeTokenResult(kiwi, tokenResult)); + } + return result; +} + + +json version(const json& args) { + return KIWI_VERSION_STRING; +} + +json build(const json& args) { + const int id = nextInstanceId(); + + const json buildArgs = args[0]; + + const std::string modelPath = buildArgs["modelPath"]; + const size_t numThreads = 0; + const bool useSBG = buildArgs.value("modelType", "knlm") == "sbg"; + + BuildOption buildOptions = BuildOption::none; + if (buildArgs.value("integrateAllomorph", true)) { + buildOptions |= BuildOption::integrateAllomorph; + } + if (buildArgs.value("loadDefaultDict", true)) { + buildOptions |= BuildOption::loadDefaultDict; + } + if (buildArgs.value("loadTypoDict", true)) { + buildOptions |= BuildOption::loadTypoDict; + } + if (buildArgs.value("loadMultiDict", true)) { + buildOptions |= BuildOption::loadMultiDict; + } + + KiwiBuilder builder = KiwiBuilder{ + modelPath, + numThreads, + buildOptions, + useSBG, + }; + + const auto userDicts = buildArgs.value("userDicts", json::array()); + for (const auto& pathJson : userDicts) { + const std::string path = pathJson; + builder.loadDictionary(path); + } + + const auto userWords = buildArgs.value("userWords", json::array()); + for (const auto& word : userWords) { + const std::string word8 = word["word"]; + const std::u16string word16 = utf8To16(word8); + + const std::string tag8 = word.value("tag", "NNG"); + const std::u16string tag16 = utf8To16(tag8); + const POSTag tag = toPOSTag(tag16); + + const float score = word.value("score", 0.0f); + + if (word.contains("origWord")) { + const std::string origWord8 = word["origWord"]; + const std::u16string origWord16 = utf8To16(origWord8); + + builder.addWord(word16, tag, score, origWord16); + } else { + builder.addWord(word16, tag, score); + } + } + + const auto preanalyzedWords = buildArgs.value("preanalyzedWords", json::array()); + for (const auto& preanalyzedWord : preanalyzedWords) { + 
const std::string form8 = preanalyzedWord["form"]; + const std::u16string form = utf8To16(form8); + const float score = preanalyzedWord.value("score", 0.0f); + + std::vector> analyzed; + std::vector> positions; + + for (const auto& analyzedToken : preanalyzedWord["analyzed"]) { + const std::string form8 = analyzedToken["form"]; + const std::u16string form = utf8To16(form8); + + const std::string tag8 = analyzedToken["tag"]; + const std::u16string tag16 = utf8To16(tag8); + const POSTag tag = toPOSTag(tag16); + + analyzed.push_back({ form, tag }); + + if (analyzedToken.contains("start") && analyzedToken.contains("end")) { + const size_t start = analyzedToken["start"]; + const size_t end = analyzedToken["end"]; + positions.push_back({ start, end }); + } + } + + builder.addPreAnalyzedWord(form, analyzed, positions, score); + } + + const auto typos = buildArgs.value("typos", json(nullptr)); + const float typoCostThreshold = buildArgs.value("typoCostThreshold", 2.5f); + + if (typos.is_null()) { + instances.emplace(id, builder.build(DefaultTypoSet::withoutTypo, typoCostThreshold)); + } else if (typos.is_string()) { + DefaultTypoSet typoSet = DefaultTypoSet::withoutTypo; + const std::string typosStr = typos.get(); + + if (typosStr == "basic") { + typoSet = DefaultTypoSet::basicTypoSet; + } else if (typosStr == "continual") { + typoSet = DefaultTypoSet::continualTypoSet; + } else if (typosStr == "basicWithContinual") { + typoSet = DefaultTypoSet::basicTypoSetWithContinual; + } + + instances.emplace(id, builder.build(typoSet, typoCostThreshold)); + } else { + TypoTransformer typoTransformer; + + for (const auto& def : typos.value("defs", json::array())) { + const float cost = def.value("cost", 1.0f); + + CondVowel condVowel = CondVowel::none; + const std::string condVowelStr = def.value("condVowel", "none"); + + if (condVowelStr == "any") { + condVowel = CondVowel::any; + } else if (condVowelStr == "vowel") { + condVowel = CondVowel::vowel; + } else if (condVowelStr == 
"applosive") { + condVowel = CondVowel::applosive; + } + + for (const auto& orig8 : def["orig"]) { + const auto orig16 = utf8To16(orig8); + for (const auto& error8 : def["error"]) { + typoTransformer.addTypo(orig16, utf8To16(error8), cost, condVowel); + } + } + } + + const float continualTypoCost = typos.value("continualTypoCost", 1.0f); + typoTransformer.setContinualTypoCost(continualTypoCost); + + instances.emplace(id, builder.build(typoTransformer, typoCostThreshold)); + } + + return id; +} + + +json kiwiReady(Kiwi& kiwi, const json& args) { + return kiwi.ready(); +} + +json kiwiIsTypoTolerant(Kiwi& kiwi, const json& args) { + return kiwi.isTypoTolerant(); +} + +json kiwiAnalyze(Kiwi& kiwi, const json& args) { + const std::string str = args[0]; + const Match matchOptions = getAtOrDefault(args, 1, Match::allWithNormalizing); + const BlockListArg blockListArg(kiwi, args, 2); + const auto pretokenized = parsePretokenizedArg(args, 3); + + const TokenResult tokenResult = kiwi.analyze(str, (Match)matchOptions, blockListArg.setPtr(), pretokenized); + + return serializeTokenResult(kiwi, tokenResult); +} + +json kiwiAnalyzeTopN(Kiwi& kiwi, const json& args) { + const std::string str = args[0]; + const int topN = args[1]; + const Match matchOptions = getAtOrDefault(args, 2, Match::allWithNormalizing); + const BlockListArg blockListArg(kiwi, args, 3); + const auto pretokenized = parsePretokenizedArg(args, 4); + + const std::vector tokenResults = kiwi.analyze(str, topN, matchOptions, blockListArg.setPtr(), pretokenized); + + return serializeTokenResultVec(kiwi, tokenResults); +} + +json kiwiTokenize(Kiwi& kiwi, const json& args) { + const std::string str = args[0]; + const Match matchOptions = getAtOrDefault(args, 1, Match::allWithNormalizing); + const BlockListArg blockListArg(kiwi, args, 2); + const auto pretokenized = parsePretokenizedArg(args, 3); + + const TokenResult tokenResult = kiwi.analyze(str, (Match)matchOptions, blockListArg.setPtr(), pretokenized); + + return 
serializeTokenInfoVec(kiwi, tokenResult.first); +} + +json kiwiTokenizeTopN(Kiwi& kiwi, const json& args) { + const std::string str = args[0]; + const int topN = args[1]; + const Match matchOptions = getAtOrDefault(args, 2, Match::allWithNormalizing); + const BlockListArg blockListArg(kiwi, args, 3); + const auto pretokenized = parsePretokenizedArg(args, 4); + + const std::vector tokenResults = kiwi.analyze(str, topN, matchOptions, blockListArg.setPtr(), pretokenized); + + json result = json::array(); + for (const TokenResult& tokenResult : tokenResults) { + result.push_back(serializeTokenInfoVec(kiwi, tokenResult.first)); + } + + return result; +} + +json kiwiSplitIntoSents(Kiwi& kiwi, const json& args) { + const std::string str = args[0]; + const Match matchOptions = getAtOrDefault(args, 1, Match::allWithNormalizing); + const bool withTokenResult = getAtOrDefault(args, 2, false); + + TokenResult tokenResult; + const auto sentenceSpans = kiwi.splitIntoSents(str, matchOptions, withTokenResult ? &tokenResult : nullptr); + + json spans = json::array(); + for (const auto& span : sentenceSpans) { + spans.push_back({ + { "start", span.first }, + { "end", span.second }, + }); + } + + return { + { "spans", spans }, + { "tokenResult", withTokenResult ? 
serializeTokenResult(kiwi, tokenResult) : nullptr }, + }; +} + +json kiwiJoinSent(Kiwi& kiwi, const json& args) { + const json morphs = args[0]; + const bool lmSearch = getAtOrDefault(args, 1, true); + const bool withRanges = getAtOrDefault(args, 2, false); + + auto joiner = kiwi.newJoiner(lmSearch); + + for (const auto& morph : morphs) { + const std::string form8 = morph["form"]; + const std::u16string form = utf8To16(form8); + + const std::string tagStr8 = morph["tag"]; + const std::u16string tagStr = utf8To16(tagStr8); + const POSTag tag = toPOSTag(tagStr); + + const cmb::Space space = morph.value("space", cmb::Space::none); + + joiner.add(form, tag, true, space); + } + + std::vector> ranges; + const std::string str = joiner.getU8(withRanges ? &ranges : nullptr); + + json rangesRet = json::array(); + for (const auto& range : ranges) { + rangesRet.push_back({ + { "start", range.first }, + { "end", range.second }, + }); + } + + return { + { "str", str }, + { "ranges", withRanges ? rangesRet : nullptr }, + }; +} + +json kiwiGetCutOffThreshold(Kiwi& kiwi, const json& args) { + return kiwi.getCutOffThreshold(); +} + +json kiwiSetCutOffThreshold(Kiwi& kiwi, const json& args) { + kiwi.setCutOffThreshold(args[0]); + return nullptr; +} + +json kiwiGetUnkScoreBias(Kiwi& kiwi, const json& args) { + return kiwi.getUnkScoreBias(); +} + +json kiwiSetUnkScoreBias(Kiwi& kiwi, const json& args) { + kiwi.setUnkScoreBias(args[0]); + return nullptr; +} + +json kiwiGetUnkScoreScale(Kiwi& kiwi, const json& args) { + return kiwi.getUnkScoreScale(); +} + +json kiwiSetUnkScoreScale(Kiwi& kiwi, const json& args) { + kiwi.setUnkScoreScale(args[0]); + return nullptr; +} + +json kiwiGetMaxUnkFormSize(Kiwi& kiwi, const json& args) { + return kiwi.getMaxUnkFormSize(); +} + +json kiwiSetMaxUnkFormSize(Kiwi& kiwi, const json& args) { + kiwi.setMaxUnkFormSize(args[0]); + return nullptr; +} + +json kiwiGetSpaceTolerance(Kiwi& kiwi, const json& args) { + return kiwi.getSpaceTolerance(); +} + +json 
kiwiSetSpaceTolerance(Kiwi& kiwi, const json& args) { + kiwi.setSpaceTolerance(args[0]); + return nullptr; +} + +json kiwiGetSpacePenalty(Kiwi& kiwi, const json& args) { + return kiwi.getSpacePenalty(); +} + +json kiwiSetSpacePenalty(Kiwi& kiwi, const json& args) { + kiwi.setSpacePenalty(args[0]); + return nullptr; +} + +json kiwiGetTypoCostWeight(Kiwi& kiwi, const json& args) { + return kiwi.getTypoCostWeight(); +} + +json kiwiSetTypoCostWeight(Kiwi& kiwi, const json& args) { + kiwi.setTypoCostWeight(args[0]); + return nullptr; +} + +json kiwiGetIntegrateAllomorph(Kiwi& kiwi, const json& args) { + return kiwi.getIntegrateAllomorph(); +} + +json kiwiSetIntegrateAllomorph(Kiwi& kiwi, const json& args) { + kiwi.setIntegrateAllomorph(args[0]); + return nullptr; +} + +json kiwiCreateMorphemeSet(Kiwi& kiwi, const json& args) { + const int id = nextMorphemeSetId(); + + const json morphs = args[0]; + std::unordered_set set = parseMorphemeSet(kiwi, morphs); + + morphemeSets.emplace(id, set); + + return id; +} + +json kiwiDestroyMorphemeSet(Kiwi& kiwi, const json& args) { + const int id = args[0]; + morphemeSets.erase(id); + return nullptr; +} + + +using ApiMethod = json(*)(const json&); +using InstanceApiMethod = json(*)(Kiwi&, const json&); + +std::map apiMethods = { + { "version", version }, + { "build", build }, +}; + +std::map instanceApiMethods = { + { "ready", kiwiReady }, + { "isTypoTolerant", kiwiIsTypoTolerant }, + { "analyze", kiwiAnalyze }, + { "analyzeTopN", kiwiAnalyzeTopN }, + { "tokenize", kiwiTokenize }, + { "tokenizeTopN", kiwiTokenizeTopN}, + { "splitIntoSents", kiwiSplitIntoSents }, + { "joinSent", kiwiJoinSent }, + { "getCutOffThreshold", kiwiGetCutOffThreshold }, + { "setCutOffThreshold", kiwiSetCutOffThreshold }, + { "getUnkScoreBias", kiwiGetUnkScoreBias }, + { "setUnkScoreBias", kiwiSetUnkScoreBias }, + { "getUnkScoreScale", kiwiGetUnkScoreScale }, + { "setUnkScoreScale", kiwiSetUnkScoreScale }, + { "getMaxUnkFormSize", kiwiGetMaxUnkFormSize }, + { 
"setMaxUnkFormSize", kiwiSetMaxUnkFormSize }, + { "getSpaceTolerance", kiwiGetSpaceTolerance }, + { "setSpaceTolerance", kiwiSetSpaceTolerance }, + { "getSpacePenalty", kiwiGetSpacePenalty }, + { "setSpacePenalty", kiwiSetSpacePenalty }, + { "getTypoCostWeight", kiwiGetTypoCostWeight }, + { "setTypoCostWeight", kiwiSetTypoCostWeight }, + { "getIntegrateAllomorph", kiwiGetIntegrateAllomorph }, + { "setIntegrateAllomorph", kiwiSetIntegrateAllomorph }, + { "createMorphemeSet", kiwiCreateMorphemeSet }, + { "destroyMorphemeSet", kiwiDestroyMorphemeSet }, +}; + + +std::string api(std::string dataStr) { + const json data = json::parse(dataStr); + + const std::string methodName = data["method"]; + const json args = data["args"]; + const json id = data.value("id", json(nullptr)); + + if (id.is_number_integer()) { + const int instanceId = id; + auto& instance = instances[instanceId]; + const auto method = instanceApiMethods[methodName]; + return method(instance, args).dump(); + } + + return apiMethods[methodName](args).dump(); +} + + +EMSCRIPTEN_BINDINGS(kiwi) { + emscripten::constant("VERSION_MAJOR", KIWI_VERSION_MAJOR); + emscripten::constant("VERSION_MINOR", KIWI_VERSION_MINOR); + emscripten::constant("VERSION_PATCH", KIWI_VERSION_PATCH); + emscripten::constant("VERSION", emscripten::val(KIWI_VERSION_STRING)); + + emscripten::function("api", &api); +} diff --git a/bindings/wasm/package-demo/.gitignore b/bindings/wasm/package-demo/.gitignore new file mode 100644 index 00000000..a547bf36 --- /dev/null +++ b/bindings/wasm/package-demo/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? 
diff --git a/bindings/wasm/package-demo/index.html b/bindings/wasm/package-demo/index.html new file mode 100644 index 00000000..3bfa88cf --- /dev/null +++ b/bindings/wasm/package-demo/index.html @@ -0,0 +1,45 @@ + + + + + + + Kiwi Demo + + + + + + + + + + + +
+ +
Loading...
+
+ + + + + + + + + + + + + + + diff --git a/bindings/wasm/package-demo/package-lock.json b/bindings/wasm/package-demo/package-lock.json new file mode 100644 index 00000000..a0e91a0c --- /dev/null +++ b/bindings/wasm/package-demo/package-lock.json @@ -0,0 +1,1198 @@ +{ + "name": "package-demo", + "version": "0.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "package-demo", + "version": "0.0.0", + "dependencies": { + "kiwi-nlp": "file:../package", + "vite-plugin-static-copy": "^1.0.5" + }, + "devDependencies": { + "typescript": "^5.2.2", + "vite": "^5.2.0" + } + }, + "../package": { + "name": "kiwi-nlp", + "version": "0.17.1", + "license": "LGPL-2.1-or-later", + "devDependencies": { + "typedoc": "^0.26.2", + "typescript": "^5.4.5" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", + "integrity": "sha512-1SDgH6ZSPTlggy1yI6+Dbkiz8xzpHJEVAlF/AM1tHPLsf5STom9rwtjE4hKAF20FfXXNTFqEYXyJNWh1GiZedQ==", + "cpu": [ + "ppc64" + ], + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.21.5.tgz", + "integrity": "sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.21.5.tgz", + "integrity": "sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + 
"node_modules/@esbuild/android-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.21.5.tgz", + "integrity": "sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.21.5.tgz", + "integrity": "sha512-DwqXqZyuk5AiWWf3UfLiRDJ5EDd49zg6O9wclZ7kUMv2WRFr4HKjXp/5t8JZ11QbQfUS6/cRCKGwYhtNAY88kQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.21.5.tgz", + "integrity": "sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.5.tgz", + "integrity": "sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.21.5.tgz", + "integrity": "sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + 
} + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.21.5.tgz", + "integrity": "sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.21.5.tgz", + "integrity": "sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.21.5.tgz", + "integrity": "sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg==", + "cpu": [ + "ia32" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.21.5.tgz", + "integrity": "sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg==", + "cpu": [ + "loong64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.21.5.tgz", + "integrity": "sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg==", + "cpu": [ + "mips64el" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": 
">=12" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.21.5.tgz", + "integrity": "sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w==", + "cpu": [ + "ppc64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.21.5.tgz", + "integrity": "sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA==", + "cpu": [ + "riscv64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.21.5.tgz", + "integrity": "sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A==", + "cpu": [ + "s390x" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.21.5.tgz", + "integrity": "sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz", + "integrity": "sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": 
">=12" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz", + "integrity": "sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", + "integrity": "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz", + "integrity": "sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz", + "integrity": "sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==", + "cpu": [ + "ia32" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz", + "integrity": "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + 
"node_modules/@nodelib/fs.scandir": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", + "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", + "license": "MIT", + "dependencies": { + "@nodelib/fs.stat": "2.0.5", + "run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.stat": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", + "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.walk": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", + "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", + "license": "MIT", + "dependencies": { + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.18.0.tgz", + "integrity": "sha512-Tya6xypR10giZV1XzxmH5wr25VcZSncG0pZIjfePT0OVBvqNEurzValetGNarVrGiq66EBVAFn15iYX4w6FKgQ==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-android-arm64": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.18.0.tgz", + "integrity": "sha512-avCea0RAP03lTsDhEyfy+hpfr85KfyTctMADqHVhLAF3MlIkq83CP8UfAHUssgXTYd+6er6PaAhx/QGv4L1EiA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.18.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.18.0.tgz", + "integrity": "sha512-IWfdwU7KDSm07Ty0PuA/W2JYoZ4iTj3TUQjkVsO/6U+4I1jN5lcR71ZEvRh52sDOERdnNhhHU57UITXz5jC1/w==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-darwin-x64": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.18.0.tgz", + "integrity": "sha512-n2LMsUz7Ynu7DoQrSQkBf8iNrjOGyPLrdSg802vk6XT3FtsgX6JbE8IHRvposskFm9SNxzkLYGSq9QdpLYpRNA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.18.0.tgz", + "integrity": "sha512-C/zbRYRXFjWvz9Z4haRxcTdnkPt1BtCkz+7RtBSuNmKzMzp3ZxdM28Mpccn6pt28/UWUCTXa+b0Mx1k3g6NOMA==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.18.0.tgz", + "integrity": "sha512-l3m9ewPgjQSXrUMHg93vt0hYCGnrMOcUpTz6FLtbwljo2HluS4zTXFy2571YQbisTnfTKPZ01u/ukJdQTLGh9A==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.18.0.tgz", + "integrity": "sha512-rJ5D47d8WD7J+7STKdCUAgmQk49xuFrRi9pZkWoRD1UeSMakbcepWXPF8ycChBoAqs1pb2wzvbY6Q33WmN2ftw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.18.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.18.0.tgz", + "integrity": "sha512-be6Yx37b24ZwxQ+wOQXXLZqpq4jTckJhtGlWGZs68TgdKXJgw54lUUoFYrg6Zs/kjzAQwEwYbp8JxZVzZLRepQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-powerpc64le-gnu": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.18.0.tgz", + "integrity": "sha512-hNVMQK+qrA9Todu9+wqrXOHxFiD5YmdEi3paj6vP02Kx1hjd2LLYR2eaN7DsEshg09+9uzWi2W18MJDlG0cxJA==", + "cpu": [ + "ppc64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.18.0.tgz", + "integrity": "sha512-ROCM7i+m1NfdrsmvwSzoxp9HFtmKGHEqu5NNDiZWQtXLA8S5HBCkVvKAxJ8U+CVctHwV2Gb5VUaK7UAkzhDjlg==", + "cpu": [ + "riscv64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.18.0.tgz", + "integrity": "sha512-0UyyRHyDN42QL+NbqevXIIUnKA47A+45WyasO+y2bGJ1mhQrfrtXUpTxCOrfxCR4esV3/RLYyucGVPiUsO8xjg==", + "cpu": [ + "s390x" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.18.0.tgz", + "integrity": "sha512-xuglR2rBVHA5UsI8h8UbX4VJ470PtGCf5Vpswh7p2ukaqBGFTnsfzxUBetoWBWymHMxbIG0Cmx7Y9qDZzr648w==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.18.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.18.0.tgz", + "integrity": "sha512-LKaqQL9osY/ir2geuLVvRRs+utWUNilzdE90TpyoX0eNqPzWjRm14oMEE+YLve4k/NAqCdPkGYDaDF5Sw+xBfg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.18.0.tgz", + "integrity": "sha512-7J6TkZQFGo9qBKH0pk2cEVSRhJbL6MtfWxth7Y5YmZs57Pi+4x6c2dStAUvaQkHQLnEQv1jzBUW43GvZW8OFqA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.18.0.tgz", + "integrity": "sha512-Txjh+IxBPbkUB9+SXZMpv+b/vnTEtFyfWZgJ6iyCmt2tdx0OF5WhFowLmnh8ENGNpfUlUZkdI//4IEmhwPieNg==", + "cpu": [ + "ia32" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.18.0.tgz", + "integrity": "sha512-UOo5FdvOL0+eIVTgS4tIdbW+TtnBLWg1YBCcU2KWM7nuNwRz9bksDX1bekJJCpu25N1DVWaCwnT39dVQxzqS8g==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@types/estree": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz", + "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==", + "license": "MIT" + }, + "node_modules/anymatch": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", + 
"license": "ISC", + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/binary-extensions": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", + "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "license": "MIT", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/chokidar": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", + "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", + "license": "MIT", + "dependencies": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + }, + "engines": { + "node": ">= 8.10.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, + "node_modules/esbuild": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz", + "integrity": "sha512-mg3OPMV4hXywwpoDxu3Qda5xCKQi+vCTZq8S9J/EpkhB2HzKXq4SNFZE3+NK93JYxc8VMSep+lOUSC/RVKaBqw==", + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.21.5", + "@esbuild/android-arm": "0.21.5", + "@esbuild/android-arm64": 
"0.21.5", + "@esbuild/android-x64": "0.21.5", + "@esbuild/darwin-arm64": "0.21.5", + "@esbuild/darwin-x64": "0.21.5", + "@esbuild/freebsd-arm64": "0.21.5", + "@esbuild/freebsd-x64": "0.21.5", + "@esbuild/linux-arm": "0.21.5", + "@esbuild/linux-arm64": "0.21.5", + "@esbuild/linux-ia32": "0.21.5", + "@esbuild/linux-loong64": "0.21.5", + "@esbuild/linux-mips64el": "0.21.5", + "@esbuild/linux-ppc64": "0.21.5", + "@esbuild/linux-riscv64": "0.21.5", + "@esbuild/linux-s390x": "0.21.5", + "@esbuild/linux-x64": "0.21.5", + "@esbuild/netbsd-x64": "0.21.5", + "@esbuild/openbsd-x64": "0.21.5", + "@esbuild/sunos-x64": "0.21.5", + "@esbuild/win32-arm64": "0.21.5", + "@esbuild/win32-ia32": "0.21.5", + "@esbuild/win32-x64": "0.21.5" + } + }, + "node_modules/fast-glob": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", + "integrity": "sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow==", + "license": "MIT", + "dependencies": { + "@nodelib/fs.stat": "^2.0.2", + "@nodelib/fs.walk": "^1.2.3", + "glob-parent": "^5.1.2", + "merge2": "^1.3.0", + "micromatch": "^4.0.4" + }, + "engines": { + "node": ">=8.6.0" + } + }, + "node_modules/fastq": { + "version": "1.17.1", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.17.1.tgz", + "integrity": "sha512-sRVD3lWVIXWg6By68ZN7vho9a1pQcN/WBFaAAsDDFzlJjvoGx0P8z7V1t72grFJfJhu3YPZBuu25f7Kaw2jN1w==", + "license": "ISC", + "dependencies": { + "reusify": "^1.0.4" + } + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/fs-extra": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz", + 
"integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==", + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=14.14" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "license": "ISC" + }, + "node_modules/is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "license": "MIT", + "dependencies": { + "binary-extensions": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + 
"node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/jsonfile": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz", + "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", + "license": "MIT", + "dependencies": { + "universalify": "^2.0.0" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, + "node_modules/kiwi-nlp": { + "resolved": "../package", + "link": true + }, + "node_modules/merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/micromatch": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.7.tgz", + "integrity": "sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q==", + "license": "MIT", + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/nanoid": { + "version": "3.3.7", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", + "integrity": 
"sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/picocolors": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.1.tgz", + "integrity": "sha512-anP1Z8qwhkbmu7MFP5iTt+wQKXgwzf7zTyGlcdzabySa9vd0Xt392U0rVmz9poOaBj0uHJKyyo9/upk0HrEQew==", + "license": "ISC" + }, + "node_modules/picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/postcss": { + "version": "8.4.38", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.38.tgz", + "integrity": "sha512-Wglpdk03BSfXkHoQa3b/oulrotAkwrlLDRSOb9D0bN86FdRyE9lppSp33aHNPgBa0JKCoB+drFLZkQoRRYae5A==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "nanoid": "^3.3.7", + "picocolors": "^1.0.0", + "source-map-js": "^1.2.0" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + 
"node_modules/queue-microtask": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", + "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/readdirp": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "license": "MIT", + "dependencies": { + "picomatch": "^2.2.1" + }, + "engines": { + "node": ">=8.10.0" + } + }, + "node_modules/reusify": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", + "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", + "license": "MIT", + "engines": { + "iojs": ">=1.0.0", + "node": ">=0.10.0" + } + }, + "node_modules/rollup": { + "version": "4.18.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.18.0.tgz", + "integrity": "sha512-QmJz14PX3rzbJCN1SG4Xe/bAAX2a6NpCP8ab2vfu2GiUr8AQcr2nCV/oEO3yneFarB67zk8ShlIyWb2LGTb3Sg==", + "license": "MIT", + "dependencies": { + "@types/estree": "1.0.5" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.18.0", + "@rollup/rollup-android-arm64": "4.18.0", + "@rollup/rollup-darwin-arm64": "4.18.0", + "@rollup/rollup-darwin-x64": "4.18.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.18.0", + "@rollup/rollup-linux-arm-musleabihf": "4.18.0", + "@rollup/rollup-linux-arm64-gnu": "4.18.0", + "@rollup/rollup-linux-arm64-musl": 
"4.18.0", + "@rollup/rollup-linux-powerpc64le-gnu": "4.18.0", + "@rollup/rollup-linux-riscv64-gnu": "4.18.0", + "@rollup/rollup-linux-s390x-gnu": "4.18.0", + "@rollup/rollup-linux-x64-gnu": "4.18.0", + "@rollup/rollup-linux-x64-musl": "4.18.0", + "@rollup/rollup-win32-arm64-msvc": "4.18.0", + "@rollup/rollup-win32-ia32-msvc": "4.18.0", + "@rollup/rollup-win32-x64-msvc": "4.18.0", + "fsevents": "~2.3.2" + } + }, + "node_modules/run-parallel": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", + "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "queue-microtask": "^1.2.2" + } + }, + "node_modules/source-map-js": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz", + "integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/typescript": { + "version": "5.4.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz", + "integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + 
"tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "license": "MIT", + "engines": { + "node": ">= 10.0.0" + } + }, + "node_modules/vite": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/vite/-/vite-5.3.1.tgz", + "integrity": "sha512-XBmSKRLXLxiaPYamLv3/hnP/KXDai1NDexN0FpkTaZXTfycHvkRHoenpgl/fvuK/kPbB6xAgoyiryAhQNxYmAQ==", + "license": "MIT", + "dependencies": { + "esbuild": "^0.21.3", + "postcss": "^8.4.38", + "rollup": "^4.13.0" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^18.0.0 || >=20.0.0", + "less": "*", + "lightningcss": "^1.21.0", + "sass": "*", + "stylus": "*", + "sugarss": "*", + "terser": "^5.4.0" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + } + } + }, + "node_modules/vite-plugin-static-copy": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/vite-plugin-static-copy/-/vite-plugin-static-copy-1.0.5.tgz", + "integrity": "sha512-02k0Rox+buYdEOfeilKZSgs1gXfPf9RjVztZEIYZgVIxjsVZi6AXssjzdi+qW6zYt00d3bq+tpP2voVXN2fKLw==", + "license": "MIT", + "dependencies": { + "chokidar": "^3.5.3", + "fast-glob": "^3.2.11", + "fs-extra": "^11.1.0", + "picocolors": "^1.0.0" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "peerDependencies": { + "vite": "^5.0.0" + } + } + } +} diff --git 
a/bindings/wasm/package-demo/package.json b/bindings/wasm/package-demo/package.json
new file mode 100644
index 00000000..3539068c
--- /dev/null
+++ b/bindings/wasm/package-demo/package.json
@@ -0,0 +1,19 @@
+{
+  "name": "package-demo",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc && vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "kiwi-nlp": "file:../package",
+    "vite-plugin-static-copy": "^1.0.5"
+  },
+  "devDependencies": {
+    "typescript": "^5.2.2",
+    "vite": "^5.2.0"
+  }
+}
diff --git a/bindings/wasm/package-demo/public/logo.png b/bindings/wasm/package-demo/public/logo.png
new file mode 100644
index 00000000..5ece1411
Binary files /dev/null and b/bindings/wasm/package-demo/public/logo.png differ
diff --git a/bindings/wasm/package-demo/src/index.ts b/bindings/wasm/package-demo/src/index.ts
new file mode 100644
index 00000000..35c0757e
--- /dev/null
+++ b/bindings/wasm/package-demo/src/index.ts
@@ -0,0 +1,80 @@
+import type { WorkerRequest, WorkerResponse } from './worker.js';
+import type { TokenInfo } from 'kiwi-nlp';
+
+// Page elements the demo reads from and writes to.
+const elVersion = document.getElementById('version')!;
+const elInput = document.getElementById('input') as HTMLInputElement;
+const elResultTable = document.getElementById('result') as HTMLTableElement;
+
+// Kiwi runs inside a module worker; this file only exchanges messages with it.
+const worker = new Worker(
+    new URL('./worker.ts', import.meta.url),
+    { type: 'module' }
+);
+
+// Route each worker response to its handler.
+worker.onmessage = (event) => {
+    const response: WorkerResponse = event.data;
+
+    switch (response.type) {
+        case 'inited':
+            inited(response.version);
+            break;
+        case 'analyzed':
+            analyzed(response.result, response.text);
+            break;
+        default:
+            console.error('Unknown worker message');
+            break;
+    }
+};
+
+/** Posts a request to the worker; the parameter type restricts messages to WorkerRequest. */
+function sendWorkerRequest(request: WorkerRequest) {
+    worker.postMessage(request);
+}
+
+sendWorkerRequest({ type: 'init' });
+
+/** Handles 'inited': shows the version, reveals the input and runs a first analysis. */
+function inited(version: string) {
+    elVersion.innerText = 'v' + version;
+
+    elInput.hidden = false;
+    elInput.addEventListener('input', analyze);
+    analyze();
+}
+
+/** Sends the current input text to the worker for analysis. */
+function analyze() {
+    const text = elInput.value;
+    // Use the typed helper so the message shape is checked against WorkerRequest.
+    sendWorkerRequest({ type: 'analyze', text });
+}
+
+/** Handles 'analyzed': replaces the result rows with one row per token of `text`. */
+function analyzed(tokenInfos: TokenInfo[], text: string) {
+    // Clear previous results; row 0 (presumably the header row) is kept.
+    while (elResultTable.rows.length > 1) {
+        elResultTable.deleteRow(1);
+    }
+
+    for (const tokenInfo of tokenInfos) {
+        // The slice of the input text that this token covers.
+        const surface = text.substring(
+            tokenInfo.position,
+            tokenInfo.position + tokenInfo.length
+        );
+
+        const row = elResultTable.insertRow();
+        row.insertCell().innerText = tokenInfo.position.toString();
+        row.insertCell().innerText = tokenInfo.length.toString();
+        row.insertCell().innerText = surface;
+        row.insertCell().innerText = tokenInfo.str;
+        row.insertCell().innerText = tokenInfo.tag;
+        row.insertCell().innerText = tokenInfo.score.toString();
+    }
+
+    // Hide the table when there are no tokens to show.
+    elResultTable.hidden = tokenInfos.length === 0;
+}
diff --git a/bindings/wasm/package-demo/src/modelFiles.ts b/bindings/wasm/package-demo/src/modelFiles.ts
new file mode 100644
index 00000000..0777d865
--- /dev/null
+++ b/bindings/wasm/package-demo/src/modelFiles.ts
@@ -0,0 +1,16 @@
+// Model files the demo needs. Keep this module plain data: vite.config.ts imports it too.
+export const requiredModelFiles = [
+    'combiningRule.txt',
+    'default.dict',
+    'extract.mdl',
+    'multi.dict',
+    'sj.knlm',
+    'sj.morph',
+    'skipbigram.mdl',
+    'typo.dict',
+];
+
+// Maps each model file name to its '/model/<name>' path; the worker passes this to KiwiBuilder.build.
+export const modelFiles = Object.fromEntries(
+    requiredModelFiles.map((f) => [f, '/model/' + f])
+);
diff --git a/bindings/wasm/package-demo/src/style.css b/bindings/wasm/package-demo/src/style.css
new file mode 100644
index 00000000..eae743e3
--- /dev/null
+++ b/bindings/wasm/package-demo/src/style.css
@@ -0,0 +1,77 @@
+* {
+    box-sizing: border-box;
+}
+
+html {
+    display: flex;
+    justify-content: center;
+}
+
+body {
+    width: min(100%, 800px);
+    padding: 1rem;
+
+    background-color: #ffffff;
+    color: #000000;
+
+    font-family: "Noto Sans KR", sans-serif;
+    font-optical-sizing: auto;
+    font-weight: 400;
+    font-style: normal;
+
+    display: flex;
+    flex-direction: column;
+    gap: 1rem;
+
+}
+
+.title-wrap {
+    display: flex;
+    flex-direction: row;
+    gap: 1rem;
+    align-items: center;
+    justify-content: space-between;
+}
+
+.logo {
+    width: 15rem;
+}
+
+#input {
+    width: 100%;
+    padding: 0.5rem;
+    font-size: 1rem;
+
+    appearance: none;
+    border: 1px solid #ccc;
+    border-radius: 0.25rem;
+}
+
+#input:focus {
+    outline: 2px solid #0078d4;
+}
+
+#result {
+    width: 100%;
+    border-collapse: collapse;
+    border-style: hidden;
+    border-radius: 0.25rem;
+    box-shadow: 0 0 0 1px #ccc;
+    padding: 0;
+}
+
+#result tr:not(:first-child) {
+    border-top: 1px solid #ccc;
+}
+
+#result td,
+#result th {
+    padding: 0.5rem;
+    border: none;
+}
+
+#result th {
+    text-align: left;
+    font-weight: 400;
+    background-color: #f0f0f0;
+}
diff --git a/bindings/wasm/package-demo/src/vite-env.d.ts b/bindings/wasm/package-demo/src/vite-env.d.ts
new file mode 100644
index 00000000..11f02fe2
--- /dev/null
+++ b/bindings/wasm/package-demo/src/vite-env.d.ts
@@ -0,0 +1 @@
+/// <reference types="vite/client" />
diff --git a/bindings/wasm/package-demo/src/worker.ts b/bindings/wasm/package-demo/src/worker.ts
new file mode 100644
index 00000000..5ce70ab5
--- /dev/null
+++ b/bindings/wasm/package-demo/src/worker.ts
@@ -0,0 +1,60 @@
+import { KiwiBuilder, type Kiwi, type TokenInfo } from 'kiwi-nlp';
+import kiwiWasmPath from 'kiwi-nlp/dist/kiwi-wasm.wasm?url';
+import { modelFiles } from './modelFiles';
+
+// Worker state; each value stays null until init() has assigned it.
+let kiwiBuilder: KiwiBuilder | null = null;
+let kiwi: Kiwi | null = null;
+let version: string | null = null;
+
+// Messages this worker accepts.
+export type WorkerRequest =
+    | { type: 'init' }
+    | { type: 'analyze'; text: string };
+// Messages this worker posts back.
+export type WorkerResponse =
+    | { type: 'inited'; version: string }
+    | { type: 'analyzed'; result: TokenInfo[]; text: string };
+
+/** Posts a response to the page; the parameter type restricts messages to WorkerResponse. */
+function sendResponse(response: WorkerResponse) {
+    self.postMessage(response);
+}
+
+/** Creates the builder from the wasm URL, builds Kiwi with the model files, then reports 'inited'. */
+async function init() {
+    kiwiBuilder = await KiwiBuilder.create(kiwiWasmPath);
+    version = kiwiBuilder.version();
+    kiwi = await kiwiBuilder.build({ modelFiles });
+
+    sendResponse({ type: 'inited', version });
+}
+
+/** Tokenizes `text` and reports the tokens together with the text; throws if init() has not finished. */
+function analyze(text: string) {
+    if (!kiwi) {
+        throw new Error('Kiwi is not initialized');
+    }
+
+    const result = kiwi.tokenize(text);
+    sendResponse({ type: 'analyzed', result, text });
+}
+
+// Dispatch incoming requests by type.
+self.onmessage = (event) => {
+    const request: WorkerRequest = event.data;
+
+    switch (request.type) {
+        case 'init':
+            // init() is async: log a failure instead of leaving an unhandled rejection.
+            init().catch((error) => {
+                console.error('Failed to initialize Kiwi', error);
+            });
+            break;
+        case 'analyze':
+            analyze(request.text);
+            break;
+        default:
+            throw new Error('Unknown request type');
+    }
+};
diff --git a/bindings/wasm/package-demo/tsconfig.json b/bindings/wasm/package-demo/tsconfig.json
new file mode 100644
index 00000000..a1b39a70
--- /dev/null
+++ b/bindings/wasm/package-demo/tsconfig.json
@@ -0,0 +1,23 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "useDefineForClassFields": true,
+    "module": "ESNext",
+    "lib": ["ES2020", "DOM", "DOM.Iterable"],
+    "skipLibCheck": true,
+
+    /* Bundler mode */
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "noEmit": true,
+
+    /* Linting */
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true
+  },
+  "include": ["src"]
+}
diff --git a/bindings/wasm/package-demo/vite.config.ts b/bindings/wasm/package-demo/vite.config.ts
new file mode 100644
index 00000000..66cbb960
--- /dev/null
+++ b/bindings/wasm/package-demo/vite.config.ts
@@ -0,0 +1,15 @@
+import { defineConfig } from 'vite';
+import { viteStaticCopy } from 'vite-plugin-static-copy';
+import { requiredModelFiles } from './src/modelFiles';
+
+// One copy target per required model file: from ../../../ModelGenerator into 'model'.
+const targets = requiredModelFiles.map((file) => ({
+    src: '../../../ModelGenerator/' + file,
+    dest: 'model',
+}));
+
+export default defineConfig({
+    plugins: [
+        viteStaticCopy({ targets }),
+    ],
+});
diff --git a/bindings/wasm/package/.gitignore b/bindings/wasm/package/.gitignore
new file mode 100644
index 00000000..1a1302e8
--- /dev/null
+++ b/bindings/wasm/package/.gitignore
@@ -0,0 +1,4 @@
+node_modules
+dist
+build
+doc
diff --git a/bindings/wasm/package/package-lock.json
b/bindings/wasm/package/package-lock.json new file mode 100644 index 00000000..404fd32a --- /dev/null +++ b/bindings/wasm/package/package-lock.json @@ -0,0 +1,196 @@ +{ + "name": "kiwi-nlp", + "version": "0.17.1", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "kiwi-nlp", + "version": "0.17.1", + "license": "LGPL-2.1-or-later", + "devDependencies": { + "typedoc": "^0.26.2", + "typescript": "^5.4.5" + } + }, + "node_modules/@shikijs/core": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/@shikijs/core/-/core-1.9.0.tgz", + "integrity": "sha512-cbSoY8P/jgGByG8UOl3jnP/CWg/Qk+1q+eAKWtcrU3pNoILF8wTsLB0jT44qUBV8Ce1SvA9uqcM9Xf+u3fJFBw==", + "dev": true, + "license": "MIT" + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true, + "license": "Python-2.0" + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + 
"url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/linkify-it": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-5.0.0.tgz", + "integrity": "sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "uc.micro": "^2.0.0" + } + }, + "node_modules/lunr": { + "version": "2.3.9", + "resolved": "https://registry.npmjs.org/lunr/-/lunr-2.3.9.tgz", + "integrity": "sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow==", + "dev": true, + "license": "MIT" + }, + "node_modules/markdown-it": { + "version": "14.1.0", + "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-14.1.0.tgz", + "integrity": "sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1", + "entities": "^4.4.0", + "linkify-it": "^5.0.0", + "mdurl": "^2.0.0", + "punycode.js": "^2.3.1", + "uc.micro": "^2.1.0" + }, + "bin": { + "markdown-it": "bin/markdown-it.mjs" + } + }, + "node_modules/mdurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-2.0.0.tgz", + "integrity": "sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w==", + "dev": true, + "license": "MIT" + }, + "node_modules/minimatch": { + "version": "9.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.4.tgz", + "integrity": "sha512-KqWh+VchfxcMNRAJjj2tnsSJdNbHsVgnkBhTNrW7AjVo6OvLtxw8zfT9oLw1JSohlFzJ8jCoTgaoXvJ+kHt6fw==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/punycode.js": { + "version": "2.3.1", + "resolved": 
"https://registry.npmjs.org/punycode.js/-/punycode.js-2.3.1.tgz", + "integrity": "sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/shiki": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/shiki/-/shiki-1.9.0.tgz", + "integrity": "sha512-i6//Lqgn7+7nZA0qVjoYH0085YdNk4MC+tJV4bo+HgjgRMJ0JmkLZzFAuvVioJqLkcGDK5GAMpghZEZkCnwxpQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@shikijs/core": "1.9.0" + } + }, + "node_modules/typedoc": { + "version": "0.26.2", + "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.26.2.tgz", + "integrity": "sha512-q/t+M+PZqhN9gPWLBZ3CCvP+KT8O1tyYkSzEYbcQ6mo89avdIrMlBEl3vfo5BgSzwC6Lbmq0W64E8RkY+eVsLA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "lunr": "^2.3.9", + "markdown-it": "^14.1.0", + "minimatch": "^9.0.4", + "shiki": "^1.9.0", + "yaml": "^2.4.5" + }, + "bin": { + "typedoc": "bin/typedoc" + }, + "engines": { + "node": ">= 18" + }, + "peerDependencies": { + "typescript": "4.6.x || 4.7.x || 4.8.x || 4.9.x || 5.0.x || 5.1.x || 5.2.x || 5.3.x || 5.4.x || 5.5.x" + } + }, + "node_modules/typescript": { + "version": "5.4.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz", + "integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/uc.micro": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz", + "integrity": "sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A==", + "dev": true, + "license": "MIT" + }, + "node_modules/yaml": { + "version": "2.4.5", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.5.tgz", 
+ "integrity": "sha512-aBx2bnqDzVOyNKfsysjA2ms5ZlnjSAW2eG3/L5G/CSujfjLJTJsEw1bGw8kCf04KodQWk1pxlGnZ56CRxiawmg==", + "dev": true, + "license": "ISC", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14" + } + } + } +} diff --git a/bindings/wasm/package/package.json b/bindings/wasm/package/package.json new file mode 100644 index 00000000..207402d1 --- /dev/null +++ b/bindings/wasm/package/package.json @@ -0,0 +1,32 @@ +{ + "name": "kiwi-nlp", + "version": "0.17.1", + "description": "Kiwi, Korean Intelligent Word Identifier", + "repository": { + "type": "git", + "url": "git+https://github.com/bab2min/Kiwi.git" + }, + "main": "dist/index.js", + "files": [ + "dist" + ], + "keywords": [ + "korean", + "nlp", + "tokenizer" + ], + "author": "bab2min (lab.bab2min.pe.kr)", + "license": "LGPL-2.1-or-later", + "bugs": { + "url": "https://github.com/bab2min/Kiwi/issues" + }, + "homepage": "https://lab.bab2min.pe.kr/kiwi", + "devDependencies": { + "typedoc": "^0.26.2", + "typescript": "^5.4.5" + }, + "scripts": { + "build": "tsc", + "doc": "typedoc --out doc src" + } +} diff --git a/bindings/wasm/package/src/build-args.ts b/bindings/wasm/package/src/build-args.ts new file mode 100644 index 00000000..75316d4b --- /dev/null +++ b/bindings/wasm/package/src/build-args.ts @@ -0,0 +1,157 @@ +/** + * A single file to be loaded. The key is the name of the file and the value is the file data. + * The file data can be a string representing a URL or an ArrayBufferView directly containing the file data. + */ +export type ModelFiles = { [name: string]: ArrayBufferView | string }; + +/** + * A single user word to add. + */ +export interface UserWord { + /** + * The word to add. + */ + word: string; + /** + * Part-of-speech tag. Defaults to 'NNP'. + */ + tag?: string; + /** + * The weighted score of the morpheme to add. + * If there are multiple morpheme combinations that match the form,the word with the higher score will be prioritized. + * Defaults to 0. 
+ */ + score?: number; + /** + * The original morpheme of the morpheme to be added. + * If the morpheme to be added is a variant of a particular morpheme, the original morpheme can be passed as this argument. + * If it is not present, it can be omitted. + */ + origWord?: string; +}; + +export interface PreanalyzedToken { + /** + * Form of the token. + */ + form: string; + /** + * Part-of-speech tag of the token. + */ + tag: string; + /** + * Start position of the token in the preanalyzed word. If omitted, all token positions are automatically calculated. + */ + start?: number; + /** + * End position of the token in the preanalyzed word. If omitted, all token positions are automatically calculated. + */ + end?: number; +} + +export interface PreanalyzedWord { + /** + * Form to add. + */ + form: string; + /** + * The result of the morphological analysis of form. + */ + analyzed: PreanalyzedToken[]; + /** + * The weighted score of the morpheme sequence to add. + * If there are multiple morpheme combinations that match the form, the word with the higher score will be prioritized. + */ + score?: number; +} + +export interface TypoDefinition { + /** + * Source strings + */ + orig: string[]; + /** + * The typos to be replaced + */ + error: string[]; + /** + * Replacement cost. Defaults to 1. + */ + cost?: number; + /** + * Conditions under which typos can be replaced. + * One of `none`, `any` (after any letter), `vowel` (after a vowel), or `applosive` (after an applosive). + * Defaults to `none` when omitted. + */ + condition?: "none" | "any" | "vowel" | "applosive"; +} + +export interface TypoTransformer { + /** + * A list of {@link TypoDefinition} that define typo generation rules. + */ + defs: TypoDefinition[]; + /** + * The cost of continual typos. Defaults to 1. + */ + continualTypoCost?: number; +} + +export interface BuildArgs { + /** + * The model files to load. Required. + */ + modelFiles: ModelFiles; + /** + * If `true`, unify phonological variants. 
+ * Outputs endings that change form depending on the positivity/negativity of the preceding vowel, such as /아/ and /어/ or /았/ and /었/, as one. + * Defaults to `true` + */ + integrateAllomorph?: boolean; + /** + * If `true`, the default dictionary is loaded. + * The default dictionary consists of proper noun headings extracted from Wikipedia and Namuwiki. + * Defaults to `true`. + */ + loadDefaultDict?: boolean; + /** + * If true, the built-in typo dictionary is loaded. + * The typo dictionary consists of a subset of common misspellings and variant endings that are commonly used on the internet. + * Defaults to `true`. + */ + loadTypoDict?: boolean; + /** + * If `true`, the built-in polysemous dictionary is loaded. + * The polysemous dictionary consists of proper nouns listed in WikiData. + * Defaults to `true`. + */ + loadMultiDict?: boolean; + /** + * Additional user dictionaries to load. Used files must appear in the `modelFiles` object. + */ + userDicts?: string[]; + /** + * Additional user words to load. + */ + userWords?: UserWord[]; + /** + * Preanalyzed words to load. + */ + preanalyzedWords?: PreanalyzedWord[]; + /** + * Specifies the language model to use for morphological analysis. Defaults to 'knlm'. + * - `knlm`: Fast and can model the relationships between morphemes within a short distance (usually two or three) with high accuracy. However, it has the limitation that it cannot take into account the relationships between morphemes over a long distance. + * - `sbg`: Driven by internally calibrating the results of SkipBigram to the results of KNLM. At a processing time increase of about 30% compared to KNLM, it is able to model relationships between morphemes over large distances (up to 8 real morphemes) with moderate accuracy. + */ + modelType?: 'knlm' | 'sbg'; + /** + * The typo information to use for correction. + * Can be one of the built in `none`, `basic`, `continual`, `basicWithContinual` typo sets, or a custom {@link TypoTransformer}. 
+ * Defaults to `none`, which disables typo correction. + */ + typos?: 'none' | 'basic' | 'continual' | 'basicWithContinual' | TypoTransformer; + /** + * The maximum typo cost to consider when correcting typos. Typos beyond this cost will not be explored. Defaults to 2.5. + */ + typoCostThreshold?: number; +}; diff --git a/bindings/wasm/package/src/index.ts b/bindings/wasm/package/src/index.ts new file mode 100644 index 00000000..a8f19f2e --- /dev/null +++ b/bindings/wasm/package/src/index.ts @@ -0,0 +1,3 @@ +export * from './build-args.js'; +export * from './kiwi-builder.js'; +export * from './kiwi.js'; diff --git a/bindings/wasm/package/src/kiwi-api.ts b/bindings/wasm/package/src/kiwi-api.ts new file mode 100644 index 00000000..52506dc2 --- /dev/null +++ b/bindings/wasm/package/src/kiwi-api.ts @@ -0,0 +1,16 @@ +import { ModelFiles } from "./build-args"; + +interface LoadModelFilesResult { + unload: () => Promise; + modelPath: string; +} + +export interface KiwiApi { + cmd: (args: any) => any; + loadModelFiles: (files: ModelFiles) => Promise; +} + +export interface KiwiApiAsync { + cmd: (...args: any) => Promise; + loadFiles: (files: ModelFiles) => Promise; +} diff --git a/bindings/wasm/package/src/kiwi-builder.ts b/bindings/wasm/package/src/kiwi-builder.ts new file mode 100644 index 00000000..9d86af85 --- /dev/null +++ b/bindings/wasm/package/src/kiwi-builder.ts @@ -0,0 +1,131 @@ +import initKiwi from './build/kiwi-wasm.js'; +import { KiwiApi } from './kiwi-api.js'; +import { Kiwi } from './kiwi.js'; +import { BuildArgs } from './build-args.js'; + +async function createKiwiApi(wasmPath: string): Promise { + const kiwi = await initKiwi({ + locateFile: (path) => { + if (path.endsWith('.wasm')) { + return wasmPath; + } + return path; + }, + }); + + return { + cmd: (args: any) => { + return JSON.parse(kiwi.api(JSON.stringify(args))); + }, + loadModelFiles: async (files) => { + const modelPath = + Math.random().toString(36).substring(2) + Date.now(); + + 
kiwi.FS.mkdir(modelPath); + + const fileEntries = Object.entries(files); + + await Promise.all( + fileEntries.map(async ([name, data]) => { + const path = modelPath + '/' + name; + + if (typeof data === 'string') { + const response = await fetch(data); + + if (!response.ok) { + throw new Error(`Failed to fetch file: ${data}`); + } + + const fetchData = await response.arrayBuffer(); + + kiwi.FS.writeFile(path, new Uint8Array(fetchData)); + } else { + kiwi.FS.writeFile(path, data); + } + }) + ); + + return { + unload: async () => { + kiwi.FS.rmdir(modelPath); + }, + modelPath, + }; + }, + }; +} + +/** + * Used to create Kiwi instances. Main entry point for the API. + * It is recommended to create a KiwiBuilder and the Kiwi instances in a worker to prevent blocking the main thread. + */ +export class KiwiBuilder { + private api: KiwiApi; + + private constructor(api: KiwiApi) { + this.api = api; + } + + /** + * Creates a new KiwiBuilder instance. This internally loads the wasm file. + * @param wasmPath Path to the kiwi-wasm.wasm file. This is located at `/dist/kiwi-wasm.wasm` in the npm package. + * It is up to the user to serve this file. See the `package-demo` project for an example of how to include this file as a static asset with vite. + */ + static async create(wasmPath: string): Promise { + const api = await createKiwiApi(wasmPath); + return new KiwiBuilder(api); + } + + /** + * Creates a new Kiwi instance. + * Note: Even though this method is async, the construction of the Kiwi instance happens in the same + * JavaScript context. This means that this method can hang your application if not called in a worker. + * @param buildArgs Arguments for building the Kiwi instance. See {@link BuildArgs} for more information. + * @returns a {@link Kiwi} instance that is ready for morphological analysis. 
+ */ + public async build(buildArgs: BuildArgs): Promise { + const modelFiles = buildArgs.modelFiles; + const loadResult = await this.api.loadModelFiles(modelFiles); + + const apiBuildArgs = { + ...buildArgs, + modelPath: loadResult.modelPath, + }; + apiBuildArgs.modelFiles = undefined; + if (apiBuildArgs.userDicts) { + apiBuildArgs.userDicts = apiBuildArgs.userDicts.map( + (path) => loadResult.modelPath + '/' + path + ); + } + + const id = this.api.cmd({ + method: 'build', + args: [apiBuildArgs], + }) as number; + + return new Proxy({}, { + get: (_target, prop) => { + // prevent recursive promise resolution + if (prop === 'then') { + return undefined; + } + + return (...methodArgs: any[]) => { + return this.api.cmd({ + method: prop.toString(), + id, + args: methodArgs, + }); + }; + }, + }) as Kiwi; + } + + /** + * Get the version of the Kiwi wasm module. + * @returns The version of the Kiwi wasm module. + */ + version(): string { + return this.api.cmd({ method: 'version', args: [] }) as string; + } +} diff --git a/bindings/wasm/package/src/kiwi.ts b/bindings/wasm/package/src/kiwi.ts new file mode 100644 index 00000000..c42aef3c --- /dev/null +++ b/bindings/wasm/package/src/kiwi.ts @@ -0,0 +1,287 @@ +import { AsyncMethods } from './util.js'; + +/** + * Describes a single morpheme in the input string of the morphological analysis. + */ +export interface TokenInfo { + /** + * The form of the morpheme. + */ + str: string; + /** + * The start position in the input string. + */ + position: number; + /** + * Word index in the input string (space based). + */ + wordPosition: number; + /** + * Sentence index in the input string. + */ + sentPosition: number; + /** + * Line index in the input string. + */ + lineNumber: number; + /** + * Length of the morpheme in the input string. + */ + length: number; + /** + * Part of speech tag of the morpheme. + */ + tag: string; + /** + * Language model score of the morpheme. 
+ */ + score: number; + /** + * Cost of the typo that was corrected. If no typo correction was performed, this value is 0. + */ + typoCost: number; + /** + * Typo correction form if typo correction was performed. Id of pretokenized span if no typo correction was performed. + */ + typoFormId: number; + /** + * For morphemes belonging to SSO, SSC part of speech tags, the position of the paired morpheme (-1 means no corresponding morpheme). + */ + pairedToken: number; + /** + * The index of the sub-sentence enclosed in quotation marks or parentheses. Starts at 1. A value of 0 indicates that it is not a subordinate sentence. + */ + subSentPosition: number; + /** + * The id of the morpheme information in the used Kiwi object. -1 indicates OOV. + */ + morphId: number; + +} + +export interface TokenResult { + /** + * Array of `TokenInfo` objects representing the morphemes in the input string. + */ + tokens: TokenInfo[]; + /** + * The score of the morphological analysis result. + */ + score: number; +} + +/** + * Describes matching options when performing morphological analysis. + * These options can be combined using the bitwise OR operator. + */ +export enum Match { + none = 0, + url = 1 << 0, + email = 1 << 1, + hashtag = 1 << 2, + mention = 1 << 3, + serial = 1 << 4, + emoji = 1 << 5, + normalizeCoda = 1 << 16, + joinNounPrefix = 1 << 17, + joinNounSuffix = 1 << 18, + joinVerbSuffix = 1 << 19, + joinAdjSuffix = 1 << 20, + joinAdvSuffix = 1 << 21, + splitComplex = 1 << 22, + zCoda = 1 << 23, + joinVSuffix = joinVerbSuffix | joinAdjSuffix, + joinAffix = joinNounPrefix | + joinNounSuffix | + joinVerbSuffix | + joinAdjSuffix | + joinAdvSuffix, + all = url | email | hashtag | mention | serial | emoji | zCoda, + allWithNormalizing = all | normalizeCoda, +} + +export interface SentenceSpan { + start: number; + end: number; +} + +export interface SentenceSplitResult { + /** + * Array of `SentenceSpan` objects representing the start and end positions of each sentence. 
+ */ + spans: SentenceSpan[]; + /** + * A single `TokenResult` object representing the morphological analysis result of the input string, or `null`. + */ + tokenResult: TokenResult | null; +} + +export enum Space { + none = 0, + noSpace = 1, + insertSpace = 2, +} + +export interface Morph { + form: string; + tag: string; +} + +export interface SentenceJoinMorph extends Morph { + space?: Space; +} + +export interface SentenceJoinResult { + str: string; + ranges: SentenceSpan[] | null; +} + +export type MorphemeSet = number; + +export interface PretokenizedToken extends Morph { + start: number; + end: number; +} + +export interface PretokenizedSpan { + start: number; + end: number; + tokenization: PretokenizedToken[]; +} + +/** + * Interface that performs the actual morphological analysis. + * Cannot be constructed directly, use {@link KiwiBuilder} to create a new instance. + */ +export interface Kiwi { + /** + * Tells whether the current Kiwi object is ready to perform morphological analysis. + * @returns `true` if it is ready for morphological analysis. + */ + ready: () => boolean; + /** + * Tells you if the current Kiwi object was created with typo correction turned on. + * @returns `true` if typo correction is turned on. + */ + isTypoTolerant: () => boolean; + /** + * Performs morphological analysis. Returns a single list of tokens along with an analysis score. Use `tokenize` if the result score is not needed. Use `analyzeTopN` if you need multiple results. + * @param str String to analyze + * @param matchOptions Specifies the special string pattern extracted. This can be set to any combination of `Match` by using the bitwise OR operator. + * @param blockList Specifies a list of morphemes to prohibit from appearing as candidates in the analysis. + * @param pretokenized Predefines the result of morphological analysis of a specific segment of text prior to morphological analysis. The section of text defined by this value will always be tokenized in that way only. 
+ * @returns A single `TokenResult` object. + */ + analyze: ( + str: string, + matchOptions?: Match, + blockList?: Morph[] | MorphemeSet, + pretokenized?: PretokenizedSpan[] + ) => TokenResult; + /** + * Performs morphological analysis. Returns multiple lists of tokens, each along with an analysis score. Use `tokenizeTopN` if the result scores are not needed. Use `analyze` if you need only one result. + * @param str String to analyze + * @param n Number of results to return + * @param matchOptions Specifies the special string pattern extracted. This can be set to any combination of `Match` by using the bitwise OR operator. + * @param blockList Specifies a list of morphemes to prohibit from appearing as candidates in the analysis. + * @param pretokenized Predefines the result of morphological analysis of a specific segment of text prior to morphological analysis. The section of text defined by this value will always be tokenized in that way only. + * @returns A list of `TokenResult` objects. + */ + analyzeTopN: ( + str: string, + n: number, + matchOptions?: Match, + blockList?: Morph[] | MorphemeSet, + pretokenized?: PretokenizedSpan[] + ) => TokenResult[]; + /** + * Performs morphological analysis. Returns a single list of tokens. Use `analyze` if the result score is needed. Use `tokenizeTopN` if you need multiple results. + * @param str String to analyze + * @param matchOptions Specifies the special string pattern extracted. This can be set to any combination of `Match` by using the bitwise OR operator. + * @param blockList Specifies a list of morphemes to prohibit from appearing as candidates in the analysis. + * @param pretokenized Predefines the result of morphological analysis of a specific segment of text prior to morphological analysis. The section of text defined by this value will always be tokenized in that way only. + * @returns A list of `TokenInfo` objects. 
+ */ + tokenize: ( + str: string, + matchOptions?: Match, + blockList?: Morph[] | MorphemeSet, + pretokenized?: PretokenizedSpan[] + ) => TokenInfo[]; + /** + * Performs morphological analysis. Returns multiple lists of tokens. Use `analyzeTopN` if the result scores are needed. Use `tokenize` if you need only one result. + * @param str String to analyze + * @param n Number of results to return + * @param matchOptions Specifies the special string pattern extracted. This can be set to any combination of `Match` by using the bitwise OR operator. + * @param blockList Specifies a list of morphemes to prohibit from appearing as candidates in the analysis. + * @param pretokenized Predefines the result of morphological analysis of a specific segment of text prior to morphological analysis. The section of text defined by this value will always be tokenized in that way only. + * @returns A list of lists of `TokenInfo` objects. + */ + tokenizeTopN: ( + str: string, + n: number, + matchOptions?: Match, + blockList?: Morph[] | MorphemeSet, + pretokenized?: PretokenizedSpan[] + ) => TokenInfo[][]; + /** + * Returns the input text split into sentences. This method uses stemming internally during the sentence splitting process, so it can also be used to get stemming results simultaneously with sentence splitting. + * @param str String to split + * @param matchOptions Specifies the special string pattern extracted. This can be set to any combination of `Match` by using the bitwise OR operator. + * @param withTokenResult Specifies whether to include the result of morphological analysis in the returned `SentenceSplitResult` object. + * @returns A `SentenceSplitResult` object. + */ + splitIntoSents: ( + str: string, + matchOptions?: Match, + withTokenResult?: boolean + ) => SentenceSplitResult; + /** + * Combines morphemes and restores them to a sentence. Endings are changed to the appropriate form to match the preceding morpheme. 
+ * @param morphs List of morphemes to combine + * @param lmSearch When there is an ambiguous morpheme that can be restored in more than one form, if this value is `true`, the language model is explored to select the best form. If `false`, no exploration is performed, but restoration is faster. + * @param withRanges Whether to include the ranges of the morphemes in the returned `SentenceJoinResult` object. + * @returns A `SentenceJoinResult` object. + */ + joinSent: ( + morphs: SentenceJoinMorph[], + lmSearch?: boolean, + withRanges?: boolean + ) => SentenceJoinResult; + getCutOffThreshold: () => number; + setCutOffThreshold: (v: number) => void; + getUnkScoreBias: () => number; + setUnkScoreBias: (v: number) => void; + getUnkScoreScale: () => number; + setUnkScoreScale: (v: number) => void; + getMaxUnkFormSize: () => number; + setMaxUnkFormSize: (v: number) => void; + getSpaceTolerance: () => number; + setSpaceTolerance: (v: number) => void; + getSpacePenalty: () => number; + setSpacePenalty: (v: number) => void; + getTypoCostWeight: () => number; + setTypoCostWeight: (v: number) => void; + getIntegrateAllomorphic: () => boolean; + setIntegrateAllomorphic: (v: boolean) => void; + /** + * Creates a reusable morpheme set from a list of morphemes. This is intended to be used as the `blockList` parameter for the analyze and tokenize methods. + * NOTE: The morpheme set must be destroyed using `destroyMorphemeSet` when it is no longer needed. Otherwise, it will cause a memory leak. + * If you are using the morpheme set only once, you can pass the morpheme list directly to the `blockList` parameter instead of creating a morpheme set. + * @param morphs List of morphemes to create a set from + * @returns a handle to the created morpheme set + */ + createMorphemeSet: (morphs: Morph[]) => MorphemeSet; + /** + * Destroys a morpheme set created by `createMorphemeSet`. 
+ * @param id Handle to the morpheme set to destroy + */ + destroyMorphemeSet: (id: MorphemeSet) => void; +} + +/** + * Interface that performs the actual morphological analysis. + * Same as `Kiwi`, but with all methods returning promises. This can be used when the original `Kiwi` object is constructed with a Web Worker. + * Cannot be constructed directly. + */ +export type KiwiAsync = AsyncMethods; diff --git a/bindings/wasm/package/src/util.ts b/bindings/wasm/package/src/util.ts new file mode 100644 index 00000000..bd69dc64 --- /dev/null +++ b/bindings/wasm/package/src/util.ts @@ -0,0 +1,5 @@ +export type AsyncMethods = { + [K in keyof T]: T[K] extends (...args: any) => any + ? (...args: Parameters) => Promise> + : T[K]; +}; diff --git a/bindings/wasm/package/tsconfig.json b/bindings/wasm/package/tsconfig.json new file mode 100644 index 00000000..4dfb491e --- /dev/null +++ b/bindings/wasm/package/tsconfig.json @@ -0,0 +1,10 @@ +{ + "compilerOptions": { + "module": "ES6", + "target": "ES2017", + "allowJs": true, + "declaration": true, + "outDir": "./dist" + }, + "include": ["src/**/*"], +} diff --git a/include/kiwi/BitUtils.h b/include/kiwi/BitUtils.h index a6361460..26c8cc29 100644 --- a/include/kiwi/BitUtils.h +++ b/include/kiwi/BitUtils.h @@ -136,7 +136,7 @@ namespace kiwi #endif } -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(EMSCRIPTEN) inline int countTrailingZeroes(size_t v) { return countTrailingZeroes((uint64_t)v); } inline int countLeadingZeroes(size_t v) { return countLeadingZeroes((uint64_t)v); } diff --git a/src/KiwiBuilder.cpp b/src/KiwiBuilder.cpp index 3c072843..e5b2662d 100644 --- a/src/KiwiBuilder.cpp +++ b/src/KiwiBuilder.cpp @@ -1695,7 +1695,7 @@ Kiwi KiwiBuilder::build(const TypoTransformer& typos, float typoCostThreshold) c ret.morphemes.reserve(morphemes.size() + combinedMorphemes.size()); ret.combiningRule = combiningRule; ret.integrateAllomorph = !!(options & BuildOption::integrateAllomorph); - if (numThreads >= 1) + if (numThreads 
> 1) { ret.pool = make_unique(numThreads); }