Skip to content

Commit

Permalink
feat: detect language by tinyld
Browse files Browse the repository at this point in the history
  • Loading branch information
sdjdd committed Dec 12, 2023
1 parent b0534db commit 0799cb2
Show file tree
Hide file tree
Showing 10 changed files with 77 additions and 409 deletions.
349 changes: 32 additions & 317 deletions next/api/package-lock.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion next/api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
"@formatjs/intl-localematcher": "^0.2.32",
"@koa/cors": "^3.1.0",
"@koa/router": "^10.1.1",
"@notevenaneko/whatlang-node": "^1.0.0",
"@sentry/node": "^7.14.0",
"@sentry/tracing": "^7.14.0",
"@slack/web-api": "^6.5.1",
"aliyun-sdk": "^1.12.10",
"axios": "^0.24.0",
"bull": "^4.2.0",
"chinese-simple2traditional": "^1.2.0",
"date-fns": "^2.28.0",
"debug": "^4.3.4",
"eventemitter3": "^4.0.7",
Expand Down Expand Up @@ -56,6 +56,7 @@
"sluggo": "^0.3.1",
"sqlstring": "^2.3.3",
"throat": "^6.0.1",
"tinyld": "^1.3.4",
"ua-parser-js": "^1.0.33",
"yup": "^0.32.11",
"zod": "^3.11.6"
Expand Down
3 changes: 1 addition & 2 deletions next/api/src/model/Ticket.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import _ from 'lodash';
import { LangCodeISO6391 } from '@notevenaneko/whatlang-node';

import { config } from '@/config';
import events from '@/events';
Expand Down Expand Up @@ -218,7 +217,7 @@ export class Ticket extends Model {
parent?: Ticket;

@field()
language?: LangCodeISO6391;
language?: string;

@field()
channel?: string;
Expand Down
5 changes: 2 additions & 3 deletions next/api/src/router/ticket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import { getIP } from '@/utils';
import { organizationService } from '@/service/organization';
import { roleService } from '@/service/role';
import { allowedTicketLanguages } from '@/utils/locale';
import { LangCodeISO6391 } from '@notevenaneko/whatlang-node';
import { addInOrNotExistCondition } from '@/utils/conditions';
import { dynamicContentService } from '@/dynamic-content';
import { FileResponse } from '@/response/file';
Expand Down Expand Up @@ -763,7 +762,7 @@ const updateTicketSchema = yup.object({
tags: yup.array(ticketTagSchema.required()),
privateTags: yup.array(ticketTagSchema.required()),
evaluation: ticketEvaluationSchema.default(undefined),
language: yup.mixed().oneOf([...allowedTicketLanguages, null]),
language: yup.string().oneOf(allowedTicketLanguages).nullable(),
});

router.patch('/:id', async (ctx) => {
Expand Down Expand Up @@ -869,7 +868,7 @@ router.patch('/:id', async (ctx) => {
}

if (data.language !== undefined) {
updater.setLanguage(data.language as LangCodeISO6391 | null);
updater.setLanguage(data.language);
}

await updater.update(currentUser);
Expand Down
28 changes: 20 additions & 8 deletions next/api/src/ticket/TicketCreator.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import _ from 'lodash';
import { Detector, LangCodeISO6391 } from '@notevenaneko/whatlang-node';
import { detect } from 'tinyld/heavy';
import { traditionToSimple } from 'chinese-simple2traditional';

import events from '@/events';
import { ACLBuilder } from '@/orm';
Expand All @@ -15,8 +16,6 @@ import { TicketLog } from '@/model/TicketLog';
import { allowedTicketLanguages } from '@/utils/locale';
import { durationMetricService } from './services/duration-metric';

const detector = Detector.withAllowlist(allowedTicketLanguages);

export class TicketCreator {
private author?: User;
private reporter?: User;
Expand All @@ -30,7 +29,7 @@ export class TicketCreator {
private customFields?: FieldValue[];
private assignee?: User;
private group?: Group;
private language?: LangCodeISO6391;
private language?: string;
private channel?: string;

private aclBuilder: ACLBuilder;
Expand Down Expand Up @@ -212,11 +211,24 @@ export class TicketCreator {
}

private async detectLanguage() {
const lang = this.content && detector.detect(this.content);
const content = _([this.title, this.content])
.compact()
.map((s) => s.trim().slice(0, 1000))
.join('\n');

if (!content) return;

let lang = detect(content);

// sometimes output lang code does not exist in allowlist
if (lang && lang.isReliable && allowedTicketLanguages.includes(lang.lang.codeISO6391)) {
this.language = lang.lang.codeISO6391;
if (lang && allowedTicketLanguages.includes(lang)) {
if (lang === 'zh') {
if (traditionToSimple(content) === content) {
lang = 'zh-Hans';
} else {
lang = 'zh-Hant';
}
}
this.language = lang;
}
}

Expand Down
3 changes: 1 addition & 2 deletions next/api/src/ticket/TicketUpdater.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import { systemUser, TinyUserInfo, User } from '@/model/User';

import { TinyReplyInfo } from '@/model/Reply';
import { TicketLog } from '@/model/TicketLog';
import { LangCodeISO6391 } from '@notevenaneko/whatlang-node';

export interface UpdateOptions {
useMasterKey?: boolean;
Expand Down Expand Up @@ -89,7 +88,7 @@ export class TicketUpdater {
return this;
}

setLanguage(lang: LangCodeISO6391 | null): this {
setLanguage(lang: string | null): this {
this.data.language = lang;
return this;
}
Expand Down
29 changes: 15 additions & 14 deletions next/api/src/utils/locale.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import { LOCALES } from '@/i18n/locales';
import { z } from 'zod';
import * as yup from 'yup';
import { match } from '@formatjs/intl-localematcher';
import { LangCodeISO6391 } from '@notevenaneko/whatlang-node';

export const localeMatcherFactory = (requestedLocales: string[]) => (
availableLocales: string[],
Expand Down Expand Up @@ -40,17 +39,19 @@ export const localeSchemaForYup = yup
.test((s) => !!(s && LOCALES.includes(s)));

export const allowedTicketLanguages = [
LangCodeISO6391.Zh,
LangCodeISO6391.En,
LangCodeISO6391.Ja,
LangCodeISO6391.Ko,
LangCodeISO6391.Id,
LangCodeISO6391.Th,
LangCodeISO6391.De,
LangCodeISO6391.Fr,
LangCodeISO6391.Ru,
LangCodeISO6391.Es,
LangCodeISO6391.Pt,
LangCodeISO6391.Tr,
LangCodeISO6391.Vi,
'zh',
'zh-Hans',
'zh-Hant',
'en',
'ja',
'ko',
'id',
'th',
'de',
'fr',
'ru',
'es',
'pt',
'tr',
'vi',
];
5 changes: 2 additions & 3 deletions next/web/src/App/Admin/Tickets/Ticket/TicketDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import { getConfig, getMetadataRenderer } from '@/config/config';
import { ENABLE_LEANCLOUD_INTEGRATION, useCurrentUser } from '@/leancloud';
import { TicketLink } from '@/App/Admin/components/TicketLink';
import { TicketStatus } from '@/App/Admin/components/TicketStatus';
import { TicketLanguages } from '@/i18n/locales';
import { Timeline } from './Timeline';
import { TagData, TagForm } from './TagForm';
import { FormField } from './components/FormField';
Expand All @@ -51,7 +52,6 @@ import { CategoryCascader } from './components/CategoryCascader';
import { LeanCloudApp } from './components/LeanCloudApp';
import { ReplyCard } from './components/ReplyCard';
import { useMixedTicket } from './mixed-ticket';
import { langs } from './lang';
import { TicketField_v1, useTicketFields_v1 } from './api1';
import { CustomFields } from './components/CustomFields';
import { useTicketOpsLogs, useTicketReplies } from './timeline-data';
Expand Down Expand Up @@ -516,8 +516,7 @@ function TicketBasicInfoSection({ ticket, onChange, disabled }: TicketBasicInfoS
className="w-full"
allowClear
placeholder="未设置"
options={langs}
fieldNames={{ label: 'name', value: 'code' }}
options={Object.entries(TicketLanguages).map(([value, label]) => ({ label, value }))}
value={ticket.language}
onChange={(language) => onChange({ language: language ?? null })}
disabled={disabled}
Expand Down
59 changes: 0 additions & 59 deletions next/web/src/App/Admin/Tickets/Ticket/lang.ts

This file was deleted.

2 changes: 2 additions & 0 deletions next/web/src/i18n/locales.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ export const LOCALES: Record<string, string> = {
/**
 * Display labels for ticket languages, keyed by language code.
 *
 * Built from LOCALES without its `zh-cn`, `zh-hk` and `zh-tw` entries, followed by
 * three Chinese entries: `zh`, `zh-Hans` and `zh-Hant`.
 * Entry order is the spread order below, which is what `Object.entries` will yield.
 */
export const TicketLanguages: Record<string, string> = {
// NOTE(review): presumably a lodash-style `omit` (copy of LOCALES minus the listed keys) — confirm against the import.
...omit(LOCALES, ['zh-cn', 'zh-hk', 'zh-tw']),
// Generic Chinese; the label makes no script distinction.
zh: '中文',
// Simplified Chinese.
'zh-Hans': '简体中文',
// Traditional Chinese.
'zh-Hant': '繁体中文',
};

0 comments on commit 0799cb2

Please sign in to comment.