diff --git a/package-lock.json b/package-lock.json index c9d0859b64d4..16fa183a25a3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,7 +19,7 @@ "@primer/octicons-react": "^19.11.0", "@primer/react": "36.27.0", "accept-language-parser": "^1.5.0", - "ajv": "^8.16.0", + "ajv": "^8.17.1", "ajv-errors": "^3.0.0", "ajv-formats": "^3.0.1", "bottleneck": "2.19.5", @@ -37,7 +37,7 @@ "express": "4.19.2", "express-rate-limit": "7.4.0", "fastest-levenshtein": "1.0.16", - "file-type": "19.1.0", + "file-type": "19.4.1", "flat": "^6.0.1", "github-slugger": "^2.0.0", "glob": "11.0.0", @@ -106,7 +106,7 @@ "@graphql-inspector/core": "^6.1.0", "@graphql-tools/load": "^8.0.0", "@octokit/rest": "^20.1.0", - "@playwright/test": "1.44.1", + "@playwright/test": "1.46.1", "@types/accept-language-parser": "1.5.6", "@types/connect-datadog": "0.0.10", "@types/connect-timeout": "0.0.39", @@ -2580,18 +2580,18 @@ } }, "node_modules/@playwright/test": { - "version": "1.44.1", - "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.44.1.tgz", - "integrity": "sha512-1hZ4TNvD5z9VuhNJ/walIjvMVvYkZKf71axoF/uiAqpntQJXpG64dlXhoDXE3OczPuTuvjf/M5KWFg5VAVUS3Q==", + "version": "1.46.1", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.46.1.tgz", + "integrity": "sha512-Fq6SwLujA/DOIvNC2EL/SojJnkKf/rAwJ//APpJJHRyMi1PdKrY3Az+4XNQ51N4RTbItbIByQ0jgd1tayq1aeA==", "devOptional": true, "dependencies": { - "playwright": "1.44.1" + "playwright": "1.46.1" }, "bin": { "playwright": "cli.js" }, "engines": { - "node": ">=16" + "node": ">=18" } }, "node_modules/@primer/behaviors": { @@ -3877,14 +3877,14 @@ } }, "node_modules/ajv": { - "version": "8.16.0", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.16.0.tgz", - "integrity": "sha512-F0twR8U1ZU67JIEtekUcLkXkoO5mMMmgGD8sK/xUFzJ805jxHQl92hImFAqqXMyMYjSPOyUPAwHYhB72g5sTXw==", + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", + "integrity": 
"sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dependencies": { "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", - "require-from-string": "^2.0.2", - "uri-js": "^4.4.1" + "require-from-string": "^2.0.2" }, "funding": { "type": "github", @@ -6646,6 +6646,11 @@ "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", "dev": true }, + "node_modules/fast-uri": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.0.1.tgz", + "integrity": "sha512-MWipKbbYiYI0UC7cl8m/i/IWTqfC8YXsqjzybjddLsFjStroQzsHXkc73JutMvBiXmOvapk+axIl79ig5t55Bw==" + }, "node_modules/fast-xml-parser": { "version": "4.4.1", "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.4.1.tgz", @@ -6694,11 +6699,12 @@ } }, "node_modules/file-type": { - "version": "19.1.0", - "resolved": "https://registry.npmjs.org/file-type/-/file-type-19.1.0.tgz", - "integrity": "sha512-5rzeC2/GeStiAlYCenfrbKrQCiEzJTetCExFinFCH1UUz1XL7NlxRpLTwdWXzlVhLReRrWkfkNCH1Ap5zqOXtg==", + "version": "19.4.1", + "resolved": "https://registry.npmjs.org/file-type/-/file-type-19.4.1.tgz", + "integrity": "sha512-RuWzwF2L9tCHS76KR/Mdh+DwJZcFCzrhrPXpOw6MlEfl/o31fjpTikzcKlYuyeV7e7ftdCGVJTNOCzkYD/aLbw==", "dependencies": { - "strtok3": "^7.1.0", + "get-stream": "^9.0.1", + "strtok3": "^8.1.0", "token-types": "^6.0.0", "uint8array-extras": "^1.3.0" }, @@ -6709,6 +6715,32 @@ "url": "https://github.com/sindresorhus/file-type?sponsor=1" } }, + "node_modules/file-type/node_modules/get-stream": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-9.0.1.tgz", + "integrity": "sha512-kVCxPF3vQM/N0B1PmoqVUqgHP+EeVjmZSQn+1oCRPxd2P21P2F19lIgbR3HBosbB1PUhOAoctJnfEn2GbN2eZA==", + "dependencies": { + "@sec-ant/readable-stream": "^0.4.1", + "is-stream": "^4.0.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + 
"url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/file-type/node_modules/is-stream": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-4.0.1.tgz", + "integrity": "sha512-Dnz92NInDqYckGEUJv689RbRiTSEHCQ7wOVeALbkOz999YpqT46yMRIGtSNl2iCL1waAZSx40+h59NV/EwzV/A==", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/file-uri-to-path": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", @@ -11198,9 +11230,9 @@ } }, "node_modules/peek-readable": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/peek-readable/-/peek-readable-5.1.1.tgz", - "integrity": "sha512-4hEOSH7KeEaZpMDF/xfm1W9fS5rT7Ett3BkXWHqAEzRLLwLaHkwOL+GvvpIEh9UrvX9BDhzfkvteslgraoH69w==", + "version": "5.1.4", + "resolved": "https://registry.npmjs.org/peek-readable/-/peek-readable-5.1.4.tgz", + "integrity": "sha512-E7mY2VmKqw9jYuXrSWGHFuPCW2SLQenzXLF3amGaY6lXXg4/b3gj5HVM7h8ZjCO/nZS9ICs0Cz285+32FvNd/A==", "engines": { "node": ">=14.16" }, @@ -11236,33 +11268,33 @@ } }, "node_modules/playwright": { - "version": "1.44.1", - "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.44.1.tgz", - "integrity": "sha512-qr/0UJ5CFAtloI3avF95Y0L1xQo6r3LQArLIg/z/PoGJ6xa+EwzrwO5lpNr/09STxdHuUoP2mvuELJS+hLdtgg==", + "version": "1.46.1", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.46.1.tgz", + "integrity": "sha512-oPcr1yqoXLCkgKtD5eNUPLiN40rYEM39odNpIb6VE6S7/15gJmA1NzVv6zJYusV0e7tzvkU/utBFNa/Kpxmwng==", "devOptional": true, "dependencies": { - "playwright-core": "1.44.1" + "playwright-core": "1.46.1" }, "bin": { "playwright": "cli.js" }, "engines": { - "node": ">=16" + "node": ">=18" }, "optionalDependencies": { "fsevents": "2.3.2" } }, "node_modules/playwright-core": { - "version": "1.44.1", - "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.44.1.tgz", - 
"integrity": "sha512-wh0JWtYTrhv1+OSsLPgFzGzt67Y7BE/ZS3jEqgGBlp2ppp1ZDj8c+9IARNW4dwf1poq5MgHreEM2KV/GuR4cFA==", + "version": "1.46.1", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.46.1.tgz", + "integrity": "sha512-h9LqIQaAv+CYvWzsZ+h3RsrqCStkBHlgo6/TJlFst3cOTlLghBQlJwPOZKQJTKNaD3QIB7aAVQ+gfWbN3NXB7A==", "devOptional": true, "bin": { "playwright-core": "cli.js" }, "engines": { - "node": ">=16" + "node": ">=18" } }, "node_modules/possible-typed-array-names": { @@ -11408,6 +11440,7 @@ }, "node_modules/punycode": { "version": "2.1.1", + "dev": true, "license": "MIT", "engines": { "node": ">=6" @@ -13464,15 +13497,15 @@ "license": "MIT" }, "node_modules/strtok3": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-7.1.0.tgz", - "integrity": "sha512-19dQEwG6Jd+VabjPRyBhymIF069vZiqWSZa2jJBoKJTsqGKnTxowGoQaLnz+yLARfDI041IUQekyPUMWElOgsQ==", + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-8.1.0.tgz", + "integrity": "sha512-ExzDvHYPj6F6QkSNe/JxSlBxTh3OrI6wrAIz53ulxo1c4hBJ1bT9C/JrAthEKHWG9riVH3Xzg7B03Oxty6S2Lw==", "dependencies": { "@tokenizer/token": "^0.3.0", - "peek-readable": "^5.1.1" + "peek-readable": "^5.1.4" }, "engines": { - "node": ">=14.16" + "node": ">=16" }, "funding": { "type": "github", @@ -14353,6 +14386,7 @@ }, "node_modules/uri-js": { "version": "4.4.1", + "dev": true, "license": "BSD-2-Clause", "dependencies": { "punycode": "^2.1.0" diff --git a/package.json b/package.json index f0ab041c0215..b1b213dc8c34 100644 --- a/package.json +++ b/package.json @@ -74,7 +74,7 @@ "sync-search-indices": "node src/search/scripts/sync-search-indices.js", "sync-search-server": "cross-env NODE_ENV=production PORT=4002 MINIMAL_RENDER=true CHANGELOG_DISABLED=true tsx src/frame/server.ts", "sync-secret-scanning": "tsx src/secret-scanning/scripts/sync.ts", - "sync-webhooks": "src/rest/scripts/update-files.js -o webhooks", + "sync-webhooks": "npx tsx src/rest/scripts/update-files.ts 
-o webhooks", "test": "vitest", "test-local-dev": "node src/workflows/test-local-dev.js", "test-moved-content": "tsx src/content-render/scripts/test-moved-content.ts", @@ -236,7 +236,7 @@ "@primer/octicons-react": "^19.11.0", "@primer/react": "36.27.0", "accept-language-parser": "^1.5.0", - "ajv": "^8.16.0", + "ajv": "^8.17.1", "ajv-errors": "^3.0.0", "ajv-formats": "^3.0.1", "bottleneck": "2.19.5", @@ -254,7 +254,7 @@ "express": "4.19.2", "express-rate-limit": "7.4.0", "fastest-levenshtein": "1.0.16", - "file-type": "19.1.0", + "file-type": "19.4.1", "flat": "^6.0.1", "github-slugger": "^2.0.0", "glob": "11.0.0", @@ -323,7 +323,7 @@ "@graphql-inspector/core": "^6.1.0", "@graphql-tools/load": "^8.0.0", "@octokit/rest": "^20.1.0", - "@playwright/test": "1.44.1", + "@playwright/test": "1.46.1", "@types/accept-language-parser": "1.5.6", "@types/connect-datadog": "0.0.10", "@types/connect-timeout": "0.0.39", diff --git a/src/audit-logs/lib/config.json b/src/audit-logs/lib/config.json index a167ad9b0494..b7ea37444a42 100644 --- a/src/audit-logs/lib/config.json +++ b/src/audit-logs/lib/config.json @@ -3,5 +3,5 @@ "apiOnlyEvents": "This event is not available in the web interface, only via the REST API, audit log streaming, or JSON/CSV exports.", "apiRequestEvent": "This event is only available via audit log streaming." 
}, - "sha": "156e6897dededb381697da9a39e7bb6eb7971480" + "sha": "4516a2f1ddf74032b4474b272c9850055470cad3" } \ No newline at end of file diff --git a/src/events/analyze-comment.js b/src/events/analyze-comment.js index 8b4e61a3bba4..ccf020bc4d70 100644 --- a/src/events/analyze-comment.js +++ b/src/events/analyze-comment.js @@ -72,6 +72,25 @@ export const SIGNAL_RATINGS = [ }, ] +export async function getGuessedLanguage(comment) { + if (!comment || !comment.trim()) { + return + } + + const bestGuess = language.guessBest(comment.trim()) + if (!bestGuess) return // Can happen if the text is just whitespace + // @horizon-rs/language-guesser is based on tri-grams and can lead + // to false positives. For example, it thinks that 'Thamk you ❤️🙏' is + // Haitian! And that 'I wanne robux 1000' is Polish! + // But that's because they are short and there aren't enough clues to + // guess what language it is. You and I might know those are actually + // attempts to be English, despite the spelling. + // But are they useful comments? Given that this is just a signal, + // and not a hard blocker, it's more of a clue than a fact. + + return bestGuess.alpha2 +} + export async function analyzeComment(text, language = 'en') { const signals = [] let rating = 1.0 diff --git a/src/events/components/events.ts b/src/events/components/events.ts index 3c32d290f112..0f51ca145c24 100644 --- a/src/events/components/events.ts +++ b/src/events/components/events.ts @@ -120,6 +120,7 @@ type SendEventProps = { survey_comment?: string survey_email?: string survey_rating?: number + survey_comment_language?: string } } diff --git a/src/events/lib/schema.js b/src/events/lib/schema.js index 41fa78a0857f..e1e72f636a87 100644 --- a/src/events/lib/schema.js +++ b/src/events/lib/schema.js @@ -392,7 +392,12 @@ const survey = { survey_rating: { type: 'number', description: - 'The compute rating of the quality of the survey comment. 
Used for spam filtering and quality control.', + 'The computed rating of the quality of the survey comment. Used for spam filtering and quality control.', + }, + survey_comment_language: { + type: 'string', + description: + 'The guessed language of the survey comment. The guessed language is very inaccurate when the string contains fewer than 3 or 4 words.', }, }, } diff --git a/src/events/middleware.js b/src/events/middleware.js index 071cc2a7d5a9..a131b52361bc 100644 --- a/src/events/middleware.js +++ b/src/events/middleware.js @@ -8,7 +8,7 @@ import { noCacheControl } from '#src/frame/middleware/cache-control.js' import { getJsonValidator } from '#src/tests/lib/validate-json-schema.js' import { formatErrors } from './lib/middleware-errors.js' import { publish as _publish } from './lib/hydro.js' -import { analyzeComment } from './analyze-comment.js' +import { analyzeComment, getGuessedLanguage } from './analyze-comment.js' const router = express.Router() const OMIT_FIELDS = ['type'] @@ -74,6 +74,7 @@ router.post( comment: req.body.survey_comment, language: req.body.context.path_language, }) + req.body.survey_comment_language = await getGuessedLanguage(req.body.survey_comment) } await publish({ diff --git a/src/events/tests/analyze-comments.js b/src/events/tests/analyze-comments.js index 67207094eaf9..3ec086de94f2 100644 --- a/src/events/tests/analyze-comments.js +++ b/src/events/tests/analyze-comments.js @@ -1,6 +1,6 @@ import { describe, expect, test } from 'vitest' -import { analyzeComment } from '../analyze-comment.js' +import { analyzeComment, getGuessedLanguage } from '../analyze-comment.js' describe('analyzeComment', () => { test('email only', async () => { @@ -248,4 +248,26 @@ describe('analyzeComment', () => { expect(signals.includes('spammy-words')).toBeFalsy() } }) + + test('guessed-language', async () => { + // Yes + { + const guessedLanguage = await getGuessedLanguage('Garçon des la voituré') + expect(guessedLanguage).toBe('fr') + } + { + const 
guessedLanguage = await getGuessedLanguage('english words longer sentence this time') + expect(guessedLanguage).toBe('en') + } + + // False positives due to short text + { + const guessedLanguage = await getGuessedLanguage('Hello') + expect(guessedLanguage).not.toBe('en') + } + { + const guessedLanguage = await getGuessedLanguage('Garçon') + expect(guessedLanguage).not.toBe('fr') + } + }) }) diff --git a/src/rest/docs.js b/src/rest/docs.js index 8c5fb59eafb7..a394a0569284 100755 --- a/src/rest/docs.js +++ b/src/rest/docs.js @@ -31,28 +31,28 @@ log(chalk.green.bold(' Examples of ways you can build the REST docs locally:\n' log( chalk.cyan.bold(' - REST All versions:') + ' ' + - chalk.magenta('npm run sync-rest ; npm run dev'), + chalk.magenta('npm run sync-rest && npm run dev'), ) log( chalk.cyan.bold(' - REST Dotcom only:') + ' ' + - chalk.magenta('npm run sync-rest -- --versions api.github.com ; npm run dev'), + chalk.magenta('npm run sync-rest -- --versions api.github.com && npm run dev'), ) log( chalk.cyan.bold(' - REST Two versions:') + ' ' + - chalk.magenta('npm run sync-rest -- --versions ghes-3.7 ghes-3.8 ; npm run dev'), + chalk.magenta('npm run sync-rest -- --versions ghes-3.7 ghes-3.8 && npm run dev'), ) log( chalk.cyan.bold(' - REST Dotcom and next calendar date version:') + ' ' + - chalk.magenta('npm run sync-rest -- --next --versions api.github.com ; npm run dev'), + chalk.magenta('npm run sync-rest -- --next --versions api.github.com && npm run dev'), ) log( chalk.cyan.bold(' - REST Dotcom only, including unpublished operations:') + ' ' + chalk.magenta( - 'npm run sync-rest -- --versions api.github.com --include-unpublished ; npm run dev', + 'npm run sync-rest -- --versions api.github.com --include-unpublished && npm run dev', ), ) log(chalk.green.bold.underline('\nWebhook docs script examples\n')) @@ -60,17 +60,17 @@ log(chalk.green.bold(' Examples of ways you can build the Webhook docs locally: log( chalk.cyan.bold(' - Webhooks All versions:') + ' ' + -
chalk.magenta('npm run sync-webhooks ; npm run dev'), + chalk.magenta('npm run sync-webhooks && npm run dev'), ) log( chalk.cyan.bold(' - Webhooks Dotcom only:') + ' ' + - chalk.magenta('npm run sync-webhooks -- --versions api.github.com ; npm run dev'), + chalk.magenta('npm run sync-webhooks -- --versions api.github.com && npm run dev'), ) log( chalk.cyan.bold(' - Webhooks Two versions:') + ' ' + - chalk.magenta('npm run sync-webhooks -- --versions ghes-3.7 ghes-3.8 ; npm run dev'), + chalk.magenta('npm run sync-webhooks -- --versions ghes-3.7 ghes-3.8 && npm run dev'), ) log(chalk.green.bold('\nFor more info and additional options, run:\n')) log(chalk.white.bold(' npm run sync-rest -- --help'))