Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Repo sync #34373

Merged
merged 6 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 68 additions & 34 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
"sync-search-indices": "node src/search/scripts/sync-search-indices.js",
"sync-search-server": "cross-env NODE_ENV=production PORT=4002 MINIMAL_RENDER=true CHANGELOG_DISABLED=true tsx src/frame/server.ts",
"sync-secret-scanning": "tsx src/secret-scanning/scripts/sync.ts",
"sync-webhooks": "src/rest/scripts/update-files.js -o webhooks",
"sync-webhooks": "npx tsx src/rest/scripts/update-files.ts -o webhooks",
"test": "vitest",
"test-local-dev": "node src/workflows/test-local-dev.js",
"test-moved-content": "tsx src/content-render/scripts/test-moved-content.ts",
Expand Down Expand Up @@ -236,7 +236,7 @@
"@primer/octicons-react": "^19.11.0",
"@primer/react": "36.27.0",
"accept-language-parser": "^1.5.0",
"ajv": "^8.16.0",
"ajv": "^8.17.1",
"ajv-errors": "^3.0.0",
"ajv-formats": "^3.0.1",
"bottleneck": "2.19.5",
Expand All @@ -254,7 +254,7 @@
"express": "4.19.2",
"express-rate-limit": "7.4.0",
"fastest-levenshtein": "1.0.16",
"file-type": "19.1.0",
"file-type": "19.4.1",
"flat": "^6.0.1",
"github-slugger": "^2.0.0",
"glob": "11.0.0",
Expand Down Expand Up @@ -323,7 +323,7 @@
"@graphql-inspector/core": "^6.1.0",
"@graphql-tools/load": "^8.0.0",
"@octokit/rest": "^20.1.0",
"@playwright/test": "1.44.1",
"@playwright/test": "1.46.1",
"@types/accept-language-parser": "1.5.6",
"@types/connect-datadog": "0.0.10",
"@types/connect-timeout": "0.0.39",
Expand Down
2 changes: 1 addition & 1 deletion src/audit-logs/lib/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"apiOnlyEvents": "This event is not available in the web interface, only via the REST API, audit log streaming, or JSON/CSV exports.",
"apiRequestEvent": "This event is only available via audit log streaming."
},
"sha": "156e6897dededb381697da9a39e7bb6eb7971480"
"sha": "4516a2f1ddf74032b4474b272c9850055470cad3"
}
19 changes: 19 additions & 0 deletions src/events/analyze-comment.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,25 @@ export const SIGNAL_RATINGS = [
},
]

/**
 * Guess which language a survey comment is written in.
 *
 * The trimmed comment is handed to `language.guessBest` and the `alpha2`
 * property of its best guess (such as 'en' or 'fr') is returned.
 *
 * @horizon-rs/language-guesser is based on tri-grams and can lead to false
 * positives. For example, it thinks that 'Thamk you ❤️🙏' is Haitian! And
 * that 'I wanne robux 1000' is Polish! That's because they are short and
 * there are not enough clues to guess what language it is. You and I might
 * know those are actually attempts to be English, despite the spelling.
 * Given that this is just a signal, and not a hard blocker, treat the
 * result as a clue rather than a fact.
 *
 * @param {string} comment - Raw comment text; may be missing or blank.
 * @returns {Promise<string | undefined>} The `alpha2` value of the best
 *   guess, or `undefined` when there is no usable text or no guess.
 */
export async function getGuessedLanguage(comment) {
  // Anything that is not a string (missing field, malformed payload) has no
  // text to guess from; bail out instead of throwing on `.trim()`.
  if (typeof comment !== 'string') return

  const trimmed = comment.trim()
  if (!trimmed) return

  const bestGuess = language.guessBest(trimmed)
  // The guesser may come back without a best guess at all.
  if (!bestGuess) return

  return bestGuess.alpha2
}

export async function analyzeComment(text, language = 'en') {
const signals = []
let rating = 1.0
Expand Down
1 change: 1 addition & 0 deletions src/events/components/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ type SendEventProps = {
survey_comment?: string
survey_email?: string
survey_rating?: number
survey_comment_language?: string
}
}

Expand Down
7 changes: 6 additions & 1 deletion src/events/lib/schema.js
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,12 @@ const survey = {
survey_rating: {
type: 'number',
description:
'The compute rating of the quality of the survey comment. Used for spam filtering and quality control.',
'The computed rating of the quality of the survey comment. Used for spam filtering and quality control.',
},
survey_comment_language: {
type: 'string',
description:
'The guessed language of the survey comment. The guessed language is very inaccurate when the string contains fewer than 3 or 4 words.',
},
},
}
Expand Down
3 changes: 2 additions & 1 deletion src/events/middleware.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { noCacheControl } from '#src/frame/middleware/cache-control.js'
import { getJsonValidator } from '#src/tests/lib/validate-json-schema.js'
import { formatErrors } from './lib/middleware-errors.js'
import { publish as _publish } from './lib/hydro.js'
import { analyzeComment } from './analyze-comment.js'
import { analyzeComment, getGuessedLanguage } from './analyze-comment.js'

const router = express.Router()
const OMIT_FIELDS = ['type']
Expand Down Expand Up @@ -74,6 +74,7 @@ router.post(
comment: req.body.survey_comment,
language: req.body.context.path_language,
})
req.body.survey_comment_language = await getGuessedLanguage(req.body.survey_comment)
}

await publish({
Expand Down
24 changes: 23 additions & 1 deletion src/events/tests/analyze-comments.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, expect, test } from 'vitest'

import { analyzeComment } from '../analyze-comment.js'
import { analyzeComment, getGuessedLanguage } from '../analyze-comment.js'

describe('analyzeComment', () => {
test('email only', async () => {
Expand Down Expand Up @@ -248,4 +248,26 @@ describe('analyzeComment', () => {
expect(signals.includes('spammy-words')).toBeFalsy()
}
})

test('guessed-language', async () => {
  // Longer text gives the guesser enough material to get it right.
  {
    const guessedLanguage = await getGuessedLanguage('Garçon des la voituré')
    expect(guessedLanguage).toBe('fr')
  }
  {
    const guessedLanguage = await getGuessedLanguage('english words longer sentence this time')
    expect(guessedLanguage).toBe('en')
  }

  // False positives due to short text.
  // These must go through getGuessedLanguage: analyzeComment resolves to an
  // analysis result rather than a language code, so comparing its result to
  // 'en' or 'fr' could never fail.
  // NOTE(review): the expected mis-guess for these one-word inputs comes from
  // the original test's intent — confirm against the guesser's actual output.
  {
    const guessedLanguage = await getGuessedLanguage('Hello')
    expect(guessedLanguage).not.toBe('en')
  }
  {
    const guessedLanguage = await getGuessedLanguage('Garçon')
    expect(guessedLanguage).not.toBe('fr')
  }
})
})
Loading
Loading