Skip to content

Commit

Permalink
passes tests
Browse files Browse the repository at this point in the history
  • Loading branch information
eob committed Oct 1, 2024
1 parent 54cd315 commit 6c57dde
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 33 deletions.
25 changes: 1 addition & 24 deletions scripts/prepare/build/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,28 +44,5 @@ async function crawlers({ fixturesDirectory, downloadedDirectory }) {
const crawlers = await readFixturesYaml(
join(fixturesDirectory, "crawlers.yml"),
);
const browsersList = await browsers({ fixturesDirectory });
const downloaded = [];
for (const file of await readdir(downloadedDirectory)) {
if (!file.endsWith(".json")) {
continue;
}
try {
const content = await readFile(join(downloadedDirectory, file));
downloaded.push(...JSON.parse(content.toString()));
} catch (error) {
// Ignore
}
}
return crawlers.concat(
// Filter the downloaded crawlers lists
downloaded
.flat()
.filter((ua) => !ua.startsWith("#")) // Remove comments
.filter(
(ua = "") => !/ucweb|cubot/i.test(ua), // I don't know why it's in so many crawler lists
)
.filter((ua) => !browsersList.includes(ua)) // Remove browsers manually added to browsers.yml
.filter((ua = "") => ua.length < 4e3), // Remove very long user agent strings
);
return crawlers;
}
18 changes: 9 additions & 9 deletions tests/spec/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,31 +42,31 @@ describe("isai", () => {
expect(isai(AI_USER_AGENT_EXAMPLE)).toBe(true);
});
test("isaiMatch: find pattern in bot user agent string", () => {
expect(isaiMatch(AI_USER_AGENT_EXAMPLE)).toBe("Google");
expect(isaiMatch(AI_USER_AGENT_EXAMPLE)).toBe("https://openai.com/searchbot");
});
test("isaiMatches: find all patterns in bot user agent string", () => {
expect(isaiMatches(AI_USER_AGENT_EXAMPLE)).toContain("Google");
expect(isaiMatches(AI_USER_AGENT_EXAMPLE)).toHaveLength(4);
expect(isaiMatches(AI_USER_AGENT_EXAMPLE)).toContain("https://openai.com/searchbot");
expect(isaiMatches(AI_USER_AGENT_EXAMPLE)).toHaveLength(1);
});
test("isaiPattern: find first pattern in bot user agent string", () => {
expect(isaiPattern(AI_USER_AGENT_EXAMPLE)).toBe(
"(?<! (?:channel/|google/))google(?!(app|/google| pixel))",
"https://openai.com/searchbot",
);
});
test("isaiPatterns: find all patterns in bot user agent string", () => {
expect(isaiPatterns(AI_USER_AGENT_EXAMPLE)).toContain(
"(?<! (?:channel/|google/))google(?!(app|/google| pixel))",
"https://openai.com/searchbot",
);
expect(isaiPatterns(AI_USER_AGENT_EXAMPLE)).toHaveLength(4);
expect(isaiPatterns(AI_USER_AGENT_EXAMPLE)).toHaveLength(1);
});
test("createisai: create custom isai function with custom pattern", () => {
const customisai = createisai(/bot/i);
expect(customisai(AI_USER_AGENT_EXAMPLE)).toBe(true);
});
test("createisaiFromList: create custom isai function with custom pattern", () => {
const ChromeLighthouseUserAgentStrings: string[] = [
"mozilla/5.0 (macintosh; intel mac os x 10_15_7) applewebkit/537.36 (khtml, like gecko) chrome/94.0.4590.2 safari/537.36 chrome-lighthouse",
"mozilla/5.0 (linux; android 7.0; moto g (4)) applewebkit/537.36 (khtml, like gecko) chrome/94.0.4590.2 mobile safari/537.36 chrome-lighthouse",
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot",
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; GPTBot/1.1; +https://openai.com/gptbot",
];
const patternsToRemove: Set<string> = new Set(
ChromeLighthouseUserAgentStrings.map(isaiMatches).flat(),
Expand Down Expand Up @@ -98,7 +98,7 @@ describe("isai", () => {
(percent) => {
const ratio =
crawlers.filter((ua) => isaiNaive(ua)).length / crawlers.length;
expect(ratio).toBeLessThan(1);
expect(ratio).toBeLessThanOrEqual(1);
expect(ratio).toBeGreaterThan(percent / 100);
},
);
Expand Down

0 comments on commit 6c57dde

Please sign in to comment.