Skip to content

Commit

Permalink
feat: integrate adguard dialects
Browse files Browse the repository at this point in the history
  • Loading branch information
seia-soto committed Oct 15, 2024
1 parent 8a23810 commit 79e5d0d
Show file tree
Hide file tree
Showing 6 changed files with 396 additions and 36 deletions.
Binary file modified bun.lockb
Binary file not shown.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"test.e2e": "playwright test test/e2e/index.spec.js"
},
"dependencies": {
"@adguard/scriptlets": "^1.12.1",
"@adguard/tsurlfilter": "2.2.23",
"@eyeo/webext-ad-filtering-solution": "1.5.0"
},
Expand Down
62 changes: 53 additions & 9 deletions scripts/update.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import { writeFileSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { writeFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";

import adguardDialects from "@adguard/scriptlets/dist/redirects.json" with { type: "json" };

const CWD = dirname(fileURLToPath(import.meta.url));

async function downloadResource(resourceName) {
console.log("Downloading resources...");

const { revisions } = await fetch(
`https://cdn.ghostery.com/adblocker/resources/${resourceName}/metadata.json`,
).then((result) => {
Expand All @@ -20,20 +24,60 @@ async function downloadResource(resourceName) {
`https://cdn.ghostery.com/adblocker/resources/${resourceName}/${latestRevision}/list.txt`,
).then((result) => {
if (!result.ok) {
throw new Error(`Failed to fetch ${resourceName}: ${result.status}: ${result.statusText}`);
throw new Error(
`Failed to fetch ${resourceName}: ${result.status}: ${result.statusText}`,
);
}
return result.text();
});
}

/**
 * Builds the serialized resource-name mappings written to `src/mappings.json`.
 *
 * Each entry has the shape `[name, aliases]`, where `name` is the resource name
 * taken from the downloaded resources JSON and `aliases` lists every other name
 * that should resolve to it. Names from the AdGuard redirects list are merged
 * into the entry they correspond to.
 *
 * @param {string} data - Raw JSON text containing a top-level `redirects` array
 *   of `{ name, aliases? }` objects.
 * @param {Array<{title: string, aliases?: string[]}>} [dialects] - AdGuard
 *   redirect definitions to merge; defaults to the imported `adguardDialects`.
 * @returns {string} The mappings serialized as JSON with 2-space indentation.
 * @throws {SyntaxError} If `data` is not valid JSON.
 */
function extractRedirects(data, dialects = adguardDialects) {
  console.log("Extracting resources...");

  const resources = JSON.parse(data);
  const mappings = resources.redirects.map((redirect) => [
    redirect.name,
    // Copy the list so merging below never mutates the parsed input.
    [...(redirect.aliases ?? [])],
  ]);

  // Integrate adguard mappings
  for (const dialect of dialects) {
    // Skip adguard exclusives: without aliases there is nothing to link with.
    if (dialect.aliases === undefined) {
      continue;
    }

    const dialectNames = [dialect.title, ...dialect.aliases];

    // Find the first entry that shares any name with the adguard dialect.
    // The canonical name is compared as well as the aliases, otherwise a
    // resource whose aliases do not overlap would never be matched.
    const entry = mappings.find(([name, aliases]) =>
      dialectNames.some(
        (dialectName) => dialectName === name || aliases.includes(dialectName),
      ),
    );
    if (entry === undefined) {
      continue;
    }

    const [name, aliases] = entry;
    for (const dialectName of dialectNames) {
      // Never register the canonical name as an alias of itself.
      if (dialectName !== name && !aliases.includes(dialectName)) {
        aliases.push(dialectName);
      }
    }
  }

  return JSON.stringify(mappings, null, 2);
}

// Regenerate src/mappings.json from the latest published resource list.
// The resource is downloaded and extracted exactly once per run.
writeFileSync(
  join(CWD, "..", "src", "mappings.json"),
  extractRedirects(await downloadResource("ublock-resources-json")),
  "utf-8",
);
61 changes: 35 additions & 26 deletions src/converters/helpers.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import definition from '../mappings.json' with { type: 'json' }
import mappings from "../mappings.json" with { type: "json" };

function getPathBasename(path) {
const lastIndex = path.lastIndexOf('/');
const lastIndex = path.lastIndexOf("/");
if (lastIndex === -1) {
return path;
}
Expand All @@ -10,30 +10,33 @@ function getPathBasename(path) {

/**
 * Builds a lookup table from every resource alias to its preferred name.
 *
 * Entries are normally `[name, aliases]` pairs. The legacy flat shape
 * `[name, alias1, alias2, ...]` is accepted too, so a mappings file generated
 * before the format change still produces a correct table.
 *
 * @param {Array<Array<string | string[]>>} [definition] - Mapping entries;
 *   defaults to the imported `mappings` JSON.
 * @returns {Map<string, string>} Map keyed by alias, valued by preferred name.
 */
export function generateResourcesMapping(definition = mappings) {
  const resourcesMapping = new Map();
  for (const [name, ...rest] of definition) {
    // `rest` is either `[aliases[]]` (current shape) or the aliases themselves
    // (legacy shape); flattening one level normalizes both.
    for (const alias of rest.flat()) {
      resourcesMapping.set(alias, name);
    }
  }
  return resourcesMapping;
}

/**
 * Shorthand filter options mapped to the long-form option names they are
 * rewritten to when a filter is normalized.
 */
export const DEFAULT_PARAM_MAPPING = {
  "3p": "third-party",
  xhr: "xmlhttprequest",
  frame: "subdocument",
};
export const DEFAULT_RESOURCES_MAPPING = generateResourcesMapping();

export function normalizeFilter(filter, {
mapping = DEFAULT_PARAM_MAPPING,
resourcesMapping = DEFAULT_RESOURCES_MAPPING,
} = {}) {
export function normalizeFilter(
filter,
{
mapping = DEFAULT_PARAM_MAPPING,
resourcesMapping = DEFAULT_RESOURCES_MAPPING,
} = {},
) {
let [front, ...back] = filter.split("$");
let params = back.join(',').split(',');
let params = back.join(",").split(",");

params.forEach((param, index) => {
const [key, value] = param.split('=');
const [key, value] = param.split("=");
const alias = mapping[key];
if (alias) {
params[index] = value ? `${alias}=${value}` : alias;
Expand All @@ -45,38 +48,43 @@ export function normalizeFilter(filter, {
});

// by default easylist syntax is case-insensitive
if (!params.find(p => p === 'match-case')) {
if (!params.find((p) => p === "match-case")) {
front = front.toLowerCase();
}

// adguard converter doesn't work with $redirect with slash value
// replace possible $redirect params including a slash
const indexOfRedirect = params.findIndex(p => p.startsWith('redirect=') && p.includes('/'));
const indexOfRedirect = params.findIndex(
(p) => p.startsWith("redirect=") && p.includes("/"),
);
if (indexOfRedirect !== -1) {
const name = resourcesMapping.get(params[indexOfRedirect].slice(9));
if (name !== undefined) {
params[indexOfRedirect] = 'redirect=' + name;
params[indexOfRedirect] = "redirect=" + name;
}
}

const indexOfRedirectRule = params.findIndex(p => p.startsWith('redirect-rule=') && p.includes('/'));
const indexOfRedirectRule = params.findIndex(
(p) => p.startsWith("redirect-rule=") && p.includes("/"),
);
if (indexOfRedirectRule !== -1) {
const name = resourcesMapping.get(params[indexOfRedirectRule].slice(14));
if (name !== undefined) {
params[indexOfRedirectRule] = 'redirect-rule=' + name;
params[indexOfRedirectRule] = "redirect-rule=" + name;
}
}

if (back.length === 0) {
return front;
}

return `${front}$${params.join(',')}`;
return `${front}$${params.join(",")}`;
}

export function normalizeRule(rule, {
resourcesMapping = DEFAULT_RESOURCES_MAPPING
} = {}) {
export function normalizeRule(
rule,
{ resourcesMapping = DEFAULT_RESOURCES_MAPPING } = {},
) {
if (!rule) {
return;
}
Expand Down Expand Up @@ -113,16 +121,17 @@ export function normalizeRule(rule, {
delete newRule.condition.domains;
}

if (newRule.action && newRule.action.type === 'redirect') {
if (newRule.action && newRule.action.type === "redirect") {
const filename = getPathBasename(newRule.action.redirect.extensionPath);
const preferredFilename =
resourcesMapping.get(filename) ??
// try searching without an extension
// adguard converter attaches a file extension at the end
resourcesMapping.get(filename.slice(0, filename.lastIndexOf('.')));
resourcesMapping.get(filename.slice(0, filename.lastIndexOf(".")));
if (preferredFilename !== undefined) {
newRule.action.redirect.extensionPath =
newRule.action.redirect.extensionPath.slice(0, -filename.length) + preferredFilename;
newRule.action.redirect.extensionPath.slice(0, -filename.length) +
preferredFilename;
}
}

Expand Down
Loading

0 comments on commit 79e5d0d

Please sign in to comment.