diff --git a/js/Cargo.lock b/js/Cargo.lock
index 72123f92..a41a5964 100644
--- a/js/Cargo.lock
+++ b/js/Cargo.lock
@@ -4,15 +4,14 @@ version = 4
 
 [[package]]
 name = "adblock"
-version = "0.9.5"
+version = "0.9.6"
 dependencies = [
  "addr",
  "base64",
- "bitflags",
+ "bitflags 2.9.0",
  "cssparser",
  "idna",
  "itertools",
- "lifeguard",
  "memchr",
  "once_cell",
  "percent-encoding",
@@ -63,9 +62,9 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
 
 [[package]]
 name = "base64"
-version = "0.13.0"
+version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
 [[package]]
 name = "bitflags"
@@ -73,6 +72,15 @@ version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
+[[package]]
+name = "bitflags"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "byteorder"
 version = "1.4.3"
@@ -362,12 +370,6 @@ dependencies = [
  "winapi",
 ]
 
-[[package]]
-name = "lifeguard"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89be94dbd775db37b46ca4f4bf5cf89adfb13ba197bfbcb69b2122848ee73c26"
-
 [[package]]
 name = "litemap"
 version = "0.7.4"
@@ -674,9 +676,9 @@ checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
 
 [[package]]
 name = "seahash"
-version = "3.0.6"
+version = "4.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18d6061ff4917fac173fa07b839c8c3f805c0bf3801c52499cc85cdbad8c28df"
+checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
 
 [[package]]
 name = "selectors"
@@ -684,7 +686,7 @@ version = "0.24.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0c37578180969d00692904465fb7f6b3d50b9a2b952b87c23d0e2e5cb5013416"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
  "cssparser",
  "derive_more",
  "fxhash",
diff --git a/js/check.js b/js/check.js
new file mode 100755
index 00000000..521352ac
--- /dev/null
+++ b/js/check.js
@@ -0,0 +1,272 @@
+#!/usr/bin/env node
+
+const { text } = require('node:stream/consumers')
+const readline = require('node:readline')
+const path = require('node:path')
+const fs = require('node:fs')
+
+const { ArgumentParser, FileType } = require('argparse')
+
+const adblockRust = require('./index.js')
+const adblockRustPackage = require('./../package.json')
+
+// These are defined by different content filter projects (AdBlock Plus,
+// uBlockOrigin, AdGuard, etc.).
+// For example, https://github.com/gorhill/uBlock/wiki/Static-filter-syntax
+const filterListRequestTypes = [
+  'beacon',
+  'csp_report',
+  'document',
+  'font',
+  'image',
+  'media',
+  'object',
+  'ping',
+  'script',
+  'stylesheet',
+  'sub_frame',
+  'websocket',
+  'xhr',
+  'other',
+  'speculative',
+  'web_manifest',
+  'xbl',
+  'xml_dtd',
+  'xslt'
+]
+
+// These values are defined by Blink, in `Resource::ResourceTypeToString`.
+// See third_party/blink/renderer/platform/loader/fetch/resource.h.
+// The OTHER catch-all case covers the additional types
+// defined in `blink::Resource::InitiatorTypeNameToString`.
+//
+// See https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/platform/loader/fetch/resource.cc
+/* eslint-disable quote-props */
+const chromiumRequestTypeMapping = {
+  'Attribution resource': 'other',
+  'Audio': 'media',
+  'CSS resource': 'stylesheet',
+  'CSS stylesheet': 'stylesheet',
+  'Dictionary': 'other',
+  'Document': 'document',
+  'Fetch': 'xhr',
+  'Font': 'font',
+  'Icon': 'other',
+  'Image': 'image',
+  'Internal resource': 'other',
+  'Link element resource': 'other',
+  'Link prefetch resource': 'speculative',
+  'Manifest': 'web_manifest',
+  'Mock': 'other',
+  'Other resource': 'other',
+  'Processing instruction': 'other',
+  'Script': 'script',
+  'SpeculationRule': 'speculative',
+  'SVG document': 'media',
+  'SVG Use element resource': 'media',
+  'Text track': 'other',
+  'Track': 'other',
+  'User Agent CSS resource': 'stylesheet',
+  'Video': 'media',
+  'XML resource': 'document',
+  'XMLHttpRequest': 'xhr',
+  'XSL stylesheet': 'xslt'
+}
+/* eslint-enable quote-props */
+const chromiumRequestTypes = Object.keys(chromiumRequestTypeMapping)
+const requestTypeOptions = filterListRequestTypes.concat(chromiumRequestTypes)
+requestTypeOptions.sort()
+
+const parser = new ArgumentParser({
+  add_help: true,
+  description: 'Check whether a URL would be blocked by given filter list rules'
+})
+parser.add_argument('-v', '--version', {
+  action: 'version',
+  version: adblockRustPackage.version
+})
+
+parser.add_argument('--requests', {
+  type: FileType('r'),
+  default: process.stdin,
+  help: 'Path to a file of requests to check filter list rules against (or, ' +
+    'by default, STDIN). This input should be lines of JSON documents, ' +
+    'one document per line. This JSON text must have the following keys: ' +
+    '"url", "context", and "type", which corresponds to the --url, ' +
+    '--context, and --type arguments.'
+})
+
+parser.add_argument('--url', {
+  type: URL,
+  help: 'The full URL to check against the provided filter lists.'
+})
+parser.add_argument('--context', {
+  type: URL,
+  help: 'The security context the request occurred in, as a full URL'
+})
+parser.add_argument('--type', {
+  help: 'The type of the request, using either i. the types defined by ' +
+    'filter list projects (which are all in lowercase, e.g., "xhr" or ' +
+    '"stylesheet"), or ii. the types defined in the Chromium source ' +
+    '(which start with an uppercase character, e.g., "XMLHttpRequest" or ' +
+    '"CSS stylesheet")',
+  choices: requestTypeOptions
+})
+
+parser.add_argument('--rules', {
+  type: FileType('r'),
+  nargs: '*',
+  help: 'One or more paths to files of filter list rules to check the ' +
+    'request against. By default uses bundled old-and-outdated versions ' +
+    'of easylist and easyprivacy'
+})
+parser.add_argument('--verbose', {
+  default: false,
+  action: 'store_true',
+  help: 'Print information about what rule(s) the request matched.'
+})
+
+// Checks one request against `engine`, returning the result of
+// `engine.check`, or null (after logging to STDERR) if the check throws.
+//
+// `request` and `requestContext` are stringified before use, so each may be
+// given as a string or as a URL object. `requestType` may be either a filter
+// list request type or a Chromium request type; Chromium types are
+// translated to their filter list equivalent first.
+const checkRequest = (engine, request, requestType, requestContext) => {
+  // Only consult the mapping's own keys, so that a type read from the
+  // requests input (e.g., "constructor") can't resolve to something
+  // inherited from Object.prototype.
+  const isChromiumType = Object.prototype.hasOwnProperty.call(
+    chromiumRequestTypeMapping, requestType)
+  const requestTypeUnified = isChromiumType
+    ? chromiumRequestTypeMapping[requestType]
+    : requestType
+  try {
+    return engine.check(
+      request.toString(),
+      requestContext.toString(),
+      requestTypeUnified,
+      true
+    )
+  } catch (e) {
+    console.error(`Error checking request: url:${request}, ` +
+      `context:${requestContext}, type:${requestTypeUnified}`)
+    console.error('adblock-rust error: ' + e.toString())
+    return null
+  }
+}
+
+;(async () => {
+  const args = parser.parse_args()
+
+  const filterSet = new adblockRust.FilterSet(true)
+  let ruleStreams
+  if (args.rules) {
+    ruleStreams = args.rules
+  } else {
+    // Locate the bundled lists relative to this script, rather than the
+    // current working directory, so the defaults work from any directory.
+    const defaultListsDir = path.join(
+      __dirname, '..', 'data', 'easylist.to', 'easylist')
+    const defaultLists = [
+      path.join(defaultListsDir, 'easylist.txt'),
+      path.join(defaultListsDir, 'easyprivacy.txt')
+    ]
+    ruleStreams = defaultLists.map((x) => fs.createReadStream(x, {}))
+  }
+
+  for (const aRuleStream of ruleStreams) {
+    const rulesText = await text(aRuleStream)
+    filterSet.addFilters(rulesText.split('\n'))
+  }
+
+  const engine = new adblockRust.Engine(filterSet, true)
+  const checkRequestFunc = checkRequest.bind(undefined, engine)
+
+  // This code can either be invoked to consider one request, using command
+  // line flags, or read request descriptions from a handle. If
+  // any of the following arguments were provided, then we assume we're in
+  // "arguments" mode, otherwise we stream request descriptions from the
+  // --requests argument.
+  const requestDescArgs = ['url', 'context', 'type']
+  const numRequestDescArgs = requestDescArgs.reduce((accumulator, curValue) => {
+    return (args[curValue] !== undefined) ? accumulator + 1 : accumulator
+  }, 0)
+  const isReadingRequestFromArgs = (numRequestDescArgs > 0)
+
+  if (isReadingRequestFromArgs) {
+    if (numRequestDescArgs < requestDescArgs.length) {
+      throw new Error(
+        '--url, --context, and --type must be either all provided, or none of ' +
+        'them provided.')
+    }
+    const result = checkRequestFunc(args.url, args.type, args.context)
+    if (result === null) {
+      process.exit(1)
+    }
+    const resultMatched = result.matched
+    console.log(args.verbose ? result : resultMatched)
+    process.exit(0)
+  }
+
+  // Otherwise, we're in "streaming" mode, and we read requests off whatever
+  // was provided in --requests (which is either the path to a file, or
+  // stdin).
+  const readlineInterface = readline.createInterface({
+    input: args.requests,
+    terminal: false
+  })
+
+  // Errors thrown from inside an event listener can't be handled by anything
+  // in this script, so malformed input is reported on STDERR directly, and
+  // then we stop with a failing exit code.
+  const rejectRequestLine = (msg) => {
+    console.error(msg)
+    process.exit(1)
+  }
+
+  let anyErrors = false
+  readlineInterface.on('line', (line) => {
+    // Blank lines don't describe a request, so there is nothing to check.
+    if (line.trim() === '') {
+      return
+    }
+
+    let requestData
+    try {
+      requestData = JSON.parse(line)
+    } catch (e) {
+      rejectRequestLine('Invalid JSON in requests input: ' + line)
+    }
+
+    // Valid JSON isn't necessarily an object (e.g., `null` or `42`).
+    const isObject = (requestData !== null && typeof requestData === 'object')
+    if (!isObject ||
+        requestData.url === undefined ||
+        requestData.type === undefined ||
+        requestData.context === undefined) {
+      rejectRequestLine('Request description does not include all three ' +
+        'required keys, "url", "type", "context".\n' + line)
+    }
+
+    const result = checkRequestFunc(
+      requestData.url, requestData.type, requestData.context)
+    if (result === null) {
+      anyErrors = true
+    } else {
+      const resultMatched = result.matched
+      console.log(args.verbose ? JSON.stringify(result) : resultMatched)
+    }
+  })
+
+  readlineInterface.on('close', () => {
+    process.exit(anyErrors === true ? 1 : 0)
+  })
+})().catch((err) => {
+  // Report startup failures (e.g., an incomplete set of request arguments, or
+  // an unreadable rules file) as a plain message with a failing exit code,
+  // rather than as an unhandled promise rejection.
+  console.error(err instanceof Error ? err.message : String(err))
+  process.exit(1)
+})
diff --git a/package-lock.json b/package-lock.json
index 7fab6f91..f624906a 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,9 +10,15 @@
       "hasInstallScript": true,
       "license": "MPL-2.0",
       "dependencies": {
+        "argparse": "^2.0.1",
         "cargo-cp-artifact": "^0.1"
       }
     },
+    "node_modules/argparse": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
+      "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
+    },
     "node_modules/cargo-cp-artifact": {
       "version": "0.1.9",
       "resolved": "https://registry.npmjs.org/cargo-cp-artifact/-/cargo-cp-artifact-0.1.9.tgz",
@@ -23,6 +29,11 @@
     }
   },
   "dependencies": {
+    "argparse": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
+      "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
+    },
     "cargo-cp-artifact": {
       "version": "0.1.9",
       "resolved": "https://registry.npmjs.org/cargo-cp-artifact/-/cargo-cp-artifact-0.1.9.tgz",
diff --git a/package.json b/package.json
index 53d450fd..d89e51a5 100644
--- a/package.json
+++ b/package.json
@@ -28,12 +28,14 @@
     "url": "git+https://github.com/brave/adblock-rust.git"
   },
   "dependencies": {
+    "argparse": "^2.0.1",
     "cargo-cp-artifact": "^0.1"
   },
   "scripts": {
     "build": "cd js && cargo-cp-artifact -nc index.node -- cargo build --message-format=json-render-diagnostics",
     "build-debug": "npm run build --",
     "build-release": "npm run build -- --release",
+    "check": "node ./js/check.js",
     "update-lists": "node data/update-lists.js",
     "postinstall": "npm run build-release",
     "test": "cargo test"