diff --git a/Dockerfile b/Dockerfile index bfe9f0030..e0da9f691 100644 --- a/Dockerfile +++ b/Dockerfile @@ -50,6 +50,8 @@ RUN ln -s /app/dist/main.js /usr/bin/crawl; \ ln -s /app/dist/main.js /usr/bin/qa; \ ln -s /app/dist/create-login-profile.js /usr/bin/create-login-profile +RUN mkdir -p /app/behaviors + WORKDIR /crawls # enable to test custom behaviors build (from browsertrix-behaviors) diff --git a/docs/docs/user-guide/cli-options.md b/docs/docs/user-guide/cli-options.md index 7011ecc2f..5c0ef5021 100644 --- a/docs/docs/user-guide/cli-options.md +++ b/docs/docs/user-guide/cli-options.md @@ -251,9 +251,11 @@ Options: ailOnFailedSeed may result in crawl failing due to non-200 responses [boolean] [default: false] - --customBehaviors injects a custom behavior file or se - t of behavior files in a directory - [string] + --customBehaviors Custom behavior files to inject. Val + ues can be URLs, paths to individual + behavior files, or paths to a direct + ory of behavior files. + [array] [default: []] --debugAccessRedis if set, runs internal redis without protected mode to allow external acc ess (for debugging) [boolean] diff --git a/package.json b/package.json index 5d0c0f082..a42ae2a70 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "browsertrix-crawler", - "version": "1.3.4", + "version": "1.4.0-beta.0", "main": "browsertrix-crawler", "type": "module", "repository": "https://github.com/webrecorder/browsertrix-crawler", @@ -30,7 +30,7 @@ "p-queue": "^7.3.4", "pixelmatch": "^5.3.0", "pngjs": "^7.0.0", - "puppeteer-core": "^23.5.1", + "puppeteer-core": "^23.6.0", "sax": "^1.3.0", "sharp": "^0.32.6", "tsc": "^2.0.4", diff --git a/src/crawler.ts b/src/crawler.ts index 4b547cfb4..5e5e45305 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -39,7 +39,7 @@ import { runWorkers, } from "./util/worker.js"; import { sleep, timedRun, secondsElapsed } from "./util/timing.js"; -import { collectAllFileSources, getInfoString } from "./util/file_reader.js"; +import { collectCustomBehaviors, getInfoString } from "./util/file_reader.js"; import { Browser } from "./util/browser.js"; @@ -175,6 +175,7 @@ export class Crawler { finalExit = false; uploadAndDeleteLocal = false; done = false; + postCrawling = false; textInPages = false; @@ -510,7 +511,7 @@ export class Crawler { } if (this.params.customBehaviors) { - this.customBehaviors = this.loadCustomBehaviors( + this.customBehaviors = await this.loadCustomBehaviors( this.params.customBehaviors, ); } @@ -800,10 +801,10 @@ self.__bx_behaviors.selectMainBehavior(); }); } - loadCustomBehaviors(filename: string) { + async loadCustomBehaviors(sources: string[]) { let str = ""; - for (const { contents } of collectAllFileSources(filename, ".js")) { + for (const { contents } of await collectCustomBehaviors(sources)) { str += `self.__bx_behaviors.load(${contents});\n`; } @@ -811,13 +812,13 @@ self.__bx_behaviors.selectMainBehavior(); } async checkBehaviorScripts(cdp: CDPSession) { - const filename = this.params.customBehaviors; + const sources = this.params.customBehaviors; - if (!filename) { + if (!sources) { return; } - for (const { path, contents } of collectAllFileSources(filename, ".js")) { + for (const { path, contents } of await collectCustomBehaviors(sources)) { await this.browser.checkScript(cdp, path, contents); } } @@ -1536,12 +1537,13 @@ self.__bx_behaviors.selectMainBehavior(); } async postCrawl() { + this.postCrawling = true; + logger.info("Crawling done"); + if (this.params.combineWARC && !this.params.dryRun) { await 
this.combineWARC(); } - logger.info("Crawling done"); - if ( (this.params.generateCDX || this.params.generateWACZ) && !this.params.dryRun diff --git a/src/util/argParser.ts b/src/util/argParser.ts index 73abf4f47..390279eac 100644 --- a/src/util/argParser.ts +++ b/src/util/argParser.ts @@ -563,8 +563,10 @@ class ArgParser { customBehaviors: { describe: - "injects a custom behavior file or set of behavior files in a directory", - type: "string", + "Custom behavior files to inject. Values can be URLs, paths to individual behavior files, or paths" + + " to a directory of behavior files", + type: "array", + default: [], }, debugAccessRedis: { diff --git a/src/util/browser.ts b/src/util/browser.ts index df4a22932..7d81f0579 100644 --- a/src/util/browser.ts +++ b/src/util/browser.ts @@ -6,7 +6,7 @@ import { Readable } from "node:stream"; import os from "os"; import path from "path"; -import { LogContext, logger } from "./logger.js"; +import { formatErr, LogContext, logger } from "./logger.js"; import { initStorage } from "./storage.js"; import { DISPLAY, type ServiceWorkerOpt } from "./constants.js"; @@ -126,7 +126,7 @@ export class Browser { ? undefined : (target) => this.targetFilter(target), }; - await this._init(launchOpts, ondisconnect, recording); + await this._init(launchOpts, ondisconnect); } targetFilter(target: Target) { @@ -392,7 +392,6 @@ export class Browser { launchOpts: PuppeteerLaunchOptions, // eslint-disable-next-line @typescript-eslint/ban-types ondisconnect: Function | null = null, - recording: boolean, ) { this.browser = await puppeteer.launch(launchOpts); @@ -400,9 +399,7 @@ export class Browser { this.firstCDP = await target.createCDPSession(); - if (recording) { - await this.serviceWorkerFetch(); - } + await this.browserContextFetch(); if (ondisconnect) { this.browser.on("disconnected", (err) => ondisconnect(err)); @@ -479,35 +476,24 @@ export class Browser { return { page, cdp }; } - async serviceWorkerFetch() { + async browserContextFetch() { if (!this.firstCDP) { return; } this.firstCDP.on("Fetch.requestPaused", async (params) => { - const { frameId, requestId, networkId, request } = params; + const { frameId, requestId, request } = params; + + const { url } = request; if (!this.firstCDP) { throw new Error("CDP missing"); } - if (networkId) { - try { - await this.firstCDP.send("Fetch.continueResponse", { requestId }); - } catch (e) { - logger.warn( - "continueResponse failed", - { url: request.url }, - "recorder", - ); - } - return; - } - let foundRecorder = null; for (const recorder of this.recorders) { - if (recorder.swUrls.has(request.url)) { + if (recorder.swUrls.has(url)) { recorder.swFrameIds.add(frameId); } @@ -520,16 +506,16 @@ export class Browser { if (!foundRecorder) { logger.warn( "Skipping URL from unknown frame", - { url: request.url, frameId }, + { url, frameId }, "recorder", ); try { await this.firstCDP.send("Fetch.continueResponse", { requestId }); } catch (e) { - logger.warn( + logger.debug( "continueResponse failed", - { url: request.url }, + { url, ...formatErr(e), from: "serviceWorker" }, "recorder", ); } diff --git a/src/util/file_reader.ts b/src/util/file_reader.ts index 45baf9b68..2c7369d37 100644 --- a/src/util/file_reader.ts +++ b/src/util/file_reader.ts @@ -1,27 +1,83 @@ -import fs from "fs"; import fsp from "fs/promises"; import path from "path"; +import crypto from "crypto"; +import { fetch } from "undici"; + +import { logger } from "./logger.js"; const MAX_DEPTH = 2; -export function collectAllFileSources( +// Add .ts to allowed extensions 
when we can support it +const ALLOWED_EXTS = [".js"]; + +export type FileSource = { + path: string; + contents: string; +}; + +export type FileSources = FileSource[]; + +export async function collectCustomBehaviors( + sources: string[], +): Promise { + const collectedSources: FileSources = []; + + for (const fileSource of sources) { + if (fileSource.startsWith("http")) { + const newSources = await collectOnlineBehavior(fileSource); + collectedSources.push(...newSources); + } else { + const newSources = await collectLocalPathBehaviors(fileSource); + collectedSources.push(...newSources); + } + } + + return collectedSources; +} + +async function collectOnlineBehavior(url: string): Promise { + const filename = crypto.randomBytes(4).toString("hex") + ".js"; + const behaviorFilepath = `/app/behaviors/${filename}`; + + try { + const res = await fetch(url); + const fileContents = await res.text(); + await fsp.writeFile(behaviorFilepath, fileContents); + logger.info( + "Custom behavior file downloaded", + { url, path: behaviorFilepath }, + "behavior", + ); + return await collectLocalPathBehaviors(behaviorFilepath); + } catch (e) { + logger.error( + "Error downloading custom behavior from URL", + { url, error: e }, + "behavior", + ); + } + return []; +} + +async function collectLocalPathBehaviors( fileOrDir: string, - ext?: string, depth = 0, -): { path: string; contents: string }[] { +): Promise { const resolvedPath = path.resolve(fileOrDir); if (depth >= MAX_DEPTH) { - console.warn( - `WARN: MAX_DEPTH of ${MAX_DEPTH} reached traversing "${resolvedPath}"`, + logger.warn( + `Max depth of ${MAX_DEPTH} reached traversing "${resolvedPath}"`, + {}, + "behavior", ); return []; } - const stat = fs.statSync(resolvedPath); + const stat = await fsp.stat(resolvedPath); - if (stat.isFile() && (ext === null || path.extname(resolvedPath) === ext)) { - const contents = fs.readFileSync(resolvedPath); + if (stat.isFile() && ALLOWED_EXTS.includes(path.extname(resolvedPath))) { + const contents = await fsp.readFile(resolvedPath); return [ { path: resolvedPath, @@ -30,24 +86,28 @@ export function collectAllFileSources( ]; } - if (stat.isDirectory()) { - const files = fs.readdirSync(resolvedPath); - return files.reduce( - (acc: { path: string; contents: string }[], next: string) => { - const nextPath = path.join(fileOrDir, next); - return [...acc, ...collectAllFileSources(nextPath, ext, depth + 1)]; - }, - [], + const behaviors: FileSources = []; + + const isDir = stat.isDirectory(); + + if (!isDir && depth === 0) { + logger.warn( + "The provided path is not a .js file or directory", + { path: resolvedPath }, + "behavior", ); } - if (depth === 0) { - console.warn( - `WARN: The provided path "${resolvedPath}" is not a .js file or directory.`, - ); + if (isDir) { + const files = await fsp.readdir(resolvedPath); + for (const file of files) { + const filePath = path.join(resolvedPath, file); + const newBehaviors = await collectLocalPathBehaviors(filePath, depth + 1); + behaviors.push(...newBehaviors); + } } - return []; + return behaviors; } export async function getInfoString() { diff --git a/src/util/recorder.ts b/src/util/recorder.ts index 3eabdf7b5..be36cc3b6 100644 --- a/src/util/recorder.ts +++ b/src/util/recorder.ts @@ -122,6 +122,7 @@ export class Recorder { pendingRequests!: Map; skipIds!: Set; pageInfo!: PageInfoRecord; + skipRangeUrls!: Map; swTargetId?: string | null; swFrameIds = new Set(); @@ -130,7 +131,8 @@ export class Recorder { // TODO: Fix this the next time the file is edited. 
// eslint-disable-next-line @typescript-eslint/no-explicit-any logDetails: Record = {}; - skipping = false; + + pageFinished = false; gzip = true; @@ -169,6 +171,7 @@ export class Recorder { frameIdToExecId: Map; }) { this.frameIdToExecId = frameIdToExecId; + this.pageFinished = false; // Fetch cdp.on("Fetch.requestPaused", (params) => { @@ -407,6 +410,8 @@ export class Recorder { logNetwork("Network.loadingFailed", { requestId, url, + errorText, + type, ...this.logDetails, }); @@ -426,15 +431,14 @@ export class Recorder { case "net::ERR_ABORTED": // check if this is a false positive -- a valid download that's already been fetched // the abort is just for page, but download will succeed - if (type === "Document" && reqresp.isValidBinary()) { + if ( + (type === "Document" || type === "Media") && + reqresp.isValidBinary() + ) { this.removeReqResp(requestId); return this.serializeToWARC(reqresp); - } else if ( - url && - reqresp.requestHeaders && - reqresp.requestHeaders["x-browsertrix-fetch"] - ) { - delete reqresp.requestHeaders["x-browsertrix-fetch"]; + } else if (url && reqresp.requestHeaders && type === "Media") { + this.removeReqResp(requestId); logger.warn( "Attempt direct fetch of failed request", { url, ...this.logDetails }, @@ -453,7 +457,7 @@ export class Recorder { default: logger.warn( "Request failed", - { url, errorText, ...this.logDetails }, + { url, errorText, type, status: reqresp.status, ...this.logDetails }, "recorder", ); } @@ -495,7 +499,7 @@ export class Recorder { async handleRequestPaused( params: Protocol.Fetch.RequestPausedEvent, cdp: CDPSession, - isSWorker = false, + isBrowserContext = false, ) { const { requestId, @@ -520,10 +524,13 @@ export class Recorder { if ( responseStatusCode && !responseErrorReason && - !this.shouldSkip(headers, url, method, resourceType) && - !(isSWorker && networkId) + !this.shouldSkip(headers, url, method, resourceType) ) { - continued = await this.handleFetchResponse(params, cdp, isSWorker); + continued = await this.handleFetchResponse( + params, + cdp, + isBrowserContext, + ); } } catch (e) { logger.error( @@ -549,7 +556,7 @@ export class Recorder { async handleFetchResponse( params: Protocol.Fetch.RequestPausedEvent, cdp: CDPSession, - isSWorker: boolean, + isBrowserContext: boolean, ) { const { request } = params; const { url } = request; @@ -610,21 +617,44 @@ export class Recorder { return false; } else { - logger.debug( - "Skip 206 Response", - { range, contentLen, url, ...this.logDetails }, - "recorder", - ); + // logger.debug( + // "Skip 206 Response", + // { range, contentLen, url, ...this.logDetails }, + // "recorder", + // ); this.removeReqResp(networkId); + const count = this.skipRangeUrls.get(url) || 0; + if (count > 2) { + // just fail additional range requests to save bandwidth, as these are not being recorded + await cdp.send("Fetch.failRequest", { + requestId, + errorReason: "BlockedByResponse", + }); + return true; + } + this.skipRangeUrls.set(url, count + 1); return false; } } const reqresp = this.pendingReqResp(networkId); + if (!reqresp) { return false; } + // indicate that this is intercepted in the page context + if (!isBrowserContext) { + reqresp.inPageContext = true; + } + + // Already being handled by a different handler + if (reqresp.fetchContinued) { + return false; + } + + reqresp.fetchContinued = true; + if ( url === this.pageUrl && (!this.pageInfo.ts || @@ -643,12 +673,6 @@ export class Recorder { if (this.noResponseForStatus(responseStatusCode)) { reqresp.payload = new Uint8Array(); - - if (isSWorker) { 
- this.removeReqResp(networkId); - await this.serializeToWARC(reqresp); - } - return false; } @@ -656,13 +680,13 @@ export class Recorder { let streamingConsume = false; - // if contentLength is large or unknown, do streaming, unless its an essential resource - // in which case, need to do a full fetch either way - // don't count non-200 responses which may not have content-length if ( - (contentLen < 0 || contentLen > MAX_BROWSER_DEFAULT_FETCH_SIZE) && - responseStatusCode === 200 && - !this.isEssentialResource(reqresp.resourceType, mimeType) + this.shouldStream( + contentLen, + responseStatusCode || 0, + reqresp.resourceType || "", + mimeType, + ) ) { const opts: ResponseStreamAsyncFetchOptions = { reqresp, @@ -724,9 +748,9 @@ export class Recorder { const rewritten = await this.rewriteResponse(reqresp, mimeType); - // if in service worker, serialize here - // as won't be getting a loadingFinished message - if (isSWorker && reqresp.payload) { + // if in browser context, and not also intercepted in page context + // serialize here, as won't be getting a loadingFinished message for it + if (isBrowserContext && !reqresp.inPageContext && reqresp.payload) { this.removeReqResp(networkId); await this.serializeToWARC(reqresp); } @@ -794,7 +818,8 @@ export class Recorder { } this.pendingRequests = new Map(); this.skipIds = new Set(); - this.skipping = false; + this.skipRangeUrls = new Map(); + this.pageFinished = false; this.pageInfo = { pageid, urls: {}, @@ -861,8 +886,14 @@ export class Recorder { let numPending = this.pendingRequests.size; - while (numPending && !this.crawler.interrupted) { - const pending = []; + let pending = []; + while ( + numPending && + !this.pageFinished && + !this.crawler.interrupted && + !this.crawler.postCrawling + ) { + pending = []; for (const [requestId, reqresp] of this.pendingRequests.entries()) { const url = reqresp.url || ""; const entry: { @@ -892,11 +923,24 @@ export class Recorder { await sleep(5.0); numPending = this.pendingRequests.size; } + + if (this.pendingRequests.size) { + logger.warn( + "Dropping timed out requests", + { numPending, pending, ...this.logDetails }, + "recorder", + ); + for (const requestId of this.pendingRequests.keys()) { + this.removeReqResp(requestId); + } + } } async onClosePage() { // Any page-specific handling before page is closed. 
this.frameIdToExecId = null; + + this.pageFinished = true; } async onDone(timeout: number) { @@ -1019,7 +1063,7 @@ export class Recorder { } } - isEssentialResource(resourceType: string | undefined, contentType: string) { + isEssentialResource(resourceType: string, contentType: string) { if (resourceType === "script" || resourceType === "stylesheet") { return true; } @@ -1031,6 +1075,41 @@ export class Recorder { return false; } + shouldStream( + contentLength: number, + responseStatusCode: number, + resourceType: string, + mimeType: string, + ) { + // if contentLength is too large even for rewriting, always stream, will not do rewriting + // even if text + if (contentLength > MAX_TEXT_REWRITE_SIZE) { + return true; + } + + // if contentLength larger but is essential resource, do stream + // otherwise full fetch for rewriting + if ( + contentLength > MAX_BROWSER_DEFAULT_FETCH_SIZE && + !this.isEssentialResource(resourceType, mimeType) + ) { + return true; + } + + // if contentLength is unknown, also stream if its an essential resource and not 3xx / 4xx / 5xx + // status code, as these codes may have no content-length, and are likely small + if ( + contentLength < 0 && + !this.isEssentialResource(resourceType, mimeType) && + responseStatusCode >= 200 && + responseStatusCode < 300 + ) { + return true; + } + + return false; + } + protected getMimeType( headers?: Protocol.Fetch.HeaderEntry[] | { name: string; value: string }[], ) { @@ -1089,10 +1168,6 @@ export class Recorder { logNetwork("Skipping ignored id", { requestId }); return null; } - if (this.skipping) { - //logger.debug("Skipping request, page already finished", this.logDetails, "recorder"); - return null; - } const reqresp = new RequestResponseInfo(requestId); this.pendingRequests.set(requestId, reqresp); return reqresp; @@ -1395,7 +1470,7 @@ class AsyncFetcher { reqresp.payload = Buffer.concat(buffers, currSize); externalBuffer.buffers = [reqresp.payload]; } else if (fh) { - logger.warn( + logger.debug( "Large payload written to WARC, but not returned to browser (would require rereading into memory)", { url, actualSize: reqresp.readSize, maxSize: this.maxFetchSize }, "recorder", diff --git a/src/util/reqresp.ts b/src/util/reqresp.ts index 1b3b2d58f..5548cb5b9 100644 --- a/src/util/reqresp.ts +++ b/src/util/reqresp.ts @@ -49,6 +49,12 @@ export class RequestResponseInfo { payload?: Uint8Array; isRemoveRange = false; + // fetchContinued - avoid duplicate fetch response handling + fetchContinued = false; + + // is handled in page context + inPageContext = false; + // misc fromServiceWorker = false; fromCache = false; diff --git a/tests/add-exclusion.test.js b/tests/add-exclusion.test.js index 71a1d240f..861aa9072 100644 --- a/tests/add-exclusion.test.js +++ b/tests/add-exclusion.test.js @@ -16,7 +16,7 @@ test("dynamically add exclusion while crawl is running", async () => { try { exec( - "docker run -p 36382:6379 -e CRAWL_ID=test -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection add-exclusion --url https://webrecorder.net/ --scopeType prefix --limit 20 --logging debug --debugAccessRedis", + "docker run -p 36382:6379 -e CRAWL_ID=test -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection add-exclusion --url https://old.webrecorder.net/ --scopeType prefix --limit 20 --logging debug --debugAccessRedis", { shell: "/bin/bash" }, callback, ); diff --git a/tests/brave-query-redir.test.js 
b/tests/brave-query-redir.test.js index d07c2ab51..ff5d9d721 100644 --- a/tests/brave-query-redir.test.js +++ b/tests/brave-query-redir.test.js @@ -4,7 +4,7 @@ import { execSync } from "child_process"; test("check that gclid query URL is automatically redirected to remove it", async () => { try { execSync( - "docker run --rm -v $PWD/test-crawls:/crawls -i webrecorder/browsertrix-crawler crawl --url 'https://webrecorder.net/about?gclid=abc' --collection test-brave-redir --behaviors \"\" --limit 1 --generateCDX"); + "docker run --rm -v $PWD/test-crawls:/crawls -i webrecorder/browsertrix-crawler crawl --url 'https://old.webrecorder.net/about?gclid=abc' --collection test-brave-redir --behaviors \"\" --limit 1 --generateCDX"); } catch (error) { console.log(error.stderr); @@ -23,9 +23,9 @@ test("check that gclid query URL is automatically redirected to remove it", asyn for (const line of lines) { const json = line.split(" ").slice(2).join(" "); const data = JSON.parse(json); - if (data.url === "https://webrecorder.net/about?gclid=abc" && data.status === "307") { + if (data.url === "https://old.webrecorder.net/about?gclid=abc" && data.status === "307") { redirectFound = true; - } else if (data.url === "https://webrecorder.net/about" && data.status === "200") { + } else if (data.url === "https://old.webrecorder.net/about" && data.status === "200") { responseFound = true; } if (responseFound && redirectFound) { diff --git a/tests/custom-behavior.test.js b/tests/custom-behavior.test.js index 29d28484d..2bf061545 100644 --- a/tests/custom-behavior.test.js +++ b/tests/custom-behavior.test.js @@ -1,16 +1,16 @@ import child_process from "child_process"; -test("test custom behaviors", async () => { +test("test custom behaviors from local filepath", async () => { const res = child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page", + "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page", ); const log = res.toString(); - // custom behavior ran for example.com + // custom behavior ran for specs.webrecorder.net expect( log.indexOf( - '{"state":{},"msg":"test-stat","page":"https://example.com/","workerid":0}}', + '{"state":{},"msg":"test-stat","page":"https://specs.webrecorder.net/","workerid":0}}', ) > 0, ).toBe(true); @@ -27,10 +27,46 @@ test("test custom behaviors", async () => { ) > 0, ).toBe(true); - // another custom behavior ran for webrecorder.net + // another custom behavior ran for old.webrecorder.net expect( log.indexOf( - '{"state":{},"msg":"test-stat-2","page":"https://webrecorder.net/","workerid":0}}', + '{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}', + ) > 0, + ).toBe(true); +}); + +test("test custom behavior from URL", async () => { + const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --scopeType page"); + + const log = res.toString(); + + expect(log.indexOf("Custom 
behavior file downloaded") > 0).toBe(true); + + expect( + log.indexOf( + '{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}', + ) > 0, + ).toBe(true); +}); + +test("test mixed custom behavior sources", async () => { + const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page"); + + const log = res.toString(); + + // test custom behavior from url ran + expect(log.indexOf("Custom behavior file downloaded") > 0).toBe(true); + + expect( + log.indexOf( + '{"state":{},"msg":"test-stat","page":"https://specs.webrecorder.net/","workerid":0}}', + ) > 0, + ).toBe(true); + + // test custom behavior from local file ran + expect( + log.indexOf( + '{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}', ) > 0, ).toBe(true); }); @@ -40,7 +76,7 @@ test("test invalid behavior exit", async () => { try { child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page", + "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page", ); } catch (e) { status = e.status; diff --git a/tests/custom-behaviors/custom-2.js b/tests/custom-behaviors/custom-2.js index 70e488432..def3df694 100644 --- a/tests/custom-behaviors/custom-2.js +++ b/tests/custom-behaviors/custom-2.js @@ -11,7 +11,7 @@ class TestBehavior2 { } static isMatch() { - return window.location.origin === "https://webrecorder.net"; + return window.location.origin === "https://old.webrecorder.net"; } async *run(ctx) { diff --git a/tests/custom-behaviors/custom.js b/tests/custom-behaviors/custom.js index 19ce7850e..c1532ed03 100644 --- a/tests/custom-behaviors/custom.js +++ b/tests/custom-behaviors/custom.js @@ -11,7 +11,7 @@ class TestBehavior { } static isMatch() { - return window.location.origin === "https://example.com"; + return window.location.origin === "https://specs.webrecorder.net"; } async *run(ctx) { diff --git a/tests/dryrun.test.js b/tests/dryrun.test.js index 4234c23ec..b23796f4a 100644 --- a/tests/dryrun.test.js +++ b/tests/dryrun.test.js @@ -3,7 +3,7 @@ import fs from "fs"; test("ensure dryRun crawl only writes pages and logs", async () => { child_process.execSync( - 'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --generateWACZ --text --collection dry-run-wr-net --combineWARC --rolloverSize 10000 --limit 2 --title "test title" --description "test description" --warcPrefix custom-prefix --dryRun --exclude community', + 'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --generateWACZ --text --collection dry-run-wr-net --combineWARC --rolloverSize 10000 --limit 2 --title "test title" --description "test description" 
--warcPrefix custom-prefix --dryRun --exclude community', ); const files = fs.readdirSync("test-crawls/collections/dry-run-wr-net").sort(); diff --git a/tests/extra_hops_depth.test.js b/tests/extra_hops_depth.test.js index 50e33f0c1..b9944d553 100644 --- a/tests/extra_hops_depth.test.js +++ b/tests/extra_hops_depth.test.js @@ -12,7 +12,7 @@ test( async () => { try { await exec( - "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection extra-hops-beyond --extraHops 2 --url https://webrecorder.net/ --limit 5 --timeout 10 --exclude community --exclude tools", + "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection extra-hops-beyond --extraHops 2 --url https://old.webrecorder.net/ --limit 5 --timeout 10 --exclude community --exclude tools", ); } catch (error) { console.log(error); @@ -31,14 +31,14 @@ test( const crawledExtraPagesArray = crawledExtraPages.trim().split("\n"); const expectedPages = [ - "https://webrecorder.net/", + "https://old.webrecorder.net/", ]; const expectedExtraPages = [ - "https://webrecorder.net/blog", - "https://webrecorder.net/about", - "https://webrecorder.net/contact", - "https://webrecorder.net/faq", + "https://old.webrecorder.net/blog", + "https://old.webrecorder.net/about", + "https://old.webrecorder.net/contact", + "https://old.webrecorder.net/faq", ]; // first line is the header, not page, so adding -1 @@ -74,7 +74,7 @@ test( test("extra hops applies beyond depth limit", () => { try { execSync( - "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection extra-hops-depth-0 --extraHops 1 --url https://webrecorder.net/ --limit 2 --depth 0 --timeout 10 --exclude community --exclude tools", + "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection extra-hops-depth-0 --extraHops 1 --url https://old.webrecorder.net/ --limit 2 --depth 0 --timeout 10 --exclude community --exclude tools", ); } catch (error) { console.log(error); diff --git a/tests/file_stats.test.js b/tests/file_stats.test.js index 83b92c7f0..61042e380 100644 --- a/tests/file_stats.test.js +++ b/tests/file_stats.test.js @@ -3,7 +3,7 @@ import fs from "fs"; test("ensure that stats file is modified", async () => { const child = child_process.exec( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --generateWACZ --text --limit 3 --exclude community --collection file-stats --statsFilename progress.json", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --generateWACZ --text --limit 3 --exclude community --collection file-stats --statsFilename progress.json", ); // detect crawler exit diff --git a/tests/limit_reached.test.js b/tests/limit_reached.test.js index 856886db2..ec84ba03b 100644 --- a/tests/limit_reached.test.js +++ b/tests/limit_reached.test.js @@ -6,7 +6,7 @@ const exec = util.promisify(execCallback); test("ensure page limit reached", async () => { execSync( - 'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --scopeType prefix --behaviors "" --url https://webrecorder.net/ --limit 12 --workers 2 --collection limit-test --statsFilename stats.json --exclude community', + 'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --scopeType 
prefix --behaviors "" --url https://old.webrecorder.net/ --limit 12 --workers 2 --collection limit-test --statsFilename stats.json --exclude community', ); }); @@ -22,7 +22,7 @@ test("ensure crawl fails if failOnFailedLimit is reached", async () => { let passed = true; try { await exec( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/will404 --url https://specs.webrecorder.net --failOnInvalidStatus --failOnFailedLimit 1 --limit 10 --collection faillimitreached", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/will404 --url https://specs.webrecorder.net --failOnInvalidStatus --failOnFailedLimit 1 --limit 10 --collection faillimitreached", ); } catch (error) { expect(error.code).toEqual(17); diff --git a/tests/mult_url_crawl_with_favicon.test.js b/tests/mult_url_crawl_with_favicon.test.js index ca0f40fb7..ac60a6c71 100644 --- a/tests/mult_url_crawl_with_favicon.test.js +++ b/tests/mult_url_crawl_with_favicon.test.js @@ -6,7 +6,7 @@ const testIf = (condition, ...args) => condition ? test(...args) : test.skip(... test("ensure multi url crawl run with docker run passes", async () => { child_process.execSync( - 'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://webrecorder.net/ --generateWACZ --text --collection advanced --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --pages 2 --limit 2 --exclude community', + 'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://old.webrecorder.net/ --generateWACZ --text --collection advanced --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --pages 2 --limit 2 --exclude community', ); }); @@ -39,9 +39,9 @@ test("check that the favicon made it into the pages jsonl file", () => { ); const data = [data1, data2]; for (const d of data) { - if (d.url === "https://webrecorder.net/") { + if (d.url === "https://old.webrecorder.net/") { expect(d.favIconUrl).toEqual( - "https://webrecorder.net/assets/favicon.ico", + "https://old.webrecorder.net/assets/favicon.ico", ); } if (d.url === "https://iana.org/") { diff --git a/tests/multi-instance-crawl.test.js b/tests/multi-instance-crawl.test.js index 33c9383d2..9728554c9 100644 --- a/tests/multi-instance-crawl.test.js +++ b/tests/multi-instance-crawl.test.js @@ -33,7 +33,7 @@ afterAll(async () => { }); function runCrawl(name) { - const crawler = exec(`docker run --rm -v $PWD/test-crawls:/crawls --network=crawl --hostname=${name} webrecorder/browsertrix-crawler crawl --url https://www.webrecorder.net/ --limit 4 --exclude community --collection shared-${name} --crawlId testcrawl --redisStoreUrl redis://redis:6379`); + const crawler = exec(`docker run --rm -v $PWD/test-crawls:/crawls --network=crawl --hostname=${name} webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --limit 4 --exclude community --collection shared-${name} --crawlId testcrawl --redisStoreUrl redis://redis:6379`); return new Promise((resolve) => { crawler.on("exit", (code) => { diff --git a/tests/pageinfo-records.test.js b/tests/pageinfo-records.test.js index 0221e697f..01dc77a48 100644 --- a/tests/pageinfo-records.test.js +++ b/tests/pageinfo-records.test.js @@ -5,7 +5,7 @@ import { WARCParser } from "warcio"; test("run warc and ensure pageinfo records contain the correct 
resources", async () => { child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --url https://webrecorder.net/about --url https://invalid.invalid/ --scopeType page --collection page-info-test --combineWARC", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --url https://old.webrecorder.net/about --url https://invalid.invalid/ --scopeType page --collection page-info-test --combineWARC", ); const filename = path.join( @@ -26,7 +26,7 @@ test("run warc and ensure pageinfo records contain the correct resources", async for await (const record of parser) { if ( !foundIndex && - record.warcTargetURI === "urn:pageinfo:https://webrecorder.net/" + record.warcTargetURI === "urn:pageinfo:https://old.webrecorder.net/" ) { foundIndex = true; const text = await record.contentText(); @@ -35,7 +35,7 @@ test("run warc and ensure pageinfo records contain the correct resources", async if ( !foundAbout && - record.warcTargetURI === "urn:pageinfo:https://webrecorder.net/about" + record.warcTargetURI === "urn:pageinfo:https://old.webrecorder.net/about" ) { foundAbout = true; const text = await record.contentText(); @@ -64,47 +64,47 @@ function validateResourcesIndex(json) { expect(json).toHaveProperty("urls"); expect(json.counts).toEqual({ jsErrors: 0 }); expect(json.urls).toEqual({ - "https://webrecorder.net/": { + "https://old.webrecorder.net/": { status: 200, mime: "text/html", type: "document", }, - "https://webrecorder.net/assets/tools/logo-pywb.png": { + "https://old.webrecorder.net/assets/tools/logo-pywb.png": { mime: "image/png", status: 200, type: "image", }, - "https://webrecorder.net/assets/brand/archivewebpage-icon-color.svg": { + "https://old.webrecorder.net/assets/brand/archivewebpage-icon-color.svg": { mime: "image/svg+xml", status: 200, type: "image", }, - "https://webrecorder.net/assets/brand/browsertrix-icon-color.svg": { + "https://old.webrecorder.net/assets/brand/browsertrix-icon-color.svg": { mime: "image/svg+xml", status: 200, type: "image", }, - "https://webrecorder.net/assets/brand/browsertrixcrawler-icon-color.svg": { + "https://old.webrecorder.net/assets/brand/browsertrixcrawler-icon-color.svg": { mime: "image/svg+xml", status: 200, type: "image", }, - "https://webrecorder.net/assets/brand/replaywebpage-icon-color.svg": { + "https://old.webrecorder.net/assets/brand/replaywebpage-icon-color.svg": { mime: "image/svg+xml", status: 200, type: "image", }, - "https://webrecorder.net/assets/fontawesome/all.css": { + "https://old.webrecorder.net/assets/fontawesome/all.css": { status: 200, mime: "text/css", type: "stylesheet", }, - "https://webrecorder.net/assets/wr-logo.svg": { + "https://old.webrecorder.net/assets/wr-logo.svg": { status: 200, mime: "image/svg+xml", type: "image", }, - "https://webrecorder.net/assets/main.css": { + "https://old.webrecorder.net/assets/main.css": { status: 200, mime: "text/css", type: "stylesheet", @@ -113,22 +113,15 @@ function validateResourcesIndex(json) { { status: 200, mime: "text/css", type: "stylesheet" }, "https://fonts.googleapis.com/css?family=Source+Code+Pro|Source+Sans+Pro&display=swap": { status: 200, mime: "text/css", type: "stylesheet" }, - "https://stats.browsertrix.com/js/script.tagged-events.js": { - status: 200, - mime: "application/javascript", - type: "script", - }, "https://fonts.gstatic.com/s/sourcesanspro/v22/6xK3dSBYKcSV-LCoeQqfX1RYOo3qOK7l.woff2": { status: 200, mime: "font/woff2", type: "font" 
}, "https://fonts.gstatic.com/s/sourcesanspro/v22/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2": { status: 200, mime: "font/woff2", type: "font" }, - "https://webrecorder.net/assets/favicon.ico": { + "https://old.webrecorder.net/assets/favicon.ico": { status: 200, mime: "image/vnd.microsoft.icon", type: "other", }, - "https://stats.browsertrix.com/api/event?__wb_method=POST&n=pageview&u=https%3A%2F%2Fwebrecorder.net%2F&d=webrecorder.net&r=null": - { status: 202, mime: "text/plain", type: "xhr" }, }); } @@ -139,17 +132,17 @@ function validateResourcesAbout(json) { expect(json).toHaveProperty("urls"); expect(json.counts).toEqual({ jsErrors: 0 }); expect(json.urls).toEqual({ - "https://webrecorder.net/about": { + "https://old.webrecorder.net/about": { status: 200, mime: "text/html", type: "document", }, - "https://webrecorder.net/assets/main.css": { + "https://old.webrecorder.net/assets/main.css": { status: 200, mime: "text/css", type: "stylesheet", }, - "https://webrecorder.net/assets/fontawesome/all.css": { + "https://old.webrecorder.net/assets/fontawesome/all.css": { status: 200, mime: "text/css", type: "stylesheet", @@ -158,12 +151,7 @@ function validateResourcesAbout(json) { { status: 200, mime: "text/css", type: "stylesheet" }, "https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@700;900&display=swap": { status: 200, mime: "text/css", type: "stylesheet" }, - "https://stats.browsertrix.com/js/script.tagged-events.js": { - status: 200, - mime: "application/javascript", - type: "script", - }, - "https://webrecorder.net/assets/wr-logo.svg": { + "https://old.webrecorder.net/assets/wr-logo.svg": { status: 200, mime: "image/svg+xml", type: "image", @@ -172,12 +160,6 @@ function validateResourcesAbout(json) { { status: 200, mime: "font/woff2", type: "font" }, "https://fonts.gstatic.com/s/sourcesanspro/v22/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2": { status: 200, mime: "font/woff2", type: "font" }, - "https://stats.browsertrix.com/api/event?__wb_method=POST&n=pageview&u=https%3A%2F%2Fwebrecorder.net%2Fabout&d=webrecorder.net&r=null": - { - status: 0, - type: "xhr", - error: "net::ERR_BLOCKED_BY_CLIENT", - }, }); } diff --git a/tests/proxy.test.js b/tests/proxy.test.js index ea6035169..526ed32dc 100644 --- a/tests/proxy.test.js +++ b/tests/proxy.test.js @@ -10,7 +10,7 @@ const WRONG_PORT = "33130"; const SSH_PROXY_IMAGE = "linuxserver/openssh-server" const PDF = "https://specs.webrecorder.net/wacz/1.1.1/wacz-2021.pdf"; -const HTML = "https://webrecorder.net/"; +const HTML = "https://old.webrecorder.net/"; const extraArgs = "--limit 1 --failOnFailedSeed --timeout 10 --logging debug"; diff --git a/tests/qa_compare.test.js b/tests/qa_compare.test.js index 039f5f9aa..8daed468d 100644 --- a/tests/qa_compare.test.js +++ b/tests/qa_compare.test.js @@ -8,7 +8,7 @@ test("run initial crawl with text and screenshots to prepare for QA", async () = fs.rmSync("./test-crawls/qa-wr-net", { recursive: true, force: true }); child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --url https://webrecorder.net/about --url https://browsertrix.com/ --url https://webrecorder.net/contact --scopeType page --collection qa-wr-net --text to-warc --screenshot view --generateWACZ", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --url https://old.webrecorder.net/about --url https://browsertrix.com/ --url https://old.webrecorder.net/contact --scopeType page --collection 
qa-wr-net --text to-warc --screenshot view --generateWACZ", ); expect( diff --git a/tests/rollover-writer.test.js b/tests/rollover-writer.test.js index 771304ef1..0c90fb2ad 100644 --- a/tests/rollover-writer.test.js +++ b/tests/rollover-writer.test.js @@ -3,7 +3,7 @@ import fs from "fs"; test("set rollover to 500K and ensure individual WARCs rollover, including screenshots", async () => { child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --limit 5 --exclude community --collection rollover-500K --rolloverSize 500000 --screenshot view" + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --limit 5 --exclude community --collection rollover-500K --rolloverSize 500000 --screenshot view" ); const warcLists = fs.readdirSync("test-crawls/collections/rollover-500K/archive"); diff --git a/tests/saved-state.test.js b/tests/saved-state.test.js index d38d7dc06..72c28d8b3 100644 --- a/tests/saved-state.test.js +++ b/tests/saved-state.test.js @@ -53,7 +53,7 @@ test("check crawl interrupted + saved state written", async () => { try { containerId = execSync( - "docker run -d -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection int-state-test --url https://www.webrecorder.net/ --limit 10 --behaviors \"\" --exclude community", + "docker run -d -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection int-state-test --url http://old.webrecorder.net/ --limit 10 --behaviors \"\" --exclude community", { encoding: "utf-8" }, //wait.callback, ); @@ -118,7 +118,7 @@ test("check parsing saved state + page done + queue present", () => { // ensure extra seeds also set expect(state.extraSeeds).toEqual([ - `{"origSeedId":0,"newUrl":"https://webrecorder.net/"}`, + `{"origSeedId":0,"newUrl":"https://old.webrecorder.net/"}`, ]); }); @@ -129,7 +129,7 @@ test("check crawl restarted with saved state", async () => { try { containerId = execSync( - `docker run -d -p ${port}:6379 -e CRAWL_ID=test -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection int-state-test --url https://webrecorder.net/ --config /crawls/collections/int-state-test/crawls/${savedStateFile} --debugAccessRedis --limit 10 --behaviors "" --exclude community`, + `docker run -d -p ${port}:6379 -e CRAWL_ID=test -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection int-state-test --url https://old.webrecorder.net/ --config /crawls/collections/int-state-test/crawls/${savedStateFile} --debugAccessRedis --limit 10 --behaviors "" --exclude community`, { encoding: "utf-8" }, ); } catch (error) { diff --git a/tests/seeds.test.js b/tests/seeds.test.js index 0ebd08e37..f53c48c6c 100644 --- a/tests/seeds.test.js +++ b/tests/seeds.test.js @@ -46,7 +46,7 @@ test("ensure seed with 4xx/5xx response fails crawl if failOnFailedSeed and fail let passed = true; try { await exec( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://webrecorder.net/doesntexist --generateWACZ --limit 2 --failOnFailedSeed --failOnInvalidStatus --collection failseed404status", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://old.webrecorder.net/doesntexist --generateWACZ 
--limit 2 --failOnFailedSeed --failOnInvalidStatus --collection failseed404status", ); } catch (error) { expect(error.code).toEqual(1); @@ -59,7 +59,7 @@ test("ensure seed with 4xx/5xx response succeeds if failOnInvalidStatus is not s let passed = true; try { await exec( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://webrecorder.net/doesntexist --generateWACZ --limit 2 --failOnFailedSeed --collection failseedwithoutinvalidstatus", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://old.webrecorder.net/doesntexist --generateWACZ --limit 2 --failOnFailedSeed --collection failseedwithoutinvalidstatus", ); } catch (error) { console.log(error); diff --git a/yarn.lock b/yarn.lock index a2fb5d753..8b0b963e7 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2075,10 +2075,10 @@ detect-newline@^3.0.0: resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651" integrity sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA== -devtools-protocol@0.0.1342118: - version "0.0.1342118" - resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1342118.tgz#ea136fc1701572c0830233dcb414dc857e582e0a" - integrity sha512-75fMas7PkYNDTmDyb6PRJCH7ILmHLp+BhrZGeMsa4bCh40DTxgCz2NRy5UDzII4C5KuD0oBMZ9vXKhEl6UD/3w== +devtools-protocol@0.0.1354347: + version "0.0.1354347" + resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1354347.tgz#5cb509610b8f61fc69a31e5c810d5bed002d85ea" + integrity sha512-BlmkSqV0V84E2WnEnoPnwyix57rQxAM5SKJjf4TbYOCGLAWtz8CDH8RIaGOjPgPCXo2Mce3kxSY497OySidY3Q== diff-sequences@^29.6.3: version "29.6.3" @@ -4375,15 +4375,15 @@ punycode@^2.1.0: resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec" integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A== -puppeteer-core@^23.5.1: - version "23.5.1" - resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-23.5.1.tgz#fac4268820c35d3172e783a1f1a39773b2c0f7c6" - integrity sha512-We6xKCSZaZ23+GAYckeNfeDeJIVuhxOBsh/gZkbULu/XLFJ3umSiiQ8Ey927h3g/XrCCr8CnSZ5fvP5v2vB5Yw== +puppeteer-core@^23.6.0: + version "23.6.0" + resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-23.6.0.tgz#a3e1e09c05f47fb8ca2bc9d4ca200d18e3704303" + integrity sha512-se1bhgUpR9C529SgHGr/eyT92mYyQPAhA2S9pGtGrVG2xob9qE6Pbp7TlqiSPlnnY1lINqhn6/67EwkdzOmKqQ== dependencies: "@puppeteer/browsers" "2.4.0" chromium-bidi "0.8.0" debug "^4.3.7" - devtools-protocol "0.0.1342118" + devtools-protocol "0.0.1354347" typed-query-selector "^2.12.0" ws "^8.18.0"
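
Usage sketch for the reworked custom-behavior loading introduced in this change (illustrative only, not part of the diff): the sample paths and URL are placeholders, and the `./util/file_reader.js` import specifier assumes a caller co-located with the crawler's ESM sources. Per the diff, `--customBehaviors` is now an array whose entries may be local `.js` files, directories of `.js` files, or http(s) URLs; URL sources are downloaded into `/app/behaviors` and then collected like local files.

    import { collectCustomBehaviors } from "./util/file_reader.js";

    // Mixed sources, mirroring the new --customBehaviors array option:
    // a directory of behavior files, a single file, and a remote URL.
    // All values below are placeholders for illustration.
    const sources = [
      "/custom-behaviors/",
      "/custom-behaviors/custom.js",
      "https://example.com/behaviors/custom-2.js",
    ];

    // Resolves to a flat list of { path, contents } entries. Local paths that
    // are not .js files or directories are skipped (with a warning at the top
    // level), and a failed download contributes no entries.
    const behaviors = await collectCustomBehaviors(sources);

    for (const { path, contents } of behaviors) {
      console.log(`collected behavior ${path} (${contents.length} chars)`);
    }

This is the same flow exercised by the "mixed custom behavior sources" test above, which passes one URL and one local path in a single crawl.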