diff --git a/helpers/exceptions.js b/helpers/exceptions.js index eee261f..c867a98 100644 --- a/helpers/exceptions.js +++ b/helpers/exceptions.js @@ -21,3 +21,11 @@ exports.TooManyContextsError = class TooManyContextsError extends Error { this.name = "TooManyContextsError"; } } + +exports.NoHarWriterError = class NoHarWriterError extends Error { + constructor(message="There is no initialized Har Writer on the page to which the Har action was applied.", ...args) { + super(message, ...args); + this.message = message; + this.name = "NoHarWriterError"; + } +} \ No newline at end of file diff --git a/helpers/utils.js b/helpers/utils.js index 0cb484f..125231f 100644 --- a/helpers/utils.js +++ b/helpers/utils.js @@ -2,6 +2,7 @@ const exceptions = require("./exceptions"); const { proxyRequest } = require('puppeteer-proxy'); const timeoutContext = require('./timeout_context'); const limitContext = require('./limit_context'); +const PuppeteerHar = require('puppeteer-har'); const PROXY_URL_KEY = 'puppeteer-service-proxy-url' @@ -91,8 +92,13 @@ exports.getContents = async function getContents(page, waitFor) { }; }; -async function newPage(context) { +async function newPage(context, request) { const page = await context.newPage(); + if (request.body.harRecording){ + const harWriter = new PuppeteerHar(page) + harWriter.start() + page.harWriter = harWriter + } await page.setRequestInterception(true); @@ -143,18 +149,18 @@ exports.getBrowserPage = async function getBrowserPage(browser, request) { const { contextId, pageId } = request.query; if (contextId) { const context = await findContextInBrowser(browser, contextId); - return pageId ? findPageInContext(context, pageId) : newPage(context); + return pageId ? 
/**
 * Stops the HAR writer attached to the page and returns what it recorded.
 *
 * @param {object} page - Page object; must carry a `harWriter` property exposing an async `stop()`.
 * @param {object} request - Incoming request; not read by this action.
 * @returns {Promise<{har: string}>} Object whose `har` field is the JSON-serialized value
 *     resolved by `page.harWriter.stop()`.
 * @throws {exceptions.NoHarWriterError} When `page.harWriter` is falsy.
 */
async function action(page, request) {
    if (!page.harWriter) {
        throw new exceptions.NoHarWriterError();
    }

    // Declared with `const`: the previous bare assignments created implicit globals
    // (`harData`, `harJson`) visible to the whole process, and would throw a
    // ReferenceError if this module were ever run in strict mode.
    const harData = await page.harWriter.stop();
    return {
        har: JSON.stringify(harData),
    };
}
utils.performAction(req, action); res.header('scrapy-puppeteer-service-context-id', response.contextId); @@ -22,4 +28,4 @@ router.post('/', async function (req, res, next) { } }); -module.exports = router; +module.exports = router; \ No newline at end of file