Skip to content

Commit

Permalink
Add har files (#49)
Browse files Browse the repository at this point in the history
* Har add to scrapy-puppeteer-service

* fix

* fix

* Refactoring

* "version": "0.3.3"
  • Loading branch information
AndrewKorzh authored Aug 8, 2024
1 parent 8ef48e9 commit cf90be7
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 14 deletions.
8 changes: 8 additions & 0 deletions helpers/exceptions.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ exports.TooManyContextsError = class TooManyContextsError extends Error {
this.name = "TooManyContextsError";
}
}

exports.NoHarWriterError = class NoHarWriterError extends Error {
constructor(message="There is no initialized Har Writer on the page to which the Har action was applied.", ...args) {
super(message, ...args);
this.message = message;
this.name = "NoHarWriterError";
}
}
14 changes: 10 additions & 4 deletions helpers/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ const exceptions = require("./exceptions");
const { proxyRequest } = require('puppeteer-proxy');
const timeoutContext = require('./timeout_context');
const limitContext = require('./limit_context');
const PuppeteerHar = require('puppeteer-har');

const PROXY_URL_KEY = 'puppeteer-service-proxy-url'

Expand Down Expand Up @@ -91,8 +92,13 @@ exports.getContents = async function getContents(page, waitFor) {
};
};

async function newPage(context) {
async function newPage(context, request) {
const page = await context.newPage();
if (request.body.harRecording){
const harWriter = new PuppeteerHar(page)
harWriter.start()
page.harWriter = harWriter
}

await page.setRequestInterception(true);

Expand Down Expand Up @@ -143,18 +149,18 @@ exports.getBrowserPage = async function getBrowserPage(browser, request) {
const { contextId, pageId } = request.query;
if (contextId) {
const context = await findContextInBrowser(browser, contextId);
return pageId ? findPageInContext(context, pageId) : newPage(context);
return pageId ? findPageInContext(context, pageId) : newPage(context, request);
}
const proxy = getProxy(request);
if (!proxy) {
const context = await newContext(browser);
return newPage(context);
return newPage(context, request);
}
const { origin: proxyServer, username, password } = new URL(proxy);

const context = await newContext(browser, { proxyServer });
context[PROXY_URL_KEY] = proxy;
const page = await newPage(context);
const page = await newPage(context, request);
if (username) {
await page.authenticate({
username: decodeURIComponent(username),
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "scrapy-puppeteer-service",
"version": "0.3.2",
"version": "0.3.3",
"private": true,
"scripts": {
"start": "node ./bin/www"
Expand All @@ -25,6 +25,7 @@
"puppeteer-extra-plugin-recaptcha": "^3.6.8",
"puppeteer-extra-plugin-stealth": "^2.11.2",
"puppeteer-proxy": "^2.1.2",
"puppeteer-har": "1.1.2",
"winston": "^3.11.0",
"winston-logstash": "^1.2.1"
}
Expand Down
24 changes: 15 additions & 9 deletions routes/har.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
const express = require('express');
const utils = require('../helpers/utils');
const router = express.Router();
const PuppeteerHar = require('puppeteer-har');
const exceptions = require("../helpers/exceptions");

/**
 * Stops HAR recording on the given page and returns the captured archive.
 *
 * @param {object} page - Page object expected to carry a `harWriter` property
 *     exposing an async `stop()` method.
 * @param {object} request - Incoming request; not used by this action.
 * @returns {Promise<{har: string}>} Object whose `har` field holds the HAR data
 *     serialized as a JSON string.
 * @throws {exceptions.NoHarWriterError} When the page has no `harWriter`.
 */
async function action(page, request) {
    if (!page.harWriter) {
        throw new exceptions.NoHarWriterError();
    }

    // Keep the result in a local binding: assigning to an undeclared name would
    // create an implicit global shared by every concurrent request (and throws
    // a ReferenceError under strict mode).
    const harData = await page.harWriter.stop();

    return {
        har: JSON.stringify(harData),
    };
}

router.post('/', async function (req, res, next) {

try {
let response = await utils.performAction(req, action);
res.header('scrapy-puppeteer-service-context-id', response.contextId);
Expand All @@ -22,4 +28,4 @@ router.post('/', async function (req, res, next) {
}
});

module.exports = router;
module.exports = router;

0 comments on commit cf90be7

Please sign in to comment.