Skip to content

Commit

Permalink
Better logging of everything (#40)
Browse files Browse the repository at this point in the history
* Base code for logging. Need to write proper format for logs

* Additional formats

* trying to print error with red colour...

* Completed logs in console

* added logging of pathname

* final changes in order to beautify

* Name consistency

* New paradigm of logging

* fixed action representation in Logs

* Fixed action's header setting

* Creation page fix

* Added contextId and pageId for error logging

* Better naming

* Minor changes to:
naming, structure

* Minor changes to:
naming, structure

* Minor changes to:
naming, structure

* bump version, minor refinements

---------

Co-authored-by: Max Varlamov <[email protected]>
Co-authored-by: Max Varlamov <[email protected]>
  • Loading branch information
3 people authored Feb 14, 2024
1 parent cc6ff31 commit 645d27e
Show file tree
Hide file tree
Showing 8 changed files with 663 additions and 407 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.idea
node_modules
.gitignore
11 changes: 8 additions & 3 deletions app.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ const puppeteer = require('puppeteer-extra')
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha')
const StealthPlugin = require('puppeteer-extra-plugin-stealth')
const cookieParser = require('cookie-parser');
const logger = require('morgan');
const bodyParser = require('body-parser');
const AsyncLock = require('async-lock');

Expand All @@ -22,16 +21,21 @@ const harRouter = require('./routes/har');
const closeContextRouter = require('./routes/close_context');

const middlewares = require('./helpers/middlewares')
const loggers = require("./helpers/loggers");

const app = express();

const LOG_LEVEL = process.env.LOG_LEVEL || "http";
const LOG_FILE = process.env.LOG_FILE;
const HEADLESS = (process.env.HEADLESS || "true").toLowerCase() === "true";
const CONNECT_TIMEOUT = parseInt(process.env.CONNECT_TIMEOUT) || 180000;
const VIEWPORT_WIDTH = parseInt(process.env.VIEWPORT_WIDTH) || 1280;
const VIEWPORT_HEIGHT = parseInt(process.env.VIEWPORT_HEIGHT) || 720;
const TOKEN_2CAPTCHA = process.env.TOKEN_2CAPTCHA;
const STEALTH_BROWSING = (process.env.STEALTH_BROWSING || "true").toLowerCase() === "true";

loggers.initLogger(LOG_LEVEL, LOG_FILE);

async function setupBrowser() {
try {
if (TOKEN_2CAPTCHA) { // If token is given then RecapcthaPlugin is activated
Expand Down Expand Up @@ -80,7 +84,7 @@ async function setupBrowser() {

app.use(express.json());
app.use(express.urlencoded({extended: false}));
app.use(logger('dev'));
app.use(middlewares.logHTTPMiddleware());
app.use(bodyParser.raw({inflate: true, limit: '200kb', type: 'application/javascript'}));
app.use(cookieParser());

Expand All @@ -97,6 +101,7 @@ app.use('/mhtml', mhtmlRouter);
app.use('/har', harRouter);
app.use('/close_context', closeContextRouter);

app.use(middlewares.exceptionMiddleware);
app.use(middlewares.processExceptionMiddleware);
app.use(middlewares.logExceptionMiddleware);

module.exports = app;
90 changes: 90 additions & 0 deletions helpers/loggers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
const winston = require('winston');

const logPrinter = winston.format.printf((info) => {
let log = `[${info.timestamp}] ${info.level}: ${info.message}`;

if (info.stack) {
log = `${log}`
+ "\nRequest parameters:"
+ `\n contextId: ${info.contextId}`
+ `\n pageId: ${info.pageId}`
+ `\n${info.stack}\n`
}

return log;
});

const fileFormat = winston.format.combine(
winston.format.timestamp({format: 'YYYY-MM-DD HH:mm:ss:ms'}),
winston.format.errors({stack: true}),
logPrinter,
);

const consoleFormat = winston.format.combine(
winston.format.errors({stack: true}),
winston.format.timestamp({format: 'YYYY-MM-DD HH:mm:ss:ms'}),
winston.format.colorize({level: true}),
logPrinter,
);

let logger;

function createTransports(logLevel, logFilePath) {
let transports = [];

transports.push(new winston.transports.Console({
level: logLevel,
format: consoleFormat,
}));
if (logFilePath !== undefined) {
transports.push(new winston.transports.File({
level: logLevel,
filename: logFilePath,
format: fileFormat,
}));
}

return transports;
}

exports.initLogger = function initLogger(logLevel, logFilePath) {
const transports = createTransports(logLevel, logFilePath);

logger = winston.createLogger({
level: logLevel,
transports: transports,
});
}

function getBody(body) {
if (body instanceof Buffer) { // Action request
body = body.toString()
} else { // Other requests
body = JSON.stringify(body, null, 8);
}

return body;
}

exports.HTTPFormat = function HTTPFormat(tokens, req, res) {
const reqContextId = req.query["contextId"];
const reqPageId = req.query["pageId"];
const resContextId = res.get('scrapy-puppeteer-service-context-id');
const closePage = req.query["closePage"] === "1";

const url = req.baseUrl || req.originalUrl || req.url;
const queryIndex = url.indexOf('?');
const pathname = queryIndex !== -1 ? url.slice(1, queryIndex) : url.slice(1);

return `${pathname} (${tokens.status(req, res)})`
+ "\nRequest parameters:"
+ `${reqContextId ? `\n contextId: ${reqContextId}` : ""}`
+ `${reqPageId ? `\n pageId: ${reqPageId}` : ""}`
+ `\n closePage: ${closePage}`
+ `\n body: ${getBody(req.body)}`
+ `${!reqContextId && resContextId ? `\nCreated page with ${resContextId} contextId` : ""}`;
}

exports.getLogger = function getLogger() {
return logger;
}
27 changes: 26 additions & 1 deletion helpers/middlewares.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
const morgan = require('morgan');
const loggers = require('./loggers');
const exceptions = require("./exceptions");

exports.exceptionMiddleware = async function exceptionMiddleware(err, req, res, next) {

exports.logHTTPMiddleware = function logHTTPMiddleware() {
const logger = loggers.getLogger();

return morgan(
loggers.HTTPFormat,
{
stream: {
write: (message) => logger.http(message),
},
}
);
}

exports.processExceptionMiddleware = async function processExceptionMiddleware(err, req, res, next) {
if (res.headersSent) {
return next(err);
}
Expand Down Expand Up @@ -31,3 +47,12 @@ exports.exceptionMiddleware = async function exceptionMiddleware(err, req, res,

next(err);
}

exports.logExceptionMiddleware = async function logExceptionMiddleware(err, req, res, next) {
loggers.getLogger().error({
message: err,
contextId: req.query["contextId"],
pageId: req.query["pageId"],
});
next();
}
14 changes: 11 additions & 3 deletions helpers/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,19 @@ exports.performAction = async function performAction(request, action) {
await page.setExtraHTTPHeaders(extraHeaders);
}

const contextId = page.browserContext().id;
const pageId = page.target()._targetId;

try {
return await action(page, request);
const response = await action(page, request);
return {
...response,
contextId,
pageId,
};
} catch (err) {
err.contextId = page.browserContext().id;
err.pageId = page.target()._targetId;
err.contextId = contextId;
err.pageId = pageId;
throw err;
}
});
Expand Down
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "scrapy-puppeteer-service",
"version": "0.2.3",
"version": "0.2.4",
"private": true,
"scripts": {
"start": "node ./bin/www"
Expand All @@ -18,6 +18,7 @@
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-recaptcha": "^3.6.8",
"puppeteer-extra-plugin-stealth": "^2.11.2",
"puppeteer-proxy": "^2.1.2"
"puppeteer-proxy": "^2.1.2",
"winston": "^3.11.0"
}
}
32 changes: 16 additions & 16 deletions routes/goto.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@ async function action(page, request) {
}

// body = {
// "url": <string> URL to navigate page to. The url should include scheme, e.g. https://.
// "navigationOptions": { Navigation parameters which might have the following properties:
// "timeout": <number> Maximum navigation time in milliseconds, defaults to 30 seconds, pass 0 to disable timeout. The default value can be changed by using the page.setDefaultNavigationTimeout(timeout) or page.setDefaultTimeout(timeout) methods.
// "waitUntil": <string|Array<string>> When to consider navigation succeeded, defaults to load. Given an array of event strings, navigation is considered to be successful after all events have been fired. Events can be either:
// load - consider navigation to be finished when the load event is fired.
// domcontentloaded - consider navigation to be finished when the DOMContentLoaded event is fired.
// networkidle0 - consider navigation to be finished when there are no more than 0 network connections for at least 500 ms.
// networkidle2 - consider navigation to be finished when there are no more than 2 network connections for at least 500 ms.
// "referer" <string> Referer header value. If provided it will take preference over the referer header value set by page.setExtraHTTPHeaders().
// },
// "waitOptions": {
// "timeout": <number> Wait for given timeout in milliseconds
// "selector": <string> Wait for element by selector (see https://pptr.dev/api/puppeteer.page.waitforselector)
// "xpath": <string> Wait for element by xpath (see https://pptr.dev/api/puppeteer.page.waitforxpath)
// "options": <object> Options to wait for elements (see https://pptr.dev/api/puppeteer.waitforselectoroptions)
// }
// "url": <string> URL to navigate page to. The url should include scheme, e.g. https://.
// "navigationOptions": { Navigation parameters which might have the following properties:
// "timeout": <number> Maximum navigation time in milliseconds, defaults to 30 seconds, pass 0 to disable timeout. The default value can be changed by using the page.setDefaultNavigationTimeout(timeout) or page.setDefaultTimeout(timeout) methods.
// "waitUntil": <string|Array<string>> When to consider navigation succeeded, defaults to load. Given an array of event strings, navigation is considered to be successful after all events have been fired. Events can be either:
// load - consider navigation to be finished when the load event is fired.
// domcontentloaded - consider navigation to be finished when the DOMContentLoaded event is fired.
// networkidle0 - consider navigation to be finished when there are no more than 0 network connections for at least 500 ms.
// networkidle2 - consider navigation to be finished when there are no more than 2 network connections for at least 500 ms.
// "referer" <string> Referer header value. If provided it will take preference over the referer header value set by page.setExtraHTTPHeaders().
// },
// "waitOptions": {
// "timeout": <number> Wait for given timeout in milliseconds
// "selector": <string> Wait for element by selector (see https://pptr.dev/api/puppeteer.page.waitforselector)
// "xpath": <string> Wait for element by xpath (see https://pptr.dev/api/puppeteer.page.waitforxpath)
// "options": <object> Options to wait for elements (see https://pptr.dev/api/puppeteer.waitforselectoroptions)
// }
// }
//
router.post('/', async function (req, res, next) {
Expand Down
Loading

0 comments on commit 645d27e

Please sign in to comment.