diff --git a/README.md b/README.md
index ebaffdd6..7631e272 100644
--- a/README.md
+++ b/README.md
@@ -39,3 +39,37 @@ $ npm test
 ```bash
 $ npm run lint
 ```
+
+## Message Body Formats
+
+Audit worker consumes the `AUDIT_JOBS_QUEUE` queue, performs the requested audit, then queues the result to `AUDIT_RESULTS_QUEUE` for the interested parties to consume later on.
+
+Expected message body format in `AUDIT_JOBS_QUEUE` is:
+
+```json
+{
+  "type": "string",
+  "url": "string",
+  "auditContext": "object"
+}
+```
+
+Output message body format sent to `AUDIT_RESULTS_QUEUE` is:
+
+```json
+{
+  "type": "string",
+  "url": "string",
+  "auditContext": "object",
+  "auditResult": "object"
+}
+```
+
+## Required ENV Variables
+
+Currently, audit worker requires two env variables:
+
+```plaintext
+AUDIT_RESULTS_QUEUE_URL=url of the queue to send audit results to
+RUM_DOMAIN_KEY=global domain key for the rum api
+```
diff --git a/src/cwv/handler.js b/src/cwv/handler.js
index 570e50ec..4ad653f1 100644
--- a/src/cwv/handler.js
+++ b/src/cwv/handler.js
@@ -16,24 +16,48 @@ import { fetch } from '../support/utils.js';
 export const DEFAULT_PARAMS = {
   interval: 7,
   offset: 0,
-  limit: 100,
+  limit: 101,
 };
 
+const DOMAIN_LIST_URL = 'https://helix-pages.anywhere.run/helix-services/run-query@v3/rum-dashboard';
+
 // weekly pageview threshold to eliminate urls with lack of samples
 const PAGEVIEW_THRESHOLD = 7000;
 
+/**
+ * url param in run-query@v3/rum-dashboard works in a 'startsWith' fashion. url=domain.com returns
+ * an empty result whereas url=www.domain.com/ returns the desired result. To catch the redirects
+ * to subdomains we issue a GET call to the domain, then use the final url after redirects
+ * @param {string} url - domain or url to resolve, with or without a scheme
+ * @returns {Promise<string>} final url after redirects, with the scheme stripped
+ */
 export async function getRUMUrl(url) {
   const urlWithScheme = url.startsWith('http') ? url : `https://${url}`;
   const resp = await fetch(urlWithScheme);
   return resp.url.split('://')[1];
 }
 
-const DOMAIN_LIST_URL = 'https://helix-pages.anywhere.run/helix-services/run-query@v3/rum-dashboard';
+function filterRUMData(data) {
+  return data.pageviews > PAGEVIEW_THRESHOLD // ignore the pages with low pageviews
+    && data.url.toLowerCase() !== 'other'; // ignore the combined result
+}
 
+function processRUMResponse(respJson) {
+  return respJson?.results?.data
+    .filter(filterRUMData)
+    .map((row) => ({
+      url: row.url,
+      pageviews: row.pageviews,
+      avglcp: row.avglcp,
+    }));
+}
 export default async function auditCWV(message, context) {
   const { type, url, auditContext } = message;
   const { log, sqs } = context;
-  const { AUDIT_JOBS_QUEUE_URL: queueUrl } = context.env;
+  const {
+    AUDIT_RESULTS_QUEUE_URL: queueUrl,
+    RUM_DOMAIN_KEY: domainkey,
+  } = context.env;
 
   log.info(`Received audit req for domain: ${url}`);
 
@@ -41,21 +65,14 @@ export default async function auditCWV(message, context) {
 
   const params = {
     ...DEFAULT_PARAMS,
-    domainkey: context.env.RUM_DOMAIN_KEY,
+    domainkey,
     url: finalUrl,
   };
 
   const resp = await fetch(createUrl(DOMAIN_LIST_URL, params));
   const respJson = await resp.json();
 
-  const auditResult = respJson?.results?.data
-    .filter((row) => row.pageviews > PAGEVIEW_THRESHOLD)
-    .filter((row) => row.url.toLowerCase() !== 'other')
-    .map((row) => ({
-      url: row.url,
-      pageviews: row.pageviews,
-      avglcp: row.avglcp,
-    }));
+  const auditResult = processRUMResponse(respJson);
 
   await sqs.sendMessage(queueUrl, {
     type,
diff --git a/src/index.js b/src/index.js
index d6e65044..de9e4c54 100644
--- a/src/index.js
+++ b/src/index.js
@@ -33,11 +33,13 @@ function sqsEventAdapter(fn) {
     let message;
 
     try {
-      log.info(`number of records in message: ${context.invocation?.event?.Records.length}`);
       // currently not publishing batch messages
-      message = JSON.parse(context.invocation?.event?.Records[0]?.body);
+      const records = context.invocation?.event?.Records;
+      log.info(`Received ${records.length} many records. ID of the first message in the batch: ${records[0]?.messageId}`);
+      message = JSON.parse(records[0]?.body);
+      log.info(`Received message with id: ${records[0]?.messageId}`);
     } catch (e) {
-      log.error('Function was not invoked properly, message body is not a valid JSON');
+      log.error('Function was not invoked properly, message body is not a valid JSON', e);
       return new Response('', {
         status: 400,
         headers: {
diff --git a/test/audits/cwv.test.js b/test/audits/cwv.test.js
index 59d3985b..ef17a25a 100644
--- a/test/audits/cwv.test.js
+++ b/test/audits/cwv.test.js
@@ -45,7 +45,7 @@ describe('Index Tests', () => {
         region: 'us-east-1',
       },
       env: {
-        AUDIT_JOBS_QUEUE_URL: 'queueUrl',
+        AUDIT_RESULTS_QUEUE_URL: 'queueUrl',
         RUM_DOMAIN_KEY: 'domainkey',
       },
       invocation: {
@@ -84,7 +84,7 @@ describe('Index Tests', () => {
     expect(resp.status).to.equal(200);
     expect(context.sqs.sendMessage).to.have.been.calledOnce;
     expect(context.sqs.sendMessage).to.have.been
-      .calledWith(context.env.AUDIT_JOBS_QUEUE_URL, expectedMessage);
+      .calledWith(context.env.AUDIT_RESULTS_QUEUE_URL, expectedMessage);
   });
 
   it('getRUMUrl do not add scheme to urls with a scheme already', async () => {