Skip to content

Commit

Permalink
feat: add support for 404 audits
Browse files Browse the repository at this point in the history
  • Loading branch information
alinarublea committed Dec 1, 2023
1 parent 6cf6045 commit d19160c
Show file tree
Hide file tree
Showing 7 changed files with 335 additions and 25 deletions.
31 changes: 8 additions & 23 deletions src/cwv/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,14 @@
*/

import { createUrl, Response } from '@adobe/fetch';
import { fetch } from '../support/utils.js';
import {
DOMAIN_LIST_URL, DOMAIN_REQUEST_DEFAULT_PARAMS, fetch, getRUMUrl, PAGEVIEW_THRESHOLD,
} from '../support/utils.js';

// Default query parameters for the rum-dashboard request; auditCWV spreads them
// into its params object next to domainkey and url.
export const DEFAULT_PARAMS = {
interval: 7, // presumably a 7-day window (cf. the weekly threshold below) — TODO confirm
offset: 0,
limit: 101,
};

// rum-dashboard query endpoint (run-query@v3).
const DOMAIN_LIST_URL = 'https://helix-pages.anywhere.run/helix-services/run-query@v3/rum-dashboard';

// weekly pageview threshold to eliminate urls with lack of samples
const PAGEVIEW_THRESHOLD = 7000;
/**
 * Row predicate for RUM data: accepts a row only when its pageviews are above
 * PAGEVIEW_THRESHOLD and its url is not the combined 'other' entry.
 *
 * @param {object} data - a single row with `pageviews` and `url`
 * @returns {boolean} true when the row should be kept
 */
export function filterRUMData(data) {
  // ignore the pages with low pageviews
  if (!(data.pageviews > PAGEVIEW_THRESHOLD)) {
    return false;
  }
  // ignore the combined result
  return data.url.toLowerCase() !== 'other';
}

/**
* url param in run-query@v3/rum-dashboard works in a 'startsWith' fashion. url=domain.com returns
Expand All @@ -31,17 +27,6 @@ const PAGEVIEW_THRESHOLD = 7000;
* @param url
* @returns finalUrl {Promise<string>}
*/
export async function getRUMUrl(url) {
  // Only a real scheme prefix counts as "has a scheme": a bare host such as
  // "httpbin.org" also starts with "http" but still needs "https://" prepended.
  const urlWithScheme = /^https?:\/\//i.test(url) ? url : `https://${url}`;
  const resp = await fetch(urlWithScheme);
  // resp.url is the final url of the GET call; drop the scheme part
  const finalUrl = resp.url.split('://')[1];
  // strip a single trailing slash so the value can be used as a url prefix
  return finalUrl.endsWith('/') ? finalUrl.slice(0, -1) : /* c8 ignore next */ finalUrl;
}

/**
 * Decides whether a RUM row is kept: its pageviews must exceed
 * PAGEVIEW_THRESHOLD and its url must not be the combined 'other' entry.
 *
 * @param {object} data - a single row with `pageviews` and `url`
 * @returns {boolean} true when the row should be kept
 */
function filterRUMData(data) {
  // ignore the pages with low pageviews
  const hasEnoughPageviews = data.pageviews > PAGEVIEW_THRESHOLD;
  // ignore the combined result (url is only inspected for rows above the threshold)
  return hasEnoughPageviews ? data.url.toLowerCase() !== 'other' : false;
}

function processRUMResponse(respJson) {
return respJson?.results?.data
Expand All @@ -68,7 +53,7 @@ export default async function auditCWV(message, context) {
auditContext.finalUrl = finalUrl;

const params = {
...DEFAULT_PARAMS,
...DOMAIN_REQUEST_DEFAULT_PARAMS,
domainkey,
url: finalUrl,
};
Expand Down
2 changes: 2 additions & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ import { Response } from '@adobe/fetch';
import secrets from '@adobe/helix-shared-secrets';
import sqs from './support/sqs.js';
import cwv from './cwv/handler.js';
import notfound from './notfound/handler.js';

// Audit handlers keyed by audit type.
// NOTE(review): the dispatch code is not visible here; presumably the handler is
// looked up by the incoming message's `type` (the notfound test sends '404') — confirm.
const HANDLERS = {
cwv,
404: notfound,
};

/**
Expand Down
73 changes: 73 additions & 0 deletions src/notfound/handler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Copyright 2023 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/

import { createUrl, Response } from '@adobe/fetch';
import {
DOMAIN_LIST_URL, DOMAIN_REQUEST_DEFAULT_PARAMS, fetch, getRUMUrl, PAGEVIEW_THRESHOLD,
} from '../support/utils.js';

/**
 * Row predicate for 404 data: accepts a row only when its views are above
 * PAGEVIEW_THRESHOLD and its url is not the combined 'other' entry.
 *
 * @param {object} data - a single row with `views` and `url`
 * @returns {boolean} true when the row should be kept
 */
export function filter404Data(data) {
  // ignore the pages with low pageviews
  if (!(data.views > PAGEVIEW_THRESHOLD)) {
    return false;
  }
  // ignore the combined result
  return data.url.toLowerCase() !== 'other';
}
/**
 * Reduces a rum-dashboard response to the rows reported by the 404 audit.
 * Rows are kept when filter404Data accepts them and are trimmed down to their
 * url plus their view count (exposed as `pageviews`).
 *
 * @param {object} respJson - parsed JSON body of the query response
 * @returns {Array<{url, pageviews}>|undefined} the reported rows, or undefined
 * when the response carries no `results.data`
 */
function process404Response(respJson) {
  // `?.` after `data` as well: a response that has `results` but no `data` yields
  // undefined like the other missing levels instead of throwing a TypeError.
  return respJson?.results?.data
    ?.filter(filter404Data)
    .map((row) => ({
      url: row.url,
      pageviews: row.views,
    }));
}
/**
 * Handles a 404 audit message: resolves the url to query via getRUMUrl, requests
 * the checkpoint=404 data from DOMAIN_LIST_URL, processes the response and sends
 * the result to the audit results queue.
 *
 * Side effect: sets `finalUrl` on the message's `auditContext`.
 *
 * @param {object} message - audit request carrying `type`, `url` and `auditContext`
 * @param {object} context - provides `log`, `sqs` and `env`
 * (AUDIT_RESULTS_QUEUE_URL, RUM_DOMAIN_KEY)
 * @returns {Promise<Response>} an empty response once the result has been sent
 */
export default async function audit404(message, context) {
  const { type, url, auditContext } = message;
  const { log, sqs, env } = context;
  const queueUrl = env.AUDIT_RESULTS_QUEUE_URL;
  const domainkey = env.RUM_DOMAIN_KEY;

  log.info(`Received audit req for domain: ${url}`);

  // query with the url the site finally resolves to, and record it on the context
  const resolvedUrl = await getRUMUrl(url);
  auditContext.finalUrl = resolvedUrl;

  const queryUrl = createUrl(DOMAIN_LIST_URL, {
    ...DOMAIN_REQUEST_DEFAULT_PARAMS,
    domainkey,
    url: resolvedUrl,
    checkpoint: 404,
  });
  const queryResp = await fetch(queryUrl);
  const auditResult = process404Response(await queryResp.json());

  const resultMessage = {
    type,
    url,
    auditContext,
    auditResult,
  };
  await sqs.sendMessage(queueUrl, resultMessage);

  log.info(`Successfully audited ${url} for ${type} type audit`);

  return new Response('');
}
17 changes: 17 additions & 0 deletions src/support/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,24 @@
*/
import { context as h2, h1 } from '@adobe/fetch';

// weekly pageview threshold to eliminate urls with lack of samples
export const PAGEVIEW_THRESHOLD = 7000;
// rum-dashboard query endpoint (run-query@v3) that the audits send their requests to
export const DOMAIN_LIST_URL = 'https://helix-pages.anywhere.run/helix-services/run-query@v3/rum-dashboard';

// Default query parameters for requests to DOMAIN_LIST_URL; handlers spread these
// and add their own keys (e.g. domainkey, url).
// NOTE(review): interval is presumably in days, matching the weekly threshold — confirm.
export const DOMAIN_REQUEST_DEFAULT_PARAMS = {
interval: 7,
offset: 0,
limit: 101,
};
// Shared fetch: taken from the h1() context of @adobe/fetch when the
// HELIX_FETCH_FORCE_HTTP1 env variable is set, otherwise from context() (imported as h2).
/* c8 ignore next 3 */
export const { fetch } = process.env.HELIX_FETCH_FORCE_HTTP1
? h1()
: h2();


/**
 * Resolves the url to use for rum-dashboard queries: issues a GET call to the
 * given domain and returns the response's final url without its scheme and
 * without a trailing slash.
 *
 * @param {string} url - domain or url, with or without an http(s) scheme
 * @returns {Promise<string>} final url, e.g. 'www.domain.com'
 */
export async function getRUMUrl(url) {
  // Only a real scheme prefix counts as "has a scheme": a bare host such as
  // "httpbin.org" also starts with "http" but still needs "https://" prepended.
  const urlWithScheme = /^https?:\/\//i.test(url) ? url : `https://${url}`;
  const resp = await fetch(urlWithScheme);
  // resp.url is the final url of the GET call; drop the scheme part
  const finalUrl = resp.url.split('://')[1];
  // strip a single trailing slash so the value can be used as a url prefix
  return finalUrl.endsWith('/') ? finalUrl.slice(0, -1) : /* c8 ignore next */ finalUrl;
}
4 changes: 2 additions & 2 deletions test/audits/cwv.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import sinonChai from 'sinon-chai';
import { Request } from '@adobe/fetch';
import nock from 'nock';
import { main } from '../../src/index.js';
import { DEFAULT_PARAMS, getRUMUrl } from '../../src/cwv/handler.js';
import { DOMAIN_REQUEST_DEFAULT_PARAMS, getRUMUrl } from '../../src/support/utils.js';
import { expectedAuditResult, rumData } from '../rum-data.js';

chai.use(sinonChai);
Expand Down Expand Up @@ -68,7 +68,7 @@ describe('Index Tests', () => {
nock('https://helix-pages.anywhere.run')
.get('/helix-services/run-query@v3/rum-dashboard')
.query({
...DEFAULT_PARAMS,
...DOMAIN_REQUEST_DEFAULT_PARAMS,
domainkey: context.env.RUM_DOMAIN_KEY,
url: 'adobe.com',
})
Expand Down
99 changes: 99 additions & 0 deletions test/audits/notfound.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Copyright 2023 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/

/* eslint-env mocha */
/* eslint-disable no-unused-expressions */ // expect statements

import chai from 'chai';
import sinon from 'sinon';
import sinonChai from 'sinon-chai';
import { Request } from '@adobe/fetch';
import nock from 'nock';
import { main } from '../../src/index.js';
import { DOMAIN_REQUEST_DEFAULT_PARAMS, getRUMUrl } from '../../src/support/utils.js';
import { notFoundData, expectedAuditResult } from '../notfounddata.js';

chai.use(sinonChai);
const { expect } = chai;

const sandbox = sinon.createSandbox();
// End-to-end tests for the 404 audit: main() is invoked with an SQS-style event
// carrying a '404' message, while the site and the rum-dashboard query are
// intercepted with nock.
describe('404 Audit Tests', () => {
  const request = new Request('https://space.cat');
  let context;
  let messageBodyJson;

  beforeEach('setup', () => {
    messageBodyJson = {
      type: '404',
      url: 'adobe.com',
      auditContext: {
        finalUrl: 'adobe.com',
      },
    };
    context = {
      log: console,
      runtime: {
        region: 'us-east-1',
      },
      env: {
        AUDIT_RESULTS_QUEUE_URL: 'queueUrl',
        RUM_DOMAIN_KEY: 'domainkey',
      },
      invocation: {
        event: {
          Records: [{
            body: JSON.stringify(messageBodyJson),
          }],
        },
      },
      sqs: {
        sendMessage: sandbox.stub().resolves(),
      },
    };
  });

  afterEach('cleanup', () => {
    // drop leftover interceptors and stubs so one test cannot leak state into the next
    nock.cleanAll();
    sandbox.restore();
  });

  it('fetch 404 data for base url > process > send results', async () => {
    // the GET issued by getRUMUrl to resolve the final url
    nock('https://adobe.com')
      .get('/')
      .reply(200);
    // the rum-dashboard query, which must carry checkpoint=404
    nock('https://helix-pages.anywhere.run')
      .get('/helix-services/run-query@v3/rum-dashboard')
      .query({
        ...DOMAIN_REQUEST_DEFAULT_PARAMS,
        domainkey: context.env.RUM_DOMAIN_KEY,
        checkpoint: 404,
        url: 'adobe.com',
      })
      .reply(200, notFoundData);

    const resp = await main(request, context);

    const expectedMessage = {
      ...messageBodyJson,
      auditResult: expectedAuditResult,
    };

    expect(resp.status).to.equal(200);
    expect(context.sqs.sendMessage).to.have.been.calledOnce;
    expect(context.sqs.sendMessage).to.have.been
      .calledWith(context.env.AUDIT_RESULTS_QUEUE_URL, expectedMessage);
  });

  it('getRUMUrl do not add scheme to urls with a scheme already', async () => {
    nock('http://space.cat')
      .get('/')
      .reply(200);

    const finalUrl = await getRUMUrl('http://space.cat');
    expect(finalUrl).to.eql('space.cat');
  });
});
Loading

0 comments on commit d19160c

Please sign in to comment.