From 411ca3e6fe17e5504de93f654940fccf2508060d Mon Sep 17 00:00:00 2001 From: Damian Zehnder Date: Tue, 20 Aug 2024 11:18:27 +0200 Subject: [PATCH] fix: PR review --- src/index.js | 4 +- src/url-inspect/pdp-handler.js | 88 ++++------------------------------ src/url-inspect/url-inspect.js | 85 ++++++++++++++++++++++++++++++++ test/audits/pdp.test.js | 10 ++-- 4 files changed, 101 insertions(+), 86 deletions(-) create mode 100644 src/url-inspect/url-inspect.js diff --git a/src/index.js b/src/index.js index 6e74c36b..1e64285b 100644 --- a/src/index.js +++ b/src/index.js @@ -30,7 +30,7 @@ import conversion from './conversion/handler.js'; import essExperimentationDaily from './experimentation-ess/daily.js'; import essExperimentationAll from './experimentation-ess/all.js'; import costs from './costs/handler.js'; -import pdpIndexability from './url-inspect/pdp-handler.js'; +import gscPdpStructuredData from './url-inspect/pdp-handler.js'; const HANDLERS = { apex, @@ -46,7 +46,7 @@ const HANDLERS = { 'experimentation-ess-daily': essExperimentationDaily, 'experimentation-ess-all': essExperimentationAll, costs, - 'pdp-indexability': pdpIndexability, + 'gsc-pdp-structured-data': gscPdpStructuredData, }; function getElapsedSeconds(startTime) { diff --git a/src/url-inspect/pdp-handler.js b/src/url-inspect/pdp-handler.js index 72a2c077..cede43a8 100644 --- a/src/url-inspect/pdp-handler.js +++ b/src/url-inspect/pdp-handler.js @@ -9,94 +9,24 @@ * OF ANY KIND, either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ -import GoogleClient from '@adobe/spacecat-shared-google-client'; +import { isArray } from '@adobe/spacecat-shared-utils'; import { AuditBuilder } from '../common/audit-builder.js'; +import { processUrlInspect } from './url-inspect.js'; -/** - * Processes an audit of the product detail pages of a site using Google's URL inspection tool. 
- * - * @async - * @function - * @param {string} baseURL - The base URL for the audit. - * @param {Object} context - The context object. - * @param {Object} site - The site object for the site to be audited. - * - * @returns {Promise<Array<Object>>} - A promise that resolves to an array of objects, - * each containing the inspection URL, filtered index status result, and filtered rich results. - * If an error occurs during the inspection of a URL, the object will include an error message. - * - * @throws {Error} - Throws an error if the audit process fails. - */ -async function processAudit(baseURL, context, site) { +export async function gscPdpStructuredDataHandler(baseURL, context, site) { const { log } = context; + log.info(`Received Product Detail Page indexability audit request for ${baseURL}`); + const startTime = process.hrtime(); + const siteId = site.getId(); const productDetailPages = await site.getConfig().getProductDetailPages('pdp-indexability'); - if (!productDetailPages || productDetailPages.length === 0) { + if (isArray(productDetailPages) && productDetailPages.length === 0) { log.error(`No top pages found for site ID: ${siteId}`); throw new Error(`No top pages found for site: ${baseURL}`); } - const google = GoogleClient.createFrom(context, baseURL); - - const urlInspectionResult = productDetailPages.map(async (pdp) => { - try { - const { inspectionResult } = await google.urlInspect(pdp); - log.info(`Successfully inspected URL: ${pdp}`); - - const filteredIndexStatusResult = { - verdict: inspectionResult.indexStatusResult.verdict, - lastCrawlTime: inspectionResult.indexStatusResult.lastCrawlTime, - }; - - const filteredRichResults = inspectionResult.richResultsResult.detectedItems.map( - (item) => { - const filteredItems = item.items.filter( - (issueItem) => issueItem.issues.some( - (issue) => issue.severity === 'ERROR', - ), - ).map((issueItem) => ({ - name: issueItem.name, - issues: issueItem.issues.filter((issue) => issue.severity === 'ERROR'), - })); - - 
return { - richResultType: item.richResultType, - items: filteredItems, - }; - }, - ).filter((item) => item.items.length > 0); - - if (filteredRichResults.length > 0) { - filteredRichResults.verdict = inspectionResult.richResultsResult.verdict; - log.info(`Found ${filteredRichResults.length} rich results issues for URL: ${pdp}`); - } else { - log.info(`No rich results issues found for URL: ${pdp}`); - } - - return { - inspectionUrl: pdp, - indexStatusResult: filteredIndexStatusResult, - richResults: filteredRichResults, - }; - } catch (error) { - log.error(`Failed to inspect URL: ${pdp}. Error: ${error.message}`); - return { - inspectionUrl: pdp, - error: error.message, - }; - } - }); - - return Promise.all(urlInspectionResult); -} - -export async function pdpIndexabilityRunner(baseURL, context, site) { - const { log } = context; - log.info(`Received Product Detail Page indexability audit request for ${baseURL}`); - const startTime = process.hrtime(); - - const auditResult = await processAudit(baseURL, context, site); + const auditResult = await processUrlInspect(baseURL, context, productDetailPages); const endTime = process.hrtime(startTime); const elapsedSeconds = endTime[0] + endTime[1] / 1e9; @@ -111,6 +41,6 @@ export async function pdpIndexabilityRunner(baseURL, context, site) { } export default new AuditBuilder() - .withRunner(pdpIndexabilityRunner) + .withRunner(gscPdpStructuredDataHandler) .withUrlResolver((site) => site.getBaseURL()) .build(); diff --git a/src/url-inspect/url-inspect.js b/src/url-inspect/url-inspect.js new file mode 100644 index 00000000..a8a3dfb1 --- /dev/null +++ b/src/url-inspect/url-inspect.js @@ -0,0 +1,85 @@ +/* + * Copyright 2024 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +import GoogleClient from '@adobe/spacecat-shared-google-client'; + +/** + * Processes an audit of a set of pages from a site using Google's URL inspection tool. + * + * @async + * @function + * @param {string} baseURL - The base URL for the audit. + * @param {Object} context - The context object. + * @param {Array} pages - An array of page URLs to be audited. + * + * @returns {Promise<Array<Object>>} - A promise that resolves to an array of objects, + * each containing the inspection URL, filtered index status result, and filtered rich results. + * If an error occurs during the inspection of a URL, the object will include an error message. + * + * @throws {Error} - Throws an error if the audit process fails. 
+ */ +export async function processUrlInspect(baseURL, context, pages) { + const { log } = context; + + const google = GoogleClient.createFrom(context, baseURL); + + const urlInspectionResult = pages.map(async (page) => { + try { + const { inspectionResult } = await google.urlInspect(page); + log.info(`Successfully inspected URL: ${page}`); + + const filteredIndexStatusResult = { + verdict: inspectionResult.indexStatusResult.verdict, + lastCrawlTime: inspectionResult.indexStatusResult.lastCrawlTime, + }; + + const filteredRichResults = inspectionResult.richResultsResult.detectedItems.map( + (item) => { + const filteredItems = item.items.filter( + (issueItem) => issueItem.issues.some( + (issue) => issue.severity === 'ERROR', + ), + ).map((issueItem) => ({ + name: issueItem.name, + issues: issueItem.issues.filter((issue) => issue.severity === 'ERROR'), + })); + + return { + richResultType: item.richResultType, + items: filteredItems, + }; + }, + ).filter((item) => item.items.length > 0); + + if (filteredRichResults.length > 0) { + filteredRichResults.verdict = inspectionResult.richResultsResult.verdict; + log.info(`Found ${filteredRichResults.length} rich results issues for URL: ${page}`); + } else { + log.info(`No rich results issues found for URL: ${page}`); + } + + return { + inspectionUrl: page, + indexStatusResult: filteredIndexStatusResult, + richResults: filteredRichResults, + }; + } catch (error) { + log.error(`Failed to inspect URL: ${page}. 
Error: ${error.message}`); + return { + inspectionUrl: page, + error: error.message, + }; + } + }); + + return Promise.all(urlInspectionResult); +} diff --git a/test/audits/pdp.test.js b/test/audits/pdp.test.js index 38c94a2e..5599a2a9 100644 --- a/test/audits/pdp.test.js +++ b/test/audits/pdp.test.js @@ -16,7 +16,7 @@ import { expect, use } from 'chai'; import sinonChai from 'sinon-chai'; import sinon from 'sinon'; -import { pdpIndexabilityRunner } from '../../src/url-inspect/pdp-handler.js'; +import { gscPdpStructuredDataHandler } from '../../src/url-inspect/pdp-handler.js'; use(sinonChai); @@ -119,7 +119,7 @@ describe('URLInspect Audit', () => { it('should successfully return a filtered result of the url inspection result', async () => { urlInspectStub.resolves(fullUrlInspectionResult); - const auditData = await pdpIndexabilityRunner('https://www.example.com', context, siteStub); + const auditData = await gscPdpStructuredDataHandler('https://www.example.com', context, siteStub); expect(auditData.auditResult).to.deep.equal( [ @@ -229,7 +229,7 @@ describe('URLInspect Audit', () => { delete fullUrlInspectionResult.inspectionResult.richResultsResult; urlInspectStub.resolves(fullUrlInspectionResult); - const auditData = await pdpIndexabilityRunner('https://www.example.com', context, siteStub); + const auditData = await gscPdpStructuredDataHandler('https://www.example.com', context, siteStub); expect(auditData.auditResult[0].richResults).to.equal(undefined); }); @@ -241,7 +241,7 @@ describe('URLInspect Audit', () => { .richResultsResult.detectedItems[1].items[0].issues[1]; urlInspectStub.resolves(fullUrlInspectionResult); - const auditData = await pdpIndexabilityRunner('https://www.example.com', context, siteStub); + const auditData = await gscPdpStructuredDataHandler('https://www.example.com', context, siteStub); expect(auditData.auditResult[0].richResults).to.deep.equal([]); expect(auditData.auditResult[1].richResults).to.deep.equal([]); @@ -252,7 +252,7 @@ 
describe('URLInspect Audit', () => { getProductDetailPages: () => [], }); try { - await pdpIndexabilityRunner('https://www.example.com', context, siteStub); + await gscPdpStructuredDataHandler('https://www.example.com', context, siteStub); } catch (error) { expect(error.message).to.equal('No top pages found for site: https://www.example.com'); }