/**
 * Narrows a list of backlinks down to the ones whose target is still broken.
 *
 * Every backlink's `url_to` is checked concurrently through `isStillBrokenURL`
 * (labelled 'backlink' for logging); a backlink is kept only when that check
 * resolves to a truthy value. The relative order of the kept entries matches
 * the input order.
 *
 * @async
 * @param {Array<{url_to: string}>} backlinks - Backlinks to verify.
 * @param {Object} log - Logger handed through to `isStillBrokenURL`.
 * @returns {Promise<Array<{url_to: string}>>} The backlinks that are still broken.
 */
export async function filterOutValidBacklinks(backlinks, log) {
  const checkTarget = async ({ url_to: target }) => isStillBrokenURL(target, 'backlink', log);
  const stillBroken = await Promise.all(backlinks.map(checkTarget));

  // stillBroken[i] is the verdict for backlinks[i]; Promise.all preserves order.
  return backlinks.filter((_backlink, position) => stillBroken[position]);
}
// Default per-request timeout (milliseconds) used by isStillBrokenURL.
const TIMEOUT = 3000;

/**
 * Fetches a URL, aborting the request if it does not complete within `timeout`.
 *
 * A timed-out request is not treated as an exception: it is logged as a warning
 * and reported as a synthetic `{ ok: false, status: 408 }` result. Any other
 * failure (DNS, connection refused, TLS, ...) is rethrown so the caller sees the
 * real cause instead of a `null` response.
 *
 * @async
 * @param {string} url - The URL to fetch.
 * @param {number} timeout - The timeout duration in milliseconds.
 * @param {Object} log - The logging object to record information and errors.
 * @returns {Promise<{ok: boolean, status: number}>} - The fetch response, or the
 *   synthetic 408 result when the request was aborted by the timeout.
 * @throws {Error} Any non-abort error raised by the underlying fetch.
 */
export const fetchWithTimeout = async (url, timeout, log) => {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);

  try {
    return await fetch(url, { signal: controller.signal });
  } catch (error) {
    if (error instanceof AbortError) {
      log.warn(`Request to ${url} timed out after ${timeout}ms`);
      return { ok: false, status: 408 };
    }
    // Do not swallow unexpected failures: the caller decides how to report them.
    throw error;
  } finally {
    // Runs on every path (success, timeout, rethrow) so the timer never leaks.
    clearTimeout(timer);
  }
};

/**
 * Checks whether a URL is still broken, i.e. does not answer with an OK response.
 *
 * Client errors other than 404 are logged as warnings because they usually mean
 * the check itself was rejected (rate limiting, auth, ...) rather than the page
 * being gone. A request that fails outright is logged as an error and the URL is
 * considered broken.
 *
 * @async
 * @param {string} url - The URL to check.
 * @param {string} label - Prefix used in log messages to say what kind of URL
 *   this is (for example 'backlink').
 * @param {Object} log - The logging object to record warnings and errors.
 * @returns {Promise<boolean>} `true` when the URL is broken or could not be
 *   checked, `false` when it responded OK.
 */
export const isStillBrokenURL = async (url, label, log) => {
  try {
    const response = await fetchWithTimeout(url, TIMEOUT, log);
    const { ok, status } = response;
    const isUnexpectedClientError = status !== 404 && status >= 400 && status < 500;
    if (!ok && isUnexpectedClientError) {
      log.warn(`${label} ${url} returned status ${status}`);
    }
    return !ok;
  } catch (error) {
    log.error(`Failed to check ${label} ${url}: ${error.message}`);
    return true;
  }
};
URL contains a domain with a non-www subdomain. * diff --git a/test/audits/backlinks.test.js b/test/audits/backlinks.test.js index 2c09f8d1..fc441ac9 100644 --- a/test/audits/backlinks.test.js +++ b/test/audits/backlinks.test.js @@ -28,6 +28,7 @@ chai.use(sinonChai); chai.use(chaiAsPromised); const { expect } = chai; +// eslint-disable-next-line func-names describe('Backlinks Tests', function () { this.timeout(10000); let message; @@ -517,7 +518,7 @@ describe('Backlinks Tests', function () { expect(context.sqs.sendMessage).to.have.been.calledOnce; expect(context.sqs.sendMessage).to.have.been .calledWith(context.env.AUDIT_RESULTS_QUEUE_URL, expectedMessage); - expect(context.log.warn).to.have.been.calledWith('Backlink https://foo.com/returns-429 returned status 429'); + expect(context.log.warn).to.have.been.calledWith('backlink https://foo.com/returns-429 returned status 429'); expect(context.log.info).to.have.been.calledWith('Successfully audited site2 for broken-backlinks type audit'); });