From 0bbc1c4a711b07bfbdebcce7efd0bc5b7b0a2c79 Mon Sep 17 00:00:00 2001 From: paraschi Date: Wed, 14 Aug 2024 18:14:00 +0300 Subject: [PATCH] fix: sitemap usecase --- src/sitemap/handler.js | 22 +++++++++++++++------- test/audits/sitemap.test.js | 17 ++++++++++++----- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/sitemap/handler.js b/src/sitemap/handler.js index a3b6d002..d7232c03 100644 --- a/src/sitemap/handler.js +++ b/src/sitemap/handler.js @@ -190,34 +190,42 @@ export async function getBaseUrlPagesFromSitemaps(baseUrl, urls) { const baseUrlVariant = toggleWWW(baseUrl); const contentsCache = {}; - // Prepare all promises for checking each sitemap URL. - const checkPromises = urls.map(async (url) => { + const fillSitemapContents = async (url) => { const urlData = await checkSitemap(url); contentsCache[url] = urlData; return { url, urlData }; - }); + }; + + // Prepare all promises for checking each sitemap URL. + const checkPromises = urls.map(fillSitemapContents); // Execute all checks concurrently. const results = await Promise.all(checkPromises); const matchingUrls = []; // Process each result. - results.forEach(({ url, urlData }) => { + for (const { url, urlData } of results) { if (urlData.existsAndIsValid) { if (urlData.details && urlData.details.isSitemapIndex) { console.log(`Sitemap Index found: ${url}`); const extractedSitemaps = getSitemapUrlsFromSitemapIndex(urlData.details.sitemapContent); console.log(`Extracted Sitemaps from Index: ${extractedSitemaps}`); - extractedSitemaps.forEach((extractedSitemapUrl) => { + for (const extractedSitemapUrl of extractedSitemaps) { if (!contentsCache[extractedSitemapUrl]) { matchingUrls.push(extractedSitemapUrl); + try { + // eslint-disable-next-line no-await-in-loop + await fillSitemapContents(extractedSitemapUrl); + } catch (err) { + // not available + } } - }); + } } else if (url.startsWith(baseUrl) || url.startsWith(baseUrlVariant)) { matchingUrls.push(url); } } - }); + } console.log(`Matching URLs for further processing: ${matchingUrls}`); diff --git a/test/audits/sitemap.test.js b/test/audits/sitemap.test.js index 1bf1e5dc..7d349429 100644 --- a/test/audits/sitemap.test.js +++ b/test/audits/sitemap.test.js @@ -65,7 +65,14 @@ describe('Sitemap Audit', () => { + `${url}/sitemap_bar.xml\n` + ''; + const payload1 = '\n' + + '\n' + + ` ${url}/foo\n` + + ` ${url}/bar\n` + + ''; + beforeEach('setup', () => { + nock.cleanAll(); context = new MockContextBuilder() .withSandbox(sandbox) .build(message); @@ -132,7 +139,7 @@ describe('Sitemap Audit', () => { }); }); - it.skip('runs successfully for sitemap extracted from robots.txt through sitemap index', async () => { + it('runs successfully for sitemap extracted from robots.txt through sitemap index', async () => { nock(url) .get('/robots.txt') .reply(200, `Sitemap: ${url}/sitemap_index.xml`); @@ -142,11 +149,11 @@ describe('Sitemap Audit', () => { .reply(200, sitemapIndex); nock(url) - .head('/sitemap_foo.xml') - .reply(200); + .get('/sitemap_foo.xml') + .reply(200, payload1); nock(url) - .head('/sitemap_bar.xml') + .get('/sitemap_bar.xml') .reply(200); nock(url) @@ -574,7 +581,7 @@ describe('Sitemap Audit', () => { }); }); - it.skip('should return success when sitemap_index.xml is found', async () => { + it('should return success when sitemap_index.xml is found', async () => { nock(url) .get('/robots.txt') .reply(200, 'Allow: /');