diff --git a/src/sitemap/handler.js b/src/sitemap/handler.js
index a3b6d002..d7232c03 100644
--- a/src/sitemap/handler.js
+++ b/src/sitemap/handler.js
@@ -190,34 +190,42 @@ export async function getBaseUrlPagesFromSitemaps(baseUrl, urls) {
const baseUrlVariant = toggleWWW(baseUrl);
const contentsCache = {};
- // Prepare all promises for checking each sitemap URL.
- const checkPromises = urls.map(async (url) => {
+ const fillSitemapContents = async (url) => {
const urlData = await checkSitemap(url);
contentsCache[url] = urlData;
return { url, urlData };
- });
+ };
+
+ // Prepare all promises for checking each sitemap URL.
+ const checkPromises = urls.map(fillSitemapContents);
// Execute all checks concurrently.
const results = await Promise.all(checkPromises);
const matchingUrls = [];
// Process each result.
- results.forEach(({ url, urlData }) => {
+ for (const { url, urlData } of results) {
if (urlData.existsAndIsValid) {
if (urlData.details && urlData.details.isSitemapIndex) {
console.log(`Sitemap Index found: ${url}`);
const extractedSitemaps = getSitemapUrlsFromSitemapIndex(urlData.details.sitemapContent);
console.log(`Extracted Sitemaps from Index: ${extractedSitemaps}`);
- extractedSitemaps.forEach((extractedSitemapUrl) => {
+ for (const extractedSitemapUrl of extractedSitemaps) {
if (!contentsCache[extractedSitemapUrl]) {
matchingUrls.push(extractedSitemapUrl);
+ try {
+ // eslint-disable-next-line no-await-in-loop
+ await fillSitemapContents(extractedSitemapUrl);
+ } catch (err) {
+ // Best effort: a failed check leaves the URL without a contentsCache entry.
+ }
}
- });
+ }
} else if (url.startsWith(baseUrl) || url.startsWith(baseUrlVariant)) {
matchingUrls.push(url);
}
}
- });
+ }
console.log(`Matching URLs for further processing: ${matchingUrls}`);
diff --git a/test/audits/sitemap.test.js b/test/audits/sitemap.test.js
index 1bf1e5dc..7d349429 100644
--- a/test/audits/sitemap.test.js
+++ b/test/audits/sitemap.test.js
@@ -65,7 +65,14 @@ describe('Sitemap Audit', () => {
+ `${url}/sitemap_bar.xml\n`
+ '';
+ const payload1 = '\n'
+ + '\n'
+ + ` ${url}/foo\n`
+ + ` ${url}/bar\n`
+ + '';
+
beforeEach('setup', () => {
+ nock.cleanAll();
context = new MockContextBuilder()
.withSandbox(sandbox)
.build(message);
@@ -132,7 +139,7 @@ describe('Sitemap Audit', () => {
});
});
- it.skip('runs successfully for sitemap extracted from robots.txt through sitemap index', async () => {
+ it('runs successfully for sitemap extracted from robots.txt through sitemap index', async () => {
nock(url)
.get('/robots.txt')
.reply(200, `Sitemap: ${url}/sitemap_index.xml`);
@@ -142,11 +149,11 @@ describe('Sitemap Audit', () => {
.reply(200, sitemapIndex);
nock(url)
- .head('/sitemap_foo.xml')
- .reply(200);
+ .get('/sitemap_foo.xml')
+ .reply(200, payload1);
nock(url)
- .head('/sitemap_bar.xml')
+ .get('/sitemap_bar.xml')
.reply(200);
nock(url)
@@ -574,7 +581,7 @@ describe('Sitemap Audit', () => {
});
});
- it.skip('should return success when sitemap_index.xml is found', async () => {
+ it('should return success when sitemap_index.xml is found', async () => {
nock(url)
.get('/robots.txt')
.reply(200, 'Allow: /');