Skip to content

Commit

Permalink
fix: sitemap usecase
Browse files: browse the repository at this point in the history
  • Loading branch information
AndreiAlexandruParaschiv committed Aug 14, 2024
1 parent 7a2ab19 commit 0bbc1c4
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 12 deletions.
22 changes: 15 additions & 7 deletions src/sitemap/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -190,34 +190,42 @@ export async function getBaseUrlPagesFromSitemaps(baseUrl, urls) {
const baseUrlVariant = toggleWWW(baseUrl);
const contentsCache = {};

// Prepare all promises for checking each sitemap URL.
const checkPromises = urls.map(async (url) => {
const fillSitemapContents = async (url) => {
const urlData = await checkSitemap(url);
contentsCache[url] = urlData;
return { url, urlData };
});
};

// Prepare all promises for checking each sitemap URL.
const checkPromises = urls.map(fillSitemapContents);

// Execute all checks concurrently.
const results = await Promise.all(checkPromises);
const matchingUrls = [];

// Process each result.
results.forEach(({ url, urlData }) => {
for (const { url, urlData } of results) {
if (urlData.existsAndIsValid) {
if (urlData.details && urlData.details.isSitemapIndex) {
console.log(`Sitemap Index found: ${url}`);
const extractedSitemaps = getSitemapUrlsFromSitemapIndex(urlData.details.sitemapContent);
console.log(`Extracted Sitemaps from Index: ${extractedSitemaps}`);
extractedSitemaps.forEach((extractedSitemapUrl) => {
for (const extractedSitemapUrl of extractedSitemaps) {
if (!contentsCache[extractedSitemapUrl]) {
matchingUrls.push(extractedSitemapUrl);
try {
// eslint-disable-next-line no-await-in-loop
await fillSitemapContents(extractedSitemapUrl);
} catch (err) {
// not available
}
}
});
}
} else if (url.startsWith(baseUrl) || url.startsWith(baseUrlVariant)) {
matchingUrls.push(url);
}
}
});
}

console.log(`Matching URLs for further processing: ${matchingUrls}`);

Expand Down
17 changes: 12 additions & 5 deletions test/audits/sitemap.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,14 @@ describe('Sitemap Audit', () => {
+ `<sitemap><loc>${url}/sitemap_bar.xml</loc></sitemap>\n`
+ '</sitemapindex>';

const payload1 = '<?xml version="1.0" encoding="UTF-8"?>\n'
+ '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
+ `<url> <loc>${url}/foo</loc></url>\n`
+ `<url> <loc>${url}/bar</loc></url>\n`
+ '</urlset>';

beforeEach('setup', () => {
nock.cleanAll();
context = new MockContextBuilder()
.withSandbox(sandbox)
.build(message);
Expand Down Expand Up @@ -132,7 +139,7 @@ describe('Sitemap Audit', () => {
});
});

it.skip('runs successfully for sitemap extracted from robots.txt through sitemap index', async () => {
it('runs successfully for sitemap extracted from robots.txt through sitemap index', async () => {
nock(url)
.get('/robots.txt')
.reply(200, `Sitemap: ${url}/sitemap_index.xml`);
Expand All @@ -142,11 +149,11 @@ describe('Sitemap Audit', () => {
.reply(200, sitemapIndex);

nock(url)
.head('/sitemap_foo.xml')
.reply(200);
.get('/sitemap_foo.xml')
.reply(200, payload1);

nock(url)
.head('/sitemap_bar.xml')
.get('/sitemap_bar.xml')
.reply(200);

nock(url)
Expand Down Expand Up @@ -574,7 +581,7 @@ describe('Sitemap Audit', () => {
});
});

it.skip('should return success when sitemap_index.xml is found', async () => {
it('should return success when sitemap_index.xml is found', async () => {
nock(url)
.get('/robots.txt')
.reply(200, 'Allow: /');
Expand Down

0 comments on commit 0bbc1c4

Please sign in to comment.