Skip to content

Commit

Permalink
feat: robots.txt improve message
Browse files (browse the repository at this point in the history)
  • Loading branch information
AndreiAlexandruParaschiv committed Oct 15, 2024
1 parent 1bdb583 commit 244c2cd
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 12 deletions.
16 changes: 9 additions & 7 deletions src/sitemap/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,16 @@ export async function checkSitemap(sitemapUrl) {
* @param {Object} log - The logging object to record information and errors.
* @returns {Promise<string[]>} - A promise that resolves to an array of URLs that exist.
*/
async function filterUrlsByStatus(urls, log) {
async function filterValidUrls(urls, log) {
const fetchPromises = urls.map(async (url) => {
try {
const response = await fetch(url, { method: 'HEAD' });
if (response.status === 301 || response.status === 404) {
if (response.ok) {
return url;
} else {
log.info(`URL ${url} returned status code ${response.status}`);
return { url, status: response.status };
return null;
}
return null;
} catch (error) {
log.error(`Failed to fetch URL ${url}: ${error.message}`);
return null;
Expand All @@ -170,6 +171,7 @@ async function filterUrlsByStatus(urls, log) {

const results = await Promise.allSettled(fetchPromises);

// filter only the fulfilled promises that have a valid URL
return results
.filter((result) => result.status === 'fulfilled' && result.value !== null)
.map((result) => result.value);
Expand Down Expand Up @@ -280,9 +282,9 @@ export async function findSitemap(inputUrl, log) {

if (!sitemapUrls.length) {
const commonSitemapUrls = [`${protocol}://${domain}/sitemap.xml`, `${protocol}://${domain}/sitemap_index.xml`];
sitemapUrls = await filterUrlsByStatus(commonSitemapUrls, log);
sitemapUrls = await filterValidUrls(commonSitemapUrls, log);
if (!sitemapUrls.length) {
logMessages.push({ value: `No sitemap found in robots.txt or common paths for ${protocol}://${domain}`, error: ERROR_CODES.NO_SITEMAP_IN_ROBOTS });
logMessages.push({ value: 'No sitemap mentioned in robots.txt', error: ERROR_CODES.NO_SITEMAP_IN_ROBOTS });
return { success: false, reasons: logMessages };
}
}
Expand All @@ -299,7 +301,7 @@ export async function findSitemap(inputUrl, log) {
for (const s of extractedSitemapUrls) {
const urlsToCheck = extractedPaths[s];
// eslint-disable-next-line no-await-in-loop
const existingPages = await filterUrlsByStatus(urlsToCheck, log);
const existingPages = await filterValidUrls(urlsToCheck, log);

if (existingPages.length === 0) {
delete extractedPaths[s];
Expand Down
10 changes: 5 additions & 5 deletions test/audits/sitemap.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ describe('Sitemap Audit', () => {
sandbox.restore();
});

describe.skip('sitemapAuditRunner', () => {
describe('sitemapAuditRunner', () => {
it('runs successfully for sitemaps extracted from robots.txt', async () => {
nock(url)
.get('/robots.txt')
Expand Down Expand Up @@ -298,7 +298,7 @@ describe('Sitemap Audit', () => {
},
{
error: ERROR_CODES.NO_SITEMAP_IN_ROBOTS,
value: `No sitemap found in robots.txt or common paths for ${url}`,
value: 'No sitemap mentioned in robots.txt',
},
],
},
Expand Down Expand Up @@ -385,7 +385,7 @@ describe('Sitemap Audit', () => {
});
});

describe.skip('checkSitemap', () => {
describe('checkSitemap', () => {
it('should return SITEMAP_NOT_FOUND when the sitemap does not exist', async () => {
nock(url)
.get('/sitemap.xml')
Expand Down Expand Up @@ -429,7 +429,7 @@ describe('Sitemap Audit', () => {
});
});

describe.skip('getBaseUrlPagesFromSitemaps', () => {
describe('getBaseUrlPagesFromSitemaps', () => {
const sampleSitemapMoreUrls = '<?xml version="1.0" encoding="UTF-8"?>\n'
+ '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
+ `<url> <loc>${url}/foo</loc></url>\n`
Expand Down Expand Up @@ -481,7 +481,7 @@ describe('Sitemap Audit', () => {
});
});

describe.skip('findSitemap', () => {
describe('findSitemap', () => {
it('should return error when URL is invalid', async () => {
const result = await findSitemap('not a valid url');
expect(result.success).to.equal(false);
Expand Down

0 comments on commit 244c2cd

Please sign in to comment.