Skip to content

Commit

Permalink
fix: unexpected close tag issue
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreiAlexandruParaschiv committed Aug 20, 2024
1 parent 537a6ac commit 7a52c1a
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 20 deletions.
6 changes: 3 additions & 3 deletions src/sitemap/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,10 @@ export async function checkRobotsForSitemap(protocol, domain, log) {
* @returns {boolean} - True if the sitemap content is valid, otherwise false.
*/
export function isSitemapContentValid(sitemapContent) {
  // Surrounding whitespace is ignored so a leading blank line or trailing
  // newline does not defeat the prefix/suffix checks below.
  const payload = sitemapContent.payload.trim();

  // The payload must open with an XML declaration and end on a closing '>'.
  // A payload that stops mid-tag fails here instead of reaching the XML parser.
  const looksLikeXml = payload.startsWith('<?xml') && payload.endsWith('>');

  // The structural check and the MIME type check must BOTH pass; either one
  // alone is not enough to treat the content as a sitemap.
  return looksLikeXml
    && VALID_MIME_TYPES.some((type) => sitemapContent.type.includes(type));
}

/**
* Checks the validity and existence of a sitemap by fetching its content.
*
Expand Down
37 changes: 20 additions & 17 deletions src/support/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -91,23 +91,26 @@ export function extractDomainAndProtocol(inputUrl) {
* @returns {Array<string>} An array of URLs extracted from the sitemap.
*/
export function extractUrlsFromSitemap(content, log, tagName = 'url') {
  // Parsing and extraction run inside try/catch: any failure while reading the
  // payload is logged and reported as "no URLs" instead of propagating to the caller.
  try {
    const dom = new JSDOM(content.payload, { contentType: 'text/xml' });
    const { document } = dom.window;
    const elements = document.getElementsByTagName(tagName);

    const urls = [];
    for (const element of Array.from(elements)) {
      // Only the first <loc> child of each element is considered.
      const loc = element.getElementsByTagName('loc')[0];
      // Trim before testing so a whitespace-only <loc> is skipped rather than
      // returned as an empty-string URL.
      const url = loc && loc.textContent ? loc.textContent.trim() : '';
      if (url) {
        log.info('Extracted URL:', url);
        urls.push(url);
      }
    }
    return urls;
  } catch (error) {
    log.error(`Failed to parse XML content in sitemap: ${error.message}`);
    log.error(`Content received: ${content.payload}`);
    return [];
  }
}

/**
Expand Down

0 comments on commit 7a52c1a

Please sign in to comment.