diff --git a/src/canonical/handler.js b/src/canonical/handler.js
index 8b295590..edd78a81 100644
--- a/src/canonical/handler.js
+++ b/src/canonical/handler.js
@@ -304,6 +304,110 @@ async function validateCanonicalTag(url, log) {
 //   };
 // }
 
+/**
+ * Validates the format of a canonical URL against a base URL.
+ *
+ * Runs four checks in order (absolute URL, same protocol as the base, same
+ * domain as the base, all lowercase) and records one result per check.
+ * A canonical URL that cannot be parsed as an absolute URL is reported as a
+ * failed CANONICAL_URL_ABSOLUTE check instead of throwing; it is then resolved
+ * against the base URL so that the remaining checks can still run.
+ *
+ * @param {string} canonicalUrl - The canonical URL to validate.
+ * @param {string} baseUrl - The base URL to compare against.
+ * @param {object} log - Logger; only log.info is called.
+ * @returns {Array} Array of check results, each with a check and error if the check failed.
+ * @throws {TypeError} If baseUrl is not a valid absolute URL.
+ */
+function validateCanonicalUrlFormat(canonicalUrl, baseUrl, log) {
+  const base = new URL(baseUrl);
+  const checks = [];
+
+  // new URL() without a base throws on relative input, so a parse failure means
+  // "not absolute" rather than a fatal error.
+  let url = null;
+  try {
+    url = new URL(canonicalUrl);
+  } catch (e) {
+    // Not parseable on its own: treated as a non-absolute URL below.
+  }
+  const isAbsolute = url !== null && (url.protocol === 'http:' || url.protocol === 'https:');
+
+  // Check if the canonical URL is absolute
+  if (!isAbsolute) {
+    checks.push({
+      check: ChecksAndErrors.CANONICAL_URL_ABSOLUTE.check,
+      error: ChecksAndErrors.CANONICAL_URL_ABSOLUTE.error,
+    });
+    log.info(`Canonical URL is not absolute: ${canonicalUrl}`);
+  } else {
+    checks.push({
+      check: ChecksAndErrors.CANONICAL_URL_ABSOLUTE.check,
+      success: true,
+    });
+    log.info(`Canonical URL is absolute: ${canonicalUrl}`);
+  }
+
+  // Resolve a relative canonical URL against the base so protocol and domain can
+  // still be compared; if even that fails, those two checks are skipped.
+  if (url === null) {
+    try {
+      url = new URL(canonicalUrl, base);
+    } catch (e) {
+      log.info(`Canonical URL could not be parsed: ${canonicalUrl}`);
+    }
+  }
+
+  if (url !== null) {
+    // Check if the canonical URL has the same protocol as the base URL
+    if (url.protocol !== base.protocol) {
+      checks.push({
+        check: ChecksAndErrors.CANONICAL_URL_SAME_PROTOCOL.check,
+        error: ChecksAndErrors.CANONICAL_URL_SAME_PROTOCOL.error,
+      });
+      log.info(`Canonical URL does not have the same protocol as base URL: ${canonicalUrl}`);
+    } else {
+      checks.push({
+        check: ChecksAndErrors.CANONICAL_URL_SAME_PROTOCOL.check,
+        success: true,
+      });
+      log.info(`Canonical URL has the same protocol as base URL: ${canonicalUrl}`);
+    }
+
+    // Check if the canonical URL has the same domain as the base URL
+    if (url.hostname !== base.hostname) {
+      checks.push({
+        check: ChecksAndErrors.CANONICAL_URL_SAME_DOMAIN.check,
+        error: ChecksAndErrors.CANONICAL_URL_SAME_DOMAIN.error,
+      });
+      log.info(`Canonical URL does not have the same domain as base URL: ${canonicalUrl}`);
+    } else {
+      checks.push({
+        check: ChecksAndErrors.CANONICAL_URL_SAME_DOMAIN.check,
+        success: true,
+      });
+      log.info(`Canonical URL has the same domain as base URL: ${canonicalUrl}`);
+    }
+  }
+
+  // Check if the canonical URL is in lowercase
+  if (canonicalUrl !== canonicalUrl.toLowerCase()) {
+    checks.push({
+      check: ChecksAndErrors.CANONICAL_URL_LOWERCASED.check,
+      error: ChecksAndErrors.CANONICAL_URL_LOWERCASED.error,
+    });
+    log.info(`Canonical URL is not in lowercase: ${canonicalUrl}`);
+  } else {
+    checks.push({
+      check: ChecksAndErrors.CANONICAL_URL_LOWERCASED.check,
+      success: true,
+    });
+    log.info(`Canonical URL is in lowercase: ${canonicalUrl}`);
+  }
+
+  return checks;
+}
+
 /**
  * Recursively validates the contents of a canonical URL.
  *
@@ -323,7 +427,7 @@ async function validateCanonicalUrlContentsRecursive(canonicalUrl, log, visitedU
       error: ChecksAndErrors.CANONICAL_URL_NO_REDIRECT.error,
       success: false,
     });
-    return { canonicalUrl, checks };
+    return checks;
   }
 
   // Add the current URL to the visited set
@@ -390,83 +494,6 @@ async function validateCanonicalUrlContentsRecursive(canonicalUrl, log, visitedU
     });
   }
 
-  return { canonicalUrl, checks };
-}
-
-/**
- * Validates the format of a canonical URL against a base URL.
- *
- * @param {string} canonicalUrl - The canonical URL to validate.
- * @param {string} baseUrl - The base URL to compare against.
- * @param log
- * @returns {Array} Array of check results, each with a check and error if the check failed.
- */
-
-function validateCanonicalUrlFormat(canonicalUrl, baseUrl, log) {
-  const url = new URL(canonicalUrl);
-  const base = new URL(baseUrl);
-  const checks = [];
-
-  // Check if the canonical URL is absolute
-  if (!url.href.startsWith('http://') && !url.href.startsWith('https://')) {
-    checks.push({
-      check: ChecksAndErrors.CANONICAL_URL_ABSOLUTE.check,
-      error: ChecksAndErrors.CANONICAL_URL_ABSOLUTE.error,
-    });
-    log.info(`Canonical URL is not absolute: ${canonicalUrl}`);
-  } else {
-    checks.push({
-      check: ChecksAndErrors.CANONICAL_URL_ABSOLUTE.check,
-      success: true,
-    });
-    log.info(`Canonical URL is absolute: ${canonicalUrl}`);
-  }
-
-  // Check if the canonical URL has the same protocol as the base URL
-  if (!url.href.startsWith(base.protocol)) {
-    checks.push({
-      check: ChecksAndErrors.CANONICAL_URL_SAME_PROTOCOL.check,
-      error: ChecksAndErrors.CANONICAL_URL_SAME_PROTOCOL.error,
-    });
-    log.info(`Canonical URL does not have the same protocol as base URL: ${canonicalUrl}`);
-  } else {
-    checks.push({
-      check: ChecksAndErrors.CANONICAL_URL_SAME_PROTOCOL.check,
-      success: true,
-    });
-    log.info(`Canonical URL has the same protocol as base URL: ${canonicalUrl}`);
-  }
-
-  // Check if the canonical URL has the same domain as the base URL
-  if (url.hostname !== base.hostname) {
-    checks.push({
-      check: ChecksAndErrors.CANONICAL_URL_SAME_DOMAIN.check,
-      error: ChecksAndErrors.CANONICAL_URL_SAME_DOMAIN.error,
-    });
-    log.info(`Canonical URL does not have the same domain as base URL: ${canonicalUrl}`);
-  } else {
-    checks.push({
-      check: ChecksAndErrors.CANONICAL_URL_SAME_DOMAIN.check,
-      success: true,
-    });
-    log.info(`Canonical URL has the same domain as base URL: ${canonicalUrl}`);
-  }
-
-  // Check if the canonical URL is in lowercase
-  if (canonicalUrl !== canonicalUrl.toLowerCase()) {
-    checks.push({
-      check: ChecksAndErrors.CANONICAL_URL_LOWERCASED.check,
-      error: ChecksAndErrors.CANONICAL_URL_LOWERCASED.error,
-    });
-    log.info(`Canonical URL is not in lowercase: ${canonicalUrl}`);
-  } else {
-    checks.push({
-      check: ChecksAndErrors.CANONICAL_URL_LOWERCASED.check,
-      success: true,
-    });
-    log.info(`Canonical URL is in lowercase: ${canonicalUrl}`);
-  }
-
   return checks;
 }
 