From 61cb387144063480fb442da5d92eabf86d943b94 Mon Sep 17 00:00:00 2001 From: paraschi Date: Thu, 22 Aug 2024 21:11:10 +0300 Subject: [PATCH 1/4] fix: same domain check with www for baseUrl --- src/canonical/handler.js | 2 +- test/audits/canonical.test.js | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/canonical/handler.js b/src/canonical/handler.js index 8888c735..34b75b70 100644 --- a/src/canonical/handler.js +++ b/src/canonical/handler.js @@ -352,7 +352,7 @@ export function validateCanonicalFormat(canonicalUrl, baseUrl, log) { } // Check if the canonical URL has the same domain as the base URL - if (url.hostname !== base.hostname) { + if (url.hostname !== base.hostname && url.hostname !== `www.${base.hostname}` && `www.${url.hostname}` !== base.hostname) { checks.push({ check: CANONICAL_CHECKS.CANONICAL_URL_SAME_DOMAIN.check, success: false, diff --git a/test/audits/canonical.test.js b/test/audits/canonical.test.js index d80f64a1..1ec8be4c 100644 --- a/test/audits/canonical.test.js +++ b/test/audits/canonical.test.js @@ -325,6 +325,28 @@ describe('Canonical URL Tests', () => { expect(log.info).to.have.been.calledWith('Canonical URL https://example.com uses a different protocol than base URL http://example.com'); }); + it('should pass when canonical URL and base URL are identical, including the www prefix', () => { + const canonicalUrl = 'https://www.example.com'; + const baseUrl = 'https://example.com'; + const result = validateCanonicalFormat(canonicalUrl, baseUrl, log); + + expect(result).to.deep.include({ + check: 'canonical-url-same-domain', + success: true, + }); + }); + + it('should pass when canonical URL and base URL are identical, including the www prefix', () => { + const canonicalUrl = 'https://example.com'; + const baseUrl = 'https://www.example.com'; + const result = validateCanonicalFormat(canonicalUrl, baseUrl, log); + + expect(result).to.deep.include({ + check: 'canonical-url-same-domain', + success: true, + }); + }); + it('should fail if the canonical URL is not absolute', () => { const canonicalUrl = '/relative/url'; const baseUrl = 'http://example.com'; From 533b858615ae3c0442086829e63f854c8256b61f Mon Sep 17 00:00:00 2001 From: paraschi Date: Fri, 23 Aug 2024 12:39:57 +0300 Subject: [PATCH 2/4] fix: simplify tests --- test/audits/canonical.test.js | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/test/audits/canonical.test.js b/test/audits/canonical.test.js index 1ec8be4c..62462f2a 100644 --- a/test/audits/canonical.test.js +++ b/test/audits/canonical.test.js @@ -325,25 +325,19 @@ describe('Canonical URL Tests', () => { expect(log.info).to.have.been.calledWith('Canonical URL https://example.com uses a different protocol than base URL http://example.com'); }); - it('should pass when canonical URL and base URL are identical, including the www prefix', () => { - const canonicalUrl = 'https://www.example.com'; - const baseUrl = 'https://example.com'; - const result = validateCanonicalFormat(canonicalUrl, baseUrl, log); - - expect(result).to.deep.include({ - check: 'canonical-url-same-domain', - success: true, - }); - }); + it('should pass when canonical URL and base URL are identical, regardless of the www prefix', () => { + const cases = [ + { canonicalUrl: 'https://www.example.com', baseUrl: 'https://example.com' }, + { canonicalUrl: 'https://example.com', baseUrl: 'https://www.example.com' }, + ]; - it('should pass when canonical URL and base URL are identical, including the www prefix', () => { - const canonicalUrl = 'https://example.com'; - const baseUrl = 'https://www.example.com'; - const result = validateCanonicalFormat(canonicalUrl, baseUrl, log); + cases.forEach(({ canonicalUrl, baseUrl }) => { + const result = validateCanonicalFormat(canonicalUrl, baseUrl, log); - expect(result).to.deep.include({ - check: 'canonical-url-same-domain', - success: true, + expect(result).to.deep.include({ + check: 'canonical-url-same-domain', + success: true, + }); }); }); From 329f06bed744d83f402d19fd1e492f90ec2a3f16 Mon Sep 17 00:00:00 2001 From: paraschi Date: Fri, 23 Aug 2024 16:39:49 +0300 Subject: [PATCH 3/4] fix: pr review --- src/canonical/handler.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/canonical/handler.js b/src/canonical/handler.js index 34b75b70..2697f02a 100644 --- a/src/canonical/handler.js +++ b/src/canonical/handler.js @@ -11,6 +11,7 @@ */ import { JSDOM } from 'jsdom'; +import { composeBaseURL } from '@adobe/spacecat-shared-utils'; import { fetch } from '../support/utils.js'; import { AuditBuilder } from '../common/audit-builder.js'; import { noopUrlResolver } from '../common/audit.js'; @@ -352,7 +353,7 @@ export function validateCanonicalFormat(canonicalUrl, baseUrl, log) { } // Check if the canonical URL has the same domain as the base URL - if (url.hostname !== base.hostname && url.hostname !== `www.${base.hostname}` && `www.${url.hostname}` !== base.hostname) { + if (composeBaseURL(url.hostname) !== composeBaseURL(base.hostname)) { checks.push({ check: CANONICAL_CHECKS.CANONICAL_URL_SAME_DOMAIN.check, success: false, From 8b1979ffe40d6af43cd136bbc752e1e1f563105b Mon Sep 17 00:00:00 2001 From: paraschi Date: Fri, 23 Aug 2024 19:15:28 +0300 Subject: [PATCH 4/4] fix: ready to merge --- test/audits/canonical.test.js | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/audits/canonical.test.js b/test/audits/canonical.test.js index 62462f2a..3b2dc7dd 100644 --- a/test/audits/canonical.test.js +++ b/test/audits/canonical.test.js @@ -98,7 +98,7 @@ describe('Canonical URL Tests', () => { describe('validateCanonicalTag', () => { it('should handle missing canonical tag', async () => { const url = 'http://example.com'; - const html = ''; + const html = 'test'; nock('http://example.com').get('/').reply(200, html); const result = await validateCanonicalTag(url, log); @@ -153,7 +153,7 @@ describe('Canonical URL Tests', () => { it('should handle invalid canonical URL correctly', async () => { const url = 'http://example.com'; - const html = ''; + const html = 'test'; nock(url).get('/').reply(200, html); const result = await validateCanonicalTag(url, log); @@ -168,7 +168,7 @@ describe('Canonical URL Tests', () => { it('should handle empty canonical tag', async () => { const url = 'http://example.com'; - const html = ''; + const html = 'test'; nock(url).get('/').reply(200, html); const result = await validateCanonicalTag(url, log); @@ -184,7 +184,7 @@ describe('Canonical URL Tests', () => { it('should handle multiple canonical tags', async () => { const url = 'http://example.com'; - const html = ''; + const html = 'test'; nock(url).get('/').reply(200, html); const result = await validateCanonicalTag(url, log); @@ -198,7 +198,7 @@ describe('Canonical URL Tests', () => { it('should fail if the canonical tag is not in the head section', async () => { const url = 'http://example.com'; - const html = ''; + const html = 'test'; nock(url).get('/').reply(200, html); const result = await validateCanonicalTag(url, log); @@ -356,7 +356,7 @@ describe('Canonical URL Tests', () => { it('should pass if the canonical URL points to itself', async () => { const url = 'http://example.com'; - const html = ``; + const html = `test`; nock(url).get('/').reply(200, html); const result = await validateCanonicalTag(url, log); @@ -396,7 +396,7 @@ describe('Canonical URL Tests', () => { it('should fail if the canonical URL does not point to itself', async () => { const url = 'http://example.com'; const canonicalUrl = 'http://example.com/other-page'; - const html = ``; + const html = `test`; nock(url).get('/').reply(200, html); const result = await validateCanonicalTag(url, log); @@ -486,9 +486,9 @@ describe('Canonical URL Tests', () => { const expectedCanonicalUrl = 'https://example.com/canonical-page'; const html = ` - + - + test

Test Page

@@ -534,7 +534,7 @@ describe('Canonical URL Tests', () => { describe('canonicalAuditRunner', () => { it('should run canonical audit successfully', async () => { const baseURL = 'http://example.com'; - const html = ``; + const html = `test`; nock('http://example.com').get('/page1').reply(200, html); nock(baseURL).get('/').reply(200, html);