Skip to content

Commit

Permalink
fix: same domain check with www for baseUrl (#375)
Browse files Browse the repository at this point in the history
* fix: same domain check with www for baseUrl

* fix: simplify tests

* fix: pr review

* fix: ready to merge
  • Loading branch information
AndreiAlexandruParaschiv authored Aug 23, 2024
1 parent 86e0b05 commit eb30da3
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 11 deletions.
3 changes: 2 additions & 1 deletion src/canonical/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
*/

import { JSDOM } from 'jsdom';
import { composeBaseURL } from '@adobe/spacecat-shared-utils';
import { fetch } from '../support/utils.js';
import { AuditBuilder } from '../common/audit-builder.js';
import { noopUrlResolver } from '../common/audit.js';
Expand Down Expand Up @@ -352,7 +353,7 @@ export function validateCanonicalFormat(canonicalUrl, baseUrl, log) {
}

// Check if the canonical URL has the same domain as the base URL
if (url.hostname !== base.hostname) {
if (composeBaseURL(url.hostname) !== composeBaseURL(base.hostname)) {
checks.push({
check: CANONICAL_CHECKS.CANONICAL_URL_SAME_DOMAIN.check,
success: false,
Expand Down
36 changes: 26 additions & 10 deletions test/audits/canonical.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ describe('Canonical URL Tests', () => {
describe('validateCanonicalTag', () => {
it('should handle missing canonical tag', async () => {
const url = 'http://example.com';
const html = '<!DOCTYPE html><html><head></head><body></body></html>';
const html = '<!DOCTYPE html><html lang="en"><head><title>test</title></head><body></body></html>';
nock('http://example.com').get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand Down Expand Up @@ -153,7 +153,7 @@ describe('Canonical URL Tests', () => {

it('should handle invalid canonical URL correctly', async () => {
const url = 'http://example.com';
const html = '<html><head><link rel="canonical" href="invalid-url"></head><body></body></html>';
const html = '<html lang="en"><head><link rel="canonical" href="invalid-url"><title>test</title></head><body></body></html>';
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand All @@ -168,7 +168,7 @@ describe('Canonical URL Tests', () => {

it('should handle empty canonical tag', async () => {
const url = 'http://example.com';
const html = '<html><head><link rel="canonical" href=""></head><body></body></html>';
const html = '<html lang="en"><head><link rel="canonical" href=""><title>test</title></head><body></body></html>';
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand All @@ -184,7 +184,7 @@ describe('Canonical URL Tests', () => {

it('should handle multiple canonical tags', async () => {
const url = 'http://example.com';
const html = '<html><head><link rel="canonical" href="http://example.com/page1"><link rel="canonical" href="http://example.com/page2"></head><body></body></html>';
const html = '<html lang="en"><head><link rel="canonical" href="http://example.com/page1"><link rel="canonical" href="http://example.com/page2"><title>test</title></head><body></body></html>';
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand All @@ -198,7 +198,7 @@ describe('Canonical URL Tests', () => {

it('should fail if the canonical tag is not in the head section', async () => {
const url = 'http://example.com';
const html = '<html><head></head><body><link rel="canonical" href="http://example.com"></body></html>';
const html = '<html lang="en"><head><title>test</title></head><body><link rel="canonical" href="http://example.com"></body></html>';
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand Down Expand Up @@ -325,6 +325,22 @@ describe('Canonical URL Tests', () => {
expect(log.info).to.have.been.calledWith('Canonical URL https://example.com uses a different protocol than base URL http://example.com');
});

it('should pass when canonical URL and base URL are identical, regardless of the www prefix', () => {
const cases = [
{ canonicalUrl: 'https://www.example.com', baseUrl: 'https://example.com' },
{ canonicalUrl: 'https://example.com', baseUrl: 'https://www.example.com' },
];

cases.forEach(({ canonicalUrl, baseUrl }) => {
const result = validateCanonicalFormat(canonicalUrl, baseUrl, log);

expect(result).to.deep.include({
check: 'canonical-url-same-domain',
success: true,
});
});
});

it('should fail if the canonical URL is not absolute', () => {
const canonicalUrl = '/relative/url';
const baseUrl = 'http://example.com';
Expand All @@ -340,7 +356,7 @@ describe('Canonical URL Tests', () => {

it('should pass if the canonical URL points to itself', async () => {
const url = 'http://example.com';
const html = `<html><head><link rel="canonical" href="${url}"></head><body></body></html>`;
const html = `<html lang="en"><head><link rel="canonical" href="${url}"><title>test</title></head><body></body></html>`;
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand Down Expand Up @@ -380,7 +396,7 @@ describe('Canonical URL Tests', () => {
it('should fail if the canonical URL does not point to itself', async () => {
const url = 'http://example.com';
const canonicalUrl = 'http://example.com/other-page';
const html = `<html><head><link rel="canonical" href="${canonicalUrl}"></head><body></body></html>`;
const html = `<html lang="en"><head><link rel="canonical" href="${canonicalUrl}"><title>test</title></head><body></body></html>`;
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand Down Expand Up @@ -470,9 +486,9 @@ describe('Canonical URL Tests', () => {
const expectedCanonicalUrl = 'https://example.com/canonical-page';

const html = `
<html>
<html lang="en">
<head>
<link rel="canonical" href="${href}">
<link rel="canonical" href="${href}"><title>test</title>
</head>
<body>
<h1>Test Page</h1>
Expand Down Expand Up @@ -518,7 +534,7 @@ describe('Canonical URL Tests', () => {
describe('canonicalAuditRunner', () => {
it('should run canonical audit successfully', async () => {
const baseURL = 'http://example.com';
const html = `<html><head><link rel="canonical" href="${baseURL}"></head><body></body></html>`;
const html = `<html lang="en"><head><link rel="canonical" href="${baseURL}"><title>test</title></head><body></body></html>`;

nock('http://example.com').get('/page1').reply(200, html);
nock(baseURL).get('/').reply(200, html);
Expand Down

0 comments on commit eb30da3

Please sign in to comment.