Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: same domain check with www for baseUrl #375

Merged
merged 4 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/canonical/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
*/

import { JSDOM } from 'jsdom';
import { composeBaseURL } from '@adobe/spacecat-shared-utils';
import { fetch } from '../support/utils.js';
import { AuditBuilder } from '../common/audit-builder.js';
import { noopUrlResolver } from '../common/audit.js';
Expand Down Expand Up @@ -352,7 +353,7 @@ export function validateCanonicalFormat(canonicalUrl, baseUrl, log) {
}

// Check if the canonical URL has the same domain as the base URL
if (url.hostname !== base.hostname) {
if (composeBaseURL(url.hostname) !== composeBaseURL(base.hostname)) {
checks.push({
check: CANONICAL_CHECKS.CANONICAL_URL_SAME_DOMAIN.check,
success: false,
Expand Down
36 changes: 26 additions & 10 deletions test/audits/canonical.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ describe('Canonical URL Tests', () => {
describe('validateCanonicalTag', () => {
it('should handle missing canonical tag', async () => {
const url = 'http://example.com';
const html = '<!DOCTYPE html><html><head></head><body></body></html>';
const html = '<!DOCTYPE html><html lang="en"><head><title>test</title></head><body></body></html>';
nock('http://example.com').get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand Down Expand Up @@ -153,7 +153,7 @@ describe('Canonical URL Tests', () => {

it('should handle invalid canonical URL correctly', async () => {
const url = 'http://example.com';
const html = '<html><head><link rel="canonical" href="invalid-url"></head><body></body></html>';
const html = '<html lang="en"><head><link rel="canonical" href="invalid-url"><title>test</title></head><body></body></html>';
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand All @@ -168,7 +168,7 @@ describe('Canonical URL Tests', () => {

it('should handle empty canonical tag', async () => {
const url = 'http://example.com';
const html = '<html><head><link rel="canonical" href=""></head><body></body></html>';
const html = '<html lang="en"><head><link rel="canonical" href=""><title>test</title></head><body></body></html>';
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand All @@ -184,7 +184,7 @@ describe('Canonical URL Tests', () => {

it('should handle multiple canonical tags', async () => {
const url = 'http://example.com';
const html = '<html><head><link rel="canonical" href="http://example.com/page1"><link rel="canonical" href="http://example.com/page2"></head><body></body></html>';
const html = '<html lang="en"><head><link rel="canonical" href="http://example.com/page1"><link rel="canonical" href="http://example.com/page2"><title>test</title></head><body></body></html>';
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand All @@ -198,7 +198,7 @@ describe('Canonical URL Tests', () => {

it('should fail if the canonical tag is not in the head section', async () => {
const url = 'http://example.com';
const html = '<html><head></head><body><link rel="canonical" href="http://example.com"></body></html>';
const html = '<html lang="en"><head><title>test</title></head><body><link rel="canonical" href="http://example.com"></body></html>';
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand Down Expand Up @@ -325,6 +325,22 @@ describe('Canonical URL Tests', () => {
expect(log.info).to.have.been.calledWith('Canonical URL https://example.com uses a different protocol than base URL http://example.com');
});

it('should pass when canonical URL and base URL are identical, regardless of the www prefix', () => {
const cases = [
{ canonicalUrl: 'https://www.example.com', baseUrl: 'https://example.com' },
{ canonicalUrl: 'https://example.com', baseUrl: 'https://www.example.com' },
];

cases.forEach(({ canonicalUrl, baseUrl }) => {
const result = validateCanonicalFormat(canonicalUrl, baseUrl, log);

expect(result).to.deep.include({
check: 'canonical-url-same-domain',
success: true,
});
});
});

it('should fail if the canonical URL is not absolute', () => {
const canonicalUrl = '/relative/url';
const baseUrl = 'http://example.com';
Expand All @@ -340,7 +356,7 @@ describe('Canonical URL Tests', () => {

it('should pass if the canonical URL points to itself', async () => {
const url = 'http://example.com';
const html = `<html><head><link rel="canonical" href="${url}"></head><body></body></html>`;
const html = `<html lang="en"><head><link rel="canonical" href="${url}"><title>test</title></head><body></body></html>`;
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand Down Expand Up @@ -380,7 +396,7 @@ describe('Canonical URL Tests', () => {
it('should fail if the canonical URL does not point to itself', async () => {
const url = 'http://example.com';
const canonicalUrl = 'http://example.com/other-page';
const html = `<html><head><link rel="canonical" href="${canonicalUrl}"></head><body></body></html>`;
const html = `<html lang="en"><head><link rel="canonical" href="${canonicalUrl}"><title>test</title></head><body></body></html>`;
nock(url).get('/').reply(200, html);

const result = await validateCanonicalTag(url, log);
Expand Down Expand Up @@ -470,9 +486,9 @@ describe('Canonical URL Tests', () => {
const expectedCanonicalUrl = 'https://example.com/canonical-page';

const html = `
<html>
<html lang="en">
<head>
<link rel="canonical" href="${href}">
<link rel="canonical" href="${href}"><title>test</title>
</head>
<body>
<h1>Test Page</h1>
Expand Down Expand Up @@ -518,7 +534,7 @@ describe('Canonical URL Tests', () => {
describe('canonicalAuditRunner', () => {
it('should run canonical audit successfully', async () => {
const baseURL = 'http://example.com';
const html = `<html><head><link rel="canonical" href="${baseURL}"></head><body></body></html>`;
const html = `<html lang="en"><head><link rel="canonical" href="${baseURL}"><title>test</title></head><body></body></html>`;

nock('http://example.com').get('/page1').reply(200, html);
nock(baseURL).get('/').reply(200, html);
Expand Down
Loading