diff --git a/.editorconfig b/.editorconfig index 1ba9813..961083e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -4,7 +4,7 @@ root = true end_of_line = lf insert_final_newline = true -[*.{js,mjs,json,ts}] +[*.{js,mjs,mts,json,ts}] charset = utf-8 indent_style = tab indent_size = 2 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..d496ad5 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,42 @@ +name: Tests + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + merge_group: + branches: [ main] + +defaults: + run: + working-directory: . + +jobs: + build: + strategy: + matrix: + node-version: [ 18, 20 ] + os: [ubuntu-22.04] + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout repository # v4.0.0 + uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac + - name: Install PNPM # v2.4.0 + uses: pnpm/action-setup@d882d12c64e032187b2edb46d3a0d003b7a43598 + with: + version: 8 + - name: Use Node.js ${{ matrix.node-version }} # v3.8.1 + uses: actions/setup-node@5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d + with: + node-version: ${{ matrix.node-version }} + cache: 'pnpm' + + - name: Install dependencies + run: pnpm install --frozen-lockfile + - name: Run Linters + run: pnpm lint + - name: Run Tests + run: pnpm test:coverage diff --git a/.gitignore b/.gitignore index 6baaf93..733f4e2 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,5 @@ # # SPDX-License-Identifier: MIT +coverage/ node_modules/ -pnpm-lock.yaml diff --git a/.hooks/pre-commit b/.hooks/pre-commit new file mode 100755 index 0000000..587c838 --- /dev/null +++ b/.hooks/pre-commit @@ -0,0 +1,7 @@ +#!/bin/sh + +set -eu +set -o pipefail + +pnpm lint +pnpm test diff --git a/biome.json b/biome.json index efece0f..ad9d461 100644 --- a/biome.json +++ b/biome.json @@ -6,8 +6,8 @@ "rules": { "recommended": true }, - "ignore": ["./node_modules/**/*"], - "include": ["./*.json", "./*.js", "./*.mjs", "./*.d.ts"] + "ignore": ["./coverage/**/*", "./node_modules/**/*"], + "include": ["./*.json", "./*.js", "./*.mjs", "./*.mts", "./*.d.ts"] }, "formatter": { "enabled": true, @@ -17,7 +17,7 @@ "lineWidth": 80, "lineEnding": "lf", "ignore": ["./node_modules/**/*"], - "include": ["./*.json", "./*.mjs", "./*.d.ts"] + "include": ["./*.json", "./*.mjs", "./*.mts", "./*.d.ts"] }, "javascript": { "formatter": { diff --git a/core.mjs b/core.mjs new file mode 100644 index 0000000..5f26911 --- /dev/null +++ b/core.mjs @@ -0,0 +1,328 @@ +/* + * SPDX-FileCopyrightText: 2024 KindSpells Labs S.L. + * + * SPDX-License-Identifier: MIT + */ + +import { createHash } from 'node:crypto' +import { readFile, readdir, stat, writeFile } from 'node:fs/promises' +import { extname, resolve } from 'node:path' + +/** + * @typedef {{ + * inlineScriptHashes: Set, + * inlineStyleHashes: Set, + * extScriptHashes: Set, + * extStyleHashes: Set, + * }} HashesCollection + */ + +/** + * @param {string | ArrayBuffer | Buffer} data + * @returns {string} + */ +export const generateSRIHash = data => { + const hash = createHash('sha256') + if (data instanceof ArrayBuffer) { + hash.update(Buffer.from(data)) + } else if (data instanceof Buffer) { + hash.update(data) + } else { + hash.update(data, 'utf8') + } + return `sha256-${hash.digest('base64')}` +} + +/** @typedef {(hash: string, attrs: string, content?: string | undefined) => string} ElemReplacer */ + +/** @type {ElemReplacer} */ +const scriptReplacer = (hash, attrs, content) => + `${content ?? ''}` + +/** @type {ElemReplacer} */ +const styleReplacer = (hash, attrs, content) => + `${content ?? ''}` + +/** @type {ElemReplacer} */ +const linkStyleReplacer = (hash, attrs) => `` + +const srcRegex = /\s+(src|href)\s*=\s*("(?.*?)"|'(?.*?)')/i +const integrityRegex = + /\s+integrity\s*=\s*("(?.*?)"|'(?.*?)')/i +const relStylesheetRegex = /\s+rel\s*=\s*('stylesheet'|"stylesheet")/i + +/** + * This function extracts SRI hashes from inline and external resources, and + * adds the integrity attribute to the related HTML elements. + * + * Notice that it assumes that the HTML content is relatively well-formed, and + * that in case it already contains integrity attributes then they are correct. + * + * @param {import('astro').AstroIntegrationLogger} logger + * @param {string} distDir + * @param {string} content + * @param {HashesCollection} h + * @returns {Promise} + */ +export const updateSriHashes = async (logger, distDir, content, h) => { + const processors = /** @type {const} */ ([ + { + t: 'Script', + regex: + /(\s+[a-z][a-z0-9\-_]*(=('[^']*?'|"[^"]*?"))?)*?)\s*>(?[\s\S]*?)<\/\s*script\s*>/gi, + replacer: scriptReplacer, + hasContent: true, + attrsRegex: undefined, + }, + { + t: 'Style', + regex: + /(\s+[a-z][a-z0-9\-_]*(=('[^']*?'|"[^"]*?"))?)*?)\s*>(?[\s\S]*?)<\/\s*style\s*>/gi, + replacer: styleReplacer, + hasContent: true, + attrsRegex: undefined, + }, + { + t: 'Style', + regex: + /(\s+[a-z][a-z0-9\-_]*(=('[^']*?'|"[^"]*?"))?)*?)\s*\/?>/gi, + replacer: linkStyleReplacer, + hasContent: false, + attrsRegex: relStylesheetRegex, + }, + ]) + + let updatedContent = content + let match + + for (const { attrsRegex, hasContent, regex, replacer, t } of processors) { + // biome-ignore lint/suspicious/noAssignInExpressions: safe + while ((match = regex.exec(content)) !== null) { + const attrs = match.groups?.attrs ?? '' + const content = match.groups?.content ?? '' + + /** @type {string | undefined} */ + let sriHash = undefined + + if (attrs) { + if (attrsRegex && !attrsRegex.test(attrs)) { + continue + } + + const srcMatch = srcRegex.exec(attrs) + const integrityMatch = integrityRegex.exec(attrs) + + if (integrityMatch) { + sriHash = + integrityMatch.groups?.integrity1 ?? + integrityMatch.groups?.integrity2 + if (sriHash) { + ;(srcMatch ? h[`ext${t}Hashes`] : h[`inline${t}Hashes`]).add( + sriHash, + ) + continue + } + } + + if (srcMatch) { + const src = srcMatch.groups?.src1 ?? srcMatch.groups?.src2 ?? '' + + /** @type {string | ArrayBuffer | Buffer} */ + let resourceContent + if (src.startsWith('/')) { + const resourcePath = resolve(distDir, `.${src}`) + resourceContent = await readFile(resourcePath) + } else if (src.startsWith('http')) { + const resourceResponse = await fetch(src, { method: 'GET' }) + resourceContent = await resourceResponse.arrayBuffer() + } else { + logger.warn(`Unable to process external resource: "${src}"`) + continue + } + + sriHash = generateSRIHash(resourceContent) + h[`ext${t}Hashes`].add(sriHash) + } + } + + if (hasContent && !sriHash) { + sriHash = generateSRIHash(content) + h[`inline${t}Hashes`].add(sriHash) + } + + if (sriHash) { + updatedContent = updatedContent.replace( + match[0], + replacer(sriHash, attrs, content), + ) + } + } + } + + return updatedContent +} + +/** + * @param {import('astro').AstroIntegrationLogger} logger + * @param {string} filePath + * @param {string} distDir + * @param {HashesCollection} h + */ +const processHTMLFile = async (logger, filePath, distDir, h) => { + const content = await readFile(filePath, 'utf8') + const updatedContent = await updateSriHashes(logger, distDir, content, h) + + if (updatedContent !== content) { + await writeFile(filePath, updatedContent) + } +} + +/** + * @param {import('astro').AstroIntegrationLogger} logger + * @param {string} dirPath + * @param {string} distDir + * @param {HashesCollection} h + */ +const scanDirectory = async (logger, dirPath, distDir, h) => { + for (const file of await readdir(dirPath)) { + const filePath = resolve(dirPath, file) + const stats = await stat(filePath) + + if (stats.isDirectory()) { + await scanDirectory(logger, filePath, distDir, h) + } else if (stats.isFile() && extname(file) === '.html') { + await processHTMLFile(logger, filePath, distDir, h) + } + } +} + +/** + * @param {string} path + * @returns {Promise} + */ +export const doesFileExist = async path => { + try { + await stat(path) + return true + } catch (err) { + if (/** @type {{ code: unknown }} */ (err).code === 'ENOENT') { + return false + } + throw err + } +} + +/** + * @param {unknown[]} a + * @param {unknown[]} b + * @returns {boolean} + */ +export const arraysEqual = (a, b) => { + if (a.length !== b.length) { + return false + } + + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) { + return false + } + } + + return true +} + +/** + * This is a hack to scan for nested scripts in the `_astro` directory, but they + * should be detected in a recursive way, when we process the JS files that are + * being directly imported in the HTML files. + * + * TODO: Remove this function and implement the recursive scan. + * + * @param {string} dirPath + * @param {Set} extScriptHashes + */ +const scanForNestedScripts = async (dirPath, extScriptHashes) => { + const nestedScriptsDir = resolve(dirPath, '_astro') + + if (!(await doesFileExist(nestedScriptsDir))) { + return + } + + for (const file of await readdir(nestedScriptsDir)) { + const filePath = resolve(nestedScriptsDir, file) + + if ( + (await stat(filePath)).isFile() && + ['.js', '.mjs'].includes(extname(file)) + ) { + const sriHash = generateSRIHash(await readFile(filePath)) + extScriptHashes.add(sriHash) + } + } +} + +/** + * @param {import('astro').AstroIntegrationLogger} logger + * @param {import('./main.d.ts').StrictSriCspOptions} sriCspOptions + */ +export const generateSRIHashes = async ( + logger, + { distDir, sriHashesModule }, +) => { + const h = { + inlineScriptHashes: new Set(), + inlineStyleHashes: new Set(), + extScriptHashes: new Set(), + extStyleHashes: new Set(), + } + await scanDirectory(logger, distDir, distDir, h) + + // TODO: Remove temporary hack + await scanForNestedScripts(distDir, h.extScriptHashes) + + if (!sriHashesModule) { + return + } + + let persistHashes = false + + const inlineScriptHashes = Array.from(h.inlineScriptHashes).sort() + const inlineStyleHashes = Array.from(h.inlineStyleHashes).sort() + const extScriptHashes = Array.from(h.extScriptHashes).sort() + const extStyleHashes = Array.from(h.extStyleHashes).sort() + + if (await doesFileExist(sriHashesModule)) { + const hModule = /** @type {{ + inlineScriptHashes?: string[] | undefined + inlineStyleHashes?: string[] | undefined + extScriptHashes?: string[] | undefined + extStyleHashes?: string[] | undefined + }} */ (await import(sriHashesModule)) + + persistHashes = + !arraysEqual(inlineScriptHashes, hModule.inlineScriptHashes ?? []) || + !arraysEqual(inlineStyleHashes, hModule.inlineStyleHashes ?? []) || + !arraysEqual(extScriptHashes, hModule.extScriptHashes ?? []) || + !arraysEqual(extStyleHashes, hModule.extStyleHashes ?? []) + } else { + persistHashes = true + } + + if (persistHashes) { + let hashesFileContent = '// Do not edit this file manually\n\n' + hashesFileContent += `export const inlineScriptHashes = /** @type {string[]} */ ([${inlineScriptHashes + .map(h => `\n\t'${h}',`) + .join('')}${inlineScriptHashes.length > 0 ? '\n' : ''}])\n\n` + hashesFileContent += `export const inlineStyleHashes = /** @type {string[]} */ ([${inlineStyleHashes + .map(h => `\n\t'${h}',`) + .join('')}${inlineStyleHashes.length > 0 ? '\n' : ''}])\n\n` + hashesFileContent += `export const extScriptHashes = /** @type {string[]} */ ([${extScriptHashes + .map(h => `\n\t'${h}',`) + .join('')}${extScriptHashes.length > 0 ? '\n' : ''}])\n\n` + hashesFileContent += `export const extStyleHashes = /** @type {string[]} */ ([${extStyleHashes + .map(h => `\n\t'${h}',`) + .join('')}${extStyleHashes.length > 0 ? '\n' : ''}])\n` + + await writeFile(sriHashesModule, hashesFileContent) + } +} diff --git a/main.d.ts b/main.d.ts index 4e0ee38..830e0ce 100644 --- a/main.d.ts +++ b/main.d.ts @@ -11,8 +11,17 @@ export type SriCspOptions = { export type StrictSriCspOptions = SriCspOptions & { distDir: string } -export function sriCSP( - sriCspOptions: SriCspOptions, -): import('astro').AstroIntegration +type AstroBuildDoneOpts = { + dir: URL + logger: AstroIntegrationLogger + pages: { pathname: string }[] + routes: RouteData[] +} +export type Integration = { + name: string + hooks: { 'astro:build:done': (opts: AstroBuildDoneOpts) => Promise } +} + +export function sriCSP(sriCspOptions: SriCspOptions): Integration export default sriCSP diff --git a/main.mjs b/main.mjs index f553ff7..95280af 100644 --- a/main.mjs +++ b/main.mjs @@ -1,372 +1,12 @@ -/* - * SPDX-FileCopyrightText: 2024 KindSpells Labs S.L. - * - * SPDX-License-Identifier: MIT - */ - -import { createHash } from 'node:crypto' -import { readFile, readdir, stat, writeFile } from 'node:fs/promises' -import { extname, resolve } from 'node:path' import { fileURLToPath } from 'node:url' - -/** - * @param {string | ArrayBuffer | Buffer} data - * @returns {string} - */ -const generateSRIHash = data => { - const hash = createHash('sha256') - if (data instanceof ArrayBuffer) { - hash.update(Buffer.from(data)) - } else if (data instanceof Buffer) { - hash.update(data) - } else { - hash.update(data, 'utf8') - } - return `sha256-${hash.digest('base64')}` -} - -/** - * @param {RegExp} elemRegex - * @param {string} content - * @param {Set} inlineHashes - */ -const extractKnownSriHashes = (elemRegex, content, inlineHashes) => { - let match - - // biome-ignore lint/suspicious/noAssignInExpressions: safe - while ((match = elemRegex.exec(content)) !== null) { - const sriHash = match[1] - if (sriHash) { - inlineHashes.add(sriHash) - } - } -} - -/** - * @param {'script' | 'style'} elemType - * @param {string} content - * @param {Set} inlineHashes - * @returns {string} - */ -const updateInlineSriHashes = (elemType, content, inlineHashes) => { - let updatedContent = content - let match - - const elemRegex = new RegExp(`<${elemType}>([\\s\\S]*?)<\\/${elemType}>`, 'g') - - // biome-ignore lint/suspicious/noAssignInExpressions: safe - while ((match = elemRegex.exec(content)) !== null) { - const elemContent = match[1]?.trim() - - if (elemContent) { - const sriHash = generateSRIHash(elemContent) - updatedContent = updatedContent.replace( - match[0], - `<${elemType} integrity="${sriHash}">${elemContent}`, - ) - inlineHashes.add(sriHash) - } - } - - return updatedContent -} - -/** - * @param {import('astro').AstroIntegrationLogger} logger - * @param {string} distDir - * @param {'script' | 'style'} elemType - * @param {string} content - * @param {Set} extHashes - * @returns {Promise} - */ -const updateExternalSriHashes = async ( - logger, - distDir, - elemType, - content, - extHashes, -) => { - let updatedContent = content - let match - - const elemRegex = - elemType === 'script' - ? /\s+(type="module"\s+src="(?[\s\S]*?)"|src="(?[\s\S]*?)"\s+type="module"|src="(?[\s\S]*?)"))\s*(\/>|><\/script>)/gi - : /\s+(rel="stylesheet"\s+href="(?[\s\S]*?)"|href="(?[\s\S]*?)"\s+rel="stylesheet"))\s*\/>/gi - - // biome-ignore lint/suspicious/noAssignInExpressions: safe - while ((match = elemRegex.exec(content)) !== null) { - const attrs = match.groups?.attrs - const href = - match.groups?.href1 ?? - match.groups?.href2 ?? - match.groups?.href3 - if (!attrs || !href) { - continue - } - - /** @type {string | ArrayBuffer | Buffer} */ - let resourceContent - if (href.startsWith('/')) { - const resourcePath = resolve(distDir, `.${href}`) - resourceContent = await readFile(resourcePath) - } else if (href.startsWith('http')) { - const resourceResponse = await fetch(href, { method: 'GET' }) - resourceContent = await resourceResponse.arrayBuffer() - } else { - logger.warn(`Unable to process external resource: "${href}"`) - continue - } - - const sriHash = generateSRIHash(resourceContent) - updatedContent = updatedContent.replace( - match[0], - elemType === 'script' - ? `` - : ``, - ) - extHashes.add(sriHash) - } - - return updatedContent -} - -/** - * @param {import('astro').AstroIntegrationLogger} logger - * @param {string} filePath - * @param {string} distDir - */ -const processHTMLFile = async (logger, filePath, distDir) => { - const content = await readFile(filePath, 'utf8') - - const inlineScriptHashes = /** @type {Set} */ (new Set()) - const inlineStyleHashes = /** @type {Set} */ (new Set()) - const extScriptHashes = /** @type {Set} */ (new Set()) - const extStyleHashes = /** @type {Set} */ (new Set()) - - // Known Inline Resources (just a precaution) - extractKnownSriHashes( - / + + ` + + const expected = ` + + My Test Page + + + + + ` + + const h = getEmptyHashes() + const updated = await updateSriHashes( + console as unknown as AstroIntegrationLogger, + testsDir, + content, + h, + ) + + expect(updated).toEqual(expected) + expect(h.inlineScriptHashes.size).toBe(1) + expect(h.inlineScriptHashes.has('sha256-TWupyvVdPa1DyFqLnQMqRpuUWdS3nKPnz70IcS/1o3Q=')).toBe(true) + expect(h.inlineStyleHashes.size).toBe(0) + expect(h.extScriptHashes.size).toBe(0) + expect(h.extStyleHashes.size).toBe(0) + }) + + it('adds sri hash to inline style', async () => { + const content = ` + + My Test Page + + + +

My Test Page

+

Some text

+ + ` + + const expected = ` + + My Test Page + + + +

My Test Page

+

Some text

+ + ` + + const h = getEmptyHashes() + const updated = await updateSriHashes( + console as unknown as AstroIntegrationLogger, + testsDir, + content, + h, + ) + + expect(updated).toEqual(expected) + expect(h.inlineStyleHashes.size).toBe(1) + expect(h.inlineStyleHashes.has('sha256-VATw/GI1Duwve1FGJ+z3c4gwulpBbeoGo1DqO20SdxM=')).toBe(true) + expect(h.inlineScriptHashes.size).toBe(0) + expect(h.extScriptHashes.size).toBe(0) + expect(h.extStyleHashes.size).toBe(0) + }) + + it('adds sri hash to external script (same origin)', async () => { + const content = ` + + My Test Page + + + + + ` + + const expected = ` + + My Test Page + + + + + ` + + const h = getEmptyHashes() + const updated = await updateSriHashes( + console as unknown as AstroIntegrationLogger, + rootDir, + content, + h, + ) + + expect(updated).toEqual(expected) + expect(h.extScriptHashes.size).toBe(1) + expect(h.extScriptHashes.has('sha256-GlpkA8WAeGW9d6jr04eDhYbHj9yNtaB4+Q/5HwOc05M=')).toBe(true) + expect(h.inlineScriptHashes.size).toBe(0) + expect(h.inlineStyleHashes.size).toBe(0) + expect(h.extStyleHashes.size).toBe(0) + }) + + it('adds sri hash to external script (cross origin)', async () => { + const remoteScript = 'https://raw.githubusercontent.com/KindSpells/astro-sri-csp/ae9521048f2129f633c075b7f7ef24e11bbd1884/main.mjs' + const content = ` + + My Test Page + + + + + ` + + const expected = ` + + My Test Page + + + + + ` + + const h = getEmptyHashes() + const updated = await updateSriHashes( + console as unknown as AstroIntegrationLogger, + rootDir, + content, + h, + ) + + expect(updated).toEqual(expected) + expect(h.extScriptHashes.size).toBe(1) + expect(h.extScriptHashes.has('sha256-i4WR4ifasidZIuS67Rr6Knsy7/hK1xbVTc8ZAmnAv1Q=')).toBe(true) + expect(h.inlineScriptHashes.size).toBe(0) + expect(h.inlineStyleHashes.size).toBe(0) + expect(h.extStyleHashes.size).toBe(0) + }) + + // TODO: Add tests for external styles +}) diff --git a/vitest.config.mts b/vitest.config.mts new file mode 100644 index 0000000..21b861c --- /dev/null +++ b/vitest.config.mts @@ -0,0 +1,18 @@ +// eslint-disable-next-line import/no-unresolved +import { defineConfig } from 'vitest/config' + +export default defineConfig({ + test: { + coverage: { + provider: 'v8', + include: ['*.mjs'], + exclude: ['tests/**/*'], + thresholds: { + branches: 55.00, + lines: 50.0, + functions: 50.0, + statements: 60.0, + }, + }, + }, +})