diff --git a/packages/ssr/src/utils/chunker.ts b/packages/ssr/src/utils/chunker.ts
index 2e63d49a..9efcefe0 100644
--- a/packages/ssr/src/utils/chunker.ts
+++ b/packages/ssr/src/utils/chunker.ts
@@ -3,34 +3,89 @@ interface Chunk {
   value: string;
 }
 
-function createChunkRegExp(chunkSize: number) {
-  return new RegExp('.{1,' + chunkSize + '}', 'g');
-}
-
 const MAX_CHUNK_SIZE = 3180;
-const MAX_CHUNK_REGEXP = createChunkRegExp(MAX_CHUNK_SIZE);
 
 /**
- * create chunks from a string and return an array of object
+ * Split a string into chunks whose URI-encoded form fits within a size limit.
+ *
+ * The limit is measured against `encodeURIComponent(value)`. Chunk boundaries
+ * never fall inside a percent-escape sequence or inside a multi-byte
+ * character, so joining the chunk values in order reproduces `value`.
+ *
+ * @param key - Name of the single chunk, or the prefix of the numbered chunk
+ * names (`key.0`, `key.1`, ...) when the value has to be split.
+ * @param value - The raw (unencoded) string to split.
+ * @param chunkSize - Maximum encoded length of one chunk; defaults to
+ * `MAX_CHUNK_SIZE`.
+ * @returns A single `{ name: key, value }` entry when the encoded value fits
+ * in one chunk, otherwise one entry per chunk named `key.<index>`.
+ * @throws RangeError when `chunkSize` is too small to hold the next encoded
+ * character, since no progress could be made.
+ * @throws URIError when `value` contains a lone surrogate (raised by
+ * `encodeURIComponent`).
  */
 export function createChunks(key: string, value: string, chunkSize?: number): Chunk[] {
-  const re = chunkSize !== undefined ? createChunkRegExp(chunkSize) : MAX_CHUNK_REGEXP;
-  // check the length of the string to work out if it should be returned or chunked
-  const chunkCount = Math.ceil(value.length / (chunkSize ?? MAX_CHUNK_SIZE));
+  const resolvedChunkSize = chunkSize ?? MAX_CHUNK_SIZE;
+
+  let encodedValue = encodeURIComponent(value);
 
-  if (chunkCount === 1) {
+  if (encodedValue.length <= resolvedChunkSize) {
     return [{ name: key, value }];
   }
 
-  const chunks: Chunk[] = [];
-  // split string into a array based on the regex
-  const values = value.match(re);
-  values?.forEach((value, i) => {
-    const name = `${key}.${i}`;
-    chunks.push({ name, value });
-  });
+  const chunks: string[] = [];
+
+  while (encodedValue.length > 0) {
+    let encodedChunkHead = encodedValue.slice(0, resolvedChunkSize);
+
+    const lastEscapePos = encodedChunkHead.lastIndexOf('%');
+
+    // Check if the last escaped character is truncated. The explicit -1 check
+    // matters for chunk sizes below 2, where "no escape found" would otherwise
+    // satisfy the comparison and wrongly trim the chunk.
+    if (lastEscapePos !== -1 && lastEscapePos > resolvedChunkSize - 3) {
+      // If so, reslice the string to exclude the whole escape sequence.
+      // We only reduce the size of the string as the chunk must
+      // be smaller than the chunk size.
+      encodedChunkHead = encodedChunkHead.slice(0, lastEscapePos);
+    }
+
+    let valueHead = '';
+
+    // Check if the chunk was split along a valid unicode boundary.
+    while (encodedChunkHead.length > 0) {
+      try {
+        // Try to decode the chunk back and see if it is valid.
+        // Stop when the chunk is valid.
+        valueHead = decodeURIComponent(encodedChunkHead);
+        break;
+      } catch (error) {
+        if (
+          error instanceof URIError &&
+          encodedChunkHead.charAt(encodedChunkHead.length - 3) === '%'
+        ) {
+          // The chunk ends inside a multi-byte character: drop its last
+          // escaped byte and try again.
+          encodedChunkHead = encodedChunkHead.slice(0, -3);
+        } else {
+          throw error;
+        }
+      }
+    }
+
+    // Nothing fits in this chunk, so the loop could never advance. Fail
+    // loudly instead of pushing empty chunks forever.
+    if (encodedChunkHead.length === 0) {
+      throw new RangeError(
+        `createChunks: chunkSize (${resolvedChunkSize}) cannot fit the next encoded character`
+      );
+    }
+
+    chunks.push(valueHead);
+    encodedValue = encodedValue.slice(encodedChunkHead.length);
+  }
 
-  return chunks;
+  return chunks.map((value, i) => ({ name: `${key}.${i}`, value }));
 }
 
 // Get fully constructed chunks
diff --git a/packages/ssr/tests/chunker.spec.ts b/packages/ssr/tests/chunker.spec.ts
index a9038fd0..f8b28c50 100644
--- a/packages/ssr/tests/chunker.spec.ts
+++ b/packages/ssr/tests/chunker.spec.ts
@@ -64,4 +64,93 @@ describe('chunker', () => {
     expect(len(`${key}=${DOUBLE_CHUNK_STRING}`)).toBe(7257);
     expect(combined).toBe(DOUBLE_CHUNK_STRING);
   });
+
+  it('should correctly break between unicode boundaries in escaped characters', () => {
+    const test = '   ';
+    const chunks = createChunks('key', test, 4);
+    expect(chunks).toEqual([
+      {
+        name: 'key.0',
+        value: ' '
+      },
+      {
+        name: 'key.1',
+        value: ' '
+      },
+      {
+        name: 'key.2',
+        value: ' '
+      }
+    ]);
+
+    expect(chunks.map((char) => char.value).join('')).toEqual(test);
+  });
+
+  describe('should correctly break between unicode boundaries in long unicode', () => {
+    it('should correctly break between unicode boundaries in long unicode (4 bytes)', () => {
+      const test = '🤦🏻‍♂️';
+      const chunksAtStartBorder = createChunks('key', test, 12);
+      const chunksAtEndBorder = createChunks('key', test, 17);
+      expect(chunksAtStartBorder).toEqual(chunksAtEndBorder);
+      expect(chunksAtStartBorder).toEqual([
+        {
+          name: 'key.0',
+          value: '🤦'
+        },
+        {
+          name: 'key.1',
+          value: '🏻'
+        },
+        {
+          name: 'key.2',
+          value: '‍'
+        },
+        {
+          name: 'key.3',
+          value: '♂'
+        },
+        {
+          name: 'key.4',
+          value: '️'
+        }
+      ]);
+      expect(chunksAtStartBorder.map((char) => char.value).join('')).toEqual(test);
+    });
+
+    it('should correctly break between unicode boundaries in long unicode (5 bytes)', () => {
+      const test = '🤦🏻‍♂️';
+      const chunksAtStartBorder = createChunks('key', test, 18);
+      const chunksAtEndBorder = createChunks('key', test, 20);
+      expect(chunksAtStartBorder).toEqual(chunksAtEndBorder);
+      expect(chunksAtStartBorder).toEqual([
+        {
+          name: 'key.0',
+          value: '🤦'
+        },
+        {
+          name: 'key.1',
+          value: '🏻'
+        },
+        {
+          name: 'key.2',
+          value: '‍♂'
+        },
+        {
+          name: 'key.3',
+          value: '️'
+        }
+      ]);
+      expect(chunksAtStartBorder.map((char) => char.value).join('')).toEqual(test);
+    });
+  });
+
+  it('should make progress when the chunk size is a single character', () => {
+    const chunks = createChunks('key', 'abc', 1);
+    expect(chunks.map((chunk) => chunk.value)).toEqual(['a', 'b', 'c']);
+  });
+
+  it('should throw when the chunk size cannot fit one encoded character', () => {
+    expect(() => createChunks('key', '   ', 2)).toThrow(RangeError);
+    expect(() => createChunks('key', '🤦🤦', 4)).toThrow(RangeError);
+  });
 });