Skip to content
This repository was archived by the owner on May 12, 2025. It is now read-only.

Commit 779c678

Browse files
author
Andrii Rodionov
committed
Added BOM detection and BOM marker removal
If a source file is encoded with a byte order mark (BOM), the BOM becomes part of the prefix of the JS.CompilationUnit. During the equality check on the Java side (org.openrewrite.Parser.requirePrintEqualsInput) between the original source and the code printed from the LST, Java trims the BOM, which causes an idempotency error. Therefore, the JS/TS parser now also detects the BOM and trims it from the JS.CompilationUnit prefix.
1 parent 2d8ec7c commit 779c678

File tree

1 file changed

+33
-3
lines changed

1 file changed

+33
-3
lines changed

openrewrite/src/javascript/parser.ts

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,15 +210,45 @@ export class JavaScriptParserVisitor {
210210
return this.visit(node) as T;
211211
}
212212

213+
/**
 * Detects a byte order mark (BOM) at the start of `content`.
 *
 * Two leading patterns are recognised:
 * - the single code unit U+FEFF, reported as `'utf8'`;
 * - the code unit 0xFF followed by 0xFE, reported as `'utf16le'`.
 *
 * NOTE(review): text decoded from UTF-16 LE would normally also begin with
 * U+FEFF, so the second pattern presumably only matches input whose raw bytes
 * were decoded one byte per character — confirm against how the source text is read.
 *
 * @param content source text to inspect
 * @returns `hasBom` together with the encoding label, or
 *          `{ hasBom: false, encoding: null }` when neither pattern matches
 */
detectBOMAndTextEncoding(content: string): { hasBom: boolean; encoding: string | null } {
    const BOM_UTF8 = "\uFEFF"; // BOM as it appears in decoded text
    const BOM_UTF16_LE = [0xFF, 0xFE]; // BOM for UTF-16 Little Endian, as two separate code units

    // The U+FEFF check takes precedence over the two-unit check.
    if (content.startsWith(BOM_UTF8)) {
        return { hasBom: true, encoding: 'utf8' };
    }

    // charCodeAt yields NaN past the end of the string, so empty or one-character input never matches.
    const hasUtf16LeBom =
        content.charCodeAt(0) === BOM_UTF16_LE[0] && content.charCodeAt(1) === BOM_UTF16_LE[1];
    if (hasUtf16LeBom) {
        return { hasBom: true, encoding: 'utf16le' };
    }

    return { hasBom: false, encoding: null };
}
229+
213230
visitSourceFile(node: ts.SourceFile): JS.CompilationUnit {
231+
232+
let bomAndTextEncoding = this.detectBOMAndTextEncoding(node.getFullText());
233+
234+
let prefix = this.prefix(node);
235+
if (bomAndTextEncoding.hasBom) {
236+
// If a node full text has a BOM marker, it becomes a part of the prefix, so we remove it
237+
if (bomAndTextEncoding.encoding === 'utf8') {
238+
prefix = prefix.withWhitespace(prefix.whitespace!.slice(1));
239+
} else if (bomAndTextEncoding.encoding === 'utf16le') {
240+
prefix = prefix.withWhitespace(prefix.whitespace!.slice(2));
241+
}
242+
}
243+
214244
return new JS.CompilationUnit(
215245
randomId(),
216-
this.prefix(node),
246+
prefix,
217247
Markers.EMPTY,
218248
this.sourceFile.fileName,
219249
null,
220-
null,
221-
false,
250+
bomAndTextEncoding.encoding,
251+
bomAndTextEncoding.hasBom,
222252
null,
223253
[],
224254
this.semicolonPaddedStatementList(node.statements),

0 commit comments

Comments
 (0)