@@ -209,13 +209,18 @@ JS::CharsToNewUTF8CharsZ(JSContext* maybeCx,
209
209
static const uint32_t INVALID_UTF8 = UINT32_MAX;
210
210
211
211
/*
212
- * Convert a utf8 character sequence into a UCS-4 character and return that
213
- * character. It is assumed that the caller already checked that the sequence
214
- * is valid.
212
+ * Convert a UTF-8 or WTF-8 (depending on InputCharsT, which is either
213
+ * UTF8Chars or WTF8Chars) character sequence into a UCS-4 character and return
214
+ * that character. It is assumed that the caller already checked that the
215
+ * sequence is valid.
215
216
*/
216
- uint32_t
217
- JS::Utf8ToOneUcs4Char (const uint8_t * utf8Buffer, int utf8Length)
217
+ template <class InputCharsT >
218
+ static uint32_t
219
+ Utf8ToOneUcs4CharImpl (const uint8_t * utf8Buffer, int utf8Length)
218
220
{
221
+ static_assert (std::is_same<InputCharsT, UTF8Chars>::value ||
222
+ std::is_same<InputCharsT, WTF8Chars>::value,
223
+ " must be either UTF-8 or WTF-8" );
219
224
MOZ_ASSERT (1 <= utf8Length && utf8Length <= 4 );
220
225
221
226
if (utf8Length == 1 ) {
@@ -235,13 +240,26 @@ JS::Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length)
235
240
ucs4Char = (ucs4Char << 6 ) | (*utf8Buffer++ & 0x3F );
236
241
}
237
242
238
- if (MOZ_UNLIKELY (ucs4Char < minucs4Char || (ucs4Char >= 0xD800 && ucs4Char <= 0xDFFF ))) {
243
+ if (MOZ_UNLIKELY (ucs4Char < minucs4Char)) {
244
+ return INVALID_UTF8;
245
+ }
246
+
247
+ // WTF-8 allows lone surrogates; only strict UTF-8 rejects 0xD800..0xDFFF.
248
+ if (std::is_same<InputCharsT, UTF8Chars>::value &&
249
+ MOZ_UNLIKELY (ucs4Char >= 0xD800 && ucs4Char <= 0xDFFF ))
250
+ {
239
251
return INVALID_UTF8;
240
252
}
241
253
242
254
return ucs4Char;
243
255
}
244
256
257
// Strict UTF-8 entry point: forwards to Utf8ToOneUcs4CharImpl instantiated
// with UTF8Chars, the mode in which a decoded value in 0xD800..0xDFFF is
// reported as INVALID_UTF8 rather than returned as a character.
+ uint32_t
258
+ JS::Utf8ToOneUcs4Char (const uint8_t * utf8Buffer, int utf8Length)
259
+ {
260
+ return Utf8ToOneUcs4CharImpl<UTF8Chars>(utf8Buffer, utf8Length);
261
+ }
262
+
245
263
static void
246
264
ReportInvalidCharacter (JSContext* cx, uint32_t offset)
247
265
{
@@ -276,13 +294,13 @@ enum class OnUTF8Error {
276
294
Crash,
277
295
};
278
296
279
- // Scan UTF8 input and (internally, at least) convert it to a series of UTF-16
280
- // code units. But you can also do odd things like pass an empty lambda for
281
- // `dst`, in which case the output is discarded entirely--the only effect of
282
- // calling the template that way is error-checking.
283
- template <OnUTF8Error ErrorAction, typename OutputFn>
297
+ // Scan UTF-8 or WTF-8 input and (internally, at least) convert it to a series
298
+ // of UTF-16 code units. But you can also do odd things like pass an empty
299
+ // lambda for `dst`, in which case the output is discarded entirely--the only
300
+ // effect of calling the template that way is error-checking.
301
+ template <OnUTF8Error ErrorAction, typename OutputFn, class InputCharsT >
284
302
static bool
285
- InflateUTF8ToUTF16 (JSContext* cx, const UTF8Chars src, OutputFn dst)
303
+ InflateUTF8ToUTF16 (JSContext* cx, const InputCharsT src, OutputFn dst)
286
304
{
287
305
size_t srclen = src.length ();
288
306
for (uint32_t i = 0 ; i < srclen; i++) {
@@ -339,7 +357,15 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, OutputFn dst)
339
357
(v == 0xF0 && ((uint8_t )src[i + 1 ] & 0xF0 ) == 0x80 ) || // F0 90~BF
340
358
(v == 0xF4 && ((uint8_t )src[i + 1 ] & 0xF0 ) != 0x80 )) // F4 80~8F
341
359
{
342
- INVALID (ReportInvalidCharacter, i, 1 );
360
+ if (std::is_same<InputCharsT, UTF8Chars>::value) {
361
+ INVALID (ReportInvalidCharacter, i, 1 );
362
+ } else {
363
+ // WTF-8 allows a lone surrogate, encoded as ED A0~BF 80~BF.
364
+ MOZ_ASSERT ((std::is_same<InputCharsT, WTF8Chars>::value));
365
+ if (v == 0xED && ((uint8_t )src[i + 1 ] & 0xE0 ) != 0xA0 ) { // ED A0~BF
366
+ INVALID (ReportInvalidCharacter, i, 1 );
367
+ }
368
+ }
343
369
}
344
370
345
371
// Check the continuation bytes.
@@ -350,7 +376,7 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, OutputFn dst)
350
376
}
351
377
352
378
// Determine the code unit's length in CharT and act accordingly.
353
- v = JS::Utf8ToOneUcs4Char ((uint8_t *)&src[i], n);
379
+ v = Utf8ToOneUcs4CharImpl<InputCharsT> ((uint8_t *)&src[i], n);
354
380
if (v < 0x10000 ) {
355
381
// The n-byte UTF8 code unit will fit in a single CharT.
356
382
if (dst (char16_t (v)) == LoopDisposition::Break) {
@@ -383,9 +409,10 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, OutputFn dst)
383
409
return true ;
384
410
}
385
411
386
- template <OnUTF8Error ErrorAction, typename CharT>
412
+ template <OnUTF8Error ErrorAction, typename CharT, class InputCharsT >
387
413
static void
388
- CopyAndInflateUTF8IntoBuffer (JSContext* cx, const UTF8Chars src, CharT *dst, size_t outlen, bool allASCII)
414
+ CopyAndInflateUTF8IntoBuffer (JSContext* cx, const InputCharsT src, CharT* dst, size_t outlen,
415
+ bool allASCII)
389
416
{
390
417
if (allASCII) {
391
418
size_t srclen = src.length ();
@@ -405,9 +432,9 @@ CopyAndInflateUTF8IntoBuffer(JSContext* cx, const UTF8Chars src, CharT *dst, siz
405
432
dst[outlen] = CharT (' \0 ' ); // NUL char
406
433
}
407
434
408
- template <OnUTF8Error ErrorAction, typename CharsT>
435
+ template <OnUTF8Error ErrorAction, typename CharsT, class InputCharsT >
409
436
static CharsT
410
- InflateUTF8StringHelper (JSContext* cx, const UTF8Chars src, size_t * outlen)
437
+ InflateUTF8StringHelper (JSContext* cx, const InputCharsT src, size_t * outlen)
411
438
{
412
439
using CharT = typename CharsT::CharT;
413
440
static_assert (std::is_same<CharT, char16_t >::value ||
@@ -448,6 +475,12 @@ JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* out
448
475
return InflateUTF8StringHelper<OnUTF8Error::Throw, TwoByteCharsZ>(cx, utf8, outlen);
449
476
}
450
477
478
// WTF-8 counterpart of UTF8CharsToNewTwoByteCharsZ: calls
// InflateUTF8StringHelper with OnUTF8Error::Throw and a TwoByteCharsZ result,
// passing WTF8Chars so the helper is instantiated for WTF-8 input.
// NOTE(review): the meaning of *outlen and ownership of the returned buffer
// are determined by the helper, whose body is not visible here -- confirm.
+ TwoByteCharsZ
479
+ JS::WTF8CharsToNewTwoByteCharsZ (JSContext* cx, const WTF8Chars wtf8, size_t * outlen)
480
+ {
481
+ return InflateUTF8StringHelper<OnUTF8Error::Throw, TwoByteCharsZ>(cx, wtf8, outlen);
482
+ }
483
+
451
484
TwoByteCharsZ
452
485
JS::UTF8CharsToNewTwoByteCharsZ (JSContext* cx, const ConstUTF8CharsZ& utf8, size_t * outlen)
453
486
{
@@ -518,8 +551,9 @@ JS::LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t*
518
551
* consumption.
519
552
*/
520
553
554
+ template <class InputCharsT >
521
555
bool
522
- GetUTF8AtomizationData (JSContext* cx, const JS::UTF8Chars utf8, size_t * outlen,
556
+ GetUTF8AtomizationData (JSContext* cx, const InputCharsT utf8, size_t * outlen,
523
557
JS::SmallestEncoding* encoding, HashNumber* hashNum)
524
558
{
525
559
*outlen = 0 ;
@@ -539,6 +573,15 @@ GetUTF8AtomizationData(JSContext* cx, const JS::UTF8Chars utf8, size_t* outlen,
539
573
return true ;
540
574
}
541
575
576
// Explicit instantiations of GetUTF8AtomizationData for the two input types
// used with it here: JS::UTF8Chars and JS::WTF8Chars.
// NOTE(review): presumably required because the template body is defined in
// this file while callers live in other translation units -- confirm.
+ template
577
+ bool
578
+ GetUTF8AtomizationData<JS::UTF8Chars>(JSContext* cx, const JS::UTF8Chars utf8, size_t * outlen,
579
+ JS::SmallestEncoding* encoding, HashNumber* hashNum);
580
+ template
581
+ bool
582
+ GetUTF8AtomizationData<JS::WTF8Chars>(JSContext* cx, const JS::WTF8Chars utf8, size_t * outlen,
583
+ JS::SmallestEncoding* encoding, HashNumber* hashNum);
584
+
542
585
template <typename CharT>
543
586
bool
544
587
UTF8EqualsChars (const JS::UTF8Chars utfChars, const CharT* chars)
@@ -575,9 +618,9 @@ UTF8EqualsChars(const JS::UTF8Chars utfChars, const CharT* chars)
575
618
template bool UTF8EqualsChars<char16_t >(const JS::UTF8Chars, const char16_t *);
576
619
template bool UTF8EqualsChars<JS::Latin1Char>(const JS::UTF8Chars, const JS::Latin1Char*);
577
620
578
- template <typename CharT>
621
+ template <typename CharT, class InputCharsT >
579
622
void
580
- InflateUTF8CharsToBufferAndTerminate (const UTF8Chars src, CharT* dst, size_t dstLen,
623
+ InflateUTF8CharsToBufferAndTerminate (const InputCharsT src, CharT* dst, size_t dstLen,
581
624
JS::SmallestEncoding encoding)
582
625
{
583
626
CopyAndInflateUTF8IntoBuffer<OnUTF8Error::Crash>(/* cx = */ nullptr , src, dst, dstLen,
@@ -590,6 +633,12 @@ InflateUTF8CharsToBufferAndTerminate<char16_t>(const UTF8Chars src, char16_t* ds
590
633
template void
591
634
InflateUTF8CharsToBufferAndTerminate<JS::Latin1Char>(const UTF8Chars src, JS::Latin1Char* dst,
592
635
size_t dstLen, JS::SmallestEncoding encoding);
636
// Explicit instantiations for WTF8Chars input, mirroring the UTF8Chars ones
// above: one writing char16_t output and one writing JS::Latin1Char output.
+ template void
637
+ InflateUTF8CharsToBufferAndTerminate<char16_t >(const WTF8Chars src, char16_t * dst, size_t dstLen,
638
+ JS::SmallestEncoding encoding);
639
+ template void
640
+ InflateUTF8CharsToBufferAndTerminate<JS::Latin1Char>(const WTF8Chars src, JS::Latin1Char* dst,
641
+ size_t dstLen, JS::SmallestEncoding encoding);
593
642
594
643
#ifdef DEBUG
595
644
void
0 commit comments