Skip to content

Commit a4c312e

Browse files
authored
fix(TextDecoder): add (nearly) full support for TextDecoder and TextEncoder (#501)
1 parent 893a084 commit a4c312e

30 files changed

+1661
-205
lines changed

.github/workflows/dependencies.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@ jobs:
1111
steps:
1212
- uses: actions/[email protected]
1313
with:
14-
allow-licenses: Apache-2.0, MIT, BSD-3-Clause, ISC, BSD-2-Clause, MIT OR (CC0-1.0 AND MIT), CC0-1.0 OR MIT OR (CC0-1.0 AND MIT), CC-BY-3.0, CC0-1.0, MIT OR Apache-2.0, MIT AND Apache-2.0, MIT OR WTFPL, BSD-2-Clause OR (MIT OR Apache-2.0), Python-2.0, ISC AND MIT, Apache-2.0 AND MIT
14+
allow-licenses: Apache-2.0, MIT, BSD-3-Clause, ISC, BSD-2-Clause, MIT OR (CC0-1.0 AND MIT), CC0-1.0 OR MIT OR (CC0-1.0 AND MIT), CC-BY-3.0, CC0-1.0, MIT OR Apache-2.0, MIT AND Apache-2.0, MIT OR WTFPL, BSD-2-Clause OR (MIT OR Apache-2.0), Python-2.0, ISC AND MIT, Apache-2.0 AND MIT, MIT/Apache-2.0, Apache-2.0 OR MIT, (Apache-2.0 OR MIT) AND BSD-3-Clause
1515
fail-on-scopes: runtime

c-dependencies/js-compute-runtime/Makefile

+27-3
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,27 @@ $(RUST_URL_LIB): | $(BUILD)
242242
--target-dir $(BUILD)/rusturl \
243243
--target=wasm32-wasi $(CARGO_FLAG)
244244

245+
# rustencoding build ################################################################
# Builds the rust-encoding crate under $(FSM_SRC)/rust-encoding into the static
# library librust_encoding.a for the wasm32-wasi target.
#
# The library is rebuilt when any *.rs file under the crate's src/ directory,
# its Cargo.toml, or its cbindgen.toml changes; $(BUILD) is an order-only
# prerequisite. The recipe first runs cbindgen, writing rust-encoding.h into
# the crate directory, and then runs cargo build with its target directory set
# to $(BUILD)/rustencoding.
#
# NOTE(review): the recipe changes directory into $(RUST_ENCODING_SRC) before
# passing $(RUST_ENCODING_SRC)/Cargo.toml and $(BUILD)/rustencoding to cargo, so
# it appears to assume both variables hold absolute paths -- confirm.
246+
247+
RUST_ENCODING_SRC := $(FSM_SRC)/rust-encoding
248+
249+
RUST_ENCODING_RS_FILES := $(shell find $(RUST_ENCODING_SRC)/src -name '*.rs')
250+
251+
RUST_ENCODING_LIB := $(BUILD)/rustencoding/wasm32-wasi/$(MODE)/librust_encoding.a
252+
253+
rustencoding: $(RUST_ENCODING_LIB)
254+
255+
$(RUST_ENCODING_LIB): $(RUST_ENCODING_RS_FILES)
256+
$(RUST_ENCODING_LIB): $(RUST_ENCODING_SRC)/Cargo.toml
257+
$(RUST_ENCODING_LIB): $(RUST_ENCODING_SRC)/cbindgen.toml
258+
$(RUST_ENCODING_LIB): | $(BUILD)
259+
$(call cmd_format,CARGO,$@) \
260+
cd $(RUST_ENCODING_SRC) && cbindgen --output rust-encoding.h && \
261+
cargo build $(call quiet_flag,--quiet) \
262+
--manifest-path $(RUST_ENCODING_SRC)/Cargo.toml \
263+
--target-dir $(BUILD)/rustencoding \
264+
--target=wasm32-wasi $(CARGO_FLAG)
265+
245266

246267
# wit-bindgen integration ######################################################
247268

@@ -283,7 +304,7 @@ $(eval $(call compile_cxx,$(FSM_SRC)/fastly-world/fastly_world_adapter.cpp))
283304
# NOTE: we shadow wasm-opt by adding $(FSM_SRC)/scripts to the path, which
284305
# includes a script called wasm-opt that immediately exits successfully. See
285306
# that script for more information about why we do this.
286-
$(OBJ_DIR)/js-compute-runtime.wasm: $(FSM_OBJ) $(SM_OBJ) $(RUST_URL_LIB)
307+
$(OBJ_DIR)/js-compute-runtime.wasm: $(FSM_OBJ) $(SM_OBJ) $(RUST_URL_LIB) $(RUST_ENCODING_LIB)
287308
$(OBJ_DIR)/js-compute-runtime.wasm: $(OBJ_DIR)/impl/main.o
288309
$(OBJ_DIR)/js-compute-runtime.wasm: $(OBJ_DIR)/fastly-world/fastly_world_adapter.o
289310
$(call cmd_format,WASI_LD,$@) PATH="$(FSM_SRC)/scripts:$$PATH" \
@@ -303,7 +324,7 @@ $(eval $(call compile_c,$(FSM_SRC)/fastly-world/fastly_world.c))
303324
# NOTE: we shadow wasm-opt by adding $(FSM_SRC)/scripts to the path, which
304325
# includes a script called wasm-opt that immediately exits successfully. See
305326
# that script for more information about why we do this.
306-
$(OBJ_DIR)/js-compute-runtime-component.wasm: $(FSM_OBJ) $(SM_OBJ) $(RUST_URL_LIB)
327+
$(OBJ_DIR)/js-compute-runtime-component.wasm: $(FSM_OBJ) $(SM_OBJ) $(RUST_URL_LIB) $(RUST_ENCODING_LIB)
307328
$(OBJ_DIR)/js-compute-runtime-component.wasm: $(OBJ_DIR)/impl/main_component.o
308329
$(OBJ_DIR)/js-compute-runtime-component.wasm: $(OBJ_DIR)/fastly-world/fastly_world.o
309330
$(call cmd_format,WASI_LD,$@) PATH="$(FSM_SRC)/scripts:$$PATH" \
@@ -315,14 +336,17 @@ $(eval $(call compile_cxx,$(FSM_SRC)/impl/main_component.cpp))
315336

316337
# Shared builtins build ########################################################
317338

318-
shared-builtins: shared/builtins.a shared/librust_url.a
339+
shared-builtins: shared/builtins.a shared/librust_url.a shared/librust_encoding.a
319340

320341
shared/builtins.a: $(OBJ_DIR)/builtins/shared/*.o $(OBJ_DIR)/builtin.o | shared
321342
$(call cmd,wasi_ar,$^)
322343

323344
shared/librust_url.a: $(RUST_URL_LIB) | shared
324345
$(call cmd,cp,$@)
325346

347+
shared/librust_encoding.a: $(RUST_ENCODING_LIB) | shared
348+
$(call cmd,cp,$@)
349+
326350
# These two rules copy the built artifacts into the $(FSM_SRC) directory, and
327351
# are both marked phony as we need to do the right thing when running the
328352
# following sequence:

c-dependencies/js-compute-runtime/builtins/shared/text-decoder.cpp

+207-15
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,172 @@
11
#include "builtins/shared/text-decoder.h"
22
#include "builtin.h"
3+
#include "js-compute-builtins.h"
4+
#include "rust-encoding/rust-encoding.h"
35

46
namespace builtins {
57

68
// Implements TextDecoder#decode(input, options).
//
// Decodes the bytes of the optional first argument with the jsencoding decoder
// stored in this object's Decoder reserved slot and returns the decoded text
// as a JS string (built from UTF-16 code units).
//
// Arguments (both optional):
//   input   - undefined is treated as an empty byte sequence; any other value
//             is converted with value_to_buffer, and decode fails if that
//             yields no buffer.
//   options - if defined it must be an object, otherwise
//             JSMSG_TEXT_DECODER_DECODE_OPTIONS_NOT_DICTIONARY is reported
//             (this includes null); its `stream` property is coerced to a
//             boolean.
//
// Returns true with the string in args.rval(), or false when an error was
// reported or propagated.
bool TextDecoder::decode(JSContext *cx, unsigned argc, JS::Value *vp) {
7-
METHOD_HEADER(1)
9+
METHOD_HEADER(0);
810

9-
// Default to empty string if no input is given.
10-
if (args[0].isUndefined()) {
11-
args.rval().set(JS_GetEmptyStringValue(cx));
12-
return true;
// Obtain the input bytes; a missing input becomes a zero-length span.
11+
auto source_value = args.get(0);
12+
std::optional<std::span<uint8_t>> src;
13+
14+
if (source_value.isUndefined()) {
15+
src = std::span<uint8_t, 0>();
16+
} else {
17+
src = value_to_buffer(cx, source_value, "TextDecoder#decode: input");
18+
}
// presumably value_to_buffer has already reported the error when it returns
// no value -- confirm.
19+
if (!src.has_value()) {
20+
return false;
1321
}
1422

15-
auto data = value_to_buffer(cx, args[0], "TextDecoder#decode: input");
16-
if (!data.has_value()) {
// Read the optional `stream` flag from the options object.
// NOTE(review): JS_GetProperty may run script (e.g. a getter) after `src` was
// captured above -- confirm the span cannot be invalidated by that.
23+
bool stream = false;
24+
if (args.hasDefined(1)) {
25+
auto options_value = args.get(1);
26+
if (!options_value.isObject()) {
27+
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
28+
JSMSG_TEXT_DECODER_DECODE_OPTIONS_NOT_DICTIONARY);
29+
return false;
30+
}
31+
JS::RootedObject options(cx, &options_value.toObject());
32+
JS::RootedValue stream_value(cx);
33+
if (!JS_GetProperty(cx, options, "stream", &stream_value)) {
34+
return false;
35+
}
36+
stream = JS::ToBoolean(stream_value);
37+
}
38+
// Per-instance state kept in reserved slots: the fatal and ignoreBOM flags
// and the native decoder pointer.
39+
auto fatal =
40+
JS::GetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::Fatal)).toBoolean();
41+
auto ignoreBOM =
42+
JS::GetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::IgnoreBOM)).toBoolean();
43+
auto decoder = reinterpret_cast<jsencoding::Decoder *>(
44+
JS::GetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::Decoder)).toPrivate());
45+
MOZ_ASSERT(decoder);
46+
// Size the output buffer from decoder_max_utf16_buffer_length. srcLen and
// destLen are passed by pointer to the decode call; destLen is afterwards used
// as the string length, so it is presumably updated to the number of code
// units written -- confirm against rust-encoding.h.
// NOTE(review): the `!dest` check only matters if `new[]` can return null in
// this build, and `destLen + 1` is not checked for overflow -- confirm.
47+
uint32_t result;
48+
size_t srcLen = src->size();
49+
size_t destLen = jsencoding::decoder_max_utf16_buffer_length(decoder, srcLen);
50+
std::unique_ptr<uint16_t[]> dest(new uint16_t[destLen + 1]);
51+
if (!dest) {
52+
JS_ReportOutOfMemory(cx);
1753
return false;
1854
}
// Fatal decoders use the "without replacement" variant and report
// JSMSG_TEXT_DECODER_DECODING_FAILED on a non-zero result. Otherwise the
// replacing variant is used; hadReplacements is received but not consulted,
// and the result is only checked by the MOZ_ASSERT below. `!stream` is passed
// as the final flag of both calls (presumably "last chunk" -- confirm).
55+
if (fatal) {
56+
result = jsencoding::decoder_decode_to_utf16_without_replacement(decoder, src->data(), &srcLen,
57+
dest.get(), &destLen, !stream);
58+
if (result != 0) {
59+
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_TEXT_DECODER_DECODING_FAILED);
60+
return false;
61+
}
62+
} else {
63+
bool hadReplacements;
64+
result = jsencoding::decoder_decode_to_utf16(decoder, src->data(), &srcLen, dest.get(),
65+
&destLen, !stream, &hadReplacements);
66+
}
67+
MOZ_ASSERT(result == 0);
68+
69+
auto encoding = reinterpret_cast<jsencoding::Encoding *>(
70+
JS::GetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::Encoding)).toPrivate());
71+
MOZ_ASSERT(encoding);
72+
// If the internal streaming flag of the decoder object is not set,
73+
// then reset the encoding algorithm state to the default values
// The reset variant is chosen by ignoreBOM, matching the flag read above.
74+
if (!stream) {
75+
if (ignoreBOM) {
76+
jsencoding::encoding_new_decoder_without_bom_handling_into(encoding, decoder);
77+
} else {
78+
jsencoding::encoding_new_decoder_with_bom_removal_into(encoding, decoder);
79+
}
80+
}
1981

20-
JS::RootedString str(
21-
cx, JS_NewStringCopyUTF8N(cx, JS::UTF8Chars((char *)data->data(), data->size())));
22-
if (!str)
// Copy destLen UTF-16 code units out of the scratch buffer into a new JS
// string; a failure here is reported as JSMSG_TEXT_DECODER_DECODING_FAILED.
82+
JS::RootedString str(cx,
83+
JS_NewUCStringCopyN(cx, reinterpret_cast<char16_t *>(dest.get()), destLen));
84+
if (!str) {
85+
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_TEXT_DECODER_DECODING_FAILED);
2386
return false;
87+
}
2488

2589
args.rval().setString(str);
2690
return true;
2791
}
2892

2993
// Getter for TextDecoder#encoding.
//
// Returns the name of the Encoding stored in this object's Encoding reserved
// slot, converted to lowercase, as a JS string in args.rval().
//
// Reports JSMSG_INVALID_INTERFACE and returns false when the receiver's
// prototype is not TextDecoder::proto_obj; returns false after reporting
// out-of-memory if the name buffer or the JS string cannot be allocated.
bool TextDecoder::encoding_get(JSContext *cx, unsigned argc, JS::Value *vp) {
30-
METHOD_HEADER(0)
94+
METHOD_HEADER(0);
3195

32-
JS::RootedString str(cx, JS_NewStringCopyN(cx, "utf-8", 5));
33-
if (!str)
// Only receivers whose prototype is exactly TextDecoder::proto_obj are accepted.
96+
JS::RootedObject result(cx);
97+
if (!JS_GetPrototype(cx, self, &result)) {
98+
return false;
99+
}
100+
if (result != TextDecoder::proto_obj) {
101+
JS_ReportErrorNumberASCII(cx, GetErrorMessageBuiltin, nullptr, JSMSG_INVALID_INTERFACE,
102+
"encoding get", "TextDecoder");
34103
return false;
104+
}
105+
106+
auto encoding = reinterpret_cast<jsencoding::Encoding *>(
107+
JS::GetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::Encoding)).toPrivate());
108+
MOZ_ASSERT(encoding);
109+
// Scratch buffer of ENCODING_NAME_MAX_LENGTH bytes; encoding_name fills it and
// returns the number of bytes used, which becomes the string length below.
110+
std::unique_ptr<uint8_t[]> name(new uint8_t[jsencoding::ENCODING_NAME_MAX_LENGTH]);
111+
if (!name) {
112+
JS_ReportOutOfMemory(cx);
113+
return false;
114+
}
115+
size_t length = jsencoding::encoding_name(encoding, name.get());
116+
// encoding_rs/jsencoding returns the name uppercase but we need to have it lowercased
117+
for (size_t i = 0; i < length; i++) {
118+
name[i] = std::tolower(name[i]);
119+
}
120+
JS::RootedString str(cx, JS_NewStringCopyN(cx, reinterpret_cast<char *>(name.get()), length));
121+
if (!str) {
122+
JS_ReportOutOfMemory(cx);
123+
return false;
124+
}
35125

36126
args.rval().setString(str);
37127
return true;
38128
}
39129

130+
// Getter for TextDecoder#fatal.
//
// Returns the boolean stored in this object's Fatal reserved slot via
// args.rval(). Reports JSMSG_INVALID_INTERFACE and returns false when the
// receiver's prototype is not TextDecoder::proto_obj.
bool TextDecoder::fatal_get(JSContext *cx, unsigned argc, JS::Value *vp) {
131+
METHOD_HEADER(0);
132+
// Only receivers whose prototype is exactly TextDecoder::proto_obj are accepted.
133+
JS::RootedObject result(cx);
134+
if (!JS_GetPrototype(cx, self, &result)) {
135+
return false;
136+
}
137+
if (result != TextDecoder::proto_obj) {
138+
JS_ReportErrorNumberASCII(cx, GetErrorMessageBuiltin, nullptr, JSMSG_INVALID_INTERFACE,
139+
"fatal get", "TextDecoder");
140+
return false;
141+
}
142+
143+
auto fatal =
144+
JS::GetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::Fatal)).toBoolean();
145+
146+
args.rval().setBoolean(fatal);
147+
return true;
148+
}
149+
150+
// Getter for TextDecoder#ignoreBOM.
//
// Returns the boolean stored in this object's IgnoreBOM reserved slot via
// args.rval(). Reports JSMSG_INVALID_INTERFACE and returns false when the
// receiver's prototype is not TextDecoder::proto_obj.
bool TextDecoder::ignoreBOM_get(JSContext *cx, unsigned argc, JS::Value *vp) {
151+
METHOD_HEADER(0);
152+
// Only receivers whose prototype is exactly TextDecoder::proto_obj are accepted.
153+
JS::RootedObject result(cx);
154+
if (!JS_GetPrototype(cx, self, &result)) {
155+
return false;
156+
}
157+
if (result != TextDecoder::proto_obj) {
158+
JS_ReportErrorNumberASCII(cx, GetErrorMessageBuiltin, nullptr, JSMSG_INVALID_INTERFACE,
159+
"ignoreBOM get", "TextDecoder");
160+
return false;
161+
}
162+
163+
auto ignoreBOM =
164+
JS::GetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::IgnoreBOM)).toBoolean();
165+
166+
args.rval().setBoolean(ignoreBOM);
167+
return true;
168+
}
169+
40170
const JSFunctionSpec TextDecoder::static_methods[] = {
41171
JS_FS_END,
42172
};
@@ -46,20 +176,82 @@ const JSPropertySpec TextDecoder::static_properties[] = {
46176
};
47177

48178
const JSFunctionSpec TextDecoder::methods[] = {
49-
JS_FN("decode", decode, 1, JSPROP_ENUMERATE),
179+
JS_FN("decode", decode, 0, JSPROP_ENUMERATE),
50180
JS_FS_END,
51181
};
52182

53183
const JSPropertySpec TextDecoder::properties[] = {
54184
JS_PSG("encoding", encoding_get, JSPROP_ENUMERATE),
185+
JS_PSG("fatal", fatal_get, JSPROP_ENUMERATE),
186+
JS_PSG("ignoreBOM", ignoreBOM_get, JSPROP_ENUMERATE),
55187
JS_STRING_SYM_PS(toStringTag, "TextDecoder", JSPROP_READONLY),
56188
JS_PS_END,
57189
};
58190

191+
// constructor(optional DOMString label = "utf-8", optional TextDecoderOptions options = {});
59192
bool TextDecoder::constructor(JSContext *cx, unsigned argc, JS::Value *vp) {
60193
CTOR_HEADER("TextDecoder", 0);
61-
194+
// 1. Let encoding be the result of getting an encoding from label.
195+
auto label_value = args.get(0);
196+
// https://encoding.spec.whatwg.org/#concept-encoding-get
197+
// To get an encoding from a string label, run these steps:
198+
// 1. Remove any leading and trailing ASCII whitespace from label.
199+
// 2. If label is an ASCII case-insensitive match for any of the labels listed in the table
200+
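// below, then return the corresponding encoding; otherwise return failure.
// JS-Compute-Runtime: no trimming or case-folding is done here; the label bytes go straight to
// jsencoding::encoding_for_label_no_replacement (which presumably performs both steps -- confirm),
// and a null result is reported as JSMSG_TEXT_DECODER_INVALID_ENCODING.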
201+
size_t length;
202+
jsencoding::Encoding *encoding;
203+
if (label_value.isUndefined()) {
204+
encoding = const_cast<jsencoding::Encoding *>(jsencoding::encoding_for_label_no_replacement(
205+
reinterpret_cast<uint8_t *>(const_cast<char *>("UTF-8")), 5));
206+
} else {
207+
auto label_chars = encode(cx, label_value, &length);
208+
if (!label_chars) {
209+
return false;
210+
}
211+
encoding = const_cast<jsencoding::Encoding *>(jsencoding::encoding_for_label_no_replacement(
212+
reinterpret_cast<uint8_t *>(label_chars.get()), length));
213+
}
214+
if (!encoding) {
215+
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_TEXT_DECODER_INVALID_ENCODING);
216+
return false;
217+
}
218+
bool fatal = false;
219+
bool ignoreBOM = false;
220+
if (args.hasDefined(1)) {
221+
auto options_val = args.get(1);
222+
if (options_val.isObject()) {
223+
JS::RootedObject options(cx, &options_val.toObject());
224+
JS::RootedValue fatal_value(cx);
225+
if (!JS_GetProperty(cx, options, "fatal", &fatal_value)) {
226+
return false;
227+
}
228+
fatal = JS::ToBoolean(fatal_value);
229+
JS::RootedValue ignoreBOM_value(cx);
230+
if (!JS_GetProperty(cx, options, "ignoreBOM", &ignoreBOM_value)) {
231+
return false;
232+
}
233+
ignoreBOM = JS::ToBoolean(ignoreBOM_value);
234+
} else if (!options_val.isNull()) {
235+
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
236+
JSMSG_TEXT_DECODER_OPTIONS_NOT_DICTIONARY);
237+
return false;
238+
}
239+
}
62240
JS::RootedObject self(cx, JS_NewObjectForConstructor(cx, &class_, args));
241+
jsencoding::Decoder *decoder;
242+
if (ignoreBOM) {
243+
decoder = jsencoding::encoding_new_decoder_without_bom_handling(encoding);
244+
} else {
245+
decoder = jsencoding::encoding_new_decoder_with_bom_removal(encoding);
246+
}
247+
JS::SetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::Decoder),
248+
JS::PrivateValue(decoder));
249+
JS::SetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::Encoding),
250+
JS::PrivateValue(encoding));
251+
JS::SetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::Fatal),
252+
JS::BooleanValue(fatal));
253+
JS::SetReservedSlot(self, static_cast<uint32_t>(TextDecoder::Slots::IgnoreBOM),
254+
JS::BooleanValue(ignoreBOM));
63255

64256
args.rval().setObject(*self);
65257
return true;

c-dependencies/js-compute-runtime/builtins/shared/text-decoder.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,19 @@ namespace builtins {
88
class TextDecoder final : public BuiltinImpl<TextDecoder> {
99
static bool decode(JSContext *cx, unsigned argc, JS::Value *vp);
1010
static bool encoding_get(JSContext *cx, unsigned argc, JS::Value *vp);
11+
static bool fatal_get(JSContext *cx, unsigned argc, JS::Value *vp);
12+
static bool ignoreBOM_get(JSContext *cx, unsigned argc, JS::Value *vp);
1113

1214
public:
1315
static constexpr const char *class_name = "TextDecoder";
1416

15-
enum class Slots { Count };
17+
enum class Slots {
18+
Decoder,
19+
Encoding,
20+
Fatal,
21+
IgnoreBOM,
22+
Count,
23+
};
1624

1725
static const JSFunctionSpec static_methods[];
1826
static const JSPropertySpec static_properties[];

0 commit comments

Comments
 (0)