From adbdcbfb3c8e21a1e5ee8dbdc12052c9063c6285 Mon Sep 17 00:00:00 2001 From: Jon Phillips Date: Fri, 8 Nov 2024 18:56:02 +0000 Subject: [PATCH 01/12] Call IsolateObserver::TeardownFinished() only after an isolate's API object has been destroyed. --- src/workerd/io/worker.c++ | 18 +++++++++--------- src/workerd/io/worker.h | 12 +++++++----- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/workerd/io/worker.c++ b/src/workerd/io/worker.c++ index c56b49438bb..b4434763e2a 100644 --- a/src/workerd/io/worker.c++ +++ b/src/workerd/io/worker.c++ @@ -972,16 +972,16 @@ Worker::Isolate::Isolate(kj::Own apiParam, kj::StringPtr id, InspectorPolicy inspectorPolicy, ConsoleMode consoleMode) - : id(kj::str(id)), + : metrics(kj::atomicAddRef(apiParam->getMetrics())), + id(kj::str(id)), api(kj::mv(apiParam)), limitEnforcer(api->getLimitEnforcer()), consoleMode(consoleMode), featureFlagsForFl(makeCompatJson(decompileCompatibilityFlagsForFl(api->getFeatureFlags()))), - metrics(api->getMetrics()), - impl(kj::heap(*api, metrics, limitEnforcer, inspectorPolicy)), + impl(kj::heap(*api, *metrics, limitEnforcer, inspectorPolicy)), weakIsolateRef(WeakIsolateRef::wrap(this)), traceAsyncContextKey(kj::refcounted()) { - metrics.created(); + metrics->created(); // We just created our isolate, so we don't need to use Isolate::Impl::Lock (nor an async lock). jsg::runInV8Stack([&](jsg::V8StackScope& stackScope) { auto lock = api->lock(stackScope); @@ -1237,7 +1237,7 @@ Worker::Script::Script(kj::Own isolateParam, modular(source.is()), impl(kj::heap()) { this->isPython = false; - auto parseMetrics = isolate->metrics.parse(startType); + auto parseMetrics = isolate->metrics->parse(startType); // TODO(perf): It could make sense to take an async lock when constructing a script if we // co-locate multiple scripts in the same isolate. As of this writing, we do not, except in // previews, where it doesn't matter. If we ever do co-locate multiple scripts in the same @@ -1371,13 +1371,13 @@ kj::Own Worker::Isolate::getWeakRef() con } Worker::Isolate::~Isolate() noexcept(false) { - metrics.teardownStarted(); + metrics->teardownStarted(); // Update the isolate stats one last time to make sure we're accurate for cleanup in // `evicted()`. - limitEnforcer.reportMetrics(metrics); + limitEnforcer.reportMetrics(*metrics); - metrics.evicted(); + metrics->evicted(); weakIsolateRef->invalidate(); // Make sure to destroy things under lock. This lock should never be contended since the isolate @@ -1386,7 +1386,7 @@ Worker::Isolate::~Isolate() noexcept(false) { // worker destruction queue. jsg::runInV8Stack([&](jsg::V8StackScope& stackScope) { Isolate::Impl::Lock recordedLock(*this, Worker::Lock::TakeSynchronously(kj::none), stackScope); - metrics.teardownLockAcquired(); + metrics->teardownLockAcquired(); auto inspector = kj::mv(impl->inspector); auto dropTraceAsyncContextKey = kj::mv(traceAsyncContextKey); }); diff --git a/src/workerd/io/worker.h b/src/workerd/io/worker.h index 5a2be4797a8..ddc5f19209b 100644 --- a/src/workerd/io/worker.h +++ b/src/workerd/io/worker.h @@ -286,11 +286,11 @@ class Worker::Isolate: public kj::AtomicRefcounted { static const Isolate& from(jsg::Lock& js); inline IsolateObserver& getMetrics() { - return metrics; + return *metrics; } inline const IsolateObserver& getMetrics() const { - return metrics; + return *metrics; } inline kj::StringPtr getId() const { @@ -402,6 +402,11 @@ class Worker::Isolate: public kj::AtomicRefcounted { kj::Promise takeAsyncLockImpl( kj::Maybe> lockTiming) const; + kj::Own metrics; + // NOTE: destruction order is important here. The teardown guard should be destroyed after the + // `api` since API destruction may perform some aspects of isolate teardown. + TeardownFinishedGuard teardownGuard{*metrics}; + kj::String id; kj::Own api; IsolateLimitEnforcer& limitEnforcer; @@ -411,9 +416,6 @@ class Worker::Isolate: public kj::AtomicRefcounted { // compatibility enable-flags that are relevant to FL. kj::Maybe featureFlagsForFl; - IsolateObserver& metrics; - TeardownFinishedGuard teardownGuard{metrics}; - struct Impl; kj::Own impl; From 7b4b042dc6b0be6ffa0c29a26a477a964101dcdf Mon Sep 17 00:00:00 2001 From: Harris Hancock Date: Tue, 12 Nov 2024 11:41:32 +0000 Subject: [PATCH 02/12] Release 2024-11-12 --- src/workerd/io/supported-compatibility-date.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/workerd/io/supported-compatibility-date.txt b/src/workerd/io/supported-compatibility-date.txt index b02d5cadaa2..5cd17859e76 100644 --- a/src/workerd/io/supported-compatibility-date.txt +++ b/src/workerd/io/supported-compatibility-date.txt @@ -1 +1 @@ -2024-11-06 +2024-11-12 From eb008cedbc05eae88287357aacc5f3869f080de8 Mon Sep 17 00:00:00 2001 From: Victor Berchet Date: Tue, 12 Nov 2024 16:28:23 +0100 Subject: [PATCH 03/12] [crypto] Clarify the status of crypto keys APIs (#3051) --- src/node/crypto.ts | 16 ++++++++-------- src/node/internal/crypto_keys.ts | 20 ++++---------------- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/src/node/crypto.ts b/src/node/crypto.ts index 9c3231f34d1..d0a75d51065 100644 --- a/src/node/crypto.ts +++ b/src/node/crypto.ts @@ -285,14 +285,14 @@ export default { // * [x] crypto.createHash(algorithm[, options]) // * [x] crypto.createHmac(algorithm, key[, options]) // * [x] crypto.getHashes() -// * Keys -// * [ ] crypto.createPrivateKey(key) -// * [ ] crypto.createPublicKey(key) -// * [x] crypto.createSecretKey(key[, encoding]) -// * [x] crypto.generateKey(type, options, callback) -// * [x] crypto.generateKeyPair(type, options, callback) -// * [x] crypto.generateKeyPairSync(type, options) -// * [x] crypto.generateKeySync(type, options) +// * Keys, not implemented yet. Calling the following APIs will throw a ERR_METHOD_NOT_IMPLEMENTED +// * [.] crypto.createPrivateKey(key) +// * [.] crypto.createPublicKey(key) +// * [.] crypto.createSecretKey(key[, encoding]) +// * [.] crypto.generateKey(type, options, callback) +// * [.] crypto.generateKeyPair(type, options, callback) +// * [.] crypto.generateKeyPairSync(type, options) +// * [.] crypto.generateKeySync(type, options) // * Sign/Verify // * [ ] crypto.createSign(algorithm[, options]) // * [ ] crypto.createVerify(algorithm[, options]) diff --git a/src/node/internal/crypto_keys.ts b/src/node/internal/crypto_keys.ts index 83fc9fc91ad..014d0d7a729 100644 --- a/src/node/internal/crypto_keys.ts +++ b/src/node/internal/crypto_keys.ts @@ -438,10 +438,7 @@ export function generateKey( _options: GenerateKeyOptions, callback: GenerateKeyCallback ) { - // We intentionally have not implemented key generation up to this point. - // The reason is that generation of cryptographically safe keys is a CPU - // intensive operation that can often exceed limits on the amount of CPU - // time a worker is allowed. + // This API is not implemented yet. callback(new ERR_METHOD_NOT_IMPLEMENTED('crypto.generateKeySync')); } @@ -449,10 +446,7 @@ export function generateKeySync( _type: SecretKeyType, _options: GenerateKeyOptions ) { - // We intentionally have not implemented key generation up to this point. - // The reason is that generation of cryptographically safe keys is a CPU - // intensive operation that can often exceed limits on the amount of CPU - // time a worker is allowed. + // This API is not implemented yet. throw new ERR_METHOD_NOT_IMPLEMENTED('crypto.generateKeySync'); } @@ -461,10 +455,7 @@ export function generateKeyPair( _options: GenerateKeyPairOptions, callback: GenerateKeyPairCallback ) { - // We intentionally have not implemented key generation up to this point. - // The reason is that generation of cryptographically safe keys is a CPU - // intensive operation that can often exceed limits on the amount of CPU - // time a worker is allowed. + // This API is not implemented yet. callback(new ERR_METHOD_NOT_IMPLEMENTED('crypto.generateKeyPair')); } @@ -472,9 +463,6 @@ export function generateKeyPairSync( _type: AsymmetricKeyType, _options: GenerateKeyPairOptions ): KeyObjectPair { - // We intentionally have not implemented key generation up to this point. - // The reason is that generation of cryptographically safe keys is a CPU - // intensive operation that can often exceed limits on the amount of CPU - // time a worker is allowed. + // This API is not implemented yet. throw new ERR_METHOD_NOT_IMPLEMENTED('crypto.generateKeyPairSync'); } From c2e236961c0d2c16d4a84bcc94d8095c6d17895a Mon Sep 17 00:00:00 2001 From: Felix Hanau Date: Mon, 11 Nov 2024 22:26:38 +0000 Subject: [PATCH 04/12] [build] Move io-channels.h back into io target io-channels.h has a circular dependency with io due to its dependency on io-util.h (parsing the header using bazel is expected to fail accordingly). This means any source file depending on it would need to be in a bazel target depending on io to avoid missing header errors, even if the source file doesn't use io itself. Move it back into io in accordance with IWYU. - Fix dependencies for io:actor-id, util:duration-exceeded-logger - Fix duration-exceeded-logger.h appearing in two targets --- src/workerd/io/BUILD.bazel | 13 ++----------- src/workerd/server/tests/python/BUILD.bazel | 1 - src/workerd/util/BUILD.bazel | 3 ++- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/workerd/io/BUILD.bazel b/src/workerd/io/BUILD.bazel index de35dfc0d2b..b0678e78715 100644 --- a/src/workerd/io/BUILD.bazel +++ b/src/workerd/io/BUILD.bazel @@ -59,6 +59,7 @@ wd_cc_library( "compatibility-date.h", "features.h", "hibernation-manager.h", + "io-channels.h", "io-context.h", "io-own.h", "io-util.h", @@ -86,7 +87,6 @@ wd_cc_library( ":actor-id", ":actor-storage_capnp", ":cdp_capnp", - ":io-channels", ":io-gate", ":io-helpers", ":limit-enforcer", @@ -228,16 +228,7 @@ wd_cc_library( name = "actor-id", hdrs = ["actor-id.h"], visibility = ["//visibility:public"], -) - -wd_cc_library( - name = "io-channels", - hdrs = ["io-channels.h"], - visibility = ["//visibility:public"], - deps = [ - ":actor-id", - ":trace", - ], + deps = ["@capnp-cpp//src/kj"], ) genrule( diff --git a/src/workerd/server/tests/python/BUILD.bazel b/src/workerd/server/tests/python/BUILD.bazel index 44998fcd440..cd22d88182a 100644 --- a/src/workerd/server/tests/python/BUILD.bazel +++ b/src/workerd/server/tests/python/BUILD.bazel @@ -1,6 +1,5 @@ load("@bazel_skylib//rules:copy_file.bzl", "copy_file") load("//:build/pyodide_bucket.bzl", "PYODIDE_IMPORTS_TO_TEST") -load("//:build/wd_test.bzl", "wd_test") load("//src/workerd/server/tests/python:import_tests.bzl", "gen_import_tests") load("//src/workerd/server/tests/python:py_wd_test.bzl", "py_wd_test") diff --git a/src/workerd/util/BUILD.bazel b/src/workerd/util/BUILD.bazel index f697bfbc119..4f0ecdb9aae 100644 --- a/src/workerd/util/BUILD.bazel +++ b/src/workerd/util/BUILD.bazel @@ -40,7 +40,6 @@ wd_cc_library( "batch-queue.h", "canceler.h", "color-util.h", - "duration-exceeded-logger.h", "http-util.h", "stream-utils.h", "uncaught-exception-source.h", @@ -50,6 +49,7 @@ wd_cc_library( ], visibility = ["//visibility:public"], deps = [ + ":duration-exceeded-logger", "@capnp-cpp//src/kj", "@capnp-cpp//src/kj:kj-async", ], @@ -199,6 +199,7 @@ wd_cc_library( name = "duration-exceeded-logger", hdrs = ["duration-exceeded-logger.h"], visibility = ["//visibility:public"], + deps = ["@capnp-cpp//src/kj"], ) exports_files(["autogate.h"]) From f117326ec29e97018f9768b17d263af50a5a9696 Mon Sep 17 00:00:00 2001 From: Felix Hanau Date: Tue, 12 Nov 2024 13:49:35 -0500 Subject: [PATCH 05/12] [o11y] Add rudimentary user tracing support for connect() (#3089) * [o11y] Add rudimentary user tracing support for connect() - Use more descriptive name for IOContext function * Add [[nodiscard]] attribute to span creation functions If newly created spans are immediately discarded, they will not be able to capture the run time of the underlying operation. --- src/workerd/api/sockets.c++ | 1 + src/workerd/io/io-context.c++ | 4 ++-- src/workerd/io/io-context.h | 8 ++++---- src/workerd/io/trace.h | 6 +++--- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/workerd/api/sockets.c++ b/src/workerd/api/sockets.c++ index 2270f505a40..e3d345b288e 100644 --- a/src/workerd/api/sockets.c++ +++ b/src/workerd/api/sockets.c++ @@ -181,6 +181,7 @@ jsg::Ref connectImplNoOutputLock(jsg::Lock& js, auto& ioContext = IoContext::current(); JSG_REQUIRE(!ioContext.isFiddle(), TypeError, "Socket API not supported in web preview mode."); + auto userSpan = ioContext.makeUserTraceSpan("connect"_kjc); // Extract the domain/ip we are connecting to from the address. kj::String domain; diff --git a/src/workerd/io/io-context.c++ b/src/workerd/io/io-context.c++ index 47e61c1cd3c..5748e29b94d 100644 --- a/src/workerd/io/io-context.c++ +++ b/src/workerd/io/io-context.c++ @@ -833,7 +833,7 @@ kj::Own IoContext::getSubrequestChannel( }); } -kj::Own IoContext::getSubrequestChannelSpans(uint channel, +kj::Own IoContext::getSubrequestChannelWithSpans(uint channel, bool isInHouse, kj::Maybe cfBlobJson, kj::ConstString operationName, @@ -896,7 +896,7 @@ kj::Own IoContext::getHttpClientWithSpans(uint channel, kj::Maybe cfBlobJson, kj::ConstString operationName, std::initializer_list tags) { - return asHttpClient(getSubrequestChannelSpans( + return asHttpClient(getSubrequestChannelWithSpans( channel, isInHouse, kj::mv(cfBlobJson), kj::mv(operationName), kj::mv(tags))); } diff --git a/src/workerd/io/io-context.h b/src/workerd/io/io-context.h index 35ee29639f8..d9765e43f51 100644 --- a/src/workerd/io/io-context.h +++ b/src/workerd/io/io-context.h @@ -706,7 +706,7 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler kj::LiteralStringConst key; kj::LiteralStringConst value; }; - kj::Own getSubrequestChannelSpans(uint channel, + kj::Own getSubrequestChannelWithSpans(uint channel, bool isInHouse, kj::Maybe cfBlobJson, kj::ConstString operationName, @@ -725,7 +725,7 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler kj::Maybe cfBlobJson, kj::ConstString operationName); - // As above, but with list of span tags to add, analogous to getSubrequestChannelSpans(). + // As above, but with list of span tags to add, analogous to getSubrequestChannelWithSpans(). kj::Own getHttpClientWithSpans(uint channel, bool isInHouse, kj::Maybe cfBlobJson, @@ -779,8 +779,8 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler // Returns a builder for recording tracing spans (or a no-op builder if tracing is inactive). // If called while the JS lock is held, uses the trace information from the current async // context, if available. - SpanBuilder makeTraceSpan(kj::ConstString operationName); - SpanBuilder makeUserTraceSpan(kj::ConstString operationName); + [[nodiscard]] SpanBuilder makeTraceSpan(kj::ConstString operationName); + [[nodiscard]] SpanBuilder makeUserTraceSpan(kj::ConstString operationName); // Implement per-IoContext rate limiting for Cache.put(). Pass the body of a Cache API PUT // request and get a possibly wrapped stream back. diff --git a/src/workerd/io/trace.h b/src/workerd/io/trace.h index 61ecf9c6297..ef801c80fcb 100644 --- a/src/workerd/io/trace.h +++ b/src/workerd/io/trace.h @@ -612,7 +612,7 @@ class SpanParent { // Create a new child span. // // `operationName` should be a string literal with infinite lifetime. - SpanBuilder newChild( + [[nodiscard]] SpanBuilder newChild( kj::ConstString operationName, kj::Date startTime = kj::systemPreciseCalendarClock().now()); // Useful to skip unnecessary code when not observed. @@ -688,7 +688,7 @@ class SpanBuilder { // Create a new child span. // // `operationName` should be a string literal with infinite lifetime. - SpanBuilder newChild( + [[nodiscard]] SpanBuilder newChild( kj::ConstString operationName, kj::Date startTime = kj::systemPreciseCalendarClock().now()); // Change the operation name from what was specified at span creation. @@ -727,7 +727,7 @@ class SpanObserver: public kj::Refcounted { // Allocate a new child span. // // Note that children can be created long after a span has completed. - virtual kj::Own newChild() = 0; + [[nodiscard]] virtual kj::Own newChild() = 0; // Report the span data. Called at the end of the span. // From 56c6529e9409d6f21c22473f613971d5bf122cab Mon Sep 17 00:00:00 2001 From: James M Snell Date: Tue, 12 Nov 2024 10:09:45 -0800 Subject: [PATCH 06/12] Have Blob use BufferSource for arrayBuffer() also Avoid use of the `kj::Array` type wrapping entirely for reads (`arrayBuffer()` as well as `bytes()`) --- src/workerd/api/blob.c++ | 17 +++++++++++++---- src/workerd/api/blob.h | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/workerd/api/blob.c++ b/src/workerd/api/blob.c++ index 2d646b2774d..19396e17644 100644 --- a/src/workerd/api/blob.c++ +++ b/src/workerd/api/blob.c++ @@ -200,14 +200,23 @@ jsg::Ref Blob::slice( JSG_THIS, data.slice(start, end), normalizeType(kj::mv(type).orDefault(nullptr))); } -jsg::Promise> Blob::arrayBuffer(jsg::Lock& js) { - // TODO(perf): Find a way to avoid the copy. +jsg::Promise Blob::arrayBuffer(jsg::Lock& js) { FeatureObserver::maybeRecordUse(FeatureObserver::Feature::BLOB_AS_ARRAY_BUFFER); - return js.resolvedPromise(kj::heapArray(data)); + // We use BufferSource here instead of kj::Array to ensure that the + // resulting backing store is associated with the isolate, which is necessary + // for when we start making use of v8 sandboxing. + auto backing = jsg::BackingStore::alloc(js, data.size()); + backing.asArrayPtr().copyFrom(data); + return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing))); } jsg::Promise Blob::bytes(jsg::Lock& js) { - return js.resolvedPromise(js.bytes(kj::heapArray(data))); + // We use BufferSource here instead of kj::Array to ensure that the + // resulting backing store is associated with the isolate, which is necessary + // for when we start making use of v8 sandboxing. + auto backing = jsg::BackingStore::alloc(js, data.size()); + backing.asArrayPtr().copyFrom(data); + return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing))); } jsg::Promise Blob::text(jsg::Lock& js) { diff --git a/src/workerd/api/blob.h b/src/workerd/api/blob.h index 32be8728241..147d740e2f6 100644 --- a/src/workerd/api/blob.h +++ b/src/workerd/api/blob.h @@ -44,7 +44,7 @@ class Blob: public jsg::Object { jsg::Ref slice( jsg::Optional start, jsg::Optional end, jsg::Optional type); - jsg::Promise> arrayBuffer(jsg::Lock& js); + jsg::Promise arrayBuffer(jsg::Lock& js); jsg::Promise bytes(jsg::Lock& js); jsg::Promise text(jsg::Lock& js); jsg::Ref stream(); From 284ede75f7d1cc0663439e226e3ef93343a84363 Mon Sep 17 00:00:00 2001 From: James M Snell Date: Tue, 12 Nov 2024 11:25:46 -0800 Subject: [PATCH 07/12] Update streams readAllBytes impl to use BufferSource --- src/workerd/api/blob.c++ | 5 ++++ src/workerd/api/blob.h | 1 + src/workerd/api/http.c++ | 11 ++++---- src/workerd/api/http.h | 4 +-- src/workerd/api/r2-bucket.c++ | 4 +-- src/workerd/api/r2-bucket.h | 2 +- src/workerd/api/streams/common.h | 2 +- src/workerd/api/streams/internal.c++ | 21 ++++++++++---- src/workerd/api/streams/internal.h | 2 +- src/workerd/api/streams/standard-test.c++ | 34 +++++++++++------------ src/workerd/api/streams/standard.c++ | 29 +++++++++++-------- src/workerd/jsg/buffersource.h | 5 ++++ 12 files changed, 73 insertions(+), 47 deletions(-) diff --git a/src/workerd/api/blob.c++ b/src/workerd/api/blob.c++ index 19396e17644..74578186c6a 100644 --- a/src/workerd/api/blob.c++ +++ b/src/workerd/api/blob.c++ @@ -142,6 +142,11 @@ Blob::Blob(kj::Array data, kj::String type) data(ownData.get>()), type(kj::mv(type)) {} +Blob::Blob(jsg::Lock& js, jsg::BufferSource data, kj::String type) + : ownData(kj::mv(data)), + data(getPtr(ownData.get())), + type(kj::mv(type)) {} + Blob::Blob(jsg::Lock& js, kj::Array data, kj::String type) : ownData(wrap(js, kj::mv(data))), data(getPtr(ownData.get())), diff --git a/src/workerd/api/blob.h b/src/workerd/api/blob.h index 147d740e2f6..ca02339e81f 100644 --- a/src/workerd/api/blob.h +++ b/src/workerd/api/blob.h @@ -14,6 +14,7 @@ class ReadableStream; // An implementation of the Web Platform Standard Blob API class Blob: public jsg::Object { public: + Blob(jsg::Lock& js, jsg::BufferSource data, kj::String type); Blob(jsg::Lock& js, kj::Array data, kj::String type); Blob(jsg::Ref parent, kj::ArrayPtr data, kj::String type); diff --git a/src/workerd/api/http.c++ b/src/workerd/api/http.c++ index 3b6b16cc2a8..9fd77638e14 100644 --- a/src/workerd/api/http.c++ +++ b/src/workerd/api/http.c++ @@ -777,7 +777,7 @@ bool Body::getBodyUsed() { } return false; } -jsg::Promise> Body::arrayBuffer(jsg::Lock& js) { +jsg::Promise Body::arrayBuffer(jsg::Lock& js) { KJ_IF_SOME(i, impl) { return js.evalNow([&] { JSG_REQUIRE(!i.stream->isDisturbed(), TypeError, @@ -790,12 +790,13 @@ jsg::Promise> Body::arrayBuffer(jsg::Lock& js) { // If there's no body, we just return an empty array. // See https://fetch.spec.whatwg.org/#concept-body-consume-body - return js.resolvedPromise(kj::Array()); + auto backing = jsg::BackingStore::alloc(js, 0); + return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing))); } jsg::Promise Body::bytes(jsg::Lock& js) { - return arrayBuffer(js).then( - js, [](jsg::Lock& js, kj::Array data) { return js.bytes(kj::mv(data)); }); + return arrayBuffer(js).then(js, + [](jsg::Lock& js, jsg::BufferSource data) { return data.getTypedView(js); }); } jsg::Promise Body::text(jsg::Lock& js) { @@ -864,7 +865,7 @@ jsg::Promise Body::json(jsg::Lock& js) { } jsg::Promise> Body::blob(jsg::Lock& js) { - return arrayBuffer(js).then(js, [this](jsg::Lock& js, kj::Array buffer) { + return arrayBuffer(js).then(js, [this](jsg::Lock& js, jsg::BufferSource buffer) { kj::String contentType = headersRef.get(jsg::ByteString(kj::str("Content-Type"))) .map([](jsg::ByteString&& b) -> kj::String { return kj::mv(b); diff --git a/src/workerd/api/http.h b/src/workerd/api/http.h index 85b0ebfeaf0..1093c5e4149 100644 --- a/src/workerd/api/http.h +++ b/src/workerd/api/http.h @@ -356,7 +356,7 @@ class Body: public jsg::Object { kj::Maybe> getBody(); bool getBodyUsed(); - jsg::Promise> arrayBuffer(jsg::Lock& js); + jsg::Promise arrayBuffer(jsg::Lock& js); jsg::Promise bytes(jsg::Lock& js); jsg::Promise text(jsg::Lock& js); jsg::Promise> formData(jsg::Lock& js); @@ -501,7 +501,7 @@ class Fetcher: public JsRpcClientProvider { jsg::Lock& js, kj::OneOf, kj::String> requestOrUrl, jsg::Optional>> requestInit); - using GetResult = kj::OneOf, kj::Array, kj::String, jsg::Value>; + using GetResult = kj::OneOf, jsg::BufferSource, kj::String, jsg::Value>; jsg::Promise get(jsg::Lock& js, kj::String url, jsg::Optional type); diff --git a/src/workerd/api/r2-bucket.c++ b/src/workerd/api/r2-bucket.c++ index dcabc11d940..d1977a6f966 100644 --- a/src/workerd/api/r2-bucket.c++ +++ b/src/workerd/api/r2-bucket.c++ @@ -1054,7 +1054,7 @@ void R2Bucket::HeadResult::writeHttpMetadata(jsg::Lock& js, Headers& headers) { } } -jsg::Promise> R2Bucket::GetResult::arrayBuffer(jsg::Lock& js) { +jsg::Promise R2Bucket::GetResult::arrayBuffer(jsg::Lock& js) { return js.evalNow([&] { JSG_REQUIRE(!body->isDisturbed(), TypeError, "Body has already been used. " @@ -1094,7 +1094,7 @@ jsg::Promise R2Bucket::GetResult::json(jsg::Lock& js) { jsg::Promise> R2Bucket::GetResult::blob(jsg::Lock& js) { // Copy-pasted from http.c++ - return arrayBuffer(js).then(js, [this](jsg::Lock& js, kj::Array buffer) { + return arrayBuffer(js).then(js, [this](jsg::Lock& js, jsg::BufferSource buffer) { // httpMetadata can't be null because GetResult always populates it. kj::String contentType = KJ_REQUIRE_NONNULL(httpMetadata) .contentType.map([](const auto& str) { diff --git a/src/workerd/api/r2-bucket.h b/src/workerd/api/r2-bucket.h index 452cc2926d9..09284f50f49 100644 --- a/src/workerd/api/r2-bucket.h +++ b/src/workerd/api/r2-bucket.h @@ -346,7 +346,7 @@ class R2Bucket: public jsg::Object { return body->isDisturbed(); } - jsg::Promise> arrayBuffer(jsg::Lock& js); + jsg::Promise arrayBuffer(jsg::Lock& js); jsg::Promise text(jsg::Lock& js); jsg::Promise json(jsg::Lock& js); jsg::Promise> blob(jsg::Lock& js); diff --git a/src/workerd/api/streams/common.h b/src/workerd/api/streams/common.h index b0a7a9d87d5..649c2e943bd 100644 --- a/src/workerd/api/streams/common.h +++ b/src/workerd/api/streams/common.h @@ -510,7 +510,7 @@ class ReadableStreamController { // // limit specifies an upper maximum bound on the number of bytes permitted to be read. // The promise will reject if the read will produce more bytes than the limit. - virtual jsg::Promise> readAllBytes(jsg::Lock& js, uint64_t limit) = 0; + virtual jsg::Promise readAllBytes(jsg::Lock& js, uint64_t limit) = 0; // Fully consumes the ReadableStream. If the stream is already locked to a reader or // errored, the returned JS promise will reject. If the stream is already closed, the diff --git a/src/workerd/api/streams/internal.c++ b/src/workerd/api/streams/internal.c++ index 5bae8a30624..a4c57e87f84 100644 --- a/src/workerd/api/streams/internal.c++ +++ b/src/workerd/api/streams/internal.c++ @@ -2086,27 +2086,36 @@ jsg::Promise ReadableStreamInternalController::PipeLocked::read(jsg: return KJ_ASSERT_NONNULL(inner.read(js, kj::none)); } -jsg::Promise> ReadableStreamInternalController::readAllBytes( +jsg::Promise ReadableStreamInternalController::readAllBytes( jsg::Lock& js, uint64_t limit) { if (isLockedToReader()) { - return js.rejectedPromise>(KJ_EXCEPTION( + return js.rejectedPromise(KJ_EXCEPTION( FAILED, "jsg.TypeError: This ReadableStream is currently locked to a reader.")); } if (isPendingClosure) { - return js.rejectedPromise>( + return js.rejectedPromise( js.v8TypeError("This ReadableStream belongs to an object that is closing."_kj)); } KJ_SWITCH_ONEOF(state) { KJ_CASE_ONEOF(closed, StreamStates::Closed) { - return js.resolvedPromise(kj::Array()); + auto backing = jsg::BackingStore::alloc(js, 0); + return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing))); } KJ_CASE_ONEOF(errored, StreamStates::Errored) { - return js.rejectedPromise>(errored.addRef(js)); + return js.rejectedPromise(errored.addRef(js)); } KJ_CASE_ONEOF(readable, Readable) { auto source = KJ_ASSERT_NONNULL(removeSource(js)); auto& context = IoContext::current(); - return context.awaitIoLegacy(js, source->readAllBytes(limit).attach(kj::mv(source))); + // TODO(perf): v8 sandboxing will require that backing stores are allocated within + // the sandbox. This will require a change to the API of ReadableStreamSource::readAllBytes. + // For now, we'll read and allocate into a proper backing store. + return context.awaitIoLegacy(js, source->readAllBytes(limit).attach(kj::mv(source))) + .then(js, [](jsg::Lock& js, kj::Array bytes) -> jsg::BufferSource { + auto backing = jsg::BackingStore::alloc(js, bytes.size()); + backing.asArrayPtr().copyFrom(bytes); + return jsg::BufferSource(js, kj::mv(backing)); + }); } } KJ_UNREACHABLE; diff --git a/src/workerd/api/streams/internal.h b/src/workerd/api/streams/internal.h index 3319251bb4d..356d29e1d23 100644 --- a/src/workerd/api/streams/internal.h +++ b/src/workerd/api/streams/internal.h @@ -96,7 +96,7 @@ class ReadableStreamInternalController: public ReadableStreamController { void visitForGc(jsg::GcVisitor& visitor) override; - jsg::Promise> readAllBytes(jsg::Lock& js, uint64_t limit) override; + jsg::Promise readAllBytes(jsg::Lock& js, uint64_t limit) override; jsg::Promise readAllText(jsg::Lock& js, uint64_t limit) override; kj::Maybe tryGetLength(StreamEncoding encoding) override; diff --git a/src/workerd/api/streams/standard-test.c++ b/src/workerd/api/streams/standard-test.c++ index f740d87e71f..936d752361f 100644 --- a/src/workerd/api/streams/standard-test.c++ +++ b/src/workerd/api/streams/standard-test.c++ @@ -242,8 +242,8 @@ KJ_TEST("ReadableStream read all bytes (value readable)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then( - js, [&](jsg::Lock& js, kj::Array&& text) { - KJ_ASSERT(text == "Hello, world!"_kjc.asBytes()); + js, [&](jsg::Lock& js, jsg::BufferSource&& text) { + KJ_ASSERT(text.asArrayPtr() == "Hello, world!"_kjb); checked++; }); @@ -300,8 +300,8 @@ KJ_TEST("ReadableStream read all bytes (byte readable)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then( - js, [&](jsg::Lock& js, kj::Array&& text) { - KJ_ASSERT(text == "Hello, world!"_kjc.asBytes()); + js, [&](jsg::Lock& js, jsg::BufferSource&& text) { + KJ_ASSERT(text.asArrayPtr() == "Hello, world!"_kjb); checked++; }); @@ -363,8 +363,8 @@ KJ_TEST("ReadableStream read all bytes (value readable, more reads)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then( - js, [&](jsg::Lock& js, kj::Array&& text) { - KJ_ASSERT(text == "Hello, world!"_kjc.asBytes()); + js, [&](jsg::Lock& js, jsg::BufferSource&& text) { + KJ_ASSERT(text.asArrayPtr() == "Hello, world!"_kjb); checked++; }); @@ -427,8 +427,8 @@ KJ_TEST("ReadableStream read all bytes (byte readable, more reads)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then( - js, [&](jsg::Lock& js, kj::Array&& text) { - KJ_ASSERT(text == "Hello, world!"_kjc.asBytes()); + js, [&](jsg::Lock& js, jsg::BufferSource&& text) { + KJ_ASSERT(text.asArrayPtr() == "Hello, world!"_kjb); checked++; }); @@ -495,13 +495,13 @@ KJ_TEST("ReadableStream read all bytes (byte readable, large data)") { // Starts a read loop of javascript promises. auto promise = rs->getController() .readAllBytes(js, (BASE * 7) + 1) - .then(js, [&](jsg::Lock& js, kj::Array&& text) { + .then(js, [&](jsg::Lock& js, jsg::BufferSource&& text) { kj::byte check[BASE * 7]{}; kj::arrayPtr(check).first(BASE).fill('A'); kj::arrayPtr(check).slice(BASE).first(BASE * 2).fill('B'); kj::arrayPtr(check).slice(BASE * 3).fill('C'); KJ_ASSERT(text.size() == BASE * 7); - KJ_ASSERT(check == text); + KJ_ASSERT(text.asArrayPtr() == check); checked++; }); @@ -563,7 +563,7 @@ KJ_TEST("ReadableStream read all bytes (value readable, wrong type)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, kj::Array&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "TypeError: This ReadableStream did not return bytes."); @@ -619,7 +619,7 @@ KJ_TEST("ReadableStream read all bytes (value readable, to many bytes)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, kj::Array&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "TypeError: Memory limit exceeded before EOF."); checked++; @@ -675,7 +675,7 @@ KJ_TEST("ReadableStream read all bytes (byte readable, to many bytes)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, kj::Array&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "TypeError: Memory limit exceeded before EOF."); checked++; @@ -718,7 +718,7 @@ KJ_TEST("ReadableStream read all bytes (byte readable, failed read)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, kj::Array&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "Error: boom"); checked++; @@ -760,7 +760,7 @@ KJ_TEST("ReadableStream read all bytes (value readable, failed read)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, kj::Array&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "Error: boom"); checked++; @@ -803,7 +803,7 @@ KJ_TEST("ReadableStream read all bytes (byte readable, failed start)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, kj::Array&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "Error: boom"); checked++; @@ -846,7 +846,7 @@ KJ_TEST("ReadableStream read all bytes (byte readable, failed start 2)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, kj::Array&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "Error: boom"); checked++; diff --git a/src/workerd/api/streams/standard.c++ b/src/workerd/api/streams/standard.c++ index 86c1994f66b..a640477d730 100644 --- a/src/workerd/api/streams/standard.c++ +++ b/src/workerd/api/streams/standard.c++ @@ -702,7 +702,7 @@ public: kj::Maybe> getController(); - jsg::Promise> readAllBytes(jsg::Lock& js, uint64_t limit) override; + jsg::Promise readAllBytes(jsg::Lock& js, uint64_t limit) override; jsg::Promise readAllText(jsg::Lock& js, uint64_t limit) override; kj::Maybe tryGetLength(StreamEncoding encoding) override; @@ -2643,11 +2643,11 @@ public: AllReader(jsg::Ref stream, uint64_t limit): state(kj::mv(stream)), limit(limit) {} KJ_DISALLOW_COPY_AND_MOVE(AllReader); - jsg::Promise> allBytes(jsg::Lock& js) { - return loop(js).then(js, [this](auto& js, PartList&& partPtrs) { - auto out = kj::heapArray(runningTotal); - copyInto(out, kj::mv(partPtrs)); - return kj::mv(out); + jsg::Promise allBytes(jsg::Lock& js) { + return loop(js).then(js, [this](auto& js, PartList&& partPtrs) -> jsg::BufferSource { + auto out = jsg::BackingStore::alloc(js, runningTotal); + copyInto(out.asArrayPtr(), kj::mv(partPtrs)); + return jsg::BufferSource(js, kj::mv(out)); }); } @@ -3037,10 +3037,10 @@ jsg::Promise ReadableStreamJsController::readAll(jsg::Lock& js, uint64_t limi KJ_ASSERT(lock.lock()); // The AllReader will hold a traceable reference to the ReadableStream. auto reader = kj::heap(addRef(), limit); - auto promise = ([&js, &reader] { - if constexpr (kj::isSameType>()) { + auto promise = ([&js, &reader]() -> jsg::Promise { + if constexpr (kj::isSameType()) { return reader->allBytes(js); - } else if constexpr (kj::isSameType()) { + } else { return reader->allText(js); } })(); @@ -3061,7 +3061,12 @@ jsg::Promise ReadableStreamJsController::readAll(jsg::Lock& js, uint64_t limi KJ_SWITCH_ONEOF(state) { KJ_CASE_ONEOF(closed, StreamStates::Closed) { - return js.resolvedPromise(T()); + if constexpr (kj::isSameType()) { + auto backing = jsg::BackingStore::alloc(js, 0); + return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing))); + } else { + return js.resolvedPromise(T()); + } } KJ_CASE_ONEOF(errored, StreamStates::Errored) { return js.rejectedPromise(errored.addRef(js)); @@ -3076,9 +3081,9 @@ jsg::Promise ReadableStreamJsController::readAll(jsg::Lock& js, uint64_t limi KJ_UNREACHABLE; } -jsg::Promise> ReadableStreamJsController::readAllBytes( +jsg::Promise ReadableStreamJsController::readAllBytes( jsg::Lock& js, uint64_t limit) { - return readAll>(js, limit); + return readAll(js, limit); } jsg::Promise ReadableStreamJsController::readAllText(jsg::Lock& js, uint64_t limit) { diff --git a/src/workerd/jsg/buffersource.h b/src/workerd/jsg/buffersource.h index f3c773edd4f..4f212188236 100644 --- a/src/workerd/jsg/buffersource.h +++ b/src/workerd/jsg/buffersource.h @@ -384,6 +384,11 @@ class BufferSource { return BufferSource(js, KJ_ASSERT_NONNULL(maybeBackingStore).getTypedViewSlice(start, end)); } + template + BufferSource getTypedView(jsg::Lock& js) { + return BufferSource(js, KJ_ASSERT_NONNULL(maybeBackingStore).getTypedView()); + } + JSG_MEMORY_INFO(BufferSource) { tracker.trackField("handle", handle); KJ_IF_SOME(backing, maybeBackingStore) { From 7f72d55106f14b5aa13abd0b43f9d254637a7f6d Mon Sep 17 00:00:00 2001 From: Mike Aizatsky Date: Tue, 12 Nov 2024 13:01:49 -0800 Subject: [PATCH 08/12] add compile cache support to wd_?s_bundle, enable for node (#3044) Is not used on load yet. --- build/config/BUILD.bazel | 8 +++ build/wd_js_bundle.bzl | 114 ++++++++++++++++++++++++++++++---- build/wd_ts_bundle.bzl | 7 ++- src/node/BUILD.bazel | 1 + src/workerd/jsg/modules.capnp | 3 + 5 files changed, 119 insertions(+), 14 deletions(-) create mode 100644 build/config/BUILD.bazel diff --git a/build/config/BUILD.bazel b/build/config/BUILD.bazel new file mode 100644 index 00000000000..875fd037722 --- /dev/null +++ b/build/config/BUILD.bazel @@ -0,0 +1,8 @@ +load("@bazel_skylib//rules:common_settings.bzl", "string_flag") + +# Similar to --run_under flag to be able to run cross-compiled target binaries. +string_flag( + name = "target_run_under", + build_setting_default = "", + visibility = ["//visibility:public"], +) diff --git a/build/wd_js_bundle.bzl b/build/wd_js_bundle.bzl index 77d1d862e60..63b249ff6ec 100644 --- a/build/wd_js_bundle.bzl +++ b/build/wd_js_bundle.bzl @@ -1,3 +1,4 @@ +load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") load("@bazel_skylib//rules:copy_file.bzl", "copy_file") load("@capnp-cpp//src/capnp:cc_capnp_library.bzl", "cc_capnp_library") @@ -13,8 +14,6 @@ const {const_name} :Modules.Bundle = ( ]); """ -MODULE_TEMPLATE = """ (name = "{name}", {src_type} = embed "{path}", type = {type}, {ts_declaration})""" - def _to_name(file_name): return file_name.removesuffix(".js") @@ -26,10 +25,85 @@ def _relative_path(file_path, dir_path): fail("file_path need to start with dir_path: " + file_path + " vs " + dir_path) return file_path.removeprefix(dir_path) +def _gen_compile_cache_impl(ctx): + file_list = ctx.actions.declare_file("in") + + srcs = [] + for src in ctx.attr.srcs: + srcs.extend(src.files.to_list()) + + outs = [ctx.actions.declare_file(src.basename + "_cache") for src in srcs] + + content = [] + for i in range(0, len(srcs)): + content.append("{} {}".format(srcs[i].path, outs[i].path)) + + ctx.actions.write( + output = file_list, + content = "\n".join(content) + "\n", + ) + + args = ctx.actions.args() + args.add(file_list) + + run_under = ctx.attr._run_under[BuildSettingInfo].value + + # use run_shell together with cfg = target instead of ctx.actions.run + # to prevent double-compilation of v8. + ctx.actions.run_shell( + outputs = outs, + inputs = [file_list] + srcs, + command = run_under + " " + ctx.executable._tool.path + " $@", + arguments = [args], + use_default_shell_env = True, + tools = [ctx.executable._tool], + ) + + return [ + DefaultInfo(files = depset(direct = outs)), + ] + +_gen_compile_cache = rule( + implementation = _gen_compile_cache_impl, + attrs = { + "srcs": attr.label_list(mandatory = True, allow_files = True), + "_tool": attr.label( + executable = True, + allow_single_file = True, + cfg = "target", + default = "//src/rust/gen-compile-cache", + ), + "_run_under": attr.label(default = "//build/config:target_run_under"), + }, +) + +def _get_compile_cache(compile_cache, m): + if not compile_cache: + return None + files = m.files.to_list() + + if len(files) != 1: + fail("only single file expected") + + return compile_cache.get(files[0].path) + +MODULE_TEMPLATE = """ (name = "{name}", {src_type} = embed "{path}", type = {type}, {extras})""" + def _gen_api_bundle_capnpn_impl(ctx): output_dir = ctx.outputs.out.dirname + "/" - def _render_module(name, label, src_type, type): + def _render_module(name, label, src_type, type, cache = None): + ts_declaration_extra = ( + "tsDeclaration = embed \"" + _relative_path( + ctx.expand_location("$(location {})".format(ctx.attr.declarations[name]), ctx.attr.data), + output_dir, + ) + "\", " + ) if name in ctx.attr.declarations else "" + cache_extra = ( + "compileCache = embed \"{}\", ".format(_relative_path(cache, output_dir)) + ) if cache else "" + extras = ts_declaration_extra + cache_extra + return MODULE_TEMPLATE.format( name = name, # capnp doesn't allow ".." dir escape, make paths relative. @@ -40,20 +114,21 @@ def _gen_api_bundle_capnpn_impl(ctx): output_dir, ), type = type, - ts_declaration = ( - "tsDeclaration = embed \"" + _relative_path( - ctx.expand_location("$(location {})".format(ctx.attr.declarations[name]), ctx.attr.data), - output_dir, - ) + "\", " - ) if name in ctx.attr.declarations else "", + extras = extras, ) + compile_cache = {} + if ctx.attr.compile_cache: + locations = ctx.expand_location("$(locations {})".format(ctx.attr.compile_cache.label)).split(" ") + for loc in locations: + compile_cache[loc.removesuffix("_cache")] = loc + modules = [ - _render_module(ctx.attr.builtin_modules[m], m.label, "src", "builtin") + _render_module(ctx.attr.builtin_modules[m], m.label, "src", "builtin", _get_compile_cache(compile_cache, m)) for m in ctx.attr.builtin_modules ] modules += [ - _render_module(ctx.attr.internal_modules[m], m.label, "src", "internal") + _render_module(ctx.attr.internal_modules[m], m.label, "src", "internal", _get_compile_cache(compile_cache, m)) for m in ctx.attr.internal_modules ] modules += [ @@ -90,6 +165,7 @@ gen_api_bundle_capnpn = rule( "data": attr.label_list(allow_files = True), "const_name": attr.string(mandatory = True), "deps": attr.label_list(), + "compile_cache": attr.label(), }, ) @@ -124,7 +200,8 @@ def wd_js_bundle( internal_data_modules = [], internal_json_modules = [], declarations = [], - deps = []): + deps = [], + gen_compile_cache = False): """Generate cc capnp library with js api bundle. NOTE: Due to capnpc embed limitation all modules must be in the same or sub directory of the @@ -146,7 +223,7 @@ def wd_js_bundle( internal_json_modules: list of json source files declarations: d.ts label set deps: dependency list - Returns: The set of data dependencies + gen_compile_cache: generate compilation cache of every file and include into the bundle """ builtin_modules_dict = { m: "{}:{}".format(import_name, _to_name(m)) @@ -200,6 +277,16 @@ def wd_js_bundle( list(internal_declarations.values()) ) + compile_cache = None + if gen_compile_cache: + _gen_compile_cache( + name = name + "@compile_cache", + srcs = builtin_modules_dict.keys() + internal_modules_dict.keys(), + ) + compile_cache = name + "@compile_cache" + deps = deps + [compile_cache] + data = data + [compile_cache] + gen_api_bundle_capnpn( name = name + "@gen", out = name + ".capnp", @@ -213,6 +300,7 @@ def wd_js_bundle( declarations = builtin_declarations | internal_declarations, data = data, deps = deps, + compile_cache = compile_cache, ) cc_capnp_library( diff --git a/build/wd_ts_bundle.bzl b/build/wd_ts_bundle.bzl index 38fdc788395..baf07f9dd46 100644 --- a/build/wd_ts_bundle.bzl +++ b/build/wd_ts_bundle.bzl @@ -26,7 +26,8 @@ def wd_ts_bundle( internal_json_modules = [], lint = True, deps = [], - js_deps = []): + js_deps = [], + gen_compile_cache = False): """Compiles typescript modules and generates api bundle with the result. Args: @@ -41,8 +42,11 @@ def wd_ts_bundle( eslintrc_json: eslintrc.json label internal_wasm_modules: list of wasm source files internal_data_modules: list of data source files + internal_json_modules: list of json source files lint: enables/disables source linting deps: additional typescript dependencies + gen_compile_cache: generate compilation cache of every file and include into the bundle + js_deps: javascript dependencies """ ts_config( name = name + "@tsconfig", @@ -77,6 +81,7 @@ def wd_ts_bundle( declarations = declarations, schema_id = schema_id, deps = deps + js_deps, + gen_compile_cache = gen_compile_cache, ) if lint: diff --git a/src/node/BUILD.bazel b/src/node/BUILD.bazel index 341c0db8f36..440c167a704 100644 --- a/src/node/BUILD.bazel +++ b/src/node/BUILD.bazel @@ -3,6 +3,7 @@ load("@workerd//:build/wd_ts_bundle.bzl", "wd_ts_bundle") wd_ts_bundle( name = "node", eslintrc_json = "eslint.config.mjs", + gen_compile_cache = True, import_name = "node", internal_modules = glob([ "internal/*.ts", diff --git a/src/workerd/jsg/modules.capnp b/src/workerd/jsg/modules.capnp index 24fd7b948ff..434dc3b232f 100644 --- a/src/workerd/jsg/modules.capnp +++ b/src/workerd/jsg/modules.capnp @@ -23,6 +23,9 @@ struct Module { tsDeclaration @3 :Text; type @2 :ModuleType; + + # Optional compile cache to be used to speed up module loading + compileCache @7 :Data; } From 3a54618cee8c23acd8e58ddf76f7b1985cc69b46 Mon Sep 17 00:00:00 2001 From: James M Snell Date: Fri, 8 Nov 2024 06:00:20 -0800 Subject: [PATCH 09/12] Disable top-level await in require with a compat flag --- src/workerd/api/global-scope.c++ | 9 +- src/workerd/api/node/module.c++ | 26 +++-- .../node/tests/module-create-require-test.js | 9 +- .../tests/module-create-require-test.wd-test | 6 +- src/workerd/api/node/util.c++ | 7 -- .../api/tests/new-module-registry-test.js | 2 +- src/workerd/io/compatibility-date.capnp | 8 ++ src/workerd/io/worker.c++ | 3 + src/workerd/jsg/jsg.c++ | 4 + src/workerd/jsg/jsg.h | 1 + src/workerd/jsg/modules.c++ | 107 ++++++++++++------ src/workerd/jsg/modules.h | 9 +- src/workerd/jsg/setup.h | 9 ++ 13 files changed, 132 insertions(+), 68 deletions(-) diff --git a/src/workerd/api/global-scope.c++ b/src/workerd/api/global-scope.c++ index 44ca2649cce..e3072b957cd 100644 --- a/src/workerd/api/global-scope.c++ +++ b/src/workerd/api/global-scope.c++ @@ -849,15 +849,8 @@ jsg::JsValue resolveFromRegistry(jsg::Lock& js, kj::StringPtr specifier) { auto& info = JSG_REQUIRE_NONNULL( moduleRegistry->resolve(js, spec), Error, kj::str("No such module: ", specifier)); auto module = info.module.getHandle(js); - jsg::instantiateModule(js, module); - auto handle = jsg::check(module->Evaluate(js.v8Context())); - KJ_ASSERT(handle->IsPromise()); - auto prom = handle.As(); - KJ_ASSERT(prom->State() != v8::Promise::PromiseState::kPending); - if (module->GetStatus() == v8::Module::kErrored) { - jsg::throwTunneledException(js.v8Isolate, module->GetException()); - } + jsg::instantiateModule(js, module); return jsg::JsValue(js.v8Get(module->GetModuleNamespace().As(), "default"_kj)); } } // namespace diff --git a/src/workerd/api/node/module.c++ b/src/workerd/api/node/module.c++ index 16c4fbfd697..6f2482d9599 100644 --- a/src/workerd/api/node/module.c++ +++ b/src/workerd/api/node/module.c++ @@ -3,6 +3,7 @@ // https://opensource.org/licenses/Apache-2.0 #include "module.h" +#include #include namespace workerd::api::node { @@ -84,19 +85,22 @@ jsg::JsValue ModuleUtil::createRequire(jsg::Lock& js, kj::String path) { auto module = info.module.getHandle(js); - jsg::instantiateModule(js, module); - auto handle = jsg::check(module->Evaluate(js.v8Context())); - KJ_ASSERT(handle->IsPromise()); - auto prom = handle.As(); - if (prom->State() == v8::Promise::PromiseState::kPending) { - js.runMicrotasks(); - } - JSG_REQUIRE(prom->State() != v8::Promise::PromiseState::kPending, Error, - "Module evaluation did not complete synchronously."); - if (module->GetStatus() == v8::Module::kErrored) { - jsg::throwTunneledException(js.v8Isolate, module->GetException()); + auto features = FeatureFlags::get(js); + jsg::InstantiateModuleOptions options = jsg::InstantiateModuleOptions::DEFAULT; + if (features.getNoTopLevelAwaitInRequire()) { + options = jsg::InstantiateModuleOptions::NO_TOP_LEVEL_AWAIT; + + // If the module was already evaluated, let's check if it is async. + // If it is, we will throw an error. This case can happen if a previous + // attempt to require the module failed because the module was async. + if (module->GetStatus() == v8::Module::kEvaluated) { + JSG_REQUIRE(!module->IsGraphAsync(), Error, + "Top-level await in module is not permitted at this time."); + } } + jsg::instantiateModule(js, module, options); + if (isEsm) { // If the import is an esm module, we will return the namespace object. jsg::JsObject obj(module->GetModuleNamespace().As()); diff --git a/src/workerd/api/node/tests/module-create-require-test.js b/src/workerd/api/node/tests/module-create-require-test.js index d2579495150..724a48465c0 100644 --- a/src/workerd/api/node/tests/module-create-require-test.js +++ b/src/workerd/api/node/tests/module-create-require-test.js @@ -24,7 +24,14 @@ export const doTheTest = { strictEqual(assert, required); throws(() => require('invalid'), { - message: 'Module evaluation did not complete synchronously.', + message: 'Top-level await in module is not permitted at this time.', + }); + // Trying to require the module again should throw the same error. + throws(() => require('invalid'), { + message: 'Top-level await in module is not permitted at this time.', + }); + throws(() => require('invalid2'), { + message: 'Top-level await in module is not permitted at this time.', }); throws(() => require('does not exist')); diff --git a/src/workerd/api/node/tests/module-create-require-test.wd-test b/src/workerd/api/node/tests/module-create-require-test.wd-test index cc97fcb5074..c550ff58d5e 100644 --- a/src/workerd/api/node/tests/module-create-require-test.wd-test +++ b/src/workerd/api/node/tests/module-create-require-test.wd-test @@ -10,10 +10,12 @@ const unitTests :Workerd.Config = ( (name = "bar", esModule = "export default 2; export const __cjsUnwrapDefault = true;"), (name = "baz", commonJsModule = "module.exports = 3;"), (name = "worker/qux", text = "4"), - (name = "invalid", esModule = "const p = new Promise(() => {}); await p;"), + (name = "invalid", esModule = "await new Promise(() => {});"), + (name = "invalid2", commonJsModule = "require('invalid3');"), + (name = "invalid3", esModule = "await new Promise(() => {});"), ], compatibilityDate = "2024-08-01", - compatibilityFlags = ["nodejs_compat_v2"] + compatibilityFlags = ["disable_top_level_await_in_require", "nodejs_compat_v2"] ) ), ], diff --git a/src/workerd/api/node/util.c++ b/src/workerd/api/node/util.c++ index 5fe83665f23..0b526cb2bc4 100644 --- a/src/workerd/api/node/util.c++ +++ b/src/workerd/api/node/util.c++ @@ -217,13 +217,6 @@ jsg::JsValue UtilModule::getBuiltinModule(jsg::Lock& js, kj::String specifier) { jsg::ModuleRegistry::ResolveMethod::IMPORT, rawSpecifier.asPtr())) { auto module = info.module.getHandle(js); jsg::instantiateModule(js, module); - auto handle = jsg::check(module->Evaluate(js.v8Context())); - KJ_ASSERT(handle->IsPromise()); - auto prom = handle.As(); - KJ_ASSERT(prom->State() != v8::Promise::PromiseState::kPending); - if (module->GetStatus() == v8::Module::kErrored) { - jsg::throwTunneledException(js.v8Isolate, module->GetException()); - } // For Node.js modules, we want to grab the default export and return that. // For other built-ins, we'll return the module namespace instead. Can be diff --git a/src/workerd/api/tests/new-module-registry-test.js b/src/workerd/api/tests/new-module-registry-test.js index d271a41b6e4..6013b7885fb 100644 --- a/src/workerd/api/tests/new-module-registry-test.js +++ b/src/workerd/api/tests/new-module-registry-test.js @@ -8,7 +8,7 @@ import { import { foo, default as def } from 'foo'; import { default as fs } from 'node:fs'; import { Buffer } from 'buffer'; -const { foo: foo2, default: def2 } = await import('bar'); +import { foo as foo2, default as def2 } from 'bar'; // Verify that import.meta.url is correct here. strictEqual(import.meta.url, 'file:///worker'); diff --git a/src/workerd/io/compatibility-date.capnp b/src/workerd/io/compatibility-date.capnp index acc404c8f49..a7d16d4ad57 100644 --- a/src/workerd/io/compatibility-date.capnp +++ b/src/workerd/io/compatibility-date.capnp @@ -657,4 +657,12 @@ struct CompatibilityFlags @0x8f8c1b68151b6cef { # # This is a compat flag so that we can opt in our test workers into it before rolling out to # everyone. + + noTopLevelAwaitInRequire @67 :Bool + $compatEnableFlag("disable_top_level_await_in_require") + $compatDisableFlag("enable_top_level_await_in_require") + $compatEnableDate("2024-12-02"); + # When enabled, use of top-level await syntax in require() calls will be disallowed. + # The ecosystem and runtimes are moving to a state where top level await in modules + # is being strongly discouraged. } diff --git a/src/workerd/io/worker.c++ b/src/workerd/io/worker.c++ index b4434763e2a..26db2700414 100644 --- a/src/workerd/io/worker.c++ +++ b/src/workerd/io/worker.c++ @@ -999,6 +999,9 @@ Worker::Isolate::Isolate(kj::Own apiParam, if (features.getNodeJsCompatV2()) { lock->setNodeJsCompatEnabled(); } + if (features.getNoTopLevelAwaitInRequire()) { + lock->disableTopLevelAwait(); + } if (impl->inspector != kj::none || ::kj::_::Debug::shouldLog(::kj::LogSeverity::INFO)) { lock->setLoggerCallback([this](jsg::Lock& js, kj::StringPtr message) { diff --git a/src/workerd/jsg/jsg.c++ b/src/workerd/jsg/jsg.c++ index 05e346f2af0..5c625794a77 100644 --- a/src/workerd/jsg/jsg.c++ +++ b/src/workerd/jsg/jsg.c++ @@ -195,6 +195,10 @@ void Lock::setNodeJsCompatEnabled() { IsolateBase::from(v8Isolate).setNodeJsCompatEnabled({}, true); } +void Lock::disableTopLevelAwait() { + IsolateBase::from(v8Isolate).disableTopLevelAwait(); +} + void Lock::setToStringTag() { IsolateBase::from(v8Isolate).enableSetToStringTag(); } diff --git a/src/workerd/jsg/jsg.h b/src/workerd/jsg/jsg.h index 3190cdfd966..4945a950b6e 100644 --- a/src/workerd/jsg/jsg.h +++ b/src/workerd/jsg/jsg.h @@ -2516,6 +2516,7 @@ class Lock { void setNodeJsCompatEnabled(); void setToStringTag(); + void disableTopLevelAwait(); using Logger = void(Lock&, kj::StringPtr); void setLoggerCallback(kj::Function&& logger); diff --git a/src/workerd/jsg/modules.c++ b/src/workerd/jsg/modules.c++ index 4107313bb88..0131b16e6b2 100644 --- a/src/workerd/jsg/modules.c++ +++ b/src/workerd/jsg/modules.c++ @@ -272,24 +272,32 @@ v8::Local CommonJsModuleContext::require(jsg::Lock& js, kj::String sp // Adding imported from suffix here not necessary like it is for resolveCallback, since we have a // js stack that will include the parent module's name and location of the failed require(). - JSG_REQUIRE_NONNULL( - info.maybeSynthetic, TypeError, "Cannot use require() to import an ES Module."); - auto module = info.module.getHandle(js); - check(module->InstantiateModule(js.v8Context(), &resolveCallback)); - auto handle = check(module->Evaluate(js.v8Context())); - KJ_ASSERT(handle->IsPromise()); - auto prom = handle.As(); - - // This assert should always pass since evaluateSyntheticModuleCallback() for CommonJS - // modules (below) always returns an already-resolved promise. - KJ_ASSERT(prom->State() != v8::Promise::PromiseState::kPending); + JSG_REQUIRE(module->GetStatus() != v8::Module::Status::kEvaluating && + module->GetStatus() != v8::Module::Status::kInstantiating, + Error, + "Module cannot be synchronously required while it is being instantiated or evaluated. " + "This error typically means that a CommonJS or NodeJS-Compat type module has a circular " + "dependency on itself, and that a synchronous require() is being called while the module " + "is being loaded."); - if (module->GetStatus() == v8::Module::kErrored) { - throwTunneledException(js.v8Isolate, module->GetException()); + auto& isolateBase = IsolateBase::from(js.v8Isolate); + jsg::InstantiateModuleOptions options = jsg::InstantiateModuleOptions::DEFAULT; + if (!isolateBase.isTopLevelAwaitEnabled()) { + options = jsg::InstantiateModuleOptions::NO_TOP_LEVEL_AWAIT; + + // If the module was already evaluated, let's check if it is async. + // If it is, we will throw an error. This case can happen if a previous + // attempt to require the module failed because the module was async. + if (module->GetStatus() == v8::Module::kEvaluated) { + JSG_REQUIRE(!module->IsGraphAsync(), Error, + "Top-level await in module is not permitted at this time."); + } } + jsg::instantiateModule(js, module, options); + // Originally, This returned an object like `{default: module.exports}` when we really // intended to return the module exports raw. We should be extracting `default` here. // Unfortunately, there is a user depending on the wrong behavior in production, so we @@ -322,32 +330,47 @@ NonModuleScript NonModuleScript::compile(kj::StringPtr code, jsg::Lock& js, kj:: return NonModuleScript(js, check(v8::ScriptCompiler::CompileUnboundScript(isolate, &source))); } -void instantiateModule(jsg::Lock& js, v8::Local& module) { +void instantiateModule( + jsg::Lock& js, v8::Local& module, InstantiateModuleOptions options) { KJ_ASSERT(!module.IsEmpty()); auto isolate = js.v8Isolate; auto context = js.v8Context(); auto status = module->GetStatus(); - // Nothing to do if the module is already instantiated, evaluated, or errored. - if (status == v8::Module::Status::kInstantiated || status == v8::Module::Status::kEvaluated || - status == v8::Module::Status::kErrored) - return; - JSG_REQUIRE(status == v8::Module::Status::kUninstantiated, Error, - "Module cannot be synchronously required while it is being instantiated or evaluated. " - "This error typically means that a CommonJS or NodeJS-Compat type module has a circular " - "dependency on itself, and that a synchronous require() is being called while the module " - "is being loaded."); + // If the previous instantiation failed, throw the exception. + if (status == v8::Module::Status::kErrored) { + isolate->ThrowException(module->GetException()); + throw jsg::JsExceptionThrown(); + } + + // Nothing to do if the module is already instantiated, evaluated. + if (status == v8::Module::Status::kEvaluated || status == v8::Module::Status::kEvaluating) return; + + if (status == v8::Module::Status::kUninstantiated) { + jsg::check(module->InstantiateModule(context, &resolveCallback)); + } - jsg::check(module->InstantiateModule(context, &resolveCallback)); auto prom = jsg::check(module->Evaluate(context)).As(); + + if (module->IsGraphAsync() && prom->State() == v8::Promise::kPending) { + // Pump the microtasks if there's an unsettled top-level await in the module. + // Because we do not support i/o in this scope, this *should* resolve in a + // single drain of the microtask queue (tho it's possible that it'll take + // multiple tasks). When the runMicrotasks() is complete, the promise should + // be settled. + JSG_REQUIRE(options != InstantiateModuleOptions::NO_TOP_LEVEL_AWAIT, Error, + "Top-level await in module is not permitted at this time."); + } + // We run microtasks to ensure that any promises that happen to be scheduled + // during the evaluation of the top level scope have a chance to be settled, + // even if those are not directly awaited. js.runMicrotasks(); switch (prom->State()) { case v8::Promise::kPending: // Let's make sure nobody is depending on pending modules that do not resolve first. - KJ_LOG(WARNING, "Async module was not immediately resolved."); - break; + JSG_FAIL_REQUIRE(Error, "Top-level await in module is unsettled."); case v8::Promise::kRejected: // Since we don't actually support I/O when instantiating a worker, we don't return the // promise from module->Evaluate, which means we lose any errors that happen during @@ -594,20 +617,30 @@ v8::Local NodeJsModuleContext::require(jsg::Lock& js, kj::String spec auto module = info.module.getHandle(js); - jsg::instantiateModule(js, module); - - auto handle = jsg::check(module->Evaluate(js.v8Context())); - KJ_ASSERT(handle->IsPromise()); - auto prom = handle.As(); - - // This assert should always pass since evaluateSyntheticModuleCallback() for CommonJS - // modules (below) always returns an already-resolved promise. - KJ_ASSERT(prom->State() != v8::Promise::PromiseState::kPending); + JSG_REQUIRE(module->GetStatus() != v8::Module::Status::kEvaluating && + module->GetStatus() != v8::Module::Status::kInstantiating, + Error, + "Module cannot be synchronously required while it is being instantiated or evaluated. " + "This error typically means that a CommonJS or NodeJS-Compat type module has a circular " + "dependency on itself, and that a synchronous require() is being called while the module " + "is being loaded."); - if (module->GetStatus() == v8::Module::kErrored) { - jsg::throwTunneledException(js.v8Isolate, module->GetException()); + auto& isolateBase = IsolateBase::from(js.v8Isolate); + jsg::InstantiateModuleOptions options = jsg::InstantiateModuleOptions::DEFAULT; + if (!isolateBase.isTopLevelAwaitEnabled()) { + options = jsg::InstantiateModuleOptions::NO_TOP_LEVEL_AWAIT; + + // If the module was already evaluated, let's check if it is async. + // If it is, we will throw an error. This case can happen if a previous + // attempt to require the module failed because the module was async. + if (module->GetStatus() == v8::Module::kEvaluated) { + JSG_REQUIRE(!module->IsGraphAsync(), Error, + "Top-level await in module is not permitted at this time."); + } } + jsg::instantiateModule(js, module, options); + return js.v8Get(module->GetModuleNamespace().As(), "default"_kj); } diff --git a/src/workerd/jsg/modules.h b/src/workerd/jsg/modules.h index 0b68aca2726..b418582b8ef 100644 --- a/src/workerd/jsg/modules.h +++ b/src/workerd/jsg/modules.h @@ -191,7 +191,14 @@ class NonModuleScript { v8::Global unboundScript; }; -void instantiateModule(jsg::Lock& js, v8::Local& module); +enum class InstantiateModuleOptions { + DEFAULT, + NO_TOP_LEVEL_AWAIT, +}; + +void instantiateModule(jsg::Lock& js, + v8::Local& module, + InstantiateModuleOptions options = InstantiateModuleOptions::DEFAULT); enum class ModuleInfoCompileOption { // The BUNDLE options tells the compile operation to treat the content as coming diff --git a/src/workerd/jsg/setup.h b/src/workerd/jsg/setup.h index c3ad32cb5d4..0586a52508a 100644 --- a/src/workerd/jsg/setup.h +++ b/src/workerd/jsg/setup.h @@ -151,6 +151,14 @@ class IsolateBase { setToStringTag = true; } + inline void disableTopLevelAwait() { + allowTopLevelAwait = false; + } + + inline bool isTopLevelAwaitEnabled() const { + return allowTopLevelAwait; + } + // The logger will be optionally set by the isolate setup logic if there is anywhere // for the log to go (for instance, if debug logging is enabled or the inspector is // being used). @@ -238,6 +246,7 @@ class IsolateBase { bool asyncContextTrackingEnabled = false; bool nodeJsCompatEnabled = false; bool setToStringTag = false; + bool allowTopLevelAwait = true; kj::Maybe> maybeLogger; kj::Maybe> maybeErrorReporter; From 00667dedb76512b4b1a433148b121fe73508a2cf Mon Sep 17 00:00:00 2001 From: James M Snell Date: Sun, 10 Nov 2024 06:05:47 -0800 Subject: [PATCH 10/12] Allow for circular commonjs dependencies --- src/workerd/api/node/module.c++ | 12 ++++++++++++ src/workerd/jsg/modules.c++ | 30 +++++++++++++++--------------- src/workerd/jsg/modules.h | 8 +++++--- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/workerd/api/node/module.c++ b/src/workerd/api/node/module.c++ index 6f2482d9599..dd9d247119c 100644 --- a/src/workerd/api/node/module.c++ +++ b/src/workerd/api/node/module.c++ @@ -85,6 +85,18 @@ jsg::JsValue ModuleUtil::createRequire(jsg::Lock& js, kj::String path) { auto module = info.module.getHandle(js); + if (module->GetStatus() == v8::Module::Status::kEvaluating || + module->GetStatus() == v8::Module::Status::kInstantiating) { + KJ_IF_SOME(synth, info.maybeSynthetic) { + KJ_IF_SOME(cjs, synth.tryGet()) { + return cjs.moduleContext->getExports(js); + } + KJ_IF_SOME(cjs, synth.tryGet()) { + return cjs.moduleContext->getExports(js); + } + } + } + auto features = FeatureFlags::get(js); jsg::InstantiateModuleOptions options = jsg::InstantiateModuleOptions::DEFAULT; if (features.getNoTopLevelAwaitInRequire()) { diff --git a/src/workerd/jsg/modules.c++ b/src/workerd/jsg/modules.c++ index 0131b16e6b2..8cbd143bf07 100644 --- a/src/workerd/jsg/modules.c++ +++ b/src/workerd/jsg/modules.c++ @@ -274,13 +274,17 @@ v8::Local CommonJsModuleContext::require(jsg::Lock& js, kj::String sp auto module = info.module.getHandle(js); - JSG_REQUIRE(module->GetStatus() != v8::Module::Status::kEvaluating && - module->GetStatus() != v8::Module::Status::kInstantiating, - Error, - "Module cannot be synchronously required while it is being instantiated or evaluated. " - "This error typically means that a CommonJS or NodeJS-Compat type module has a circular " - "dependency on itself, and that a synchronous require() is being called while the module " - "is being loaded."); + if (module->GetStatus() == v8::Module::Status::kEvaluating || + module->GetStatus() == v8::Module::Status::kInstantiating) { + KJ_IF_SOME(synth, info.maybeSynthetic) { + KJ_IF_SOME(cjs, synth.tryGet()) { + return cjs.moduleContext->getExports(js); + } + KJ_IF_SOME(cjs, synth.tryGet()) { + return cjs.moduleContext->getExports(js); + } + } + } auto& isolateBase = IsolateBase::from(js.v8Isolate); jsg::InstantiateModuleOptions options = jsg::InstantiateModuleOptions::DEFAULT; @@ -344,7 +348,7 @@ void instantiateModule( throw jsg::JsExceptionThrown(); } - // Nothing to do if the module is already instantiated, evaluated. + // Nothing to do if the module is already evaluated. if (status == v8::Module::Status::kEvaluated || status == v8::Module::Status::kEvaluating) return; if (status == v8::Module::Status::kUninstantiated) { @@ -354,11 +358,7 @@ void instantiateModule( auto prom = jsg::check(module->Evaluate(context)).As(); if (module->IsGraphAsync() && prom->State() == v8::Promise::kPending) { - // Pump the microtasks if there's an unsettled top-level await in the module. - // Because we do not support i/o in this scope, this *should* resolve in a - // single drain of the microtask queue (tho it's possible that it'll take - // multiple tasks). When the runMicrotasks() is complete, the promise should - // be settled. + // If top level await has been disable, error. JSG_REQUIRE(options != InstantiateModuleOptions::NO_TOP_LEVEL_AWAIT, Error, "Top-level await in module is not permitted at this time."); } @@ -369,7 +369,7 @@ void instantiateModule( switch (prom->State()) { case v8::Promise::kPending: - // Let's make sure nobody is depending on pending modules that do not resolve first. + // Let's make sure nobody is depending on modules awaiting on pending promises. JSG_FAIL_REQUIRE(Error, "Top-level await in module is unsettled."); case v8::Promise::kRejected: // Since we don't actually support I/O when instantiating a worker, we don't return the @@ -507,7 +507,7 @@ v8::Local compileWasmModule( // ====================================================================================== -jsg::Ref ModuleRegistry::NodeJsModuleInfo::initModuleContext( +jsg::Ref ModuleRegistry::NodeJsModuleInfo::initModuleContext( jsg::Lock& js, kj::StringPtr name) { return jsg::alloc(js, kj::Path::parse(name)); } diff --git a/src/workerd/jsg/modules.h b/src/workerd/jsg/modules.h index b418582b8ef..3dc73b45480 100644 --- a/src/workerd/jsg/modules.h +++ b/src/workerd/jsg/modules.h @@ -88,6 +88,8 @@ class CommonJsModuleContext: public jsg::Object { // expected within the global scope of a Node.js compatible module (such as // Buffer and process). +// TODO(cleanup): There's a fair amount of duplicated code between the CommonJsModule +// and NodeJsModule types... should be deduplicated. class NodeJsModuleObject: public jsg::Object { public: NodeJsModuleObject(jsg::Lock& js, kj::String path); @@ -252,7 +254,7 @@ class ModuleRegistry { }; struct NodeJsModuleInfo { - jsg::Ref moduleContext; + jsg::Ref moduleContext; jsg::Function evalFunc; NodeJsModuleInfo(auto& lock, kj::StringPtr name, kj::StringPtr content) @@ -262,7 +264,7 @@ class ModuleRegistry { NodeJsModuleInfo(NodeJsModuleInfo&&) = default; NodeJsModuleInfo& operator=(NodeJsModuleInfo&&) = default; - static jsg::Ref initModuleContext(jsg::Lock& js, kj::StringPtr name); + static jsg::Ref initModuleContext(jsg::Lock& js, kj::StringPtr name); static v8::MaybeLocal evaluate(jsg::Lock& js, NodeJsModuleInfo& info, @@ -270,7 +272,7 @@ class ModuleRegistry { const kj::Maybe>& maybeExports); jsg::Function initEvalFunc(auto& lock, - jsg::Ref& moduleContext, + jsg::Ref& moduleContext, kj::StringPtr name, kj::StringPtr content) { v8::ScriptOrigin origin(v8StrIntern(lock.v8Isolate, name)); From e858332164080ea2b1673b35299e6dd7d2fcd84b Mon Sep 17 00:00:00 2001 From: James M Snell Date: Tue, 12 Nov 2024 07:28:26 -0800 Subject: [PATCH 11/12] Consolidate the require(...) primary implementation --- src/workerd/api/node/module.c++ | 45 ++-------- src/workerd/jsg/modules.c++ | 145 ++++++++++++++++++-------------- src/workerd/jsg/modules.h | 13 +++ 3 files changed, 98 insertions(+), 105 deletions(-) diff --git a/src/workerd/api/node/module.c++ b/src/workerd/api/node/module.c++ index dd9d247119c..dfedc885162 100644 --- a/src/workerd/api/node/module.c++ +++ b/src/workerd/api/node/module.c++ @@ -81,48 +81,13 @@ jsg::JsValue ModuleUtil::createRequire(jsg::Lock& js, kj::String path) { jsg::ModuleRegistry::ResolveMethod::REQUIRE, spec.asPtr()), Error, "No such module \"", targetPath.toString(), "\"."); - bool isEsm = info.maybeSynthetic == kj::none; - - auto module = info.module.getHandle(js); - - if (module->GetStatus() == v8::Module::Status::kEvaluating || - module->GetStatus() == v8::Module::Status::kInstantiating) { - KJ_IF_SOME(synth, info.maybeSynthetic) { - KJ_IF_SOME(cjs, synth.tryGet()) { - return cjs.moduleContext->getExports(js); - } - KJ_IF_SOME(cjs, synth.tryGet()) { - return cjs.moduleContext->getExports(js); - } - } - } - - auto features = FeatureFlags::get(js); - jsg::InstantiateModuleOptions options = jsg::InstantiateModuleOptions::DEFAULT; - if (features.getNoTopLevelAwaitInRequire()) { - options = jsg::InstantiateModuleOptions::NO_TOP_LEVEL_AWAIT; - - // If the module was already evaluated, let's check if it is async. - // If it is, we will throw an error. This case can happen if a previous - // attempt to require the module failed because the module was async. - if (module->GetStatus() == v8::Module::kEvaluated) { - JSG_REQUIRE(!module->IsGraphAsync(), Error, - "Top-level await in module is not permitted at this time."); - } - } - - jsg::instantiateModule(js, module, options); - - if (isEsm) { - // If the import is an esm module, we will return the namespace object. - jsg::JsObject obj(module->GetModuleNamespace().As()); - if (obj.get(js, "__cjsUnwrapDefault"_kj) == js.boolean(true)) { - return obj.get(js, "default"_kj); - } - return obj; + jsg::ModuleRegistry::RequireImplOptions options = + jsg::ModuleRegistry::RequireImplOptions::DEFAULT; + if (info.maybeSynthetic != kj::none) { + options = jsg::ModuleRegistry::RequireImplOptions::EXPORT_DEFAULT; } - return jsg::JsValue(js.v8Get(module->GetModuleNamespace().As(), "default"_kj)); + return jsg::ModuleRegistry::requireImpl(js, info, options); })); } diff --git a/src/workerd/jsg/modules.c++ b/src/workerd/jsg/modules.c++ index 8cbd143bf07..7a789a373c8 100644 --- a/src/workerd/jsg/modules.c++ +++ b/src/workerd/jsg/modules.c++ @@ -272,46 +272,12 @@ v8::Local CommonJsModuleContext::require(jsg::Lock& js, kj::String sp // Adding imported from suffix here not necessary like it is for resolveCallback, since we have a // js stack that will include the parent module's name and location of the failed require(). - auto module = info.module.getHandle(js); - - if (module->GetStatus() == v8::Module::Status::kEvaluating || - module->GetStatus() == v8::Module::Status::kInstantiating) { - KJ_IF_SOME(synth, info.maybeSynthetic) { - KJ_IF_SOME(cjs, synth.tryGet()) { - return cjs.moduleContext->getExports(js); - } - KJ_IF_SOME(cjs, synth.tryGet()) { - return cjs.moduleContext->getExports(js); - } - } - } - - auto& isolateBase = IsolateBase::from(js.v8Isolate); - jsg::InstantiateModuleOptions options = jsg::InstantiateModuleOptions::DEFAULT; - if (!isolateBase.isTopLevelAwaitEnabled()) { - options = jsg::InstantiateModuleOptions::NO_TOP_LEVEL_AWAIT; - - // If the module was already evaluated, let's check if it is async. - // If it is, we will throw an error. This case can happen if a previous - // attempt to require the module failed because the module was async. - if (module->GetStatus() == v8::Module::kEvaluated) { - JSG_REQUIRE(!module->IsGraphAsync(), Error, - "Top-level await in module is not permitted at this time."); - } - } - - jsg::instantiateModule(js, module, options); - - // Originally, This returned an object like `{default: module.exports}` when we really - // intended to return the module exports raw. We should be extracting `default` here. - // Unfortunately, there is a user depending on the wrong behavior in production, so we - // needed a compatibility flag to fix. + ModuleRegistry::RequireImplOptions options = ModuleRegistry::RequireImplOptions::DEFAULT; if (getCommonJsExportDefault(js.v8Isolate)) { - return check(module->GetModuleNamespace().As()->Get( - js.v8Context(), v8StrIntern(js.v8Isolate, "default"))); - } else { - return module->GetModuleNamespace(); + options = ModuleRegistry::RequireImplOptions::EXPORT_DEFAULT; } + + return ModuleRegistry::requireImpl(js, info, options); } v8::Local NonModuleScript::runAndReturn(v8::Local context) const { @@ -615,33 +581,7 @@ v8::Local NodeJsModuleContext::require(jsg::Lock& js, kj::String spec info.maybeSynthetic, TypeError, "Cannot use require() to import an ES Module."); } - auto module = info.module.getHandle(js); - - JSG_REQUIRE(module->GetStatus() != v8::Module::Status::kEvaluating && - module->GetStatus() != v8::Module::Status::kInstantiating, - Error, - "Module cannot be synchronously required while it is being instantiated or evaluated. " - "This error typically means that a CommonJS or NodeJS-Compat type module has a circular " - "dependency on itself, and that a synchronous require() is being called while the module " - "is being loaded."); - - auto& isolateBase = IsolateBase::from(js.v8Isolate); - jsg::InstantiateModuleOptions options = jsg::InstantiateModuleOptions::DEFAULT; - if (!isolateBase.isTopLevelAwaitEnabled()) { - options = jsg::InstantiateModuleOptions::NO_TOP_LEVEL_AWAIT; - - // If the module was already evaluated, let's check if it is async. - // If it is, we will throw an error. This case can happen if a previous - // attempt to require the module failed because the module was async. - if (module->GetStatus() == v8::Module::kEvaluated) { - JSG_REQUIRE(!module->IsGraphAsync(), Error, - "Top-level await in module is not permitted at this time."); - } - } - - jsg::instantiateModule(js, module, options); - - return js.v8Get(module->GetModuleNamespace().As(), "default"_kj); + return ModuleRegistry::requireImpl(js, info, ModuleRegistry::RequireImplOptions::EXPORT_DEFAULT); } v8::Local NodeJsModuleContext::getBuffer(jsg::Lock& js) { @@ -712,4 +652,79 @@ kj::Maybe> tryResolveFromFallb return kj::none; } +JsValue ModuleRegistry::requireImpl(Lock& js, ModuleInfo& info, RequireImplOptions options) { + auto module = info.module.getHandle(js); + + // If the module status is evaluating or instantiating then the module is likely + // has a circular dependency on itself. If the module is a CommonJS or NodeJS + // module, we can return the exports object directly here. + if (module->GetStatus() == v8::Module::Status::kEvaluating || + module->GetStatus() == v8::Module::Status::kInstantiating) { + KJ_IF_SOME(synth, info.maybeSynthetic) { + KJ_IF_SOME(cjs, synth.tryGet()) { + return JsValue(cjs.moduleContext->getExports(js)); + } + KJ_IF_SOME(cjs, synth.tryGet()) { + return JsValue(cjs.moduleContext->getExports(js)); + } + } + } + + // When using require(...) we previously allowed the required modules to use + // top-level await. With a compat flag we disable use of top-level await but + // ONLY when the module is synchronously required. The same module being imported + // either statically or dynamically can still use TLA. This aligns with behavior + // being implemented in other JS runtimes. + auto& isolateBase = IsolateBase::from(js.v8Isolate); + jsg::InstantiateModuleOptions opts = jsg::InstantiateModuleOptions::DEFAULT; + if (!isolateBase.isTopLevelAwaitEnabled()) { + opts = jsg::InstantiateModuleOptions::NO_TOP_LEVEL_AWAIT; + + // If the module was already evaluated, let's check if it is async. + // If it is, we will throw an error. This case can happen if a previous + // attempt to require the module failed because the module was async. + if (module->GetStatus() == v8::Module::kEvaluated) { + JSG_REQUIRE(!module->IsGraphAsync(), Error, + "Top-level await in module is not permitted at this time."); + } + } + + jsg::instantiateModule(js, module, opts); + + if (info.maybeSynthetic == kj::none) { + // If the module is an ESM and the __cjsUnwrapDefault flag is set to true, we will + // always return the default export regardless of the options. + // Otherwise fallback to the options. This is an early version of the "module.exports" + // convention that Node.js finally adopted for require(esm) that was not officially + // adopted but there are a handful of modules in the ecosystem that supported it + // early. It's trivial for us to support here so let's just do so. + JsObject obj(module->GetModuleNamespace().As()); + if (obj.get(js, "__cjsUnwrapDefault"_kj) == js.boolean(true)) { + return obj.get(js, "default"_kj); + } + // If the ES Module namespace exports a "module.exports" key then that will be the + // export that is returned by the require(...) call per Node.js' recently added + // require(esm) support. + // See: https://nodejs.org/docs/latest/api/modules.html#loading-ecmascript-modules-using-require + if (obj.has(js, "module.exports"_kj)) { + // We only want to return the value if it is explicitly specified, otherwise we'll + // always be returning undefined. + return obj.get(js, "module.exports"_kj); + } + } + + // Originally, require returned an object like `{default: module.exports}` when we really + // intended to return the module exports raw. We should be extracting `default` here. + // When Node.js recently finally adopted require(esm), they adopted the default behavior + // of exporting the module namespace, which is fun. We'll stick with our default here for + // now but users can get Node.js-like behavior by switching off the + // exportCommonJsDefaultNamespace compat flag. + if (options == RequireImplOptions::EXPORT_DEFAULT) { + return JsValue(check(module->GetModuleNamespace().As()->Get( + js.v8Context(), v8StrIntern(js.v8Isolate, "default")))); + } + + return JsValue(module->GetModuleNamespace()); +} + } // namespace workerd::jsg diff --git a/src/workerd/jsg/modules.h b/src/workerd/jsg/modules.h index 3dc73b45480..5647377e970 100644 --- a/src/workerd/jsg/modules.h +++ b/src/workerd/jsg/modules.h @@ -194,7 +194,10 @@ class NonModuleScript { }; enum class InstantiateModuleOptions { + // Allows pending top-level await in the module when evaluated. Will cause + // the microtask queue to be drained once in an attempt to resolve those. DEFAULT, + // Throws if the module evaluation results in a pending promise. NO_TOP_LEVEL_AWAIT, }; @@ -403,6 +406,16 @@ class ModuleRegistry { using DynamicImportCallback = Promise(jsg::Lock& js, kj::Function handler); virtual void setDynamicImportCallback(kj::Function func) = 0; + + enum class RequireImplOptions { + // Require returns the module namespace. + DEFAULT, + // Require returns the default export. + EXPORT_DEFAULT, + }; + + static JsValue requireImpl( + Lock& js, ModuleInfo& info, RequireImplOptions options = RequireImplOptions::DEFAULT); }; template From d9309dda9673b5eda8feb3faf61ca953cb28e92a Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 6 Nov 2024 14:13:16 +0100 Subject: [PATCH 12/12] Add more jaeger spans in Python setup --- src/pyodide/internal/python.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pyodide/internal/python.ts b/src/pyodide/internal/python.ts index 3fd959690a9..b036b5f2886 100644 --- a/src/pyodide/internal/python.ts +++ b/src/pyodide/internal/python.ts @@ -62,10 +62,12 @@ async function prepareWasmLinearMemory(Module: Module): Promise { mountSitePackages(Module, SITE_PACKAGES.rootInfo); entropyMountFiles(Module); Module.noInitialRun = !SHOULD_RESTORE_SNAPSHOT; - preloadDynamicLibs(Module); - Module.removeRunDependency('dynlibs'); + enterJaegerSpan('preload_dynamic_libs', () => preloadDynamicLibs(Module)); + enterJaegerSpan('remove_run_dependency', () => + Module.removeRunDependency('dynlibs') + ); if (SHOULD_RESTORE_SNAPSHOT) { - restoreSnapshot(Module); + enterJaegerSpan('restore_snapshot', () => restoreSnapshot(Module)); // Invalidate caches if we have a snapshot because the contents of site-packages // may have changed. simpleRunPython(