diff --git a/samples/tail-worker/config.capnp b/samples/tail-worker/config.capnp new file mode 100644 index 00000000000..b981e8024fd --- /dev/null +++ b/samples/tail-worker/config.capnp @@ -0,0 +1,25 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const tailWorkerExample :Workerd.Config = ( + services = [ + (name = "main", worker = .helloWorld), + (name = "log", worker = .logWorker), + ], + + sockets = [ ( name = "http", address = "*:8080", http = (), service = "main" ) ] +); + +const helloWorld :Workerd.Worker = ( + modules = [ + (name = "worker", esModule = embed "worker.js") + ], + compatibilityDate = "2023-02-28", + logging = ( toService = "log" ), +); + +const logWorker :Workerd.Worker = ( + modules = [ + (name = "worker", esModule = embed "tail.js") + ], + compatibilityDate = "2023-02-28", +); diff --git a/samples/tail-worker/tail.js b/samples/tail-worker/tail.js new file mode 100644 index 00000000000..bfdf7154d5f --- /dev/null +++ b/samples/tail-worker/tail.js @@ -0,0 +1,9 @@ +// Copyright (c) 2017-2023 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +export default { + tail(traces) { + console.log(traces); + } +}; diff --git a/samples/tail-worker/worker.js b/samples/tail-worker/worker.js new file mode 100644 index 00000000000..34041aa64b9 --- /dev/null +++ b/samples/tail-worker/worker.js @@ -0,0 +1,9 @@ +// Copyright (c) 2017-2023 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +export default { + async fetch(req, env) { + return new Response("Hello World\n"); + } +}; diff --git a/src/workerd/api/hibernatable-web-socket.c++ b/src/workerd/api/hibernatable-web-socket.c++ index ecb83fe7fe5..008ee8213d0 100644 --- a/src/workerd/api/hibernatable-web-socket.c++ +++ b/src/workerd/api/hibernatable-web-socket.c++ @@ -75,27 +75,27 @@ kj::Promise HibernatableWebSocketCustomEve auto eventParameters = consumeParams(); KJ_IF_SOME(t, incomingRequest->getWorkerTracer()) { - Trace::HibernatableWebSocketEventInfo::Type type = - [&]() -> Trace::HibernatableWebSocketEventInfo::Type { + trace::HibernatableWebSocketEventInfo::Type type = + [&]() -> trace::HibernatableWebSocketEventInfo::Type { KJ_SWITCH_ONEOF(eventParameters.eventType) { KJ_CASE_ONEOF(_, HibernatableSocketParams::Text) { - return Trace::HibernatableWebSocketEventInfo::Message{}; + return trace::HibernatableWebSocketEventInfo::Message{}; } KJ_CASE_ONEOF(data, HibernatableSocketParams::Data) { - return Trace::HibernatableWebSocketEventInfo::Message{}; + return trace::HibernatableWebSocketEventInfo::Message{}; } KJ_CASE_ONEOF(close, HibernatableSocketParams::Close) { - return Trace::HibernatableWebSocketEventInfo::Close{ + return trace::HibernatableWebSocketEventInfo::Close{ .code = close.code, .wasClean = close.wasClean}; } KJ_CASE_ONEOF(_, HibernatableSocketParams::Error) { - return Trace::HibernatableWebSocketEventInfo::Error{}; + return trace::HibernatableWebSocketEventInfo::Error{}; } } KJ_UNREACHABLE; }(); - t.setEventInfo(context.now(), Trace::HibernatableWebSocketEventInfo(kj::mv(type))); + t.setEventInfo(context.now(), trace::HibernatableWebSocketEventInfo(kj::mv(type))); } try { diff --git a/src/workerd/api/queue.c++ b/src/workerd/api/queue.c++ index c0cf06a3e43..7a8ae506399 100644 --- a/src/workerd/api/queue.c++ +++ b/src/workerd/api/queue.c++ @@ -531,7 +531,7 @@ kj::Promise QueueCustomEventImpl::run( } KJ_IF_SOME(t, incomingRequest->getWorkerTracer()) { - t.setEventInfo(context.now(), Trace::QueueEventInfo(kj::mv(queueName), batchSize)); + t.setEventInfo(context.now(), trace::QueueEventInfo(kj::mv(queueName), batchSize)); } // Create a custom refcounted type for holding the queueEvent so that we can pass it to the diff --git a/src/workerd/api/trace.c++ b/src/workerd/api/trace.c++ index 3dc4fe65d59..272b26386e5 100644 --- a/src/workerd/api/trace.c++ +++ b/src/workerd/api/trace.c++ @@ -37,7 +37,7 @@ kj::Maybe getTraceTimestamp(const Trace& trace) { return (trace.eventTimestamp - kj::UNIX_EPOCH) / kj::MILLISECONDS; } -double getTraceLogTimestamp(const Trace::Log& log) { +double getTraceLogTimestamp(const trace::Log& log) { if (isPredictableModeForTest()) { return 0; } else { @@ -45,7 +45,7 @@ double getTraceLogTimestamp(const Trace::Log& log) { } } -double getTraceDiagnosticChannelEventTimestamp(const Trace::DiagnosticChannelEvent& event) { +double getTraceDiagnosticChannelEventTimestamp(const trace::DiagnosticChannelEvent& event) { if (isPredictableModeForTest()) { return 0; } else { @@ -53,7 +53,7 @@ double getTraceDiagnosticChannelEventTimestamp(const Trace::DiagnosticChannelEve } } -kj::String getTraceLogLevel(const Trace::Log& log) { +kj::String getTraceLogLevel(const trace::Log& log) { switch (log.logLevel) { case LogLevel::DEBUG_: return kj::str("debug"); @@ -69,7 +69,7 @@ kj::String getTraceLogLevel(const Trace::Log& log) { KJ_UNREACHABLE; } -jsg::V8Ref getTraceLogMessage(jsg::Lock& js, const Trace::Log& log) { +jsg::V8Ref getTraceLogMessage(jsg::Lock& js, const trace::Log& log) { return js.parseJson(log.message).cast(js); } @@ -93,10 +93,11 @@ kj::Array> getTraceDiagnosticChannelEvents } kj::Maybe getTraceScriptVersion(const Trace& trace) { - return trace.scriptVersion.map([](const auto& version) { return ScriptVersion(*version); }); + return trace.onsetInfo.scriptVersion.map( + [](const auto& version) { return ScriptVersion(*version); }); } -double getTraceExceptionTimestamp(const Trace::Exception& ex) { +double getTraceExceptionTimestamp(const trace::Exception& ex) { if (isPredictableModeForTest()) { return 0; } else { @@ -109,8 +110,8 @@ kj::Array> getTraceExceptions(const Trace& trace) { } jsg::Optional> getTraceScriptTags(const Trace& trace) { - if (trace.scriptTags.size() > 0) { - return KJ_MAP(t, trace.scriptTags) -> kj::String { return kj::str(t); }; + if (trace.onsetInfo.scriptTags.size() > 0) { + return KJ_MAP(t, trace.onsetInfo.scriptTags) -> kj::String { return kj::str(t); }; } else { return kj::none; } @@ -126,7 +127,7 @@ kj::String enumToStr(const Enum& var) { } kj::Own getFetchRequestDetail( - jsg::Lock& js, const Trace& trace, const Trace::FetchEventInfo& eventInfo) { + jsg::Lock& js, const Trace& trace, const trace::FetchEventInfo& eventInfo) { const auto getCf = [&]() -> jsg::Optional> { const auto& cfJson = eventInfo.cfJson; if (cfJson.size() > 0) { @@ -135,9 +136,9 @@ kj::Own getFetchRequestDetail( return kj::none; }; - const auto getHeaders = [&]() -> kj::Array { + const auto getHeaders = [&]() -> kj::Array { return KJ_MAP(header, eventInfo.headers) { - return Trace::FetchEventInfo::Header(kj::str(header.name), kj::str(header.value)); + return trace::FetchEventInfo::Header(kj::str(header.name), kj::str(header.value)); }; }; @@ -146,45 +147,45 @@ kj::Own getFetchRequestDetail( } kj::Maybe getTraceEvent(jsg::Lock& js, const Trace& trace) { - KJ_IF_SOME(e, trace.eventInfo) { + KJ_IF_SOME(e, trace.onsetInfo.info) { KJ_SWITCH_ONEOF(e) { - KJ_CASE_ONEOF(fetch, Trace::FetchEventInfo) { + KJ_CASE_ONEOF(fetch, trace::FetchEventInfo) { return kj::Maybe( jsg::alloc(js, trace, fetch, trace.fetchResponseInfo)); } - KJ_CASE_ONEOF(jsRpc, Trace::JsRpcEventInfo) { + KJ_CASE_ONEOF(jsRpc, trace::JsRpcEventInfo) { return kj::Maybe(jsg::alloc(trace, jsRpc)); } - KJ_CASE_ONEOF(scheduled, Trace::ScheduledEventInfo) { + KJ_CASE_ONEOF(scheduled, trace::ScheduledEventInfo) { return kj::Maybe(jsg::alloc(trace, scheduled)); } - KJ_CASE_ONEOF(alarm, Trace::AlarmEventInfo) { + KJ_CASE_ONEOF(alarm, trace::AlarmEventInfo) { return kj::Maybe(jsg::alloc(trace, alarm)); } - KJ_CASE_ONEOF(queue, Trace::QueueEventInfo) { + KJ_CASE_ONEOF(queue, trace::QueueEventInfo) { return kj::Maybe(jsg::alloc(trace, queue)); } - KJ_CASE_ONEOF(email, Trace::EmailEventInfo) { + KJ_CASE_ONEOF(email, trace::EmailEventInfo) { return kj::Maybe(jsg::alloc(trace, email)); } - KJ_CASE_ONEOF(tracedTrace, Trace::TraceEventInfo) { + KJ_CASE_ONEOF(tracedTrace, trace::TraceEventInfo) { return kj::Maybe(jsg::alloc(trace, tracedTrace)); } - KJ_CASE_ONEOF(hibWs, Trace::HibernatableWebSocketEventInfo) { + KJ_CASE_ONEOF(hibWs, trace::HibernatableWebSocketEventInfo) { KJ_SWITCH_ONEOF(hibWs.type) { - KJ_CASE_ONEOF(message, Trace::HibernatableWebSocketEventInfo::Message) { + KJ_CASE_ONEOF(message, trace::HibernatableWebSocketEventInfo::Message) { return kj::Maybe(jsg::alloc(trace, message)); } - KJ_CASE_ONEOF(close, Trace::HibernatableWebSocketEventInfo::Close) { + KJ_CASE_ONEOF(close, trace::HibernatableWebSocketEventInfo::Close) { return kj::Maybe(jsg::alloc(trace, close)); } - KJ_CASE_ONEOF(error, Trace::HibernatableWebSocketEventInfo::Error) { + KJ_CASE_ONEOF(error, trace::HibernatableWebSocketEventInfo::Error) { return kj::Maybe(jsg::alloc(trace, error)); } } KJ_UNREACHABLE; } - KJ_CASE_ONEOF(custom, Trace::CustomEventInfo) { + KJ_CASE_ONEOF(custom, trace::CustomEventInfo) { return kj::Maybe(jsg::alloc(trace, custom)); } } @@ -199,13 +200,14 @@ TraceItem::TraceItem(jsg::Lock& js, const Trace& trace) logs(getTraceLogs(js, trace)), exceptions(getTraceExceptions(trace)), diagnosticChannelEvents(getTraceDiagnosticChannelEvents(js, trace)), - scriptName(trace.scriptName.map([](auto& name) { return kj::str(name); })), - entrypoint(trace.entrypoint.map([](auto& name) { return kj::str(name); })), + scriptName(trace.onsetInfo.scriptName.map([](auto& name) { return kj::str(name); })), + entrypoint(trace.onsetInfo.entrypoint.map([](auto& name) { return kj::str(name); })), scriptVersion(getTraceScriptVersion(trace)), - dispatchNamespace(trace.dispatchNamespace.map([](auto& ns) { return kj::str(ns); })), + dispatchNamespace( + trace.onsetInfo.dispatchNamespace.map([](auto& ns) { return kj::str(ns); })), scriptTags(getTraceScriptTags(trace)), - executionModel(enumToStr(trace.executionModel)), - outcome(enumToStr(trace.outcome)), + executionModel(enumToStr(trace.onsetInfo.executionModel)), + outcome(enumToStr(trace.outcomeInfo.outcome)), cpuTime(trace.cpuTime / kj::MILLISECONDS), wallTime(trace.wallTime / kj::MILLISECONDS), truncated(trace.truncated) {} @@ -304,13 +306,13 @@ uint TraceItem::getWallTime() { TraceItem::FetchEventInfo::FetchEventInfo(jsg::Lock& js, const Trace& trace, - const Trace::FetchEventInfo& eventInfo, - kj::Maybe responseInfo) + const trace::FetchEventInfo& eventInfo, + kj::Maybe responseInfo) : request(jsg::alloc(js, trace, eventInfo)), response(responseInfo.map([&](auto& info) { return jsg::alloc(trace, info); })) {} TraceItem::FetchEventInfo::Request::Detail::Detail(jsg::Optional> cf, - kj::Array headers, + kj::Array headers, kj::String method, kj::String url) : cf(kj::mv(cf)), @@ -330,7 +332,7 @@ jsg::Optional> TraceItem::FetchEve } TraceItem::FetchEventInfo::Request::Request( - jsg::Lock& js, const Trace& trace, const Trace::FetchEventInfo& eventInfo) + jsg::Lock& js, const Trace& trace, const trace::FetchEventInfo& eventInfo) : detail(getFetchRequestDetail(js, trace, eventInfo)) {} TraceItem::FetchEventInfo::Request::Request(Detail& detail, bool redacted) @@ -375,7 +377,7 @@ jsg::Ref TraceItem::FetchEventInfo::Request: } TraceItem::FetchEventInfo::Response::Response( - const Trace& trace, const Trace::FetchResponseInfo& responseInfo) + const Trace& trace, const trace::FetchResponseInfo& responseInfo) : status(responseInfo.statusCode) {} uint16_t TraceItem::FetchEventInfo::Response::getStatus() { @@ -383,7 +385,7 @@ uint16_t TraceItem::FetchEventInfo::Response::getStatus() { } TraceItem::JsRpcEventInfo::JsRpcEventInfo( - const Trace& trace, const Trace::JsRpcEventInfo& eventInfo) + const Trace& trace, const trace::JsRpcEventInfo& eventInfo) : rpcMethod(kj::str(eventInfo.methodName)) {} kj::StringPtr TraceItem::JsRpcEventInfo::getRpcMethod() { @@ -391,7 +393,7 @@ kj::StringPtr TraceItem::JsRpcEventInfo::getRpcMethod() { } TraceItem::ScheduledEventInfo::ScheduledEventInfo( - const Trace& trace, const Trace::ScheduledEventInfo& eventInfo) + const Trace& trace, const trace::ScheduledEventInfo& eventInfo) : scheduledTime(eventInfo.scheduledTime), cron(kj::str(eventInfo.cron)) {} @@ -403,7 +405,7 @@ kj::StringPtr TraceItem::ScheduledEventInfo::getCron() { } TraceItem::AlarmEventInfo::AlarmEventInfo( - const Trace& trace, const Trace::AlarmEventInfo& eventInfo) + const Trace& trace, const trace::AlarmEventInfo& eventInfo) : scheduledTime(eventInfo.scheduledTime) {} kj::Date TraceItem::AlarmEventInfo::getScheduledTime() { @@ -411,7 +413,7 @@ kj::Date TraceItem::AlarmEventInfo::getScheduledTime() { } TraceItem::QueueEventInfo::QueueEventInfo( - const Trace& trace, const Trace::QueueEventInfo& eventInfo) + const Trace& trace, const trace::QueueEventInfo& eventInfo) : queueName(kj::str(eventInfo.queueName)), batchSize(eventInfo.batchSize) {} @@ -424,7 +426,7 @@ uint32_t TraceItem::QueueEventInfo::getBatchSize() { } TraceItem::EmailEventInfo::EmailEventInfo( - const Trace& trace, const Trace::EmailEventInfo& eventInfo) + const Trace& trace, const trace::EmailEventInfo& eventInfo) : mailFrom(kj::str(eventInfo.mailFrom)), rcptTo(kj::str(eventInfo.rcptTo)), rawSize(eventInfo.rawSize) {} @@ -442,13 +444,13 @@ uint32_t TraceItem::EmailEventInfo::getRawSize() { } kj::Array> getConsumedEventsFromEventInfo( - const Trace::TraceEventInfo& eventInfo) { + const trace::TraceEventInfo& eventInfo) { return KJ_MAP(t, eventInfo.traces) -> jsg::Ref { return jsg::alloc(t); }; } -TraceItem::TailEventInfo::TailEventInfo(const Trace& trace, const Trace::TraceEventInfo& eventInfo) +TraceItem::TailEventInfo::TailEventInfo(const Trace& trace, const trace::TraceEventInfo& eventInfo) : consumedEvents(getConsumedEventsFromEventInfo(eventInfo)) {} kj::Array> TraceItem::TailEventInfo:: @@ -458,7 +460,7 @@ kj::Array> TraceItem::TailEventInfo }; } -TraceItem::TailEventInfo::TailItem::TailItem(const Trace::TraceEventInfo::TraceItem& traceItem) +TraceItem::TailEventInfo::TailItem::TailItem(const trace::TraceEventInfo::TraceItem& traceItem) : scriptName(traceItem.scriptName.map([](auto& s) { return kj::str(s); })) {} kj::Maybe TraceItem::TailEventInfo::TailItem::getScriptName() { @@ -466,7 +468,7 @@ kj::Maybe TraceItem::TailEventInfo::TailItem::getScriptName() { } TraceDiagnosticChannelEvent::TraceDiagnosticChannelEvent( - const Trace& trace, const Trace::DiagnosticChannelEvent& eventInfo) + const Trace& trace, const trace::DiagnosticChannelEvent& eventInfo) : timestamp(getTraceDiagnosticChannelEventTimestamp(eventInfo)), channel(kj::heapString(eventInfo.channel)), message(kj::heapArray(eventInfo.message)) {} @@ -509,19 +511,19 @@ ScriptVersion::ScriptVersion(const ScriptVersion& other) message{other.message.map([](const auto& message) { return kj::str(message); })} {} TraceItem::CustomEventInfo::CustomEventInfo( - const Trace& trace, const Trace::CustomEventInfo& eventInfo) + const Trace& trace, const trace::CustomEventInfo& eventInfo) : eventInfo(eventInfo) {} TraceItem::HibernatableWebSocketEventInfo::HibernatableWebSocketEventInfo( - const Trace& trace, const Trace::HibernatableWebSocketEventInfo::Message& eventInfo) + const Trace& trace, const trace::HibernatableWebSocketEventInfo::Message& eventInfo) : eventType(jsg::alloc(trace, eventInfo)) {} TraceItem::HibernatableWebSocketEventInfo::HibernatableWebSocketEventInfo( - const Trace& trace, const Trace::HibernatableWebSocketEventInfo::Close& eventInfo) + const Trace& trace, const trace::HibernatableWebSocketEventInfo::Close& eventInfo) : eventType(jsg::alloc(trace, eventInfo)) {} TraceItem::HibernatableWebSocketEventInfo::HibernatableWebSocketEventInfo( - const Trace& trace, const Trace::HibernatableWebSocketEventInfo::Error& eventInfo) + const Trace& trace, const trace::HibernatableWebSocketEventInfo::Error& eventInfo) : eventType(jsg::alloc(trace, eventInfo)) {} TraceItem::HibernatableWebSocketEventInfo::Type TraceItem::HibernatableWebSocketEventInfo:: @@ -548,7 +550,7 @@ bool TraceItem::HibernatableWebSocketEventInfo::Close::getWasClean() { return eventInfo.wasClean; } -TraceLog::TraceLog(jsg::Lock& js, const Trace& trace, const Trace::Log& log) +TraceLog::TraceLog(jsg::Lock& js, const Trace& trace, const trace::Log& log) : timestamp(getTraceLogTimestamp(log)), level(getTraceLogLevel(log)), message(getTraceLogMessage(js, log)) {} @@ -565,7 +567,7 @@ jsg::V8Ref TraceLog::getMessage(jsg::Lock& js) { return message.addRef(js); } -TraceException::TraceException(const Trace& trace, const Trace::Exception& exception) +TraceException::TraceException(const Trace& trace, const trace::Exception& exception) : timestamp(getTraceExceptionTimestamp(exception)), name(kj::str(exception.name)), message(kj::str(exception.message)), @@ -604,7 +606,7 @@ kj::Promise sendTracesToExportedHandler(kj::OwngetMetrics(); KJ_IF_SOME(t, incomingRequest->getWorkerTracer()) { - t.setEventInfo(context.now(), Trace::TraceEventInfo(traces)); + t.setEventInfo(context.now(), trace::TraceEventInfo(traces)); } // Add the actual JS as a wait until because the handler may be an event listener which can't diff --git a/src/workerd/api/trace.h b/src/workerd/api/trace.h index f09bf0ea955..61f4a801d81 100644 --- a/src/workerd/api/trace.h +++ b/src/workerd/api/trace.h @@ -145,7 +145,7 @@ class TraceItem final: public jsg::Object { }; // When adding a new TraceItem eventInfo type, it is important not to -// try keeping a reference to the Trace and Trace::*EventInfo inputs. +// try keeping a reference to the Trace and trace::*EventInfo inputs. // They are kj heap objects that have a lifespan that is managed independently // of the TraceItem object. Each of the implementations here extract the // necessary detail on creation and use LAZY instance properties to minimize @@ -168,8 +168,8 @@ class TraceItem::FetchEventInfo final: public jsg::Object { explicit FetchEventInfo(jsg::Lock& js, const Trace& trace, - const Trace::FetchEventInfo& eventInfo, - kj::Maybe responseInfo); + const trace::FetchEventInfo& eventInfo, + kj::Maybe responseInfo); jsg::Ref getRequest(); jsg::Optional> getResponse(); @@ -191,12 +191,12 @@ class TraceItem::FetchEventInfo::Request final: public jsg::Object { public: struct Detail: public kj::Refcounted { jsg::Optional> cf; - kj::Array headers; + kj::Array headers; kj::String method; kj::String url; Detail(jsg::Optional> cf, - kj::Array headers, + kj::Array headers, kj::String method, kj::String url); @@ -210,7 +210,7 @@ class TraceItem::FetchEventInfo::Request final: public jsg::Object { } }; - explicit Request(jsg::Lock& js, const Trace& trace, const Trace::FetchEventInfo& eventInfo); + explicit Request(jsg::Lock& js, const Trace& trace, const trace::FetchEventInfo& eventInfo); // Creates a possibly unredacted instance that shared a ref of the Detail explicit Request(Detail& detail, bool redacted = true); @@ -242,7 +242,7 @@ class TraceItem::FetchEventInfo::Request final: public jsg::Object { class TraceItem::FetchEventInfo::Response final: public jsg::Object { public: - explicit Response(const Trace& trace, const Trace::FetchResponseInfo& responseInfo); + explicit Response(const Trace& trace, const trace::FetchResponseInfo& responseInfo); uint16_t getStatus(); @@ -256,7 +256,7 @@ class TraceItem::FetchEventInfo::Response final: public jsg::Object { class TraceItem::JsRpcEventInfo final: public jsg::Object { public: - explicit JsRpcEventInfo(const Trace& trace, const Trace::JsRpcEventInfo& eventInfo); + explicit JsRpcEventInfo(const Trace& trace, const trace::JsRpcEventInfo& eventInfo); // We call this `rpcMethod` to make clear this is an RPC event, since some tail workers rely // on duck-typing EventInfo based on the properties present. (`methodName` might be ambiguous @@ -279,7 +279,7 @@ class TraceItem::JsRpcEventInfo final: public jsg::Object { class TraceItem::ScheduledEventInfo final: public jsg::Object { public: - explicit ScheduledEventInfo(const Trace& trace, const Trace::ScheduledEventInfo& eventInfo); + explicit ScheduledEventInfo(const Trace& trace, const trace::ScheduledEventInfo& eventInfo); double getScheduledTime(); kj::StringPtr getCron(); @@ -300,7 +300,7 @@ class TraceItem::ScheduledEventInfo final: public jsg::Object { class TraceItem::AlarmEventInfo final: public jsg::Object { public: - explicit AlarmEventInfo(const Trace& trace, const Trace::AlarmEventInfo& eventInfo); + explicit AlarmEventInfo(const Trace& trace, const trace::AlarmEventInfo& eventInfo); kj::Date getScheduledTime(); @@ -314,7 +314,7 @@ class TraceItem::AlarmEventInfo final: public jsg::Object { class TraceItem::QueueEventInfo final: public jsg::Object { public: - explicit QueueEventInfo(const Trace& trace, const Trace::QueueEventInfo& eventInfo); + explicit QueueEventInfo(const Trace& trace, const trace::QueueEventInfo& eventInfo); kj::StringPtr getQueueName(); uint32_t getBatchSize(); @@ -336,7 +336,7 @@ class TraceItem::QueueEventInfo final: public jsg::Object { class TraceItem::EmailEventInfo final: public jsg::Object { public: - explicit EmailEventInfo(const Trace& trace, const Trace::EmailEventInfo& eventInfo); + explicit EmailEventInfo(const Trace& trace, const trace::EmailEventInfo& eventInfo); kj::StringPtr getMailFrom(); kj::StringPtr getRcptTo(); @@ -363,7 +363,7 @@ class TraceItem::TailEventInfo final: public jsg::Object { public: class TailItem; - explicit TailEventInfo(const Trace& trace, const Trace::TraceEventInfo& eventInfo); + explicit TailEventInfo(const Trace& trace, const trace::TraceEventInfo& eventInfo); kj::Array> getConsumedEvents(); @@ -379,7 +379,7 @@ class TraceItem::TailEventInfo final: public jsg::Object { class TraceItem::TailEventInfo::TailItem final: public jsg::Object { public: - explicit TailItem(const Trace::TraceEventInfo::TraceItem& traceItem); + explicit TailItem(const trace::TraceEventInfo::TraceItem& traceItem); kj::Maybe getScriptName(); @@ -402,11 +402,11 @@ class TraceItem::HibernatableWebSocketEventInfo final: public jsg::Object { class Error; explicit HibernatableWebSocketEventInfo( - const Trace& trace, const Trace::HibernatableWebSocketEventInfo::Message& eventInfo); + const Trace& trace, const trace::HibernatableWebSocketEventInfo::Message& eventInfo); explicit HibernatableWebSocketEventInfo( - const Trace& trace, const Trace::HibernatableWebSocketEventInfo::Close& eventInfo); + const Trace& trace, const trace::HibernatableWebSocketEventInfo::Close& eventInfo); explicit HibernatableWebSocketEventInfo( - const Trace& trace, const Trace::HibernatableWebSocketEventInfo::Error& eventInfo); + const Trace& trace, const trace::HibernatableWebSocketEventInfo::Error& eventInfo); using Type = kj::OneOf, jsg::Ref, jsg::Ref>; @@ -425,7 +425,7 @@ class TraceItem::HibernatableWebSocketEventInfo final: public jsg::Object { class TraceItem::HibernatableWebSocketEventInfo::Message final: public jsg::Object { public: explicit Message( - const Trace& trace, const Trace::HibernatableWebSocketEventInfo::Message& eventInfo) + const Trace& trace, const trace::HibernatableWebSocketEventInfo::Message& eventInfo) : eventInfo(eventInfo) {} static constexpr kj::StringPtr webSocketEventType = "message"_kj; @@ -438,12 +438,12 @@ class TraceItem::HibernatableWebSocketEventInfo::Message final: public jsg::Obje } private: - const Trace::HibernatableWebSocketEventInfo::Message& eventInfo; + const trace::HibernatableWebSocketEventInfo::Message& eventInfo; }; class TraceItem::HibernatableWebSocketEventInfo::Close final: public jsg::Object { public: - explicit Close(const Trace& trace, const Trace::HibernatableWebSocketEventInfo::Close& eventInfo) + explicit Close(const Trace& trace, const trace::HibernatableWebSocketEventInfo::Close& eventInfo) : eventInfo(eventInfo) {} static constexpr kj::StringPtr webSocketEventType = "close"_kj; @@ -461,12 +461,12 @@ class TraceItem::HibernatableWebSocketEventInfo::Close final: public jsg::Object } private: - const Trace::HibernatableWebSocketEventInfo::Close& eventInfo; + const trace::HibernatableWebSocketEventInfo::Close& eventInfo; }; class TraceItem::HibernatableWebSocketEventInfo::Error final: public jsg::Object { public: - explicit Error(const Trace& trace, const Trace::HibernatableWebSocketEventInfo::Error& eventInfo) + explicit Error(const Trace& trace, const trace::HibernatableWebSocketEventInfo::Error& eventInfo) : eventInfo(eventInfo) {} static constexpr kj::StringPtr webSocketEventType = "error"_kj; @@ -479,23 +479,23 @@ class TraceItem::HibernatableWebSocketEventInfo::Error final: public jsg::Object } private: - const Trace::HibernatableWebSocketEventInfo::Error& eventInfo; + const trace::HibernatableWebSocketEventInfo::Error& eventInfo; }; class TraceItem::CustomEventInfo final: public jsg::Object { public: - explicit CustomEventInfo(const Trace& trace, const Trace::CustomEventInfo& eventInfo); + explicit CustomEventInfo(const Trace& trace, const trace::CustomEventInfo& eventInfo); JSG_RESOURCE_TYPE(CustomEventInfo) {} private: - const Trace::CustomEventInfo& eventInfo; + const trace::CustomEventInfo& eventInfo; }; class TraceDiagnosticChannelEvent final: public jsg::Object { public: explicit TraceDiagnosticChannelEvent( - const Trace& trace, const Trace::DiagnosticChannelEvent& eventInfo); + const Trace& trace, const trace::DiagnosticChannelEvent& eventInfo); double getTimestamp(); kj::StringPtr getChannel(); @@ -520,7 +520,7 @@ class TraceDiagnosticChannelEvent final: public jsg::Object { class TraceLog final: public jsg::Object { public: - TraceLog(jsg::Lock& js, const Trace& trace, const Trace::Log& log); + TraceLog(jsg::Lock& js, const Trace& trace, const trace::Log& log); double getTimestamp(); kj::StringPtr getLevel(); @@ -545,7 +545,7 @@ class TraceLog final: public jsg::Object { class TraceException final: public jsg::Object { public: - TraceException(const Trace& trace, const Trace::Exception& exception); + TraceException(const Trace& trace, const trace::Exception& exception); double getTimestamp(); kj::StringPtr getName(); @@ -623,6 +623,8 @@ class TraceCustomEventImpl final: public WorkerInterface::CustomEvent { return typeId; } + static constexpr uint16_t TYPE = 2; + private: uint16_t typeId; kj::Array> traces; diff --git a/src/workerd/api/worker-rpc.c++ b/src/workerd/api/worker-rpc.c++ index 863e23fca54..45289455bca 100644 --- a/src/workerd/api/worker-rpc.c++ +++ b/src/workerd/api/worker-rpc.c++ @@ -1671,7 +1671,7 @@ private: void addTrace(jsg::Lock& js, IoContext& ioctx, kj::StringPtr methodName) override { KJ_IF_SOME(t, tracer) { - t->setEventInfo(ioctx.now(), Trace::JsRpcEventInfo(kj::str(methodName))); + t->setEventInfo(ioctx.now(), trace::JsRpcEventInfo(kj::str(methodName))); } } }; diff --git a/src/workerd/io/BUILD.bazel b/src/workerd/io/BUILD.bazel index b68d51ebda1..2dd449387bb 100644 --- a/src/workerd/io/BUILD.bazel +++ b/src/workerd/io/BUILD.bazel @@ -144,8 +144,18 @@ wd_cc_library( wd_cc_library( name = "trace", - srcs = ["trace.c++"], - hdrs = ["trace.h"], + srcs = [ + "trace.c++", + "trace-common.c++", + "trace-legacy.c++", + "trace-streaming.c++", + ], + hdrs = [ + "trace.h", + "trace-common.h", + "trace-legacy.h", + "trace-streaming.h", + ], implementation_deps = [ "//src/workerd/util:thread-scopes", ], @@ -155,6 +165,7 @@ wd_cc_library( "//src/workerd/jsg:memory-tracker", "//src/workerd/util", "//src/workerd/util:own-util", + "//src/workerd/util:uuid", "@capnp-cpp//src/capnp:capnp-rpc", "@capnp-cpp//src/capnp:capnpc", "@capnp-cpp//src/kj:kj-async", @@ -324,3 +335,21 @@ kj_test( "@capnp-cpp//src/capnp:capnpc", ], ) + +kj_test( + src = "trace-streaming-test.c++", + deps = [ + ":trace", + "@capnp-cpp//src/capnp:capnpc", + "@capnp-cpp//src/kj/compat:kj-http", + ], +) + +kj_test( + src = "trace-common-test.c++", + deps = [ + ":trace", + "@capnp-cpp//src/capnp:capnpc", + "@capnp-cpp//src/kj/compat:kj-http", + ], +) diff --git a/src/workerd/io/trace-common-test.c++ b/src/workerd/io/trace-common-test.c++ new file mode 100644 index 00000000000..895b3bc38b1 --- /dev/null +++ b/src/workerd/io/trace-common-test.c++ @@ -0,0 +1,517 @@ +#include "trace-common.h" + +#include +#include +#include + +namespace workerd { +namespace { + +KJ_TEST("Tags work") { + { + trace::Tag tag(kj::str("a"), static_cast(1)); + auto& key = KJ_ASSERT_NONNULL(tag.key.tryGet()); + auto& value = KJ_ASSERT_NONNULL(tag.value.tryGet()); + KJ_EXPECT(key == "a"); + KJ_EXPECT(value == static_cast(1)); + KJ_EXPECT(tag.keyMatches("a"_kj)); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + tag.copyTo(builder); + + // Round trip serialization works + auto reader = builder.asReader(); + trace::Tag tag2(reader); + auto& key2 = KJ_ASSERT_NONNULL(tag.key.tryGet()); + auto& value2 = KJ_ASSERT_NONNULL(tag.value.tryGet()); + KJ_EXPECT(key == key2); + KJ_EXPECT(value == value2); + + auto tag3 = tag.clone(); + KJ_EXPECT(tag3.keyMatches("a"_kj)); + } + + { + // The key can be a uint32_t + uint32_t a = 1; + trace::Tag tag(a, 2.0); + auto key = KJ_ASSERT_NONNULL(tag.key.tryGet()); + KJ_EXPECT(key == a); + KJ_EXPECT(tag.keyMatches(a)); + } +} + +KJ_TEST("Onset works") { + auto tags = kj::arr(trace::Tag(kj::str("a"), static_cast(1))); + trace::Onset onset(kj::str("bar"), kj::none, kj::str("baz"), kj::str("qux"), + kj::arr(kj::str("quux")), kj::str("corge"), ExecutionModel::STATELESS, kj::mv(tags)); + + trace::FetchEventInfo info(kj::HttpMethod::GET, kj::str("http://example.org"), kj::String(), + kj::arr(trace::FetchEventInfo::Header(kj::str("a"), kj::str("b")))); + onset.info = kj::mv(info); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + onset.copyTo(builder); + + auto reader = builder.asReader(); + trace::Onset onset2(reader); + KJ_EXPECT(KJ_ASSERT_NONNULL(onset2.scriptName) == "bar"_kj); + KJ_EXPECT(KJ_ASSERT_NONNULL(onset2.dispatchNamespace) == "baz"_kj); + KJ_EXPECT(KJ_ASSERT_NONNULL(onset2.scriptId) == "qux"_kj); + KJ_EXPECT(onset2.scriptTags.size() == 1); + KJ_EXPECT(onset2.scriptTags[0] == "quux"_kj); + KJ_EXPECT(KJ_ASSERT_NONNULL(onset2.entrypoint) == "corge"_kj); + KJ_EXPECT(onset2.tags.size() == 1); + KJ_EXPECT(onset2.tags[0].keyMatches("a"_kj)); + + auto& onset2Info = KJ_ASSERT_NONNULL(onset2.info); + auto& onset2Fetch = KJ_ASSERT_NONNULL(onset2Info.tryGet()); + KJ_EXPECT(onset2Fetch.method == kj::HttpMethod::GET); + KJ_EXPECT(onset2Fetch.url == "http://example.org"_kj); + KJ_EXPECT(onset2Fetch.cfJson == ""); + KJ_EXPECT(onset2Fetch.headers.size() == 1); + KJ_EXPECT(onset2Fetch.headers[0].name == "a"_kj); + KJ_EXPECT(onset2Fetch.headers[0].value == "b"_kj); + + auto onset3 = onset.clone(); + KJ_EXPECT(KJ_ASSERT_NONNULL(onset3.scriptName) == "bar"_kj); + KJ_EXPECT(KJ_ASSERT_NONNULL(onset3.dispatchNamespace) == "baz"_kj); + KJ_EXPECT(KJ_ASSERT_NONNULL(onset3.scriptId) == "qux"_kj); + KJ_EXPECT(onset3.scriptTags.size() == 1); + KJ_EXPECT(onset3.scriptTags[0] == "quux"_kj); + KJ_EXPECT(KJ_ASSERT_NONNULL(onset3.entrypoint) == "corge"_kj); + KJ_EXPECT(onset3.tags.size() == 1); + KJ_EXPECT(onset3.tags[0].keyMatches("a"_kj)); + + auto& onset3Info = KJ_ASSERT_NONNULL(onset3.info); + auto& onset3Fetch = KJ_ASSERT_NONNULL(onset3Info.tryGet()); + KJ_EXPECT(onset3Fetch.method == kj::HttpMethod::GET); + KJ_EXPECT(onset3Fetch.url == "http://example.org"_kj); + KJ_EXPECT(onset3Fetch.cfJson == ""); + KJ_EXPECT(onset3Fetch.headers.size() == 1); + KJ_EXPECT(onset3Fetch.headers[0].name == "a"_kj); + KJ_EXPECT(onset3Fetch.headers[0].value == "b"_kj); +} + +KJ_TEST("FetchEventInfo works") { + trace::FetchEventInfo info(kj::HttpMethod::GET, kj::str("http://example.org"), kj::String(), + kj::arr(trace::FetchEventInfo::Header(kj::str("a"), kj::str("b")))); + KJ_EXPECT(info.method == kj::HttpMethod::GET); + KJ_EXPECT(info.url == "http://example.org"_kj); + KJ_EXPECT(info.cfJson == ""); + KJ_EXPECT(info.headers.size() == 1); + KJ_EXPECT(info.headers[0].name == "a"_kj); + KJ_EXPECT(info.headers[0].value == "b"_kj); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + info.copyTo(builder); + + auto reader = builder.asReader(); + trace::FetchEventInfo info2(reader); + KJ_EXPECT(info2.method == kj::HttpMethod::GET); + KJ_EXPECT(info2.url == "http://example.org"_kj); + KJ_EXPECT(info2.cfJson == ""); + KJ_EXPECT(info2.headers.size() == 1); + KJ_EXPECT(info2.headers[0].name == "a"_kj); + KJ_EXPECT(info2.headers[0].value == "b"_kj); + + auto info3 = info.clone(); + KJ_EXPECT(info3.method == kj::HttpMethod::GET); + KJ_EXPECT(info3.url == "http://example.org"_kj); + KJ_EXPECT(info3.cfJson == ""); + KJ_EXPECT(info3.headers.size() == 1); +} + +KJ_TEST("FetchResponseInfo works") { + trace::FetchResponseInfo info(200); + KJ_EXPECT(info.statusCode == 200); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + info.copyTo(builder); + + auto reader = builder.asReader(); + trace::FetchResponseInfo info2(reader); + KJ_EXPECT(info2.statusCode == 200); + + auto info3 = info.clone(); + KJ_EXPECT(info3.statusCode == 200); +} + +KJ_TEST("JsRpcEventInfo works") { + trace::JsRpcEventInfo info(kj::str("foo")); + KJ_EXPECT(info.methodName == "foo"_kj); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + info.copyTo(builder); + + auto reader = builder.asReader(); + trace::JsRpcEventInfo info2(reader); + KJ_EXPECT(info2.methodName == "foo"_kj); + + auto info3 = info.clone(); + KJ_EXPECT(info3.methodName == "foo"_kj); +} + +KJ_TEST("ScheduledEventInfo works") { + trace::ScheduledEventInfo info(1.0, kj::String()); + KJ_EXPECT(info.scheduledTime == 1.0); + KJ_EXPECT(info.cron == ""_kj); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + info.copyTo(builder); + + auto reader = builder.asReader(); + trace::ScheduledEventInfo info2(reader); + KJ_EXPECT(info2.scheduledTime == 1.0); + KJ_EXPECT(info2.cron == ""_kj); + + auto info3 = info.clone(); + KJ_EXPECT(info3.scheduledTime == 1.0); + KJ_EXPECT(info3.cron == ""_kj); +} + +KJ_TEST("AlarmEventInfo works") { + trace::AlarmEventInfo info(1 * kj::MILLISECONDS + kj::UNIX_EPOCH); + auto date = info.scheduledTime; + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + info.copyTo(builder); + + auto reader = builder.asReader(); + trace::AlarmEventInfo info2(reader); + KJ_EXPECT(info2.scheduledTime == date); + + auto info3 = info.clone(); + KJ_EXPECT(info3.scheduledTime == date); +} + +KJ_TEST("QueueEventInfo works") { + uint32_t a = 1; + trace::QueueEventInfo info(kj::str("foo"), a); + KJ_EXPECT(info.queueName == "foo"_kj); + KJ_EXPECT(info.batchSize == a); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + info.copyTo(builder); + + auto reader = builder.asReader(); + trace::QueueEventInfo info2(reader); + KJ_EXPECT(info2.queueName == "foo"_kj); + KJ_EXPECT(info2.batchSize == a); + + auto info3 = info.clone(); + KJ_EXPECT(info3.queueName == "foo"_kj); + KJ_EXPECT(info3.batchSize == a); +} + +KJ_TEST("EmailEventInfo works") { + uint32_t a = 1; + trace::EmailEventInfo info(kj::str("foo"), kj::str("bar"), a); + KJ_EXPECT(info.mailFrom == "foo"_kj); + KJ_EXPECT(info.rcptTo == "bar"_kj); + KJ_EXPECT(info.rawSize == a); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + info.copyTo(builder); + + auto reader = builder.asReader(); + trace::EmailEventInfo info2(reader); + KJ_EXPECT(info2.mailFrom == "foo"_kj); + KJ_EXPECT(info2.rcptTo == "bar"_kj); + KJ_EXPECT(info2.rawSize == a); + + auto info3 = info.clone(); + KJ_EXPECT(info3.mailFrom == "foo"_kj); + KJ_EXPECT(info3.rcptTo == "bar"_kj); + KJ_EXPECT(info3.rawSize == a); +} + +KJ_TEST("HibernatableWebSocketEventInfo works") { + trace::HibernatableWebSocketEventInfo info(trace::HibernatableWebSocketEventInfo::Message{}); + KJ_EXPECT(info.type.is()); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + info.copyTo(builder); + + auto reader = builder.asReader(); + trace::HibernatableWebSocketEventInfo info2(reader); + KJ_EXPECT(info2.type.is()); + + auto info3 = info.clone(); + KJ_EXPECT(info3.type.is()); +} + +KJ_TEST("TraceEventInfo works") { + trace::TraceEventInfo info(kj::arr(trace::TraceEventInfo::TraceItem(kj::str("foo")))); + KJ_EXPECT(info.traces.size() == 1); + KJ_EXPECT(KJ_ASSERT_NONNULL(info.traces[0].scriptName) == "foo"_kj); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + info.copyTo(builder); + + auto reader = builder.asReader(); + trace::TraceEventInfo info2(reader); + KJ_EXPECT(info2.traces.size() == 1); + KJ_EXPECT(KJ_ASSERT_NONNULL(info2.traces[0].scriptName) == "foo"_kj); + + auto info3 = info.clone(); + KJ_EXPECT(info3.traces.size() == 1); + KJ_EXPECT(KJ_ASSERT_NONNULL(info3.traces[0].scriptName) == "foo"_kj); +} + +KJ_TEST("Outcome works") { + trace::FetchResponseInfo info(200); + + trace::Outcome outcome(EventOutcome::OK, kj::Maybe(kj::mv(info))); + KJ_EXPECT(outcome.outcome == EventOutcome::OK); + KJ_EXPECT(KJ_ASSERT_NONNULL(outcome.info).is()); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + outcome.copyTo(builder); + + auto reader = builder.asReader(); + trace::Outcome outcome2(reader); + KJ_EXPECT(outcome2.outcome == EventOutcome::OK); + KJ_EXPECT(KJ_ASSERT_NONNULL(outcome2.info).is()); + + auto outcome3 = outcome.clone(); + KJ_EXPECT(outcome3.outcome == EventOutcome::OK); + KJ_EXPECT(KJ_ASSERT_NONNULL(outcome3.info).is()); +} + +KJ_TEST("DiagnosticChannelEvent works") { + kj::Date date = 0 * kj::MILLISECONDS + kj::UNIX_EPOCH; + trace::DiagnosticChannelEvent event(date, kj::str("foo"), kj::arr(kj::byte(1))); + KJ_EXPECT(event.timestamp == date); + KJ_EXPECT(event.channel == "foo"_kj); + KJ_EXPECT(event.message.size() == 1); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + event.copyTo(builder); + + auto reader = builder.asReader(); + trace::DiagnosticChannelEvent event2(reader); + KJ_EXPECT(event2.timestamp == date); + KJ_EXPECT(event2.channel == "foo"_kj); + KJ_EXPECT(event2.message.size() == 1); + + auto event3 = event.clone(); + KJ_EXPECT(event3.timestamp == date); + KJ_EXPECT(event3.channel == "foo"_kj); + KJ_EXPECT(event3.message.size() == 1); +} + +KJ_TEST("Log works") { + kj::Date date = 0 * kj::MILLISECONDS + kj::UNIX_EPOCH; + trace::Log log(date, LogLevel::INFO, kj::str("foo")); + KJ_EXPECT(log.timestamp == date); + KJ_EXPECT(log.logLevel == LogLevel::INFO); + KJ_EXPECT(log.message == "foo"_kj); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + log.copyTo(builder); + + auto reader = builder.asReader(); + trace::Log log2(reader); + KJ_EXPECT(log2.timestamp == date); + KJ_EXPECT(log2.logLevel == LogLevel::INFO); + KJ_EXPECT(log2.message == "foo"_kj); + + auto log3 = log.clone(); + KJ_EXPECT(log3.timestamp == date); + KJ_EXPECT(log3.logLevel == LogLevel::INFO); + KJ_EXPECT(log3.message == "foo"_kj); +} + +KJ_TEST("LogV2 works") { + kj::Date date = 0 * kj::MILLISECONDS + kj::UNIX_EPOCH; + trace::LogV2 log(date, LogLevel::INFO, kj::heapArray(1)); + KJ_EXPECT(log.timestamp == date); + KJ_EXPECT(log.logLevel == LogLevel::INFO); + KJ_EXPECT(KJ_ASSERT_NONNULL(log.message.tryGet>()).size() == 1); + KJ_EXPECT(!log.truncated); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + log.copyTo(builder); + + auto reader = builder.asReader(); + trace::LogV2 log2(reader); + KJ_EXPECT(log2.timestamp == date); + KJ_EXPECT(log2.logLevel == LogLevel::INFO); + KJ_EXPECT(KJ_ASSERT_NONNULL(log2.message.tryGet>()).size() == 1); + KJ_EXPECT(!log2.truncated); + + auto log3 = log.clone(); + KJ_EXPECT(log3.timestamp == date); + KJ_EXPECT(log3.logLevel == LogLevel::INFO); + KJ_EXPECT(KJ_ASSERT_NONNULL(log3.message.tryGet>()).size() == 1); + KJ_EXPECT(!log3.truncated); +} + +KJ_TEST("Exception works") { + kj::Date date = 0 * kj::MILLISECONDS + kj::UNIX_EPOCH; + trace::Exception exception(date, kj::str("foo"), kj::str("bar"), kj::none); + KJ_EXPECT(exception.timestamp == date); + KJ_EXPECT(exception.name == "foo"_kj); + KJ_EXPECT(exception.message == "bar"_kj); + KJ_EXPECT(exception.stack == nullptr); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + exception.copyTo(builder); + + auto reader = builder.asReader(); + trace::Exception exception2(reader); + KJ_EXPECT(exception2.timestamp == date); + KJ_EXPECT(exception2.name == "foo"_kj); + KJ_EXPECT(exception2.message == "bar"_kj); + KJ_EXPECT(exception2.stack == nullptr); + + auto exception3 = exception.clone(); + KJ_EXPECT(exception3.timestamp == date); + KJ_EXPECT(exception3.name == "foo"_kj); + KJ_EXPECT(exception3.message == "bar"_kj); + KJ_EXPECT(exception3.stack == nullptr); +} + +KJ_TEST("Subrequest works") { + uint32_t a = 1; + trace::Subrequest subrequest(a, + trace::Subrequest::Info(trace::FetchEventInfo( + kj::HttpMethod::GET, kj::str("http://example.org"), kj::String(), nullptr))); + KJ_EXPECT(subrequest.id == a); + KJ_EXPECT(KJ_ASSERT_NONNULL(subrequest.info).is()); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + subrequest.copyTo(builder); + + auto reader = builder.asReader(); + trace::Subrequest subrequest2(reader); + KJ_EXPECT(subrequest2.id == a); + KJ_EXPECT(KJ_ASSERT_NONNULL(subrequest.info).is()); + + auto subrequest3 = subrequest.clone(); + KJ_EXPECT(subrequest3.id == a); + KJ_EXPECT(KJ_ASSERT_NONNULL(subrequest.info).is()); +} + +KJ_TEST("SubrequestOutcome works") { + uint32_t a = 1; + trace::SubrequestOutcome outcome(a, kj::none, trace::SpanClose::Outcome::OK); + KJ_EXPECT(outcome.id == a); + KJ_EXPECT(outcome.outcome == trace::SpanClose::Outcome::OK); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + outcome.copyTo(builder); + + auto reader = builder.asReader(); + trace::SubrequestOutcome outcome2(reader); + KJ_EXPECT(outcome2.id == a); + KJ_EXPECT(outcome2.outcome == trace::SpanClose::Outcome::OK); + + auto outcome3 = outcome.clone(); + KJ_EXPECT(outcome3.id == a); + KJ_EXPECT(outcome3.outcome == trace::SpanClose::Outcome::OK); +} + +KJ_TEST("SpanClose works") { + trace::SpanClose event(trace::SpanClose::Outcome::OK, nullptr); + KJ_EXPECT(event.outcome == trace::SpanClose::Outcome::OK); + KJ_EXPECT(event.tags.size() == 0); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + event.copyTo(builder); + + auto reader = builder.asReader(); + trace::SpanClose event2(reader); + KJ_EXPECT(event2.outcome == trace::SpanClose::Outcome::OK); + KJ_EXPECT(event2.tags.size() == 0); + + auto event3 = event.clone(); + KJ_EXPECT(event3.outcome == trace::SpanClose::Outcome::OK); + KJ_EXPECT(event3.tags.size() == 0); +} + +KJ_TEST("Mark works") { + trace::Mark mark(kj::str("foo")); + KJ_EXPECT(mark.name == "foo"_kj); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + mark.copyTo(builder); + + auto reader = builder.asReader(); + trace::Mark mark2(reader); + KJ_EXPECT(mark2.name == "foo"_kj); + + auto mark3 = mark.clone(); + KJ_EXPECT(mark3.name == "foo"_kj); +} + +KJ_TEST("Metric works") { + trace::Metric metric(trace::Metric::Type::COUNTER, kj::str("foo"), 1.0); + KJ_EXPECT(metric.type == trace::Metric::Type::COUNTER); + KJ_EXPECT(KJ_ASSERT_NONNULL(metric.key.tryGet()) == "foo"_kj); + KJ_EXPECT(metric.value == 1.0); + KJ_EXPECT(metric.keyMatches("foo"_kj)); + + enum class Foo { A }; + KJ_EXPECT(!metric.keyMatches(Foo::A)); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + metric.copyTo(builder); + + auto reader = builder.asReader(); + trace::Metric metric2(reader); + KJ_EXPECT(metric2.type == trace::Metric::Type::COUNTER); + KJ_EXPECT(KJ_ASSERT_NONNULL(metric2.key.tryGet()) == "foo"_kj); + KJ_EXPECT(metric2.value == 1.0); + + auto metric3 = metric.clone(); + KJ_EXPECT(metric3.type == trace::Metric::Type::COUNTER); + KJ_EXPECT(KJ_ASSERT_NONNULL(metric3.key.tryGet()) == "foo"_kj); + KJ_EXPECT(metric3.value == 1.0); +} + +KJ_TEST("Dropped works") { + uint32_t a = 1; + uint32_t b = 2; + trace::Dropped dropped(a, b); + KJ_EXPECT(dropped.start == a); + KJ_EXPECT(dropped.end == b); + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + dropped.copyTo(builder); + + auto reader = builder.asReader(); + trace::Dropped dropped2(reader); + KJ_EXPECT(dropped2.start == a); + KJ_EXPECT(dropped2.end == b); + + auto dropped3 = dropped.clone(); + KJ_EXPECT(dropped3.start == a); + KJ_EXPECT(dropped3.end == b); +} + +} // namespace +} // namespace workerd diff --git a/src/workerd/io/trace-common.c++ b/src/workerd/io/trace-common.c++ new file mode 100644 index 00000000000..e0c6d9869bb --- /dev/null +++ b/src/workerd/io/trace-common.c++ @@ -0,0 +1,1231 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +#include "trace-common.h" + +#include "trace-legacy.h" + +#include +#include +#include + +namespace workerd::trace { + +// ====================================================================================== +// Tag +namespace { +Tags getTagsFromReader(const capnp::List::Reader& tags) { + kj::Vector results; + results.reserve(tags.size()); + for (auto tag: tags) { + results.add(Tag(tag)); + } + return results.releaseAsArray(); +} + +Tag::TagValue getTagValue(const rpc::Trace::Tag::Reader& reader) { + auto value = reader.getValue(); + switch (value.which()) { + case rpc::Trace::Tag::Value::Which::BOOL: { + return value.getBool(); + } + case rpc::Trace::Tag::Value::Which::INT64: { + return value.getInt64(); + } + case rpc::Trace::Tag::Value::Which::UINT64: { + return value.getUint64(); + } + case rpc::Trace::Tag::Value::Which::FLOAT64: { + return value.getFloat64(); + } + case rpc::Trace::Tag::Value::Which::TEXT: { + return kj::str(value.getText()); + } + case rpc::Trace::Tag::Value::Which::DATA: { + return kj::heapArray(value.getData()); + } + } + KJ_UNREACHABLE; +} + +Tag::TagKey getTagKey(const rpc::Trace::Tag::Reader& reader) { + auto key = reader.getKey(); + switch (key.which()) { + case rpc::Trace::Tag::Key::Which::TEXT: { + return kj::str(key.getText()); + } + case rpc::Trace::Tag::Key::Which::ID: { + return key.getId(); + } + } + KJ_UNREACHABLE; +} +Tags maybeGetTags(const auto& reader) { + if (!reader.hasTags()) return nullptr; + return getTagsFromReader(reader.getTags()); +} +} // namespace + +Tag::Tag(TagKey key, TagValue value): key(kj::mv(key)), value(kj::mv(value)) {} + +Tag::Tag(rpc::Trace::Tag::Reader reader): key(getTagKey(reader)), value(getTagValue(reader)) {} + +Tag Tag::clone() const { + TagKey newKey = ([&]() -> TagKey { + KJ_SWITCH_ONEOF(key) { + KJ_CASE_ONEOF(id, uint32_t) { + return id; + } + KJ_CASE_ONEOF(name, kj::String) { + return kj::str(name); + } + } + KJ_UNREACHABLE; + })(); + TagValue newValue = ([&]() -> TagValue { + KJ_SWITCH_ONEOF(value) { + KJ_CASE_ONEOF(b, bool) { + return b; + } + KJ_CASE_ONEOF(i, int64_t) { + return i; + } + KJ_CASE_ONEOF(u, uint64_t) { + return u; + } + KJ_CASE_ONEOF(d, double) { + return d; + } + KJ_CASE_ONEOF(s, kj::String) { + return kj::str(s); + } + KJ_CASE_ONEOF(a, kj::Array) { + return kj::heapArray(a); + } + } + KJ_UNREACHABLE; + })(); + return Tag(kj::mv(newKey), kj::mv(newValue)); +} + +void Tag::copyTo(rpc::Trace::Tag::Builder builder) const { + KJ_SWITCH_ONEOF(key) { + KJ_CASE_ONEOF(id, uint32_t) { + builder.getKey().setId(id); + } + KJ_CASE_ONEOF(name, kj::String) { + builder.getKey().setText(name); + } + } + KJ_SWITCH_ONEOF(value) { + KJ_CASE_ONEOF(b, bool) { + builder.getValue().setBool(b); + } + KJ_CASE_ONEOF(i, int64_t) { + builder.getValue().setInt64(i); + } + KJ_CASE_ONEOF(u, uint64_t) { + builder.getValue().setUint64(u); + } + KJ_CASE_ONEOF(d, double) { + builder.getValue().setFloat64(d); + } + KJ_CASE_ONEOF(s, kj::String) { + builder.getValue().setText(s); + } + KJ_CASE_ONEOF(a, kj::Array) { + builder.getValue().setData(a); + } + } +} + +bool Tag::keyMatches(kj::OneOf check) { + KJ_SWITCH_ONEOF(check) { + KJ_CASE_ONEOF(c, kj::StringPtr) { + KJ_IF_SOME(k, key.tryGet()) { + return k == c; + } + } + KJ_CASE_ONEOF(u, uint32_t) { + KJ_IF_SOME(k, key.tryGet()) { + return k == u; + } + } + } + return false; +} + +// ====================================================================================== +// Onset + +namespace { +kj::Maybe maybeGetScriptName(const rpc::Trace::Onset::Reader& reader) { + if (!reader.hasScriptName()) return kj::none; + return kj::str(reader.getScriptName()); +} +kj::Maybe> maybeGetScriptVersion( + const rpc::Trace::Onset::Reader& reader) { + if (!reader.hasScriptVersion()) return kj::none; + return capnp::clone(reader.getScriptVersion()); +} +kj::Maybe maybeGetDispatchNamespace(const rpc::Trace::Onset::Reader& reader) { + if (!reader.hasDispatchNamespace()) return kj::none; + return kj::str(reader.getDispatchNamespace()); +} +kj::Maybe maybeGetScriptId(const rpc::Trace::Onset::Reader& reader) { + if (!reader.hasScriptId()) return kj::none; + return kj::str(reader.getScriptId()); +} +kj::Array maybeGetScriptTags(const rpc::Trace::Onset::Reader& reader) { + kj::Vector results(reader.getScriptTags().size()); + for (auto tag: reader.getScriptTags()) { + results.add(kj::str(tag)); + } + return results.releaseAsArray(); +} +kj::Maybe maybeGetEntrypoint(const rpc::Trace::Onset::Reader& reader) { + if (!reader.hasEntrypoint()) return kj::none; + return kj::str(reader.getEntrypoint()); +} +kj::Maybe maybeGetEventInfo(const rpc::Trace::Onset::Reader& reader) { + auto info = reader.getInfo(); + switch (info.which()) { + case rpc::Trace::Onset::Info::Which::NONE: + return kj::none; + case rpc::Trace::Onset::Info::Which::FETCH: { + return kj::Maybe(FetchEventInfo(info.getFetch())); + } + case rpc::Trace::Onset::Info::Which::JS_RPC: { + return kj::Maybe(JsRpcEventInfo(info.getJsRpc())); + } + case rpc::Trace::Onset::Info::Which::SCHEDULED: { + return kj::Maybe(ScheduledEventInfo(info.getScheduled())); + } + case rpc::Trace::Onset::Info::Which::ALARM: { + return kj::Maybe(AlarmEventInfo(info.getAlarm())); + } + case rpc::Trace::Onset::Info::Which::QUEUE: { + return kj::Maybe(QueueEventInfo(info.getQueue())); + } + case rpc::Trace::Onset::Info::Which::EMAIL: { + return kj::Maybe(EmailEventInfo(info.getEmail())); + } + case rpc::Trace::Onset::Info::Which::TRACE: { + return kj::Maybe(TraceEventInfo(info.getTrace())); + } + case rpc::Trace::Onset::Info::Which::HIBERNATABLE_WEB_SOCKET: { + return kj::Maybe(HibernatableWebSocketEventInfo(info.getHibernatableWebSocket())); + } + case rpc::Trace::Onset::Info::Which::CUSTOM: { + // TODO(streaming-trace): Implement correctly + return kj::Maybe(CustomEventInfo()); + } + } + KJ_UNREACHABLE; +} + +kj::Maybe cloneEventInfo(const kj::Maybe& other) { + KJ_IF_SOME(e, other) { + KJ_SWITCH_ONEOF(e) { + KJ_CASE_ONEOF(fetch, FetchEventInfo) { + return kj::Maybe(fetch.clone()); + } + KJ_CASE_ONEOF(jsRpc, JsRpcEventInfo) { + return kj::Maybe(jsRpc.clone()); + } + KJ_CASE_ONEOF(scheduled, ScheduledEventInfo) { + return kj::Maybe(scheduled.clone()); + } + KJ_CASE_ONEOF(alarm, AlarmEventInfo) { + return kj::Maybe(alarm.clone()); + } + KJ_CASE_ONEOF(queue, QueueEventInfo) { + return kj::Maybe(queue.clone()); + } + KJ_CASE_ONEOF(email, EmailEventInfo) { + return kj::Maybe(email.clone()); + } + KJ_CASE_ONEOF(trace, TraceEventInfo) { + return kj::Maybe(trace.clone()); + } + KJ_CASE_ONEOF(hibWs, HibernatableWebSocketEventInfo) { + return kj::Maybe(hibWs.clone()); + } + KJ_CASE_ONEOF(custom, CustomEventInfo) { + // TODO(streaming-trace): Implement correctly + return kj::Maybe(CustomEventInfo()); + } + } + } + return kj::none; +} +} // namespace + +Onset::Onset(kj::Maybe scriptName, + kj::Maybe> scriptVersion, + kj::Maybe dispatchNamespace, + kj::Maybe scriptId, + kj::Array scriptTags, + kj::Maybe entrypoint, + ExecutionModel executionModel, + Tags tags) + : scriptName(kj::mv(scriptName)), + scriptVersion(kj::mv(scriptVersion)), + dispatchNamespace(kj::mv(dispatchNamespace)), + scriptId(kj::mv(scriptId)), + scriptTags(kj::mv(scriptTags)), + entrypoint(kj::mv(entrypoint)), + executionModel(executionModel), + tags(kj::mv(tags)) {} + +Onset::Onset(rpc::Trace::Onset::Reader reader) + : scriptName(maybeGetScriptName(reader)), + scriptVersion(maybeGetScriptVersion(reader)), + dispatchNamespace(maybeGetDispatchNamespace(reader)), + scriptId(maybeGetScriptId(reader)), + scriptTags(maybeGetScriptTags(reader)), + entrypoint(maybeGetEntrypoint(reader)), + executionModel(reader.getExecutionModel()), + info(maybeGetEventInfo(reader)), + tags(maybeGetTags(reader)) {} + +void Onset::copyTo(rpc::Trace::Onset::Builder builder) const { + KJ_IF_SOME(name, scriptName) { + builder.setScriptName(name); + } + KJ_IF_SOME(version, scriptVersion) { + builder.setScriptVersion(*version); + } + KJ_IF_SOME(ns, dispatchNamespace) { + builder.setDispatchNamespace(ns); + } + KJ_IF_SOME(id, scriptId) { + builder.setScriptId(id); + } + if (scriptTags.size() > 0) { + auto list = builder.initScriptTags(scriptTags.size()); + for (auto i: kj::indices(scriptTags)) { + list.set(i, scriptTags[i]); + } + } + KJ_IF_SOME(e, entrypoint) { + builder.setEntrypoint(e); + } + builder.setExecutionModel(executionModel); + + KJ_IF_SOME(i, info) { + auto infoBuilder = builder.initInfo(); + KJ_SWITCH_ONEOF(i) { + KJ_CASE_ONEOF(fetch, FetchEventInfo) { + fetch.copyTo(infoBuilder.initFetch()); + } + KJ_CASE_ONEOF(jsRpc, JsRpcEventInfo) { + jsRpc.copyTo(infoBuilder.initJsRpc()); + } + KJ_CASE_ONEOF(scheduled, ScheduledEventInfo) { + scheduled.copyTo(infoBuilder.initScheduled()); + } + KJ_CASE_ONEOF(alarm, AlarmEventInfo) { + alarm.copyTo(infoBuilder.initAlarm()); + } + KJ_CASE_ONEOF(queue, QueueEventInfo) { + queue.copyTo(infoBuilder.initQueue()); + } + KJ_CASE_ONEOF(email, EmailEventInfo) { + email.copyTo(infoBuilder.initEmail()); + } + KJ_CASE_ONEOF(trace, TraceEventInfo) { + trace.copyTo(infoBuilder.initTrace()); + } + KJ_CASE_ONEOF(hibWs, HibernatableWebSocketEventInfo) { + hibWs.copyTo(infoBuilder.initHibernatableWebSocket()); + } + KJ_CASE_ONEOF(custom, CustomEventInfo) { + // TODO(streaming-trace): Implement correctly + } + } + } + + if (tags.size() > 0) { + auto list = builder.initTags(tags.size()); + for (auto i: kj::indices(tags)) { + tags[i].copyTo(list[i]); + } + } +} + +Onset Onset::clone() const { + Onset onset(scriptName.map([](const kj::String& s) { return kj::str(s); }), + scriptVersion.map([](const kj::Own& s) { return capnp::clone(*s); }), + dispatchNamespace.map([](const kj::String& s) { return kj::str(s); }), + scriptId.map([](const kj::String& s) { return kj::str(s); }), + KJ_MAP(tag, scriptTags) { return kj::str(tag); }, + entrypoint.map([](const kj::String& s) { return kj::str(s); }), executionModel, + KJ_MAP(tag, tags) { return tag.clone(); }); + onset.info = cloneEventInfo(info); + return kj::mv(onset); +} + +// ====================================================================================== +// Outcome + +namespace { +kj::Maybe maybeGetInfo(const rpc::Trace::Outcome::Reader& reader) { + // if (!reader.hasInfo()) return kj::none; + auto info = reader.getInfo(); + switch (info.which()) { + case rpc::Trace::Outcome::Info::Which::NONE: { + return kj::none; + } + case rpc::Trace::Outcome::Info::Which::FETCH: { + return kj::Maybe(FetchResponseInfo(info.getFetch())); + } + case rpc::Trace::Outcome::Info::Which::CUSTOM: { + auto custom = info.getCustom(); + kj::Vector tags(custom.size()); + for (size_t n = 0; n < custom.size(); n++) { + tags.add(Tag(custom[n])); + } + return kj::Maybe(tags.releaseAsArray()); + } + } + KJ_UNREACHABLE; +} + +kj::Maybe cloneInfo(const kj::Maybe& other) { + KJ_IF_SOME(o, other) { + KJ_SWITCH_ONEOF(o) { + KJ_CASE_ONEOF(fetch, FetchResponseInfo) { + return kj::Maybe(fetch.clone()); + } + KJ_CASE_ONEOF(tags, Tags) { + kj::Vector newTags(tags.size()); + for (auto& tag: tags) { + newTags.add(tag.clone()); + } + return kj::Maybe(newTags.releaseAsArray()); + } + } + } + return kj::none; +} +} // namespace + +Outcome::Outcome(EventOutcome outcome, kj::Maybe info) + : outcome(outcome), + info(kj::mv(info)) {} + +Outcome::Outcome(rpc::Trace::Outcome::Reader reader) + : outcome(reader.getOutcome()), + info(maybeGetInfo(reader)) {} + +void Outcome::copyTo(rpc::Trace::Outcome::Builder builder) const { + builder.setOutcome(outcome); + KJ_IF_SOME(i, info) { + auto infoBuilder = builder.getInfo(); + KJ_SWITCH_ONEOF(i) { + KJ_CASE_ONEOF(fetch, FetchResponseInfo) { + fetch.copyTo(infoBuilder.initFetch()); + } + KJ_CASE_ONEOF(tags, Tags) { + auto list = infoBuilder.initCustom(tags.size()); + for (auto i: kj::indices(tags)) { + tags[i].copyTo(list[i]); + } + } + } + } +} + +Outcome Outcome::clone() const { + return Outcome{outcome, cloneInfo(info)}; +} + +// ====================================================================================== +// FetchEventInfo + +namespace { +kj::HttpMethod validateMethod(capnp::HttpMethod method) { + KJ_REQUIRE(method <= capnp::HttpMethod::UNSUBSCRIBE, "unknown method", method); + return static_cast(method); +} +} // namespace + +FetchEventInfo::FetchEventInfo( + kj::HttpMethod method, kj::String url, kj::String cfJson, kj::Array
headers) + : method(method), + url(kj::mv(url)), + cfJson(kj::mv(cfJson)), + headers(kj::mv(headers)) {} + +FetchEventInfo::FetchEventInfo(rpc::Trace::FetchEventInfo::Reader reader) + : method(validateMethod(reader.getMethod())), + url(kj::str(reader.getUrl())), + cfJson(kj::str(reader.getCfJson())) { + kj::Vector
v; + v.addAll(reader.getHeaders()); + headers = v.releaseAsArray(); +} + +void FetchEventInfo::copyTo(rpc::Trace::FetchEventInfo::Builder builder) const { + builder.setMethod(static_cast(method)); + builder.setUrl(url); + builder.setCfJson(cfJson); + + auto list = builder.initHeaders(headers.size()); + for (auto i: kj::indices(headers)) { + headers[i].copyTo(list[i]); + } +} + +FetchEventInfo::Header::Header(kj::String name, kj::String value) + : name(kj::mv(name)), + value(kj::mv(value)) {} + +FetchEventInfo::Header::Header(rpc::Trace::FetchEventInfo::Header::Reader reader) + : name(kj::str(reader.getName())), + value(kj::str(reader.getValue())) {} + +void FetchEventInfo::Header::copyTo(rpc::Trace::FetchEventInfo::Header::Builder builder) const { + builder.setName(name); + builder.setValue(value); +} + +FetchEventInfo FetchEventInfo::clone() const { + kj::Vector
newHeaders(headers.size()); + for (auto& header: headers) { + newHeaders.add(Header(kj::str(header.name), kj::str(header.value))); + } + + return FetchEventInfo(method, kj::str(url), kj::str(cfJson), newHeaders.releaseAsArray()); +} + +// ====================================================================================== +// FetchResponseInfo +FetchResponseInfo::FetchResponseInfo(uint16_t statusCode): statusCode(statusCode) {} + +FetchResponseInfo::FetchResponseInfo(rpc::Trace::FetchResponseInfo::Reader reader) + : statusCode(reader.getStatusCode()) {} + +void FetchResponseInfo::copyTo(rpc::Trace::FetchResponseInfo::Builder builder) const { + builder.setStatusCode(statusCode); +} + +FetchResponseInfo FetchResponseInfo::clone() const { + return FetchResponseInfo(statusCode); +} + +// ====================================================================================== +// JsRpcEventInfo + +JsRpcEventInfo::JsRpcEventInfo(kj::String methodName): methodName(kj::mv(methodName)) {} + +JsRpcEventInfo::JsRpcEventInfo(rpc::Trace::JsRpcEventInfo::Reader reader) + : methodName(kj::str(reader.getMethodName())) {} + +void JsRpcEventInfo::copyTo(rpc::Trace::JsRpcEventInfo::Builder builder) const { + builder.setMethodName(methodName); +} + +JsRpcEventInfo JsRpcEventInfo::clone() const { + return JsRpcEventInfo(kj::str(methodName)); +} + +// ====================================================================================== +// ScheduledEventInfo + +ScheduledEventInfo::ScheduledEventInfo(double scheduledTime, kj::String cron) + : scheduledTime(scheduledTime), + cron(kj::mv(cron)) {} + +ScheduledEventInfo::ScheduledEventInfo(rpc::Trace::ScheduledEventInfo::Reader reader) + : scheduledTime(reader.getScheduledTime()), + cron(kj::str(reader.getCron())) {} + +void ScheduledEventInfo::copyTo(rpc::Trace::ScheduledEventInfo::Builder builder) const { + builder.setScheduledTime(scheduledTime); + builder.setCron(cron); +} + +ScheduledEventInfo ScheduledEventInfo::clone() const { + return ScheduledEventInfo(scheduledTime, kj::str(cron)); +} + +// ====================================================================================== +// AlarmEventInfo + +AlarmEventInfo::AlarmEventInfo(kj::Date scheduledTime): scheduledTime(scheduledTime) {} + +AlarmEventInfo::AlarmEventInfo(rpc::Trace::AlarmEventInfo::Reader reader) + : scheduledTime(reader.getScheduledTimeMs() * kj::MILLISECONDS + kj::UNIX_EPOCH) {} + +void AlarmEventInfo::copyTo(rpc::Trace::AlarmEventInfo::Builder builder) const { + builder.setScheduledTimeMs((scheduledTime - kj::UNIX_EPOCH) / kj::MILLISECONDS); +} + +AlarmEventInfo AlarmEventInfo::clone() const { + return AlarmEventInfo(scheduledTime); +} + +// ====================================================================================== +// QueueEventInfo + +QueueEventInfo::QueueEventInfo(kj::String queueName, uint32_t batchSize) + : queueName(kj::mv(queueName)), + batchSize(batchSize) {} + +QueueEventInfo::QueueEventInfo(rpc::Trace::QueueEventInfo::Reader reader) + : queueName(kj::heapString(reader.getQueueName())), + batchSize(reader.getBatchSize()) {} + +void QueueEventInfo::copyTo(rpc::Trace::QueueEventInfo::Builder builder) const { + builder.setQueueName(queueName); + builder.setBatchSize(batchSize); +} + +QueueEventInfo QueueEventInfo::clone() const { + return QueueEventInfo(kj::str(queueName), batchSize); +} + +// ====================================================================================== +// EmailEventInfo + +EmailEventInfo::EmailEventInfo(kj::String mailFrom, kj::String rcptTo, uint32_t rawSize) + : mailFrom(kj::mv(mailFrom)), + rcptTo(kj::mv(rcptTo)), + rawSize(rawSize) {} + +EmailEventInfo::EmailEventInfo(rpc::Trace::EmailEventInfo::Reader reader) + : mailFrom(kj::heapString(reader.getMailFrom())), + rcptTo(kj::heapString(reader.getRcptTo())), + rawSize(reader.getRawSize()) {} + +void EmailEventInfo::copyTo(rpc::Trace::EmailEventInfo::Builder builder) const { + builder.setMailFrom(mailFrom); + builder.setRcptTo(rcptTo); + builder.setRawSize(rawSize); +} + +EmailEventInfo EmailEventInfo::clone() const { + return EmailEventInfo(kj::str(mailFrom), kj::str(rcptTo), rawSize); +} + +// ====================================================================================== +// HibernatableWebSocketEventInfo + +HibernatableWebSocketEventInfo::HibernatableWebSocketEventInfo(Type type): type(type) {} + +HibernatableWebSocketEventInfo::HibernatableWebSocketEventInfo( + rpc::Trace::HibernatableWebSocketEventInfo::Reader reader) + : type(readFrom(reader)) {} + +void HibernatableWebSocketEventInfo::copyTo( + rpc::Trace::HibernatableWebSocketEventInfo::Builder builder) const { + auto typeBuilder = builder.initType(); + KJ_SWITCH_ONEOF(type) { + KJ_CASE_ONEOF(_, Message) { + typeBuilder.setMessage(); + } + KJ_CASE_ONEOF(close, Close) { + auto closeBuilder = typeBuilder.initClose(); + closeBuilder.setCode(close.code); + closeBuilder.setWasClean(close.wasClean); + } + KJ_CASE_ONEOF(_, Error) { + typeBuilder.setError(); + } + } +} + +HibernatableWebSocketEventInfo::Type HibernatableWebSocketEventInfo::readFrom( + rpc::Trace::HibernatableWebSocketEventInfo::Reader reader) { + auto type = reader.getType(); + switch (type.which()) { + case rpc::Trace::HibernatableWebSocketEventInfo::Type::MESSAGE: { + return Message{}; + } + case rpc::Trace::HibernatableWebSocketEventInfo::Type::CLOSE: { + auto close = type.getClose(); + return Close{ + .code = close.getCode(), + .wasClean = close.getWasClean(), + }; + } + case rpc::Trace::HibernatableWebSocketEventInfo::Type::ERROR: { + return Error{}; + } + } +} + +HibernatableWebSocketEventInfo HibernatableWebSocketEventInfo::clone() const { + auto newType = ([this]() -> Type { + KJ_SWITCH_ONEOF(type) { + KJ_CASE_ONEOF(_, Message) { + return Message{}; + } + KJ_CASE_ONEOF(close, Close) { + return Close{close.code, close.wasClean}; + } + KJ_CASE_ONEOF(_, Error) { + return Error{}; + } + } + KJ_UNREACHABLE; + })(); + return HibernatableWebSocketEventInfo(kj::mv(newType)); +} + +// ====================================================================================== +// TraceEventInfo + +namespace { +kj::Vector getTraceItemsFromTraces(kj::ArrayPtr> traces) { + kj::Vector results(traces.size()); + for (size_t n = 0; n < traces.size(); n++) { + KJ_IF_SOME(name, traces[n]->onsetInfo.scriptName) { + TraceEventInfo::TraceItem item(kj::str(name)); + results.add(kj::mv(item)); + } else { + TraceEventInfo::TraceItem item(kj::str(""_kj)); + results.add(kj::mv(item)); + } + } + return results.releaseAsArray(); +} + +kj::Vector getTraceItemsFromReader( + rpc::Trace::TraceEventInfo::Reader reader) { + auto traces = reader.getTraces(); + kj::Vector results; + results.addAll(traces); + return results.releaseAsArray(); +} +} // namespace + +TraceEventInfo::TraceEventInfo(kj::ArrayPtr> traces) + : traces(getTraceItemsFromTraces(traces)) {} + +TraceEventInfo::TraceEventInfo(kj::Array traces): traces(kj::mv(traces)) {} + +TraceEventInfo::TraceEventInfo(rpc::Trace::TraceEventInfo::Reader reader) + : traces(getTraceItemsFromReader(reader)) {} + +void TraceEventInfo::copyTo(rpc::Trace::TraceEventInfo::Builder builder) const { + auto list = builder.initTraces(traces.size()); + for (auto i: kj::indices(traces)) { + traces[i].copyTo(list[i]); + } +} + +TraceEventInfo::TraceItem::TraceItem(kj::Maybe scriptName) + : scriptName(kj::mv(scriptName)) {} + +TraceEventInfo::TraceItem::TraceItem(rpc::Trace::TraceEventInfo::TraceItem::Reader reader) + : scriptName(kj::str(reader.getScriptName())) {} + +void TraceEventInfo::TraceItem::copyTo( + rpc::Trace::TraceEventInfo::TraceItem::Builder builder) const { + KJ_IF_SOME(name, scriptName) { + builder.setScriptName(name); + } +} + +TraceEventInfo::TraceItem TraceEventInfo::TraceItem::clone() const { + return TraceEventInfo::TraceItem(scriptName.map([](const kj::String& s) { return kj::str(s); })); +} + +TraceEventInfo TraceEventInfo::clone() const { + kj::Vector newTraces(traces.size()); + for (auto& trace: traces) { + newTraces.add(trace.clone()); + } + return TraceEventInfo(newTraces.releaseAsArray()); +} + +// ====================================================================================== +// DiagnosticChannelEvent + +DiagnosticChannelEvent::DiagnosticChannelEvent( + kj::Date timestamp, kj::String channel, kj::Array message) + : timestamp(timestamp), + channel(kj::mv(channel)), + message(kj::mv(message)) {} + +DiagnosticChannelEvent::DiagnosticChannelEvent(rpc::Trace::DiagnosticChannelEvent::Reader reader) + : timestamp(kj::UNIX_EPOCH + reader.getTimestampNs() * kj::NANOSECONDS), + channel(kj::heapString(reader.getChannel())), + message(kj::heapArray(reader.getMessage())) {} + +void DiagnosticChannelEvent::copyTo(rpc::Trace::DiagnosticChannelEvent::Builder builder) const { + builder.setTimestampNs((timestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS); + builder.setChannel(channel); + builder.setMessage(message); +} + +DiagnosticChannelEvent DiagnosticChannelEvent::clone() const { + return DiagnosticChannelEvent(timestamp, kj::str(channel), kj::heapArray(message)); +} + +// ====================================================================================== +// Log + +namespace { +kj::OneOf, kj::String> getMessageForLog( + const rpc::Trace::LogV2::Reader& reader) { + auto message = reader.getMessage(); + switch (message.which()) { + case rpc::Trace::LogV2::Message::Which::TEXT: { + return kj::str(message.getText()); + } + case rpc::Trace::LogV2::Message::Which::DATA: { + return kj::heapArray(message.getData()); + } + } + KJ_UNREACHABLE; +} +} // namespace + +Log::Log(kj::Date timestamp, LogLevel logLevel, kj::String message) + : timestamp(timestamp), + logLevel(logLevel), + message(kj::mv(message)) {} + +Log::Log(rpc::Trace::Log::Reader reader) + : timestamp(kj::UNIX_EPOCH + reader.getTimestampNs() * kj::NANOSECONDS), + logLevel(reader.getLogLevel()), + message(kj::str(reader.getMessage())) {} + +void Log::copyTo(rpc::Trace::Log::Builder builder) const { + builder.setTimestampNs((timestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS); + builder.setLogLevel(logLevel); + builder.setMessage(message); +} + +Log Log::clone() const { + return Log(timestamp, logLevel, kj::str(message)); +} + +LogV2::LogV2(kj::Date timestamp, + LogLevel logLevel, + kj::OneOf, kj::String> message, + Tags tags, + bool truncated) + : timestamp(timestamp), + logLevel(logLevel), + message(kj::mv(message)), + tags(kj::mv(tags)), + truncated(truncated) {} + +LogV2::LogV2(rpc::Trace::LogV2::Reader reader) + : timestamp(kj::UNIX_EPOCH + reader.getTimestampNs() * kj::NANOSECONDS), + logLevel(reader.getLogLevel()), + message(getMessageForLog(reader)), + truncated(reader.getTruncated()) {} + +void LogV2::copyTo(rpc::Trace::LogV2::Builder builder) const { + builder.setTimestampNs((timestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS); + builder.setLogLevel(logLevel); + KJ_SWITCH_ONEOF(message) { + KJ_CASE_ONEOF(str, kj::String) { + builder.initMessage().setText(str); + } + KJ_CASE_ONEOF(data, kj::Array) { + builder.initMessage().setData(data); + } + } + builder.setTruncated(truncated); + auto outTags = builder.initTags(tags.size()); + for (size_t n = 0; n < tags.size(); n++) { + tags[n].copyTo(outTags[n]); + } +} + +LogV2 LogV2::clone() const { + kj::Vector newTags(tags.size()); + for (auto& tag: tags) { + newTags.add(tag.clone()); + } + auto newMessage = ([&]() -> kj::OneOf, kj::String> { + KJ_SWITCH_ONEOF(message) { + KJ_CASE_ONEOF(str, kj::String) { + return kj::str(str); + } + KJ_CASE_ONEOF(data, kj::Array) { + return kj::heapArray(data); + } + } + KJ_UNREACHABLE; + })(); + return LogV2(timestamp, logLevel, kj::mv(newMessage), newTags.releaseAsArray(), truncated); +} + +// ====================================================================================== +// Exception +namespace { +kj::Maybe maybeGetStack(const rpc::Trace::Exception::Reader& reader) { + if (!reader.hasStack()) return kj::none; + return kj::str(reader.getStack()); +} +Exception::Detail getDetail(const rpc::Trace::Exception::Reader& reader) { + auto detailReader = reader.getDetail(); + Exception::Detail detail; + if (detailReader.hasCause()) { + detail.cause = kj::heap(Exception(detailReader.getCause())); + } + if (detailReader.hasErrors()) { + kj::Vector> errors(detailReader.getErrors().size()); + for (auto error: detailReader.getErrors()) { + errors.add(kj::heap(Exception(error))); + } + detail.errors = errors.releaseAsArray(); + } + if (detailReader.hasTags()) { + detail.tags = getTagsFromReader(detailReader.getTags()); + } + detail.retryable = detailReader.getRetryable(); + detail.remote = detailReader.getRemote(); + detail.overloaded = detailReader.getOverloaded(); + detail.durableObjectReset = detailReader.getDurableObjectReset(); + return kj::mv(detail); +} +} // namespace + +Exception::Detail Exception::Detail::clone() const { + auto newCause = ([&]() -> kj::Maybe> { + KJ_IF_SOME(exception, cause) { + return kj::Maybe(kj::heap(exception->clone())); + } + return kj::none; + })(); + auto newErrors = ([&]() -> kj::Array> { + kj::Vector> results(errors.size()); + for (auto& error: errors) { + results.add(kj::heap(error->clone())); + } + return results.releaseAsArray(); + })(); + auto newTags = ([&]() -> Tags { + kj::Vector results(tags.size()); + for (auto& tag: tags) { + results.add(tag.clone()); + } + return results.releaseAsArray(); + })(); + return Detail{ + .cause = kj::mv(newCause), + .errors = kj::mv(newErrors), + .remote = remote, + .retryable = retryable, + .overloaded = overloaded, + .durableObjectReset = durableObjectReset, + .tags = kj::mv(newTags), + }; +} + +Exception::Exception(kj::Date timestamp, + kj::String name, + kj::String message, + kj::Maybe stack, + kj::Maybe detail) + : timestamp(timestamp), + name(kj::mv(name)), + message(kj::mv(message)), + stack(kj::mv(stack)), + detail(kj::mv(detail).orDefault({})) {} + +Exception::Exception(rpc::Trace::Exception::Reader reader) + : timestamp(kj::UNIX_EPOCH + reader.getTimestampNs() * kj::NANOSECONDS), + name(kj::str(reader.getName())), + message(kj::str(reader.getMessage())), + stack(maybeGetStack(reader)), + detail(getDetail(reader)) {} + +void Exception::copyTo(rpc::Trace::Exception::Builder builder) const { + builder.setTimestampNs((timestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS); + builder.setName(name); + builder.setMessage(message); + KJ_IF_SOME(s, stack) { + builder.setStack(s); + } + + auto detailBuilder = builder.initDetail(); + KJ_IF_SOME(cause, detail.cause) { + cause->copyTo(detailBuilder.initCause()); + } + if (detail.errors.size() > 0) { + auto errorsBuilder = detailBuilder.initErrors(detail.errors.size()); + for (size_t n = 0; n < detail.errors.size(); n++) { + detail.errors[n]->copyTo(errorsBuilder[n]); + } + } + detailBuilder.setRemote(detail.remote); + detailBuilder.setRetryable(detail.retryable); + detailBuilder.setOverloaded(detail.overloaded); + detailBuilder.setDurableObjectReset(detail.durableObjectReset); + if (detail.tags.size() > 0) { + auto tagsBuilder = detailBuilder.initTags(detail.tags.size()); + for (size_t n = 0; n < detail.tags.size(); n++) { + detail.tags[n].copyTo(tagsBuilder[n]); + } + } +} + +Exception Exception::clone() const { + return Exception(timestamp, kj::str(name), kj::str(message), + stack.map([](const kj::String& s) { return kj::str(s); }), detail.clone()); +} + +// ====================================================================================== +// Subrequest +namespace { +kj::Maybe maybeGetSubrequestInfo(const rpc::Trace::Subrequest::Reader& reader) { + auto info = reader.getInfo(); + switch (info.which()) { + case rpc::Trace::Subrequest::Info::Which::NONE: { + return kj::none; + } + case rpc::Trace::Subrequest::Info::Which::FETCH: { + return kj::Maybe(FetchEventInfo(info.getFetch())); + } + case rpc::Trace::Subrequest::Info::Which::JS_RPC: { + return kj::Maybe(JsRpcEventInfo(info.getJsRpc())); + } + } + KJ_UNREACHABLE; +} +} // namespace + +Subrequest::Subrequest(uint32_t id, kj::Maybe info): id(id), info(kj::mv(info)) {} + +Subrequest::Subrequest(rpc::Trace::Subrequest::Reader reader) + : id(reader.getId()), + info(maybeGetSubrequestInfo(reader)) {} + +void Subrequest::copyTo(rpc::Trace::Subrequest::Builder builder) const { + builder.setId(id); + KJ_IF_SOME(i, info) { + KJ_SWITCH_ONEOF(i) { + KJ_CASE_ONEOF(fetch, FetchEventInfo) { + fetch.copyTo(builder.getInfo().initFetch()); + } + KJ_CASE_ONEOF(jsRpc, JsRpcEventInfo) { + builder.getInfo().which(); + jsRpc.copyTo(builder.getInfo().initJsRpc()); + } + } + } +} + +Subrequest Subrequest::clone() const { + auto newInfo = ([&]() -> kj::Maybe { + KJ_IF_SOME(i, info) { + KJ_SWITCH_ONEOF(i) { + KJ_CASE_ONEOF(fetch, FetchEventInfo) { + return kj::Maybe(fetch.clone()); + } + KJ_CASE_ONEOF(jsRpc, JsRpcEventInfo) { + return kj::Maybe(jsRpc.clone()); + } + } + KJ_UNREACHABLE; + } + return kj::none; + })(); + return Subrequest(id, kj::mv(newInfo)); +} + +// ====================================================================================== +// SubrequestOutcome +namespace { +kj::Maybe maybeGetSubrequestOutcome( + const rpc::Trace::SubrequestOutcome::Reader& reader) { + auto info = reader.getInfo(); + switch (info.which()) { + case rpc::Trace::SubrequestOutcome::Info::Which::NONE: { + return kj::none; + } + case rpc::Trace::SubrequestOutcome::Info::Which::FETCH: { + return kj::Maybe(FetchResponseInfo(info.getFetch())); + } + case rpc::Trace::SubrequestOutcome::Info::Which::CUSTOM: { + return kj::Maybe(getTagsFromReader(info.getCustom())); + } + } + KJ_UNREACHABLE; +} +} // namespace +SubrequestOutcome::SubrequestOutcome(uint32_t id, kj::Maybe info, SpanClose::Outcome outcome) + : id(id), + info(kj::mv(info)), + outcome(outcome) {} + +SubrequestOutcome::SubrequestOutcome(rpc::Trace::SubrequestOutcome::Reader reader) + : id(reader.getId()), + info(maybeGetSubrequestOutcome(reader)), + outcome(reader.getOutcome()) {} + +void SubrequestOutcome::copyTo(rpc::Trace::SubrequestOutcome::Builder builder) const { + builder.setId(id); + builder.setOutcome(outcome); + KJ_IF_SOME(i, info) { + KJ_SWITCH_ONEOF(i) { + KJ_CASE_ONEOF(fetch, FetchResponseInfo) { + fetch.copyTo(builder.getInfo().initFetch()); + } + KJ_CASE_ONEOF(tags, Tags) { + auto custom = builder.getInfo().initCustom(tags.size()); + for (size_t n = 0; n < tags.size(); n++) { + tags[n].copyTo(custom[n]); + } + } + } + } +} + +SubrequestOutcome SubrequestOutcome::clone() const { + auto newInfo = ([&]() -> kj::Maybe { + KJ_IF_SOME(i, info) { + KJ_SWITCH_ONEOF(i) { + KJ_CASE_ONEOF(fetch, FetchResponseInfo) { + return kj::Maybe(fetch.clone()); + } + KJ_CASE_ONEOF(tags, Tags) { + kj::Vector newTags(tags.size()); + for (auto& tag: tags) { + newTags.add(tag.clone()); + } + return kj::Maybe(newTags.releaseAsArray()); + } + } + } + return kj::none; + })(); + return SubrequestOutcome(id, kj::mv(newInfo), outcome); +} + +// ====================================================================================== +// SpanClose + +SpanClose::SpanClose(Outcome outcome, Tags tags): outcome(outcome), tags(kj::mv(tags)) {} + +SpanClose::SpanClose(rpc::Trace::SpanClose::Reader reader) + : outcome(reader.getOutcome()), + tags(maybeGetTags(reader)) {} + +void SpanClose::copyTo(rpc::Trace::SpanClose::Builder builder) const { + builder.setOutcome(outcome); + auto outTags = builder.initTags(tags.size()); + for (size_t n = 0; n < tags.size(); n++) { + tags[n].copyTo(outTags[n]); + } +} + +SpanClose SpanClose::clone() const { + return SpanClose(outcome, KJ_MAP(tag, tags) { return tag.clone(); }); +} + +// ====================================================================================== +// Mark + +Mark::Mark(kj::String name): name(kj::mv(name)) {} + +Mark::Mark(rpc::Trace::Mark::Reader reader): name(kj::str(reader.getName())) {} + +void Mark::copyTo(rpc::Trace::Mark::Builder builder) const { + builder.setName(name); +} + +Mark Mark::clone() const { + return Mark(kj::str(name)); +} + +// ====================================================================================== +// Metric +namespace { +Metric::Key getMetricKey(const rpc::Trace::Metric::Reader& reader) { + auto key = reader.getKey(); + switch (key.which()) { + case rpc::Trace::Metric::Key::Which::TEXT: { + return kj::str(key.getText()); + } + case rpc::Trace::Metric::Key::Which::ID: { + return key.getId(); + } + } + KJ_UNREACHABLE; +} +} // namespace + +Metric::Metric(Type type, Key key, double value) + : type(type), + key(kj::mv(key)), + value(kj::mv(value)) {} + +Metric::Metric(rpc::Trace::Metric::Reader reader) + : type(reader.getType()), + key(getMetricKey(reader)), + value(reader.getValue()) {} + +void Metric::copyTo(rpc::Trace::Metric::Builder builder) const { + builder.setType(type); + KJ_SWITCH_ONEOF(key) { + KJ_CASE_ONEOF(str, kj::String) { + builder.getKey().setText(str); + } + KJ_CASE_ONEOF(id, uint32_t) { + builder.getKey().setId(id); + } + } + builder.setValue(value); +} + +Metric Metric::clone() const { + auto newKey = ([&]() -> Key { + KJ_SWITCH_ONEOF(key) { + KJ_CASE_ONEOF(str, kj::String) { + return kj::str(str); + } + KJ_CASE_ONEOF(id, uint32_t) { + return id; + } + } + KJ_UNREACHABLE; + })(); + return Metric(type, kj::mv(newKey), value); +} + +bool Metric::keyMatches(kj::OneOf check) { + KJ_SWITCH_ONEOF(check) { + KJ_CASE_ONEOF(c, kj::StringPtr) { + KJ_IF_SOME(k, key.tryGet()) { + return k == c; + } + } + KJ_CASE_ONEOF(u, uint32_t) { + KJ_IF_SOME(k, key.tryGet()) { + return k == u; + } + } + } + return false; +} + +// ====================================================================================== +// Dropped + +Dropped::Dropped(uint32_t start, uint32_t end): start(start), end(end) {} + +Dropped::Dropped(rpc::Trace::Dropped::Reader reader) + : start(reader.getStart()), + end(reader.getEnd()) {} + +void Dropped::copyTo(rpc::Trace::Dropped::Builder builder) const { + builder.setStart(start); + builder.setEnd(end); +} + +Dropped Dropped::clone() const { + return Dropped(start, end); +} + +} // namespace workerd::trace diff --git a/src/workerd/io/trace-common.h b/src/workerd/io/trace-common.h new file mode 100644 index 00000000000..03cce0f0ff6 --- /dev/null +++ b/src/workerd/io/trace-common.h @@ -0,0 +1,611 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +namespace kj { +enum class HttpMethod; +} // namespace kj + +namespace workerd { + +// trace-commmon.h defines the common data structures used by both the legacy and streaming trace +// models. These are primarily structs adapting the capnp schema definitions of the types. + +using LogLevel = rpc::Trace::Log::Level; +using ExecutionModel = rpc::Trace::ExecutionModel; + +enum class PipelineLogLevel { + // WARNING: This must be kept in sync with PipelineDef::LogLevel (which is not in the OSS + // release). + NONE, + FULL +}; + +// A legacy in-memory accumulated Trace +class Trace; + +namespace trace { + +template +concept IsEnum = std::is_enum::value; + +// A trace::Tag is a key-value pair that can be attached to multiple types of objects +// as an extension field. They can be used to provide additional context, extend +// types, etc. +// +// The tag key can be either a string or a numeric index. When a numeric index is +// used, the tag represents some internally defined field in the runtime that should +// be defined in a capnp schema somewhere. Workerd does not define any of these keys. +// +// The tag value can be one of either a boolean, signed or unsigned 64-bit integer, +// a double, a string, or an arbitrary byte array. When a byte array is used, the +// specific format is specific to the tag key. No general assumptions can be made +// about the value without knowledge of the specific key. +struct Tag final { + using TagValue = kj::OneOf>; + using TagKey = kj::OneOf; + TagKey key; + TagValue value; + explicit Tag(TagKey key, TagValue value); + + template + explicit Tag(Key key, TagValue value): Tag(static_cast(key), kj::mv(value)) {} + + Tag(rpc::Trace::Tag::Reader reader); + + bool keyMatches(kj::OneOf key); + + template + inline bool keyMatches(Key key) { + return keyMatches(static_cast(key)); + } + + void copyTo(rpc::Trace::Tag::Builder builder) const; + Tag clone() const; +}; +using Tags = kj::Array; + +// Metadata describing the start of a fetch request. +struct FetchEventInfo final { + struct Header; + + explicit FetchEventInfo( + kj::HttpMethod method, kj::String url, kj::String cfJson, kj::Array
headers); + FetchEventInfo(rpc::Trace::FetchEventInfo::Reader reader); + FetchEventInfo(FetchEventInfo&&) = default; + FetchEventInfo& operator=(FetchEventInfo&&) = default; + KJ_DISALLOW_COPY(FetchEventInfo); + + struct Header final { + explicit Header(kj::String name, kj::String value); + Header(rpc::Trace::FetchEventInfo::Header::Reader reader); + + kj::String name; + kj::String value; + + void copyTo(rpc::Trace::FetchEventInfo::Header::Builder builder) const; + + JSG_MEMORY_INFO(Header) { + tracker.trackField("name", name); + tracker.trackField("value", value); + } + }; + + kj::HttpMethod method; + kj::String url; + // TODO(perf): It might be more efficient to store some sort of parsed JSON result instead? + kj::String cfJson; + kj::Array
headers; + + void copyTo(rpc::Trace::FetchEventInfo::Builder builder) const; + + FetchEventInfo clone() const; +}; + +// The FetchResponseInfo struct is used to attach additional detail about the conclusion +// of fetch request. It is to be included in a closing Span event. The FetchResponseInfo +// is not an event on it's own. +struct FetchResponseInfo final { + explicit FetchResponseInfo(uint16_t statusCode); + FetchResponseInfo(rpc::Trace::FetchResponseInfo::Reader reader); + FetchResponseInfo(FetchResponseInfo&&) = default; + FetchResponseInfo& operator=(FetchResponseInfo&&) = default; + KJ_DISALLOW_COPY(FetchResponseInfo); + + uint16_t statusCode; + + void copyTo(rpc::Trace::FetchResponseInfo::Builder builder) const; + FetchResponseInfo clone() const; +}; + +// Metadata describing the start of a JS RPC event +struct JsRpcEventInfo final { + explicit JsRpcEventInfo(kj::String methodName); + JsRpcEventInfo(rpc::Trace::JsRpcEventInfo::Reader reader); + JsRpcEventInfo(JsRpcEventInfo&&) = default; + JsRpcEventInfo& operator=(JsRpcEventInfo&&) = default; + KJ_DISALLOW_COPY(JsRpcEventInfo); + + kj::String methodName; + + void copyTo(rpc::Trace::JsRpcEventInfo::Builder builder) const; + JsRpcEventInfo clone() const; +}; + +// Metadata describing the start of a scheduled worker event (cron workers). +struct ScheduledEventInfo final { + explicit ScheduledEventInfo(double scheduledTime, kj::String cron); + ScheduledEventInfo(rpc::Trace::ScheduledEventInfo::Reader reader); + ScheduledEventInfo(ScheduledEventInfo&&) = default; + ScheduledEventInfo& operator=(ScheduledEventInfo&&) = default; + KJ_DISALLOW_COPY(ScheduledEventInfo); + + double scheduledTime; + kj::String cron; + + void copyTo(rpc::Trace::ScheduledEventInfo::Builder builder) const; + ScheduledEventInfo clone() const; +}; + +// Metadata describing the start of an alarm event. +struct AlarmEventInfo final { + explicit AlarmEventInfo(kj::Date scheduledTime); + AlarmEventInfo(rpc::Trace::AlarmEventInfo::Reader reader); + AlarmEventInfo(AlarmEventInfo&&) = default; + AlarmEventInfo& operator=(AlarmEventInfo&&) = default; + KJ_DISALLOW_COPY(AlarmEventInfo); + + kj::Date scheduledTime; + + void copyTo(rpc::Trace::AlarmEventInfo::Builder builder) const; + AlarmEventInfo clone() const; +}; + +// Metadata describing the start of a queue event. +struct QueueEventInfo final { + explicit QueueEventInfo(kj::String queueName, uint32_t batchSize); + QueueEventInfo(rpc::Trace::QueueEventInfo::Reader reader); + QueueEventInfo(QueueEventInfo&&) = default; + QueueEventInfo& operator=(QueueEventInfo&&) = default; + KJ_DISALLOW_COPY(QueueEventInfo); + + kj::String queueName; + uint32_t batchSize; + + void copyTo(rpc::Trace::QueueEventInfo::Builder builder) const; + QueueEventInfo clone() const; +}; + +// Metadata describing the start of an email event. +struct EmailEventInfo final { + explicit EmailEventInfo(kj::String mailFrom, kj::String rcptTo, uint32_t rawSize); + EmailEventInfo(rpc::Trace::EmailEventInfo::Reader reader); + EmailEventInfo(EmailEventInfo&&) = default; + EmailEventInfo& operator=(EmailEventInfo&&) = default; + KJ_DISALLOW_COPY(EmailEventInfo); + + kj::String mailFrom; + kj::String rcptTo; + uint32_t rawSize; + + void copyTo(rpc::Trace::EmailEventInfo::Builder builder) const; + EmailEventInfo clone() const; +}; + +// Metadata describing the start of a hibernatable web socket event. +struct HibernatableWebSocketEventInfo final { + struct Message {}; + struct Close { + uint16_t code; + bool wasClean; + }; + struct Error {}; + + using Type = kj::OneOf; + + explicit HibernatableWebSocketEventInfo(Type type); + HibernatableWebSocketEventInfo(rpc::Trace::HibernatableWebSocketEventInfo::Reader reader); + HibernatableWebSocketEventInfo(HibernatableWebSocketEventInfo&&) = default; + HibernatableWebSocketEventInfo& operator=(HibernatableWebSocketEventInfo&&) = default; + KJ_DISALLOW_COPY(HibernatableWebSocketEventInfo); + + Type type; + + void copyTo(rpc::Trace::HibernatableWebSocketEventInfo::Builder builder) const; + static Type readFrom(rpc::Trace::HibernatableWebSocketEventInfo::Reader reader); + HibernatableWebSocketEventInfo clone() const; +}; + +// Metadata describing the start of a custom event (currently unused) but here +// to support legacy trace use cases. +struct CustomEventInfo final { + explicit CustomEventInfo() {}; + CustomEventInfo(rpc::Trace::CustomEventInfo::Reader reader) {}; + CustomEventInfo(CustomEventInfo&&) = default; + CustomEventInfo& operator=(CustomEventInfo&&) = default; + KJ_DISALLOW_COPY(CustomEventInfo); + + CustomEventInfo clone() const { + return CustomEventInfo(); + } +}; + +// Metadata describing the start of a legacy tail event (that is, the event +// that delivers collected traces to a legacy tail worker) +struct TraceEventInfo final { + struct TraceItem; + + explicit TraceEventInfo(kj::ArrayPtr> traces); + explicit TraceEventInfo(kj::Array traces); + TraceEventInfo(rpc::Trace::TraceEventInfo::Reader reader); + TraceEventInfo(TraceEventInfo&&) = default; + TraceEventInfo& operator=(TraceEventInfo&&) = default; + KJ_DISALLOW_COPY(TraceEventInfo); + + struct TraceItem { + explicit TraceItem(kj::Maybe scriptName); + TraceItem(rpc::Trace::TraceEventInfo::TraceItem::Reader reader); + TraceItem(TraceItem&&) = default; + TraceItem& operator=(TraceItem&&) = default; + KJ_DISALLOW_COPY(TraceItem); + + kj::Maybe scriptName; + + void copyTo(rpc::Trace::TraceEventInfo::TraceItem::Builder builder) const; + TraceItem clone() const; + }; + + kj::Vector traces; + + void copyTo(rpc::Trace::TraceEventInfo::Builder builder) const; + TraceEventInfo clone() const; +}; + +// The set of structs that make up the EventInfo union all represent trigger +// events for worker requests... That is, for instance, when a worker receives +// a fetch request, a FetchEventInfo will be emitted; if the worker receives a +// JS RPC request, a JsRpcEventInfo will be emitted; and so on. +using EventInfo = kj::OneOf; + +// Metadata describing the onset of a trace session. The first event in any trace session +// will always be an Onset event. It details information about which worker, script, etc +// is being traced. Every trace session will have exactly one Onset event. +struct Onset final { + explicit Onset() = default; + Onset(kj::Maybe scriptName, + kj::Maybe> scriptVersion, + kj::Maybe dispatchNamespace, + kj::Maybe scriptId, + kj::Array scriptTags, + kj::Maybe entrypoint, + ExecutionModel executionModel, + Tags tags = nullptr); + Onset(rpc::Trace::Onset::Reader reader); + Onset(Onset&&) = default; + Onset& operator=(Onset&&) = default; + KJ_DISALLOW_COPY(Onset); + + // Note that all of these fields could be represented by tags but are kept + // as separate fields for legacy reasons. + kj::Maybe scriptName = kj::none; + kj::Maybe> scriptVersion = kj::none; + kj::Maybe dispatchNamespace = kj::none; + kj::Maybe scriptId = kj::none; + kj::Array scriptTags = nullptr; + kj::Maybe entrypoint = kj::none; + ExecutionModel executionModel; + kj::Maybe info = kj::none; + Tags tags = nullptr; + + void copyTo(rpc::Trace::Onset::Builder builder) const; + Onset clone() const; +}; + +// Used to describe the final outcome of the trace. Every trace session should +// have at most a single Outcome event that is the final event in the stream. +struct Outcome final { + using Info = kj::OneOf; + Outcome() = default; + explicit Outcome(EventOutcome outcome, kj::Maybe info = kj::none); + Outcome(rpc::Trace::Outcome::Reader reader); + Outcome(Outcome&&) = default; + Outcome& operator=(Outcome&&) = default; + KJ_DISALLOW_COPY(Outcome); + + EventOutcome outcome = EventOutcome::UNKNOWN; + + // The closing outcome event should include an info field that matches the kind of + // event that started the span. For instance, if the span was started with a + // FetchEventInfo event, then the outcome should be closed with a FetchResponseInfo. + kj::Maybe info; + + void copyTo(rpc::Trace::Outcome::Builder builder) const; + Outcome clone() const; +}; + +// The SpanClose struct identifies the *close* of a span in the stream. A span is +// a logical grouping of events. Spans can be nested. +struct SpanClose final { + using Outcome = rpc::Trace::SpanClose::SpanOutcome; + explicit SpanClose(Outcome outcome, Tags tags = nullptr); + SpanClose(rpc::Trace::SpanClose::Reader reader); + SpanClose(SpanClose&&) = default; + SpanClose& operator=(SpanClose&&) = default; + KJ_DISALLOW_COPY(SpanClose); + + Outcome outcome; + Tags tags; + + void copyTo(rpc::Trace::SpanClose::Builder builder) const; + SpanClose clone() const; +}; + +// Describes an event caused by use of the Node.js diagnostics-channel API. +struct DiagnosticChannelEvent final { + explicit DiagnosticChannelEvent( + kj::Date timestamp, kj::String channel, kj::Array message); + DiagnosticChannelEvent(rpc::Trace::DiagnosticChannelEvent::Reader reader); + DiagnosticChannelEvent(DiagnosticChannelEvent&&) = default; + DiagnosticChannelEvent& operator=(DiagnosticChannelEvent&&) = default; + KJ_DISALLOW_COPY(DiagnosticChannelEvent); + + kj::Date timestamp; + kj::String channel; + kj::Array message; + + void copyTo(rpc::Trace::DiagnosticChannelEvent::Builder builder) const; + DiagnosticChannelEvent clone() const; +}; + +// The Log struct is used by the legacy tracing (v1 tail workers) model and +// is unchanged for treaming traces. This is the struct that is used, for +// instance, to carry console.log outputs into the trace. +struct Log final { + explicit Log(kj::Date timestamp, LogLevel logLevel, kj::String message); + Log(rpc::Trace::Log::Reader reader); + Log(Log&&) = default; + KJ_DISALLOW_COPY(Log); + + // Only as accurate as Worker's Date.now(), for Spectre mitigation. + kj::Date timestamp; + + LogLevel logLevel; + // TODO(soon): Just string for now. Eventually, capture serialized JS objects. + kj::String message; + + void copyTo(rpc::Trace::Log::Builder builder) const; + Log clone() const; +}; + +// The LogV2 struct is used by the streaming trace model. It serves the same +// purpose as the Log struct above but allows for v8 serialized binary data +// as an alternative to plain text and allows for additional metadata tags +// to be added. It is separated out into a new struct in order to prevent +// introducing any backwards compat issues with the original legacy trace +// implementation. +// TODO(soon): Coallesce Log and LogV2 +struct LogV2 final { + explicit LogV2(kj::Date timestamp, + LogLevel logLevel, + kj::OneOf, kj::String> message, + Tags tags = nullptr, + bool truncated = false); + LogV2(rpc::Trace::LogV2::Reader reader); + LogV2(LogV2&&) = default; + LogV2& operator=(LogV2&&) = default; + KJ_DISALLOW_COPY(LogV2); + + // Only as accurate as Worker's Date.now(), for Spectre mitigation. + kj::Date timestamp; + + LogLevel logLevel; + kj::OneOf, kj::String> message; + Tags tags = nullptr; + bool truncated = false; + + void copyTo(rpc::Trace::LogV2::Builder builder) const; + LogV2 clone() const; +}; + +// Describes metadata of an Exception that is added to the trace. +struct Exception final { + // The Detail here is additional, optional information about the exception. + // Intende to provide additional context when appropriate. Not every error + // will have a Detail. + struct Detail { + // If the JS Error object has a cause property, then it will be serialized + // into it's own Exception instance and attached here. + kj::Maybe> cause = kj::none; + // If the JS Error object is an AggregateError or SuppressedError, or if + // it has an errors property like those standard types, then those errors + // will be serialized into their own Exception instances and attached here. + kj::Array> errors = nullptr; + + // These flags match the additional workers-specific properties that we + // add to errors. See makeInternalError in jsg/util.c++. + bool remote = false; + bool retryable = false; + bool overloaded = false; + bool durableObjectReset = false; + Tags tags = nullptr; + + Detail clone() const; + }; + + explicit Exception(kj::Date timestamp, + kj::String name, + kj::String message, + kj::Maybe stack, + kj::Maybe detail = kj::none); + Exception(rpc::Trace::Exception::Reader reader); + Exception(Exception&&) = default; + Exception& operator=(Exception&&) = default; + KJ_DISALLOW_COPY(Exception); + + // Only as accurate as Worker's Date.now(), for Spectre mitigation. + kj::Date timestamp; + + kj::String name; + kj::String message; + kj::Maybe stack; + Detail detail; + + void copyTo(rpc::Trace::Exception::Builder builder) const; + Exception clone() const; +}; + +// Describes the start (and kind) of a subrequest initiated during the trace. +// For instance, a fetch request or a JS RPC call. +struct Subrequest final { + using Info = kj::OneOf; + explicit Subrequest(uint32_t id, kj::Maybe info); + Subrequest(rpc::Trace::Subrequest::Reader reader); + Subrequest(Subrequest&&) = default; + Subrequest& operator=(Subrequest&&) = default; + KJ_DISALLOW_COPY(Subrequest); + + uint32_t id; + kj::Maybe info; + + void copyTo(rpc::Trace::Subrequest::Builder builder) const; + Subrequest clone() const; +}; + +// Describes the results of a subrequest initiated during the trace. The id +// must match a previously emitted Subrequest event, and the info (if provided) +// should correlate to the kind of subrequest that was made. +struct SubrequestOutcome final { + using Info = kj::OneOf; + explicit SubrequestOutcome(uint32_t id, kj::Maybe info, SpanClose::Outcome outcome); + SubrequestOutcome(rpc::Trace::SubrequestOutcome::Reader reader); + SubrequestOutcome(SubrequestOutcome&&) = default; + SubrequestOutcome& operator=(SubrequestOutcome&&) = default; + KJ_DISALLOW_COPY(SubrequestOutcome); + + uint32_t id; + kj::Maybe info; + + // A subrequest is a technically a special form of span, and therefore can + // have a Span outcome. + SpanClose::Outcome outcome; + + void copyTo(rpc::Trace::SubrequestOutcome::Builder builder) const; + SubrequestOutcome clone() const; +}; + +// A Mark is a simple event that can be used to mark a significant point in the +// trace. This currently has no analog in the current tail workers model. +struct Mark final { + explicit Mark(kj::String name); + Mark(rpc::Trace::Mark::Reader reader); + Mark(Mark&&) = default; + Mark& operator=(Mark&&) = default; + KJ_DISALLOW_COPY(Mark); + + kj::String name; + + void copyTo(rpc::Trace::Mark::Builder builder) const; + Mark clone() const; +}; + +// A Metric is a key-value pair that can be emitted as part of the trace. Metrics +// include things like CPU time, Wall time, isolate heap memory usage, etc. The +// structure is left intentionally flexible to allow for a wide range of possible +// metrics to be emitted. This is a new feature in the streaming trace model. +struct Metric final { + using Key = kj::OneOf; + using Type = rpc::Trace::Metric::Type; + + enum class Common { + CPU_TIME, + WALL_TIME, + }; + + explicit Metric(Type type, Key key, double value); + + template + explicit Metric(Type type, K key, double value) + : Metric(type, static_cast(key), value) {} + + Metric(rpc::Trace::Metric::Reader reader); + Metric(Metric&&) = default; + Metric& operator=(Metric&&) = default; + KJ_DISALLOW_COPY(Metric); + + Type type; + Key key; + double value; + + bool keyMatches(kj::OneOf key); + + template + inline bool keyMatches(Key key) { + return keyMatches(static_cast(key)); + } + + void copyTo(rpc::Trace::Metric::Builder builder) const; + Metric clone() const; + + static inline Metric forWallTime(kj::Duration duration) { + return Metric(Type::COUNTER, Common::WALL_TIME, duration / kj::MILLISECONDS); + } + + static Metric forCpuTime(kj::Duration duration) { + return Metric(Type::COUNTER, Common::CPU_TIME, duration / kj::MILLISECONDS); + } +}; +using Metrics = kj::Array; + +// A Dropped event can be used to indicate that a specific range of events were +// dropped from the stream. This would typically be used in cases where the process +// may be too overloaded to successfully deliver all events to the tail worker. +// The start/end values are inclusive and indicate the range of events (by sequence +// number) that were dropped. +struct Dropped final { + explicit Dropped(uint32_t start, uint32_t end); + Dropped(rpc::Trace::Dropped::Reader reader); + Dropped(Dropped&&) = default; + Dropped& operator=(Dropped&&) = default; + KJ_DISALLOW_COPY(Dropped); + + uint32_t start; + uint32_t end; + + void copyTo(rpc::Trace::Dropped::Builder builder) const; + Dropped clone() const; +}; + +// ====================================================================================== +// The base class for both the original legacy Trace (defined in trace-legacy.h) +// and the new StreamingTrace (defined in trace-streaming.h) +class TraceBase { +public: + virtual void addException(trace::Exception&& exception) = 0; + virtual void addDiagnosticChannelEvent(trace::DiagnosticChannelEvent&& event) = 0; + virtual void addMark(kj::StringPtr mark) = 0; + virtual void addMetrics(trace::Metrics&& metrics) = 0; + virtual void addSubrequest(trace::Subrequest&& subrequest) = 0; + virtual void addSubrequestOutcome(trace::SubrequestOutcome&& outcome) = 0; + virtual void addCustom(Tags&& tags) = 0; +}; + +} // namespace trace +} // namespace workerd diff --git a/src/workerd/io/trace-legacy.c++ b/src/workerd/io/trace-legacy.c++ new file mode 100644 index 00000000000..1af1d157732 --- /dev/null +++ b/src/workerd/io/trace-legacy.c++ @@ -0,0 +1,387 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +#include "trace-legacy.h" + +#include + +#include +#include + +namespace workerd { + +namespace { +// Approximately how much external data we allow in a trace before we start ignoring requests. We +// want this number to be big enough to be useful for tracing, but small enough to make it hard to +// DoS the C++ heap -- keeping in mind we can record a trace per handler run during a request. +static constexpr size_t MAX_TRACE_BYTES = 128 * 1024; +// Limit spans to at most 512, it could be difficult to fit e.g. 1024 spans within MAX_TRACE_BYTES +// unless most of the included spans do not include tags. If use cases arise where this amount is +// insufficient, merge smaller spans together or drop smaller spans. +static constexpr size_t MAX_USER_SPANS = 512; +} // namespace + +Trace::Trace(trace::Onset&& onset): onsetInfo(kj::mv(onset)) {} +Trace::Trace(rpc::Trace::Reader reader) { + mergeFrom(reader, PipelineLogLevel::FULL); +} + +Trace::~Trace() noexcept(false) {} + +void Trace::copyTo(rpc::Trace::Builder builder) { + { + auto list = builder.initLogs(logs.size() + spans.size()); + for (auto i: kj::indices(logs)) { + logs[i].copyTo(list[i]); + } + // Add spans represented as logs to the logs object. + for (auto i: kj::indices(spans)) { + spans[i].copyTo(list[i + logs.size()]); + } + } + + { + auto list = builder.initExceptions(exceptions.size()); + for (auto i: kj::indices(exceptions)) { + exceptions[i].copyTo(list[i]); + } + } + + builder.setTruncated(truncated); + builder.setOutcome(outcomeInfo.outcome); + builder.setCpuTime(cpuTime / kj::MILLISECONDS); + builder.setWallTime(wallTime / kj::MILLISECONDS); + KJ_IF_SOME(name, onsetInfo.scriptName) { + builder.setScriptName(name); + } + KJ_IF_SOME(version, onsetInfo.scriptVersion) { + builder.setScriptVersion(*version); + } + KJ_IF_SOME(id, onsetInfo.scriptId) { + builder.setScriptId(id); + } + KJ_IF_SOME(ns, onsetInfo.dispatchNamespace) { + builder.setDispatchNamespace(ns); + } + + { + auto list = builder.initScriptTags(onsetInfo.scriptTags.size()); + for (auto i: kj::indices(onsetInfo.scriptTags)) { + list.set(i, onsetInfo.scriptTags[i]); + } + } + + KJ_IF_SOME(e, onsetInfo.entrypoint) { + builder.setEntrypoint(e); + } + builder.setExecutionModel(onsetInfo.executionModel); + + builder.setEventTimestampNs((eventTimestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS); + + auto eventInfoBuilder = builder.initEventInfo(); + KJ_IF_SOME(e, onsetInfo.info) { + KJ_SWITCH_ONEOF(e) { + KJ_CASE_ONEOF(fetch, trace::FetchEventInfo) { + auto fetchBuilder = eventInfoBuilder.initFetch(); + fetch.copyTo(fetchBuilder); + } + KJ_CASE_ONEOF(jsRpc, trace::JsRpcEventInfo) { + auto jsRpcBuilder = eventInfoBuilder.initJsRpc(); + jsRpc.copyTo(jsRpcBuilder); + } + KJ_CASE_ONEOF(scheduled, trace::ScheduledEventInfo) { + auto scheduledBuilder = eventInfoBuilder.initScheduled(); + scheduled.copyTo(scheduledBuilder); + } + KJ_CASE_ONEOF(alarm, trace::AlarmEventInfo) { + auto alarmBuilder = eventInfoBuilder.initAlarm(); + alarm.copyTo(alarmBuilder); + } + KJ_CASE_ONEOF(queue, trace::QueueEventInfo) { + auto queueBuilder = eventInfoBuilder.initQueue(); + queue.copyTo(queueBuilder); + } + KJ_CASE_ONEOF(email, trace::EmailEventInfo) { + auto emailBuilder = eventInfoBuilder.initEmail(); + email.copyTo(emailBuilder); + } + KJ_CASE_ONEOF(trace, trace::TraceEventInfo) { + auto traceBuilder = eventInfoBuilder.initTrace(); + trace.copyTo(traceBuilder); + } + KJ_CASE_ONEOF(hibWs, trace::HibernatableWebSocketEventInfo) { + auto hibWsBuilder = eventInfoBuilder.initHibernatableWebSocket(); + hibWs.copyTo(hibWsBuilder); + } + KJ_CASE_ONEOF(custom, trace::CustomEventInfo) { + eventInfoBuilder.initCustom(); + } + } + } else { + eventInfoBuilder.setNone(); + } + + KJ_IF_SOME(fetchResponseInfo, this->fetchResponseInfo) { + auto fetchResponseInfoBuilder = builder.initResponse(); + fetchResponseInfo.copyTo(fetchResponseInfoBuilder); + } + + { + auto list = builder.initDiagnosticChannelEvents(diagnosticChannelEvents.size()); + for (auto i: kj::indices(diagnosticChannelEvents)) { + diagnosticChannelEvents[i].copyTo(list[i]); + } + } +} + +void Trace::mergeFrom(rpc::Trace::Reader reader, PipelineLogLevel pipelineLogLevel) { + // Sandboxed workers currently record their traces as if the pipeline log level were set to + // "full", so we may need to filter out the extra data after receiving the traces back. + if (pipelineLogLevel != PipelineLogLevel::NONE) { + logs.addAll(reader.getLogs()); + exceptions.addAll(reader.getExceptions()); + diagnosticChannelEvents.addAll(reader.getDiagnosticChannelEvents()); + } + + truncated = reader.getTruncated(); + outcomeInfo.outcome = reader.getOutcome(); + cpuTime = reader.getCpuTime() * kj::MILLISECONDS; + wallTime = reader.getWallTime() * kj::MILLISECONDS; + + // mergeFrom() is called both when deserializing traces from a sandboxed + // worker and when deserializing traces sent to a sandboxed trace worker. In + // the former case, the trace's scriptName (and other fields like + // scriptVersion) are already set and the deserialized value is missing, so + // we need to be careful not to overwrite the set value. + if (reader.hasScriptName()) { + onsetInfo.scriptName = kj::str(reader.getScriptName()); + } + + if (reader.hasScriptVersion()) { + onsetInfo.scriptVersion = capnp::clone(reader.getScriptVersion()); + } + + if (reader.hasScriptId()) { + onsetInfo.scriptId = kj::str(reader.getScriptId()); + } + + if (reader.hasDispatchNamespace()) { + onsetInfo.dispatchNamespace = kj::str(reader.getDispatchNamespace()); + } + + if (auto tags = reader.getScriptTags(); tags.size() > 0) { + onsetInfo.scriptTags = KJ_MAP(tag, tags) { return kj::str(tag); }; + } + + if (reader.hasEntrypoint()) { + onsetInfo.entrypoint = kj::str(reader.getEntrypoint()); + } + onsetInfo.executionModel = reader.getExecutionModel(); + + eventTimestamp = kj::UNIX_EPOCH + reader.getEventTimestampNs() * kj::NANOSECONDS; + + if (pipelineLogLevel == PipelineLogLevel::NONE) { + onsetInfo.info = kj::none; + } else { + auto e = reader.getEventInfo(); + switch (e.which()) { + case rpc::Trace::EventInfo::Which::FETCH: + onsetInfo.info = trace::FetchEventInfo(e.getFetch()); + break; + case rpc::Trace::EventInfo::Which::JS_RPC: + onsetInfo.info = trace::JsRpcEventInfo(e.getJsRpc()); + break; + case rpc::Trace::EventInfo::Which::SCHEDULED: + onsetInfo.info = trace::ScheduledEventInfo(e.getScheduled()); + break; + case rpc::Trace::EventInfo::Which::ALARM: + onsetInfo.info = trace::AlarmEventInfo(e.getAlarm()); + break; + case rpc::Trace::EventInfo::Which::QUEUE: + onsetInfo.info = trace::QueueEventInfo(e.getQueue()); + break; + case rpc::Trace::EventInfo::Which::EMAIL: + onsetInfo.info = trace::EmailEventInfo(e.getEmail()); + break; + case rpc::Trace::EventInfo::Which::TRACE: + onsetInfo.info = trace::TraceEventInfo(e.getTrace()); + break; + case rpc::Trace::EventInfo::Which::HIBERNATABLE_WEB_SOCKET: + onsetInfo.info = trace::HibernatableWebSocketEventInfo(e.getHibernatableWebSocket()); + break; + case rpc::Trace::EventInfo::Which::CUSTOM: + onsetInfo.info = trace::CustomEventInfo(e.getCustom()); + break; + case rpc::Trace::EventInfo::Which::NONE: + onsetInfo.info = kj::none; + break; + } + } + + if (reader.hasResponse()) { + fetchResponseInfo = trace::FetchResponseInfo(reader.getResponse()); + } +} + +void Trace::setEventInfo(kj::Date timestamp, trace::EventInfo&& info) { + KJ_ASSERT(onsetInfo.info == kj::none, "tracer can only be used for a single event"); + eventTimestamp = timestamp; + + size_t newSize = bytesUsed; + KJ_SWITCH_ONEOF(info) { + KJ_CASE_ONEOF(fetch, trace::FetchEventInfo) { + newSize += fetch.url.size(); + for (const auto& header: fetch.headers) { + newSize += header.name.size() + header.value.size(); + } + newSize += fetch.cfJson.size(); + if (newSize > MAX_TRACE_BYTES) { + truncated = true; + logs.add(timestamp, LogLevel::WARN, + kj::str("[\"Trace resource limit exceeded; could not capture event info.\"]")); + onsetInfo.info = trace::FetchEventInfo(fetch.method, {}, {}, {}); + return; + } + } + KJ_CASE_ONEOF(_, trace::JsRpcEventInfo) {} + KJ_CASE_ONEOF(_, trace::ScheduledEventInfo) {} + KJ_CASE_ONEOF(_, trace::AlarmEventInfo) {} + KJ_CASE_ONEOF(_, trace::QueueEventInfo) {} + KJ_CASE_ONEOF(_, trace::EmailEventInfo) {} + KJ_CASE_ONEOF(_, trace::TraceEventInfo) {} + KJ_CASE_ONEOF(_, trace::HibernatableWebSocketEventInfo) {} + KJ_CASE_ONEOF(_, trace::CustomEventInfo) {} + } + bytesUsed = newSize; + onsetInfo.info = kj::mv(info); +} + +void Trace::setOutcome(trace::Outcome&& info) { + outcomeInfo = kj::mv(info); +} + +void Trace::addLog(trace::Log&& log, bool isSpan) { + if (exceededLogLimit) { + return; + } + size_t newSize = bytesUsed + sizeof(trace::Log) + log.message.size(); + if (newSize > MAX_TRACE_BYTES) { + exceededLogLimit = true; + truncated = true; + // We use a JSON encoded array/string to match other console.log() recordings: + logs.add(log.timestamp, LogLevel::WARN, + kj::str( + "[\"Log size limit exceeded: More than 128KB of data (across console.log statements, exception, request metadata and headers) was logged during a single request. Subsequent data for this request will not be recorded in logs, appear when tailing this Worker's logs, or in Tail Workers.\"]")); + return; + } + bytesUsed = newSize; + if (isSpan) { + spans.add(kj::mv(log)); + numSpans++; + return; + } + logs.add(kj::mv(log)); +} + +void Trace::addException(trace::Exception&& exception) { + if (exceededExceptionLimit) { + return; + } + size_t newSize = + bytesUsed + sizeof(trace::Exception) + exception.name.size() + exception.message.size(); + KJ_IF_SOME(s, exception.stack) { + newSize += s.size(); + } + if (newSize > MAX_TRACE_BYTES) { + exceededExceptionLimit = true; + truncated = true; + exceptions.add(exception.timestamp, kj::str("Error"), + kj::str("Trace resource limit exceeded; subsequent exceptions not recorded."), kj::none); + return; + } + bytesUsed = newSize; + exceptions.add(kj::mv(exception)); +} + +void Trace::addDiagnosticChannelEvent(trace::DiagnosticChannelEvent&& event) { + if (exceededDiagnosticChannelEventLimit) { + return; + } + size_t newSize = bytesUsed + sizeof(trace::DiagnosticChannelEvent) + event.channel.size() + + event.message.size(); + if (newSize > MAX_TRACE_BYTES) { + exceededDiagnosticChannelEventLimit = true; + truncated = true; + diagnosticChannelEvents.add( + event.timestamp, kj::str("workerd.LimitExceeded"), kj::Array()); + return; + } + bytesUsed = newSize; + diagnosticChannelEvents.add(kj::mv(event)); +} + +void Trace::addSpan(const Span&& span, kj::String spanContext) { + // This is where we'll actually encode the span for now. + // Drop any spans beyond MAX_USER_SPANS. + if (numSpans >= MAX_USER_SPANS) { + return; + } + if (isPredictableModeForTest()) { + // Do not emit span duration information in predictable mode. + addLog(trace::Log(span.endTime, LogLevel::LOG, kj::str("[\"span: ", span.operationName, "\"]")), + true); + } else { + // Time since Unix epoch in seconds, with millisecond precision + double epochSecondsStart = (span.startTime - kj::UNIX_EPOCH) / kj::MILLISECONDS / 1000.0; + double epochSecondsEnd = (span.endTime - kj::UNIX_EPOCH) / kj::MILLISECONDS / 1000.0; + auto message = kj::str("[\"span: ", span.operationName, " ", kj::mv(spanContext), " ", + epochSecondsStart, " ", epochSecondsEnd, "\"]"); + addLog(trace::Log(span.endTime, LogLevel::LOG, kj::mv(message)), true); + } + + // TODO(cleanup): Create a function in kj::OneOf to automatically convert to a given type (i.e + // String) to avoid having to handle each type explicitly here. + for (const Span::TagMap::Entry& tag: span.tags) { + auto value = [&]() { + KJ_SWITCH_ONEOF(tag.value) { + KJ_CASE_ONEOF(str, kj::String) { + return kj::str(str); + } + KJ_CASE_ONEOF(val, int64_t) { + return kj::str(val); + } + KJ_CASE_ONEOF(val, double) { + return kj::str(val); + } + KJ_CASE_ONEOF(val, bool) { + return kj::str(val); + } + } + KJ_UNREACHABLE; + }(); + kj::String message = kj::str("[\"tag: "_kj, tag.key, " => "_kj, value, "\"]"); + addLog(trace::Log(span.endTime, LogLevel::LOG, kj::mv(message)), true); + } +} + +void Trace::setFetchResponseInfo(trace::FetchResponseInfo&& info) { + KJ_REQUIRE(KJ_REQUIRE_NONNULL(onsetInfo.info).is()); + KJ_ASSERT(fetchResponseInfo == kj::none, "setFetchResponseInfo can only be called once"); + fetchResponseInfo = kj::mv(info); +} + +void Trace::addMetrics(trace::Metrics&& metrics) { + for (auto& metric: metrics) { + if (metric.keyMatches(trace::Metric::Common::CPU_TIME)) { + // The CPU_TIME metric will always be a int64_t converted from a kj::Duration + // If it's not, we'll ignore it. + cpuTime = static_cast(metric.value) * kj::MILLISECONDS; + } else if (metric.keyMatches(trace::Metric::Common::WALL_TIME)) { + // The WALL_TIME metric will always be a int64_t converted from a kj::Duration + // If it's not, we'll ignore it. + wallTime = static_cast(metric.value) * kj::MILLISECONDS; + } + } +} + +} // namespace workerd diff --git a/src/workerd/io/trace-legacy.h b/src/workerd/io/trace-legacy.h new file mode 100644 index 00000000000..5bd7ffd667e --- /dev/null +++ b/src/workerd/io/trace-legacy.h @@ -0,0 +1,147 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +#pragma once + +#include "trace-common.h" + +#include + +#include +#include + +namespace workerd { + +// ====================================================================================== +// Represents a trace span. `Span` objects are delivered to `SpanObserver`s for recording. To +// create a `Span`, use a `SpanBuilder`. (Used in the legacy trace api) +// Note that this is not the same thing as a trace::Span. This class is part of the legacy +// API for representing spans using log messages instead. +struct Span final { + using TagValue = kj::OneOf; + // TODO(someday): Support binary bytes, too. + using TagMap = kj::HashMap; + using Tag = TagMap::Entry; + + struct Log { + kj::Date timestamp; + Tag tag; + }; + + kj::ConstString operationName; + kj::Date startTime; + kj::Date endTime; + TagMap tags; + kj::Vector logs; + + // We set an arbitrary (-ish) cap on log messages for safety. If we drop logs because of this, + // we report how many in a final "dropped_logs" log. + // + // At the risk of being too clever, I chose a limit that is one below a power of two so that + // we'll typically have space for one last element available for the "dropped_logs" log without + // needing to grow the vector. + static constexpr uint16_t MAX_LOGS = 1023; + kj::uint droppedLogs = 0; + + explicit Span(kj::ConstString operationName, kj::Date startTime) + : operationName(kj::mv(operationName)), + startTime(startTime), + endTime(startTime) {} + Span(Span&&) = default; + Span& operator=(Span&&) = default; + KJ_DISALLOW_COPY(Span); +}; + +// This is the original implementation of how trace worker data was collected. All of the +// data is stored in an in-memory structure and delivered as a single unit to the trace +// worker only when the request is fully completed. The data is held in memory and capped +// at a specific limit. Once the limit is reached, new data is silently dropped. + +// TODO(someday): See if we can merge similar code concepts... Trace fills a role similar to +// MetricsCollector::Reporter::StageEvent, and Tracer fills a role similar to +// MetricsCollector::Request. Currently, the major differences are: +// +// - MetricsCollector::Request uses its destructor to measure a IoContext's wall time, so +// it needs to live exactly as long as its IoContext. Tracer currently needs to live as +// long as both the IoContext and those of any subrequests. +// - Due to the difference in lifetimes, results of each become available in a different order, +// and intermediate values can be freed at different times. +// - Request builds a vector of results, while Tracer builds a tree. + +// TODO(cleanup) - worth separating into immutable Trace vs. mutable TraceBuilder? + +// Collects trace information about the handling of a worker/pipeline fetch event. +class Trace final: public kj::Refcounted, public trace::TraceBase { +public: + explicit Trace(trace::Onset&& onset = trace::Onset()); + Trace(rpc::Trace::Reader reader); + ~Trace() noexcept(false); + KJ_DISALLOW_COPY_AND_MOVE(Trace); + + // We treat the origin value as "unset". + kj::Date eventTimestamp = kj::UNIX_EPOCH; + + trace::Onset onsetInfo; + trace::Outcome outcomeInfo{}; + kj::Duration cpuTime; + kj::Duration wallTime; + + kj::Vector logs; + // TODO(o11y): Convert this to actually store spans. + kj::Vector spans; + // A request's trace can have multiple exceptions due to separate request/waitUntil tasks. + kj::Vector exceptions; + + kj::Vector diagnosticChannelEvents; + + kj::Maybe fetchResponseInfo; + + // ==================================================================================== + // trace::TraceBase implementation + + void setEventInfo(kj::Date timestamp, trace::EventInfo&& info); + void setOutcome(trace::Outcome&& outcome); + void setFetchResponseInfo(trace::FetchResponseInfo&& info); + void addSpan(const Span&& span, kj::String spanContext); + void addLog(trace::Log&& log, bool isSpan = false); + + void addException(trace::Exception&& exception) override; + void addDiagnosticChannelEvent(trace::DiagnosticChannelEvent&& event) override; + + void addMark(kj::StringPtr mark) override { + // These are currently ignored for legacy traces. + } + + void addMetrics(trace::Metrics&& metrics) override; + + void addSubrequest(trace::Subrequest&& subrequest) override { + // These are currently ignored for legacy traces. + } + + void addSubrequestOutcome(trace::SubrequestOutcome&& outcome) override { + // These are currently ignored for legacy traces. + } + + void addCustom(trace::Tags&& tags) override { + // These are currently ignored for legacy traces. + } + + bool truncated = false; + bool exceededLogLimit = false; + bool exceededExceptionLimit = false; + bool exceededDiagnosticChannelEventLimit = false; + // Trace data is recorded outside of the JS heap. To avoid DoS, we keep an estimate of trace + // data size, and we stop recording if too much is used. + size_t bytesUsed = 0; + size_t numSpans = 0; + + // Copy content from this trace into `builder`. + void copyTo(rpc::Trace::Builder builder); + + // Adds all content from `reader` to this `Trace`. (Typically this trace is empty before the + // call.) Also applies filtering to the trace as if it were recorded with the given + // pipelineLogLevel. + void mergeFrom(rpc::Trace::Reader reader, PipelineLogLevel pipelineLogLevel); +}; + +} // namespace workerd diff --git a/src/workerd/io/trace-streaming-test.c++ b/src/workerd/io/trace-streaming-test.c++ new file mode 100644 index 00000000000..978a24ee0f9 --- /dev/null +++ b/src/workerd/io/trace-streaming-test.c++ @@ -0,0 +1,163 @@ +#include "trace-streaming.h" + +#include +#include + +namespace workerd { +namespace { + +static auto idFactory = StreamingTrace::IdFactory::newUuidIdFactory(); + +struct MockTimeProvider final: public StreamingTrace::TimeProvider { + kj::Date getNow() const override { + return 0 * kj::MILLISECONDS + kj::UNIX_EPOCH; + } +}; +MockTimeProvider mockTimeProvider; + +KJ_TEST("We can create a simple, empty streaming trace session with implicit unknown outcome") { + trace::Onset onset; + // In this test we are creating a simple trace with no events or spans. + // The delegate should be called exactly twice, once with the onset and + // once with an implicit unknown outcome (since we're not explicitly calling + // setOutcome ourselves) + int callCount = 0; + kj::String id = nullptr; + { + auto streamingTrace = workerd::StreamingTrace::create( + *idFactory, kj::mv(onset), [&callCount, &id](workerd::StreamEvent&& event) { + KJ_EXPECT(event.span.id == 0, "the root span should have id 0"); + KJ_EXPECT(event.span.parent == 0, "the root span should have no parent"); + switch (callCount++) { + case 0: { + id = kj::str(event.id); + KJ_EXPECT(id.size() > 0, "there should be a non-empty id. we don't care what it is."); + KJ_EXPECT(event.sequence == 0, "the sequence should be 0"); + auto& onset = KJ_ASSERT_NONNULL( + event.event.tryGet(), "the event should be an onset event"); + auto& info = KJ_ASSERT_NONNULL(onset.info, "the onset event should have info"); + KJ_ASSERT_NONNULL( + info.tryGet(), "the onset event should have fetch info"); + break; + } + case 1: { + KJ_EXPECT(event.id == id, "the event id should be the same as the onset id"); + KJ_EXPECT(event.sequence == 1, "the sequence should have been incremented"); + auto& outcome = KJ_ASSERT_NONNULL( + event.event.tryGet(), "the event should be an outcome event"); + KJ_EXPECT(outcome.outcome == EventOutcome::UNKNOWN, "the outcome should be unknown"); + break; + } + } + }, mockTimeProvider); + // The onset event will not be sent until the event info is set. + streamingTrace->setEventInfo( + trace::FetchEventInfo(kj::HttpMethod::GET, kj::String(), kj::String(), nullptr)); + } + KJ_EXPECT(callCount == 2); +} + +KJ_TEST("We can create a simple, empty streaming trace session with expicit canceled outcome") { + trace::Onset onset; + // In this test we are creating a simple trace with no events or spans. + // The delegate should be called exactly twice, once with the onset and + // once with an explicit canceled outcome. + int callCount = 0; + kj::String id = nullptr; + auto streamingTrace = workerd::StreamingTrace::create( + *idFactory, kj::mv(onset), [&callCount, &id](workerd::StreamEvent&& event) { + KJ_EXPECT(event.span.id == 0, "the root span should have id 0"); + KJ_EXPECT(event.span.parent == 0, "the root span should have no parent"); + switch (callCount++) { + case 0: { + id = kj::str(event.id); + KJ_EXPECT(id.size() > 0, "there should be a non-empty id. we don't care what it is."); + KJ_EXPECT(event.sequence == 0, "the sequence should be 0"); + KJ_ASSERT_NONNULL(event.event.tryGet(), "the event should be an onset event"); + break; + } + case 1: { + KJ_EXPECT(event.id == id, "the event id should be the same as the onset id"); + KJ_EXPECT(event.sequence == 1, "the sequence should have been incremented"); + auto& outcome = KJ_ASSERT_NONNULL( + event.event.tryGet(), "the event should be an outcome event"); + KJ_EXPECT(outcome.outcome == EventOutcome::CANCELED, "the outcome should be canceled"); + break; + } + } + }, mockTimeProvider); + // The onset event will not be sent until the event info is set. + streamingTrace->setEventInfo( + trace::FetchEventInfo(kj::HttpMethod::GET, kj::String(), kj::String(), nullptr)); + streamingTrace->setOutcome(trace::Outcome(EventOutcome::CANCELED)); + KJ_EXPECT(callCount == 2); +} + +KJ_TEST("We can create a simple, streaming trace session with a single explicitly canceled trace") { + trace::Onset onset; + // In this test we are creating a simple trace with no events or spans. + // The delegate should be called exactly five times. + int callCount = 0; + kj::String id = nullptr; + { + auto streamingTrace = workerd::StreamingTrace::create( + *idFactory, kj::mv(onset), [&callCount, &id](workerd::StreamEvent&& event) { + switch (callCount++) { + case 0: { + KJ_EXPECT(event.span.id == 0, "the root span should have id 0"); + KJ_EXPECT(event.span.parent == 0, "the root span should have no parent"); + id = kj::str(event.id); + KJ_EXPECT(id.size() > 0, "there should be a non-empty id. we don't care what it is."); + KJ_EXPECT(event.sequence == 0, "the sequence should be 0"); + KJ_ASSERT_NONNULL( + event.event.tryGet(), "the event should be an onset event"); + break; + } + case 1: { + KJ_EXPECT(event.span.id == 1, "the mark event should have span id 1"); + KJ_EXPECT(event.span.parent == 0, "the parent span should be the root span"); + KJ_EXPECT(event.id == id); + KJ_EXPECT(event.sequence == 1); + auto& mark = KJ_ASSERT_NONNULL(event.event.tryGet()); + KJ_EXPECT(mark.name == "bar"_kj); + break; + } + case 2: { + KJ_EXPECT(event.span.id == 1, "the spanclose should have span id 1"); + KJ_EXPECT(event.span.parent == 0, "the parent span should be the root span"); + KJ_EXPECT(event.sequence, 2); + KJ_EXPECT(event.id == id); + auto& span = KJ_ASSERT_NONNULL( + event.event.tryGet(), "the event should be a spanclose event"); + KJ_EXPECT(span.outcome == trace::SpanClose::Outcome::CANCELED); + break; + } + case 3: { + KJ_EXPECT(event.span.id == 0, "the root span should have id 0"); + KJ_EXPECT(event.span.parent == 0, "the root span should have no parent"); + KJ_EXPECT(event.id == id, "the event id should be the same as the onset id"); + KJ_EXPECT(event.sequence == 3, "the sequence should have been incremented"); + auto& outcome = KJ_ASSERT_NONNULL( + event.event.tryGet(), "the event should be an outcome event"); + KJ_EXPECT(outcome.outcome == EventOutcome::CANCELED, "the outcome should be canceled"); + break; + } + } + }, mockTimeProvider); + + streamingTrace->setEventInfo(trace::FetchEventInfo( + kj::HttpMethod::GET, kj::str("http://example.com"), kj::String(), {})); + auto span = KJ_ASSERT_NONNULL(streamingTrace->newChildSpan()); + span->addMark("bar"); + // Intentionally not calling setOutcome on the span. + streamingTrace->setOutcome(trace::Outcome(EventOutcome::CANCELED)); + + // Once the outcome is set, no more events should be emitted but calling the methods on + // the span shouldn't crash or error. + span->addMark("foo"); + } + KJ_EXPECT(callCount == 4); +} + +} // namespace +} // namespace workerd diff --git a/src/workerd/io/trace-streaming.c++ b/src/workerd/io/trace-streaming.c++ new file mode 100644 index 00000000000..9396bb33ccc --- /dev/null +++ b/src/workerd/io/trace-streaming.c++ @@ -0,0 +1,595 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +#include "trace-streaming.h" + +#include + +namespace workerd { + +// ====================================================================================== +// TailIDs +namespace { +// The UuidId implementation is really intended only for testing and local development. +// In production, it likely makes more sense to use a RayID or something that can be +// better correlated to other diagnostic and tracing mechanisms, and that can be better +// guaranteed to be sufficiently unique across the entire production environment. +class UuidId final: public StreamingTrace::IdFactory::Id { +public: + UuidId(): uuid(randomUUID(kj::none)) {} + UuidId(kj::String value): uuid(kj::mv(value)) {} + KJ_DISALLOW_COPY_AND_MOVE(UuidId); + + kj::String toString() const override { + return kj::str(uuid); + } + + bool equals(const Id& other) const override { + return uuid == other.toString(); + } + + kj::Own clone() const override { + return kj::heap(toString()); + } + +private: + kj::String uuid; +}; + +class UuidIdFactory final: public StreamingTrace::IdFactory { +public: + kj::Own newId() override { + return kj::heap(); + } +}; + +UuidIdFactory uuidIdFactory; +} // namespace + +kj::Own StreamingTrace::IdFactory::newUuidIdFactory() { + return kj::Own(&uuidIdFactory, kj::NullDisposer::instance); +} + +kj::Own StreamingTrace::IdFactory::newIdFromString( + kj::StringPtr str) { + // This is cheating a bit. We're not actually creating a UUID here but the UuidId class + // is really just a wrapper around a string so we can safely use it here. + return kj::heap(kj::str(str)); +} + +// ====================================================================================== +// StreamingTrace + +namespace { +constexpr trace::SpanClose::Outcome eventOutcomeToSpanOutcome(const EventOutcome& outcome) { + switch (outcome) { + case EventOutcome::UNKNOWN: + return trace::SpanClose::Outcome::UNKNOWN; + case EventOutcome::OK: + return trace::SpanClose::Outcome::OK; + case EventOutcome::RESPONSE_STREAM_DISCONNECTED: + [[fallthrough]]; + case EventOutcome::CANCELED: + return trace::SpanClose::Outcome::CANCELED; + case EventOutcome::LOAD_SHED: + [[fallthrough]]; + case EventOutcome::EXCEEDED_CPU: + [[fallthrough]]; + case EventOutcome::KILL_SWITCH: + [[fallthrough]]; + case EventOutcome::DAEMON_DOWN: + [[fallthrough]]; + case EventOutcome::SCRIPT_NOT_FOUND: + [[fallthrough]]; + case EventOutcome::EXCEEDED_MEMORY: + [[fallthrough]]; + case EventOutcome::EXCEPTION: + return trace::SpanClose::Outcome::EXCEPTION; + } + KJ_UNREACHABLE; +} +} // namespace + +struct StreamingTrace::Impl { + kj::Own id; + trace::Onset onsetInfo; + StreamingTrace::Delegate delegate; + const StreamingTrace::TimeProvider& timeProvider; + uint32_t spanCounter = 0; + uint32_t sequenceCounter = 0; + + Impl(kj::Own id, + trace::Onset&& onset, + StreamingTrace::Delegate delegate, + const TimeProvider& timeProvider) + : id(kj::mv(id)), + onsetInfo(kj::mv(onset)), + delegate(kj::mv(delegate)), + timeProvider(timeProvider) {} +}; + +kj::Own StreamingTrace::create(IdFactory& idFactory, + trace::Onset&& onset, + Delegate delegate, + const TimeProvider& timeProvider) { + return kj::heap(idFactory.newId(), kj::mv(onset), kj::mv(delegate), timeProvider); +} + +StreamingTrace::StreamingTrace(kj::Own id, + trace::Onset&& onset, + Delegate delegate, + const TimeProvider& timeProvider) + : impl(kj::heap( + kj::mv(id), kj::mv(onset), kj::mv(delegate), timeProvider)) {} + +StreamingTrace::~StreamingTrace() noexcept(false) { + if (impl != kj::none) { + // If the streaming tracing is dropped without having an outcome explicitly + // specified, the outcome is explicitly set to unknown. + setOutcome(trace::Outcome(EventOutcome::UNKNOWN)); + } + // Stage spans should be closed by calling setOutcome above + KJ_ASSERT(spans.empty(), "all stage spans must be closed before the trace is destroyed"); +} + +void StreamingTrace::setEventInfo(trace::EventInfo&& eventInfo) { + auto& i = KJ_ASSERT_NONNULL(impl, "the streaming trace is closed"); + KJ_ASSERT(i->onsetInfo.info == kj::none, "the onset event info can only be set once"); + i->onsetInfo.info = kj::mv(eventInfo); + StreamEvent event( + i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), i->onsetInfo.clone()); + addStreamEvent(kj::mv(event)); +} + +void StreamingTrace::setOutcome(trace::Outcome&& outcome) { + KJ_IF_SOME(i, impl) { + // If the event info was never set on the streaming trace, setting the outcome + // is a non-op. + if (i->onsetInfo.info == kj::none) { + impl = kj::none; + return; + } + + for (auto& span: spans) { + span.setOutcome(eventOutcomeToSpanOutcome(outcome.outcome)); + } + KJ_ASSERT(spans.empty(), "all stage spans must be closed before the trace is destroyed"); + StreamEvent event( + i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), kj::mv(outcome)); + addStreamEvent(kj::mv(event)); + + // Then close out the stream by destroying the impl + impl = kj::none; + } +} + +void StreamingTrace::addDropped(uint32_t start, uint32_t end) { + KJ_IF_SOME(i, impl) { + KJ_REQUIRE_NONNULL(i->onsetInfo.info, "the event info must be set before other events"); + StreamEvent event(i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), + trace::Dropped{start, end}); + addStreamEvent(kj::mv(event)); + } +} + +kj::Maybe> StreamingTrace::newChildSpan(trace::Tags tags) { + KJ_IF_SOME(i, impl) { + KJ_REQUIRE_NONNULL(i->onsetInfo.info, "the event info must be set before other events"); + return kj::heap(spans, *this, 0, kj::mv(tags)); + } + return kj::none; +} + +void StreamingTrace::addLog(trace::LogV2&& log) { + KJ_IF_SOME(i, impl) { + KJ_REQUIRE_NONNULL(i->onsetInfo.info, "the event info must be set before other events"); + StreamEvent event( + i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), kj::mv(log)); + addStreamEvent(kj::mv(event)); + } +} + +void StreamingTrace::addException(trace::Exception&& exception) { + KJ_IF_SOME(i, impl) { + KJ_REQUIRE_NONNULL(i->onsetInfo.info, "the event info must be set before other events"); + StreamEvent event( + i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), kj::mv(exception)); + addStreamEvent(kj::mv(event)); + } +} + +void StreamingTrace::addDiagnosticChannelEvent(trace::DiagnosticChannelEvent&& dce) { + KJ_IF_SOME(i, impl) { + KJ_REQUIRE_NONNULL(i->onsetInfo.info, "the event info must be set before other events"); + StreamEvent event( + i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), kj::mv(dce)); + addStreamEvent(kj::mv(event)); + } +} + +void StreamingTrace::addMark(kj::StringPtr mark) { + KJ_IF_SOME(i, impl) { + KJ_REQUIRE_NONNULL(i->onsetInfo.info, "the event info must be set before other events"); + StreamEvent event(i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), + trace::Mark(kj::str(mark))); + addStreamEvent(kj::mv(event)); + } +} + +void StreamingTrace::addMetrics(trace::Metrics&& metrics) { + KJ_IF_SOME(i, impl) { + KJ_REQUIRE_NONNULL(i->onsetInfo.info, "the event info must be set before other events"); + StreamEvent event( + i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), kj::mv(metrics)); + addStreamEvent(kj::mv(event)); + } +} + +void StreamingTrace::addSubrequest(trace::Subrequest&& subrequest) { + KJ_IF_SOME(i, impl) { + KJ_REQUIRE_NONNULL(i->onsetInfo.info, "the event info must be set before other events"); + StreamEvent event( + i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), kj::mv(subrequest)); + addStreamEvent(kj::mv(event)); + } +} + +void StreamingTrace::addSubrequestOutcome(trace::SubrequestOutcome&& outcome) { + KJ_IF_SOME(i, impl) { + KJ_REQUIRE_NONNULL(i->onsetInfo.info, "the event info must be set before other events"); + StreamEvent event( + i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), kj::mv(outcome)); + addStreamEvent(kj::mv(event)); + } +} + +void StreamingTrace::addCustom(trace::Tags&& tags) { + KJ_IF_SOME(i, impl) { + KJ_REQUIRE_NONNULL(i->onsetInfo.info, "the event info must be set before other events"); + StreamEvent event( + i->id->toString(), {}, i->timeProvider.getNow(), getNextSequence(), kj::mv(tags)); + addStreamEvent(kj::mv(event)); + } +} + +uint32_t StreamingTrace::getNextSpanId() { + auto& i = KJ_ASSERT_NONNULL(impl, "the streaming trace is closed"); + return ++i->spanCounter; +} + +uint32_t StreamingTrace::getNextSequence() { + auto& i = KJ_ASSERT_NONNULL(impl, "the streaming trace is closed"); + return i->sequenceCounter++; +} + +void StreamingTrace::addStreamEvent(StreamEvent&& event) { + KJ_IF_SOME(i, impl) { + i->delegate(kj::mv(event)); + } +} + +kj::Maybe StreamingTrace::getId() const { + KJ_IF_SOME(i, impl) { + return *i->id; + } + return kj::none; +} + +// ====================================================================================== + +struct StreamingTrace::Span::Impl { + StreamingTrace::Span& self; + kj::List& spans; + StreamingTrace& trace; + uint32_t id; + uint32_t parentSpan; + trace::Tags tags; + bool eventInfoSet = false; + Impl(StreamingTrace::Span& self, + kj::List& spans, + StreamingTrace& trace, + uint32_t parentSpan, + trace::Tags tags) + : self(self), + spans(spans), + trace(trace), + id(this->trace.getNextSpanId()), + parentSpan(parentSpan), + tags(kj::mv(tags)) { + spans.add(self); + } + KJ_DISALLOW_COPY_AND_MOVE(Impl); + ~Impl() { + KJ_ASSERT(self.link.isLinked()); + spans.remove(self); + } + + StreamEvent makeStreamEvent(auto payload) const { + auto& tailId = KJ_ASSERT_NONNULL(trace.getId(), "the streaming trace is closed"); + auto& traceImpl = KJ_ASSERT_NONNULL(trace.impl); + return StreamEvent(tailId.toString(), + StreamEvent::Span{ + .id = id, + .parent = parentSpan, + }, + traceImpl->timeProvider.getNow(), trace.getNextSequence(), kj::mv(payload)); + } +}; + +StreamingTrace::Span::Span(kj::List& parentSpans, + StreamingTrace& trace, + uint32_t parentSpan, + trace::Tags tags) + : impl(kj::heap(*this, parentSpans, trace, parentSpan, kj::mv(tags))) { + KJ_DASSERT(this, link.isLinked()); +} + +void StreamingTrace::Span::setOutcome(trace::SpanClose::Outcome outcome, trace::Tags tags) { + KJ_IF_SOME(i, impl) { // Then close out the stream by destroying the impl + for (auto& span: spans) { + span.setOutcome(outcome); + } + KJ_ASSERT(spans.empty(), "all child spans must be closed before the parent span is closed"); + i->trace.addStreamEvent(i->makeStreamEvent(trace::SpanClose(outcome, kj::mv(tags)))); + impl = kj::none; + } +} + +StreamingTrace::Span::~Span() noexcept(false) { + setOutcome(trace::SpanClose::Outcome::UNKNOWN); + KJ_ASSERT(spans.empty(), "all schild spans must be closed before the trace is destroyed"); +} + +void StreamingTrace::Span::addLog(trace::LogV2&& log) { + KJ_IF_SOME(i, impl) { + i->trace.addStreamEvent(i->makeStreamEvent(kj::mv(log))); + } +} + +void StreamingTrace::Span::addException(trace::Exception&& exception) { + KJ_IF_SOME(i, impl) { + i->trace.addStreamEvent(i->makeStreamEvent(kj::mv(exception))); + } +} + +void StreamingTrace::Span::addDiagnosticChannelEvent(trace::DiagnosticChannelEvent&& event) { + KJ_IF_SOME(i, impl) { + i->trace.addStreamEvent(i->makeStreamEvent(kj::mv(event))); + } +} + +void StreamingTrace::Span::addMark(kj::StringPtr mark) { + KJ_IF_SOME(i, impl) { + i->trace.addStreamEvent(i->makeStreamEvent(trace::Mark(kj::str(mark)))); + } +} + +void StreamingTrace::Span::addMetrics(trace::Metrics&& metrics) { + KJ_IF_SOME(i, impl) { + i->trace.addStreamEvent(i->makeStreamEvent(kj::mv(metrics))); + } +} + +void StreamingTrace::Span::addSubrequest(trace::Subrequest&& subrequest) { + KJ_IF_SOME(i, impl) { + i->trace.addStreamEvent(i->makeStreamEvent(kj::mv(subrequest))); + } +} + +void StreamingTrace::Span::addSubrequestOutcome(trace::SubrequestOutcome&& outcome) { + KJ_IF_SOME(i, impl) { + i->trace.addStreamEvent(i->makeStreamEvent(kj::mv(outcome))); + } +} + +void StreamingTrace::Span::addCustom(trace::Tags&& tags) { + KJ_IF_SOME(i, impl) { + i->trace.addStreamEvent(i->makeStreamEvent(kj::mv(tags))); + } +} + +kj::Maybe> StreamingTrace::Span::newChildSpan(trace::Tags tags) { + KJ_IF_SOME(i, impl) { + return kj::heap(spans, i->trace, i->id, kj::mv(tags)); + } + return kj::none; +} + +// ====================================================================================== +// StreamEvent + +namespace { +StreamEvent::Span getSpan(const rpc::Trace::StreamEvent::Reader& reader) { + auto span = reader.getSpan(); + return StreamEvent::Span{ + .id = span.getId(), + .parent = span.getParent(), + }; +} + +StreamEvent::Event getEvent(const rpc::Trace::StreamEvent::Reader& reader) { + auto event = reader.getEvent(); + switch (event.which()) { + case rpc::Trace::StreamEvent::Event::Which::ONSET: { + return trace::Onset(event.getOnset()); + } + case rpc::Trace::StreamEvent::Event::Which::OUTCOME: { + return trace::Outcome(event.getOutcome()); + } + case rpc::Trace::StreamEvent::Event::Which::DROPPED: { + return trace::Dropped(event.getDropped()); + } + case rpc::Trace::StreamEvent::Event::Which::SPAN_CLOSE: { + return trace::SpanClose(event.getSpanClose()); + } + case rpc::Trace::StreamEvent::Event::Which::LOG: { + return trace::LogV2(event.getLog()); + } + case rpc::Trace::StreamEvent::Event::Which::EXCEPTION: { + return trace::Exception(event.getException()); + } + case rpc::Trace::StreamEvent::Event::Which::DIAGNOSTIC_CHANNEL: { + return trace::DiagnosticChannelEvent(event.getDiagnosticChannel()); + } + case rpc::Trace::StreamEvent::Event::Which::MARK: { + return trace::Mark(event.getMark()); + } + case rpc::Trace::StreamEvent::Event::Which::METRICS: { + auto metrics = event.getMetrics(); + kj::Vector vec(metrics.size()); + for (size_t i = 0; i < metrics.size(); i++) { + trace::Metric metric(metrics[i]); + vec.add(kj::mv(metric)); + } + return vec.releaseAsArray(); + } + case rpc::Trace::StreamEvent::Event::Which::SUBREQUEST: { + return trace::Subrequest(event.getSubrequest()); + } + case rpc::Trace::StreamEvent::Event::Which::SUBREQUEST_OUTCOME: { + return trace::SubrequestOutcome(event.getSubrequestOutcome()); + } + case rpc::Trace::StreamEvent::Event::Which::CUSTOM: { + auto custom = event.getCustom(); + kj::Vector vec(custom.size()); + for (size_t i = 0; i < custom.size(); i++) { + trace::Tag tag(custom[i]); + vec.add(kj::mv(tag)); + } + return vec.releaseAsArray(); + } + } + KJ_UNREACHABLE; +} +} // namespace + +StreamEvent::StreamEvent( + kj::String id, Span span, kj::Date timestampNs, uint32_t sequence, Event event) + : id(kj::mv(id)), + span(kj::mv(span)), + timestampNs(timestampNs), + sequence(sequence), + event(kj::mv(event)) {} + +StreamEvent::StreamEvent(rpc::Trace::StreamEvent::Reader reader) + : id(kj::str(reader.getId())), + span(getSpan(reader)), + timestampNs(reader.getTimestampNs() * kj::MILLISECONDS + kj::UNIX_EPOCH), + sequence(reader.getSequence()), + event(getEvent(reader)) {} + +void StreamEvent::copyTo(rpc::Trace::StreamEvent::Builder builder) const { + builder.setId(id); + auto spanBuilder = builder.initSpan(); + spanBuilder.setId(span.id); + spanBuilder.setParent(span.parent); + builder.setTimestampNs((timestampNs - kj::UNIX_EPOCH) / kj::MILLISECONDS); + builder.setSequence(sequence); + + auto eventBuilder = builder.initEvent(); + KJ_SWITCH_ONEOF(event) { + KJ_CASE_ONEOF(onset, trace::Onset) { + onset.copyTo(eventBuilder.getOnset()); + } + KJ_CASE_ONEOF(outcome, trace::Outcome) { + outcome.copyTo(eventBuilder.getOutcome()); + } + KJ_CASE_ONEOF(dropped, trace::Dropped) { + dropped.copyTo(eventBuilder.getDropped()); + } + KJ_CASE_ONEOF(span, trace::SpanClose) { + span.copyTo(eventBuilder.getSpanClose()); + } + KJ_CASE_ONEOF(log, trace::LogV2) { + log.copyTo(eventBuilder.getLog()); + } + KJ_CASE_ONEOF(exception, trace::Exception) { + exception.copyTo(eventBuilder.getException()); + } + KJ_CASE_ONEOF(diagnosticChannelEvent, trace::DiagnosticChannelEvent) { + diagnosticChannelEvent.copyTo(eventBuilder.getDiagnosticChannel()); + } + KJ_CASE_ONEOF(mark, trace::Mark) { + mark.copyTo(eventBuilder.getMark()); + } + KJ_CASE_ONEOF(metrics, trace::Metrics) { + auto metricsBuilder = eventBuilder.initMetrics(metrics.size()); + for (size_t i = 0; i < metrics.size(); i++) { + metrics[i].copyTo(metricsBuilder[i]); + } + } + KJ_CASE_ONEOF(subrequest, trace::Subrequest) { + subrequest.copyTo(eventBuilder.getSubrequest()); + } + KJ_CASE_ONEOF(subrequestOutcome, trace::SubrequestOutcome) { + subrequestOutcome.copyTo(eventBuilder.getSubrequestOutcome()); + } + KJ_CASE_ONEOF(tags, trace::Tags) { + auto tagsBuilder = eventBuilder.initCustom(tags.size()); + for (size_t i = 0; i < tags.size(); i++) { + tags[i].copyTo(tagsBuilder[i]); + } + } + } +} + +StreamEvent StreamEvent::clone() const { + Span maybeNewSpan{ + .id = span.id, + .parent = span.parent, + }; + + Event newEvent = ([&]() -> Event { + KJ_SWITCH_ONEOF(event) { + KJ_CASE_ONEOF(onset, trace::Onset) { + return onset.clone(); + } + KJ_CASE_ONEOF(outcome, trace::Outcome) { + return outcome.clone(); + } + KJ_CASE_ONEOF(dropped, trace::Dropped) { + return dropped.clone(); + } + KJ_CASE_ONEOF(span, trace::SpanClose) { + return span.clone(); + } + KJ_CASE_ONEOF(log, trace::LogV2) { + return log.clone(); + } + KJ_CASE_ONEOF(exception, trace::Exception) { + return exception.clone(); + } + KJ_CASE_ONEOF(diagnosticChannelEvent, trace::DiagnosticChannelEvent) { + return diagnosticChannelEvent.clone(); + } + KJ_CASE_ONEOF(mark, trace::Mark) { + return mark.clone(); + } + KJ_CASE_ONEOF(metric, trace::Metrics) { + kj::Vector newMetrics(metric.size()); + for (auto& m: metric) { + newMetrics.add(m.clone()); + } + return newMetrics.releaseAsArray(); + } + KJ_CASE_ONEOF(subrequest, trace::Subrequest) { + return subrequest.clone(); + } + KJ_CASE_ONEOF(subrequestOutcome, trace::SubrequestOutcome) { + return subrequestOutcome.clone(); + } + KJ_CASE_ONEOF(tags, trace::Tags) { + kj::Vector newTags(tags.size()); + for (auto& tag: tags) { + newTags.add(tag.clone()); + } + return newTags.releaseAsArray(); + } + } + KJ_UNREACHABLE; + })(); + + return StreamEvent(kj::str(id), kj::mv(maybeNewSpan), timestampNs, sequence, kj::mv(newEvent)); +} + +} // namespace workerd diff --git a/src/workerd/io/trace-streaming.h b/src/workerd/io/trace-streaming.h new file mode 100644 index 00000000000..3ac8c936f40 --- /dev/null +++ b/src/workerd/io/trace-streaming.h @@ -0,0 +1,238 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +#include "trace-common.h" + +#include + +namespace workerd { + +// The Streaming Trace model is designed around the idea of spans. A span is a logical +// grouping of events. Spans can be nested and have outcomes. +// All events always occur within the context of a span. +// +// The streaming trace itself is considered the root span, whose span ID is always 0. The +// root span will always start with an Onset event that communicates basic metadata about +// the worker being traced (for instance, script ID, script version, etc) and the triggering +// event. The streaming trace always ends with an Outcome event that communicates the final +// disposition of the traced worker. +// +// Stage spans can have any number of child spans (and those spans can have child spans of +// their own). +// +// Every span always ends with a Span event that identifies the outcome of that span (which +// can be unknown, ok, canceled, or exception). +// +// Setting the outcome of a span will implicitly close all child spans with the same outcome +// if those are not already closed. If a span is dropped without setting the outcome, and the +// streaming trace is still alive, the span will be implicitly canceled. +// +// Currently the StreamingTrace implementation is not thread-safe. It is expected that +// the StreamingTrace and all Spans are used by a single-thread. + +// ====================================================================================== +// StreamEvent + +// All events on the streaming trace are StreamEvents. A StreamEvent is essentialy +// just an envelope for the actual event data. +struct StreamEvent final { + // The ID of the streaming trace session. This is used to correlate all events + // occurring within the same trace session. + kj::String id; + + struct Span { + uint32_t id = 0; + uint32_t parent = 0; + }; + // The span in which this event has occurred. + Span span; + + kj::Date timestampNs; + + // All events in the stream are sequentially ordered, regardless of what span + // they are in. This allows the exact sequence of events to be easily reconstructed + // on the receiving end. + uint32_t sequence; + + using Event = kj::OneOf; + Event event; + + explicit StreamEvent( + kj::String id, Span span, kj::Date timestampNs, uint32_t sequence, Event event); + StreamEvent(rpc::Trace::StreamEvent::Reader reader); + + void copyTo(rpc::Trace::StreamEvent::Builder builder) const; + StreamEvent clone() const; +}; + +// ====================================================================================== +// StreamingTrace + +class SpanBase: public trace::TraceBase { +public: + virtual void addLog(trace::LogV2&& log) = 0; +}; + +class StreamingTrace final: public SpanBase { +public: + // A StreamingTrace Id provides the unique identifier for a streaming tail session. + // It is used as a correlation key for all events in a single tail stream. + // There need to be some reasonable guarantees of uniqueness at a fairly + // large scale but does not necessarily need to be globally unique. Tail + // workers that are receiving and aggregating tails from multiple workers + // and metals across many colos need to have some reasonable assurance + // that there are unlikely to see collisions. The requirements for generating + // reasonably unique IDs in workerd will be different than generating the + // same in a production environment so we abstract the details and allow + // different implementations to be used. + // + // Applications should generally treat Ids as opaque strings. Every StreamEvent + // within a single tail stream will share the same Id + class IdFactory { + public: + class Id { + public: + virtual kj::String toString() const = 0; + virtual bool equals(const Id& other) const = 0; + virtual kj::Own clone() const = 0; + }; + + virtual kj::Own newId() = 0; + + // An IdFactory implementation that generates Ids that are simply + // random UUIDs. This should generally only be used in local development + // or standalone uses of workerd. + static kj::Own newUuidIdFactory(); + static kj::Own newIdFromString(kj::StringPtr str); + }; + + // The delegate is the piece that actually handles the output of the stream events + using Delegate = kj::Function; + + // The timeProvider is a function that returns the current time. This is used + // to abstract exactly how the trace gets current time. + struct TimeProvider { + virtual kj::Date getNow() const = 0; + }; + + static kj::Own create(IdFactory& idFactory, + trace::Onset&& onset, + Delegate delegate, + const TimeProvider& timeProvider); + + // The constructor is public only to support use of kj::heap. It is not intended + // to be used directly. Use the static create(...) method instead. + explicit StreamingTrace(kj::Own id, + trace::Onset&& onset, + Delegate delegate, + const TimeProvider& timeProvider); + ~StreamingTrace() noexcept(false); + KJ_DISALLOW_COPY_AND_MOVE(StreamingTrace); + + // Spans represent logical groupings of events within a tail stream. For instance, + // a span might represent a single stage in a pipeline, or nested subgroupings of + // events within a stage. + // + // Calling setOutcome(...) on the span will cause the span to be explicitly + // closed with a Span event emitted to the tail stream indicating the outcome. + // If the span is dropped without setting the outcome, and the StreamingTrace + // is still active, then a Span event indicating that the span was canceled + // will be emitted. If the StreamingTrace is not active, then dropping the + // span becomes a non-op and the consumer of the stream will need to infer + // the outcome from the absence of a Span event. + // + // Unrelated spans are permitted to overlap in time but dropping or setting + // the outcome of a parent span will implicitly close all active child spans. + // + // Setting the outcome on the StreamingTrace will implicitly close all active + // spans. + // spans and prevent any new spans from being opened. + class Span final: public SpanBase { + public: + KJ_DISALLOW_COPY_AND_MOVE(Span); + virtual ~Span() noexcept(false); + + void addLog(trace::LogV2&& log) override; + void addException(trace::Exception&& exception) override; + void addDiagnosticChannelEvent(trace::DiagnosticChannelEvent&& event) override; + void addMark(kj::StringPtr mark) override; + void addMetrics(trace::Metrics&& metrics) override; + void addSubrequest(trace::Subrequest&& subrequest) override; + void addSubrequestOutcome(trace::SubrequestOutcome&& outcome) override; + void addCustom(trace::Tags&& tags) override; + kj::Maybe> newChildSpan(trace::Tags tags = nullptr); + + // Setting the outcome of the span explicitly closes the span, after which + // no further events can be emitted. + void setOutcome(trace::SpanClose::Outcome outcome, trace::Tags tags = nullptr); + + private: + struct Impl; + + // Keep the link and spans before the impl so that they are created and destroyed + // in the correct order. + kj::ListLink link; + kj::List spans; + kj::Maybe> impl; + + friend class StageSpan; + friend class StreamingTrace; + + public: + // Public only to support use of kj::heap. Not intended to be called directly. + Span(kj::List& spans, + StreamingTrace& trace, + uint32_t parentSpan = 0, + trace::Tags tags = nullptr); + }; + + // Set the EventInfo for the Onset event. If the Onset already has event + // info, then this will assert. + void setEventInfo(trace::EventInfo&& eventInfo); + + // Explicitly close the tail stream with the given outcome. All open stage + // spans will be implicitly closed with the same outcome. If the outcome + // has already been set, this will assert. + void setOutcome(trace::Outcome&& outcome); + + // Notify the streaming trace that events in the sequence range (start:end) have been dropped. + void addDropped(uint32_t start, uint32_t end); + + kj::Maybe getId() const; + + void addLog(trace::LogV2&& log) override; + void addException(trace::Exception&& exception) override; + void addDiagnosticChannelEvent(trace::DiagnosticChannelEvent&& event) override; + void addMark(kj::StringPtr mark) override; + void addMetrics(trace::Metrics&& metrics) override; + void addSubrequest(trace::Subrequest&& subrequest) override; + void addSubrequestOutcome(trace::SubrequestOutcome&& outcome) override; + void addCustom(trace::Tags&& tags) override; + kj::Maybe> newChildSpan(trace::Tags tags = nullptr); + +private: + struct Impl; + kj::Maybe> impl; + kj::List spans; + + uint32_t getNextSpanId(); + uint32_t getNextSequence(); + void addStreamEvent(StreamEvent&& event); + + friend class Span; + friend class StageSpan; + friend struct Span::Impl; +}; + +} // namespace workerd diff --git a/src/workerd/io/trace.c++ b/src/workerd/io/trace.c++ index cd0c109caf2..ada1cd96141 100644 --- a/src/workerd/io/trace.c++ +++ b/src/workerd/io/trace.c++ @@ -5,8 +5,6 @@ #include #include -#include -#include #include #include @@ -14,493 +12,6 @@ namespace workerd { -// Approximately how much external data we allow in a trace before we start ignoring requests. We -// want this number to be big enough to be useful for tracing, but small enough to make it hard to -// DoS the C++ heap -- keeping in mind we can record a trace per handler run during a request. -static constexpr size_t MAX_TRACE_BYTES = 128 * 1024; -// Limit spans to at most 512, it could be difficult to fit e.g. 1024 spans within MAX_TRACE_BYTES -// unless most of the included spans do not include tags. If use cases arise where this amount is -// insufficient, merge smaller spans together or drop smaller spans. -static constexpr size_t MAX_USER_SPANS = 512; - -namespace { - -static kj::HttpMethod validateMethod(capnp::HttpMethod method) { - KJ_REQUIRE(method <= capnp::HttpMethod::UNSUBSCRIBE, "unknown method", method); - return static_cast(method); -} - -} // namespace - -Trace::FetchEventInfo::FetchEventInfo( - kj::HttpMethod method, kj::String url, kj::String cfJson, kj::Array
headers) - : method(method), - url(kj::mv(url)), - cfJson(kj::mv(cfJson)), - headers(kj::mv(headers)) {} - -Trace::FetchEventInfo::FetchEventInfo(rpc::Trace::FetchEventInfo::Reader reader) - : method(validateMethod(reader.getMethod())), - url(kj::str(reader.getUrl())), - cfJson(kj::str(reader.getCfJson())) { - kj::Vector
v; - v.addAll(reader.getHeaders()); - headers = v.releaseAsArray(); -} - -void Trace::FetchEventInfo::copyTo(rpc::Trace::FetchEventInfo::Builder builder) { - builder.setMethod(static_cast(method)); - builder.setUrl(url); - builder.setCfJson(cfJson); - - auto list = builder.initHeaders(headers.size()); - for (auto i: kj::indices(headers)) { - headers[i].copyTo(list[i]); - } -} - -Trace::FetchEventInfo::Header::Header(kj::String name, kj::String value) - : name(kj::mv(name)), - value(kj::mv(value)) {} - -Trace::FetchEventInfo::Header::Header(rpc::Trace::FetchEventInfo::Header::Reader reader) - : name(kj::str(reader.getName())), - value(kj::str(reader.getValue())) {} - -void Trace::FetchEventInfo::Header::copyTo(rpc::Trace::FetchEventInfo::Header::Builder builder) { - builder.setName(name); - builder.setValue(value); -} - -Trace::JsRpcEventInfo::JsRpcEventInfo(kj::String methodName): methodName(kj::mv(methodName)) {} - -Trace::JsRpcEventInfo::JsRpcEventInfo(rpc::Trace::JsRpcEventInfo::Reader reader) - : methodName(kj::str(reader.getMethodName())) {} - -void Trace::JsRpcEventInfo::copyTo(rpc::Trace::JsRpcEventInfo::Builder builder) { - builder.setMethodName(methodName); -} - -Trace::ScheduledEventInfo::ScheduledEventInfo(double scheduledTime, kj::String cron) - : scheduledTime(scheduledTime), - cron(kj::mv(cron)) {} - -Trace::ScheduledEventInfo::ScheduledEventInfo(rpc::Trace::ScheduledEventInfo::Reader reader) - : scheduledTime(reader.getScheduledTime()), - cron(kj::str(reader.getCron())) {} - -void Trace::ScheduledEventInfo::copyTo(rpc::Trace::ScheduledEventInfo::Builder builder) { - builder.setScheduledTime(scheduledTime); - builder.setCron(cron); -} - -Trace::AlarmEventInfo::AlarmEventInfo(kj::Date scheduledTime): scheduledTime(scheduledTime) {} - -Trace::AlarmEventInfo::AlarmEventInfo(rpc::Trace::AlarmEventInfo::Reader reader) - : scheduledTime(reader.getScheduledTimeMs() * kj::MILLISECONDS + kj::UNIX_EPOCH) {} - -void Trace::AlarmEventInfo::copyTo(rpc::Trace::AlarmEventInfo::Builder builder) { - builder.setScheduledTimeMs((scheduledTime - kj::UNIX_EPOCH) / kj::MILLISECONDS); -} - -Trace::QueueEventInfo::QueueEventInfo(kj::String queueName, uint32_t batchSize) - : queueName(kj::mv(queueName)), - batchSize(batchSize) {} - -Trace::QueueEventInfo::QueueEventInfo(rpc::Trace::QueueEventInfo::Reader reader) - : queueName(kj::heapString(reader.getQueueName())), - batchSize(reader.getBatchSize()) {} - -void Trace::QueueEventInfo::copyTo(rpc::Trace::QueueEventInfo::Builder builder) { - builder.setQueueName(queueName); - builder.setBatchSize(batchSize); -} - -Trace::EmailEventInfo::EmailEventInfo(kj::String mailFrom, kj::String rcptTo, uint32_t rawSize) - : mailFrom(kj::mv(mailFrom)), - rcptTo(kj::mv(rcptTo)), - rawSize(rawSize) {} - -Trace::EmailEventInfo::EmailEventInfo(rpc::Trace::EmailEventInfo::Reader reader) - : mailFrom(kj::heapString(reader.getMailFrom())), - rcptTo(kj::heapString(reader.getRcptTo())), - rawSize(reader.getRawSize()) {} - -void Trace::EmailEventInfo::copyTo(rpc::Trace::EmailEventInfo::Builder builder) { - builder.setMailFrom(mailFrom); - builder.setRcptTo(rcptTo); - builder.setRawSize(rawSize); -} - -kj::Vector getTraceItemsFromTraces( - kj::ArrayPtr> traces) { - return KJ_MAP(t, traces) -> Trace::TraceEventInfo::TraceItem { - return Trace::TraceEventInfo::TraceItem( - t->scriptName.map([](auto& scriptName) { return kj::str(scriptName); })); - }; -} - -Trace::TraceEventInfo::TraceEventInfo(kj::ArrayPtr> traces) - : traces(getTraceItemsFromTraces(traces)) {} - -kj::Vector getTraceItemsFromReader( - rpc::Trace::TraceEventInfo::Reader reader) { - return KJ_MAP(r, reader.getTraces()) -> Trace::TraceEventInfo::TraceItem { - return Trace::TraceEventInfo::TraceItem(r); - }; -} - -Trace::TraceEventInfo::TraceEventInfo(rpc::Trace::TraceEventInfo::Reader reader) - : traces(getTraceItemsFromReader(reader)) {} - -void Trace::TraceEventInfo::copyTo(rpc::Trace::TraceEventInfo::Builder builder) { - auto list = builder.initTraces(traces.size()); - for (auto i: kj::indices(traces)) { - traces[i].copyTo(list[i]); - } -} - -Trace::TraceEventInfo::TraceItem::TraceItem(kj::Maybe scriptName) - : scriptName(kj::mv(scriptName)) {} - -Trace::TraceEventInfo::TraceItem::TraceItem(rpc::Trace::TraceEventInfo::TraceItem::Reader reader) - : scriptName(kj::str(reader.getScriptName())) {} - -void Trace::TraceEventInfo::TraceItem::copyTo( - rpc::Trace::TraceEventInfo::TraceItem::Builder builder) { - KJ_IF_SOME(name, scriptName) { - builder.setScriptName(name); - } -} - -Trace::DiagnosticChannelEvent::DiagnosticChannelEvent( - kj::Date timestamp, kj::String channel, kj::Array message) - : timestamp(timestamp), - channel(kj::mv(channel)), - message(kj::mv(message)) {} - -Trace::DiagnosticChannelEvent::DiagnosticChannelEvent( - rpc::Trace::DiagnosticChannelEvent::Reader reader) - : timestamp(kj::UNIX_EPOCH + reader.getTimestampNs() * kj::NANOSECONDS), - channel(kj::heapString(reader.getChannel())), - message(kj::heapArray(reader.getMessage())) {} - -void Trace::DiagnosticChannelEvent::copyTo(rpc::Trace::DiagnosticChannelEvent::Builder builder) { - builder.setTimestampNs((timestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS); - builder.setChannel(channel); - builder.setMessage(message); -} - -Trace::HibernatableWebSocketEventInfo::HibernatableWebSocketEventInfo(Type type): type(type) {} - -Trace::HibernatableWebSocketEventInfo::HibernatableWebSocketEventInfo( - rpc::Trace::HibernatableWebSocketEventInfo::Reader reader) - : type(readFrom(reader)) {} - -void Trace::HibernatableWebSocketEventInfo::copyTo( - rpc::Trace::HibernatableWebSocketEventInfo::Builder builder) { - auto typeBuilder = builder.initType(); - KJ_SWITCH_ONEOF(type) { - KJ_CASE_ONEOF(_, Message) { - typeBuilder.setMessage(); - } - KJ_CASE_ONEOF(close, Close) { - auto closeBuilder = typeBuilder.initClose(); - closeBuilder.setCode(close.code); - closeBuilder.setWasClean(close.wasClean); - } - KJ_CASE_ONEOF(_, Error) { - typeBuilder.setError(); - } - } -} - -Trace::HibernatableWebSocketEventInfo::Type Trace::HibernatableWebSocketEventInfo::readFrom( - rpc::Trace::HibernatableWebSocketEventInfo::Reader reader) { - auto type = reader.getType(); - switch (type.which()) { - case rpc::Trace::HibernatableWebSocketEventInfo::Type::MESSAGE: { - return Message{}; - } - case rpc::Trace::HibernatableWebSocketEventInfo::Type::CLOSE: { - auto close = type.getClose(); - return Close{ - .code = close.getCode(), - .wasClean = close.getWasClean(), - }; - } - case rpc::Trace::HibernatableWebSocketEventInfo::Type::ERROR: { - return Error{}; - } - } -} - -Trace::FetchResponseInfo::FetchResponseInfo(uint16_t statusCode): statusCode(statusCode) {} - -Trace::FetchResponseInfo::FetchResponseInfo(rpc::Trace::FetchResponseInfo::Reader reader) - : statusCode(reader.getStatusCode()) {} - -void Trace::FetchResponseInfo::copyTo(rpc::Trace::FetchResponseInfo::Builder builder) { - builder.setStatusCode(statusCode); -} - -Trace::Log::Log(kj::Date timestamp, LogLevel logLevel, kj::String message) - : timestamp(timestamp), - logLevel(logLevel), - message(kj::mv(message)) {} - -Trace::Exception::Exception( - kj::Date timestamp, kj::String name, kj::String message, kj::Maybe stack) - : timestamp(timestamp), - name(kj::mv(name)), - message(kj::mv(message)), - stack(kj::mv(stack)) {} - -Trace::Trace(kj::Maybe stableId, - kj::Maybe scriptName, - kj::Maybe> scriptVersion, - kj::Maybe dispatchNamespace, - kj::Maybe scriptId, - kj::Array scriptTags, - kj::Maybe entrypoint, - ExecutionModel executionModel) - : stableId(kj::mv(stableId)), - scriptName(kj::mv(scriptName)), - scriptVersion(kj::mv(scriptVersion)), - dispatchNamespace(kj::mv(dispatchNamespace)), - scriptId(kj::mv(scriptId)), - scriptTags(kj::mv(scriptTags)), - entrypoint(kj::mv(entrypoint)), - executionModel(executionModel) {} -Trace::Trace(rpc::Trace::Reader reader) { - mergeFrom(reader, PipelineLogLevel::FULL); -} - -Trace::~Trace() noexcept(false) {} - -void Trace::copyTo(rpc::Trace::Builder builder) { - { - auto list = builder.initLogs(logs.size() + spans.size()); - for (auto i: kj::indices(logs)) { - logs[i].copyTo(list[i]); - } - // Add spans represented as logs to the logs object. - for (auto i: kj::indices(spans)) { - spans[i].copyTo(list[i + logs.size()]); - } - } - - { - auto list = builder.initExceptions(exceptions.size()); - for (auto i: kj::indices(exceptions)) { - exceptions[i].copyTo(list[i]); - } - } - - builder.setTruncated(truncated); - builder.setOutcome(outcome); - builder.setCpuTime(cpuTime / kj::MILLISECONDS); - builder.setWallTime(wallTime / kj::MILLISECONDS); - KJ_IF_SOME(name, scriptName) { - builder.setScriptName(name); - } - KJ_IF_SOME(version, scriptVersion) { - builder.setScriptVersion(*version); - } - KJ_IF_SOME(id, scriptId) { - builder.setScriptId(id); - } - KJ_IF_SOME(ns, dispatchNamespace) { - builder.setDispatchNamespace(ns); - } - builder.setExecutionModel(executionModel); - - { - auto list = builder.initScriptTags(scriptTags.size()); - for (auto i: kj::indices(scriptTags)) { - list.set(i, scriptTags[i]); - } - } - - KJ_IF_SOME(e, entrypoint) { - builder.setEntrypoint(e); - } - - builder.setEventTimestampNs((eventTimestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS); - - auto eventInfoBuilder = builder.initEventInfo(); - KJ_IF_SOME(e, eventInfo) { - KJ_SWITCH_ONEOF(e) { - KJ_CASE_ONEOF(fetch, FetchEventInfo) { - auto fetchBuilder = eventInfoBuilder.initFetch(); - fetch.copyTo(fetchBuilder); - } - KJ_CASE_ONEOF(jsRpc, JsRpcEventInfo) { - auto jsRpcBuilder = eventInfoBuilder.initJsRpc(); - jsRpc.copyTo(jsRpcBuilder); - } - KJ_CASE_ONEOF(scheduled, ScheduledEventInfo) { - auto scheduledBuilder = eventInfoBuilder.initScheduled(); - scheduled.copyTo(scheduledBuilder); - } - KJ_CASE_ONEOF(alarm, AlarmEventInfo) { - auto alarmBuilder = eventInfoBuilder.initAlarm(); - alarm.copyTo(alarmBuilder); - } - KJ_CASE_ONEOF(queue, QueueEventInfo) { - auto queueBuilder = eventInfoBuilder.initQueue(); - queue.copyTo(queueBuilder); - } - KJ_CASE_ONEOF(email, EmailEventInfo) { - auto emailBuilder = eventInfoBuilder.initEmail(); - email.copyTo(emailBuilder); - } - KJ_CASE_ONEOF(trace, TraceEventInfo) { - auto traceBuilder = eventInfoBuilder.initTrace(); - trace.copyTo(traceBuilder); - } - KJ_CASE_ONEOF(hibWs, HibernatableWebSocketEventInfo) { - auto hibWsBuilder = eventInfoBuilder.initHibernatableWebSocket(); - hibWs.copyTo(hibWsBuilder); - } - KJ_CASE_ONEOF(custom, CustomEventInfo) { - eventInfoBuilder.initCustom(); - } - } - } else { - eventInfoBuilder.setNone(); - } - - KJ_IF_SOME(fetchResponseInfo, this->fetchResponseInfo) { - auto fetchResponseInfoBuilder = builder.initResponse(); - fetchResponseInfo.copyTo(fetchResponseInfoBuilder); - } - - { - auto list = builder.initDiagnosticChannelEvents(diagnosticChannelEvents.size()); - for (auto i: kj::indices(diagnosticChannelEvents)) { - diagnosticChannelEvents[i].copyTo(list[i]); - } - } -} - -void Trace::Log::copyTo(rpc::Trace::Log::Builder builder) { - builder.setTimestampNs((timestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS); - builder.setLogLevel(logLevel); - builder.setMessage(message); -} - -void Trace::Exception::copyTo(rpc::Trace::Exception::Builder builder) { - builder.setTimestampNs((timestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS); - builder.setName(name); - builder.setMessage(message); - KJ_IF_SOME(s, stack) { - builder.setStack(s); - } -} - -void Trace::mergeFrom(rpc::Trace::Reader reader, PipelineLogLevel pipelineLogLevel) { - // Sandboxed workers currently record their traces as if the pipeline log level were set to - // "full", so we may need to filter out the extra data after receiving the traces back. - if (pipelineLogLevel != PipelineLogLevel::NONE) { - logs.addAll(reader.getLogs()); - exceptions.addAll(reader.getExceptions()); - diagnosticChannelEvents.addAll(reader.getDiagnosticChannelEvents()); - } - - truncated = reader.getTruncated(); - outcome = reader.getOutcome(); - cpuTime = reader.getCpuTime() * kj::MILLISECONDS; - wallTime = reader.getWallTime() * kj::MILLISECONDS; - - // mergeFrom() is called both when deserializing traces from a sandboxed - // worker and when deserializing traces sent to a sandboxed trace worker. In - // the former case, the trace's scriptName (and other fields like - // scriptVersion) are already set and the deserialized value is missing, so - // we need to be careful not to overwrite the set value. - if (reader.hasScriptName()) { - scriptName = kj::str(reader.getScriptName()); - } - - if (reader.hasScriptVersion()) { - scriptVersion = capnp::clone(reader.getScriptVersion()); - } - - if (reader.hasScriptId()) { - scriptId = kj::str(reader.getScriptId()); - } - - if (reader.hasDispatchNamespace()) { - dispatchNamespace = kj::str(reader.getDispatchNamespace()); - } - executionModel = reader.getExecutionModel(); - - if (auto tags = reader.getScriptTags(); tags.size() > 0) { - scriptTags = KJ_MAP(tag, tags) { return kj::str(tag); }; - } - - if (reader.hasEntrypoint()) { - entrypoint = kj::str(reader.getEntrypoint()); - } - - eventTimestamp = kj::UNIX_EPOCH + reader.getEventTimestampNs() * kj::NANOSECONDS; - - if (pipelineLogLevel == PipelineLogLevel::NONE) { - eventInfo = kj::none; - } else { - auto e = reader.getEventInfo(); - switch (e.which()) { - case rpc::Trace::EventInfo::Which::FETCH: - eventInfo = FetchEventInfo(e.getFetch()); - break; - case rpc::Trace::EventInfo::Which::JS_RPC: - eventInfo = JsRpcEventInfo(e.getJsRpc()); - break; - case rpc::Trace::EventInfo::Which::SCHEDULED: - eventInfo = ScheduledEventInfo(e.getScheduled()); - break; - case rpc::Trace::EventInfo::Which::ALARM: - eventInfo = AlarmEventInfo(e.getAlarm()); - break; - case rpc::Trace::EventInfo::Which::QUEUE: - eventInfo = QueueEventInfo(e.getQueue()); - break; - case rpc::Trace::EventInfo::Which::EMAIL: - eventInfo = EmailEventInfo(e.getEmail()); - break; - case rpc::Trace::EventInfo::Which::TRACE: - eventInfo = TraceEventInfo(e.getTrace()); - break; - case rpc::Trace::EventInfo::Which::HIBERNATABLE_WEB_SOCKET: - eventInfo = HibernatableWebSocketEventInfo(e.getHibernatableWebSocket()); - break; - case rpc::Trace::EventInfo::Which::CUSTOM: - eventInfo = CustomEventInfo(e.getCustom()); - break; - case rpc::Trace::EventInfo::Which::NONE: - eventInfo = kj::none; - break; - } - } - - if (reader.hasResponse()) { - fetchResponseInfo = FetchResponseInfo(reader.getResponse()); - } -} - -Trace::Log::Log(rpc::Trace::Log::Reader reader) - : timestamp(kj::UNIX_EPOCH + reader.getTimestampNs() * kj::NANOSECONDS), - logLevel(reader.getLogLevel()), - message(kj::str(reader.getMessage())) {} -Trace::Exception::Exception(rpc::Trace::Exception::Reader reader) - : timestamp(kj::UNIX_EPOCH + reader.getTimestampNs() * kj::NANOSECONDS), - name(kj::str(reader.getName())), - message(kj::str(reader.getMessage())) { - if (reader.hasStack()) { - stack = kj::str(reader.getStack()); - } -} - SpanBuilder& SpanBuilder::operator=(SpanBuilder&& other) { end(); observer = kj::mv(other.observer); @@ -559,16 +70,17 @@ void SpanBuilder::addLog(kj::Date timestamp, kj::ConstString key, TagValue value } PipelineTracer::~PipelineTracer() noexcept(false) { - KJ_IF_SOME(p, parentTracer) { - for (auto& t: traces) { - p->traces.add(kj::addRef(*t)); - } - } KJ_IF_SOME(f, completeFulfiller) { f.get()->fulfill(traces.releaseAsArray()); } } +void PipelineTracer::addTracesFromChild(kj::ArrayPtr> traces) { + for (auto& t: traces) { + this->traces.add(kj::addRef(*t)); + } +} + kj::Promise>> PipelineTracer::onComplete() { KJ_REQUIRE(completeFulfiller == kj::none, "onComplete() can only be called once"); @@ -586,9 +98,9 @@ kj::Own PipelineTracer::makeWorkerTracer(PipelineLogLevel pipeline kj::Maybe dispatchNamespace, kj::Array scriptTags, kj::Maybe entrypoint) { - auto trace = kj::refcounted(kj::mv(stableId), kj::mv(scriptName), kj::mv(scriptVersion), - kj::mv(dispatchNamespace), kj::mv(scriptId), kj::mv(scriptTags), kj::mv(entrypoint), - executionModel); + auto trace = kj::refcounted( + trace::Onset(kj::mv(scriptName), kj::mv(scriptVersion), kj::mv(dispatchNamespace), + kj::mv(scriptId), kj::mv(scriptTags), kj::mv(entrypoint), executionModel)); traces.add(kj::addRef(*trace)); return kj::refcounted(kj::addRef(*this), kj::mv(trace), pipelineLogLevel); } @@ -606,128 +118,48 @@ WorkerTracer::WorkerTracer( WorkerTracer::WorkerTracer(PipelineLogLevel pipelineLogLevel, ExecutionModel executionModel) : pipelineLogLevel(pipelineLogLevel), trace(kj::refcounted( - kj::none, kj::none, kj::none, kj::none, kj::none, nullptr, kj::none, executionModel)), + trace::Onset(kj::none, kj::none, kj::none, kj::none, nullptr, kj::none, executionModel))), self(kj::refcounted>(kj::Badge{}, *this)) {} void WorkerTracer::log(kj::Date timestamp, LogLevel logLevel, kj::String message, bool isSpan) { - if (trace->exceededLogLimit) { - return; - } if (pipelineLogLevel == PipelineLogLevel::NONE) { return; } - size_t newSize = trace->bytesUsed + sizeof(Trace::Log) + message.size(); - if (newSize > MAX_TRACE_BYTES) { - trace->exceededLogLimit = true; - trace->truncated = true; - // We use a JSON encoded array/string to match other console.log() recordings: - trace->logs.add(timestamp, LogLevel::WARN, - kj::str( - "[\"Log size limit exceeded: More than 128KB of data (across console.log statements, exception, request metadata and headers) was logged during a single request. Subsequent data for this request will not be recorded in logs, appear when tailing this Worker's logs, or in Tail Workers.\"]")); - return; - } - trace->bytesUsed = newSize; - if (isSpan) { - trace->spans.add(timestamp, logLevel, kj::mv(message)); - trace->numSpans++; - return; - } - trace->logs.add(timestamp, logLevel, kj::mv(message)); + trace->addLog(trace::Log(timestamp, logLevel, kj::mv(message)), isSpan); } void WorkerTracer::addSpan(const Span& span, kj::String spanContext) { - // This is where we'll actually encode the span for now. - // Drop any spans beyond MAX_USER_SPANS. - if (trace->numSpans >= MAX_USER_SPANS) { + // TODO(someday): For now, we're using logLevel == none as a hint to avoid doing anything + // expensive while tracing. We may eventually want separate configuration for exceptions vs. + // logs. + if (pipelineLogLevel == PipelineLogLevel::NONE) { return; } - if (isPredictableModeForTest()) { - // Do not emit span duration information in predictable mode. - log(span.endTime, LogLevel::LOG, kj::str("[\"span: ", span.operationName, "\"]"), true); - } else { - // Time since Unix epoch in seconds, with millisecond precision - double epochSecondsStart = (span.startTime - kj::UNIX_EPOCH) / kj::MILLISECONDS / 1000.0; - double epochSecondsEnd = (span.endTime - kj::UNIX_EPOCH) / kj::MILLISECONDS / 1000.0; - auto message = kj::str("[\"span: ", span.operationName, " ", kj::mv(spanContext), " ", - epochSecondsStart, " ", epochSecondsEnd, "\"]"); - log(span.endTime, LogLevel::LOG, kj::mv(message), true); - } - // TODO(cleanup): Create a function in kj::OneOf to automatically convert to a given type (i.e - // String) to avoid having to handle each type explicitly here. - for (const Span::TagMap::Entry& tag: span.tags) { - auto value = [&]() { - KJ_SWITCH_ONEOF(tag.value) { - KJ_CASE_ONEOF(str, kj::String) { - return kj::str(str); - } - KJ_CASE_ONEOF(val, int64_t) { - return kj::str(val); - } - KJ_CASE_ONEOF(val, double) { - return kj::str(val); - } - KJ_CASE_ONEOF(val, bool) { - return kj::str(val); - } - } - KJ_UNREACHABLE; - }(); - kj::String message = kj::str("[\"tag: "_kj, tag.key, " => "_kj, value, "\"]"); - log(span.endTime, LogLevel::LOG, kj::mv(message), true); - } + trace->addSpan(kj::mv(span), kj::mv(spanContext)); } void WorkerTracer::addException( kj::Date timestamp, kj::String name, kj::String message, kj::Maybe stack) { - if (trace->exceededExceptionLimit) { - return; - } // TODO(someday): For now, we're using logLevel == none as a hint to avoid doing anything // expensive while tracing. We may eventually want separate configuration for exceptions vs. // logs. if (pipelineLogLevel == PipelineLogLevel::NONE) { return; } - size_t newSize = trace->bytesUsed + sizeof(Trace::Exception) + name.size() + message.size(); - KJ_IF_SOME(s, stack) { - newSize += s.size(); - } - if (newSize > MAX_TRACE_BYTES) { - trace->exceededExceptionLimit = true; - trace->truncated = true; - trace->exceptions.add(timestamp, kj::str("Error"), - kj::str("Trace resource limit exceeded; subsequent exceptions not recorded."), kj::none); - return; - } - trace->bytesUsed = newSize; - trace->exceptions.add(timestamp, kj::mv(name), kj::mv(message), kj::mv(stack)); + trace->addException(trace::Exception(timestamp, kj::mv(name), kj::mv(message), kj::mv(stack))); } void WorkerTracer::addDiagnosticChannelEvent( kj::Date timestamp, kj::String channel, kj::Array message) { - if (trace->exceededDiagnosticChannelEventLimit) { - return; - } if (pipelineLogLevel == PipelineLogLevel::NONE) { return; } - size_t newSize = - trace->bytesUsed + sizeof(Trace::DiagnosticChannelEvent) + channel.size() + message.size(); - if (newSize > MAX_TRACE_BYTES) { - trace->exceededDiagnosticChannelEventLimit = true; - trace->truncated = true; - trace->diagnosticChannelEvents.add( - timestamp, kj::str("workerd.LimitExceeded"), kj::Array()); - return; - } - trace->bytesUsed = newSize; - trace->diagnosticChannelEvents.add(timestamp, kj::mv(channel), kj::mv(message)); + trace->addDiagnosticChannelEvent( + trace::DiagnosticChannelEvent(timestamp, kj::mv(channel), kj::mv(message))); } -void WorkerTracer::setEventInfo(kj::Date timestamp, Trace::EventInfo&& info) { - KJ_ASSERT(trace->eventInfo == kj::none, "tracer can only be used for a single event"); - +void WorkerTracer::setEventInfo(kj::Date timestamp, trace::EventInfo&& info) { // TODO(someday): For now, we're using logLevel == none as a hint to avoid doing anything // expensive while tracing. We may eventually want separate configuration for event info vs. // logs. @@ -737,60 +169,25 @@ void WorkerTracer::setEventInfo(kj::Date timestamp, Trace::EventInfo&& info) { return; } - trace->eventTimestamp = timestamp; - - size_t newSize = trace->bytesUsed; - KJ_SWITCH_ONEOF(info) { - KJ_CASE_ONEOF(fetch, Trace::FetchEventInfo) { - newSize += fetch.url.size(); - for (const auto& header: fetch.headers) { - newSize += header.name.size() + header.value.size(); - } - newSize += fetch.cfJson.size(); - if (newSize > MAX_TRACE_BYTES) { - trace->truncated = true; - trace->logs.add(timestamp, LogLevel::WARN, - kj::str("[\"Trace resource limit exceeded; could not capture event info.\"]")); - trace->eventInfo = Trace::FetchEventInfo(fetch.method, {}, {}, {}); - return; - } - } - KJ_CASE_ONEOF(_, Trace::JsRpcEventInfo) {} - KJ_CASE_ONEOF(_, Trace::ScheduledEventInfo) {} - KJ_CASE_ONEOF(_, Trace::AlarmEventInfo) {} - KJ_CASE_ONEOF(_, Trace::QueueEventInfo) {} - KJ_CASE_ONEOF(_, Trace::EmailEventInfo) {} - KJ_CASE_ONEOF(_, Trace::TraceEventInfo) {} - KJ_CASE_ONEOF(_, Trace::HibernatableWebSocketEventInfo) {} - KJ_CASE_ONEOF(_, Trace::CustomEventInfo) {} - } - trace->bytesUsed = newSize; - trace->eventInfo = kj::mv(info); + trace->setEventInfo(timestamp, kj::mv(info)); } -void WorkerTracer::setOutcome(EventOutcome outcome) { - trace->outcome = outcome; +void WorkerTracer::addMetrics(trace::Metrics&& metrics) { + trace->addMetrics(kj::mv(metrics)); } -void WorkerTracer::setCPUTime(kj::Duration cpuTime) { - trace->cpuTime = cpuTime; +void WorkerTracer::setOutcomeInfo(trace::Outcome&& info) { + trace->setOutcome(kj::mv(info)); } -void WorkerTracer::setWallTime(kj::Duration wallTime) { - trace->wallTime = wallTime; -} - -void WorkerTracer::setFetchResponseInfo(Trace::FetchResponseInfo&& info) { +void WorkerTracer::setFetchResponseInfo(trace::FetchResponseInfo&& info) { // Match the behavior of setEventInfo(). Any resolution of the TODO comments // in setEventInfo() that are related to this check while probably also affect // this function. if (pipelineLogLevel == PipelineLogLevel::NONE) { return; } - - KJ_REQUIRE(KJ_REQUIRE_NONNULL(trace->eventInfo).is()); - KJ_ASSERT(trace->fetchResponseInfo == kj::none, "setFetchResponseInfo can only be called once"); - trace->fetchResponseInfo = kj::mv(info); + trace->setFetchResponseInfo(kj::mv(info)); } void WorkerTracer::extractTrace(rpc::Trace::Builder builder) { diff --git a/src/workerd/io/trace.h b/src/workerd/io/trace.h index cfd07a1e0c8..f30f25e2f54 100644 --- a/src/workerd/io/trace.h +++ b/src/workerd/io/trace.h @@ -4,6 +4,8 @@ #pragma once +#include "trace-legacy.h" + #include #include #include @@ -28,306 +30,6 @@ namespace workerd { using kj::byte; using kj::uint; -typedef rpc::Trace::Log::Level LogLevel; -typedef rpc::Trace::ExecutionModel ExecutionModel; - -enum class PipelineLogLevel { - // WARNING: This must be kept in sync with PipelineDef::LogLevel (which is not in the OSS - // release). - NONE, - FULL -}; - -struct Span; - -// TODO(someday): See if we can merge similar code concepts... Trace fills a role similar to -// MetricsCollector::Reporter::StageEvent, and Tracer fills a role similar to -// MetricsCollector::Request. Currently, the major differences are: -// -// - MetricsCollector::Request uses its destructor to measure a IoContext's wall time, so -// it needs to live exactly as long as its IoContext. Tracer currently needs to live as -// long as both the IoContext and those of any subrequests. -// - Due to the difference in lifetimes, results of each become available in a different order, -// and intermediate values can be freed at different times. -// - Request builds a vector of results, while Tracer builds a tree. - -// TODO(cleanup) - worth separating into immutable Trace vs. mutable TraceBuilder? - -// Collects trace information about the handling of a worker/pipeline fetch event. -class Trace final: public kj::Refcounted { -public: - explicit Trace(kj::Maybe stableId, - kj::Maybe scriptName, - kj::Maybe> scriptVersion, - kj::Maybe dispatchNamespace, - kj::Maybe scriptId, - kj::Array scriptTags, - kj::Maybe entrypoint, - ExecutionModel executionModel); - Trace(rpc::Trace::Reader reader); - ~Trace() noexcept(false); - KJ_DISALLOW_COPY_AND_MOVE(Trace); - - class FetchEventInfo { - public: - class Header; - - explicit FetchEventInfo( - kj::HttpMethod method, kj::String url, kj::String cfJson, kj::Array
headers); - FetchEventInfo(rpc::Trace::FetchEventInfo::Reader reader); - - class Header { - public: - explicit Header(kj::String name, kj::String value); - Header(rpc::Trace::FetchEventInfo::Header::Reader reader); - - kj::String name; - kj::String value; - - void copyTo(rpc::Trace::FetchEventInfo::Header::Builder builder); - - JSG_MEMORY_INFO(Header) { - tracker.trackField("name", name); - tracker.trackField("value", value); - } - }; - - kj::HttpMethod method; - kj::String url; - // TODO(perf): It might be more efficient to store some sort of parsed JSON result instead? - kj::String cfJson; - kj::Array
headers; - - void copyTo(rpc::Trace::FetchEventInfo::Builder builder); - }; - - class JsRpcEventInfo { - public: - explicit JsRpcEventInfo(kj::String methodName); - JsRpcEventInfo(rpc::Trace::JsRpcEventInfo::Reader reader); - - kj::String methodName; - - void copyTo(rpc::Trace::JsRpcEventInfo::Builder builder); - }; - - class ScheduledEventInfo { - public: - explicit ScheduledEventInfo(double scheduledTime, kj::String cron); - ScheduledEventInfo(rpc::Trace::ScheduledEventInfo::Reader reader); - - double scheduledTime; - kj::String cron; - - void copyTo(rpc::Trace::ScheduledEventInfo::Builder builder); - }; - - class AlarmEventInfo { - public: - explicit AlarmEventInfo(kj::Date scheduledTime); - AlarmEventInfo(rpc::Trace::AlarmEventInfo::Reader reader); - - kj::Date scheduledTime; - - void copyTo(rpc::Trace::AlarmEventInfo::Builder builder); - }; - - class QueueEventInfo { - public: - explicit QueueEventInfo(kj::String queueName, uint32_t batchSize); - QueueEventInfo(rpc::Trace::QueueEventInfo::Reader reader); - - kj::String queueName; - uint32_t batchSize; - - void copyTo(rpc::Trace::QueueEventInfo::Builder builder); - }; - - class EmailEventInfo { - public: - explicit EmailEventInfo(kj::String mailFrom, kj::String rcptTo, uint32_t rawSize); - EmailEventInfo(rpc::Trace::EmailEventInfo::Reader reader); - - kj::String mailFrom; - kj::String rcptTo; - uint32_t rawSize; - - void copyTo(rpc::Trace::EmailEventInfo::Builder builder); - }; - - class TraceEventInfo { - public: - class TraceItem; - - explicit TraceEventInfo(kj::ArrayPtr> traces); - TraceEventInfo(rpc::Trace::TraceEventInfo::Reader reader); - - class TraceItem { - public: - explicit TraceItem(kj::Maybe scriptName); - TraceItem(rpc::Trace::TraceEventInfo::TraceItem::Reader reader); - - kj::Maybe scriptName; - - void copyTo(rpc::Trace::TraceEventInfo::TraceItem::Builder builder); - }; - - kj::Vector traces; - - void copyTo(rpc::Trace::TraceEventInfo::Builder builder); - }; - - class HibernatableWebSocketEventInfo { - public: - struct Message {}; - struct Close { - uint16_t code; - bool wasClean; - }; - struct Error {}; - - using Type = kj::OneOf; - - explicit HibernatableWebSocketEventInfo(Type type); - HibernatableWebSocketEventInfo(rpc::Trace::HibernatableWebSocketEventInfo::Reader reader); - - Type type; - - void copyTo(rpc::Trace::HibernatableWebSocketEventInfo::Builder builder); - static Type readFrom(rpc::Trace::HibernatableWebSocketEventInfo::Reader reader); - }; - - class CustomEventInfo { - public: - explicit CustomEventInfo() {}; - CustomEventInfo(rpc::Trace::CustomEventInfo::Reader reader) {}; - }; - - class FetchResponseInfo { - public: - explicit FetchResponseInfo(uint16_t statusCode); - FetchResponseInfo(rpc::Trace::FetchResponseInfo::Reader reader); - - uint16_t statusCode; - - void copyTo(rpc::Trace::FetchResponseInfo::Builder builder); - }; - - class DiagnosticChannelEvent { - public: - explicit DiagnosticChannelEvent( - kj::Date timestamp, kj::String channel, kj::Array message); - DiagnosticChannelEvent(rpc::Trace::DiagnosticChannelEvent::Reader reader); - DiagnosticChannelEvent(DiagnosticChannelEvent&&) = default; - KJ_DISALLOW_COPY(DiagnosticChannelEvent); - - kj::Date timestamp; - kj::String channel; - kj::Array message; - - void copyTo(rpc::Trace::DiagnosticChannelEvent::Builder builder); - }; - - class Log { - public: - explicit Log(kj::Date timestamp, LogLevel logLevel, kj::String message); - Log(rpc::Trace::Log::Reader reader); - Log(Log&&) = default; - KJ_DISALLOW_COPY(Log); - ~Log() noexcept(false) = default; - - // Only as accurate as Worker's Date.now(), for Spectre mitigation. - kj::Date timestamp; - - LogLevel logLevel; - // TODO(soon): Just string for now. Eventually, capture serialized JS objects. - kj::String message; - - void copyTo(rpc::Trace::Log::Builder builder); - }; - - class Exception { - public: - explicit Exception( - kj::Date timestamp, kj::String name, kj::String message, kj::Maybe stack); - Exception(rpc::Trace::Exception::Reader reader); - Exception(Exception&&) = default; - KJ_DISALLOW_COPY(Exception); - ~Exception() noexcept(false) = default; - - // Only as accurate as Worker's Date.now(), for Spectre mitigation. - kj::Date timestamp; - - kj::String name; - kj::String message; - - kj::Maybe stack; - - void copyTo(rpc::Trace::Exception::Builder builder); - }; - - // Empty for toplevel worker. - kj::Maybe stableId; - - // We treat the origin value as "unset". - kj::Date eventTimestamp = kj::UNIX_EPOCH; - - typedef kj::OneOf - EventInfo; - kj::Maybe eventInfo; - // TODO(someday): Support more event types. - // TODO(someday): Work out what sort of information we may want to convey about the parent - // trace, if any. - - kj::Maybe scriptName; - kj::Maybe> scriptVersion; - kj::Maybe dispatchNamespace; - kj::Maybe scriptId; - kj::Array scriptTags; - kj::Maybe entrypoint; - - kj::Vector logs; - // TODO(o11y): Convert this to actually store spans. - kj::Vector spans; - // A request's trace can have multiple exceptions due to separate request/waitUntil tasks. - kj::Vector exceptions; - - kj::Vector diagnosticChannelEvents; - - ExecutionModel executionModel; - EventOutcome outcome = EventOutcome::UNKNOWN; - - kj::Maybe fetchResponseInfo; - - kj::Duration cpuTime; - kj::Duration wallTime; - - bool truncated = false; - bool exceededLogLimit = false; - bool exceededExceptionLimit = false; - bool exceededDiagnosticChannelEventLimit = false; - // Trace data is recorded outside of the JS heap. To avoid DoS, we keep an estimate of trace - // data size, and we stop recording if too much is used. - size_t bytesUsed = 0; - size_t numSpans = 0; - - // Copy content from this trace into `builder`. - void copyTo(rpc::Trace::Builder builder); - - // Adds all content from `reader` to this `Trace`. (Typically this trace is empty before the - // call.) Also applies filtering to the trace as if it were recorded with the given - // pipelineLogLevel. - void mergeFrom(rpc::Trace::Reader reader, PipelineLogLevel pipelineLogLevel); -}; - // ======================================================================================= class WorkerTracer; @@ -338,9 +40,7 @@ class WorkerTracer; class PipelineTracer final: public kj::Refcounted { public: // Creates a pipeline tracer (with a possible parent). - explicit PipelineTracer(kj::Maybe> parentPipeline = kj::none) - : parentTracer(kj::mv(parentPipeline)) {} - + explicit PipelineTracer() = default; ~PipelineTracer() noexcept(false); KJ_DISALLOW_COPY_AND_MOVE(PipelineTracer); @@ -348,11 +48,6 @@ class PipelineTracer final: public kj::Refcounted { // exist at a time. kj::Promise>> onComplete(); - // Makes a tracer for a subpipeline. - kj::Own makePipelineSubtracer() { - return kj::refcounted(kj::addRef(*this)); - } - // Makes a tracer for a worker stage. kj::Own makeWorkerTracer(PipelineLogLevel pipelineLogLevel, ExecutionModel executionModel, @@ -368,19 +63,18 @@ class PipelineTracer final: public kj::Refcounted { // to the host where tracing was initiated. void addTrace(rpc::Trace::Reader reader); + void addTracesFromChild(kj::ArrayPtr> traces); + private: kj::Vector> traces; kj::Maybe>>>> completeFulfiller; - kj::Maybe> parentTracer; - friend class WorkerTracer; }; -// Records a worker stage's trace information into a Trace object. When all references to the -// Tracer are released, its Trace is considered complete and ready for submission. If the Trace to -// write to isn't provided (that already exists in a PipelineTracer), the trace must by extracted -// via extractTrace. +// Records a worker stage's trace information. When all references to the Tracer are released, +// its Trace is considered complete. If the Trace to write to isn't provided (that already exists +// in a PipelineTracer), the trace must by extracted via extractTrace. class WorkerTracer final: public kj::Refcounted { public: explicit WorkerTracer(kj::Own parentPipeline, @@ -392,6 +86,16 @@ class WorkerTracer final: public kj::Refcounted { } KJ_DISALLOW_COPY_AND_MOVE(WorkerTracer); + // Sets info about the event that triggered the trace. Must not be called more than once. + void setEventInfo(kj::Date timestamp, trace::EventInfo&&); + + // Add metrics to the trace. Can be called more than once. + void addMetrics(trace::Metrics&& metrics); + + // Sets info about the result of this trace. Can be called more than once, overriding the + // previous detail. + void setOutcomeInfo(trace::Outcome&& info); + // Adds log line to trace. For Spectre, timestamp should only be as accurate as JS Date.now(). // The isSpan parameter allows for logging spans, which will be emitted after regular logs. There // can be at most MAX_USER_SPANS spans in a trace. @@ -408,18 +112,9 @@ class WorkerTracer final: public kj::Refcounted { void addDiagnosticChannelEvent( kj::Date timestamp, kj::String channel, kj::Array message); - // Adds info about the event that triggered the trace. Must not be called more than once. - void setEventInfo(kj::Date timestamp, Trace::EventInfo&&); - // Adds info about the response. Must not be called more than once, and only // after passing a FetchEventInfo to setEventInfo(). - void setFetchResponseInfo(Trace::FetchResponseInfo&&); - - void setOutcome(EventOutcome outcome); - - void setCPUTime(kj::Duration cpuTime); - - void setWallTime(kj::Duration wallTime); + void setFetchResponseInfo(trace::FetchResponseInfo&&); // Used only for a Trace in a process sandbox. Copies the content of this tracer's trace to the // builder. @@ -472,42 +167,6 @@ inline kj::String truncateScriptId(kj::StringPtr id) { class SpanBuilder; class SpanObserver; -struct Span { - // Represents a trace span. `Span` objects are delivered to `SpanObserver`s for recording. To - // create a `Span`, use a `SpanBuilder`. - -public: - using TagValue = kj::OneOf; - // TODO(someday): Support binary bytes, too. - using TagMap = kj::HashMap; - using Tag = TagMap::Entry; - - struct Log { - kj::Date timestamp; - Tag tag; - }; - - kj::ConstString operationName; - kj::Date startTime; - kj::Date endTime; - TagMap tags; - kj::Vector logs; - - // We set an arbitrary (-ish) cap on log messages for safety. If we drop logs because of this, - // we report how many in a final "dropped_logs" log. - // - // At the risk of being too clever, I chose a limit that is one below a power of two so that - // we'll typically have space for one last element available for the "dropped_logs" log without - // needing to grow the vector. - static constexpr auto MAX_LOGS = 1023; - uint droppedLogs = 0; - - explicit Span(kj::ConstString operationName, kj::Date startTime) - : operationName(kj::mv(operationName)), - startTime(startTime), - endTime(startTime) {} -}; - // An opaque token which can be used to create child spans of some parent. This is typically // passed down from a caller to a callee when the caller wants to allow the callee to create // spans for itself that show up as children of the caller's span, but the caller does not diff --git a/src/workerd/io/worker-entrypoint.c++ b/src/workerd/io/worker-entrypoint.c++ index 9b3bbc93fc0..fdb74706370 100644 --- a/src/workerd/io/worker-entrypoint.c++ +++ b/src/workerd/io/worker-entrypoint.c++ @@ -254,11 +254,11 @@ kj::Promise WorkerEntrypoint::request(kj::HttpMethod method, slot.add(value); }); auto traceHeadersArray = KJ_MAP(entry, traceHeaders) { - return Trace::FetchEventInfo::Header(kj::mv(entry.key), kj::strArray(entry.value, ", ")); + return trace::FetchEventInfo::Header(kj::mv(entry.key), kj::strArray(entry.value, ", ")); }; t.setEventInfo(timestamp, - Trace::FetchEventInfo(method, kj::str(url), kj::mv(cfJson), kj::mv(traceHeadersArray))); + trace::FetchEventInfo(method, kj::str(url), kj::mv(cfJson), kj::mv(traceHeadersArray))); } auto metricsForCatch = kj::addRef(incomingRequest->getMetrics()); @@ -478,7 +478,7 @@ kj::Promise WorkerEntrypoint::runScheduled( KJ_IF_SOME(t, context.getWorkerTracer()) { double eventTime = (scheduledTime - kj::UNIX_EPOCH) / kj::MILLISECONDS; - t.setEventInfo(context.now(), Trace::ScheduledEventInfo(eventTime, kj::str(cron))); + t.setEventInfo(context.now(), trace::ScheduledEventInfo(eventTime, kj::str(cron))); } // Scheduled handlers run entirely in waitUntil() tasks. @@ -536,7 +536,7 @@ kj::Promise WorkerEntrypoint::runAlarmImpl( incomingRequest->delivered(); KJ_IF_SOME(t, incomingRequest->getWorkerTracer()) { - t.setEventInfo(context.now(), Trace::AlarmEventInfo(scheduledTime)); + t.setEventInfo(context.now(), trace::AlarmEventInfo(scheduledTime)); } auto scheduleAlarmResult = co_await actor.scheduleAlarm(scheduledTime); diff --git a/src/workerd/io/worker-interface.capnp b/src/workerd/io/worker-interface.capnp index 0875f0f4ecb..ecfe480e59d 100644 --- a/src/workerd/io/worker-interface.capnp +++ b/src/workerd/io/worker-interface.capnp @@ -32,12 +32,60 @@ struct Trace @0x8e8d911203762d34 { message @2 :Text; } + struct LogV2 { + # Streaming tail workers support an expanded version of Log that supports arbitrary + # v8 serialized data or a text message. We define this as a separate new + # struct in order to avoid any possible non-backwards compatible disruption to anything + # using the existing Log struct in the original trace worker impl. The two structs are + # virtually identical with the exception that the message field can be v8 serialized data. + timestampNs @0 :Int64; + logLevel @1 :Log.Level; + message :union { + data @2 :Data; + # When data is used, the LogV2 message is expected to be a v8 serialized value. + text @3 :Text; + # Text would be used, for instance, for simple string outputs (e.g. from things + # like console.log(...)) + } + tags @4 :List(Tag); + # Additional bits of information that are not known to workerd but may be injected + # by workerd embedders (such as the Cloudflare production environment) or structured + # logging mechanisms (so called "wide events"). + truncated @5 :Bool; + # A Log entry might be truncated if it exceeds the maximum size limit configured + # for the process. Truncation should occur before the data is serialized so it + # should always be possible to deserialize the data field successfully, regardless + # of the specific format of the data. + } + exceptions @1 :List(Exception); struct Exception { timestampNs @0 :Int64; name @1 :Text; message @2 :Text; stack @3 :Text; + + detail :group { + # Additional optional detail accompanying the exception event. + cause @4 :Exception; + # If the exception has a cause property, it is serialized here. + + errors @5 :List(Exception); + # If the exception represents an AggregateError or SupressedError, the + # errors are serialized here. + + remote @6 :Bool; + retryable @7 :Bool; + overloaded @8 :Bool; + durableObjectReset @9 :Bool; + tags @10 :List(Tag); + # Additional metadata fields that are set on some errors originating + # from the runtime. The remote, retryable, overloaded, and durableObjectReset + # fields *could* be defined as tags but those are already known to workerd + # and will be common enough to just represent those separately. If/when new + # fields are introduced that are not known to workerd, they would be added + # as tags rather than distinct fields. + } } outcome @2 :EventOutcome; @@ -114,7 +162,7 @@ struct Trace @0x8e8d911203762d34 { } } - struct CustomEventInfo {} + struct CustomEventInfo { } response @8 :FetchResponseInfo; struct FetchResponseInfo { @@ -147,6 +195,233 @@ struct Trace @0x8e8d911203762d34 { executionModel @25 :ExecutionModel; # the execution model of the worker being traced. Can be stateless for a regular worker, # durableObject for a DO worker or workflow for the upcoming Workflows feature. + + # The following structs are used only in Streaming Traces. + + struct Onset { + # The Onset struct is always sent as the first event in any trace stream. It + # contains general metadata about the pipeline that is being traced. + scriptName @0 :Text; + scriptVersion @1 :ScriptVersion; + dispatchNamespace @2 :Text; + scriptId @3 :Text; + scriptTags @4 :List(Text); + entrypoint @5 :Text; + executionModel @6 :ExecutionModel; + + info :union { + # Info structs are used at the start of a stream to identify the kind + # of trigger that started the stream. For instance, a fetch event will have + # a fetch info event at the start of the stream. + none @7 :Void; + fetch @8 :FetchEventInfo; + jsRpc @9 :JsRpcEventInfo; + scheduled @10 :ScheduledEventInfo; + alarm @11 :AlarmEventInfo; + queue @12 :QueueEventInfo; + email @13 :EmailEventInfo; + trace @14 :TraceEventInfo; + hibernatableWebSocket @15 :HibernatableWebSocketEventInfo; + custom @16 :List(Tag); + # A custom info event is used to enable arbitrary, non-typed extension + # events to be injected. It is most useful as a way of extending + # the event stream with new types of events without modifying the + # schema. This is a tradeoff. Using a custom event is more flexible + # but there's no schema to verify the data. + } + + tags @17 :List(Tag); + # Any additional arbitrary metadata that should be associated with the onset. + # These are different from the tags that appear in the StreamEvent structure + # in that those are considered unique for each event in the stream, whereas + # these are considered part of the onset metadata itself, just like any of + # the fields above. The goal is to make Onset extensible without requiring + # schema changes. + } + + struct Outcome { + # The Outcome struct is always sent as the last event in any trace stream. It + # contains the final outcome of the trace. + outcome @0 :EventOutcome; + + info :union { + none @1 :Void; + fetch @2 :FetchResponseInfo; + custom @3 :List(Tag); + } + + tags @4 :List(Tag); + # Any additional arbitrary metadata that should be associated with the outcome. + } + + struct Subrequest { + # A detail event indicating a subrequest that was made during a request. This + # can be a fetch subrequest, an RPC subrequest, a call out to a KV namespace, + # etc. + # TODO(streaming-trace): This needs to be flushed out more. + id @0 :UInt32; + # A monotonic sequence number that is unique within the tail. The id is + # used primarily to correlate with the SubrequestOutcome. + info :union { + none @1 :Void; + fetch @2 :FetchEventInfo; + jsRpc @3 :JsRpcEventInfo; + } + } + + struct SubrequestOutcome { + id @0 :UInt32; + info :union { + none @1 :Void; + fetch @2 :FetchResponseInfo; + custom @3 :List(Tag); + } + outcome @4 :SpanClose.SpanOutcome; + # A Subrequest is really a specialist kind of span, so it can have an outcome. + # just like a span. + } + + struct SpanClose { + # A SpanClose is sent only at the completion of a span. + + enum SpanOutcome { + unknown @0; + ok @1; + exception @2; + canceled @3; + } + outcome @0 :SpanOutcome; + + tags @1 :List(Tag); + # Any additional metadata specific to the span itself. + } + + struct Mark { + # A mark is a special event that simply marks a point of interest in the trace. + name @0 :Text; + } + + struct Metric { + # A metric is a special event that represents a metric value of some form injected + # into the trace. A metric can be used, for instance, to communicate the current + # isolate memory usage at a given point in time. + + key :union { + # The key field can be either arbitrary text or a numeric ID. The ID field is used + # to identify commonly used metrics defined and injected by the runtime. There is no + # enum definition for these here as it is expected the actual definitions will + # come from the internal project and not workerd. + # Metrics with the same name can appear multiple times, in which case the calculated + # logical value is the concatenation of all values into a collection. So, for + # instance, if a metric with name "foo" appears with value 1 and then again with + # value 2, the logical value of the "foo" metric is [1, 2]. The ordering of such + # metrics is significant. + # Multiple appearances of the same metric with the same key can be of different + # types and units. + text @0 :Text; + id @1 :UInt32; + } + enum Type { + counter @0; + # A counter metric, for instance, the number of times a given event has occurred, + # the number of requests processed, etc. Counters nearly aways represent a conceptually + # monotonically increasing value (conceptually because in reality the value may not + # be strictly increasing unit-by-unit due to sampling frequency, etc). + + gauge @1; + # A gauge metric, for instance, the current memory heap size, etc. Gauges represent + # a snapshot of a value at a given point in time and therefore may increase or decrease. + } + type @2 :Type; + value @3 :Float64; + tags @4 :List(Tag); + } + + struct Dropped { + # The Dropped struct is used to indicate that a trace has dropped a given number of + # events in the sequence. A Dropped events sequence number must always be greater + # than the sequence number specified by the end field. + start @0 :UInt32; + end @1 :UInt32; + } + + struct Tag { + # A Tag is an additional piece of information that can added to each event in a trace. + + key :union { + # The key field can be either arbitrary text or a numeric ID. The ID field is used + # to identify commonly used tags defined and injected by the runtime. There is no + # enum definition for these here as it is expected the actual definitions will + # come from the internal project and not workerd. + # Tags with the same key can appear multiple times, in which case the calculated + # logical value is the concatenation of all values into a collection. So, for + # instance, if a tag with key "foo" appears with value "bar" and then again with + # value "baz", the logical value of the "foo" tag is ["bar", "baz"]. The ordering + # of such tags is significant. + text @0 :Text; + id @1 :UInt32; + } + value :union { + bool @2 :Bool; + int64 @3 :Int64; + uint64 @4 :UInt64; + float64 @5 :Float64; + text @6 :Text; + data @7 :Data; + } + } + + struct StreamEvent { + id @0 :Text; + # A unique identifier used to correlate traces across multiple events + # in a single tail session. Typically this will correlate to a top-level + # pipeline or specific pipeline stage. + + span :group { + id @1 :UInt32; + parent @2 :UInt32; + } + timestampNs @3 :Int64; + sequence @4 :UInt32; + # The sequence order for this event. This is a strictly monotonically + # increasing sequence number that is unique within the tail. The onset + # event sequence number will always be 0. The purpose of the sequence + # is to make it possible to reconstruct the specific ordering of events + # in the stream. + + event :union { + onset @5 :Onset; + # When a tail stream is first created, the first event will always be + # an onset event. + + outcome @6 :Outcome; + # The final event in every successfully completed stream be will an outcome + # event. + + dropped @7 :Dropped; + # The dropped event is used to identify events that have been dropped from + # the stream. The start field indicates the sequence number of the first + # event dropped, and the end field indicates the sequence number of the + # last event dropped. + + spanClose @8 :SpanClose; + # Span events mark the ending and outcome of a span. + + log @9 :LogV2; + exception @10 :Exception; + diagnosticChannel @11 :DiagnosticChannelEvent; + mark @12 :Mark; + metrics @13 :List(Metric); + subrequest @14 :Subrequest; + subrequestOutcome @15 :SubrequestOutcome; + custom @16 :List(Tag); + # A custom event is used to enable arbitrary, non-typed extension + # events to be injected. It is most useful as a way of extending + # the event stream with new types of events without modifying the + # schema. This is a tradeoff. Using a custom event is more flexible + # but there's no schema to verify the data. + } + } } struct SendTracesRun @0xde913ebe8e1b82a5 { diff --git a/src/workerd/server/server.c++ b/src/workerd/server/server.c++ index a4cdcd10042..bdd7eb46fda 100644 --- a/src/workerd/server/server.c++ +++ b/src/workerd/server/server.c++ @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1446,6 +1447,147 @@ void Server::InspectorServiceIsolateRegistrar::registerIsolate( // ======================================================================================= +namespace { +class RequestObserverWithTracer final: public RequestObserver, public WorkerInterface { +public: + RequestObserverWithTracer(kj::Maybe> tracer): tracer(kj::mv(tracer)) {} + ~RequestObserverWithTracer() noexcept(false) { + KJ_IF_SOME(t, tracer) { + if (fetchStatus != 0) { + t->setFetchResponseInfo(trace::FetchResponseInfo(fetchStatus)); + } + t->setOutcomeInfo(trace::Outcome(outcome)); + } + } + + WorkerInterface& wrapWorkerInterface(WorkerInterface& worker) override { + if (tracer != kj::none) { + inner = worker; + return *this; + } + return worker; + } + + void reportFailure(const kj::Exception& exception, FailureSource source) override { + outcome = EventOutcome::EXCEPTION; + } + + class ResponseObserver final: public kj::HttpService::Response { + public: + ResponseObserver(kj::HttpService::Response& response): inner(response) {} + + kj::Own send(uint statusCode, + kj::StringPtr statusText, + const kj::HttpHeaders& headers, + kj::Maybe expectedBodySize) override { + this->statusCode = statusCode; + return inner.send(statusCode, statusText, headers, expectedBodySize); + } + + kj::Own acceptWebSocket(const kj::HttpHeaders& headers) override { + return inner.acceptWebSocket(headers); + } + + uint getStatusCode() const { + return statusCode; + } + + private: + kj::HttpService::Response& inner; + uint statusCode; + }; + + // WorkerInterface + kj::Promise request(kj::HttpMethod method, + kj::StringPtr url, + const kj::HttpHeaders& headers, + kj::AsyncInputStream& requestBody, + kj::HttpService::Response& response) override { + try { + ResponseObserver responseWrapper(response); + co_await KJ_ASSERT_NONNULL(inner).request(method, url, headers, requestBody, responseWrapper); + fetchStatus = responseWrapper.getStatusCode(); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception, FailureSource::OTHER); + fetchStatus = 500; + throw exception; + } + } + + kj::Promise connect(kj::StringPtr host, + const kj::HttpHeaders& headers, + kj::AsyncIoStream& connection, + ConnectResponse& response, + kj::HttpConnectSettings settings) override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).connect( + host, headers, connection, response, settings); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception, FailureSource::OTHER); + throw exception; + } + } + + kj::Promise prewarm(kj::StringPtr url) override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).prewarm(url); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception, FailureSource::OTHER); + throw exception; + } + } + + kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).runScheduled(scheduledTime, cron); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception, FailureSource::OTHER); + throw exception; + } + } + + kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).runAlarm(scheduledTime, retryCount); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception, FailureSource::OTHER); + throw exception; + } + } + + kj::Promise test() override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).test(); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception, FailureSource::OTHER); + throw exception; + } + } + + kj::Promise customEvent(kj::Own event) override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).customEvent(kj::mv(event)); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception, FailureSource::OTHER); + throw exception; + } + } + +private: + kj::Maybe> tracer; + kj::Maybe inner; + EventOutcome outcome = EventOutcome::OK; + int fetchStatus = 0; +}; +} // namespace + class Server::WorkerService final: public Service, private kj::TaskSet::ErrorHandler, private IoChannelFactory, @@ -1464,6 +1606,8 @@ public: }; using LinkCallback = kj::Function; using AbortActorsCallback = kj::Function; + using LookupServiceCallback = + kj::Function; WorkerService(ThreadContext& threadContext, kj::Own worker, @@ -1471,13 +1615,17 @@ public: kj::HashMap> namedEntrypointsParam, const kj::HashMap& actorClasses, LinkCallback linkCallback, - AbortActorsCallback abortActorsCallback) + AbortActorsCallback abortActorsCallback, + kj::Array> loggingServices, + LookupServiceCallback lookupService) : threadContext(threadContext), ioChannels(kj::mv(linkCallback)), worker(kj::mv(worker)), defaultEntrypointHandlers(kj::mv(defaultEntrypointHandlers)), waitUntilTasks(*this), - abortActorsCallback(kj::mv(abortActorsCallback)) { + abortActorsCallback(kj::mv(abortActorsCallback)), + loggingServices(kj::mv(loggingServices)), + lookupService(kj::mv(lookupService)) { namedEntrypoints.reserve(namedEntrypointsParam.size()); for (auto& ep: namedEntrypointsParam) { @@ -1535,15 +1683,43 @@ public: kj::Maybe entrypointName, kj::Maybe> actor = kj::none) { TRACE_EVENT("workerd", "Server::WorkerService::startRequest()"); + + // We'll create a WorkerTracer if, and only if, there are tail workers configured. + auto maybeWorkerTracer = ([&]() -> kj::Maybe> { + if (loggingServices.size() > 0) { + auto tracer = kj::refcounted(); + + waitUntilTasks.add(dispatchTraces(tracer->onComplete(), KJ_MAP(svc, loggingServices) { + auto& service = lookupService(*svc, "resolving logging service"); + KJ_ASSERT(&service != this, "A worker currently cannot log to itself"); + return service.startRequest({}); + })); + + // The PipelineTracer is a kj::Refcounted. The call to makeWorkerTracer + // will cause the created WorkerTracer to retain a strong reference to + // the PipelineTracer so we don't need to arrange for it to stay alive. + // The instance will be dropped when the WorkerTracer is dropped, causing + // the onComplete promise to be fulfilled, allowing the collected traces + // to be dispatched. + + // TODO(streaming-trace): Make sure the execution model here accurately reflects reality. + // e.g. this might be a durable object, or eventually a workflow. + return tracer->makeWorkerTracer(PipelineLogLevel::FULL, ExecutionModel::STATELESS, kj::none, + kj::none, kj::none, kj::none, kj::none, nullptr, kj::none); + }; + + return kj::none; + })(); + return newWorkerEntrypoint(threadContext, kj::atomicAddRef(*worker), entrypointName, kj::mv(actor), kj::Own(this, kj::NullDisposer::instance), {}, // ioContextDependency kj::Own(this, kj::NullDisposer::instance), - kj::refcounted(), // default observer makes no observations + kj::refcounted( + mapAddRef(maybeWorkerTracer)), // default observer makes no observations waitUntilTasks, - true, // tunnelExceptions - kj::none, // workerTracer - kj::mv(metadata.cfBlobJson)); + true, // tunnelExceptions + kj::mv(maybeWorkerTracer), kj::mv(metadata.cfBlobJson)); } class ActorNamespace final { @@ -2031,6 +2207,16 @@ public: }; private: + kj::Promise dispatchTraces(kj::Promise>> promise, + kj::Array> tailWorkers) { + auto traces = co_await promise; + for (auto& worker: tailWorkers) { + auto event = kj::heap( + workerd::api::TraceCustomEventImpl::TYPE, mapAddRef(traces)); + co_await worker->customEvent(kj::mv(event)).ignoreResult(); + } + } + class EntrypointService final: public Service { public: EntrypointService( @@ -2064,6 +2250,8 @@ private: kj::HashMap> actorNamespaces; kj::TaskSet waitUntilTasks; AbortActorsCallback abortActorsCallback; + kj::Array> loggingServices; + LookupServiceCallback lookupService; class ActorChannelImpl final: public IoChannelFactory::ActorChannel { public: @@ -3068,9 +3256,31 @@ kj::Own Server::makeWorker(kj::StringPtr name, return result; }; + kj::Vector> loggingServices; + auto logging = conf.getLogging(); + switch (conf.getLogging().which()) { + case config::Worker::Logging::Which::NONE: + // Nothing to do here. + break; + case config::Worker::Logging::Which::TO_SERVICE: { + loggingServices.add(capnp::clone(logging.getToService())); + break; + } + case config::Worker::Logging::Which::TO_SERVICES: { + for (auto svc: logging.getToServices()) { + loggingServices.add(capnp::clone(svc)); + } + break; + } + } + return kj::heap(globalContext->threadContext, kj::mv(worker), kj::mv(errorReporter.defaultEntrypoint), kj::mv(errorReporter.namedEntrypoints), - localActorConfigs, kj::mv(linkCallback), KJ_BIND_METHOD(*this, abortAllActors)); + localActorConfigs, kj::mv(linkCallback), KJ_BIND_METHOD(*this, abortAllActors), + loggingServices.releaseAsArray(), + [this](const config::ServiceDesignator::Reader& service, kj::StringPtr context) -> Service& { + return lookupService(service, kj::str(context)); + }); } // ======================================================================================= diff --git a/src/workerd/server/workerd.capnp b/src/workerd/server/workerd.capnp index 4a2e5794735..d26cd10e29b 100644 --- a/src/workerd/server/workerd.capnp +++ b/src/workerd/server/workerd.capnp @@ -630,6 +630,11 @@ struct Worker { moduleFallback @13 :Text; + logging :union { + none @14 :Void; + toService @15 :ServiceDesignator; + toServices @16 :List(ServiceDesignator); + } } struct ExternalServer { diff --git a/src/workerd/util/autogate.c++ b/src/workerd/util/autogate.c++ index 79376ebbf1b..c5264a9fa03 100644 --- a/src/workerd/util/autogate.c++ +++ b/src/workerd/util/autogate.c++ @@ -17,6 +17,8 @@ kj::StringPtr KJ_STRINGIFY(AutogateKey key) { switch (key) { case AutogateKey::TEST_WORKERD: return "test-workerd"_kj; + case AutogateKey::STREAMING_TAIL_WORKERS: + return "streaming-tail-workers"_kj; case AutogateKey::NumOfKeys: KJ_FAIL_ASSERT("NumOfKeys should not be used in getName"); } diff --git a/src/workerd/util/autogate.h b/src/workerd/util/autogate.h index fe934116c10..a97be15628c 100644 --- a/src/workerd/util/autogate.h +++ b/src/workerd/util/autogate.h @@ -14,6 +14,7 @@ namespace workerd::util { // Workerd-specific list of autogate keys (can also be used in internal repo). enum class AutogateKey { TEST_WORKERD, + STREAMING_TAIL_WORKERS, NumOfKeys // Reserved for iteration. };