From a987853d3e7e2bcc0099be5ab1bc23c8a7dac2a8 Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Thu, 7 Nov 2024 09:18:38 +0100 Subject: [PATCH] feat(profiling): Add fatals and thread start/end events to timeline (#2820) --- profiling/build.rs | 32 ++++++++- profiling/src/bindings/mod.rs | 13 ++++ profiling/src/lib.rs | 29 ++++++++ profiling/src/php_ffi.h | 6 ++ profiling/src/profiling/mod.rs | 74 +++++++++++++++++++++ profiling/src/timeline.rs | 117 +++++++++++++++++++++++++++++++++ 6 files changed, 268 insertions(+), 3 deletions(-) diff --git a/profiling/build.rs b/profiling/build.rs index b5b35294ea..a87476a460 100644 --- a/profiling/build.rs +++ b/profiling/build.rs @@ -30,8 +30,9 @@ fn main() { let fibers = cfg_fibers(vernum); let run_time_cache = cfg_run_time_cache(vernum); let trigger_time_sample = cfg_trigger_time_sample(); + let zend_error_observer = cfg_zend_error_observer(vernum); - generate_bindings(php_config_includes, fibers); + generate_bindings(php_config_includes, fibers, zend_error_observer); build_zend_php_ffis( php_config_includes, post_startup_cb, @@ -39,6 +40,7 @@ fn main() { run_time_cache, fibers, trigger_time_sample, + zend_error_observer, vernum, ); @@ -83,6 +85,7 @@ const ZAI_H_FILES: &[&str] = &[ "../zend_abstract_interface/json/json.h", ]; +#[allow(clippy::too_many_arguments)] fn build_zend_php_ffis( php_config_includes: &str, post_startup_cb: bool, @@ -90,6 +93,7 @@ fn build_zend_php_ffis( run_time_cache: bool, fibers: bool, trigger_time_sample: bool, + zend_error_observer: bool, vernum: u64, ) { println!("cargo:rerun-if-changed=src/php_ffi.h"); @@ -135,6 +139,7 @@ fn build_zend_php_ffis( let fibers = if fibers { "1" } else { "0" }; let run_time_cache = if run_time_cache { "1" } else { "0" }; let trigger_time_sample = if trigger_time_sample { "1" } else { "0" }; + let zend_error_observer = if zend_error_observer { "1" } else { "0" }; #[cfg(feature = "stack_walking_tests")] let stack_walking_tests = "1"; @@ -150,6 +155,7 @@ fn build_zend_php_ffis( .define("CFG_RUN_TIME_CACHE", run_time_cache) .define("CFG_STACK_WALKING_TESTS", stack_walking_tests) .define("CFG_TRIGGER_TIME_SAMPLE", trigger_time_sample) + .define("CFG_ZEND_ERROR_OBSERVER", zend_error_observer) .includes([Path::new("../ext")]) .includes( str::replace(php_config_includes, "-I", "") @@ -190,7 +196,7 @@ impl bindgen::callbacks::ParseCallbacks for IgnoreMacros { } } -fn generate_bindings(php_config_includes: &str, fibers: bool) { +fn generate_bindings(php_config_includes: &str, fibers: bool, zend_error_observer: bool) { println!("cargo:rerun-if-changed=src/php_ffi.h"); println!("cargo:rerun-if-changed=../ext/handlers_api.h"); let ignored_macros = IgnoreMacros( @@ -206,12 +212,20 @@ fn generate_bindings(php_config_includes: &str, fibers: bool) { .collect(), ); - let clang_args = if fibers { + let mut clang_args = if fibers { vec!["-D", "CFG_FIBERS=1"] } else { vec!["-D", "CFG_FIBERS=0"] }; + if zend_error_observer { + clang_args.push("-D"); + clang_args.push("CFG_ZEND_ERROR_OBSERVER=1"); + } else { + clang_args.push("-D"); + clang_args.push("CFG_ZEND_ERROR_OBSERVER=0"); + } + let bindings = bindgen::Builder::default() .ctypes_prefix("libc") .clang_args(clang_args) @@ -296,6 +310,18 @@ fn cfg_trigger_time_sample() -> bool { env::var("CARGO_FEATURE_TRIGGER_TIME_SAMPLE").is_ok() } +fn cfg_zend_error_observer(vernum: u64) -> bool { + if vernum >= 80000 { + println!("cargo:rustc-cfg=zend_error_observer"); + if vernum < 80100 { + println!("cargo:rustc-cfg=zend_error_observer_80"); + } + true + } else { + false + } +} + fn cfg_php_major_version(vernum: u64) { let major_version = match vernum { 70000..=79999 => 7, diff --git a/profiling/src/bindings/mod.rs b/profiling/src/bindings/mod.rs index b4b9fe0814..b1734cc87c 100644 --- a/profiling/src/bindings/mod.rs +++ b/profiling/src/bindings/mod.rs @@ -169,12 +169,25 @@ pub struct ModuleEntry { Option ZendResult>, pub info_func: Option, pub version: *const u8, + /// Size of the module globals in bytes. In ZTS this will be the size TSRM will allocate per + /// thread for module globals. The function pointers in [`ModuleEntry::globals_ctor`] and + /// [`ModuleEntry::globals_dtor`] will only be called if this is a non-zero. pub globals_size: size_t, #[cfg(php_zts)] + /// Pointer to a `ts_rsrc_id` (which is a [`i32`]). For C-Extension this is created using the + /// `ZEND_DECLARE_MODULE_GLOBALS(module_name)` macro. + /// See pub globals_id_ptr: *mut ts_rsrc_id, #[cfg(not(php_zts))] + /// Pointer to the module globals struct in NTS mode pub globals_ptr: *mut c_void, + /// Constructor for module globals. + /// Be aware this will only be called in case [`ModuleEntry::globals_size`] is non-zero and for + /// ZTS you need to make sure [`ModuleEntry::globals_id_ptr`] is a valid, non-null pointer. pub globals_ctor: Option, + /// Destructor for module globals. + /// Be aware this will only be called in case [`ModuleEntry::globals_size`] is non-zero and for + /// ZTS you need to make sure [`ModuleEntry::globals_id_ptr`] is a valid, non-null pointer. pub globals_dtor: Option, pub post_deactivate_func: Option ZendResult>, pub module_started: c_int, diff --git a/profiling/src/lib.rs b/profiling/src/lib.rs index 02cb66a8e8..d3232c2898 100644 --- a/profiling/src/lib.rs +++ b/profiling/src/lib.rs @@ -31,6 +31,8 @@ use core::ptr; use ddcommon::{cstr, tag, tag::Tag}; use lazy_static::lazy_static; use libc::c_char; +#[cfg(php_zts)] +use libc::c_void; use log::{debug, error, info, trace, warn}; use once_cell::sync::{Lazy, OnceCell}; use profiling::{LocalRootSpanResourceMessage, Profiler, VmInterrupt}; @@ -128,6 +130,13 @@ extern "C" { pub static ddtrace_runtime_id: *const Uuid; } +/// We do not have any globals, but we need TSRM to call into GINIT and GSHUTDOWN to observe +/// spawning and joining threads. This will be pointed to by the [`ModuleEntry::globals_id_ptr`] in +/// the `zend_module_entry` and the TSRM will store it's thread-safe-resource id here. +/// See: +#[cfg(php_zts)] +static mut GLOBALS_ID_PTR: i32 = 0; + /// The function `get_module` is what makes this a PHP module. Please do not /// call this directly; only let it be called by the engine. Generally it is /// only called once, but if someone accidentally loads the module twice then @@ -160,6 +169,14 @@ pub extern "C" fn get_module() -> &'static mut zend::ModuleEntry { version: PROFILER_VERSION.as_ptr(), post_deactivate_func: Some(prshutdown), deps: DEPS.as_ptr(), + #[cfg(php_zts)] + globals_ctor: Some(ginit), + #[cfg(php_zts)] + globals_dtor: Some(gshutdown), + #[cfg(php_zts)] + globals_size: 1, + #[cfg(php_zts)] + globals_id_ptr: unsafe { &mut GLOBALS_ID_PTR }, ..Default::default() }); @@ -167,6 +184,18 @@ pub extern "C" fn get_module() -> &'static mut zend::ModuleEntry { unsafe { &mut *ptr::addr_of_mut!(MODULE) } } +#[cfg(php_zts)] +unsafe extern "C" fn ginit(_globals_ptr: *mut c_void) { + #[cfg(all(feature = "timeline", php_zts))] + timeline::timeline_ginit(); +} + +#[cfg(php_zts)] +unsafe extern "C" fn gshutdown(_globals_ptr: *mut c_void) { + #[cfg(all(feature = "timeline", php_zts))] + timeline::timeline_gshutdown(); +} + /* Important note on the PHP lifecycle: * Based on how some SAPIs work and the documentation, one might expect that * MINIT is called once per process, but this is only sort-of true. Some SAPIs diff --git a/profiling/src/php_ffi.h b/profiling/src/php_ffi.h index d2c1123e9b..15de64ec0a 100644 --- a/profiling/src/php_ffi.h +++ b/profiling/src/php_ffi.h @@ -16,6 +16,12 @@ #include +// Needed for `zend_observer_error_register` starting from PHP 8 +#if CFG_ZEND_ERROR_OBSERVER // defined by build.rs +#include +#include +#endif + // Profiling needs ZAI config for INI support. #include // And json to cleanup json state for graceful restart diff --git a/profiling/src/profiling/mod.rs b/profiling/src/profiling/mod.rs index f98fee3484..9b59d4fd9c 100644 --- a/profiling/src/profiling/mod.rs +++ b/profiling/src/profiling/mod.rs @@ -976,6 +976,80 @@ impl Profiler { } } + /// This function will collect a thread start or stop timeline event + #[cfg(all(feature = "timeline", php_zts))] + pub fn collect_thread_start_end(&self, now: i64, event: &'static str) { + let mut labels = Profiler::common_labels(1); + + labels.push(Label { + key: "event", + value: LabelValue::Str(std::borrow::Cow::Borrowed(event)), + }); + + let n_labels = labels.len(); + + match self.prepare_and_send_message( + vec![ZendFrame { + function: format!("[{event}]").into(), + file: None, + line: 0, + }], + SampleValues { + timeline: 1, + ..Default::default() + }, + labels, + now, + ) { + Ok(_) => { + trace!("Sent event '{event}' with {n_labels} labels to profiler.") + } + Err(err) => { + warn!("Failed to send event '{event}' with {n_labels} labels to profiler: {err}") + } + } + } + + /// This function can be called to collect any fatal errors + #[cfg(feature = "timeline")] + pub fn collect_fatal(&self, now: i64, file: String, line: u32, message: String) { + let mut labels = Profiler::common_labels(2); + + labels.push(Label { + key: "event", + value: LabelValue::Str("fatal".into()), + }); + labels.push(Label { + key: "message", + value: LabelValue::Str(message.into()), + }); + + let n_labels = labels.len(); + + match self.prepare_and_send_message( + vec![ZendFrame { + function: "[fatal]".into(), + file: Some(file), + line, + }], + SampleValues { + timeline: 1, + ..Default::default() + }, + labels, + now, + ) { + Ok(_) => { + trace!("Sent event 'fatal error' with {n_labels} labels to profiler.") + } + Err(err) => { + warn!( + "Failed to send event 'fatal error' with {n_labels} labels to profiler: {err}" + ) + } + } + } + /// This function can be called to collect any kind of inactivity that is happening #[cfg(feature = "timeline")] pub fn collect_idle(&self, now: i64, duration: i64, reason: &'static str) { diff --git a/profiling/src/timeline.rs b/profiling/src/timeline.rs index 596c887a14..ab69528ca1 100644 --- a/profiling/src/timeline.rs +++ b/profiling/src/timeline.rs @@ -6,6 +6,8 @@ use crate::zend::{ use crate::REQUEST_LOCALS; use libc::c_char; use log::{error, trace}; +#[cfg(php_zts)] +use std::cell::Cell; use std::cell::RefCell; use std::ffi::CStr; use std::ptr; @@ -30,11 +32,17 @@ static mut FRANKENPHP_HANDLE_REQUEST_HANDLER: InternalFunctionHandler = None; thread_local! { static IDLE_SINCE: RefCell = RefCell::new(Instant::now()); + #[cfg(php_zts)] + static IS_NEW_THREAD: Cell = const { Cell::new(false) }; } enum State { Idle, Sleeping, + #[cfg(php_zts)] + ThreadStart, + #[cfg(php_zts)] + ThreadStop, } impl State { @@ -42,6 +50,10 @@ impl State { match self { State::Idle => "idle", State::Sleeping => "sleeping", + #[cfg(php_zts)] + State::ThreadStart => "thread start", + #[cfg(php_zts)] + State::ThreadStop => "thread stop", } } } @@ -144,9 +156,48 @@ unsafe extern "C" fn ddog_php_prof_frankenphp_handle_request( } } +/// Will be called by the ZendEngine on all errors happening. This is a PHP 8 API +#[cfg(zend_error_observer)] +unsafe extern "C" fn ddog_php_prof_zend_error_observer( + _type: i32, + #[cfg(zend_error_observer_80)] file: *const c_char, + #[cfg(not(zend_error_observer_80))] file: *mut zend::ZendString, + line: u32, + message: *mut zend::ZendString, +) { + // we are only interested in FATAL errors + + if _type & zend::E_FATAL_ERRORS as i32 == 0 { + return; + } + + #[cfg(zend_error_observer_80)] + let file = unsafe { + let mut len = 0; + let file = file as *const u8; + while *file.add(len) != 0 { + len += 1; + } + std::str::from_utf8_unchecked(std::slice::from_raw_parts(file, len)).to_string() + }; + #[cfg(not(zend_error_observer_80))] + let file = unsafe { zend::zai_str_from_zstr(file.as_mut()).into_string() }; + + let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); + if let Some(profiler) = Profiler::get() { + let now = now.as_nanos() as i64; + profiler.collect_fatal(now, file, line, unsafe { + zend::zai_str_from_zstr(message.as_mut()).into_string() + }); + } +} + /// This functions needs to be called in MINIT of the module pub fn timeline_minit() { unsafe { + #[cfg(zend_error_observer)] + zend::zend_observer_error_register(Some(ddog_php_prof_zend_error_observer)); + // register our function in the `gc_collect_cycles` pointer PREV_GC_COLLECT_CYCLES = zend::gc_collect_cycles; zend::gc_collect_cycles = Some(ddog_php_prof_gc_collect_cycles); @@ -231,6 +282,35 @@ pub unsafe fn timeline_rinit() { } }); }); + + #[cfg(php_zts)] + IS_NEW_THREAD.with(|cell| { + if !cell.get() { + return; + } + cell.set(false); + REQUEST_LOCALS.with(|cell| { + let is_timeline_enabled = cell + .try_borrow() + .map(|locals| locals.system_settings().profiling_timeline_enabled) + .unwrap_or(false); + + if !is_timeline_enabled { + return; + } + + if let Some(profiler) = Profiler::get() { + profiler.collect_thread_start_end( + // Safety: checked for `is_err()` above + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos() as i64, + State::ThreadStart.as_str(), + ); + } + }); + }); } /// This function is run during the P-RSHUTDOWN phase and resets the `IDLE_SINCE` thread local to @@ -290,6 +370,43 @@ pub(crate) unsafe fn timeline_mshutdown() { } }); }); + #[cfg(php_zts)] + timeline_gshutdown(); +} + +#[cfg(php_zts)] +pub(crate) fn timeline_ginit() { + // During GINIT in "this" thread, the request locals are not initialized, which happens in + // RINIT, so we currently do not know if profile is enabled at all and if, if timeline is + // enabled. That's why we raise this flag here and read it in RINIT. + IS_NEW_THREAD.with(|cell| { + cell.set(true); + }); +} + +#[cfg(php_zts)] +pub(crate) fn timeline_gshutdown() { + REQUEST_LOCALS.with(|cell| { + let is_timeline_enabled = cell + .try_borrow() + .map(|locals| locals.system_settings().profiling_timeline_enabled) + .unwrap_or(false); + + if !is_timeline_enabled { + return; + } + + if let Some(profiler) = Profiler::get() { + profiler.collect_thread_start_end( + // Safety: checked for `is_err()` above + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos() as i64, + State::ThreadStop.as_str(), + ); + } + }); } /// This function gets called when a `eval()` is being called. This is done by letting the