Skip to content

Commit

Permalink
Merge pull request #479 from pythonspeed/474-recent-versions-likely-h…
Browse files Browse the repository at this point in the history
…ave-source-code-less-jupyter-tracebacks

Recent versions likely have source-code-less Jupyter traceback
  • Loading branch information
itamarst authored Jan 20, 2023
2 parents 0d58901 + 39b700c commit 7a51d82
Show file tree
Hide file tree
Showing 12 changed files with 478 additions and 310 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Release notes

## 2023.1.0 (2023-01-20)

### Bugfixes

- Fix regression where source code would sometimes be missing from flamegraphs, most notably in Jupyter profiling. ([#474](https://github.com/pythonspeed/filprofiler/issues/474))

## 2022.11.0 (2022-11-07)

### Features
Expand Down
65 changes: 57 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion filpreload/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ path = "../memapi"
features = []

[dependencies.pyo3]
version = "0.17"
version = "0.18"
default-features = false

[build-dependencies]
Expand Down
79 changes: 71 additions & 8 deletions filpreload/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
use parking_lot::Mutex;
use pymemprofile_api::memorytracking::LineNumberInfo::LineNumber;
use pymemprofile_api::memorytracking::{
AllocationTracker, CallSiteId, Callstack, FunctionId, VecFunctionLocations, PARENT_PROCESS,
AllocationTracker, CallSiteId, Callstack, FunctionId, IdentityCleaner, VecFunctionLocations,
PARENT_PROCESS,
};
use pymemprofile_api::oom::{InfiniteMemory, OutOfMemoryEstimator, RealMemoryInfo};
use std::cell::RefCell;
use std::ffi::CStr;
use std::os::raw::{c_char, c_int, c_void};
use std::path::Path;

#[macro_use]
extern crate lazy_static;
Expand Down Expand Up @@ -87,6 +89,7 @@ fn set_current_callstack(callstack: &Callstack) {
}

extern "C" {
fn _exit(exit_code: std::os::raw::c_int);
fn free(address: *mut c_void);
}

Expand Down Expand Up @@ -152,7 +155,23 @@ fn add_allocation(

if oom {
// Uh-oh, we're out of memory.
allocations.oom_dump();
eprintln!(
"=fil-profile= We'll try to dump out SVGs. Note that no HTML file will be written."
);
let default_path = allocations.default_path.clone();
// Release the lock, since dumping the flamegraph will reacquire it:
drop(tracker_state);

dump_to_flamegraph(
&default_path,
false,
"out-of-memory",
"Current allocations at out-of-memory time",
false,
);
unsafe {
_exit(53);
}
};
Ok(())
}
Expand Down Expand Up @@ -180,11 +199,55 @@ fn reset(default_path: String) {
tracker_state.allocations.reset(default_path);
}

/// Render the tracked allocations as flamegraph SVG(s) under `path`.
///
/// * `path` - directory the flamegraph files are written into.
/// * `peak` - if true, dump peak allocations; otherwise current allocations.
/// * `base_filename` - filename stem for the generated output files.
/// * `title` - human-readable title; the allocated size in MiB is appended.
/// * `to_be_post_processed` - forwarded to `write_flamegraphs`; presumably
///   controls a later post-processing pass — TODO confirm in memapi.
fn dump_to_flamegraph(
path: &str,
peak: bool,
base_filename: &str,
title: &str,
to_be_post_processed: bool,
) {
// In order to render the flamegraph, we want to load source code using
// Python's linecache. That means calling into Python, which might release
// the GIL, allowing another thread to run, and it will try to allocate
// and hit the TRACKER_STATE mutex. And now we're deadlocked. So we make
// sure flamegraph rendering does not require TRACKER_STATE to be locked:
// everything needed from the tracker is copied out inside this scoped
// block, and the mutex guard is dropped at the closing brace below.
let (allocated_bytes, flamegraph_callstacks) = {
let mut tracker_state = TRACKER_STATE.lock();
let allocations = &mut tracker_state.allocations;

// Print warning if we're missing allocations.
allocations.warn_on_problems(peak);
let allocated_bytes = if peak {
allocations.get_peak_allocated_bytes()
} else {
allocations.get_current_allocated_bytes()
};
let flamegraph_callstacks = allocations.combine_callstacks(peak, IdentityCleaner);
(allocated_bytes, flamegraph_callstacks)
};
// TRACKER_STATE guard is dropped here; rendering below must not re-enter it.

eprintln!("=fil-profile= Preparing to write to {}", path);
let directory_path = Path::new(path);

// Append the total size (in MiB) to the caller-supplied title.
let title = format!(
"{} ({:.1} MiB)",
title,
allocated_bytes as f64 / (1024.0 * 1024.0)
);
let subtitle = r#"Made with the Fil profiler. <a href="https://pythonspeed.com/fil/" style="text-decoration: underline;" target="_parent">Try it on your code!</a>"#;
flamegraph_callstacks.write_flamegraphs(
directory_path,
base_filename,
&title,
subtitle,
"bytes",
to_be_post_processed,
)
}

/// Dump all callstacks in peak memory usage to format used by flamegraph.
fn dump_peak_to_flamegraph(path: &str) {
let mut tracker_state = TRACKER_STATE.lock();
let allocations = &mut tracker_state.allocations;
allocations.dump_peak_to_flamegraph(path);
dump_to_flamegraph(path, true, "peak-memory", "Peak Tracked Memory Usage", true);
}

#[no_mangle]
Expand Down Expand Up @@ -318,8 +381,8 @@ extern "C" {
fn is_initialized() -> c_int;

// Increment/decrement reentrancy counter.
fn fil_increment_reentrancy();
fn fil_decrement_reentrancy();
//fn fil_increment_reentrancy();
//fn fil_decrement_reentrancy();
}

struct FilMmapAPI;
Expand All @@ -337,7 +400,7 @@ impl pymemprofile_api::mmap::MmapAPI for FilMmapAPI {
}

fn is_initialized(&self) -> bool {
return unsafe { is_initialized() == 1 };
unsafe { is_initialized() == 1 }
}
}

Expand Down
17 changes: 7 additions & 10 deletions filprofiler/_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,16 @@ def render_report(output_path: str, now: datetime) -> str:
<h2>Profiling result</h2>
<div style="text-align: center;"><p><input type="button" onclick="fullScreen('#peak');" value="Full screen"> · <a href="peak-memory.svg" target="_blank"><button>Open in new window</button></a></p>
<iframe id="peak" src="peak-memory.svg" width="100%" height="400" scrolling="auto" frameborder="0"></iframe>
<iframe id="peak" src="peak-memory.svg" width="100%" height="700" scrolling="auto" frameborder="0"></iframe>
</div>
<br>
<blockquote class="center">
<p style="text-align: center;"><em>Check out my other project:</em></p>
<h3>Find memory and performance bottlenecks in production!</h3>
<p>When your data pipeline is too slow in production, reproducing the problem
on your laptop is hard or impossible—which means identifying and fixing the problem can be tricky.</p>
<p>What if you knew the cause of the problem as soon as you realized it was happening?</p>
<p>That's how
<strong><a href="https://sciagraph.com/">the Sciagraph profiler</a></strong> can help you:
it's designed to <strong>find performance
and memory bottlenecks by continuously profiling in production.</strong></p></blockquote>
<h3>Find performance bottlenecks in your data processing jobs with the Sciagraph profiler</h3>
<p><strong><a href="https://sciagraph.com/">The Sciagraph profiler</a></strong> can help you
<strong>find performance
and memory bottlenecks with low overhead, so you can use it in both development and production.</strong></p>
<p>Unlike Fil, it includes performance profiling. Sciagraph's memory profiling uses sampling so it runs faster than Fil, but unlike Fil
it can't accurately profile small allocations or run natively on macOS.</p></blockquote>
<br>
<br>
<div style="text-align: center;"><p><input type="button" onclick="fullScreen('#peak-reversed');" value="Full screen"> ·
Expand Down
10 changes: 5 additions & 5 deletions memapi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "pymemprofile_api"
version = "0.1.0"
authors = ["Itamar Turner-Trauring <[email protected]>"]
edition = "2018"
edition = "2021"
license = "Apache-2.0"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -18,6 +18,7 @@ once_cell = "1.17"
libloading = "0.7"
libc = "0.2"
serde = {version = "1", features = ["derive"] }
parking_lot = "0.12.1"

[dependencies.inferno]
version = "0.11"
Expand All @@ -29,9 +30,7 @@ default-features = false
features = ["memory", "process"]

[dependencies.pyo3]
version = "0.17"
default-features = false
features = []
version = "0.18"

[target.'cfg(target_os = "linux")'.dependencies]
cgroups-rs = "0.3.0"
Expand All @@ -40,8 +39,9 @@ cgroups-rs = "0.3.0"
proptest = "1.0"
proc-maps = "0.3.0"
tempfile = "3.3.0"
rusty-fork = "0.3.0"

[features]
default = []
# Optimize for the production version of Fil.
fil4prod = []
fil4prod = []
Loading

0 comments on commit 7a51d82

Please sign in to comment.