diff --git a/docs/man.md b/docs/man.md index 7e1ebd1..fb9f438 100644 --- a/docs/man.md +++ b/docs/man.md @@ -79,11 +79,6 @@ is not specified. The following options are less commonly used. -**-T**, **--timestamp** -: Print the timestamp for each stack. This is useful for generating "flame - chart" profiles. Generally regular flame graphs are encouraged, since the - timestamp flame charts are harder to use. - **--abi**=*VERSION* : Force a particular Python ABI. This option should only be needed in edge cases when profiling embedded Python builds (e.g. uWSGI), and only if @@ -91,6 +86,10 @@ The following options are less commonly used. two digit integer consisting of the Python major and minor version, e.g. 27 for Python 2.7 or 36 for Python 3.6. +**--flamechart** +: Print the timestamp for each stack. This is useful for generating "flame + chart" profiles. Generally regular flame graphs are encouraged, since the + timestamp flame charts are harder to use. # ONLINE DOCUMENTATION diff --git a/docs/pyflame.man b/docs/pyflame.man index fe66c65..27e8aa2 100644 --- a/docs/pyflame.man +++ b/docs/pyflame.man @@ -1,6 +1,6 @@ .\" Automatically generated by Pandoc 1.19.1 .\" -.TH "PYFLAME" "1" "July 2017" "" "" +.TH "PYFLAME" "1" "August 2017" "" "" .hy .SH NAME .PP @@ -106,14 +106,6 @@ Enable profiling multi\-threaded Python apps. .PP The following options are less commonly used. .TP -.B \f[B]\-T\f[], \f[B]\-\-timestamp\f[] -Print the timestamp for each stack. -This is useful for generating "flame chart" profiles. -Generally regular flame graphs are encouraged, since the timestamp flame -charts are harder to use. -.RS -.RE -.TP .B \f[B]\-\-abi\f[]=\f[I]VERSION\f[] Force a particular Python ABI. This option should only be needed in edge cases when profiling embedded @@ -125,6 +117,14 @@ major and minor version, e.g. 27 for Python 2.7 or 36 for Python 3.6. .RS .RE +.TP +.B \f[B]\-\-flamechart\f[] +Print the timestamp for each stack. +This is useful for generating "flame chart" profiles. +Generally regular flame graphs are encouraged, since the timestamp flame +charts are harder to use. +.RS +.RE .SH ONLINE DOCUMENTATION .PP You can find the complete documentation online at: diff --git a/docs/usage.rst b/docs/usage.rst index f4ca9be..f1e5f01 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -109,14 +109,14 @@ stamp output format, useful for generating `"flame charts" inverted flame graph) that are viewable in Chrome. In some cases, the flame chart format is easier to understand. -To generate a flame chart, use ``pyflame -T``, and then pass the output to -``utils/flame-chart-json`` to convert the output into the JSON format required -by the Chrome CPU profiler: +To generate a flame chart, use ``pyflame --flamechart``, and then pass the +output to ``utils/flame-chart-json`` to convert the output into the JSON format +required by the Chrome CPU profiler: .. code:: bash # Generate flame chart data viewable in Chrome. - pyflame -T [other pyflame options] | flame-chart-json > foo.cpuprofile + pyflame --flamechart [other pyflame options] | flame-chart-json > foo.cpuprofile Read the following `Chrome DevTools article `__ diff --git a/src/Makefile.am b/src/Makefile.am index cdd54fb..11d8ab6 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -9,7 +9,7 @@ # flags, and frob3{4,6}.cc are compiled with python3.4/3.6 flags. bin_PROGRAMS = pyflame -pyflame_SOURCES = aslr.cc frame.cc thread.cc namespace.cc posix.cc ptrace.cc pyflame.cc pyfrob.cc symbol.cc +pyflame_SOURCES = aslr.cc frame.cc thread.cc namespace.cc posix.cc prober.cc ptrace.cc pyflame.cc pyfrob.cc symbol.cc pyflame_LDADD = noinst_LTLIBRARIES = diff --git a/src/prober.cc b/src/prober.cc new file mode 100644 index 0000000..34aed6a --- /dev/null +++ b/src/prober.cc @@ -0,0 +1,416 @@ +// Copyright 2017 Evan Klitzke +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./prober.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "./config.h" +#include "./exc.h" +#include "./ptrace.h" +#include "./pyfrob.h" +#include "./symbol.h" +#include "./thread.h" + +// Microseconds in a second. +static const char usage_str[] = + ("Usage: pyflame [options] -p PID\n" + " pyflame [options] -t command arg1 arg2...\n" + "\n" + "Common Options:\n" +#ifdef ENABLE_THREADS + " --threads Enable multi-threading support\n" +#endif + " -h, --help Show help\n" + " -o, --output=PATH Output to file path\n" + " -p, --pid=PID The PID to trace\n" + " -r, --rate=RATE Sample rate, as a fractional value of seconds " + "(default 0.001)\n" + " -s, --seconds=SECS How many seconds to run for (default 1)\n" + " -t, --trace Trace a child process\n" + " -v, --version Show the version\n" + " -x, --exclude-idle Exclude idle time from statistics\n" + "\n" + "Advanced Options:\n" + " --abi Force a particular Python ABI (26, 34, 36)\n" + " --flamechart Include timestamps for generating Chrome " + "\"flamecharts\"\n"); + +static inline std::chrono::microseconds ToMicroseconds(double val) { + return std::chrono::microseconds{static_cast(val * 1000000)}; +} + +namespace pyflame { + +typedef std::unordered_map buckets_t; + +// Prints all stack traces +static void PrintFrames(std::ostream &out, + const std::vector &call_stacks, + size_t idle_count) { + if (idle_count) { + out << "(idle) " << idle_count << "\n"; + } + // Put the call stacks into buckets + buckets_t buckets; + for (const auto &call_stack : call_stacks) { + auto bucket = buckets.find(call_stack.frames); + if (bucket == buckets.end()) { + buckets.insert(bucket, {call_stack.frames, 1}); + } else { + bucket->second++; + } + } + // Process the frames + for (const auto &kv : buckets) { + if (kv.first.empty()) { + std::cerr << "fatal error\n"; + return; + } + auto last = kv.first.rend(); + last--; + for (auto it = kv.first.rbegin(); it != last; ++it) { + out << *it << ";"; + } + out << *last << " " << kv.second << "\n"; + } +} + +// Prints all stack traces with timestamps +static void PrintFramesTS(std::ostream &out, + const std::vector &call_stacks) { + for (const auto &call_stack : call_stacks) { + out << std::chrono::duration_cast( + call_stack.ts.time_since_epoch()) + .count() + << "\n"; + // Handle idle + if (call_stack.frames.empty()) { + out << "(idle)\n"; + continue; + } + // Print the call stack + for (auto it = call_stack.frames.rbegin(); it != call_stack.frames.rend(); + ++it) { + out << *it << ";"; + } + out << "\n"; + } +} + +int Prober::ParseOpts(int argc, char **argv) { + static const char short_opts[] = "ho:p:r:s:tvx"; + static struct option long_opts[] = { + {"abi", required_argument, 0, 'a'}, + {"help", no_argument, 0, 'h'}, + {"rate", required_argument, 0, 'r'}, + {"seconds", required_argument, 0, 's'}, +#if ENABLE_THREADS + {"threads", no_argument, 0, 'L'}, +#endif + {"output", required_argument, 0, 'o'}, + {"pid", required_argument, 0, 'p'}, + {"trace", no_argument, 0, 't'}, + {"flamechart", no_argument, 0, 'T'}, + {"version", no_argument, 0, 'v'}, + {"exclude-idle", no_argument, 0, 'x'}, + {0, 0, 0, 0} + }; + + long abi_version; + for (;;) { + int c = getopt_long(argc, argv, short_opts, long_opts, nullptr); + if (c == -1) { + break; + } + switch (c) { + case 'a': + abi_version = std::strtol(optarg, nullptr, 10); + switch (abi_version) { + case 26: + case 27: + abi_ = PyABI::Py26; + break; + case 34: + case 35: + abi_ = PyABI::Py34; + break; + case 36: + abi_ = PyABI::Py36; + break; + default: + std::cerr << "Unknown or unsupported ABI version: " << abi_version + << "\n"; + return 1; + break; + } + break; + case 'h': + std::cout << PYFLAME_VERSION_STR << "\n\n" << usage_str; + return 0; + break; +#ifdef ENABLE_THREADS + case 'L': + enable_threads_ = true; + break; +#endif + case 'p': + if ((pid_ = ParsePid(optarg)) == -1) { + return 1; + } + break; + case 'r': + sample_rate_ = std::stod(optarg); + break; + case 's': + seconds_ = std::stod(optarg); + break; + case 't': + trace_ = true; + seconds_ = -1; + goto finish_arg_parse; + break; + case 'T': + include_ts_ = true; + break; + case 'v': + std::cout << PYFLAME_VERSION_STR << "\n"; + return 0; + break; + case 'x': + include_idle_ = false; + break; + case 'o': + output_file_ = optarg; + break; + case '?': + // getopt_long should already have printed an error message + break; + default: + abort(); + } + } +finish_arg_parse: + if (trace_) { + if (pid_ != -1) { + std::cerr << "Options -t and -p are not mutually compatible.\n"; + return 1; + } + if (optind == argc) { + std::cerr << usage_str; + return 1; + } + trace_target_ = argv[optind]; + } else if (pid_ == -1) { + // Users should use -p to supply the PID to trace. However, older versions + // of Pyflame used a convention where the PID to trace was the final + // argument to the pyflame command. This code path handles this legacy use + // case, to preserve backward compatibility. + if (optind != argc - 1 || (pid_ = ParsePid(argv[optind])) == -1) { + std::cerr << usage_str; + return 1; + } + std::cerr << "WARNING: Specifying a PID to trace without -p is deprecated; " + "see Pyflame issue #99 for details.\n "; + } + interval_ = ToMicroseconds(sample_rate_); + return -1; +} + +int Prober::InitiatePtrace(char **argv) { + if (trace_) { + if (trace_target_.find("pyflame") != std::string::npos) { + std::cerr << "You tried to pyflame a pyflame, naughty!\n"; + return 1; + } + // In trace mode, all of the remaining arguments are a command to run. We + // fork and have the child run the command; the parent traces. + pid_ = fork(); + if (pid_ == -1) { + perror("fork()"); + return 1; + } else if (pid_ == 0) { + // Child: request to be traced. + PtraceTraceme(); + if (execvp(trace_target_.c_str(), argv + optind)) { + std::cerr << "execvp() failed for: " << trace_target_ + << ", err = " << strerror(errno) << "\n"; + return 1; + } + } else { + // Parent: we trace the child until it's exec'ed the new process before + // proceeding. For a dynamically linked Python build, there's still a race + // condition between when the exec() happens and when symbols are + // available. But there's no point in polling the child until it's at + // least had a chance to run exec. + pid_t child = waitpid(0, nullptr, 0); + assert(child == pid_); + PtraceSetOptions(pid_, PTRACE_O_TRACEEXEC); + PtraceCont(pid_); + int status = 0; + while (!SawEventExec(status)) { + pid_t p = waitpid(-1, &status, 0); + if (p == -1) { + perror("waitpid()"); + return 1; + } + if (WIFEXITED(status)) { + std::cerr << "Child process exited with status: " + << WEXITSTATUS(status) << "\n"; + return 1; + } + } + // We can only use PtraceInterrupt, used later in the main loop, if the + // process was seized. So we reattach and seize. + PtraceDetach(pid_); + PtraceSeize(pid_); + } + } else { + try { + PtraceSeize(pid_); + } catch (const PtraceException &err) { + std::cerr << "Failed to seize PID " << pid_ << "\n"; + return 1; + } + } + PtraceInterrupt(pid_); + return 0; +} + +// Main loop to probe the Python process. +int Prober::ProbeLoop(const PyFrob &frobber) { + std::unique_ptr file_ptr; + std::ostream *output; + if (output_file_.empty()) { + output = &std::cout; + } else { + file_ptr.reset(new std::ofstream); + file_ptr->open(output_file_, std::ios::out | std::ios::trunc); + if (!file_ptr->is_open()) { + std::cerr << "cannot open file \"" << output_file_ << "\" as output\n"; + return 1; + } + output = file_ptr.get(); + } + + std::vector call_stacks; + size_t idle_count = 0; + bool check_end = seconds_ >= 0; + auto end = std::chrono::system_clock::now() + ToMicroseconds(seconds_); + try { + for (;;) { + auto now = std::chrono::system_clock::now(); + std::vector threads = frobber.GetThreads(); + + // Only true for non-GIL stacks that we couldn't find a way to profile + // Currently this means stripped builds on non-AMD64 archs + if (threads.empty() && include_idle_) { + idle_count++; + // Timestamp empty call stacks only if required. Since lots of time the + // process will be idle, this is a good optimization to have. + if (include_ts_) { + call_stacks.push_back({now, {}}); + } + } + + for (const auto &thread : threads) { + call_stacks.push_back({now, thread.frames()}); + } + + if ((check_end) && (now + interval_ >= end)) { + break; + } + PtraceCont(pid_); + std::this_thread::sleep_for(interval_); + PtraceInterrupt(pid_); + } + if (!include_ts_) { + PrintFrames(*output, call_stacks, idle_count); + } else { + PrintFramesTS(*output, call_stacks); + } + } catch (const PtraceException &exc) { + // If the process terminates early then we just print the stack traces up + // until that point in time. + if (!call_stacks.empty() || idle_count) { + if (!include_ts_) { + PrintFrames(*output, call_stacks, idle_count); + } else { + PrintFramesTS(*output, call_stacks); + } + } else { + std::cerr << exc.what() << std::endl; + return 1; + } + } catch (const std::exception &exc) { + std::cerr << exc.what() << std::endl; + return 1; + } + return 0; +} + +int Prober::FindSymbols(PyFrob *frobber) { + // When tracing a dynamically linked Python build, it may take a while for + // ld.so to actually load symbols into the process. Therefore we retry probing + // in a loop, until the symbols are loaded. A more reliable way of doing this + // would be to break at entry to a known static function (e.g. Py_Main), but + // this isn't reliable in all cases. For instance, /usr/bin/python{,3} will + // start at Py_Main, but uWSGI will not. + try { + for (size_t i = 0;;) { + if (frobber->DetectABI(abi_)) { + if (++i >= MaxRetries()) { + std::cerr << "Failed to locate libpython within timeout period.\n"; + return 1; + } + PtraceCont(pid_); + std::this_thread::sleep_for(interval_); + PtraceInterrupt(pid_); + continue; + } + break; + } + } catch (const FatalException &exc) { + std::cerr << exc.what() << "\n"; + return 1; + } + return 0; +} + +pid_t Prober::ParsePid(const char *pid_str) { + long pid = std::strtol(pid_str, nullptr, 10); + if (pid <= 0 || pid > std::numeric_limits::max()) { + std::cerr << "PID " << pid << " is out of valid PID range.\n"; + return -1; + } + return static_cast(pid); +} +} // namespace pyflame diff --git a/src/prober.h b/src/prober.h new file mode 100644 index 0000000..8bf3a12 --- /dev/null +++ b/src/prober.h @@ -0,0 +1,74 @@ +// Copyright 2017 Evan Klitzke +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include "./pyfrob.h" +#include "./symbol.h" + +// Maximum number of times to retry checking for Python symbols when -p is used. +#define MAX_ATTACH_RETRIES 1 + +// Maximum number of times to retry checking for Python symbols when -t is used. +#define MAX_TRACE_RETRIES 50 + +namespace pyflame { + +class Prober { + public: + Prober() + : abi_(PyABI::Unknown), + pid_(-1), + trace_(false), + include_idle_(true), + include_ts_(false), + enable_threads_(false), + seconds_(1), + sample_rate_(0.001) {} + Prober(const Prober &other) = delete; + + int ParseOpts(int argc, char **argv); + + int InitiatePtrace(char **argv); + + int FindSymbols(PyFrob *frobber); + + int ProbeLoop(const PyFrob &frobber); + + inline bool enable_threads() const { return enable_threads_; } + inline pid_t pid() const { return pid_; } + + private: + PyABI abi_; + pid_t pid_; + bool trace_; + bool include_idle_; + bool include_ts_; + bool enable_threads_; + double seconds_; + double sample_rate_; + std::chrono::microseconds interval_; + std::string output_file_; + std::string trace_target_; + + pid_t ParsePid(const char *pid_str); + + inline size_t MaxRetries() const { + return trace_ ? MAX_TRACE_RETRIES : MAX_ATTACH_RETRIES; + } +}; +} // namespace pyflame diff --git a/src/pyflame.cc b/src/pyflame.cc index 29ce971..4a22960 100644 --- a/src/pyflame.cc +++ b/src/pyflame.cc @@ -12,404 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include -#include - -#include "./config.h" -#include "./exc.h" -#include "./ptrace.h" -#include "./pyfrob.h" -#include "./symbol.h" -#include "./thread.h" - -// Maximum number of times to retry checking for Python symbols when -p is used. -#define MAX_ATTACH_RETRIES 1 - -// Maximum number of times to retry checking for Python symbols when -t is used. -#define MAX_TRACE_RETRIES 50 -// Microseconds in a second. -#define MICROSECONDS_PER_SEC 1000000 +#include "./prober.h" using namespace pyflame; -typedef std::unordered_map buckets_t; - -static pid_t ParsePid(const char *pid_str) { - long pid = std::strtol(pid_str, nullptr, 10); - if (pid <= 0 || pid > std::numeric_limits::max()) { - std::cerr << "PID " << pid << " is out of valid PID range.\n"; - return -1; - } - return static_cast(pid); -} - -// Prints all stack traces -static void PrintFrames(std::ostream &out, - const std::vector &call_stacks, - size_t idle_count) { - if (idle_count) { - out << "(idle) " << idle_count << "\n"; - } - // Put the call stacks into buckets - buckets_t buckets; - for (const auto &call_stack : call_stacks) { - auto bucket = buckets.find(call_stack.frames); - if (bucket == buckets.end()) { - buckets.insert(bucket, {call_stack.frames, 1}); - } else { - bucket->second++; - } - } - // process the frames - for (const auto &kv : buckets) { - if (kv.first.empty()) { - std::cerr << "fatal error\n"; - return; - } - auto last = kv.first.rend(); - last--; - for (auto it = kv.first.rbegin(); it != last; ++it) { - out << *it << ";"; - } - out << *last << " " << kv.second << "\n"; - } -} - -// Prints all stack traces with timestamps -static void PrintFramesTS(std::ostream &out, - const std::vector &call_stacks) { - for (const auto &call_stack : call_stacks) { - out << std::chrono::duration_cast( - call_stack.ts.time_since_epoch()) - .count() - << "\n"; - // Handle idle - if (call_stack.frames.empty()) { - out << "(idle)\n"; - continue; - } - // Print the call stack - for (auto it = call_stack.frames.rbegin(); it != call_stack.frames.rend(); - ++it) { - out << *it << ";"; - } - out << "\n"; - } -} - -static inline bool IsPyflame(const std::string &str) { - return str.find("pyflame") != std::string::npos; -} - int main(int argc, char **argv) { - PyABI abi{}; - pid_t pid = -1; - long abi_version; - bool trace = false; - bool include_idle = true; - bool include_ts = false; - bool enable_threads = false; - double seconds = 1; - double sample_rate = 0.001; - std::ofstream output_file; - - static const char usage_str[] = - ("Usage: pyflame [options] -p PID\n" - " pyflame [options] -t command arg1 arg2...\n" - "\n" - "General Options:\n" - " --abi Force a particular Python ABI (26, 34, 36)\n" - " --threads Enable multi-threading support\n" - " -h, --help Show help\n" - " -p, --pid=PID The PID to trace\n" - " -s, --seconds=SECS How many seconds to run for (default 1)\n" - " -r, --rate=RATE Sample rate, as a fractional value of seconds " - "(default 0.001)\n" - " -o, --output=PATH Output to file path\n" - " -t, --trace Trace a child process\n" - " -T, --timestamp Include timestamps for each stacktrace\n" - " -v, --version Show the version\n" - " -x, --exclude-idle Exclude idle time from statistics\n"); - - static const char short_opts[] = "ho:p:r:s:tTvx"; - static struct option long_opts[] = { - {"abi", required_argument, 0, 'a'}, - {"help", no_argument, 0, 'h'}, - {"rate", required_argument, 0, 'r'}, - {"seconds", required_argument, 0, 's'}, -#if ENABLE_THREADS - {"threads", no_argument, 0, 'L'}, -#endif - {"output", required_argument, 0, 'o'}, - {"pid", required_argument, 0, 'p'}, - {"trace", no_argument, 0, 't'}, - {"timestamp", no_argument, 0, 'T'}, - {"version", no_argument, 0, 'v'}, - {"exclude-idle", no_argument, 0, 'x'}, - {0, 0, 0, 0} - }; - - for (;;) { - int c = getopt_long(argc, argv, short_opts, long_opts, nullptr); - if (c == -1) { - break; - } - switch (c) { - case 'a': - abi_version = std::strtol(optarg, nullptr, 10); - switch (abi_version) { - case 26: - case 27: - abi = PyABI::Py26; - break; - case 34: - case 35: - abi = PyABI::Py34; - break; - case 36: - abi = PyABI::Py36; - break; - default: - std::cerr << "Unknown or unsupported ABI version: " << abi_version - << "\n"; - return 1; - break; - } - break; - case 'h': - std::cout << PYFLAME_VERSION_STR << "\n\n" << usage_str; - return 0; - break; -#ifdef ENABLE_THREADS - case 'L': - enable_threads = true; - break; -#endif - case 'p': - if ((pid = ParsePid(optarg)) == -1) { - return 1; - } - break; - case 'r': - sample_rate = std::stod(optarg); - break; - case 's': - seconds = std::stod(optarg); - break; - case 't': - trace = true; - seconds = -1; - goto finish_arg_parse; - break; - case 'T': - include_ts = true; - break; - case 'v': - std::cout << PYFLAME_VERSION_STR << "\n"; - return 0; - break; - case 'x': - include_idle = false; - break; - case 'o': - output_file.open(optarg, std::ios::out | std::ios::trunc); - if (!output_file.is_open()) { - std::cerr << "cannot open file \"" << optarg << "\" as output\n"; - return 1; - } - break; - case '?': - // getopt_long should already have printed an error message - break; - default: - abort(); - } + Prober prober; + int ret = prober.ParseOpts(argc, argv); + if (ret != -1) { + return ret; } -finish_arg_parse: - if (trace && pid != -1) { - std::cerr << "Options -t and -p are not mutually compatible.\n"; + if (prober.InitiatePtrace(argv)) { return 1; } - std::ostream *output = &std::cout; - if (output_file.is_open()) { - output = &output_file; - } - if (trace) { - if (optind == argc) { - std::cerr << usage_str; - return 1; - } - if (IsPyflame(argv[optind])) { - std::cerr << "You tried to pyflame a pyflame, naughty!\n"; - return 1; - } - // In trace mode, all of the remaining arguments are a command to run. We - // fork and have the child run the command; the parent traces. - pid = fork(); - if (pid == -1) { - perror("fork()"); - return 1; - } else if (pid == 0) { - // Child: request to be traced. - PtraceTraceme(); - if (execvp(argv[optind], argv + optind)) { - std::cerr << "execvp() failed for: " << argv[optind] - << ", err = " << strerror(errno) << "\n"; - return 1; - } - } else { - // Parent: we trace the child until it's exec'ed the new process before - // proceeding. For a dynamically linked Python build, there's still a race - // condition between when the exec() happens and when symbols are - // available. But there's no point in polling the child until it's at - // least had a chance to run exec. - pid_t child = waitpid(0, nullptr, 0); - assert(child == pid); - PtraceSetOptions(pid, PTRACE_O_TRACEEXEC); - PtraceCont(pid); - int status = 0; - while (!SawEventExec(status)) { - pid_t p = waitpid(-1, &status, 0); - if (p == -1) { - perror("waitpid()"); - return 1; - } - if (WIFEXITED(status)) { - std::cerr << "Child process exited with status: " - << WEXITSTATUS(status) << "\n"; - return 1; - } - } - // We can only use PtraceInterrupt, used later in the main loop, if the - // process was seized. So we reattach and seize. - PtraceDetach(pid); - PtraceSeize(pid); - } - } else { - if (pid == -1) { - // Users should use -p to supply the PID to trace. However, older versions - // of Pyflame used a convention where the PID to trace was the final - // argument to the pyflame command. This code path handles this legacy use - // case, to preserve backward compatibility. - if (optind != argc - 1 || (pid = ParsePid(argv[optind])) == -1) { - std::cerr << usage_str; - return 1; - } - std::cerr - << "WARNING: Specifying a PID to trace without -p is deprecated; " - "see Pyflame issue #99 for details.\n "; - } - try { - PtraceSeize(pid); - } catch (const PtraceException &err) { - std::cerr << "Failed to seize PID " << pid << "\n"; - return 1; - } - } - PtraceInterrupt(pid); - - std::vector call_stacks; - size_t idle_count = 0; - const std::chrono::microseconds interval{ - static_cast(sample_rate * MICROSECONDS_PER_SEC)}; - try { - PyFrob frobber(pid, enable_threads); - - // When tracing a dynamically linked Python build, it may take a while for - // ld.so to actually load symbols into the process. Therefore we retry - // probing in a loop, until the symbols are loaded. A more reliable way of - // doing this would be to break at entry to a known static function (e.g. - // Py_Main), but this isn't reliable in all cases. For instance, - // /usr/bin/python{,3} will start at Py_Main, but uWSGI will not. - try { - const size_t max_retries = trace ? MAX_TRACE_RETRIES : MAX_ATTACH_RETRIES; - for (size_t i = 0;;) { - if (frobber.DetectABI(abi)) { - if (++i >= max_retries) { - std::cerr << "Failed to locate libpython within timeout period.\n"; - return 1; - } - PtraceCont(pid); - std::this_thread::sleep_for(interval); - PtraceInterrupt(pid); - continue; - } - break; - } - } catch (const FatalException &exc) { - std::cerr << exc.what() << "\n"; - return 1; - } - - const std::chrono::microseconds interval{ - static_cast(sample_rate * 1000000)}; - bool check_end = seconds >= 0; - auto end = std::chrono::system_clock::now() + - std::chrono::microseconds(static_cast(seconds * 1000000)); - for (;;) { - auto now = std::chrono::system_clock::now(); - std::vector threads = frobber.GetThreads(); - - // Only true for non-GIL stacks that we couldn't find a way to profile - // Currently this means stripped builds on non-AMD64 archs - if (threads.empty() && include_idle) { - idle_count++; - // Time stamp empty call stacks only if required. Since lots of time - // the process will be idle, this is a good optimization to have - if (include_ts) { - call_stacks.push_back({now, {}}); - } - } - - for (const auto &thread : threads) { - call_stacks.push_back({now, thread.frames()}); - } - - if ((check_end) && (now + interval >= end)) { - break; - } - PtraceCont(pid); - std::this_thread::sleep_for(interval); - PtraceInterrupt(pid); - } - if (!include_ts) { - PrintFrames(*output, call_stacks, idle_count); - } else { - PrintFramesTS(*output, call_stacks); - } - } catch (const PtraceException &exc) { - // If the process terminates early then we just print the stack traces up - // until that point in time. - if (!call_stacks.empty() || idle_count) { - if (!include_ts) { - PrintFrames(*output, call_stacks, idle_count); - } else { - PrintFramesTS(*output, call_stacks); - } - } else { - std::cerr << exc.what() << std::endl; - return 1; - } - } catch (const std::exception &exc) { - std::cerr << exc.what() << std::endl; + PyFrob frobber(prober.pid(), prober.enable_threads()); + if (prober.FindSymbols(&frobber)) { return 1; } - return 0; + + // Probe in a loop. + return prober.ProbeLoop(frobber); } diff --git a/src/pyfrob.cc b/src/pyfrob.cc index 783be03..7c0588d 100644 --- a/src/pyfrob.cc +++ b/src/pyfrob.cc @@ -195,7 +195,7 @@ std::string PyFrob::Status() const { return line; } -std::vector PyFrob::GetThreads(void) { +std::vector PyFrob::GetThreads(void) const { return get_threads_(pid_, addrs_, enable_threads_); } } // namespace pyflame diff --git a/src/pyfrob.h b/src/pyfrob.h index 3ba400c..4af168e 100644 --- a/src/pyfrob.h +++ b/src/pyfrob.h @@ -37,7 +37,7 @@ class PyFrob { int DetectABI(PyABI abi); // Get the current frame list. - std::vector GetThreads(void); + std::vector GetThreads(void) const; // Useful when debugging. std::string Status() const; diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index 9b73f8d..2836ed6 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -436,7 +436,7 @@ def test_invalid_pid(pid): def test_include_ts(sleeper): """Basic test for timestamp processes.""" proc = subprocess.Popen( - ['./src/pyflame', '-T', '-p', + ['./src/pyflame', '--flamechart', '-p', str(sleeper.pid)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -454,7 +454,7 @@ def test_include_ts(sleeper): def test_include_ts_exclude_idle(sleeper): """Basic test for timestamp processes.""" proc = subprocess.Popen( - ['./src/pyflame', '-T', '-x', '-p', + ['./src/pyflame', '--flamechart', '-x', '-p', str(sleeper.pid)], stdout=subprocess.PIPE, stderr=subprocess.PIPE,