From 5dc0780a1e2a5fa91acf793eafd01de45e3d713a Mon Sep 17 00:00:00 2001
From: Gargi Sharma
Date: Wed, 4 Dec 2019 17:15:32 +0100
Subject: [PATCH] Remove profiling support

---
 dune         |  12 +-
 orun.ml      |  21 +--
 orun.opam    |   4 +-
 profiler.c   | 449 ---------------------------------------------------
 profiler.ml  | 244 ----------------------------
 profiler.mli |  12 --
 6 files changed, 5 insertions(+), 737 deletions(-)
 delete mode 100644 profiler.c
 delete mode 100644 profiler.ml
 delete mode 100644 profiler.mli

diff --git a/dune b/dune
index 7b9ecb6..d293a7a 100644
--- a/dune
+++ b/dune
@@ -3,18 +3,8 @@
  (c_names wait4)
  (modules ))
 
-(library
- (name profiler)
- (c_names profiler)
- (c_library_flags (:include profiler_library_flags.sexp))
- (modules ))
-
 (executable
  (name orun)
  (public_name orun)
- (libraries str cmdliner yojson unix wait4 profiler))
+ (libraries str cmdliner yojson unix wait4))
 
-(rule
- (targets profiler_library_flags.sexp)
- (deps (:detect_os config/detect_os.sh))
- (action (run %{detect_os})))
\ No newline at end of file
diff --git a/orun.ml b/orun.ml
index efd3636..75fa5fe 100644
--- a/orun.ml
+++ b/orun.ml
@@ -76,12 +76,10 @@ let get_ocaml_config () =
       | "ranlib"
       | "asm"
       | "ccomp_type"
-      | "cc_profile"
       | "default_executable_name"
       | "bytecomp_c_libraries"
       | "native_c_libraries"
       | "native_pack_linker"
-      | "profiling"
       | "host" (* arch info available elsewhere *)
       | "os_type" (* already have more specific "system" *)
       | "target"
@@ -161,23 +159,8 @@ let run output input cmdline =
     else prog
   in
   try
-    let profiling =
-      match Sys.getenv_opt "ORUN_CONFIG_PROFILE" with
-      | None ->
-          false
-      | Some _ ->
-          true
-    in
-    let exec_prog output_name prog cmdline env stdin stdout stderr =
-      if profiling then (
-        let pid, parent_ready =
-          Profiler.create_process_env_paused prog cmdline env stdin stdout
-            stderr
-        in
-        let result = Profiler.start_profiling pid parent_ready in
-        Profiler.write_profiling_result output_name result ;
-        pid )
-      else Unix.create_process_env prog cmdline env stdin stdout stderr
+    let exec_prog _ prog cmdline env stdin stdout stderr =
+      Unix.create_process_env prog cmdline env stdin stdout stderr
     in
     let before = Unix.gettimeofday () in
     let captured_stderr_filename = Filename.temp_file "orun" "stderr" in
diff --git a/orun.opam b/orun.opam
index 3796c05..a441307 100644
--- a/orun.opam
+++ b/orun.opam
@@ -1,11 +1,11 @@
 opam-version: "2.0"
 name: "orun"
-version: "0.1"
+version: "0.1.0"
 synopsis: "Run benchmarks and measure performance"
 maintainer: "Stephen Dolan "
 authors: "Stephen Dolan "
 license: "MIT"
-dev-repo: ""
+dev-repo: "git+https://github.com/ocaml-bench/orun.git"
 depends: [ "ocaml" "cmdliner" "yojson" ]
 build: [
   ["ocaml-update-c" "wait4.c"] {ocaml:update-c}
diff --git a/profiler.c b/profiler.c
deleted file mode 100644
index 15ec73d..0000000
--- a/profiler.c
+++ /dev/null
@@ -1,449 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <caml/mlvalues.h>
-#include <caml/memory.h>
-#include <caml/alloc.h>
-#include <caml/callback.h>
-
-#ifdef __linux__
-#include <string.h>
-#include <errno.h>
-#include <err.h>
-#include <poll.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <linux/perf_event.h>
-#include <elfutils/libdwfl.h>
-
-#define DATA_PAGES 1024
-
-struct sample_id
-{
-    uint32_t pid;
-    uint32_t tid;
-    uint64_t time;
-};
-
-struct perf_event_record_mmap2
-{
-    struct perf_event_header header;
-    uint32_t pid;
-    uint32_t tid;
-    uint64_t addr;
-    uint64_t len;
-    uint64_t pgoff;
-    uint32_t maj;
-    uint32_t min;
-    uint64_t ino;
-    uint64_t ino_generation;
-    uint32_t prot;
-    uint32_t flags;
-    char filename[];
-};
-
-struct read_format
-{
-    uint64_t value;        /* The value of the event */
-    uint64_t time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
-    uint64_t time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
-    uint64_t id;           /* if PERF_FORMAT_ID */
-};
-
-struct perf_event_record_sample
-{
-    struct perf_event_header header;
-    uint64_t ip;   /* if PERF_SAMPLE_IP */
-    uint32_t pid;  /* if PERF_SAMPLE_TID */
-    uint32_t tid;  /* if PERF_SAMPLE_TID */
-    uint64_t time; /* if PERF_SAMPLE_TIME */
-    uint32_t cpu;  /* if PERF_SAMPLE_CPU */
-    uint32_t res;  /* if PERF_SAMPLE_CPU */
-    uint64_t bnr;  /* if PERF_SAMPLE_CALLCHAIN */
-};
-
-long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
-                     int cpu, int group_fd, unsigned long flags)
-{
-    int ret;
-
-    ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
-                  group_fd, flags);
-    return ret;
-}
-
-int poll_event(int fd)
-{
-    struct pollfd pfd = {.fd = fd, .events = POLLIN | POLLHUP};
-
-    int ret = poll(&pfd, 1, 1000);
-
-    return pfd.revents;
-}
-
-value some(value contents)
-{
-    CAMLparam1(contents);
-    CAMLlocal1(option);
-
-    option = caml_alloc(1, 0);
-
-    Store_field(option, 0, contents);
-
-    CAMLreturn(option);
-}
-
-int get_line_info(Dwfl *dwfl, uint64_t ip, const char **ip_filename, const char **ip_comp_dir, const char **ip_function_name, Dwarf_Addr *addr, int *ip_lineno)
-{
-    Dwfl_Module *module = dwfl_addrmodule(dwfl, ip);
-
-    dwfl_module_relocate_address(module, addr);
-
-    *ip_function_name = dwfl_module_addrname(module, ip);
-
-    Dwfl_Line *line = dwfl_getsrc(dwfl, ip);
-
-    if (line != NULL)
-    {
-        const char *filename = dwfl_lineinfo(line, NULL, ip_lineno, NULL, NULL, NULL);
-
-        if (filename != NULL)
-        {
-            const char *comp_dir = dwfl_line_comp_dir(line);
-
-            *ip_filename = filename;
-            *ip_comp_dir = comp_dir;
-        }
-    }
-}
-
-value get_source_line_for_ip(Dwfl *dwfl, uint64_t ip)
-{
-    CAMLparam0();
-    CAMLlocal1(source_line_record);
-
-    const char *filename = NULL;
-    const char *comp_dir = NULL;
-    const char *function_name = NULL;
-    int lineno = -1;
-
-    Dwarf_Addr addr = ip;
-
-    get_line_info(dwfl, ip, &filename, &comp_dir, &function_name, &addr, &lineno);
-
-    source_line_record = caml_alloc(5, 0);
-
-    if (function_name != NULL)
-    {
-        Store_field(source_line_record, 1, some(caml_copy_string(function_name)));
-    }
-    else
-    {
-        Store_field(source_line_record, 1, Val_unit);
-    }
-
-    if (filename != NULL)
-    {
-        int filename_length = strlen(filename);
-
-        char *resolved_path = NULL;
-
-        if (filename_length > 0 && filename[0] != '/' && comp_dir != NULL)
-        {
-            char full_path[filename_length + strlen(comp_dir) + 2];
-
-            strcpy(full_path, comp_dir);
-            strcat(full_path, "/");
-            strcat(full_path, filename);
-
-            resolved_path = realpath(full_path, NULL);
-
-            if (resolved_path == NULL)
-            {
-                Store_field(source_line_record, 0, some(caml_copy_string(full_path)));
-            }
-            else
-            {
-                Store_field(source_line_record, 0, some(caml_copy_string(resolved_path)));
-                free(resolved_path);
-            }
-        }
-        else
-        {
-            resolved_path = realpath(filename, NULL);
-
-            if (resolved_path == NULL)
-            {
-                Store_field(source_line_record, 0, some(caml_copy_string(filename)));
-            }
-            else
-            {
-                Store_field(source_line_record, 0, some(caml_copy_string(resolved_path)));
-                free(resolved_path);
-            }
-        }
-    }
-    else
-    {
-        Store_field(source_line_record, 0, Val_unit);
-    }
-
-    Store_field(source_line_record, 2, Val_int(lineno));
-    Store_field(source_line_record, 3, Val_int(addr));
-
-    CAMLreturn(source_line_record);
-
-    CAMLreturn(Val_unit);
-}
-
-int read_event(uint32_t type, unsigned char *buf, value sample_callback, Dwfl *dwfl, pid_t child_pid, int *sample_id)
-{
-    CAMLparam1(sample_callback);
-    CAMLlocal5(sample_record, branches_head, branches_entry, source_line_option, callback_return);
-
-    if (type == PERF_RECORD_MMAP2)
-    {
-        struct perf_event_record_mmap2 *record = (struct perf_event_record_mmap2 *)buf;
-
-        dwfl_report_begin_add(dwfl);
-
-        Dwfl_Module *module = dwfl_report_elf(dwfl, (const char *)record->filename, (const char *)record->filename, -1, record->addr - record->pgoff, false);
-
-        dwfl_report_end(dwfl, NULL, NULL);
-    }
-    else if (type == PERF_RECORD_EXIT)
-    {
-        CAMLdrop;
-        return 0;
-    }
-    else if (type == PERF_RECORD_SAMPLE)
-    {
-        struct perf_event_record_sample *record = (struct perf_event_record_sample *)buf;
-
-        unsigned char *pos = buf + sizeof(struct perf_event_record_sample);
-
-        source_line_option = get_source_line_for_ip(dwfl, record->ip);
-
-        sample_record = caml_alloc(6, 0);
-        Store_field(sample_record, 0, source_line_option);
-
-        branches_head = Val_unit;
-
-        // walk branch stack now and add these
-        uint64_t branches = record->bnr;
-
-        for (int c = 0; c < branches; c++)
-        {
-            struct perf_branch_entry *entry = (struct perf_branch_entry *)pos;
-
-            uint64_t from_ip = entry->from;
-
-            source_line_option = get_source_line_for_ip(dwfl, from_ip);
-
-            branches_entry = caml_alloc(2, 0);
-
-            Store_field(branches_entry, 0, source_line_option);
-            Store_field(branches_entry, 1, branches_head);
-
-            branches_head = branches_entry;
-
-            pos += sizeof(struct perf_branch_entry);
-        }
-
-        Store_field(sample_record, 1, branches_head);
-        Store_field(sample_record, 2, Val_int(record->time));
-        Store_field(sample_record, 3, Val_int(record->tid));
-        Store_field(sample_record, 4, Val_int(record->cpu));
-        Store_field(sample_record, 5, Val_int(*sample_id));
-
-        (*sample_id)++;
-
-        callback_return = caml_callback(sample_callback, sample_record);
-    }
-
-    CAMLdrop;
-    return 1;
-}
-
-value ml_unpause_and_start_profiling(value ml_pid, value ml_pipe_fds, value sample_callback)
-{
-    CAMLparam3(ml_pid, ml_pipe_fds, sample_callback);
-
-    int parent_ready_write = Long_val(ml_pipe_fds);
-
-    int sample_id = 0;
-
-    // Set up DWARF stuff
-    static char *debuginfo_path;
-
-    static const Dwfl_Callbacks offline_callbacks =
-        {
-            .find_debuginfo = dwfl_standard_find_debuginfo,
-            .debuginfo_path = &debuginfo_path,
-            .section_address = dwfl_offline_section_address,
-            .find_elf = dwfl_build_id_find_elf,
-        };
-
-    struct Dwfl *dwfl = dwfl_begin(&offline_callbacks);
-
-    struct mmap_node *head_ptr = NULL;
-
-    pid_t pid = Long_val(ml_pid);
-
-    struct perf_event_attr pe;
-    int perf_fd;
-    struct perf_event_mmap_page *header;
-    unsigned char *base, *data;
-    int page_size = getpagesize();
-
-    memset(&pe, 0, sizeof(struct perf_event_attr));
-
-    pe.type = 0;
-    pe.size = sizeof(pe);
-    pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU | PERF_SAMPLE_TID | PERF_SAMPLE_BRANCH_STACK;
-    pe.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK;
-    pe.sample_freq = 3000;
-    pe.freq = 1;
-    pe.exclude_kernel = 1;
-    pe.exclude_hv = 1;
-    pe.exclude_guest = 1;
-    pe.enable_on_exec = 1;
-    pe.disabled = 1;
-    pe.task = 1;
-    pe.mmap = 1;
-    pe.mmap2 = 1;
-    pe.wakeup_events = 1;
-    pe.precise_ip = 2;
-
-    perf_fd = perf_event_open(&pe, pid, -1, -1, 0);
-
-    if (perf_fd < 0)
-    {
-        perror("perf_event_open");
-        return -1;
-    }
-
-    uint64_t mmap_size = (1 + DATA_PAGES) * page_size;
-    base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, perf_fd, 0);
-
-    if (base == MAP_FAILED)
-    {
-        printf("mmap failed: %d\n", perf_fd);
-        err(EXIT_FAILURE, "mmap");
-    }
-
-    header = (struct perf_event_mmap_page *)base;
-    data = base + header->data_offset;
-
-    // Tell child we're ready
-    char *go = "!";
-
-    while (1)
-    {
-        int ret = write(parent_ready_write, go, 1);
-
-        if (ret < 0)
-        {
-            if (errno == EAGAIN || errno == EINTR)
-            {
-                continue;
-            }
-            else
-            {
-                perror("write");
-                exit(-1);
-            }
-        }
-
-        break;
-    }
-
-    pid_t child_pid = Int_val(ml_pid);
-    uint64_t data_read = 0;
-
-    while (1)
-    {
-        uint64_t original_tail = header->data_tail;
-        uint64_t tail = original_tail;
-        uint64_t original_head = __atomic_load_n(&header->data_head, __ATOMIC_ACQUIRE);
-        uint64_t head = original_head;
-
-        if ((head - tail) % header->data_size == 0)
-        {
-            // Ring buffer is empty, let's wait for something interesting to happen
-            int revents = poll_event(perf_fd);
-
-            if ((revents & POLLHUP) && (__atomic_load_n(&header->data_head, __ATOMIC_ACQUIRE) - tail) % header->data_size == 0)
-            {
-                break;
-            }
-
-            // Right, time to go check things again
-            continue;
-        }
-
-        head = head % header->data_size;
-        tail = tail % header->data_size;
-
-        struct perf_event_header *event_header = (struct perf_event_header *)(data + tail);
-
-        int space_left_in_ring = header->data_size - (tail + event_header->size);
-
-        if (space_left_in_ring < 0)
-        {
-            // Slow path, need to copy the data out first
-            unsigned char buffer[event_header->size];
-
-            int remaining = header->data_size - tail;
-
-            memcpy(buffer, data + tail, remaining);
-            memcpy(buffer + remaining, data, event_header->size - remaining);
-
-            int status = read_event(event_header->type, buffer, sample_callback, dwfl, child_pid, &sample_id);
-
-            if (status == 0)
-            {
-                break; // Success
-            }
-        }
-        else
-        {
-            // Fast path, can just hand the memory straight from the ring
-            int status = read_event(event_header->type, data + tail, sample_callback, dwfl, child_pid, &sample_id);
-
-            if (status == 0)
-            {
-                break; // Success
-            }
-        }
-
-        data_read += event_header->size;
-
-        __atomic_store_n(&header->data_tail, original_tail + event_header->size, __ATOMIC_RELEASE);
-    }
-
-    close(perf_fd);
-    munmap(base, (1 + DATA_PAGES) * page_size);
-
-    dwfl_end(dwfl);
-
-    CAMLreturn(Val_unit);
-}
-#endif
-
-#ifdef __APPLE__
-value ml_unpause_and_start_profiling(value ml_pid, value ml_pipe_fds)
-{
-    CAMLparam2(ml_pid, ml_pipe_fds);
-
-    CAMLreturn(Val_unit);
-}
-#endif
diff --git a/profiler.ml b/profiler.ml
deleted file mode 100644
index 8a3dfec..0000000
--- a/profiler.ml
+++ /dev/null
@@ -1,244 +0,0 @@
-open Printf
-open Common
-
-external unpause_and_start_profiling :
-  int -> Unix.file_descr -> (sample -> unit) -> unit
-  = "ml_unpause_and_start_profiling"
-
-exception ExpectedSome
-
-let unwrap = function None -> raise ExpectedSome () | Some x -> x
-
-let agg_hash = Hashtbl.create 1000
-
-let update_line src_line self_time_inc total_time_inc =
-  match Hashtbl.find_opt agg_hash src_line with
-  | None ->
-      Hashtbl.add agg_hash src_line {self_time= 1; total_time= 1}
-  | Some x ->
-      x.self_time <- x.self_time + self_time_inc ;
-      x.total_time <- x.total_time + total_time_inc
-
-let rec update_lines = function
-  | [] ->
-      ()
-  | h :: t ->
-      update_line h 0 1 ; update_lines t
-
-let src_line_to_idx = Hashtbl.create 10000
-
-let find_src_line_idx src_line =
-  match Hashtbl.find_opt src_line_to_idx src_line with
-  | None ->
-      let new_idx = Hashtbl.length src_line_to_idx in
-      Hashtbl.add src_line_to_idx src_line new_idx ;
-      new_idx
-  | Some x ->
-      x
-
-let samples_list = ref []
-
-let sample_callback sample =
-  (* increment self for the current source line *)
-  update_line sample.current 1 1 ;
-  update_lines sample.call_stack ;
-  let new_stack = List.map (fun a -> find_src_line_idx a) sample.call_stack in
-  let compressed_stack =
-    { stack= find_src_line_idx sample.current :: List.rev new_stack
-    ; thread_id= sample.thread_id
-    ; cpu= sample.cpu
-    ; timestamp= sample.timestamp
-    ; id= sample.id }
-  in
-  samples_list := compressed_stack :: !samples_list
-
-let start_profiling pid pipe_fd =
-  unpause_and_start_profiling pid pipe_fd sample_callback ;
-  agg_hash
-
-let int_of_fd (x : Unix.file_descr) : int = Obj.magic x
-
-let rec file_descr_not_standard (fd : Unix.file_descr) =
-  if int_of_fd fd >= 3 then fd else file_descr_not_standard (Unix.dup fd)
-
-let safe_close fd = try Unix.close fd with Unix.Unix_error (_, _, _) -> ()
-
-let perform_redirections new_stdin new_stdout new_stderr =
-  let new_stdin = file_descr_not_standard new_stdin in
-  let new_stdout = file_descr_not_standard new_stdout in
-  let new_stderr = file_descr_not_standard new_stderr in
-  (* The three dup2 close the original stdin, stdout, stderr,
-     which are the descriptors possibly left open
-     by file_descr_not_standard *)
-  Unix.dup2 ~cloexec:false new_stdin Unix.stdin ;
-  Unix.dup2 ~cloexec:false new_stdout Unix.stdout ;
-  Unix.dup2 ~cloexec:false new_stderr Unix.stderr ;
-  safe_close new_stdin ;
-  safe_close new_stdout ;
-  safe_close new_stderr
-
-let rec wait_for_parent parent_ready =
-  let read_fds, _write_fds, _exception_fds =
-    Unix.select [parent_ready] [] [] (-1.0)
-  in
-  if List.mem parent_ready read_fds then () else wait_for_parent parent_ready
-
-let create_process_env_paused cmd args env new_stdin new_stdout new_stderr =
-  let parent_ready, parent_ready_write = Unix.pipe () in
-  match Unix.fork () with
-  | 0 -> (
-    try
-      perform_redirections new_stdin new_stdout new_stderr ;
-      wait_for_parent parent_ready ;
-      Unix.execvpe cmd args env
-    with _ -> exit 127 )
-  | id ->
-      (id, parent_ready_write)
-
-module StringMap = Map.Make (String)
-
-module IntMap = Map.Make (struct
-  type t = int
-
-  let compare = Pervasives.compare
-end)
-
-let slash_regex = Str.regexp "[/\.]"
-
-let add_to_line_list src_line counts l =
-  match l with
-  | None ->
-      Some [(src_line, counts)]
-  | Some v ->
-      Some ((src_line, counts) :: v)
-
-let group_by_source_file src_line counts m =
-  match src_line.filename with
-  | None ->
-      m
-  | Some f ->
-      StringMap.update f
-        (function
-          | None ->
-              Some (IntMap.add src_line.line [(src_line, counts)] IntMap.empty)
-          | Some l ->
-              Some
-                (IntMap.update src_line.line
-                   (add_to_line_list src_line counts)
-                   l) )
-        m
-
-let map_some f l =
-  List.map
-    (fun x -> f (unwrap x))
-    (List.filter (function None -> false | Some _ -> true) l)
-
-let source_line_counts_to_json (filename, function_name) (counts, lc) =
-  `Assoc
-    [ ("filename", `String filename)
-    ; ("function", `String function_name)
-    ; ("self_time", `Int counts.self_time)
-    ; ("total_time", `Int counts.total_time)
-    ; ( "line_counts"
-      , `List
-          ( match lc with
-          | None ->
-              []
-          | Some line_counts ->
-              List.map
-                (fun (line, count) ->
-                  `List [`Int line; `Int count.self_time; `Int count.total_time]
-                  )
-                (List.sort (fun (a, _) (b, _) -> a - b) line_counts) ) ) ]
-
-let hotspots_to_json hotspots =
-  `List (List.map (fun (k, v) -> source_line_counts_to_json k v) hotspots)
-
-let group_by (f : 'a -> 'b) (ll : 'a list) : ('b, 'a list) Hashtbl.t =
-  List.fold_left
-    (fun acc e ->
-      let grp = f e in
-      let grp_mems = try Hashtbl.find acc grp with Not_found -> [] in
-      Hashtbl.replace acc grp (e :: grp_mems) ;
-      acc )
-    (Hashtbl.create 100) ll
-
-let fold_groups (f : 'b -> 'a list -> 'c) (g : ('b, 'a list) Hashtbl.t) :
-    ('b, 'c) Hashtbl.t =
-  Hashtbl.fold
-    (fun a b m ->
-      Hashtbl.add m a (f a b) ;
-      m )
-    g (Hashtbl.create 100)
-
-let flatten h = Hashtbl.fold (fun a b c -> (a, b) :: c) h []
-
-let group_by_fold (f : 'a -> 'b) (l : 'a list) (f2 : 'b -> 'a list -> 'c) :
-    ('b * 'c) list =
-  let grouped = group_by f l in
-  flatten (fold_groups f2 grouped)
-
-let write_profiling_result output_name (agg_result : aggregate_result) =
-  (* first write out the json representation of results *)
-  let total_samples =
-    Hashtbl.fold (fun a b c -> c + b.self_time) agg_result 0
-  in
-  let key_values = flatten agg_result in
-  let only_present_filenames =
-    List.filter
-      (function
-        | {filename= None}, _ ->
-            false
-        | {filename= Some x}, _ ->
-            Sys.file_exists x )
-      key_values
-  in
-  (* calculate hotspots *)
-  let grouped_by_file_function =
-    group_by
-      (fun (k, v) ->
-        (get_or "unknown" k.filename, get_or "unknown" k.function_name) )
-      key_values
-  in
-  let sum_counts l =
-    List.fold_left
-      (fun s (k, v) ->
-        s.self_time <- s.self_time + v.self_time ;
-        s.total_time <- s.total_time + v.total_time ;
-        s )
-      {self_time= 0; total_time= 0}
-      l
-  in
-  let sum_counts_by_file_function =
-    fold_groups (fun _ l -> sum_counts l) grouped_by_file_function
-  in
-  let sum_counts_by_line =
-    fold_groups
-      (fun _ l ->
-        group_by_fold (fun (k, v) -> k.line) l (fun _ cs -> sum_counts cs) )
-      grouped_by_file_function
-  in
-  let hottest_file_functions =
-    take
-      (List.sort
-         (fun (k0, v0) (k1, v1) -> v1.self_time - v0.self_time)
-         (flatten sum_counts_by_file_function))
-      20
-  in
-  let hotspots =
-    List.map
-      (fun (ff, c) -> (ff, (c, Hashtbl.find_opt sum_counts_by_line ff)))
-      hottest_file_functions
-  in
-  let profile_out = open_out_bin (output_name ^ ".prof.json") in
-  let hotspots_json =
-    `Assoc
-      [ ("total_samples", `Int total_samples)
-      ; ("hotspots", hotspots_to_json hotspots) ]
-  in
-  Yojson.Basic.to_channel profile_out hotspots_json ;
-  close_out profile_out ;
-  let dir_name = output_name ^ "_prof_results" in
-  if not (Sys.file_exists dir_name) then Unix.mkdir dir_name 0o740 ;
-  Reports.render_hotspots_html output_name hotspots total_samples ;
-  Reports.render_trace_json output_name !samples_list src_line_to_idx
diff --git a/profiler.mli b/profiler.mli
deleted file mode 100644
index 35ae354..0000000
--- a/profiler.mli
+++ /dev/null
@@ -1,12 +0,0 @@
-val create_process_env_paused :
-     string
-  -> string array
-  -> string array
-  -> Unix.file_descr
-  -> Unix.file_descr
-  -> Unix.file_descr
-  -> int * Unix.file_descr
-
-val start_profiling : int -> Unix.file_descr -> Common.aggregate_result
-
-val write_profiling_result : string -> Common.aggregate_result -> unit