Skip to content

Commit

Permalink
Add proper abort handling for Intel Transactional Synchronization Ext…
Browse files Browse the repository at this point in the history
…ensions (TSX)

This also makes a minor change which corrects error printing, where a
flush was intended (%!) but the % accidentally omitted.

Closes #284.

Signed-off-by: Brian Nigito <[email protected]>
  • Loading branch information
bnigito authored and Xyene committed Oct 16, 2024
1 parent 7ac635e commit b4402b8
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 21 deletions.
5 changes: 5 additions & 0 deletions src/env_vars.ml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,8 @@ let fzf_demangle_symbols = Option.is_some (Unix.getenv "MAGIC_TRACE_FZF_DEMANGLE
let no_ocaml_exception_debug_info =
Option.is_some (Unix.getenv "MAGIC_TRACE_NO_OCAML_EXCEPTION_DEBUG_INFO")
;;

(* Skip any special case transaction handling, intended for debugging tx/tx_abrt. *)
let skip_transaction_handling =
Option.is_some (Unix.getenv "MAGIC_TRACE_SKIP_TX_HANDLING")
;;
1 change: 1 addition & 0 deletions src/env_vars.mli
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ val perfetto_dir : string option
val no_dlfilter : bool
val fzf_demangle_symbols : bool
val no_ocaml_exception_debug_info : bool
val skip_transaction_handling : bool
2 changes: 2 additions & 0 deletions src/event.ml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ module Kind = struct
| Hardware_interrupt
| Iret
| Jump
| Tx_abort
[@@deriving sexp, compare, bin_io]
end

Expand Down Expand Up @@ -88,6 +89,7 @@ module Ok = struct
{ thread : Thread.t
; time : Time_ns.Span.t
; data : Data.t
; in_transaction : bool [@sexp.bool]
}
[@@deriving sexp, bin_io]
end
Expand Down
2 changes: 2 additions & 0 deletions src/event.mli
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ module Kind : sig
| Hardware_interrupt
| Iret
| Jump
| Tx_abort
[@@deriving sexp, compare]
end

Expand Down Expand Up @@ -60,6 +61,7 @@ module Ok : sig
{ thread : Thread.t
; time : Time_ns.Span.t
; data : Data.t
; in_transaction : bool
}
[@@deriving sexp]
end
Expand Down
20 changes: 15 additions & 5 deletions src/perf_decode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ let perf_callstack_entry_re = Re.Perl.re "^\t *([0-9a-f]+) (.*)$" |> Re.compile

let perf_branches_event_re =
Re.Perl.re
{|^ *(call|return|tr strt|syscall|sysret|hw int|iret|tx abrt|tr end|tr strt tr end|tr end (?:async|call|return|syscall|sysret|iret)|jmp|jcc) +(?:\(x\) +)?([0-9a-f]+) (.*) => +([0-9a-f]+) (.*)$|}
{|^ *(call|return|tr strt|syscall|sysret|hw int|iret|tx abrt|tr end|tr strt tr end|tr end (?:async|call|return|syscall|sysret|iret)|jmp|jcc) +(\(x\) +)?([0-9a-f]+) (.*) => +([0-9a-f]+) (.*)$|}
|> Re.compile
;;

Expand Down Expand Up @@ -170,7 +170,8 @@ let trace_error_to_event line : Event.Decode_error.t =
let parse_perf_cbr_event thread time line : Event.t =
match Re.Group.all (Re.exec perf_cbr_event_re line) with
| [| _; _; _; freq; _ |] ->
Ok { thread; time; data = Power { freq = Int.of_string freq } }
Ok
{ thread; time; data = Power { freq = Int.of_string freq }; in_transaction = false }
| results ->
raise_s
[%message
Expand Down Expand Up @@ -202,13 +203,14 @@ let parse_perf_cycles_event ?perf_maps (thread : Event.Thread.t) time lines : Ev
let callstack =
List.map lines ~f:(parse_callstack_entry ?perf_maps thread) |> List.rev
in
Ok { thread; time; data = Stacktrace_sample { callstack } }
Ok { thread; time; data = Stacktrace_sample { callstack }; in_transaction = false }
;;

let parse_perf_branches_event ?perf_maps (thread : Event.Thread.t) time line : Event.t =
match Re.Group.all (Re.exec perf_branches_event_re line) with
| [| _
; kind
; aux_flags
; src_instruction_pointer
; src_symbol_and_offset
; dst_instruction_pointer
Expand Down Expand Up @@ -251,6 +253,8 @@ let parse_perf_branches_event ?perf_maps (thread : Event.Thread.t) time line : E
to show a broken trace instead of crashing here. *)
| true, true -> None
in
(* record the flag indicating we're within a transaction *)
let in_transaction = String.contains aux_flags 'x' in
let kind : Event.Kind.t option =
match String.strip kind with
| "call" -> Some Call
Expand All @@ -262,7 +266,7 @@ let parse_perf_branches_event ?perf_maps (thread : Event.Thread.t) time line : E
| "iret" -> Some Iret
| "sysret" -> Some Sysret
| "async" -> Some Async
| "tx abrt" -> Some Jump
| "tx abrt" -> Some Tx_abort
| "" -> None
| _ ->
printf "Warning: skipping unrecognized perf output: %s\n%!" line;
Expand All @@ -286,6 +290,7 @@ let parse_perf_branches_event ?perf_maps (thread : Event.Thread.t) time line : E
; symbol_offset = dst_symbol_offset
}
}
; in_transaction
}
| results ->
raise_s
Expand Down Expand Up @@ -314,7 +319,12 @@ let parse_perf_extra_sampled_event
"Regex of perf event did not match expected fields" (results : string array)])
| lines -> List.hd_exn lines |> parse_callstack_entry ?perf_maps thread
in
Ok { thread; time; data = Event_sample { location; count = period; name } }
Ok
{ thread
; time
; data = Event_sample { location; count = period; name }
; in_transaction = false
}
;;

let to_event ?perf_maps lines : Event.t option =
Expand Down
69 changes: 63 additions & 6 deletions src/trace_writer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ type 'thread inner =
; annotate_inferred_start_times : bool
; mutable in_filtered_region : bool
; suppressed_errors : Hash_set.M(Source_code_position).t
; mutable transaction_events : Event.With_write_info.t Deque.t
}

type t = T : 'thread inner -> t
Expand Down Expand Up @@ -321,6 +322,7 @@ let create_expert
; annotate_inferred_start_times
; in_filtered_region = true
; suppressed_errors = Hash_set.create (module Source_code_position)
; transaction_events = Deque.create ()
}
in
write_hits t hits;
Expand Down Expand Up @@ -970,17 +972,56 @@ let write_event_and_callstack

let warn_decode_error ~instruction_pointer ~message =
eprintf
"Warning: perf reported an error decoding the trace: %s\n!"
"Warning: perf reported an error decoding the trace: %s\n%!"
(match instruction_pointer with
| None -> [%string "'%{message}'"]
| Some instruction_pointer ->
[%string "'%{message}' @ IP %{instruction_pointer#Int64.Hex}."])
;;

(* Write perf_events into a file as a Fuchsia trace (stack events). Events should be
collected with --itrace=be or cre, and -F pid,tid,time,flags,addr,sym,symoff as per
collected with --itrace=bep or cre, and -F pid,tid,time,flags,addr,sym,symoff as per
the constants defined above. *)
let write_event (T t) ?events_writer event =
let rec write_event (T t) ?events_writer original_event =
if Env_vars.skip_transaction_handling
then write_event' (T t) ?events_writer original_event
else (
let { Event.With_write_info.event; should_write = _ } = original_event in
(* 1. If this event is within a transaction, queue it.
2. If this event ends a transaction, deliver all queued events (then deliver it)
3. If this event is a transaction abort, clear all queued events and discard
the [Tx_abort]. *)
match event with
| Ok { Event.Ok.thread = _; time = _; data; in_transaction } ->
let is_abort =
match data with
| Trace { kind = Some Tx_abort; _ } -> true
| _ -> false
in
if is_abort
then (
Deque.clear t.transaction_events;
write_event' (T t) ?events_writer original_event)
else if in_transaction
then Deque.enqueue_back t.transaction_events original_event
else (
if not (Deque.is_empty t.transaction_events)
then (
Deque.iter' t.transaction_events `front_to_back ~f:(fun ev ->
write_event' (T t) ?events_writer ev);
Deque.clear t.transaction_events);
write_event' (T t) ?events_writer original_event)
| Error _ ->
(* Unsure how to best handle errors during a transaction. *)
if not (Deque.is_empty t.transaction_events)
then (
eprintf
"Warning: error received during transaction, dropping all transaction events\n\
%!";
Deque.clear t.transaction_events);
write_event' (T t) ?events_writer original_event)

and write_event' (T t) ?events_writer event =
let { Event.With_write_info.event; should_write } = event in
let thread = Event.thread event in
let thread_info =
Expand Down Expand Up @@ -1008,7 +1049,11 @@ let write_event (T t) ?events_writer event =
Option.iter events_writer ~f:(fun events_writer ->
write_event_and_callstack events_writer event thread_info.callstack);
(match event_value with
| { Event.Ok.thread = _; time = _; data = Event_sample { location; count; name } } ->
| { Event.Ok.thread = _
; time = _
; data = Event_sample { location; count; name }
; in_transaction = _
} ->
let track_name = Collection_mode.Event.Name.to_string name in
let track_thread =
Hashtbl.find_or_add thread_info.extra_event_tracks name ~default:(fun () ->
Expand Down Expand Up @@ -1041,6 +1086,7 @@ let write_event (T t) ?events_writer event =
| { Event.Ok.thread = _ (* Already used this to look up thread info. *)
; time = _ (* Already in scope. Also, this time hasn't been [map_time]'d. *)
; data = Power { freq }
; in_transaction = _
} ->
write_counter
t
Expand All @@ -1051,6 +1097,7 @@ let write_event (T t) ?events_writer event =
| { Event.Ok.thread = _ (* Already used this to look up thread info. *)
; time = _ (* Already in scope. Also, this time hasn't been [map_time]'d. *)
; data = Stacktrace_sample { callstack }
; in_transaction = _
} ->
let how_many_ret =
Stack.length thread_info.callstack.stack
Expand All @@ -1062,6 +1109,7 @@ let write_event (T t) ?events_writer event =
| { Event.Ok.thread = _ (* Already used this to look up thread info. *)
; time = _ (* Already in scope. Also, this time hasn't been [map_time]'d. *)
; data = Trace { kind; trace_state_change; src; dst }
; in_transaction = _
} ->
Ocaml_hacks.track_executed_pushtraps_and_poptraps_in_range
t
Expand All @@ -1072,10 +1120,18 @@ let write_event (T t) ?events_writer event =
(match kind, trace_state_change with
| Some Call, (None | Some End) -> call t thread_info ~time ~location:dst
| ( Some
(Async | Call | Syscall | Return | Hardware_interrupt | Iret | Sysret | Jump)
( Async
| Call
| Syscall
| Return
| Hardware_interrupt
| Iret
| Sysret
| Jump
| Tx_abort )
, Some Start )
| Some Async, None
| Some (Hardware_interrupt | Jump), Some End ->
| Some (Hardware_interrupt | Jump | Tx_abort), Some End ->
raise_s
[%message
"BUG: magic-trace devs thought this event was impossible, but you just \
Expand Down Expand Up @@ -1166,6 +1222,7 @@ let write_event (T t) ?events_writer event =
~addr:dst.instruction_pointer
~time;
check_current_symbol t thread_info ~time dst)
| Some Tx_abort, None -> check_current_symbol t thread_info ~time dst
| Some Jump, None ->
Ocaml_hacks.check_current_symbol_track_entertraps t thread_info ~time dst
(* (None, _) comes up when perf spews something magic-trace doesn't recognize.
Expand Down
12 changes: 5 additions & 7 deletions test/decode_errors.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ let%expect_test "decode error during memmove" =
let%map () = Perf_script.run ~trace_scope:Userspace "memmove_decode_error.perf" in
[%expect
{|
Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
293415/293415 47170.086912824: 1 branches:uH: call 40b06c itch_bbo::book::Book::add_order+0x51c (foo.so) => 7ffff7327730 __memmove_avx_unaligned_erms+0x0 (foo.so)
293415/293415 47170.086912825: 1 branches:uH: return 7ffff73277c7 __memmove_avx_unaligned_erms+0x97 (foo.so) => 40b072 itch_bbo::book::Book::add_order+0x522 (foo.so)
-> 19ns BEGIN __memmove_avx_unaligned_erms
Expand All @@ -23,9 +24,7 @@ let%expect_test "decode error during memmove" =
INPUT TRACE STREAM ENDED, any lines printed below this were deferred
-> 21ns BEGIN itch_bbo::book::Book::add_order [inferred start time]
-> 141ns END __memmove_avx_unaligned_erms
-> 141ns END itch_bbo::book::Book::add_order
Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
! |}]
-> 141ns END itch_bbo::book::Book::add_order |}]
;;

let%expect_test "decode error during rust B-tree rebalance" =
Expand All @@ -34,6 +33,8 @@ let%expect_test "decode error during rust B-tree rebalance" =
in
[%expect
{|
Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
364691/364691 62709.735347729: 1 branches:uH: call 40bc5a itch_bbo::book::Book::delete_order+0x3ca (foo.so) => 40a550 remove_leaf_kv+0x0 (foo.so)
364691/364691 62709.735347731: 1 branches:uH: call 40a5b2 remove_leaf_kv+0x62 (foo.so) => 7ffff7327730 __memmove_avx_unaligned_erms+0x0 (foo.so)
-> 28ns BEGIN remove_leaf_kv
Expand Down Expand Up @@ -97,8 +98,5 @@ let%expect_test "decode error during rust B-tree rebalance" =
-> 419ns END _int_free
-> 419ns END merge_tracking_child_edge
-> 420ns END remove_leaf_kv
INPUT TRACE STREAM ENDED, any lines printed below this were deferred
Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
!Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
! |}]
INPUT TRACE STREAM ENDED, any lines printed below this were deferred |}]
;;
5 changes: 2 additions & 3 deletions test/page_fault.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ let%expect_test "A page fault during demo.c" =
let%map () = Perf_script.run ~trace_scope:Userspace_and_kernel "page_fault.perf" in
[%expect
{|
Warning: perf reported an error decoding the trace: 'Overflow packet (foo.so)' @ IP 0x7f5948676e18.
1439745/1439745 2472089.651284813: 1 branches:uH: jmp 7f59488f90f7 call_destructors+0x67 (foo.so) => 7f5948676e18 _fini+0x0 (foo.so)
instruction trace error type 1 time 2472089.651285037 cpu -1 pid 1439745 tid 1439745 ip 0x7f5948676e18 code 7: Overflow packet (foo.so)
-> 224ns BEGIN [decode error: Overflow packet (foo.so)]
Expand Down Expand Up @@ -643,7 +644,5 @@ let%expect_test "A page fault during demo.c" =
-> 224ns BEGIN _dl_catch_exception [inferred start time]
-> 224ns BEGIN _fini [inferred start time]
-> 1.373us END _fini
-> 1.373us END _dl_catch_exception
Warning: perf reported an error decoding the trace: 'Overflow packet (foo.so)' @ IP 0x7f5948676e18.
! |}]
-> 1.373us END _dl_catch_exception |}]
;;
2 changes: 2 additions & 0 deletions test/test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ end = struct
; src = random_location ()
; dst = random_location ?symbol ()
}
; in_transaction = false
}
;;

Expand Down Expand Up @@ -94,6 +95,7 @@ end = struct
{ thread
; time
; data = Trace { trace_state_change = None; kind = Some kind; src; dst }
; in_transaction = false
})
;;

Expand Down

0 comments on commit b4402b8

Please sign in to comment.