Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimize cmm shifts and tags #3669

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion backend/amd64/selection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ class selector =
method! select_operation op args dbg =
match op with
(* Recognize the LEA instruction *)
| Caddi | Caddv | Cadda | Csubi -> (
| Caddi | Caddv | Cadda | Csubi | Cor -> (
match self#select_addressing Word_int (Cop (op, args, dbg)) with
| Iindexed _, _ | Iindexed2 0, _ -> super#select_operation op args dbg
| ( ((Iindexed2 _ | Iscaled _ | Iindexed2scaled _ | Ibased _) as addr),
Expand Down
7 changes: 7 additions & 0 deletions backend/amd64/selection_utils.ml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ let rec select_addr exp =
| ( ((Asymbol _ | Aadd (_, _) | Ascaledadd (_, _, _)), _),
((Asymbol _ | Alinear _ | Aadd (_, _) | Ascaledadd (_, _, _)), _) ) ->
Aadd (arg1, arg2), 0)
| Cmm.Cop (Cor, [arg; Cconst_int (1, _)], _)
| Cmm.Cop (Cor, [Cconst_int (1, _); arg], _) -> (
(* optimize tagging integers *)
match select_addr arg with
| Ascale (e, scale), off when scale mod 2 = 0 ->
Ascale (e, scale), off lor 1
| _ -> default)
| _ -> default

(* Special constraints on operand and result registers *)
Expand Down
245 changes: 180 additions & 65 deletions backend/cmm_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,13 @@ let rec add_const c n dbg =
Cop (Csubi, [Cconst_int (n + x, dbg); c], dbg)
| Cop (Csubi, [c; Cconst_int (x, _)], _) when Misc.no_overflow_sub n x ->
add_const c (n - x) dbg
| Cop
( Cor,
[(Cop (Clsl, [_; Cconst_int (1, _)], _) as inner); Cconst_int (1, _)],
_ )
when n = -1 ->
(* undo setting the tag bit *)
inner
| c -> Cop (Caddi, [c; Cconst_int (n, dbg)], dbg)

(* [incr_int c dbg] adds the constant [1] to [c]; it goes through [add_const]
   so that the constant-folding rewrites of [add_const] apply. *)
let incr_int c dbg = add_const c 1 dbg
Expand All @@ -360,18 +367,167 @@ let rec sub_int c1 c2 dbg =

(* [neg_int c dbg] negates [c] by building [0 - c] with [sub_int]. *)
let neg_int c dbg = sub_int (Cconst_int (0, dbg)) c dbg

let rec lsl_int c1 c2 dbg =
(* [guaranteed_to_be_small_int e] holds when [e] is a cmm operation whose
   result is known to be a small integer (e.g. within
   [min_int / 4; max_int / 4]). *)
let guaranteed_to_be_small_int expr =
  match expr with
  | Cop (Ccmpi _, _, _) | Cop (Ccmpf _, _, _) ->
    (* integer and float comparisons only ever produce [0] or [1] *)
    true
  | _ -> false

(* [is_defined_shift n] is true exactly when [0 <= n < arch_bits]; the shift
   simplifications use it to guard constant shift amounts. *)
let is_defined_shift n = n >= 0 && n < arch_bits

(* [ignore_low_bit_int e] strips an operation that only sets the lowest bit of
   [e]: either [lor 1], or [+ 1] applied to a left shift by a positive,
   in-range constant. Any other expression is returned unchanged. *)
let ignore_low_bit_int expr =
  match expr with
  | Cop (Cor, [operand; Cconst_int (1, _)], _) -> operand
  | Cop
      ( Caddi,
        [ (Cop (Clsl, [_; Cconst_int (n, _)], _) as shifted);
          Cconst_int (1, _) ],
        _ )
    when is_defined_shift n && n > 0 ->
    shifted
  | _ -> expr

(* [get_const e] returns the value of [e] as a [nativeint] when [e] is an
   integer constant ([Cconst_int] or [Cconst_natint]), and [None] otherwise. *)
let[@inline] get_const expr =
  match expr with
  | Cconst_natint (k, _) -> Some k
  | Cconst_int (k, _) -> Some (Nativeint.of_int k)
  | _ -> None

(* [or_int c1 c2 dbg] builds the bitwise OR of [c1] and [c2], simplifying when
   an operand is an integer constant:
   - two constants are folded into one constant;
   - [0] is the identity, so the other operand is returned;
   - [-1] is absorbing, so the constant operand itself is returned;
   - a lone constant on the left is moved to the right.

   NOTE(review): in the absorbing case the non-constant operand is dropped
   rather than kept in a [Csequence]; confirm callers never rely on its
   effects. *)
let or_int c1 c2 dbg =
  match get_const c1, get_const c2 with
  | Some k1, Some k2 -> natint_const_untagged dbg (Nativeint.logor k1 k2)
  (* exactly one operand is a constant from here on *)
  | None, Some 0n -> c1
  | None, Some -1n -> c2
  | Some 0n, None -> c2
  | Some -1n, None -> c1
  | Some _, None ->
    (* keep constants as the right-hand operand *)
    Cop (Cor, [c2; c1], dbg)
  | None, (Some _ | None) -> Cop (Cor, [c1; c2], dbg)

(* [and_int c1 c2 dbg] builds the bitwise AND of [c1] and [c2], simplifying
   when an operand is an integer constant:
   - two constants are folded into one constant;
   - [-1] is the identity, so the other operand is returned;
   - [0] is absorbing, so the constant operand itself is returned;
   - a lone constant on the left is moved to the right.

   NOTE(review): in the absorbing case the non-constant operand is dropped
   rather than kept in a [Csequence]; confirm callers never rely on its
   effects. *)
let and_int c1 c2 dbg =
  match get_const c1, get_const c2 with
  | Some k1, Some k2 -> natint_const_untagged dbg (Nativeint.logand k1 k2)
  (* exactly one operand is a constant from here on *)
  | None, Some 0n -> c2
  | None, Some -1n -> c1
  | Some 0n, None -> c1
  | Some -1n, None -> c2
  | Some _, None ->
    (* keep constants as the right-hand operand *)
    Cop (Cand, [c2; c1], dbg)
  | None, (Some _ | None) -> Cop (Cand, [c1; c2], dbg)

(* [xor_int c1 c2 dbg] builds the bitwise XOR of [c1] and [c2]: two constants
   are folded, a constant [0] operand is removed, and a lone constant on the
   left is moved to the right. *)
let xor_int c1 c2 dbg =
  match get_const c1, get_const c2 with
  | Some k1, Some k2 -> natint_const_untagged dbg (Nativeint.logxor k1 k2)
  (* exactly one operand is a constant from here on *)
  | None, Some 0n -> c1
  | Some 0n, None -> c2
  | Some _, None ->
    (* keep constants as the right-hand operand *)
    Cop (Cxor, [c2; c1], dbg)
  | None, (Some _ | None) -> Cop (Cxor, [c1; c2], dbg)

(* [replace x ~with_] yields an expression whose value is [with_]. When [x] is
   an integer constant, a symbol, a variable or the empty tuple it is simply
   discarded; any other [x] is kept in front of [with_] in a [Csequence]. *)
let replace x ~with_ =
  let can_be_dropped =
    match x with
    | Cconst_int _ | Cconst_natint _ | Cconst_symbol _ | Cvar _ | Ctuple [] ->
      true
    | _ -> false
  in
  if can_be_dropped then with_ else Csequence (x, with_)

(* Simplifying constructors for the three shift operations. They form a single
   recursive group because each [*_int] function re-enters the simplifier
   through the [*_const] wrappers defined at the end of the group.

   Common structure, for a constant shift amount [n] accepted by
   [is_defined_shift]:
   - a constant operand is folded with the matching [Nativeint] shift;
   - two nested constant shifts of the same kind are merged;
   - a shift of [x op k], with [op] one of or/and/xor and [k] a constant, is
     distributed over both operands so the constant part can be folded;
   - otherwise the plain [Cop] is emitted.
   When the outer shift amount is not a constant but the operand is a constant
   shift of the same kind, the two shifts are swapped so the constant one ends
   up outermost. *)

(* [lsr_int c1 c2 dbg] builds the logical right shift of [c1] by [c2]. *)
let rec lsr_int c1 c2 dbg =
  match c1, c2 with
  | c1, Cconst_int (0, _) -> c1
  | c1, Cconst_int (n, _) when is_defined_shift n -> (
    (* [n >= 1] here, so an operation that only sets bit 0 is shifted out *)
    let c1 = ignore_low_bit_int c1 in
    match get_const c1 with
    | Some x -> natint_const_untagged dbg (Nativeint.shift_right_logical x n)
    | None -> (
      match c1 with
      | Cop (Clsr, [inner; Cconst_int (n', _)], _) when is_defined_shift n' ->
        if is_defined_shift (n + n')
        then lsr_const inner (n + n') dbg
        else
          (* every bit is shifted out: the value is [0], but [inner] is kept
             by [replace] unless it is trivially droppable *)
          replace inner ~with_:(Cconst_int (0, dbg))
      | Cop (Cor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
        or_int (lsr_int x c2 dbg) (lsr_int y c2 dbg) dbg
      | Cop (Cand, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
        and_int (lsr_int x c2 dbg) (lsr_int y c2 dbg) dbg
      | Cop (Cxor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
        xor_int (lsr_int x c2 dbg) (lsr_int y c2 dbg) dbg
      | c1 -> Cop (Clsr, [c1; c2], dbg)))
  | Cop (Clsr, [x; (Cconst_int (n', _) as y)], z), c2 when is_defined_shift n'
    ->
    (* prefer putting the constant shift on the outside to help enable further
       peephole optimizations *)
    Cop (Clsr, [Cop (Clsr, [x; c2], dbg); y], z)
  | c1, c2 -> Cop (Clsr, [c1; c2], dbg)

(* [asr_int c1 c2 dbg] builds the arithmetic right shift of [c1] by [c2]. *)
and asr_int c1 c2 dbg =
  match c1, c2 with
  | c1, Cconst_int (0, _) -> c1
  | c1, Cconst_int (n, _) when is_defined_shift n -> (
    (* [n >= 1] here, so an operation that only sets bit 0 is shifted out *)
    let c1 = ignore_low_bit_int c1 in
    match get_const c1 with
    | Some x -> natint_const_untagged dbg (Nativeint.shift_right x n)
    | None -> (
      (* [c1] is not a constant in this branch, so no constant cases are
         needed below *)
      match c1 with
      | Cop (Casr, [inner; Cconst_int (n', _)], _) when is_defined_shift n' ->
        (* saturating add, since the sign bit extends to the left *)
        asr_const inner (Int.min (n + n') (arch_bits - 1)) dbg
      | Cop (Clsr, [_; Cconst_int (n', _)], _)
        when n' > 0 && is_defined_shift n' ->
        (* If the argument is guaranteed non-negative, then we know the sign
           bit is 0 and we can weaken this operation to a logical shift *)
        lsr_const c1 n dbg
      | Cop (Clsl, [c; Cconst_int (1, _)], _)
        when n = 1 && guaranteed_to_be_small_int c ->
        (* some operations always return small enough integers that it is safe
           and correct to optimise [asr (lsl x 1) 1] into [x]. *)
        c
      | Cop (Cor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
        or_int (asr_int x c2 dbg) (asr_int y c2 dbg) dbg
      | Cop (Cand, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
        and_int (asr_int x c2 dbg) (asr_int y c2 dbg) dbg
      | Cop (Cxor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
        xor_int (asr_int x c2 dbg) (asr_int y c2 dbg) dbg
      | c1 -> Cop (Casr, [c1; c2], dbg)))
  | Cop (Casr, [x; (Cconst_int (n', _) as y)], z), c2 when is_defined_shift n'
    ->
    (* prefer putting the constant shift on the outside to help enable further
       peephole optimizations *)
    Cop (Casr, [Cop (Casr, [x; c2], dbg); y], z)
  | _ -> Cop (Casr, [c1; c2], dbg)

(* [lsl_int c1 c2 dbg] builds the left shift of [c1] by [c2]. In addition to
   the common rewrites, a shift of [x + k] with a constant [k] is turned into
   [(x lsl n) + (k lsl n)] when [Misc.no_overflow_lsl] accepts [k] and [n]. *)
and lsl_int c1 c2 dbg =
  match c1, c2 with
  | c1, Cconst_int (0, _) -> c1
  | c1, Cconst_int (n, _) when is_defined_shift n -> (
    match get_const c1 with
    | Some c1 -> natint_const_untagged dbg (Nativeint.shift_left c1 n)
    | None -> (
      match c1 with
      (* NOTE(review): this pattern rebinds [dbg] to the debug info of the
         inner shift, unlike the corresponding [lsr]/[asr] cases -- confirm
         this is intended. *)
      | Cop (Clsl, [inner; Cconst_int (n', _)], dbg) when is_defined_shift n' ->
        if is_defined_shift (n + n')
        then lsl_const inner (n + n') dbg
        else
          (* every bit is shifted out: the value is [0], but [inner] is kept
             by [replace] unless it is trivially droppable *)
          replace inner ~with_:(Cconst_int (0, dbg))
      | Cop (Caddi, [c1; Cconst_int (offset, _)], _)
        when Misc.no_overflow_lsl offset n ->
        add_const (lsl_int c1 c2 dbg) (offset lsl n) dbg
      | Cop (Cor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
        or_int (lsl_int x c2 dbg) (lsl_int y c2 dbg) dbg
      | Cop (Cand, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
        and_int (lsl_int x c2 dbg) (lsl_int y c2 dbg) dbg
      | Cop (Cxor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
        xor_int (lsl_int x c2 dbg) (lsl_int y c2 dbg) dbg
      | c1 -> Cop (Clsl, [c1; c2], dbg)))
  | Cop (Clsl, [x; (Cconst_int (n', _) as y)], z), c2 when is_defined_shift n'
    ->
    (* prefer putting the constant shift on the outside to help enable further
       peephole optimizations *)
    Cop (Clsl, [Cop (Clsl, [x; c2], dbg); y], z)
  | _, _ -> Cop (Clsl, [c1; c2], dbg)

(* Convenience wrappers taking the shift amount as an OCaml [int]. *)
and lsl_const c n dbg = lsl_int c (Cconst_int (n, dbg)) dbg

and asr_const c n dbg = asr_int c (Cconst_int (n, dbg)) dbg

and lsr_const c n dbg = lsr_int c (Cconst_int (n, dbg)) dbg

(* [is_power2 n] checks that [n] is exactly [1 lsl (Misc.log2 n)]. *)
let is_power2 n =
  let candidate = 1 lsl Misc.log2 n in
  n = candidate

Expand All @@ -392,74 +548,33 @@ let rec mul_int c1 c2 dbg =
add_const (mul_int c (Cconst_int (k, dbg)) dbg) (n * k) dbg
| c1, c2 -> Cop (Cmuli, [c1; c2], dbg)

(* Recognise cmm operations whose result is guaranteed to be a small integer
   (e.g. in the range [min_int / 4; max_int / 4]).

   NOTE(review): a definition with this name also appears earlier in this
   file; this later one shadows it for the code below -- confirm which one is
   meant to stay. *)
let guaranteed_to_be_small_int expr =
  match expr with
  | Cop (op, _, _) -> (
    match op with
    | Ccmpi _ | Ccmpf _ ->
      (* integer/float comparisons return either [1] or [0] *)
      true
    | _ -> false)
  | _ -> false

(* [ignore_low_bit_int e] strips an operation that only sets the lowest bit of
   [e]: either [lor 1], or [+ 1] applied to a left shift by a positive
   constant. Any other expression is returned unchanged.

   NOTE(review): a definition with this name also appears earlier in this
   file (with an extra [is_defined_shift] guard); this later one shadows it
   for the code below -- confirm which one is meant to stay. *)
let ignore_low_bit_int expr =
  match expr with
  | Cop (Cor, [operand; Cconst_int (1, _)], _) -> operand
  | Cop
      ( Caddi,
        [ (Cop (Clsl, [_; Cconst_int (n, _)], _) as shifted);
          Cconst_int (1, _) ],
        _ )
    when n > 0 ->
    shifted
  | _ -> expr

(* [lsr_int c1 c2 dbg] builds the logical right shift of [c1] by [c2]:
   - a shift by the constant [0] returns [c1];
   - for a positive constant amount, two nested constant shifts are merged
     when their sum stays below [size_int * 8]; otherwise an operation that
     only sets bit 0 of [c1] is dropped before shifting;
   - any other amount produces the plain [Clsr] operation.

   NOTE(review): a definition with this name also appears earlier in this
   file; this later one shadows it for the code below -- confirm which one is
   meant to stay. *)
let lsr_int c1 c2 dbg =
  match c2 with
  | Cconst_int (0, _) -> c1
  | Cconst_int (amount, _) when amount > 0 -> (
    match c1 with
    | Cop (Clsr, [inner; Cconst_int (prev, _)], _)
      when prev > 0 && prev + amount < size_int * 8 ->
      Cop (Clsr, [inner; Cconst_int (prev + amount, dbg)], dbg)
    | _ -> Cop (Clsr, [ignore_low_bit_int c1; c2], dbg))
  | _ -> Cop (Clsr, [c1; c2], dbg)

(* [lsr_const c n dbg] is [lsr_int] with the shift amount given as an OCaml
   [int], wrapped into a [Cconst_int].
   NOTE(review): a binding with this name also appears earlier in this file;
   confirm which one is meant to stay. *)
let lsr_const c n dbg = lsr_int c (Cconst_int (n, dbg)) dbg

(* [asr_int c1 c2 dbg] builds the arithmetic right shift of [c1] by [c2]:
   - a shift by the constant [0] returns [c1];
   - for a positive constant amount, an operation that only sets bit 0 of
     [c1] is dropped first, and [asr (lsl x 1) 1] collapses to [x] when [x]
     is guaranteed to be a small integer;
   - any other amount produces the plain [Casr] operation.

   NOTE(review): a definition with this name also appears earlier in this
   file; this later one shadows it for the code below -- confirm which one is
   meant to stay. *)
let asr_int c1 c2 dbg =
  match c2 with
  | Cconst_int (0, _) -> c1
  | Cconst_int (amount, _) when amount > 0 -> (
    let stripped = ignore_low_bit_int c1 in
    match stripped with
    | Cop (Clsl, [operand; Cconst_int (1, _)], _)
      when amount = 1 && guaranteed_to_be_small_int operand ->
      (* some operations always return small enough integers that undoing the
         shift is safe *)
      operand
    | _ -> Cop (Casr, [stripped; c2], dbg))
  | _ -> Cop (Casr, [c1; c2], dbg)

(* [asr_const c n dbg] is [asr_int] with the shift amount given as an OCaml
   [int], wrapped into a [Cconst_int].
   NOTE(review): a binding with this name also appears earlier in this file;
   confirm which one is meant to stay. *)
let asr_const c n dbg = asr_int c (Cconst_int (n, dbg)) dbg

(* [tag_int i dbg] builds the tagged form of [i]. In the general case this is
   [(i lsl 1) + 1]. Constants are delegated to [int_const]. When [i] is itself
   a right shift by a positive constant [n], the result is instead that shift
   by [n - 1] with the low bit set through [lor 1], which avoids the extra
   left shift. *)
let tag_int i dbg =
  let set_tag_bit e = Cop (Cor, [e; Cconst_int (1, dbg)], dbg) in
  match i with
  | Cconst_int (n, _) -> int_const dbg n
  | Cop (Casr, [c; Cconst_int (n, _)], _) when n > 0 ->
    set_tag_bit (asr_const c (n - 1) dbg)
  | Cop (Clsr, [c; Cconst_int (n, _)], _) when n > 0 ->
    set_tag_bit (lsr_const c (n - 1) dbg)
  | c -> incr_int (lsl_const c 1 dbg) dbg

let untag_int i dbg =
match i with
| Cconst_int (n, _) -> Cconst_int (n asr 1, dbg)
| Cop (Cor, [Cop (Casr, [c; Cconst_int (n, _)], _); Cconst_int (1, _)], _)
when n > 0 && n < (size_int * 8) - 1 ->
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
when n > 0 && n < (size_int * 8) - 1 ->
when n > 0 && is_defined_shift (n + 1) ->

This might be simpler and more consistent with the other shift guards?

Cop (Casr, [c; Cconst_int (n + 1, dbg)], dbg)
asr_const c (n + 1) dbg
| Cop (Cor, [Cop (Clsr, [c; Cconst_int (n, _)], _); Cconst_int (1, _)], _)
when n > 0 && n < (size_int * 8) - 1 ->
Cop (Clsr, [c; Cconst_int (n + 1, dbg)], dbg)
| c -> asr_int c (Cconst_int (1, dbg)) dbg

let mk_not dbg cmm =
match cmm with
| Cop (Caddi, [Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)], dbg')
-> (
| Cop
( (Caddi | Cor),
[Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)],
dbg' ) -> (
match c with
| Cop (Ccmpi cmp, [c1; c2], dbg'') ->
tag_int
Expand Down Expand Up @@ -799,8 +914,10 @@ let mod_int ?dividend_cannot_be_min_int c1 c2 dbg =

let test_bool dbg cmm =
match cmm with
| Cop (Caddi, [Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)], _)
->
| Cop
( (Caddi | Cor),
[Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)],
_ ) ->
c
| Cconst_int (n, dbg) ->
if n = 1 then Cconst_int (0, dbg) else Cconst_int (1, dbg)
Expand Down Expand Up @@ -1035,8 +1152,10 @@ let array_indexing ?typ log2size ptr ofs dbg =
if i = 0
then ptr
else Cop (add, [ptr; Cconst_int (i lsl log2size, dbg)], dbg)
| Cop (Caddi, [Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)], dbg')
->
| Cop
( (Caddi | Cor),
[Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)],
dbg' ) ->
Cop (add, [ptr; lsl_const c log2size dbg], dbg')
| Cop (Caddi, [c; Cconst_int (n, _)], dbg') when log2size = 0 ->
Cop
Expand Down Expand Up @@ -1976,11 +2095,7 @@ let and_int e1 e2 dbg =
match e1, e2 with
| e, m when is_mask32 m -> zero_extend ~bits:32 e ~dbg
| m, e when is_mask32 m -> zero_extend ~bits:32 e ~dbg
| e1, e2 -> Cop (Cand, [e1; e2], dbg)

let or_int e1 e2 dbg = Cop (Cor, [e1; e2], dbg)

let xor_int e1 e2 dbg = Cop (Cxor, [e1; e2], dbg)
| e1, e2 -> and_int e1 e2 dbg

(* Boxed integers *)

Expand Down