ocaml-flambda · jvanburen · Mar 7, 2025 · Mar 10, 2025 · Mar 10, 2025 · Mar 10, 2025
diff --git a/backend/amd64/selection.ml b/backend/amd64/selection.ml
@@ -194,7 +194,7 @@ class selector =
     method! select_operation op args dbg =
       match op with
       (* Recognize the LEA instruction *)
-      | Caddi | Caddv | Cadda | Csubi -> (
+      | Caddi | Caddv | Cadda | Csubi | Cor -> (
         match self#select_addressing Word_int (Cop (op, args, dbg)) with
         | Iindexed _, _ | Iindexed2 0, _ -> super#select_operation op args dbg
         | ( ((Iindexed2 _ | Iscaled _ | Iindexed2scaled _ | Ibased _) as addr),

diff --git a/backend/amd64/selection_utils.ml b/backend/amd64/selection_utils.ml
@@ -76,6 +76,13 @@ let rec select_addr exp =
     | ( ((Asymbol _ | Aadd (_, _) | Ascaledadd (_, _, _)), _),
         ((Asymbol _ | Alinear _ | Aadd (_, _) | Ascaledadd (_, _, _)), _) ) ->
       Aadd (arg1, arg2), 0)
+  | Cmm.Cop (Cor, [arg; Cconst_int (1, _)], _)
+  | Cmm.Cop (Cor, [Cconst_int (1, _); arg], _) -> (
+    (* optimize tagging integers *)
+    match select_addr arg with
+    | Ascale (e, scale), off when scale mod 2 = 0 ->
+      Ascale (e, scale), off lor 1
+    | _ -> default)
   | _ -> default
 
 (* Special constraints on operand and result registers *)

diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml
@@ -334,6 +334,13 @@ let rec add_const c n dbg =
       Cop (Csubi, [Cconst_int (n + x, dbg); c], dbg)
     | Cop (Csubi, [c; Cconst_int (x, _)], _) when Misc.no_overflow_sub n x ->
       add_const c (n - x) dbg
+    | Cop
+        ( Cor,
+          [(Cop (Clsl, [_; Cconst_int (1, _)], _) as inner); Cconst_int (1, _)],
+          _ )
+      when n = -1 ->
+      (* undo setting the tag bit *)
+      inner
     | c -> Cop (Caddi, [c; Cconst_int (n, dbg)], dbg)
 
 let incr_int c dbg = add_const c 1 dbg
@@ -360,18 +367,167 @@ let rec sub_int c1 c2 dbg =
 
 let neg_int c dbg = sub_int (Cconst_int (0, dbg)) c dbg
 
-let rec lsl_int c1 c2 dbg =
+(* Recognise Cmm operations whose result is known to be a small integer
+   (e.g. within [min_int / 4; max_int / 4]). *)
+let guaranteed_to_be_small_int expr =
+  match expr with
+  | Cop ((Ccmpi _ | Ccmpf _), _, _) ->
+    (* integer and float comparisons only ever yield [0] or [1] *)
+    true
+  | _ -> false
+
+(* A shift amount is accepted when it lies in [0, arch_bits). *)
+let is_defined_shift n = n >= 0 && n < arch_bits
+
+(* Strip an outer operation that only sets the lowest bit of its operand.
+   Used by the right-shift helpers, which shift that bit out anyway. *)
+let ignore_low_bit_int expr =
+  match expr with
+  | Cop (Cor, [operand; Cconst_int (1, _)], _) -> operand
+  | Cop
+      ( Caddi,
+        [ (Cop (Clsl, [_; Cconst_int (shift, _)], _) as shifted);
+          Cconst_int (1, _) ],
+        _ )
+    when shift > 0 && is_defined_shift shift ->
+    (* after a left shift by at least one bit the low bit is clear, so
+       adding 1 merely sets it *)
+    shifted
+  | other -> other
+
+(* Return the value of a literal integer constant as a [nativeint], or [None]
+   when the expression is not such a literal. *)
+let[@inline] get_const expr =
+  match expr with
+  | Cconst_natint (value, _) -> Some value
+  | Cconst_int (value, _) -> Some (Nativeint.of_int value)
+  | _ -> None
+
+(* [replace x ~with_] yields [with_] in place of [x] while still evaluating
+   [x]: constants, symbols, variables and the empty tuple are dropped
+   outright, anything else is sequenced before [with_]. *)
+let replace x ~with_ =
+  match x with
+  | Cconst_int _ | Cconst_natint _ | Cconst_symbol _ | Cvar _ | Ctuple [] ->
+    with_
+  | inner -> Csequence (inner, with_)
+
+(* [or_int c1 c2 dbg] builds the bitwise OR of [c1] and [c2], simplifying when
+   an operand is a literal constant. After the first arm at most one operand
+   is a constant, so the remaining arms are mutually exclusive. *)
+let or_int c1 c2 dbg =
+  match get_const c1, get_const c2 with
+  | Some n1, Some n2 -> natint_const_untagged dbg (Nativeint.logor n1 n2)
+  | _, Some 0n -> c1
+  | Some 0n, _ -> c2
+  | Some -1n, _ ->
+    (* the result is [-1], but the other operand must still be evaluated *)
+    replace c2 ~with_:c1
+  | _, Some -1n -> replace c1 ~with_:c2
+  | Some _, None ->
+    (* prefer putting constants on the right *)
+    Cop (Cor, [c2; c1], dbg)
+  | _, _ -> Cop (Cor, [c1; c2], dbg)
+
+(* [and_int c1 c2 dbg] builds the bitwise AND of [c1] and [c2], simplifying
+   when an operand is a literal constant. *)
+let and_int c1 c2 dbg =
+  match get_const c1, get_const c2 with
+  | Some n1, Some n2 -> natint_const_untagged dbg (Nativeint.logand n1 n2)
+  | Some -1n, _ -> c2
+  | _, Some -1n -> c1
+  | _, Some 0n ->
+    (* the result is [0], but the other operand must still be evaluated *)
+    replace c1 ~with_:c2
+  | Some 0n, _ -> replace c2 ~with_:c1
+  | Some _, None ->
+    (* prefer putting constants on the right *)
+    Cop (Cand, [c2; c1], dbg)
+  | _, _ -> Cop (Cand, [c1; c2], dbg)
+
+(* [xor_int c1 c2 dbg] builds the bitwise XOR of [c1] and [c2], simplifying
+   when an operand is a literal constant. *)
+let xor_int c1 c2 dbg =
+  match get_const c1, get_const c2 with
+  | Some n1, Some n2 -> natint_const_untagged dbg (Nativeint.logxor n1 n2)
+  | _, Some 0n -> c1
+  | Some 0n, _ -> c2
+  | Some _, None ->
+    (* prefer putting constants on the right *)
+    Cop (Cxor, [c2; c1], dbg)
+  | _, _ -> Cop (Cxor, [c1; c2], dbg)
+
+(* [lsr_int c1 c2 dbg] builds the logical right shift of [c1] by [c2]. When
+   [c2] is a constant accepted by [is_defined_shift], literal operands are
+   folded, nested constant shifts are merged, and the shift is pushed through
+   bitwise operations whose second operand is a literal constant. *)
+let rec lsr_int c1 c2 dbg =
   match c1, c2 with
   | c1, Cconst_int (0, _) -> c1
-  | Cop (Clsl, [c; Cconst_int (n1, _)], _), Cconst_int (n2, _)
-    when n1 > 0 && n2 > 0 && n1 + n2 < size_int * 8 ->
-    Cop (Clsl, [c; Cconst_int (n1 + n2, dbg)], dbg)
-  | Cop (Caddi, [c1; Cconst_int (n1, _)], _), Cconst_int (n2, _)
-    when Misc.no_overflow_lsl n1 n2 ->
-    add_const (lsl_int c1 c2 dbg) (n1 lsl n2) dbg
+  | c1, Cconst_int (n, _) when is_defined_shift n -> (
+    (* [n] is non-zero here, so the low bit of [c1] is shifted out *)
+    let c1 = ignore_low_bit_int c1 in
+    match get_const c1 with
+    | Some x -> natint_const_untagged dbg (Nativeint.shift_right_logical x n)
+    | None -> (
+      match c1 with
+      | Cop (Clsr, [inner; Cconst_int (n', _)], _) when is_defined_shift n' ->
+        (* when the combined amount is out of range every bit is shifted out:
+           the result is [0], with [inner] kept via [replace] *)
+        if is_defined_shift (n + n')
+        then lsr_const inner (n + n') dbg
+        else replace inner ~with_:(Cconst_int (0, dbg))
+      | Cop (Cor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
+        or_int (lsr_int x c2 dbg) (lsr_int y c2 dbg) dbg
+      | Cop (Cand, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
+        and_int (lsr_int x c2 dbg) (lsr_int y c2 dbg) dbg
+      | Cop (Cxor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
+        xor_int (lsr_int x c2 dbg) (lsr_int y c2 dbg) dbg
+      | c1 -> Cop (Clsr, [c1; c2], dbg)))
+  | Cop (Clsr, [x; (Cconst_int (n', _) as y)], z), c2 when is_defined_shift n'
+    ->
+    (* prefer putting the constant shift on the outside to help enable further
+       peephole optimizations *)
+    Cop (Clsr, [Cop (Clsr, [x; c2], dbg); y], z)
+  | c1, c2 -> Cop (Clsr, [c1; c2], dbg)
+
+(* [asr_int c1 c2 dbg] builds the arithmetic right shift of [c1] by [c2].
+   When [c2] is a constant accepted by [is_defined_shift], literal operands
+   are folded, nested constant shifts are merged, and the shift is pushed
+   through bitwise operations whose second operand is a literal constant. *)
+and asr_int c1 c2 dbg =
+  match c1, c2 with
+  | c1, Cconst_int (0, _) -> c1
+  | c1, Cconst_int (n, _) when is_defined_shift n -> (
+    (* [n] is non-zero here, so the low bit of [c1] is shifted out *)
+    let c1 = ignore_low_bit_int c1 in
+    match get_const c1 with
+    | Some x -> natint_const_untagged dbg (Nativeint.shift_right x n)
+    | None -> (
+      (* [get_const] returned [None], so [c1] is not a literal constant and
+         needs no constant arms below *)
+      match c1 with
+      | Cop (Casr, [inner; Cconst_int (n', _)], _) when is_defined_shift n' ->
+        (* saturating add, since the sign bit extends to the left *)
+        asr_const inner (Int.min (n + n') (arch_bits - 1)) dbg
+      | Cop (Clsr, [_; Cconst_int (n', _)], _)
+        when n' > 0 && is_defined_shift n' ->
+        (* If the argument is guaranteed non-negative, then we know the sign bit
+           is 0 and we can weaken this operation to a logical shift *)
+        lsr_const c1 n dbg
+      | Cop (Clsl, [c; Cconst_int (1, _)], _)
+        when n = 1 && guaranteed_to_be_small_int c ->
+        (* some operations always return small enough integers that it is safe
+           and correct to optimise [asr (lsl x 1) 1] into [x]. *)
+        c
+      | Cop (Cor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
+        or_int (asr_int x c2 dbg) (asr_int y c2 dbg) dbg
+      | Cop (Cand, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
+        and_int (asr_int x c2 dbg) (asr_int y c2 dbg) dbg
+      | Cop (Cxor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
+        xor_int (asr_int x c2 dbg) (asr_int y c2 dbg) dbg
+      | c1 -> Cop (Casr, [c1; c2], dbg)))
+  | Cop (Casr, [x; (Cconst_int (n', _) as y)], z), c2 when is_defined_shift n'
+    ->
+    (* prefer putting the constant shift on the outside to help enable further
+       peephole optimizations *)
+    Cop (Casr, [Cop (Casr, [x; c2], dbg); y], z)
+  | _ -> Cop (Casr, [c1; c2], dbg)
+
+(* [lsl_int c1 c2 dbg] builds the left shift of [c1] by [c2]. When [c2] is a
+   constant accepted by [is_defined_shift], literal operands are folded,
+   nested constant shifts are merged, and the shift is pushed through constant
+   additions and through bitwise operations whose second operand is a literal
+   constant. *)
+and lsl_int c1 c2 dbg =
+  match c1, c2 with
+  | c1, Cconst_int (0, _) -> c1
+  | c1, Cconst_int (n, _) when is_defined_shift n -> (
+    match get_const c1 with
+    | Some c1 -> natint_const_untagged dbg (Nativeint.shift_left c1 n)
+    | None -> (
+      match c1 with
+      | Cop (Clsl, [inner; Cconst_int (n', _)], _) when is_defined_shift n' ->
+        (* the inner shift's debug info is ignored so the result carries the
+           caller's [dbg], as in [lsr_int] and [asr_int]. When the combined
+           amount is out of range every bit is shifted out: the result is
+           [0], with [inner] kept via [replace] *)
+        if is_defined_shift (n + n')
+        then lsl_const inner (n + n') dbg
+        else replace inner ~with_:(Cconst_int (0, dbg))
+      | Cop (Caddi, [c1; Cconst_int (offset, _)], _)
+        when Misc.no_overflow_lsl offset n ->
+        add_const (lsl_int c1 c2 dbg) (offset lsl n) dbg
+      | Cop (Cor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
+        or_int (lsl_int x c2 dbg) (lsl_int y c2 dbg) dbg
+      | Cop (Cand, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
+        and_int (lsl_int x c2 dbg) (lsl_int y c2 dbg) dbg
+      | Cop (Cxor, [x; ((Cconst_int _ | Cconst_natint _) as y)], _) ->
+        xor_int (lsl_int x c2 dbg) (lsl_int y c2 dbg) dbg
+      | c1 -> Cop (Clsl, [c1; c2], dbg)))
+  | Cop (Clsl, [x; (Cconst_int (n', _) as y)], z), c2 when is_defined_shift n'
+    ->
+    (* prefer putting the constant shift on the outside to help enable further
+       peephole optimizations *)
+    Cop (Clsl, [Cop (Clsl, [x; c2], dbg); y], z)
   | _, _ -> Cop (Clsl, [c1; c2], dbg)
 
-let lsl_const c n dbg = lsl_int c (Cconst_int (n, dbg)) dbg
+(* Wrappers that shift [c] by the literal amount [n]; they belong to the same
+   recursive group as the shift builders, which call them back. *)
+and lsl_const c n dbg = lsl_int c (Cconst_int (n, dbg)) dbg
+
+and asr_const c n dbg = asr_int c (Cconst_int (n, dbg)) dbg
+
+and lsr_const c n dbg = lsr_int c (Cconst_int (n, dbg)) dbg
 
 let is_power2 n = n = 1 lsl Misc.log2 n
 
@@ -392,74 +548,33 @@ let rec mul_int c1 c2 dbg =
     add_const (mul_int c (Cconst_int (k, dbg)) dbg) (n * k) dbg
   | c1, c2 -> Cop (Cmuli, [c1; c2], dbg)
 
-(* identify cmm operations whose result is guaranteed to be small integers (e.g.
-   in the range [min_int / 4; max_int / 4]) *)
-let guaranteed_to_be_small_int = function
-  | Cop ((Ccmpi _ | Ccmpf _), _, _) ->
-    (* integer/float comparisons return either [1] or [0]. *)
-    true
-  | _ -> false
-
-let ignore_low_bit_int = function
-  | Cop
-      ( Caddi,
-        [(Cop (Clsl, [_; Cconst_int (n, _)], _) as c); Cconst_int (1, _)],
-        _ )
-    when n > 0 ->
-    c
-  | Cop (Cor, [c; Cconst_int (1, _)], _) -> c
-  | c -> c
-
-let lsr_int c1 c2 dbg =
-  match c1, c2 with
-  | c1, Cconst_int (0, _) -> c1
-  | Cop (Clsr, [c; Cconst_int (n1, _)], _), Cconst_int (n2, _)
-    when n1 > 0 && n2 > 0 && n1 + n2 < size_int * 8 ->
-    Cop (Clsr, [c; Cconst_int (n1 + n2, dbg)], dbg)
-  | c1, Cconst_int (n, _) when n > 0 ->
-    Cop (Clsr, [ignore_low_bit_int c1; c2], dbg)
-  | _ -> Cop (Clsr, [c1; c2], dbg)
-
-let lsr_const c n dbg = lsr_int c (Cconst_int (n, dbg)) dbg
-
-let asr_int c1 c2 dbg =
-  match c2 with
-  | Cconst_int (0, _) -> c1
-  | Cconst_int (n, _) when n > 0 -> (
-    match ignore_low_bit_int c1 with
-    (* some operations always return small enough integers that it is safe and
-       correct to optimise [asr (lsl x 1) 1] into [x]. *)
-    | Cop (Clsl, [c; Cconst_int (1, _)], _)
-      when n = 1 && guaranteed_to_be_small_int c ->
-      c
-    | c1' -> Cop (Casr, [c1'; c2], dbg))
-  | _ -> Cop (Casr, [c1; c2], dbg)
-
-let asr_const c n dbg = asr_int c (Cconst_int (n, dbg)) dbg
-
+(* [tag_int i dbg] builds the tagged form of [i]. The general case computes
+   [(i lsl 1) + 1]; when [i] is itself a right shift by [n > 0], the tagging
+   is folded into a shift by [n - 1] followed by setting the low bit. *)
 let tag_int i dbg =
   match i with
   | Cconst_int (n, _) -> int_const dbg n
   | Cop (Casr, [c; Cconst_int (n, _)], _) when n > 0 ->
     Cop
       (Cor, [asr_int c (Cconst_int (n - 1, dbg)) dbg; Cconst_int (1, dbg)], dbg)
+  (* same folding for a logical shift: [((c lsr n) lsl 1) lor 1] and
+     [(c lsr (n - 1)) lor 1] differ only in a bit that the [lor] sets *)
+  | Cop (Clsr, [c; Cconst_int (n, _)], _) when n > 0 ->
+    Cop (Cor, [lsr_const c (n - 1) dbg; Cconst_int (1, dbg)], dbg)
   | c -> incr_int (lsl_int c (Cconst_int (1, dbg)) dbg) dbg
 
 let untag_int i dbg =
   match i with
   | Cconst_int (n, _) -> Cconst_int (n asr 1, dbg)
   | Cop (Cor, [Cop (Casr, [c; Cconst_int (n, _)], _); Cconst_int (1, _)], _)
     when n > 0 && n < (size_int * 8) - 1 ->
-    when n > 0 && n < (size_int * 8) - 1 ->
+    when n > 0 && is_defined_shift (n + 1) ->
-    when n > 0 && n < (size_int * 8) - 1 ->
+    when n > 0 && is_defined_shift (n + 1) ->
-    Cop (Casr, [c; Cconst_int (n + 1, dbg)], dbg)
+    asr_const c (n + 1) dbg
   | Cop (Cor, [Cop (Clsr, [c; Cconst_int (n, _)], _); Cconst_int (1, _)], _)
     when n > 0 && n < (size_int * 8) - 1 ->
     Cop (Clsr, [c; Cconst_int (n + 1, dbg)], dbg)
   | c -> asr_int c (Cconst_int (1, dbg)) dbg
 
 let mk_not dbg cmm =
   match cmm with
-  | Cop (Caddi, [Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)], dbg')
-    -> (
+  | Cop
+      ( (Caddi | Cor),
+        [Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)],
+        dbg' ) -> (
     match c with
     | Cop (Ccmpi cmp, [c1; c2], dbg'') ->
       tag_int
@@ -799,8 +914,10 @@ let mod_int ?dividend_cannot_be_min_int c1 c2 dbg =
 
 let test_bool dbg cmm =
   match cmm with
-  | Cop (Caddi, [Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)], _)
-    ->
+  | Cop
+      ( (Caddi | Cor),
+        [Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)],
+        _ ) ->
     c
   | Cconst_int (n, dbg) ->
     if n = 1 then Cconst_int (0, dbg) else Cconst_int (1, dbg)
@@ -1035,8 +1152,10 @@ let array_indexing ?typ log2size ptr ofs dbg =
     if i = 0
     then ptr
     else Cop (add, [ptr; Cconst_int (i lsl log2size, dbg)], dbg)
-  | Cop (Caddi, [Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)], dbg')
-    ->
+  | Cop
+      ( (Caddi | Cor),
+        [Cop (Clsl, [c; Cconst_int (1, _)], _); Cconst_int (1, _)],
+        dbg' ) ->
     Cop (add, [ptr; lsl_const c log2size dbg], dbg')
   | Cop (Caddi, [c; Cconst_int (n, _)], dbg') when log2size = 0 ->
     Cop
@@ -1976,11 +2095,7 @@ let and_int e1 e2 dbg =
   match e1, e2 with
   | e, m when is_mask32 m -> zero_extend ~bits:32 e ~dbg
   | m, e when is_mask32 m -> zero_extend ~bits:32 e ~dbg
-  | e1, e2 -> Cop (Cand, [e1; e2], dbg)
-
-let or_int e1 e2 dbg = Cop (Cor, [e1; e2], dbg)
-
-let xor_int e1 e2 dbg = Cop (Cxor, [e1; e2], dbg)
+  | e1, e2 -> and_int e1 e2 dbg
 
 (* Boxed integers *)