oxcaml
diff --git a/‎backend/cmm_helpers.ml
Lines changed: 119 additions & 105 deletions b/‎backend/cmm_helpers.ml
Lines changed: 119 additions & 105 deletions
diff --git a/‎backend/cmm_helpers.mli
Lines changed: 7 additions & 5 deletions b/‎backend/cmm_helpers.mli
Lines changed: 7 additions & 5 deletions
diff --git a/‎middle_end/flambda2/to_cmm/to_cmm_expr.ml
Lines changed: 4 additions & 1 deletion b/‎middle_end/flambda2/to_cmm/to_cmm_expr.ml
Lines changed: 4 additions & 1 deletion
@@ -434,6 +434,8 @@ let asr_int c1 c2 dbg =
     | c1' -> Cop (Casr, [c1'; c2], dbg))
   | _ -> Cop (Casr, [c1; c2], dbg)
 
+let asr_const c n dbg = asr_int c (Cconst_int (n, dbg)) dbg
+
 let tag_int i dbg =
   match i with
   | Cconst_int (n, _) -> int_const dbg n
@@ -543,45 +545,37 @@ let create_loop body dbg =
    [division_parameters] function is used in module Emit for those target
    platforms that support this optimization. *)
 
-(* Unsigned comparison between native integers. *)
-
-let ucompare x y = Nativeint.(compare (add x min_int) (add y min_int))
-
-(* Unsigned division and modulus at type nativeint. Algorithm: Hacker's Delight
-   section 9.3 *)
-
-let udivmod n d =
-  Nativeint.(
-    if d < 0n
-    then if ucompare n d < 0 then 0n, n else 1n, sub n d
-    else
-      let q = shift_left (div (shift_right_logical n 1) d) 1 in
-      let r = sub n (mul q d) in
-      if ucompare r d >= 0 then succ q, sub r d else q, r)
-
-(* Compute division parameters. Algorithm: Hacker's Delight chapter 10, fig
-   10-1. *)
-
 let divimm_parameters d =
-  Nativeint.(
-    assert (d > 0n);
-    let twopsm1 = min_int in
-    (* 2^31 for 32-bit archs, 2^63 for 64-bit archs *)
-    let nc = sub (pred twopsm1) (snd (udivmod twopsm1 d)) in
-    let rec loop p (q1, r1) (q2, r2) =
-      let p = p + 1 in
-      let q1 = shift_left q1 1 and r1 = shift_left r1 1 in
-      let q1, r1 = if ucompare r1 nc >= 0 then succ q1, sub r1 nc else q1, r1 in
-      let q2 = shift_left q2 1 and r2 = shift_left r2 1 in
-      let q2, r2 = if ucompare r2 d >= 0 then succ q2, sub r2 d else q2, r2 in
-      let delta = sub d r2 in
-      if ucompare q1 delta < 0 || (q1 = delta && r1 = 0n)
-      then loop p (q1, r1) (q2, r2)
-      else succ q2, p - size
-    in
-    loop (size - 1) (udivmod twopsm1 nc) (udivmod twopsm1 d))
+  (* Signed division and modulus at type nativeint. Algorithm: Hacker's Delight,
+     2nd ed, Figure 10-1. *)
+  let open Nativeint in
+  let udivmod n d =
+    let q = unsigned_div n d in
+    q, sub n (mul q d)
+  in
+  let ad = abs d in
+  assert (ad > 1n);
+  let t = add min_int (shift_right_logical d (size - 1)) in
+  let anc = sub (pred t) (unsigned_rem t ad) in
+  let step (q, r) x =
+    let q = shift_left q 1 and r = shift_left r 1 in
+    if unsigned_compare r x >= 0 then succ q, sub r x else q, r
+  in
+  let rec loop p qr1 qr2 =
+    let p = p + 1 in
+    let q1, r1 = step qr1 anc in
+    let q2, r2 = step qr2 ad in
+    let delta = sub ad r2 in
+    if unsigned_compare q1 delta < 0 || (q1 = delta && r1 = 0n)
+    then loop p (q1, r1) (q2, r2)
+    else
+      let m = succ q2 in
+      let m = if d < 0n then neg m else m in
+      m, p - size
+  in
+  loop (size - 1) (udivmod min_int anc) (udivmod min_int ad)
 
-(* The result [(m, p)] of [divimm_parameters d] satisfies the following
+(* For d > 1, the result [(m, p)] of [divimm_parameters d] satisfies the following
    inequality:
 
    2^(wordsize + p) < m * d <= 2^(wordsize + p) + 2^(p + 1) (i)
@@ -598,7 +592,7 @@ let divimm_parameters d =
 
  * let add2 (xh, xl) (yh, yl) =
  *   let zl = add xl yl and zh = add xh yh in
- *   (if ucompare zl xl < 0 then succ zh else zh), zl
+ *   (if unsigned_compare zl xl < 0 then succ zh else zh), zl
  *
  * let shl2 (xh, xl) n =
  *   assert (0 < n && n < size + size);
@@ -619,16 +613,16 @@ let divimm_parameters d =
  *        (shl2 (0n, mul xl yh) halfsize)
  *        (add2 (shl2 (0n, mul xh yl) halfsize) (0n, mul xl yl)))
  *
- * let ucompare2 (xh, xl) (yh, yl) =
- *   let c = ucompare xh yh in
- *   if c = 0 then ucompare xl yl else c
+ * let unsigned_compare2 (xh, xl) (yh, yl) =
+ *   let c = unsigned_compare xh yh in
+ *   if c = 0 then unsigned_compare xl yl else c
  *
  * let validate d m p =
  *   let md = mul2 m d in
  *   let one2 = 0n, 1n in
  *   let twoszp = shl2 one2 (size + p) in
  *   let twop1 = shl2 one2 (p + 1) in
- *   ucompare2 twoszp md < 0 && ucompare2 md (add2 twoszp twop1) <= 0
+ *   unsigned_compare2 twoszp md < 0 && unsigned_compare2 md (add2 twoszp twop1) <= 0
  *)
 
 let raise_symbol dbg symb =
@@ -662,93 +656,117 @@ let make_safe_divmod operator ~if_divisor_is_negative_one
                 dbg,
                 Any )))
 
-let rec div_int ?dividend_cannot_be_min_int c1 c2 dbg =
+let is_power_of_2_or_zero n = Nativeint.logand n (Nativeint.pred n) = 0n
+
+let divide_by_zero dividend ~dbg =
+  bind "dividend" dividend (fun _ ->
+      raise_symbol dbg "caml_exn_Division_by_zero")
+
+let div_int ?dividend_cannot_be_min_int c1 c2 dbg =
   let if_divisor_is_negative_one ~dividend ~dbg = neg_int dividend dbg in
   match get_const c1, get_const c2 with
-  | _, Some 0n -> Csequence (c1, raise_symbol dbg "caml_exn_Division_by_zero")
+  | _, Some 0n -> divide_by_zero c1 ~dbg
   | _, Some 1n -> c1
   | Some n1, Some n2 -> natint_const_untagged dbg (Nativeint.div n1 n2)
   | _, Some -1n -> if_divisor_is_negative_one ~dividend:c1 ~dbg
-  | _, Some n ->
-    if n < 0n
+  | _, Some divisor ->
+    if divisor = Nativeint.min_int
     then
-      if n = Nativeint.min_int
-      then Cop (Ccmpi Ceq, [c1; Cconst_natint (Nativeint.min_int, dbg)], dbg)
-      else
-        neg_int
-          (div_int ?dividend_cannot_be_min_int c1
-             (Cconst_natint (Nativeint.neg n, dbg))
-             dbg)
-          dbg
-    else if Nativeint.logand n (Nativeint.pred n) = 0n
+      (* integer division by min_int always returns 0 unless the dividend is
+         also min_int, in which case it's 1. *)
+      Cifthenelse
+        ( Cop (Ccmpi Ceq, [c1; Cconst_natint (divisor, dbg)], dbg),
+          dbg,
+          Cconst_int (1, dbg),
+          dbg,
+          Cconst_int (0, dbg),
+          dbg,
+          Any )
+    else if is_power_of_2_or_zero divisor
     then
-      let l = Misc.log2_nativeint n in
+      (* [divisor] must be positive be here since we already handled zero and
+         min_int (the only negative power of 2) *)
+      let l = Misc.log2_nativeint divisor in
       (* Algorithm:
 
          t = shift-right-signed(c1, l - 1)
 
          t = shift-right(t, W - l)
 
-         t = c1 + t res = shift-right-signed(c1 + t, l) *)
-      Cop
-        ( Casr,
-          [ bind "dividend" c1 (fun c1 ->
-                assert (l >= 1);
-                let t = asr_int c1 (Cconst_int (l - 1, dbg)) dbg in
-                let t = lsr_int t (Cconst_int (Nativeint.size - l, dbg)) dbg in
-                add_int c1 t dbg);
-            Cconst_int (l, dbg) ],
-          dbg )
+         t = c1 + t
+
+         res = shift-right-signed(c1 + t, l) *)
+      asr_const
+        (bind "dividend" c1 (fun c1 ->
+             assert (l >= 1);
+             let t = asr_const c1 (l - 1) dbg in
+             let t = lsr_const t (Nativeint.size - l) dbg in
+             add_int c1 t dbg))
+        l dbg
     else
-      let m, p = divimm_parameters n in
-      (* Algorithm:
+      bind "dividend" c1 (fun n ->
+          (* Algorithm:
 
-         t = multiply-high-signed(c1, m) if m < 0,
+             q = smulhi n, M
 
-         t = t + c1 if p > 0,
+             if m < 0 && d > 0: q += n
 
-         t = shift-right-signed(t, p)
+             if m > 0 && d < 0: q -= n
 
-         res = t + sign-bit(c1) *)
-      bind "dividend" c1 (fun c1 ->
-          let t =
-            Cop
-              (Cmulhi { signed = true }, [c1; natint_const_untagged dbg m], dbg)
+             q >>= s
+
+             q += sign-bit(q) *)
+          let m, s = divimm_parameters divisor in
+          let q =
+            Cop (Cmulhi { signed = true }, [n; natint_const_untagged dbg m], dbg)
+          in
+          let q =
+            if m < 0n && divisor >= 0n
+            then add_int q n dbg
+            else if m >= 0n && divisor < 0n
+            then sub_int q n dbg
+            else q
           in
-          let t = if m < 0n then Cop (Caddi, [t; c1], dbg) else t in
-          let t =
-            if p > 0 then Cop (Casr, [t; Cconst_int (p, dbg)], dbg) else t
+          let q = asr_const q s dbg in
+          let sign_bit =
+            (* we can use n instead of q when the divisor is non-negative. This
+               makes the instruction dependency graph shallower. *)
+            lsr_const (if divisor >= 0n then n else q) (Nativeint.size - 1) dbg
           in
-          add_int t (lsr_int c1 (Cconst_int (Nativeint.size - 1, dbg)) dbg) dbg)
+          add_int q sign_bit dbg)
   | _, _ ->
     make_safe_divmod ?dividend_cannot_be_min_int ~if_divisor_is_negative_one
       Cdivi c1 c2 ~dbg
 
 let mod_int ?dividend_cannot_be_min_int c1 c2 dbg =
   let if_divisor_is_positive_or_negative_one ~dividend ~dbg =
-    match dividend with
-    | Cvar _ -> Cconst_int (0, dbg)
-    | dividend -> Csequence (dividend, Cconst_int (0, dbg))
+    bind "dividend" dividend (fun _ -> Cconst_int (0, dbg))
   in
   match get_const c1, get_const c2 with
-  | _, Some 0n -> Csequence (c1, raise_symbol dbg "caml_exn_Division_by_zero")
+  | _, Some 0n -> divide_by_zero c1 ~dbg
   | _, Some (1n | -1n) ->
     if_divisor_is_positive_or_negative_one ~dividend:c1 ~dbg
   | Some n1, Some n2 -> natint_const_untagged dbg (Nativeint.rem n1 n2)
   | _, Some n ->
     if n = Nativeint.min_int
     then
+      (* Similarly to the division by min_int almost always being 0, modulo
+         min_int is almost always the identity, the exception being when the
+         divisor is min_int *)
       bind "dividend" c1 (fun c1 ->
+          let min_int = Cconst_natint (Nativeint.min_int, dbg) in
           Cifthenelse
-            ( Cop (Ccmpi Ceq, [c1; neg_int c1 dbg], dbg),
+            ( Cop (Ccmpi Ceq, [c1; min_int], dbg),
               dbg,
               Cconst_int (0, dbg),
               dbg,
-              Cop (Cor, [c1; Cconst_natint (Nativeint.min_int, dbg)], dbg),
+              c1,
               dbg,
               Any ))
-    else if Nativeint.logand n (Nativeint.pred n) = 0n
+    else if is_power_of_2_or_zero n
     then
+      (* [divisor] must be positive be here since we already handled zero and
+         min_int (the only negative power of 2). *)
       let l = Misc.log2_nativeint n in
       (* Algorithm:
 
@@ -776,16 +794,6 @@ let mod_int ?dividend_cannot_be_min_int c1 c2 dbg =
       ~if_divisor_is_negative_one:if_divisor_is_positive_or_negative_one Cmodi
       c1 c2 ~dbg
 
-let div_int ?dividend_cannot_be_min_int c1 c2 dbg =
-  bind "divisor" c2 (fun c2 ->
-      bind "dividend" c1 (fun c1 ->
-          div_int ?dividend_cannot_be_min_int c1 c2 dbg))
-
-let mod_int ?dividend_cannot_be_min_int c1 c2 dbg =
-  bind "divisor" c2 (fun c2 ->
-      bind "dividend" c1 (fun c1 ->
-          mod_int ?dividend_cannot_be_min_int c1 c2 dbg))
-
 (* Bool *)
 
 let test_bool dbg cmm =
@@ -3460,20 +3468,26 @@ let mul_int_caml arg1 arg2 dbg =
     incr_int (mul_int (untag_int c1 dbg) (decr_int c2 dbg) dbg) dbg
   | c1, c2 -> incr_int (mul_int (decr_int c1 dbg) (untag_int c2 dbg) dbg) dbg
 
-(* Since caml integers are tagged, we know that they when they're untagged, they
-   can't be [Nativeint.min_int] *)
-let caml_integers_are_tagged = true
-
 let div_int_caml arg1 arg2 dbg =
+  let dividend_cannot_be_min_int =
+    (* Since caml integers are tagged, we know that they when they're untagged,
+       they can't be [Nativeint.min_int] *)
+    true
+  in
   tag_int
-    (div_int ~dividend_cannot_be_min_int:caml_integers_are_tagged
-       (untag_int arg1 dbg) (untag_int arg2 dbg) dbg)
+    (div_int ~dividend_cannot_be_min_int (untag_int arg1 dbg)
+       (untag_int arg2 dbg) dbg)
     dbg
 
 let mod_int_caml arg1 arg2 dbg =
+  let dividend_cannot_be_min_int =
+    (* Since caml integers are tagged, we know that they when they're untagged,
+       they can't be [Nativeint.min_int] *)
+    true
+  in
   tag_int
-    (mod_int ~dividend_cannot_be_min_int:caml_integers_are_tagged
-       (untag_int arg1 dbg) (untag_int arg2 dbg) dbg)
+    (mod_int ~dividend_cannot_be_min_int (untag_int arg1 dbg)
+       (untag_int arg2 dbg) dbg)
     dbg
 
 let and_int_caml arg1 arg2 dbg = and_int arg1 arg2 dbg
 
@@ -94,14 +94,15 @@ val tag_int : expression -> Debuginfo.t -> expression
 (** Integer untagging. [untag_int x = (x asr 1)] *)
 val untag_int : expression -> Debuginfo.t -> expression
 
-(** Specific division operations for boxed integers *)
+(** signed division of two register-width integers *)
 val div_int :
   ?dividend_cannot_be_min_int:bool ->
   expression ->
   expression ->
   Debuginfo.t ->
   expression
 
+(** signed remainder of two register-width integers *)
 val mod_int :
   ?dividend_cannot_be_min_int:bool ->
   expression ->
@@ -699,7 +700,7 @@ val create_ccatch :
 (** Shift operations.
     Inputs: a tagged caml integer and an untagged machine integer.
     Outputs: a tagged caml integer.
-    ake as first argument a tagged caml integer, and as
+    Take as first argument a tagged caml integer, and as
     second argument an untagged machine intger which is the amount to shift the
     first argument by. *)
 
@@ -1183,9 +1184,10 @@ val unboxed_int64_or_nativeint_array_set :
     The first argument is the heap block to modify a field of.
     The [index_in_words] should be an untagged integer.
 
-    In constrast to [setfield] and [setfield_computed], [immediate_or_pointer] is not
-    needed as the layout is implied from the name, and [initialization_or_assignment] is
-    not needed as unboxed ints can always be assigned without caml_modify (etc.). *)
+    In contrast to [setfield] and [setfield_computed], [immediate_or_pointer] is not
+    needed as the layout is known from the [memory_chunk] argument, and
+    [initialization_or_assignment] is not needed as unboxed ints can always be assigned
+    without caml_modify (etc.). *)
 
 val get_field_unboxed :
   dbg:Debuginfo.t ->
 
@@ -169,7 +169,10 @@ let translate_external_call env res ~free_vars apply ~callee_simple ~args
          https://github.com/ARM-software/abi-aa/releases/download/2024Q3/aapcs64.pdf
 
          and figure out what happens for mixed int/float struct returns (it
-         looks like the floats may be returned in int regs) *)
+         looks like the floats may be returned in int regs)
+
+         jvanburen: that seems to be what clang does:
+         https://godbolt.org/z/snzEoME9h *)
       (match Target_system.architecture () with
       | X86_64 -> ()
       | AArch64 ->