Skip to content

Commit d0e8914

Browse files
committed
Merge branch 'generalize-cmm-helpers-interface' into cmm-refactor-unboxed-fields
2 parents abcd725 + dd1d945 commit d0e8914

File tree

5 files changed

+1225
-113
lines changed

5 files changed

+1225
-113
lines changed

backend/cmm_helpers.ml

+119-105
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,8 @@ let asr_int c1 c2 dbg =
434434
| c1' -> Cop (Casr, [c1'; c2], dbg))
435435
| _ -> Cop (Casr, [c1; c2], dbg)
436436

437+
(* [asr_const c n dbg] builds an arithmetic right shift of [c] by the constant
   amount [n], delegating to [asr_int] with [n] wrapped as a [Cconst_int]. *)
let asr_const c n dbg =
  let shift_amount = Cconst_int (n, dbg) in
  asr_int c shift_amount dbg
438+
437439
let tag_int i dbg =
438440
match i with
439441
| Cconst_int (n, _) -> int_const dbg n
@@ -543,45 +545,37 @@ let create_loop body dbg =
543545
[division_parameters] function is used in module Emit for those target
544546
platforms that support this optimization. *)
545547

546-
(* Unsigned comparison between native integers. *)
547-
548-
let ucompare x y = Nativeint.(compare (add x min_int) (add y min_int))
549-
550-
(* Unsigned division and modulus at type nativeint. Algorithm: Hacker's Delight
551-
section 9.3 *)
552-
553-
let udivmod n d =
554-
Nativeint.(
555-
if d < 0n
556-
then if ucompare n d < 0 then 0n, n else 1n, sub n d
557-
else
558-
let q = shift_left (div (shift_right_logical n 1) d) 1 in
559-
let r = sub n (mul q d) in
560-
if ucompare r d >= 0 then succ q, sub r d else q, r)
561-
562-
(* Compute division parameters. Algorithm: Hacker's Delight chapter 10, fig
563-
10-1. *)
564-
565548
let divimm_parameters d =
566-
Nativeint.(
567-
assert (d > 0n);
568-
let twopsm1 = min_int in
569-
(* 2^31 for 32-bit archs, 2^63 for 64-bit archs *)
570-
let nc = sub (pred twopsm1) (snd (udivmod twopsm1 d)) in
571-
let rec loop p (q1, r1) (q2, r2) =
572-
let p = p + 1 in
573-
let q1 = shift_left q1 1 and r1 = shift_left r1 1 in
574-
let q1, r1 = if ucompare r1 nc >= 0 then succ q1, sub r1 nc else q1, r1 in
575-
let q2 = shift_left q2 1 and r2 = shift_left r2 1 in
576-
let q2, r2 = if ucompare r2 d >= 0 then succ q2, sub r2 d else q2, r2 in
577-
let delta = sub d r2 in
578-
if ucompare q1 delta < 0 || (q1 = delta && r1 = 0n)
579-
then loop p (q1, r1) (q2, r2)
580-
else succ q2, p - size
581-
in
582-
loop (size - 1) (udivmod twopsm1 nc) (udivmod twopsm1 d))
549+
(* Compute the multiplier and shift used for signed division by a constant at type nativeint. Algorithm: Hacker's Delight,
550+
2nd ed, Figure 10-1. *)
551+
let open Nativeint in
552+
let udivmod n d =
553+
let q = unsigned_div n d in
554+
q, sub n (mul q d)
555+
in
556+
let ad = abs d in
557+
assert (ad > 1n);
558+
let t = add min_int (shift_right_logical d (size - 1)) in
559+
let anc = sub (pred t) (unsigned_rem t ad) in
560+
let step (q, r) x =
561+
let q = shift_left q 1 and r = shift_left r 1 in
562+
if unsigned_compare r x >= 0 then succ q, sub r x else q, r
563+
in
564+
let rec loop p qr1 qr2 =
565+
let p = p + 1 in
566+
let q1, r1 = step qr1 anc in
567+
let q2, r2 = step qr2 ad in
568+
let delta = sub ad r2 in
569+
if unsigned_compare q1 delta < 0 || (q1 = delta && r1 = 0n)
570+
then loop p (q1, r1) (q2, r2)
571+
else
572+
let m = succ q2 in
573+
let m = if d < 0n then neg m else m in
574+
m, p - size
575+
in
576+
loop (size - 1) (udivmod min_int anc) (udivmod min_int ad)
583577

584-
(* The result [(m, p)] of [divimm_parameters d] satisfies the following
578+
(* For d > 1, the result [(m, p)] of [divimm_parameters d] satisfies the following
585579
inequality:
586580
587581
2^(wordsize + p) < m * d <= 2^(wordsize + p) + 2^(p + 1) (i)
@@ -598,7 +592,7 @@ let divimm_parameters d =
598592
599593
* let add2 (xh, xl) (yh, yl) =
600594
* let zl = add xl yl and zh = add xh yh in
601-
* (if ucompare zl xl < 0 then succ zh else zh), zl
595+
* (if unsigned_compare zl xl < 0 then succ zh else zh), zl
602596
*
603597
* let shl2 (xh, xl) n =
604598
* assert (0 < n && n < size + size);
@@ -619,16 +613,16 @@ let divimm_parameters d =
619613
* (shl2 (0n, mul xl yh) halfsize)
620614
* (add2 (shl2 (0n, mul xh yl) halfsize) (0n, mul xl yl)))
621615
*
622-
* let ucompare2 (xh, xl) (yh, yl) =
623-
* let c = ucompare xh yh in
624-
* if c = 0 then ucompare xl yl else c
616+
* let unsigned_compare2 (xh, xl) (yh, yl) =
617+
* let c = unsigned_compare xh yh in
618+
* if c = 0 then unsigned_compare xl yl else c
625619
*
626620
* let validate d m p =
627621
* let md = mul2 m d in
628622
* let one2 = 0n, 1n in
629623
* let twoszp = shl2 one2 (size + p) in
630624
* let twop1 = shl2 one2 (p + 1) in
631-
* ucompare2 twoszp md < 0 && ucompare2 md (add2 twoszp twop1) <= 0
625+
* unsigned_compare2 twoszp md < 0 && unsigned_compare2 md (add2 twoszp twop1) <= 0
632626
*)
633627

634628
let raise_symbol dbg symb =
@@ -662,93 +656,117 @@ let make_safe_divmod operator ~if_divisor_is_negative_one
662656
dbg,
663657
Any )))
664658

665-
let rec div_int ?dividend_cannot_be_min_int c1 c2 dbg =
659+
(* [is_power_of_2_or_zero n] holds when [n] has at most one bit set: that is,
   when [n] is zero, a positive power of two, or [Nativeint.min_int] (only the
   sign bit set). Clearing the lowest set bit with [n land (n - 1)] leaves zero
   exactly in those cases. *)
let is_power_of_2_or_zero n =
  let below = Nativeint.pred n in
  Nativeint.logand n below = 0n
660+
661+
let divide_by_zero dividend ~dbg =
662+
bind "dividend" dividend (fun _ ->
663+
raise_symbol dbg "caml_exn_Division_by_zero")
664+
665+
let div_int ?dividend_cannot_be_min_int c1 c2 dbg =
666666
let if_divisor_is_negative_one ~dividend ~dbg = neg_int dividend dbg in
667667
match get_const c1, get_const c2 with
668-
| _, Some 0n -> Csequence (c1, raise_symbol dbg "caml_exn_Division_by_zero")
668+
| _, Some 0n -> divide_by_zero c1 ~dbg
669669
| _, Some 1n -> c1
670670
| Some n1, Some n2 -> natint_const_untagged dbg (Nativeint.div n1 n2)
671671
| _, Some -1n -> if_divisor_is_negative_one ~dividend:c1 ~dbg
672-
| _, Some n ->
673-
if n < 0n
672+
| _, Some divisor ->
673+
if divisor = Nativeint.min_int
674674
then
675-
if n = Nativeint.min_int
676-
then Cop (Ccmpi Ceq, [c1; Cconst_natint (Nativeint.min_int, dbg)], dbg)
677-
else
678-
neg_int
679-
(div_int ?dividend_cannot_be_min_int c1
680-
(Cconst_natint (Nativeint.neg n, dbg))
681-
dbg)
682-
dbg
683-
else if Nativeint.logand n (Nativeint.pred n) = 0n
675+
(* integer division by min_int always returns 0 unless the dividend is
676+
also min_int, in which case it's 1. *)
677+
Cifthenelse
678+
( Cop (Ccmpi Ceq, [c1; Cconst_natint (divisor, dbg)], dbg),
679+
dbg,
680+
Cconst_int (1, dbg),
681+
dbg,
682+
Cconst_int (0, dbg),
683+
dbg,
684+
Any )
685+
else if is_power_of_2_or_zero divisor
684686
then
685-
let l = Misc.log2_nativeint n in
687+
(* [divisor] must be positive here since we already handled zero and
688+
min_int (the only negative power of 2) *)
689+
let l = Misc.log2_nativeint divisor in
686690
(* Algorithm:
687691
688692
t = shift-right-signed(c1, l - 1)
689693
690694
t = shift-right(t, W - l)
691695
692-
t = c1 + t res = shift-right-signed(c1 + t, l) *)
693-
Cop
694-
( Casr,
695-
[ bind "dividend" c1 (fun c1 ->
696-
assert (l >= 1);
697-
let t = asr_int c1 (Cconst_int (l - 1, dbg)) dbg in
698-
let t = lsr_int t (Cconst_int (Nativeint.size - l, dbg)) dbg in
699-
add_int c1 t dbg);
700-
Cconst_int (l, dbg) ],
701-
dbg )
696+
t = c1 + t
697+
698+
res = shift-right-signed(c1 + t, l) *)
699+
asr_const
700+
(bind "dividend" c1 (fun c1 ->
701+
assert (l >= 1);
702+
let t = asr_const c1 (l - 1) dbg in
703+
let t = lsr_const t (Nativeint.size - l) dbg in
704+
add_int c1 t dbg))
705+
l dbg
702706
else
703-
let m, p = divimm_parameters n in
704-
(* Algorithm:
707+
bind "dividend" c1 (fun n ->
708+
(* Algorithm:
705709
706-
t = multiply-high-signed(c1, m) if m < 0,
710+
q = smulhi n, M
707711
708-
t = t + c1 if p > 0,
712+
if m < 0 && d > 0: q += n
709713
710-
t = shift-right-signed(t, p)
714+
if m > 0 && d < 0: q -= n
711715
712-
res = t + sign-bit(c1) *)
713-
bind "dividend" c1 (fun c1 ->
714-
let t =
715-
Cop
716-
(Cmulhi { signed = true }, [c1; natint_const_untagged dbg m], dbg)
716+
q >>= s
717+
718+
q += sign-bit(q) *)
719+
let m, s = divimm_parameters divisor in
720+
let q =
721+
Cop (Cmulhi { signed = true }, [n; natint_const_untagged dbg m], dbg)
722+
in
723+
let q =
724+
if m < 0n && divisor >= 0n
725+
then add_int q n dbg
726+
else if m >= 0n && divisor < 0n
727+
then sub_int q n dbg
728+
else q
717729
in
718-
let t = if m < 0n then Cop (Caddi, [t; c1], dbg) else t in
719-
let t =
720-
if p > 0 then Cop (Casr, [t; Cconst_int (p, dbg)], dbg) else t
730+
let q = asr_const q s dbg in
731+
let sign_bit =
732+
(* we can use n instead of q when the divisor is non-negative. This
733+
makes the instruction dependency graph shallower. *)
734+
lsr_const (if divisor >= 0n then n else q) (Nativeint.size - 1) dbg
721735
in
722-
add_int t (lsr_int c1 (Cconst_int (Nativeint.size - 1, dbg)) dbg) dbg)
736+
add_int q sign_bit dbg)
723737
| _, _ ->
724738
make_safe_divmod ?dividend_cannot_be_min_int ~if_divisor_is_negative_one
725739
Cdivi c1 c2 ~dbg
726740

727741
let mod_int ?dividend_cannot_be_min_int c1 c2 dbg =
728742
let if_divisor_is_positive_or_negative_one ~dividend ~dbg =
729-
match dividend with
730-
| Cvar _ -> Cconst_int (0, dbg)
731-
| dividend -> Csequence (dividend, Cconst_int (0, dbg))
743+
bind "dividend" dividend (fun _ -> Cconst_int (0, dbg))
732744
in
733745
match get_const c1, get_const c2 with
734-
| _, Some 0n -> Csequence (c1, raise_symbol dbg "caml_exn_Division_by_zero")
746+
| _, Some 0n -> divide_by_zero c1 ~dbg
735747
| _, Some (1n | -1n) ->
736748
if_divisor_is_positive_or_negative_one ~dividend:c1 ~dbg
737749
| Some n1, Some n2 -> natint_const_untagged dbg (Nativeint.rem n1 n2)
738750
| _, Some n ->
739751
if n = Nativeint.min_int
740752
then
753+
(* Similarly to the division by min_int almost always being 0, modulo
754+
min_int is almost always the identity, the exception being when the
755+
dividend is also min_int, in which case the result is 0 *)
741756
bind "dividend" c1 (fun c1 ->
757+
let min_int = Cconst_natint (Nativeint.min_int, dbg) in
742758
Cifthenelse
743-
( Cop (Ccmpi Ceq, [c1; neg_int c1 dbg], dbg),
759+
( Cop (Ccmpi Ceq, [c1; min_int], dbg),
744760
dbg,
745761
Cconst_int (0, dbg),
746762
dbg,
747-
Cop (Cor, [c1; Cconst_natint (Nativeint.min_int, dbg)], dbg),
763+
c1,
748764
dbg,
749765
Any ))
750-
else if Nativeint.logand n (Nativeint.pred n) = 0n
766+
else if is_power_of_2_or_zero n
751767
then
768+
(* [n] must be positive here since we already handled zero and
769+
min_int (the only negative power of 2). *)
752770
let l = Misc.log2_nativeint n in
753771
(* Algorithm:
754772
@@ -776,16 +794,6 @@ let mod_int ?dividend_cannot_be_min_int c1 c2 dbg =
776794
~if_divisor_is_negative_one:if_divisor_is_positive_or_negative_one Cmodi
777795
c1 c2 ~dbg
778796

779-
let div_int ?dividend_cannot_be_min_int c1 c2 dbg =
780-
bind "divisor" c2 (fun c2 ->
781-
bind "dividend" c1 (fun c1 ->
782-
div_int ?dividend_cannot_be_min_int c1 c2 dbg))
783-
784-
let mod_int ?dividend_cannot_be_min_int c1 c2 dbg =
785-
bind "divisor" c2 (fun c2 ->
786-
bind "dividend" c1 (fun c1 ->
787-
mod_int ?dividend_cannot_be_min_int c1 c2 dbg))
788-
789797
(* Bool *)
790798

791799
let test_bool dbg cmm =
@@ -3460,20 +3468,26 @@ let mul_int_caml arg1 arg2 dbg =
34603468
incr_int (mul_int (untag_int c1 dbg) (decr_int c2 dbg) dbg) dbg
34613469
| c1, c2 -> incr_int (mul_int (decr_int c1 dbg) (untag_int c2 dbg) dbg) dbg
34623470

3463-
(* Since caml integers are tagged, we know that they when they're untagged, they
3464-
can't be [Nativeint.min_int] *)
3465-
let caml_integers_are_tagged = true
3466-
34673471
let div_int_caml arg1 arg2 dbg =
3472+
let dividend_cannot_be_min_int =
3473+
(* Since caml integers are tagged, we know that when they're untagged,
3474+
they can't be [Nativeint.min_int] *)
3475+
true
3476+
in
34683477
tag_int
3469-
(div_int ~dividend_cannot_be_min_int:caml_integers_are_tagged
3470-
(untag_int arg1 dbg) (untag_int arg2 dbg) dbg)
3478+
(div_int ~dividend_cannot_be_min_int (untag_int arg1 dbg)
3479+
(untag_int arg2 dbg) dbg)
34713480
dbg
34723481

34733482
let mod_int_caml arg1 arg2 dbg =
3483+
let dividend_cannot_be_min_int =
3484+
(* Since caml integers are tagged, we know that when they're untagged,
3485+
they can't be [Nativeint.min_int] *)
3486+
true
3487+
in
34743488
tag_int
3475-
(mod_int ~dividend_cannot_be_min_int:caml_integers_are_tagged
3476-
(untag_int arg1 dbg) (untag_int arg2 dbg) dbg)
3489+
(mod_int ~dividend_cannot_be_min_int (untag_int arg1 dbg)
3490+
(untag_int arg2 dbg) dbg)
34773491
dbg
34783492

34793493
let and_int_caml arg1 arg2 dbg = and_int arg1 arg2 dbg

backend/cmm_helpers.mli

+7-5
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,15 @@ val tag_int : expression -> Debuginfo.t -> expression
9494
(** Integer untagging. [untag_int x = (x asr 1)] *)
9595
val untag_int : expression -> Debuginfo.t -> expression
9696

97-
(** Specific division operations for boxed integers *)
97+
(** signed division of two register-width integers *)
9898
val div_int :
9999
?dividend_cannot_be_min_int:bool ->
100100
expression ->
101101
expression ->
102102
Debuginfo.t ->
103103
expression
104104

105+
(** signed remainder of two register-width integers *)
105106
val mod_int :
106107
?dividend_cannot_be_min_int:bool ->
107108
expression ->
@@ -699,7 +700,7 @@ val create_ccatch :
699700
(** Shift operations.
700701
Inputs: a tagged caml integer and an untagged machine integer.
701702
Outputs: a tagged caml integer.
702-
ake as first argument a tagged caml integer, and as
703+
Take as first argument a tagged caml integer, and as
703704
second argument an untagged machine integer which is the amount to shift the
704705
first argument by. *)
705706

@@ -1183,9 +1184,10 @@ val unboxed_int64_or_nativeint_array_set :
11831184
The first argument is the heap block to modify a field of.
11841185
The [index_in_words] should be an untagged integer.
11851186
1186-
In constrast to [setfield] and [setfield_computed], [immediate_or_pointer] is not
1187-
needed as the layout is implied from the name, and [initialization_or_assignment] is
1188-
not needed as unboxed ints can always be assigned without caml_modify (etc.). *)
1187+
In contrast to [setfield] and [setfield_computed], [immediate_or_pointer] is not
1188+
needed as the layout is known from the [memory_chunk] argument, and
1189+
[initialization_or_assignment] is not needed as unboxed ints can always be assigned
1190+
without caml_modify (etc.). *)
11891191

11901192
val get_field_unboxed :
11911193
dbg:Debuginfo.t ->

middle_end/flambda2/to_cmm/to_cmm_expr.ml

+4-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,10 @@ let translate_external_call env res ~free_vars apply ~callee_simple ~args
169169
https://github.com/ARM-software/abi-aa/releases/download/2024Q3/aapcs64.pdf
170170
171171
and figure out what happens for mixed int/float struct returns (it
172-
looks like the floats may be returned in int regs) *)
172+
looks like the floats may be returned in int regs)
173+
174+
jvanburen: that seems to be what clang does:
175+
https://godbolt.org/z/snzEoME9h *)
173176
(match Target_system.architecture () with
174177
| X86_64 -> ()
175178
| AArch64 ->

0 commit comments

Comments
 (0)