Skip to content

Commit 0703b41

Browse files
authored
Avoid polymorphic comparison in backend/amd64 (#3643)
1 parent b584f5f commit 0703b41

16 files changed

+196
-35
lines changed

backend/amd64/CSE.ml

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
(**************************************************************************)
1515
[@@@ocaml.warning "+4"]
1616

17+
open! Int_replace_polymorphic_compare
18+
1719
(* CSE for the AMD64 *)
1820

1921
open Arch

backend/amd64/arch.ml

+23-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
(**************************************************************************)
1515
[@@@ocaml.warning "+4"]
1616

17+
open! Int_replace_polymorphic_compare
18+
1719
module Extension = struct
1820
module T = struct
1921
type t =
@@ -29,7 +31,20 @@ module Extension = struct
2931
| BMI
3032
| BMI2
3133

32-
let compare = compare
34+
let rank = function
35+
| POPCNT -> 0
36+
| PREFETCHW -> 1
37+
| PREFETCHWT1 -> 2
38+
| SSE3 -> 3
39+
| SSSE3 -> 4
40+
| SSE4_1 -> 5
41+
| SSE4_2 -> 6
42+
| CLMUL -> 7
43+
| LZCNT -> 8
44+
| BMI -> 9
45+
| BMI2 -> 10
46+
47+
let compare left right = Int.compare (rank left) (rank right)
3348
end
3449

3550
include T
@@ -119,6 +134,12 @@ open Format
119134

120135
type sym_global = Global | Local
121136

137+
let equal_sym_global left right =
138+
match left, right with
139+
| Global, Global
140+
| Local, Local -> true
141+
| (Global | Local), _ -> false
142+
122143
type addressing_mode =
123144
Ibased of string * sym_global * int (* symbol + displ *)
124145
| Iindexed of int (* reg + displ *)
@@ -352,7 +373,7 @@ let float_cond_and_need_swap cond =
352373
let equal_addressing_mode left right =
353374
match left, right with
354375
| Ibased (left_sym, left_glob, left_displ), Ibased (right_sym, right_glob, right_displ) ->
355-
String.equal left_sym right_sym && left_glob = right_glob && Int.equal left_displ right_displ
376+
String.equal left_sym right_sym && equal_sym_global left_glob right_glob && Int.equal left_displ right_displ
356377
| Iindexed left_displ, Iindexed right_displ ->
357378
Int.equal left_displ right_displ
358379
| Iindexed2 left_displ, Iindexed2 right_displ ->

backend/amd64/emit.ml

+49-32
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
[Flambda_backend_flags] and shared variables.
2020
For details, see [asmgen.mli]. *)
2121

22+
open! Int_replace_polymorphic_compare
23+
2224
open Cmm
2325
open Arch
2426
open Proc
@@ -28,6 +30,7 @@ open Linear
2830
open Emitaux
2931

3032
open X86_ast
33+
open X86_ast_utils
3134
open X86_proc
3235
open X86_dsl
3336
module String = Misc.Stdlib.String
@@ -42,6 +45,18 @@ open! Branch_relaxation
4245

4346
let _label s = D.label ~typ:QWORD s
4447

48+
let is_linux = function
49+
| S_linux -> true
50+
| _ -> false
51+
52+
let is_macosx = function
53+
| S_macosx -> true
54+
| _ -> false
55+
56+
let is_win64 = function
57+
| S_win64 -> true
58+
| _ -> false
59+
4560
(* Override proc.ml *)
4661

4762
let int_reg_name =
@@ -143,7 +158,7 @@ let pop r =
143158

144159
(* Symbols *)
145160

146-
let symbol_prefix = if system = S_macosx then "_" else ""
161+
let symbol_prefix = match system with S_macosx -> "_" | _ -> ""
147162

148163
let emit_symbol s = string_of_symbol symbol_prefix s
149164

@@ -652,7 +667,7 @@ let emit_float_test (width : Cmm.float_width)
652667
| Float32 -> I.ucomiss, I.comiss
653668
in
654669
match cmp with
655-
| CFeq when arg i 1 = arg i 0 ->
670+
| CFeq when equal_arg (arg i 1) (arg i 0) ->
656671
ucomi (arg i 1) (arg i 0);
657672
taken NP
658673
| CFeq ->
@@ -661,7 +676,7 @@ let emit_float_test (width : Cmm.float_width)
661676
I.jp (label next); (* skip if unordered *)
662677
taken E; (* branch taken if x=y *)
663678
def_label next
664-
| CFneq when arg i 1 = arg i 0 ->
679+
| CFneq when equal_arg (arg i 1) (arg i 0) ->
665680
ucomi (arg i 1) (arg i 0);
666681
taken P
667682
| CFneq ->
@@ -826,8 +841,8 @@ let move (src : Reg.t) (dst : Reg.t) =
826841
end
827842

828843
let stack_to_stack_move (src : Reg.t) (dst : Reg.t) =
829-
assert (src.typ = dst.typ);
830-
if (src.loc <> dst.loc) then begin
844+
assert (Cmm.equal_machtype_component src.typ dst.typ);
845+
if not (Reg.equal_location src.loc dst.loc) then begin
831846
match src.typ with
832847
| Int | Val ->
833848
(* Not calling move because r15 is not in int_reg_name. *)
@@ -1071,11 +1086,11 @@ end = struct
10711086
abstraction barrier of [X86_ast]. *)
10721087
let[@inline always] uses_register register = function
10731088
| Reg8L register' | Reg16 register' | Reg32 register' | Reg64 register' ->
1074-
register = register'
1089+
equal_reg64 register register'
10751090
| Mem { idx = register'; base = None; scale; _ } ->
1076-
scale <> 0 && register = register'
1091+
scale <> 0 && equal_reg64 register register'
10771092
| Mem { idx = register'; base = Some register''; _ } ->
1078-
register = register' || register = register''
1093+
equal_reg64 register register' || equal_reg64 register register''
10791094
| _ -> false
10801095
;;
10811096

@@ -1339,7 +1354,7 @@ let emit_simd_instr op i =
13391354
| BMI2 Deposit_64 -> I.pdep (arg i 1) (arg i 0) (res i 0)
13401355
| SSE Round_current_f32_i64 -> I.cvtss2si (arg i 0) (res i 0)
13411356
| SSE Sqrt_scalar_f32 ->
1342-
if arg i 0 <> res i 0 then
1357+
if not (equal_arg (arg i 0) (res i 0)) then
13431358
I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
13441359
I.sqrtss (arg i 0) (res i 0)
13451360
| SSE Max_scalar_f32 -> I.maxss (arg i 1) (res i 0)
@@ -1364,7 +1379,7 @@ let emit_simd_instr op i =
13641379
| SSE2 Max_scalar_f64 -> I.maxsd (arg i 1) (res i 0)
13651380
| SSE2 Min_scalar_f64 -> I.minsd (arg i 1) (res i 0)
13661381
| SSE2 Sqrt_scalar_f64 ->
1367-
if arg i 0 <> res i 0 then
1382+
if not(equal_arg (arg i 0) (res i 0)) then
13681383
I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
13691384
I.sqrtsd (arg i 0) (res i 0)
13701385
| SSE2 Sqrt_f64 -> I.sqrtpd (arg i 0) (res i 0)
@@ -1522,11 +1537,11 @@ let emit_simd_instr op i =
15221537
| SSE41 Min_unsigned_i16 -> I.pminuw (arg i 1) (res i 0)
15231538
| SSE41 Min_unsigned_i32 -> I.pminud (arg i 1) (res i 0)
15241539
| SSE41 (Round_scalar_f64 n) ->
1525-
if arg i 0 <> res i 0 then
1540+
if not (equal_arg (arg i 0) (res i 0)) then
15261541
I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
15271542
I.roundsd n (arg i 0) (res i 0)
15281543
| SSE41 (Round_scalar_f32 n) ->
1529-
if arg i 0 <> res i 0 then
1544+
if not (equal_arg (arg i 0) (res i 0)) then
15301545
I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
15311546
I.roundss n (arg i 0) (res i 0)
15321547
| SSE41 (Round_f64 n) -> I.roundpd n (arg i 0) (res i 0)
@@ -1584,7 +1599,7 @@ let emit_instr ~first ~fallthrough i =
15841599
| Lop(Move | Spill | Reload) ->
15851600
move i.arg.(0) i.res.(0)
15861601
| Lop(Const_int n) ->
1587-
if n = 0n then begin
1602+
if Nativeint.equal n 0n then begin
15881603
match i.res.(0).loc with
15891604
| Reg _ ->
15901605
(* Clearing the bottom half also clears the top half (except for
@@ -1593,7 +1608,7 @@ let emit_instr ~first ~fallthrough i =
15931608
I.xor (res32 i 0) (res32 i 0)
15941609
| _ ->
15951610
I.mov (int 0) (res i 0)
1596-
end else if n > 0n && n <= 0xFFFF_FFFFn then begin
1611+
end else if Nativeint.compare n 0n > 0 && Nativeint.compare n 0xFFFF_FFFFn <= 0 then begin
15971612
match i.res.(0).loc with
15981613
| Reg _ ->
15991614
(* Similarly, setting only the bottom half clears the top half. *)
@@ -1641,7 +1656,7 @@ let emit_instr ~first ~fallthrough i =
16411656
output_epilogue (fun () -> I.jmp (arg i 0))
16421657
| Lcall_op(Ltailcall_imm { func; }) ->
16431658
begin
1644-
if func.sym_name = !function_name then
1659+
if String.equal func.sym_name !function_name then
16451660
match !tailrec_entry_point with
16461661
| None -> Misc.fatal_error "jump to missing tailrec entry point"
16471662
| Some tailrec_entry_point -> I.jmp (label tailrec_entry_point)
@@ -1664,7 +1679,7 @@ let emit_instr ~first ~fallthrough i =
16641679
load_symbol_addr (Cmm.global_symbol func) rax;
16651680
emit_call (Cmm.global_symbol "caml_c_call");
16661681
record_frame i.live (Dbg_other i.dbg);
1667-
if not Config.runtime5 && system <> S_win64 then begin
1682+
if not Config.runtime5 && not (is_win64 system) then begin
16681683

16691684
(* In amd64.S, "caml_c_call" tail-calls the C function (in order to
16701685
produce nicer backtraces), so we need to restore r15 manually after
@@ -1819,7 +1834,7 @@ let emit_instr ~first ~fallthrough i =
18191834
I.movzx al (res i 0)
18201835
| Lop(Intop_imm (Iand, n)) when n >= 0 && n <= 0xFFFF_FFFF && Reg.is_reg i.res.(0) ->
18211836
I.and_ (int n) (res32 i 0)
1822-
| Lop(Intop Ixor) when i.arg.(1).loc = i.res.(0).loc && Reg.is_reg i.res.(0) ->
1837+
| Lop(Intop Ixor) when Reg.equal_location i.arg.(1).loc i.res.(0).loc && Reg.is_reg i.res.(0) ->
18231838
I.xor (res32 i 0) (res32 i 0)
18241839
| Lop(Intop(Idiv | Imod)) ->
18251840
I.cqo ();
@@ -1834,7 +1849,7 @@ let emit_instr ~first ~fallthrough i =
18341849
| Lop(Intop ((Iadd|Isub|Imul|Iand|Ior|Ixor) as op)) ->
18351850
(* We have i.arg.(0) = i.res.(0) *)
18361851
instr_for_intop op (arg i 1) (res i 0)
1837-
| Lop(Intop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
1852+
| Lop(Intop_imm(Iadd, n)) when not (Reg.equal_location i.arg.(0).loc i.res.(0).loc) ->
18381853
I.lea (mem64 NONE n (arg64 i 0)) (res i 0)
18391854
| Lop(Intop_imm(Iadd, 1) | Intop_imm(Isub, -1)) ->
18401855
I.inc (res i 0)
@@ -1871,7 +1886,7 @@ let emit_instr ~first ~fallthrough i =
18711886
| Lop(Floatop(width, (Iaddf | Isubf | Imulf | Idivf as floatop))) ->
18721887
instr_for_floatop width floatop (arg i 1) (res i 0)
18731888
| Lop(Opaque) ->
1874-
assert (i.arg.(0).loc = i.res.(0).loc)
1889+
assert (Reg.equal_location i.arg.(0).loc i.res.(0).loc)
18751890
| Lop(Specific(Ilea addr)) ->
18761891
I.lea (addressing addr NONE i 0) (res i 0)
18771892
| Lop(Specific(Ioffset_loc(n, addr))) ->
@@ -1964,7 +1979,7 @@ let emit_instr ~first ~fallthrough i =
19641979
I.mov rax (res i 0);
19651980
I.or_ rdx (res i 0))
19661981
| Lop(Specific Irdpmc) ->
1967-
assert (arg64 i 0 = RCX);
1982+
assert (equal_reg64 (arg64 i 0) RCX);
19681983
I.rdpmc ();
19691984
let rdx = Reg64 RDX in
19701985
(* The instruction fills in the low 32 bits of the result registers. *)
@@ -2096,7 +2111,7 @@ let emit_instr ~first ~fallthrough i =
20962111
can still be assigned to one of these two registers, so
20972112
we must be careful not to clobber it before use. *)
20982113
let (tmp1, tmp2) =
2099-
if i.arg.(0).loc = Reg 0 (* rax *)
2114+
if Reg.equal_location i.arg.(0).loc (Reg 0) (* rax *)
21002115
then (phys_rdx, phys_rax)
21012116
else (phys_rax, phys_rdx) in
21022117

@@ -2220,7 +2235,7 @@ let fundecl fundecl =
22202235
emit_function_or_basic_block_section_name ();
22212236
D.align ~data:false 16;
22222237
add_def_symbol fundecl.fun_name;
2223-
if system = S_macosx
2238+
if is_macosx system
22242239
&& not !Clflags.output_c_object
22252240
&& is_generic_function fundecl.fun_name
22262241
then (* PR#4690 *)
@@ -2234,7 +2249,7 @@ let fundecl fundecl =
22342249
D.label (label_name (emit_symbol fundecl.fun_name));
22352250
emit_debug_info fundecl.fun_dbg;
22362251
cfi_startproc ();
2237-
if Config.runtime5 && (not Config.no_stack_checks) && !Clflags.runtime_variant = "d" then begin
2252+
if Config.runtime5 && (not Config.no_stack_checks) && String.equal !Clflags.runtime_variant "d" then begin
22382253
emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
22392254
end;
22402255
emit_all ~first:true ~fallthrough:true fundecl.fun_body;
@@ -2314,7 +2329,7 @@ let begin_assembly unix =
23142329
Emitaux.Dwarf_helpers.begin_dwarf ~build_asm_directives ~code_begin ~code_end
23152330
~file_emitter:D.file;
23162331

2317-
if system = S_win64 then begin
2332+
if is_win64 system then begin
23182333
D.extrn "caml_call_gc" NEAR;
23192334
D.extrn "caml_c_call" NEAR;
23202335
D.extrn "caml_allocN" NEAR;
@@ -2355,7 +2370,7 @@ let begin_assembly unix =
23552370

23562371
emit_named_text_section code_begin;
23572372
emit_global_label_for_symbol code_begin;
2358-
if system = S_macosx then I.nop (); (* PR#4690 *)
2373+
if is_macosx system then I.nop (); (* PR#4690 *)
23592374

23602375
D.label (emit_cmm_symbol call_gc_local_sym);
23612376
cfi_startproc ();
@@ -2694,7 +2709,7 @@ let emit_trap_notes () =
26942709
end
26952710

26962711
let end_assembly () =
2697-
if !float_constants <> [] then begin
2712+
if not (Misc.Stdlib.List.is_empty !float_constants) then begin
26982713
begin match system with
26992714
| S_macosx -> D.section ["__TEXT";"__literal8"] None ["8byte_literals"]
27002715
| S_mingw64 | S_cygwin -> D.section [".rdata"] (Some "dr") []
@@ -2704,7 +2719,7 @@ let end_assembly () =
27042719
D.align ~data:true 8;
27052720
List.iter (fun (cst,lbl) -> emit_float_constant cst lbl) !float_constants;
27062721
end;
2707-
if !vec128_constants <> [] then begin
2722+
if not (Misc.Stdlib.List.is_empty !vec128_constants) then begin
27082723
begin match system with
27092724
| S_macosx -> D.section ["__TEXT";"__literal16"] None ["16byte_literals"]
27102725
| S_mingw64 | S_cygwin -> D.section [".rdata"] (Some "dr") []
@@ -2723,7 +2738,7 @@ let end_assembly () =
27232738

27242739
let code_end = Cmm_helpers.make_symbol "code_end" in
27252740
emit_named_text_section code_end;
2726-
if system = S_macosx then I.nop ();
2741+
if is_macosx system then I.nop ();
27272742
(* suppress "ld warning: atom sorting error" *)
27282743

27292744
emit_global_label_for_symbol code_end;
@@ -2755,7 +2770,7 @@ let end_assembly () =
27552770
ConstSub(ConstLabel(emit_label lbl), ConstThis),
27562771
const_32 ofs
27572772
) in
2758-
if system = S_macosx then begin
2773+
if is_macosx system then begin
27592774
incr setcnt;
27602775
let s = Printf.sprintf "L$set$%d" !setcnt in
27612776
D.setvar (s, c);
@@ -2767,20 +2782,22 @@ let end_assembly () =
27672782
efa_string = (fun s -> D.bytes (s ^ "\000"))
27682783
};
27692784

2770-
if system = S_linux || system = S_freebsd || system = S_netbsd || system = S_openbsd then begin
2785+
begin match system with
2786+
| S_linux | S_freebsd | S_netbsd | S_openbsd ->
27712787
let frametable = emit_symbol (Cmm_helpers.make_symbol "frametable") in
27722788
D.size frametable (ConstSub (ConstThis, ConstLabel frametable))
2789+
| _ -> ()
27732790
end;
27742791

27752792
D.data ();
27762793
emit_probe_notes ();
27772794
emit_trap_notes ();
27782795

2779-
if system = S_linux then
2796+
if is_linux system then
27802797
(* Mark stack as non-executable, PR#4564 *)
27812798
D.section [".note.GNU-stack"] (Some "") [ "%progbits" ];
27822799

2783-
if system = S_win64 then begin
2800+
if is_win64 system then begin
27842801
D.comment "External functions";
27852802
String.Set.iter
27862803
(fun s ->

backend/amd64/proc.ml

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
(* Description of the AMD64 processor *)
1818

19+
open! Int_replace_polymorphic_compare
20+
1921
open Misc
2022
open Arch
2123
open Cmm

backend/amd64/regalloc_stack_operands.ml

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[@@@ocaml.warning "+a-4-30-40-41-42"]
22

33
open! Regalloc_utils
4+
open! Int_replace_polymorphic_compare
45

56
let debug = false
67

@@ -214,7 +215,7 @@ let basic (map : spilled_map) (instr : Cfg.basic Cfg.instruction) =
214215
else
215216
may_use_stack_operand_for_only_result map instr
216217
| Op (Const_int n) ->
217-
if n <= 0x7FFFFFFFn && n >= -0x80000000n then begin
218+
if (Nativeint.compare n 0x7FFFFFFFn) <= 0 && (Nativeint.compare n (-0x80000000n)) >= 0 then begin
218219
may_use_stack_operand_for_only_result map instr
219220
end else begin
220221
May_still_have_spilled_registers

backend/amd64/reload.ml

+3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
(**************************************************************************)
1515
[@@@ocaml.warning "+4"]
1616

17+
(* note: no `open! Int_replace_polymorphic_compare` as the module is about
18+
to be deleted. *)
19+
1720
open Cmm
1821
open Reg
1922
open Mach

0 commit comments

Comments
 (0)