ocaml-flambda
diff --git a/‎backend/amd64/cfg_selection.ml
Lines changed: 7 additions & 2 deletions b/‎backend/amd64/cfg_selection.ml
Lines changed: 7 additions & 2 deletions
diff --git a/‎backend/amd64/selection.ml
Lines changed: 352 additions & 0 deletions b/‎backend/amd64/selection.ml
Lines changed: 352 additions & 0 deletions
diff --git a/‎backend/amd64/selection_utils.ml
Lines changed: 7 additions & 0 deletions b/‎backend/amd64/selection_utils.ml
Lines changed: 7 additions & 0 deletions
@@ -194,7 +194,7 @@ class selector =
     method! select_operation op args dbg ~label_after =
       match op with
       (* Recognize the LEA instruction *)
-      | Caddi | Caddv | Cadda | Csubi -> (
+      | Caddi | Caddv | Cadda | Csubi | Cor -> (
         match self#select_addressing Word_int (Cop (op, args, dbg)) with
         | Iindexed _, _ | Iindexed2 0, _ ->
           super#select_operation op args dbg ~label_after
@@ -252,13 +252,18 @@ class selector =
       | Cbswap { bitwidth } ->
         let bitwidth = select_bitwidth bitwidth in
         specific (Ibswap { bitwidth }), args
-      (* Recognize sign extension *)
       | Casr -> (
+        (* Recognize sign extension *)
         match args with
         | [Cop (Clsl, [k; Cconst_int (32, _)], _); Cconst_int (32, _)] ->
           specific Isextend32, [k]
         | _ -> super#select_operation op args dbg ~label_after)
       (* Recognize zero extension *)
+      | Clsr -> (
+        match args with
+        | [Cop (Clsl, [k; Cconst_int (32, _)], _); Cconst_int (32, _)] ->
+          specific Izextend32, [k]
+        | _ -> super#select_operation op args dbg ~label_after)
       | Cand -> (
         match args with
         | [arg; Cconst_int (0xffff_ffff, _)]
 
@@ -0,0 +1,352 @@
+(**************************************************************************)
+(*                                                                        *)
+(*                                 OCaml                                  *)
+(*                                                                        *)
+(*             Xavier Leroy, projet Cristal, INRIA Rocquencourt           *)
+(*                                                                        *)
+(*   Copyright 2000 Institut National de Recherche en Informatique et     *)
+(*     en Automatique.                                                    *)
+(*                                                                        *)
+(*   All rights reserved.  This file is distributed under the terms of    *)
+(*   the GNU Lesser General Public License version 2.1, with the          *)
+(*   special exception on linking described in the file LICENSE.          *)
+(*                                                                        *)
+(**************************************************************************)
+
+(* Instruction selection for the AMD64 *)
+
+[@@@ocaml.warning "+a-4-9-40-41-42"]
+
+(* Note: `open! Int_replace_polymorphic_compare` is omitted here because this
+   module is about to be deleted. *)
+
+open Arch
+open Selection_utils
+
+(* [pseudoregs_for_operation op arg res] computes the pair [(srcs, dsts)] of
+   register arrays that the instruction for [op] must be emitted with, given
+   the pseudo-registers [arg] and [res] picked for its operands and results.
+   The cases handled directly here never mutate [arg] or [res]: constrained
+   operand arrays are fresh copies. Raises [Use_default] for operations that
+   impose no constraint. *)
+let pseudoregs_for_operation op arg res =
+  (* A copy of [arg] whose element at [index] is replaced by [reg]. *)
+  let with_arg_at index reg =
+    let constrained = Array.copy arg in
+    constrained.(index) <- reg;
+    constrained
+  in
+  (* Operands for a float comparison using [scratch] as a temporary. *)
+  let float_compare_regs scratch cond =
+    (* CR gyorsh: make this optimization as a separate PR. *)
+    (* The result of the comparison has to be stored temporarily in a float
+       register, but none of the inputs may be clobbered if they are still
+       live after this operation -- so [scratch] is added as both an input and
+       an output. [destroyed_at_oper] is not used, because it would force a
+       fixed register, which makes it more likely an extra mov would be added
+       to transfer the argument to that fixed register. *)
+    let _, swapped = float_cond_and_need_swap cond in
+    let srcs =
+      if swapped then [| arg.(0); scratch |] else [| scratch; arg.(1) |]
+    in
+    srcs, [| res.(0); scratch |]
+  in
+  match (op : Mach.operation) with
+  (* Two-address binary operations: arg.(0) and res.(0) must be the same *)
+  | Iintop (Iadd | Isub | Imul | Iand | Ior | Ixor)
+  | Ifloatop ((Float32 | Float64), (Iaddf | Isubf | Imulf | Idivf)) ->
+    [| res.(0); arg.(1) |], res
+  (* Compare-and-set: first arg must be rax *)
+  | Iintop_atomic { op = Compare_set; size = _; addr = _ } ->
+    with_arg_at 0 rax, res
+  (* Compare-and-exchange: first arg must be rax, res.(0) must be rax *)
+  | Iintop_atomic { op = Compare_exchange; size = _; addr = _ } ->
+    with_arg_at 0 rax, [| rax |]
+  (* Exchange and fetch-and-add: first arg must be the same as res.(0) *)
+  | Iintop_atomic { op = Exchange | Fetch_and_add; size = _; addr = _ } ->
+    with_arg_at 0 res.(0), res
+  (* One-address unary operations: arg.(0) and res.(0) must be the same *)
+  | Iintop_imm ((Iadd | Isub | Imul | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr), _)
+  | Ifloatop ((Float64 | Float32), (Iabsf | Inegf))
+  | Ispecific (Ibswap { bitwidth = Thirtytwo | Sixtyfour }) ->
+    res, res
+  (* For xchg, args must be a register allowing access to high 8 bit register
+     (rax, rbx, rcx or rdx). Keep it simple, just force the argument in rax. *)
+  | Ispecific (Ibswap { bitwidth = Sixteen }) -> [| rax |], [| rax |]
+  (* For imulh, first arg must be in rax, rax is clobbered, and result is in
+     rdx. *)
+  | Iintop (Imulh _) -> [| rax; arg.(1) |], [| rdx |]
+  (* Float arithmetic with a memory operand: arg.(0) is replaced by res.(0) *)
+  | Ispecific (Ifloatarithmem (_, _, _)) -> with_arg_at 0 res.(0), res
+  (* For shifts with variable shift count, second arg must be in rcx *)
+  | Iintop (Ilsl | Ilsr | Iasr) -> [| res.(0); rcx |], res
+  (* For div and mod, first arg must be in rax, rdx is clobbered, and result is
+     in rax or rdx respectively. Keep it simple, just force second argument in
+     rcx. *)
+  | Iintop Idiv -> [| rax; rcx |], [| rax |]
+  | Iintop Imod -> [| rax; rcx |], [| rdx |]
+  (* Float comparisons get a fresh temporary of the matching register type *)
+  | Ifloatop (Float64, Icompf cond) ->
+    float_compare_regs (Reg.create Float) cond
+  | Ifloatop (Float32, Icompf cond) ->
+    float_compare_regs (Reg.create Float32) cond
+  | Ispecific Irdpmc ->
+    (* For rdpmc instruction, the argument must be in ecx and the result is in
+       edx (high) and eax (low). Make it simple and force the argument in rcx,
+       and rax and rdx clobbered *)
+    [| rcx |], res
+  (* SIMD operations delegate to [Simd_selection] *)
+  | Ispecific (Isimd op) ->
+    Simd_selection.pseudoregs_for_operation
+      (Simd_proc.register_behavior op)
+      arg res
+  | Ispecific (Isimd_mem (op, _addr)) ->
+    Simd_selection.pseudoregs_for_operation
+      (Simd_proc.Mem.register_behavior op)
+      arg res
+  (* Conditional select: last arg must be the same as res.(0) *)
+  | Icsel _ -> with_arg_at (Array.length arg - 1) res.(0), res
+  (* Other instructions are regular *)
+  | Iintop_atomic { op = Add | Sub | Land | Lor | Lxor; _ }
+  | Iintop (Ipopcnt | Iclz _ | Ictz _ | Icomp _)
+  | Iintop_imm ((Imulh _ | Idiv | Imod | Icomp _ | Ipopcnt | Iclz _ | Ictz _), _)
+  | Ispecific
+      ( Isextend32 | Izextend32 | Ilea _
+      | Istore_int (_, _, _)
+      | Ipause | Ilfence | Isfence | Imfence
+      | Ioffset_loc (_, _)
+      | Irdtsc | Icldemote _ | Iprefetch _ )
+  | Imove | Ispill | Ireload | Ireinterpret_cast _ | Istatic_cast _
+  | Iconst_int _ | Iconst_float32 _ | Iconst_float _ | Iconst_vec128 _
+  | Iconst_symbol _ | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
+  | Iextcall _ | Istackoffset _ | Iload _
+  | Istore (_, _, _)
+  | Ialloc _ | Iname_for_debugger _ | Iprobe _ | Iprobe_is_enabled _ | Iopaque
+  | Ibeginregion | Iendregion | Ipoll _ | Idls_get ->
+    raise Use_default
+
+(* The selector class *)
+
+(* The AMD64 instruction selector. It specializes the generic selector with
+   target-specific immediates, addressing modes, LEA and extension
+   recognition, float arithmetic on memory operands, intrinsics, and the
+   register constraints computed by [pseudoregs_for_operation]. *)
+class selector =
+  object (self)
+    inherit Selectgen.selector_generic as super
+
+    (* For the integer operations listed, [n] may be used as an immediate
+       operand whenever [is_immediate n] holds; any other operation defers to
+       the generic selector. *)
+    method! is_immediate op n =
+      match op with
+      | Iadd | Isub | Imul | Iand | Ior | Ixor | Icomp _ -> is_immediate n
+      | _ -> super#is_immediate op n
+
+    (* Tests accept the same immediates, whatever the comparison is. *)
+    method is_immediate_test _cmp n = is_immediate n
+
+    method! is_simple_expr e =
+      match e with
+      | Cop (Cextcall { func = fn }, args, _) when List.mem fn inline_ops ->
+        (* inlined ops are simple if their arguments are *)
+        List.for_all self#is_simple_expr args
+      | _ -> super#is_simple_expr e
+
+    (* An inlined op has exactly the joined effects of its arguments. *)
+    method! effects_of e =
+      match e with
+      | Cop (Cextcall { func = fn }, args, _) when List.mem fn inline_ops ->
+        Select_utils.Effect_and_coeffect.join_list_map args self#effects_of
+      | _ -> super#effects_of e
+
+    (* [select_addressing _chunk exp] returns an addressing mode together with
+       the expression(s) that remain to be computed into registers. When the
+       displacement found by [select_addr] is not an immediate, the whole of
+       [exp] is used as the base with a zero displacement. *)
+    method select_addressing _chunk exp =
+      match select_addr exp with
+      (* PR#4625: displacement must be a signed 32-bit immediate *)
+      | _, displ when not (is_immediate displ) -> Iindexed 0, exp
+      | Asymbol s, displ ->
+        let glob : Arch.sym_global =
+          match s.sym_global with Global -> Global | Local -> Local
+        in
+        Ibased (s.sym_name, glob, displ), Ctuple []
+      | Alinear e, displ -> Iindexed displ, e
+      | Aadd (e1, e2), displ -> Iindexed2 displ, Ctuple [e1; e2]
+      | Ascale (e, scale), displ -> Iscaled (scale, displ), e
+      | Ascaledadd (e1, e2, scale), displ ->
+        Iindexed2scaled (scale, displ), Ctuple [e1; e2]
+
+    (* Stores of integer constants that fit an immediate are selected as
+       [Istore_int], which needs no register operand; everything else defers
+       to the generic selector. The remaining constructors are listed
+       explicitly rather than with a wildcard. *)
+    method! select_store is_assign addr exp =
+      match exp with
+      | Cconst_int (n, _dbg) when is_immediate n ->
+        Ispecific (Istore_int (Nativeint.of_int n, addr, is_assign)), Ctuple []
+      | Cconst_natint (n, _dbg) when is_immediate_natint n ->
+        Ispecific (Istore_int (n, addr, is_assign)), Ctuple []
+      | Cconst_int _ | Cconst_vec128 _
+      | Cconst_natint (_, _)
+      | Cconst_float32 (_, _)
+      | Cconst_float (_, _)
+      | Cconst_symbol (_, _)
+      | Cvar _
+      | Clet (_, _, _)
+      | Clet_mut (_, _, _, _)
+      | Cphantom_let (_, _, _)
+      | Cassign (_, _)
+      | Ctuple _
+      | Cop (_, _, _)
+      | Csequence (_, _)
+      | Cifthenelse (_, _, _, _, _, _, _)
+      | Cswitch (_, _, _, _, _)
+      | Ccatch (_, _, _, _)
+      | Cexit (_, _, _)
+      | Ctrywith (_, _, _, _, _, _) ->
+        super#select_store is_assign addr exp
+
+    (* [select_operation op args dbg] maps a Cmm operation to a machine
+       operation and the arguments it is applied to. Target-specific patterns
+       are tried first; anything not recognized defers to the generic
+       selector. *)
+    method! select_operation op args dbg =
+      match op with
+      (* Recognize the LEA instruction: the whole operation is re-read as an
+         address computation, and LEA is used unless the result is a plain
+         [Iindexed] or an [Iindexed2] with zero displacement. *)
+      | Caddi | Caddv | Cadda | Csubi | Cor -> (
+        match self#select_addressing Word_int (Cop (op, args, dbg)) with
+        | Iindexed _, _ | Iindexed2 0, _ -> super#select_operation op args dbg
+        | ( ((Iindexed2 _ | Iscaled _ | Iindexed2scaled _ | Ibased _) as addr),
+            arg ) ->
+          Ispecific (Ilea addr), [arg])
+      (* Recognize float arithmetic with memory. The boolean tells
+         [select_floatarith] whether the operation is commutative. *)
+      | Caddf width ->
+        self#select_floatarith true width Simple_operation.Iaddf Arch.Ifloatadd
+          args
+      | Csubf width ->
+        self#select_floatarith false width Simple_operation.Isubf Arch.Ifloatsub
+          args
+      | Cmulf width ->
+        self#select_floatarith true width Simple_operation.Imulf Arch.Ifloatmul
+          args
+      | Cdivf width ->
+        self#select_floatarith false width Simple_operation.Idivf Arch.Ifloatdiv
+          args
+      | Cpackf32 ->
+        (* We must operate on registers. This is because if the second argument
+           was a float stack slot, the resulting UNPCKLPS instruction would
+           enforce the validity of loading it as a 128-bit memory location, even
+           though it only loads 64 bits. *)
+        Ispecific (Isimd (SSE Interleave_low_32_regs)), args
+      (* Special cases overriding C implementations (regardless of
+         [@@builtin]). *)
+      | Cextcall { func = "sqrt" as func; _ }
+      (* x86 intrinsics ([@@builtin]) *)
+      | Cextcall { func; builtin = true; _ } -> (
+        match func with
+        | "caml_rdtsc_unboxed" -> Ispecific Irdtsc, args
+        | "caml_rdpmc_unboxed" -> Ispecific Irdpmc, args
+        | "caml_pause_hint" -> Ispecific Ipause, args
+        | "caml_load_fence" -> Ispecific Ilfence, args
+        | "caml_store_fence" -> Ispecific Isfence, args
+        | "caml_memory_fence" -> Ispecific Imfence, args
+        | "caml_cldemote" ->
+          let addr, eloc =
+            self#select_addressing Word_int (one_arg "cldemote" args)
+          in
+          Ispecific (Icldemote addr), [eloc]
+        | _ -> (
+          (* Any other name is offered to the SIMD selector before falling
+             back to the generic selector. *)
+          match Simd_selection.select_operation func args with
+          | Some (op, args) -> op, args
+          | None -> super#select_operation op args dbg))
+      (* Recognize store instructions: a word-sized store to [loc] of
+         [load loc + n], with [n] an immediate, becomes [Ioffset_loc]. *)
+      | Cstore (((Word_int | Word_val) as chunk), _init) -> (
+        match args with
+        | [loc; Cop (Caddi, [Cop (Cload _, [loc'], _); Cconst_int (n, _dbg)], _)]
+          when loc = loc' && is_immediate n ->
+          let addr, arg = self#select_addressing chunk loc in
+          Ispecific (Ioffset_loc (n, addr)), [arg]
+        | _ -> super#select_operation op args dbg)
+      | Cbswap { bitwidth } ->
+        let bitwidth = select_bitwidth bitwidth in
+        Ispecific (Ibswap { bitwidth }), args
+      (* Recognize sign extension: (k lsl 32) asr 32 *)
+      | Casr -> (
+        match args with
+        | [Cop (Clsl, [k; Cconst_int (32, _)], _); Cconst_int (32, _)] ->
+          Ispecific Isextend32, [k]
+        | _ -> super#select_operation op args dbg)
+      (* Recognize zero extension: (k lsl 32) lsr 32 *)
+      | Clsr -> (
+        match args with
+        | [Cop (Clsl, [k; Cconst_int (32, _)], _); Cconst_int (32, _)] ->
+          Ispecific Izextend32, [k]
+        | _ -> super#select_operation op args dbg)
+      (* Recognize zero extension written as a mask with 0xffff_ffff, with the
+         constant on either side *)
+      | Cand -> (
+        match args with
+        | [arg; Cconst_int (0xffff_ffff, _)]
+        | [arg; Cconst_natint (0xffff_ffffn, _)]
+        | [Cconst_int (0xffff_ffff, _); arg]
+        | [Cconst_natint (0xffff_ffffn, _); arg] ->
+          Ispecific Izextend32, [arg]
+        | _ -> super#select_operation op args dbg)
+      | Ccsel _ -> (
+        match args with
+        | [cond; ifso; ifnot] -> (
+          let cond, earg = self#select_condition cond in
+          match cond with
+          | Ifloattest (w, CFeq) ->
+            (* CFeq cannot be represented as cmov without a jump. CFneq emits
+               cmov for "unordered" and "not equal" cases. Use Cneq and swap the
+               arguments. *)
+            Icsel (Ifloattest (w, CFneq)), [earg; ifnot; ifso]
+          | _ -> Icsel cond, [earg; ifso; ifnot])
+        | _ -> super#select_operation op args dbg)
+      | Cprefetch { is_write; locality } ->
+        (* Emit prefetch for read hint when prefetchw is not supported. Matches
+           the behavior of gcc's __builtin_prefetch *)
+        let is_write = is_write && Arch.Extension.enabled PREFETCHW in
+        (* A write prefetch with [Moderate] locality is turned into [High]
+           when PREFETCHWT1 is not enabled. *)
+        let locality : Arch.prefetch_temporal_locality_hint =
+          match select_locality locality with
+          | Moderate when is_write && not (Arch.Extension.enabled PREFETCHWT1)
+            ->
+            High
+          | l -> l
+        in
+        let addr, eloc =
+          self#select_addressing Word_int (one_arg "prefetch" args)
+        in
+        Ispecific (Iprefetch { is_write; addr; locality }), [eloc]
+      | _ -> super#select_operation op args dbg
+
+    (* Recognize float arithmetic with mem *)
+
+    (* [select_floatarith commutative width regular_op mem_op args] selects
+       [Ifloatarithmem] when the second argument is a load of the matching
+       width, or, for [commutative] operations only, when the first argument
+       is such a load (the operands are then swapped). Otherwise it selects
+       the register-register [Ifloatop]. Exactly two arguments are expected;
+       any other arity hits [assert false]. *)
+    method select_floatarith commutative width regular_op mem_op args =
+      let open Cmm in
+      match width, args with
+      | ( Float64,
+          [arg1; Cop (Cload { memory_chunk = Double as chunk; _ }, [loc2], _)] )
+      | ( Float32,
+          [ arg1;
+            Cop
+              ( Cload { memory_chunk = Single { reg = Float32 } as chunk; _ },
+                [loc2],
+                _ ) ] ) ->
+        let addr, arg2 = self#select_addressing chunk loc2 in
+        Mach.Ispecific (Ifloatarithmem (width, mem_op, addr)), [arg1; arg2]
+      | ( Float64,
+          [Cop (Cload { memory_chunk = Double as chunk; _ }, [loc1], _); arg2] )
+      | ( Float32,
+          [ Cop
+              ( Cload { memory_chunk = Single { reg = Float32 } as chunk; _ },
+                [loc1],
+                _ );
+            arg2 ] )
+        when commutative ->
+        let addr, arg1 = self#select_addressing chunk loc1 in
+        Mach.Ispecific (Ifloatarithmem (width, mem_op, addr)), [arg2; arg1]
+      | _, [arg1; arg2] -> Mach.Ifloatop (width, regular_op), [arg1; arg2]
+      | _ -> assert false
+
+    (* Marking a C tail call sets [contains_calls]. *)
+    method! mark_c_tailcall = contains_calls := true
+
+    (* Deal with register constraints *)
+
+    (* Emits [op] from [rs] into [rd], honoring the constraints returned by
+       [pseudoregs_for_operation]: moves into the constrained sources, the
+       operation itself, then moves out of the constrained destinations.
+       Only the constraint lookup is guarded against [Use_default], so the
+       generic fallback can never run after instructions have already been
+       emitted for this operation. *)
+    method! insert_op_debug env op dbg rs rd =
+      match pseudoregs_for_operation op rs rd with
+      | exception Use_default -> super#insert_op_debug env op dbg rs rd
+      | rsrc, rdst ->
+        self#insert_moves env rs rsrc;
+        self#insert_debug env (Iop op) dbg rsrc rdst;
+        self#insert_moves env rdst rd;
+        rd
+  end
+
+(* Entry point: runs a fresh [selector] on the function declaration [f],
+   forwarding [future_funcnames] to its [emit_fundecl] method. *)
+let fundecl ~future_funcnames f =
+  let sel = new selector in
+  sel#emit_fundecl ~future_funcnames f
@@ -76,6 +76,13 @@ let rec select_addr exp =
     | ( ((Asymbol _ | Aadd (_, _) | Ascaledadd (_, _, _)), _),
         ((Asymbol _ | Alinear _ | Aadd (_, _) | Ascaledadd (_, _, _)), _) ) ->
       Aadd (arg1, arg2), 0)
+  | Cmm.Cop (Cor, [arg; Cconst_int (1, _)], _)
+  | Cmm.Cop (Cor, [Cconst_int (1, _); arg], _) -> (
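+    (* Optimize integer tagging: when [scale] is even, [e * scale] has bit 0
+       clear, so or-ing the whole address with 1 only sets bit 0 of the
+       offset and can be folded into the displacement. *)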
+    match select_addr arg with
+    | Ascale (e, scale), off when scale mod 2 = 0 ->
+      Ascale (e, scale), off lor 1
+    | _ -> default)
   | _ -> default
 
 (* Special constraints on operand and result registers *)