From 2f5d60b15addaa3382b4d9fabedec81870ba1679 Mon Sep 17 00:00:00 2001 From: Jacob Van Buren Date: Tue, 31 Dec 2024 11:00:07 -0500 Subject: [PATCH 1/6] unified unboxed field getters/setters. This will be useful once we have unboxed integers of different sizes --- backend/cmm_helpers.ml | 200 +++++++++++------------------------------ 1 file changed, 50 insertions(+), 150 deletions(-) diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml index 59dc47ddc5e..bcf0cf7855b 100644 --- a/backend/cmm_helpers.ml +++ b/backend/cmm_helpers.ml @@ -924,7 +924,20 @@ let complex_im c dbg = (* Unit *) -let return_unit dbg c = Csequence (c, Cconst_int (1, dbg)) +let return_unit dbg c = + match c with + | Csequence (_, Cconst_int (1, _)) as c -> c + | c -> Csequence (c, Cconst_int (1, dbg)) + +let memory_chunk_width_in_bytes : memory_chunk -> int = function + | Byte_unsigned | Byte_signed -> 1 + | Sixteen_unsigned | Sixteen_signed -> 2 + | Thirtytwo_unsigned | Thirtytwo_signed -> 4 + | Single { reg = Float64 | Float32 } -> 4 + | Word_int -> size_int + | Word_val -> size_addr + | Double -> size_float + | Onetwentyeight_unaligned | Onetwentyeight_aligned -> size_vec128 let strided_field_address ptr ~index ~stride dbg = if index * stride = 0 @@ -932,24 +945,8 @@ let strided_field_address ptr ~index ~stride dbg = else Cop (Cadda, [ptr; Cconst_int (index * stride, dbg)], dbg) let field_address ?(memory_chunk = Word_val) ptr n dbg = - if n = 0 - then ptr - else - let field_size_in_bytes = - match memory_chunk with - | Byte_unsigned | Byte_signed -> 1 - | Sixteen_unsigned | Sixteen_signed -> 2 - | Thirtytwo_unsigned | Thirtytwo_signed -> 4 - | Single { reg = Float64 | Float32 } -> - assert (size_float = 8); - (* unclear what to do if this is false *) - size_float / 2 - | Word_int -> size_int - | Word_val -> size_addr - | Double -> size_float - | Onetwentyeight_unaligned | Onetwentyeight_aligned -> size_vec128 - in - Cop (Cadda, [ptr; Cconst_int (n * field_size_in_bytes, dbg)], dbg) + strided_field_address ptr dbg ~index:n + ~stride:(memory_chunk_width_in_bytes memory_chunk) let get_field_gen_given_memory_chunk memory_chunk mutability ptr n dbg = Cop @@ -1410,7 +1407,21 @@ let unboxed_int64_or_nativeint_array_set ~has_custom_ops arr ~index ~new_value in int_array_set arr index new_value dbg))) -(* Get the field of a block given a possibly inconstant index *) +let get_field_unboxed ~dbg memory_chunk mutability block ~index_in_words = + if Arch.big_endian && memory_chunk_width_in_bytes memory_chunk <> size_addr + then + (* CR layouts v5.1: Properly support big-endian. *) + Misc.fatal_error + "Unboxed non-word size integer fields are only supported on \ + little-endian architectures"; + (* CR layouts v5.1: We'll need to vary log2_size_addr among other things to + efficiently pack small integers *) + let field_address = + assert (size_float = size_addr); + array_indexing log2_size_addr block index_in_words dbg + in + Cop + (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg) let get_field_computed imm_or_ptr mutability ~block ~index dbg = let memory_chunk = @@ -1418,139 +1429,28 @@ let get_field_computed imm_or_ptr mutability ~block ~index dbg = | Lambda.Immediate -> Word_int | Lambda.Pointer -> Word_val in - let field_address = array_indexing log2_size_addr block index dbg in - Cop - (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg) - -(* Getters for unboxed int fields *) - -let get_field_unboxed_int32 mutability ~block ~index dbg = - let memory_chunk = Thirtytwo_signed in - (* CR layouts v5.1: Properly support big-endian. *) - if Arch.big_endian - then - Misc.fatal_error - "Unboxed int32 fields only supported on little-endian architectures"; - (* CR layouts v5.1: We'll need to vary log2_size_addr to efficiently pack - * int32s *) - let field_address = array_indexing log2_size_addr block index dbg in - Cop - (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg) - -let get_field_unboxed_int64_or_nativeint mutability ~block ~index dbg = - let memory_chunk = Word_int in - let field_address = array_indexing log2_size_addr block index dbg in - Cop - (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg) - -(* Setters for unboxed int fields *) - -let setfield_unboxed_int32 arr ofs newval dbg = - (* CR layouts v5.1: Properly support big-endian. *) - if Arch.big_endian - then - Misc.fatal_error - "Unboxed int32 fields only supported on little-endian architectures"; - (* CR layouts v5.1: We will need to vary log2_size_addr when int32 fields are - efficiently packed. *) - return_unit dbg - (Cop - ( Cstore (Thirtytwo_signed, Assignment), - [array_indexing log2_size_addr arr ofs dbg; newval], - dbg )) - -let setfield_unboxed_int64_or_nativeint arr ofs newval dbg = - return_unit dbg - (Cop - ( Cstore (Word_int, Assignment), - [array_indexing log2_size_addr arr ofs dbg; newval], - dbg )) - -(* Getters and setters for unboxed float32 fields *) - -let get_field_unboxed_float32 mutability ~block ~index dbg = - (* CR layouts v5.1: Properly support big-endian. *) - if Arch.big_endian - then - Misc.fatal_error - "Unboxed float32 fields only supported on little-endian architectures"; - let memory_chunk = Single { reg = Float32 } in - (* CR layouts v5.1: We'll need to vary log2_size_addr to efficiently pack - * float32s *) - let field_address = array_indexing log2_size_addr block index dbg in - Cop - (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg) - -let setfield_unboxed_float32 arr ofs newval dbg = - (* CR layouts v5.1: Properly support big-endian. *) - if Arch.big_endian - then - Misc.fatal_error - "Unboxed float32 fields only supported on little-endian architectures"; - (* CR layouts v5.1: We will need to vary log2_size_addr when float32 fields - are efficiently packed. *) - return_unit dbg - (Cop - ( Cstore (Single { reg = Float32 }, Assignment), - [array_indexing log2_size_addr arr ofs dbg; newval], - dbg )) - -(* Getters and setters for unboxed vec128 fields *) - -let get_field_unboxed_vec128 mutability ~block ~index_in_words dbg = - (* CR layouts v5.1: Properly support big-endian. *) - if Arch.big_endian - then - Misc.fatal_error - "Unboxed vec128 fields only supported on little-endian architectures"; - let memory_chunk = Onetwentyeight_unaligned in - let field_address = array_indexing log2_size_addr block index_in_words dbg in - Cop - (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg) - -let setfield_unboxed_vec128 arr ~index_in_words newval dbg = - (* CR layouts v5.1: Properly support big-endian. *) - if Arch.big_endian - then - Misc.fatal_error - "Unboxed vec128 fields only supported on little-endian architectures"; - let field_address = array_indexing log2_size_addr arr index_in_words dbg in - return_unit dbg - (Cop - ( Cstore (Onetwentyeight_unaligned, Assignment), - [field_address; newval], - dbg )) - -let get_field_unboxed ~dbg memory_chunk mutability block ~index_in_words = - match (memory_chunk : memory_chunk) with - | Single { reg = Float32 } -> - get_field_unboxed_float32 mutability ~block ~index:index_in_words dbg - | Double -> - unboxed_float_array_ref mutability ~block ~index:index_in_words dbg - | Onetwentyeight_unaligned | Onetwentyeight_aligned -> - get_field_unboxed_vec128 mutability ~block ~index_in_words dbg - | Thirtytwo_signed -> - get_field_unboxed_int32 mutability ~block ~index:index_in_words dbg - | Word_int -> - get_field_unboxed_int64_or_nativeint mutability ~block ~index:index_in_words - dbg - | Word_val -> - Misc.fatal_error "cannot use get_field_unboxed with a heap block" - | _ -> Misc.fatal_error "get_field_unboxed: unexpected memory chunk" + get_field_unboxed ~dbg memory_chunk mutability block ~index_in_words:index let set_field_unboxed ~dbg memory_chunk block ~index_in_words newval = - match (memory_chunk : memory_chunk) with - | Single { reg = Float32 } -> - setfield_unboxed_float32 block index_in_words newval dbg - | Double -> float_array_set block index_in_words newval dbg - | Onetwentyeight_unaligned | Onetwentyeight_aligned -> - setfield_unboxed_vec128 block ~index_in_words newval dbg - | Thirtytwo_signed -> setfield_unboxed_int32 block index_in_words newval dbg - | Word_int -> - setfield_unboxed_int64_or_nativeint block index_in_words newval dbg + match memory_chunk with | Word_val -> - Misc.fatal_error "cannot use set_field_unboxed with a heap block" - | _ -> Misc.fatal_error "set_field_unboxed : unexpected memory chunk" + Misc.fatal_error "Attempted to set a value via [setfield_unboxed]" + | memory_chunk -> + let size_in_bytes = memory_chunk_width_in_bytes memory_chunk in + (* CR layouts v5.1: Properly support big-endian. *) + if Arch.big_endian && size_in_bytes <> size_addr + then + Misc.fatal_error + "Unboxed non-word-size fields are only supported on little-endian \ + architectures"; + (* CR layouts v5.1: We will need to vary log2_size_addr, among other things, + when small fields are efficiently packed. *) + let field_address = + array_indexing log2_size_addr block index_in_words dbg + in + let newval = if size_in_bytes = 4 then low_32 dbg newval else newval in + return_unit dbg + (Cop (Cstore (memory_chunk, Assignment), [field_address; newval], dbg)) (* String length *) From e5c749977b8248fc3eff4d00cf77b3607b8826ca Mon Sep 17 00:00:00 2001 From: Jacob Van Buren Date: Thu, 13 Feb 2025 17:51:46 -0500 Subject: [PATCH 2/6] refactored cmm to handle more integer widths --- backend/arm64/cfg_selection.ml | 2 +- backend/arm64/proc.ml | 2 +- backend/arm64/selection.ml | 7 +- backend/cmm.ml | 33 +- backend/cmm.mli | 2 + backend/cmm_helpers.ml | 284 ++++++++---------- backend/cmm_helpers.mli | 7 +- backend/printcmm.ml | 2 + middle_end/flambda2/to_cmm/to_cmm_expr.ml | 6 +- .../flambda2/to_cmm/to_cmm_primitive.ml | 2 +- middle_end/flambda2/to_cmm/to_cmm_shared.ml | 23 +- middle_end/flambda2/to_cmm/to_cmm_shared.mli | 6 + 12 files changed, 182 insertions(+), 194 deletions(-) diff --git a/backend/arm64/cfg_selection.ml b/backend/arm64/cfg_selection.ml index a08fece7ab1..f0d01d0e1a1 100644 --- a/backend/arm64/cfg_selection.ml +++ b/backend/arm64/cfg_selection.ml @@ -171,7 +171,7 @@ class selector = method! insert_move_extcall_arg env ty_arg src dst = let ty_arg_is_int32 = match ty_arg with - | XInt32 -> true + | XInt8 | XInt16 | XInt32 -> true | XInt | XInt64 | XFloat32 | XFloat | XVec128 -> false in if macosx && ty_arg_is_int32 && is_stack_slot dst diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml index a3244782404..4d20f3ad06d 100644 --- a/backend/arm64/proc.ml +++ b/backend/arm64/proc.ml @@ -297,7 +297,7 @@ let external_calling_conventions begin match ty_arg with | XInt | XInt64 -> loc.(i) <- [| loc_int last_int make_stack int ofs |] - | XInt32 -> + | XInt32 | XInt16 | XInt8 -> loc.(i) <- [| loc_int32 last_int make_stack int ofs |] | XFloat -> loc.(i) <- [| loc_float last_float make_stack float ofs |] diff --git a/backend/arm64/selection.ml b/backend/arm64/selection.ml index ee57ee21c77..915cd8266fd 100644 --- a/backend/arm64/selection.ml +++ b/backend/arm64/selection.ml @@ -157,7 +157,12 @@ class selector = | _ -> super#select_operation op args dbg method! insert_move_extcall_arg env ty_arg src dst = - if macosx && ty_arg = XInt32 && is_stack_slot dst + let ty_arg_is_int32 = + match ty_arg with + | XInt8 | XInt16 | XInt32 -> true + | XInt | XInt64 | XFloat32 | XFloat | XVec128 -> false + in + if macosx && ty_arg_is_int32 && is_stack_slot dst then self#insert env (Iop (Ispecific Imove32)) src dst else self#insert_moves env src dst end diff --git a/backend/cmm.ml b/backend/cmm.ml index fcfae698f87..af61fcab9ee 100644 --- a/backend/cmm.ml +++ b/backend/cmm.ml @@ -1,3 +1,4 @@ +# 1 "backend/cmm.ml" (**************************************************************************) (* *) (* OCaml *) @@ -107,6 +108,8 @@ let ge_component comp1 comp2 = type exttype = | XInt + | XInt8 + | XInt16 | XInt32 | XInt64 | XFloat32 @@ -115,6 +118,8 @@ type exttype = let machtype_of_exttype = function | XInt -> typ_int + | XInt8 -> typ_int + | XInt16 -> typ_int | XInt32 -> typ_int | XInt64 -> typ_int | XFloat -> typ_float @@ -611,21 +616,19 @@ let equal_machtype_component (left : machtype_component) (right : machtype_compo | Float32, (Val | Addr | Int | Float | Vec128 | Valx2) -> false -let equal_exttype left right = - match left, right with - | XInt, XInt -> true - | XInt32, XInt32 -> true - | XInt64, XInt64 -> true - | XFloat32, XFloat32 -> true - | XFloat, XFloat -> true - | XVec128, XVec128 -> true - | XInt, (XInt32 | XInt64 | XFloat | XFloat32 | XVec128) - | XInt32, (XInt | XInt64 | XFloat | XFloat32 | XVec128) - | XInt64, (XInt | XInt32 | XFloat | XFloat32 | XVec128) - | XFloat, (XInt | XInt32 | XFloat32 | XInt64 | XVec128) - | XVec128, (XInt | XInt32 | XInt64 | XFloat | XFloat32) - | XFloat32, (XInt | XInt32 | XInt64 | XFloat | XVec128) -> - false +let equal_exttype + ((XInt + | XInt8 + | XInt16 + | XInt32 + | XInt64 + | XFloat32 + | XFloat + | XVec128) as left) + right + = + (* we can use polymorphic compare as long as exttype is all constant constructors *) + left = right let equal_vec128_type v1 v2 = match v1, v2 with diff --git a/backend/cmm.mli b/backend/cmm.mli index 6e9d0f4b3e8..372cf5759bc 100644 --- a/backend/cmm.mli +++ b/backend/cmm.mli @@ -75,6 +75,8 @@ val ge_component type exttype = | XInt (**r OCaml value, word-sized integer *) + | XInt8 (**r 8-bit integer *) + | XInt16 (**r 16-bit integer *) | XInt32 (**r 32-bit integer *) | XInt64 (**r 64-bit integer *) | XFloat32 (**r single-precision FP number *) diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml index bcf0cf7855b..e8dee907801 100644 --- a/backend/cmm_helpers.ml +++ b/backend/cmm_helpers.ml @@ -362,6 +362,7 @@ let neg_int c dbg = sub_int (Cconst_int (0, dbg)) c dbg let rec lsl_int c1 c2 dbg = match c1, c2 with + | c1, Cconst_int (0, _) -> c1 | Cop (Clsl, [c; Cconst_int (n1, _)], _), Cconst_int (n2, _) when n1 > 0 && n2 > 0 && n1 + n2 < size_int * 8 -> Cop (Clsl, [c; Cconst_int (n1 + n2, dbg)], dbg) @@ -1280,35 +1281,104 @@ let addr_array_initialize arr ofs newval dbg = [array_indexing log2_size_addr arr ofs dbg; newval], dbg ) -(* low_32 x is a value which agrees with x on at least the low 32 bits *) -let rec low_32 dbg = function - (* Ignore sign and zero extensions, which do not affect the low bits *) - | Cop (Casr, [Cop (Clsl, [x; Cconst_int (32, _)], _); Cconst_int (32, _)], _) - | Cop (Cand, [x; Cconst_natint (0xFFFFFFFFn, _)], _) -> - low_32 dbg x - | Clet (id, e, body) -> Clet (id, e, low_32 dbg body) - | x -> x - -(* sign_extend_32 sign-extends values from 32 bits to the word size. *) -let sign_extend_32 dbg e = - match low_32 dbg e with - | Cop - ( Cload - { memory_chunk = Thirtytwo_unsigned | Thirtytwo_signed; - mutability; - is_atomic - }, - args, - dbg ) -> - Cop - ( Cload { memory_chunk = Thirtytwo_signed; mutability; is_atomic }, - args, - dbg ) - | e -> - Cop - ( Casr, - [Cop (Clsl, [e; Cconst_int (32, dbg)], dbg); Cconst_int (32, dbg)], - dbg ) +(** [get_const_bitmask x] returns [Some (y, mask)] if [x] is [y & mask] *) +let get_const_bitmask = function + | Cop (Cand, ([x; Cconst_natint (mask, _)] | [Cconst_natint (mask, _); x]), _) + -> + Some (x, mask) + | Cop (Cand, ([x; Cconst_int (mask, _)] | [Cconst_int (mask, _); x]), _) -> + Some (x, Nativeint.of_int mask) + | _ -> None + +(** [low_bits ~bits x] is a (hopefully simplified) value which agrees with x on at least + the low [bits] bits. E.g., [low_bits ~bits x & mask = x & mask], where [mask] is a + bitmask of the low [bits] bits . *) +let rec low_bits ~bits ~dbg x = + assert (bits > 0); + if bits >= arch_bits + then x + else + let unused_bits = arch_bits - bits in + let does_mask_ignore_low_bits test_mask = + (* If the mask has all the low bits set, then the low bits are unchanged. + This could happen from zero-extension. *) + let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in + Nativeint.equal mask (Nativeint.logand test_mask mask) + in + (* Ignore sign and zero extensions, which do not affect the low bits *) + map_tail + (function + | Cop + ( (Casr | Clsr), + [Cop (Clsl, [x; Cconst_int (left, _)], _); Cconst_int (right, _)], + _ ) + when 0 <= right && right <= left && left <= unused_bits -> + low_bits ~bits (lsl_const x (left - right) dbg) ~dbg + | x -> ( + match get_const_bitmask x with + | Some (x, bitmask) when does_mask_ignore_low_bits bitmask -> + low_bits ~bits x ~dbg + | _ -> x)) + x + +(** [zero_extend ~bits dbg e] returns [e] with the most significant [arch_bits - bits] + bits set to 0 *) +let zero_extend ~bits ~dbg e = + assert (0 < bits && bits <= arch_bits); + let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in + let zero_extend_via_mask e = + Cop (Cand, [e; natint_const_untagged dbg mask], dbg) + in + if bits = arch_bits + then e + else + map_tail + (function + | Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg) as e + -> ( + let load memory_chunk = + Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg) + in + match memory_chunk, bits with + | (Byte_signed | Byte_unsigned), 8 -> load Byte_unsigned + | (Sixteen_signed | Sixteen_unsigned), 16 -> load Sixteen_unsigned + | (Thirtytwo_signed | Thirtytwo_unsigned), 32 -> + load Thirtytwo_unsigned + | _ -> zero_extend_via_mask e) + | e -> zero_extend_via_mask e) + (low_bits ~bits e ~dbg) + +let sign_extend ~bits ~dbg e = + assert (0 < bits && bits <= arch_bits); + let unused_bits = arch_bits - bits in + let sign_extend_via_shift e = + asr_const (lsl_const e unused_bits dbg) unused_bits dbg + in + if bits = arch_bits + then e + else + map_tail + (function + | Cop ((Casr | Clsr), [inner; Cconst_int (n, _)], _) as e + when 0 <= n && n < arch_bits -> + (* see middle_end/flambda2/z3/sign_extension.py for proof *) + if n > unused_bits + then (* already sign-extended *) e + else + let e = lsl_const inner (unused_bits - n) dbg in + asr_const e unused_bits dbg + | Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg) as e + -> ( + let load memory_chunk = + Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg) + in + match memory_chunk, bits with + | (Byte_signed | Byte_unsigned), 8 -> load Byte_signed + | (Sixteen_signed | Sixteen_unsigned), 16 -> load Sixteen_signed + | (Thirtytwo_signed | Thirtytwo_unsigned), 32 -> load Thirtytwo_signed + | _ -> sign_extend_via_shift e) + | e -> sign_extend_via_shift e) + (low_bits ~bits e ~dbg) let unboxed_packed_array_ref arr index dbg ~memory_chunk ~elements_per_word = bind "arr" arr (fun arr -> @@ -1335,18 +1405,19 @@ let unboxed_int32_array_ref = let unboxed_mutable_int32_unboxed_product_array_ref arr ~array_index dbg = bind "arr" arr (fun arr -> bind "index" array_index (fun index -> - sign_extend_32 dbg + sign_extend ~bits:32 (Cop ( mk_load_mut Thirtytwo_signed, [array_indexing log2_size_addr arr index dbg], - dbg )))) + dbg )) + ~dbg)) let unboxed_mutable_int32_unboxed_product_array_set arr ~array_index ~new_value dbg = bind "arr" arr (fun arr -> bind "index" array_index (fun index -> bind "new_value" new_value (fun new_value -> - let new_value = sign_extend_32 dbg new_value in + let new_value = sign_extend ~bits:32 new_value ~dbg in Cop ( Cstore (Word_int, Assignment), [array_indexing log2_size_addr arr index dbg; new_value], @@ -1448,7 +1519,7 @@ let set_field_unboxed ~dbg memory_chunk block ~index_in_words newval = let field_address = array_indexing log2_size_addr block index_in_words dbg in - let newval = if size_in_bytes = 4 then low_32 dbg newval else newval in + let newval = low_bits newval ~dbg ~bits:(8 * size_in_bytes) in return_unit dbg (Cop (Cstore (memory_chunk, Assignment), [field_address; newval], dbg)) @@ -1647,16 +1718,12 @@ let call_cached_method obj tag cache pos args args_type result (apos, mode) dbg (* Allocation *) -(* CR layouts 5.1: When we pack int32s/float32s more efficiently, this code will - need to change. *) +(* CR layouts 5.1: When we pack int8/16/32s/float32s more efficiently, this code + will need to change. *) let memory_chunk_size_in_words_for_mixed_block = function - | (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed) as - memory_chunk -> - Misc.fatal_errorf - "Fields with memory chunk %s are not allowed in mixed blocks" - (Printcmm.chunk memory_chunk) + | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed | Thirtytwo_unsigned | Thirtytwo_signed -> - (* Int32s are currently stored using a whole word *) + (* small integers are currently stored using a whole word *) 1 | Single _ | Double -> (* Float32s are currently stored using a whole word *) @@ -1680,8 +1747,9 @@ let alloc_generic_set_fn block ofs newval memory_chunk dbg = addr_array_initialize block ofs newval dbg | Word_int -> generic_case () (* Generic cases that may differ under big endian archs *) - | Single _ | Double | Thirtytwo_unsigned | Thirtytwo_signed - | Onetwentyeight_unaligned | Onetwentyeight_aligned -> + | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed | Single _ + | Double | Thirtytwo_unsigned | Thirtytwo_signed | Onetwentyeight_unaligned + | Onetwentyeight_aligned -> if Arch.big_endian then Misc.fatal_errorf @@ -1689,11 +1757,6 @@ let alloc_generic_set_fn block ofs newval memory_chunk dbg = architectures" (Printcmm.chunk memory_chunk); generic_case () - (* Forbidden cases *) - | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed -> - Misc.fatal_errorf - "Fields with memory_chunk %s are not supported in generic allocations" - (Printcmm.chunk memory_chunk) let make_alloc_generic ~block_kind ~mode ~alloc_block_kind dbg tag wordsize args args_memory_chunks = @@ -1793,8 +1856,7 @@ let make_mixed_alloc ~mode dbg ~tag ~value_prefix_size args args_memory_chunks = (* regular scanned part of a block *) match memory_chunk with | Word_int | Word_val -> ok () - | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed -> - error "mixed blocks" + | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed | Thirtytwo_unsigned | Thirtytwo_signed | Single _ | Double | Onetwentyeight_unaligned | Onetwentyeight_aligned -> error "the value prefix of a mixed block" @@ -1802,10 +1864,9 @@ let make_mixed_alloc ~mode dbg ~tag ~value_prefix_size args args_memory_chunks = (* flat suffix part of the block *) match memory_chunk with | Word_int | Thirtytwo_unsigned | Thirtytwo_signed | Double - | Onetwentyeight_unaligned | Onetwentyeight_aligned | Single _ -> - ok () + | Onetwentyeight_unaligned | Onetwentyeight_aligned | Single _ | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed -> - error "mixed blocks" + ok () | Word_val -> error "the flat suffix of a mixed block") 0 args_memory_chunks in @@ -1896,107 +1957,6 @@ let bigarray_word_kind : Lambda.bigarray_kind -> memory_chunk = function | Pbigarray_complex32 -> Single { reg = Float64 } | Pbigarray_complex64 -> Double -(* the three functions below assume 64-bit words *) -let () = assert (size_int = 8) - -let check_64_bit_target func = - if size_int <> 8 - then - Misc.fatal_errorf - "Cmm helpers function %s can only be used on 64-bit targets" func - -(* Like [low_32] but for 63-bit integers held in 64-bit registers. *) -(* CR gbury: Why not use Cmm.map_tail here ? It seems designed for that kind of - thing (and covers more cases than just Clet). *) -let rec low_63 dbg e = - check_64_bit_target "low_63"; - match e with - | Cop (Casr, [Cop (Clsl, [x; Cconst_int (1, _)], _); Cconst_int (1, _)], _) -> - low_63 dbg x - | Cop (Cand, [x; Cconst_natint (0x7FFF_FFFF_FFFF_FFFFn, _)], _) -> - low_63 dbg x - | Clet (id, x, body) -> Clet (id, x, low_63 dbg body) - | _ -> e - -(* CR-someday mshinwell/gbury: sign_extend_63 then tag_int should simplify to - just tag_int. *) -let sign_extend_63 dbg e = - check_64_bit_target "sign_extend_63"; - match e with - | Cop (Casr, [_; Cconst_int (n, _)], _) when n > 0 && n < 64 -> - (* [asr] by a positive constant is sign-preserving. However: - - - Some architectures treat the shift length modulo the word size. - - - OCaml does not define behavior of shifts by more than the word size. - - So we don't make the simplification for shifts of length 64 or more. *) - e - | _ -> - let e = low_63 dbg e in - Cop - ( Casr, - [Cop (Clsl, [e; Cconst_int (1, dbg)], dbg); Cconst_int (1, dbg)], - dbg ) - -(* zero_extend_32 zero-extends values from 32 bits to the word size. *) -let zero_extend_32 dbg e = - (* CR mshinwell for gbury: same question as above *) - match low_32 dbg e with - | Cop - ( Cload - { memory_chunk = Thirtytwo_signed | Thirtytwo_unsigned; - mutability; - is_atomic - }, - args, - dbg ) -> - Cop - ( Cload { memory_chunk = Thirtytwo_unsigned; mutability; is_atomic }, - args, - dbg ) - | e -> Cop (Cand, [e; natint_const_untagged dbg 0xFFFFFFFFn], dbg) - -let zero_extend_63 dbg e = - check_64_bit_target "zero_extend_63"; - let e = low_63 dbg e in - Cop (Cand, [e; natint_const_untagged dbg 0x7FFF_FFFF_FFFF_FFFFn], dbg) - -let zero_extend ~bits ~dbg e = - assert (0 < bits && bits <= arch_bits); - if bits = arch_bits - then e - else - match bits with - | 63 -> zero_extend_63 dbg e - | 32 -> zero_extend_32 dbg e - | bits -> Misc.fatal_errorf "zero_extend not implemented for %d bits" bits - -let sign_extend ~bits ~dbg e = - assert (0 < bits && bits <= arch_bits); - if bits = arch_bits - then e - else - match bits with - | 63 -> sign_extend_63 dbg e - | 32 -> sign_extend_32 dbg e - | bits -> Misc.fatal_errorf "sign_extend not implemented for %d bits" bits - -let low_bits ~bits ~(dbg : Debuginfo.t) e = - assert (0 < bits && bits <= arch_bits); - if bits = arch_bits - then e - else - match bits with - | 63 -> low_63 dbg e - | 32 -> low_32 dbg e - | bits -> Misc.fatal_errorf "low_bits not implemented for %d bits" bits - -let ignore_low_bits ~bits ~dbg:(_ : Debuginfo.t) e = - if bits = 1 - then ignore_low_bit_int e - else Misc.fatal_error "ignore_low_bits expected bits=1 for now" - let and_int e1 e2 dbg = let is_mask32 = function | Cconst_natint (0xFFFF_FFFFn, _) -> true @@ -2004,8 +1964,8 @@ let and_int e1 e2 dbg = | _ -> false in match e1, e2 with - | e, m when is_mask32 m -> zero_extend_32 dbg e - | m, e when is_mask32 m -> zero_extend_32 dbg e + | e, m when is_mask32 m -> zero_extend ~bits:32 e ~dbg + | m, e when is_mask32 m -> zero_extend ~bits:32 e ~dbg | e1, e2 -> Cop (Cand, [e1; e2], dbg) let or_int e1 e2 dbg = Cop (Cor, [e1; e2], dbg) @@ -2033,9 +1993,7 @@ let box_int_gen dbg (bi : Primitive.boxed_integer) mode arg = let arg' = if bi = Primitive.Boxed_int32 then - if big_endian - then Cop (Clsl, [arg; Cconst_int (32, dbg)], dbg) - else sign_extend_32 dbg arg + if big_endian then lsl_const arg 32 dbg else sign_extend ~bits:32 arg ~dbg else arg in Cop @@ -2079,12 +2037,12 @@ let unbox_int dbg bi = when bi = Primitive.Boxed_int32 && big_endian && alloc_matches_boxed_int bi ~hdr ~ops -> (* Force sign-extension of low 32 bits *) - sign_extend_32 dbg contents + sign_extend ~bits:32 contents ~dbg | Cop (Calloc _, [hdr; ops; contents], _dbg) when bi = Primitive.Boxed_int32 && (not big_endian) && alloc_matches_boxed_int bi ~hdr ~ops -> (* Force sign-extension of low 32 bits *) - sign_extend_32 dbg contents + sign_extend ~bits:32 contents ~dbg | Cop (Calloc _, [hdr; ops; contents], _dbg) when alloc_matches_boxed_int bi ~hdr ~ops -> contents @@ -2100,7 +2058,7 @@ let unbox_int dbg bi = | cmm -> default cmm) let make_unsigned_int bi arg dbg = - if bi = Primitive.Unboxed_int32 then zero_extend_32 dbg arg else arg + if bi = Primitive.Unboxed_int32 then zero_extend ~bits:32 arg ~dbg else arg let unaligned_load_16 ptr idx dbg = if Arch.allow_unaligned_access @@ -4315,7 +4273,7 @@ let make_unboxed_int32_array_payload dbg unboxed_int32_list = ( Cor, [ (* [a] is sign-extended by default. We need to change it to be zero-extended for the `or` operation to be correct. *) - zero_extend_32 dbg a; + zero_extend ~bits:32 a ~dbg; Cop (Clsl, [b; Cconst_int (32, dbg)], dbg) ], dbg ) in diff --git a/backend/cmm_helpers.mli b/backend/cmm_helpers.mli index 3381191f5b5..dea4644ab67 100644 --- a/backend/cmm_helpers.mli +++ b/backend/cmm_helpers.mli @@ -375,8 +375,9 @@ val bigarray_word_kind : Lambda.bigarray_kind -> memory_chunk (** Operations on n-bit integers *) -(** Simplify the given expression knowing low [bits] bits will be irrelevant *) -val ignore_low_bits : bits:int -> dbg:Debuginfo.t -> expression -> expression +(** Simplify the given expression knowing the low bit of the argument will be irrelevant +*) +val ignore_low_bit_int : expression -> expression (** Simplify the given expression knowing that bits other than the low [bits] bits will be irrelevant *) @@ -700,7 +701,7 @@ val create_ccatch : (** Shift operations. Inputs: a tagged caml integer and an untagged machine integer. Outputs: a tagged caml integer. - Take as first argument a tagged caml integer, and as + Takes as first argument a tagged caml integer, and as second argument an untagged machine intger which is the amount to shift the first argument by. *) diff --git a/backend/printcmm.ml b/backend/printcmm.ml index 15d6b27cf72..712fe3ee54a 100644 --- a/backend/printcmm.ml +++ b/backend/printcmm.ml @@ -49,6 +49,8 @@ let machtype ppf mty = let exttype ppf = function | XInt -> fprintf ppf "int" + | XInt8 -> fprintf ppf "int8" + | XInt16 -> fprintf ppf "int16" | XInt32 -> fprintf ppf "int32" | XInt64 -> fprintf ppf "int64" | XFloat -> fprintf ppf "float" diff --git a/middle_end/flambda2/to_cmm/to_cmm_expr.ml b/middle_end/flambda2/to_cmm/to_cmm_expr.ml index 0cdb343ac8a..bff2a7e27fd 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_expr.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_expr.ml @@ -110,9 +110,9 @@ let translate_external_call env res ~free_vars apply ~callee_simple ~args 2. All of the [machtype_component]s are singleton arrays. *) Array.map (fun machtype -> [| machtype |]) return_ty in - (* Returned int32 values need to be sign_extended because it's not clear - whether C code that returns an int32 returns one that is sign extended or - not. There is no need to wrap other return arities. *) + (* Returned small integer values need to be sign-extended because it's not + clear whether C code that returns a small integer returns one that is sign + extended or not. There is no need to wrap other return arities. *) let maybe_sign_extend kind dbg cmm = match Flambda_kind.With_subkind.kind kind with | Naked_number Naked_int32 -> C.sign_extend ~bits:32 ~dbg cmm diff --git a/middle_end/flambda2/to_cmm/to_cmm_primitive.ml b/middle_end/flambda2/to_cmm/to_cmm_primitive.ml index 4a650024cc6..bd4e8faf02b 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_primitive.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_primitive.ml @@ -742,7 +742,7 @@ let binary_int_shift_primitive _env dbg kind op x y = | (Naked_int64 | Naked_nativeint), Asr -> C.asr_int x y dbg let binary_int_comp_primitive _env dbg kind cmp x y = - let ignore_low_bit_int = C.ignore_low_bits ~bits:1 ~dbg in + let ignore_low_bit_int = C.ignore_low_bit_int in match ( (kind : Flambda_kind.Standard_int.t), (cmp : P.signed_or_unsigned P.comparison) ) diff --git a/middle_end/flambda2/to_cmm/to_cmm_shared.ml b/middle_end/flambda2/to_cmm/to_cmm_shared.ml index 7da2054eee5..ee353b7f14a 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_shared.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_shared.ml @@ -205,7 +205,7 @@ let name_static res name = ~symbol:(fun s -> `Static_data [symbol_address (To_cmm_result.symbol res s)]) -let const_static cst = +let const_static cst : Cmm.data_item list = match Reg_width_const.descr cst with | Naked_immediate i -> [cint (nativeint_of_targetint (Targetint_31_63.to_targetint i))] @@ -306,6 +306,8 @@ module Update_kind = struct type kind = | Pointer | Immediate + | Naked_int8 + | Naked_int16 | Naked_int32 | Naked_int64 | Naked_float @@ -329,8 +331,8 @@ module Update_kind = struct let field_size_in_words t = match t.kind with - | Pointer | Immediate | Naked_int32 | Naked_int64 | Naked_float - | Naked_float32 -> + | Pointer | Immediate | Naked_int8 | Naked_int16 | Naked_int32 | Naked_int64 + | Naked_float | Naked_float32 -> 1 | Naked_vec128 -> 2 @@ -348,6 +350,10 @@ module Update_kind = struct let naked_vec128s = { kind = Naked_vec128; stride = 16 } + let naked_int8_fields = { kind = Naked_int8; stride = Arch.size_addr } + + let naked_int16_fields = { kind = Naked_int16; stride = Arch.size_addr } + let naked_int32_fields = { kind = Naked_int32; stride = Arch.size_addr } let naked_float32_fields = { kind = Naked_float32; stride = Arch.size_addr } @@ -372,8 +378,8 @@ let make_update env res dbg ({ kind; stride } : Update_kind.t) ~symbol var | Immediate -> (* See [caml_initialize]; we can avoid this function in this case. *) None - | Naked_int32 | Naked_int64 | Naked_float | Naked_float32 | Naked_vec128 - -> + | Naked_int8 | Naked_int16 | Naked_int32 | Naked_int64 | Naked_float + | Naked_float32 | Naked_vec128 -> (* The GC never sees these fields, so we can avoid using [caml_initialize]. This is important as it significantly reduces the complexity of the statically-allocated inconstant unboxed int32 @@ -389,12 +395,17 @@ let make_update env res dbg ({ kind; stride } : Update_kind.t) ~symbol var match kind with | Pointer -> Word_val | Immediate -> Word_int + | Naked_int8 | Naked_int16 -> + (* CR layouts v5.1: we only support small integers in being + sign-extended in word fields *) + assert (stride = Arch.size_addr); + Word_int | Naked_int32 -> (* Cmm expressions representing int32 values are always sign extended. By using [Word_int] in the "fields" cases (see [Update_kind], above) we maintain the convention that 32-bit integers in 64-bit fields are sign extended. *) - if stride > 4 then Word_int else Thirtytwo_signed + if stride = Arch.size_addr then Word_int else Thirtytwo_signed | Naked_int64 -> Word_int | Naked_float -> Double | Naked_float32 -> diff --git a/middle_end/flambda2/to_cmm/to_cmm_shared.mli b/middle_end/flambda2/to_cmm/to_cmm_shared.mli index f10d1d61744..eb90428fbf7 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_shared.mli +++ b/middle_end/flambda2/to_cmm/to_cmm_shared.mli @@ -113,6 +113,12 @@ module Update_kind : sig val tagged_immediates : t + (** Assumes each field is a word; the byte offset is [index * size_addr]. *) + val naked_int8_fields : t + + (** Assumes each field is a word; the byte offset is [index * size_addr]. *) + val naked_int16_fields : t + (** Tightly packed; the byte offset is [index * 4]. ([index] is as for [make_update], below.) *) val naked_int32s : t From e0b8691fb40be409893bb40652ffeb84a5e21e6e Mon Sep 17 00:00:00 2001 From: Jacob Van Buren Date: Tue, 18 Feb 2025 12:21:44 -0500 Subject: [PATCH 3/6] removed small int parts --- backend/arm64/cfg_selection.ml | 2 +- backend/arm64/proc.ml | 2 +- backend/arm64/selection.ml | 7 +---- backend/cmm.ml | 33 +++++++++----------- backend/cmm.mli | 2 -- backend/printcmm.ml | 2 -- middle_end/flambda2/to_cmm/to_cmm_shared.ml | 19 +++-------- middle_end/flambda2/to_cmm/to_cmm_shared.mli | 6 ---- 8 files changed, 22 insertions(+), 51 deletions(-) diff --git a/backend/arm64/cfg_selection.ml b/backend/arm64/cfg_selection.ml index f0d01d0e1a1..a08fece7ab1 100644 --- a/backend/arm64/cfg_selection.ml +++ b/backend/arm64/cfg_selection.ml @@ -171,7 +171,7 @@ class selector = method! insert_move_extcall_arg env ty_arg src dst = let ty_arg_is_int32 = match ty_arg with - | XInt8 | XInt16 | XInt32 -> true + | XInt32 -> true | XInt | XInt64 | XFloat32 | XFloat | XVec128 -> false in if macosx && ty_arg_is_int32 && is_stack_slot dst diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml index 4d20f3ad06d..a3244782404 100644 --- a/backend/arm64/proc.ml +++ b/backend/arm64/proc.ml @@ -297,7 +297,7 @@ let external_calling_conventions begin match ty_arg with | XInt | XInt64 -> loc.(i) <- [| loc_int last_int make_stack int ofs |] - | XInt32 | XInt16 | XInt8 -> + | XInt32 -> loc.(i) <- [| loc_int32 last_int make_stack int ofs |] | XFloat -> loc.(i) <- [| loc_float last_float make_stack float ofs |] diff --git a/backend/arm64/selection.ml b/backend/arm64/selection.ml index 915cd8266fd..ee57ee21c77 100644 --- a/backend/arm64/selection.ml +++ b/backend/arm64/selection.ml @@ -157,12 +157,7 @@ class selector = | _ -> super#select_operation op args dbg method! insert_move_extcall_arg env ty_arg src dst = - let ty_arg_is_int32 = - match ty_arg with - | XInt8 | XInt16 | XInt32 -> true - | XInt | XInt64 | XFloat32 | XFloat | XVec128 -> false - in - if macosx && ty_arg_is_int32 && is_stack_slot dst + if macosx && ty_arg = XInt32 && is_stack_slot dst then self#insert env (Iop (Ispecific Imove32)) src dst else self#insert_moves env src dst end diff --git a/backend/cmm.ml b/backend/cmm.ml index af61fcab9ee..fcfae698f87 100644 --- a/backend/cmm.ml +++ b/backend/cmm.ml @@ -1,4 +1,3 @@ -# 1 "backend/cmm.ml" (**************************************************************************) (* *) (* OCaml *) @@ -108,8 +107,6 @@ let ge_component comp1 comp2 = type exttype = | XInt - | XInt8 - | XInt16 | XInt32 | XInt64 | XFloat32 @@ -118,8 +115,6 @@ type exttype = let machtype_of_exttype = function | XInt -> typ_int - | XInt8 -> typ_int - | XInt16 -> typ_int | XInt32 -> typ_int | XInt64 -> typ_int | XFloat -> typ_float @@ -616,19 +611,21 @@ let equal_machtype_component (left : machtype_component) (right : machtype_compo | Float32, (Val | Addr | Int | Float | Vec128 | Valx2) -> false -let equal_exttype - ((XInt - | XInt8 - | XInt16 - | XInt32 - | XInt64 - | XFloat32 - | XFloat - | XVec128) as left) - right - = - (* we can use polymorphic compare as long as exttype is all constant constructors *) - left = right +let equal_exttype left right = + match left, right with + | XInt, XInt -> true + | XInt32, XInt32 -> true + | XInt64, XInt64 -> true + | XFloat32, XFloat32 -> true + | XFloat, XFloat -> true + | XVec128, XVec128 -> true + | XInt, (XInt32 | XInt64 | XFloat | XFloat32 | XVec128) + | XInt32, (XInt | XInt64 | XFloat | XFloat32 | XVec128) + | XInt64, (XInt | XInt32 | XFloat | XFloat32 | XVec128) + | XFloat, (XInt | XInt32 | XFloat32 | XInt64 | XVec128) + | XVec128, (XInt | XInt32 | XInt64 | XFloat | XFloat32) + | XFloat32, (XInt | XInt32 | XInt64 | XFloat | XVec128) -> + false let equal_vec128_type v1 v2 = match v1, v2 with diff --git a/backend/cmm.mli b/backend/cmm.mli index 372cf5759bc..6e9d0f4b3e8 100644 --- a/backend/cmm.mli +++ b/backend/cmm.mli @@ -75,8 +75,6 @@ val ge_component type exttype = | XInt (**r OCaml value, word-sized integer *) - | XInt8 (**r 8-bit integer *) - | XInt16 (**r 16-bit integer *) | XInt32 (**r 32-bit integer *) | XInt64 (**r 64-bit integer *) | XFloat32 (**r single-precision FP number *) diff --git a/backend/printcmm.ml b/backend/printcmm.ml index 712fe3ee54a..15d6b27cf72 100644 --- a/backend/printcmm.ml +++ b/backend/printcmm.ml @@ -49,8 +49,6 @@ let machtype ppf mty = let exttype ppf = function | XInt -> fprintf ppf "int" - | XInt8 -> fprintf ppf "int8" - | XInt16 -> fprintf ppf "int16" | XInt32 -> fprintf ppf "int32" | XInt64 -> fprintf ppf "int64" | XFloat -> fprintf ppf "float" diff --git a/middle_end/flambda2/to_cmm/to_cmm_shared.ml b/middle_end/flambda2/to_cmm/to_cmm_shared.ml index ee353b7f14a..a1e7e069935 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_shared.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_shared.ml @@ -306,8 +306,6 @@ module Update_kind = struct type kind = | Pointer | Immediate - | Naked_int8 - | Naked_int16 | Naked_int32 | Naked_int64 | Naked_float @@ -331,8 +329,8 @@ module Update_kind = struct let field_size_in_words t = match t.kind with - | Pointer | Immediate | Naked_int8 | Naked_int16 | Naked_int32 | Naked_int64 - | Naked_float | Naked_float32 -> + | Pointer | Immediate | Naked_int32 | Naked_int64 | Naked_float + | Naked_float32 -> 1 | Naked_vec128 -> 2 @@ -350,10 +348,6 @@ module Update_kind = struct let naked_vec128s = { kind = Naked_vec128; stride = 16 } - let naked_int8_fields = { kind = Naked_int8; stride = Arch.size_addr } - - let naked_int16_fields = { kind = Naked_int16; stride = Arch.size_addr } - let naked_int32_fields = { kind = Naked_int32; stride = Arch.size_addr } let naked_float32_fields = { kind = Naked_float32; stride = Arch.size_addr } @@ -378,8 +372,8 @@ let make_update env res dbg ({ kind; stride } : Update_kind.t) ~symbol var | Immediate -> (* See [caml_initialize]; we can avoid this function in this case. *) None - | Naked_int8 | Naked_int16 | Naked_int32 | Naked_int64 | Naked_float - | Naked_float32 | Naked_vec128 -> + | Naked_int32 | Naked_int64 | Naked_float | Naked_float32 | Naked_vec128 + -> (* The GC never sees these fields, so we can avoid using [caml_initialize]. This is important as it significantly reduces the complexity of the statically-allocated inconstant unboxed int32 @@ -395,11 +389,6 @@ let make_update env res dbg ({ kind; stride } : Update_kind.t) ~symbol var match kind with | Pointer -> Word_val | Immediate -> Word_int - | Naked_int8 | Naked_int16 -> - (* CR layouts v5.1: we only support small integers in being - sign-extended in word fields *) - assert (stride = Arch.size_addr); - Word_int | Naked_int32 -> (* Cmm expressions representing int32 values are always sign extended. By using [Word_int] in the "fields" cases (see [Update_kind], diff --git a/middle_end/flambda2/to_cmm/to_cmm_shared.mli b/middle_end/flambda2/to_cmm/to_cmm_shared.mli index eb90428fbf7..f10d1d61744 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_shared.mli +++ b/middle_end/flambda2/to_cmm/to_cmm_shared.mli @@ -113,12 +113,6 @@ module Update_kind : sig val tagged_immediates : t - (** Assumes each field is a word; the byte offset is [index * size_addr]. *) - val naked_int8_fields : t - - (** Assumes each field is a word; the byte offset is [index * size_addr]. *) - val naked_int16_fields : t - (** Tightly packed; the byte offset is [index * 4]. ([index] is as for [make_update], below.) *) val naked_int32s : t From a2f97a916dadcac9512e09fc5c63b38f22a482be Mon Sep 17 00:00:00 2001 From: Jacob Van Buren Date: Thu, 27 Feb 2025 16:51:24 -0500 Subject: [PATCH 4/6] committed sign-extension proof --- backend/cmm_helpers.ml | 8 +-- middle_end/flambda2/z3/sign_extension.py | 83 ++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 4 deletions(-) create mode 100755 middle_end/flambda2/z3/sign_extension.py diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml index e8dee907801..9abcd73a98f 100644 --- a/backend/cmm_helpers.ml +++ b/backend/cmm_helpers.ml @@ -1290,7 +1290,7 @@ let get_const_bitmask = function Some (x, Nativeint.of_int mask) | _ -> None -(** [low_bits ~bits x] is a (hopefully simplified) value which agrees with x on at least +(** [low_bits ~bits x] is a (potentially simplified) value which agrees with x on at least the low [bits] bits. E.g., [low_bits ~bits x & mask = x & mask], where [mask] is a bitmask of the low [bits] bits . *) let rec low_bits ~bits ~dbg x = @@ -1299,13 +1299,13 @@ let rec low_bits ~bits ~dbg x = then x else let unused_bits = arch_bits - bits in - let does_mask_ignore_low_bits test_mask = + let does_mask_keep_low_bits test_mask = (* If the mask has all the low bits set, then the low bits are unchanged. This could happen from zero-extension. *) let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in Nativeint.equal mask (Nativeint.logand test_mask mask) in - (* Ignore sign and zero extensions, which do not affect the low bits *) + (* Ignore sign and zero extensions which do not affect the low bits *) map_tail (function | Cop @@ -1316,7 +1316,7 @@ let rec low_bits ~bits ~dbg x = low_bits ~bits (lsl_const x (left - right) dbg) ~dbg | x -> ( match get_const_bitmask x with - | Some (x, bitmask) when does_mask_ignore_low_bits bitmask -> + | Some (x, bitmask) when does_mask_keep_low_bits bitmask -> low_bits ~bits x ~dbg | _ -> x)) x diff --git a/middle_end/flambda2/z3/sign_extension.py b/middle_end/flambda2/z3/sign_extension.py new file mode 100755 index 00000000000..6613558072d --- /dev/null +++ b/middle_end/flambda2/z3/sign_extension.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +from z3 import * + +ARCH_BITS = 64 + + +class Op: + def __init__(self, name): + self.inner = BitVec(f"{name}.inner", ARCH_BITS) + self.shift_right = BitVec(f"{name}.shift_right", ARCH_BITS) + self.arith = Bool(f"{name}.arith") + + def as_ast(self) -> BitVecRef: + return If( + self.arith, + self.inner >> self.shift_right, + LShR(self.inner, self.shift_right), + ) + + def __repr__(self): + return repr(self.as_ast()) + + def size(self): + return self.as_ast().size() + + def reference_sign_extend(self, bits): + unused_bits = self.size() - bits + return (self.as_ast() << unused_bits) >> unused_bits + + def experimental_sign_extend(self, bits) -> BitVecRef: + unused_bits = self.size() - bits + return If( + self.shift_right > unused_bits, + self.as_ast(), + (self.inner << (unused_bits - self.shift_right)) >> unused_bits, + ) + + +if __name__ == "__main__": + s = Solver() + + x = Op("x") + bits = BitVec("bits", ARCH_BITS) # Number of low bits to preserve + + # assumptions + s.add(And(0 <= x.shift_right, x.shift_right < x.size())) + s.add(And(0 < bits, bits <= x.size())) + + # sanity check that we haven't introduced something crazy + assert s.check() == sat + + # falsify this + s.add(x.reference_sign_extend(bits) != x.experimental_sign_extend(bits)) + + print(s.to_smt2()) + + print("Verifying sign_extend...") + if s.check() == unsat: + print("sign_extend optimization is correct.") + else: + print("sign_extend is incorrect.") + model = s.model() + print("Counterexample:", model) + exit(1) + + +# ; benchmark generated from python API +# (set-info :status unknown) +# (declare-fun x.shift_right () (_ BitVec 64)) +# (declare-fun bits () (_ BitVec 64)) +# (declare-fun x.inner () (_ BitVec 64)) +# (declare-fun x.arith () Bool) +# (assert +# (and (bvsge x.shift_right (_ bv0 64)) (bvslt x.shift_right (_ bv64 64)))) +# (assert +# (let (($x36 (bvsle bits (_ bv64 64)))) +# (and (bvsgt bits (_ bv0 64)) $x36))) +# (assert +# (let ((?x51 (bvsub (_ bv64 64) bits))) +# (let ((?x47 (ite x.arith (bvashr x.inner x.shift_right) (bvlshr x.inner x.shift_right)))) +# (let ((?x66 (ite (bvsgt x.shift_right ?x51) ?x47 (bvashr (bvshl x.inner (bvsub ?x51 x.shift_right)) ?x51)))) +# (and (distinct (bvashr (bvshl ?x47 ?x51) ?x51) ?x66) true))))) +# (check-sat) From a26729aa360913c36d0f91d16b606c2f9e517e03 Mon Sep 17 00:00:00 2001 From: Jacob Van Buren Date: Thu, 27 Feb 2025 16:54:31 -0500 Subject: [PATCH 5/6] revert some error message changes --- backend/cmm_helpers.ml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml index 9abcd73a98f..29b56e5cdb0 100644 --- a/backend/cmm_helpers.ml +++ b/backend/cmm_helpers.ml @@ -1856,7 +1856,8 @@ let make_mixed_alloc ~mode dbg ~tag ~value_prefix_size args args_memory_chunks = (* regular scanned part of a block *) match memory_chunk with | Word_int | Word_val -> ok () - | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed + | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed -> + error "mixed blocks" | Thirtytwo_unsigned | Thirtytwo_signed | Single _ | Double | Onetwentyeight_unaligned | Onetwentyeight_aligned -> error "the value prefix of a mixed block" @@ -1864,9 +1865,10 @@ let make_mixed_alloc ~mode dbg ~tag ~value_prefix_size args args_memory_chunks = (* flat suffix part of the block *) match memory_chunk with | Word_int | Thirtytwo_unsigned | Thirtytwo_signed | Double - | Onetwentyeight_unaligned | Onetwentyeight_aligned | Single _ - | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed -> + | Onetwentyeight_unaligned | Onetwentyeight_aligned | Single _ -> ok () + | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed -> + error "mixed blocks" | Word_val -> error "the flat suffix of a mixed block") 0 args_memory_chunks in From ff980b36ed2a52f4863e89602bf47eec9f14fd8d Mon Sep 17 00:00:00 2001 From: Jacob Van Buren Date: Thu, 27 Feb 2025 17:01:54 -0500 Subject: [PATCH 6/6] comments + reverted allowing blocks with small integers --- backend/cmm_helpers.ml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml index 29b56e5cdb0..9c8199d604c 100644 --- a/backend/cmm_helpers.ml +++ b/backend/cmm_helpers.ml @@ -1313,6 +1313,8 @@ let rec low_bits ~bits ~dbg x = [Cop (Clsl, [x; Cconst_int (left, _)], _); Cconst_int (right, _)], _ ) when 0 <= right && right <= left && left <= unused_bits -> + (* these sign-extensions can be replaced with a left shift since we + don't care about the high bits that it changed *) low_bits ~bits (lsl_const x (left - right) dbg) ~dbg | x -> ( match get_const_bitmask x with @@ -1363,7 +1365,9 @@ let sign_extend ~bits ~dbg e = when 0 <= n && n < arch_bits -> (* see middle_end/flambda2/z3/sign_extension.py for proof *) if n > unused_bits - then (* already sign-extended *) e + then + (* sign-extension is a no-op since the top n bits already match *) + e else let e = lsl_const inner (unused_bits - n) dbg in asr_const e unused_bits dbg @@ -1747,9 +1751,8 @@ let alloc_generic_set_fn block ofs newval memory_chunk dbg = addr_array_initialize block ofs newval dbg | Word_int -> generic_case () (* Generic cases that may differ under big endian archs *) - | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed | Single _ - | Double | Thirtytwo_unsigned | Thirtytwo_signed | Onetwentyeight_unaligned - | Onetwentyeight_aligned -> + | Single _ | Double | Thirtytwo_unsigned | Thirtytwo_signed + | Onetwentyeight_unaligned | Onetwentyeight_aligned -> if Arch.big_endian then Misc.fatal_errorf @@ -1757,6 +1760,11 @@ let alloc_generic_set_fn block ofs newval memory_chunk dbg = architectures" (Printcmm.chunk memory_chunk); generic_case () + (* Forbidden cases *) + | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed -> + Misc.fatal_errorf + "Fields with memory_chunk %s are not supported in generic allocations" + (Printcmm.chunk memory_chunk) let make_alloc_generic ~block_kind ~mode ~alloc_block_kind dbg tag wordsize args args_memory_chunks =