From 2f5d60b15addaa3382b4d9fabedec81870ba1679 Mon Sep 17 00:00:00 2001
From: Jacob Van Buren <jvanburen@janestreet.com>
Date: Tue, 31 Dec 2024 11:00:07 -0500
Subject: [PATCH 1/6] unified unboxed field getters/setters. This will be
 useful once we have unboxed integers of different sizes

---
 backend/cmm_helpers.ml | 200 +++++++++++------------------------------
 1 file changed, 50 insertions(+), 150 deletions(-)

diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml
index 59dc47ddc5e..bcf0cf7855b 100644
--- a/backend/cmm_helpers.ml
+++ b/backend/cmm_helpers.ml
@@ -924,7 +924,20 @@ let complex_im c dbg =
 
 (* Unit *)
 
-let return_unit dbg c = Csequence (c, Cconst_int (1, dbg))
+let return_unit dbg c =
+  match c with
+  | Csequence (_, Cconst_int (1, _)) as c -> c
+  | c -> Csequence (c, Cconst_int (1, dbg))
+
+let memory_chunk_width_in_bytes : memory_chunk -> int = function
+  | Byte_unsigned | Byte_signed -> 1
+  | Sixteen_unsigned | Sixteen_signed -> 2
+  | Thirtytwo_unsigned | Thirtytwo_signed -> 4
+  | Single { reg = Float64 | Float32 } -> 4
+  | Word_int -> size_int
+  | Word_val -> size_addr
+  | Double -> size_float
+  | Onetwentyeight_unaligned | Onetwentyeight_aligned -> size_vec128
 
 let strided_field_address ptr ~index ~stride dbg =
   if index * stride = 0
@@ -932,24 +945,8 @@ let strided_field_address ptr ~index ~stride dbg =
   else Cop (Cadda, [ptr; Cconst_int (index * stride, dbg)], dbg)
 
 let field_address ?(memory_chunk = Word_val) ptr n dbg =
-  if n = 0
-  then ptr
-  else
-    let field_size_in_bytes =
-      match memory_chunk with
-      | Byte_unsigned | Byte_signed -> 1
-      | Sixteen_unsigned | Sixteen_signed -> 2
-      | Thirtytwo_unsigned | Thirtytwo_signed -> 4
-      | Single { reg = Float64 | Float32 } ->
-        assert (size_float = 8);
-        (* unclear what to do if this is false *)
-        size_float / 2
-      | Word_int -> size_int
-      | Word_val -> size_addr
-      | Double -> size_float
-      | Onetwentyeight_unaligned | Onetwentyeight_aligned -> size_vec128
-    in
-    Cop (Cadda, [ptr; Cconst_int (n * field_size_in_bytes, dbg)], dbg)
+  strided_field_address ptr dbg ~index:n
+    ~stride:(memory_chunk_width_in_bytes memory_chunk)
 
 let get_field_gen_given_memory_chunk memory_chunk mutability ptr n dbg =
   Cop
@@ -1410,7 +1407,21 @@ let unboxed_int64_or_nativeint_array_set ~has_custom_ops arr ~index ~new_value
               in
               int_array_set arr index new_value dbg)))
 
-(* Get the field of a block given a possibly inconstant index *)
+let get_field_unboxed ~dbg memory_chunk mutability block ~index_in_words =
+  if Arch.big_endian && memory_chunk_width_in_bytes memory_chunk <> size_addr
+  then
+    (* CR layouts v5.1: Properly support big-endian. *)
+    Misc.fatal_error
+      "Unboxed non-word size integer fields are only supported on \
+       little-endian architectures";
+  (* CR layouts v5.1: We'll need to vary log2_size_addr among other things to
+     efficiently pack small integers *)
+  let field_address =
+    assert (size_float = size_addr);
+    array_indexing log2_size_addr block index_in_words dbg
+  in
+  Cop
+    (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg)
 
 let get_field_computed imm_or_ptr mutability ~block ~index dbg =
   let memory_chunk =
@@ -1418,139 +1429,28 @@ let get_field_computed imm_or_ptr mutability ~block ~index dbg =
     | Lambda.Immediate -> Word_int
     | Lambda.Pointer -> Word_val
   in
-  let field_address = array_indexing log2_size_addr block index dbg in
-  Cop
-    (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg)
-
-(* Getters for unboxed int fields *)
-
-let get_field_unboxed_int32 mutability ~block ~index dbg =
-  let memory_chunk = Thirtytwo_signed in
-  (* CR layouts v5.1: Properly support big-endian. *)
-  if Arch.big_endian
-  then
-    Misc.fatal_error
-      "Unboxed int32 fields only supported on little-endian architectures";
-  (* CR layouts v5.1: We'll need to vary log2_size_addr to efficiently pack
-   * int32s *)
-  let field_address = array_indexing log2_size_addr block index dbg in
-  Cop
-    (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg)
-
-let get_field_unboxed_int64_or_nativeint mutability ~block ~index dbg =
-  let memory_chunk = Word_int in
-  let field_address = array_indexing log2_size_addr block index dbg in
-  Cop
-    (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg)
-
-(* Setters for unboxed int fields *)
-
-let setfield_unboxed_int32 arr ofs newval dbg =
-  (* CR layouts v5.1: Properly support big-endian. *)
-  if Arch.big_endian
-  then
-    Misc.fatal_error
-      "Unboxed int32 fields only supported on little-endian architectures";
-  (* CR layouts v5.1: We will need to vary log2_size_addr when int32 fields are
-     efficiently packed. *)
-  return_unit dbg
-    (Cop
-       ( Cstore (Thirtytwo_signed, Assignment),
-         [array_indexing log2_size_addr arr ofs dbg; newval],
-         dbg ))
-
-let setfield_unboxed_int64_or_nativeint arr ofs newval dbg =
-  return_unit dbg
-    (Cop
-       ( Cstore (Word_int, Assignment),
-         [array_indexing log2_size_addr arr ofs dbg; newval],
-         dbg ))
-
-(* Getters and setters for unboxed float32 fields *)
-
-let get_field_unboxed_float32 mutability ~block ~index dbg =
-  (* CR layouts v5.1: Properly support big-endian. *)
-  if Arch.big_endian
-  then
-    Misc.fatal_error
-      "Unboxed float32 fields only supported on little-endian architectures";
-  let memory_chunk = Single { reg = Float32 } in
-  (* CR layouts v5.1: We'll need to vary log2_size_addr to efficiently pack
-   * float32s *)
-  let field_address = array_indexing log2_size_addr block index dbg in
-  Cop
-    (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg)
-
-let setfield_unboxed_float32 arr ofs newval dbg =
-  (* CR layouts v5.1: Properly support big-endian. *)
-  if Arch.big_endian
-  then
-    Misc.fatal_error
-      "Unboxed float32 fields only supported on little-endian architectures";
-  (* CR layouts v5.1: We will need to vary log2_size_addr when float32 fields
-     are efficiently packed. *)
-  return_unit dbg
-    (Cop
-       ( Cstore (Single { reg = Float32 }, Assignment),
-         [array_indexing log2_size_addr arr ofs dbg; newval],
-         dbg ))
-
-(* Getters and setters for unboxed vec128 fields *)
-
-let get_field_unboxed_vec128 mutability ~block ~index_in_words dbg =
-  (* CR layouts v5.1: Properly support big-endian. *)
-  if Arch.big_endian
-  then
-    Misc.fatal_error
-      "Unboxed vec128 fields only supported on little-endian architectures";
-  let memory_chunk = Onetwentyeight_unaligned in
-  let field_address = array_indexing log2_size_addr block index_in_words dbg in
-  Cop
-    (Cload { memory_chunk; mutability; is_atomic = false }, [field_address], dbg)
-
-let setfield_unboxed_vec128 arr ~index_in_words newval dbg =
-  (* CR layouts v5.1: Properly support big-endian. *)
-  if Arch.big_endian
-  then
-    Misc.fatal_error
-      "Unboxed vec128 fields only supported on little-endian architectures";
-  let field_address = array_indexing log2_size_addr arr index_in_words dbg in
-  return_unit dbg
-    (Cop
-       ( Cstore (Onetwentyeight_unaligned, Assignment),
-         [field_address; newval],
-         dbg ))
-
-let get_field_unboxed ~dbg memory_chunk mutability block ~index_in_words =
-  match (memory_chunk : memory_chunk) with
-  | Single { reg = Float32 } ->
-    get_field_unboxed_float32 mutability ~block ~index:index_in_words dbg
-  | Double ->
-    unboxed_float_array_ref mutability ~block ~index:index_in_words dbg
-  | Onetwentyeight_unaligned | Onetwentyeight_aligned ->
-    get_field_unboxed_vec128 mutability ~block ~index_in_words dbg
-  | Thirtytwo_signed ->
-    get_field_unboxed_int32 mutability ~block ~index:index_in_words dbg
-  | Word_int ->
-    get_field_unboxed_int64_or_nativeint mutability ~block ~index:index_in_words
-      dbg
-  | Word_val ->
-    Misc.fatal_error "cannot use get_field_unboxed with a heap block"
-  | _ -> Misc.fatal_error "get_field_unboxed: unexpected memory chunk"
+  get_field_unboxed ~dbg memory_chunk mutability block ~index_in_words:index
 
 let set_field_unboxed ~dbg memory_chunk block ~index_in_words newval =
-  match (memory_chunk : memory_chunk) with
-  | Single { reg = Float32 } ->
-    setfield_unboxed_float32 block index_in_words newval dbg
-  | Double -> float_array_set block index_in_words newval dbg
-  | Onetwentyeight_unaligned | Onetwentyeight_aligned ->
-    setfield_unboxed_vec128 block ~index_in_words newval dbg
-  | Thirtytwo_signed -> setfield_unboxed_int32 block index_in_words newval dbg
-  | Word_int ->
-    setfield_unboxed_int64_or_nativeint block index_in_words newval dbg
+  match memory_chunk with
   | Word_val ->
-    Misc.fatal_error "cannot use set_field_unboxed with a heap block"
-  | _ -> Misc.fatal_error "set_field_unboxed : unexpected memory chunk"
+    Misc.fatal_error "Attempted to set a value via [setfield_unboxed]"
+  | memory_chunk ->
+    let size_in_bytes = memory_chunk_width_in_bytes memory_chunk in
+    (* CR layouts v5.1: Properly support big-endian. *)
+    if Arch.big_endian && size_in_bytes <> size_addr
+    then
+      Misc.fatal_error
+        "Unboxed non-word-size fields are only supported on little-endian \
+         architectures";
+    (* CR layouts v5.1: We will need to vary log2_size_addr, among other things,
+       when small fields are efficiently packed. *)
+    let field_address =
+      array_indexing log2_size_addr block index_in_words dbg
+    in
+    let newval = if size_in_bytes = 4 then low_32 dbg newval else newval in
+    return_unit dbg
+      (Cop (Cstore (memory_chunk, Assignment), [field_address; newval], dbg))
 
 (* String length *)
 

From e5c749977b8248fc3eff4d00cf77b3607b8826ca Mon Sep 17 00:00:00 2001
From: Jacob Van Buren <jvanburen@janestreet.com>
Date: Thu, 13 Feb 2025 17:51:46 -0500
Subject: [PATCH 2/6] refactored cmm to handle more integer widths

---
 backend/arm64/cfg_selection.ml                |   2 +-
 backend/arm64/proc.ml                         |   2 +-
 backend/arm64/selection.ml                    |   7 +-
 backend/cmm.ml                                |  33 +-
 backend/cmm.mli                               |   2 +
 backend/cmm_helpers.ml                        | 284 ++++++++----------
 backend/cmm_helpers.mli                       |   7 +-
 backend/printcmm.ml                           |   2 +
 middle_end/flambda2/to_cmm/to_cmm_expr.ml     |   6 +-
 .../flambda2/to_cmm/to_cmm_primitive.ml       |   2 +-
 middle_end/flambda2/to_cmm/to_cmm_shared.ml   |  23 +-
 middle_end/flambda2/to_cmm/to_cmm_shared.mli  |   6 +
 12 files changed, 182 insertions(+), 194 deletions(-)

diff --git a/backend/arm64/cfg_selection.ml b/backend/arm64/cfg_selection.ml
index a08fece7ab1..f0d01d0e1a1 100644
--- a/backend/arm64/cfg_selection.ml
+++ b/backend/arm64/cfg_selection.ml
@@ -171,7 +171,7 @@ class selector =
     method! insert_move_extcall_arg env ty_arg src dst =
       let ty_arg_is_int32 =
         match ty_arg with
-        | XInt32 -> true
+        | XInt8 | XInt16 | XInt32 -> true
         | XInt | XInt64 | XFloat32 | XFloat | XVec128 -> false
       in
       if macosx && ty_arg_is_int32 && is_stack_slot dst
diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml
index a3244782404..4d20f3ad06d 100644
--- a/backend/arm64/proc.ml
+++ b/backend/arm64/proc.ml
@@ -297,7 +297,7 @@ let external_calling_conventions
     begin match ty_arg with
     | XInt | XInt64 ->
         loc.(i) <- [| loc_int last_int make_stack int ofs |]
-    | XInt32 ->
+    | XInt32 | XInt16 | XInt8 ->
         loc.(i) <- [| loc_int32 last_int make_stack int ofs |]
     | XFloat ->
         loc.(i) <- [| loc_float last_float make_stack float ofs |]
diff --git a/backend/arm64/selection.ml b/backend/arm64/selection.ml
index ee57ee21c77..915cd8266fd 100644
--- a/backend/arm64/selection.ml
+++ b/backend/arm64/selection.ml
@@ -157,7 +157,12 @@ class selector =
       | _ -> super#select_operation op args dbg
 
     method! insert_move_extcall_arg env ty_arg src dst =
-      if macosx && ty_arg = XInt32 && is_stack_slot dst
+      let ty_arg_is_int32 =
+        match ty_arg with
+        | XInt8 | XInt16 | XInt32 -> true
+        | XInt | XInt64 | XFloat32 | XFloat | XVec128 -> false
+      in
+      if macosx && ty_arg_is_int32 && is_stack_slot dst
       then self#insert env (Iop (Ispecific Imove32)) src dst
       else self#insert_moves env src dst
   end
diff --git a/backend/cmm.ml b/backend/cmm.ml
index fcfae698f87..af61fcab9ee 100644
--- a/backend/cmm.ml
+++ b/backend/cmm.ml
@@ -1,3 +1,4 @@
+# 1 "backend/cmm.ml"
 (**************************************************************************)
 (*                                                                        *)
 (*                                 OCaml                                  *)
@@ -107,6 +108,8 @@ let ge_component comp1 comp2 =
 
 type exttype =
   | XInt
+  | XInt8
+  | XInt16
   | XInt32
   | XInt64
   | XFloat32
@@ -115,6 +118,8 @@ type exttype =
 
 let machtype_of_exttype = function
   | XInt -> typ_int
+  | XInt8 -> typ_int
+  | XInt16 -> typ_int
   | XInt32 -> typ_int
   | XInt64 -> typ_int
   | XFloat -> typ_float
@@ -611,21 +616,19 @@ let equal_machtype_component (left : machtype_component) (right : machtype_compo
   | Float32, (Val | Addr | Int | Float | Vec128 | Valx2) ->
     false
 
-let equal_exttype left right =
-  match left, right with
-  | XInt, XInt -> true
-  | XInt32, XInt32 -> true
-  | XInt64, XInt64 -> true
-  | XFloat32, XFloat32 -> true
-  | XFloat, XFloat -> true
-  | XVec128, XVec128 -> true
-  | XInt, (XInt32 | XInt64 | XFloat | XFloat32 | XVec128)
-  | XInt32, (XInt | XInt64 | XFloat | XFloat32 | XVec128)
-  | XInt64, (XInt | XInt32 | XFloat | XFloat32 | XVec128)
-  | XFloat, (XInt | XInt32 | XFloat32 | XInt64 | XVec128)
-  | XVec128, (XInt | XInt32 | XInt64 | XFloat | XFloat32)
-  | XFloat32, (XInt | XInt32 | XInt64 | XFloat | XVec128) ->
-    false
+let equal_exttype
+      ((XInt
+       | XInt8
+       | XInt16
+       | XInt32
+       | XInt64
+       | XFloat32
+       | XFloat
+       | XVec128) as left)
+      right
+  =
+  (* we can use polymorphic compare as long as exttype is all constant constructors *)
+  left = right
 
 let equal_vec128_type v1 v2 =
   match v1, v2 with
diff --git a/backend/cmm.mli b/backend/cmm.mli
index 6e9d0f4b3e8..372cf5759bc 100644
--- a/backend/cmm.mli
+++ b/backend/cmm.mli
@@ -75,6 +75,8 @@ val ge_component
 
 type exttype =
   | XInt                                (**r OCaml value, word-sized integer *)
+  | XInt8                               (**r 8-bit integer *)
+  | XInt16                              (**r 16-bit integer  *)
   | XInt32                              (**r 32-bit integer *)
   | XInt64                              (**r 64-bit integer  *)
   | XFloat32                            (**r single-precision FP number *)
diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml
index bcf0cf7855b..e8dee907801 100644
--- a/backend/cmm_helpers.ml
+++ b/backend/cmm_helpers.ml
@@ -362,6 +362,7 @@ let neg_int c dbg = sub_int (Cconst_int (0, dbg)) c dbg
 
 let rec lsl_int c1 c2 dbg =
   match c1, c2 with
+  | c1, Cconst_int (0, _) -> c1
   | Cop (Clsl, [c; Cconst_int (n1, _)], _), Cconst_int (n2, _)
     when n1 > 0 && n2 > 0 && n1 + n2 < size_int * 8 ->
     Cop (Clsl, [c; Cconst_int (n1 + n2, dbg)], dbg)
@@ -1280,35 +1281,104 @@ let addr_array_initialize arr ofs newval dbg =
       [array_indexing log2_size_addr arr ofs dbg; newval],
       dbg )
 
-(* low_32 x is a value which agrees with x on at least the low 32 bits *)
-let rec low_32 dbg = function
-  (* Ignore sign and zero extensions, which do not affect the low bits *)
-  | Cop (Casr, [Cop (Clsl, [x; Cconst_int (32, _)], _); Cconst_int (32, _)], _)
-  | Cop (Cand, [x; Cconst_natint (0xFFFFFFFFn, _)], _) ->
-    low_32 dbg x
-  | Clet (id, e, body) -> Clet (id, e, low_32 dbg body)
-  | x -> x
-
-(* sign_extend_32 sign-extends values from 32 bits to the word size. *)
-let sign_extend_32 dbg e =
-  match low_32 dbg e with
-  | Cop
-      ( Cload
-          { memory_chunk = Thirtytwo_unsigned | Thirtytwo_signed;
-            mutability;
-            is_atomic
-          },
-        args,
-        dbg ) ->
-    Cop
-      ( Cload { memory_chunk = Thirtytwo_signed; mutability; is_atomic },
-        args,
-        dbg )
-  | e ->
-    Cop
-      ( Casr,
-        [Cop (Clsl, [e; Cconst_int (32, dbg)], dbg); Cconst_int (32, dbg)],
-        dbg )
+(** [get_const_bitmask x] returns [Some (y, mask)] if [x] is [y & mask] *)
+let get_const_bitmask = function
+  | Cop (Cand, ([x; Cconst_natint (mask, _)] | [Cconst_natint (mask, _); x]), _)
+    ->
+    Some (x, mask)
+  | Cop (Cand, ([x; Cconst_int (mask, _)] | [Cconst_int (mask, _); x]), _) ->
+    Some (x, Nativeint.of_int mask)
+  | _ -> None
+
+(** [low_bits ~bits x] is a (hopefully simplified) value which agrees with x on at least
+    the low [bits] bits. E.g., [low_bits ~bits x & mask = x & mask], where [mask] is a
+    bitmask of the low [bits] bits . *)
+let rec low_bits ~bits ~dbg x =
+  assert (bits > 0);
+  if bits >= arch_bits
+  then x
+  else
+    let unused_bits = arch_bits - bits in
+    let does_mask_ignore_low_bits test_mask =
+      (* If the mask has all the low bits set, then the low bits are unchanged.
+         This could happen from zero-extension. *)
+      let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in
+      Nativeint.equal mask (Nativeint.logand test_mask mask)
+    in
+    (* Ignore sign and zero extensions, which do not affect the low bits *)
+    map_tail
+      (function
+        | Cop
+            ( (Casr | Clsr),
+              [Cop (Clsl, [x; Cconst_int (left, _)], _); Cconst_int (right, _)],
+              _ )
+          when 0 <= right && right <= left && left <= unused_bits ->
+          low_bits ~bits (lsl_const x (left - right) dbg) ~dbg
+        | x -> (
+          match get_const_bitmask x with
+          | Some (x, bitmask) when does_mask_ignore_low_bits bitmask ->
+            low_bits ~bits x ~dbg
+          | _ -> x))
+      x
+
+(** [zero_extend ~bits dbg e] returns [e] with the most significant [arch_bits - bits]
+    bits set to 0 *)
+let zero_extend ~bits ~dbg e =
+  assert (0 < bits && bits <= arch_bits);
+  let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in
+  let zero_extend_via_mask e =
+    Cop (Cand, [e; natint_const_untagged dbg mask], dbg)
+  in
+  if bits = arch_bits
+  then e
+  else
+    map_tail
+      (function
+        | Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg) as e
+          -> (
+          let load memory_chunk =
+            Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg)
+          in
+          match memory_chunk, bits with
+          | (Byte_signed | Byte_unsigned), 8 -> load Byte_unsigned
+          | (Sixteen_signed | Sixteen_unsigned), 16 -> load Sixteen_unsigned
+          | (Thirtytwo_signed | Thirtytwo_unsigned), 32 ->
+            load Thirtytwo_unsigned
+          | _ -> zero_extend_via_mask e)
+        | e -> zero_extend_via_mask e)
+      (low_bits ~bits e ~dbg)
+
+let sign_extend ~bits ~dbg e =
+  assert (0 < bits && bits <= arch_bits);
+  let unused_bits = arch_bits - bits in
+  let sign_extend_via_shift e =
+    asr_const (lsl_const e unused_bits dbg) unused_bits dbg
+  in
+  if bits = arch_bits
+  then e
+  else
+    map_tail
+      (function
+        | Cop ((Casr | Clsr), [inner; Cconst_int (n, _)], _) as e
+          when 0 <= n && n < arch_bits ->
+          (* see middle_end/flambda2/z3/sign_extension.py for proof *)
+          if n > unused_bits
+          then (* already sign-extended *) e
+          else
+            let e = lsl_const inner (unused_bits - n) dbg in
+            asr_const e unused_bits dbg
+        | Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg) as e
+          -> (
+          let load memory_chunk =
+            Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg)
+          in
+          match memory_chunk, bits with
+          | (Byte_signed | Byte_unsigned), 8 -> load Byte_signed
+          | (Sixteen_signed | Sixteen_unsigned), 16 -> load Sixteen_signed
+          | (Thirtytwo_signed | Thirtytwo_unsigned), 32 -> load Thirtytwo_signed
+          | _ -> sign_extend_via_shift e)
+        | e -> sign_extend_via_shift e)
+      (low_bits ~bits e ~dbg)
 
 let unboxed_packed_array_ref arr index dbg ~memory_chunk ~elements_per_word =
   bind "arr" arr (fun arr ->
@@ -1335,18 +1405,19 @@ let unboxed_int32_array_ref =
 let unboxed_mutable_int32_unboxed_product_array_ref arr ~array_index dbg =
   bind "arr" arr (fun arr ->
       bind "index" array_index (fun index ->
-          sign_extend_32 dbg
+          sign_extend ~bits:32
             (Cop
                ( mk_load_mut Thirtytwo_signed,
                  [array_indexing log2_size_addr arr index dbg],
-                 dbg ))))
+                 dbg ))
+            ~dbg))
 
 let unboxed_mutable_int32_unboxed_product_array_set arr ~array_index ~new_value
     dbg =
   bind "arr" arr (fun arr ->
       bind "index" array_index (fun index ->
           bind "new_value" new_value (fun new_value ->
-              let new_value = sign_extend_32 dbg new_value in
+              let new_value = sign_extend ~bits:32 new_value ~dbg in
               Cop
                 ( Cstore (Word_int, Assignment),
                   [array_indexing log2_size_addr arr index dbg; new_value],
@@ -1448,7 +1519,7 @@ let set_field_unboxed ~dbg memory_chunk block ~index_in_words newval =
     let field_address =
       array_indexing log2_size_addr block index_in_words dbg
     in
-    let newval = if size_in_bytes = 4 then low_32 dbg newval else newval in
+    let newval = low_bits newval ~dbg ~bits:(8 * size_in_bytes) in
     return_unit dbg
       (Cop (Cstore (memory_chunk, Assignment), [field_address; newval], dbg))
 
@@ -1647,16 +1718,12 @@ let call_cached_method obj tag cache pos args args_type result (apos, mode) dbg
 
 (* Allocation *)
 
-(* CR layouts 5.1: When we pack int32s/float32s more efficiently, this code will
-   need to change. *)
+(* CR layouts 5.1: When we pack int8/16/32s/float32s more efficiently, this code
+   will need to change. *)
 let memory_chunk_size_in_words_for_mixed_block = function
-  | (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed) as
-    memory_chunk ->
-    Misc.fatal_errorf
-      "Fields with memory chunk %s are not allowed in mixed blocks"
-      (Printcmm.chunk memory_chunk)
+  | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed
   | Thirtytwo_unsigned | Thirtytwo_signed ->
-    (* Int32s are currently stored using a whole word *)
+    (* small integers are currently stored using a whole word *)
     1
   | Single _ | Double ->
     (* Float32s are currently stored using a whole word *)
@@ -1680,8 +1747,9 @@ let alloc_generic_set_fn block ofs newval memory_chunk dbg =
     addr_array_initialize block ofs newval dbg
   | Word_int -> generic_case ()
   (* Generic cases that may differ under big endian archs *)
-  | Single _ | Double | Thirtytwo_unsigned | Thirtytwo_signed
-  | Onetwentyeight_unaligned | Onetwentyeight_aligned ->
+  | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed | Single _
+  | Double | Thirtytwo_unsigned | Thirtytwo_signed | Onetwentyeight_unaligned
+  | Onetwentyeight_aligned ->
     if Arch.big_endian
     then
       Misc.fatal_errorf
@@ -1689,11 +1757,6 @@ let alloc_generic_set_fn block ofs newval memory_chunk dbg =
          architectures"
         (Printcmm.chunk memory_chunk);
     generic_case ()
-  (* Forbidden cases *)
-  | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed ->
-    Misc.fatal_errorf
-      "Fields with memory_chunk %s are not supported in generic allocations"
-      (Printcmm.chunk memory_chunk)
 
 let make_alloc_generic ~block_kind ~mode ~alloc_block_kind dbg tag wordsize args
     args_memory_chunks =
@@ -1793,8 +1856,7 @@ let make_mixed_alloc ~mode dbg ~tag ~value_prefix_size args args_memory_chunks =
           (* regular scanned part of a block *)
           match memory_chunk with
           | Word_int | Word_val -> ok ()
-          | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed ->
-            error "mixed blocks"
+          | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed
           | Thirtytwo_unsigned | Thirtytwo_signed | Single _ | Double
           | Onetwentyeight_unaligned | Onetwentyeight_aligned ->
             error "the value prefix of a mixed block"
@@ -1802,10 +1864,9 @@ let make_mixed_alloc ~mode dbg ~tag ~value_prefix_size args args_memory_chunks =
           (* flat suffix part of the block *)
           match memory_chunk with
           | Word_int | Thirtytwo_unsigned | Thirtytwo_signed | Double
-          | Onetwentyeight_unaligned | Onetwentyeight_aligned | Single _ ->
-            ok ()
+          | Onetwentyeight_unaligned | Onetwentyeight_aligned | Single _
           | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed ->
-            error "mixed blocks"
+            ok ()
           | Word_val -> error "the flat suffix of a mixed block")
       0 args_memory_chunks
   in
@@ -1896,107 +1957,6 @@ let bigarray_word_kind : Lambda.bigarray_kind -> memory_chunk = function
   | Pbigarray_complex32 -> Single { reg = Float64 }
   | Pbigarray_complex64 -> Double
 
-(* the three functions below assume 64-bit words *)
-let () = assert (size_int = 8)
-
-let check_64_bit_target func =
-  if size_int <> 8
-  then
-    Misc.fatal_errorf
-      "Cmm helpers function %s can only be used on 64-bit targets" func
-
-(* Like [low_32] but for 63-bit integers held in 64-bit registers. *)
-(* CR gbury: Why not use Cmm.map_tail here ? It seems designed for that kind of
-   thing (and covers more cases than just Clet). *)
-let rec low_63 dbg e =
-  check_64_bit_target "low_63";
-  match e with
-  | Cop (Casr, [Cop (Clsl, [x; Cconst_int (1, _)], _); Cconst_int (1, _)], _) ->
-    low_63 dbg x
-  | Cop (Cand, [x; Cconst_natint (0x7FFF_FFFF_FFFF_FFFFn, _)], _) ->
-    low_63 dbg x
-  | Clet (id, x, body) -> Clet (id, x, low_63 dbg body)
-  | _ -> e
-
-(* CR-someday mshinwell/gbury: sign_extend_63 then tag_int should simplify to
-   just tag_int. *)
-let sign_extend_63 dbg e =
-  check_64_bit_target "sign_extend_63";
-  match e with
-  | Cop (Casr, [_; Cconst_int (n, _)], _) when n > 0 && n < 64 ->
-    (* [asr] by a positive constant is sign-preserving. However:
-
-       - Some architectures treat the shift length modulo the word size.
-
-       - OCaml does not define behavior of shifts by more than the word size.
-
-       So we don't make the simplification for shifts of length 64 or more. *)
-    e
-  | _ ->
-    let e = low_63 dbg e in
-    Cop
-      ( Casr,
-        [Cop (Clsl, [e; Cconst_int (1, dbg)], dbg); Cconst_int (1, dbg)],
-        dbg )
-
-(* zero_extend_32 zero-extends values from 32 bits to the word size. *)
-let zero_extend_32 dbg e =
-  (* CR mshinwell for gbury: same question as above *)
-  match low_32 dbg e with
-  | Cop
-      ( Cload
-          { memory_chunk = Thirtytwo_signed | Thirtytwo_unsigned;
-            mutability;
-            is_atomic
-          },
-        args,
-        dbg ) ->
-    Cop
-      ( Cload { memory_chunk = Thirtytwo_unsigned; mutability; is_atomic },
-        args,
-        dbg )
-  | e -> Cop (Cand, [e; natint_const_untagged dbg 0xFFFFFFFFn], dbg)
-
-let zero_extend_63 dbg e =
-  check_64_bit_target "zero_extend_63";
-  let e = low_63 dbg e in
-  Cop (Cand, [e; natint_const_untagged dbg 0x7FFF_FFFF_FFFF_FFFFn], dbg)
-
-let zero_extend ~bits ~dbg e =
-  assert (0 < bits && bits <= arch_bits);
-  if bits = arch_bits
-  then e
-  else
-    match bits with
-    | 63 -> zero_extend_63 dbg e
-    | 32 -> zero_extend_32 dbg e
-    | bits -> Misc.fatal_errorf "zero_extend not implemented for %d bits" bits
-
-let sign_extend ~bits ~dbg e =
-  assert (0 < bits && bits <= arch_bits);
-  if bits = arch_bits
-  then e
-  else
-    match bits with
-    | 63 -> sign_extend_63 dbg e
-    | 32 -> sign_extend_32 dbg e
-    | bits -> Misc.fatal_errorf "sign_extend not implemented for %d bits" bits
-
-let low_bits ~bits ~(dbg : Debuginfo.t) e =
-  assert (0 < bits && bits <= arch_bits);
-  if bits = arch_bits
-  then e
-  else
-    match bits with
-    | 63 -> low_63 dbg e
-    | 32 -> low_32 dbg e
-    | bits -> Misc.fatal_errorf "low_bits not implemented for %d bits" bits
-
-let ignore_low_bits ~bits ~dbg:(_ : Debuginfo.t) e =
-  if bits = 1
-  then ignore_low_bit_int e
-  else Misc.fatal_error "ignore_low_bits expected bits=1 for now"
-
 let and_int e1 e2 dbg =
   let is_mask32 = function
     | Cconst_natint (0xFFFF_FFFFn, _) -> true
@@ -2004,8 +1964,8 @@ let and_int e1 e2 dbg =
     | _ -> false
   in
   match e1, e2 with
-  | e, m when is_mask32 m -> zero_extend_32 dbg e
-  | m, e when is_mask32 m -> zero_extend_32 dbg e
+  | e, m when is_mask32 m -> zero_extend ~bits:32 e ~dbg
+  | m, e when is_mask32 m -> zero_extend ~bits:32 e ~dbg
   | e1, e2 -> Cop (Cand, [e1; e2], dbg)
 
 let or_int e1 e2 dbg = Cop (Cor, [e1; e2], dbg)
@@ -2033,9 +1993,7 @@ let box_int_gen dbg (bi : Primitive.boxed_integer) mode arg =
   let arg' =
     if bi = Primitive.Boxed_int32
     then
-      if big_endian
-      then Cop (Clsl, [arg; Cconst_int (32, dbg)], dbg)
-      else sign_extend_32 dbg arg
+      if big_endian then lsl_const arg 32 dbg else sign_extend ~bits:32 arg ~dbg
     else arg
   in
   Cop
@@ -2079,12 +2037,12 @@ let unbox_int dbg bi =
       when bi = Primitive.Boxed_int32 && big_endian
            && alloc_matches_boxed_int bi ~hdr ~ops ->
       (* Force sign-extension of low 32 bits *)
-      sign_extend_32 dbg contents
+      sign_extend ~bits:32 contents ~dbg
     | Cop (Calloc _, [hdr; ops; contents], _dbg)
       when bi = Primitive.Boxed_int32 && (not big_endian)
            && alloc_matches_boxed_int bi ~hdr ~ops ->
       (* Force sign-extension of low 32 bits *)
-      sign_extend_32 dbg contents
+      sign_extend ~bits:32 contents ~dbg
     | Cop (Calloc _, [hdr; ops; contents], _dbg)
       when alloc_matches_boxed_int bi ~hdr ~ops ->
       contents
@@ -2100,7 +2058,7 @@ let unbox_int dbg bi =
     | cmm -> default cmm)
 
 let make_unsigned_int bi arg dbg =
-  if bi = Primitive.Unboxed_int32 then zero_extend_32 dbg arg else arg
+  if bi = Primitive.Unboxed_int32 then zero_extend ~bits:32 arg ~dbg else arg
 
 let unaligned_load_16 ptr idx dbg =
   if Arch.allow_unaligned_access
@@ -4315,7 +4273,7 @@ let make_unboxed_int32_array_payload dbg unboxed_int32_list =
           ( Cor,
             [ (* [a] is sign-extended by default. We need to change it to be
                  zero-extended for the `or` operation to be correct. *)
-              zero_extend_32 dbg a;
+              zero_extend ~bits:32 a ~dbg;
               Cop (Clsl, [b; Cconst_int (32, dbg)], dbg) ],
             dbg )
       in
diff --git a/backend/cmm_helpers.mli b/backend/cmm_helpers.mli
index 3381191f5b5..dea4644ab67 100644
--- a/backend/cmm_helpers.mli
+++ b/backend/cmm_helpers.mli
@@ -375,8 +375,9 @@ val bigarray_word_kind : Lambda.bigarray_kind -> memory_chunk
 
 (** Operations on n-bit integers *)
 
-(** Simplify the given expression knowing low [bits] bits will be irrelevant *)
-val ignore_low_bits : bits:int -> dbg:Debuginfo.t -> expression -> expression
+(** Simplify the given expression knowing the low bit of the argument will be irrelevant
+*)
+val ignore_low_bit_int : expression -> expression
 
 (** Simplify the given expression knowing that bits other than the low [bits] bits will be
     irrelevant *)
@@ -700,7 +701,7 @@ val create_ccatch :
 (** Shift operations.
     Inputs: a tagged caml integer and an untagged machine integer.
     Outputs: a tagged caml integer.
-    Take as first argument a tagged caml integer, and as
+    Takes as first argument a tagged caml integer, and as
     second argument an untagged machine intger which is the amount to shift the
     first argument by. *)
 
diff --git a/backend/printcmm.ml b/backend/printcmm.ml
index 15d6b27cf72..712fe3ee54a 100644
--- a/backend/printcmm.ml
+++ b/backend/printcmm.ml
@@ -49,6 +49,8 @@ let machtype ppf mty =
 
 let exttype ppf = function
   | XInt -> fprintf ppf "int"
+  | XInt8 -> fprintf ppf "int8"
+  | XInt16 -> fprintf ppf "int16"
   | XInt32 -> fprintf ppf "int32"
   | XInt64 -> fprintf ppf "int64"
   | XFloat -> fprintf ppf "float"
diff --git a/middle_end/flambda2/to_cmm/to_cmm_expr.ml b/middle_end/flambda2/to_cmm/to_cmm_expr.ml
index 0cdb343ac8a..bff2a7e27fd 100644
--- a/middle_end/flambda2/to_cmm/to_cmm_expr.ml
+++ b/middle_end/flambda2/to_cmm/to_cmm_expr.ml
@@ -110,9 +110,9 @@ let translate_external_call env res ~free_vars apply ~callee_simple ~args
        2. All of the [machtype_component]s are singleton arrays. *)
     Array.map (fun machtype -> [| machtype |]) return_ty
   in
-  (* Returned int32 values need to be sign_extended because it's not clear
-     whether C code that returns an int32 returns one that is sign extended or
-     not. There is no need to wrap other return arities. *)
+  (* Returned small integer values need to be sign-extended because it's not
+     clear whether C code that returns a small integer returns one that is sign
+     extended or not. There is no need to wrap other return arities. *)
   let maybe_sign_extend kind dbg cmm =
     match Flambda_kind.With_subkind.kind kind with
     | Naked_number Naked_int32 -> C.sign_extend ~bits:32 ~dbg cmm
diff --git a/middle_end/flambda2/to_cmm/to_cmm_primitive.ml b/middle_end/flambda2/to_cmm/to_cmm_primitive.ml
index 4a650024cc6..bd4e8faf02b 100644
--- a/middle_end/flambda2/to_cmm/to_cmm_primitive.ml
+++ b/middle_end/flambda2/to_cmm/to_cmm_primitive.ml
@@ -742,7 +742,7 @@ let binary_int_shift_primitive _env dbg kind op x y =
   | (Naked_int64 | Naked_nativeint), Asr -> C.asr_int x y dbg
 
 let binary_int_comp_primitive _env dbg kind cmp x y =
-  let ignore_low_bit_int = C.ignore_low_bits ~bits:1 ~dbg in
+  let ignore_low_bit_int = C.ignore_low_bit_int in
   match
     ( (kind : Flambda_kind.Standard_int.t),
       (cmp : P.signed_or_unsigned P.comparison) )
diff --git a/middle_end/flambda2/to_cmm/to_cmm_shared.ml b/middle_end/flambda2/to_cmm/to_cmm_shared.ml
index 7da2054eee5..ee353b7f14a 100644
--- a/middle_end/flambda2/to_cmm/to_cmm_shared.ml
+++ b/middle_end/flambda2/to_cmm/to_cmm_shared.ml
@@ -205,7 +205,7 @@ let name_static res name =
     ~symbol:(fun s ->
       `Static_data [symbol_address (To_cmm_result.symbol res s)])
 
-let const_static cst =
+let const_static cst : Cmm.data_item list =
   match Reg_width_const.descr cst with
   | Naked_immediate i ->
     [cint (nativeint_of_targetint (Targetint_31_63.to_targetint i))]
@@ -306,6 +306,8 @@ module Update_kind = struct
   type kind =
     | Pointer
     | Immediate
+    | Naked_int8
+    | Naked_int16
     | Naked_int32
     | Naked_int64
     | Naked_float
@@ -329,8 +331,8 @@ module Update_kind = struct
 
   let field_size_in_words t =
     match t.kind with
-    | Pointer | Immediate | Naked_int32 | Naked_int64 | Naked_float
-    | Naked_float32 ->
+    | Pointer | Immediate | Naked_int8 | Naked_int16 | Naked_int32 | Naked_int64
+    | Naked_float | Naked_float32 ->
       1
     | Naked_vec128 -> 2
 
@@ -348,6 +350,10 @@ module Update_kind = struct
 
   let naked_vec128s = { kind = Naked_vec128; stride = 16 }
 
+  let naked_int8_fields = { kind = Naked_int8; stride = Arch.size_addr }
+
+  let naked_int16_fields = { kind = Naked_int16; stride = Arch.size_addr }
+
   let naked_int32_fields = { kind = Naked_int32; stride = Arch.size_addr }
 
   let naked_float32_fields = { kind = Naked_float32; stride = Arch.size_addr }
@@ -372,8 +378,8 @@ let make_update env res dbg ({ kind; stride } : Update_kind.t) ~symbol var
         | Immediate ->
           (* See [caml_initialize]; we can avoid this function in this case. *)
           None
-        | Naked_int32 | Naked_int64 | Naked_float | Naked_float32 | Naked_vec128
-          ->
+        | Naked_int8 | Naked_int16 | Naked_int32 | Naked_int64 | Naked_float
+        | Naked_float32 | Naked_vec128 ->
           (* The GC never sees these fields, so we can avoid using
              [caml_initialize]. This is important as it significantly reduces
              the complexity of the statically-allocated inconstant unboxed int32
@@ -389,12 +395,17 @@ let make_update env res dbg ({ kind; stride } : Update_kind.t) ~symbol var
         match kind with
         | Pointer -> Word_val
         | Immediate -> Word_int
+        | Naked_int8 | Naked_int16 ->
+          (* CR layouts v5.1: we only support small integers in being
+             sign-extended in word fields *)
+          assert (stride = Arch.size_addr);
+          Word_int
         | Naked_int32 ->
           (* Cmm expressions representing int32 values are always sign extended.
              By using [Word_int] in the "fields" cases (see [Update_kind],
              above) we maintain the convention that 32-bit integers in 64-bit
              fields are sign extended. *)
-          if stride > 4 then Word_int else Thirtytwo_signed
+          if stride = Arch.size_addr then Word_int else Thirtytwo_signed
         | Naked_int64 -> Word_int
         | Naked_float -> Double
         | Naked_float32 ->
diff --git a/middle_end/flambda2/to_cmm/to_cmm_shared.mli b/middle_end/flambda2/to_cmm/to_cmm_shared.mli
index f10d1d61744..eb90428fbf7 100644
--- a/middle_end/flambda2/to_cmm/to_cmm_shared.mli
+++ b/middle_end/flambda2/to_cmm/to_cmm_shared.mli
@@ -113,6 +113,12 @@ module Update_kind : sig
 
   val tagged_immediates : t
 
+  (** Assumes each field is a word; the byte offset is [index * size_addr]. *)
+  val naked_int8_fields : t
+
+  (** Assumes each field is a word; the byte offset is [index * size_addr]. *)
+  val naked_int16_fields : t
+
   (** Tightly packed; the byte offset is [index * 4].  ([index] is as for
       [make_update], below.) *)
   val naked_int32s : t

From e0b8691fb40be409893bb40652ffeb84a5e21e6e Mon Sep 17 00:00:00 2001
From: Jacob Van Buren <jvanburen@janestreet.com>
Date: Tue, 18 Feb 2025 12:21:44 -0500
Subject: [PATCH 3/6] removed small int parts

---
 backend/arm64/cfg_selection.ml               |  2 +-
 backend/arm64/proc.ml                        |  2 +-
 backend/arm64/selection.ml                   |  7 +----
 backend/cmm.ml                               | 33 +++++++++-----------
 backend/cmm.mli                              |  2 --
 backend/printcmm.ml                          |  2 --
 middle_end/flambda2/to_cmm/to_cmm_shared.ml  | 19 +++--------
 middle_end/flambda2/to_cmm/to_cmm_shared.mli |  6 ----
 8 files changed, 22 insertions(+), 51 deletions(-)

diff --git a/backend/arm64/cfg_selection.ml b/backend/arm64/cfg_selection.ml
index f0d01d0e1a1..a08fece7ab1 100644
--- a/backend/arm64/cfg_selection.ml
+++ b/backend/arm64/cfg_selection.ml
@@ -171,7 +171,7 @@ class selector =
     method! insert_move_extcall_arg env ty_arg src dst =
       let ty_arg_is_int32 =
         match ty_arg with
-        | XInt8 | XInt16 | XInt32 -> true
+        | XInt32 -> true
         | XInt | XInt64 | XFloat32 | XFloat | XVec128 -> false
       in
       if macosx && ty_arg_is_int32 && is_stack_slot dst
diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml
index 4d20f3ad06d..a3244782404 100644
--- a/backend/arm64/proc.ml
+++ b/backend/arm64/proc.ml
@@ -297,7 +297,7 @@ let external_calling_conventions
     begin match ty_arg with
     | XInt | XInt64 ->
         loc.(i) <- [| loc_int last_int make_stack int ofs |]
-    | XInt32 | XInt16 | XInt8 ->
+    | XInt32 ->
         loc.(i) <- [| loc_int32 last_int make_stack int ofs |]
     | XFloat ->
         loc.(i) <- [| loc_float last_float make_stack float ofs |]
diff --git a/backend/arm64/selection.ml b/backend/arm64/selection.ml
index 915cd8266fd..ee57ee21c77 100644
--- a/backend/arm64/selection.ml
+++ b/backend/arm64/selection.ml
@@ -157,12 +157,7 @@ class selector =
       | _ -> super#select_operation op args dbg
 
     method! insert_move_extcall_arg env ty_arg src dst =
-      let ty_arg_is_int32 =
-        match ty_arg with
-        | XInt8 | XInt16 | XInt32 -> true
-        | XInt | XInt64 | XFloat32 | XFloat | XVec128 -> false
-      in
-      if macosx && ty_arg_is_int32 && is_stack_slot dst
+      if macosx && ty_arg = XInt32 && is_stack_slot dst
       then self#insert env (Iop (Ispecific Imove32)) src dst
       else self#insert_moves env src dst
   end
diff --git a/backend/cmm.ml b/backend/cmm.ml
index af61fcab9ee..fcfae698f87 100644
--- a/backend/cmm.ml
+++ b/backend/cmm.ml
@@ -1,4 +1,3 @@
-# 1 "backend/cmm.ml"
 (**************************************************************************)
 (*                                                                        *)
 (*                                 OCaml                                  *)
@@ -108,8 +107,6 @@ let ge_component comp1 comp2 =
 
 type exttype =
   | XInt
-  | XInt8
-  | XInt16
   | XInt32
   | XInt64
   | XFloat32
@@ -118,8 +115,6 @@ type exttype =
 
 let machtype_of_exttype = function
   | XInt -> typ_int
-  | XInt8 -> typ_int
-  | XInt16 -> typ_int
   | XInt32 -> typ_int
   | XInt64 -> typ_int
   | XFloat -> typ_float
@@ -616,19 +611,21 @@ let equal_machtype_component (left : machtype_component) (right : machtype_compo
   | Float32, (Val | Addr | Int | Float | Vec128 | Valx2) ->
     false
 
-let equal_exttype
-      ((XInt
-       | XInt8
-       | XInt16
-       | XInt32
-       | XInt64
-       | XFloat32
-       | XFloat
-       | XVec128) as left)
-      right
-  =
-  (* we can use polymorphic compare as long as exttype is all constant constructors *)
-  left = right
+let equal_exttype left right =
+  match left, right with
+  | XInt, XInt -> true
+  | XInt32, XInt32 -> true
+  | XInt64, XInt64 -> true
+  | XFloat32, XFloat32 -> true
+  | XFloat, XFloat -> true
+  | XVec128, XVec128 -> true
+  | XInt, (XInt32 | XInt64 | XFloat | XFloat32 | XVec128)
+  | XInt32, (XInt | XInt64 | XFloat | XFloat32 | XVec128)
+  | XInt64, (XInt | XInt32 | XFloat | XFloat32 | XVec128)
+  | XFloat, (XInt | XInt32 | XFloat32 | XInt64 | XVec128)
+  | XVec128, (XInt | XInt32 | XInt64 | XFloat | XFloat32)
+  | XFloat32, (XInt | XInt32 | XInt64 | XFloat | XVec128) ->
+    false
 
 let equal_vec128_type v1 v2 =
   match v1, v2 with
diff --git a/backend/cmm.mli b/backend/cmm.mli
index 372cf5759bc..6e9d0f4b3e8 100644
--- a/backend/cmm.mli
+++ b/backend/cmm.mli
@@ -75,8 +75,6 @@ val ge_component
 
 type exttype =
   | XInt                                (**r OCaml value, word-sized integer *)
-  | XInt8                               (**r 8-bit integer *)
-  | XInt16                              (**r 16-bit integer  *)
   | XInt32                              (**r 32-bit integer *)
   | XInt64                              (**r 64-bit integer  *)
   | XFloat32                            (**r single-precision FP number *)
diff --git a/backend/printcmm.ml b/backend/printcmm.ml
index 712fe3ee54a..15d6b27cf72 100644
--- a/backend/printcmm.ml
+++ b/backend/printcmm.ml
@@ -49,8 +49,6 @@ let machtype ppf mty =
 
 let exttype ppf = function
   | XInt -> fprintf ppf "int"
-  | XInt8 -> fprintf ppf "int8"
-  | XInt16 -> fprintf ppf "int16"
   | XInt32 -> fprintf ppf "int32"
   | XInt64 -> fprintf ppf "int64"
   | XFloat -> fprintf ppf "float"
diff --git a/middle_end/flambda2/to_cmm/to_cmm_shared.ml b/middle_end/flambda2/to_cmm/to_cmm_shared.ml
index ee353b7f14a..a1e7e069935 100644
--- a/middle_end/flambda2/to_cmm/to_cmm_shared.ml
+++ b/middle_end/flambda2/to_cmm/to_cmm_shared.ml
@@ -306,8 +306,6 @@ module Update_kind = struct
   type kind =
     | Pointer
     | Immediate
-    | Naked_int8
-    | Naked_int16
     | Naked_int32
     | Naked_int64
     | Naked_float
@@ -331,8 +329,8 @@ module Update_kind = struct
 
   let field_size_in_words t =
     match t.kind with
-    | Pointer | Immediate | Naked_int8 | Naked_int16 | Naked_int32 | Naked_int64
-    | Naked_float | Naked_float32 ->
+    | Pointer | Immediate | Naked_int32 | Naked_int64 | Naked_float
+    | Naked_float32 ->
       1
     | Naked_vec128 -> 2
 
@@ -350,10 +348,6 @@ module Update_kind = struct
 
   let naked_vec128s = { kind = Naked_vec128; stride = 16 }
 
-  let naked_int8_fields = { kind = Naked_int8; stride = Arch.size_addr }
-
-  let naked_int16_fields = { kind = Naked_int16; stride = Arch.size_addr }
-
   let naked_int32_fields = { kind = Naked_int32; stride = Arch.size_addr }
 
   let naked_float32_fields = { kind = Naked_float32; stride = Arch.size_addr }
@@ -378,8 +372,8 @@ let make_update env res dbg ({ kind; stride } : Update_kind.t) ~symbol var
         | Immediate ->
           (* See [caml_initialize]; we can avoid this function in this case. *)
           None
-        | Naked_int8 | Naked_int16 | Naked_int32 | Naked_int64 | Naked_float
-        | Naked_float32 | Naked_vec128 ->
+        | Naked_int32 | Naked_int64 | Naked_float | Naked_float32 | Naked_vec128
+          ->
           (* The GC never sees these fields, so we can avoid using
              [caml_initialize]. This is important as it significantly reduces
              the complexity of the statically-allocated inconstant unboxed int32
@@ -395,11 +389,6 @@ let make_update env res dbg ({ kind; stride } : Update_kind.t) ~symbol var
         match kind with
         | Pointer -> Word_val
         | Immediate -> Word_int
-        | Naked_int8 | Naked_int16 ->
-          (* CR layouts v5.1: we only support small integers in being
-             sign-extended in word fields *)
-          assert (stride = Arch.size_addr);
-          Word_int
         | Naked_int32 ->
           (* Cmm expressions representing int32 values are always sign extended.
              By using [Word_int] in the "fields" cases (see [Update_kind],
diff --git a/middle_end/flambda2/to_cmm/to_cmm_shared.mli b/middle_end/flambda2/to_cmm/to_cmm_shared.mli
index eb90428fbf7..f10d1d61744 100644
--- a/middle_end/flambda2/to_cmm/to_cmm_shared.mli
+++ b/middle_end/flambda2/to_cmm/to_cmm_shared.mli
@@ -113,12 +113,6 @@ module Update_kind : sig
 
   val tagged_immediates : t
 
-  (** Assumes each field is a word; the byte offset is [index * size_addr]. *)
-  val naked_int8_fields : t
-
-  (** Assumes each field is a word; the byte offset is [index * size_addr]. *)
-  val naked_int16_fields : t
-
   (** Tightly packed; the byte offset is [index * 4].  ([index] is as for
       [make_update], below.) *)
   val naked_int32s : t

From a2f97a916dadcac9512e09fc5c63b38f22a482be Mon Sep 17 00:00:00 2001
From: Jacob Van Buren <jvanburen@janestreet.com>
Date: Thu, 27 Feb 2025 16:51:24 -0500
Subject: [PATCH 4/6] committed sign-extension proof

---
 backend/cmm_helpers.ml                   |  8 +--
 middle_end/flambda2/z3/sign_extension.py | 83 ++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 4 deletions(-)
 create mode 100755 middle_end/flambda2/z3/sign_extension.py

diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml
index e8dee907801..9abcd73a98f 100644
--- a/backend/cmm_helpers.ml
+++ b/backend/cmm_helpers.ml
@@ -1290,7 +1290,7 @@ let get_const_bitmask = function
     Some (x, Nativeint.of_int mask)
   | _ -> None
 
-(** [low_bits ~bits x] is a (hopefully simplified) value which agrees with x on at least
+(** [low_bits ~bits x] is a (potentially simplified) value which agrees with x on at least
     the low [bits] bits. E.g., [low_bits ~bits x & mask = x & mask], where [mask] is a
     bitmask of the low [bits] bits . *)
 let rec low_bits ~bits ~dbg x =
@@ -1299,13 +1299,13 @@ let rec low_bits ~bits ~dbg x =
   then x
   else
     let unused_bits = arch_bits - bits in
-    let does_mask_ignore_low_bits test_mask =
+    let does_mask_keep_low_bits test_mask =
       (* If the mask has all the low bits set, then the low bits are unchanged.
          This could happen from zero-extension. *)
       let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in
       Nativeint.equal mask (Nativeint.logand test_mask mask)
     in
-    (* Ignore sign and zero extensions, which do not affect the low bits *)
+    (* Ignore sign and zero extensions which do not affect the low bits *)
     map_tail
       (function
         | Cop
@@ -1316,7 +1316,7 @@ let rec low_bits ~bits ~dbg x =
           low_bits ~bits (lsl_const x (left - right) dbg) ~dbg
         | x -> (
           match get_const_bitmask x with
-          | Some (x, bitmask) when does_mask_ignore_low_bits bitmask ->
+          | Some (x, bitmask) when does_mask_keep_low_bits bitmask ->
             low_bits ~bits x ~dbg
           | _ -> x))
       x
diff --git a/middle_end/flambda2/z3/sign_extension.py b/middle_end/flambda2/z3/sign_extension.py
new file mode 100755
index 00000000000..6613558072d
--- /dev/null
+++ b/middle_end/flambda2/z3/sign_extension.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+from z3 import *
+
+ARCH_BITS = 64
+
+
+class Op:
+    def __init__(self, name):
+        self.inner = BitVec(f"{name}.inner", ARCH_BITS)
+        self.shift_right = BitVec(f"{name}.shift_right", ARCH_BITS)
+        self.arith = Bool(f"{name}.arith")
+
+    def as_ast(self) -> BitVecRef:
+        return If(
+            self.arith,
+            self.inner >> self.shift_right,
+            LShR(self.inner, self.shift_right),
+        )
+
+    def __repr__(self):
+        return repr(self.as_ast())
+
+    def size(self):
+        return self.as_ast().size()
+
+    def reference_sign_extend(self, bits):
+        unused_bits = self.size() - bits
+        return (self.as_ast() << unused_bits) >> unused_bits
+
+    def experimental_sign_extend(self, bits) -> BitVecRef:
+        unused_bits = self.size() - bits
+        return If(
+            self.shift_right > unused_bits,
+            self.as_ast(),
+            (self.inner << (unused_bits - self.shift_right)) >> unused_bits,
+        )
+
+
+if __name__ == "__main__":
+    s = Solver()
+
+    x = Op("x")
+    bits = BitVec("bits", ARCH_BITS)  # Number of low bits to preserve
+
+    # assumptions
+    s.add(And(0 <= x.shift_right, x.shift_right < x.size()))
+    s.add(And(0 < bits, bits <= x.size()))
+
+    # sanity check that we haven't introduced something crazy
+    assert s.check() == sat
+
+    # falsify this
+    s.add(x.reference_sign_extend(bits) != x.experimental_sign_extend(bits))
+
+    print(s.to_smt2())
+
+    print("Verifying sign_extend...")
+    if s.check() == unsat:
+        print("sign_extend optimization is correct.")
+    else:
+        print("sign_extend is incorrect.")
+        model = s.model()
+        print("Counterexample:", model)
+        exit(1)
+
+
+# ; benchmark generated from python API
+# (set-info :status unknown)
+# (declare-fun x.shift_right () (_ BitVec 64))
+# (declare-fun bits () (_ BitVec 64))
+# (declare-fun x.inner () (_ BitVec 64))
+# (declare-fun x.arith () Bool)
+# (assert
+#  (and (bvsge x.shift_right (_ bv0 64)) (bvslt x.shift_right (_ bv64 64))))
+# (assert
+#  (let (($x36 (bvsle bits (_ bv64 64))))
+#  (and (bvsgt bits (_ bv0 64)) $x36)))
+# (assert
+#  (let ((?x51 (bvsub (_ bv64 64) bits)))
+#  (let ((?x47 (ite x.arith (bvashr x.inner x.shift_right) (bvlshr x.inner x.shift_right))))
+#  (let ((?x66 (ite (bvsgt x.shift_right ?x51) ?x47 (bvashr (bvshl x.inner (bvsub ?x51 x.shift_right)) ?x51))))
+#  (and (distinct (bvashr (bvshl ?x47 ?x51) ?x51) ?x66) true)))))
+# (check-sat)

From a26729aa360913c36d0f91d16b606c2f9e517e03 Mon Sep 17 00:00:00 2001
From: Jacob Van Buren <jvanburen@janestreet.com>
Date: Thu, 27 Feb 2025 16:54:31 -0500
Subject: [PATCH 5/6] revert some error message changes

---
 backend/cmm_helpers.ml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml
index 9abcd73a98f..29b56e5cdb0 100644
--- a/backend/cmm_helpers.ml
+++ b/backend/cmm_helpers.ml
@@ -1856,7 +1856,8 @@ let make_mixed_alloc ~mode dbg ~tag ~value_prefix_size args args_memory_chunks =
           (* regular scanned part of a block *)
           match memory_chunk with
           | Word_int | Word_val -> ok ()
-          | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed
+          | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed ->
+            error "mixed blocks"
           | Thirtytwo_unsigned | Thirtytwo_signed | Single _ | Double
           | Onetwentyeight_unaligned | Onetwentyeight_aligned ->
             error "the value prefix of a mixed block"
@@ -1864,9 +1865,10 @@ let make_mixed_alloc ~mode dbg ~tag ~value_prefix_size args args_memory_chunks =
           (* flat suffix part of the block *)
           match memory_chunk with
           | Word_int | Thirtytwo_unsigned | Thirtytwo_signed | Double
-          | Onetwentyeight_unaligned | Onetwentyeight_aligned | Single _
-          | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed ->
+          | Onetwentyeight_unaligned | Onetwentyeight_aligned | Single _ ->
             ok ()
+          | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed ->
+            error "mixed blocks"
           | Word_val -> error "the flat suffix of a mixed block")
       0 args_memory_chunks
   in

From ff980b36ed2a52f4863e89602bf47eec9f14fd8d Mon Sep 17 00:00:00 2001
From: Jacob Van Buren <jvanburen@janestreet.com>
Date: Thu, 27 Feb 2025 17:01:54 -0500
Subject: [PATCH 6/6] comments + reverted allowing blocks with small integers

---
 backend/cmm_helpers.ml | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml
index 29b56e5cdb0..9c8199d604c 100644
--- a/backend/cmm_helpers.ml
+++ b/backend/cmm_helpers.ml
@@ -1313,6 +1313,8 @@ let rec low_bits ~bits ~dbg x =
               [Cop (Clsl, [x; Cconst_int (left, _)], _); Cconst_int (right, _)],
               _ )
           when 0 <= right && right <= left && left <= unused_bits ->
+          (* these sign-extensions can be replaced with a left shift since we
+             don't care about the high bits that it changed *)
           low_bits ~bits (lsl_const x (left - right) dbg) ~dbg
         | x -> (
           match get_const_bitmask x with
@@ -1363,7 +1365,9 @@ let sign_extend ~bits ~dbg e =
           when 0 <= n && n < arch_bits ->
           (* see middle_end/flambda2/z3/sign_extension.py for proof *)
           if n > unused_bits
-          then (* already sign-extended *) e
+          then
+            (* sign-extension is a no-op since the top n bits already match *)
+            e
           else
             let e = lsl_const inner (unused_bits - n) dbg in
             asr_const e unused_bits dbg
@@ -1747,9 +1751,8 @@ let alloc_generic_set_fn block ofs newval memory_chunk dbg =
     addr_array_initialize block ofs newval dbg
   | Word_int -> generic_case ()
   (* Generic cases that may differ under big endian archs *)
-  | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed | Single _
-  | Double | Thirtytwo_unsigned | Thirtytwo_signed | Onetwentyeight_unaligned
-  | Onetwentyeight_aligned ->
+  | Single _ | Double | Thirtytwo_unsigned | Thirtytwo_signed
+  | Onetwentyeight_unaligned | Onetwentyeight_aligned ->
     if Arch.big_endian
     then
       Misc.fatal_errorf
@@ -1757,6 +1760,11 @@ let alloc_generic_set_fn block ofs newval memory_chunk dbg =
          architectures"
         (Printcmm.chunk memory_chunk);
     generic_case ()
+  (* Forbidden cases *)
+  | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed ->
+    Misc.fatal_errorf
+      "Fields with memory_chunk %s are not supported in generic allocations"
+      (Printcmm.chunk memory_chunk)
 
 let make_alloc_generic ~block_kind ~mode ~alloc_block_kind dbg tag wordsize args
     args_memory_chunks =