Skip to content

Commit 7811bd7

Browse files
authored
Refactor cmm functions to be more generic over operand width (#3404)
* unified unboxed field getters/setters. This will be useful once we have unboxed integers of different sizes * refactored cmm to handle more integer widths * committed sign-extension proof
1 parent 29c4fc1 commit 7811bd7

File tree

6 files changed

+212
-160
lines changed

6 files changed

+212
-160
lines changed

backend/cmm_helpers.ml

+119-151
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ let neg_int c dbg = sub_int (Cconst_int (0, dbg)) c dbg
362362

363363
let rec lsl_int c1 c2 dbg =
364364
match c1, c2 with
365+
| c1, Cconst_int (0, _) -> c1
365366
| Cop (Clsl, [c; Cconst_int (n1, _)], _), Cconst_int (n2, _)
366367
when n1 > 0 && n2 > 0 && n1 + n2 < size_int * 8 ->
367368
Cop (Clsl, [c; Cconst_int (n1 + n2, dbg)], dbg)
@@ -1280,35 +1281,108 @@ let addr_array_initialize arr ofs newval dbg =
12801281
[array_indexing log2_size_addr arr ofs dbg; newval],
12811282
dbg )
12821283

1283-
(* low_32 x is a value which agrees with x on at least the low 32 bits *)
1284-
let rec low_32 dbg = function
1285-
(* Ignore sign and zero extensions, which do not affect the low bits *)
1286-
| Cop (Casr, [Cop (Clsl, [x; Cconst_int (32, _)], _); Cconst_int (32, _)], _)
1287-
| Cop (Cand, [x; Cconst_natint (0xFFFFFFFFn, _)], _) ->
1288-
low_32 dbg x
1289-
| Clet (id, e, body) -> Clet (id, e, low_32 dbg body)
1290-
| x -> x
1291-
1292-
(* sign_extend_32 sign-extends values from 32 bits to the word size. *)
1293-
let sign_extend_32 dbg e =
1294-
match low_32 dbg e with
1295-
| Cop
1296-
( Cload
1297-
{ memory_chunk = Thirtytwo_unsigned | Thirtytwo_signed;
1298-
mutability;
1299-
is_atomic
1300-
},
1301-
args,
1302-
dbg ) ->
1303-
Cop
1304-
( Cload { memory_chunk = Thirtytwo_signed; mutability; is_atomic },
1305-
args,
1306-
dbg )
1307-
| e ->
1308-
Cop
1309-
( Casr,
1310-
[Cop (Clsl, [e; Cconst_int (32, dbg)], dbg); Cconst_int (32, dbg)],
1311-
dbg )
1284+
(** [get_const_bitmask x] returns [Some (y, mask)] if [x] is [y & mask] for a constant [mask], and [None] otherwise *)
1285+
let get_const_bitmask = function
1286+
| Cop (Cand, ([x; Cconst_natint (mask, _)] | [Cconst_natint (mask, _); x]), _)
1287+
->
1288+
Some (x, mask)
1289+
| Cop (Cand, ([x; Cconst_int (mask, _)] | [Cconst_int (mask, _); x]), _) ->
1290+
Some (x, Nativeint.of_int mask)
1291+
| _ -> None
1292+
1293+
(** [low_bits ~bits x] is a (potentially simplified) value which agrees with x on at least
1294+
the low [bits] bits. E.g., [low_bits ~bits x & mask = x & mask], where [mask] is a
1295+
bitmask of the low [bits] bits. *)
1296+
let rec low_bits ~bits ~dbg x =
1297+
assert (bits > 0);
1298+
if bits >= arch_bits
1299+
then x
1300+
else
1301+
let unused_bits = arch_bits - bits in
1302+
let does_mask_keep_low_bits test_mask =
1303+
(* If the mask has all the low bits set, then the low bits are unchanged.
1304+
This could happen from zero-extension. *)
1305+
let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in
1306+
Nativeint.equal mask (Nativeint.logand test_mask mask)
1307+
in
1308+
(* Ignore sign and zero extensions which do not affect the low bits *)
1309+
map_tail
1310+
(function
1311+
| Cop
1312+
( (Casr | Clsr),
1313+
[Cop (Clsl, [x; Cconst_int (left, _)], _); Cconst_int (right, _)],
1314+
_ )
1315+
when 0 <= right && right <= left && left <= unused_bits ->
1316+
(* these sign-extensions can be replaced with a left shift since we
1317+
don't care about the high bits that they changed *)
1318+
low_bits ~bits (lsl_const x (left - right) dbg) ~dbg
1319+
| x -> (
1320+
match get_const_bitmask x with
1321+
| Some (x, bitmask) when does_mask_keep_low_bits bitmask ->
1322+
low_bits ~bits x ~dbg
1323+
| _ -> x))
1324+
x
1325+
1326+
(** [zero_extend ~bits ~dbg e] returns [e] with the most significant [arch_bits - bits]
1327+
bits set to 0 *)
1328+
let zero_extend ~bits ~dbg e =
1329+
assert (0 < bits && bits <= arch_bits);
1330+
let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in
1331+
let zero_extend_via_mask e =
1332+
Cop (Cand, [e; natint_const_untagged dbg mask], dbg)
1333+
in
1334+
if bits = arch_bits
1335+
then e
1336+
else
1337+
map_tail
1338+
(function
1339+
| Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg) as e
1340+
-> (
1341+
let load memory_chunk =
1342+
Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg)
1343+
in
1344+
match memory_chunk, bits with
1345+
| (Byte_signed | Byte_unsigned), 8 -> load Byte_unsigned
1346+
| (Sixteen_signed | Sixteen_unsigned), 16 -> load Sixteen_unsigned
1347+
| (Thirtytwo_signed | Thirtytwo_unsigned), 32 ->
1348+
load Thirtytwo_unsigned
1349+
| _ -> zero_extend_via_mask e)
1350+
| e -> zero_extend_via_mask e)
1351+
(low_bits ~bits e ~dbg)
1352+
1353+
let sign_extend ~bits ~dbg e =
1354+
assert (0 < bits && bits <= arch_bits);
1355+
let unused_bits = arch_bits - bits in
1356+
let sign_extend_via_shift e =
1357+
asr_const (lsl_const e unused_bits dbg) unused_bits dbg
1358+
in
1359+
if bits = arch_bits
1360+
then e
1361+
else
1362+
map_tail
1363+
(function
1364+
| Cop ((Casr | Clsr), [inner; Cconst_int (n, _)], _) as e
1365+
when 0 <= n && n < arch_bits ->
1366+
(* see middle_end/flambda2/z3/sign_extension.py for proof *)
1367+
if n > unused_bits
1368+
then
1369+
(* sign-extension is a no-op since the top n bits already match *)
1370+
e
1371+
else
1372+
let e = lsl_const inner (unused_bits - n) dbg in
1373+
asr_const e unused_bits dbg
1374+
| Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg) as e
1375+
-> (
1376+
let load memory_chunk =
1377+
Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg)
1378+
in
1379+
match memory_chunk, bits with
1380+
| (Byte_signed | Byte_unsigned), 8 -> load Byte_signed
1381+
| (Sixteen_signed | Sixteen_unsigned), 16 -> load Sixteen_signed
1382+
| (Thirtytwo_signed | Thirtytwo_unsigned), 32 -> load Thirtytwo_signed
1383+
| _ -> sign_extend_via_shift e)
1384+
| e -> sign_extend_via_shift e)
1385+
(low_bits ~bits e ~dbg)
13121386

13131387
let unboxed_packed_array_ref arr index dbg ~memory_chunk ~elements_per_word =
13141388
bind "arr" arr (fun arr ->
@@ -1335,18 +1409,19 @@ let unboxed_int32_array_ref =
13351409
let unboxed_mutable_int32_unboxed_product_array_ref arr ~array_index dbg =
13361410
bind "arr" arr (fun arr ->
13371411
bind "index" array_index (fun index ->
1338-
sign_extend_32 dbg
1412+
sign_extend ~bits:32
13391413
(Cop
13401414
( mk_load_mut Thirtytwo_signed,
13411415
[array_indexing log2_size_addr arr index dbg],
1342-
dbg ))))
1416+
dbg ))
1417+
~dbg))
13431418

13441419
let unboxed_mutable_int32_unboxed_product_array_set arr ~array_index ~new_value
13451420
dbg =
13461421
bind "arr" arr (fun arr ->
13471422
bind "index" array_index (fun index ->
13481423
bind "new_value" new_value (fun new_value ->
1349-
let new_value = sign_extend_32 dbg new_value in
1424+
let new_value = sign_extend ~bits:32 new_value ~dbg in
13501425
Cop
13511426
( Cstore (Word_int, Assignment),
13521427
[array_indexing log2_size_addr arr index dbg; new_value],
@@ -1448,7 +1523,7 @@ let set_field_unboxed ~dbg memory_chunk block ~index_in_words newval =
14481523
let field_address =
14491524
array_indexing log2_size_addr block index_in_words dbg
14501525
in
1451-
let newval = if size_in_bytes = 4 then low_32 dbg newval else newval in
1526+
let newval = low_bits newval ~dbg ~bits:(8 * size_in_bytes) in
14521527
return_unit dbg
14531528
(Cop (Cstore (memory_chunk, Assignment), [field_address; newval], dbg))
14541529

@@ -1647,16 +1722,12 @@ let call_cached_method obj tag cache pos args args_type result (apos, mode) dbg
16471722

16481723
(* Allocation *)
16491724

1650-
(* CR layouts 5.1: When we pack int32s/float32s more efficiently, this code will
1651-
need to change. *)
1725+
(* CR layouts 5.1: When we pack int8/16/32s/float32s more efficiently, this code
1726+
will need to change. *)
16521727
let memory_chunk_size_in_words_for_mixed_block = function
1653-
| (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed) as
1654-
memory_chunk ->
1655-
Misc.fatal_errorf
1656-
"Fields with memory chunk %s are not allowed in mixed blocks"
1657-
(Printcmm.chunk memory_chunk)
1728+
| Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed
16581729
| Thirtytwo_unsigned | Thirtytwo_signed ->
1659-
(* Int32s are currently stored using a whole word *)
1730+
(* small integers are currently stored using a whole word *)
16601731
1
16611732
| Single _ | Double ->
16621733
(* Float32s are currently stored using a whole word *)
@@ -1896,116 +1967,15 @@ let bigarray_word_kind : Lambda.bigarray_kind -> memory_chunk = function
18961967
| Pbigarray_complex32 -> Single { reg = Float64 }
18971968
| Pbigarray_complex64 -> Double
18981969

1899-
(* the three functions below assume 64-bit words *)
1900-
let () = assert (size_int = 8)
1901-
1902-
let check_64_bit_target func =
1903-
if size_int <> 8
1904-
then
1905-
Misc.fatal_errorf
1906-
"Cmm helpers function %s can only be used on 64-bit targets" func
1907-
1908-
(* Like [low_32] but for 63-bit integers held in 64-bit registers. *)
1909-
(* CR gbury: Why not use Cmm.map_tail here ? It seems designed for that kind of
1910-
thing (and covers more cases than just Clet). *)
1911-
let rec low_63 dbg e =
1912-
check_64_bit_target "low_63";
1913-
match e with
1914-
| Cop (Casr, [Cop (Clsl, [x; Cconst_int (1, _)], _); Cconst_int (1, _)], _) ->
1915-
low_63 dbg x
1916-
| Cop (Cand, [x; Cconst_natint (0x7FFF_FFFF_FFFF_FFFFn, _)], _) ->
1917-
low_63 dbg x
1918-
| Clet (id, x, body) -> Clet (id, x, low_63 dbg body)
1919-
| _ -> e
1920-
1921-
(* CR-someday mshinwell/gbury: sign_extend_63 then tag_int should simplify to
1922-
just tag_int. *)
1923-
let sign_extend_63 dbg e =
1924-
check_64_bit_target "sign_extend_63";
1925-
match e with
1926-
| Cop (Casr, [_; Cconst_int (n, _)], _) when n > 0 && n < 64 ->
1927-
(* [asr] by a positive constant is sign-preserving. However:
1928-
1929-
- Some architectures treat the shift length modulo the word size.
1930-
1931-
- OCaml does not define behavior of shifts by more than the word size.
1932-
1933-
So we don't make the simplification for shifts of length 64 or more. *)
1934-
e
1935-
| _ ->
1936-
let e = low_63 dbg e in
1937-
Cop
1938-
( Casr,
1939-
[Cop (Clsl, [e; Cconst_int (1, dbg)], dbg); Cconst_int (1, dbg)],
1940-
dbg )
1941-
1942-
(* zero_extend_32 zero-extends values from 32 bits to the word size. *)
1943-
let zero_extend_32 dbg e =
1944-
(* CR mshinwell for gbury: same question as above *)
1945-
match low_32 dbg e with
1946-
| Cop
1947-
( Cload
1948-
{ memory_chunk = Thirtytwo_signed | Thirtytwo_unsigned;
1949-
mutability;
1950-
is_atomic
1951-
},
1952-
args,
1953-
dbg ) ->
1954-
Cop
1955-
( Cload { memory_chunk = Thirtytwo_unsigned; mutability; is_atomic },
1956-
args,
1957-
dbg )
1958-
| e -> Cop (Cand, [e; natint_const_untagged dbg 0xFFFFFFFFn], dbg)
1959-
1960-
let zero_extend_63 dbg e =
1961-
check_64_bit_target "zero_extend_63";
1962-
let e = low_63 dbg e in
1963-
Cop (Cand, [e; natint_const_untagged dbg 0x7FFF_FFFF_FFFF_FFFFn], dbg)
1964-
1965-
let zero_extend ~bits ~dbg e =
1966-
assert (0 < bits && bits <= arch_bits);
1967-
if bits = arch_bits
1968-
then e
1969-
else
1970-
match bits with
1971-
| 63 -> zero_extend_63 dbg e
1972-
| 32 -> zero_extend_32 dbg e
1973-
| bits -> Misc.fatal_errorf "zero_extend not implemented for %d bits" bits
1974-
1975-
let sign_extend ~bits ~dbg e =
1976-
assert (0 < bits && bits <= arch_bits);
1977-
if bits = arch_bits
1978-
then e
1979-
else
1980-
match bits with
1981-
| 63 -> sign_extend_63 dbg e
1982-
| 32 -> sign_extend_32 dbg e
1983-
| bits -> Misc.fatal_errorf "sign_extend not implemented for %d bits" bits
1984-
1985-
let low_bits ~bits ~(dbg : Debuginfo.t) e =
1986-
assert (0 < bits && bits <= arch_bits);
1987-
if bits = arch_bits
1988-
then e
1989-
else
1990-
match bits with
1991-
| 63 -> low_63 dbg e
1992-
| 32 -> low_32 dbg e
1993-
| bits -> Misc.fatal_errorf "low_bits not implemented for %d bits" bits
1994-
1995-
let ignore_low_bits ~bits ~dbg:(_ : Debuginfo.t) e =
1996-
if bits = 1
1997-
then ignore_low_bit_int e
1998-
else Misc.fatal_error "ignore_low_bits expected bits=1 for now"
1999-
20001970
let and_int e1 e2 dbg =
20011971
let is_mask32 = function
20021972
| Cconst_natint (0xFFFF_FFFFn, _) -> true
20031973
| Cconst_int (n, _) -> Nativeint.of_int n = 0xFFFF_FFFFn
20041974
| _ -> false
20051975
in
20061976
match e1, e2 with
2007-
| e, m when is_mask32 m -> zero_extend_32 dbg e
2008-
| m, e when is_mask32 m -> zero_extend_32 dbg e
1977+
| e, m when is_mask32 m -> zero_extend ~bits:32 e ~dbg
1978+
| m, e when is_mask32 m -> zero_extend ~bits:32 e ~dbg
20091979
| e1, e2 -> Cop (Cand, [e1; e2], dbg)
20101980

20111981
let or_int e1 e2 dbg = Cop (Cor, [e1; e2], dbg)
@@ -2033,9 +2003,7 @@ let box_int_gen dbg (bi : Primitive.boxed_integer) mode arg =
20332003
let arg' =
20342004
if bi = Primitive.Boxed_int32
20352005
then
2036-
if big_endian
2037-
then Cop (Clsl, [arg; Cconst_int (32, dbg)], dbg)
2038-
else sign_extend_32 dbg arg
2006+
if big_endian then lsl_const arg 32 dbg else sign_extend ~bits:32 arg ~dbg
20392007
else arg
20402008
in
20412009
Cop
@@ -2079,12 +2047,12 @@ let unbox_int dbg bi =
20792047
when bi = Primitive.Boxed_int32 && big_endian
20802048
&& alloc_matches_boxed_int bi ~hdr ~ops ->
20812049
(* Force sign-extension of low 32 bits *)
2082-
sign_extend_32 dbg contents
2050+
sign_extend ~bits:32 contents ~dbg
20832051
| Cop (Calloc _, [hdr; ops; contents], _dbg)
20842052
when bi = Primitive.Boxed_int32 && (not big_endian)
20852053
&& alloc_matches_boxed_int bi ~hdr ~ops ->
20862054
(* Force sign-extension of low 32 bits *)
2087-
sign_extend_32 dbg contents
2055+
sign_extend ~bits:32 contents ~dbg
20882056
| Cop (Calloc _, [hdr; ops; contents], _dbg)
20892057
when alloc_matches_boxed_int bi ~hdr ~ops ->
20902058
contents
@@ -2100,7 +2068,7 @@ let unbox_int dbg bi =
21002068
| cmm -> default cmm)
21012069

21022070
let make_unsigned_int bi arg dbg =
2103-
if bi = Primitive.Unboxed_int32 then zero_extend_32 dbg arg else arg
2071+
if bi = Primitive.Unboxed_int32 then zero_extend ~bits:32 arg ~dbg else arg
21042072

21052073
let unaligned_load_16 ptr idx dbg =
21062074
if Arch.allow_unaligned_access
@@ -4315,7 +4283,7 @@ let make_unboxed_int32_array_payload dbg unboxed_int32_list =
43154283
( Cor,
43164284
[ (* [a] is sign-extended by default. We need to change it to be
43174285
zero-extended for the `or` operation to be correct. *)
4318-
zero_extend_32 dbg a;
4286+
zero_extend ~bits:32 a ~dbg;
43194287
Cop (Clsl, [b; Cconst_int (32, dbg)], dbg) ],
43204288
dbg )
43214289
in

backend/cmm_helpers.mli

+4-3
Original file line numberDiff line numberDiff line change
@@ -375,8 +375,9 @@ val bigarray_word_kind : Lambda.bigarray_kind -> memory_chunk
375375

376376
(** Operations on n-bit integers *)
377377

378-
(** Simplify the given expression knowing low [bits] bits will be irrelevant *)
379-
val ignore_low_bits : bits:int -> dbg:Debuginfo.t -> expression -> expression
378+
(** Simplify the given expression knowing the low bit of the argument will be irrelevant
379+
*)
380+
val ignore_low_bit_int : expression -> expression
380381

381382
(** Simplify the given expression knowing that bits other than the low [bits] bits will be
382383
irrelevant *)
@@ -700,7 +701,7 @@ val create_ccatch :
700701
(** Shift operations.
701702
Inputs: a tagged caml integer and an untagged machine integer.
702703
Outputs: a tagged caml integer.
703-
Take as first argument a tagged caml integer, and as
704+
Takes as first argument a tagged caml integer, and as
704705
second argument an untagged machine integer which is the amount to shift the
705706
first argument by. *)
706707

middle_end/flambda2/to_cmm/to_cmm_expr.ml

+3-3
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,9 @@ let translate_external_call env res ~free_vars apply ~callee_simple ~args
110110
2. All of the [machtype_component]s are singleton arrays. *)
111111
Array.map (fun machtype -> [| machtype |]) return_ty
112112
in
113-
(* Returned int32 values need to be sign_extended because it's not clear
114-
whether C code that returns an int32 returns one that is sign extended or
115-
not. There is no need to wrap other return arities. *)
113+
(* Returned small integer values need to be sign-extended because it's not
114+
clear whether C code that returns a small integer returns one that is sign
115+
extended or not. There is no need to wrap other return arities. *)
116116
let maybe_sign_extend kind dbg cmm =
117117
match Flambda_kind.With_subkind.kind kind with
118118
| Naked_number Naked_int32 -> C.sign_extend ~bits:32 ~dbg cmm

0 commit comments

Comments
 (0)