@@ -362,6 +362,7 @@ let neg_int c dbg = sub_int (Cconst_int (0, dbg)) c dbg
 let rec lsl_int c1 c2 dbg =
   match c1, c2 with
+  | c1, Cconst_int (0, _) -> c1
   | Cop (Clsl, [c; Cconst_int (n1, _)], _), Cconst_int (n2, _)
     when n1 > 0 && n2 > 0 && n1 + n2 < size_int * 8 ->
     Cop (Clsl, [c; Cconst_int (n1 + n2, dbg)], dbg)
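The new first case makes a left shift by a constant zero the identity, while the existing case folds two nested constant shifts into one as long as the combined amount stays below the word size. A minimal self-contained sketch of that algebra on plain nativeints (illustrative only, assuming a 64-bit word; not Cmm terms):

let () =
  let x = 0x1234n in
  (* shifting by zero is the identity *)
  assert (Nativeint.shift_left x 0 = x);
  (* nested constant shifts combine while the total stays below 64 *)
  assert (Nativeint.shift_left (Nativeint.shift_left x 3) 4
          = Nativeint.shift_left x 7)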
@@ -1280,35 +1281,108 @@ let addr_array_initialize arr ofs newval dbg =
       [array_indexing log2_size_addr arr ofs dbg; newval],
       dbg )
 
-(* low_32 x is a value which agrees with x on at least the low 32 bits *)
-let rec low_32 dbg = function
-  (* Ignore sign and zero extensions, which do not affect the low bits *)
-  | Cop (Casr, [Cop (Clsl, [x; Cconst_int (32, _)], _); Cconst_int (32, _)], _)
-  | Cop (Cand, [x; Cconst_natint (0xFFFFFFFFn, _)], _) ->
-    low_32 dbg x
-  | Clet (id, e, body) -> Clet (id, e, low_32 dbg body)
-  | x -> x
-
-(* sign_extend_32 sign-extends values from 32 bits to the word size. *)
-let sign_extend_32 dbg e =
-  match low_32 dbg e with
-  | Cop
-      ( Cload
-          { memory_chunk = Thirtytwo_unsigned | Thirtytwo_signed;
-            mutability;
-            is_atomic
-          },
-        args,
-        dbg ) ->
-    Cop
-      ( Cload { memory_chunk = Thirtytwo_signed; mutability; is_atomic },
-        args,
-        dbg )
-  | e ->
-    Cop
-      ( Casr,
-        [Cop (Clsl, [e; Cconst_int (32, dbg)], dbg); Cconst_int (32, dbg)],
-        dbg )
+(** [get_const_bitmask x] returns [Some (y, mask)] if [x] is [y & mask] *)
+let get_const_bitmask = function
+  | Cop (Cand, ([x; Cconst_natint (mask, _)] | [Cconst_natint (mask, _); x]), _)
+    ->
+    Some (x, mask)
+  | Cop (Cand, ([x; Cconst_int (mask, _)] | [Cconst_int (mask, _); x]), _) ->
+    Some (x, Nativeint.of_int mask)
+  | _ -> None
+
+(** [low_bits ~bits x] is a (potentially simplified) value which agrees with x on at least
+    the low [bits] bits. E.g., [low_bits ~bits x & mask = x & mask], where [mask] is a
+    bitmask of the low [bits] bits. *)
+let rec low_bits ~bits ~dbg x =
+  assert (bits > 0);
+  if bits >= arch_bits
+  then x
+  else
+    let unused_bits = arch_bits - bits in
+    let does_mask_keep_low_bits test_mask =
+      (* If the mask has all the low bits set, then the low bits are unchanged.
+         This could happen from zero-extension. *)
+      let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in
+      Nativeint.equal mask (Nativeint.logand test_mask mask)
+    in
+    (* Ignore sign and zero extensions which do not affect the low bits *)
+    map_tail
+      (function
+        | Cop
+            ( (Casr | Clsr),
+              [Cop (Clsl, [x; Cconst_int (left, _)], _); Cconst_int (right, _)],
+              _ )
+          when 0 <= right && right <= left && left <= unused_bits ->
+          (* these sign-extensions can be replaced with a left shift since we
+             don't care about the high bits that it changed *)
+          low_bits ~bits (lsl_const x (left - right) dbg) ~dbg
+        | x -> (
+          match get_const_bitmask x with
+          | Some (x, bitmask) when does_mask_keep_low_bits bitmask ->
+            low_bits ~bits x ~dbg
+          | _ -> x))
+      x
+
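The property documented above can be restated on plain nativeints: a sign extension, or a mask that keeps all the low bits, agrees with the original value on those low bits, so [low_bits] is free to drop it. An illustrative sketch, assuming a 64-bit word:

let mask32 = 0xFFFF_FFFFn
let sign_extend32 x = Nativeint.shift_right (Nativeint.shift_left x 32) 32

let () =
  let x = 0x1234_5678_9ABC_DEF0n in
  (* the sign extension changes the high bits ... *)
  assert (sign_extend32 x <> x);
  (* ... but agrees with x on the low 32 bits *)
  assert (Nativeint.logand (sign_extend32 x) mask32 = Nativeint.logand x mask32)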
+(** [zero_extend ~bits dbg e] returns [e] with the most significant [arch_bits - bits]
+    bits set to 0 *)
+let zero_extend ~bits ~dbg e =
+  assert (0 < bits && bits <= arch_bits);
+  let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in
+  let zero_extend_via_mask e =
+    Cop (Cand, [e; natint_const_untagged dbg mask], dbg)
+  in
+  if bits = arch_bits
+  then e
+  else
+    map_tail
+      (function
+        | Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg) as e
+          -> (
+          let load memory_chunk =
+            Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg)
+          in
+          match memory_chunk, bits with
+          | (Byte_signed | Byte_unsigned), 8 -> load Byte_unsigned
+          | (Sixteen_signed | Sixteen_unsigned), 16 -> load Sixteen_unsigned
+          | (Thirtytwo_signed | Thirtytwo_unsigned), 32 ->
+            load Thirtytwo_unsigned
+          | _ -> zero_extend_via_mask e)
+        | e -> zero_extend_via_mask e)
+      (low_bits ~bits e ~dbg)
+
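For bits = 32 on a 64-bit target, the mask computed by [zero_extend] is 0xFFFF_FFFF. An illustrative nativeint sketch of what the masking path does, assuming a 64-bit word:

let () =
  let bits = 32 in
  let mask = Nativeint.pred (Nativeint.shift_left 1n bits) in
  assert (mask = 0xFFFF_FFFFn);
  (* zero extension clears the top [64 - bits] bits and keeps the rest *)
  assert (Nativeint.logand (-1n) mask = 0xFFFF_FFFFn)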
+let sign_extend ~bits ~dbg e =
+  assert (0 < bits && bits <= arch_bits);
+  let unused_bits = arch_bits - bits in
+  let sign_extend_via_shift e =
+    asr_const (lsl_const e unused_bits dbg) unused_bits dbg
+  in
+  if bits = arch_bits
+  then e
+  else
+    map_tail
+      (function
+        | Cop ((Casr | Clsr), [inner; Cconst_int (n, _)], _) as e
+          when 0 <= n && n < arch_bits ->
+          (* see middle_end/flambda2/z3/sign_extension.py for proof *)
+          if n > unused_bits
+          then
+            (* sign-extension is a no-op since the top n bits already match *)
+            e
+          else
+            let e = lsl_const inner (unused_bits - n) dbg in
+            asr_const e unused_bits dbg
+        | Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg) as e
+          -> (
+          let load memory_chunk =
+            Cop (Cload { memory_chunk; mutability; is_atomic }, args, dbg)
+          in
+          match memory_chunk, bits with
+          | (Byte_signed | Byte_unsigned), 8 -> load Byte_signed
+          | (Sixteen_signed | Sixteen_unsigned), 16 -> load Sixteen_signed
+          | (Thirtytwo_signed | Thirtytwo_unsigned), 32 -> load Thirtytwo_signed
+          | _ -> sign_extend_via_shift e)
+        | e -> sign_extend_via_shift e)
+      (low_bits ~bits e ~dbg)
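A self-contained sketch of the shift-based fallback and of the [n > unused_bits] no-op case, on plain nativeints and assuming a 64-bit word (names here are illustrative, not compiler code):

let sign_extend32 x = Nativeint.shift_right (Nativeint.shift_left x 32) 32

let () =
  (* bit 31 set: the value becomes negative once sign-extended from 32 bits *)
  assert (sign_extend32 0x8000_0000n = -0x8000_0000n);
  (* bit 31 clear: sign extension from 32 bits changes nothing *)
  assert (sign_extend32 0x1234_5678n = 0x1234_5678n);
  (* an arithmetic shift right by n > unused_bits already has its top bits
     equal, so a further sign extension is a no-op *)
  let y = Nativeint.shift_right Nativeint.min_int 40 in
  assert (sign_extend32 y = y)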
 
 let unboxed_packed_array_ref arr index dbg ~memory_chunk ~elements_per_word =
   bind "arr" arr (fun arr ->
@@ -1335,18 +1409,19 @@ let unboxed_int32_array_ref =
 let unboxed_mutable_int32_unboxed_product_array_ref arr ~array_index dbg =
   bind "arr" arr (fun arr ->
       bind "index" array_index (fun index ->
-          sign_extend_32 dbg
+          sign_extend ~bits:32
             (Cop
                ( mk_load_mut Thirtytwo_signed,
                  [array_indexing log2_size_addr arr index dbg],
-                 dbg ))))
+                 dbg ))
+            ~dbg))
 
 let unboxed_mutable_int32_unboxed_product_array_set arr ~array_index ~new_value
     dbg =
   bind "arr" arr (fun arr ->
       bind "index" array_index (fun index ->
          bind "new_value" new_value (fun new_value ->
-              let new_value = sign_extend_32 dbg new_value in
+              let new_value = sign_extend ~bits:32 new_value ~dbg in
               Cop
                 ( Cstore (Word_int, Assignment),
                   [array_indexing log2_size_addr arr index dbg; new_value],
@@ -1448,7 +1523,7 @@ let set_field_unboxed ~dbg memory_chunk block ~index_in_words newval =
   let field_address =
     array_indexing log2_size_addr block index_in_words dbg
   in
-  let newval = if size_in_bytes = 4 then low_32 dbg newval else newval in
+  let newval = low_bits newval ~dbg ~bits:(8 * size_in_bytes) in
   return_unit dbg
     (Cop (Cstore (memory_chunk, Assignment), [field_address; newval], dbg))
@@ -1647,16 +1722,12 @@ let call_cached_method obj tag cache pos args args_type result (apos, mode) dbg
 
 (* Allocation *)
 
-(* CR layouts 5.1: When we pack int32s/float32s more efficiently, this code will
-   need to change. *)
+(* CR layouts 5.1: When we pack int8/16/32s/float32s more efficiently, this code
+   will need to change. *)
 let memory_chunk_size_in_words_for_mixed_block = function
-  | (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed) as
-    memory_chunk ->
-    Misc.fatal_errorf
-      "Fields with memory chunk %s are not allowed in mixed blocks"
-      (Printcmm.chunk memory_chunk)
+  | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed
   | Thirtytwo_unsigned | Thirtytwo_signed ->
-    (* Int32s are currently stored using a whole word *)
+    (* small integers are currently stored using a whole word *)
     1
   | Single _ | Double ->
     (* Float32s are currently stored using a whole word *)
@@ -1896,116 +1967,15 @@ let bigarray_word_kind : Lambda.bigarray_kind -> memory_chunk = function
   | Pbigarray_complex32 -> Single { reg = Float64 }
   | Pbigarray_complex64 -> Double
 
-(* the three functions below assume 64-bit words *)
-let () = assert (size_int = 8)
-
-let check_64_bit_target func =
-  if size_int <> 8
-  then
-    Misc.fatal_errorf
-      "Cmm helpers function %s can only be used on 64-bit targets" func
-
-(* Like [low_32] but for 63-bit integers held in 64-bit registers. *)
-(* CR gbury: Why not use Cmm.map_tail here ? It seems designed for that kind of
-   thing (and covers more cases than just Clet). *)
-let rec low_63 dbg e =
-  check_64_bit_target "low_63";
-  match e with
-  | Cop (Casr, [Cop (Clsl, [x; Cconst_int (1, _)], _); Cconst_int (1, _)], _) ->
-    low_63 dbg x
-  | Cop (Cand, [x; Cconst_natint (0x7FFF_FFFF_FFFF_FFFFn, _)], _) ->
-    low_63 dbg x
-  | Clet (id, x, body) -> Clet (id, x, low_63 dbg body)
-  | _ -> e
-
-(* CR-someday mshinwell/gbury: sign_extend_63 then tag_int should simplify to
-   just tag_int. *)
-let sign_extend_63 dbg e =
-  check_64_bit_target "sign_extend_63";
-  match e with
-  | Cop (Casr, [_; Cconst_int (n, _)], _) when n > 0 && n < 64 ->
-    (* [asr] by a positive constant is sign-preserving. However:
-
-       - Some architectures treat the shift length modulo the word size.
-
-       - OCaml does not define behavior of shifts by more than the word size.
-
-       So we don't make the simplification for shifts of length 64 or more. *)
-    e
-  | _ ->
-    let e = low_63 dbg e in
-    Cop
-      ( Casr,
-        [Cop (Clsl, [e; Cconst_int (1, dbg)], dbg); Cconst_int (1, dbg)],
-        dbg )
-
-(* zero_extend_32 zero-extends values from 32 bits to the word size. *)
-let zero_extend_32 dbg e =
-  (* CR mshinwell for gbury: same question as above *)
-  match low_32 dbg e with
-  | Cop
-      ( Cload
-          { memory_chunk = Thirtytwo_signed | Thirtytwo_unsigned;
-            mutability;
-            is_atomic
-          },
-        args,
-        dbg ) ->
-    Cop
-      ( Cload { memory_chunk = Thirtytwo_unsigned; mutability; is_atomic },
-        args,
-        dbg )
-  | e -> Cop (Cand, [e; natint_const_untagged dbg 0xFFFFFFFFn], dbg)
-
-let zero_extend_63 dbg e =
-  check_64_bit_target "zero_extend_63";
-  let e = low_63 dbg e in
-  Cop (Cand, [e; natint_const_untagged dbg 0x7FFF_FFFF_FFFF_FFFFn], dbg)
-
-let zero_extend ~bits ~dbg e =
-  assert (0 < bits && bits <= arch_bits);
-  if bits = arch_bits
-  then e
-  else
-    match bits with
-    | 63 -> zero_extend_63 dbg e
-    | 32 -> zero_extend_32 dbg e
-    | bits -> Misc.fatal_errorf "zero_extend not implemented for %d bits" bits
-
-let sign_extend ~bits ~dbg e =
-  assert (0 < bits && bits <= arch_bits);
-  if bits = arch_bits
-  then e
-  else
-    match bits with
-    | 63 -> sign_extend_63 dbg e
-    | 32 -> sign_extend_32 dbg e
-    | bits -> Misc.fatal_errorf "sign_extend not implemented for %d bits" bits
-
-let low_bits ~bits ~(dbg : Debuginfo.t) e =
-  assert (0 < bits && bits <= arch_bits);
-  if bits = arch_bits
-  then e
-  else
-    match bits with
-    | 63 -> low_63 dbg e
-    | 32 -> low_32 dbg e
-    | bits -> Misc.fatal_errorf "low_bits not implemented for %d bits" bits
-
-let ignore_low_bits ~bits ~dbg:(_ : Debuginfo.t) e =
-  if bits = 1
-  then ignore_low_bit_int e
-  else Misc.fatal_error "ignore_low_bits expected bits=1 for now"
-
 let and_int e1 e2 dbg =
   let is_mask32 = function
     | Cconst_natint (0xFFFF_FFFFn, _) -> true
     | Cconst_int (n, _) -> Nativeint.of_int n = 0xFFFF_FFFFn
     | _ -> false
   in
   match e1, e2 with
-  | e, m when is_mask32 m -> zero_extend_32 dbg e
-  | m, e when is_mask32 m -> zero_extend_32 dbg e
+  | e, m when is_mask32 m -> zero_extend ~bits:32 e ~dbg
+  | m, e when is_mask32 m -> zero_extend ~bits:32 e ~dbg
   | e1, e2 -> Cop (Cand, [e1; e2], dbg)
 
 let or_int e1 e2 dbg = Cop (Cor, [e1; e2], dbg)
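The rewrite in [and_int] is sound because masking with 0xFFFF_FFFF, with the constant on either side, is exactly a zero-extension of the low 32 bits. A small nativeint check (illustrative, assuming a 64-bit word):

let () =
  let mask = 0xFFFF_FFFFn in
  let x = -2n in
  (* commutative, so both operand orders handled by [is_mask32] agree *)
  assert (Nativeint.logand x mask = Nativeint.logand mask x);
  (* the result keeps only the low 32 bits of x *)
  assert (Nativeint.logand x mask = 0xFFFF_FFFEn)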
@@ -2033,9 +2003,7 @@ let box_int_gen dbg (bi : Primitive.boxed_integer) mode arg =
   let arg' =
     if bi = Primitive.Boxed_int32
     then
-      if big_endian
-      then Cop (Clsl, [arg; Cconst_int (32, dbg)], dbg)
-      else sign_extend_32 dbg arg
+      if big_endian then lsl_const arg 32 dbg else sign_extend ~bits:32 arg ~dbg
     else arg
   in
   Cop
@@ -2079,12 +2047,12 @@ let unbox_int dbg bi =
       when bi = Primitive.Boxed_int32 && big_endian
            && alloc_matches_boxed_int bi ~hdr ~ops ->
       (* Force sign-extension of low 32 bits *)
-      sign_extend_32 dbg contents
+      sign_extend ~bits:32 contents ~dbg
     | Cop (Calloc _, [hdr; ops; contents], _dbg)
       when bi = Primitive.Boxed_int32 && (not big_endian)
           && alloc_matches_boxed_int bi ~hdr ~ops ->
       (* Force sign-extension of low 32 bits *)
-      sign_extend_32 dbg contents
+      sign_extend ~bits:32 contents ~dbg
     | Cop (Calloc _, [hdr; ops; contents], _dbg)
       when alloc_matches_boxed_int bi ~hdr ~ops ->
       contents
@@ -2100,7 +2068,7 @@ let unbox_int dbg bi =
     | cmm -> default cmm)
 
 let make_unsigned_int bi arg dbg =
-  if bi = Primitive.Unboxed_int32 then zero_extend_32 dbg arg else arg
+  if bi = Primitive.Unboxed_int32 then zero_extend ~bits:32 arg ~dbg else arg
 
 let unaligned_load_16 ptr idx dbg =
   if Arch.allow_unaligned_access
@@ -4315,7 +4283,7 @@ let make_unboxed_int32_array_payload dbg unboxed_int32_list =
         ( Cor,
           [ (* [a] is sign-extended by default. We need to change it to be
                zero-extended for the `or` operation to be correct. *)
-            zero_extend_32 dbg a;
+            zero_extend ~bits:32 a ~dbg;
            Cop (Clsl, [b; Cconst_int (32, dbg)], dbg) ],
          dbg )
    in
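The comment in that hunk is the crux: when two int32s are packed into one word, a sign-extended negative [a] would set the high 32 bits and corrupt [b]'s half. A worked nativeint example of the packing (illustrative, assuming a 64-bit word; [pack] is a hypothetical stand-in for the Cmm being built):

let pack (a : nativeint) (b : nativeint) =
  (* zero-extend [a] so its high half cannot leak into [b]'s half *)
  Nativeint.logor (Nativeint.logand a 0xFFFF_FFFFn) (Nativeint.shift_left b 32)

let () =
  (* a = -1 (all bits set once sign-extended), b = 5 *)
  let word = pack (-1n) 5n in
  (* the low half still holds a, the high half still holds b *)
  assert (Nativeint.logand word 0xFFFF_FFFFn = 0xFFFF_FFFFn);
  assert (Nativeint.shift_right_logical word 32 = 5n);
  (* without the zero-extension, a's sign bits would overwrite b *)
  let bad = Nativeint.logor (-1n) (Nativeint.shift_left 5n 32) in
  assert (Nativeint.shift_right_logical bad 32 <> 5n)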