@@ -1203,6 +1203,97 @@ is_element_type_primitive (MonoType *vector_type)
1203
1203
}
1204
1204
}
1205
1205
1206
+ static MonoInst *
1207
+ emit_msb_vector_mask (MonoCompile * cfg , MonoClass * arg_class , MonoTypeEnum arg_type )
1208
+ {
1209
+ guint64 msb_mask_value [2 ];
1210
+
1211
+ switch (arg_type ) {
1212
+ case MONO_TYPE_I1 :
1213
+ case MONO_TYPE_U1 :
1214
+ msb_mask_value [0 ] = 0x8080808080808080 ;
1215
+ msb_mask_value [1 ] = 0x8080808080808080 ;
1216
+ break ;
1217
+ case MONO_TYPE_I2 :
1218
+ case MONO_TYPE_U2 :
1219
+ msb_mask_value [0 ] = 0x8000800080008000 ;
1220
+ msb_mask_value [1 ] = 0x8000800080008000 ;
1221
+ break ;
1222
+ #if TARGET_SIZEOF_VOID_P == 4
1223
+ case MONO_TYPE_I :
1224
+ case MONO_TYPE_U :
1225
+ #endif
1226
+ case MONO_TYPE_I4 :
1227
+ case MONO_TYPE_U4 :
1228
+ case MONO_TYPE_R4 :
1229
+ msb_mask_value [0 ] = 0x8000000080000000 ;
1230
+ msb_mask_value [1 ] = 0x8000000080000000 ;
1231
+ break ;
1232
+ #if TARGET_SIZEOF_VOID_P == 8
1233
+ case MONO_TYPE_I :
1234
+ case MONO_TYPE_U :
1235
+ #endif
1236
+ case MONO_TYPE_I8 :
1237
+ case MONO_TYPE_U8 :
1238
+ case MONO_TYPE_R8 :
1239
+ msb_mask_value [0 ] = 0x8000000000000000 ;
1240
+ msb_mask_value [1 ] = 0x8000000000000000 ;
1241
+ break ;
1242
+ default :
1243
+ g_assert_not_reached ();
1244
+ }
1245
+
1246
+ MonoInst * msb_mask_vec = emit_xconst_v128 (cfg , arg_class , (guint8 * )msb_mask_value );
1247
+ msb_mask_vec -> klass = arg_class ;
1248
+ return msb_mask_vec ;
1249
+ }
1250
+
1251
+ static MonoInst *
1252
+ emit_msb_shift_vector_constant (MonoCompile * cfg , MonoClass * arg_class , MonoTypeEnum arg_type )
1253
+ {
1254
+ guint64 msb_shift_value [2 ];
1255
+
1256
+ // NOTE: On ARM64 ushl shifts a vector left or right depending on the sign of the shift constant
1257
+ switch (arg_type ) {
1258
+ case MONO_TYPE_I1 :
1259
+ case MONO_TYPE_U1 :
1260
+ msb_shift_value [0 ] = 0x00FFFEFDFCFBFAF9 ;
1261
+ msb_shift_value [1 ] = 0x00FFFEFDFCFBFAF9 ;
1262
+ break ;
1263
+ case MONO_TYPE_I2 :
1264
+ case MONO_TYPE_U2 :
1265
+ msb_shift_value [0 ] = 0xFFF4FFF3FFF2FFF1 ;
1266
+ msb_shift_value [1 ] = 0xFFF8FFF7FFF6FFF5 ;
1267
+ break ;
1268
+ #if TARGET_SIZEOF_VOID_P == 4
1269
+ case MONO_TYPE_I :
1270
+ case MONO_TYPE_U :
1271
+ #endif
1272
+ case MONO_TYPE_I4 :
1273
+ case MONO_TYPE_U4 :
1274
+ case MONO_TYPE_R4 :
1275
+ msb_shift_value [0 ] = 0xFFFFFFE2FFFFFFE1 ;
1276
+ msb_shift_value [1 ] = 0xFFFFFFE4FFFFFFE3 ;
1277
+ break ;
1278
+ #if TARGET_SIZEOF_VOID_P == 8
1279
+ case MONO_TYPE_I :
1280
+ case MONO_TYPE_U :
1281
+ #endif
1282
+ case MONO_TYPE_I8 :
1283
+ case MONO_TYPE_U8 :
1284
+ case MONO_TYPE_R8 :
1285
+ msb_shift_value [0 ] = 0xFFFFFFFFFFFFFFC1 ;
1286
+ msb_shift_value [1 ] = 0xFFFFFFFFFFFFFFC2 ;
1287
+ break ;
1288
+ default :
1289
+ g_assert_not_reached ();
1290
+ }
1291
+
1292
+ MonoInst * msb_shift_vec = emit_xconst_v128 (cfg , arg_class , (guint8 * )msb_shift_value );
1293
+ msb_shift_vec -> klass = arg_class ;
1294
+ return msb_shift_vec ;
1295
+ }
1296
+
1206
1297
static MonoInst *
1207
1298
emit_sri_vector (MonoCompile * cfg , MonoMethod * cmethod , MonoMethodSignature * fsig , MonoInst * * args )
1208
1299
{
@@ -1234,7 +1325,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
1234
1325
case SN_ConvertToUInt64 :
1235
1326
case SN_Create :
1236
1327
case SN_Dot :
1237
- case SN_ExtractMostSignificantBits :
1238
1328
case SN_GetElement :
1239
1329
case SN_GetLower :
1240
1330
case SN_GetUpper :
@@ -1542,7 +1632,49 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
1542
1632
return NULL ;
1543
1633
#ifdef TARGET_WASM
1544
1634
return emit_simd_ins_for_sig (cfg , klass , OP_WASM_SIMD_BITMASK , -1 , -1 , fsig , args );
1545
- #else
1635
+ #elif defined(TARGET_ARM64 )
1636
+ if (COMPILE_LLVM (cfg ))
1637
+ return NULL ;
1638
+
1639
+ MonoInst * result_ins = NULL ;
1640
+ MonoClass * arg_class = mono_class_from_mono_type_internal (fsig -> params [0 ]);
1641
+ int size = mono_class_value_size (arg_class , NULL );
1642
+ if (size != 16 )
1643
+ return NULL ;
1644
+
1645
+ MonoInst * msb_mask_vec = emit_msb_vector_mask (cfg , arg_class , arg0_type );
1646
+ MonoInst * and_res_vec = emit_simd_ins_for_binary_op (cfg , arg_class , fsig , args , arg0_type , SN_BitwiseAnd );
1647
+ and_res_vec -> sreg2 = msb_mask_vec -> dreg ;
1648
+
1649
+ MonoInst * msb_shift_vec = emit_msb_shift_vector_constant (cfg , arg_class , arg0_type );
1650
+ MonoInst * shift_res_vec = emit_simd_ins (cfg , arg_class , OP_ARM64_USHL , and_res_vec -> dreg , msb_shift_vec -> dreg );
1651
+ shift_res_vec -> inst_c1 = arg0_type ;
1652
+
1653
+ if (arg0_type == MONO_TYPE_I1 || arg0_type == MONO_TYPE_U1 ) {
1654
+ // Always perform unsigned operations as vector sum and extract operations could sign-extend the result into the GP register
1655
+ // making the final result invalid. This is not needed for wider types as the maximum sum of the extracted MSBs cannot be larger than 8 bits
1656
+ arg0_type = MONO_TYPE_U1 ;
1657
+
1658
+ // In order to sum high and low 64bits of the shifted vector separately, we use a zeroed vector and the extract operation
1659
+ MonoInst * zero_vec = emit_xzero (cfg , arg_class );
1660
+
1661
+ MonoInst * ext_low_vec = emit_simd_ins (cfg , arg_class , OP_ARM64_EXT_IMM , zero_vec -> dreg , shift_res_vec -> dreg );
1662
+ ext_low_vec -> inst_c0 = 8 ;
1663
+ ext_low_vec -> inst_c1 = arg0_type ;
1664
+ MonoInst * sum_low_vec = emit_sum_vector (cfg , fsig -> params [0 ], arg0_type , ext_low_vec );
1665
+
1666
+ MonoInst * ext_high_vec = emit_simd_ins (cfg , arg_class , OP_ARM64_EXT_IMM , shift_res_vec -> dreg , zero_vec -> dreg );
1667
+ ext_high_vec -> inst_c0 = 8 ;
1668
+ ext_high_vec -> inst_c1 = arg0_type ;
1669
+ MonoInst * sum_high_vec = emit_sum_vector (cfg , fsig -> params [0 ], arg0_type , ext_high_vec );
1670
+
1671
+ MONO_EMIT_NEW_BIALU_IMM (cfg , OP_SHL_IMM , sum_high_vec -> dreg , sum_high_vec -> dreg , 8 );
1672
+ EMIT_NEW_BIALU (cfg , result_ins , OP_IOR , sum_high_vec -> dreg , sum_high_vec -> dreg , sum_low_vec -> dreg );
1673
+ } else {
1674
+ result_ins = emit_sum_vector (cfg , fsig -> params [0 ], arg0_type , shift_res_vec );
1675
+ }
1676
+ return result_ins ;
1677
+ #elif defined(TARGET_AMD64 )
1546
1678
return NULL ;
1547
1679
#endif
1548
1680
}
0 commit comments