Skip to content

Commit 46c3ac3

Browse files
authored
Handle DelayFree for HW_Category_SIMDByIndexedElement intrinsics (#114525)
* Handle DelayFree for HW_Category_SIMDByIndexedElement intrinsics * Add test case
1 parent a01a7d8 commit 46c3ac3

File tree

3 files changed

+94
-0
lines changed

3 files changed

+94
-0
lines changed

src/coreclr/jit/lsraarm64.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,6 +1481,21 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
14811481
{
14821482
srcCount += BuildContainedCselUses(containedCselOp, delayFreeOp, candidates);
14831483
}
1484+
else if ((intrin.category == HW_Category_SIMDByIndexedElement) && (genTypeSize(intrin.baseType) == 2) && !HWIntrinsicInfo::HasImmediateOperand(intrin.id))
1485+
{
1486+
// Some "Advanced SIMD scalar x indexed element" and "Advanced SIMD vector x indexed element" instructions (e.g.
1487+
// "MLA (by element)") have an encoding that restricts which registers can be used for the indexed element when
1488+
// the element size is H (i.e. 2 bytes).
1489+
if (((opNum == 2) || (opNum == 3)))
1490+
{
1491+
// For those intrinsics, just build the uses as delay-free, so their registers do not conflict with the definition.
1492+
srcCount += BuildDelayFreeUses(operand, nullptr, candidates);
1493+
}
1494+
else
1495+
{
1496+
srcCount += BuildOperandUses(operand, candidates);
1497+
}
1498+
}
14841499
// Only build as delay free use if register types match
14851500
else if ((delayFreeOp != nullptr) &&
14861501
(varTypeUsesSameRegType(delayFreeOp->TypeGet(), operand->TypeGet()) ||
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// Found by Antigen
4+
// Reduced from 206.63 KB to 1.9 KB.
5+
6+
7+
using System;
8+
using System.Collections.Generic;
9+
using System.Runtime.CompilerServices;
10+
using System.Runtime.Intrinsics;
11+
using System.Runtime.Intrinsics.Arm;
12+
using System.Runtime.Intrinsics.X86;
13+
using System.Numerics;
14+
using Xunit;
15+
16+
// Reduced repro: Method0 evaluates a short sequence of AdvSimd intrinsic expressions and
// Repro() invokes it. The test contains no Assert calls; it only needs Method0 to run.
// The expressions are kept exactly as reduced - do not simplify or reorder them.
public class TestClass_114358
17+
{
18+
// Only s1_172.int_1 is used, as part of the shift-count expression in the last statement of Method0.
public struct S1
19+
{
20+
public int int_1;
21+
}
22+
// Static inputs read and/or overwritten by Method0.
static byte s_byte_4 = 1;
23+
static Vector64<short> s_v64_short_20 = Vector64.Create(94, -2, 3, 3);
24+
static Vector128<byte> s_v128_byte_28 = Vector128.Create((byte)2);
25+
static Vector128<ushort> s_v128_ushort_31 = Vector128.Create((ushort)32766);
26+
// Instance inputs read and/or overwritten by Method0.
Vector64<short> v64_short_70 = Vector64<short>.AllBitsSet;
27+
Vector128<byte> v128_byte_78 = Vector128.CreateScalar((byte)0);
28+
Vector128<short> v128_short_80 = Vector128.Create(-2, 0, 2, 94, 3, 0, 3, 0);
29+
Vector128<ushort> v128_ushort_81 = Vector128<ushort>.AllBitsSet;
30+
// Not referenced anywhere else in this class.
private static List<string> toPrint = new List<string>();
31+
// Executes three vector assignments inside an unchecked block. The second statement passes the
// result of AdvSimd.MultiplySubtractByScalar (Vector64<short> operands) as the scalar operand of
// AdvSimd.MultiplyByScalar, i.e. 2-byte element "by scalar" intrinsics.
internal void Method0()
32+
{
33+
unchecked
34+
{
35+
S1 s1_172 = new S1();
36+
s_v128_ushort_31 = Vector128.LessThan(s_v128_ushort_31 -= Vector128<ushort>.Zero | v128_ushort_81, AdvSimd.AddWideningUpper(v128_ushort_81 & v128_ushort_81, s_v128_byte_28 = v128_byte_78));
37+
v128_short_80 = AdvSimd.ExtractVector128(AdvSimd.MultiplyByScalar(v128_short_80 - v128_short_80, AdvSimd.MultiplySubtractByScalar(v64_short_70, s_v64_short_20, v64_short_70)), v128_short_80 - v128_short_80, s_byte_4);
38+
s_v64_short_20 = AdvSimd.ShiftRightLogicalRoundedAdd(v64_short_70 -= v64_short_70 += v64_short_70, v64_short_70 + Vector64<short>.AllBitsSet + v64_short_70 + Vector64<short>.AllBitsSet & v64_short_70, s_byte_4 >>= s1_172.int_1 <<= 15 + 4);
39+
return;
40+
}
41+
}
42+
43+
// xUnit entry point: creates a fresh instance and runs Method0 once.
[Fact]
44+
public static void Repro()
45+
{
46+
// Only exercise Method0 when AdvSimd is reported as supported.
if (AdvSimd.IsSupported)
47+
{
48+
new TestClass_114358().Method0();
49+
}
50+
}
51+
}
52+
/*
53+
Environment:
54+
55+
set DOTNET_AltJit=Method0
56+
set DOTNET_AltJitName=clrjit_universal_arm64_x64.dll
57+
set DOTNET_EnableWriteXorExecute=0
58+
set DOTNET_JitDisasm=Method0
59+
set DOTNET_JitStressRegs=2
60+
set DOTNET_TieredCompilation=0
61+
62+
Debug: 1639727076
63+
64+
Release: 0
65+
JIT assert failed:
66+
Assertion failed '(targetReg == op1Reg) || (targetReg != op3Reg)' in 'TestClass:Method0():this' during 'Generate code' (IL size 298; hash 0x46e9aa75; FullOpts)
67+
68+
File: /Users/runner/work/1/s/src/coreclr/jit/hwintrinsiccodegenarm64.cpp Line: 416
69+
70+
71+
*/
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<Optimize>True</Optimize>
4+
</PropertyGroup>
5+
<ItemGroup>
6+
<Compile Include="$(MSBuildProjectName).cs" />
7+
</ItemGroup>
8+
</Project>

0 commit comments

Comments
 (0)