-
Notifications
You must be signed in to change notification settings - Fork 5k
Handle DelayFree for HW_Category_SIMDByIndexedElement intrinsics #114525
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
src/tests/JIT/Regression/JitBlue/Runtime_114358/Runtime_114358.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// Found by Antigen | ||
// Reduced from 206.63 KB to 1.9 KB. | ||
|
||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Runtime.CompilerServices; | ||
using System.Runtime.Intrinsics; | ||
using System.Runtime.Intrinsics.Arm; | ||
using System.Runtime.Intrinsics.X86; | ||
using System.Numerics; | ||
using Xunit; | ||
|
||
public class TestClass_114358 | ||
{ | ||
public struct S1 | ||
{ | ||
public int int_1; | ||
} | ||
static byte s_byte_4 = 1; | ||
static Vector64<short> s_v64_short_20 = Vector64.Create(94, -2, 3, 3); | ||
static Vector128<byte> s_v128_byte_28 = Vector128.Create((byte)2); | ||
static Vector128<ushort> s_v128_ushort_31 = Vector128.Create((ushort)32766); | ||
Vector64<short> v64_short_70 = Vector64<short>.AllBitsSet; | ||
Vector128<byte> v128_byte_78 = Vector128.CreateScalar((byte)0); | ||
Vector128<short> v128_short_80 = Vector128.Create(-2, 0, 2, 94, 3, 0, 3, 0); | ||
Vector128<ushort> v128_ushort_81 = Vector128<ushort>.AllBitsSet; | ||
private static List<string> toPrint = new List<string>(); | ||
internal void Method0() | ||
{ | ||
unchecked | ||
{ | ||
S1 s1_172 = new S1(); | ||
s_v128_ushort_31 = Vector128.LessThan(s_v128_ushort_31 -= Vector128<ushort>.Zero | v128_ushort_81, AdvSimd.AddWideningUpper(v128_ushort_81 & v128_ushort_81, s_v128_byte_28 = v128_byte_78)); | ||
v128_short_80 = AdvSimd.ExtractVector128(AdvSimd.MultiplyByScalar(v128_short_80 - v128_short_80, AdvSimd.MultiplySubtractByScalar(v64_short_70, s_v64_short_20, v64_short_70)), v128_short_80 - v128_short_80, s_byte_4); | ||
s_v64_short_20 = AdvSimd.ShiftRightLogicalRoundedAdd(v64_short_70 -= v64_short_70 += v64_short_70, v64_short_70 + Vector64<short>.AllBitsSet + v64_short_70 + Vector64<short>.AllBitsSet & v64_short_70, s_byte_4 >>= s1_172.int_1 <<= 15 + 4); | ||
return; | ||
} | ||
} | ||
|
||
[Fact] | ||
public static void Repro() | ||
{ | ||
if (AdvSimd.IsSupported) | ||
{ | ||
new TestClass_114358().Method0(); | ||
} | ||
} | ||
} | ||
/* | ||
Environment: | ||
|
||
set DOTNET_AltJit=Method0 | ||
set DOTNET_AltJitName=clrjit_universal_arm64_x64.dll | ||
set DOTNET_EnableWriteXorExecute=0 | ||
set DOTNET_JitDisasm=Method0 | ||
set DOTNET_JitStressRegs=2 | ||
set DOTNET_TieredCompilation=0 | ||
|
||
Debug: 1639727076 | ||
|
||
Release: 0 | ||
JIT assert failed: | ||
Assertion failed '(targetReg == op1Reg) || (targetReg != op3Reg)' in 'TestClass:Method0():this' during 'Generate code' (IL size 298; hash 0x46e9aa75; FullOpts) | ||
|
||
File: /Users/runner/work/1/s/src/coreclr/jit/hwintrinsiccodegenarm64.cpp Line: 416 | ||
|
||
|
||
*/ |
8 changes: 8 additions & 0 deletions
8
src/tests/JIT/Regression/JitBlue/Runtime_114358/Runtime_114358.csproj
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
<PropertyGroup> | ||
<Optimize>True</Optimize> | ||
</PropertyGroup> | ||
<ItemGroup> | ||
<Compile Include="$(MSBuildProjectName).cs" /> | ||
</ItemGroup> | ||
</Project> |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for finding this. To keep in the existing style, could you:
Expand
getDelayFreeOperand()
to:GenTree* LinearScan::getDelayFreeOperand(GenTreeHWIntrinsic* intrinsicTree, bool embedded, bool *forceDelay)
Where
forceDelay
is a return argument.Move your
else if
intogetDelayFreeOperand()
(I guess it'll go into the default case) and if it passes, set*forceDelay=true
and return nullptr.Then in line 1500 ("Only build as delay free use if register types match") add a
|| forceDelay == true
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That was my first choice, but
getDelayFreeOperand()
is outside this for loop and that will returnnullptr
even forop1
. If that happens, we won't be able totgtPrefUse = delayUse
i.e. we won't be able to prefer the register allocated forop1
fortarget
as well. Usually whentargetReg == op1Reg
, we avoid an extra move before the RMW instruction.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I hadn't really taken the
for
loop into account properly.For all these
SIMDByIndexedElement
intrisnics, willop1
always be a delay operand?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you see the code, it's not delayed as such, but rather is a way for us to tell that it is the preferred register for target. It is not marked as delay-free.