From 8f1fdb8c2d226f4ccfd24f46490c3add9ff6dee7 Mon Sep 17 00:00:00 2001 From: "David L. Jones" Date: Wed, 10 May 2017 23:56:21 +0000 Subject: [PATCH] Revert "[SDAG] Relax conditions under stores of loaded values can be merged" This reverts r302712. The change fails with ASAN enabled: ERROR: AddressSanitizer: use-after-poison on address ... at ... READ of size 2 at ... thread T0 #0 ... in llvm::SDNode::getNumValues() const /include/llvm/CodeGen/SelectionDAGNodes.h:855:42 #1 ... in llvm::SDNode::hasAnyUseOfValue(unsigned int) const /lib/CodeGen/SelectionDAG/SelectionDAG.cpp:7270:3 #2 ... in llvm::SDValue::use_empty() const include/llvm/CodeGen/SelectionDAGNodes.h:1042:17 #3 ... in (anonymous namespace)::DAGCombiner::MergeConsecutiveStores(llvm::StoreSDNode*) /lib/CodeGen/SelectionDAG/DAGCombiner.cpp:12944:7 Reviewers: niravd Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D33081 --- llvm/include/llvm/CodeGen/ISDOpcodes.h | 4 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 32 ++++++------------- .../X86/merge_store_duplicated_loads.ll | 28 ++++++++++------ 3 files changed, 28 insertions(+), 36 deletions(-) diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index e00668cb7913..f2a9a9f73ca6 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -44,10 +44,6 @@ namespace ISD { /// EntryToken - This is the marker used to indicate the start of a region. EntryToken, - /// DummyNode - Temporary node for node replacement. These nodes - /// should not persist beyond their introduction. - DummyNode, - /// TokenFactor - This node takes multiple tokens as input and produces a /// single token result. This is used to represent the fact that the operand /// operators are independent of each other. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index df776c58001c..28354c1f4574 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12783,6 +12783,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { LoadSDNode *Ld = dyn_cast(St->getValue()); if (!Ld) break; + // Loads must only have one use. + if (!Ld->hasNUsesOfValue(1, 0)) + break; + // The memory operands must not be volatile. if (Ld->isVolatile() || Ld->isIndexed()) break; @@ -12791,6 +12795,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (Ld->getExtensionType() != ISD::NON_EXTLOAD) break; + // The stored memory type must be the same. + if (Ld->getMemoryVT() != MemVT) + break; + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); // If this is not the first ptr that we check. if (LdBasePtr.Base.getNode()) { @@ -12922,28 +12930,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Transfer chain users from old loads to the new load. for (unsigned i = 0; i < NumElem; ++i) { LoadSDNode *Ld = cast(LoadNodes[i].MemNode); - if (SDValue(Ld, 0).hasOneUse()) { - // Only the original store used value so just replace chain. - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - } else { - // Multiple uses exist. Keep the old load in line with the new - // load, i.e. Replace chains using Ld's chain with a - // TokenFactor. Create a temporary node to serve as a placer so - // we do not replace the reference to original Load's chain in - // the TokenFactor. - SDValue TokenDummy = DAG.getNode(ISD::DummyNode, SDLoc(Ld), MVT::Other); - - // Replace all references to Load's output chain to TokenDummy - CombineTo(Ld, SDValue(Ld, 0), TokenDummy, false); - SDValue Token = - DAG.getNode(ISD::TokenFactor, SDLoc(Ld), MVT::Other, SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - // Replace all uses of TokenDummy from itself to Ld's output chain. - CombineTo(TokenDummy.getNode(), Token); - assert(TokenDummy.use_empty() && "TokenDummy should be unused"); - AddToWorklist(Ld); - } + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); } // Replace the all stores with the new store. diff --git a/llvm/test/CodeGen/X86/merge_store_duplicated_loads.ll b/llvm/test/CodeGen/X86/merge_store_duplicated_loads.ll index 9303d1dfabd9..cfc39035e403 100644 --- a/llvm/test/CodeGen/X86/merge_store_duplicated_loads.ll +++ b/llvm/test/CodeGen/X86/merge_store_duplicated_loads.ll @@ -1,15 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -o - | FileCheck %s -; PR32086 + target triple = "x86_64-unknown-linux-gnu" define void @merge_double(double* noalias nocapture %st, double* noalias nocapture readonly %ld) #0 { ; CHECK-LABEL: merge_double: ; CHECK: # BB#0: -; CHECK-NEXT: movups (%rsi), %xmm0 -; CHECK-NEXT: movups %xmm0, (%rdi) -; CHECK-NEXT: movups %xmm0, 16(%rdi) +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, (%rdi) +; CHECK-NEXT: movsd %xmm1, 8(%rdi) +; CHECK-NEXT: movsd %xmm0, 16(%rdi) +; CHECK-NEXT: movsd %xmm1, 24(%rdi) ; CHECK-NEXT: retq %ld_idx1 = getelementptr inbounds double, double* %ld, i64 1 %ld0 = load double, double* %ld, align 8, !tbaa !2 @@ -29,9 +32,12 @@ define void @merge_double(double* noalias nocapture %st, double* noalias nocaptu define void @merge_loadstore_int(i64* noalias nocapture readonly %p, i64* noalias nocapture %q) local_unnamed_addr #0 { ; CHECK-LABEL: merge_loadstore_int: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movups (%rdi), %xmm0 -; CHECK-NEXT: movups %xmm0, (%rsi) -; CHECK-NEXT: movups %xmm0, 16(%rsi) +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rcx +; CHECK-NEXT: movq %rax, (%rsi) +; CHECK-NEXT: movq %rcx, 8(%rsi) +; CHECK-NEXT: movq %rax, 16(%rsi) +; CHECK-NEXT: movq %rcx, 24(%rsi) ; CHECK-NEXT: retq entry: %0 = load i64, i64* %p, align 8, !tbaa !1 @@ -51,9 +57,11 @@ define i64 @merge_loadstore_int_with_extra_use(i64* noalias nocapture readonly % ; CHECK-LABEL: merge_loadstore_int_with_extra_use: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: movups (%rdi), %xmm0 -; CHECK-NEXT: movups %xmm0, (%rsi) -; CHECK-NEXT: movups %xmm0, 16(%rsi) +; CHECK-NEXT: movq 8(%rdi), %rcx +; CHECK-NEXT: movq %rax, (%rsi) +; CHECK-NEXT: movq %rcx, 8(%rsi) +; CHECK-NEXT: movq %rax, 16(%rsi) +; CHECK-NEXT: movq %rcx, 24(%rsi) ; CHECK-NEXT: retq entry: %0 = load i64, i64* %p, align 8, !tbaa !1