| commit b1777b04dc4b1a9fee0e7effa7e177892ab32ef0 |
| Author: Nico Weber <thakis@chromium.org> |
| Date: Sun Jul 25 17:31:02 2021 -0400 |
| |
| Revert "[VPlan] Add recipe for first-order rec phis, make splicing explicit." |
| |
| Makes clang crash: https://reviews.llvm.org/D105008#2903350 |
| This reverts commit d2a73fb44ea0b8c981e4b923f811f18793fc4770. |
| |
| Also revert a minor formatting follow-up: |
| This reverts commit 82834a673246f27a541ffcc57e0eb65b008102ef. |
| |
| diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp |
| index 9e645cd696d0..5d5bec53b919 100644 |
| --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp |
| +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp |
| @@ -4166,10 +4166,14 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) { |
| // the incoming edges. |
| VPBasicBlock *Header = State.Plan->getEntry()->getEntryBasicBlock(); |
| for (VPRecipeBase &R : Header->phis()) { |
| - if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) |
| + auto *PhiR = dyn_cast<VPWidenPHIRecipe>(&R); |
| + if (!PhiR) |
| + continue; |
| + auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); |
| + if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(PhiR)) { |
| fixReduction(ReductionPhi, State); |
| - else if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) |
| - fixFirstOrderRecurrence(FOR, State); |
| + } else if (Legal->isFirstOrderRecurrence(OrigPhi)) |
| + fixFirstOrderRecurrence(PhiR, State); |
| } |
| } |
| |
| @@ -4198,7 +4202,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, |
| // |
| // In this example, s1 is a recurrence because it's value depends on the |
| // previous iteration. In the first phase of vectorization, we created a |
| - // vector phi v1 for s1. We now complete the vectorization and produce the |
| + // temporary value for s1. We now complete the vectorization and produce the |
| // shorthand vector IR shown below (for VF = 4, UF = 1). |
| // |
| // vector.ph: |
| @@ -4224,19 +4228,82 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, |
| // After execution completes the vector loop, we extract the next value of |
| // the recurrence (x) to use as the initial value in the scalar loop. |
| |
| + auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue(); |
| + |
| auto *IdxTy = Builder.getInt32Ty(); |
| - auto *VecPhi = cast<PHINode>(State.get(PhiR, 0)); |
| + auto *One = ConstantInt::get(IdxTy, 1); |
| + |
| + // Create a vector from the initial value. |
| + auto *VectorInit = ScalarInit; |
| + if (VF.isVector()) { |
| + Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); |
| + auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF); |
| + auto *LastIdx = Builder.CreateSub(RuntimeVF, One); |
| + VectorInit = Builder.CreateInsertElement( |
| + PoisonValue::get(VectorType::get(VectorInit->getType(), VF)), |
| + VectorInit, LastIdx, "vector.recur.init"); |
| + } |
| |
| - // Fix the latch value of the new recurrence in the vector loop. |
| VPValue *PreviousDef = PhiR->getBackedgeValue(); |
| - Value *Incoming = State.get(PreviousDef, UF - 1); |
| + // We constructed a temporary phi node in the first phase of vectorization. |
| + // This phi node will eventually be deleted. |
| + Builder.SetInsertPoint(cast<Instruction>(State.get(PhiR, 0))); |
| + |
| + // Create a phi node for the new recurrence. The current value will either be |
| + // the initial value inserted into a vector or loop-varying vector value. |
| + auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur"); |
| + VecPhi->addIncoming(VectorInit, LoopVectorPreHeader); |
| + |
| + // Get the vectorized previous value of the last part UF - 1. It appears last |
| + // among all unrolled iterations, due to the order of their construction. |
| + Value *PreviousLastPart = State.get(PreviousDef, UF - 1); |
| + |
| + // Find and set the insertion point after the previous value if it is an |
| + // instruction. |
| + BasicBlock::iterator InsertPt; |
| + // Note that the previous value may have been constant-folded so it is not |
| + // guaranteed to be an instruction in the vector loop. |
| + // FIXME: Loop invariant values do not form recurrences. We should deal with |
| + // them earlier. |
| + if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart)) |
| + InsertPt = LoopVectorBody->getFirstInsertionPt(); |
| + else { |
| + Instruction *PreviousInst = cast<Instruction>(PreviousLastPart); |
| + if (isa<PHINode>(PreviousLastPart)) |
| + // If the previous value is a phi node, we should insert after all the phi |
| + // nodes in the block containing the PHI to avoid breaking basic block |
| + // verification. Note that the basic block may be different to |
| + // LoopVectorBody, in case we predicate the loop. |
| + InsertPt = PreviousInst->getParent()->getFirstInsertionPt(); |
| + else |
| + InsertPt = ++PreviousInst->getIterator(); |
| + } |
| + Builder.SetInsertPoint(&*InsertPt); |
| + |
| + // The vector from which to take the initial value for the current iteration |
| + // (actual or unrolled). Initially, this is the vector phi node. |
| + Value *Incoming = VecPhi; |
| + |
| + // Shuffle the current and previous vector and update the vector parts. |
| + for (unsigned Part = 0; Part < UF; ++Part) { |
| + Value *PreviousPart = State.get(PreviousDef, Part); |
| + Value *PhiPart = State.get(PhiR, Part); |
| + auto *Shuffle = VF.isVector() |
| + ? Builder.CreateVectorSplice(Incoming, PreviousPart, -1) |
| + : Incoming; |
| + PhiPart->replaceAllUsesWith(Shuffle); |
| + cast<Instruction>(PhiPart)->eraseFromParent(); |
| + State.reset(PhiR, Shuffle, Part); |
| + Incoming = PreviousPart; |
| + } |
| + |
| + // Fix the latch value of the new recurrence in the vector loop. |
| VecPhi->addIncoming(Incoming, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); |
| |
| // Extract the last vector element in the middle block. This will be the |
| // initial value for the recurrence when jumping to the scalar loop. |
| auto *ExtractForScalar = Incoming; |
| if (VF.isVector()) { |
| - auto *One = ConstantInt::get(IdxTy, 1); |
| Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); |
| auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF); |
| auto *LastIdx = Builder.CreateSub(RuntimeVF, One); |
| @@ -4265,7 +4332,6 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, |
| Builder.SetInsertPoint(&*LoopScalarPreHeader->begin()); |
| PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingValue()); |
| auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init"); |
| - auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue(); |
| for (auto *BB : predecessors(LoopScalarPreHeader)) { |
| auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit; |
| Start->addIncoming(Incoming, BB); |
| @@ -4721,6 +4787,18 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, |
| // Phi nodes have cycles, so we need to vectorize them in two stages. This is |
| // stage #1: We create a new vector PHI node with no incoming edges. We'll use |
| // this value when we vectorize all of the instructions that use the PHI. |
| + if (Legal->isFirstOrderRecurrence(P)) { |
| + Type *VecTy = State.VF.isScalar() |
| + ? PN->getType() |
| + : VectorType::get(PN->getType(), State.VF); |
| + |
| + for (unsigned Part = 0; Part < State.UF; ++Part) { |
| + Value *EntryPart = PHINode::Create( |
| + VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); |
| + State.set(PhiR, EntryPart, Part); |
| + } |
| + return; |
| + } |
| |
| assert(!Legal->isReductionVariable(P) && |
| "reductions should be handled elsewhere"); |
| @@ -8993,7 +9071,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, |
| CM.isInLoopReduction(Phi), |
| CM.useOrderedReductions(RdxDesc)); |
| } else { |
| - PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV); |
| + PhiRecipe = new VPWidenPHIRecipe(Phi, *StartV); |
| } |
| |
| // Record the incoming value from the backedge, so we can add the incoming |
| @@ -9234,22 +9312,23 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( |
| // --------------------------------------------------------------------------- |
| |
| // Apply Sink-After legal constraints. |
| - auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * { |
| - auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent()); |
| - if (Region && Region->isReplicator()) { |
| - assert(Region->getNumSuccessors() == 1 && |
| - Region->getNumPredecessors() == 1 && "Expected SESE region!"); |
| - assert(R->getParent()->size() == 1 && |
| - "A recipe in an original replicator region must be the only " |
| - "recipe in its block"); |
| - return Region; |
| - } |
| - return nullptr; |
| - }; |
| for (auto &Entry : SinkAfter) { |
| VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); |
| VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); |
| |
| + auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * { |
| + auto *Region = |
| + dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent()); |
| + if (Region && Region->isReplicator()) { |
| + assert(Region->getNumSuccessors() == 1 && |
| + Region->getNumPredecessors() == 1 && "Expected SESE region!"); |
| + assert(R->getParent()->size() == 1 && |
| + "A recipe in an original replicator region must be the only " |
| + "recipe in its block"); |
| + return Region; |
| + } |
| + return nullptr; |
| + }; |
| auto *TargetRegion = GetReplicateRegion(Target); |
| auto *SinkRegion = GetReplicateRegion(Sink); |
| if (!SinkRegion) { |
| @@ -9281,8 +9360,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( |
| VPBlockUtils::connectBlocks(SinkRegion, TargetSucc); |
| } else { |
| // The sink source is in a replicate region, we need to move the whole |
| - // replicate region, which should only contain a single recipe in the |
| - // main block. |
| + // replicate region, which should only contain a single recipe in the main |
| + // block. |
| auto *SplitBlock = |
| Target->getParent()->splitAt(std::next(Target->getIterator())); |
| |
| @@ -9296,29 +9375,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( |
| } |
| } |
| |
| - // Introduce a recipe to combine the incoming and previous values of a |
| - // first-order recurrence. |
| - for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) { |
| - auto *RecurPhi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R); |
| - if (!RecurPhi) |
| - continue; |
| - |
| - auto *RecurSplice = cast<VPInstruction>( |
| - Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice, |
| - {RecurPhi, RecurPhi->getBackedgeValue()})); |
| - |
| - VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe(); |
| - if (auto *Region = GetReplicateRegion(PrevRecipe)) { |
| - VPBasicBlock *Succ = cast<VPBasicBlock>(Region->getSingleSuccessor()); |
| - RecurSplice->moveBefore(*Succ, Succ->getFirstNonPhi()); |
| - } else |
| - RecurSplice->moveAfter(PrevRecipe); |
| - RecurPhi->replaceAllUsesWith(RecurSplice); |
| - // Set the first operand of RecurSplice to RecurPhi again, after replacing |
| - // all users. |
| - RecurSplice->setOperand(0, RecurPhi); |
| - } |
| - |
| // Interleave memory: for each Interleave Group we marked earlier as relevant |
| // for this VPlan, replace the Recipes widening its memory instructions with a |
| // single VPInterleaveRecipe at its insertion point. |
| diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp |
| index 5f39fe1c17a3..344c9974f711 100644 |
| --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp |
| +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp |
| @@ -687,30 +687,6 @@ void VPInstruction::generateInstruction(VPTransformState &State, |
| State.set(this, Call, Part); |
| break; |
| } |
| - case VPInstruction::FirstOrderRecurrenceSplice: { |
| - // Generate code to combine the previous and current values in vector v3. |
| - // |
| - // vector.ph: |
| - // v_init = vector(..., ..., ..., a[-1]) |
| - // br vector.body |
| - // |
| - // vector.body |
| - // i = phi [0, vector.ph], [i+4, vector.body] |
| - // v1 = phi [v_init, vector.ph], [v2, vector.body] |
| - // v2 = a[i, i+1, i+2, i+3]; |
| - // v3 = vector(v1(3), v2(0, 1, 2)) |
| - |
| - // For the first part, use the recurrence phi (v1), otherwise v2. |
| - auto *V1 = State.get(getOperand(0), 0); |
| - Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); |
| - if (!PartMinus1->getType()->isVectorTy()) { |
| - State.set(this, PartMinus1, Part); |
| - } else { |
| - Value *V2 = State.get(getOperand(1), Part); |
| - State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part); |
| - } |
| - break; |
| - } |
| default: |
| llvm_unreachable("Unsupported opcode for instruction"); |
| } |
| @@ -753,9 +729,7 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, |
| case VPInstruction::ActiveLaneMask: |
| O << "active lane mask"; |
| break; |
| - case VPInstruction::FirstOrderRecurrenceSplice: |
| - O << "first-order splice"; |
| - break; |
| + |
| default: |
| O << Instruction::getOpcodeName(getOpcode()); |
| } |
| @@ -1248,43 +1222,6 @@ void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, |
| } |
| #endif |
| |
| -void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { |
| - auto &Builder = State.Builder; |
| - // Create a vector from the initial value. |
| - auto *VectorInit = getStartValue()->getLiveInIRValue(); |
| - |
| - Type *VecTy = State.VF.isScalar() |
| - ? VectorInit->getType() |
| - : VectorType::get(VectorInit->getType(), State.VF); |
| - |
| - if (State.VF.isVector()) { |
| - auto *IdxTy = Builder.getInt32Ty(); |
| - auto *One = ConstantInt::get(IdxTy, 1); |
| - IRBuilder<>::InsertPointGuard Guard(Builder); |
| - Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); |
| - auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); |
| - auto *LastIdx = Builder.CreateSub(RuntimeVF, One); |
| - VectorInit = Builder.CreateInsertElement( |
| - PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); |
| - } |
| - |
| - // Create a phi node for the new recurrence. |
| - PHINode *EntryPart = PHINode::Create( |
| - VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt()); |
| - EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader); |
| - State.set(this, EntryPart, 0); |
| -} |
| - |
| -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| -void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, |
| - VPSlotTracker &SlotTracker) const { |
| - O << Indent << "FIRST-ORDER-RECURRENCE-PHI "; |
| - printAsOperand(O, SlotTracker); |
| - O << " = phi "; |
| - printOperands(O, SlotTracker); |
| -} |
| -#endif |
| - |
| void VPReductionPHIRecipe::execute(VPTransformState &State) { |
| PHINode *PN = cast<PHINode>(getUnderlyingValue()); |
| auto &Builder = State.Builder; |
| diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h |
| index bdf09d15c27f..d92ea16ad058 100644 |
| --- a/llvm/lib/Transforms/Vectorize/VPlan.h |
| +++ b/llvm/lib/Transforms/Vectorize/VPlan.h |
| @@ -776,10 +776,7 @@ class VPInstruction : public VPRecipeBase, public VPValue { |
| public: |
| /// VPlan opcodes, extending LLVM IR with idiomatics instructions. |
| enum { |
| - FirstOrderRecurrenceSplice = |
| - Instruction::OtherOpsEnd + 1, // Combines the incoming and previous |
| - // values of a first-order recurrence. |
| - Not, |
| + Not = Instruction::OtherOpsEnd + 1, |
| ICmpULE, |
| SLPLoad, |
| SLPStore, |
| @@ -1063,12 +1060,8 @@ class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { |
| SmallVector<VPBasicBlock *, 2> IncomingBlocks; |
| |
| protected: |
| - VPWidenPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi, |
| - VPValue *Start = nullptr) |
| - : VPRecipeBase(VPDefID, {}), VPValue(VPVID, Phi, this) { |
| - if (Start) |
| - addOperand(Start); |
| - } |
| + VPWidenPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi) |
| + : VPRecipeBase(VPDefID, {}), VPValue(VPVID, Phi, this) {} |
| |
| public: |
| /// Create a VPWidenPHIRecipe for \p Phi |
| @@ -1085,12 +1078,10 @@ public: |
| /// Method to support type inquiry through isa, cast, and dyn_cast. |
| static inline bool classof(const VPRecipeBase *B) { |
| return B->getVPDefID() == VPRecipeBase::VPWidenPHISC || |
| - B->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC || |
| B->getVPDefID() == VPRecipeBase::VPReductionPHISC; |
| } |
| static inline bool classof(const VPValue *V) { |
| return V->getVPValueID() == VPValue::VPVWidenPHISC || |
| - V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC || |
| V->getVPValueID() == VPValue::VPVReductionPHISC; |
| } |
| |
| @@ -1115,12 +1106,6 @@ public: |
| return getOperand(1); |
| } |
| |
| - /// Returns the backedge value as a recipe. The backedge value is guaranteed |
| - /// to be a recipe. |
| - VPRecipeBase *getBackedgeRecipe() { |
| - return cast<VPRecipeBase>(getBackedgeValue()->getDef()); |
| - } |
| - |
| /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. |
| void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) { |
| addOperand(IncomingV); |
| @@ -1134,34 +1119,6 @@ public: |
| VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; } |
| }; |
| |
| -/// A recipe for handling first-order recurrence phis. The start value is the |
| -/// first operand of the recipe and the incoming value from the backedge is the |
| -/// second operand. |
| -struct VPFirstOrderRecurrencePHIRecipe : public VPWidenPHIRecipe { |
| - VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start) |
| - : VPWidenPHIRecipe(VPVFirstOrderRecurrencePHISC, |
| - VPFirstOrderRecurrencePHISC, Phi, &Start) {} |
| - |
| - /// Method to support type inquiry through isa, cast, and dyn_cast. |
| - static inline bool classof(const VPRecipeBase *R) { |
| - return R->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC; |
| - } |
| - static inline bool classof(const VPWidenPHIRecipe *D) { |
| - return D->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC; |
| - } |
| - static inline bool classof(const VPValue *V) { |
| - return V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC; |
| - } |
| - |
| - void execute(VPTransformState &State) override; |
| - |
| -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| - /// Print the recipe. |
| - void print(raw_ostream &O, const Twine &Indent, |
| - VPSlotTracker &SlotTracker) const override; |
| -#endif |
| -}; |
| - |
| /// A recipe for handling reduction phis. The start value is the first operand |
| /// of the recipe and the incoming value from the backedge is the second |
| /// operand. |
| @@ -1181,9 +1138,10 @@ public: |
| VPReductionPHIRecipe(PHINode *Phi, RecurrenceDescriptor &RdxDesc, |
| VPValue &Start, bool IsInLoop = false, |
| bool IsOrdered = false) |
| - : VPWidenPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi, &Start), |
| + : VPWidenPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi), |
| RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) { |
| assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop"); |
| + addOperand(&Start); |
| } |
| |
| ~VPReductionPHIRecipe() override = default; |
| diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp |
| index 52b5ae083d0e..500a679f618f 100644 |
| --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp |
| +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp |
| @@ -221,6 +221,27 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { |
| // region. Such dependencies should be rejected during earlier dependence |
| // checks, which guarantee accesses can be re-ordered for vectorization. |
| // |
| + // If a recipe is used by a first-order recurrence phi, we cannot move it at |
| + // the moment: a recipe R feeding a first order recurrence phi must allow |
| + // for a *vector* shuffle to be inserted immediately after it, and therefore |
| + // if R is *scalarized and predicated* it must appear last in its basic |
| + // block. In addition, other recipes may need to "sink after" R, so best if |
| + // R not be moved at all. |
| + auto IsImmovableRecipe = [](VPRecipeBase &R) { |
| + assert(R.getNumDefinedValues() <= 1 && |
| + "no multi-defs are expected in predicated blocks"); |
| + for (VPUser *U : R.getVPSingleValue()->users()) { |
| + auto *UI = dyn_cast<VPRecipeBase>(U); |
| + if (!UI) |
| + continue; |
| + if (isa<VPWidenPHIRecipe>(UI) && !isa<VPReductionPHIRecipe>(UI)) |
| + return true; |
| + } |
| + return false; |
| + }; |
| + if (any_of(*Then1, IsImmovableRecipe)) |
| + continue; |
| + |
| // Move recipes to the successor region. |
| for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1))) |
| ToMove.moveBefore(*Then2, Then2->getFirstNonPhi()); |
| diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h |
| index fd92201614df..866b0602e80c 100644 |
| --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h |
| +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h |
| @@ -101,7 +101,6 @@ public: |
| |
| // Phi-like VPValues. Need to be kept together. |
| VPVBlendSC, |
| - VPVFirstOrderRecurrencePHISC, |
| VPVWidenPHISC, |
| VPVWidenCanonicalIVSC, |
| VPVWidenIntOrFpInductionSC, |
| @@ -332,7 +331,6 @@ public: |
| |
| // Phi-like recipes. Need to be kept together. |
| VPBlendSC, |
| - VPFirstOrderRecurrencePHISC, |
| VPWidenPHISC, |
| VPWidenCanonicalIVSC, |
| VPWidenIntOrFpInductionSC, |
| diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll |
| index cad57883f417..8b8ca971cf76 100644 |
| --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll |
| +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll |
| @@ -16,9 +16,9 @@ define void @can_sink_after_store(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr |
| ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[IDX_PHI_TRANS]], align 4 |
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer |
| +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| @@ -100,9 +100,9 @@ define void @sink_sdiv(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 { |
| ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[IDX_PHI_TRANS]], align 4 |
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer |
| +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| @@ -183,9 +183,9 @@ define void @can_sink_with_additional_user(i32 %x, i32* %ptr, i64 %tc) { |
| ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[IDX_PHI_TRANS]], align 4 |
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer |
| +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| @@ -502,7 +502,7 @@ define i16 @multiple_exit(i16* %p, i32 %n) { |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> |
| ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
| -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] |
| +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 |
| ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 |
| @@ -523,7 +523,7 @@ define i16 @multiple_exit(i16* %p, i32 %n) { |
| ; CHECK-NEXT: store i16 [[SCALAR_RECUR]], i16* [[B]], align 4 |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 |
| ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 |
| -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]] |
| +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]] |
| ; CHECK: if.end: |
| ; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[SCALAR_RECUR]], [[FOR_COND]] ] |
| ; CHECK-NEXT: ret i16 [[REC_LCSSA]] |
| @@ -586,7 +586,7 @@ define i16 @multiple_exit2(i16* %p, i32 %n) { |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> |
| ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
| -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] |
| +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 |
| ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 |
| @@ -607,7 +607,7 @@ define i16 @multiple_exit2(i16* %p, i32 %n) { |
| ; CHECK-NEXT: store i16 [[SCALAR_RECUR]], i16* [[B]], align 4 |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 |
| ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 |
| -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]] |
| +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]] |
| ; CHECK: if.end: |
| ; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_COND]] ], [ 10, [[FOR_BODY]] ] |
| ; CHECK-NEXT: ret i16 [[REC_LCSSA]] |
| diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll |
| index bbc357617456..8803a609da5d 100644 |
| --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll |
| +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll |
| @@ -10,7 +10,7 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize { |
| ; CHECK-LABEL: sink_replicate_region_1 |
| ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { |
| ; CHECK-NEXT: loop: |
| -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> |
| +; CHECK-NEXT: WIDEN-PHI ir<%0> = phi ir<0>, ir<%conv> |
| ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next |
| ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> |
| ; CHECK-NEXT: Successor(s): loop.0 |
| @@ -37,7 +37,6 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize { |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: loop.1: |
| ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6> |
| -; CHECK-NEXT: EMIT vp<%8> = first-order splice ir<%0> ir<%conv> |
| ; CHECK-NEXT: Successor(s): pred.srem |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <xVFxUF> pred.srem: { |
| @@ -47,17 +46,17 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize { |
| ; CHECK-NEXT: CondBit: vp<%3> (loop) |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.srem.if: |
| -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%8>, ir<%x> (S->V) |
| +; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V) |
| ; CHECK-NEXT: Successor(s): pred.srem.continue |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.srem.continue: |
| -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem> |
| +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): loop.1.split |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: loop.1.split: |
| -; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<%10> |
| +; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<%9> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| @@ -84,14 +83,13 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize { |
| ; CHECK-LABEL: sink_replicate_region_2 |
| ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { |
| ; CHECK-NEXT: loop: |
| -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> |
| +; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> |
| ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next |
| ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> |
| ; CHECK-NEXT: Successor(s): loop.0 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: loop.0: |
| ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> |
| -; CHECK-NEXT: EMIT vp<%5> = first-order splice ir<%recur> ir<%recur.next> |
| ; CHECK-NEXT: Successor(s): loop.0.split |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: loop.0.split: |
| @@ -104,14 +102,14 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize { |
| ; CHECK-NEXT: CondBit: vp<%3> (loop) |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.store.if: |
| -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%5>, ir<%x> |
| +; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> |
| ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next> |
| ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> |
| ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep> |
| ; CHECK-NEXT: Successor(s): pred.store.continue |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.store.continue: |
| -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem> |
| +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): loop.1 |
| @@ -143,7 +141,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize |
| ; CHECK-LABEL: sink_replicate_region_3_reduction |
| ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { |
| ; CHECK-NEXT: loop: |
| -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> |
| +; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> |
| ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next |
| ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> |
| ; CHECK-NEXT: EMIT vp<%4> = icmp ule ir<%iv> vp<%0> |
| @@ -151,7 +149,6 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: loop.0: |
| ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> |
| -; CHECK-NEXT: EMIT vp<%6> = first-order splice ir<%recur> ir<%recur.next> |
| ; CHECK-NEXT: Successor(s): pred.srem |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <xVFxUF> pred.srem: { |
| @@ -161,19 +158,19 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize |
| ; CHECK-NEXT: CondBit: vp<%4> (loop) |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.srem.if: |
| -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%6>, ir<%x> (S->V) |
| +; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> (S->V) |
| ; CHECK-NEXT: Successor(s): pred.srem.continue |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.srem.continue: |
| -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%rem> |
| +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): loop.0.split |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: loop.0.split: |
| -; CHECK-NEXT: WIDEN ir<%add> = add vp<%8>, ir<%recur.next> |
| +; CHECK-NEXT: WIDEN ir<%add> = add vp<%7>, ir<%recur.next> |
| ; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add> |
| -; CHECK-NEXT: EMIT vp<%11> = select vp<%4> ir<%and.red.next> ir<%and.red> |
| +; CHECK-NEXT: EMIT vp<%10> = select vp<%4> ir<%and.red.next> ir<%and.red> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| @@ -203,7 +200,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8* |
| ; CHECK-LABEL: sink_replicate_region_4_requires_split_at_end_of_block |
| ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { |
| ; CHECK-NEXT: loop: |
| -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> |
| +; CHECK-NEXT: WIDEN-PHI ir<%0> = phi ir<0>, ir<%conv> |
| ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next |
| ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> |
| ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> |
| @@ -230,7 +227,6 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8* |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: loop.1: |
| ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6> |
| -; CHECK-NEXT: EMIT vp<%8> = first-order splice ir<%0> ir<%conv> |
| ; CHECK-NEXT: Successor(s): loop.1.split |
| |
| ; CHECK: loop.1.split: |
| @@ -243,19 +239,19 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8* |
| ; CHECK-NEXT: CondBit: vp<%3> (loop) |
| |
| ; CHECK: pred.load.if: |
| -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%8>, ir<%x> (S->V) |
| +; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V) |
| ; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V) |
| ; CHECK-NEXT: Successor(s): pred.load.continue |
| |
| ; CHECK: pred.load.continue: |
| -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%rem> |
| -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%12> = ir<%lv.2> |
| +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem> |
| +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.2> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| |
| ; CHECK: loop.2: |
| -; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%11> |
| -; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%12> |
| +; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%10> |
| +; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%11> |
| ; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| @@ -287,7 +283,7 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 |
| ; CHECK-LABEL: sink_replicate_region_after_replicate_region |
| ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { |
| ; CHECK-NEXT: loop: |
| -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> |
| +; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> |
| ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next |
| ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> |
| ; CHECK-NEXT: Successor(s): loop.0 |
| @@ -297,7 +293,6 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: loop.1: |
| ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> |
| -; CHECK-NEXT: EMIT vp<%5> = first-order splice ir<%recur> ir<%recur.next> |
| ; CHECK-NEXT: Successor(s): pred.srem |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <xVFxUF> pred.srem: { |
| @@ -307,11 +302,11 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 |
| ; CHECK-NEXT: CondBit: vp<%3> (loop) |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.srem.if: |
| -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%5>, ir<%x> |
| +; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> |
| ; CHECK-NEXT: Successor(s): pred.srem.continue |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.srem.continue: |
| -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem> |
| +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): loop.1.split |
| @@ -326,13 +321,13 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 |
| ; CHECK-NEXT: CondBit: vp<%3> (loop) |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.store.if: |
| -; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%7> |
| +; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6> |
| ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> |
| ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> |
| ; CHECK-NEXT: Successor(s): pred.store.continue |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.store.continue: |
| -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%rem.div> |
| +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem.div> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): loop.2 |
| diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll |
| index c65f62cef65e..dac449440069 100644 |
| --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll |
| +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll |
| @@ -337,8 +337,7 @@ for.end: |
| ; UNROLL-NO-IC-LABEL: @constant_folded_previous_value( |
| ; UNROLL-NO-IC: vector.body: |
| ; UNROLL-NO-IC: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, %vector.ph ], [ <i64 1, i64 1, i64 1, i64 1>, %vector.body ] |
| -; UNROLL-NO-IC: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6> |
| -; CHECK-NO-IC-NEXT: add nuw i64 |
| +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6> |
| ; UNROLL-NO-IC: br i1 {{.*}}, label %middle.block, label %vector.body |
| ; |
| define void @constant_folded_previous_value() { |
| @@ -642,10 +641,10 @@ define void @sink_dead_inst() { |
| ; SINK-AFTER-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| ; SINK-AFTER-NEXT: %vec.ind = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, %vector.ph ], [ %vec.ind.next, %vector.body ] |
| ; SINK-AFTER-NEXT: %vector.recur = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, %vector.ph ], [ %3, %vector.body ] |
| -; SINK-AFTER-NEXT: %vector.recur1 = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, %vector.ph ], [ %1, %vector.body ] |
| +; SINK-AFTER-NEXT: %vector.recur2 = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, %vector.ph ], [ %1, %vector.body ] |
| ; SINK-AFTER-NEXT: %0 = add <4 x i16> %vec.ind, <i16 1, i16 1, i16 1, i16 1> |
| ; SINK-AFTER-NEXT: %1 = zext <4 x i16> %0 to <4 x i32> |
| -; SINK-AFTER-NEXT: %2 = shufflevector <4 x i32> %vector.recur1, <4 x i32> %1, <4 x i32> <i32 3, i32 4, i32 5, i32 6> |
| +; SINK-AFTER-NEXT: %2 = shufflevector <4 x i32> %vector.recur2, <4 x i32> %1, <4 x i32> <i32 3, i32 4, i32 5, i32 6> |
| ; SINK-AFTER-NEXT: %3 = add <4 x i16> %0, <i16 5, i16 5, i16 5, i16 5> |
| ; SINK-AFTER-NEXT: %4 = shufflevector <4 x i16> %vector.recur, <4 x i16> %3, <4 x i32> <i32 3, i32 4, i32 5, i32 6> |
| ; SINK-AFTER-NEXT: %5 = sub <4 x i16> %4, <i16 10, i16 10, i16 10, i16 10> |
| @@ -705,30 +704,30 @@ define i32 @sink_into_replication_region(i32 %y) { |
| ; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ] |
| ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 |
| ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] |
| -; CHECK: pred.udiv.if3: |
| +; CHECK: pred.udiv.if4: |
| ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -1 |
| ; CHECK-NEXT: [[TMP9:%.*]] = udiv i32 219220132, [[TMP8]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i32 1 |
| ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE5]] |
| -; CHECK: pred.udiv.continue4: |
| +; CHECK: pred.udiv.continue5: |
| ; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF4]] ] |
| ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 |
| ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] |
| -; CHECK: pred.udiv.if5: |
| +; CHECK: pred.udiv.if6: |
| ; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], -2 |
| ; CHECK-NEXT: [[TMP14:%.*]] = udiv i32 219220132, [[TMP13]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i32 2 |
| ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE7]] |
| -; CHECK: pred.udiv.continue6: |
| +; CHECK: pred.udiv.continue7: |
| ; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP15]], [[PRED_UDIV_IF6]] ] |
| ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 |
| ; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9]] |
| -; CHECK: pred.udiv.if7: |
| +; CHECK: pred.udiv.if8: |
| ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], -3 |
| ; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP18]] |
| ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP19]], i32 3 |
| ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE9]] |
| -; CHECK: pred.udiv.continue8: |
| +; CHECK: pred.udiv.continue9: |
| ; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP20]], [[PRED_UDIV_IF8]] ] |
| ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> |
| ; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI1]], [[TMP22]] |
| @@ -799,27 +798,27 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { |
| ; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ] |
| ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 |
| ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] |
| -; CHECK: pred.udiv.if4: |
| +; CHECK: pred.udiv.if5: |
| ; CHECK-NEXT: [[TMP11:%.*]] = udiv i32 219220132, [[TMP2]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP11]], i32 1 |
| ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE6]] |
| -; CHECK: pred.udiv.continue5: |
| +; CHECK: pred.udiv.continue6: |
| ; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF5]] ] |
| ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 |
| ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]] |
| -; CHECK: pred.udiv.if6: |
| +; CHECK: pred.udiv.if7: |
| ; CHECK-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP3]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP15]], i32 2 |
| ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE8]] |
| -; CHECK: pred.udiv.continue7: |
| +; CHECK: pred.udiv.continue8: |
| ; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP16]], [[PRED_UDIV_IF7]] ] |
| ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 |
| ; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]] |
| -; CHECK: pred.udiv.if8: |
| +; CHECK: pred.udiv.if9: |
| ; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP4]] |
| ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP19]], i32 3 |
| ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE10]] |
| -; CHECK: pred.udiv.continue9: |
| +; CHECK: pred.udiv.continue10: |
| ; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP20]], [[PRED_UDIV_IF9]] ] |
| ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> |
| ; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI4]], [[TMP22]] |
| @@ -833,31 +832,31 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { |
| ; CHECK: pred.store.continue: |
| ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 |
| ; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] |
| -; CHECK: pred.store.if10: |
| +; CHECK: pred.store.if11: |
| ; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[INDEX]], 1 |
| ; CHECK-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 |
| ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP29]] |
| ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP30]], align 4 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] |
| -; CHECK: pred.store.continue11: |
| +; CHECK: pred.store.continue12: |
| ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 |
| ; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] |
| -; CHECK: pred.store.if12: |
| +; CHECK: pred.store.if13: |
| ; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[INDEX]], 2 |
| ; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 |
| ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP33]] |
| ; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP34]], align 4 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] |
| -; CHECK: pred.store.continue13: |
| +; CHECK: pred.store.continue14: |
| ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 |
| ; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] |
| -; CHECK: pred.store.if14: |
| +; CHECK: pred.store.if15: |
| ; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[INDEX]], 3 |
| ; CHECK-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64 |
| ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP37]] |
| ; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP38]], align 4 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] |
| -; CHECK: pred.store.continue15: |
| +; CHECK: pred.store.continue16: |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4> |
| ; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
| diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll |
| index 6de7b0737347..3788aaa72ac2 100644 |
| --- a/llvm/test/Transforms/LoopVectorize/induction.ll |
| +++ b/llvm/test/Transforms/LoopVectorize/induction.ll |
| @@ -860,10 +860,10 @@ define i64 @trunc_with_first_order_recurrence() { |
| ; CHECK-NEXT: %vec.phi = phi <2 x i64> |
| ; CHECK-NEXT: %vec.ind = phi <2 x i64> [ <i64 1, i64 2>, %vector.ph ], [ %vec.ind.next, %vector.body ] |
| ; CHECK-NEXT: %vec.ind2 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next3, %vector.body ] |
| -; CHECK-NEXT: %vector.recur = phi <2 x i32> [ <i32 poison, i32 42>, %vector.ph ], [ %vec.ind4, %vector.body ] |
| -; CHECK-NEXT: %vec.ind4 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next5, %vector.body ] |
| -; CHECK-NEXT: %vec.ind6 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next7, %vector.body ] |
| -; CHECK-NEXT: shufflevector <2 x i32> %vector.recur, <2 x i32> %vec.ind4, <2 x i32> <i32 1, i32 2> |
| +; CHECK-NEXT: %vector.recur = phi <2 x i32> [ <i32 poison, i32 42>, %vector.ph ], [ %vec.ind5, %vector.body ] |
| +; CHECK-NEXT: %vec.ind5 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next6, %vector.body ] |
| +; CHECK-NEXT: %vec.ind7 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next8, %vector.body ] |
| +; CHECK-NEXT: shufflevector <2 x i32> %vector.recur, <2 x i32> %vec.ind5, <2 x i32> <i32 1, i32 2> |
| entry: |
| br label %loop |
| |
| diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll |
| index e56b607342e6..3ded17f813b1 100644 |
| --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll |
| +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll |
| @@ -1530,11 +1530,11 @@ define void @PR34743(i16* %a, i32* %b, i64 %n) { |
| ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, <8 x i16>* [[TMP10]], align 4 |
| ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; CHECK-NEXT: [[STRIDED_VEC8]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| -; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32> |
| -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> |
| -; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32> |
| +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> |
| +; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32> |
| +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32> |
| ; CHECK-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[STRIDED_VEC8]] to <4 x i32> |
| -; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP11]] |
| +; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP12]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP14]] |
| ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>* |
| diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll |
| index ad07c1853d92..0e660a9ea533 100644 |
| --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll |
| +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll |
| @@ -172,9 +172,8 @@ define void @constant_folded_previous_value() { |
| ; CHECK-VF4UF2-LABEL: @constant_folded_previous_value |
| ; CHECK-VF4UF2: vector.body |
| ; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i64> [ %vector.recur.init, %vector.ph ], [ shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer), %vector.body ] |
| -; CHECK-VF4UF2: %[[SPLICE1:.*]] = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> %vector.recur, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer), i32 -1) |
| +; CHECK-VF4UF2-NEXT: %[[SPLICE1:.*]] = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> %vector.recur, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer), i32 -1) |
| ; CHECK-VF4UF2: %[[SPLICE2:.*]] = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer), i32 -1) |
| -; CHECK-VF4UF2: br i1 {{.*}}, label %middle.block, label %vector.body |
| entry: |
| br label %scalar.body |
| |
| @@ -198,17 +197,15 @@ for.end: |
| define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { |
| ; CHECK-VF4UF2-LABEL: @extract_second_last_iteration |
| ; CHECK-VF4UF2: vector.ph |
| -; CHECK-VF4UF2: call i32 @llvm.vscale.i32() |
| -; CHECK-VF4UF2: call i32 @llvm.vscale.i32() |
| -; CHECK-VF4UF2: %[[VSCALE1:.*]] = call i32 @llvm.vscale.i32() |
| -; CHECK-VF4UF2: %[[MUL1:.*]] = mul i32 %[[VSCALE1]], 4 |
| -; CHECK-VF4UF2: %[[SUB1:.*]] = sub i32 %[[MUL1]], 1 |
| -; CHECK-VF4UF2: %[[VEC_RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 %[[SUB1]] |
| ; CHECK-VF4UF2: %[[SPLAT_INS1:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 |
| ; CHECK-VF4UF2: %[[SPLAT1:.*]] = shufflevector <vscale x 4 x i32> %[[SPLAT_INS1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
| ; CHECK-VF4UF2: %[[SPLAT_INS2:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 |
| ; CHECK-VF4UF2: %[[SPLAT2:.*]] = shufflevector <vscale x 4 x i32> %[[SPLAT_INS2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
| -; ; CHECK-VF4UF2: vector.body |
| +; CHECK-VF4UF2: %[[VSCALE1:.*]] = call i32 @llvm.vscale.i32() |
| +; CHECK-VF4UF2: %[[MUL1:.*]] = mul i32 %[[VSCALE1]], 4 |
| +; CHECK-VF4UF2: %[[SUB1:.*]] = sub i32 %[[MUL1]], 1 |
| +; CHECK-VF4UF2: %[[VEC_RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 %[[SUB1]] |
| +; CHECK-VF4UF2: vector.body |
| ; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[VEC_RECUR_INIT]], %vector.ph ], [ %[[ADD2:.*]], %vector.body ] |
| ; CHECK-VF4UF2: %[[ADD1:.*]] = add <vscale x 4 x i32> %{{.*}}, %[[SPLAT1]] |
| ; CHECK-VF4UF2: %[[ADD2]] = add <vscale x 4 x i32> %{{.*}}, %[[SPLAT2]] |
| diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll |
| index 13b45d054fef..934d55d92ae5 100644 |
| --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll |
| +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll |
| @@ -766,7 +766,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { |
| ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next |
| -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, ir<%lv.a> |
| +; CHECK-NEXT: WIDEN-PHI ir<%for> = phi ir<0>, ir<%lv.a> |
| ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> |
| ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> |
| ; CHECK-NEXT: Successor(s): pred.load |
| @@ -788,7 +788,6 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { |
| ; CHECK-NEXT: Successor(s): loop.0 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: loop.0: |
| -; CHECK-NEXT: EMIT vp<%7> = first-order splice ir<%for> ir<%lv.a> |
| ; CHECK-NEXT: Successor(s): loop.1 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: loop.1: |
| @@ -801,12 +800,12 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { |
| ; CHECK-NEXT: CondBit: vp<%3> (loop) |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.store.if: |
| -; CHECK-NEXT: REPLICATE ir<%div> = sdiv vp<%7>, vp<%6> |
| +; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%for>, vp<%6> |
| ; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a> |
| ; CHECK-NEXT: Successor(s): pred.store.continue |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: pred.store.continue: |
| -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%div> |
| +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%div> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): loop.2 |