blob: d37bb4655449ebbfbe82858a88ea15eb03960538 [file] [log] [blame]
commit e463bd53c03ff9183bd30030477dfe6f3b2fdd0c
Author: Alexey Bataev <alexey.bataev@intel.com>
Date: Tue Jan 19 11:19:09 2021 -0800
Revert "[SLP]Merge reorder and reuse shuffles."
This reverts commit 438682de6a38ac97f89fa38faf5c8dc9b09cd9ad to fix the
bug with the reducing size of the resulting vector for the entry node
with multiple users.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0fee52dcdd93..24885e4d8257 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3500,7 +3500,6 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
- InstructionCost DeadCost = 0;
if (NeedToShuffleReuses) {
unsigned Idx = 0;
for (unsigned I : E->ReuseShuffleIndices) {
@@ -3528,10 +3527,12 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
ReuseShuffleCost +=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, Idx);
}
- DeadCost = ReuseShuffleCost;
- } else if (!E->ReorderIndices.empty()) {
- DeadCost = TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
- VecTy);
+ }
+ InstructionCost DeadCost = ReuseShuffleCost;
+ if (!E->ReorderIndices.empty()) {
+ // TODO: Merge this shuffle with the ReuseShuffleCost.
+ DeadCost += TTI->getShuffleCost(
+ TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
}
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
Instruction *EI = cast<Instruction>(VL[I]);
@@ -3755,9 +3756,11 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
/*VariableMask=*/false, alignment, CostKind, VL0);
}
- if (!NeedToShuffleReuses && !E->ReorderIndices.empty())
+ if (!E->ReorderIndices.empty()) {
+ // TODO: Merge this shuffle with the ReuseShuffleCost.
VecLdCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+ }
LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecLdCost, ScalarLdCost));
return ReuseShuffleCost + VecLdCost - ScalarLdCost;
}
@@ -3769,14 +3772,18 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
Align Alignment = SI->getAlign();
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0);
+ if (NeedToShuffleReuses)
+ ReuseShuffleCost = -(ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecStCost = TTI->getMemoryOpCost(
Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
- if (IsReorder)
+ if (IsReorder) {
+ // TODO: Merge this shuffle with the ReuseShuffleCost.
VecStCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+ }
LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecStCost, ScalarStCost));
- return VecStCost - ScalarStCost;
+ return ReuseShuffleCost + VecStCost - ScalarStCost;
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
@@ -4323,64 +4330,6 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
return Vec;
}
-namespace {
-/// Merges shuffle masks and emits final shuffle instruction, if required.
-class ShuffleInstructionBuilder {
- IRBuilderBase &Builder;
- bool IsFinalized = false;
- SmallVector<int, 4> Mask;
-
-public:
- ShuffleInstructionBuilder(IRBuilderBase &Builder) : Builder(Builder) {}
-
- /// Adds a mask, inverting it before applying.
- void addInversedMask(ArrayRef<unsigned> SubMask) {
- if (SubMask.empty())
- return;
- SmallVector<int, 4> NewMask;
- inversePermutation(SubMask, NewMask);
- addMask(NewMask);
- }
-
- /// Functions adds masks, merging them into single one.
- void addMask(ArrayRef<unsigned> SubMask) {
- SmallVector<int, 4> NewMask(SubMask.begin(), SubMask.end());
- addMask(NewMask);
- }
-
- void addMask(ArrayRef<int> SubMask) {
- if (SubMask.empty())
- return;
- if (Mask.empty()) {
- Mask.append(SubMask.begin(), SubMask.end());
- return;
- }
- SmallVector<int, 4> NewMask(SubMask.size(), SubMask.size());
- int TermValue = std::min(Mask.size(), SubMask.size());
- for (int I = 0, E = SubMask.size(); I < E; ++I) {
- if (SubMask[I] >= TermValue || Mask[SubMask[I]] >= TermValue) {
- NewMask[I] = E;
- continue;
- }
- NewMask[I] = Mask[SubMask[I]];
- }
- Mask.swap(NewMask);
- }
-
- Value *finalize(Value *V) {
- IsFinalized = true;
- if (Mask.empty())
- return V;
- return Builder.CreateShuffleVector(V, Mask, "shuffle");
- }
-
- ~ShuffleInstructionBuilder() {
- assert((IsFinalized || Mask.empty()) &&
- "Must be finalized construction of the shuffles.");
- }
-};
-} // namespace
-
Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
IRBuilder<>::InsertPointGuard Guard(Builder);
@@ -4389,14 +4338,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return E->VectorizedValue;
}
- ShuffleInstructionBuilder ShuffleBuilder(Builder);
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
if (E->State == TreeEntry::NeedToGather) {
setInsertPointAfterBundle(E);
Value *Vec = gather(E->Scalars);
if (NeedToShuffleReuses) {
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- Vec = ShuffleBuilder.finalize(Vec);
+ Vec = Builder.CreateShuffleVector(Vec, E->ReuseShuffleIndices, "shuffle");
if (auto *I = dyn_cast<Instruction>(Vec)) {
GatherSeq.insert(I);
CSEBlocks.insert(I->getParent());
@@ -4454,10 +4401,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::ExtractElement: {
Value *V = E->getSingleOperand(0);
- Builder.SetInsertPoint(VL0);
- ShuffleBuilder.addInversedMask(E->ReorderIndices);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- V = ShuffleBuilder.finalize(V);
+ if (!E->ReorderIndices.empty()) {
+ SmallVector<int, 4> Mask;
+ inversePermutation(E->ReorderIndices, Mask);
+ Builder.SetInsertPoint(VL0);
+ V = Builder.CreateShuffleVector(V, Mask, "reorder_shuffle");
+ }
+ if (NeedToShuffleReuses) {
+ // TODO: Merge this shuffle with the ReorderShuffleMask.
+ if (E->ReorderIndices.empty())
+ Builder.SetInsertPoint(VL0);
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ }
E->VectorizedValue = V;
return V;
}
@@ -4468,9 +4423,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
Value *NewV = propagateMetadata(V, E->Scalars);
- ShuffleBuilder.addInversedMask(E->ReorderIndices);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- NewV = ShuffleBuilder.finalize(NewV);
+ if (!E->ReorderIndices.empty()) {
+ SmallVector<int, 4> Mask;
+ inversePermutation(E->ReorderIndices, Mask);
+ NewV = Builder.CreateShuffleVector(NewV, Mask, "reorder_shuffle");
+ }
+ if (NeedToShuffleReuses) {
+ // TODO: Merge this shuffle with the ReorderShuffleMask.
+ NewV = Builder.CreateShuffleVector(NewV, E->ReuseShuffleIndices,
+ "shuffle");
+ }
E->VectorizedValue = NewV;
return NewV;
}
@@ -4497,8 +4459,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
auto *CI = cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- V = ShuffleBuilder.finalize(V);
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4519,8 +4481,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V = Builder.CreateCmp(P0, L, R);
propagateIRFlags(V, E->Scalars, VL0);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- V = ShuffleBuilder.finalize(V);
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4539,8 +4501,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = Builder.CreateSelect(Cond, True, False);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- V = ShuffleBuilder.finalize(V);
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4562,8 +4524,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- V = ShuffleBuilder.finalize(V);
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4605,8 +4567,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- V = ShuffleBuilder.finalize(V);
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4648,9 +4610,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = propagateMetadata(NewLI, E->Scalars);
- ShuffleBuilder.addInversedMask(E->ReorderIndices);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- V = ShuffleBuilder.finalize(V);
+ if (IsReorder) {
+ SmallVector<int, 4> Mask;
+ inversePermutation(E->ReorderIndices, Mask);
+ V = Builder.CreateShuffleVector(V, Mask, "reorder_shuffle");
+ }
+ if (NeedToShuffleReuses) {
+ // TODO: Merge this shuffle with the ReorderShuffleMask.
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ }
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
@@ -4664,9 +4632,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
setInsertPointAfterBundle(E);
Value *VecValue = vectorizeTree(E->getOperand(0));
- ShuffleBuilder.addMask(E->ReorderIndices);
- VecValue = ShuffleBuilder.finalize(VecValue);
-
+ if (IsReorder) {
+ SmallVector<int, 4> Mask(E->ReorderIndices.begin(),
+ E->ReorderIndices.end());
+ VecValue = Builder.CreateShuffleVector(VecValue, Mask, "reorder_shuf");
+ }
Value *ScalarPtr = SI->getPointerOperand();
Value *VecPtr = Builder.CreateBitCast(
ScalarPtr, VecValue->getType()->getPointerTo(AS));
@@ -4680,6 +4650,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
Value *V = propagateMetadata(ST, E->Scalars);
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4717,8 +4689,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- V = ShuffleBuilder.finalize(V);
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4780,8 +4752,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
propagateIRFlags(V, E->Scalars, VL0);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- V = ShuffleBuilder.finalize(V);
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4847,8 +4819,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- V = ShuffleBuilder.finalize(V);
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
E->VectorizedValue = V;
++NumVectorInstructions;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
index d1754c0bbc54..1c8ddba98882 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
@@ -82,7 +82,8 @@ define void @f3(<2 x i16> %x, i16* %a) {
; CHECK: cont:
; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 1>
+; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[REORDER_SHUFFLE]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
index 741dbcec392e..f9e38eaebc3f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
@@ -35,7 +35,8 @@ define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) {
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[REORDER_SHUFFLE]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
index 1b89c9a437a2..e7b2ce8ecbb2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
@@ -8,9 +8,10 @@ define void @wombat(i32* %ptr, i32* %ptr1) {
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[TMP8]] to <2 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[REORDER_SHUFFLE]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[PTR1:%.*]], i32 3
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[REORDER_SHUFFLE]], <i32 -1, i32 -1>
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5