sys-devel/llvm/files/cherry/d7a40a447f1ed0294c6bc8fe82b6b2460e31de06.patch - third_party/overlays/chromiumos-overlay - Git at Google

 commit d7a40a447f1ed0294c6bc8fe82b6b2460e31de06
 Author: Alexey Bataev <a.bataev@outlook.com>
 Date:   Tue Apr 18 10:40:20 2023 -0700

     Revert "[SLP]Add final resize to ShuffleCostEstimator::finalize member function and basic add member functions."

     This reverts commit cd341f3f4878137d1c9e7a05c4c3a7bd8ff216dc to fix
     a crash revealed by buildbot https://lab.llvm.org/buildbot#builders/124/builds/7108.

 diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
 index 163e3581ea7b..a1fabec2a8d7 100644
 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
 +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
 @@ -946,14 +946,9 @@ static bool isSimple(Instruction *I) {
  }

  /// Shuffles \p Mask in accordance with the given \p SubMask.
 -/// \param ExtendingManyInputs Supports reshuffling of the mask with not only
 -/// one but two input vectors.
 -static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
 -                    bool ExtendingManyInputs = false) {
 +static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
    if (SubMask.empty())
      return;
 -  assert((!ExtendingManyInputs || SubMask.size() > Mask.size()) &&
 -         "SubMask with many inputs support must be larger than the mask.");
    if (Mask.empty()) {
      Mask.append(SubMask.begin(), SubMask.end());
      return;
 @@ -961,9 +956,8 @@ static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
    SmallVector<int> NewMask(SubMask.size(), UndefMaskElem);
    int TermValue = std::min(Mask.size(), SubMask.size());
    for (int I = 0, E = SubMask.size(); I < E; ++I) {
 -    if ((!ExtendingManyInputs &&
 -         (SubMask[I] >= TermValue || Mask[SubMask[I]] >= TermValue)) ||
 -        SubMask[I] == UndefMaskElem)
 +    if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
 +        Mask[SubMask[I]] >= TermValue)
        continue;
      NewMask[I] = Mask[SubMask[I]];
    }
 @@ -6794,8 +6788,6 @@ protected:
  /// analysis/transformations.
  class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
    bool IsFinalized = false;
 -  SmallVector<int> CommonMask;
 -  SmallVector<Value *, 2> InVectors;
    const TargetTransformInfo &TTI;
    InstructionCost Cost = 0;
    ArrayRef<Value *> VectorizedVals;
 @@ -7017,53 +7009,19 @@ public:
                                     VecTy, std::nullopt, CostKind, 0, EEVTy);
        }
      }
 -    InVectors.assign(1, VecBase);
      return VecBase;
    }
 -  void add(const TreeEntry *E1, const TreeEntry *E2, ArrayRef<int> Mask) {
 -    CommonMask.assign(Mask.begin(), Mask.end());
 -    InVectors.assign(
 -        2, Constant::getNullValue(FixedVectorType::get(
 -               E1->Scalars.front()->getType(),
 -               std::max(E1->getVectorFactor(), E2->getVectorFactor()))));
 -  }
 -  void add(const TreeEntry *E1, ArrayRef<int> Mask) {
 -    CommonMask.assign(Mask.begin(), Mask.end());
 -    InVectors.assign(
 -        1, Constant::getNullValue(FixedVectorType::get(
 -               E1->Scalars.front()->getType(), E1->getVectorFactor())));
 -  }
    void gather(ArrayRef<Value *> VL, Value *Root = nullptr) {
      Cost += getBuildVectorCost(VL, Root);
 -    if (!Root) {
 -      assert(InVectors.empty() && "Unexpected input vectors for buildvector.");
 -      InVectors.assign(1, Constant::getNullValue(FixedVectorType::get(
 -                              VL.front()->getType(), VL.size())));
 -    }
    }
    /// Finalize emission of the shuffles.
 -  InstructionCost finalize(ArrayRef<int> ExtMask) {
 +  InstructionCost finalize() {
      IsFinalized = true;
 -    ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true);
 -    if (CommonMask.empty())
 -      return Cost;
 -    int Limit = CommonMask.size() * 2;
 -    if (all_of(CommonMask, [=](int Idx) { return Idx < Limit; }) &&
 -        ShuffleVectorInst::isIdentityMask(CommonMask))
 -      return Cost;
 -    return Cost +
 -           TTI.getShuffleCost(InVectors.size() == 2 ? TTI::SK_PermuteTwoSrc
 -                                                    : TTI::SK_PermuteSingleSrc,
 -                              FixedVectorType::get(
 -                                  cast<VectorType>(InVectors.front()->getType())
 -                                      ->getElementType(),
 -                                  CommonMask.size()),
 -                              CommonMask);
 +    return Cost;
    }

    ~ShuffleCostEstimator() {
 -    assert((IsFinalized || CommonMask.empty()) &&
 -           "Shuffle construction must be finalized.");
 +      assert(IsFinalized && "Shuffle construction must be finalized.");
    }
  };

 @@ -7151,30 +7109,35 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
          if (Mask[I] != UndefMaskElem)
            GatheredScalars[I] = PoisonValue::get(ScalarTy);
        }
 -      LLVM_DEBUG(
 -          int Limit = Mask.size() * 2;
 -          if (*GatherShuffle == TTI::SK_PermuteSingleSrc &&
 -              all_of(Mask, [=](int Idx) { return Idx < Limit; }) &&
 -              ShuffleVectorInst::isIdentityMask(Mask)) {
 -            // Perfect match in the graph, will reuse the previously
 -            // vectorized node. Cost is 0.
 -            dbgs() << "SLP: perfect diamond match for gather bundle "
 -                      "that starts with "
 -                   << *VL.front() << ".\n";
 -          } else {
 -            dbgs() << "SLP: shuffled " << Entries.size()
 -                   << " entries for bundle that starts with " << *VL.front()
 -                   << ".\n";
 -          });
 -      if (Entries.size() == 1)
 -        Estimator.add(Entries.front(), Mask);
 -      else
 -        Estimator.add(Entries.front(), Entries.back(), Mask);
 +      InstructionCost GatherCost = 0;
 +      int Limit = Mask.size() * 2;
 +      if (all_of(Mask, [=](int Idx) { return Idx < Limit; }) &&
 +          ShuffleVectorInst::isIdentityMask(Mask)) {
 +        // Perfect match in the graph, will reuse the previously vectorized
 +        // node. Cost is 0.
 +        LLVM_DEBUG(
 +            dbgs()
 +            << "SLP: perfect diamond match for gather bundle that starts with "
 +            << *VL.front() << ".\n");
 +        if (NeedToShuffleReuses)
 +          GatherCost =
 +              TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
 +                                  FinalVecTy, E->ReuseShuffleIndices);
 +      } else {
 +        LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
 +                          << " entries for bundle that starts with "
 +                          << *VL.front() << ".\n");
 +        // Detected that instead of gather we can emit a shuffle of single/two
 +        // previously vectorized nodes. Add the cost of the permutation rather
 +        // than gather.
 +        ::addMask(Mask, E->ReuseShuffleIndices);
 +        GatherCost = TTI->getShuffleCost(*GatherShuffle, FinalVecTy, Mask);
 +      }
        Estimator.gather(
            GatheredScalars,
            Constant::getNullValue(FixedVectorType::get(
                GatheredScalars.front()->getType(), GatheredScalars.size())));
 -      return Estimator.finalize(E->ReuseShuffleIndices);
 +      return GatherCost + Estimator.finalize();
      }
      if (ExtractShuffle && all_of(GatheredScalars, PoisonValue::classof)) {
        // Check that gather of extractelements can be represented as just a
 @@ -7184,15 +7147,17 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
        // single input vector or of 2 input vectors.
        InstructionCost Cost =
            computeExtractCost(VL, VecTy, *ExtractShuffle, ExtractMask, *TTI);
 -      return Cost + Estimator.finalize(E->ReuseShuffleIndices);
 -    }
 -    Estimator.gather(
 -        GatheredScalars,
 -        (ExtractShuffle || GatherShuffle)
 -            ? Constant::getNullValue(FixedVectorType::get(
 -                  GatheredScalars.front()->getType(), GatheredScalars.size()))
 -            : nullptr);
 -    return Estimator.finalize(E->ReuseShuffleIndices);
 +      if (NeedToShuffleReuses)
 +        Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
 +                                    FinalVecTy, E->ReuseShuffleIndices);
 +      return Cost + Estimator.finalize();
 +    }
 +    InstructionCost ReuseShuffleCost = 0;
 +    if (NeedToShuffleReuses)
 +      ReuseShuffleCost = TTI->getShuffleCost(
 +          TTI::SK_PermuteSingleSrc, FinalVecTy, E->ReuseShuffleIndices);
 +    Estimator.gather(GatheredScalars);
 +    return ReuseShuffleCost + Estimator.finalize();
    }
    InstructionCost CommonCost = 0;
    SmallVector<int> Mask;
 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
 index f26d14c44db8..c5b6ac647aa3 100644
 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
 +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
 @@ -93,36 +93,20 @@ define i1 @logical_or_fcmp(<4 x float> %x) {
  }

  define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
 -; SSE-LABEL: @logical_and_icmp_diff_preds(
 -; SSE-NEXT:    [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
 -; SSE-NEXT:    [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
 -; SSE-NEXT:    [[C0:%.*]] = icmp ult i32 [[X0]], 0
 -; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 1, i32 2>
 -; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
 -; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
 -; SSE-NEXT:    [[TMP4:%.*]] = icmp slt <2 x i32> [[TMP2]], [[TMP3]]
 -; SSE-NEXT:    [[C3:%.*]] = icmp slt i32 [[X3]], 0
 -; SSE-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
 -; SSE-NEXT:    [[S1:%.*]] = select i1 [[C0]], i1 [[TMP5]], i1 false
 -; SSE-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
 -; SSE-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[TMP6]], i1 false
 -; SSE-NEXT:    [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
 -; SSE-NEXT:    ret i1 [[S3]]
 -;
 -; AVX-LABEL: @logical_and_icmp_diff_preds(
 -; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 0, i32 3, i32 1, i32 7>
 -; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
 -; AVX-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
 -; AVX-NEXT:    [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
 -; AVX-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 -; AVX-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
 -; AVX-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
 -; AVX-NEXT:    [[S1:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false
 -; AVX-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
 -; AVX-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[TMP8]], i1 false
 -; AVX-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
 -; AVX-NEXT:    [[S3:%.*]] = select i1 [[S2]], i1 [[TMP9]], i1 false
 -; AVX-NEXT:    ret i1 [[S3]]
 +; CHECK-LABEL: @logical_and_icmp_diff_preds(
 +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 0, i32 3, i32 1, i32 7>
 +; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
 +; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
 +; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
 +; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 +; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
 +; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
 +; CHECK-NEXT:    [[S1:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false
 +; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
 +; CHECK-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[TMP8]], i1 false
 +; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
 +; CHECK-NEXT:    [[S3:%.*]] = select i1 [[S2]], i1 [[TMP9]], i1 false
 +; CHECK-NEXT:    ret i1 [[S3]]
  ;
    %x0 = extractelement <4 x i32> %x, i32 0
    %x1 = extractelement <4 x i32> %x, i32 1
	commit d7a40a447f1ed0294c6bc8fe82b6b2460e31de06
	Author: Alexey Bataev <a.bataev@outlook.com>
	Date: Tue Apr 18 10:40:20 2023 -0700

	Revert "[SLP]Add final resize to ShuffleCostEstimator::finalize member function and basic add member functions."

	This reverts commit cd341f3f4878137d1c9e7a05c4c3a7bd8ff216dc to fix
	a crash revealed by buildbot https://lab.llvm.org/buildbot#builders/124/builds/7108.

	diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
	index 163e3581ea7b..a1fabec2a8d7 100644
	--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
	+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
	@@ -946,14 +946,9 @@ static bool isSimple(Instruction *I) {
	}

	/// Shuffles \p Mask in accordance with the given \p SubMask.
	-/// \param ExtendingManyInputs Supports reshuffling of the mask with not only
	-/// one but two input vectors.
	-static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
	- bool ExtendingManyInputs = false) {
	+static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
	if (SubMask.empty())
	return;
	- assert((!ExtendingManyInputs || SubMask.size() > Mask.size()) &&
	- "SubMask with many inputs support must be larger than the mask.");
	if (Mask.empty()) {
	Mask.append(SubMask.begin(), SubMask.end());
	return;
	@@ -961,9 +956,8 @@ static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
	SmallVector<int> NewMask(SubMask.size(), UndefMaskElem);
	int TermValue = std::min(Mask.size(), SubMask.size());
	for (int I = 0, E = SubMask.size(); I < E; ++I) {
	- if ((!ExtendingManyInputs &&
	- (SubMask[I] >= TermValue || Mask[SubMask[I]] >= TermValue)) ||
	- SubMask[I] == UndefMaskElem)
	+ if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
	+ Mask[SubMask[I]] >= TermValue)
	continue;
	NewMask[I] = Mask[SubMask[I]];
	}
	@@ -6794,8 +6788,6 @@ protected:
	/// analysis/transformations.
	class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
	bool IsFinalized = false;
	- SmallVector<int> CommonMask;
	- SmallVector<Value *, 2> InVectors;
	const TargetTransformInfo &TTI;
	InstructionCost Cost = 0;
	ArrayRef<Value *> VectorizedVals;
	@@ -7017,53 +7009,19 @@ public:
	VecTy, std::nullopt, CostKind, 0, EEVTy);
	}
	}
	- InVectors.assign(1, VecBase);
	return VecBase;
	}
	- void add(const TreeEntry *E1, const TreeEntry *E2, ArrayRef<int> Mask) {
	- CommonMask.assign(Mask.begin(), Mask.end());
	- InVectors.assign(
	- 2, Constant::getNullValue(FixedVectorType::get(
	- E1->Scalars.front()->getType(),
	- std::max(E1->getVectorFactor(), E2->getVectorFactor()))));
	- }
	- void add(const TreeEntry *E1, ArrayRef<int> Mask) {
	- CommonMask.assign(Mask.begin(), Mask.end());
	- InVectors.assign(
	- 1, Constant::getNullValue(FixedVectorType::get(
	- E1->Scalars.front()->getType(), E1->getVectorFactor())));
	- }
	void gather(ArrayRef<Value *> VL, Value *Root = nullptr) {
	Cost += getBuildVectorCost(VL, Root);
	- if (!Root) {
	- assert(InVectors.empty() && "Unexpected input vectors for buildvector.");
	- InVectors.assign(1, Constant::getNullValue(FixedVectorType::get(
	- VL.front()->getType(), VL.size())));
	- }
	}
	/// Finalize emission of the shuffles.
	- InstructionCost finalize(ArrayRef<int> ExtMask) {
	+ InstructionCost finalize() {
	IsFinalized = true;
	- ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true);
	- if (CommonMask.empty())
	- return Cost;
	- int Limit = CommonMask.size() * 2;
	- if (all_of(CommonMask, [=](int Idx) { return Idx < Limit; }) &&
	- ShuffleVectorInst::isIdentityMask(CommonMask))
	- return Cost;
	- return Cost +
	- TTI.getShuffleCost(InVectors.size() == 2 ? TTI::SK_PermuteTwoSrc
	- : TTI::SK_PermuteSingleSrc,
	- FixedVectorType::get(
	- cast<VectorType>(InVectors.front()->getType())
	- ->getElementType(),
	- CommonMask.size()),
	- CommonMask);
	+ return Cost;
	}

	~ShuffleCostEstimator() {
	- assert((IsFinalized || CommonMask.empty()) &&
	- "Shuffle construction must be finalized.");
	+ assert(IsFinalized && "Shuffle construction must be finalized.");
	}
	};

	@@ -7151,30 +7109,35 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
	if (Mask[I] != UndefMaskElem)
	GatheredScalars[I] = PoisonValue::get(ScalarTy);
	}
	- LLVM_DEBUG(
	- int Limit = Mask.size() * 2;
	- if (*GatherShuffle == TTI::SK_PermuteSingleSrc &&
	- all_of(Mask, [=](int Idx) { return Idx < Limit; }) &&
	- ShuffleVectorInst::isIdentityMask(Mask)) {
	- // Perfect match in the graph, will reuse the previously
	- // vectorized node. Cost is 0.
	- dbgs() << "SLP: perfect diamond match for gather bundle "
	- "that starts with "
	- << *VL.front() << ".\n";
	- } else {
	- dbgs() << "SLP: shuffled " << Entries.size()
	- << " entries for bundle that starts with " << *VL.front()
	- << ".\n";
	- });
	- if (Entries.size() == 1)
	- Estimator.add(Entries.front(), Mask);
	- else
	- Estimator.add(Entries.front(), Entries.back(), Mask);
	+ InstructionCost GatherCost = 0;
	+ int Limit = Mask.size() * 2;
	+ if (all_of(Mask, [=](int Idx) { return Idx < Limit; }) &&
	+ ShuffleVectorInst::isIdentityMask(Mask)) {
	+ // Perfect match in the graph, will reuse the previously vectorized
	+ // node. Cost is 0.
	+ LLVM_DEBUG(
	+ dbgs()
	+ << "SLP: perfect diamond match for gather bundle that starts with "
	+ << *VL.front() << ".\n");
	+ if (NeedToShuffleReuses)
	+ GatherCost =
	+ TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
	+ FinalVecTy, E->ReuseShuffleIndices);
	+ } else {
	+ LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
	+ << " entries for bundle that starts with "
	+ << *VL.front() << ".\n");
	+ // Detected that instead of gather we can emit a shuffle of single/two
	+ // previously vectorized nodes. Add the cost of the permutation rather
	+ // than gather.
	+ ::addMask(Mask, E->ReuseShuffleIndices);
	+ GatherCost = TTI->getShuffleCost(*GatherShuffle, FinalVecTy, Mask);
	+ }
	Estimator.gather(
	GatheredScalars,
	Constant::getNullValue(FixedVectorType::get(
	GatheredScalars.front()->getType(), GatheredScalars.size())));
	- return Estimator.finalize(E->ReuseShuffleIndices);
	+ return GatherCost + Estimator.finalize();
	}
	if (ExtractShuffle && all_of(GatheredScalars, PoisonValue::classof)) {
	// Check that gather of extractelements can be represented as just a
	@@ -7184,15 +7147,17 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
	// single input vector or of 2 input vectors.
	InstructionCost Cost =
	computeExtractCost(VL, VecTy, *ExtractShuffle, ExtractMask, *TTI);
	- return Cost + Estimator.finalize(E->ReuseShuffleIndices);
	- }
	- Estimator.gather(
	- GatheredScalars,
	- (ExtractShuffle || GatherShuffle)
	- ? Constant::getNullValue(FixedVectorType::get(
	- GatheredScalars.front()->getType(), GatheredScalars.size()))
	- : nullptr);
	- return Estimator.finalize(E->ReuseShuffleIndices);
	+ if (NeedToShuffleReuses)
	+ Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
	+ FinalVecTy, E->ReuseShuffleIndices);
	+ return Cost + Estimator.finalize();
	+ }
	+ InstructionCost ReuseShuffleCost = 0;
	+ if (NeedToShuffleReuses)
	+ ReuseShuffleCost = TTI->getShuffleCost(
	+ TTI::SK_PermuteSingleSrc, FinalVecTy, E->ReuseShuffleIndices);
	+ Estimator.gather(GatheredScalars);
	+ return ReuseShuffleCost + Estimator.finalize();
	}
	InstructionCost CommonCost = 0;
	SmallVector<int> Mask;
	diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
	index f26d14c44db8..c5b6ac647aa3 100644
	--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
	+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
	@@ -93,36 +93,20 @@ define i1 @logical_or_fcmp(<4 x float> %x) {
	}

	define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
	-; SSE-LABEL: @logical_and_icmp_diff_preds(
	-; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
	-; SSE-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
	-; SSE-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0
	-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 1, i32 2>
	-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
	-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
	-; SSE-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[TMP2]], [[TMP3]]
	-; SSE-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 0
	-; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
	-; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP5]], i1 false
	-; SSE-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
	-; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP6]], i1 false
	-; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
	-; SSE-NEXT: ret i1 [[S3]]
	-;
	-; AVX-LABEL: @logical_and_icmp_diff_preds(
	-; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 0, i32 3, i32 1, i32 7>
	-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
	-; AVX-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
	-; AVX-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
	-; AVX-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
	-; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
	-; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
	-; AVX-NEXT: [[S1:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false
	-; AVX-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
	-; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP8]], i1 false
	-; AVX-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
	-; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP9]], i1 false
	-; AVX-NEXT: ret i1 [[S3]]
	+; CHECK-LABEL: @logical_and_icmp_diff_preds(
	+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 0, i32 3, i32 1, i32 7>
	+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
	+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
	+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
	+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
	+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
	+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
	+; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false
	+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
	+; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP8]], i1 false
	+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
	+; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP9]], i1 false
	+; CHECK-NEXT: ret i1 [[S3]]
	;
	%x0 = extractelement <4 x i32> %x, i32 0
	%x1 = extractelement <4 x i32> %x, i32 1