| commit 403bd583a8cc1f041430ff1b236ab296a2acdc85 |
| Author: Alexey Bataev <a.bataev@outlook.com> |
| Date: Fri Apr 21 07:44:36 2023 -0700 |
| |
| [SLP]Fix a crash on scalarized vectors. |
| |
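| When the resulting vector type is scalarized, an in-vector (a null constant |
| of that vector type) still needs to be registered before returning early, |
| to avoid a crash in further analysis. |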
| |
| diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp |
| index 58a51f0ad061..7b1b7b282fd6 100644 |
| --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp |
| +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp |
| @@ -6934,8 +6934,10 @@ public: |
| auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size()); |
| // If the resulting type is scalarized, do not adjust the cost. |
| unsigned VecNumParts = TTI.getNumberOfParts(VecTy); |
| - if (VecNumParts == VecTy->getNumElements()) |
| + if (VecNumParts == VecTy->getNumElements()) { |
| + InVectors.assign(1, Constant::getNullValue(VecTy)); |
| return nullptr; |
| + } |
| DenseMap<Value *, int> ExtractVectorsTys; |
| SmallPtrSet<Value *, 4> CheckedExtracts; |
| for (auto [I, V] : enumerate(VL)) { |
| diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll |
| new file mode 100644 |
| index 000000000000..1ace27f1b46e |
| --- /dev/null |
| +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll |
| @@ -0,0 +1,22 @@ |
| +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 |
| +; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s |
| + |
| +define i1 @test() { |
| +; CHECK-LABEL: define i1 @test() { |
| +; CHECK-NEXT: entry: |
| +; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> zeroinitializer |
| +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP0]]) |
| +; CHECK-NEXT: ret i1 [[TMP1]] |
| +; |
| +entry: |
| + %0 = extractelement <4 x fp128> zeroinitializer, i32 0 |
| + %cmp = fcmp ogt fp128 %0, 0xL00000000000000000000000000000000 |
| + %cmp3 = fcmp olt fp128 %0, 0xL00000000000000000000000000000000 |
| + %or.cond = and i1 %cmp, %cmp3 |
| + %1 = extractelement <4 x fp128> zeroinitializer, i32 0 |
| + %cmp6 = fcmp ogt fp128 %1, 0xL00000000000000000000000000000000 |
| + %or.cond29 = select i1 %or.cond, i1 %cmp6, i1 false |
| + %cmp10 = fcmp olt fp128 %1, 0xL00000000000000000000000000000000 |
| + %or.cond30 = select i1 %or.cond29, i1 %cmp10, i1 false |
| + ret i1 %or.cond30 |
| +} |