| commit 455d43b951ae31dac133a8650532797f01343a36 |
| Author: Simon Pilgrim <llvm-dev@redking.me.uk> |
| Date: Fri Feb 26 14:55:02 2021 +0000 |
| |
| [Utils] collectBitParts - bail for integers > 128-bits |
| |
| collectBitParts uses int8_t for the bit indices, leaving a 128-bit limit. |
| |
| We already test for this before calling collectBitParts, but rGb94c215592bd added truncate handling which meant we could end up processing wider integers. |
| |
| Thanks to @manojgupta for the repro. |
| |
| diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp |
| index 23cd5e47e6f8..915e5d7eba69 100644 |
| --- a/llvm/lib/Transforms/Utils/Local.cpp |
| +++ b/llvm/lib/Transforms/Utils/Local.cpp |
| @@ -2851,6 +2851,10 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, |
| auto &Result = BPS[V] = None; |
| auto BitWidth = V->getType()->getScalarSizeInBits(); |
| |
| + // Can't do integer/elements > 128 bits. |
| + if (BitWidth > 128) |
| + return Result; |
| + |
| // Prevent stack overflow by limiting the recursion depth |
| if (Depth == BitPartRecursionMaxDepth) { |
| LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n"); |
| diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll |
| index 8d0adcf15eec..32caf4a89d60 100644 |
| --- a/llvm/test/Transforms/InstCombine/bswap.ll |
| +++ b/llvm/test/Transforms/InstCombine/bswap.ll |
| @@ -736,6 +736,31 @@ define i32 @funnel_and(i32 %abcd) { |
| ret i32 %dcba |
| } |
| |
| +; Don't attempt to collectBitParts from >128 bit integers |
| +define i16 @trunc_bswap_i160(i160* %a0) { |
| +; CHECK-LABEL: @trunc_bswap_i160( |
| +; CHECK-NEXT: [[LOAD:%.*]] = load i160, i160* [[A0:%.*]], align 4 |
| +; CHECK-NEXT: [[LSHR1:%.*]] = lshr i160 [[LOAD]], 136 |
| +; CHECK-NEXT: [[CAST1:%.*]] = trunc i160 [[LSHR1]] to i16 |
| +; CHECK-NEXT: [[AND1:%.*]] = and i16 [[CAST1]], 255 |
| +; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[LOAD]], 120 |
| +; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i16 |
| +; CHECK-NEXT: [[SHL:%.*]] = and i16 [[TMP2]], -256 |
| +; CHECK-NEXT: [[OR:%.*]] = or i16 [[AND1]], [[SHL]] |
| +; CHECK-NEXT: ret i16 [[OR]] |
| +; |
| + %load = load i160, i160* %a0, align 4 |
| + %lshr0 = lshr i160 %load, 128 |
| + %lshr1 = lshr i160 %load, 136 |
| + %cast0 = trunc i160 %lshr0 to i16 |
| + %cast1 = trunc i160 %lshr1 to i16 |
| + %and0 = and i16 %cast0, 255 |
| + %and1 = and i16 %cast1, 255 |
| + %shl = shl i16 %and0, 8 |
| + %or = or i16 %and1, %shl |
| + ret i16 %or |
| +} |
| + |
| ; PR47191 - deep IR trees prevent ADD/XOR instructions being simplified to OR. |
| |
| define i64 @PR47191_problem1(i64 %0) { |