blob: fc922728d9da57b192f076ae907e5b4e8864b570 [file] [log] [blame]
commit 455d43b951ae31dac133a8650532797f01343a36
Author: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri Feb 26 14:55:02 2021 +0000
[Utils] collectBitParts - bail for integers > 128-bits
collectBitParts uses int8_t for the bit indices, leaving a 128-bit limit.
We already test for this before calling collectBitParts, but rGb94c215592bd added truncate handling which meant we could end up processing wider integers.
Thanks to @manojgupta for the repro.
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 23cd5e47e6f8..915e5d7eba69 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -2851,6 +2851,10 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
auto &Result = BPS[V] = None;
auto BitWidth = V->getType()->getScalarSizeInBits();
+ // Can't do integer/elements > 128 bits.
+ if (BitWidth > 128)
+ return Result;
+
// Prevent stack overflow by limiting the recursion depth
if (Depth == BitPartRecursionMaxDepth) {
LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n");
diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll
index 8d0adcf15eec..32caf4a89d60 100644
--- a/llvm/test/Transforms/InstCombine/bswap.ll
+++ b/llvm/test/Transforms/InstCombine/bswap.ll
@@ -736,6 +736,31 @@ define i32 @funnel_and(i32 %abcd) {
ret i32 %dcba
}
+; Don't attempt to collectBitParts from >128 bit integers
+define i16 @trunc_bswap_i160(i160* %a0) {
+; CHECK-LABEL: @trunc_bswap_i160(
+; CHECK-NEXT: [[LOAD:%.*]] = load i160, i160* [[A0:%.*]], align 4
+; CHECK-NEXT: [[LSHR1:%.*]] = lshr i160 [[LOAD]], 136
+; CHECK-NEXT: [[CAST1:%.*]] = trunc i160 [[LSHR1]] to i16
+; CHECK-NEXT: [[AND1:%.*]] = and i16 [[CAST1]], 255
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[LOAD]], 120
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i16
+; CHECK-NEXT: [[SHL:%.*]] = and i16 [[TMP2]], -256
+; CHECK-NEXT: [[OR:%.*]] = or i16 [[AND1]], [[SHL]]
+; CHECK-NEXT: ret i16 [[OR]]
+;
+ %load = load i160, i160* %a0, align 4
+ %lshr0 = lshr i160 %load, 128
+ %lshr1 = lshr i160 %load, 136
+ %cast0 = trunc i160 %lshr0 to i16
+ %cast1 = trunc i160 %lshr1 to i16
+ %and0 = and i16 %cast0, 255
+ %and1 = and i16 %cast1, 255
+ %shl = shl i16 %and0, 8
+ %or = or i16 %and1, %shl
+ ret i16 %or
+}
+
; PR47191 - deep IR trees prevent ADD/XOR instructions being simplified to OR.
define i64 @PR47191_problem1(i64 %0) {