blob: 555d1eab1770735c1c4002bc00499a375fb5d6ec [file] [log] [blame]
commit 14845b2c459021e3dbf2ead52d707d4a7db40cbb
Author: Philip Reames <listmail@philipreames.com>
Date: Tue Feb 25 08:56:53 2020 -0800
Revert "[LICM] Support hosting of dynamic allocas out of loops"
This reverts commit 8d22100f66c4170510c6ff028c60672acfe1cff9.
There was a functional regression reported (https://bugs.llvm.org/show_bug.cgi?id=44996). I'm not actually sure the patch is wrong, but I don't have time to investigate currently, and this line of work isn't something I'm likely to get back to quickly.
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 15d40a50e60..d429fc24dc3 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -742,41 +742,6 @@ public:
};
} // namespace
-
-/// Return true if we know how to rewrite all uses of the given alloca after
-/// hoisting it out of the loop. The main concerns are a) potential captures
-/// and b) invariant.start markers which don't capture, but are no longer
-/// valid w/o a corresponding invariant.end.
-static bool canRewriteUsesOfAlloca(AllocaInst &AI) {
- // TODO: This looks a lot like capture tracking, but we need to remove any
- // invariant starts if we extend the lifetime of the alloca by hoisting it.
- // We should probably refactor capture tracking into a form which allows us
- // to reuse the relevant bits and remove the duplicated logic here.
-
- SmallVector<Use *, 16> Worklist;
- for (Use &U : AI.uses())
- Worklist.push_back(&U);
-
- unsigned NumUsesExplored = 0;
- while (!Worklist.empty()) {
- Use *U = Worklist.pop_back_val();
- Instruction *I = cast<Instruction>(U->getUser());
- NumUsesExplored++;
- if (NumUsesExplored > DefaultMaxUsesToExplore)
- return false;
- // Non capturing, terminating uses
- if (isa<LoadInst>(I) ||
- (isa<StoreInst>(I) && U->getOperandNo() == 1))
- continue;
- // Non capturing, non-terminating
- if (!isa<BitCastInst>(I) && !isa<GetElementPtrInst>(I))
- return false;
- for (Use &U : I->uses())
- Worklist.push_back(&U);
- }
- return true;
-}
-
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
@@ -897,16 +862,6 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
continue;
}
- if (isa<AllocaInst>(&I) &&
- SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
- canRewriteUsesOfAlloca(cast<AllocaInst>(I))) {
- hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
- MSSAU, SE, ORE);
- HoistedInstructions.push_back(&I);
- Changed = true;
- continue;
- }
-
if (PHINode *PN = dyn_cast<PHINode>(&I)) {
if (CFH.canHoistPHI(PN)) {
// Redirect incoming blocks first to ensure that we create hoisted
diff --git a/llvm/test/Transforms/LICM/hoist-alloca.ll b/llvm/test/Transforms/LICM/hoist-alloca.ll
deleted file mode 100644
index 8e4debac283..00000000000
--- a/llvm/test/Transforms/LICM/hoist-alloca.ll
+++ /dev/null
@@ -1,168 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -licm < %s | FileCheck %s
-
-@G = external global i64
-
-define void @test(i64 %n) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca i64
-; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[A]]
-; CHECK-NEXT: store i64 [[VAL]], i64* @G
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
-;
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
- %a = alloca i64
- %val = load i64, i64* %a
- store i64 %val, i64* @G
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond = icmp ult i64 %iv, %n
- br i1 %exitcond, label %for.body, label %exit
-exit:
- ret void
-}
-
-define void @test2(i64 %n) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca i64
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
-; CHECK: exit:
-; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], [[FOR_BODY]] ]
-; CHECK-NEXT: store i64 [[IV_LCSSA]], i64* [[A]], align 4
-; CHECK-NEXT: ret void
-;
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
- %a = alloca i64
- store i64 %iv, i64* %a
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond = icmp ult i64 %iv, %n
- br i1 %exitcond, label %for.body, label %exit
-exit:
- ret void
-}
-
-
-define void @test3(i64 %n) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca i64
-; CHECK-NEXT: [[A_I8:%.*]] = bitcast i64* [[A]] to i8*
-; CHECK-NEXT: [[A_OFFSET:%.*]] = getelementptr i8, i8* [[A_I8]], i64 4
-; CHECK-NEXT: store i8 0, i8* [[A_OFFSET]]
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
-;
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
- %a = alloca i64
- %a.i8 = bitcast i64* %a to i8*
- %a.offset = getelementptr i8, i8* %a.i8, i64 4
- store i8 0, i8* %a.offset
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond = icmp ult i64 %iv, %n
- br i1 %exitcond, label %for.body, label %exit
-exit:
- ret void
-}
-
-; This example is subtle. Because the dynamic alloca isn't reclaimed until
-; end of function scope, the captured value can legally point to a dynamic
-; alloca stack region from a previous iteration.
-define void @test4(i64 %n) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[A:%.*]] = alloca i64
-; CHECK-NEXT: store i64 [[IV]], i64* [[A]]
-; CHECK-NEXT: call void @capture(i64* [[A]])
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
-;
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
- %a = alloca i64
- store i64 %iv, i64* %a
- %a.i8 = bitcast i64* %a to i8*
- call void @capture(i64* %a)
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond = icmp ult i64 %iv, %n
- br i1 %exitcond, label %for.body, label %exit
-exit:
- ret void
-}
-declare void @capture(i64* %a)
-
-
-; TODO: not yet handled
-define void @test5(i64 %n) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[A:%.*]] = alloca i64
-; CHECK-NEXT: store i64 [[IV]], i64* [[A]]
-; CHECK-NEXT: [[A_I8:%.*]] = bitcast i64* [[A]] to i8*
-; CHECK-NEXT: [[TMP0:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 8, i8* [[A_I8]])
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
-;
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
- %a = alloca i64
- store i64 %iv, i64* %a
- %a.i8 = bitcast i64* %a to i8*
- call {}* @llvm.invariant.start.p0i8(i64 8, i8* %a.i8)
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond = icmp ult i64 %iv, %n
- br i1 %exitcond, label %for.body, label %exit
-exit:
- ret void
-}
-
-declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
-