sys-devel/llvm/files/cherry/b9ed8ebe0e2ffa803b0bda60f9bbc9bb26f95000.patch - third_party/overlays/chromiumos-overlay - Git at Google

 commit b9ed8ebe0e2ffa803b0bda60f9bbc9bb26f95000
 Author: Tomas Matheson <tomas.matheson@arm.com>
 Date:   Wed Jan 20 15:55:26 2021 +0000

     [ARM][RegisterScavenging] Don't consider LR liveout if it is not reloaded

     https://bugs.llvm.org/show_bug.cgi?id=48232

     When PrologEpilogInserter writes callee-saved registers to the stack, LR is not reloaded but is instead loaded directly into PC.
     This was not taken into account when determining if each callee-saved register was liveout for the block.
     When frame elimination inserts virtual registers, and the register scavenger tries to scavenge LR, it considers it liveout and tries to spill again.
     However there is no emergency spill slot to use, and it fails with an error:

         fatal error: error in backend: Error while trying to spill LR from class GPR: Cannot scavenge register without an emergency spill slot!

     This patch pervents any callee-saved registers which are not reloaded (including LR) from being marked liveout.
     They are therefore available to scavenge without requiring an extra spill.

 diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp
 index ea2075bc139d..d8d8bd5d61a2 100644
 --- a/llvm/lib/CodeGen/LiveRegUnits.cpp
 +++ b/llvm/lib/CodeGen/LiveRegUnits.cpp
 @@ -81,8 +81,17 @@ static void addBlockLiveIns(LiveRegUnits &LiveUnits,
  static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
                                 const MachineFunction &MF) {
    const MachineRegisterInfo &MRI = MF.getRegInfo();
 -  for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
 -    LiveUnits.addReg(*CSR);
 +  const MachineFrameInfo &MFI = MF.getFrameInfo();
 +  for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) {
 +    const unsigned N = *CSR;
 +
 +    const auto &CSI = MFI.getCalleeSavedInfo();
 +    auto Info =
 +        llvm::find_if(CSI, [N](auto Info) { return Info.getReg() == N; });
 +    // If we have no info for this callee-saved register, assume it is liveout
 +    if (Info == CSI.end() || Info->isRestored())
 +      LiveUnits.addReg(N);
 +  }
  }

  void LiveRegUnits::addPristines(const MachineFunction &MF) {
 diff --git a/llvm/test/CodeGen/AArch64/scavenge-lr.mir b/llvm/test/CodeGen/AArch64/scavenge-lr.mir
 new file mode 100644
 index 000000000000..a2296c12eb60
 --- /dev/null
 +++ b/llvm/test/CodeGen/AArch64/scavenge-lr.mir
 @@ -0,0 +1,221 @@
 +# RUN: llc -mtriple=thumbv7-unknown-linux-android30 -run-pass=prologepilog -verify-machineinstrs %s -o - | FileCheck %s
 +
 +# When saving and restoring callee-saved registers, LR is saved but not restored,
 +# because it is reloaded directly into PC. Therefore it should be available to scavenge
 +# without requiring an emergency spill slot.
 +
 +# Used to result in
 +#   LLVM ERROR: Error while trying to spill LR from class GPR: Cannot scavenge register without an emergency spill slot!
 +
 +# Check that LR is considered live in
 +# CHECK: liveins: {{.*}}$lr
 +
 +# Check that LR is saved to the stack
 +# CHECK: frame-setup t2STMDB_UPD {{.*}} killed $lr
 +# CHECK: frame-setup CFI_INSTRUCTION offset $lr,
 +
 +# Check that LR was successfully scavenged somewhere in the function
 +# CHECK:  $lr = t2ADDri
 +# CHECK: VSTMQIA $q11, killed $lr
 +
 +# Check that LR is not restored at the end of the function
 +# CHECK-NOT: $lr = frame-destroy
 +# CHECK-NOT: frame-destroy VLDMDIA_UPD {{.*}} def $lr
 +# CHECK-NOT: frame-destroy t2LDMIA_RET {{.*}} def $lr
 +# CHECK: frame-destroy t2LDMIA_RET {{.*}} def $pc
 +
 +--- |
 +  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 +
 +  %S = type { [32 x i8] }
 +
 +  define void @f(%S* %arg) {
 +  entry:
 +    %ppp..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -8
 +    %ppp..sroa_cast248 = bitcast %S* %ppp..sroa_idx to <8 x float>*
 +    %ppp.copyload = load <8 x float>, <8 x float>* %ppp..sroa_cast248, align 32
 +
 +    %xxx..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -5
 +    %xxx..sroa_cast248 = bitcast %S* %xxx..sroa_idx to <8 x float>*
 +    %xxx.copyload = load <8 x float>, <8 x float>* %xxx..sroa_cast248, align 32
 +
 +    %yyy..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -2
 +    %yyy..sroa_cast244 = bitcast %S* %yyy..sroa_idx to <8 x float>*
 +    %yyy.copyload = load <8 x float>, <8 x float>* %yyy..sroa_cast244, align 32
 +
 +    %zzz..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -7
 +    %zzz..sroa_cast241 = bitcast %S* %zzz..sroa_idx to <8 x float>*
 +    %zzz.copyload = load <8 x float>, <8 x float>* %zzz..sroa_cast241, align 32
 +
 +    %www..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -4
 +    %www..sroa_cast238 = bitcast %S* %www..sroa_idx to <8 x float>*
 +    %www.copyload = load <8 x float>, <8 x float>* %www..sroa_cast238, align 32
 +
 +    %uuu..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 1
 +    %uuu..sroa_cast235 = bitcast %S* %uuu..sroa_idx to <8 x float>*
 +    %uuu.copyload = load <8 x float>, <8 x float>* %uuu..sroa_cast235, align 32
 +
 +    %vvv..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -6
 +    %vvv..sroa_cast230 = bitcast %S* %vvv..sroa_idx to <8 x float>*
 +    %vvv.copyload = load <8 x float>, <8 x float>* %vvv..sroa_cast230, align 32
 +
 +    %ttt..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -3
 +    %ttt..sroa_cast226 = bitcast %S* %ttt..sroa_idx to <8 x float>*
 +    %ttt.copyload = load <8 x float>, <8 x float>* %ttt..sroa_cast226, align 32
 +
 +    %sss..sroa_cast223 = bitcast %S* %arg to <8 x float>*
 +    %sss.copyload = load <8 x float>, <8 x float>* %sss..sroa_cast223, align 32
 +
 +    %mul.i = fmul <8 x float> %ppp.copyload, %www.copyload
 +    %mul.i185 = fmul <8 x float> %xxx.copyload, %uuu.copyload
 +    %mul.i179 = fmul <8 x float> %mul.i185, %vvv.copyload
 +    %mul.i173 = fmul <8 x float> %mul.i179, %ttt.copyload
 +    %mul.i167 = fmul <8 x float> %zzz.copyload, %mul.i173
 +    %add.i = fadd <8 x float> %mul.i, %mul.i167
 +    %div.i = fdiv <8 x float> zeroinitializer, %add.i
 +    %mul.i153 = fmul <8 x float> %uuu.copyload, %div.i
 +
 +    store <8 x float> %mul.i153, <8 x float>* %ppp..sroa_cast248, align 32
 +
 +    %mul.i147 = fmul <8 x float> %uuu.copyload, %vvv.copyload
 +    %mul.i141 = fmul <8 x float> %zzz.copyload, %sss.copyload
 +    %mul.i135 = fmul <8 x float> %mul.i141, %div.i
 +    %sub.i129 = fsub <8 x float> %mul.i147, %mul.i135
 +
 +    store <8 x float> %sub.i129, <8 x float>* %zzz..sroa_cast241, align 32
 +    store <8 x float> %div.i, <8 x float>* %vvv..sroa_cast230, align 32
 +    store <8 x float> %div.i, <8 x float>* %xxx..sroa_cast248, align 32
 +
 +    %mul.i123 = fmul <8 x float> %yyy.copyload, %vvv.copyload
 +    %mul.i117 = fmul <8 x float> %mul.i123, %div.i
 +    %sub.i111 = fsub <8 x float> %sss.copyload, %mul.i117
 +    store <8 x float> %sub.i111, <8 x float>* %www..sroa_cast238, align 32
 +
 +    %mul.i105 = fmul <8 x float> %ppp.copyload, %ttt.copyload
 +    %mul.i99 = fmul <8 x float> %mul.i105, %div.i
 +    %sub.i93 = fsub <8 x float> %xxx.copyload, %mul.i99
 +    store <8 x float> %sub.i93, <8 x float>* %ttt..sroa_cast226, align 32
 +
 +    %mul.i81 = fmul <8 x float> %yyy.copyload, %www.copyload
 +    %mul.i75 = fmul <8 x float> %mul.i81, %div.i
 +    %sub.i = fsub <8 x float> %mul.i185, %mul.i75
 +    store <8 x float> %sub.i, <8 x float>* %yyy..sroa_cast244, align 32
 +
 +    ret void
 +  }
 +...
 +---
 +name:            f
 +alignment:       2
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$r0' }
 +frameInfo:
 +  maxAlignment:    16
 +  maxCallFrameSize: 0
 +stack:
 +  - { id: 0, type: spill-slot, size: 16, alignment: 16 }
 +  - { id: 1, type: spill-slot, size: 16, alignment: 16 }
 +  - { id: 2, type: spill-slot, size: 16, alignment: 16 }
 +  - { id: 3, type: spill-slot, size: 16, alignment: 16 }
 +constants:
 +  - id:              0
 +    value:           'float 0.000000e+00'
 +    alignment:       4
 +machineFunctionInfo: {}
 +body:             |
 +  bb.0.entry:
 +    liveins: $r0
 +    $r2 = t2SUBri $r0, 128, 14 /* CC::al */, $noreg, $noreg
 +    $q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.www..sroa_cast238, align 32)
 +    VSTMQIA $q8, %stack.0, 14 /* CC::al */, $noreg :: (store 16 into %stack.0)
 +    $r12 = t2SUBri $r0, 256, 14 /* CC::al */, $noreg, $noreg
 +    $q12 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ppp..sroa_cast248, align 32)
 +    $q1 = VMULfq $q12, killed $q8, 14 /* CC::al */, $noreg
 +    $r3 = nuw t2ADDri $r0, 32, 14 /* CC::al */, $noreg, $noreg
 +    $q10 = VLD1q64 killed $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.uuu..sroa_cast235, align 32)
 +    $r5 = t2SUBri $r0, 160, 14 /* CC::al */, $noreg, $noreg
 +    $q15 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.xxx..sroa_cast248, align 32)
 +    $q14 = VMULfq $q15, $q10, 14 /* CC::al */, $noreg
 +    $r6 = t2SUBri $r0, 192, 14 /* CC::al */, $noreg, $noreg
 +    $q13 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.vvv..sroa_cast230, align 32)
 +    $q8 = VMULfq $q14, $q13, 14 /* CC::al */, $noreg
 +    $r4 = t2SUBri $r0, 96, 14 /* CC::al */, $noreg, $noreg
 +    $q6 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ttt..sroa_cast226, align 32)
 +    $q8 = VMULfq killed $q8, $q6, 14 /* CC::al */, $noreg
 +    $r3 = t2SUBri $r0, 224, 14 /* CC::al */, $noreg, $noreg
 +    $q5 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.zzz..sroa_cast241, align 32)
 +    $q1 = VMLAfq killed $q1, $q5, killed $q8, 14 /* CC::al */, $noreg
 +    $s8 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool)
 +    $s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0
 +    $s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
 +    $s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
 +    $s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0
 +    $r7 = t2SUBri $r0, 64, 14 /* CC::al */, $noreg, $noreg
 +    $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.yyy..sroa_cast244, align 32)
 +    VSTMQIA $q8, %stack.1, 14 /* CC::al */, $noreg :: (store 16 into %stack.1)
 +    $q8 = VMULfq killed $q8, $q13, 14 /* CC::al */, $noreg
 +    $r1 = t2ADDri $r0, 48, 14 /* CC::al */, $noreg, $noreg
 +    $q9, $r0 = VLD1q32wb_fixed killed $r0, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.sss..sroa_cast223, align 32)
 +    $q11 = COPY $q9
 +    $q11 = VMLSfq killed $q11, killed $q8, $q0, 14 /* CC::al */, $noreg
 +    $r2 = VST1q32wb_fixed killed $r2, 16, killed $q11, 14 /* CC::al */, $noreg :: (store 16 into %ir.www..sroa_cast238, align 32)
 +    $q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.www..sroa_cast238 + 16, basealign 32)
 +    VSTMQIA $q8, %stack.3, 14 /* CC::al */, $noreg :: (store 16 into %stack.3)
 +    $q11 = VMULfq $q10, $q0, 14 /* CC::al */, $noreg
 +    $r12 = VST1q32wb_fixed killed $r12, 16, killed $q11, 14 /* CC::al */, $noreg :: (store 16 into %ir.ppp..sroa_cast248, align 32)
 +    $q11 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ppp..sroa_cast248 + 16, basealign 32)
 +    VSTMQIA $q11, %stack.2, 14 /* CC::al */, $noreg :: (store 16 into %stack.2)
 +    $q1 = VMULfq killed $q11, killed $q8, 14 /* CC::al */, $noreg
 +    $r5 = VST1q32wb_fixed killed $r5, 16, $q0, 14 /* CC::al */, $noreg :: (store 16 into %ir.xxx..sroa_cast248, align 32)
 +    $q4 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.xxx..sroa_cast248 + 16, basealign 32)
 +    $q11 = VLD1q64 killed $r1, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.uuu..sroa_cast235 + 16, basealign 32)
 +    $q7 = VMULfq $q4, $q11, 14 /* CC::al */, $noreg
 +    $r6 = VST1q32wb_fixed killed $r6, 16, $q0, 14 /* CC::al */, $noreg :: (store 16 into %ir.vvv..sroa_cast230, align 32)
 +    $q3 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.vvv..sroa_cast230 + 16, basealign 32)
 +    $q8 = VMULfq $q7, $q3, 14 /* CC::al */, $noreg
 +    $q12 = VMULfq killed $q12, killed $q6, 14 /* CC::al */, $noreg
 +    $q15 = VMLSfq killed $q15, killed $q12, $q0, 14 /* CC::al */, $noreg
 +    $r4 = VST1q32wb_fixed killed $r4, 16, killed $q15, 14 /* CC::al */, $noreg :: (store 16 into %ir.ttt..sroa_cast226, align 32)
 +    $q12 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ttt..sroa_cast226 + 16, basealign 32)
 +    $q8 = VMULfq killed $q8, $q12, 14 /* CC::al */, $noreg
 +    $q9 = VMULfq killed $q5, killed $q9, 14 /* CC::al */, $noreg
 +    $q10 = VMULfq killed $q10, killed $q13, 14 /* CC::al */, $noreg
 +    $q10 = VMLSfq killed $q10, killed $q9, $q0, 14 /* CC::al */, $noreg
 +    $r3 = VST1q32wb_fixed killed $r3, 16, killed $q10, 14 /* CC::al */, $noreg :: (store 16 into %ir.zzz..sroa_cast241, align 32)
 +    $q10 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.zzz..sroa_cast241 + 16, basealign 32)
 +    $q1 = VMLAfq killed $q1, $q10, killed $q8, 14 /* CC::al */, $noreg
 +    $s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5
 +    $s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
 +    $s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
 +    $s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5
 +    VST1q64 killed $r5, 16, $q5, 14 /* CC::al */, $noreg :: (store 16 into %ir.xxx..sroa_cast248 + 16, basealign 32)
 +    VST1q64 killed $r6, 16, $q5, 14 /* CC::al */, $noreg :: (store 16 into %ir.vvv..sroa_cast230 + 16, basealign 32)
 +    $q8 = VLDMQIA %stack.0, 14 /* CC::al */, $noreg :: (load 16 from %stack.0)
 +    $q9 = VLDMQIA %stack.1, 14 /* CC::al */, $noreg :: (load 16 from %stack.1)
 +    $q8 = VMULfq killed $q9, killed $q8, 14 /* CC::al */, $noreg
 +    $q14 = VMLSfq killed $q14, killed $q8, killed $q0, 14 /* CC::al */, $noreg
 +    $r7 = VST1q32wb_fixed killed $r7, 16, killed $q14, 14 /* CC::al */, $noreg :: (store 16 into %ir.yyy..sroa_cast244, align 32)
 +    $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.yyy..sroa_cast244 + 16, basealign 32)
 +    $q9 = VLDMQIA %stack.3, 14 /* CC::al */, $noreg :: (load 16 from %stack.3)
 +    $q9 = VMULfq $q8, killed $q9, 14 /* CC::al */, $noreg
 +    $q7 = VMLSfq killed $q7, killed $q9, $q5, 14 /* CC::al */, $noreg
 +    VST1q64 killed $r7, 16, killed $q7, 14 /* CC::al */, $noreg :: (store 16 into %ir.yyy..sroa_cast244 + 16, basealign 32)
 +    $q9 = VLDMQIA %stack.2, 14 /* CC::al */, $noreg :: (load 16 from %stack.2)
 +    $q9 = VMULfq killed $q9, killed $q12, 14 /* CC::al */, $noreg
 +    $q4 = VMLSfq killed $q4, killed $q9, $q5, 14 /* CC::al */, $noreg
 +    VST1q64 killed $r4, 16, killed $q4, 14 /* CC::al */, $noreg :: (store 16 into %ir.ttt..sroa_cast226 + 16, basealign 32)
 +    $q8 = VMULfq killed $q8, $q3, 14 /* CC::al */, $noreg
 +    $q9 = VLD1q64 killed $r0, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.sss..sroa_cast223 + 16, basealign 32)
 +    $q12 = COPY $q9
 +    $q12 = VMLSfq killed $q12, killed $q8, $q5, 14 /* CC::al */, $noreg
 +    VST1q64 killed $r2, 16, killed $q12, 14 /* CC::al */, $noreg :: (store 16 into %ir.www..sroa_cast238 + 16, basealign 32)
 +    $q8 = VMULfq $q11, killed $q3, 14 /* CC::al */, $noreg
 +    $q9 = VMULfq killed $q10, killed $q9, 14 /* CC::al */, $noreg
 +    $q8 = VMLSfq killed $q8, killed $q9, $q5, 14 /* CC::al */, $noreg
 +    VST1q64 killed $r3, 16, killed $q8, 14 /* CC::al */, $noreg :: (store 16 into %ir.zzz..sroa_cast241 + 16, basealign 32)
 +    $q8 = VMULfq killed $q11, killed $q5, 14 /* CC::al */, $noreg
 +    VST1q64 killed $r12, 16, killed $q8, 14 /* CC::al */, $noreg :: (store 16 into %ir.ppp..sroa_cast248 + 16, basealign 32)
 +    tBX_RET 14 /* CC::al */, $noreg
 +
 +...
 diff --git a/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll b/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll
 index a24637870b31..8449b4a9989b 100644
 --- a/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll
 +++ b/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll
 @@ -35,18 +35,18 @@ define arm_aapcs_vfpcc void @spill_multivector(<4 x i32>* %p) {
  ; CHECK-NEXT:    vld21.32 {q4, q5}, [r0]
  ; CHECK-NEXT:    bl external_function
  ; CHECK-NEXT:    vldmia sp, {d2, d3, d4, d5} @ 32-byte Reload
 -; CHECK-NEXT:    add r0, sp, #32
 +; CHECK-NEXT:    add.w lr, sp, #32
  ; CHECK-NEXT:    vstrw.32 q2, [r4, #80]
  ; CHECK-NEXT:    vstrw.32 q5, [r4, #144]
  ; CHECK-NEXT:    vstrw.32 q4, [r4, #128]
  ; CHECK-NEXT:    vstrw.32 q7, [r4, #112]
  ; CHECK-NEXT:    vstrw.32 q1, [r4, #64]
 -; CHECK-NEXT:    vldmia r0, {d2, d3, d4, d5} @ 32-byte Reload
 -; CHECK-NEXT:    add r0, sp, #64
 +; CHECK-NEXT:    vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
 +; CHECK-NEXT:    add.w lr, sp, #64
  ; CHECK-NEXT:    vstrw.32 q2, [r4, #48]
  ; CHECK-NEXT:    vstrw.32 q6, [r4, #96]
  ; CHECK-NEXT:    vstrw.32 q1, [r5]
 -; CHECK-NEXT:    vldmia r0, {d2, d3, d4, d5} @ 32-byte Reload
 +; CHECK-NEXT:    vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
  ; CHECK-NEXT:    vstrw.32 q2, [r4, #16]
  ; CHECK-NEXT:    vstrw.32 q1, [r4]
  ; CHECK-NEXT:    add sp, #112
	commit b9ed8ebe0e2ffa803b0bda60f9bbc9bb26f95000
	Author: Tomas Matheson <tomas.matheson@arm.com>
	Date: Wed Jan 20 15:55:26 2021 +0000

	[ARM][RegisterScavenging] Don't consider LR liveout if it is not reloaded

	https://bugs.llvm.org/show_bug.cgi?id=48232

	When PrologEpilogInserter writes callee-saved registers to the stack, LR is not reloaded but is instead loaded directly into PC.
	This was not taken into account when determining if each callee-saved register was liveout for the block.
	When frame elimination inserts virtual registers, and the register scavenger tries to scavenge LR, it considers it liveout and tries to spill again.
	However there is no emergency spill slot to use, and it fails with an error:

	fatal error: error in backend: Error while trying to spill LR from class GPR: Cannot scavenge register without an emergency spill slot!

	This patch pervents any callee-saved registers which are not reloaded (including LR) from being marked liveout.
	They are therefore available to scavenge without requiring an extra spill.

	diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp
	index ea2075bc139d..d8d8bd5d61a2 100644
	--- a/llvm/lib/CodeGen/LiveRegUnits.cpp
	+++ b/llvm/lib/CodeGen/LiveRegUnits.cpp
	@@ -81,8 +81,17 @@ static void addBlockLiveIns(LiveRegUnits &LiveUnits,
	static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
	const MachineFunction &MF) {
	const MachineRegisterInfo &MRI = MF.getRegInfo();
	- for (const MCPhysReg CSR = MRI.getCalleeSavedRegs(); CSR && CSR; ++CSR)
	- LiveUnits.addReg(*CSR);
	+ const MachineFrameInfo &MFI = MF.getFrameInfo();
	+ for (const MCPhysReg CSR = MRI.getCalleeSavedRegs(); CSR && CSR; ++CSR) {
	+ const unsigned N = *CSR;
	+
	+ const auto &CSI = MFI.getCalleeSavedInfo();
	+ auto Info =
	+ llvm::find_if(CSI, [N](auto Info) { return Info.getReg() == N; });
	+ // If we have no info for this callee-saved register, assume it is liveout
	+ if (Info == CSI.end() \|\| Info->isRestored())
	+ LiveUnits.addReg(N);
	+ }
	}

	void LiveRegUnits::addPristines(const MachineFunction &MF) {
	diff --git a/llvm/test/CodeGen/AArch64/scavenge-lr.mir b/llvm/test/CodeGen/AArch64/scavenge-lr.mir
	new file mode 100644
	index 000000000000..a2296c12eb60
	--- /dev/null
	+++ b/llvm/test/CodeGen/AArch64/scavenge-lr.mir
	@@ -0,0 +1,221 @@
	+# RUN: llc -mtriple=thumbv7-unknown-linux-android30 -run-pass=prologepilog -verify-machineinstrs %s -o - \| FileCheck %s
	+
	+# When saving and restoring callee-saved registers, LR is saved but not restored,
	+# because it is reloaded directly into PC. Therefore it should be available to scavenge
	+# without requiring an emergency spill slot.
	+
	+# Used to result in
	+# LLVM ERROR: Error while trying to spill LR from class GPR: Cannot scavenge register without an emergency spill slot!
	+
	+# Check that LR is considered live in
	+# CHECK: liveins: {{.*}}$lr
	+
	+# Check that LR is saved to the stack
	+# CHECK: frame-setup t2STMDB_UPD {{.*}} killed $lr
	+# CHECK: frame-setup CFI_INSTRUCTION offset $lr,
	+
	+# Check that LR was successfully scavenged somewhere in the function
	+# CHECK: $lr = t2ADDri
	+# CHECK: VSTMQIA $q11, killed $lr
	+
	+# Check that LR is not restored at the end of the function
	+# CHECK-NOT: $lr = frame-destroy
	+# CHECK-NOT: frame-destroy VLDMDIA_UPD {{.*}} def $lr
	+# CHECK-NOT: frame-destroy t2LDMIA_RET {{.*}} def $lr
	+# CHECK: frame-destroy t2LDMIA_RET {{.*}} def $pc
	+
	+--- \|
	+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
	+
	+ %S = type { [32 x i8] }
	+
	+ define void @f(%S* %arg) {
	+ entry:
	+ %ppp..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -8
	+ %ppp..sroa_cast248 = bitcast %S* %ppp..sroa_idx to <8 x float>*
	+ %ppp.copyload = load <8 x float>, <8 x float>* %ppp..sroa_cast248, align 32
	+
	+ %xxx..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -5
	+ %xxx..sroa_cast248 = bitcast %S* %xxx..sroa_idx to <8 x float>*
	+ %xxx.copyload = load <8 x float>, <8 x float>* %xxx..sroa_cast248, align 32
	+
	+ %yyy..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -2
	+ %yyy..sroa_cast244 = bitcast %S* %yyy..sroa_idx to <8 x float>*
	+ %yyy.copyload = load <8 x float>, <8 x float>* %yyy..sroa_cast244, align 32
	+
	+ %zzz..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -7
	+ %zzz..sroa_cast241 = bitcast %S* %zzz..sroa_idx to <8 x float>*
	+ %zzz.copyload = load <8 x float>, <8 x float>* %zzz..sroa_cast241, align 32
	+
	+ %www..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -4
	+ %www..sroa_cast238 = bitcast %S* %www..sroa_idx to <8 x float>*
	+ %www.copyload = load <8 x float>, <8 x float>* %www..sroa_cast238, align 32
	+
	+ %uuu..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 1
	+ %uuu..sroa_cast235 = bitcast %S* %uuu..sroa_idx to <8 x float>*
	+ %uuu.copyload = load <8 x float>, <8 x float>* %uuu..sroa_cast235, align 32
	+
	+ %vvv..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -6
	+ %vvv..sroa_cast230 = bitcast %S* %vvv..sroa_idx to <8 x float>*
	+ %vvv.copyload = load <8 x float>, <8 x float>* %vvv..sroa_cast230, align 32
	+
	+ %ttt..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -3
	+ %ttt..sroa_cast226 = bitcast %S* %ttt..sroa_idx to <8 x float>*
	+ %ttt.copyload = load <8 x float>, <8 x float>* %ttt..sroa_cast226, align 32
	+
	+ %sss..sroa_cast223 = bitcast %S* %arg to <8 x float>*
	+ %sss.copyload = load <8 x float>, <8 x float>* %sss..sroa_cast223, align 32
	+
	+ %mul.i = fmul <8 x float> %ppp.copyload, %www.copyload
	+ %mul.i185 = fmul <8 x float> %xxx.copyload, %uuu.copyload
	+ %mul.i179 = fmul <8 x float> %mul.i185, %vvv.copyload
	+ %mul.i173 = fmul <8 x float> %mul.i179, %ttt.copyload
	+ %mul.i167 = fmul <8 x float> %zzz.copyload, %mul.i173
	+ %add.i = fadd <8 x float> %mul.i, %mul.i167
	+ %div.i = fdiv <8 x float> zeroinitializer, %add.i
	+ %mul.i153 = fmul <8 x float> %uuu.copyload, %div.i
	+
	+ store <8 x float> %mul.i153, <8 x float>* %ppp..sroa_cast248, align 32
	+
	+ %mul.i147 = fmul <8 x float> %uuu.copyload, %vvv.copyload
	+ %mul.i141 = fmul <8 x float> %zzz.copyload, %sss.copyload
	+ %mul.i135 = fmul <8 x float> %mul.i141, %div.i
	+ %sub.i129 = fsub <8 x float> %mul.i147, %mul.i135
	+
	+ store <8 x float> %sub.i129, <8 x float>* %zzz..sroa_cast241, align 32
	+ store <8 x float> %div.i, <8 x float>* %vvv..sroa_cast230, align 32
	+ store <8 x float> %div.i, <8 x float>* %xxx..sroa_cast248, align 32
	+
	+ %mul.i123 = fmul <8 x float> %yyy.copyload, %vvv.copyload
	+ %mul.i117 = fmul <8 x float> %mul.i123, %div.i
	+ %sub.i111 = fsub <8 x float> %sss.copyload, %mul.i117
	+ store <8 x float> %sub.i111, <8 x float>* %www..sroa_cast238, align 32
	+
	+ %mul.i105 = fmul <8 x float> %ppp.copyload, %ttt.copyload
	+ %mul.i99 = fmul <8 x float> %mul.i105, %div.i
	+ %sub.i93 = fsub <8 x float> %xxx.copyload, %mul.i99
	+ store <8 x float> %sub.i93, <8 x float>* %ttt..sroa_cast226, align 32
	+
	+ %mul.i81 = fmul <8 x float> %yyy.copyload, %www.copyload
	+ %mul.i75 = fmul <8 x float> %mul.i81, %div.i
	+ %sub.i = fsub <8 x float> %mul.i185, %mul.i75
	+ store <8 x float> %sub.i, <8 x float>* %yyy..sroa_cast244, align 32
	+
	+ ret void
	+ }
	+...
	+---
	+name: f
	+alignment: 2
	+tracksRegLiveness: true
	+liveins:
	+ - { reg: '$r0' }
	+frameInfo:
	+ maxAlignment: 16
	+ maxCallFrameSize: 0
	+stack:
	+ - { id: 0, type: spill-slot, size: 16, alignment: 16 }
	+ - { id: 1, type: spill-slot, size: 16, alignment: 16 }
	+ - { id: 2, type: spill-slot, size: 16, alignment: 16 }
	+ - { id: 3, type: spill-slot, size: 16, alignment: 16 }
	+constants:
	+ - id: 0
	+ value: 'float 0.000000e+00'
	+ alignment: 4
	+machineFunctionInfo: {}
	+body: \|
	+ bb.0.entry:
	+ liveins: $r0
	+ $r2 = t2SUBri $r0, 128, 14 /* CC::al */, $noreg, $noreg
	+ $q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.www..sroa_cast238, align 32)
	+ VSTMQIA $q8, %stack.0, 14 /* CC::al */, $noreg :: (store 16 into %stack.0)
	+ $r12 = t2SUBri $r0, 256, 14 /* CC::al */, $noreg, $noreg
	+ $q12 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ppp..sroa_cast248, align 32)
	+ $q1 = VMULfq $q12, killed $q8, 14 /* CC::al */, $noreg
	+ $r3 = nuw t2ADDri $r0, 32, 14 /* CC::al */, $noreg, $noreg
	+ $q10 = VLD1q64 killed $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.uuu..sroa_cast235, align 32)
	+ $r5 = t2SUBri $r0, 160, 14 /* CC::al */, $noreg, $noreg
	+ $q15 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.xxx..sroa_cast248, align 32)
	+ $q14 = VMULfq $q15, $q10, 14 /* CC::al */, $noreg
	+ $r6 = t2SUBri $r0, 192, 14 /* CC::al */, $noreg, $noreg
	+ $q13 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.vvv..sroa_cast230, align 32)
	+ $q8 = VMULfq $q14, $q13, 14 /* CC::al */, $noreg
	+ $r4 = t2SUBri $r0, 96, 14 /* CC::al */, $noreg, $noreg
	+ $q6 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ttt..sroa_cast226, align 32)
	+ $q8 = VMULfq killed $q8, $q6, 14 /* CC::al */, $noreg
	+ $r3 = t2SUBri $r0, 224, 14 /* CC::al */, $noreg, $noreg
	+ $q5 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.zzz..sroa_cast241, align 32)
	+ $q1 = VMLAfq killed $q1, $q5, killed $q8, 14 /* CC::al */, $noreg
	+ $s8 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool)
	+ $s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0
	+ $s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
	+ $s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
	+ $s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0
	+ $r7 = t2SUBri $r0, 64, 14 /* CC::al */, $noreg, $noreg
	+ $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.yyy..sroa_cast244, align 32)
	+ VSTMQIA $q8, %stack.1, 14 /* CC::al */, $noreg :: (store 16 into %stack.1)
	+ $q8 = VMULfq killed $q8, $q13, 14 /* CC::al */, $noreg
	+ $r1 = t2ADDri $r0, 48, 14 /* CC::al */, $noreg, $noreg
	+ $q9, $r0 = VLD1q32wb_fixed killed $r0, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.sss..sroa_cast223, align 32)
	+ $q11 = COPY $q9
	+ $q11 = VMLSfq killed $q11, killed $q8, $q0, 14 /* CC::al */, $noreg
	+ $r2 = VST1q32wb_fixed killed $r2, 16, killed $q11, 14 /* CC::al */, $noreg :: (store 16 into %ir.www..sroa_cast238, align 32)
	+ $q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.www..sroa_cast238 + 16, basealign 32)
	+ VSTMQIA $q8, %stack.3, 14 /* CC::al */, $noreg :: (store 16 into %stack.3)
	+ $q11 = VMULfq $q10, $q0, 14 /* CC::al */, $noreg
	+ $r12 = VST1q32wb_fixed killed $r12, 16, killed $q11, 14 /* CC::al */, $noreg :: (store 16 into %ir.ppp..sroa_cast248, align 32)
	+ $q11 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ppp..sroa_cast248 + 16, basealign 32)
	+ VSTMQIA $q11, %stack.2, 14 /* CC::al */, $noreg :: (store 16 into %stack.2)
	+ $q1 = VMULfq killed $q11, killed $q8, 14 /* CC::al */, $noreg
	+ $r5 = VST1q32wb_fixed killed $r5, 16, $q0, 14 /* CC::al */, $noreg :: (store 16 into %ir.xxx..sroa_cast248, align 32)
	+ $q4 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.xxx..sroa_cast248 + 16, basealign 32)
	+ $q11 = VLD1q64 killed $r1, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.uuu..sroa_cast235 + 16, basealign 32)
	+ $q7 = VMULfq $q4, $q11, 14 /* CC::al */, $noreg
	+ $r6 = VST1q32wb_fixed killed $r6, 16, $q0, 14 /* CC::al */, $noreg :: (store 16 into %ir.vvv..sroa_cast230, align 32)
	+ $q3 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.vvv..sroa_cast230 + 16, basealign 32)
	+ $q8 = VMULfq $q7, $q3, 14 /* CC::al */, $noreg
	+ $q12 = VMULfq killed $q12, killed $q6, 14 /* CC::al */, $noreg
	+ $q15 = VMLSfq killed $q15, killed $q12, $q0, 14 /* CC::al */, $noreg
	+ $r4 = VST1q32wb_fixed killed $r4, 16, killed $q15, 14 /* CC::al */, $noreg :: (store 16 into %ir.ttt..sroa_cast226, align 32)
	+ $q12 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ttt..sroa_cast226 + 16, basealign 32)
	+ $q8 = VMULfq killed $q8, $q12, 14 /* CC::al */, $noreg
	+ $q9 = VMULfq killed $q5, killed $q9, 14 /* CC::al */, $noreg
	+ $q10 = VMULfq killed $q10, killed $q13, 14 /* CC::al */, $noreg
	+ $q10 = VMLSfq killed $q10, killed $q9, $q0, 14 /* CC::al */, $noreg
	+ $r3 = VST1q32wb_fixed killed $r3, 16, killed $q10, 14 /* CC::al */, $noreg :: (store 16 into %ir.zzz..sroa_cast241, align 32)
	+ $q10 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.zzz..sroa_cast241 + 16, basealign 32)
	+ $q1 = VMLAfq killed $q1, $q10, killed $q8, 14 /* CC::al */, $noreg
	+ $s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5
	+ $s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
	+ $s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
	+ $s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5
	+ VST1q64 killed $r5, 16, $q5, 14 /* CC::al */, $noreg :: (store 16 into %ir.xxx..sroa_cast248 + 16, basealign 32)
	+ VST1q64 killed $r6, 16, $q5, 14 /* CC::al */, $noreg :: (store 16 into %ir.vvv..sroa_cast230 + 16, basealign 32)
	+ $q8 = VLDMQIA %stack.0, 14 /* CC::al */, $noreg :: (load 16 from %stack.0)
	+ $q9 = VLDMQIA %stack.1, 14 /* CC::al */, $noreg :: (load 16 from %stack.1)
	+ $q8 = VMULfq killed $q9, killed $q8, 14 /* CC::al */, $noreg
	+ $q14 = VMLSfq killed $q14, killed $q8, killed $q0, 14 /* CC::al */, $noreg
	+ $r7 = VST1q32wb_fixed killed $r7, 16, killed $q14, 14 /* CC::al */, $noreg :: (store 16 into %ir.yyy..sroa_cast244, align 32)
	+ $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.yyy..sroa_cast244 + 16, basealign 32)
	+ $q9 = VLDMQIA %stack.3, 14 /* CC::al */, $noreg :: (load 16 from %stack.3)
	+ $q9 = VMULfq $q8, killed $q9, 14 /* CC::al */, $noreg
	+ $q7 = VMLSfq killed $q7, killed $q9, $q5, 14 /* CC::al */, $noreg
	+ VST1q64 killed $r7, 16, killed $q7, 14 /* CC::al */, $noreg :: (store 16 into %ir.yyy..sroa_cast244 + 16, basealign 32)
	+ $q9 = VLDMQIA %stack.2, 14 /* CC::al */, $noreg :: (load 16 from %stack.2)
	+ $q9 = VMULfq killed $q9, killed $q12, 14 /* CC::al */, $noreg
	+ $q4 = VMLSfq killed $q4, killed $q9, $q5, 14 /* CC::al */, $noreg
	+ VST1q64 killed $r4, 16, killed $q4, 14 /* CC::al */, $noreg :: (store 16 into %ir.ttt..sroa_cast226 + 16, basealign 32)
	+ $q8 = VMULfq killed $q8, $q3, 14 /* CC::al */, $noreg
	+ $q9 = VLD1q64 killed $r0, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.sss..sroa_cast223 + 16, basealign 32)
	+ $q12 = COPY $q9
	+ $q12 = VMLSfq killed $q12, killed $q8, $q5, 14 /* CC::al */, $noreg
	+ VST1q64 killed $r2, 16, killed $q12, 14 /* CC::al */, $noreg :: (store 16 into %ir.www..sroa_cast238 + 16, basealign 32)
	+ $q8 = VMULfq $q11, killed $q3, 14 /* CC::al */, $noreg
	+ $q9 = VMULfq killed $q10, killed $q9, 14 /* CC::al */, $noreg
	+ $q8 = VMLSfq killed $q8, killed $q9, $q5, 14 /* CC::al */, $noreg
	+ VST1q64 killed $r3, 16, killed $q8, 14 /* CC::al */, $noreg :: (store 16 into %ir.zzz..sroa_cast241 + 16, basealign 32)
	+ $q8 = VMULfq killed $q11, killed $q5, 14 /* CC::al */, $noreg
	+ VST1q64 killed $r12, 16, killed $q8, 14 /* CC::al */, $noreg :: (store 16 into %ir.ppp..sroa_cast248 + 16, basealign 32)
	+ tBX_RET 14 /* CC::al */, $noreg
	+
	+...
	diff --git a/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll b/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll
	index a24637870b31..8449b4a9989b 100644
	--- a/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll
	+++ b/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll
	@@ -35,18 +35,18 @@ define arm_aapcs_vfpcc void @spill_multivector(<4 x i32>* %p) {
	; CHECK-NEXT: vld21.32 {q4, q5}, [r0]
	; CHECK-NEXT: bl external_function
	; CHECK-NEXT: vldmia sp, {d2, d3, d4, d5} @ 32-byte Reload
	-; CHECK-NEXT: add r0, sp, #32
	+; CHECK-NEXT: add.w lr, sp, #32
	; CHECK-NEXT: vstrw.32 q2, [r4, #80]
	; CHECK-NEXT: vstrw.32 q5, [r4, #144]
	; CHECK-NEXT: vstrw.32 q4, [r4, #128]
	; CHECK-NEXT: vstrw.32 q7, [r4, #112]
	; CHECK-NEXT: vstrw.32 q1, [r4, #64]
	-; CHECK-NEXT: vldmia r0, {d2, d3, d4, d5} @ 32-byte Reload
	-; CHECK-NEXT: add r0, sp, #64
	+; CHECK-NEXT: vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
	+; CHECK-NEXT: add.w lr, sp, #64
	; CHECK-NEXT: vstrw.32 q2, [r4, #48]
	; CHECK-NEXT: vstrw.32 q6, [r4, #96]
	; CHECK-NEXT: vstrw.32 q1, [r5]
	-; CHECK-NEXT: vldmia r0, {d2, d3, d4, d5} @ 32-byte Reload
	+; CHECK-NEXT: vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
	; CHECK-NEXT: vstrw.32 q2, [r4, #16]
	; CHECK-NEXT: vstrw.32 q1, [r4]
	; CHECK-NEXT: add sp, #112