blob: b721c1195ea1bab837594961880063c90b32c313 [file] [log] [blame]
commit a21387c65470417c58021f8d3194a4510bb64f46
Author: Hans Wennborg <hans@chromium.org>
Date: Tue Sep 15 10:47:02 2020 +0200
Revert "RegAllocFast: Record internal state based on register units"
This seems to have caused incorrect register allocation in some cases,
breaking tests in the Zig standard library (PR47278).
As discussed on the bug, revert back to green for now.
> Record internal state based on register units. This is often more
> efficient as there are typically fewer register units to update
> compared to iterating over all the aliases of a register.
>
> Original patch by Matthias Braun, but I've been rebasing and fixing it
> for almost 2 years and fixed a few bugs causing intermediate failures
> to make this patch independent of the changes in
> https://reviews.llvm.org/D52010.
This reverts commit 66251f7e1de79a7c1620659b7f58352b8c8e892e, and
follow-ups 931a68f26b9a3de853807ffad7b2cd0a2dd30922
and 0671a4c5087d40450603d9d26cf239f1a8b1367e. It also adjusts some
test expectations.
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index e0742c4508e..d93fd8f601c 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -106,8 +106,13 @@ namespace {
/// that it is alive across blocks.
BitVector MayLiveAcrossBlocks;
- /// State of a register unit.
- enum RegUnitState {
+ /// State of a physical register.
+ enum RegState {
+ /// A disabled register is not available for allocation, but an alias may
+ /// be in use. A register can only be moved out of the disabled state if
+ /// all aliases are disabled.
+ regDisabled,
+
/// A free register is not currently in use and can be allocated
/// immediately without checking aliases.
regFree,
@@ -121,8 +126,8 @@ namespace {
/// register. In that case, LiveVirtRegs contains the inverse mapping.
};
- /// Maps each physical register to a RegUnitState enum or virtual register.
- std::vector<unsigned> RegUnitStates;
+ /// Maps each physical register to a RegState enum or a virtual register.
+ std::vector<unsigned> PhysRegState;
SmallVector<Register, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
@@ -184,10 +189,6 @@ namespace {
bool isLastUseOfLocalReg(const MachineOperand &MO) const;
void addKillFlag(const LiveReg &LRI);
-#ifndef NDEBUG
- bool verifyRegStateMapping(const LiveReg &LR) const;
-#endif
-
void killVirtReg(LiveReg &LR);
void killVirtReg(Register VirtReg);
void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
@@ -195,7 +196,7 @@ namespace {
void usePhysReg(MachineOperand &MO);
void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
- unsigned NewState);
+ RegState NewState);
unsigned calcSpillCost(MCPhysReg PhysReg) const;
void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
@@ -228,7 +229,7 @@ namespace {
bool mayLiveOut(Register VirtReg);
bool mayLiveIn(Register VirtReg);
- void dumpState() const;
+ void dumpState();
};
} // end anonymous namespace
@@ -239,8 +240,7 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
false)
void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
- RegUnitStates[*UI] = NewState;
+ PhysRegState[PhysReg] = NewState;
}
/// This allocates space for the specified virtual register to be held on the
@@ -384,23 +384,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) {
}
}
-#ifndef NDEBUG
-bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const {
- for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) {
- if (RegUnitStates[*UI] != LR.VirtReg)
- return false;
- }
-
- return true;
-}
-#endif
-
/// Mark virtreg as no longer available.
void RegAllocFast::killVirtReg(LiveReg &LR) {
- assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
addKillFlag(LR);
- MCPhysReg PhysReg = LR.PhysReg;
- setPhysRegState(PhysReg, regFree);
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg &&
+ "Broken RegState mapping");
+ setPhysRegState(LR.PhysReg, regFree);
LR.PhysReg = 0;
}
@@ -427,9 +416,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
/// Do the actual work of spilling.
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
- assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
-
- MCPhysReg PhysReg = LR.PhysReg;
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping");
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
@@ -437,7 +424,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
LR.Dirty = false;
- spill(MI, LR.VirtReg, PhysReg, SpillKill);
+ spill(MI, LR.VirtReg, LR.PhysReg, SpillKill);
if (SpillKill)
LR.LastUse = nullptr; // Don't kill register again
@@ -473,16 +460,53 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
assert(PhysReg.isPhysical() && "Bad usePhysReg operand");
markRegUsedInInstr(PhysReg);
+ switch (PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ PhysRegState[PhysReg] = regFree;
+ LLVM_FALLTHROUGH;
+ case regFree:
+ MO.setIsKill();
+ return;
+ default:
+ // The physreg was allocated to a virtual register. That means the value we
+ // wanted has been clobbered.
+ llvm_unreachable("Instruction uses an allocated register");
+ }
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- switch (RegUnitStates[*UI]) {
+ // Maybe a superregister is reserved?
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ MCPhysReg Alias = *AI;
+ switch (PhysRegState[Alias]) {
+ case regDisabled:
+ break;
case regReserved:
- RegUnitStates[*UI] = regFree;
+ // Either PhysReg is a subregister of Alias and we mark the
+ // whole register as free, or PhysReg is the superregister of
+ // Alias and we mark all the aliases as disabled before freeing
+ // PhysReg.
+ // In the latter case, since PhysReg was disabled, this means that
+ // its value is defined only by physical sub-registers. This check
+ // is performed by the assert of the default case in this loop.
+ // Note: The value of the superregister may only be partial
+ // defined, that is why regDisabled is a valid state for aliases.
+ assert((TRI->isSuperRegister(PhysReg, Alias) ||
+ TRI->isSuperRegister(Alias, PhysReg)) &&
+ "Instruction is not using a subregister of a reserved register");
LLVM_FALLTHROUGH;
case regFree:
+ if (TRI->isSuperRegister(PhysReg, Alias)) {
+ // Leave the superregister in the working set.
+ setPhysRegState(Alias, regFree);
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ }
+ // Some other alias was in the working set - clear it.
+ setPhysRegState(Alias, regDisabled);
break;
default:
- llvm_unreachable("Unexpected reg unit state");
+ llvm_unreachable("Instruction uses an alias of an allocated register");
}
}
@@ -495,20 +519,38 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
/// similar to defineVirtReg except the physreg is reserved instead of
/// allocated.
void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
- MCPhysReg PhysReg, unsigned NewState) {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- switch (unsigned VirtReg = RegUnitStates[*UI]) {
+ MCPhysReg PhysReg, RegState NewState) {
+ markRegUsedInInstr(PhysReg);
+ switch (Register VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ LLVM_FALLTHROUGH;
+ case regFree:
+ case regReserved:
+ setPhysRegState(PhysReg, NewState);
+ return;
+ }
+
+ // This is a disabled register, disable all aliases.
+ setPhysRegState(PhysReg, NewState);
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ MCPhysReg Alias = *AI;
+ switch (Register VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
default:
spillVirtReg(MI, VirtReg);
- break;
+ LLVM_FALLTHROUGH;
case regFree:
case regReserved:
+ setPhysRegState(Alias, regDisabled);
+ if (TRI->isSuperRegister(PhysReg, Alias))
+ return;
break;
}
}
-
- markRegUsedInInstr(PhysReg);
- setPhysRegState(PhysReg, NewState);
}
/// Return the cost of spilling clearing out PhysReg and aliases so it is free
@@ -521,24 +563,46 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
<< " is already used in instr.\n");
return spillImpossible;
}
+ switch (Register VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regFree:
+ return 0;
+ case regReserved:
+ LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
+ << printReg(PhysReg, TRI) << " is reserved already.\n");
+ return spillImpossible;
+ default: {
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ return LRI->Dirty ? spillDirty : spillClean;
+ }
+ }
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- switch (unsigned VirtReg = RegUnitStates[*UI]) {
+ // This is a disabled register, add up cost of aliases.
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");
+ unsigned Cost = 0;
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ MCPhysReg Alias = *AI;
+ switch (Register VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
case regFree:
+ ++Cost;
break;
case regReserved:
- LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
- << printReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default: {
LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
"Missing VirtReg entry");
- return LRI->Dirty ? spillDirty : spillClean;
+ Cost += LRI->Dirty ? spillDirty : spillClean;
+ break;
}
}
}
- return 0;
+ return Cost;
}
/// This method updates local state so that we know that PhysReg is the
@@ -845,17 +909,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
if (!Reg || !Reg.isPhysical())
continue;
markRegUsedInInstr(Reg);
-
- for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) {
- if (!ThroughRegs.count(RegUnitStates[*UI]))
- continue;
-
- // Need to spill any aliasing registers.
- for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
- for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) {
- definePhysReg(MI, *SI, regFree);
- }
- }
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (ThroughRegs.count(PhysRegState[*AI]))
+ definePhysReg(MI, *AI, regFree);
}
}
@@ -919,40 +975,37 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
}
#ifndef NDEBUG
-
-void RegAllocFast::dumpState() const {
- for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE;
- ++Unit) {
- switch (unsigned VirtReg = RegUnitStates[Unit]) {
+void RegAllocFast::dumpState() {
+ for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+ if (PhysRegState[Reg] == regDisabled) continue;
+ dbgs() << " " << printReg(Reg, TRI);
+ switch(PhysRegState[Reg]) {
case regFree:
break;
case regReserved:
- dbgs() << " " << printRegUnit(Unit, TRI) << "[P]";
+ dbgs() << "*";
break;
default: {
- dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg);
- LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
- assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry");
- if (I->Dirty)
- dbgs() << "[D]";
- assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present");
+ dbgs() << '=' << printReg(PhysRegState[Reg]);
+ LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ if (LRI->Dirty)
+ dbgs() << "*";
+ assert(LRI->PhysReg == Reg && "Bad inverse map");
break;
}
}
}
dbgs() << '\n';
// Check that LiveVirtRegs is the inverse.
- for (const LiveReg &LR : LiveVirtRegs) {
- Register VirtReg = LR.VirtReg;
- assert(VirtReg.isVirtual() && "Bad map key");
- MCPhysReg PhysReg = LR.PhysReg;
- if (PhysReg != 0) {
- assert(Register::isPhysicalRegister(PhysReg) &&
- "mapped to physreg");
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- assert(RegUnitStates[*UI] == VirtReg && "inverse map valid");
- }
- }
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+ e = LiveVirtRegs.end(); i != e; ++i) {
+ if (!i->PhysReg)
+ continue;
+ assert(i->VirtReg.isVirtual() && "Bad map key");
+ assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value");
+ assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
}
}
#endif
@@ -1194,7 +1247,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
- RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
+ PhysRegState.assign(TRI->getNumRegs(), regDisabled);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
MachineBasicBlock::iterator MII = MBB.begin();
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
index 7c546936ba2..392af063eb8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
@@ -4,8 +4,8 @@
define i32 @fptosi_wh(half %a) nounwind ssp {
entry:
; CHECK-LABEL: fptosi_wh
-; CHECK: fcvt s0, h0
-; CHECK: fcvtzs [[REG:w[0-9]+]], s0
+; CHECK: fcvt s1, h0
+; CHECK: fcvtzs [[REG:w[0-9]+]], s1
; CHECK: mov w0, [[REG]]
%conv = fptosi half %a to i32
ret i32 %conv
@@ -15,8 +15,8 @@ entry:
define i32 @fptoui_swh(half %a) nounwind ssp {
entry:
; CHECK-LABEL: fptoui_swh
-; CHECK: fcvt s0, h0
-; CHECK: fcvtzu [[REG:w[0-9]+]], s0
+; CHECK: fcvt s1, h0
+; CHECK: fcvtzu [[REG:w[0-9]+]], s1
; CHECK: mov w0, [[REG]]
%conv = fptoui half %a to i32
ret i32 %conv
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
index d8abf14c136..ed03aec07e7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
@@ -54,8 +54,8 @@ entry:
; CHECK: ldrh w8, [sp, #12]
; CHECK: str w8, [sp, #8]
; CHECK: ldr w8, [sp, #8]
-; CHECK: ; kill: def $x8 killed $w8
-; CHECK: str x8, [sp]
+; CHECK: mov x9, x8
+; CHECK: str x9, [sp]
; CHECK: ldr x0, [sp]
; CHECK: ret
%a.addr = alloca i8, align 1
@@ -109,8 +109,8 @@ entry:
; CHECK: strh w8, [sp, #12]
; CHECK: ldrsh w8, [sp, #12]
; CHECK: str w8, [sp, #8]
-; CHECK: ldrsw x8, [sp, #8]
-; CHECK: str x8, [sp]
+; CHECK: ldrsw x9, [sp, #8]
+; CHECK: str x9, [sp]
; CHECK: ldr x0, [sp]
; CHECK: ret
%a.addr = alloca i8, align 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index e1e889b906c..6b3e8d747d4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -285,11 +285,11 @@ define i16 @to_half(float %in) {
; FAST: // %bb.0:
; FAST-NEXT: sub sp, sp, #16 // =16
; FAST-NEXT: .cfi_def_cfa_offset 16
-; FAST-NEXT: fcvt h0, s0
+; FAST-NEXT: fcvt h1, s0
; FAST-NEXT: // implicit-def: $w0
-; FAST-NEXT: fmov s1, w0
-; FAST-NEXT: mov.16b v1, v0
-; FAST-NEXT: fmov w8, s1
+; FAST-NEXT: fmov s0, w0
+; FAST-NEXT: mov.16b v0, v1
+; FAST-NEXT: fmov w8, s0
; FAST-NEXT: mov w0, w8
; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill
; FAST-NEXT: mov w0, w8
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll b/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll
index 22e3ccf2b12..8d62fb35566 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll
@@ -15,7 +15,8 @@
; CHECK-LABEL: foo:
; CHECK: sub
; CHECK-DAG: mov x[[SP:[0-9]+]], sp
-; CHECK-DAG: mov w[[OFFSET:[0-9]+]], #4104
+; CHECK-DAG: mov [[TMP:w[0-9]+]], #4104
+; CHECK: mov w[[OFFSET:[0-9]+]], [[TMP]]
; CHECK: strb w0, [x[[SP]], x[[OFFSET]]]
define void @foo(i8 %in) {
diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index 105969717e4..1e796fff710 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -10,11 +10,12 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) {
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v1.d[1], x8
-; CHECK-NEXT: cnt v0.16b, v1.16b
-; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: cnt v1.16b, v1.16b
+; CHECK-NEXT: uaddlv h2, v1.16b
; CHECK-NEXT: // implicit-def: $q1
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: fmov w1, s1
+; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
Entry:
%1 = load i128, i128* %0, align 16
@@ -36,21 +37,21 @@ define i16 @popcount256(i256* nocapture nonnull readonly %0) {
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v1.d[1], x9
-; CHECK-NEXT: cnt v0.16b, v1.16b
-; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: cnt v1.16b, v1.16b
+; CHECK-NEXT: uaddlv h2, v1.16b
; CHECK-NEXT: // implicit-def: $q1
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v1.d[1], x8
-; CHECK-NEXT: cnt v0.16b, v1.16b
-; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: cnt v1.16b, v1.16b
+; CHECK-NEXT: uaddlv h2, v1.16b
; CHECK-NEXT: // implicit-def: $q1
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: add w0, w8, w9
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: fmov w11, s1
+; CHECK-NEXT: add w0, w11, w10
; CHECK-NEXT: ret
Entry:
%1 = load i256, i256* %0, align 16
@@ -69,11 +70,11 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: cnt v0.16b, v0.16b
-; CHECK-NEXT: uaddlv h0, v0.16b
-; CHECK-NEXT: // implicit-def: $q1
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: fmov w0, s1
-; CHECK-NEXT: // kill: def $x0 killed $w0
+; CHECK-NEXT: uaddlv h1, v0.16b
+; CHECK-NEXT: // implicit-def: $q0
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: fmov w2, s0
+; CHECK-NEXT: mov w0, w2
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov x1, v0.d[1]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
index 3d3b511ab34..8999cd91169 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
@@ -69,15 +69,15 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14
; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15
; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16
- ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN: renamable $sgpr20_sgpr21 = S_MOV_B64 $exec
; GCN: renamable $vgpr1 = IMPLICIT_DEF
- ; GCN: renamable $sgpr2_sgpr3 = IMPLICIT_DEF
+ ; GCN: renamable $sgpr22_sgpr23 = IMPLICIT_DEF
; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GCN: SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5)
; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5)
- ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5)
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr20_sgpr21, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5)
; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
- ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr22_sgpr23, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000)
; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5)
@@ -91,8 +91,8 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
; GCN: renamable $vgpr18 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0
; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
; GCN: renamable $vgpr19 = COPY renamable $vgpr18
- ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5
- ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
+ ; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5)
; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index b119ffd303e..e991c550c6b 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -11,7 +11,7 @@
define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 {
; GCN-LABEL: spill_sgprs_to_multiple_vgprs:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
@@ -42,354 +42,352 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out,
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[84:91]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 0
+; GCN-NEXT: v_writelane_b32 v0, s5, 1
+; GCN-NEXT: v_writelane_b32 v0, s6, 2
+; GCN-NEXT: v_writelane_b32 v0, s7, 3
+; GCN-NEXT: v_writelane_b32 v0, s8, 4
+; GCN-NEXT: v_writelane_b32 v0, s9, 5
+; GCN-NEXT: v_writelane_b32 v0, s10, 6
+; GCN-NEXT: v_writelane_b32 v0, s11, 7
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 8
+; GCN-NEXT: v_writelane_b32 v0, s5, 9
+; GCN-NEXT: v_writelane_b32 v0, s6, 10
+; GCN-NEXT: v_writelane_b32 v0, s7, 11
+; GCN-NEXT: v_writelane_b32 v0, s8, 12
+; GCN-NEXT: v_writelane_b32 v0, s9, 13
+; GCN-NEXT: v_writelane_b32 v0, s10, 14
+; GCN-NEXT: v_writelane_b32 v0, s11, 15
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 16
+; GCN-NEXT: v_writelane_b32 v0, s5, 17
+; GCN-NEXT: v_writelane_b32 v0, s6, 18
+; GCN-NEXT: v_writelane_b32 v0, s7, 19
+; GCN-NEXT: v_writelane_b32 v0, s8, 20
+; GCN-NEXT: v_writelane_b32 v0, s9, 21
+; GCN-NEXT: v_writelane_b32 v0, s10, 22
+; GCN-NEXT: v_writelane_b32 v0, s11, 23
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 24
+; GCN-NEXT: v_writelane_b32 v0, s5, 25
+; GCN-NEXT: v_writelane_b32 v0, s6, 26
+; GCN-NEXT: v_writelane_b32 v0, s7, 27
+; GCN-NEXT: v_writelane_b32 v0, s8, 28
+; GCN-NEXT: v_writelane_b32 v0, s9, 29
+; GCN-NEXT: v_writelane_b32 v0, s10, 30
+; GCN-NEXT: v_writelane_b32 v0, s11, 31
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 32
+; GCN-NEXT: v_writelane_b32 v0, s5, 33
+; GCN-NEXT: v_writelane_b32 v0, s6, 34
+; GCN-NEXT: v_writelane_b32 v0, s7, 35
+; GCN-NEXT: v_writelane_b32 v0, s8, 36
+; GCN-NEXT: v_writelane_b32 v0, s9, 37
+; GCN-NEXT: v_writelane_b32 v0, s10, 38
+; GCN-NEXT: v_writelane_b32 v0, s11, 39
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 40
+; GCN-NEXT: v_writelane_b32 v0, s5, 41
+; GCN-NEXT: v_writelane_b32 v0, s6, 42
+; GCN-NEXT: v_writelane_b32 v0, s7, 43
+; GCN-NEXT: v_writelane_b32 v0, s8, 44
+; GCN-NEXT: v_writelane_b32 v0, s9, 45
+; GCN-NEXT: v_writelane_b32 v0, s10, 46
+; GCN-NEXT: v_writelane_b32 v0, s11, 47
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 48
+; GCN-NEXT: v_writelane_b32 v0, s5, 49
+; GCN-NEXT: v_writelane_b32 v0, s6, 50
+; GCN-NEXT: v_writelane_b32 v0, s7, 51
+; GCN-NEXT: v_writelane_b32 v0, s8, 52
+; GCN-NEXT: v_writelane_b32 v0, s9, 53
+; GCN-NEXT: v_writelane_b32 v0, s10, 54
+; GCN-NEXT: v_writelane_b32 v0, s11, 55
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s3, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_writelane_b32 v0, s0, 0
-; GCN-NEXT: v_writelane_b32 v0, s4, 1
-; GCN-NEXT: v_writelane_b32 v0, s5, 2
-; GCN-NEXT: v_writelane_b32 v0, s6, 3
-; GCN-NEXT: v_writelane_b32 v0, s7, 4
-; GCN-NEXT: v_writelane_b32 v0, s8, 5
-; GCN-NEXT: v_writelane_b32 v0, s9, 6
-; GCN-NEXT: v_writelane_b32 v0, s10, 7
-; GCN-NEXT: v_writelane_b32 v0, s11, 8
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 9
-; GCN-NEXT: v_writelane_b32 v0, s1, 10
-; GCN-NEXT: v_writelane_b32 v0, s2, 11
-; GCN-NEXT: v_writelane_b32 v0, s3, 12
-; GCN-NEXT: v_writelane_b32 v0, s4, 13
-; GCN-NEXT: v_writelane_b32 v0, s5, 14
-; GCN-NEXT: v_writelane_b32 v0, s6, 15
-; GCN-NEXT: v_writelane_b32 v0, s7, 16
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 17
-; GCN-NEXT: v_writelane_b32 v0, s1, 18
-; GCN-NEXT: v_writelane_b32 v0, s2, 19
-; GCN-NEXT: v_writelane_b32 v0, s3, 20
-; GCN-NEXT: v_writelane_b32 v0, s4, 21
-; GCN-NEXT: v_writelane_b32 v0, s5, 22
-; GCN-NEXT: v_writelane_b32 v0, s6, 23
-; GCN-NEXT: v_writelane_b32 v0, s7, 24
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 25
-; GCN-NEXT: v_writelane_b32 v0, s1, 26
-; GCN-NEXT: v_writelane_b32 v0, s2, 27
-; GCN-NEXT: v_writelane_b32 v0, s3, 28
-; GCN-NEXT: v_writelane_b32 v0, s4, 29
-; GCN-NEXT: v_writelane_b32 v0, s5, 30
-; GCN-NEXT: v_writelane_b32 v0, s6, 31
-; GCN-NEXT: v_writelane_b32 v0, s7, 32
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 33
-; GCN-NEXT: v_writelane_b32 v0, s1, 34
-; GCN-NEXT: v_writelane_b32 v0, s2, 35
-; GCN-NEXT: v_writelane_b32 v0, s3, 36
-; GCN-NEXT: v_writelane_b32 v0, s4, 37
-; GCN-NEXT: v_writelane_b32 v0, s5, 38
-; GCN-NEXT: v_writelane_b32 v0, s6, 39
-; GCN-NEXT: v_writelane_b32 v0, s7, 40
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 41
-; GCN-NEXT: v_writelane_b32 v0, s1, 42
-; GCN-NEXT: v_writelane_b32 v0, s2, 43
-; GCN-NEXT: v_writelane_b32 v0, s3, 44
-; GCN-NEXT: v_writelane_b32 v0, s4, 45
-; GCN-NEXT: v_writelane_b32 v0, s5, 46
-; GCN-NEXT: v_writelane_b32 v0, s6, 47
-; GCN-NEXT: v_writelane_b32 v0, s7, 48
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 49
-; GCN-NEXT: v_writelane_b32 v0, s1, 50
-; GCN-NEXT: v_writelane_b32 v0, s2, 51
-; GCN-NEXT: v_writelane_b32 v0, s3, 52
-; GCN-NEXT: v_writelane_b32 v0, s4, 53
-; GCN-NEXT: v_writelane_b32 v0, s5, 54
-; GCN-NEXT: v_writelane_b32 v0, s6, 55
-; GCN-NEXT: v_writelane_b32 v0, s7, 56
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_mov_b32 s8, 0
-; GCN-NEXT: v_readlane_b32 s9, v0, 0
-; GCN-NEXT: s_cmp_lg_u32 s9, s8
-; GCN-NEXT: v_writelane_b32 v0, s12, 57
-; GCN-NEXT: v_writelane_b32 v0, s13, 58
-; GCN-NEXT: v_writelane_b32 v0, s14, 59
-; GCN-NEXT: v_writelane_b32 v0, s15, 60
-; GCN-NEXT: v_writelane_b32 v0, s16, 61
-; GCN-NEXT: v_writelane_b32 v0, s17, 62
-; GCN-NEXT: v_writelane_b32 v0, s18, 63
-; GCN-NEXT: v_writelane_b32 v1, s19, 0
-; GCN-NEXT: v_writelane_b32 v1, s20, 1
-; GCN-NEXT: v_writelane_b32 v1, s21, 2
-; GCN-NEXT: v_writelane_b32 v1, s22, 3
-; GCN-NEXT: v_writelane_b32 v1, s23, 4
-; GCN-NEXT: v_writelane_b32 v1, s24, 5
-; GCN-NEXT: v_writelane_b32 v1, s25, 6
-; GCN-NEXT: v_writelane_b32 v1, s26, 7
-; GCN-NEXT: v_writelane_b32 v1, s27, 8
-; GCN-NEXT: v_writelane_b32 v1, s36, 9
-; GCN-NEXT: v_writelane_b32 v1, s37, 10
-; GCN-NEXT: v_writelane_b32 v1, s38, 11
-; GCN-NEXT: v_writelane_b32 v1, s39, 12
-; GCN-NEXT: v_writelane_b32 v1, s40, 13
-; GCN-NEXT: v_writelane_b32 v1, s41, 14
-; GCN-NEXT: v_writelane_b32 v1, s42, 15
-; GCN-NEXT: v_writelane_b32 v1, s43, 16
-; GCN-NEXT: v_writelane_b32 v1, s44, 17
-; GCN-NEXT: v_writelane_b32 v1, s45, 18
-; GCN-NEXT: v_writelane_b32 v1, s46, 19
-; GCN-NEXT: v_writelane_b32 v1, s47, 20
-; GCN-NEXT: v_writelane_b32 v1, s48, 21
-; GCN-NEXT: v_writelane_b32 v1, s49, 22
-; GCN-NEXT: v_writelane_b32 v1, s50, 23
-; GCN-NEXT: v_writelane_b32 v1, s51, 24
-; GCN-NEXT: v_writelane_b32 v1, s52, 25
-; GCN-NEXT: v_writelane_b32 v1, s53, 26
-; GCN-NEXT: v_writelane_b32 v1, s54, 27
-; GCN-NEXT: v_writelane_b32 v1, s55, 28
-; GCN-NEXT: v_writelane_b32 v1, s56, 29
-; GCN-NEXT: v_writelane_b32 v1, s57, 30
-; GCN-NEXT: v_writelane_b32 v1, s58, 31
-; GCN-NEXT: v_writelane_b32 v1, s59, 32
-; GCN-NEXT: v_writelane_b32 v1, s60, 33
-; GCN-NEXT: v_writelane_b32 v1, s61, 34
-; GCN-NEXT: v_writelane_b32 v1, s62, 35
-; GCN-NEXT: v_writelane_b32 v1, s63, 36
-; GCN-NEXT: v_writelane_b32 v1, s64, 37
-; GCN-NEXT: v_writelane_b32 v1, s65, 38
-; GCN-NEXT: v_writelane_b32 v1, s66, 39
-; GCN-NEXT: v_writelane_b32 v1, s67, 40
-; GCN-NEXT: v_writelane_b32 v1, s68, 41
-; GCN-NEXT: v_writelane_b32 v1, s69, 42
-; GCN-NEXT: v_writelane_b32 v1, s70, 43
-; GCN-NEXT: v_writelane_b32 v1, s71, 44
-; GCN-NEXT: v_writelane_b32 v1, s72, 45
-; GCN-NEXT: v_writelane_b32 v1, s73, 46
-; GCN-NEXT: v_writelane_b32 v1, s74, 47
-; GCN-NEXT: v_writelane_b32 v1, s75, 48
-; GCN-NEXT: v_writelane_b32 v1, s76, 49
-; GCN-NEXT: v_writelane_b32 v1, s77, 50
-; GCN-NEXT: v_writelane_b32 v1, s78, 51
-; GCN-NEXT: v_writelane_b32 v1, s79, 52
-; GCN-NEXT: v_writelane_b32 v1, s80, 53
-; GCN-NEXT: v_writelane_b32 v1, s81, 54
-; GCN-NEXT: v_writelane_b32 v1, s82, 55
-; GCN-NEXT: v_writelane_b32 v1, s83, 56
-; GCN-NEXT: v_writelane_b32 v1, s84, 57
-; GCN-NEXT: v_writelane_b32 v1, s85, 58
-; GCN-NEXT: v_writelane_b32 v1, s86, 59
-; GCN-NEXT: v_writelane_b32 v1, s87, 60
-; GCN-NEXT: v_writelane_b32 v1, s88, 61
-; GCN-NEXT: v_writelane_b32 v1, s89, 62
-; GCN-NEXT: v_writelane_b32 v1, s90, 63
-; GCN-NEXT: v_writelane_b32 v2, s91, 0
-; GCN-NEXT: v_writelane_b32 v2, s0, 1
-; GCN-NEXT: v_writelane_b32 v2, s1, 2
-; GCN-NEXT: v_writelane_b32 v2, s2, 3
-; GCN-NEXT: v_writelane_b32 v2, s3, 4
-; GCN-NEXT: v_writelane_b32 v2, s4, 5
-; GCN-NEXT: v_writelane_b32 v2, s5, 6
-; GCN-NEXT: v_writelane_b32 v2, s6, 7
-; GCN-NEXT: v_writelane_b32 v2, s7, 8
+; GCN-NEXT: s_cmp_lg_u32 s2, s3
+; GCN-NEXT: v_writelane_b32 v0, s12, 56
+; GCN-NEXT: v_writelane_b32 v0, s13, 57
+; GCN-NEXT: v_writelane_b32 v0, s14, 58
+; GCN-NEXT: v_writelane_b32 v0, s15, 59
+; GCN-NEXT: v_writelane_b32 v0, s16, 60
+; GCN-NEXT: v_writelane_b32 v0, s17, 61
+; GCN-NEXT: v_writelane_b32 v0, s18, 62
+; GCN-NEXT: v_writelane_b32 v0, s19, 63
+; GCN-NEXT: v_writelane_b32 v1, s20, 0
+; GCN-NEXT: v_writelane_b32 v1, s21, 1
+; GCN-NEXT: v_writelane_b32 v1, s22, 2
+; GCN-NEXT: v_writelane_b32 v1, s23, 3
+; GCN-NEXT: v_writelane_b32 v1, s24, 4
+; GCN-NEXT: v_writelane_b32 v1, s25, 5
+; GCN-NEXT: v_writelane_b32 v1, s26, 6
+; GCN-NEXT: v_writelane_b32 v1, s27, 7
+; GCN-NEXT: v_writelane_b32 v1, s36, 8
+; GCN-NEXT: v_writelane_b32 v1, s37, 9
+; GCN-NEXT: v_writelane_b32 v1, s38, 10
+; GCN-NEXT: v_writelane_b32 v1, s39, 11
+; GCN-NEXT: v_writelane_b32 v1, s40, 12
+; GCN-NEXT: v_writelane_b32 v1, s41, 13
+; GCN-NEXT: v_writelane_b32 v1, s42, 14
+; GCN-NEXT: v_writelane_b32 v1, s43, 15
+; GCN-NEXT: v_writelane_b32 v1, s44, 16
+; GCN-NEXT: v_writelane_b32 v1, s45, 17
+; GCN-NEXT: v_writelane_b32 v1, s46, 18
+; GCN-NEXT: v_writelane_b32 v1, s47, 19
+; GCN-NEXT: v_writelane_b32 v1, s48, 20
+; GCN-NEXT: v_writelane_b32 v1, s49, 21
+; GCN-NEXT: v_writelane_b32 v1, s50, 22
+; GCN-NEXT: v_writelane_b32 v1, s51, 23
+; GCN-NEXT: v_writelane_b32 v1, s52, 24
+; GCN-NEXT: v_writelane_b32 v1, s53, 25
+; GCN-NEXT: v_writelane_b32 v1, s54, 26
+; GCN-NEXT: v_writelane_b32 v1, s55, 27
+; GCN-NEXT: v_writelane_b32 v1, s56, 28
+; GCN-NEXT: v_writelane_b32 v1, s57, 29
+; GCN-NEXT: v_writelane_b32 v1, s58, 30
+; GCN-NEXT: v_writelane_b32 v1, s59, 31
+; GCN-NEXT: v_writelane_b32 v1, s60, 32
+; GCN-NEXT: v_writelane_b32 v1, s61, 33
+; GCN-NEXT: v_writelane_b32 v1, s62, 34
+; GCN-NEXT: v_writelane_b32 v1, s63, 35
+; GCN-NEXT: v_writelane_b32 v1, s64, 36
+; GCN-NEXT: v_writelane_b32 v1, s65, 37
+; GCN-NEXT: v_writelane_b32 v1, s66, 38
+; GCN-NEXT: v_writelane_b32 v1, s67, 39
+; GCN-NEXT: v_writelane_b32 v1, s68, 40
+; GCN-NEXT: v_writelane_b32 v1, s69, 41
+; GCN-NEXT: v_writelane_b32 v1, s70, 42
+; GCN-NEXT: v_writelane_b32 v1, s71, 43
+; GCN-NEXT: v_writelane_b32 v1, s72, 44
+; GCN-NEXT: v_writelane_b32 v1, s73, 45
+; GCN-NEXT: v_writelane_b32 v1, s74, 46
+; GCN-NEXT: v_writelane_b32 v1, s75, 47
+; GCN-NEXT: v_writelane_b32 v1, s76, 48
+; GCN-NEXT: v_writelane_b32 v1, s77, 49
+; GCN-NEXT: v_writelane_b32 v1, s78, 50
+; GCN-NEXT: v_writelane_b32 v1, s79, 51
+; GCN-NEXT: v_writelane_b32 v1, s80, 52
+; GCN-NEXT: v_writelane_b32 v1, s81, 53
+; GCN-NEXT: v_writelane_b32 v1, s82, 54
+; GCN-NEXT: v_writelane_b32 v1, s83, 55
+; GCN-NEXT: v_writelane_b32 v1, s84, 56
+; GCN-NEXT: v_writelane_b32 v1, s85, 57
+; GCN-NEXT: v_writelane_b32 v1, s86, 58
+; GCN-NEXT: v_writelane_b32 v1, s87, 59
+; GCN-NEXT: v_writelane_b32 v1, s88, 60
+; GCN-NEXT: v_writelane_b32 v1, s89, 61
+; GCN-NEXT: v_writelane_b32 v1, s90, 62
+; GCN-NEXT: v_writelane_b32 v1, s91, 63
+; GCN-NEXT: v_writelane_b32 v2, s4, 0
+; GCN-NEXT: v_writelane_b32 v2, s5, 1
+; GCN-NEXT: v_writelane_b32 v2, s6, 2
+; GCN-NEXT: v_writelane_b32 v2, s7, 3
+; GCN-NEXT: v_writelane_b32 v2, s8, 4
+; GCN-NEXT: v_writelane_b32 v2, s9, 5
+; GCN-NEXT: v_writelane_b32 v2, s10, 6
+; GCN-NEXT: v_writelane_b32 v2, s11, 7
; GCN-NEXT: s_cbranch_scc1 BB0_2
; GCN-NEXT: ; %bb.1: ; %bb0
-; GCN-NEXT: v_readlane_b32 s0, v0, 1
-; GCN-NEXT: v_readlane_b32 s1, v0, 2
-; GCN-NEXT: v_readlane_b32 s2, v0, 3
-; GCN-NEXT: v_readlane_b32 s3, v0, 4
-; GCN-NEXT: v_readlane_b32 s4, v0, 5
-; GCN-NEXT: v_readlane_b32 s5, v0, 6
-; GCN-NEXT: v_readlane_b32 s6, v0, 7
-; GCN-NEXT: v_readlane_b32 s7, v0, 8
+; GCN-NEXT: v_readlane_b32 s0, v0, 0
+; GCN-NEXT: v_readlane_b32 s1, v0, 1
+; GCN-NEXT: v_readlane_b32 s2, v0, 2
+; GCN-NEXT: v_readlane_b32 s3, v0, 3
+; GCN-NEXT: v_readlane_b32 s4, v0, 4
+; GCN-NEXT: v_readlane_b32 s5, v0, 5
+; GCN-NEXT: v_readlane_b32 s6, v0, 6
+; GCN-NEXT: v_readlane_b32 s7, v0, 7
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 57
-; GCN-NEXT: v_readlane_b32 s1, v0, 58
-; GCN-NEXT: v_readlane_b32 s2, v0, 59
-; GCN-NEXT: v_readlane_b32 s3, v0, 60
-; GCN-NEXT: v_readlane_b32 s4, v0, 61
-; GCN-NEXT: v_readlane_b32 s5, v0, 62
-; GCN-NEXT: v_readlane_b32 s6, v0, 63
-; GCN-NEXT: v_readlane_b32 s7, v1, 0
+; GCN-NEXT: v_readlane_b32 s0, v0, 56
+; GCN-NEXT: v_readlane_b32 s1, v0, 57
+; GCN-NEXT: v_readlane_b32 s2, v0, 58
+; GCN-NEXT: v_readlane_b32 s3, v0, 59
+; GCN-NEXT: v_readlane_b32 s4, v0, 60
+; GCN-NEXT: v_readlane_b32 s5, v0, 61
+; GCN-NEXT: v_readlane_b32 s6, v0, 62
+; GCN-NEXT: v_readlane_b32 s7, v0, 63
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 1
-; GCN-NEXT: v_readlane_b32 s1, v1, 2
-; GCN-NEXT: v_readlane_b32 s2, v1, 3
-; GCN-NEXT: v_readlane_b32 s3, v1, 4
-; GCN-NEXT: v_readlane_b32 s4, v1, 5
-; GCN-NEXT: v_readlane_b32 s5, v1, 6
-; GCN-NEXT: v_readlane_b32 s6, v1, 7
-; GCN-NEXT: v_readlane_b32 s7, v1, 8
+; GCN-NEXT: v_readlane_b32 s0, v1, 0
+; GCN-NEXT: v_readlane_b32 s1, v1, 1
+; GCN-NEXT: v_readlane_b32 s2, v1, 2
+; GCN-NEXT: v_readlane_b32 s3, v1, 3
+; GCN-NEXT: v_readlane_b32 s4, v1, 4
+; GCN-NEXT: v_readlane_b32 s5, v1, 5
+; GCN-NEXT: v_readlane_b32 s6, v1, 6
+; GCN-NEXT: v_readlane_b32 s7, v1, 7
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 9
-; GCN-NEXT: v_readlane_b32 s1, v1, 10
-; GCN-NEXT: v_readlane_b32 s2, v1, 11
-; GCN-NEXT: v_readlane_b32 s3, v1, 12
-; GCN-NEXT: v_readlane_b32 s4, v1, 13
-; GCN-NEXT: v_readlane_b32 s5, v1, 14
-; GCN-NEXT: v_readlane_b32 s6, v1, 15
-; GCN-NEXT: v_readlane_b32 s7, v1, 16
+; GCN-NEXT: v_readlane_b32 s0, v1, 8
+; GCN-NEXT: v_readlane_b32 s1, v1, 9
+; GCN-NEXT: v_readlane_b32 s2, v1, 10
+; GCN-NEXT: v_readlane_b32 s3, v1, 11
+; GCN-NEXT: v_readlane_b32 s4, v1, 12
+; GCN-NEXT: v_readlane_b32 s5, v1, 13
+; GCN-NEXT: v_readlane_b32 s6, v1, 14
+; GCN-NEXT: v_readlane_b32 s7, v1, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 17
-; GCN-NEXT: v_readlane_b32 s1, v1, 18
-; GCN-NEXT: v_readlane_b32 s2, v1, 19
-; GCN-NEXT: v_readlane_b32 s3, v1, 20
-; GCN-NEXT: v_readlane_b32 s4, v1, 21
-; GCN-NEXT: v_readlane_b32 s5, v1, 22
-; GCN-NEXT: v_readlane_b32 s6, v1, 23
-; GCN-NEXT: v_readlane_b32 s7, v1, 24
+; GCN-NEXT: v_readlane_b32 s0, v1, 16
+; GCN-NEXT: v_readlane_b32 s1, v1, 17
+; GCN-NEXT: v_readlane_b32 s2, v1, 18
+; GCN-NEXT: v_readlane_b32 s3, v1, 19
+; GCN-NEXT: v_readlane_b32 s4, v1, 20
+; GCN-NEXT: v_readlane_b32 s5, v1, 21
+; GCN-NEXT: v_readlane_b32 s6, v1, 22
+; GCN-NEXT: v_readlane_b32 s7, v1, 23
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 25
-; GCN-NEXT: v_readlane_b32 s1, v1, 26
-; GCN-NEXT: v_readlane_b32 s2, v1, 27
-; GCN-NEXT: v_readlane_b32 s3, v1, 28
-; GCN-NEXT: v_readlane_b32 s4, v1, 29
-; GCN-NEXT: v_readlane_b32 s5, v1, 30
-; GCN-NEXT: v_readlane_b32 s6, v1, 31
-; GCN-NEXT: v_readlane_b32 s7, v1, 32
+; GCN-NEXT: v_readlane_b32 s0, v1, 24
+; GCN-NEXT: v_readlane_b32 s1, v1, 25
+; GCN-NEXT: v_readlane_b32 s2, v1, 26
+; GCN-NEXT: v_readlane_b32 s3, v1, 27
+; GCN-NEXT: v_readlane_b32 s4, v1, 28
+; GCN-NEXT: v_readlane_b32 s5, v1, 29
+; GCN-NEXT: v_readlane_b32 s6, v1, 30
+; GCN-NEXT: v_readlane_b32 s7, v1, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 33
-; GCN-NEXT: v_readlane_b32 s1, v1, 34
-; GCN-NEXT: v_readlane_b32 s2, v1, 35
-; GCN-NEXT: v_readlane_b32 s3, v1, 36
-; GCN-NEXT: v_readlane_b32 s4, v1, 37
-; GCN-NEXT: v_readlane_b32 s5, v1, 38
-; GCN-NEXT: v_readlane_b32 s6, v1, 39
-; GCN-NEXT: v_readlane_b32 s7, v1, 40
+; GCN-NEXT: v_readlane_b32 s0, v1, 32
+; GCN-NEXT: v_readlane_b32 s1, v1, 33
+; GCN-NEXT: v_readlane_b32 s2, v1, 34
+; GCN-NEXT: v_readlane_b32 s3, v1, 35
+; GCN-NEXT: v_readlane_b32 s4, v1, 36
+; GCN-NEXT: v_readlane_b32 s5, v1, 37
+; GCN-NEXT: v_readlane_b32 s6, v1, 38
+; GCN-NEXT: v_readlane_b32 s7, v1, 39
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 41
-; GCN-NEXT: v_readlane_b32 s1, v1, 42
-; GCN-NEXT: v_readlane_b32 s2, v1, 43
-; GCN-NEXT: v_readlane_b32 s3, v1, 44
-; GCN-NEXT: v_readlane_b32 s4, v1, 45
-; GCN-NEXT: v_readlane_b32 s5, v1, 46
-; GCN-NEXT: v_readlane_b32 s6, v1, 47
-; GCN-NEXT: v_readlane_b32 s7, v1, 48
+; GCN-NEXT: v_readlane_b32 s0, v1, 40
+; GCN-NEXT: v_readlane_b32 s1, v1, 41
+; GCN-NEXT: v_readlane_b32 s2, v1, 42
+; GCN-NEXT: v_readlane_b32 s3, v1, 43
+; GCN-NEXT: v_readlane_b32 s4, v1, 44
+; GCN-NEXT: v_readlane_b32 s5, v1, 45
+; GCN-NEXT: v_readlane_b32 s6, v1, 46
+; GCN-NEXT: v_readlane_b32 s7, v1, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 49
-; GCN-NEXT: v_readlane_b32 s1, v1, 50
-; GCN-NEXT: v_readlane_b32 s2, v1, 51
-; GCN-NEXT: v_readlane_b32 s3, v1, 52
-; GCN-NEXT: v_readlane_b32 s4, v1, 53
-; GCN-NEXT: v_readlane_b32 s5, v1, 54
-; GCN-NEXT: v_readlane_b32 s6, v1, 55
-; GCN-NEXT: v_readlane_b32 s7, v1, 56
+; GCN-NEXT: v_readlane_b32 s0, v1, 48
+; GCN-NEXT: v_readlane_b32 s1, v1, 49
+; GCN-NEXT: v_readlane_b32 s2, v1, 50
+; GCN-NEXT: v_readlane_b32 s3, v1, 51
+; GCN-NEXT: v_readlane_b32 s4, v1, 52
+; GCN-NEXT: v_readlane_b32 s5, v1, 53
+; GCN-NEXT: v_readlane_b32 s6, v1, 54
+; GCN-NEXT: v_readlane_b32 s7, v1, 55
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 57
-; GCN-NEXT: v_readlane_b32 s1, v1, 58
-; GCN-NEXT: v_readlane_b32 s2, v1, 59
-; GCN-NEXT: v_readlane_b32 s3, v1, 60
-; GCN-NEXT: v_readlane_b32 s4, v1, 61
-; GCN-NEXT: v_readlane_b32 s5, v1, 62
-; GCN-NEXT: v_readlane_b32 s6, v1, 63
-; GCN-NEXT: v_readlane_b32 s7, v2, 0
+; GCN-NEXT: v_readlane_b32 s0, v1, 56
+; GCN-NEXT: v_readlane_b32 s1, v1, 57
+; GCN-NEXT: v_readlane_b32 s2, v1, 58
+; GCN-NEXT: v_readlane_b32 s3, v1, 59
+; GCN-NEXT: v_readlane_b32 s4, v1, 60
+; GCN-NEXT: v_readlane_b32 s5, v1, 61
+; GCN-NEXT: v_readlane_b32 s6, v1, 62
+; GCN-NEXT: v_readlane_b32 s7, v1, 63
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 9
-; GCN-NEXT: v_readlane_b32 s1, v0, 10
-; GCN-NEXT: v_readlane_b32 s2, v0, 11
-; GCN-NEXT: v_readlane_b32 s3, v0, 12
-; GCN-NEXT: v_readlane_b32 s4, v0, 13
-; GCN-NEXT: v_readlane_b32 s5, v0, 14
-; GCN-NEXT: v_readlane_b32 s6, v0, 15
-; GCN-NEXT: v_readlane_b32 s7, v0, 16
+; GCN-NEXT: v_readlane_b32 s0, v0, 8
+; GCN-NEXT: v_readlane_b32 s1, v0, 9
+; GCN-NEXT: v_readlane_b32 s2, v0, 10
+; GCN-NEXT: v_readlane_b32 s3, v0, 11
+; GCN-NEXT: v_readlane_b32 s4, v0, 12
+; GCN-NEXT: v_readlane_b32 s5, v0, 13
+; GCN-NEXT: v_readlane_b32 s6, v0, 14
+; GCN-NEXT: v_readlane_b32 s7, v0, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 17
-; GCN-NEXT: v_readlane_b32 s1, v0, 18
-; GCN-NEXT: v_readlane_b32 s2, v0, 19
-; GCN-NEXT: v_readlane_b32 s3, v0, 20
-; GCN-NEXT: v_readlane_b32 s4, v0, 21
-; GCN-NEXT: v_readlane_b32 s5, v0, 22
-; GCN-NEXT: v_readlane_b32 s6, v0, 23
-; GCN-NEXT: v_readlane_b32 s7, v0, 24
+; GCN-NEXT: v_readlane_b32 s0, v0, 16
+; GCN-NEXT: v_readlane_b32 s1, v0, 17
+; GCN-NEXT: v_readlane_b32 s2, v0, 18
+; GCN-NEXT: v_readlane_b32 s3, v0, 19
+; GCN-NEXT: v_readlane_b32 s4, v0, 20
+; GCN-NEXT: v_readlane_b32 s5, v0, 21
+; GCN-NEXT: v_readlane_b32 s6, v0, 22
+; GCN-NEXT: v_readlane_b32 s7, v0, 23
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 25
-; GCN-NEXT: v_readlane_b32 s1, v0, 26
-; GCN-NEXT: v_readlane_b32 s2, v0, 27
-; GCN-NEXT: v_readlane_b32 s3, v0, 28
-; GCN-NEXT: v_readlane_b32 s4, v0, 29
-; GCN-NEXT: v_readlane_b32 s5, v0, 30
-; GCN-NEXT: v_readlane_b32 s6, v0, 31
-; GCN-NEXT: v_readlane_b32 s7, v0, 32
+; GCN-NEXT: v_readlane_b32 s0, v0, 24
+; GCN-NEXT: v_readlane_b32 s1, v0, 25
+; GCN-NEXT: v_readlane_b32 s2, v0, 26
+; GCN-NEXT: v_readlane_b32 s3, v0, 27
+; GCN-NEXT: v_readlane_b32 s4, v0, 28
+; GCN-NEXT: v_readlane_b32 s5, v0, 29
+; GCN-NEXT: v_readlane_b32 s6, v0, 30
+; GCN-NEXT: v_readlane_b32 s7, v0, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 33
-; GCN-NEXT: v_readlane_b32 s1, v0, 34
-; GCN-NEXT: v_readlane_b32 s2, v0, 35
-; GCN-NEXT: v_readlane_b32 s3, v0, 36
-; GCN-NEXT: v_readlane_b32 s4, v0, 37
-; GCN-NEXT: v_readlane_b32 s5, v0, 38
-; GCN-NEXT: v_readlane_b32 s6, v0, 39
-; GCN-NEXT: v_readlane_b32 s7, v0, 40
+; GCN-NEXT: v_readlane_b32 s0, v0, 32
+; GCN-NEXT: v_readlane_b32 s1, v0, 33
+; GCN-NEXT: v_readlane_b32 s2, v0, 34
+; GCN-NEXT: v_readlane_b32 s3, v0, 35
+; GCN-NEXT: v_readlane_b32 s4, v0, 36
+; GCN-NEXT: v_readlane_b32 s5, v0, 37
+; GCN-NEXT: v_readlane_b32 s6, v0, 38
+; GCN-NEXT: v_readlane_b32 s7, v0, 39
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 41
-; GCN-NEXT: v_readlane_b32 s1, v0, 42
-; GCN-NEXT: v_readlane_b32 s2, v0, 43
-; GCN-NEXT: v_readlane_b32 s3, v0, 44
-; GCN-NEXT: v_readlane_b32 s4, v0, 45
-; GCN-NEXT: v_readlane_b32 s5, v0, 46
-; GCN-NEXT: v_readlane_b32 s6, v0, 47
-; GCN-NEXT: v_readlane_b32 s7, v0, 48
+; GCN-NEXT: v_readlane_b32 s0, v0, 40
+; GCN-NEXT: v_readlane_b32 s1, v0, 41
+; GCN-NEXT: v_readlane_b32 s2, v0, 42
+; GCN-NEXT: v_readlane_b32 s3, v0, 43
+; GCN-NEXT: v_readlane_b32 s4, v0, 44
+; GCN-NEXT: v_readlane_b32 s5, v0, 45
+; GCN-NEXT: v_readlane_b32 s6, v0, 46
+; GCN-NEXT: v_readlane_b32 s7, v0, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 49
-; GCN-NEXT: v_readlane_b32 s1, v0, 50
-; GCN-NEXT: v_readlane_b32 s2, v0, 51
-; GCN-NEXT: v_readlane_b32 s3, v0, 52
-; GCN-NEXT: v_readlane_b32 s4, v0, 53
-; GCN-NEXT: v_readlane_b32 s5, v0, 54
-; GCN-NEXT: v_readlane_b32 s6, v0, 55
-; GCN-NEXT: v_readlane_b32 s7, v0, 56
+; GCN-NEXT: v_readlane_b32 s0, v0, 48
+; GCN-NEXT: v_readlane_b32 s1, v0, 49
+; GCN-NEXT: v_readlane_b32 s2, v0, 50
+; GCN-NEXT: v_readlane_b32 s3, v0, 51
+; GCN-NEXT: v_readlane_b32 s4, v0, 52
+; GCN-NEXT: v_readlane_b32 s5, v0, 53
+; GCN-NEXT: v_readlane_b32 s6, v0, 54
+; GCN-NEXT: v_readlane_b32 s7, v0, 55
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v2, 1
-; GCN-NEXT: v_readlane_b32 s1, v2, 2
-; GCN-NEXT: v_readlane_b32 s2, v2, 3
-; GCN-NEXT: v_readlane_b32 s3, v2, 4
-; GCN-NEXT: v_readlane_b32 s4, v2, 5
-; GCN-NEXT: v_readlane_b32 s5, v2, 6
-; GCN-NEXT: v_readlane_b32 s6, v2, 7
-; GCN-NEXT: v_readlane_b32 s7, v2, 8
+; GCN-NEXT: v_readlane_b32 s0, v2, 0
+; GCN-NEXT: v_readlane_b32 s1, v2, 1
+; GCN-NEXT: v_readlane_b32 s2, v2, 2
+; GCN-NEXT: v_readlane_b32 s3, v2, 3
+; GCN-NEXT: v_readlane_b32 s4, v2, 4
+; GCN-NEXT: v_readlane_b32 s5, v2, 5
+; GCN-NEXT: v_readlane_b32 s6, v2, 6
+; GCN-NEXT: v_readlane_b32 s7, v2, 7
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
@@ -444,195 +442,193 @@ ret:
define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 {
; GCN-LABEL: split_sgpr_spill_2_vgprs:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[36:51]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 0
+; GCN-NEXT: v_writelane_b32 v0, s5, 1
+; GCN-NEXT: v_writelane_b32 v0, s6, 2
+; GCN-NEXT: v_writelane_b32 v0, s7, 3
+; GCN-NEXT: v_writelane_b32 v0, s8, 4
+; GCN-NEXT: v_writelane_b32 v0, s9, 5
+; GCN-NEXT: v_writelane_b32 v0, s10, 6
+; GCN-NEXT: v_writelane_b32 v0, s11, 7
+; GCN-NEXT: v_writelane_b32 v0, s12, 8
+; GCN-NEXT: v_writelane_b32 v0, s13, 9
+; GCN-NEXT: v_writelane_b32 v0, s14, 10
+; GCN-NEXT: v_writelane_b32 v0, s15, 11
+; GCN-NEXT: v_writelane_b32 v0, s16, 12
+; GCN-NEXT: v_writelane_b32 v0, s17, 13
+; GCN-NEXT: v_writelane_b32 v0, s18, 14
+; GCN-NEXT: v_writelane_b32 v0, s19, 15
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 16
+; GCN-NEXT: v_writelane_b32 v0, s5, 17
+; GCN-NEXT: v_writelane_b32 v0, s6, 18
+; GCN-NEXT: v_writelane_b32 v0, s7, 19
+; GCN-NEXT: v_writelane_b32 v0, s8, 20
+; GCN-NEXT: v_writelane_b32 v0, s9, 21
+; GCN-NEXT: v_writelane_b32 v0, s10, 22
+; GCN-NEXT: v_writelane_b32 v0, s11, 23
+; GCN-NEXT: v_writelane_b32 v0, s12, 24
+; GCN-NEXT: v_writelane_b32 v0, s13, 25
+; GCN-NEXT: v_writelane_b32 v0, s14, 26
+; GCN-NEXT: v_writelane_b32 v0, s15, 27
+; GCN-NEXT: v_writelane_b32 v0, s16, 28
+; GCN-NEXT: v_writelane_b32 v0, s17, 29
+; GCN-NEXT: v_writelane_b32 v0, s18, 30
+; GCN-NEXT: v_writelane_b32 v0, s19, 31
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[20:27]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:1]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s3, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_writelane_b32 v0, s0, 0
-; GCN-NEXT: v_writelane_b32 v0, s4, 1
-; GCN-NEXT: v_writelane_b32 v0, s5, 2
-; GCN-NEXT: v_writelane_b32 v0, s6, 3
-; GCN-NEXT: v_writelane_b32 v0, s7, 4
-; GCN-NEXT: v_writelane_b32 v0, s8, 5
-; GCN-NEXT: v_writelane_b32 v0, s9, 6
-; GCN-NEXT: v_writelane_b32 v0, s10, 7
-; GCN-NEXT: v_writelane_b32 v0, s11, 8
-; GCN-NEXT: v_writelane_b32 v0, s12, 9
-; GCN-NEXT: v_writelane_b32 v0, s13, 10
-; GCN-NEXT: v_writelane_b32 v0, s14, 11
-; GCN-NEXT: v_writelane_b32 v0, s15, 12
-; GCN-NEXT: v_writelane_b32 v0, s16, 13
-; GCN-NEXT: v_writelane_b32 v0, s17, 14
-; GCN-NEXT: v_writelane_b32 v0, s18, 15
-; GCN-NEXT: v_writelane_b32 v0, s19, 16
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:15]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[16:31]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 17
-; GCN-NEXT: v_writelane_b32 v0, s1, 18
-; GCN-NEXT: v_writelane_b32 v0, s2, 19
-; GCN-NEXT: v_writelane_b32 v0, s3, 20
-; GCN-NEXT: v_writelane_b32 v0, s4, 21
-; GCN-NEXT: v_writelane_b32 v0, s5, 22
-; GCN-NEXT: v_writelane_b32 v0, s6, 23
-; GCN-NEXT: v_writelane_b32 v0, s7, 24
-; GCN-NEXT: v_writelane_b32 v0, s8, 25
-; GCN-NEXT: v_writelane_b32 v0, s9, 26
-; GCN-NEXT: v_writelane_b32 v0, s10, 27
-; GCN-NEXT: v_writelane_b32 v0, s11, 28
-; GCN-NEXT: v_writelane_b32 v0, s12, 29
-; GCN-NEXT: v_writelane_b32 v0, s13, 30
-; GCN-NEXT: v_writelane_b32 v0, s14, 31
-; GCN-NEXT: v_writelane_b32 v0, s15, 32
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[8:9]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_mov_b32 s10, 0
-; GCN-NEXT: v_readlane_b32 s11, v0, 0
-; GCN-NEXT: s_cmp_lg_u32 s11, s10
-; GCN-NEXT: v_writelane_b32 v0, s36, 33
-; GCN-NEXT: v_writelane_b32 v0, s37, 34
-; GCN-NEXT: v_writelane_b32 v0, s38, 35
-; GCN-NEXT: v_writelane_b32 v0, s39, 36
-; GCN-NEXT: v_writelane_b32 v0, s40, 37
-; GCN-NEXT: v_writelane_b32 v0, s41, 38
-; GCN-NEXT: v_writelane_b32 v0, s42, 39
-; GCN-NEXT: v_writelane_b32 v0, s43, 40
-; GCN-NEXT: v_writelane_b32 v0, s44, 41
-; GCN-NEXT: v_writelane_b32 v0, s45, 42
-; GCN-NEXT: v_writelane_b32 v0, s46, 43
-; GCN-NEXT: v_writelane_b32 v0, s47, 44
-; GCN-NEXT: v_writelane_b32 v0, s48, 45
-; GCN-NEXT: v_writelane_b32 v0, s49, 46
-; GCN-NEXT: v_writelane_b32 v0, s50, 47
-; GCN-NEXT: v_writelane_b32 v0, s51, 48
-; GCN-NEXT: v_writelane_b32 v0, s16, 49
-; GCN-NEXT: v_writelane_b32 v0, s17, 50
-; GCN-NEXT: v_writelane_b32 v0, s18, 51
-; GCN-NEXT: v_writelane_b32 v0, s19, 52
-; GCN-NEXT: v_writelane_b32 v0, s20, 53
-; GCN-NEXT: v_writelane_b32 v0, s21, 54
-; GCN-NEXT: v_writelane_b32 v0, s22, 55
-; GCN-NEXT: v_writelane_b32 v0, s23, 56
-; GCN-NEXT: v_writelane_b32 v0, s24, 57
-; GCN-NEXT: v_writelane_b32 v0, s25, 58
-; GCN-NEXT: v_writelane_b32 v0, s26, 59
-; GCN-NEXT: v_writelane_b32 v0, s27, 60
-; GCN-NEXT: v_writelane_b32 v0, s28, 61
-; GCN-NEXT: v_writelane_b32 v0, s29, 62
-; GCN-NEXT: v_writelane_b32 v0, s30, 63
-; GCN-NEXT: v_writelane_b32 v1, s31, 0
-; GCN-NEXT: v_writelane_b32 v1, s0, 1
-; GCN-NEXT: v_writelane_b32 v1, s1, 2
-; GCN-NEXT: v_writelane_b32 v1, s2, 3
-; GCN-NEXT: v_writelane_b32 v1, s3, 4
-; GCN-NEXT: v_writelane_b32 v1, s4, 5
-; GCN-NEXT: v_writelane_b32 v1, s5, 6
-; GCN-NEXT: v_writelane_b32 v1, s6, 7
-; GCN-NEXT: v_writelane_b32 v1, s7, 8
-; GCN-NEXT: v_writelane_b32 v1, s8, 9
-; GCN-NEXT: v_writelane_b32 v1, s9, 10
+; GCN-NEXT: s_cmp_lg_u32 s2, s3
+; GCN-NEXT: v_writelane_b32 v0, s36, 32
+; GCN-NEXT: v_writelane_b32 v0, s37, 33
+; GCN-NEXT: v_writelane_b32 v0, s38, 34
+; GCN-NEXT: v_writelane_b32 v0, s39, 35
+; GCN-NEXT: v_writelane_b32 v0, s40, 36
+; GCN-NEXT: v_writelane_b32 v0, s41, 37
+; GCN-NEXT: v_writelane_b32 v0, s42, 38
+; GCN-NEXT: v_writelane_b32 v0, s43, 39
+; GCN-NEXT: v_writelane_b32 v0, s44, 40
+; GCN-NEXT: v_writelane_b32 v0, s45, 41
+; GCN-NEXT: v_writelane_b32 v0, s46, 42
+; GCN-NEXT: v_writelane_b32 v0, s47, 43
+; GCN-NEXT: v_writelane_b32 v0, s48, 44
+; GCN-NEXT: v_writelane_b32 v0, s49, 45
+; GCN-NEXT: v_writelane_b32 v0, s50, 46
+; GCN-NEXT: v_writelane_b32 v0, s51, 47
+; GCN-NEXT: v_writelane_b32 v0, s4, 48
+; GCN-NEXT: v_writelane_b32 v0, s5, 49
+; GCN-NEXT: v_writelane_b32 v0, s6, 50
+; GCN-NEXT: v_writelane_b32 v0, s7, 51
+; GCN-NEXT: v_writelane_b32 v0, s8, 52
+; GCN-NEXT: v_writelane_b32 v0, s9, 53
+; GCN-NEXT: v_writelane_b32 v0, s10, 54
+; GCN-NEXT: v_writelane_b32 v0, s11, 55
+; GCN-NEXT: v_writelane_b32 v0, s12, 56
+; GCN-NEXT: v_writelane_b32 v0, s13, 57
+; GCN-NEXT: v_writelane_b32 v0, s14, 58
+; GCN-NEXT: v_writelane_b32 v0, s15, 59
+; GCN-NEXT: v_writelane_b32 v0, s16, 60
+; GCN-NEXT: v_writelane_b32 v0, s17, 61
+; GCN-NEXT: v_writelane_b32 v0, s18, 62
+; GCN-NEXT: v_writelane_b32 v0, s19, 63
+; GCN-NEXT: v_writelane_b32 v1, s20, 0
+; GCN-NEXT: v_writelane_b32 v1, s21, 1
+; GCN-NEXT: v_writelane_b32 v1, s22, 2
+; GCN-NEXT: v_writelane_b32 v1, s23, 3
+; GCN-NEXT: v_writelane_b32 v1, s24, 4
+; GCN-NEXT: v_writelane_b32 v1, s25, 5
+; GCN-NEXT: v_writelane_b32 v1, s26, 6
+; GCN-NEXT: v_writelane_b32 v1, s27, 7
+; GCN-NEXT: v_writelane_b32 v1, s0, 8
+; GCN-NEXT: v_writelane_b32 v1, s1, 9
; GCN-NEXT: s_cbranch_scc1 BB1_2
; GCN-NEXT: ; %bb.1: ; %bb0
-; GCN-NEXT: v_readlane_b32 s0, v0, 1
-; GCN-NEXT: v_readlane_b32 s1, v0, 2
-; GCN-NEXT: v_readlane_b32 s2, v0, 3
-; GCN-NEXT: v_readlane_b32 s3, v0, 4
-; GCN-NEXT: v_readlane_b32 s4, v0, 5
-; GCN-NEXT: v_readlane_b32 s5, v0, 6
-; GCN-NEXT: v_readlane_b32 s6, v0, 7
-; GCN-NEXT: v_readlane_b32 s7, v0, 8
-; GCN-NEXT: v_readlane_b32 s8, v0, 9
-; GCN-NEXT: v_readlane_b32 s9, v0, 10
-; GCN-NEXT: v_readlane_b32 s10, v0, 11
-; GCN-NEXT: v_readlane_b32 s11, v0, 12
-; GCN-NEXT: v_readlane_b32 s12, v0, 13
-; GCN-NEXT: v_readlane_b32 s13, v0, 14
-; GCN-NEXT: v_readlane_b32 s14, v0, 15
-; GCN-NEXT: v_readlane_b32 s15, v0, 16
+; GCN-NEXT: v_readlane_b32 s0, v0, 0
+; GCN-NEXT: v_readlane_b32 s1, v0, 1
+; GCN-NEXT: v_readlane_b32 s2, v0, 2
+; GCN-NEXT: v_readlane_b32 s3, v0, 3
+; GCN-NEXT: v_readlane_b32 s4, v0, 4
+; GCN-NEXT: v_readlane_b32 s5, v0, 5
+; GCN-NEXT: v_readlane_b32 s6, v0, 6
+; GCN-NEXT: v_readlane_b32 s7, v0, 7
+; GCN-NEXT: v_readlane_b32 s8, v0, 8
+; GCN-NEXT: v_readlane_b32 s9, v0, 9
+; GCN-NEXT: v_readlane_b32 s10, v0, 10
+; GCN-NEXT: v_readlane_b32 s11, v0, 11
+; GCN-NEXT: v_readlane_b32 s12, v0, 12
+; GCN-NEXT: v_readlane_b32 s13, v0, 13
+; GCN-NEXT: v_readlane_b32 s14, v0, 14
+; GCN-NEXT: v_readlane_b32 s15, v0, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 33
-; GCN-NEXT: v_readlane_b32 s1, v0, 34
-; GCN-NEXT: v_readlane_b32 s2, v0, 35
-; GCN-NEXT: v_readlane_b32 s3, v0, 36
-; GCN-NEXT: v_readlane_b32 s4, v0, 37
-; GCN-NEXT: v_readlane_b32 s5, v0, 38
-; GCN-NEXT: v_readlane_b32 s6, v0, 39
-; GCN-NEXT: v_readlane_b32 s7, v0, 40
-; GCN-NEXT: v_readlane_b32 s8, v0, 41
-; GCN-NEXT: v_readlane_b32 s9, v0, 42
-; GCN-NEXT: v_readlane_b32 s10, v0, 43
-; GCN-NEXT: v_readlane_b32 s11, v0, 44
-; GCN-NEXT: v_readlane_b32 s12, v0, 45
-; GCN-NEXT: v_readlane_b32 s13, v0, 46
-; GCN-NEXT: v_readlane_b32 s14, v0, 47
-; GCN-NEXT: v_readlane_b32 s15, v0, 48
+; GCN-NEXT: v_readlane_b32 s0, v0, 32
+; GCN-NEXT: v_readlane_b32 s1, v0, 33
+; GCN-NEXT: v_readlane_b32 s2, v0, 34
+; GCN-NEXT: v_readlane_b32 s3, v0, 35
+; GCN-NEXT: v_readlane_b32 s4, v0, 36
+; GCN-NEXT: v_readlane_b32 s5, v0, 37
+; GCN-NEXT: v_readlane_b32 s6, v0, 38
+; GCN-NEXT: v_readlane_b32 s7, v0, 39
+; GCN-NEXT: v_readlane_b32 s8, v0, 40
+; GCN-NEXT: v_readlane_b32 s9, v0, 41
+; GCN-NEXT: v_readlane_b32 s10, v0, 42
+; GCN-NEXT: v_readlane_b32 s11, v0, 43
+; GCN-NEXT: v_readlane_b32 s12, v0, 44
+; GCN-NEXT: v_readlane_b32 s13, v0, 45
+; GCN-NEXT: v_readlane_b32 s14, v0, 46
+; GCN-NEXT: v_readlane_b32 s15, v0, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 17
-; GCN-NEXT: v_readlane_b32 s1, v0, 18
-; GCN-NEXT: v_readlane_b32 s2, v0, 19
-; GCN-NEXT: v_readlane_b32 s3, v0, 20
-; GCN-NEXT: v_readlane_b32 s4, v0, 21
-; GCN-NEXT: v_readlane_b32 s5, v0, 22
-; GCN-NEXT: v_readlane_b32 s6, v0, 23
-; GCN-NEXT: v_readlane_b32 s7, v0, 24
-; GCN-NEXT: v_readlane_b32 s8, v0, 25
-; GCN-NEXT: v_readlane_b32 s9, v0, 26
-; GCN-NEXT: v_readlane_b32 s10, v0, 27
-; GCN-NEXT: v_readlane_b32 s11, v0, 28
-; GCN-NEXT: v_readlane_b32 s12, v0, 29
-; GCN-NEXT: v_readlane_b32 s13, v0, 30
-; GCN-NEXT: v_readlane_b32 s14, v0, 31
-; GCN-NEXT: v_readlane_b32 s15, v0, 32
+; GCN-NEXT: v_readlane_b32 s0, v0, 16
+; GCN-NEXT: v_readlane_b32 s1, v0, 17
+; GCN-NEXT: v_readlane_b32 s2, v0, 18
+; GCN-NEXT: v_readlane_b32 s3, v0, 19
+; GCN-NEXT: v_readlane_b32 s4, v0, 20
+; GCN-NEXT: v_readlane_b32 s5, v0, 21
+; GCN-NEXT: v_readlane_b32 s6, v0, 22
+; GCN-NEXT: v_readlane_b32 s7, v0, 23
+; GCN-NEXT: v_readlane_b32 s8, v0, 24
+; GCN-NEXT: v_readlane_b32 s9, v0, 25
+; GCN-NEXT: v_readlane_b32 s10, v0, 26
+; GCN-NEXT: v_readlane_b32 s11, v0, 27
+; GCN-NEXT: v_readlane_b32 s12, v0, 28
+; GCN-NEXT: v_readlane_b32 s13, v0, 29
+; GCN-NEXT: v_readlane_b32 s14, v0, 30
+; GCN-NEXT: v_readlane_b32 s15, v0, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 1
-; GCN-NEXT: v_readlane_b32 s1, v1, 2
-; GCN-NEXT: v_readlane_b32 s2, v1, 3
-; GCN-NEXT: v_readlane_b32 s3, v1, 4
-; GCN-NEXT: v_readlane_b32 s4, v1, 5
-; GCN-NEXT: v_readlane_b32 s5, v1, 6
-; GCN-NEXT: v_readlane_b32 s6, v1, 7
-; GCN-NEXT: v_readlane_b32 s7, v1, 8
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 9
-; GCN-NEXT: v_readlane_b32 s1, v1, 10
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[0:1]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 49
-; GCN-NEXT: v_readlane_b32 s1, v0, 50
-; GCN-NEXT: v_readlane_b32 s2, v0, 51
-; GCN-NEXT: v_readlane_b32 s3, v0, 52
-; GCN-NEXT: v_readlane_b32 s4, v0, 53
-; GCN-NEXT: v_readlane_b32 s5, v0, 54
-; GCN-NEXT: v_readlane_b32 s6, v0, 55
-; GCN-NEXT: v_readlane_b32 s7, v0, 56
-; GCN-NEXT: v_readlane_b32 s8, v0, 57
-; GCN-NEXT: v_readlane_b32 s9, v0, 58
-; GCN-NEXT: v_readlane_b32 s10, v0, 59
-; GCN-NEXT: v_readlane_b32 s11, v0, 60
-; GCN-NEXT: v_readlane_b32 s12, v0, 61
-; GCN-NEXT: v_readlane_b32 s13, v0, 62
-; GCN-NEXT: v_readlane_b32 s14, v0, 63
-; GCN-NEXT: v_readlane_b32 s15, v1, 0
+; GCN-NEXT: v_readlane_b32 s16, v1, 0
+; GCN-NEXT: v_readlane_b32 s17, v1, 1
+; GCN-NEXT: v_readlane_b32 s18, v1, 2
+; GCN-NEXT: v_readlane_b32 s19, v1, 3
+; GCN-NEXT: v_readlane_b32 s20, v1, 4
+; GCN-NEXT: v_readlane_b32 s21, v1, 5
+; GCN-NEXT: v_readlane_b32 s22, v1, 6
+; GCN-NEXT: v_readlane_b32 s23, v1, 7
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[16:23]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s24, v1, 8
+; GCN-NEXT: v_readlane_b32 s25, v1, 9
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[24:25]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v0, 48
+; GCN-NEXT: v_readlane_b32 s1, v0, 49
+; GCN-NEXT: v_readlane_b32 s2, v0, 50
+; GCN-NEXT: v_readlane_b32 s3, v0, 51
+; GCN-NEXT: v_readlane_b32 s4, v0, 52
+; GCN-NEXT: v_readlane_b32 s5, v0, 53
+; GCN-NEXT: v_readlane_b32 s6, v0, 54
+; GCN-NEXT: v_readlane_b32 s7, v0, 55
+; GCN-NEXT: v_readlane_b32 s8, v0, 56
+; GCN-NEXT: v_readlane_b32 s9, v0, 57
+; GCN-NEXT: v_readlane_b32 s10, v0, 58
+; GCN-NEXT: v_readlane_b32 s11, v0, 59
+; GCN-NEXT: v_readlane_b32 s12, v0, 60
+; GCN-NEXT: v_readlane_b32 s13, v0, 61
+; GCN-NEXT: v_readlane_b32 s14, v0, 62
+; GCN-NEXT: v_readlane_b32 s15, v0, 63
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
@@ -667,13 +663,13 @@ ret:
define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 {
; GCN-LABEL: no_vgprs_last_sgpr_spill:
; GCN: ; %bb.0:
-; GCN-NEXT: s_mov_b32 s56, SCRATCH_RSRC_DWORD0
-; GCN-NEXT: s_mov_b32 s57, SCRATCH_RSRC_DWORD1
-; GCN-NEXT: s_mov_b32 s58, -1
-; GCN-NEXT: s_mov_b32 s59, 0xe8f000
-; GCN-NEXT: s_add_u32 s56, s56, s3
-; GCN-NEXT: s_addc_u32 s57, s57, 0
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s22, -1
+; GCN-NEXT: s_mov_b32 s23, 0xe8f000
+; GCN-NEXT: s_add_u32 s20, s20, s3
+; GCN-NEXT: s_addc_u32 s21, s21, 0
+; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
@@ -692,179 +688,177 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[36:51]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v31, s4, 0
+; GCN-NEXT: v_writelane_b32 v31, s5, 1
+; GCN-NEXT: v_writelane_b32 v31, s6, 2
+; GCN-NEXT: v_writelane_b32 v31, s7, 3
+; GCN-NEXT: v_writelane_b32 v31, s8, 4
+; GCN-NEXT: v_writelane_b32 v31, s9, 5
+; GCN-NEXT: v_writelane_b32 v31, s10, 6
+; GCN-NEXT: v_writelane_b32 v31, s11, 7
+; GCN-NEXT: v_writelane_b32 v31, s12, 8
+; GCN-NEXT: v_writelane_b32 v31, s13, 9
+; GCN-NEXT: v_writelane_b32 v31, s14, 10
+; GCN-NEXT: v_writelane_b32 v31, s15, 11
+; GCN-NEXT: v_writelane_b32 v31, s16, 12
+; GCN-NEXT: v_writelane_b32 v31, s17, 13
+; GCN-NEXT: v_writelane_b32 v31, s18, 14
+; GCN-NEXT: v_writelane_b32 v31, s19, 15
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v31, s4, 16
+; GCN-NEXT: v_writelane_b32 v31, s5, 17
+; GCN-NEXT: v_writelane_b32 v31, s6, 18
+; GCN-NEXT: v_writelane_b32 v31, s7, 19
+; GCN-NEXT: v_writelane_b32 v31, s8, 20
+; GCN-NEXT: v_writelane_b32 v31, s9, 21
+; GCN-NEXT: v_writelane_b32 v31, s10, 22
+; GCN-NEXT: v_writelane_b32 v31, s11, 23
+; GCN-NEXT: v_writelane_b32 v31, s12, 24
+; GCN-NEXT: v_writelane_b32 v31, s13, 25
+; GCN-NEXT: v_writelane_b32 v31, s14, 26
+; GCN-NEXT: v_writelane_b32 v31, s15, 27
+; GCN-NEXT: v_writelane_b32 v31, s16, 28
+; GCN-NEXT: v_writelane_b32 v31, s17, 29
+; GCN-NEXT: v_writelane_b32 v31, s18, 30
+; GCN-NEXT: v_writelane_b32 v31, s19, 31
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:1]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s3, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_writelane_b32 v31, s0, 0
-; GCN-NEXT: v_writelane_b32 v31, s4, 1
-; GCN-NEXT: v_writelane_b32 v31, s5, 2
-; GCN-NEXT: v_writelane_b32 v31, s6, 3
-; GCN-NEXT: v_writelane_b32 v31, s7, 4
-; GCN-NEXT: v_writelane_b32 v31, s8, 5
-; GCN-NEXT: v_writelane_b32 v31, s9, 6
-; GCN-NEXT: v_writelane_b32 v31, s10, 7
-; GCN-NEXT: v_writelane_b32 v31, s11, 8
-; GCN-NEXT: v_writelane_b32 v31, s12, 9
-; GCN-NEXT: v_writelane_b32 v31, s13, 10
-; GCN-NEXT: v_writelane_b32 v31, s14, 11
-; GCN-NEXT: v_writelane_b32 v31, s15, 12
-; GCN-NEXT: v_writelane_b32 v31, s16, 13
-; GCN-NEXT: v_writelane_b32 v31, s17, 14
-; GCN-NEXT: v_writelane_b32 v31, s18, 15
-; GCN-NEXT: v_writelane_b32 v31, s19, 16
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:15]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[16:31]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[34:35]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_mov_b32 s33, 0
-; GCN-NEXT: v_readlane_b32 s52, v31, 0
-; GCN-NEXT: s_cmp_lg_u32 s52, s33
-; GCN-NEXT: v_writelane_b32 v31, s36, 17
-; GCN-NEXT: v_writelane_b32 v31, s37, 18
-; GCN-NEXT: v_writelane_b32 v31, s38, 19
-; GCN-NEXT: v_writelane_b32 v31, s39, 20
-; GCN-NEXT: v_writelane_b32 v31, s40, 21
-; GCN-NEXT: v_writelane_b32 v31, s41, 22
-; GCN-NEXT: v_writelane_b32 v31, s42, 23
-; GCN-NEXT: v_writelane_b32 v31, s43, 24
-; GCN-NEXT: v_writelane_b32 v31, s44, 25
-; GCN-NEXT: v_writelane_b32 v31, s45, 26
-; GCN-NEXT: v_writelane_b32 v31, s46, 27
-; GCN-NEXT: v_writelane_b32 v31, s47, 28
-; GCN-NEXT: v_writelane_b32 v31, s48, 29
-; GCN-NEXT: v_writelane_b32 v31, s49, 30
-; GCN-NEXT: v_writelane_b32 v31, s50, 31
-; GCN-NEXT: v_writelane_b32 v31, s51, 32
-; GCN-NEXT: v_writelane_b32 v31, s0, 33
-; GCN-NEXT: v_writelane_b32 v31, s1, 34
-; GCN-NEXT: v_writelane_b32 v31, s2, 35
-; GCN-NEXT: v_writelane_b32 v31, s3, 36
-; GCN-NEXT: v_writelane_b32 v31, s4, 37
-; GCN-NEXT: v_writelane_b32 v31, s5, 38
-; GCN-NEXT: v_writelane_b32 v31, s6, 39
-; GCN-NEXT: v_writelane_b32 v31, s7, 40
-; GCN-NEXT: v_writelane_b32 v31, s8, 41
-; GCN-NEXT: v_writelane_b32 v31, s9, 42
-; GCN-NEXT: v_writelane_b32 v31, s10, 43
-; GCN-NEXT: v_writelane_b32 v31, s11, 44
-; GCN-NEXT: v_writelane_b32 v31, s12, 45
-; GCN-NEXT: v_writelane_b32 v31, s13, 46
-; GCN-NEXT: v_writelane_b32 v31, s14, 47
-; GCN-NEXT: v_writelane_b32 v31, s15, 48
-; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0
-; GCN-NEXT: v_writelane_b32 v0, s16, 0
-; GCN-NEXT: v_writelane_b32 v0, s17, 1
-; GCN-NEXT: v_writelane_b32 v0, s18, 2
-; GCN-NEXT: v_writelane_b32 v0, s19, 3
-; GCN-NEXT: v_writelane_b32 v0, s20, 4
-; GCN-NEXT: v_writelane_b32 v0, s21, 5
-; GCN-NEXT: v_writelane_b32 v0, s22, 6
-; GCN-NEXT: v_writelane_b32 v0, s23, 7
-; GCN-NEXT: v_writelane_b32 v0, s24, 8
-; GCN-NEXT: v_writelane_b32 v0, s25, 9
-; GCN-NEXT: v_writelane_b32 v0, s26, 10
-; GCN-NEXT: v_writelane_b32 v0, s27, 11
-; GCN-NEXT: v_writelane_b32 v0, s28, 12
-; GCN-NEXT: v_writelane_b32 v0, s29, 13
-; GCN-NEXT: v_writelane_b32 v0, s30, 14
-; GCN-NEXT: v_writelane_b32 v0, s31, 15
-; GCN-NEXT: s_mov_b64 s[16:17], exec
-; GCN-NEXT: s_mov_b64 exec, 0xffff
-; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, s[16:17]
-; GCN-NEXT: v_writelane_b32 v31, s34, 49
-; GCN-NEXT: v_writelane_b32 v31, s35, 50
-; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0
+; GCN-NEXT: s_cmp_lg_u32 s2, s3
+; GCN-NEXT: v_writelane_b32 v31, s36, 32
+; GCN-NEXT: v_writelane_b32 v31, s37, 33
+; GCN-NEXT: v_writelane_b32 v31, s38, 34
+; GCN-NEXT: v_writelane_b32 v31, s39, 35
+; GCN-NEXT: v_writelane_b32 v31, s40, 36
+; GCN-NEXT: v_writelane_b32 v31, s41, 37
+; GCN-NEXT: v_writelane_b32 v31, s42, 38
+; GCN-NEXT: v_writelane_b32 v31, s43, 39
+; GCN-NEXT: v_writelane_b32 v31, s44, 40
+; GCN-NEXT: v_writelane_b32 v31, s45, 41
+; GCN-NEXT: v_writelane_b32 v31, s46, 42
+; GCN-NEXT: v_writelane_b32 v31, s47, 43
+; GCN-NEXT: v_writelane_b32 v31, s48, 44
+; GCN-NEXT: v_writelane_b32 v31, s49, 45
+; GCN-NEXT: v_writelane_b32 v31, s50, 46
+; GCN-NEXT: v_writelane_b32 v31, s51, 47
+; GCN-NEXT: v_writelane_b32 v31, s4, 48
+; GCN-NEXT: v_writelane_b32 v31, s5, 49
+; GCN-NEXT: v_writelane_b32 v31, s6, 50
+; GCN-NEXT: v_writelane_b32 v31, s7, 51
+; GCN-NEXT: v_writelane_b32 v31, s8, 52
+; GCN-NEXT: v_writelane_b32 v31, s9, 53
+; GCN-NEXT: v_writelane_b32 v31, s10, 54
+; GCN-NEXT: v_writelane_b32 v31, s11, 55
+; GCN-NEXT: v_writelane_b32 v31, s12, 56
+; GCN-NEXT: v_writelane_b32 v31, s13, 57
+; GCN-NEXT: v_writelane_b32 v31, s14, 58
+; GCN-NEXT: v_writelane_b32 v31, s15, 59
+; GCN-NEXT: v_writelane_b32 v31, s16, 60
+; GCN-NEXT: v_writelane_b32 v31, s17, 61
+; GCN-NEXT: v_writelane_b32 v31, s18, 62
+; GCN-NEXT: v_writelane_b32 v31, s19, 63
+; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0
+; GCN-NEXT: v_writelane_b32 v0, s0, 0
+; GCN-NEXT: v_writelane_b32 v0, s1, 1
+; GCN-NEXT: s_mov_b64 s[0:1], exec
+; GCN-NEXT: s_mov_b64 exec, 3
+; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[0:1]
+; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0
; GCN-NEXT: s_cbranch_scc1 BB2_2
; GCN-NEXT: ; %bb.1: ; %bb0
-; GCN-NEXT: v_readlane_b32 s0, v31, 1
-; GCN-NEXT: v_readlane_b32 s1, v31, 2
-; GCN-NEXT: v_readlane_b32 s2, v31, 3
-; GCN-NEXT: v_readlane_b32 s3, v31, 4
-; GCN-NEXT: v_readlane_b32 s4, v31, 5
-; GCN-NEXT: v_readlane_b32 s5, v31, 6
-; GCN-NEXT: v_readlane_b32 s6, v31, 7
-; GCN-NEXT: v_readlane_b32 s7, v31, 8
-; GCN-NEXT: v_readlane_b32 s8, v31, 9
-; GCN-NEXT: v_readlane_b32 s9, v31, 10
-; GCN-NEXT: v_readlane_b32 s10, v31, 11
-; GCN-NEXT: v_readlane_b32 s11, v31, 12
-; GCN-NEXT: v_readlane_b32 s12, v31, 13
-; GCN-NEXT: v_readlane_b32 s13, v31, 14
-; GCN-NEXT: v_readlane_b32 s14, v31, 15
-; GCN-NEXT: v_readlane_b32 s15, v31, 16
+; GCN-NEXT: v_readlane_b32 s0, v31, 0
+; GCN-NEXT: v_readlane_b32 s1, v31, 1
+; GCN-NEXT: v_readlane_b32 s2, v31, 2
+; GCN-NEXT: v_readlane_b32 s3, v31, 3
+; GCN-NEXT: v_readlane_b32 s4, v31, 4
+; GCN-NEXT: v_readlane_b32 s5, v31, 5
+; GCN-NEXT: v_readlane_b32 s6, v31, 6
+; GCN-NEXT: v_readlane_b32 s7, v31, 7
+; GCN-NEXT: v_readlane_b32 s8, v31, 8
+; GCN-NEXT: v_readlane_b32 s9, v31, 9
+; GCN-NEXT: v_readlane_b32 s10, v31, 10
+; GCN-NEXT: v_readlane_b32 s11, v31, 11
+; GCN-NEXT: v_readlane_b32 s12, v31, 12
+; GCN-NEXT: v_readlane_b32 s13, v31, 13
+; GCN-NEXT: v_readlane_b32 s14, v31, 14
+; GCN-NEXT: v_readlane_b32 s15, v31, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v31, 17
-; GCN-NEXT: v_readlane_b32 s1, v31, 18
-; GCN-NEXT: v_readlane_b32 s2, v31, 19
-; GCN-NEXT: v_readlane_b32 s3, v31, 20
-; GCN-NEXT: v_readlane_b32 s4, v31, 21
-; GCN-NEXT: v_readlane_b32 s5, v31, 22
-; GCN-NEXT: v_readlane_b32 s6, v31, 23
-; GCN-NEXT: v_readlane_b32 s7, v31, 24
-; GCN-NEXT: v_readlane_b32 s8, v31, 25
-; GCN-NEXT: v_readlane_b32 s9, v31, 26
-; GCN-NEXT: v_readlane_b32 s10, v31, 27
-; GCN-NEXT: v_readlane_b32 s11, v31, 28
-; GCN-NEXT: v_readlane_b32 s12, v31, 29
-; GCN-NEXT: v_readlane_b32 s13, v31, 30
-; GCN-NEXT: v_readlane_b32 s14, v31, 31
-; GCN-NEXT: v_readlane_b32 s15, v31, 32
+; GCN-NEXT: v_readlane_b32 s0, v31, 32
+; GCN-NEXT: v_readlane_b32 s1, v31, 33
+; GCN-NEXT: v_readlane_b32 s2, v31, 34
+; GCN-NEXT: v_readlane_b32 s3, v31, 35
+; GCN-NEXT: v_readlane_b32 s4, v31, 36
+; GCN-NEXT: v_readlane_b32 s5, v31, 37
+; GCN-NEXT: v_readlane_b32 s6, v31, 38
+; GCN-NEXT: v_readlane_b32 s7, v31, 39
+; GCN-NEXT: v_readlane_b32 s8, v31, 40
+; GCN-NEXT: v_readlane_b32 s9, v31, 41
+; GCN-NEXT: v_readlane_b32 s10, v31, 42
+; GCN-NEXT: v_readlane_b32 s11, v31, 43
+; GCN-NEXT: v_readlane_b32 s12, v31, 44
+; GCN-NEXT: v_readlane_b32 s13, v31, 45
+; GCN-NEXT: v_readlane_b32 s14, v31, 46
+; GCN-NEXT: v_readlane_b32 s15, v31, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v31, 33
-; GCN-NEXT: v_readlane_b32 s1, v31, 34
-; GCN-NEXT: v_readlane_b32 s2, v31, 35
-; GCN-NEXT: v_readlane_b32 s3, v31, 36
-; GCN-NEXT: v_readlane_b32 s4, v31, 37
-; GCN-NEXT: v_readlane_b32 s5, v31, 38
-; GCN-NEXT: v_readlane_b32 s6, v31, 39
-; GCN-NEXT: v_readlane_b32 s7, v31, 40
-; GCN-NEXT: v_readlane_b32 s8, v31, 41
-; GCN-NEXT: v_readlane_b32 s9, v31, 42
-; GCN-NEXT: v_readlane_b32 s10, v31, 43
-; GCN-NEXT: v_readlane_b32 s11, v31, 44
-; GCN-NEXT: v_readlane_b32 s12, v31, 45
-; GCN-NEXT: v_readlane_b32 s13, v31, 46
-; GCN-NEXT: v_readlane_b32 s14, v31, 47
-; GCN-NEXT: v_readlane_b32 s15, v31, 48
+; GCN-NEXT: v_readlane_b32 s0, v31, 16
+; GCN-NEXT: v_readlane_b32 s1, v31, 17
+; GCN-NEXT: v_readlane_b32 s2, v31, 18
+; GCN-NEXT: v_readlane_b32 s3, v31, 19
+; GCN-NEXT: v_readlane_b32 s4, v31, 20
+; GCN-NEXT: v_readlane_b32 s5, v31, 21
+; GCN-NEXT: v_readlane_b32 s6, v31, 22
+; GCN-NEXT: v_readlane_b32 s7, v31, 23
+; GCN-NEXT: v_readlane_b32 s8, v31, 24
+; GCN-NEXT: v_readlane_b32 s9, v31, 25
+; GCN-NEXT: v_readlane_b32 s10, v31, 26
+; GCN-NEXT: v_readlane_b32 s11, v31, 27
+; GCN-NEXT: v_readlane_b32 s12, v31, 28
+; GCN-NEXT: v_readlane_b32 s13, v31, 29
+; GCN-NEXT: v_readlane_b32 s14, v31, 30
+; GCN-NEXT: v_readlane_b32 s15, v31, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_mov_b64 s[0:1], exec
-; GCN-NEXT: s_mov_b64 exec, 0xffff
-; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[0:1]
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-NEXT: v_readlane_b32 s2, v0, 2
-; GCN-NEXT: v_readlane_b32 s3, v0, 3
-; GCN-NEXT: v_readlane_b32 s4, v0, 4
-; GCN-NEXT: v_readlane_b32 s5, v0, 5
-; GCN-NEXT: v_readlane_b32 s6, v0, 6
-; GCN-NEXT: v_readlane_b32 s7, v0, 7
-; GCN-NEXT: v_readlane_b32 s8, v0, 8
-; GCN-NEXT: v_readlane_b32 s9, v0, 9
-; GCN-NEXT: v_readlane_b32 s10, v0, 10
-; GCN-NEXT: v_readlane_b32 s11, v0, 11
-; GCN-NEXT: v_readlane_b32 s12, v0, 12
-; GCN-NEXT: v_readlane_b32 s13, v0, 13
-; GCN-NEXT: v_readlane_b32 s14, v0, 14
-; GCN-NEXT: v_readlane_b32 s15, v0, 15
+; GCN-NEXT: v_readlane_b32 s0, v31, 48
+; GCN-NEXT: v_readlane_b32 s1, v31, 49
+; GCN-NEXT: v_readlane_b32 s2, v31, 50
+; GCN-NEXT: v_readlane_b32 s3, v31, 51
+; GCN-NEXT: v_readlane_b32 s4, v31, 52
+; GCN-NEXT: v_readlane_b32 s5, v31, 53
+; GCN-NEXT: v_readlane_b32 s6, v31, 54
+; GCN-NEXT: v_readlane_b32 s7, v31, 55
+; GCN-NEXT: v_readlane_b32 s8, v31, 56
+; GCN-NEXT: v_readlane_b32 s9, v31, 57
+; GCN-NEXT: v_readlane_b32 s10, v31, 58
+; GCN-NEXT: v_readlane_b32 s11, v31, 59
+; GCN-NEXT: v_readlane_b32 s12, v31, 60
+; GCN-NEXT: v_readlane_b32 s13, v31, 61
+; GCN-NEXT: v_readlane_b32 s14, v31, 62
+; GCN-NEXT: v_readlane_b32 s15, v31, 63
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v31, 49
-; GCN-NEXT: v_readlane_b32 s1, v31, 50
+; GCN-NEXT: s_mov_b64 s[16:17], exec
+; GCN-NEXT: s_mov_b64 exec, 3
+; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[16:17]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_readlane_b32 s16, v0, 0
+; GCN-NEXT: v_readlane_b32 s17, v0, 1
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[0:1]
+; GCN-NEXT: ; use s[16:17]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: BB2_2: ; %ret
; GCN-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index 9b629a5f911..a03318ead71 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -77,101 +77,6 @@ endif: ; preds = %else, %if
ret void
}
-; Force save and restore of m0 during SMEM spill
-; GCN-LABEL: {{^}}m0_unavailable_spill:
-
-; GCN: ; def m0, 1
-
-; GCN: s_mov_b32 m0, s0
-; GCN: v_interp_mov_f32
-
-; GCN: ; clobber m0
-
-; TOSMEM: s_mov_b32 s2, m0
-; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill
-; TOSMEM: s_mov_b32 m0, s2
-
-; TOSMEM: s_mov_b64 exec,
-; TOSMEM: s_cbranch_execz
-; TOSMEM: s_branch
-
-; TOSMEM: BB{{[0-9]+_[0-9]+}}:
-; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload
-
-; GCN-NOT: v_readlane_b32 m0
-; GCN-NOT: s_buffer_store_dword m0
-; GCN-NOT: s_buffer_load_dword m0
-define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) #0 {
-main_body:
- %m0 = call i32 asm sideeffect "; def $0, 1", "={m0}"() #0
- %tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0.arg)
- call void asm sideeffect "; clobber $0", "~{m0}"() #0
- %cmp = fcmp ueq float 0.000000e+00, %tmp
- br i1 %cmp, label %if, label %else
-
-if: ; preds = %main_body
- store volatile i32 8, i32 addrspace(1)* undef
- br label %endif
-
-else: ; preds = %main_body
- store volatile i32 11, i32 addrspace(1)* undef
- br label %endif
-
-endif:
- ret void
-}
-
-; GCN-LABEL: {{^}}restore_m0_lds:
-; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]]
-; TOSMEM: s_cmp_eq_u32
-; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, 0x200
-; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_cbranch_scc1
-
-; TOSMEM: s_mov_b32 m0, -1
-
-; TOSMEM: s_mov_b32 s2, m0
-; TOSMEM: s_add_u32 m0, s3, 0x200
-; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload
-; TOSMEM: s_mov_b32 m0, s2
-; TOSMEM: s_waitcnt lgkmcnt(0)
-
-; TOSMEM: ds_write_b64
-
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_waitcnt lgkmcnt(0)
-; TOSMEM-NOT: m0
-; TOSMEM: s_mov_b32 m0, s2
-; TOSMEM: ; use m0
-
-; TOSMEM: s_dcache_wb
-; TOSMEM: s_endpgm
-define amdgpu_kernel void @restore_m0_lds(i32 %arg) {
- %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
- %sval = load volatile i64, i64 addrspace(4)* undef
- %cmp = icmp eq i32 %arg, 0
- br i1 %cmp, label %ret, label %bb
-
-bb:
- store volatile i64 %sval, i64 addrspace(3)* undef
- call void asm sideeffect "; use $0", "{m0}"(i32 %m0) #0
- br label %ret
-
-ret:
- ret void
-}
-
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
index 1a48e76a241..e4beac77e1b 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -94,10 +94,10 @@ define i32 @called(i32 %a) noinline {
; GFX9-LABEL: {{^}}call:
define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) {
-; GFX9-O0: v_mov_b32_e32 v0, s0
+; GFX9-O0: v_mov_b32_e32 v0, s2
; GFX9-O3: v_mov_b32_e32 v2, s0
; GFX9-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s3
; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_not_b64 exec, exec
%tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
@@ -142,8 +142,8 @@ define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) {
; GFX9-O0: buffer_store_dword v1
; GFX9: s_swappc_b64
%tmp134 = call i64 @called_i64(i64 %tmp107)
-; GFX9-O0: buffer_load_dword v4
-; GFX9-O0: buffer_load_dword v5
+; GFX9-O0: buffer_load_dword v6
+; GFX9-O0: buffer_load_dword v7
%tmp136 = add i64 %tmp134, %tmp107
%tmp137 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp136)
%tmp138 = bitcast i64 %tmp137 to <2 x i32>
diff --git a/llvm/test/CodeGen/ARM/legalize-bitcast.ll b/llvm/test/CodeGen/ARM/legalize-bitcast.ll
index 529775df5fd..478ff985bf4 100644
--- a/llvm/test/CodeGen/ARM/legalize-bitcast.ll
+++ b/llvm/test/CodeGen/ARM/legalize-bitcast.ll
@@ -49,9 +49,9 @@ define i16 @int_to_vec(i80 %in) {
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: @ implicit-def: $q9
; CHECK-NEXT: vmov.f64 d18, d16
-; CHECK-NEXT: vrev32.16 q8, q9
-; CHECK-NEXT: @ kill: def $d16 killed $d16 killed $q8
-; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vrev32.16 q9, q9
+; CHECK-NEXT: @ kill: def $d18 killed $d18 killed $q9
+; CHECK-NEXT: vmov.u16 r0, d18[0]
; CHECK-NEXT: bx lr
%vec = bitcast i80 %in to <5 x i16>
%e0 = extractelement <5 x i16> %vec, i32 0
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
index a98c6eb9fd6..c63f24ea692 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
@@ -235,15 +235,15 @@ define i32 @f64tou32(double %a) {
; FP32-NEXT: mfc1 $1, $f0
; FP32-NEXT: lui $2, 16864
; FP32-NEXT: ori $3, $zero, 0
-; FP32-NEXT: mtc1 $3, $f0
-; FP32-NEXT: mtc1 $2, $f1
-; FP32-NEXT: sub.d $f2, $f12, $f0
-; FP32-NEXT: trunc.w.d $f2, $f2
-; FP32-NEXT: mfc1 $2, $f2
+; FP32-NEXT: mtc1 $3, $f2
+; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: sub.d $f4, $f12, $f2
+; FP32-NEXT: trunc.w.d $f0, $f4
+; FP32-NEXT: mfc1 $2, $f0
; FP32-NEXT: lui $3, 32768
; FP32-NEXT: xor $2, $2, $3
; FP32-NEXT: addiu $3, $zero, 1
-; FP32-NEXT: c.ult.d $f12, $f0
+; FP32-NEXT: c.ult.d $f12, $f2
; FP32-NEXT: movf $3, $zero, $fcc0
; FP32-NEXT: andi $3, $3, 1
; FP32-NEXT: movn $2, $1, $3
@@ -256,15 +256,15 @@ define i32 @f64tou32(double %a) {
; FP64-NEXT: mfc1 $1, $f0
; FP64-NEXT: lui $2, 16864
; FP64-NEXT: ori $3, $zero, 0
-; FP64-NEXT: mtc1 $3, $f0
-; FP64-NEXT: mthc1 $2, $f0
-; FP64-NEXT: sub.d $f1, $f12, $f0
-; FP64-NEXT: trunc.w.d $f1, $f1
-; FP64-NEXT: mfc1 $2, $f1
+; FP64-NEXT: mtc1 $3, $f1
+; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: sub.d $f2, $f12, $f1
+; FP64-NEXT: trunc.w.d $f0, $f2
+; FP64-NEXT: mfc1 $2, $f0
; FP64-NEXT: lui $3, 32768
; FP64-NEXT: xor $2, $2, $3
; FP64-NEXT: addiu $3, $zero, 1
-; FP64-NEXT: c.ult.d $f12, $f0
+; FP64-NEXT: c.ult.d $f12, $f1
; FP64-NEXT: movf $3, $zero, $fcc0
; FP64-NEXT: andi $3, $3, 1
; FP64-NEXT: movn $2, $1, $3
@@ -282,15 +282,15 @@ define zeroext i16 @f64tou16(double %a) {
; FP32-NEXT: mfc1 $1, $f0
; FP32-NEXT: lui $2, 16864
; FP32-NEXT: ori $3, $zero, 0
-; FP32-NEXT: mtc1 $3, $f0
-; FP32-NEXT: mtc1 $2, $f1
-; FP32-NEXT: sub.d $f2, $f12, $f0
-; FP32-NEXT: trunc.w.d $f2, $f2
-; FP32-NEXT: mfc1 $2, $f2
+; FP32-NEXT: mtc1 $3, $f2
+; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: sub.d $f4, $f12, $f2
+; FP32-NEXT: trunc.w.d $f0, $f4
+; FP32-NEXT: mfc1 $2, $f0
; FP32-NEXT: lui $3, 32768
; FP32-NEXT: xor $2, $2, $3
; FP32-NEXT: addiu $3, $zero, 1
-; FP32-NEXT: c.ult.d $f12, $f0
+; FP32-NEXT: c.ult.d $f12, $f2
; FP32-NEXT: movf $3, $zero, $fcc0
; FP32-NEXT: andi $3, $3, 1
; FP32-NEXT: movn $2, $1, $3
@@ -304,15 +304,15 @@ define zeroext i16 @f64tou16(double %a) {
; FP64-NEXT: mfc1 $1, $f0
; FP64-NEXT: lui $2, 16864
; FP64-NEXT: ori $3, $zero, 0
-; FP64-NEXT: mtc1 $3, $f0
-; FP64-NEXT: mthc1 $2, $f0
-; FP64-NEXT: sub.d $f1, $f12, $f0
-; FP64-NEXT: trunc.w.d $f1, $f1
-; FP64-NEXT: mfc1 $2, $f1
+; FP64-NEXT: mtc1 $3, $f1
+; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: sub.d $f2, $f12, $f1
+; FP64-NEXT: trunc.w.d $f0, $f2
+; FP64-NEXT: mfc1 $2, $f0
; FP64-NEXT: lui $3, 32768
; FP64-NEXT: xor $2, $2, $3
; FP64-NEXT: addiu $3, $zero, 1
-; FP64-NEXT: c.ult.d $f12, $f0
+; FP64-NEXT: c.ult.d $f12, $f1
; FP64-NEXT: movf $3, $zero, $fcc0
; FP64-NEXT: andi $3, $3, 1
; FP64-NEXT: movn $2, $1, $3
@@ -331,15 +331,15 @@ define zeroext i8 @f64tou8(double %a) {
; FP32-NEXT: mfc1 $1, $f0
; FP32-NEXT: lui $2, 16864
; FP32-NEXT: ori $3, $zero, 0
-; FP32-NEXT: mtc1 $3, $f0
-; FP32-NEXT: mtc1 $2, $f1
-; FP32-NEXT: sub.d $f2, $f12, $f0
-; FP32-NEXT: trunc.w.d $f2, $f2
-; FP32-NEXT: mfc1 $2, $f2
+; FP32-NEXT: mtc1 $3, $f2
+; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: sub.d $f4, $f12, $f2
+; FP32-NEXT: trunc.w.d $f0, $f4
+; FP32-NEXT: mfc1 $2, $f0
; FP32-NEXT: lui $3, 32768
; FP32-NEXT: xor $2, $2, $3
; FP32-NEXT: addiu $3, $zero, 1
-; FP32-NEXT: c.ult.d $f12, $f0
+; FP32-NEXT: c.ult.d $f12, $f2
; FP32-NEXT: movf $3, $zero, $fcc0
; FP32-NEXT: andi $3, $3, 1
; FP32-NEXT: movn $2, $1, $3
@@ -353,15 +353,15 @@ define zeroext i8 @f64tou8(double %a) {
; FP64-NEXT: mfc1 $1, $f0
; FP64-NEXT: lui $2, 16864
; FP64-NEXT: ori $3, $zero, 0
-; FP64-NEXT: mtc1 $3, $f0
-; FP64-NEXT: mthc1 $2, $f0
-; FP64-NEXT: sub.d $f1, $f12, $f0
-; FP64-NEXT: trunc.w.d $f1, $f1
-; FP64-NEXT: mfc1 $2, $f1
+; FP64-NEXT: mtc1 $3, $f1
+; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: sub.d $f2, $f12, $f1
+; FP64-NEXT: trunc.w.d $f0, $f2
+; FP64-NEXT: mfc1 $2, $f0
; FP64-NEXT: lui $3, 32768
; FP64-NEXT: xor $2, $2, $3
; FP64-NEXT: addiu $3, $zero, 1
-; FP64-NEXT: c.ult.d $f12, $f0
+; FP64-NEXT: c.ult.d $f12, $f1
; FP64-NEXT: movf $3, $zero, $fcc0
; FP64-NEXT: andi $3, $3, 1
; FP64-NEXT: movn $2, $1, $3
diff --git a/llvm/test/CodeGen/Mips/atomic-min-max.ll b/llvm/test/CodeGen/Mips/atomic-min-max.ll
index 646af650c00..a6200851940 100644
--- a/llvm/test/CodeGen/Mips/atomic-min-max.ll
+++ b/llvm/test/CodeGen/Mips/atomic-min-max.ll
@@ -1154,26 +1154,26 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 65535
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB4_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: slt $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movn $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB4_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: slt $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movn $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB4_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -1194,26 +1194,26 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 65535
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB4_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: slt $10, $7, $5
-; MIPS64R6-NEXT: seleqz $8, $7, $10
-; MIPS64R6-NEXT: selnez $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB4_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: slt $11, $8, $5
+; MIPS64R6-NEXT: seleqz $9, $8, $11
+; MIPS64R6-NEXT: selnez $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB4_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -1232,28 +1232,28 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 65535
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB4_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: slt $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB4_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movn $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB4_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -1273,28 +1273,28 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 65535
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB4_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: slt $10, $7, $5
-; MIPS64ELR6-NEXT: seleqz $8, $7, $10
-; MIPS64ELR6-NEXT: selnez $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB4_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $11, $8, $5
+; MIPS64ELR6-NEXT: seleqz $9, $8, $11
+; MIPS64ELR6-NEXT: selnez $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB4_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -1635,26 +1635,26 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 65535
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB5_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: slt $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movz $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB5_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: slt $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movz $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB5_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -1675,26 +1675,26 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 65535
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB5_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: slt $10, $7, $5
-; MIPS64R6-NEXT: selnez $8, $7, $10
-; MIPS64R6-NEXT: seleqz $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB5_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: slt $11, $8, $5
+; MIPS64R6-NEXT: selnez $9, $8, $11
+; MIPS64R6-NEXT: seleqz $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB5_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -1713,28 +1713,28 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 65535
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB5_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: slt $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB5_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movz $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB5_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -1754,28 +1754,28 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 65535
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB5_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: slt $10, $7, $5
-; MIPS64ELR6-NEXT: selnez $8, $7, $10
-; MIPS64ELR6-NEXT: seleqz $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB5_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $11, $8, $5
+; MIPS64ELR6-NEXT: selnez $9, $8, $11
+; MIPS64ELR6-NEXT: seleqz $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB5_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -2116,26 +2116,26 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 65535
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB6_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: sltu $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movn $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB6_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: sltu $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movn $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB6_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -2156,26 +2156,26 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 65535
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB6_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: sltu $10, $7, $5
-; MIPS64R6-NEXT: seleqz $8, $7, $10
-; MIPS64R6-NEXT: selnez $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB6_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: sltu $11, $8, $5
+; MIPS64R6-NEXT: seleqz $9, $8, $11
+; MIPS64R6-NEXT: selnez $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB6_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -2194,28 +2194,28 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 65535
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB6_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: sltu $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB6_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movn $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB6_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -2235,28 +2235,28 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 65535
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB6_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: sltu $10, $7, $5
-; MIPS64ELR6-NEXT: seleqz $8, $7, $10
-; MIPS64ELR6-NEXT: selnez $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB6_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $11, $8, $5
+; MIPS64ELR6-NEXT: seleqz $9, $8, $11
+; MIPS64ELR6-NEXT: selnez $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB6_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -2597,26 +2597,26 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 65535
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB7_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: sltu $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movz $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB7_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: sltu $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movz $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB7_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -2637,26 +2637,26 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 65535
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB7_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: sltu $10, $7, $5
-; MIPS64R6-NEXT: selnez $8, $7, $10
-; MIPS64R6-NEXT: seleqz $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB7_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: sltu $11, $8, $5
+; MIPS64R6-NEXT: selnez $9, $8, $11
+; MIPS64R6-NEXT: seleqz $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB7_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -2675,28 +2675,28 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 65535
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB7_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: sltu $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB7_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movz $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB7_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -2716,28 +2716,28 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) {
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 65535
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB7_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: sltu $10, $7, $5
-; MIPS64ELR6-NEXT: selnez $8, $7, $10
-; MIPS64ELR6-NEXT: seleqz $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB7_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $11, $8, $5
+; MIPS64ELR6-NEXT: selnez $9, $8, $11
+; MIPS64ELR6-NEXT: seleqz $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB7_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -3079,26 +3079,26 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 255
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB8_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: slt $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movn $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB8_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: slt $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movn $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB8_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -3119,26 +3119,26 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 255
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB8_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: slt $10, $7, $5
-; MIPS64R6-NEXT: seleqz $8, $7, $10
-; MIPS64R6-NEXT: selnez $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB8_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: slt $11, $8, $5
+; MIPS64R6-NEXT: seleqz $9, $8, $11
+; MIPS64R6-NEXT: selnez $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB8_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -3157,28 +3157,28 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 255
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB8_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: slt $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB8_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movn $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB8_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -3198,28 +3198,28 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 255
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB8_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: slt $10, $7, $5
-; MIPS64ELR6-NEXT: seleqz $8, $7, $10
-; MIPS64ELR6-NEXT: selnez $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB8_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $11, $8, $5
+; MIPS64ELR6-NEXT: seleqz $9, $8, $11
+; MIPS64ELR6-NEXT: selnez $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB8_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -3560,26 +3560,26 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 255
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB9_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: slt $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movz $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB9_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: slt $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movz $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB9_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -3600,26 +3600,26 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 255
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB9_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: slt $10, $7, $5
-; MIPS64R6-NEXT: selnez $8, $7, $10
-; MIPS64R6-NEXT: seleqz $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB9_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: slt $11, $8, $5
+; MIPS64R6-NEXT: selnez $9, $8, $11
+; MIPS64R6-NEXT: seleqz $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB9_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -3638,28 +3638,28 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 255
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB9_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: slt $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB9_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movz $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB9_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -3679,28 +3679,28 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 255
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB9_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: slt $10, $7, $5
-; MIPS64ELR6-NEXT: selnez $8, $7, $10
-; MIPS64ELR6-NEXT: seleqz $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB9_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $11, $8, $5
+; MIPS64ELR6-NEXT: selnez $9, $8, $11
+; MIPS64ELR6-NEXT: seleqz $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB9_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -4041,26 +4041,26 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 255
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB10_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: sltu $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movn $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB10_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: sltu $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movn $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB10_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -4081,26 +4081,26 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 255
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB10_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: sltu $10, $7, $5
-; MIPS64R6-NEXT: seleqz $8, $7, $10
-; MIPS64R6-NEXT: selnez $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB10_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: sltu $11, $8, $5
+; MIPS64R6-NEXT: seleqz $9, $8, $11
+; MIPS64R6-NEXT: selnez $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB10_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -4119,28 +4119,28 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 255
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB10_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: sltu $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB10_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movn $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB10_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -4160,28 +4160,28 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 255
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB10_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: sltu $10, $7, $5
-; MIPS64ELR6-NEXT: seleqz $8, $7, $10
-; MIPS64ELR6-NEXT: selnez $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB10_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $11, $8, $5
+; MIPS64ELR6-NEXT: seleqz $9, $8, $11
+; MIPS64ELR6-NEXT: selnez $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB10_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -4522,26 +4522,26 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 255
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB11_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: sltu $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movz $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB11_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: sltu $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movz $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB11_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -4562,26 +4562,26 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 255
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB11_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: sltu $10, $7, $5
-; MIPS64R6-NEXT: selnez $8, $7, $10
-; MIPS64R6-NEXT: seleqz $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB11_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: sltu $11, $8, $5
+; MIPS64R6-NEXT: selnez $9, $8, $11
+; MIPS64R6-NEXT: seleqz $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB11_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -4600,28 +4600,28 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 255
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB11_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: sltu $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB11_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movz $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB11_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -4641,28 +4641,28 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) {
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 255
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB11_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: sltu $10, $7, $5
-; MIPS64ELR6-NEXT: selnez $8, $7, $10
-; MIPS64ELR6-NEXT: seleqz $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB11_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $11, $8, $5
+; MIPS64ELR6-NEXT: selnez $9, $8, $11
+; MIPS64ELR6-NEXT: seleqz $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB11_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll
index 59ff83e4969..3846fda47b1 100644
--- a/llvm/test/CodeGen/Mips/atomic.ll
+++ b/llvm/test/CodeGen/Mips/atomic.ll
@@ -2559,28 +2559,28 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 3
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $5, $zero, $3
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 255
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB8_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $7, 0($2)
-; MIPS64R6O0-NEXT: addu $8, $7, $4
-; MIPS64R6O0-NEXT: and $8, $8, $3
-; MIPS64R6O0-NEXT: and $9, $7, $5
-; MIPS64R6O0-NEXT: or $9, $9, $8
-; MIPS64R6O0-NEXT: sc $9, 0($2)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB8_1
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: addu $9, $8, $4
+; MIPS64R6O0-NEXT: and $9, $9, $5
+; MIPS64R6O0-NEXT: and $10, $8, $6
+; MIPS64R6O0-NEXT: or $10, $10, $9
+; MIPS64R6O0-NEXT: sc $10, 0($2)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB8_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $6, $7, $3
-; MIPS64R6O0-NEXT: srlv $6, $6, $1
-; MIPS64R6O0-NEXT: seb $6, $6
+; MIPS64R6O0-NEXT: and $7, $8, $5
+; MIPS64R6O0-NEXT: srlv $7, $7, $3
+; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
@@ -3075,28 +3075,28 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 3
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $5, $zero, $3
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 255
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB9_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $7, 0($2)
-; MIPS64R6O0-NEXT: subu $8, $7, $4
-; MIPS64R6O0-NEXT: and $8, $8, $3
-; MIPS64R6O0-NEXT: and $9, $7, $5
-; MIPS64R6O0-NEXT: or $9, $9, $8
-; MIPS64R6O0-NEXT: sc $9, 0($2)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB9_1
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: subu $9, $8, $4
+; MIPS64R6O0-NEXT: and $9, $9, $5
+; MIPS64R6O0-NEXT: and $10, $8, $6
+; MIPS64R6O0-NEXT: or $10, $10, $9
+; MIPS64R6O0-NEXT: sc $10, 0($2)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB9_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $6, $7, $3
-; MIPS64R6O0-NEXT: srlv $6, $6, $1
-; MIPS64R6O0-NEXT: seb $6, $6
+; MIPS64R6O0-NEXT: and $7, $8, $5
+; MIPS64R6O0-NEXT: srlv $7, $7, $3
+; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
@@ -3601,29 +3601,29 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 3
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $5, $zero, $3
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 255
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB10_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $7, 0($2)
-; MIPS64R6O0-NEXT: and $8, $7, $4
-; MIPS64R6O0-NEXT: nor $8, $zero, $8
-; MIPS64R6O0-NEXT: and $8, $8, $3
-; MIPS64R6O0-NEXT: and $9, $7, $5
-; MIPS64R6O0-NEXT: or $9, $9, $8
-; MIPS64R6O0-NEXT: sc $9, 0($2)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB10_1
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: and $9, $8, $4
+; MIPS64R6O0-NEXT: nor $9, $zero, $9
+; MIPS64R6O0-NEXT: and $9, $9, $5
+; MIPS64R6O0-NEXT: and $10, $8, $6
+; MIPS64R6O0-NEXT: or $10, $10, $9
+; MIPS64R6O0-NEXT: sc $10, 0($2)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB10_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $6, $7, $3
-; MIPS64R6O0-NEXT: srlv $6, $6, $1
-; MIPS64R6O0-NEXT: seb $6, $6
+; MIPS64R6O0-NEXT: and $7, $8, $5
+; MIPS64R6O0-NEXT: srlv $7, $7, $3
+; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
@@ -4115,27 +4115,27 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 3
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $5, $zero, $3
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 255
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB11_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $7, 0($2)
-; MIPS64R6O0-NEXT: and $8, $4, $3
-; MIPS64R6O0-NEXT: and $9, $7, $5
-; MIPS64R6O0-NEXT: or $9, $9, $8
-; MIPS64R6O0-NEXT: sc $9, 0($2)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB11_1
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: and $9, $4, $5
+; MIPS64R6O0-NEXT: and $10, $8, $6
+; MIPS64R6O0-NEXT: or $10, $10, $9
+; MIPS64R6O0-NEXT: sc $10, 0($2)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB11_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $6, $7, $3
-; MIPS64R6O0-NEXT: srlv $6, $6, $1
-; MIPS64R6O0-NEXT: seb $6, $6
+; MIPS64R6O0-NEXT: and $7, $8, $5
+; MIPS64R6O0-NEXT: srlv $7, $7, $3
+; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
@@ -4666,32 +4666,32 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 3
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $6, $zero, $3
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $6, $zero, 255
+; MIPS64R6O0-NEXT: sllv $6, $6, $3
+; MIPS64R6O0-NEXT: nor $7, $zero, $6
; MIPS64R6O0-NEXT: andi $4, $4, 255
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: andi $5, $5, 255
-; MIPS64R6O0-NEXT: sllv $5, $5, $1
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: .LBB12_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $8, 0($2)
-; MIPS64R6O0-NEXT: and $9, $8, $3
-; MIPS64R6O0-NEXT: bnec $9, $4, .LBB12_3
+; MIPS64R6O0-NEXT: ll $9, 0($2)
+; MIPS64R6O0-NEXT: and $10, $9, $6
+; MIPS64R6O0-NEXT: bnec $10, $4, .LBB12_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1
-; MIPS64R6O0-NEXT: and $8, $8, $6
-; MIPS64R6O0-NEXT: or $8, $8, $5
-; MIPS64R6O0-NEXT: sc $8, 0($2)
-; MIPS64R6O0-NEXT: beqzc $8, .LBB12_1
+; MIPS64R6O0-NEXT: and $9, $9, $7
+; MIPS64R6O0-NEXT: or $9, $9, $5
+; MIPS64R6O0-NEXT: sc $9, 0($2)
+; MIPS64R6O0-NEXT: beqzc $9, .LBB12_1
; MIPS64R6O0-NEXT: .LBB12_3: # %entry
-; MIPS64R6O0-NEXT: srlv $7, $9, $1
-; MIPS64R6O0-NEXT: seb $7, $7
+; MIPS64R6O0-NEXT: srlv $8, $10, $3
+; MIPS64R6O0-NEXT: seb $8, $8
; MIPS64R6O0-NEXT: # %bb.4: # %entry
-; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5: # %entry
; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
@@ -5236,28 +5236,28 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n
; MIPS64R6O0-NEXT: sll $2, $2, 3
; MIPS64R6O0-NEXT: ori $3, $zero, 255
; MIPS64R6O0-NEXT: sllv $3, $3, $2
-; MIPS64R6O0-NEXT: nor $4, $zero, $3
-; MIPS64R6O0-NEXT: andi $7, $5, 255
-; MIPS64R6O0-NEXT: sllv $7, $7, $2
+; MIPS64R6O0-NEXT: nor $7, $zero, $3
+; MIPS64R6O0-NEXT: andi $8, $5, 255
+; MIPS64R6O0-NEXT: sllv $8, $8, $2
; MIPS64R6O0-NEXT: andi $6, $6, 255
; MIPS64R6O0-NEXT: sllv $6, $6, $2
; MIPS64R6O0-NEXT: .LBB13_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $9, 0($1)
-; MIPS64R6O0-NEXT: and $10, $9, $3
-; MIPS64R6O0-NEXT: bnec $10, $7, .LBB13_3
+; MIPS64R6O0-NEXT: ll $10, 0($1)
+; MIPS64R6O0-NEXT: and $11, $10, $3
+; MIPS64R6O0-NEXT: bnec $11, $8, .LBB13_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1
-; MIPS64R6O0-NEXT: and $9, $9, $4
-; MIPS64R6O0-NEXT: or $9, $9, $6
-; MIPS64R6O0-NEXT: sc $9, 0($1)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB13_1
+; MIPS64R6O0-NEXT: and $10, $10, $7
+; MIPS64R6O0-NEXT: or $10, $10, $6
+; MIPS64R6O0-NEXT: sc $10, 0($1)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB13_1
; MIPS64R6O0-NEXT: .LBB13_3: # %entry
-; MIPS64R6O0-NEXT: srlv $8, $10, $2
-; MIPS64R6O0-NEXT: seb $8, $8
+; MIPS64R6O0-NEXT: srlv $9, $11, $2
+; MIPS64R6O0-NEXT: seb $9, $9
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
-; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5: # %entry
; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
@@ -5775,28 +5775,28 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind {
; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 2
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 65535
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $5, $zero, $3
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 2
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 65535
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB14_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $7, 0($2)
-; MIPS64R6O0-NEXT: addu $8, $7, $4
-; MIPS64R6O0-NEXT: and $8, $8, $3
-; MIPS64R6O0-NEXT: and $9, $7, $5
-; MIPS64R6O0-NEXT: or $9, $9, $8
-; MIPS64R6O0-NEXT: sc $9, 0($2)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB14_1
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: addu $9, $8, $4
+; MIPS64R6O0-NEXT: and $9, $9, $5
+; MIPS64R6O0-NEXT: and $10, $8, $6
+; MIPS64R6O0-NEXT: or $10, $10, $9
+; MIPS64R6O0-NEXT: sc $10, 0($2)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB14_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $6, $7, $3
-; MIPS64R6O0-NEXT: srlv $6, $6, $1
-; MIPS64R6O0-NEXT: seh $6, $6
+; MIPS64R6O0-NEXT: and $7, $8, $5
+; MIPS64R6O0-NEXT: srlv $7, $7, $3
+; MIPS64R6O0-NEXT: seh $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seh $2, $1
@@ -6359,33 +6359,33 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) {
; MIPS64R6O0-NEXT: sll $3, $5, 0
; MIPS64R6O0-NEXT: addu $2, $3, $2
; MIPS64R6O0-NEXT: sync
-; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
-; MIPS64R6O0-NEXT: and $3, $4, $3
-; MIPS64R6O0-NEXT: andi $4, $4, 3
-; MIPS64R6O0-NEXT: xori $4, $4, 2
-; MIPS64R6O0-NEXT: sll $4, $4, 3
+; MIPS64R6O0-NEXT: daddiu $8, $zero, -4
+; MIPS64R6O0-NEXT: and $8, $4, $8
+; MIPS64R6O0-NEXT: andi $3, $4, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 2
+; MIPS64R6O0-NEXT: sll $3, $3, 3
; MIPS64R6O0-NEXT: ori $5, $zero, 65535
-; MIPS64R6O0-NEXT: sllv $5, $5, $4
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: nor $6, $zero, $5
; MIPS64R6O0-NEXT: andi $7, $2, 65535
-; MIPS64R6O0-NEXT: sllv $7, $7, $4
+; MIPS64R6O0-NEXT: sllv $7, $7, $3
; MIPS64R6O0-NEXT: andi $1, $1, 65535
-; MIPS64R6O0-NEXT: sllv $1, $1, $4
+; MIPS64R6O0-NEXT: sllv $1, $1, $3
; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $9, 0($3)
-; MIPS64R6O0-NEXT: and $10, $9, $5
-; MIPS64R6O0-NEXT: bnec $10, $7, .LBB15_3
+; MIPS64R6O0-NEXT: ll $10, 0($8)
+; MIPS64R6O0-NEXT: and $11, $10, $5
+; MIPS64R6O0-NEXT: bnec $11, $7, .LBB15_3
; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
-; MIPS64R6O0-NEXT: and $9, $9, $6
-; MIPS64R6O0-NEXT: or $9, $9, $1
-; MIPS64R6O0-NEXT: sc $9, 0($3)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB15_1
+; MIPS64R6O0-NEXT: and $10, $10, $6
+; MIPS64R6O0-NEXT: or $10, $10, $1
+; MIPS64R6O0-NEXT: sc $10, 0($8)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB15_1
; MIPS64R6O0-NEXT: .LBB15_3:
-; MIPS64R6O0-NEXT: srlv $8, $10, $4
-; MIPS64R6O0-NEXT: seh $8, $8
+; MIPS64R6O0-NEXT: srlv $9, $11, $3
+; MIPS64R6O0-NEXT: seh $9, $9
; MIPS64R6O0-NEXT: # %bb.4:
; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
-; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5:
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seh $2, $1
@@ -7145,8 +7145,8 @@ define i32 @zeroreg() nounwind {
; MIPS64R6O0-NEXT: sc $6, 0($1)
; MIPS64R6O0-NEXT: beqzc $6, .LBB17_1
; MIPS64R6O0-NEXT: .LBB17_3: # %entry
-; MIPS64R6O0-NEXT: xor $1, $5, $3
-; MIPS64R6O0-NEXT: sltiu $2, $1, 1
+; MIPS64R6O0-NEXT: xor $2, $5, $3
+; MIPS64R6O0-NEXT: sltiu $2, $2, 1
; MIPS64R6O0-NEXT: sync
; MIPS64R6O0-NEXT: jrc $ra
;
diff --git a/llvm/test/CodeGen/Mips/implicit-sret.ll b/llvm/test/CodeGen/Mips/implicit-sret.ll
index b9f6568e40c..e86cec37d51 100644
--- a/llvm/test/CodeGen/Mips/implicit-sret.ll
+++ b/llvm/test/CodeGen/Mips/implicit-sret.ll
@@ -48,8 +48,8 @@ define internal { i32, i128, i64 } @implicit_sret_impl() unnamed_addr nounwind {
; CHECK-NEXT: sd $zero, 8($4)
; CHECK-NEXT: daddiu $3, $zero, 30
; CHECK-NEXT: sd $3, 24($4)
-; CHECK-NEXT: addiu $3, $zero, 10
-; CHECK-NEXT: sw $3, 0($4)
+; CHECK-NEXT: addiu $5, $zero, 10
+; CHECK-NEXT: sw $5, 0($4)
; CHECK-NEXT: jr $ra
; CHECK-NEXT: nop
ret { i32, i128, i64 } { i32 10, i128 20, i64 30 }
@@ -70,12 +70,10 @@ define internal void @test2() unnamed_addr nounwind {
; CHECK-NEXT: lw $3, 4($sp)
; CHECK-NEXT: # implicit-def: $a0_64
; CHECK-NEXT: move $4, $3
-; CHECK-NEXT: # implicit-def: $v1_64
-; CHECK-NEXT: move $3, $2
-; CHECK-NEXT: # implicit-def: $v0_64
-; CHECK-NEXT: move $2, $1
-; CHECK-NEXT: move $5, $3
-; CHECK-NEXT: move $6, $2
+; CHECK-NEXT: # implicit-def: $a1_64
+; CHECK-NEXT: move $5, $2
+; CHECK-NEXT: # implicit-def: $a2_64
+; CHECK-NEXT: move $6, $1
; CHECK-NEXT: jal use_sret2
; CHECK-NEXT: nop
; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/addegluecrash.ll b/llvm/test/CodeGen/PowerPC/addegluecrash.ll
index c38f377869f..a1d98054583 100644
--- a/llvm/test/CodeGen/PowerPC/addegluecrash.ll
+++ b/llvm/test/CodeGen/PowerPC/addegluecrash.ll
@@ -21,11 +21,11 @@ define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* n
; CHECK-NEXT: addze 5, 5
; CHECK-NEXT: add 4, 5, 4
; CHECK-NEXT: cmpld 7, 4, 5
-; CHECK-NEXT: mfocrf 4, 1
-; CHECK-NEXT: rlwinm 4, 4, 29, 31, 31
-; CHECK-NEXT: # implicit-def: $x5
-; CHECK-NEXT: mr 5, 4
-; CHECK-NEXT: clrldi 4, 5, 32
+; CHECK-NEXT: mfocrf 10, 1
+; CHECK-NEXT: rlwinm 10, 10, 29, 31, 31
+; CHECK-NEXT: # implicit-def: $x4
+; CHECK-NEXT: mr 4, 10
+; CHECK-NEXT: clrldi 4, 4, 32
; CHECK-NEXT: std 4, 0(3)
; CHECK-NEXT: blr
%1 = load i64, i64* %a, align 8
diff --git a/llvm/test/CodeGen/PowerPC/popcount.ll b/llvm/test/CodeGen/PowerPC/popcount.ll
index fb20f1d3ee4..170d3d77d08 100644
--- a/llvm/test/CodeGen/PowerPC/popcount.ll
+++ b/llvm/test/CodeGen/PowerPC/popcount.ll
@@ -58,17 +58,17 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: popcntd 3, 3
-; CHECK-NEXT: xxswapd 0, 34
-; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0
-; CHECK-NEXT: mffprd 4, 0
+; CHECK-NEXT: xxswapd 1, 34
+; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT: mffprd 4, 1
; CHECK-NEXT: popcntd 4, 4
; CHECK-NEXT: add 3, 4, 3
; CHECK-NEXT: mtfprd 0, 3
-; CHECK-NEXT: # kill: def $vsl0 killed $f0
+; CHECK-NEXT: fmr 2, 0
; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: mtfprd 1, 3
-; CHECK-NEXT: # kill: def $vsl1 killed $f1
-; CHECK-NEXT: xxmrghd 34, 1, 0
+; CHECK-NEXT: mtfprd 0, 3
+; CHECK-NEXT: fmr 3, 0
+; CHECK-NEXT: xxmrghd 34, 3, 2
; CHECK-NEXT: blr
Entry:
%1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0)
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 4a78218262c..39469d63b90 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -1548,8 +1548,8 @@ define <2 x i64> @test46(<2 x float> %a) {
; CHECK-FISL-NEXT: ld r3, -24(r1)
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
-; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
+; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs1, vs1
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test46:
@@ -1616,8 +1616,8 @@ define <2 x i64> @test47(<2 x float> %a) {
; CHECK-FISL-NEXT: ld r3, -24(r1)
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
-; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
+; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs1, vs1
; CHECK-FISL-NEXT: