| commit a21387c65470417c58021f8d3194a4510bb64f46 |
| Author: Hans Wennborg <hans@chromium.org> |
| Date: Tue Sep 15 10:47:02 2020 +0200 |
| |
| Revert "RegAllocFast: Record internal state based on register units" |
| |
| This seems to have caused incorrect register allocation in some cases, |
| breaking tests in the Zig standard library (PR47278). |
| |
| As discussed on the bug, revert back to green for now. |
| |
| > Record internal state based on register units. This is often more |
| > efficient as there are typically fewer register units to update |
| > compared to iterating over all the aliases of a register. |
| > |
| > Original patch by Matthias Braun, but I've been rebasing and fixing it |
| > for almost 2 years and fixed a few bugs causing intermediate failures |
| > to make this patch independent of the changes in |
| > https://reviews.llvm.org/D52010. |
| |
| This reverts commit 66251f7e1de79a7c1620659b7f58352b8c8e892e, and |
| follow-ups 931a68f26b9a3de853807ffad7b2cd0a2dd30922 |
| and 0671a4c5087d40450603d9d26cf239f1a8b1367e. It also adjusts some |
| test expectations. |
| |
| diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp |
| index e0742c4508e..d93fd8f601c 100644 |
| --- a/llvm/lib/CodeGen/RegAllocFast.cpp |
| +++ b/llvm/lib/CodeGen/RegAllocFast.cpp |
| @@ -106,8 +106,13 @@ namespace { |
| /// that it is alive across blocks. |
| BitVector MayLiveAcrossBlocks; |
| |
| - /// State of a register unit. |
| - enum RegUnitState { |
| + /// State of a physical register. |
| + enum RegState { |
| + /// A disabled register is not available for allocation, but an alias may |
| + /// be in use. A register can only be moved out of the disabled state if |
| + /// all aliases are disabled. |
| + regDisabled, |
| + |
| /// A free register is not currently in use and can be allocated |
| /// immediately without checking aliases. |
| regFree, |
| @@ -121,8 +126,8 @@ namespace { |
| /// register. In that case, LiveVirtRegs contains the inverse mapping. |
| }; |
| |
| - /// Maps each physical register to a RegUnitState enum or virtual register. |
| - std::vector<unsigned> RegUnitStates; |
| + /// Maps each physical register to a RegState enum or a virtual register. |
| + std::vector<unsigned> PhysRegState; |
| |
| SmallVector<Register, 16> VirtDead; |
| SmallVector<MachineInstr *, 32> Coalesced; |
| @@ -184,10 +189,6 @@ namespace { |
| bool isLastUseOfLocalReg(const MachineOperand &MO) const; |
| |
| void addKillFlag(const LiveReg &LRI); |
| -#ifndef NDEBUG |
| - bool verifyRegStateMapping(const LiveReg &LR) const; |
| -#endif |
| - |
| void killVirtReg(LiveReg &LR); |
| void killVirtReg(Register VirtReg); |
| void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); |
| @@ -195,7 +196,7 @@ namespace { |
| |
| void usePhysReg(MachineOperand &MO); |
| void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, |
| - unsigned NewState); |
| + RegState NewState); |
| unsigned calcSpillCost(MCPhysReg PhysReg) const; |
| void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); |
| |
| @@ -228,7 +229,7 @@ namespace { |
| bool mayLiveOut(Register VirtReg); |
| bool mayLiveIn(Register VirtReg); |
| |
| - void dumpState() const; |
| + void dumpState(); |
| }; |
| |
| } // end anonymous namespace |
| @@ -239,8 +240,7 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, |
| false) |
| |
| void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { |
| - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) |
| - RegUnitStates[*UI] = NewState; |
| + PhysRegState[PhysReg] = NewState; |
| } |
| |
| /// This allocates space for the specified virtual register to be held on the |
| @@ -384,23 +384,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) { |
| } |
| } |
| |
| -#ifndef NDEBUG |
| -bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { |
| - for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { |
| - if (RegUnitStates[*UI] != LR.VirtReg) |
| - return false; |
| - } |
| - |
| - return true; |
| -} |
| -#endif |
| - |
| /// Mark virtreg as no longer available. |
| void RegAllocFast::killVirtReg(LiveReg &LR) { |
| - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); |
| addKillFlag(LR); |
| - MCPhysReg PhysReg = LR.PhysReg; |
| - setPhysRegState(PhysReg, regFree); |
| + assert(PhysRegState[LR.PhysReg] == LR.VirtReg && |
| + "Broken RegState mapping"); |
| + setPhysRegState(LR.PhysReg, regFree); |
| LR.PhysReg = 0; |
| } |
| |
| @@ -427,9 +416,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, |
| |
| /// Do the actual work of spilling. |
| void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { |
| - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); |
| - |
| - MCPhysReg PhysReg = LR.PhysReg; |
| + assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping"); |
| |
| if (LR.Dirty) { |
| // If this physreg is used by the instruction, we want to kill it on the |
| @@ -437,7 +424,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { |
| bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; |
| LR.Dirty = false; |
| |
| - spill(MI, LR.VirtReg, PhysReg, SpillKill); |
| + spill(MI, LR.VirtReg, LR.PhysReg, SpillKill); |
| |
| if (SpillKill) |
| LR.LastUse = nullptr; // Don't kill register again |
| @@ -473,16 +460,53 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { |
| assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); |
| |
| markRegUsedInInstr(PhysReg); |
| + switch (PhysRegState[PhysReg]) { |
| + case regDisabled: |
| + break; |
| + case regReserved: |
| + PhysRegState[PhysReg] = regFree; |
| + LLVM_FALLTHROUGH; |
| + case regFree: |
| + MO.setIsKill(); |
| + return; |
| + default: |
| + // The physreg was allocated to a virtual register. That means the value we |
| + // wanted has been clobbered. |
| + llvm_unreachable("Instruction uses an allocated register"); |
| + } |
| |
| - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { |
| - switch (RegUnitStates[*UI]) { |
| + // Maybe a superregister is reserved? |
| + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { |
| + MCPhysReg Alias = *AI; |
| + switch (PhysRegState[Alias]) { |
| + case regDisabled: |
| + break; |
| case regReserved: |
| - RegUnitStates[*UI] = regFree; |
| + // Either PhysReg is a subregister of Alias and we mark the |
| + // whole register as free, or PhysReg is the superregister of |
| + // Alias and we mark all the aliases as disabled before freeing |
| + // PhysReg. |
| + // In the latter case, since PhysReg was disabled, this means that |
| + // its value is defined only by physical sub-registers. This check |
| + // is performed by the assert of the default case in this loop. |
| + // Note: The value of the superregister may only be partial |
| + // defined, that is why regDisabled is a valid state for aliases. |
| + assert((TRI->isSuperRegister(PhysReg, Alias) || |
| + TRI->isSuperRegister(Alias, PhysReg)) && |
| + "Instruction is not using a subregister of a reserved register"); |
| LLVM_FALLTHROUGH; |
| case regFree: |
| + if (TRI->isSuperRegister(PhysReg, Alias)) { |
| + // Leave the superregister in the working set. |
| + setPhysRegState(Alias, regFree); |
| + MO.getParent()->addRegisterKilled(Alias, TRI, true); |
| + return; |
| + } |
| + // Some other alias was in the working set - clear it. |
| + setPhysRegState(Alias, regDisabled); |
| break; |
| default: |
| - llvm_unreachable("Unexpected reg unit state"); |
| + llvm_unreachable("Instruction uses an alias of an allocated register"); |
| } |
| } |
| |
| @@ -495,20 +519,38 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { |
| /// similar to defineVirtReg except the physreg is reserved instead of |
| /// allocated. |
| void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, |
| - MCPhysReg PhysReg, unsigned NewState) { |
| - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { |
| - switch (unsigned VirtReg = RegUnitStates[*UI]) { |
| + MCPhysReg PhysReg, RegState NewState) { |
| + markRegUsedInInstr(PhysReg); |
| + switch (Register VirtReg = PhysRegState[PhysReg]) { |
| + case regDisabled: |
| + break; |
| + default: |
| + spillVirtReg(MI, VirtReg); |
| + LLVM_FALLTHROUGH; |
| + case regFree: |
| + case regReserved: |
| + setPhysRegState(PhysReg, NewState); |
| + return; |
| + } |
| + |
| + // This is a disabled register, disable all aliases. |
| + setPhysRegState(PhysReg, NewState); |
| + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { |
| + MCPhysReg Alias = *AI; |
| + switch (Register VirtReg = PhysRegState[Alias]) { |
| + case regDisabled: |
| + break; |
| default: |
| spillVirtReg(MI, VirtReg); |
| - break; |
| + LLVM_FALLTHROUGH; |
| case regFree: |
| case regReserved: |
| + setPhysRegState(Alias, regDisabled); |
| + if (TRI->isSuperRegister(PhysReg, Alias)) |
| + return; |
| break; |
| } |
| } |
| - |
| - markRegUsedInInstr(PhysReg); |
| - setPhysRegState(PhysReg, NewState); |
| } |
| |
| /// Return the cost of spilling clearing out PhysReg and aliases so it is free |
| @@ -521,24 +563,46 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { |
| << " is already used in instr.\n"); |
| return spillImpossible; |
| } |
| + switch (Register VirtReg = PhysRegState[PhysReg]) { |
| + case regDisabled: |
| + break; |
| + case regFree: |
| + return 0; |
| + case regReserved: |
| + LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " |
| + << printReg(PhysReg, TRI) << " is reserved already.\n"); |
| + return spillImpossible; |
| + default: { |
| + LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); |
| + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && |
| + "Missing VirtReg entry"); |
| + return LRI->Dirty ? spillDirty : spillClean; |
| + } |
| + } |
| |
| - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { |
| - switch (unsigned VirtReg = RegUnitStates[*UI]) { |
| + // This is a disabled register, add up cost of aliases. |
| + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); |
| + unsigned Cost = 0; |
| + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { |
| + MCPhysReg Alias = *AI; |
| + switch (Register VirtReg = PhysRegState[Alias]) { |
| + case regDisabled: |
| + break; |
| case regFree: |
| + ++Cost; |
| break; |
| case regReserved: |
| - LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " |
| - << printReg(PhysReg, TRI) << " is reserved already.\n"); |
| return spillImpossible; |
| default: { |
| LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); |
| assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && |
| "Missing VirtReg entry"); |
| - return LRI->Dirty ? spillDirty : spillClean; |
| + Cost += LRI->Dirty ? spillDirty : spillClean; |
| + break; |
| } |
| } |
| } |
| - return 0; |
| + return Cost; |
| } |
| |
| /// This method updates local state so that we know that PhysReg is the |
| @@ -845,17 +909,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, |
| if (!Reg || !Reg.isPhysical()) |
| continue; |
| markRegUsedInInstr(Reg); |
| - |
| - for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) { |
| - if (!ThroughRegs.count(RegUnitStates[*UI])) |
| - continue; |
| - |
| - // Need to spill any aliasing registers. |
| - for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { |
| - for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) { |
| - definePhysReg(MI, *SI, regFree); |
| - } |
| - } |
| + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { |
| + if (ThroughRegs.count(PhysRegState[*AI])) |
| + definePhysReg(MI, *AI, regFree); |
| } |
| } |
| |
| @@ -919,40 +975,37 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, |
| } |
| |
| #ifndef NDEBUG |
| - |
| -void RegAllocFast::dumpState() const { |
| - for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE; |
| - ++Unit) { |
| - switch (unsigned VirtReg = RegUnitStates[Unit]) { |
| +void RegAllocFast::dumpState() { |
| + for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { |
| + if (PhysRegState[Reg] == regDisabled) continue; |
| + dbgs() << " " << printReg(Reg, TRI); |
| + switch(PhysRegState[Reg]) { |
| case regFree: |
| break; |
| case regReserved: |
| - dbgs() << " " << printRegUnit(Unit, TRI) << "[P]"; |
| + dbgs() << "*"; |
| break; |
| default: { |
| - dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg); |
| - LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); |
| - assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry"); |
| - if (I->Dirty) |
| - dbgs() << "[D]"; |
| - assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present"); |
| + dbgs() << '=' << printReg(PhysRegState[Reg]); |
| + LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]); |
| + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && |
| + "Missing VirtReg entry"); |
| + if (LRI->Dirty) |
| + dbgs() << "*"; |
| + assert(LRI->PhysReg == Reg && "Bad inverse map"); |
| break; |
| } |
| } |
| } |
| dbgs() << '\n'; |
| // Check that LiveVirtRegs is the inverse. |
| - for (const LiveReg &LR : LiveVirtRegs) { |
| - Register VirtReg = LR.VirtReg; |
| - assert(VirtReg.isVirtual() && "Bad map key"); |
| - MCPhysReg PhysReg = LR.PhysReg; |
| - if (PhysReg != 0) { |
| - assert(Register::isPhysicalRegister(PhysReg) && |
| - "mapped to physreg"); |
| - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { |
| - assert(RegUnitStates[*UI] == VirtReg && "inverse map valid"); |
| - } |
| - } |
| + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), |
| + e = LiveVirtRegs.end(); i != e; ++i) { |
| + if (!i->PhysReg) |
| + continue; |
| + assert(i->VirtReg.isVirtual() && "Bad map key"); |
| + assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); |
| + assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); |
| } |
| } |
| #endif |
| @@ -1194,7 +1247,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { |
| this->MBB = &MBB; |
| LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); |
| |
| - RegUnitStates.assign(TRI->getNumRegUnits(), regFree); |
| + PhysRegState.assign(TRI->getNumRegs(), regDisabled); |
| assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); |
| |
| MachineBasicBlock::iterator MII = MBB.begin(); |
| diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll |
| index 7c546936ba2..392af063eb8 100644 |
| --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll |
| +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll |
| @@ -4,8 +4,8 @@ |
| define i32 @fptosi_wh(half %a) nounwind ssp { |
| entry: |
| ; CHECK-LABEL: fptosi_wh |
| -; CHECK: fcvt s0, h0 |
| -; CHECK: fcvtzs [[REG:w[0-9]+]], s0 |
| +; CHECK: fcvt s1, h0 |
| +; CHECK: fcvtzs [[REG:w[0-9]+]], s1 |
| ; CHECK: mov w0, [[REG]] |
| %conv = fptosi half %a to i32 |
| ret i32 %conv |
| @@ -15,8 +15,8 @@ entry: |
| define i32 @fptoui_swh(half %a) nounwind ssp { |
| entry: |
| ; CHECK-LABEL: fptoui_swh |
| -; CHECK: fcvt s0, h0 |
| -; CHECK: fcvtzu [[REG:w[0-9]+]], s0 |
| +; CHECK: fcvt s1, h0 |
| +; CHECK: fcvtzu [[REG:w[0-9]+]], s1 |
| ; CHECK: mov w0, [[REG]] |
| %conv = fptoui half %a to i32 |
| ret i32 %conv |
| diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll |
| index d8abf14c136..ed03aec07e7 100644 |
| --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll |
| +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll |
| @@ -54,8 +54,8 @@ entry: |
| ; CHECK: ldrh w8, [sp, #12] |
| ; CHECK: str w8, [sp, #8] |
| ; CHECK: ldr w8, [sp, #8] |
| -; CHECK: ; kill: def $x8 killed $w8 |
| -; CHECK: str x8, [sp] |
| +; CHECK: mov x9, x8 |
| +; CHECK: str x9, [sp] |
| ; CHECK: ldr x0, [sp] |
| ; CHECK: ret |
| %a.addr = alloca i8, align 1 |
| @@ -109,8 +109,8 @@ entry: |
| ; CHECK: strh w8, [sp, #12] |
| ; CHECK: ldrsh w8, [sp, #12] |
| ; CHECK: str w8, [sp, #8] |
| -; CHECK: ldrsw x8, [sp, #8] |
| -; CHECK: str x8, [sp] |
| +; CHECK: ldrsw x9, [sp, #8] |
| +; CHECK: str x9, [sp] |
| ; CHECK: ldr x0, [sp] |
| ; CHECK: ret |
| %a.addr = alloca i8, align 1 |
| diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll |
| index e1e889b906c..6b3e8d747d4 100644 |
| --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll |
| +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll |
| @@ -285,11 +285,11 @@ define i16 @to_half(float %in) { |
| ; FAST: // %bb.0: |
| ; FAST-NEXT: sub sp, sp, #16 // =16 |
| ; FAST-NEXT: .cfi_def_cfa_offset 16 |
| -; FAST-NEXT: fcvt h0, s0 |
| +; FAST-NEXT: fcvt h1, s0 |
| ; FAST-NEXT: // implicit-def: $w0 |
| -; FAST-NEXT: fmov s1, w0 |
| -; FAST-NEXT: mov.16b v1, v0 |
| -; FAST-NEXT: fmov w8, s1 |
| +; FAST-NEXT: fmov s0, w0 |
| +; FAST-NEXT: mov.16b v0, v1 |
| +; FAST-NEXT: fmov w8, s0 |
| ; FAST-NEXT: mov w0, w8 |
| ; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill |
| ; FAST-NEXT: mov w0, w8 |
| diff --git a/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll b/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll |
| index 22e3ccf2b12..8d62fb35566 100644 |
| --- a/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll |
| +++ b/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll |
| @@ -15,7 +15,8 @@ |
| ; CHECK-LABEL: foo: |
| ; CHECK: sub |
| ; CHECK-DAG: mov x[[SP:[0-9]+]], sp |
| -; CHECK-DAG: mov w[[OFFSET:[0-9]+]], #4104 |
| +; CHECK-DAG: mov [[TMP:w[0-9]+]], #4104 |
| +; CHECK: mov w[[OFFSET:[0-9]+]], [[TMP]] |
| ; CHECK: strb w0, [x[[SP]], x[[OFFSET]]] |
| |
| define void @foo(i8 %in) { |
| diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll |
| index 105969717e4..1e796fff710 100644 |
| --- a/llvm/test/CodeGen/AArch64/popcount.ll |
| +++ b/llvm/test/CodeGen/AArch64/popcount.ll |
| @@ -10,11 +10,12 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) { |
| ; CHECK-NEXT: // implicit-def: $q1 |
| ; CHECK-NEXT: mov v1.16b, v0.16b |
| ; CHECK-NEXT: mov v1.d[1], x8 |
| -; CHECK-NEXT: cnt v0.16b, v1.16b |
| -; CHECK-NEXT: uaddlv h0, v0.16b |
| +; CHECK-NEXT: cnt v1.16b, v1.16b |
| +; CHECK-NEXT: uaddlv h2, v1.16b |
| ; CHECK-NEXT: // implicit-def: $q1 |
| -; CHECK-NEXT: mov v1.16b, v0.16b |
| -; CHECK-NEXT: fmov w0, s1 |
| +; CHECK-NEXT: mov v1.16b, v2.16b |
| +; CHECK-NEXT: fmov w1, s1 |
| +; CHECK-NEXT: mov w0, w1 |
| ; CHECK-NEXT: ret |
| Entry: |
| %1 = load i128, i128* %0, align 16 |
| @@ -36,21 +37,21 @@ define i16 @popcount256(i256* nocapture nonnull readonly %0) { |
| ; CHECK-NEXT: // implicit-def: $q1 |
| ; CHECK-NEXT: mov v1.16b, v0.16b |
| ; CHECK-NEXT: mov v1.d[1], x9 |
| -; CHECK-NEXT: cnt v0.16b, v1.16b |
| -; CHECK-NEXT: uaddlv h0, v0.16b |
| +; CHECK-NEXT: cnt v1.16b, v1.16b |
| +; CHECK-NEXT: uaddlv h2, v1.16b |
| ; CHECK-NEXT: // implicit-def: $q1 |
| -; CHECK-NEXT: mov v1.16b, v0.16b |
| -; CHECK-NEXT: fmov w9, s1 |
| +; CHECK-NEXT: mov v1.16b, v2.16b |
| +; CHECK-NEXT: fmov w10, s1 |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: // implicit-def: $q1 |
| ; CHECK-NEXT: mov v1.16b, v0.16b |
| ; CHECK-NEXT: mov v1.d[1], x8 |
| -; CHECK-NEXT: cnt v0.16b, v1.16b |
| -; CHECK-NEXT: uaddlv h0, v0.16b |
| +; CHECK-NEXT: cnt v1.16b, v1.16b |
| +; CHECK-NEXT: uaddlv h2, v1.16b |
| ; CHECK-NEXT: // implicit-def: $q1 |
| -; CHECK-NEXT: mov v1.16b, v0.16b |
| -; CHECK-NEXT: fmov w8, s1 |
| -; CHECK-NEXT: add w0, w8, w9 |
| +; CHECK-NEXT: mov v1.16b, v2.16b |
| +; CHECK-NEXT: fmov w11, s1 |
| +; CHECK-NEXT: add w0, w11, w10 |
| ; CHECK-NEXT: ret |
| Entry: |
| %1 = load i256, i256* %0, align 16 |
| @@ -69,11 +70,11 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { |
| ; CHECK-NEXT: fmov d0, x0 |
| ; CHECK-NEXT: mov v0.d[1], x1 |
| ; CHECK-NEXT: cnt v0.16b, v0.16b |
| -; CHECK-NEXT: uaddlv h0, v0.16b |
| -; CHECK-NEXT: // implicit-def: $q1 |
| -; CHECK-NEXT: mov v1.16b, v0.16b |
| -; CHECK-NEXT: fmov w0, s1 |
| -; CHECK-NEXT: // kill: def $x0 killed $w0 |
| +; CHECK-NEXT: uaddlv h1, v0.16b |
| +; CHECK-NEXT: // implicit-def: $q0 |
| +; CHECK-NEXT: mov v0.16b, v1.16b |
| +; CHECK-NEXT: fmov w2, s0 |
| +; CHECK-NEXT: mov w0, w2 |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: mov x1, v0.d[1] |
| ; CHECK-NEXT: ret |
| diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll |
| index 3d3b511ab34..8999cd91169 100644 |
| --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll |
| +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll |
| @@ -69,15 +69,15 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { |
| ; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14 |
| ; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15 |
| ; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16 |
| - ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec |
| + ; GCN: renamable $sgpr20_sgpr21 = S_MOV_B64 $exec |
| ; GCN: renamable $vgpr1 = IMPLICIT_DEF |
| - ; GCN: renamable $sgpr2_sgpr3 = IMPLICIT_DEF |
| + ; GCN: renamable $sgpr22_sgpr23 = IMPLICIT_DEF |
| ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) |
| ; GCN: SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5) |
| ; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5) |
| - ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) |
| + ; GCN: SI_SPILL_S64_SAVE killed $sgpr20_sgpr21, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) |
| ; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) |
| - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) |
| + ; GCN: SI_SPILL_S64_SAVE killed $sgpr22_sgpr23, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) |
| ; GCN: bb.1: |
| ; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000) |
| ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5) |
| @@ -91,8 +91,8 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { |
| ; GCN: renamable $vgpr18 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0 |
| ; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode |
| ; GCN: renamable $vgpr19 = COPY renamable $vgpr18 |
| - ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5 |
| - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) |
| + ; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5 |
| + ; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) |
| ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5) |
| ; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) |
| ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) |
| diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll |
| index b119ffd303e..e991c550c6b 100644 |
| --- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll |
| +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll |
| @@ -11,7 +11,7 @@ |
| define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 { |
| ; GCN-LABEL: spill_sgprs_to_multiple_vgprs: |
| ; GCN: ; %bb.0: |
| -; GCN-NEXT: s_load_dword s0, s[0:1], 0xb |
| +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; def s[4:11] |
| ; GCN-NEXT: ;;#ASMEND |
| @@ -42,354 +42,352 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; def s[84:91] |
| ; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v0, s4, 0 |
| +; GCN-NEXT: v_writelane_b32 v0, s5, 1 |
| +; GCN-NEXT: v_writelane_b32 v0, s6, 2 |
| +; GCN-NEXT: v_writelane_b32 v0, s7, 3 |
| +; GCN-NEXT: v_writelane_b32 v0, s8, 4 |
| +; GCN-NEXT: v_writelane_b32 v0, s9, 5 |
| +; GCN-NEXT: v_writelane_b32 v0, s10, 6 |
| +; GCN-NEXT: v_writelane_b32 v0, s11, 7 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:11] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v0, s4, 8 |
| +; GCN-NEXT: v_writelane_b32 v0, s5, 9 |
| +; GCN-NEXT: v_writelane_b32 v0, s6, 10 |
| +; GCN-NEXT: v_writelane_b32 v0, s7, 11 |
| +; GCN-NEXT: v_writelane_b32 v0, s8, 12 |
| +; GCN-NEXT: v_writelane_b32 v0, s9, 13 |
| +; GCN-NEXT: v_writelane_b32 v0, s10, 14 |
| +; GCN-NEXT: v_writelane_b32 v0, s11, 15 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:11] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v0, s4, 16 |
| +; GCN-NEXT: v_writelane_b32 v0, s5, 17 |
| +; GCN-NEXT: v_writelane_b32 v0, s6, 18 |
| +; GCN-NEXT: v_writelane_b32 v0, s7, 19 |
| +; GCN-NEXT: v_writelane_b32 v0, s8, 20 |
| +; GCN-NEXT: v_writelane_b32 v0, s9, 21 |
| +; GCN-NEXT: v_writelane_b32 v0, s10, 22 |
| +; GCN-NEXT: v_writelane_b32 v0, s11, 23 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:11] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v0, s4, 24 |
| +; GCN-NEXT: v_writelane_b32 v0, s5, 25 |
| +; GCN-NEXT: v_writelane_b32 v0, s6, 26 |
| +; GCN-NEXT: v_writelane_b32 v0, s7, 27 |
| +; GCN-NEXT: v_writelane_b32 v0, s8, 28 |
| +; GCN-NEXT: v_writelane_b32 v0, s9, 29 |
| +; GCN-NEXT: v_writelane_b32 v0, s10, 30 |
| +; GCN-NEXT: v_writelane_b32 v0, s11, 31 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:11] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v0, s4, 32 |
| +; GCN-NEXT: v_writelane_b32 v0, s5, 33 |
| +; GCN-NEXT: v_writelane_b32 v0, s6, 34 |
| +; GCN-NEXT: v_writelane_b32 v0, s7, 35 |
| +; GCN-NEXT: v_writelane_b32 v0, s8, 36 |
| +; GCN-NEXT: v_writelane_b32 v0, s9, 37 |
| +; GCN-NEXT: v_writelane_b32 v0, s10, 38 |
| +; GCN-NEXT: v_writelane_b32 v0, s11, 39 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:11] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v0, s4, 40 |
| +; GCN-NEXT: v_writelane_b32 v0, s5, 41 |
| +; GCN-NEXT: v_writelane_b32 v0, s6, 42 |
| +; GCN-NEXT: v_writelane_b32 v0, s7, 43 |
| +; GCN-NEXT: v_writelane_b32 v0, s8, 44 |
| +; GCN-NEXT: v_writelane_b32 v0, s9, 45 |
| +; GCN-NEXT: v_writelane_b32 v0, s10, 46 |
| +; GCN-NEXT: v_writelane_b32 v0, s11, 47 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:11] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v0, s4, 48 |
| +; GCN-NEXT: v_writelane_b32 v0, s5, 49 |
| +; GCN-NEXT: v_writelane_b32 v0, s6, 50 |
| +; GCN-NEXT: v_writelane_b32 v0, s7, 51 |
| +; GCN-NEXT: v_writelane_b32 v0, s8, 52 |
| +; GCN-NEXT: v_writelane_b32 v0, s9, 53 |
| +; GCN-NEXT: v_writelane_b32 v0, s10, 54 |
| +; GCN-NEXT: v_writelane_b32 v0, s11, 55 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:11] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: s_mov_b32 s3, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| -; GCN-NEXT: v_writelane_b32 v0, s0, 0 |
| -; GCN-NEXT: v_writelane_b32 v0, s4, 1 |
| -; GCN-NEXT: v_writelane_b32 v0, s5, 2 |
| -; GCN-NEXT: v_writelane_b32 v0, s6, 3 |
| -; GCN-NEXT: v_writelane_b32 v0, s7, 4 |
| -; GCN-NEXT: v_writelane_b32 v0, s8, 5 |
| -; GCN-NEXT: v_writelane_b32 v0, s9, 6 |
| -; GCN-NEXT: v_writelane_b32 v0, s10, 7 |
| -; GCN-NEXT: v_writelane_b32 v0, s11, 8 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[0:7] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_writelane_b32 v0, s0, 9 |
| -; GCN-NEXT: v_writelane_b32 v0, s1, 10 |
| -; GCN-NEXT: v_writelane_b32 v0, s2, 11 |
| -; GCN-NEXT: v_writelane_b32 v0, s3, 12 |
| -; GCN-NEXT: v_writelane_b32 v0, s4, 13 |
| -; GCN-NEXT: v_writelane_b32 v0, s5, 14 |
| -; GCN-NEXT: v_writelane_b32 v0, s6, 15 |
| -; GCN-NEXT: v_writelane_b32 v0, s7, 16 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[0:7] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_writelane_b32 v0, s0, 17 |
| -; GCN-NEXT: v_writelane_b32 v0, s1, 18 |
| -; GCN-NEXT: v_writelane_b32 v0, s2, 19 |
| -; GCN-NEXT: v_writelane_b32 v0, s3, 20 |
| -; GCN-NEXT: v_writelane_b32 v0, s4, 21 |
| -; GCN-NEXT: v_writelane_b32 v0, s5, 22 |
| -; GCN-NEXT: v_writelane_b32 v0, s6, 23 |
| -; GCN-NEXT: v_writelane_b32 v0, s7, 24 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[0:7] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_writelane_b32 v0, s0, 25 |
| -; GCN-NEXT: v_writelane_b32 v0, s1, 26 |
| -; GCN-NEXT: v_writelane_b32 v0, s2, 27 |
| -; GCN-NEXT: v_writelane_b32 v0, s3, 28 |
| -; GCN-NEXT: v_writelane_b32 v0, s4, 29 |
| -; GCN-NEXT: v_writelane_b32 v0, s5, 30 |
| -; GCN-NEXT: v_writelane_b32 v0, s6, 31 |
| -; GCN-NEXT: v_writelane_b32 v0, s7, 32 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[0:7] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_writelane_b32 v0, s0, 33 |
| -; GCN-NEXT: v_writelane_b32 v0, s1, 34 |
| -; GCN-NEXT: v_writelane_b32 v0, s2, 35 |
| -; GCN-NEXT: v_writelane_b32 v0, s3, 36 |
| -; GCN-NEXT: v_writelane_b32 v0, s4, 37 |
| -; GCN-NEXT: v_writelane_b32 v0, s5, 38 |
| -; GCN-NEXT: v_writelane_b32 v0, s6, 39 |
| -; GCN-NEXT: v_writelane_b32 v0, s7, 40 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[0:7] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_writelane_b32 v0, s0, 41 |
| -; GCN-NEXT: v_writelane_b32 v0, s1, 42 |
| -; GCN-NEXT: v_writelane_b32 v0, s2, 43 |
| -; GCN-NEXT: v_writelane_b32 v0, s3, 44 |
| -; GCN-NEXT: v_writelane_b32 v0, s4, 45 |
| -; GCN-NEXT: v_writelane_b32 v0, s5, 46 |
| -; GCN-NEXT: v_writelane_b32 v0, s6, 47 |
| -; GCN-NEXT: v_writelane_b32 v0, s7, 48 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[0:7] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_writelane_b32 v0, s0, 49 |
| -; GCN-NEXT: v_writelane_b32 v0, s1, 50 |
| -; GCN-NEXT: v_writelane_b32 v0, s2, 51 |
| -; GCN-NEXT: v_writelane_b32 v0, s3, 52 |
| -; GCN-NEXT: v_writelane_b32 v0, s4, 53 |
| -; GCN-NEXT: v_writelane_b32 v0, s5, 54 |
| -; GCN-NEXT: v_writelane_b32 v0, s6, 55 |
| -; GCN-NEXT: v_writelane_b32 v0, s7, 56 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[0:7] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: s_mov_b32 s8, 0 |
| -; GCN-NEXT: v_readlane_b32 s9, v0, 0 |
| -; GCN-NEXT: s_cmp_lg_u32 s9, s8 |
| -; GCN-NEXT: v_writelane_b32 v0, s12, 57 |
| -; GCN-NEXT: v_writelane_b32 v0, s13, 58 |
| -; GCN-NEXT: v_writelane_b32 v0, s14, 59 |
| -; GCN-NEXT: v_writelane_b32 v0, s15, 60 |
| -; GCN-NEXT: v_writelane_b32 v0, s16, 61 |
| -; GCN-NEXT: v_writelane_b32 v0, s17, 62 |
| -; GCN-NEXT: v_writelane_b32 v0, s18, 63 |
| -; GCN-NEXT: v_writelane_b32 v1, s19, 0 |
| -; GCN-NEXT: v_writelane_b32 v1, s20, 1 |
| -; GCN-NEXT: v_writelane_b32 v1, s21, 2 |
| -; GCN-NEXT: v_writelane_b32 v1, s22, 3 |
| -; GCN-NEXT: v_writelane_b32 v1, s23, 4 |
| -; GCN-NEXT: v_writelane_b32 v1, s24, 5 |
| -; GCN-NEXT: v_writelane_b32 v1, s25, 6 |
| -; GCN-NEXT: v_writelane_b32 v1, s26, 7 |
| -; GCN-NEXT: v_writelane_b32 v1, s27, 8 |
| -; GCN-NEXT: v_writelane_b32 v1, s36, 9 |
| -; GCN-NEXT: v_writelane_b32 v1, s37, 10 |
| -; GCN-NEXT: v_writelane_b32 v1, s38, 11 |
| -; GCN-NEXT: v_writelane_b32 v1, s39, 12 |
| -; GCN-NEXT: v_writelane_b32 v1, s40, 13 |
| -; GCN-NEXT: v_writelane_b32 v1, s41, 14 |
| -; GCN-NEXT: v_writelane_b32 v1, s42, 15 |
| -; GCN-NEXT: v_writelane_b32 v1, s43, 16 |
| -; GCN-NEXT: v_writelane_b32 v1, s44, 17 |
| -; GCN-NEXT: v_writelane_b32 v1, s45, 18 |
| -; GCN-NEXT: v_writelane_b32 v1, s46, 19 |
| -; GCN-NEXT: v_writelane_b32 v1, s47, 20 |
| -; GCN-NEXT: v_writelane_b32 v1, s48, 21 |
| -; GCN-NEXT: v_writelane_b32 v1, s49, 22 |
| -; GCN-NEXT: v_writelane_b32 v1, s50, 23 |
| -; GCN-NEXT: v_writelane_b32 v1, s51, 24 |
| -; GCN-NEXT: v_writelane_b32 v1, s52, 25 |
| -; GCN-NEXT: v_writelane_b32 v1, s53, 26 |
| -; GCN-NEXT: v_writelane_b32 v1, s54, 27 |
| -; GCN-NEXT: v_writelane_b32 v1, s55, 28 |
| -; GCN-NEXT: v_writelane_b32 v1, s56, 29 |
| -; GCN-NEXT: v_writelane_b32 v1, s57, 30 |
| -; GCN-NEXT: v_writelane_b32 v1, s58, 31 |
| -; GCN-NEXT: v_writelane_b32 v1, s59, 32 |
| -; GCN-NEXT: v_writelane_b32 v1, s60, 33 |
| -; GCN-NEXT: v_writelane_b32 v1, s61, 34 |
| -; GCN-NEXT: v_writelane_b32 v1, s62, 35 |
| -; GCN-NEXT: v_writelane_b32 v1, s63, 36 |
| -; GCN-NEXT: v_writelane_b32 v1, s64, 37 |
| -; GCN-NEXT: v_writelane_b32 v1, s65, 38 |
| -; GCN-NEXT: v_writelane_b32 v1, s66, 39 |
| -; GCN-NEXT: v_writelane_b32 v1, s67, 40 |
| -; GCN-NEXT: v_writelane_b32 v1, s68, 41 |
| -; GCN-NEXT: v_writelane_b32 v1, s69, 42 |
| -; GCN-NEXT: v_writelane_b32 v1, s70, 43 |
| -; GCN-NEXT: v_writelane_b32 v1, s71, 44 |
| -; GCN-NEXT: v_writelane_b32 v1, s72, 45 |
| -; GCN-NEXT: v_writelane_b32 v1, s73, 46 |
| -; GCN-NEXT: v_writelane_b32 v1, s74, 47 |
| -; GCN-NEXT: v_writelane_b32 v1, s75, 48 |
| -; GCN-NEXT: v_writelane_b32 v1, s76, 49 |
| -; GCN-NEXT: v_writelane_b32 v1, s77, 50 |
| -; GCN-NEXT: v_writelane_b32 v1, s78, 51 |
| -; GCN-NEXT: v_writelane_b32 v1, s79, 52 |
| -; GCN-NEXT: v_writelane_b32 v1, s80, 53 |
| -; GCN-NEXT: v_writelane_b32 v1, s81, 54 |
| -; GCN-NEXT: v_writelane_b32 v1, s82, 55 |
| -; GCN-NEXT: v_writelane_b32 v1, s83, 56 |
| -; GCN-NEXT: v_writelane_b32 v1, s84, 57 |
| -; GCN-NEXT: v_writelane_b32 v1, s85, 58 |
| -; GCN-NEXT: v_writelane_b32 v1, s86, 59 |
| -; GCN-NEXT: v_writelane_b32 v1, s87, 60 |
| -; GCN-NEXT: v_writelane_b32 v1, s88, 61 |
| -; GCN-NEXT: v_writelane_b32 v1, s89, 62 |
| -; GCN-NEXT: v_writelane_b32 v1, s90, 63 |
| -; GCN-NEXT: v_writelane_b32 v2, s91, 0 |
| -; GCN-NEXT: v_writelane_b32 v2, s0, 1 |
| -; GCN-NEXT: v_writelane_b32 v2, s1, 2 |
| -; GCN-NEXT: v_writelane_b32 v2, s2, 3 |
| -; GCN-NEXT: v_writelane_b32 v2, s3, 4 |
| -; GCN-NEXT: v_writelane_b32 v2, s4, 5 |
| -; GCN-NEXT: v_writelane_b32 v2, s5, 6 |
| -; GCN-NEXT: v_writelane_b32 v2, s6, 7 |
| -; GCN-NEXT: v_writelane_b32 v2, s7, 8 |
| +; GCN-NEXT: s_cmp_lg_u32 s2, s3 |
| +; GCN-NEXT: v_writelane_b32 v0, s12, 56 |
| +; GCN-NEXT: v_writelane_b32 v0, s13, 57 |
| +; GCN-NEXT: v_writelane_b32 v0, s14, 58 |
| +; GCN-NEXT: v_writelane_b32 v0, s15, 59 |
| +; GCN-NEXT: v_writelane_b32 v0, s16, 60 |
| +; GCN-NEXT: v_writelane_b32 v0, s17, 61 |
| +; GCN-NEXT: v_writelane_b32 v0, s18, 62 |
| +; GCN-NEXT: v_writelane_b32 v0, s19, 63 |
| +; GCN-NEXT: v_writelane_b32 v1, s20, 0 |
| +; GCN-NEXT: v_writelane_b32 v1, s21, 1 |
| +; GCN-NEXT: v_writelane_b32 v1, s22, 2 |
| +; GCN-NEXT: v_writelane_b32 v1, s23, 3 |
| +; GCN-NEXT: v_writelane_b32 v1, s24, 4 |
| +; GCN-NEXT: v_writelane_b32 v1, s25, 5 |
| +; GCN-NEXT: v_writelane_b32 v1, s26, 6 |
| +; GCN-NEXT: v_writelane_b32 v1, s27, 7 |
| +; GCN-NEXT: v_writelane_b32 v1, s36, 8 |
| +; GCN-NEXT: v_writelane_b32 v1, s37, 9 |
| +; GCN-NEXT: v_writelane_b32 v1, s38, 10 |
| +; GCN-NEXT: v_writelane_b32 v1, s39, 11 |
| +; GCN-NEXT: v_writelane_b32 v1, s40, 12 |
| +; GCN-NEXT: v_writelane_b32 v1, s41, 13 |
| +; GCN-NEXT: v_writelane_b32 v1, s42, 14 |
| +; GCN-NEXT: v_writelane_b32 v1, s43, 15 |
| +; GCN-NEXT: v_writelane_b32 v1, s44, 16 |
| +; GCN-NEXT: v_writelane_b32 v1, s45, 17 |
| +; GCN-NEXT: v_writelane_b32 v1, s46, 18 |
| +; GCN-NEXT: v_writelane_b32 v1, s47, 19 |
| +; GCN-NEXT: v_writelane_b32 v1, s48, 20 |
| +; GCN-NEXT: v_writelane_b32 v1, s49, 21 |
| +; GCN-NEXT: v_writelane_b32 v1, s50, 22 |
| +; GCN-NEXT: v_writelane_b32 v1, s51, 23 |
| +; GCN-NEXT: v_writelane_b32 v1, s52, 24 |
| +; GCN-NEXT: v_writelane_b32 v1, s53, 25 |
| +; GCN-NEXT: v_writelane_b32 v1, s54, 26 |
| +; GCN-NEXT: v_writelane_b32 v1, s55, 27 |
| +; GCN-NEXT: v_writelane_b32 v1, s56, 28 |
| +; GCN-NEXT: v_writelane_b32 v1, s57, 29 |
| +; GCN-NEXT: v_writelane_b32 v1, s58, 30 |
| +; GCN-NEXT: v_writelane_b32 v1, s59, 31 |
| +; GCN-NEXT: v_writelane_b32 v1, s60, 32 |
| +; GCN-NEXT: v_writelane_b32 v1, s61, 33 |
| +; GCN-NEXT: v_writelane_b32 v1, s62, 34 |
| +; GCN-NEXT: v_writelane_b32 v1, s63, 35 |
| +; GCN-NEXT: v_writelane_b32 v1, s64, 36 |
| +; GCN-NEXT: v_writelane_b32 v1, s65, 37 |
| +; GCN-NEXT: v_writelane_b32 v1, s66, 38 |
| +; GCN-NEXT: v_writelane_b32 v1, s67, 39 |
| +; GCN-NEXT: v_writelane_b32 v1, s68, 40 |
| +; GCN-NEXT: v_writelane_b32 v1, s69, 41 |
| +; GCN-NEXT: v_writelane_b32 v1, s70, 42 |
| +; GCN-NEXT: v_writelane_b32 v1, s71, 43 |
| +; GCN-NEXT: v_writelane_b32 v1, s72, 44 |
| +; GCN-NEXT: v_writelane_b32 v1, s73, 45 |
| +; GCN-NEXT: v_writelane_b32 v1, s74, 46 |
| +; GCN-NEXT: v_writelane_b32 v1, s75, 47 |
| +; GCN-NEXT: v_writelane_b32 v1, s76, 48 |
| +; GCN-NEXT: v_writelane_b32 v1, s77, 49 |
| +; GCN-NEXT: v_writelane_b32 v1, s78, 50 |
| +; GCN-NEXT: v_writelane_b32 v1, s79, 51 |
| +; GCN-NEXT: v_writelane_b32 v1, s80, 52 |
| +; GCN-NEXT: v_writelane_b32 v1, s81, 53 |
| +; GCN-NEXT: v_writelane_b32 v1, s82, 54 |
| +; GCN-NEXT: v_writelane_b32 v1, s83, 55 |
| +; GCN-NEXT: v_writelane_b32 v1, s84, 56 |
| +; GCN-NEXT: v_writelane_b32 v1, s85, 57 |
| +; GCN-NEXT: v_writelane_b32 v1, s86, 58 |
| +; GCN-NEXT: v_writelane_b32 v1, s87, 59 |
| +; GCN-NEXT: v_writelane_b32 v1, s88, 60 |
| +; GCN-NEXT: v_writelane_b32 v1, s89, 61 |
| +; GCN-NEXT: v_writelane_b32 v1, s90, 62 |
| +; GCN-NEXT: v_writelane_b32 v1, s91, 63 |
| +; GCN-NEXT: v_writelane_b32 v2, s4, 0 |
| +; GCN-NEXT: v_writelane_b32 v2, s5, 1 |
| +; GCN-NEXT: v_writelane_b32 v2, s6, 2 |
| +; GCN-NEXT: v_writelane_b32 v2, s7, 3 |
| +; GCN-NEXT: v_writelane_b32 v2, s8, 4 |
| +; GCN-NEXT: v_writelane_b32 v2, s9, 5 |
| +; GCN-NEXT: v_writelane_b32 v2, s10, 6 |
| +; GCN-NEXT: v_writelane_b32 v2, s11, 7 |
| ; GCN-NEXT: s_cbranch_scc1 BB0_2 |
| ; GCN-NEXT: ; %bb.1: ; %bb0 |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 1 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 2 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 3 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 4 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 5 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 6 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 7 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 8 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 0 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 1 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 2 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 3 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 4 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 5 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 6 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 7 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 57 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 58 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 59 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 60 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 61 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 62 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 63 |
| -; GCN-NEXT: v_readlane_b32 s7, v1, 0 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 56 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 57 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 58 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 59 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 60 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 61 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 62 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 63 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v1, 1 |
| -; GCN-NEXT: v_readlane_b32 s1, v1, 2 |
| -; GCN-NEXT: v_readlane_b32 s2, v1, 3 |
| -; GCN-NEXT: v_readlane_b32 s3, v1, 4 |
| -; GCN-NEXT: v_readlane_b32 s4, v1, 5 |
| -; GCN-NEXT: v_readlane_b32 s5, v1, 6 |
| -; GCN-NEXT: v_readlane_b32 s6, v1, 7 |
| -; GCN-NEXT: v_readlane_b32 s7, v1, 8 |
| +; GCN-NEXT: v_readlane_b32 s0, v1, 0 |
| +; GCN-NEXT: v_readlane_b32 s1, v1, 1 |
| +; GCN-NEXT: v_readlane_b32 s2, v1, 2 |
| +; GCN-NEXT: v_readlane_b32 s3, v1, 3 |
| +; GCN-NEXT: v_readlane_b32 s4, v1, 4 |
| +; GCN-NEXT: v_readlane_b32 s5, v1, 5 |
| +; GCN-NEXT: v_readlane_b32 s6, v1, 6 |
| +; GCN-NEXT: v_readlane_b32 s7, v1, 7 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v1, 9 |
| -; GCN-NEXT: v_readlane_b32 s1, v1, 10 |
| -; GCN-NEXT: v_readlane_b32 s2, v1, 11 |
| -; GCN-NEXT: v_readlane_b32 s3, v1, 12 |
| -; GCN-NEXT: v_readlane_b32 s4, v1, 13 |
| -; GCN-NEXT: v_readlane_b32 s5, v1, 14 |
| -; GCN-NEXT: v_readlane_b32 s6, v1, 15 |
| -; GCN-NEXT: v_readlane_b32 s7, v1, 16 |
| +; GCN-NEXT: v_readlane_b32 s0, v1, 8 |
| +; GCN-NEXT: v_readlane_b32 s1, v1, 9 |
| +; GCN-NEXT: v_readlane_b32 s2, v1, 10 |
| +; GCN-NEXT: v_readlane_b32 s3, v1, 11 |
| +; GCN-NEXT: v_readlane_b32 s4, v1, 12 |
| +; GCN-NEXT: v_readlane_b32 s5, v1, 13 |
| +; GCN-NEXT: v_readlane_b32 s6, v1, 14 |
| +; GCN-NEXT: v_readlane_b32 s7, v1, 15 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v1, 17 |
| -; GCN-NEXT: v_readlane_b32 s1, v1, 18 |
| -; GCN-NEXT: v_readlane_b32 s2, v1, 19 |
| -; GCN-NEXT: v_readlane_b32 s3, v1, 20 |
| -; GCN-NEXT: v_readlane_b32 s4, v1, 21 |
| -; GCN-NEXT: v_readlane_b32 s5, v1, 22 |
| -; GCN-NEXT: v_readlane_b32 s6, v1, 23 |
| -; GCN-NEXT: v_readlane_b32 s7, v1, 24 |
| +; GCN-NEXT: v_readlane_b32 s0, v1, 16 |
| +; GCN-NEXT: v_readlane_b32 s1, v1, 17 |
| +; GCN-NEXT: v_readlane_b32 s2, v1, 18 |
| +; GCN-NEXT: v_readlane_b32 s3, v1, 19 |
| +; GCN-NEXT: v_readlane_b32 s4, v1, 20 |
| +; GCN-NEXT: v_readlane_b32 s5, v1, 21 |
| +; GCN-NEXT: v_readlane_b32 s6, v1, 22 |
| +; GCN-NEXT: v_readlane_b32 s7, v1, 23 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v1, 25 |
| -; GCN-NEXT: v_readlane_b32 s1, v1, 26 |
| -; GCN-NEXT: v_readlane_b32 s2, v1, 27 |
| -; GCN-NEXT: v_readlane_b32 s3, v1, 28 |
| -; GCN-NEXT: v_readlane_b32 s4, v1, 29 |
| -; GCN-NEXT: v_readlane_b32 s5, v1, 30 |
| -; GCN-NEXT: v_readlane_b32 s6, v1, 31 |
| -; GCN-NEXT: v_readlane_b32 s7, v1, 32 |
| +; GCN-NEXT: v_readlane_b32 s0, v1, 24 |
| +; GCN-NEXT: v_readlane_b32 s1, v1, 25 |
| +; GCN-NEXT: v_readlane_b32 s2, v1, 26 |
| +; GCN-NEXT: v_readlane_b32 s3, v1, 27 |
| +; GCN-NEXT: v_readlane_b32 s4, v1, 28 |
| +; GCN-NEXT: v_readlane_b32 s5, v1, 29 |
| +; GCN-NEXT: v_readlane_b32 s6, v1, 30 |
| +; GCN-NEXT: v_readlane_b32 s7, v1, 31 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v1, 33 |
| -; GCN-NEXT: v_readlane_b32 s1, v1, 34 |
| -; GCN-NEXT: v_readlane_b32 s2, v1, 35 |
| -; GCN-NEXT: v_readlane_b32 s3, v1, 36 |
| -; GCN-NEXT: v_readlane_b32 s4, v1, 37 |
| -; GCN-NEXT: v_readlane_b32 s5, v1, 38 |
| -; GCN-NEXT: v_readlane_b32 s6, v1, 39 |
| -; GCN-NEXT: v_readlane_b32 s7, v1, 40 |
| +; GCN-NEXT: v_readlane_b32 s0, v1, 32 |
| +; GCN-NEXT: v_readlane_b32 s1, v1, 33 |
| +; GCN-NEXT: v_readlane_b32 s2, v1, 34 |
| +; GCN-NEXT: v_readlane_b32 s3, v1, 35 |
| +; GCN-NEXT: v_readlane_b32 s4, v1, 36 |
| +; GCN-NEXT: v_readlane_b32 s5, v1, 37 |
| +; GCN-NEXT: v_readlane_b32 s6, v1, 38 |
| +; GCN-NEXT: v_readlane_b32 s7, v1, 39 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v1, 41 |
| -; GCN-NEXT: v_readlane_b32 s1, v1, 42 |
| -; GCN-NEXT: v_readlane_b32 s2, v1, 43 |
| -; GCN-NEXT: v_readlane_b32 s3, v1, 44 |
| -; GCN-NEXT: v_readlane_b32 s4, v1, 45 |
| -; GCN-NEXT: v_readlane_b32 s5, v1, 46 |
| -; GCN-NEXT: v_readlane_b32 s6, v1, 47 |
| -; GCN-NEXT: v_readlane_b32 s7, v1, 48 |
| +; GCN-NEXT: v_readlane_b32 s0, v1, 40 |
| +; GCN-NEXT: v_readlane_b32 s1, v1, 41 |
| +; GCN-NEXT: v_readlane_b32 s2, v1, 42 |
| +; GCN-NEXT: v_readlane_b32 s3, v1, 43 |
| +; GCN-NEXT: v_readlane_b32 s4, v1, 44 |
| +; GCN-NEXT: v_readlane_b32 s5, v1, 45 |
| +; GCN-NEXT: v_readlane_b32 s6, v1, 46 |
| +; GCN-NEXT: v_readlane_b32 s7, v1, 47 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v1, 49 |
| -; GCN-NEXT: v_readlane_b32 s1, v1, 50 |
| -; GCN-NEXT: v_readlane_b32 s2, v1, 51 |
| -; GCN-NEXT: v_readlane_b32 s3, v1, 52 |
| -; GCN-NEXT: v_readlane_b32 s4, v1, 53 |
| -; GCN-NEXT: v_readlane_b32 s5, v1, 54 |
| -; GCN-NEXT: v_readlane_b32 s6, v1, 55 |
| -; GCN-NEXT: v_readlane_b32 s7, v1, 56 |
| +; GCN-NEXT: v_readlane_b32 s0, v1, 48 |
| +; GCN-NEXT: v_readlane_b32 s1, v1, 49 |
| +; GCN-NEXT: v_readlane_b32 s2, v1, 50 |
| +; GCN-NEXT: v_readlane_b32 s3, v1, 51 |
| +; GCN-NEXT: v_readlane_b32 s4, v1, 52 |
| +; GCN-NEXT: v_readlane_b32 s5, v1, 53 |
| +; GCN-NEXT: v_readlane_b32 s6, v1, 54 |
| +; GCN-NEXT: v_readlane_b32 s7, v1, 55 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v1, 57 |
| -; GCN-NEXT: v_readlane_b32 s1, v1, 58 |
| -; GCN-NEXT: v_readlane_b32 s2, v1, 59 |
| -; GCN-NEXT: v_readlane_b32 s3, v1, 60 |
| -; GCN-NEXT: v_readlane_b32 s4, v1, 61 |
| -; GCN-NEXT: v_readlane_b32 s5, v1, 62 |
| -; GCN-NEXT: v_readlane_b32 s6, v1, 63 |
| -; GCN-NEXT: v_readlane_b32 s7, v2, 0 |
| +; GCN-NEXT: v_readlane_b32 s0, v1, 56 |
| +; GCN-NEXT: v_readlane_b32 s1, v1, 57 |
| +; GCN-NEXT: v_readlane_b32 s2, v1, 58 |
| +; GCN-NEXT: v_readlane_b32 s3, v1, 59 |
| +; GCN-NEXT: v_readlane_b32 s4, v1, 60 |
| +; GCN-NEXT: v_readlane_b32 s5, v1, 61 |
| +; GCN-NEXT: v_readlane_b32 s6, v1, 62 |
| +; GCN-NEXT: v_readlane_b32 s7, v1, 63 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 9 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 10 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 11 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 12 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 13 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 14 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 15 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 16 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 8 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 9 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 10 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 11 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 12 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 13 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 14 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 15 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 17 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 18 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 19 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 20 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 21 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 22 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 23 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 24 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 16 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 17 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 18 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 19 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 20 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 21 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 22 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 23 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 25 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 26 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 27 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 28 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 29 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 30 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 31 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 32 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 24 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 25 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 26 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 27 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 28 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 29 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 30 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 31 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 33 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 34 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 35 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 36 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 37 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 38 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 39 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 40 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 32 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 33 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 34 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 35 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 36 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 37 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 38 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 39 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 41 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 42 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 43 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 44 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 45 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 46 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 47 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 48 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 40 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 41 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 42 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 43 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 44 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 45 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 46 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 47 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 49 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 50 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 51 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 52 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 53 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 54 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 55 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 56 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 48 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 49 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 50 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 51 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 52 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 53 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 54 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 55 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v2, 1 |
| -; GCN-NEXT: v_readlane_b32 s1, v2, 2 |
| -; GCN-NEXT: v_readlane_b32 s2, v2, 3 |
| -; GCN-NEXT: v_readlane_b32 s3, v2, 4 |
| -; GCN-NEXT: v_readlane_b32 s4, v2, 5 |
| -; GCN-NEXT: v_readlane_b32 s5, v2, 6 |
| -; GCN-NEXT: v_readlane_b32 s6, v2, 7 |
| -; GCN-NEXT: v_readlane_b32 s7, v2, 8 |
| +; GCN-NEXT: v_readlane_b32 s0, v2, 0 |
| +; GCN-NEXT: v_readlane_b32 s1, v2, 1 |
| +; GCN-NEXT: v_readlane_b32 s2, v2, 2 |
| +; GCN-NEXT: v_readlane_b32 s3, v2, 3 |
| +; GCN-NEXT: v_readlane_b32 s4, v2, 4 |
| +; GCN-NEXT: v_readlane_b32 s5, v2, 5 |
| +; GCN-NEXT: v_readlane_b32 s6, v2, 6 |
| +; GCN-NEXT: v_readlane_b32 s7, v2, 7 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:7] |
| ; GCN-NEXT: ;;#ASMEND |
| @@ -444,195 +442,193 @@ ret: |
| define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 { |
| ; GCN-LABEL: split_sgpr_spill_2_vgprs: |
| ; GCN: ; %bb.0: |
| -; GCN-NEXT: s_load_dword s0, s[0:1], 0xb |
| +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; def s[4:19] |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; def s[36:51] |
| ; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v0, s4, 0 |
| +; GCN-NEXT: v_writelane_b32 v0, s5, 1 |
| +; GCN-NEXT: v_writelane_b32 v0, s6, 2 |
| +; GCN-NEXT: v_writelane_b32 v0, s7, 3 |
| +; GCN-NEXT: v_writelane_b32 v0, s8, 4 |
| +; GCN-NEXT: v_writelane_b32 v0, s9, 5 |
| +; GCN-NEXT: v_writelane_b32 v0, s10, 6 |
| +; GCN-NEXT: v_writelane_b32 v0, s11, 7 |
| +; GCN-NEXT: v_writelane_b32 v0, s12, 8 |
| +; GCN-NEXT: v_writelane_b32 v0, s13, 9 |
| +; GCN-NEXT: v_writelane_b32 v0, s14, 10 |
| +; GCN-NEXT: v_writelane_b32 v0, s15, 11 |
| +; GCN-NEXT: v_writelane_b32 v0, s16, 12 |
| +; GCN-NEXT: v_writelane_b32 v0, s17, 13 |
| +; GCN-NEXT: v_writelane_b32 v0, s18, 14 |
| +; GCN-NEXT: v_writelane_b32 v0, s19, 15 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:19] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v0, s4, 16 |
| +; GCN-NEXT: v_writelane_b32 v0, s5, 17 |
| +; GCN-NEXT: v_writelane_b32 v0, s6, 18 |
| +; GCN-NEXT: v_writelane_b32 v0, s7, 19 |
| +; GCN-NEXT: v_writelane_b32 v0, s8, 20 |
| +; GCN-NEXT: v_writelane_b32 v0, s9, 21 |
| +; GCN-NEXT: v_writelane_b32 v0, s10, 22 |
| +; GCN-NEXT: v_writelane_b32 v0, s11, 23 |
| +; GCN-NEXT: v_writelane_b32 v0, s12, 24 |
| +; GCN-NEXT: v_writelane_b32 v0, s13, 25 |
| +; GCN-NEXT: v_writelane_b32 v0, s14, 26 |
| +; GCN-NEXT: v_writelane_b32 v0, s15, 27 |
| +; GCN-NEXT: v_writelane_b32 v0, s16, 28 |
| +; GCN-NEXT: v_writelane_b32 v0, s17, 29 |
| +; GCN-NEXT: v_writelane_b32 v0, s18, 30 |
| +; GCN-NEXT: v_writelane_b32 v0, s19, 31 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:19] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[20:27] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[0:1] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: s_mov_b32 s3, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| -; GCN-NEXT: v_writelane_b32 v0, s0, 0 |
| -; GCN-NEXT: v_writelane_b32 v0, s4, 1 |
| -; GCN-NEXT: v_writelane_b32 v0, s5, 2 |
| -; GCN-NEXT: v_writelane_b32 v0, s6, 3 |
| -; GCN-NEXT: v_writelane_b32 v0, s7, 4 |
| -; GCN-NEXT: v_writelane_b32 v0, s8, 5 |
| -; GCN-NEXT: v_writelane_b32 v0, s9, 6 |
| -; GCN-NEXT: v_writelane_b32 v0, s10, 7 |
| -; GCN-NEXT: v_writelane_b32 v0, s11, 8 |
| -; GCN-NEXT: v_writelane_b32 v0, s12, 9 |
| -; GCN-NEXT: v_writelane_b32 v0, s13, 10 |
| -; GCN-NEXT: v_writelane_b32 v0, s14, 11 |
| -; GCN-NEXT: v_writelane_b32 v0, s15, 12 |
| -; GCN-NEXT: v_writelane_b32 v0, s16, 13 |
| -; GCN-NEXT: v_writelane_b32 v0, s17, 14 |
| -; GCN-NEXT: v_writelane_b32 v0, s18, 15 |
| -; GCN-NEXT: v_writelane_b32 v0, s19, 16 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[0:15] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[16:31] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_writelane_b32 v0, s0, 17 |
| -; GCN-NEXT: v_writelane_b32 v0, s1, 18 |
| -; GCN-NEXT: v_writelane_b32 v0, s2, 19 |
| -; GCN-NEXT: v_writelane_b32 v0, s3, 20 |
| -; GCN-NEXT: v_writelane_b32 v0, s4, 21 |
| -; GCN-NEXT: v_writelane_b32 v0, s5, 22 |
| -; GCN-NEXT: v_writelane_b32 v0, s6, 23 |
| -; GCN-NEXT: v_writelane_b32 v0, s7, 24 |
| -; GCN-NEXT: v_writelane_b32 v0, s8, 25 |
| -; GCN-NEXT: v_writelane_b32 v0, s9, 26 |
| -; GCN-NEXT: v_writelane_b32 v0, s10, 27 |
| -; GCN-NEXT: v_writelane_b32 v0, s11, 28 |
| -; GCN-NEXT: v_writelane_b32 v0, s12, 29 |
| -; GCN-NEXT: v_writelane_b32 v0, s13, 30 |
| -; GCN-NEXT: v_writelane_b32 v0, s14, 31 |
| -; GCN-NEXT: v_writelane_b32 v0, s15, 32 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[0:7] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[8:9] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: s_mov_b32 s10, 0 |
| -; GCN-NEXT: v_readlane_b32 s11, v0, 0 |
| -; GCN-NEXT: s_cmp_lg_u32 s11, s10 |
| -; GCN-NEXT: v_writelane_b32 v0, s36, 33 |
| -; GCN-NEXT: v_writelane_b32 v0, s37, 34 |
| -; GCN-NEXT: v_writelane_b32 v0, s38, 35 |
| -; GCN-NEXT: v_writelane_b32 v0, s39, 36 |
| -; GCN-NEXT: v_writelane_b32 v0, s40, 37 |
| -; GCN-NEXT: v_writelane_b32 v0, s41, 38 |
| -; GCN-NEXT: v_writelane_b32 v0, s42, 39 |
| -; GCN-NEXT: v_writelane_b32 v0, s43, 40 |
| -; GCN-NEXT: v_writelane_b32 v0, s44, 41 |
| -; GCN-NEXT: v_writelane_b32 v0, s45, 42 |
| -; GCN-NEXT: v_writelane_b32 v0, s46, 43 |
| -; GCN-NEXT: v_writelane_b32 v0, s47, 44 |
| -; GCN-NEXT: v_writelane_b32 v0, s48, 45 |
| -; GCN-NEXT: v_writelane_b32 v0, s49, 46 |
| -; GCN-NEXT: v_writelane_b32 v0, s50, 47 |
| -; GCN-NEXT: v_writelane_b32 v0, s51, 48 |
| -; GCN-NEXT: v_writelane_b32 v0, s16, 49 |
| -; GCN-NEXT: v_writelane_b32 v0, s17, 50 |
| -; GCN-NEXT: v_writelane_b32 v0, s18, 51 |
| -; GCN-NEXT: v_writelane_b32 v0, s19, 52 |
| -; GCN-NEXT: v_writelane_b32 v0, s20, 53 |
| -; GCN-NEXT: v_writelane_b32 v0, s21, 54 |
| -; GCN-NEXT: v_writelane_b32 v0, s22, 55 |
| -; GCN-NEXT: v_writelane_b32 v0, s23, 56 |
| -; GCN-NEXT: v_writelane_b32 v0, s24, 57 |
| -; GCN-NEXT: v_writelane_b32 v0, s25, 58 |
| -; GCN-NEXT: v_writelane_b32 v0, s26, 59 |
| -; GCN-NEXT: v_writelane_b32 v0, s27, 60 |
| -; GCN-NEXT: v_writelane_b32 v0, s28, 61 |
| -; GCN-NEXT: v_writelane_b32 v0, s29, 62 |
| -; GCN-NEXT: v_writelane_b32 v0, s30, 63 |
| -; GCN-NEXT: v_writelane_b32 v1, s31, 0 |
| -; GCN-NEXT: v_writelane_b32 v1, s0, 1 |
| -; GCN-NEXT: v_writelane_b32 v1, s1, 2 |
| -; GCN-NEXT: v_writelane_b32 v1, s2, 3 |
| -; GCN-NEXT: v_writelane_b32 v1, s3, 4 |
| -; GCN-NEXT: v_writelane_b32 v1, s4, 5 |
| -; GCN-NEXT: v_writelane_b32 v1, s5, 6 |
| -; GCN-NEXT: v_writelane_b32 v1, s6, 7 |
| -; GCN-NEXT: v_writelane_b32 v1, s7, 8 |
| -; GCN-NEXT: v_writelane_b32 v1, s8, 9 |
| -; GCN-NEXT: v_writelane_b32 v1, s9, 10 |
| +; GCN-NEXT: s_cmp_lg_u32 s2, s3 |
| +; GCN-NEXT: v_writelane_b32 v0, s36, 32 |
| +; GCN-NEXT: v_writelane_b32 v0, s37, 33 |
| +; GCN-NEXT: v_writelane_b32 v0, s38, 34 |
| +; GCN-NEXT: v_writelane_b32 v0, s39, 35 |
| +; GCN-NEXT: v_writelane_b32 v0, s40, 36 |
| +; GCN-NEXT: v_writelane_b32 v0, s41, 37 |
| +; GCN-NEXT: v_writelane_b32 v0, s42, 38 |
| +; GCN-NEXT: v_writelane_b32 v0, s43, 39 |
| +; GCN-NEXT: v_writelane_b32 v0, s44, 40 |
| +; GCN-NEXT: v_writelane_b32 v0, s45, 41 |
| +; GCN-NEXT: v_writelane_b32 v0, s46, 42 |
| +; GCN-NEXT: v_writelane_b32 v0, s47, 43 |
| +; GCN-NEXT: v_writelane_b32 v0, s48, 44 |
| +; GCN-NEXT: v_writelane_b32 v0, s49, 45 |
| +; GCN-NEXT: v_writelane_b32 v0, s50, 46 |
| +; GCN-NEXT: v_writelane_b32 v0, s51, 47 |
| +; GCN-NEXT: v_writelane_b32 v0, s4, 48 |
| +; GCN-NEXT: v_writelane_b32 v0, s5, 49 |
| +; GCN-NEXT: v_writelane_b32 v0, s6, 50 |
| +; GCN-NEXT: v_writelane_b32 v0, s7, 51 |
| +; GCN-NEXT: v_writelane_b32 v0, s8, 52 |
| +; GCN-NEXT: v_writelane_b32 v0, s9, 53 |
| +; GCN-NEXT: v_writelane_b32 v0, s10, 54 |
| +; GCN-NEXT: v_writelane_b32 v0, s11, 55 |
| +; GCN-NEXT: v_writelane_b32 v0, s12, 56 |
| +; GCN-NEXT: v_writelane_b32 v0, s13, 57 |
| +; GCN-NEXT: v_writelane_b32 v0, s14, 58 |
| +; GCN-NEXT: v_writelane_b32 v0, s15, 59 |
| +; GCN-NEXT: v_writelane_b32 v0, s16, 60 |
| +; GCN-NEXT: v_writelane_b32 v0, s17, 61 |
| +; GCN-NEXT: v_writelane_b32 v0, s18, 62 |
| +; GCN-NEXT: v_writelane_b32 v0, s19, 63 |
| +; GCN-NEXT: v_writelane_b32 v1, s20, 0 |
| +; GCN-NEXT: v_writelane_b32 v1, s21, 1 |
| +; GCN-NEXT: v_writelane_b32 v1, s22, 2 |
| +; GCN-NEXT: v_writelane_b32 v1, s23, 3 |
| +; GCN-NEXT: v_writelane_b32 v1, s24, 4 |
| +; GCN-NEXT: v_writelane_b32 v1, s25, 5 |
| +; GCN-NEXT: v_writelane_b32 v1, s26, 6 |
| +; GCN-NEXT: v_writelane_b32 v1, s27, 7 |
| +; GCN-NEXT: v_writelane_b32 v1, s0, 8 |
| +; GCN-NEXT: v_writelane_b32 v1, s1, 9 |
| ; GCN-NEXT: s_cbranch_scc1 BB1_2 |
| ; GCN-NEXT: ; %bb.1: ; %bb0 |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 1 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 2 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 3 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 4 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 5 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 6 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 7 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 8 |
| -; GCN-NEXT: v_readlane_b32 s8, v0, 9 |
| -; GCN-NEXT: v_readlane_b32 s9, v0, 10 |
| -; GCN-NEXT: v_readlane_b32 s10, v0, 11 |
| -; GCN-NEXT: v_readlane_b32 s11, v0, 12 |
| -; GCN-NEXT: v_readlane_b32 s12, v0, 13 |
| -; GCN-NEXT: v_readlane_b32 s13, v0, 14 |
| -; GCN-NEXT: v_readlane_b32 s14, v0, 15 |
| -; GCN-NEXT: v_readlane_b32 s15, v0, 16 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 0 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 1 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 2 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 3 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 4 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 5 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 6 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 7 |
| +; GCN-NEXT: v_readlane_b32 s8, v0, 8 |
| +; GCN-NEXT: v_readlane_b32 s9, v0, 9 |
| +; GCN-NEXT: v_readlane_b32 s10, v0, 10 |
| +; GCN-NEXT: v_readlane_b32 s11, v0, 11 |
| +; GCN-NEXT: v_readlane_b32 s12, v0, 12 |
| +; GCN-NEXT: v_readlane_b32 s13, v0, 13 |
| +; GCN-NEXT: v_readlane_b32 s14, v0, 14 |
| +; GCN-NEXT: v_readlane_b32 s15, v0, 15 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:15] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 33 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 34 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 35 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 36 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 37 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 38 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 39 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 40 |
| -; GCN-NEXT: v_readlane_b32 s8, v0, 41 |
| -; GCN-NEXT: v_readlane_b32 s9, v0, 42 |
| -; GCN-NEXT: v_readlane_b32 s10, v0, 43 |
| -; GCN-NEXT: v_readlane_b32 s11, v0, 44 |
| -; GCN-NEXT: v_readlane_b32 s12, v0, 45 |
| -; GCN-NEXT: v_readlane_b32 s13, v0, 46 |
| -; GCN-NEXT: v_readlane_b32 s14, v0, 47 |
| -; GCN-NEXT: v_readlane_b32 s15, v0, 48 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 32 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 33 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 34 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 35 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 36 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 37 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 38 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 39 |
| +; GCN-NEXT: v_readlane_b32 s8, v0, 40 |
| +; GCN-NEXT: v_readlane_b32 s9, v0, 41 |
| +; GCN-NEXT: v_readlane_b32 s10, v0, 42 |
| +; GCN-NEXT: v_readlane_b32 s11, v0, 43 |
| +; GCN-NEXT: v_readlane_b32 s12, v0, 44 |
| +; GCN-NEXT: v_readlane_b32 s13, v0, 45 |
| +; GCN-NEXT: v_readlane_b32 s14, v0, 46 |
| +; GCN-NEXT: v_readlane_b32 s15, v0, 47 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:15] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 17 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 18 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 19 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 20 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 21 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 22 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 23 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 24 |
| -; GCN-NEXT: v_readlane_b32 s8, v0, 25 |
| -; GCN-NEXT: v_readlane_b32 s9, v0, 26 |
| -; GCN-NEXT: v_readlane_b32 s10, v0, 27 |
| -; GCN-NEXT: v_readlane_b32 s11, v0, 28 |
| -; GCN-NEXT: v_readlane_b32 s12, v0, 29 |
| -; GCN-NEXT: v_readlane_b32 s13, v0, 30 |
| -; GCN-NEXT: v_readlane_b32 s14, v0, 31 |
| -; GCN-NEXT: v_readlane_b32 s15, v0, 32 |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 16 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 17 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 18 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 19 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 20 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 21 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 22 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 23 |
| +; GCN-NEXT: v_readlane_b32 s8, v0, 24 |
| +; GCN-NEXT: v_readlane_b32 s9, v0, 25 |
| +; GCN-NEXT: v_readlane_b32 s10, v0, 26 |
| +; GCN-NEXT: v_readlane_b32 s11, v0, 27 |
| +; GCN-NEXT: v_readlane_b32 s12, v0, 28 |
| +; GCN-NEXT: v_readlane_b32 s13, v0, 29 |
| +; GCN-NEXT: v_readlane_b32 s14, v0, 30 |
| +; GCN-NEXT: v_readlane_b32 s15, v0, 31 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:15] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v1, 1 |
| -; GCN-NEXT: v_readlane_b32 s1, v1, 2 |
| -; GCN-NEXT: v_readlane_b32 s2, v1, 3 |
| -; GCN-NEXT: v_readlane_b32 s3, v1, 4 |
| -; GCN-NEXT: v_readlane_b32 s4, v1, 5 |
| -; GCN-NEXT: v_readlane_b32 s5, v1, 6 |
| -; GCN-NEXT: v_readlane_b32 s6, v1, 7 |
| -; GCN-NEXT: v_readlane_b32 s7, v1, 8 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; use s[0:7] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v1, 9 |
| -; GCN-NEXT: v_readlane_b32 s1, v1, 10 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; use s[0:1] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 49 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 50 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 51 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 52 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 53 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 54 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 55 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 56 |
| -; GCN-NEXT: v_readlane_b32 s8, v0, 57 |
| -; GCN-NEXT: v_readlane_b32 s9, v0, 58 |
| -; GCN-NEXT: v_readlane_b32 s10, v0, 59 |
| -; GCN-NEXT: v_readlane_b32 s11, v0, 60 |
| -; GCN-NEXT: v_readlane_b32 s12, v0, 61 |
| -; GCN-NEXT: v_readlane_b32 s13, v0, 62 |
| -; GCN-NEXT: v_readlane_b32 s14, v0, 63 |
| -; GCN-NEXT: v_readlane_b32 s15, v1, 0 |
| +; GCN-NEXT: v_readlane_b32 s16, v1, 0 |
| +; GCN-NEXT: v_readlane_b32 s17, v1, 1 |
| +; GCN-NEXT: v_readlane_b32 s18, v1, 2 |
| +; GCN-NEXT: v_readlane_b32 s19, v1, 3 |
| +; GCN-NEXT: v_readlane_b32 s20, v1, 4 |
| +; GCN-NEXT: v_readlane_b32 s21, v1, 5 |
| +; GCN-NEXT: v_readlane_b32 s22, v1, 6 |
| +; GCN-NEXT: v_readlane_b32 s23, v1, 7 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; use s[16:23] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_readlane_b32 s24, v1, 8 |
| +; GCN-NEXT: v_readlane_b32 s25, v1, 9 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; use s[24:25] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_readlane_b32 s0, v0, 48 |
| +; GCN-NEXT: v_readlane_b32 s1, v0, 49 |
| +; GCN-NEXT: v_readlane_b32 s2, v0, 50 |
| +; GCN-NEXT: v_readlane_b32 s3, v0, 51 |
| +; GCN-NEXT: v_readlane_b32 s4, v0, 52 |
| +; GCN-NEXT: v_readlane_b32 s5, v0, 53 |
| +; GCN-NEXT: v_readlane_b32 s6, v0, 54 |
| +; GCN-NEXT: v_readlane_b32 s7, v0, 55 |
| +; GCN-NEXT: v_readlane_b32 s8, v0, 56 |
| +; GCN-NEXT: v_readlane_b32 s9, v0, 57 |
| +; GCN-NEXT: v_readlane_b32 s10, v0, 58 |
| +; GCN-NEXT: v_readlane_b32 s11, v0, 59 |
| +; GCN-NEXT: v_readlane_b32 s12, v0, 60 |
| +; GCN-NEXT: v_readlane_b32 s13, v0, 61 |
| +; GCN-NEXT: v_readlane_b32 s14, v0, 62 |
| +; GCN-NEXT: v_readlane_b32 s15, v0, 63 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:15] |
| ; GCN-NEXT: ;;#ASMEND |
| @@ -667,13 +663,13 @@ ret: |
| define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { |
| ; GCN-LABEL: no_vgprs_last_sgpr_spill: |
| ; GCN: ; %bb.0: |
| -; GCN-NEXT: s_mov_b32 s56, SCRATCH_RSRC_DWORD0 |
| -; GCN-NEXT: s_mov_b32 s57, SCRATCH_RSRC_DWORD1 |
| -; GCN-NEXT: s_mov_b32 s58, -1 |
| -; GCN-NEXT: s_mov_b32 s59, 0xe8f000 |
| -; GCN-NEXT: s_add_u32 s56, s56, s3 |
| -; GCN-NEXT: s_addc_u32 s57, s57, 0 |
| -; GCN-NEXT: s_load_dword s0, s[0:1], 0xb |
| +; GCN-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| +; GCN-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| +; GCN-NEXT: s_mov_b32 s22, -1 |
| +; GCN-NEXT: s_mov_b32 s23, 0xe8f000 |
| +; GCN-NEXT: s_add_u32 s20, s20, s3 |
| +; GCN-NEXT: s_addc_u32 s21, s21, 0 |
| +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: ;;#ASMSTART |
| @@ -692,179 +688,177 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; def s[36:51] |
| ; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v31, s4, 0 |
| +; GCN-NEXT: v_writelane_b32 v31, s5, 1 |
| +; GCN-NEXT: v_writelane_b32 v31, s6, 2 |
| +; GCN-NEXT: v_writelane_b32 v31, s7, 3 |
| +; GCN-NEXT: v_writelane_b32 v31, s8, 4 |
| +; GCN-NEXT: v_writelane_b32 v31, s9, 5 |
| +; GCN-NEXT: v_writelane_b32 v31, s10, 6 |
| +; GCN-NEXT: v_writelane_b32 v31, s11, 7 |
| +; GCN-NEXT: v_writelane_b32 v31, s12, 8 |
| +; GCN-NEXT: v_writelane_b32 v31, s13, 9 |
| +; GCN-NEXT: v_writelane_b32 v31, s14, 10 |
| +; GCN-NEXT: v_writelane_b32 v31, s15, 11 |
| +; GCN-NEXT: v_writelane_b32 v31, s16, 12 |
| +; GCN-NEXT: v_writelane_b32 v31, s17, 13 |
| +; GCN-NEXT: v_writelane_b32 v31, s18, 14 |
| +; GCN-NEXT: v_writelane_b32 v31, s19, 15 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:19] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: v_writelane_b32 v31, s4, 16 |
| +; GCN-NEXT: v_writelane_b32 v31, s5, 17 |
| +; GCN-NEXT: v_writelane_b32 v31, s6, 18 |
| +; GCN-NEXT: v_writelane_b32 v31, s7, 19 |
| +; GCN-NEXT: v_writelane_b32 v31, s8, 20 |
| +; GCN-NEXT: v_writelane_b32 v31, s9, 21 |
| +; GCN-NEXT: v_writelane_b32 v31, s10, 22 |
| +; GCN-NEXT: v_writelane_b32 v31, s11, 23 |
| +; GCN-NEXT: v_writelane_b32 v31, s12, 24 |
| +; GCN-NEXT: v_writelane_b32 v31, s13, 25 |
| +; GCN-NEXT: v_writelane_b32 v31, s14, 26 |
| +; GCN-NEXT: v_writelane_b32 v31, s15, 27 |
| +; GCN-NEXT: v_writelane_b32 v31, s16, 28 |
| +; GCN-NEXT: v_writelane_b32 v31, s17, 29 |
| +; GCN-NEXT: v_writelane_b32 v31, s18, 30 |
| +; GCN-NEXT: v_writelane_b32 v31, s19, 31 |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[4:19] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: ;;#ASMSTART |
| +; GCN-NEXT: ; def s[0:1] |
| +; GCN-NEXT: ;;#ASMEND |
| +; GCN-NEXT: s_mov_b32 s3, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| -; GCN-NEXT: v_writelane_b32 v31, s0, 0 |
| -; GCN-NEXT: v_writelane_b32 v31, s4, 1 |
| -; GCN-NEXT: v_writelane_b32 v31, s5, 2 |
| -; GCN-NEXT: v_writelane_b32 v31, s6, 3 |
| -; GCN-NEXT: v_writelane_b32 v31, s7, 4 |
| -; GCN-NEXT: v_writelane_b32 v31, s8, 5 |
| -; GCN-NEXT: v_writelane_b32 v31, s9, 6 |
| -; GCN-NEXT: v_writelane_b32 v31, s10, 7 |
| -; GCN-NEXT: v_writelane_b32 v31, s11, 8 |
| -; GCN-NEXT: v_writelane_b32 v31, s12, 9 |
| -; GCN-NEXT: v_writelane_b32 v31, s13, 10 |
| -; GCN-NEXT: v_writelane_b32 v31, s14, 11 |
| -; GCN-NEXT: v_writelane_b32 v31, s15, 12 |
| -; GCN-NEXT: v_writelane_b32 v31, s16, 13 |
| -; GCN-NEXT: v_writelane_b32 v31, s17, 14 |
| -; GCN-NEXT: v_writelane_b32 v31, s18, 15 |
| -; GCN-NEXT: v_writelane_b32 v31, s19, 16 |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[0:15] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[16:31] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; def s[34:35] |
| -; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: s_mov_b32 s33, 0 |
| -; GCN-NEXT: v_readlane_b32 s52, v31, 0 |
| -; GCN-NEXT: s_cmp_lg_u32 s52, s33 |
| -; GCN-NEXT: v_writelane_b32 v31, s36, 17 |
| -; GCN-NEXT: v_writelane_b32 v31, s37, 18 |
| -; GCN-NEXT: v_writelane_b32 v31, s38, 19 |
| -; GCN-NEXT: v_writelane_b32 v31, s39, 20 |
| -; GCN-NEXT: v_writelane_b32 v31, s40, 21 |
| -; GCN-NEXT: v_writelane_b32 v31, s41, 22 |
| -; GCN-NEXT: v_writelane_b32 v31, s42, 23 |
| -; GCN-NEXT: v_writelane_b32 v31, s43, 24 |
| -; GCN-NEXT: v_writelane_b32 v31, s44, 25 |
| -; GCN-NEXT: v_writelane_b32 v31, s45, 26 |
| -; GCN-NEXT: v_writelane_b32 v31, s46, 27 |
| -; GCN-NEXT: v_writelane_b32 v31, s47, 28 |
| -; GCN-NEXT: v_writelane_b32 v31, s48, 29 |
| -; GCN-NEXT: v_writelane_b32 v31, s49, 30 |
| -; GCN-NEXT: v_writelane_b32 v31, s50, 31 |
| -; GCN-NEXT: v_writelane_b32 v31, s51, 32 |
| -; GCN-NEXT: v_writelane_b32 v31, s0, 33 |
| -; GCN-NEXT: v_writelane_b32 v31, s1, 34 |
| -; GCN-NEXT: v_writelane_b32 v31, s2, 35 |
| -; GCN-NEXT: v_writelane_b32 v31, s3, 36 |
| -; GCN-NEXT: v_writelane_b32 v31, s4, 37 |
| -; GCN-NEXT: v_writelane_b32 v31, s5, 38 |
| -; GCN-NEXT: v_writelane_b32 v31, s6, 39 |
| -; GCN-NEXT: v_writelane_b32 v31, s7, 40 |
| -; GCN-NEXT: v_writelane_b32 v31, s8, 41 |
| -; GCN-NEXT: v_writelane_b32 v31, s9, 42 |
| -; GCN-NEXT: v_writelane_b32 v31, s10, 43 |
| -; GCN-NEXT: v_writelane_b32 v31, s11, 44 |
| -; GCN-NEXT: v_writelane_b32 v31, s12, 45 |
| -; GCN-NEXT: v_writelane_b32 v31, s13, 46 |
| -; GCN-NEXT: v_writelane_b32 v31, s14, 47 |
| -; GCN-NEXT: v_writelane_b32 v31, s15, 48 |
| -; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 |
| -; GCN-NEXT: v_writelane_b32 v0, s16, 0 |
| -; GCN-NEXT: v_writelane_b32 v0, s17, 1 |
| -; GCN-NEXT: v_writelane_b32 v0, s18, 2 |
| -; GCN-NEXT: v_writelane_b32 v0, s19, 3 |
| -; GCN-NEXT: v_writelane_b32 v0, s20, 4 |
| -; GCN-NEXT: v_writelane_b32 v0, s21, 5 |
| -; GCN-NEXT: v_writelane_b32 v0, s22, 6 |
| -; GCN-NEXT: v_writelane_b32 v0, s23, 7 |
| -; GCN-NEXT: v_writelane_b32 v0, s24, 8 |
| -; GCN-NEXT: v_writelane_b32 v0, s25, 9 |
| -; GCN-NEXT: v_writelane_b32 v0, s26, 10 |
| -; GCN-NEXT: v_writelane_b32 v0, s27, 11 |
| -; GCN-NEXT: v_writelane_b32 v0, s28, 12 |
| -; GCN-NEXT: v_writelane_b32 v0, s29, 13 |
| -; GCN-NEXT: v_writelane_b32 v0, s30, 14 |
| -; GCN-NEXT: v_writelane_b32 v0, s31, 15 |
| -; GCN-NEXT: s_mov_b64 s[16:17], exec |
| -; GCN-NEXT: s_mov_b64 exec, 0xffff |
| -; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill |
| -; GCN-NEXT: s_mov_b64 exec, s[16:17] |
| -; GCN-NEXT: v_writelane_b32 v31, s34, 49 |
| -; GCN-NEXT: v_writelane_b32 v31, s35, 50 |
| -; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 |
| +; GCN-NEXT: s_cmp_lg_u32 s2, s3 |
| +; GCN-NEXT: v_writelane_b32 v31, s36, 32 |
| +; GCN-NEXT: v_writelane_b32 v31, s37, 33 |
| +; GCN-NEXT: v_writelane_b32 v31, s38, 34 |
| +; GCN-NEXT: v_writelane_b32 v31, s39, 35 |
| +; GCN-NEXT: v_writelane_b32 v31, s40, 36 |
| +; GCN-NEXT: v_writelane_b32 v31, s41, 37 |
| +; GCN-NEXT: v_writelane_b32 v31, s42, 38 |
| +; GCN-NEXT: v_writelane_b32 v31, s43, 39 |
| +; GCN-NEXT: v_writelane_b32 v31, s44, 40 |
| +; GCN-NEXT: v_writelane_b32 v31, s45, 41 |
| +; GCN-NEXT: v_writelane_b32 v31, s46, 42 |
| +; GCN-NEXT: v_writelane_b32 v31, s47, 43 |
| +; GCN-NEXT: v_writelane_b32 v31, s48, 44 |
| +; GCN-NEXT: v_writelane_b32 v31, s49, 45 |
| +; GCN-NEXT: v_writelane_b32 v31, s50, 46 |
| +; GCN-NEXT: v_writelane_b32 v31, s51, 47 |
| +; GCN-NEXT: v_writelane_b32 v31, s4, 48 |
| +; GCN-NEXT: v_writelane_b32 v31, s5, 49 |
| +; GCN-NEXT: v_writelane_b32 v31, s6, 50 |
| +; GCN-NEXT: v_writelane_b32 v31, s7, 51 |
| +; GCN-NEXT: v_writelane_b32 v31, s8, 52 |
| +; GCN-NEXT: v_writelane_b32 v31, s9, 53 |
| +; GCN-NEXT: v_writelane_b32 v31, s10, 54 |
| +; GCN-NEXT: v_writelane_b32 v31, s11, 55 |
| +; GCN-NEXT: v_writelane_b32 v31, s12, 56 |
| +; GCN-NEXT: v_writelane_b32 v31, s13, 57 |
| +; GCN-NEXT: v_writelane_b32 v31, s14, 58 |
| +; GCN-NEXT: v_writelane_b32 v31, s15, 59 |
| +; GCN-NEXT: v_writelane_b32 v31, s16, 60 |
| +; GCN-NEXT: v_writelane_b32 v31, s17, 61 |
| +; GCN-NEXT: v_writelane_b32 v31, s18, 62 |
| +; GCN-NEXT: v_writelane_b32 v31, s19, 63 |
| +; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 |
| +; GCN-NEXT: v_writelane_b32 v0, s0, 0 |
| +; GCN-NEXT: v_writelane_b32 v0, s1, 1 |
| +; GCN-NEXT: s_mov_b64 s[0:1], exec |
| +; GCN-NEXT: s_mov_b64 exec, 3 |
| +; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill |
| +; GCN-NEXT: s_mov_b64 exec, s[0:1] |
| +; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 |
| ; GCN-NEXT: s_cbranch_scc1 BB2_2 |
| ; GCN-NEXT: ; %bb.1: ; %bb0 |
| -; GCN-NEXT: v_readlane_b32 s0, v31, 1 |
| -; GCN-NEXT: v_readlane_b32 s1, v31, 2 |
| -; GCN-NEXT: v_readlane_b32 s2, v31, 3 |
| -; GCN-NEXT: v_readlane_b32 s3, v31, 4 |
| -; GCN-NEXT: v_readlane_b32 s4, v31, 5 |
| -; GCN-NEXT: v_readlane_b32 s5, v31, 6 |
| -; GCN-NEXT: v_readlane_b32 s6, v31, 7 |
| -; GCN-NEXT: v_readlane_b32 s7, v31, 8 |
| -; GCN-NEXT: v_readlane_b32 s8, v31, 9 |
| -; GCN-NEXT: v_readlane_b32 s9, v31, 10 |
| -; GCN-NEXT: v_readlane_b32 s10, v31, 11 |
| -; GCN-NEXT: v_readlane_b32 s11, v31, 12 |
| -; GCN-NEXT: v_readlane_b32 s12, v31, 13 |
| -; GCN-NEXT: v_readlane_b32 s13, v31, 14 |
| -; GCN-NEXT: v_readlane_b32 s14, v31, 15 |
| -; GCN-NEXT: v_readlane_b32 s15, v31, 16 |
| +; GCN-NEXT: v_readlane_b32 s0, v31, 0 |
| +; GCN-NEXT: v_readlane_b32 s1, v31, 1 |
| +; GCN-NEXT: v_readlane_b32 s2, v31, 2 |
| +; GCN-NEXT: v_readlane_b32 s3, v31, 3 |
| +; GCN-NEXT: v_readlane_b32 s4, v31, 4 |
| +; GCN-NEXT: v_readlane_b32 s5, v31, 5 |
| +; GCN-NEXT: v_readlane_b32 s6, v31, 6 |
| +; GCN-NEXT: v_readlane_b32 s7, v31, 7 |
| +; GCN-NEXT: v_readlane_b32 s8, v31, 8 |
| +; GCN-NEXT: v_readlane_b32 s9, v31, 9 |
| +; GCN-NEXT: v_readlane_b32 s10, v31, 10 |
| +; GCN-NEXT: v_readlane_b32 s11, v31, 11 |
| +; GCN-NEXT: v_readlane_b32 s12, v31, 12 |
| +; GCN-NEXT: v_readlane_b32 s13, v31, 13 |
| +; GCN-NEXT: v_readlane_b32 s14, v31, 14 |
| +; GCN-NEXT: v_readlane_b32 s15, v31, 15 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:15] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v31, 17 |
| -; GCN-NEXT: v_readlane_b32 s1, v31, 18 |
| -; GCN-NEXT: v_readlane_b32 s2, v31, 19 |
| -; GCN-NEXT: v_readlane_b32 s3, v31, 20 |
| -; GCN-NEXT: v_readlane_b32 s4, v31, 21 |
| -; GCN-NEXT: v_readlane_b32 s5, v31, 22 |
| -; GCN-NEXT: v_readlane_b32 s6, v31, 23 |
| -; GCN-NEXT: v_readlane_b32 s7, v31, 24 |
| -; GCN-NEXT: v_readlane_b32 s8, v31, 25 |
| -; GCN-NEXT: v_readlane_b32 s9, v31, 26 |
| -; GCN-NEXT: v_readlane_b32 s10, v31, 27 |
| -; GCN-NEXT: v_readlane_b32 s11, v31, 28 |
| -; GCN-NEXT: v_readlane_b32 s12, v31, 29 |
| -; GCN-NEXT: v_readlane_b32 s13, v31, 30 |
| -; GCN-NEXT: v_readlane_b32 s14, v31, 31 |
| -; GCN-NEXT: v_readlane_b32 s15, v31, 32 |
| +; GCN-NEXT: v_readlane_b32 s0, v31, 32 |
| +; GCN-NEXT: v_readlane_b32 s1, v31, 33 |
| +; GCN-NEXT: v_readlane_b32 s2, v31, 34 |
| +; GCN-NEXT: v_readlane_b32 s3, v31, 35 |
| +; GCN-NEXT: v_readlane_b32 s4, v31, 36 |
| +; GCN-NEXT: v_readlane_b32 s5, v31, 37 |
| +; GCN-NEXT: v_readlane_b32 s6, v31, 38 |
| +; GCN-NEXT: v_readlane_b32 s7, v31, 39 |
| +; GCN-NEXT: v_readlane_b32 s8, v31, 40 |
| +; GCN-NEXT: v_readlane_b32 s9, v31, 41 |
| +; GCN-NEXT: v_readlane_b32 s10, v31, 42 |
| +; GCN-NEXT: v_readlane_b32 s11, v31, 43 |
| +; GCN-NEXT: v_readlane_b32 s12, v31, 44 |
| +; GCN-NEXT: v_readlane_b32 s13, v31, 45 |
| +; GCN-NEXT: v_readlane_b32 s14, v31, 46 |
| +; GCN-NEXT: v_readlane_b32 s15, v31, 47 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:15] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v31, 33 |
| -; GCN-NEXT: v_readlane_b32 s1, v31, 34 |
| -; GCN-NEXT: v_readlane_b32 s2, v31, 35 |
| -; GCN-NEXT: v_readlane_b32 s3, v31, 36 |
| -; GCN-NEXT: v_readlane_b32 s4, v31, 37 |
| -; GCN-NEXT: v_readlane_b32 s5, v31, 38 |
| -; GCN-NEXT: v_readlane_b32 s6, v31, 39 |
| -; GCN-NEXT: v_readlane_b32 s7, v31, 40 |
| -; GCN-NEXT: v_readlane_b32 s8, v31, 41 |
| -; GCN-NEXT: v_readlane_b32 s9, v31, 42 |
| -; GCN-NEXT: v_readlane_b32 s10, v31, 43 |
| -; GCN-NEXT: v_readlane_b32 s11, v31, 44 |
| -; GCN-NEXT: v_readlane_b32 s12, v31, 45 |
| -; GCN-NEXT: v_readlane_b32 s13, v31, 46 |
| -; GCN-NEXT: v_readlane_b32 s14, v31, 47 |
| -; GCN-NEXT: v_readlane_b32 s15, v31, 48 |
| +; GCN-NEXT: v_readlane_b32 s0, v31, 16 |
| +; GCN-NEXT: v_readlane_b32 s1, v31, 17 |
| +; GCN-NEXT: v_readlane_b32 s2, v31, 18 |
| +; GCN-NEXT: v_readlane_b32 s3, v31, 19 |
| +; GCN-NEXT: v_readlane_b32 s4, v31, 20 |
| +; GCN-NEXT: v_readlane_b32 s5, v31, 21 |
| +; GCN-NEXT: v_readlane_b32 s6, v31, 22 |
| +; GCN-NEXT: v_readlane_b32 s7, v31, 23 |
| +; GCN-NEXT: v_readlane_b32 s8, v31, 24 |
| +; GCN-NEXT: v_readlane_b32 s9, v31, 25 |
| +; GCN-NEXT: v_readlane_b32 s10, v31, 26 |
| +; GCN-NEXT: v_readlane_b32 s11, v31, 27 |
| +; GCN-NEXT: v_readlane_b32 s12, v31, 28 |
| +; GCN-NEXT: v_readlane_b32 s13, v31, 29 |
| +; GCN-NEXT: v_readlane_b32 s14, v31, 30 |
| +; GCN-NEXT: v_readlane_b32 s15, v31, 31 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:15] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: s_mov_b64 s[0:1], exec |
| -; GCN-NEXT: s_mov_b64 exec, 0xffff |
| -; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload |
| -; GCN-NEXT: s_mov_b64 exec, s[0:1] |
| -; GCN-NEXT: s_waitcnt vmcnt(0) |
| -; GCN-NEXT: v_readlane_b32 s0, v0, 0 |
| -; GCN-NEXT: v_readlane_b32 s1, v0, 1 |
| -; GCN-NEXT: v_readlane_b32 s2, v0, 2 |
| -; GCN-NEXT: v_readlane_b32 s3, v0, 3 |
| -; GCN-NEXT: v_readlane_b32 s4, v0, 4 |
| -; GCN-NEXT: v_readlane_b32 s5, v0, 5 |
| -; GCN-NEXT: v_readlane_b32 s6, v0, 6 |
| -; GCN-NEXT: v_readlane_b32 s7, v0, 7 |
| -; GCN-NEXT: v_readlane_b32 s8, v0, 8 |
| -; GCN-NEXT: v_readlane_b32 s9, v0, 9 |
| -; GCN-NEXT: v_readlane_b32 s10, v0, 10 |
| -; GCN-NEXT: v_readlane_b32 s11, v0, 11 |
| -; GCN-NEXT: v_readlane_b32 s12, v0, 12 |
| -; GCN-NEXT: v_readlane_b32 s13, v0, 13 |
| -; GCN-NEXT: v_readlane_b32 s14, v0, 14 |
| -; GCN-NEXT: v_readlane_b32 s15, v0, 15 |
| +; GCN-NEXT: v_readlane_b32 s0, v31, 48 |
| +; GCN-NEXT: v_readlane_b32 s1, v31, 49 |
| +; GCN-NEXT: v_readlane_b32 s2, v31, 50 |
| +; GCN-NEXT: v_readlane_b32 s3, v31, 51 |
| +; GCN-NEXT: v_readlane_b32 s4, v31, 52 |
| +; GCN-NEXT: v_readlane_b32 s5, v31, 53 |
| +; GCN-NEXT: v_readlane_b32 s6, v31, 54 |
| +; GCN-NEXT: v_readlane_b32 s7, v31, 55 |
| +; GCN-NEXT: v_readlane_b32 s8, v31, 56 |
| +; GCN-NEXT: v_readlane_b32 s9, v31, 57 |
| +; GCN-NEXT: v_readlane_b32 s10, v31, 58 |
| +; GCN-NEXT: v_readlane_b32 s11, v31, 59 |
| +; GCN-NEXT: v_readlane_b32 s12, v31, 60 |
| +; GCN-NEXT: v_readlane_b32 s13, v31, 61 |
| +; GCN-NEXT: v_readlane_b32 s14, v31, 62 |
| +; GCN-NEXT: v_readlane_b32 s15, v31, 63 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use s[0:15] |
| ; GCN-NEXT: ;;#ASMEND |
| -; GCN-NEXT: v_readlane_b32 s0, v31, 49 |
| -; GCN-NEXT: v_readlane_b32 s1, v31, 50 |
| +; GCN-NEXT: s_mov_b64 s[16:17], exec |
| +; GCN-NEXT: s_mov_b64 exec, 3 |
| +; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload |
| +; GCN-NEXT: s_mov_b64 exec, s[16:17] |
| +; GCN-NEXT: s_waitcnt vmcnt(0) |
| +; GCN-NEXT: v_readlane_b32 s16, v0, 0 |
| +; GCN-NEXT: v_readlane_b32 s17, v0, 1 |
| ; GCN-NEXT: ;;#ASMSTART |
| -; GCN-NEXT: ; use s[0:1] |
| +; GCN-NEXT: ; use s[16:17] |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: BB2_2: ; %ret |
| ; GCN-NEXT: s_endpgm |
| diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll |
| index 9b629a5f911..a03318ead71 100644 |
| --- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll |
| +++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll |
| @@ -77,101 +77,6 @@ endif: ; preds = %else, %if |
| ret void |
| } |
| |
| -; Force save and restore of m0 during SMEM spill |
| -; GCN-LABEL: {{^}}m0_unavailable_spill: |
| - |
| -; GCN: ; def m0, 1 |
| - |
| -; GCN: s_mov_b32 m0, s0 |
| -; GCN: v_interp_mov_f32 |
| - |
| -; GCN: ; clobber m0 |
| - |
| -; TOSMEM: s_mov_b32 s2, m0 |
| -; TOSMEM: s_add_u32 m0, s3, 0x100 |
| -; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill |
| -; TOSMEM: s_mov_b32 m0, s2 |
| - |
| -; TOSMEM: s_mov_b64 exec, |
| -; TOSMEM: s_cbranch_execz |
| -; TOSMEM: s_branch |
| - |
| -; TOSMEM: BB{{[0-9]+_[0-9]+}}: |
| -; TOSMEM: s_add_u32 m0, s3, 0x100 |
| -; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload |
| - |
| -; GCN-NOT: v_readlane_b32 m0 |
| -; GCN-NOT: s_buffer_store_dword m0 |
| -; GCN-NOT: s_buffer_load_dword m0 |
| -define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) #0 { |
| -main_body: |
| - %m0 = call i32 asm sideeffect "; def $0, 1", "={m0}"() #0 |
| - %tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0.arg) |
| - call void asm sideeffect "; clobber $0", "~{m0}"() #0 |
| - %cmp = fcmp ueq float 0.000000e+00, %tmp |
| - br i1 %cmp, label %if, label %else |
| - |
| -if: ; preds = %main_body |
| - store volatile i32 8, i32 addrspace(1)* undef |
| - br label %endif |
| - |
| -else: ; preds = %main_body |
| - store volatile i32 11, i32 addrspace(1)* undef |
| - br label %endif |
| - |
| -endif: |
| - ret void |
| -} |
| - |
| -; GCN-LABEL: {{^}}restore_m0_lds: |
| -; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] |
| -; TOSMEM: s_cmp_eq_u32 |
| -; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it |
| -; FIXME-TOSMEM-NOT: m0 |
| -; TOSMEM: s_add_u32 m0, s3, 0x100 |
| -; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill |
| -; FIXME-TOSMEM-NOT: m0 |
| -; TOSMEM: s_add_u32 m0, s3, 0x200 |
| -; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill |
| -; FIXME-TOSMEM-NOT: m0 |
| -; TOSMEM: s_cbranch_scc1 |
| - |
| -; TOSMEM: s_mov_b32 m0, -1 |
| - |
| -; TOSMEM: s_mov_b32 s2, m0 |
| -; TOSMEM: s_add_u32 m0, s3, 0x200 |
| -; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload |
| -; TOSMEM: s_mov_b32 m0, s2 |
| -; TOSMEM: s_waitcnt lgkmcnt(0) |
| - |
| -; TOSMEM: ds_write_b64 |
| - |
| -; FIXME-TOSMEM-NOT: m0 |
| -; TOSMEM: s_add_u32 m0, s3, 0x100 |
| -; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload |
| -; FIXME-TOSMEM-NOT: m0 |
| -; TOSMEM: s_waitcnt lgkmcnt(0) |
| -; TOSMEM-NOT: m0 |
| -; TOSMEM: s_mov_b32 m0, s2 |
| -; TOSMEM: ; use m0 |
| - |
| -; TOSMEM: s_dcache_wb |
| -; TOSMEM: s_endpgm |
| -define amdgpu_kernel void @restore_m0_lds(i32 %arg) { |
| - %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0 |
| - %sval = load volatile i64, i64 addrspace(4)* undef |
| - %cmp = icmp eq i32 %arg, 0 |
| - br i1 %cmp, label %ret, label %bb |
| - |
| -bb: |
| - store volatile i64 %sval, i64 addrspace(3)* undef |
| - call void asm sideeffect "; use $0", "{m0}"(i32 %m0) #0 |
| - br label %ret |
| - |
| -ret: |
| - ret void |
| -} |
| - |
| declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1 |
| declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 |
| declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 |
| diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll |
| index 1a48e76a241..e4beac77e1b 100644 |
| --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll |
| +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll |
| @@ -94,10 +94,10 @@ define i32 @called(i32 %a) noinline { |
| |
| ; GFX9-LABEL: {{^}}call: |
| define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) { |
| -; GFX9-O0: v_mov_b32_e32 v0, s0 |
| +; GFX9-O0: v_mov_b32_e32 v0, s2 |
| ; GFX9-O3: v_mov_b32_e32 v2, s0 |
| ; GFX9-NEXT: s_not_b64 exec, exec |
| -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1 |
| +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s3 |
| ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX9-NEXT: s_not_b64 exec, exec |
| %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) |
| @@ -142,8 +142,8 @@ define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) { |
| ; GFX9-O0: buffer_store_dword v1 |
| ; GFX9: s_swappc_b64 |
| %tmp134 = call i64 @called_i64(i64 %tmp107) |
| -; GFX9-O0: buffer_load_dword v4 |
| -; GFX9-O0: buffer_load_dword v5 |
| +; GFX9-O0: buffer_load_dword v6 |
| +; GFX9-O0: buffer_load_dword v7 |
| %tmp136 = add i64 %tmp134, %tmp107 |
| %tmp137 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp136) |
| %tmp138 = bitcast i64 %tmp137 to <2 x i32> |
| diff --git a/llvm/test/CodeGen/ARM/legalize-bitcast.ll b/llvm/test/CodeGen/ARM/legalize-bitcast.ll |
| index 529775df5fd..478ff985bf4 100644 |
| --- a/llvm/test/CodeGen/ARM/legalize-bitcast.ll |
| +++ b/llvm/test/CodeGen/ARM/legalize-bitcast.ll |
| @@ -49,9 +49,9 @@ define i16 @int_to_vec(i80 %in) { |
| ; CHECK-NEXT: vmov.32 d16[0], r0 |
| ; CHECK-NEXT: @ implicit-def: $q9 |
| ; CHECK-NEXT: vmov.f64 d18, d16 |
| -; CHECK-NEXT: vrev32.16 q8, q9 |
| -; CHECK-NEXT: @ kill: def $d16 killed $d16 killed $q8 |
| -; CHECK-NEXT: vmov.u16 r0, d16[0] |
| +; CHECK-NEXT: vrev32.16 q9, q9 |
| +; CHECK-NEXT: @ kill: def $d18 killed $d18 killed $q9 |
| +; CHECK-NEXT: vmov.u16 r0, d18[0] |
| ; CHECK-NEXT: bx lr |
| %vec = bitcast i80 %in to <5 x i16> |
| %e0 = extractelement <5 x i16> %vec, i32 0 |
| diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll |
| index a98c6eb9fd6..c63f24ea692 100644 |
| --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll |
| +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll |
| @@ -235,15 +235,15 @@ define i32 @f64tou32(double %a) { |
| ; FP32-NEXT: mfc1 $1, $f0 |
| ; FP32-NEXT: lui $2, 16864 |
| ; FP32-NEXT: ori $3, $zero, 0 |
| -; FP32-NEXT: mtc1 $3, $f0 |
| -; FP32-NEXT: mtc1 $2, $f1 |
| -; FP32-NEXT: sub.d $f2, $f12, $f0 |
| -; FP32-NEXT: trunc.w.d $f2, $f2 |
| -; FP32-NEXT: mfc1 $2, $f2 |
| +; FP32-NEXT: mtc1 $3, $f2 |
| +; FP32-NEXT: mtc1 $2, $f3 |
| +; FP32-NEXT: sub.d $f4, $f12, $f2 |
| +; FP32-NEXT: trunc.w.d $f0, $f4 |
| +; FP32-NEXT: mfc1 $2, $f0 |
| ; FP32-NEXT: lui $3, 32768 |
| ; FP32-NEXT: xor $2, $2, $3 |
| ; FP32-NEXT: addiu $3, $zero, 1 |
| -; FP32-NEXT: c.ult.d $f12, $f0 |
| +; FP32-NEXT: c.ult.d $f12, $f2 |
| ; FP32-NEXT: movf $3, $zero, $fcc0 |
| ; FP32-NEXT: andi $3, $3, 1 |
| ; FP32-NEXT: movn $2, $1, $3 |
| @@ -256,15 +256,15 @@ define i32 @f64tou32(double %a) { |
| ; FP64-NEXT: mfc1 $1, $f0 |
| ; FP64-NEXT: lui $2, 16864 |
| ; FP64-NEXT: ori $3, $zero, 0 |
| -; FP64-NEXT: mtc1 $3, $f0 |
| -; FP64-NEXT: mthc1 $2, $f0 |
| -; FP64-NEXT: sub.d $f1, $f12, $f0 |
| -; FP64-NEXT: trunc.w.d $f1, $f1 |
| -; FP64-NEXT: mfc1 $2, $f1 |
| +; FP64-NEXT: mtc1 $3, $f1 |
| +; FP64-NEXT: mthc1 $2, $f1 |
| +; FP64-NEXT: sub.d $f2, $f12, $f1 |
| +; FP64-NEXT: trunc.w.d $f0, $f2 |
| +; FP64-NEXT: mfc1 $2, $f0 |
| ; FP64-NEXT: lui $3, 32768 |
| ; FP64-NEXT: xor $2, $2, $3 |
| ; FP64-NEXT: addiu $3, $zero, 1 |
| -; FP64-NEXT: c.ult.d $f12, $f0 |
| +; FP64-NEXT: c.ult.d $f12, $f1 |
| ; FP64-NEXT: movf $3, $zero, $fcc0 |
| ; FP64-NEXT: andi $3, $3, 1 |
| ; FP64-NEXT: movn $2, $1, $3 |
| @@ -282,15 +282,15 @@ define zeroext i16 @f64tou16(double %a) { |
| ; FP32-NEXT: mfc1 $1, $f0 |
| ; FP32-NEXT: lui $2, 16864 |
| ; FP32-NEXT: ori $3, $zero, 0 |
| -; FP32-NEXT: mtc1 $3, $f0 |
| -; FP32-NEXT: mtc1 $2, $f1 |
| -; FP32-NEXT: sub.d $f2, $f12, $f0 |
| -; FP32-NEXT: trunc.w.d $f2, $f2 |
| -; FP32-NEXT: mfc1 $2, $f2 |
| +; FP32-NEXT: mtc1 $3, $f2 |
| +; FP32-NEXT: mtc1 $2, $f3 |
| +; FP32-NEXT: sub.d $f4, $f12, $f2 |
| +; FP32-NEXT: trunc.w.d $f0, $f4 |
| +; FP32-NEXT: mfc1 $2, $f0 |
| ; FP32-NEXT: lui $3, 32768 |
| ; FP32-NEXT: xor $2, $2, $3 |
| ; FP32-NEXT: addiu $3, $zero, 1 |
| -; FP32-NEXT: c.ult.d $f12, $f0 |
| +; FP32-NEXT: c.ult.d $f12, $f2 |
| ; FP32-NEXT: movf $3, $zero, $fcc0 |
| ; FP32-NEXT: andi $3, $3, 1 |
| ; FP32-NEXT: movn $2, $1, $3 |
| @@ -304,15 +304,15 @@ define zeroext i16 @f64tou16(double %a) { |
| ; FP64-NEXT: mfc1 $1, $f0 |
| ; FP64-NEXT: lui $2, 16864 |
| ; FP64-NEXT: ori $3, $zero, 0 |
| -; FP64-NEXT: mtc1 $3, $f0 |
| -; FP64-NEXT: mthc1 $2, $f0 |
| -; FP64-NEXT: sub.d $f1, $f12, $f0 |
| -; FP64-NEXT: trunc.w.d $f1, $f1 |
| -; FP64-NEXT: mfc1 $2, $f1 |
| +; FP64-NEXT: mtc1 $3, $f1 |
| +; FP64-NEXT: mthc1 $2, $f1 |
| +; FP64-NEXT: sub.d $f2, $f12, $f1 |
| +; FP64-NEXT: trunc.w.d $f0, $f2 |
| +; FP64-NEXT: mfc1 $2, $f0 |
| ; FP64-NEXT: lui $3, 32768 |
| ; FP64-NEXT: xor $2, $2, $3 |
| ; FP64-NEXT: addiu $3, $zero, 1 |
| -; FP64-NEXT: c.ult.d $f12, $f0 |
| +; FP64-NEXT: c.ult.d $f12, $f1 |
| ; FP64-NEXT: movf $3, $zero, $fcc0 |
| ; FP64-NEXT: andi $3, $3, 1 |
| ; FP64-NEXT: movn $2, $1, $3 |
| @@ -331,15 +331,15 @@ define zeroext i8 @f64tou8(double %a) { |
| ; FP32-NEXT: mfc1 $1, $f0 |
| ; FP32-NEXT: lui $2, 16864 |
| ; FP32-NEXT: ori $3, $zero, 0 |
| -; FP32-NEXT: mtc1 $3, $f0 |
| -; FP32-NEXT: mtc1 $2, $f1 |
| -; FP32-NEXT: sub.d $f2, $f12, $f0 |
| -; FP32-NEXT: trunc.w.d $f2, $f2 |
| -; FP32-NEXT: mfc1 $2, $f2 |
| +; FP32-NEXT: mtc1 $3, $f2 |
| +; FP32-NEXT: mtc1 $2, $f3 |
| +; FP32-NEXT: sub.d $f4, $f12, $f2 |
| +; FP32-NEXT: trunc.w.d $f0, $f4 |
| +; FP32-NEXT: mfc1 $2, $f0 |
| ; FP32-NEXT: lui $3, 32768 |
| ; FP32-NEXT: xor $2, $2, $3 |
| ; FP32-NEXT: addiu $3, $zero, 1 |
| -; FP32-NEXT: c.ult.d $f12, $f0 |
| +; FP32-NEXT: c.ult.d $f12, $f2 |
| ; FP32-NEXT: movf $3, $zero, $fcc0 |
| ; FP32-NEXT: andi $3, $3, 1 |
| ; FP32-NEXT: movn $2, $1, $3 |
| @@ -353,15 +353,15 @@ define zeroext i8 @f64tou8(double %a) { |
| ; FP64-NEXT: mfc1 $1, $f0 |
| ; FP64-NEXT: lui $2, 16864 |
| ; FP64-NEXT: ori $3, $zero, 0 |
| -; FP64-NEXT: mtc1 $3, $f0 |
| -; FP64-NEXT: mthc1 $2, $f0 |
| -; FP64-NEXT: sub.d $f1, $f12, $f0 |
| -; FP64-NEXT: trunc.w.d $f1, $f1 |
| -; FP64-NEXT: mfc1 $2, $f1 |
| +; FP64-NEXT: mtc1 $3, $f1 |
| +; FP64-NEXT: mthc1 $2, $f1 |
| +; FP64-NEXT: sub.d $f2, $f12, $f1 |
| +; FP64-NEXT: trunc.w.d $f0, $f2 |
| +; FP64-NEXT: mfc1 $2, $f0 |
| ; FP64-NEXT: lui $3, 32768 |
| ; FP64-NEXT: xor $2, $2, $3 |
| ; FP64-NEXT: addiu $3, $zero, 1 |
| -; FP64-NEXT: c.ult.d $f12, $f0 |
| +; FP64-NEXT: c.ult.d $f12, $f1 |
| ; FP64-NEXT: movf $3, $zero, $fcc0 |
| ; FP64-NEXT: andi $3, $3, 1 |
| ; FP64-NEXT: movn $2, $1, $3 |
| diff --git a/llvm/test/CodeGen/Mips/atomic-min-max.ll b/llvm/test/CodeGen/Mips/atomic-min-max.ll |
| index 646af650c00..a6200851940 100644 |
| --- a/llvm/test/CodeGen/Mips/atomic-min-max.ll |
| +++ b/llvm/test/CodeGen/Mips/atomic-min-max.ll |
| @@ -1154,26 +1154,26 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64-NEXT: sll $2, $2, 3 |
| ; MIPS64-NEXT: ori $3, $zero, 65535 |
| ; MIPS64-NEXT: sllv $3, $3, $2 |
| -; MIPS64-NEXT: nor $4, $zero, $3 |
| +; MIPS64-NEXT: nor $6, $zero, $3 |
| ; MIPS64-NEXT: sllv $5, $5, $2 |
| ; MIPS64-NEXT: .LBB4_1: # %entry |
| ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64-NEXT: ll $7, 0($1) |
| -; MIPS64-NEXT: slt $10, $7, $5 |
| -; MIPS64-NEXT: move $8, $7 |
| -; MIPS64-NEXT: movn $8, $5, $10 |
| -; MIPS64-NEXT: and $8, $8, $3 |
| -; MIPS64-NEXT: and $9, $7, $4 |
| -; MIPS64-NEXT: or $9, $9, $8 |
| -; MIPS64-NEXT: sc $9, 0($1) |
| -; MIPS64-NEXT: beqz $9, .LBB4_1 |
| +; MIPS64-NEXT: ll $8, 0($1) |
| +; MIPS64-NEXT: slt $11, $8, $5 |
| +; MIPS64-NEXT: move $9, $8 |
| +; MIPS64-NEXT: movn $9, $5, $11 |
| +; MIPS64-NEXT: and $9, $9, $3 |
| +; MIPS64-NEXT: and $10, $8, $6 |
| +; MIPS64-NEXT: or $10, $10, $9 |
| +; MIPS64-NEXT: sc $10, 0($1) |
| +; MIPS64-NEXT: beqz $10, .LBB4_1 |
| ; MIPS64-NEXT: nop |
| ; MIPS64-NEXT: # %bb.2: # %entry |
| -; MIPS64-NEXT: and $6, $7, $3 |
| -; MIPS64-NEXT: srlv $6, $6, $2 |
| -; MIPS64-NEXT: seh $6, $6 |
| +; MIPS64-NEXT: and $7, $8, $3 |
| +; MIPS64-NEXT: srlv $7, $7, $2 |
| +; MIPS64-NEXT: seh $7, $7 |
| ; MIPS64-NEXT: # %bb.3: # %entry |
| -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64-NEXT: # %bb.4: # %entry |
| ; MIPS64-NEXT: sync |
| ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -1194,26 +1194,26 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64R6-NEXT: sll $2, $2, 3 |
| ; MIPS64R6-NEXT: ori $3, $zero, 65535 |
| ; MIPS64R6-NEXT: sllv $3, $3, $2 |
| -; MIPS64R6-NEXT: nor $4, $zero, $3 |
| +; MIPS64R6-NEXT: nor $6, $zero, $3 |
| ; MIPS64R6-NEXT: sllv $5, $5, $2 |
| ; MIPS64R6-NEXT: .LBB4_1: # %entry |
| ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6-NEXT: ll $7, 0($1) |
| -; MIPS64R6-NEXT: slt $10, $7, $5 |
| -; MIPS64R6-NEXT: seleqz $8, $7, $10 |
| -; MIPS64R6-NEXT: selnez $10, $5, $10 |
| -; MIPS64R6-NEXT: or $8, $8, $10 |
| -; MIPS64R6-NEXT: and $8, $8, $3 |
| -; MIPS64R6-NEXT: and $9, $7, $4 |
| -; MIPS64R6-NEXT: or $9, $9, $8 |
| -; MIPS64R6-NEXT: sc $9, 0($1) |
| -; MIPS64R6-NEXT: beqzc $9, .LBB4_1 |
| +; MIPS64R6-NEXT: ll $8, 0($1) |
| +; MIPS64R6-NEXT: slt $11, $8, $5 |
| +; MIPS64R6-NEXT: seleqz $9, $8, $11 |
| +; MIPS64R6-NEXT: selnez $11, $5, $11 |
| +; MIPS64R6-NEXT: or $9, $9, $11 |
| +; MIPS64R6-NEXT: and $9, $9, $3 |
| +; MIPS64R6-NEXT: and $10, $8, $6 |
| +; MIPS64R6-NEXT: or $10, $10, $9 |
| +; MIPS64R6-NEXT: sc $10, 0($1) |
| +; MIPS64R6-NEXT: beqzc $10, .LBB4_1 |
| ; MIPS64R6-NEXT: # %bb.2: # %entry |
| -; MIPS64R6-NEXT: and $6, $7, $3 |
| -; MIPS64R6-NEXT: srlv $6, $6, $2 |
| -; MIPS64R6-NEXT: seh $6, $6 |
| +; MIPS64R6-NEXT: and $7, $8, $3 |
| +; MIPS64R6-NEXT: srlv $7, $7, $2 |
| +; MIPS64R6-NEXT: seh $7, $7 |
| ; MIPS64R6-NEXT: # %bb.3: # %entry |
| -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6-NEXT: # %bb.4: # %entry |
| ; MIPS64R6-NEXT: sync |
| ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -1232,28 +1232,28 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64EL-NEXT: sll $2, $2, 3 |
| ; MIPS64EL-NEXT: ori $3, $zero, 65535 |
| ; MIPS64EL-NEXT: sllv $3, $3, $2 |
| -; MIPS64EL-NEXT: nor $4, $zero, $3 |
| +; MIPS64EL-NEXT: nor $6, $zero, $3 |
| ; MIPS64EL-NEXT: sllv $5, $5, $2 |
| ; MIPS64EL-NEXT: .LBB4_1: # %entry |
| ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64EL-NEXT: ll $7, 0($1) |
| -; MIPS64EL-NEXT: and $7, $7, $3 |
| -; MIPS64EL-NEXT: and $5, $5, $3 |
| -; MIPS64EL-NEXT: slt $10, $7, $5 |
| -; MIPS64EL-NEXT: move $8, $7 |
| -; MIPS64EL-NEXT: movn $8, $5, $10 |
| +; MIPS64EL-NEXT: ll $8, 0($1) |
| ; MIPS64EL-NEXT: and $8, $8, $3 |
| -; MIPS64EL-NEXT: and $9, $7, $4 |
| -; MIPS64EL-NEXT: or $9, $9, $8 |
| -; MIPS64EL-NEXT: sc $9, 0($1) |
| -; MIPS64EL-NEXT: beqz $9, .LBB4_1 |
| +; MIPS64EL-NEXT: and $5, $5, $3 |
| +; MIPS64EL-NEXT: slt $11, $8, $5 |
| +; MIPS64EL-NEXT: move $9, $8 |
| +; MIPS64EL-NEXT: movn $9, $5, $11 |
| +; MIPS64EL-NEXT: and $9, $9, $3 |
| +; MIPS64EL-NEXT: and $10, $8, $6 |
| +; MIPS64EL-NEXT: or $10, $10, $9 |
| +; MIPS64EL-NEXT: sc $10, 0($1) |
| +; MIPS64EL-NEXT: beqz $10, .LBB4_1 |
| ; MIPS64EL-NEXT: nop |
| ; MIPS64EL-NEXT: # %bb.2: # %entry |
| -; MIPS64EL-NEXT: and $6, $7, $3 |
| -; MIPS64EL-NEXT: srlv $6, $6, $2 |
| -; MIPS64EL-NEXT: seh $6, $6 |
| +; MIPS64EL-NEXT: and $7, $8, $3 |
| +; MIPS64EL-NEXT: srlv $7, $7, $2 |
| +; MIPS64EL-NEXT: seh $7, $7 |
| ; MIPS64EL-NEXT: # %bb.3: # %entry |
| -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64EL-NEXT: # %bb.4: # %entry |
| ; MIPS64EL-NEXT: sync |
| ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -1273,28 +1273,28 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64ELR6-NEXT: sll $2, $2, 3 |
| ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 |
| ; MIPS64ELR6-NEXT: sllv $3, $3, $2 |
| -; MIPS64ELR6-NEXT: nor $4, $zero, $3 |
| +; MIPS64ELR6-NEXT: nor $6, $zero, $3 |
| ; MIPS64ELR6-NEXT: sllv $5, $5, $2 |
| ; MIPS64ELR6-NEXT: .LBB4_1: # %entry |
| ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64ELR6-NEXT: ll $7, 0($1) |
| -; MIPS64ELR6-NEXT: and $7, $7, $3 |
| -; MIPS64ELR6-NEXT: and $5, $5, $3 |
| -; MIPS64ELR6-NEXT: slt $10, $7, $5 |
| -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 |
| -; MIPS64ELR6-NEXT: selnez $10, $5, $10 |
| -; MIPS64ELR6-NEXT: or $8, $8, $10 |
| +; MIPS64ELR6-NEXT: ll $8, 0($1) |
| ; MIPS64ELR6-NEXT: and $8, $8, $3 |
| -; MIPS64ELR6-NEXT: and $9, $7, $4 |
| -; MIPS64ELR6-NEXT: or $9, $9, $8 |
| -; MIPS64ELR6-NEXT: sc $9, 0($1) |
| -; MIPS64ELR6-NEXT: beqzc $9, .LBB4_1 |
| +; MIPS64ELR6-NEXT: and $5, $5, $3 |
| +; MIPS64ELR6-NEXT: slt $11, $8, $5 |
| +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 |
| +; MIPS64ELR6-NEXT: selnez $11, $5, $11 |
| +; MIPS64ELR6-NEXT: or $9, $9, $11 |
| +; MIPS64ELR6-NEXT: and $9, $9, $3 |
| +; MIPS64ELR6-NEXT: and $10, $8, $6 |
| +; MIPS64ELR6-NEXT: or $10, $10, $9 |
| +; MIPS64ELR6-NEXT: sc $10, 0($1) |
| +; MIPS64ELR6-NEXT: beqzc $10, .LBB4_1 |
| ; MIPS64ELR6-NEXT: # %bb.2: # %entry |
| -; MIPS64ELR6-NEXT: and $6, $7, $3 |
| -; MIPS64ELR6-NEXT: srlv $6, $6, $2 |
| -; MIPS64ELR6-NEXT: seh $6, $6 |
| +; MIPS64ELR6-NEXT: and $7, $8, $3 |
| +; MIPS64ELR6-NEXT: srlv $7, $7, $2 |
| +; MIPS64ELR6-NEXT: seh $7, $7 |
| ; MIPS64ELR6-NEXT: # %bb.3: # %entry |
| -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64ELR6-NEXT: # %bb.4: # %entry |
| ; MIPS64ELR6-NEXT: sync |
| ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -1635,26 +1635,26 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64-NEXT: sll $2, $2, 3 |
| ; MIPS64-NEXT: ori $3, $zero, 65535 |
| ; MIPS64-NEXT: sllv $3, $3, $2 |
| -; MIPS64-NEXT: nor $4, $zero, $3 |
| +; MIPS64-NEXT: nor $6, $zero, $3 |
| ; MIPS64-NEXT: sllv $5, $5, $2 |
| ; MIPS64-NEXT: .LBB5_1: # %entry |
| ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64-NEXT: ll $7, 0($1) |
| -; MIPS64-NEXT: slt $10, $7, $5 |
| -; MIPS64-NEXT: move $8, $7 |
| -; MIPS64-NEXT: movz $8, $5, $10 |
| -; MIPS64-NEXT: and $8, $8, $3 |
| -; MIPS64-NEXT: and $9, $7, $4 |
| -; MIPS64-NEXT: or $9, $9, $8 |
| -; MIPS64-NEXT: sc $9, 0($1) |
| -; MIPS64-NEXT: beqz $9, .LBB5_1 |
| +; MIPS64-NEXT: ll $8, 0($1) |
| +; MIPS64-NEXT: slt $11, $8, $5 |
| +; MIPS64-NEXT: move $9, $8 |
| +; MIPS64-NEXT: movz $9, $5, $11 |
| +; MIPS64-NEXT: and $9, $9, $3 |
| +; MIPS64-NEXT: and $10, $8, $6 |
| +; MIPS64-NEXT: or $10, $10, $9 |
| +; MIPS64-NEXT: sc $10, 0($1) |
| +; MIPS64-NEXT: beqz $10, .LBB5_1 |
| ; MIPS64-NEXT: nop |
| ; MIPS64-NEXT: # %bb.2: # %entry |
| -; MIPS64-NEXT: and $6, $7, $3 |
| -; MIPS64-NEXT: srlv $6, $6, $2 |
| -; MIPS64-NEXT: seh $6, $6 |
| +; MIPS64-NEXT: and $7, $8, $3 |
| +; MIPS64-NEXT: srlv $7, $7, $2 |
| +; MIPS64-NEXT: seh $7, $7 |
| ; MIPS64-NEXT: # %bb.3: # %entry |
| -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64-NEXT: # %bb.4: # %entry |
| ; MIPS64-NEXT: sync |
| ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -1675,26 +1675,26 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64R6-NEXT: sll $2, $2, 3 |
| ; MIPS64R6-NEXT: ori $3, $zero, 65535 |
| ; MIPS64R6-NEXT: sllv $3, $3, $2 |
| -; MIPS64R6-NEXT: nor $4, $zero, $3 |
| +; MIPS64R6-NEXT: nor $6, $zero, $3 |
| ; MIPS64R6-NEXT: sllv $5, $5, $2 |
| ; MIPS64R6-NEXT: .LBB5_1: # %entry |
| ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6-NEXT: ll $7, 0($1) |
| -; MIPS64R6-NEXT: slt $10, $7, $5 |
| -; MIPS64R6-NEXT: selnez $8, $7, $10 |
| -; MIPS64R6-NEXT: seleqz $10, $5, $10 |
| -; MIPS64R6-NEXT: or $8, $8, $10 |
| -; MIPS64R6-NEXT: and $8, $8, $3 |
| -; MIPS64R6-NEXT: and $9, $7, $4 |
| -; MIPS64R6-NEXT: or $9, $9, $8 |
| -; MIPS64R6-NEXT: sc $9, 0($1) |
| -; MIPS64R6-NEXT: beqzc $9, .LBB5_1 |
| +; MIPS64R6-NEXT: ll $8, 0($1) |
| +; MIPS64R6-NEXT: slt $11, $8, $5 |
| +; MIPS64R6-NEXT: selnez $9, $8, $11 |
| +; MIPS64R6-NEXT: seleqz $11, $5, $11 |
| +; MIPS64R6-NEXT: or $9, $9, $11 |
| +; MIPS64R6-NEXT: and $9, $9, $3 |
| +; MIPS64R6-NEXT: and $10, $8, $6 |
| +; MIPS64R6-NEXT: or $10, $10, $9 |
| +; MIPS64R6-NEXT: sc $10, 0($1) |
| +; MIPS64R6-NEXT: beqzc $10, .LBB5_1 |
| ; MIPS64R6-NEXT: # %bb.2: # %entry |
| -; MIPS64R6-NEXT: and $6, $7, $3 |
| -; MIPS64R6-NEXT: srlv $6, $6, $2 |
| -; MIPS64R6-NEXT: seh $6, $6 |
| +; MIPS64R6-NEXT: and $7, $8, $3 |
| +; MIPS64R6-NEXT: srlv $7, $7, $2 |
| +; MIPS64R6-NEXT: seh $7, $7 |
| ; MIPS64R6-NEXT: # %bb.3: # %entry |
| -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6-NEXT: # %bb.4: # %entry |
| ; MIPS64R6-NEXT: sync |
| ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -1713,28 +1713,28 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64EL-NEXT: sll $2, $2, 3 |
| ; MIPS64EL-NEXT: ori $3, $zero, 65535 |
| ; MIPS64EL-NEXT: sllv $3, $3, $2 |
| -; MIPS64EL-NEXT: nor $4, $zero, $3 |
| +; MIPS64EL-NEXT: nor $6, $zero, $3 |
| ; MIPS64EL-NEXT: sllv $5, $5, $2 |
| ; MIPS64EL-NEXT: .LBB5_1: # %entry |
| ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64EL-NEXT: ll $7, 0($1) |
| -; MIPS64EL-NEXT: and $7, $7, $3 |
| -; MIPS64EL-NEXT: and $5, $5, $3 |
| -; MIPS64EL-NEXT: slt $10, $7, $5 |
| -; MIPS64EL-NEXT: move $8, $7 |
| -; MIPS64EL-NEXT: movz $8, $5, $10 |
| +; MIPS64EL-NEXT: ll $8, 0($1) |
| ; MIPS64EL-NEXT: and $8, $8, $3 |
| -; MIPS64EL-NEXT: and $9, $7, $4 |
| -; MIPS64EL-NEXT: or $9, $9, $8 |
| -; MIPS64EL-NEXT: sc $9, 0($1) |
| -; MIPS64EL-NEXT: beqz $9, .LBB5_1 |
| +; MIPS64EL-NEXT: and $5, $5, $3 |
| +; MIPS64EL-NEXT: slt $11, $8, $5 |
| +; MIPS64EL-NEXT: move $9, $8 |
| +; MIPS64EL-NEXT: movz $9, $5, $11 |
| +; MIPS64EL-NEXT: and $9, $9, $3 |
| +; MIPS64EL-NEXT: and $10, $8, $6 |
| +; MIPS64EL-NEXT: or $10, $10, $9 |
| +; MIPS64EL-NEXT: sc $10, 0($1) |
| +; MIPS64EL-NEXT: beqz $10, .LBB5_1 |
| ; MIPS64EL-NEXT: nop |
| ; MIPS64EL-NEXT: # %bb.2: # %entry |
| -; MIPS64EL-NEXT: and $6, $7, $3 |
| -; MIPS64EL-NEXT: srlv $6, $6, $2 |
| -; MIPS64EL-NEXT: seh $6, $6 |
| +; MIPS64EL-NEXT: and $7, $8, $3 |
| +; MIPS64EL-NEXT: srlv $7, $7, $2 |
| +; MIPS64EL-NEXT: seh $7, $7 |
| ; MIPS64EL-NEXT: # %bb.3: # %entry |
| -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64EL-NEXT: # %bb.4: # %entry |
| ; MIPS64EL-NEXT: sync |
| ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -1754,28 +1754,28 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64ELR6-NEXT: sll $2, $2, 3 |
| ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 |
| ; MIPS64ELR6-NEXT: sllv $3, $3, $2 |
| -; MIPS64ELR6-NEXT: nor $4, $zero, $3 |
| +; MIPS64ELR6-NEXT: nor $6, $zero, $3 |
| ; MIPS64ELR6-NEXT: sllv $5, $5, $2 |
| ; MIPS64ELR6-NEXT: .LBB5_1: # %entry |
| ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64ELR6-NEXT: ll $7, 0($1) |
| -; MIPS64ELR6-NEXT: and $7, $7, $3 |
| -; MIPS64ELR6-NEXT: and $5, $5, $3 |
| -; MIPS64ELR6-NEXT: slt $10, $7, $5 |
| -; MIPS64ELR6-NEXT: selnez $8, $7, $10 |
| -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 |
| -; MIPS64ELR6-NEXT: or $8, $8, $10 |
| +; MIPS64ELR6-NEXT: ll $8, 0($1) |
| ; MIPS64ELR6-NEXT: and $8, $8, $3 |
| -; MIPS64ELR6-NEXT: and $9, $7, $4 |
| -; MIPS64ELR6-NEXT: or $9, $9, $8 |
| -; MIPS64ELR6-NEXT: sc $9, 0($1) |
| -; MIPS64ELR6-NEXT: beqzc $9, .LBB5_1 |
| +; MIPS64ELR6-NEXT: and $5, $5, $3 |
| +; MIPS64ELR6-NEXT: slt $11, $8, $5 |
| +; MIPS64ELR6-NEXT: selnez $9, $8, $11 |
| +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 |
| +; MIPS64ELR6-NEXT: or $9, $9, $11 |
| +; MIPS64ELR6-NEXT: and $9, $9, $3 |
| +; MIPS64ELR6-NEXT: and $10, $8, $6 |
| +; MIPS64ELR6-NEXT: or $10, $10, $9 |
| +; MIPS64ELR6-NEXT: sc $10, 0($1) |
| +; MIPS64ELR6-NEXT: beqzc $10, .LBB5_1 |
| ; MIPS64ELR6-NEXT: # %bb.2: # %entry |
| -; MIPS64ELR6-NEXT: and $6, $7, $3 |
| -; MIPS64ELR6-NEXT: srlv $6, $6, $2 |
| -; MIPS64ELR6-NEXT: seh $6, $6 |
| +; MIPS64ELR6-NEXT: and $7, $8, $3 |
| +; MIPS64ELR6-NEXT: srlv $7, $7, $2 |
| +; MIPS64ELR6-NEXT: seh $7, $7 |
| ; MIPS64ELR6-NEXT: # %bb.3: # %entry |
| -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64ELR6-NEXT: # %bb.4: # %entry |
| ; MIPS64ELR6-NEXT: sync |
| ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -2116,26 +2116,26 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64-NEXT: sll $2, $2, 3 |
| ; MIPS64-NEXT: ori $3, $zero, 65535 |
| ; MIPS64-NEXT: sllv $3, $3, $2 |
| -; MIPS64-NEXT: nor $4, $zero, $3 |
| +; MIPS64-NEXT: nor $6, $zero, $3 |
| ; MIPS64-NEXT: sllv $5, $5, $2 |
| ; MIPS64-NEXT: .LBB6_1: # %entry |
| ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64-NEXT: ll $7, 0($1) |
| -; MIPS64-NEXT: sltu $10, $7, $5 |
| -; MIPS64-NEXT: move $8, $7 |
| -; MIPS64-NEXT: movn $8, $5, $10 |
| -; MIPS64-NEXT: and $8, $8, $3 |
| -; MIPS64-NEXT: and $9, $7, $4 |
| -; MIPS64-NEXT: or $9, $9, $8 |
| -; MIPS64-NEXT: sc $9, 0($1) |
| -; MIPS64-NEXT: beqz $9, .LBB6_1 |
| +; MIPS64-NEXT: ll $8, 0($1) |
| +; MIPS64-NEXT: sltu $11, $8, $5 |
| +; MIPS64-NEXT: move $9, $8 |
| +; MIPS64-NEXT: movn $9, $5, $11 |
| +; MIPS64-NEXT: and $9, $9, $3 |
| +; MIPS64-NEXT: and $10, $8, $6 |
| +; MIPS64-NEXT: or $10, $10, $9 |
| +; MIPS64-NEXT: sc $10, 0($1) |
| +; MIPS64-NEXT: beqz $10, .LBB6_1 |
| ; MIPS64-NEXT: nop |
| ; MIPS64-NEXT: # %bb.2: # %entry |
| -; MIPS64-NEXT: and $6, $7, $3 |
| -; MIPS64-NEXT: srlv $6, $6, $2 |
| -; MIPS64-NEXT: seh $6, $6 |
| +; MIPS64-NEXT: and $7, $8, $3 |
| +; MIPS64-NEXT: srlv $7, $7, $2 |
| +; MIPS64-NEXT: seh $7, $7 |
| ; MIPS64-NEXT: # %bb.3: # %entry |
| -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64-NEXT: # %bb.4: # %entry |
| ; MIPS64-NEXT: sync |
| ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -2156,26 +2156,26 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64R6-NEXT: sll $2, $2, 3 |
| ; MIPS64R6-NEXT: ori $3, $zero, 65535 |
| ; MIPS64R6-NEXT: sllv $3, $3, $2 |
| -; MIPS64R6-NEXT: nor $4, $zero, $3 |
| +; MIPS64R6-NEXT: nor $6, $zero, $3 |
| ; MIPS64R6-NEXT: sllv $5, $5, $2 |
| ; MIPS64R6-NEXT: .LBB6_1: # %entry |
| ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6-NEXT: ll $7, 0($1) |
| -; MIPS64R6-NEXT: sltu $10, $7, $5 |
| -; MIPS64R6-NEXT: seleqz $8, $7, $10 |
| -; MIPS64R6-NEXT: selnez $10, $5, $10 |
| -; MIPS64R6-NEXT: or $8, $8, $10 |
| -; MIPS64R6-NEXT: and $8, $8, $3 |
| -; MIPS64R6-NEXT: and $9, $7, $4 |
| -; MIPS64R6-NEXT: or $9, $9, $8 |
| -; MIPS64R6-NEXT: sc $9, 0($1) |
| -; MIPS64R6-NEXT: beqzc $9, .LBB6_1 |
| +; MIPS64R6-NEXT: ll $8, 0($1) |
| +; MIPS64R6-NEXT: sltu $11, $8, $5 |
| +; MIPS64R6-NEXT: seleqz $9, $8, $11 |
| +; MIPS64R6-NEXT: selnez $11, $5, $11 |
| +; MIPS64R6-NEXT: or $9, $9, $11 |
| +; MIPS64R6-NEXT: and $9, $9, $3 |
| +; MIPS64R6-NEXT: and $10, $8, $6 |
| +; MIPS64R6-NEXT: or $10, $10, $9 |
| +; MIPS64R6-NEXT: sc $10, 0($1) |
| +; MIPS64R6-NEXT: beqzc $10, .LBB6_1 |
| ; MIPS64R6-NEXT: # %bb.2: # %entry |
| -; MIPS64R6-NEXT: and $6, $7, $3 |
| -; MIPS64R6-NEXT: srlv $6, $6, $2 |
| -; MIPS64R6-NEXT: seh $6, $6 |
| +; MIPS64R6-NEXT: and $7, $8, $3 |
| +; MIPS64R6-NEXT: srlv $7, $7, $2 |
| +; MIPS64R6-NEXT: seh $7, $7 |
| ; MIPS64R6-NEXT: # %bb.3: # %entry |
| -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6-NEXT: # %bb.4: # %entry |
| ; MIPS64R6-NEXT: sync |
| ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -2194,28 +2194,28 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64EL-NEXT: sll $2, $2, 3 |
| ; MIPS64EL-NEXT: ori $3, $zero, 65535 |
| ; MIPS64EL-NEXT: sllv $3, $3, $2 |
| -; MIPS64EL-NEXT: nor $4, $zero, $3 |
| +; MIPS64EL-NEXT: nor $6, $zero, $3 |
| ; MIPS64EL-NEXT: sllv $5, $5, $2 |
| ; MIPS64EL-NEXT: .LBB6_1: # %entry |
| ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64EL-NEXT: ll $7, 0($1) |
| -; MIPS64EL-NEXT: and $7, $7, $3 |
| -; MIPS64EL-NEXT: and $5, $5, $3 |
| -; MIPS64EL-NEXT: sltu $10, $7, $5 |
| -; MIPS64EL-NEXT: move $8, $7 |
| -; MIPS64EL-NEXT: movn $8, $5, $10 |
| +; MIPS64EL-NEXT: ll $8, 0($1) |
| ; MIPS64EL-NEXT: and $8, $8, $3 |
| -; MIPS64EL-NEXT: and $9, $7, $4 |
| -; MIPS64EL-NEXT: or $9, $9, $8 |
| -; MIPS64EL-NEXT: sc $9, 0($1) |
| -; MIPS64EL-NEXT: beqz $9, .LBB6_1 |
| +; MIPS64EL-NEXT: and $5, $5, $3 |
| +; MIPS64EL-NEXT: sltu $11, $8, $5 |
| +; MIPS64EL-NEXT: move $9, $8 |
| +; MIPS64EL-NEXT: movn $9, $5, $11 |
| +; MIPS64EL-NEXT: and $9, $9, $3 |
| +; MIPS64EL-NEXT: and $10, $8, $6 |
| +; MIPS64EL-NEXT: or $10, $10, $9 |
| +; MIPS64EL-NEXT: sc $10, 0($1) |
| +; MIPS64EL-NEXT: beqz $10, .LBB6_1 |
| ; MIPS64EL-NEXT: nop |
| ; MIPS64EL-NEXT: # %bb.2: # %entry |
| -; MIPS64EL-NEXT: and $6, $7, $3 |
| -; MIPS64EL-NEXT: srlv $6, $6, $2 |
| -; MIPS64EL-NEXT: seh $6, $6 |
| +; MIPS64EL-NEXT: and $7, $8, $3 |
| +; MIPS64EL-NEXT: srlv $7, $7, $2 |
| +; MIPS64EL-NEXT: seh $7, $7 |
| ; MIPS64EL-NEXT: # %bb.3: # %entry |
| -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64EL-NEXT: # %bb.4: # %entry |
| ; MIPS64EL-NEXT: sync |
| ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -2235,28 +2235,28 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64ELR6-NEXT: sll $2, $2, 3 |
| ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 |
| ; MIPS64ELR6-NEXT: sllv $3, $3, $2 |
| -; MIPS64ELR6-NEXT: nor $4, $zero, $3 |
| +; MIPS64ELR6-NEXT: nor $6, $zero, $3 |
| ; MIPS64ELR6-NEXT: sllv $5, $5, $2 |
| ; MIPS64ELR6-NEXT: .LBB6_1: # %entry |
| ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64ELR6-NEXT: ll $7, 0($1) |
| -; MIPS64ELR6-NEXT: and $7, $7, $3 |
| -; MIPS64ELR6-NEXT: and $5, $5, $3 |
| -; MIPS64ELR6-NEXT: sltu $10, $7, $5 |
| -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 |
| -; MIPS64ELR6-NEXT: selnez $10, $5, $10 |
| -; MIPS64ELR6-NEXT: or $8, $8, $10 |
| +; MIPS64ELR6-NEXT: ll $8, 0($1) |
| ; MIPS64ELR6-NEXT: and $8, $8, $3 |
| -; MIPS64ELR6-NEXT: and $9, $7, $4 |
| -; MIPS64ELR6-NEXT: or $9, $9, $8 |
| -; MIPS64ELR6-NEXT: sc $9, 0($1) |
| -; MIPS64ELR6-NEXT: beqzc $9, .LBB6_1 |
| +; MIPS64ELR6-NEXT: and $5, $5, $3 |
| +; MIPS64ELR6-NEXT: sltu $11, $8, $5 |
| +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 |
| +; MIPS64ELR6-NEXT: selnez $11, $5, $11 |
| +; MIPS64ELR6-NEXT: or $9, $9, $11 |
| +; MIPS64ELR6-NEXT: and $9, $9, $3 |
| +; MIPS64ELR6-NEXT: and $10, $8, $6 |
| +; MIPS64ELR6-NEXT: or $10, $10, $9 |
| +; MIPS64ELR6-NEXT: sc $10, 0($1) |
| +; MIPS64ELR6-NEXT: beqzc $10, .LBB6_1 |
| ; MIPS64ELR6-NEXT: # %bb.2: # %entry |
| -; MIPS64ELR6-NEXT: and $6, $7, $3 |
| -; MIPS64ELR6-NEXT: srlv $6, $6, $2 |
| -; MIPS64ELR6-NEXT: seh $6, $6 |
| +; MIPS64ELR6-NEXT: and $7, $8, $3 |
| +; MIPS64ELR6-NEXT: srlv $7, $7, $2 |
| +; MIPS64ELR6-NEXT: seh $7, $7 |
| ; MIPS64ELR6-NEXT: # %bb.3: # %entry |
| -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64ELR6-NEXT: # %bb.4: # %entry |
| ; MIPS64ELR6-NEXT: sync |
| ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -2597,26 +2597,26 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64-NEXT: sll $2, $2, 3 |
| ; MIPS64-NEXT: ori $3, $zero, 65535 |
| ; MIPS64-NEXT: sllv $3, $3, $2 |
| -; MIPS64-NEXT: nor $4, $zero, $3 |
| +; MIPS64-NEXT: nor $6, $zero, $3 |
| ; MIPS64-NEXT: sllv $5, $5, $2 |
| ; MIPS64-NEXT: .LBB7_1: # %entry |
| ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64-NEXT: ll $7, 0($1) |
| -; MIPS64-NEXT: sltu $10, $7, $5 |
| -; MIPS64-NEXT: move $8, $7 |
| -; MIPS64-NEXT: movz $8, $5, $10 |
| -; MIPS64-NEXT: and $8, $8, $3 |
| -; MIPS64-NEXT: and $9, $7, $4 |
| -; MIPS64-NEXT: or $9, $9, $8 |
| -; MIPS64-NEXT: sc $9, 0($1) |
| -; MIPS64-NEXT: beqz $9, .LBB7_1 |
| +; MIPS64-NEXT: ll $8, 0($1) |
| +; MIPS64-NEXT: sltu $11, $8, $5 |
| +; MIPS64-NEXT: move $9, $8 |
| +; MIPS64-NEXT: movz $9, $5, $11 |
| +; MIPS64-NEXT: and $9, $9, $3 |
| +; MIPS64-NEXT: and $10, $8, $6 |
| +; MIPS64-NEXT: or $10, $10, $9 |
| +; MIPS64-NEXT: sc $10, 0($1) |
| +; MIPS64-NEXT: beqz $10, .LBB7_1 |
| ; MIPS64-NEXT: nop |
| ; MIPS64-NEXT: # %bb.2: # %entry |
| -; MIPS64-NEXT: and $6, $7, $3 |
| -; MIPS64-NEXT: srlv $6, $6, $2 |
| -; MIPS64-NEXT: seh $6, $6 |
| +; MIPS64-NEXT: and $7, $8, $3 |
| +; MIPS64-NEXT: srlv $7, $7, $2 |
| +; MIPS64-NEXT: seh $7, $7 |
| ; MIPS64-NEXT: # %bb.3: # %entry |
| -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64-NEXT: # %bb.4: # %entry |
| ; MIPS64-NEXT: sync |
| ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -2637,26 +2637,26 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64R6-NEXT: sll $2, $2, 3 |
| ; MIPS64R6-NEXT: ori $3, $zero, 65535 |
| ; MIPS64R6-NEXT: sllv $3, $3, $2 |
| -; MIPS64R6-NEXT: nor $4, $zero, $3 |
| +; MIPS64R6-NEXT: nor $6, $zero, $3 |
| ; MIPS64R6-NEXT: sllv $5, $5, $2 |
| ; MIPS64R6-NEXT: .LBB7_1: # %entry |
| ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6-NEXT: ll $7, 0($1) |
| -; MIPS64R6-NEXT: sltu $10, $7, $5 |
| -; MIPS64R6-NEXT: selnez $8, $7, $10 |
| -; MIPS64R6-NEXT: seleqz $10, $5, $10 |
| -; MIPS64R6-NEXT: or $8, $8, $10 |
| -; MIPS64R6-NEXT: and $8, $8, $3 |
| -; MIPS64R6-NEXT: and $9, $7, $4 |
| -; MIPS64R6-NEXT: or $9, $9, $8 |
| -; MIPS64R6-NEXT: sc $9, 0($1) |
| -; MIPS64R6-NEXT: beqzc $9, .LBB7_1 |
| +; MIPS64R6-NEXT: ll $8, 0($1) |
| +; MIPS64R6-NEXT: sltu $11, $8, $5 |
| +; MIPS64R6-NEXT: selnez $9, $8, $11 |
| +; MIPS64R6-NEXT: seleqz $11, $5, $11 |
| +; MIPS64R6-NEXT: or $9, $9, $11 |
| +; MIPS64R6-NEXT: and $9, $9, $3 |
| +; MIPS64R6-NEXT: and $10, $8, $6 |
| +; MIPS64R6-NEXT: or $10, $10, $9 |
| +; MIPS64R6-NEXT: sc $10, 0($1) |
| +; MIPS64R6-NEXT: beqzc $10, .LBB7_1 |
| ; MIPS64R6-NEXT: # %bb.2: # %entry |
| -; MIPS64R6-NEXT: and $6, $7, $3 |
| -; MIPS64R6-NEXT: srlv $6, $6, $2 |
| -; MIPS64R6-NEXT: seh $6, $6 |
| +; MIPS64R6-NEXT: and $7, $8, $3 |
| +; MIPS64R6-NEXT: srlv $7, $7, $2 |
| +; MIPS64R6-NEXT: seh $7, $7 |
| ; MIPS64R6-NEXT: # %bb.3: # %entry |
| -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6-NEXT: # %bb.4: # %entry |
| ; MIPS64R6-NEXT: sync |
| ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -2675,28 +2675,28 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64EL-NEXT: sll $2, $2, 3 |
| ; MIPS64EL-NEXT: ori $3, $zero, 65535 |
| ; MIPS64EL-NEXT: sllv $3, $3, $2 |
| -; MIPS64EL-NEXT: nor $4, $zero, $3 |
| +; MIPS64EL-NEXT: nor $6, $zero, $3 |
| ; MIPS64EL-NEXT: sllv $5, $5, $2 |
| ; MIPS64EL-NEXT: .LBB7_1: # %entry |
| ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64EL-NEXT: ll $7, 0($1) |
| -; MIPS64EL-NEXT: and $7, $7, $3 |
| -; MIPS64EL-NEXT: and $5, $5, $3 |
| -; MIPS64EL-NEXT: sltu $10, $7, $5 |
| -; MIPS64EL-NEXT: move $8, $7 |
| -; MIPS64EL-NEXT: movz $8, $5, $10 |
| +; MIPS64EL-NEXT: ll $8, 0($1) |
| ; MIPS64EL-NEXT: and $8, $8, $3 |
| -; MIPS64EL-NEXT: and $9, $7, $4 |
| -; MIPS64EL-NEXT: or $9, $9, $8 |
| -; MIPS64EL-NEXT: sc $9, 0($1) |
| -; MIPS64EL-NEXT: beqz $9, .LBB7_1 |
| +; MIPS64EL-NEXT: and $5, $5, $3 |
| +; MIPS64EL-NEXT: sltu $11, $8, $5 |
| +; MIPS64EL-NEXT: move $9, $8 |
| +; MIPS64EL-NEXT: movz $9, $5, $11 |
| +; MIPS64EL-NEXT: and $9, $9, $3 |
| +; MIPS64EL-NEXT: and $10, $8, $6 |
| +; MIPS64EL-NEXT: or $10, $10, $9 |
| +; MIPS64EL-NEXT: sc $10, 0($1) |
| +; MIPS64EL-NEXT: beqz $10, .LBB7_1 |
| ; MIPS64EL-NEXT: nop |
| ; MIPS64EL-NEXT: # %bb.2: # %entry |
| -; MIPS64EL-NEXT: and $6, $7, $3 |
| -; MIPS64EL-NEXT: srlv $6, $6, $2 |
| -; MIPS64EL-NEXT: seh $6, $6 |
| +; MIPS64EL-NEXT: and $7, $8, $3 |
| +; MIPS64EL-NEXT: srlv $7, $7, $2 |
| +; MIPS64EL-NEXT: seh $7, $7 |
| ; MIPS64EL-NEXT: # %bb.3: # %entry |
| -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64EL-NEXT: # %bb.4: # %entry |
| ; MIPS64EL-NEXT: sync |
| ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -2716,28 +2716,28 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { |
| ; MIPS64ELR6-NEXT: sll $2, $2, 3 |
| ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 |
| ; MIPS64ELR6-NEXT: sllv $3, $3, $2 |
| -; MIPS64ELR6-NEXT: nor $4, $zero, $3 |
| +; MIPS64ELR6-NEXT: nor $6, $zero, $3 |
| ; MIPS64ELR6-NEXT: sllv $5, $5, $2 |
| ; MIPS64ELR6-NEXT: .LBB7_1: # %entry |
| ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64ELR6-NEXT: ll $7, 0($1) |
| -; MIPS64ELR6-NEXT: and $7, $7, $3 |
| -; MIPS64ELR6-NEXT: and $5, $5, $3 |
| -; MIPS64ELR6-NEXT: sltu $10, $7, $5 |
| -; MIPS64ELR6-NEXT: selnez $8, $7, $10 |
| -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 |
| -; MIPS64ELR6-NEXT: or $8, $8, $10 |
| +; MIPS64ELR6-NEXT: ll $8, 0($1) |
| ; MIPS64ELR6-NEXT: and $8, $8, $3 |
| -; MIPS64ELR6-NEXT: and $9, $7, $4 |
| -; MIPS64ELR6-NEXT: or $9, $9, $8 |
| -; MIPS64ELR6-NEXT: sc $9, 0($1) |
| -; MIPS64ELR6-NEXT: beqzc $9, .LBB7_1 |
| +; MIPS64ELR6-NEXT: and $5, $5, $3 |
| +; MIPS64ELR6-NEXT: sltu $11, $8, $5 |
| +; MIPS64ELR6-NEXT: selnez $9, $8, $11 |
| +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 |
| +; MIPS64ELR6-NEXT: or $9, $9, $11 |
| +; MIPS64ELR6-NEXT: and $9, $9, $3 |
| +; MIPS64ELR6-NEXT: and $10, $8, $6 |
| +; MIPS64ELR6-NEXT: or $10, $10, $9 |
| +; MIPS64ELR6-NEXT: sc $10, 0($1) |
| +; MIPS64ELR6-NEXT: beqzc $10, .LBB7_1 |
| ; MIPS64ELR6-NEXT: # %bb.2: # %entry |
| -; MIPS64ELR6-NEXT: and $6, $7, $3 |
| -; MIPS64ELR6-NEXT: srlv $6, $6, $2 |
| -; MIPS64ELR6-NEXT: seh $6, $6 |
| +; MIPS64ELR6-NEXT: and $7, $8, $3 |
| +; MIPS64ELR6-NEXT: srlv $7, $7, $2 |
| +; MIPS64ELR6-NEXT: seh $7, $7 |
| ; MIPS64ELR6-NEXT: # %bb.3: # %entry |
| -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64ELR6-NEXT: # %bb.4: # %entry |
| ; MIPS64ELR6-NEXT: sync |
| ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -3079,26 +3079,26 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64-NEXT: sll $2, $2, 3 |
| ; MIPS64-NEXT: ori $3, $zero, 255 |
| ; MIPS64-NEXT: sllv $3, $3, $2 |
| -; MIPS64-NEXT: nor $4, $zero, $3 |
| +; MIPS64-NEXT: nor $6, $zero, $3 |
| ; MIPS64-NEXT: sllv $5, $5, $2 |
| ; MIPS64-NEXT: .LBB8_1: # %entry |
| ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64-NEXT: ll $7, 0($1) |
| -; MIPS64-NEXT: slt $10, $7, $5 |
| -; MIPS64-NEXT: move $8, $7 |
| -; MIPS64-NEXT: movn $8, $5, $10 |
| -; MIPS64-NEXT: and $8, $8, $3 |
| -; MIPS64-NEXT: and $9, $7, $4 |
| -; MIPS64-NEXT: or $9, $9, $8 |
| -; MIPS64-NEXT: sc $9, 0($1) |
| -; MIPS64-NEXT: beqz $9, .LBB8_1 |
| +; MIPS64-NEXT: ll $8, 0($1) |
| +; MIPS64-NEXT: slt $11, $8, $5 |
| +; MIPS64-NEXT: move $9, $8 |
| +; MIPS64-NEXT: movn $9, $5, $11 |
| +; MIPS64-NEXT: and $9, $9, $3 |
| +; MIPS64-NEXT: and $10, $8, $6 |
| +; MIPS64-NEXT: or $10, $10, $9 |
| +; MIPS64-NEXT: sc $10, 0($1) |
| +; MIPS64-NEXT: beqz $10, .LBB8_1 |
| ; MIPS64-NEXT: nop |
| ; MIPS64-NEXT: # %bb.2: # %entry |
| -; MIPS64-NEXT: and $6, $7, $3 |
| -; MIPS64-NEXT: srlv $6, $6, $2 |
| -; MIPS64-NEXT: seh $6, $6 |
| +; MIPS64-NEXT: and $7, $8, $3 |
| +; MIPS64-NEXT: srlv $7, $7, $2 |
| +; MIPS64-NEXT: seh $7, $7 |
| ; MIPS64-NEXT: # %bb.3: # %entry |
| -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64-NEXT: # %bb.4: # %entry |
| ; MIPS64-NEXT: sync |
| ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -3119,26 +3119,26 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64R6-NEXT: sll $2, $2, 3 |
| ; MIPS64R6-NEXT: ori $3, $zero, 255 |
| ; MIPS64R6-NEXT: sllv $3, $3, $2 |
| -; MIPS64R6-NEXT: nor $4, $zero, $3 |
| +; MIPS64R6-NEXT: nor $6, $zero, $3 |
| ; MIPS64R6-NEXT: sllv $5, $5, $2 |
| ; MIPS64R6-NEXT: .LBB8_1: # %entry |
| ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6-NEXT: ll $7, 0($1) |
| -; MIPS64R6-NEXT: slt $10, $7, $5 |
| -; MIPS64R6-NEXT: seleqz $8, $7, $10 |
| -; MIPS64R6-NEXT: selnez $10, $5, $10 |
| -; MIPS64R6-NEXT: or $8, $8, $10 |
| -; MIPS64R6-NEXT: and $8, $8, $3 |
| -; MIPS64R6-NEXT: and $9, $7, $4 |
| -; MIPS64R6-NEXT: or $9, $9, $8 |
| -; MIPS64R6-NEXT: sc $9, 0($1) |
| -; MIPS64R6-NEXT: beqzc $9, .LBB8_1 |
| +; MIPS64R6-NEXT: ll $8, 0($1) |
| +; MIPS64R6-NEXT: slt $11, $8, $5 |
| +; MIPS64R6-NEXT: seleqz $9, $8, $11 |
| +; MIPS64R6-NEXT: selnez $11, $5, $11 |
| +; MIPS64R6-NEXT: or $9, $9, $11 |
| +; MIPS64R6-NEXT: and $9, $9, $3 |
| +; MIPS64R6-NEXT: and $10, $8, $6 |
| +; MIPS64R6-NEXT: or $10, $10, $9 |
| +; MIPS64R6-NEXT: sc $10, 0($1) |
| +; MIPS64R6-NEXT: beqzc $10, .LBB8_1 |
| ; MIPS64R6-NEXT: # %bb.2: # %entry |
| -; MIPS64R6-NEXT: and $6, $7, $3 |
| -; MIPS64R6-NEXT: srlv $6, $6, $2 |
| -; MIPS64R6-NEXT: seh $6, $6 |
| +; MIPS64R6-NEXT: and $7, $8, $3 |
| +; MIPS64R6-NEXT: srlv $7, $7, $2 |
| +; MIPS64R6-NEXT: seh $7, $7 |
| ; MIPS64R6-NEXT: # %bb.3: # %entry |
| -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6-NEXT: # %bb.4: # %entry |
| ; MIPS64R6-NEXT: sync |
| ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -3157,28 +3157,28 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64EL-NEXT: sll $2, $2, 3 |
| ; MIPS64EL-NEXT: ori $3, $zero, 255 |
| ; MIPS64EL-NEXT: sllv $3, $3, $2 |
| -; MIPS64EL-NEXT: nor $4, $zero, $3 |
| +; MIPS64EL-NEXT: nor $6, $zero, $3 |
| ; MIPS64EL-NEXT: sllv $5, $5, $2 |
| ; MIPS64EL-NEXT: .LBB8_1: # %entry |
| ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64EL-NEXT: ll $7, 0($1) |
| -; MIPS64EL-NEXT: and $7, $7, $3 |
| -; MIPS64EL-NEXT: and $5, $5, $3 |
| -; MIPS64EL-NEXT: slt $10, $7, $5 |
| -; MIPS64EL-NEXT: move $8, $7 |
| -; MIPS64EL-NEXT: movn $8, $5, $10 |
| +; MIPS64EL-NEXT: ll $8, 0($1) |
| ; MIPS64EL-NEXT: and $8, $8, $3 |
| -; MIPS64EL-NEXT: and $9, $7, $4 |
| -; MIPS64EL-NEXT: or $9, $9, $8 |
| -; MIPS64EL-NEXT: sc $9, 0($1) |
| -; MIPS64EL-NEXT: beqz $9, .LBB8_1 |
| +; MIPS64EL-NEXT: and $5, $5, $3 |
| +; MIPS64EL-NEXT: slt $11, $8, $5 |
| +; MIPS64EL-NEXT: move $9, $8 |
| +; MIPS64EL-NEXT: movn $9, $5, $11 |
| +; MIPS64EL-NEXT: and $9, $9, $3 |
| +; MIPS64EL-NEXT: and $10, $8, $6 |
| +; MIPS64EL-NEXT: or $10, $10, $9 |
| +; MIPS64EL-NEXT: sc $10, 0($1) |
| +; MIPS64EL-NEXT: beqz $10, .LBB8_1 |
| ; MIPS64EL-NEXT: nop |
| ; MIPS64EL-NEXT: # %bb.2: # %entry |
| -; MIPS64EL-NEXT: and $6, $7, $3 |
| -; MIPS64EL-NEXT: srlv $6, $6, $2 |
| -; MIPS64EL-NEXT: seh $6, $6 |
| +; MIPS64EL-NEXT: and $7, $8, $3 |
| +; MIPS64EL-NEXT: srlv $7, $7, $2 |
| +; MIPS64EL-NEXT: seh $7, $7 |
| ; MIPS64EL-NEXT: # %bb.3: # %entry |
| -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64EL-NEXT: # %bb.4: # %entry |
| ; MIPS64EL-NEXT: sync |
| ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -3198,28 +3198,28 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64ELR6-NEXT: sll $2, $2, 3 |
| ; MIPS64ELR6-NEXT: ori $3, $zero, 255 |
| ; MIPS64ELR6-NEXT: sllv $3, $3, $2 |
| -; MIPS64ELR6-NEXT: nor $4, $zero, $3 |
| +; MIPS64ELR6-NEXT: nor $6, $zero, $3 |
| ; MIPS64ELR6-NEXT: sllv $5, $5, $2 |
| ; MIPS64ELR6-NEXT: .LBB8_1: # %entry |
| ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64ELR6-NEXT: ll $7, 0($1) |
| -; MIPS64ELR6-NEXT: and $7, $7, $3 |
| -; MIPS64ELR6-NEXT: and $5, $5, $3 |
| -; MIPS64ELR6-NEXT: slt $10, $7, $5 |
| -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 |
| -; MIPS64ELR6-NEXT: selnez $10, $5, $10 |
| -; MIPS64ELR6-NEXT: or $8, $8, $10 |
| +; MIPS64ELR6-NEXT: ll $8, 0($1) |
| ; MIPS64ELR6-NEXT: and $8, $8, $3 |
| -; MIPS64ELR6-NEXT: and $9, $7, $4 |
| -; MIPS64ELR6-NEXT: or $9, $9, $8 |
| -; MIPS64ELR6-NEXT: sc $9, 0($1) |
| -; MIPS64ELR6-NEXT: beqzc $9, .LBB8_1 |
| +; MIPS64ELR6-NEXT: and $5, $5, $3 |
| +; MIPS64ELR6-NEXT: slt $11, $8, $5 |
| +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 |
| +; MIPS64ELR6-NEXT: selnez $11, $5, $11 |
| +; MIPS64ELR6-NEXT: or $9, $9, $11 |
| +; MIPS64ELR6-NEXT: and $9, $9, $3 |
| +; MIPS64ELR6-NEXT: and $10, $8, $6 |
| +; MIPS64ELR6-NEXT: or $10, $10, $9 |
| +; MIPS64ELR6-NEXT: sc $10, 0($1) |
| +; MIPS64ELR6-NEXT: beqzc $10, .LBB8_1 |
| ; MIPS64ELR6-NEXT: # %bb.2: # %entry |
| -; MIPS64ELR6-NEXT: and $6, $7, $3 |
| -; MIPS64ELR6-NEXT: srlv $6, $6, $2 |
| -; MIPS64ELR6-NEXT: seh $6, $6 |
| +; MIPS64ELR6-NEXT: and $7, $8, $3 |
| +; MIPS64ELR6-NEXT: srlv $7, $7, $2 |
| +; MIPS64ELR6-NEXT: seh $7, $7 |
| ; MIPS64ELR6-NEXT: # %bb.3: # %entry |
| -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64ELR6-NEXT: # %bb.4: # %entry |
| ; MIPS64ELR6-NEXT: sync |
| ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -3560,26 +3560,26 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64-NEXT: sll $2, $2, 3 |
| ; MIPS64-NEXT: ori $3, $zero, 255 |
| ; MIPS64-NEXT: sllv $3, $3, $2 |
| -; MIPS64-NEXT: nor $4, $zero, $3 |
| +; MIPS64-NEXT: nor $6, $zero, $3 |
| ; MIPS64-NEXT: sllv $5, $5, $2 |
| ; MIPS64-NEXT: .LBB9_1: # %entry |
| ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64-NEXT: ll $7, 0($1) |
| -; MIPS64-NEXT: slt $10, $7, $5 |
| -; MIPS64-NEXT: move $8, $7 |
| -; MIPS64-NEXT: movz $8, $5, $10 |
| -; MIPS64-NEXT: and $8, $8, $3 |
| -; MIPS64-NEXT: and $9, $7, $4 |
| -; MIPS64-NEXT: or $9, $9, $8 |
| -; MIPS64-NEXT: sc $9, 0($1) |
| -; MIPS64-NEXT: beqz $9, .LBB9_1 |
| +; MIPS64-NEXT: ll $8, 0($1) |
| +; MIPS64-NEXT: slt $11, $8, $5 |
| +; MIPS64-NEXT: move $9, $8 |
| +; MIPS64-NEXT: movz $9, $5, $11 |
| +; MIPS64-NEXT: and $9, $9, $3 |
| +; MIPS64-NEXT: and $10, $8, $6 |
| +; MIPS64-NEXT: or $10, $10, $9 |
| +; MIPS64-NEXT: sc $10, 0($1) |
| +; MIPS64-NEXT: beqz $10, .LBB9_1 |
| ; MIPS64-NEXT: nop |
| ; MIPS64-NEXT: # %bb.2: # %entry |
| -; MIPS64-NEXT: and $6, $7, $3 |
| -; MIPS64-NEXT: srlv $6, $6, $2 |
| -; MIPS64-NEXT: seh $6, $6 |
| +; MIPS64-NEXT: and $7, $8, $3 |
| +; MIPS64-NEXT: srlv $7, $7, $2 |
| +; MIPS64-NEXT: seh $7, $7 |
| ; MIPS64-NEXT: # %bb.3: # %entry |
| -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64-NEXT: # %bb.4: # %entry |
| ; MIPS64-NEXT: sync |
| ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -3600,26 +3600,26 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64R6-NEXT: sll $2, $2, 3 |
| ; MIPS64R6-NEXT: ori $3, $zero, 255 |
| ; MIPS64R6-NEXT: sllv $3, $3, $2 |
| -; MIPS64R6-NEXT: nor $4, $zero, $3 |
| +; MIPS64R6-NEXT: nor $6, $zero, $3 |
| ; MIPS64R6-NEXT: sllv $5, $5, $2 |
| ; MIPS64R6-NEXT: .LBB9_1: # %entry |
| ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6-NEXT: ll $7, 0($1) |
| -; MIPS64R6-NEXT: slt $10, $7, $5 |
| -; MIPS64R6-NEXT: selnez $8, $7, $10 |
| -; MIPS64R6-NEXT: seleqz $10, $5, $10 |
| -; MIPS64R6-NEXT: or $8, $8, $10 |
| -; MIPS64R6-NEXT: and $8, $8, $3 |
| -; MIPS64R6-NEXT: and $9, $7, $4 |
| -; MIPS64R6-NEXT: or $9, $9, $8 |
| -; MIPS64R6-NEXT: sc $9, 0($1) |
| -; MIPS64R6-NEXT: beqzc $9, .LBB9_1 |
| +; MIPS64R6-NEXT: ll $8, 0($1) |
| +; MIPS64R6-NEXT: slt $11, $8, $5 |
| +; MIPS64R6-NEXT: selnez $9, $8, $11 |
| +; MIPS64R6-NEXT: seleqz $11, $5, $11 |
| +; MIPS64R6-NEXT: or $9, $9, $11 |
| +; MIPS64R6-NEXT: and $9, $9, $3 |
| +; MIPS64R6-NEXT: and $10, $8, $6 |
| +; MIPS64R6-NEXT: or $10, $10, $9 |
| +; MIPS64R6-NEXT: sc $10, 0($1) |
| +; MIPS64R6-NEXT: beqzc $10, .LBB9_1 |
| ; MIPS64R6-NEXT: # %bb.2: # %entry |
| -; MIPS64R6-NEXT: and $6, $7, $3 |
| -; MIPS64R6-NEXT: srlv $6, $6, $2 |
| -; MIPS64R6-NEXT: seh $6, $6 |
| +; MIPS64R6-NEXT: and $7, $8, $3 |
| +; MIPS64R6-NEXT: srlv $7, $7, $2 |
| +; MIPS64R6-NEXT: seh $7, $7 |
| ; MIPS64R6-NEXT: # %bb.3: # %entry |
| -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6-NEXT: # %bb.4: # %entry |
| ; MIPS64R6-NEXT: sync |
| ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -3638,28 +3638,28 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64EL-NEXT: sll $2, $2, 3 |
| ; MIPS64EL-NEXT: ori $3, $zero, 255 |
| ; MIPS64EL-NEXT: sllv $3, $3, $2 |
| -; MIPS64EL-NEXT: nor $4, $zero, $3 |
| +; MIPS64EL-NEXT: nor $6, $zero, $3 |
| ; MIPS64EL-NEXT: sllv $5, $5, $2 |
| ; MIPS64EL-NEXT: .LBB9_1: # %entry |
| ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64EL-NEXT: ll $7, 0($1) |
| -; MIPS64EL-NEXT: and $7, $7, $3 |
| -; MIPS64EL-NEXT: and $5, $5, $3 |
| -; MIPS64EL-NEXT: slt $10, $7, $5 |
| -; MIPS64EL-NEXT: move $8, $7 |
| -; MIPS64EL-NEXT: movz $8, $5, $10 |
| +; MIPS64EL-NEXT: ll $8, 0($1) |
| ; MIPS64EL-NEXT: and $8, $8, $3 |
| -; MIPS64EL-NEXT: and $9, $7, $4 |
| -; MIPS64EL-NEXT: or $9, $9, $8 |
| -; MIPS64EL-NEXT: sc $9, 0($1) |
| -; MIPS64EL-NEXT: beqz $9, .LBB9_1 |
| +; MIPS64EL-NEXT: and $5, $5, $3 |
| +; MIPS64EL-NEXT: slt $11, $8, $5 |
| +; MIPS64EL-NEXT: move $9, $8 |
| +; MIPS64EL-NEXT: movz $9, $5, $11 |
| +; MIPS64EL-NEXT: and $9, $9, $3 |
| +; MIPS64EL-NEXT: and $10, $8, $6 |
| +; MIPS64EL-NEXT: or $10, $10, $9 |
| +; MIPS64EL-NEXT: sc $10, 0($1) |
| +; MIPS64EL-NEXT: beqz $10, .LBB9_1 |
| ; MIPS64EL-NEXT: nop |
| ; MIPS64EL-NEXT: # %bb.2: # %entry |
| -; MIPS64EL-NEXT: and $6, $7, $3 |
| -; MIPS64EL-NEXT: srlv $6, $6, $2 |
| -; MIPS64EL-NEXT: seh $6, $6 |
| +; MIPS64EL-NEXT: and $7, $8, $3 |
| +; MIPS64EL-NEXT: srlv $7, $7, $2 |
| +; MIPS64EL-NEXT: seh $7, $7 |
| ; MIPS64EL-NEXT: # %bb.3: # %entry |
| -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64EL-NEXT: # %bb.4: # %entry |
| ; MIPS64EL-NEXT: sync |
| ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -3679,28 +3679,28 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64ELR6-NEXT: sll $2, $2, 3 |
| ; MIPS64ELR6-NEXT: ori $3, $zero, 255 |
| ; MIPS64ELR6-NEXT: sllv $3, $3, $2 |
| -; MIPS64ELR6-NEXT: nor $4, $zero, $3 |
| +; MIPS64ELR6-NEXT: nor $6, $zero, $3 |
| ; MIPS64ELR6-NEXT: sllv $5, $5, $2 |
| ; MIPS64ELR6-NEXT: .LBB9_1: # %entry |
| ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64ELR6-NEXT: ll $7, 0($1) |
| -; MIPS64ELR6-NEXT: and $7, $7, $3 |
| -; MIPS64ELR6-NEXT: and $5, $5, $3 |
| -; MIPS64ELR6-NEXT: slt $10, $7, $5 |
| -; MIPS64ELR6-NEXT: selnez $8, $7, $10 |
| -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 |
| -; MIPS64ELR6-NEXT: or $8, $8, $10 |
| +; MIPS64ELR6-NEXT: ll $8, 0($1) |
| ; MIPS64ELR6-NEXT: and $8, $8, $3 |
| -; MIPS64ELR6-NEXT: and $9, $7, $4 |
| -; MIPS64ELR6-NEXT: or $9, $9, $8 |
| -; MIPS64ELR6-NEXT: sc $9, 0($1) |
| -; MIPS64ELR6-NEXT: beqzc $9, .LBB9_1 |
| +; MIPS64ELR6-NEXT: and $5, $5, $3 |
| +; MIPS64ELR6-NEXT: slt $11, $8, $5 |
| +; MIPS64ELR6-NEXT: selnez $9, $8, $11 |
| +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 |
| +; MIPS64ELR6-NEXT: or $9, $9, $11 |
| +; MIPS64ELR6-NEXT: and $9, $9, $3 |
| +; MIPS64ELR6-NEXT: and $10, $8, $6 |
| +; MIPS64ELR6-NEXT: or $10, $10, $9 |
| +; MIPS64ELR6-NEXT: sc $10, 0($1) |
| +; MIPS64ELR6-NEXT: beqzc $10, .LBB9_1 |
| ; MIPS64ELR6-NEXT: # %bb.2: # %entry |
| -; MIPS64ELR6-NEXT: and $6, $7, $3 |
| -; MIPS64ELR6-NEXT: srlv $6, $6, $2 |
| -; MIPS64ELR6-NEXT: seh $6, $6 |
| +; MIPS64ELR6-NEXT: and $7, $8, $3 |
| +; MIPS64ELR6-NEXT: srlv $7, $7, $2 |
| +; MIPS64ELR6-NEXT: seh $7, $7 |
| ; MIPS64ELR6-NEXT: # %bb.3: # %entry |
| -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64ELR6-NEXT: # %bb.4: # %entry |
| ; MIPS64ELR6-NEXT: sync |
| ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -4041,26 +4041,26 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64-NEXT: sll $2, $2, 3 |
| ; MIPS64-NEXT: ori $3, $zero, 255 |
| ; MIPS64-NEXT: sllv $3, $3, $2 |
| -; MIPS64-NEXT: nor $4, $zero, $3 |
| +; MIPS64-NEXT: nor $6, $zero, $3 |
| ; MIPS64-NEXT: sllv $5, $5, $2 |
| ; MIPS64-NEXT: .LBB10_1: # %entry |
| ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64-NEXT: ll $7, 0($1) |
| -; MIPS64-NEXT: sltu $10, $7, $5 |
| -; MIPS64-NEXT: move $8, $7 |
| -; MIPS64-NEXT: movn $8, $5, $10 |
| -; MIPS64-NEXT: and $8, $8, $3 |
| -; MIPS64-NEXT: and $9, $7, $4 |
| -; MIPS64-NEXT: or $9, $9, $8 |
| -; MIPS64-NEXT: sc $9, 0($1) |
| -; MIPS64-NEXT: beqz $9, .LBB10_1 |
| +; MIPS64-NEXT: ll $8, 0($1) |
| +; MIPS64-NEXT: sltu $11, $8, $5 |
| +; MIPS64-NEXT: move $9, $8 |
| +; MIPS64-NEXT: movn $9, $5, $11 |
| +; MIPS64-NEXT: and $9, $9, $3 |
| +; MIPS64-NEXT: and $10, $8, $6 |
| +; MIPS64-NEXT: or $10, $10, $9 |
| +; MIPS64-NEXT: sc $10, 0($1) |
| +; MIPS64-NEXT: beqz $10, .LBB10_1 |
| ; MIPS64-NEXT: nop |
| ; MIPS64-NEXT: # %bb.2: # %entry |
| -; MIPS64-NEXT: and $6, $7, $3 |
| -; MIPS64-NEXT: srlv $6, $6, $2 |
| -; MIPS64-NEXT: seh $6, $6 |
| +; MIPS64-NEXT: and $7, $8, $3 |
| +; MIPS64-NEXT: srlv $7, $7, $2 |
| +; MIPS64-NEXT: seh $7, $7 |
| ; MIPS64-NEXT: # %bb.3: # %entry |
| -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64-NEXT: # %bb.4: # %entry |
| ; MIPS64-NEXT: sync |
| ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -4081,26 +4081,26 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64R6-NEXT: sll $2, $2, 3 |
| ; MIPS64R6-NEXT: ori $3, $zero, 255 |
| ; MIPS64R6-NEXT: sllv $3, $3, $2 |
| -; MIPS64R6-NEXT: nor $4, $zero, $3 |
| +; MIPS64R6-NEXT: nor $6, $zero, $3 |
| ; MIPS64R6-NEXT: sllv $5, $5, $2 |
| ; MIPS64R6-NEXT: .LBB10_1: # %entry |
| ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6-NEXT: ll $7, 0($1) |
| -; MIPS64R6-NEXT: sltu $10, $7, $5 |
| -; MIPS64R6-NEXT: seleqz $8, $7, $10 |
| -; MIPS64R6-NEXT: selnez $10, $5, $10 |
| -; MIPS64R6-NEXT: or $8, $8, $10 |
| -; MIPS64R6-NEXT: and $8, $8, $3 |
| -; MIPS64R6-NEXT: and $9, $7, $4 |
| -; MIPS64R6-NEXT: or $9, $9, $8 |
| -; MIPS64R6-NEXT: sc $9, 0($1) |
| -; MIPS64R6-NEXT: beqzc $9, .LBB10_1 |
| +; MIPS64R6-NEXT: ll $8, 0($1) |
| +; MIPS64R6-NEXT: sltu $11, $8, $5 |
| +; MIPS64R6-NEXT: seleqz $9, $8, $11 |
| +; MIPS64R6-NEXT: selnez $11, $5, $11 |
| +; MIPS64R6-NEXT: or $9, $9, $11 |
| +; MIPS64R6-NEXT: and $9, $9, $3 |
| +; MIPS64R6-NEXT: and $10, $8, $6 |
| +; MIPS64R6-NEXT: or $10, $10, $9 |
| +; MIPS64R6-NEXT: sc $10, 0($1) |
| +; MIPS64R6-NEXT: beqzc $10, .LBB10_1 |
| ; MIPS64R6-NEXT: # %bb.2: # %entry |
| -; MIPS64R6-NEXT: and $6, $7, $3 |
| -; MIPS64R6-NEXT: srlv $6, $6, $2 |
| -; MIPS64R6-NEXT: seh $6, $6 |
| +; MIPS64R6-NEXT: and $7, $8, $3 |
| +; MIPS64R6-NEXT: srlv $7, $7, $2 |
| +; MIPS64R6-NEXT: seh $7, $7 |
| ; MIPS64R6-NEXT: # %bb.3: # %entry |
| -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6-NEXT: # %bb.4: # %entry |
| ; MIPS64R6-NEXT: sync |
| ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -4119,28 +4119,28 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64EL-NEXT: sll $2, $2, 3 |
| ; MIPS64EL-NEXT: ori $3, $zero, 255 |
| ; MIPS64EL-NEXT: sllv $3, $3, $2 |
| -; MIPS64EL-NEXT: nor $4, $zero, $3 |
| +; MIPS64EL-NEXT: nor $6, $zero, $3 |
| ; MIPS64EL-NEXT: sllv $5, $5, $2 |
| ; MIPS64EL-NEXT: .LBB10_1: # %entry |
| ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64EL-NEXT: ll $7, 0($1) |
| -; MIPS64EL-NEXT: and $7, $7, $3 |
| -; MIPS64EL-NEXT: and $5, $5, $3 |
| -; MIPS64EL-NEXT: sltu $10, $7, $5 |
| -; MIPS64EL-NEXT: move $8, $7 |
| -; MIPS64EL-NEXT: movn $8, $5, $10 |
| +; MIPS64EL-NEXT: ll $8, 0($1) |
| ; MIPS64EL-NEXT: and $8, $8, $3 |
| -; MIPS64EL-NEXT: and $9, $7, $4 |
| -; MIPS64EL-NEXT: or $9, $9, $8 |
| -; MIPS64EL-NEXT: sc $9, 0($1) |
| -; MIPS64EL-NEXT: beqz $9, .LBB10_1 |
| +; MIPS64EL-NEXT: and $5, $5, $3 |
| +; MIPS64EL-NEXT: sltu $11, $8, $5 |
| +; MIPS64EL-NEXT: move $9, $8 |
| +; MIPS64EL-NEXT: movn $9, $5, $11 |
| +; MIPS64EL-NEXT: and $9, $9, $3 |
| +; MIPS64EL-NEXT: and $10, $8, $6 |
| +; MIPS64EL-NEXT: or $10, $10, $9 |
| +; MIPS64EL-NEXT: sc $10, 0($1) |
| +; MIPS64EL-NEXT: beqz $10, .LBB10_1 |
| ; MIPS64EL-NEXT: nop |
| ; MIPS64EL-NEXT: # %bb.2: # %entry |
| -; MIPS64EL-NEXT: and $6, $7, $3 |
| -; MIPS64EL-NEXT: srlv $6, $6, $2 |
| -; MIPS64EL-NEXT: seh $6, $6 |
| +; MIPS64EL-NEXT: and $7, $8, $3 |
| +; MIPS64EL-NEXT: srlv $7, $7, $2 |
| +; MIPS64EL-NEXT: seh $7, $7 |
| ; MIPS64EL-NEXT: # %bb.3: # %entry |
| -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64EL-NEXT: # %bb.4: # %entry |
| ; MIPS64EL-NEXT: sync |
| ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -4160,28 +4160,28 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64ELR6-NEXT: sll $2, $2, 3 |
| ; MIPS64ELR6-NEXT: ori $3, $zero, 255 |
| ; MIPS64ELR6-NEXT: sllv $3, $3, $2 |
| -; MIPS64ELR6-NEXT: nor $4, $zero, $3 |
| +; MIPS64ELR6-NEXT: nor $6, $zero, $3 |
| ; MIPS64ELR6-NEXT: sllv $5, $5, $2 |
| ; MIPS64ELR6-NEXT: .LBB10_1: # %entry |
| ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64ELR6-NEXT: ll $7, 0($1) |
| -; MIPS64ELR6-NEXT: and $7, $7, $3 |
| -; MIPS64ELR6-NEXT: and $5, $5, $3 |
| -; MIPS64ELR6-NEXT: sltu $10, $7, $5 |
| -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 |
| -; MIPS64ELR6-NEXT: selnez $10, $5, $10 |
| -; MIPS64ELR6-NEXT: or $8, $8, $10 |
| +; MIPS64ELR6-NEXT: ll $8, 0($1) |
| ; MIPS64ELR6-NEXT: and $8, $8, $3 |
| -; MIPS64ELR6-NEXT: and $9, $7, $4 |
| -; MIPS64ELR6-NEXT: or $9, $9, $8 |
| -; MIPS64ELR6-NEXT: sc $9, 0($1) |
| -; MIPS64ELR6-NEXT: beqzc $9, .LBB10_1 |
| +; MIPS64ELR6-NEXT: and $5, $5, $3 |
| +; MIPS64ELR6-NEXT: sltu $11, $8, $5 |
| +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 |
| +; MIPS64ELR6-NEXT: selnez $11, $5, $11 |
| +; MIPS64ELR6-NEXT: or $9, $9, $11 |
| +; MIPS64ELR6-NEXT: and $9, $9, $3 |
| +; MIPS64ELR6-NEXT: and $10, $8, $6 |
| +; MIPS64ELR6-NEXT: or $10, $10, $9 |
| +; MIPS64ELR6-NEXT: sc $10, 0($1) |
| +; MIPS64ELR6-NEXT: beqzc $10, .LBB10_1 |
| ; MIPS64ELR6-NEXT: # %bb.2: # %entry |
| -; MIPS64ELR6-NEXT: and $6, $7, $3 |
| -; MIPS64ELR6-NEXT: srlv $6, $6, $2 |
| -; MIPS64ELR6-NEXT: seh $6, $6 |
| +; MIPS64ELR6-NEXT: and $7, $8, $3 |
| +; MIPS64ELR6-NEXT: srlv $7, $7, $2 |
| +; MIPS64ELR6-NEXT: seh $7, $7 |
| ; MIPS64ELR6-NEXT: # %bb.3: # %entry |
| -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64ELR6-NEXT: # %bb.4: # %entry |
| ; MIPS64ELR6-NEXT: sync |
| ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -4522,26 +4522,26 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64-NEXT: sll $2, $2, 3 |
| ; MIPS64-NEXT: ori $3, $zero, 255 |
| ; MIPS64-NEXT: sllv $3, $3, $2 |
| -; MIPS64-NEXT: nor $4, $zero, $3 |
| +; MIPS64-NEXT: nor $6, $zero, $3 |
| ; MIPS64-NEXT: sllv $5, $5, $2 |
| ; MIPS64-NEXT: .LBB11_1: # %entry |
| ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64-NEXT: ll $7, 0($1) |
| -; MIPS64-NEXT: sltu $10, $7, $5 |
| -; MIPS64-NEXT: move $8, $7 |
| -; MIPS64-NEXT: movz $8, $5, $10 |
| -; MIPS64-NEXT: and $8, $8, $3 |
| -; MIPS64-NEXT: and $9, $7, $4 |
| -; MIPS64-NEXT: or $9, $9, $8 |
| -; MIPS64-NEXT: sc $9, 0($1) |
| -; MIPS64-NEXT: beqz $9, .LBB11_1 |
| +; MIPS64-NEXT: ll $8, 0($1) |
| +; MIPS64-NEXT: sltu $11, $8, $5 |
| +; MIPS64-NEXT: move $9, $8 |
| +; MIPS64-NEXT: movz $9, $5, $11 |
| +; MIPS64-NEXT: and $9, $9, $3 |
| +; MIPS64-NEXT: and $10, $8, $6 |
| +; MIPS64-NEXT: or $10, $10, $9 |
| +; MIPS64-NEXT: sc $10, 0($1) |
| +; MIPS64-NEXT: beqz $10, .LBB11_1 |
| ; MIPS64-NEXT: nop |
| ; MIPS64-NEXT: # %bb.2: # %entry |
| -; MIPS64-NEXT: and $6, $7, $3 |
| -; MIPS64-NEXT: srlv $6, $6, $2 |
| -; MIPS64-NEXT: seh $6, $6 |
| +; MIPS64-NEXT: and $7, $8, $3 |
| +; MIPS64-NEXT: srlv $7, $7, $2 |
| +; MIPS64-NEXT: seh $7, $7 |
| ; MIPS64-NEXT: # %bb.3: # %entry |
| -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64-NEXT: # %bb.4: # %entry |
| ; MIPS64-NEXT: sync |
| ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -4562,26 +4562,26 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64R6-NEXT: sll $2, $2, 3 |
| ; MIPS64R6-NEXT: ori $3, $zero, 255 |
| ; MIPS64R6-NEXT: sllv $3, $3, $2 |
| -; MIPS64R6-NEXT: nor $4, $zero, $3 |
| +; MIPS64R6-NEXT: nor $6, $zero, $3 |
| ; MIPS64R6-NEXT: sllv $5, $5, $2 |
| ; MIPS64R6-NEXT: .LBB11_1: # %entry |
| ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6-NEXT: ll $7, 0($1) |
| -; MIPS64R6-NEXT: sltu $10, $7, $5 |
| -; MIPS64R6-NEXT: selnez $8, $7, $10 |
| -; MIPS64R6-NEXT: seleqz $10, $5, $10 |
| -; MIPS64R6-NEXT: or $8, $8, $10 |
| -; MIPS64R6-NEXT: and $8, $8, $3 |
| -; MIPS64R6-NEXT: and $9, $7, $4 |
| -; MIPS64R6-NEXT: or $9, $9, $8 |
| -; MIPS64R6-NEXT: sc $9, 0($1) |
| -; MIPS64R6-NEXT: beqzc $9, .LBB11_1 |
| +; MIPS64R6-NEXT: ll $8, 0($1) |
| +; MIPS64R6-NEXT: sltu $11, $8, $5 |
| +; MIPS64R6-NEXT: selnez $9, $8, $11 |
| +; MIPS64R6-NEXT: seleqz $11, $5, $11 |
| +; MIPS64R6-NEXT: or $9, $9, $11 |
| +; MIPS64R6-NEXT: and $9, $9, $3 |
| +; MIPS64R6-NEXT: and $10, $8, $6 |
| +; MIPS64R6-NEXT: or $10, $10, $9 |
| +; MIPS64R6-NEXT: sc $10, 0($1) |
| +; MIPS64R6-NEXT: beqzc $10, .LBB11_1 |
| ; MIPS64R6-NEXT: # %bb.2: # %entry |
| -; MIPS64R6-NEXT: and $6, $7, $3 |
| -; MIPS64R6-NEXT: srlv $6, $6, $2 |
| -; MIPS64R6-NEXT: seh $6, $6 |
| +; MIPS64R6-NEXT: and $7, $8, $3 |
| +; MIPS64R6-NEXT: srlv $7, $7, $2 |
| +; MIPS64R6-NEXT: seh $7, $7 |
| ; MIPS64R6-NEXT: # %bb.3: # %entry |
| -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6-NEXT: # %bb.4: # %entry |
| ; MIPS64R6-NEXT: sync |
| ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -4600,28 +4600,28 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64EL-NEXT: sll $2, $2, 3 |
| ; MIPS64EL-NEXT: ori $3, $zero, 255 |
| ; MIPS64EL-NEXT: sllv $3, $3, $2 |
| -; MIPS64EL-NEXT: nor $4, $zero, $3 |
| +; MIPS64EL-NEXT: nor $6, $zero, $3 |
| ; MIPS64EL-NEXT: sllv $5, $5, $2 |
| ; MIPS64EL-NEXT: .LBB11_1: # %entry |
| ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64EL-NEXT: ll $7, 0($1) |
| -; MIPS64EL-NEXT: and $7, $7, $3 |
| -; MIPS64EL-NEXT: and $5, $5, $3 |
| -; MIPS64EL-NEXT: sltu $10, $7, $5 |
| -; MIPS64EL-NEXT: move $8, $7 |
| -; MIPS64EL-NEXT: movz $8, $5, $10 |
| +; MIPS64EL-NEXT: ll $8, 0($1) |
| ; MIPS64EL-NEXT: and $8, $8, $3 |
| -; MIPS64EL-NEXT: and $9, $7, $4 |
| -; MIPS64EL-NEXT: or $9, $9, $8 |
| -; MIPS64EL-NEXT: sc $9, 0($1) |
| -; MIPS64EL-NEXT: beqz $9, .LBB11_1 |
| +; MIPS64EL-NEXT: and $5, $5, $3 |
| +; MIPS64EL-NEXT: sltu $11, $8, $5 |
| +; MIPS64EL-NEXT: move $9, $8 |
| +; MIPS64EL-NEXT: movz $9, $5, $11 |
| +; MIPS64EL-NEXT: and $9, $9, $3 |
| +; MIPS64EL-NEXT: and $10, $8, $6 |
| +; MIPS64EL-NEXT: or $10, $10, $9 |
| +; MIPS64EL-NEXT: sc $10, 0($1) |
| +; MIPS64EL-NEXT: beqz $10, .LBB11_1 |
| ; MIPS64EL-NEXT: nop |
| ; MIPS64EL-NEXT: # %bb.2: # %entry |
| -; MIPS64EL-NEXT: and $6, $7, $3 |
| -; MIPS64EL-NEXT: srlv $6, $6, $2 |
| -; MIPS64EL-NEXT: seh $6, $6 |
| +; MIPS64EL-NEXT: and $7, $8, $3 |
| +; MIPS64EL-NEXT: srlv $7, $7, $2 |
| +; MIPS64EL-NEXT: seh $7, $7 |
| ; MIPS64EL-NEXT: # %bb.3: # %entry |
| -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64EL-NEXT: # %bb.4: # %entry |
| ; MIPS64EL-NEXT: sync |
| ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -4641,28 +4641,28 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { |
| ; MIPS64ELR6-NEXT: sll $2, $2, 3 |
| ; MIPS64ELR6-NEXT: ori $3, $zero, 255 |
| ; MIPS64ELR6-NEXT: sllv $3, $3, $2 |
| -; MIPS64ELR6-NEXT: nor $4, $zero, $3 |
| +; MIPS64ELR6-NEXT: nor $6, $zero, $3 |
| ; MIPS64ELR6-NEXT: sllv $5, $5, $2 |
| ; MIPS64ELR6-NEXT: .LBB11_1: # %entry |
| ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64ELR6-NEXT: ll $7, 0($1) |
| -; MIPS64ELR6-NEXT: and $7, $7, $3 |
| -; MIPS64ELR6-NEXT: and $5, $5, $3 |
| -; MIPS64ELR6-NEXT: sltu $10, $7, $5 |
| -; MIPS64ELR6-NEXT: selnez $8, $7, $10 |
| -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 |
| -; MIPS64ELR6-NEXT: or $8, $8, $10 |
| +; MIPS64ELR6-NEXT: ll $8, 0($1) |
| ; MIPS64ELR6-NEXT: and $8, $8, $3 |
| -; MIPS64ELR6-NEXT: and $9, $7, $4 |
| -; MIPS64ELR6-NEXT: or $9, $9, $8 |
| -; MIPS64ELR6-NEXT: sc $9, 0($1) |
| -; MIPS64ELR6-NEXT: beqzc $9, .LBB11_1 |
| +; MIPS64ELR6-NEXT: and $5, $5, $3 |
| +; MIPS64ELR6-NEXT: sltu $11, $8, $5 |
| +; MIPS64ELR6-NEXT: selnez $9, $8, $11 |
| +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 |
| +; MIPS64ELR6-NEXT: or $9, $9, $11 |
| +; MIPS64ELR6-NEXT: and $9, $9, $3 |
| +; MIPS64ELR6-NEXT: and $10, $8, $6 |
| +; MIPS64ELR6-NEXT: or $10, $10, $9 |
| +; MIPS64ELR6-NEXT: sc $10, 0($1) |
| +; MIPS64ELR6-NEXT: beqzc $10, .LBB11_1 |
| ; MIPS64ELR6-NEXT: # %bb.2: # %entry |
| -; MIPS64ELR6-NEXT: and $6, $7, $3 |
| -; MIPS64ELR6-NEXT: srlv $6, $6, $2 |
| -; MIPS64ELR6-NEXT: seh $6, $6 |
| +; MIPS64ELR6-NEXT: and $7, $8, $3 |
| +; MIPS64ELR6-NEXT: srlv $7, $7, $2 |
| +; MIPS64ELR6-NEXT: seh $7, $7 |
| ; MIPS64ELR6-NEXT: # %bb.3: # %entry |
| -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64ELR6-NEXT: # %bb.4: # %entry |
| ; MIPS64ELR6-NEXT: sync |
| ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll |
| index 59ff83e4969..3846fda47b1 100644 |
| --- a/llvm/test/CodeGen/Mips/atomic.ll |
| +++ b/llvm/test/CodeGen/Mips/atomic.ll |
| @@ -2559,28 +2559,28 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind { |
| ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) |
| ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 |
| ; MIPS64R6O0-NEXT: and $2, $1, $2 |
| -; MIPS64R6O0-NEXT: andi $1, $1, 3 |
| -; MIPS64R6O0-NEXT: xori $1, $1, 3 |
| -; MIPS64R6O0-NEXT: sll $1, $1, 3 |
| -; MIPS64R6O0-NEXT: ori $3, $zero, 255 |
| -; MIPS64R6O0-NEXT: sllv $3, $3, $1 |
| -; MIPS64R6O0-NEXT: nor $5, $zero, $3 |
| -; MIPS64R6O0-NEXT: sllv $4, $4, $1 |
| +; MIPS64R6O0-NEXT: andi $3, $1, 3 |
| +; MIPS64R6O0-NEXT: xori $3, $3, 3 |
| +; MIPS64R6O0-NEXT: sll $3, $3, 3 |
| +; MIPS64R6O0-NEXT: ori $5, $zero, 255 |
| +; MIPS64R6O0-NEXT: sllv $5, $5, $3 |
| +; MIPS64R6O0-NEXT: nor $6, $zero, $5 |
| +; MIPS64R6O0-NEXT: sllv $4, $4, $3 |
| ; MIPS64R6O0-NEXT: .LBB8_1: # %entry |
| ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6O0-NEXT: ll $7, 0($2) |
| -; MIPS64R6O0-NEXT: addu $8, $7, $4 |
| -; MIPS64R6O0-NEXT: and $8, $8, $3 |
| -; MIPS64R6O0-NEXT: and $9, $7, $5 |
| -; MIPS64R6O0-NEXT: or $9, $9, $8 |
| -; MIPS64R6O0-NEXT: sc $9, 0($2) |
| -; MIPS64R6O0-NEXT: beqzc $9, .LBB8_1 |
| +; MIPS64R6O0-NEXT: ll $8, 0($2) |
| +; MIPS64R6O0-NEXT: addu $9, $8, $4 |
| +; MIPS64R6O0-NEXT: and $9, $9, $5 |
| +; MIPS64R6O0-NEXT: and $10, $8, $6 |
| +; MIPS64R6O0-NEXT: or $10, $10, $9 |
| +; MIPS64R6O0-NEXT: sc $10, 0($2) |
| +; MIPS64R6O0-NEXT: beqzc $10, .LBB8_1 |
| ; MIPS64R6O0-NEXT: # %bb.2: # %entry |
| -; MIPS64R6O0-NEXT: and $6, $7, $3 |
| -; MIPS64R6O0-NEXT: srlv $6, $6, $1 |
| -; MIPS64R6O0-NEXT: seb $6, $6 |
| +; MIPS64R6O0-NEXT: and $7, $8, $5 |
| +; MIPS64R6O0-NEXT: srlv $7, $7, $3 |
| +; MIPS64R6O0-NEXT: seb $7, $7 |
| ; MIPS64R6O0-NEXT: # %bb.3: # %entry |
| -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6O0-NEXT: # %bb.4: # %entry |
| ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload |
| ; MIPS64R6O0-NEXT: seb $2, $1 |
| @@ -3075,28 +3075,28 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { |
| ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) |
| ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 |
| ; MIPS64R6O0-NEXT: and $2, $1, $2 |
| -; MIPS64R6O0-NEXT: andi $1, $1, 3 |
| -; MIPS64R6O0-NEXT: xori $1, $1, 3 |
| -; MIPS64R6O0-NEXT: sll $1, $1, 3 |
| -; MIPS64R6O0-NEXT: ori $3, $zero, 255 |
| -; MIPS64R6O0-NEXT: sllv $3, $3, $1 |
| -; MIPS64R6O0-NEXT: nor $5, $zero, $3 |
| -; MIPS64R6O0-NEXT: sllv $4, $4, $1 |
| +; MIPS64R6O0-NEXT: andi $3, $1, 3 |
| +; MIPS64R6O0-NEXT: xori $3, $3, 3 |
| +; MIPS64R6O0-NEXT: sll $3, $3, 3 |
| +; MIPS64R6O0-NEXT: ori $5, $zero, 255 |
| +; MIPS64R6O0-NEXT: sllv $5, $5, $3 |
| +; MIPS64R6O0-NEXT: nor $6, $zero, $5 |
| +; MIPS64R6O0-NEXT: sllv $4, $4, $3 |
| ; MIPS64R6O0-NEXT: .LBB9_1: # %entry |
| ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6O0-NEXT: ll $7, 0($2) |
| -; MIPS64R6O0-NEXT: subu $8, $7, $4 |
| -; MIPS64R6O0-NEXT: and $8, $8, $3 |
| -; MIPS64R6O0-NEXT: and $9, $7, $5 |
| -; MIPS64R6O0-NEXT: or $9, $9, $8 |
| -; MIPS64R6O0-NEXT: sc $9, 0($2) |
| -; MIPS64R6O0-NEXT: beqzc $9, .LBB9_1 |
| +; MIPS64R6O0-NEXT: ll $8, 0($2) |
| +; MIPS64R6O0-NEXT: subu $9, $8, $4 |
| +; MIPS64R6O0-NEXT: and $9, $9, $5 |
| +; MIPS64R6O0-NEXT: and $10, $8, $6 |
| +; MIPS64R6O0-NEXT: or $10, $10, $9 |
| +; MIPS64R6O0-NEXT: sc $10, 0($2) |
| +; MIPS64R6O0-NEXT: beqzc $10, .LBB9_1 |
| ; MIPS64R6O0-NEXT: # %bb.2: # %entry |
| -; MIPS64R6O0-NEXT: and $6, $7, $3 |
| -; MIPS64R6O0-NEXT: srlv $6, $6, $1 |
| -; MIPS64R6O0-NEXT: seb $6, $6 |
| +; MIPS64R6O0-NEXT: and $7, $8, $5 |
| +; MIPS64R6O0-NEXT: srlv $7, $7, $3 |
| +; MIPS64R6O0-NEXT: seb $7, $7 |
| ; MIPS64R6O0-NEXT: # %bb.3: # %entry |
| -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6O0-NEXT: # %bb.4: # %entry |
| ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload |
| ; MIPS64R6O0-NEXT: seb $2, $1 |
| @@ -3601,29 +3601,29 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind { |
| ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) |
| ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 |
| ; MIPS64R6O0-NEXT: and $2, $1, $2 |
| -; MIPS64R6O0-NEXT: andi $1, $1, 3 |
| -; MIPS64R6O0-NEXT: xori $1, $1, 3 |
| -; MIPS64R6O0-NEXT: sll $1, $1, 3 |
| -; MIPS64R6O0-NEXT: ori $3, $zero, 255 |
| -; MIPS64R6O0-NEXT: sllv $3, $3, $1 |
| -; MIPS64R6O0-NEXT: nor $5, $zero, $3 |
| -; MIPS64R6O0-NEXT: sllv $4, $4, $1 |
| +; MIPS64R6O0-NEXT: andi $3, $1, 3 |
| +; MIPS64R6O0-NEXT: xori $3, $3, 3 |
| +; MIPS64R6O0-NEXT: sll $3, $3, 3 |
| +; MIPS64R6O0-NEXT: ori $5, $zero, 255 |
| +; MIPS64R6O0-NEXT: sllv $5, $5, $3 |
| +; MIPS64R6O0-NEXT: nor $6, $zero, $5 |
| +; MIPS64R6O0-NEXT: sllv $4, $4, $3 |
| ; MIPS64R6O0-NEXT: .LBB10_1: # %entry |
| ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6O0-NEXT: ll $7, 0($2) |
| -; MIPS64R6O0-NEXT: and $8, $7, $4 |
| -; MIPS64R6O0-NEXT: nor $8, $zero, $8 |
| -; MIPS64R6O0-NEXT: and $8, $8, $3 |
| -; MIPS64R6O0-NEXT: and $9, $7, $5 |
| -; MIPS64R6O0-NEXT: or $9, $9, $8 |
| -; MIPS64R6O0-NEXT: sc $9, 0($2) |
| -; MIPS64R6O0-NEXT: beqzc $9, .LBB10_1 |
| +; MIPS64R6O0-NEXT: ll $8, 0($2) |
| +; MIPS64R6O0-NEXT: and $9, $8, $4 |
| +; MIPS64R6O0-NEXT: nor $9, $zero, $9 |
| +; MIPS64R6O0-NEXT: and $9, $9, $5 |
| +; MIPS64R6O0-NEXT: and $10, $8, $6 |
| +; MIPS64R6O0-NEXT: or $10, $10, $9 |
| +; MIPS64R6O0-NEXT: sc $10, 0($2) |
| +; MIPS64R6O0-NEXT: beqzc $10, .LBB10_1 |
| ; MIPS64R6O0-NEXT: # %bb.2: # %entry |
| -; MIPS64R6O0-NEXT: and $6, $7, $3 |
| -; MIPS64R6O0-NEXT: srlv $6, $6, $1 |
| -; MIPS64R6O0-NEXT: seb $6, $6 |
| +; MIPS64R6O0-NEXT: and $7, $8, $5 |
| +; MIPS64R6O0-NEXT: srlv $7, $7, $3 |
| +; MIPS64R6O0-NEXT: seb $7, $7 |
| ; MIPS64R6O0-NEXT: # %bb.3: # %entry |
| -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6O0-NEXT: # %bb.4: # %entry |
| ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload |
| ; MIPS64R6O0-NEXT: seb $2, $1 |
| @@ -4115,27 +4115,27 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind { |
| ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) |
| ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 |
| ; MIPS64R6O0-NEXT: and $2, $1, $2 |
| -; MIPS64R6O0-NEXT: andi $1, $1, 3 |
| -; MIPS64R6O0-NEXT: xori $1, $1, 3 |
| -; MIPS64R6O0-NEXT: sll $1, $1, 3 |
| -; MIPS64R6O0-NEXT: ori $3, $zero, 255 |
| -; MIPS64R6O0-NEXT: sllv $3, $3, $1 |
| -; MIPS64R6O0-NEXT: nor $5, $zero, $3 |
| -; MIPS64R6O0-NEXT: sllv $4, $4, $1 |
| +; MIPS64R6O0-NEXT: andi $3, $1, 3 |
| +; MIPS64R6O0-NEXT: xori $3, $3, 3 |
| +; MIPS64R6O0-NEXT: sll $3, $3, 3 |
| +; MIPS64R6O0-NEXT: ori $5, $zero, 255 |
| +; MIPS64R6O0-NEXT: sllv $5, $5, $3 |
| +; MIPS64R6O0-NEXT: nor $6, $zero, $5 |
| +; MIPS64R6O0-NEXT: sllv $4, $4, $3 |
| ; MIPS64R6O0-NEXT: .LBB11_1: # %entry |
| ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6O0-NEXT: ll $7, 0($2) |
| -; MIPS64R6O0-NEXT: and $8, $4, $3 |
| -; MIPS64R6O0-NEXT: and $9, $7, $5 |
| -; MIPS64R6O0-NEXT: or $9, $9, $8 |
| -; MIPS64R6O0-NEXT: sc $9, 0($2) |
| -; MIPS64R6O0-NEXT: beqzc $9, .LBB11_1 |
| +; MIPS64R6O0-NEXT: ll $8, 0($2) |
| +; MIPS64R6O0-NEXT: and $9, $4, $5 |
| +; MIPS64R6O0-NEXT: and $10, $8, $6 |
| +; MIPS64R6O0-NEXT: or $10, $10, $9 |
| +; MIPS64R6O0-NEXT: sc $10, 0($2) |
| +; MIPS64R6O0-NEXT: beqzc $10, .LBB11_1 |
| ; MIPS64R6O0-NEXT: # %bb.2: # %entry |
| -; MIPS64R6O0-NEXT: and $6, $7, $3 |
| -; MIPS64R6O0-NEXT: srlv $6, $6, $1 |
| -; MIPS64R6O0-NEXT: seb $6, $6 |
| +; MIPS64R6O0-NEXT: and $7, $8, $5 |
| +; MIPS64R6O0-NEXT: srlv $7, $7, $3 |
| +; MIPS64R6O0-NEXT: seb $7, $7 |
| ; MIPS64R6O0-NEXT: # %bb.3: # %entry |
| -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6O0-NEXT: # %bb.4: # %entry |
| ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload |
| ; MIPS64R6O0-NEXT: seb $2, $1 |
| @@ -4666,32 +4666,32 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi |
| ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) |
| ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 |
| ; MIPS64R6O0-NEXT: and $2, $1, $2 |
| -; MIPS64R6O0-NEXT: andi $1, $1, 3 |
| -; MIPS64R6O0-NEXT: xori $1, $1, 3 |
| -; MIPS64R6O0-NEXT: sll $1, $1, 3 |
| -; MIPS64R6O0-NEXT: ori $3, $zero, 255 |
| -; MIPS64R6O0-NEXT: sllv $3, $3, $1 |
| -; MIPS64R6O0-NEXT: nor $6, $zero, $3 |
| +; MIPS64R6O0-NEXT: andi $3, $1, 3 |
| +; MIPS64R6O0-NEXT: xori $3, $3, 3 |
| +; MIPS64R6O0-NEXT: sll $3, $3, 3 |
| +; MIPS64R6O0-NEXT: ori $6, $zero, 255 |
| +; MIPS64R6O0-NEXT: sllv $6, $6, $3 |
| +; MIPS64R6O0-NEXT: nor $7, $zero, $6 |
| ; MIPS64R6O0-NEXT: andi $4, $4, 255 |
| -; MIPS64R6O0-NEXT: sllv $4, $4, $1 |
| +; MIPS64R6O0-NEXT: sllv $4, $4, $3 |
| ; MIPS64R6O0-NEXT: andi $5, $5, 255 |
| -; MIPS64R6O0-NEXT: sllv $5, $5, $1 |
| +; MIPS64R6O0-NEXT: sllv $5, $5, $3 |
| ; MIPS64R6O0-NEXT: .LBB12_1: # %entry |
| ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6O0-NEXT: ll $8, 0($2) |
| -; MIPS64R6O0-NEXT: and $9, $8, $3 |
| -; MIPS64R6O0-NEXT: bnec $9, $4, .LBB12_3 |
| +; MIPS64R6O0-NEXT: ll $9, 0($2) |
| +; MIPS64R6O0-NEXT: and $10, $9, $6 |
| +; MIPS64R6O0-NEXT: bnec $10, $4, .LBB12_3 |
| ; MIPS64R6O0-NEXT: # %bb.2: # %entry |
| ; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 |
| -; MIPS64R6O0-NEXT: and $8, $8, $6 |
| -; MIPS64R6O0-NEXT: or $8, $8, $5 |
| -; MIPS64R6O0-NEXT: sc $8, 0($2) |
| -; MIPS64R6O0-NEXT: beqzc $8, .LBB12_1 |
| +; MIPS64R6O0-NEXT: and $9, $9, $7 |
| +; MIPS64R6O0-NEXT: or $9, $9, $5 |
| +; MIPS64R6O0-NEXT: sc $9, 0($2) |
| +; MIPS64R6O0-NEXT: beqzc $9, .LBB12_1 |
| ; MIPS64R6O0-NEXT: .LBB12_3: # %entry |
| -; MIPS64R6O0-NEXT: srlv $7, $9, $1 |
| -; MIPS64R6O0-NEXT: seb $7, $7 |
| +; MIPS64R6O0-NEXT: srlv $8, $10, $3 |
| +; MIPS64R6O0-NEXT: seb $8, $8 |
| ; MIPS64R6O0-NEXT: # %bb.4: # %entry |
| -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6O0-NEXT: # %bb.5: # %entry |
| ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 |
| @@ -5236,28 +5236,28 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n |
| ; MIPS64R6O0-NEXT: sll $2, $2, 3 |
| ; MIPS64R6O0-NEXT: ori $3, $zero, 255 |
| ; MIPS64R6O0-NEXT: sllv $3, $3, $2 |
| -; MIPS64R6O0-NEXT: nor $4, $zero, $3 |
| -; MIPS64R6O0-NEXT: andi $7, $5, 255 |
| -; MIPS64R6O0-NEXT: sllv $7, $7, $2 |
| +; MIPS64R6O0-NEXT: nor $7, $zero, $3 |
| +; MIPS64R6O0-NEXT: andi $8, $5, 255 |
| +; MIPS64R6O0-NEXT: sllv $8, $8, $2 |
| ; MIPS64R6O0-NEXT: andi $6, $6, 255 |
| ; MIPS64R6O0-NEXT: sllv $6, $6, $2 |
| ; MIPS64R6O0-NEXT: .LBB13_1: # %entry |
| ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6O0-NEXT: ll $9, 0($1) |
| -; MIPS64R6O0-NEXT: and $10, $9, $3 |
| -; MIPS64R6O0-NEXT: bnec $10, $7, .LBB13_3 |
| +; MIPS64R6O0-NEXT: ll $10, 0($1) |
| +; MIPS64R6O0-NEXT: and $11, $10, $3 |
| +; MIPS64R6O0-NEXT: bnec $11, $8, .LBB13_3 |
| ; MIPS64R6O0-NEXT: # %bb.2: # %entry |
| ; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 |
| -; MIPS64R6O0-NEXT: and $9, $9, $4 |
| -; MIPS64R6O0-NEXT: or $9, $9, $6 |
| -; MIPS64R6O0-NEXT: sc $9, 0($1) |
| -; MIPS64R6O0-NEXT: beqzc $9, .LBB13_1 |
| +; MIPS64R6O0-NEXT: and $10, $10, $7 |
| +; MIPS64R6O0-NEXT: or $10, $10, $6 |
| +; MIPS64R6O0-NEXT: sc $10, 0($1) |
| +; MIPS64R6O0-NEXT: beqzc $10, .LBB13_1 |
| ; MIPS64R6O0-NEXT: .LBB13_3: # %entry |
| -; MIPS64R6O0-NEXT: srlv $8, $10, $2 |
| -; MIPS64R6O0-NEXT: seb $8, $8 |
| +; MIPS64R6O0-NEXT: srlv $9, $11, $2 |
| +; MIPS64R6O0-NEXT: seb $9, $9 |
| ; MIPS64R6O0-NEXT: # %bb.4: # %entry |
| ; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill |
| -; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill |
| +; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill |
| ; MIPS64R6O0-NEXT: # %bb.5: # %entry |
| ; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload |
| ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload |
| @@ -5775,28 +5775,28 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind { |
| ; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1) |
| ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 |
| ; MIPS64R6O0-NEXT: and $2, $1, $2 |
| -; MIPS64R6O0-NEXT: andi $1, $1, 3 |
| -; MIPS64R6O0-NEXT: xori $1, $1, 2 |
| -; MIPS64R6O0-NEXT: sll $1, $1, 3 |
| -; MIPS64R6O0-NEXT: ori $3, $zero, 65535 |
| -; MIPS64R6O0-NEXT: sllv $3, $3, $1 |
| -; MIPS64R6O0-NEXT: nor $5, $zero, $3 |
| -; MIPS64R6O0-NEXT: sllv $4, $4, $1 |
| +; MIPS64R6O0-NEXT: andi $3, $1, 3 |
| +; MIPS64R6O0-NEXT: xori $3, $3, 2 |
| +; MIPS64R6O0-NEXT: sll $3, $3, 3 |
| +; MIPS64R6O0-NEXT: ori $5, $zero, 65535 |
| +; MIPS64R6O0-NEXT: sllv $5, $5, $3 |
| +; MIPS64R6O0-NEXT: nor $6, $zero, $5 |
| +; MIPS64R6O0-NEXT: sllv $4, $4, $3 |
| ; MIPS64R6O0-NEXT: .LBB14_1: # %entry |
| ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6O0-NEXT: ll $7, 0($2) |
| -; MIPS64R6O0-NEXT: addu $8, $7, $4 |
| -; MIPS64R6O0-NEXT: and $8, $8, $3 |
| -; MIPS64R6O0-NEXT: and $9, $7, $5 |
| -; MIPS64R6O0-NEXT: or $9, $9, $8 |
| -; MIPS64R6O0-NEXT: sc $9, 0($2) |
| -; MIPS64R6O0-NEXT: beqzc $9, .LBB14_1 |
| +; MIPS64R6O0-NEXT: ll $8, 0($2) |
| +; MIPS64R6O0-NEXT: addu $9, $8, $4 |
| +; MIPS64R6O0-NEXT: and $9, $9, $5 |
| +; MIPS64R6O0-NEXT: and $10, $8, $6 |
| +; MIPS64R6O0-NEXT: or $10, $10, $9 |
| +; MIPS64R6O0-NEXT: sc $10, 0($2) |
| +; MIPS64R6O0-NEXT: beqzc $10, .LBB14_1 |
| ; MIPS64R6O0-NEXT: # %bb.2: # %entry |
| -; MIPS64R6O0-NEXT: and $6, $7, $3 |
| -; MIPS64R6O0-NEXT: srlv $6, $6, $1 |
| -; MIPS64R6O0-NEXT: seh $6, $6 |
| +; MIPS64R6O0-NEXT: and $7, $8, $5 |
| +; MIPS64R6O0-NEXT: srlv $7, $7, $3 |
| +; MIPS64R6O0-NEXT: seh $7, $7 |
| ; MIPS64R6O0-NEXT: # %bb.3: # %entry |
| -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill |
| +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill |
| ; MIPS64R6O0-NEXT: # %bb.4: # %entry |
| ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload |
| ; MIPS64R6O0-NEXT: seh $2, $1 |
| @@ -6359,33 +6359,33 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { |
| ; MIPS64R6O0-NEXT: sll $3, $5, 0 |
| ; MIPS64R6O0-NEXT: addu $2, $3, $2 |
| ; MIPS64R6O0-NEXT: sync |
| -; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 |
| -; MIPS64R6O0-NEXT: and $3, $4, $3 |
| -; MIPS64R6O0-NEXT: andi $4, $4, 3 |
| -; MIPS64R6O0-NEXT: xori $4, $4, 2 |
| -; MIPS64R6O0-NEXT: sll $4, $4, 3 |
| +; MIPS64R6O0-NEXT: daddiu $8, $zero, -4 |
| +; MIPS64R6O0-NEXT: and $8, $4, $8 |
| +; MIPS64R6O0-NEXT: andi $3, $4, 3 |
| +; MIPS64R6O0-NEXT: xori $3, $3, 2 |
| +; MIPS64R6O0-NEXT: sll $3, $3, 3 |
| ; MIPS64R6O0-NEXT: ori $5, $zero, 65535 |
| -; MIPS64R6O0-NEXT: sllv $5, $5, $4 |
| +; MIPS64R6O0-NEXT: sllv $5, $5, $3 |
| ; MIPS64R6O0-NEXT: nor $6, $zero, $5 |
| ; MIPS64R6O0-NEXT: andi $7, $2, 65535 |
| -; MIPS64R6O0-NEXT: sllv $7, $7, $4 |
| +; MIPS64R6O0-NEXT: sllv $7, $7, $3 |
| ; MIPS64R6O0-NEXT: andi $1, $1, 65535 |
| -; MIPS64R6O0-NEXT: sllv $1, $1, $4 |
| +; MIPS64R6O0-NEXT: sllv $1, $1, $3 |
| ; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 |
| -; MIPS64R6O0-NEXT: ll $9, 0($3) |
| -; MIPS64R6O0-NEXT: and $10, $9, $5 |
| -; MIPS64R6O0-NEXT: bnec $10, $7, .LBB15_3 |
| +; MIPS64R6O0-NEXT: ll $10, 0($8) |
| +; MIPS64R6O0-NEXT: and $11, $10, $5 |
| +; MIPS64R6O0-NEXT: bnec $11, $7, .LBB15_3 |
| ; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 |
| -; MIPS64R6O0-NEXT: and $9, $9, $6 |
| -; MIPS64R6O0-NEXT: or $9, $9, $1 |
| -; MIPS64R6O0-NEXT: sc $9, 0($3) |
| -; MIPS64R6O0-NEXT: beqzc $9, .LBB15_1 |
| +; MIPS64R6O0-NEXT: and $10, $10, $6 |
| +; MIPS64R6O0-NEXT: or $10, $10, $1 |
| +; MIPS64R6O0-NEXT: sc $10, 0($8) |
| +; MIPS64R6O0-NEXT: beqzc $10, .LBB15_1 |
| ; MIPS64R6O0-NEXT: .LBB15_3: |
| -; MIPS64R6O0-NEXT: srlv $8, $10, $4 |
| -; MIPS64R6O0-NEXT: seh $8, $8 |
| +; MIPS64R6O0-NEXT: srlv $9, $11, $3 |
| +; MIPS64R6O0-NEXT: seh $9, $9 |
| ; MIPS64R6O0-NEXT: # %bb.4: |
| ; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill |
| -; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill |
| +; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill |
| ; MIPS64R6O0-NEXT: # %bb.5: |
| ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload |
| ; MIPS64R6O0-NEXT: seh $2, $1 |
| @@ -7145,8 +7145,8 @@ define i32 @zeroreg() nounwind { |
| ; MIPS64R6O0-NEXT: sc $6, 0($1) |
| ; MIPS64R6O0-NEXT: beqzc $6, .LBB17_1 |
| ; MIPS64R6O0-NEXT: .LBB17_3: # %entry |
| -; MIPS64R6O0-NEXT: xor $1, $5, $3 |
| -; MIPS64R6O0-NEXT: sltiu $2, $1, 1 |
| +; MIPS64R6O0-NEXT: xor $2, $5, $3 |
| +; MIPS64R6O0-NEXT: sltiu $2, $2, 1 |
| ; MIPS64R6O0-NEXT: sync |
| ; MIPS64R6O0-NEXT: jrc $ra |
| ; |
| diff --git a/llvm/test/CodeGen/Mips/implicit-sret.ll b/llvm/test/CodeGen/Mips/implicit-sret.ll |
| index b9f6568e40c..e86cec37d51 100644 |
| --- a/llvm/test/CodeGen/Mips/implicit-sret.ll |
| +++ b/llvm/test/CodeGen/Mips/implicit-sret.ll |
| @@ -48,8 +48,8 @@ define internal { i32, i128, i64 } @implicit_sret_impl() unnamed_addr nounwind { |
| ; CHECK-NEXT: sd $zero, 8($4) |
| ; CHECK-NEXT: daddiu $3, $zero, 30 |
| ; CHECK-NEXT: sd $3, 24($4) |
| -; CHECK-NEXT: addiu $3, $zero, 10 |
| -; CHECK-NEXT: sw $3, 0($4) |
| +; CHECK-NEXT: addiu $5, $zero, 10 |
| +; CHECK-NEXT: sw $5, 0($4) |
| ; CHECK-NEXT: jr $ra |
| ; CHECK-NEXT: nop |
| ret { i32, i128, i64 } { i32 10, i128 20, i64 30 } |
| @@ -70,12 +70,10 @@ define internal void @test2() unnamed_addr nounwind { |
| ; CHECK-NEXT: lw $3, 4($sp) |
| ; CHECK-NEXT: # implicit-def: $a0_64 |
| ; CHECK-NEXT: move $4, $3 |
| -; CHECK-NEXT: # implicit-def: $v1_64 |
| -; CHECK-NEXT: move $3, $2 |
| -; CHECK-NEXT: # implicit-def: $v0_64 |
| -; CHECK-NEXT: move $2, $1 |
| -; CHECK-NEXT: move $5, $3 |
| -; CHECK-NEXT: move $6, $2 |
| +; CHECK-NEXT: # implicit-def: $a1_64 |
| +; CHECK-NEXT: move $5, $2 |
| +; CHECK-NEXT: # implicit-def: $a2_64 |
| +; CHECK-NEXT: move $6, $1 |
| ; CHECK-NEXT: jal use_sret2 |
| ; CHECK-NEXT: nop |
| ; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload |
| diff --git a/llvm/test/CodeGen/PowerPC/addegluecrash.ll b/llvm/test/CodeGen/PowerPC/addegluecrash.ll |
| index c38f377869f..a1d98054583 100644 |
| --- a/llvm/test/CodeGen/PowerPC/addegluecrash.ll |
| +++ b/llvm/test/CodeGen/PowerPC/addegluecrash.ll |
| @@ -21,11 +21,11 @@ define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* n |
| ; CHECK-NEXT: addze 5, 5 |
| ; CHECK-NEXT: add 4, 5, 4 |
| ; CHECK-NEXT: cmpld 7, 4, 5 |
| -; CHECK-NEXT: mfocrf 4, 1 |
| -; CHECK-NEXT: rlwinm 4, 4, 29, 31, 31 |
| -; CHECK-NEXT: # implicit-def: $x5 |
| -; CHECK-NEXT: mr 5, 4 |
| -; CHECK-NEXT: clrldi 4, 5, 32 |
| +; CHECK-NEXT: mfocrf 10, 1 |
| +; CHECK-NEXT: rlwinm 10, 10, 29, 31, 31 |
| +; CHECK-NEXT: # implicit-def: $x4 |
| +; CHECK-NEXT: mr 4, 10 |
| +; CHECK-NEXT: clrldi 4, 4, 32 |
| ; CHECK-NEXT: std 4, 0(3) |
| ; CHECK-NEXT: blr |
| %1 = load i64, i64* %a, align 8 |
| diff --git a/llvm/test/CodeGen/PowerPC/popcount.ll b/llvm/test/CodeGen/PowerPC/popcount.ll |
| index fb20f1d3ee4..170d3d77d08 100644 |
| --- a/llvm/test/CodeGen/PowerPC/popcount.ll |
| +++ b/llvm/test/CodeGen/PowerPC/popcount.ll |
| @@ -58,17 +58,17 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { |
| ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0 |
| ; CHECK-NEXT: mffprd 3, 0 |
| ; CHECK-NEXT: popcntd 3, 3 |
| -; CHECK-NEXT: xxswapd 0, 34 |
| -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0 |
| -; CHECK-NEXT: mffprd 4, 0 |
| +; CHECK-NEXT: xxswapd 1, 34 |
| +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 |
| +; CHECK-NEXT: mffprd 4, 1 |
| ; CHECK-NEXT: popcntd 4, 4 |
| ; CHECK-NEXT: add 3, 4, 3 |
| ; CHECK-NEXT: mtfprd 0, 3 |
| -; CHECK-NEXT: # kill: def $vsl0 killed $f0 |
| +; CHECK-NEXT: fmr 2, 0 |
| ; CHECK-NEXT: li 3, 0 |
| -; CHECK-NEXT: mtfprd 1, 3 |
| -; CHECK-NEXT: # kill: def $vsl1 killed $f1 |
| -; CHECK-NEXT: xxmrghd 34, 1, 0 |
| +; CHECK-NEXT: mtfprd 0, 3 |
| +; CHECK-NEXT: fmr 3, 0 |
| +; CHECK-NEXT: xxmrghd 34, 3, 2 |
| ; CHECK-NEXT: blr |
| Entry: |
| %1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0) |
| diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll |
| index 4a78218262c..39469d63b90 100644 |
| --- a/llvm/test/CodeGen/PowerPC/vsx.ll |
| +++ b/llvm/test/CodeGen/PowerPC/vsx.ll |
| @@ -1548,8 +1548,8 @@ define <2 x i64> @test46(<2 x float> %a) { |
| ; CHECK-FISL-NEXT: ld r3, -24(r1) |
| ; CHECK-FISL-NEXT: std r3, -16(r1) |
| ; CHECK-FISL-NEXT: addi r3, r1, -16 |
| -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 |
| -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 |
| +; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3 |
| +; CHECK-FISL-NEXT: xxlor v2, vs1, vs1 |
| ; CHECK-FISL-NEXT: blr |
| ; |
| ; CHECK-LE-LABEL: test46: |
| @@ -1616,8 +1616,8 @@ define <2 x i64> @test47(<2 x float> %a) { |
| ; CHECK-FISL-NEXT: ld r3, -24(r1) |
| ; CHECK-FISL-NEXT: std r3, -16(r1) |
| ; CHECK-FISL-NEXT: addi r3, r1, -16 |
| -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 |
| -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 |
| +; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3 |
| +; CHECK-FISL-NEXT: xxlor v2, vs1, vs1 |
| ; CHECK-FISL-NEXT: blr |
| ; |
| ; CHECK-LE-LABEL: test47: |
| @@ -1859,13 +1859,13 @@ define <2 x i64> @test60(<2 x i64> %a, <2 x i64> %b) { |
| ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 |
| ; CHECK-FISL-NEXT: addi r3, r1, -48 |
| ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 |
| -; CHECK-FISL-NEXT: lwz r3, -20(r1) |
| -; CHECK-FISL-NEXT: ld r4, -40(r1) |
| -; CHECK-FISL-NEXT: sld r3, r4, r3 |
| +; CHECK-FISL-NEXT: lwz r4, -20(r1) |
| +; CHECK-FISL-NEXT: ld r3, -40(r1) |
| +; CHECK-FISL-NEXT: sld r3, r3, r4 |
| ; CHECK-FISL-NEXT: std r3, -8(r1) |
| -; CHECK-FISL-NEXT: lwz r3, -28(r1) |
| -; CHECK-FISL-NEXT: ld r4, -48(r1) |
| -; CHECK-FISL-NEXT: sld r3, r4, r3 |
| +; CHECK-FISL-NEXT: lwz r4, -28(r1) |
| +; CHECK-FISL-NEXT: ld r3, -48(r1) |
| +; CHECK-FISL-NEXT: sld r3, r3, r4 |
| ; CHECK-FISL-NEXT: std r3, -16(r1) |
| ; CHECK-FISL-NEXT: addi r3, r1, -16 |
| ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 |
| @@ -1925,13 +1925,13 @@ define <2 x i64> @test61(<2 x i64> %a, <2 x i64> %b) { |
| ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 |
| ; CHECK-FISL-NEXT: addi r3, r1, -48 |
| ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 |
| -; CHECK-FISL-NEXT: lwz r3, -20(r1) |
| -; CHECK-FISL-NEXT: ld r4, -40(r1) |
| -; CHECK-FISL-NEXT: srd r3, r4, r3 |
| +; CHECK-FISL-NEXT: lwz r4, -20(r1) |
| +; CHECK-FISL-NEXT: ld r3, -40(r1) |
| +; CHECK-FISL-NEXT: srd r3, r3, r4 |
| ; CHECK-FISL-NEXT: std r3, -8(r1) |
| -; CHECK-FISL-NEXT: lwz r3, -28(r1) |
| -; CHECK-FISL-NEXT: ld r4, -48(r1) |
| -; CHECK-FISL-NEXT: srd r3, r4, r3 |
| +; CHECK-FISL-NEXT: lwz r4, -28(r1) |
| +; CHECK-FISL-NEXT: ld r3, -48(r1) |
| +; CHECK-FISL-NEXT: srd r3, r3, r4 |
| ; CHECK-FISL-NEXT: std r3, -16(r1) |
| ; CHECK-FISL-NEXT: addi r3, r1, -16 |
| ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 |
| @@ -1991,13 +1991,13 @@ define <2 x i64> @test62(<2 x i64> %a, <2 x i64> %b) { |
| ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 |
| ; CHECK-FISL-NEXT: addi r3, r1, -48 |
| ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 |
| -; CHECK-FISL-NEXT: lwz r3, -20(r1) |
| -; CHECK-FISL-NEXT: ld r4, -40(r1) |
| -; CHECK-FISL-NEXT: srad r3, r4, r3 |
| +; CHECK-FISL-NEXT: lwz r4, -20(r1) |
| +; CHECK-FISL-NEXT: ld r3, -40(r1) |
| +; CHECK-FISL-NEXT: srad r3, r3, r4 |
| ; CHECK-FISL-NEXT: std r3, -8(r1) |
| -; CHECK-FISL-NEXT: lwz r3, -28(r1) |
| -; CHECK-FISL-NEXT: ld r4, -48(r1) |
| -; CHECK-FISL-NEXT: srad r3, r4, r3 |
| +; CHECK-FISL-NEXT: lwz r4, -28(r1) |
| +; CHECK-FISL-NEXT: ld r3, -48(r1) |
| +; CHECK-FISL-NEXT: srad r3, r3, r4 |
| ; CHECK-FISL-NEXT: std r3, -16(r1) |
| ; CHECK-FISL-NEXT: addi r3, r1, -16 |
| ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 |
| @@ -2426,12 +2426,12 @@ define <2 x i32> @test80(i32 %v) { |
| ; CHECK-FISL: # %bb.0: |
| ; CHECK-FISL-NEXT: # kill: def $r3 killed $r3 killed $x3 |
| ; CHECK-FISL-NEXT: stw r3, -16(r1) |
| -; CHECK-FISL-NEXT: addi r3, r1, -16 |
| -; CHECK-FISL-NEXT: lxvw4x vs0, 0, r3 |
| +; CHECK-FISL-NEXT: addi r4, r1, -16 |
| +; CHECK-FISL-NEXT: lxvw4x vs0, 0, r4 |
| ; CHECK-FISL-NEXT: xxspltw v2, vs0, 0 |
| -; CHECK-FISL-NEXT: addis r3, r2, .LCPI65_0@toc@ha |
| -; CHECK-FISL-NEXT: addi r3, r3, .LCPI65_0@toc@l |
| -; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 |
| +; CHECK-FISL-NEXT: addis r4, r2, .LCPI65_0@toc@ha |
| +; CHECK-FISL-NEXT: addi r4, r4, .LCPI65_0@toc@l |
| +; CHECK-FISL-NEXT: lxvw4x v3, 0, r4 |
| ; CHECK-FISL-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-FISL-NEXT: blr |
| ; |
| diff --git a/llvm/test/CodeGen/SPARC/fp16-promote.ll b/llvm/test/CodeGen/SPARC/fp16-promote.ll |
| index 0c402430dad..9709322f48a 100644 |
| --- a/llvm/test/CodeGen/SPARC/fp16-promote.ll |
| +++ b/llvm/test/CodeGen/SPARC/fp16-promote.ll |
| @@ -182,11 +182,11 @@ define void @test_fptrunc_double(double %d, half* %p) nounwind { |
| ; V8-UNOPT-NEXT: std %i4, [%fp+-8] |
| ; V8-UNOPT-NEXT: ldd [%fp+-8], %f0 |
| ; V8-UNOPT-NEXT: std %f0, [%fp+-16] |
| -; V8-UNOPT-NEXT: ldd [%fp+-16], %i0 |
| -; V8-UNOPT-NEXT: mov %i0, %i3 |
| -; V8-UNOPT-NEXT: ! kill: def $i1 killed $i1 killed $i0_i1 |
| -; V8-UNOPT-NEXT: mov %i3, %o0 |
| -; V8-UNOPT-NEXT: mov %i1, %o1 |
| +; V8-UNOPT-NEXT: ldd [%fp+-16], %i4 |
| +; V8-UNOPT-NEXT: mov %i4, %i0 |
| +; V8-UNOPT-NEXT: ! kill: def $i5 killed $i5 killed $i4_i5 |
| +; V8-UNOPT-NEXT: mov %i0, %o0 |
| +; V8-UNOPT-NEXT: mov %i5, %o1 |
| ; V8-UNOPT-NEXT: call __truncdfhf2 |
| ; V8-UNOPT-NEXT: st %i2, [%fp+-20] |
| ; V8-UNOPT-NEXT: ld [%fp+-20], %i0 ! 4-byte Folded Reload |
| diff --git a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll |
| index b5635c7e0f0..48ad2a2c077 100644 |
| --- a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll |
| +++ b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll |
| @@ -8,34 +8,34 @@ |
| define i32 @z() nounwind ssp { |
| ; CHECK-LABEL: z: |
| ; CHECK: ## %bb.0: ## %entry |
| +; CHECK-NEXT: pushl %ebx |
| ; CHECK-NEXT: pushl %edi |
| ; CHECK-NEXT: pushl %esi |
| -; CHECK-NEXT: subl $148, %esp |
| +; CHECK-NEXT: subl $144, %esp |
| ; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %eax |
| ; CHECK-NEXT: movl (%eax), %eax |
| ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: movb $48, {{[0-9]+}}(%esp) |
| -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al |
| -; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) |
| +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl |
| +; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: movb $15, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: movl $8, %ecx |
| -; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx |
| -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill |
| +; CHECK-NEXT: movl $8, %edx |
| +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi |
| +; CHECK-NEXT: movl %edx, %ecx |
| ; CHECK-NEXT: movl %eax, %edi |
| -; CHECK-NEXT: movl %edx, %esi |
| +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill |
| ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) |
| ; CHECK-NEXT: movl %eax, %ecx |
| ; CHECK-NEXT: addl $36, %ecx |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload |
| ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill |
| -; CHECK-NEXT: movl %esi, %ecx |
| +; CHECK-NEXT: movl %edx, %ecx |
| ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload |
| -; CHECK-NEXT: movl %edx, %esi |
| +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload |
| ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) |
| -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl |
| -; CHECK-NEXT: movb %cl, 32(%eax) |
| -; CHECK-NEXT: movb %cl, 68(%eax) |
| +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl |
| +; CHECK-NEXT: movb %bl, 32(%eax) |
| +; CHECK-NEXT: movb %bl, 68(%eax) |
| ; CHECK-NEXT: calll _f |
| ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax |
| @@ -50,9 +50,10 @@ define i32 @z() nounwind ssp { |
| ; CHECK-NEXT: jne LBB0_3 |
| ; CHECK-NEXT: ## %bb.2: ## %SP_return |
| ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload |
| -; CHECK-NEXT: addl $148, %esp |
| +; CHECK-NEXT: addl $144, %esp |
| ; CHECK-NEXT: popl %esi |
| ; CHECK-NEXT: popl %edi |
| +; CHECK-NEXT: popl %ebx |
| ; CHECK-NEXT: retl |
| ; CHECK-NEXT: LBB0_3: ## %CallStackCheckFailBlk |
| ; CHECK-NEXT: calll ___stack_chk_fail |
| diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll |
| index 7a1f34c65c1..16fde4074ea 100644 |
| --- a/llvm/test/CodeGen/X86/atomic-unordered.ll |
| +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll |
| @@ -126,8 +126,8 @@ define void @narrow_writeback_and(i64* %ptr) { |
| ; CHECK-O0-NEXT: movq (%rdi), %rax |
| ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax |
| ; CHECK-O0-NEXT: andl $-256, %eax |
| -; CHECK-O0-NEXT: # kill: def $rax killed $eax |
| -; CHECK-O0-NEXT: movq %rax, (%rdi) |
| +; CHECK-O0-NEXT: movl %eax, %ecx |
| +; CHECK-O0-NEXT: movq %rcx, (%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: narrow_writeback_and: |
| @@ -231,10 +231,10 @@ define i128 @load_i128(i128* %ptr) { |
| ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-O0-NEXT: .cfi_offset %rbx, -16 |
| ; CHECK-O0-NEXT: xorl %eax, %eax |
| -; CHECK-O0-NEXT: # kill: def $rax killed $eax |
| -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload |
| -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload |
| +; CHECK-O0-NEXT: movl %eax, %ecx |
| +; CHECK-O0-NEXT: movq %rcx, %rax |
| +; CHECK-O0-NEXT: movq %rcx, %rdx |
| +; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload |
| ; CHECK-O0-NEXT: lock cmpxchg16b (%rdi) |
| ; CHECK-O0-NEXT: popq %rbx |
| @@ -326,14 +326,14 @@ define i256 @load_i256(i256* %ptr) { |
| ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; CHECK-O0-NEXT: callq __atomic_load |
| ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| -; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx |
| ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx |
| ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi |
| -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload |
| -; CHECK-O0-NEXT: movq %rsi, 24(%rdi) |
| -; CHECK-O0-NEXT: movq %rdx, 16(%rdi) |
| -; CHECK-O0-NEXT: movq %rcx, 8(%rdi) |
| -; CHECK-O0-NEXT: movq %rax, (%rdi) |
| +; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi |
| +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload |
| +; CHECK-O0-NEXT: movq %rdi, 24(%r9) |
| +; CHECK-O0-NEXT: movq %rsi, 16(%r9) |
| +; CHECK-O0-NEXT: movq %rdx, 8(%r9) |
| +; CHECK-O0-NEXT: movq %rax, (%r9) |
| ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload |
| ; CHECK-O0-NEXT: addq $56, %rsp |
| ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 |
| @@ -831,8 +831,8 @@ define i64 @load_fold_udiv1(i64* %p) { |
| ; CHECK-O0-NEXT: movq (%rdi), %rax |
| ; CHECK-O0-NEXT: xorl %ecx, %ecx |
| ; CHECK-O0-NEXT: movl %ecx, %edx |
| -; CHECK-O0-NEXT: movl $15, %ecx |
| -; CHECK-O0-NEXT: divq %rcx |
| +; CHECK-O0-NEXT: movl $15, %esi |
| +; CHECK-O0-NEXT: divq %rsi |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-CUR-LABEL: load_fold_udiv1: |
| @@ -1024,8 +1024,8 @@ define i64 @load_fold_urem1(i64* %p) { |
| ; CHECK-O0-NEXT: movq (%rdi), %rax |
| ; CHECK-O0-NEXT: xorl %ecx, %ecx |
| ; CHECK-O0-NEXT: movl %ecx, %edx |
| -; CHECK-O0-NEXT: movl $15, %ecx |
| -; CHECK-O0-NEXT: divq %rcx |
| +; CHECK-O0-NEXT: movl $15, %esi |
| +; CHECK-O0-NEXT: divq %rsi |
| ; CHECK-O0-NEXT: movq %rdx, %rax |
| ; CHECK-O0-NEXT: retq |
| ; |
| @@ -1475,9 +1475,9 @@ define i1 @load_fold_icmp3(i64* %p1, i64* %p2) { |
| ; CHECK-O0-NEXT: movq (%rdi), %rax |
| ; CHECK-O0-NEXT: movq (%rsi), %rcx |
| ; CHECK-O0-NEXT: subq %rcx, %rax |
| -; CHECK-O0-NEXT: sete %cl |
| +; CHECK-O0-NEXT: sete %dl |
| ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| -; CHECK-O0-NEXT: movb %cl, %al |
| +; CHECK-O0-NEXT: movb %dl, %al |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-CUR-LABEL: load_fold_icmp3: |
| @@ -2076,8 +2076,8 @@ define void @rmw_fold_and1(i64* %p, i64 %v) { |
| ; CHECK-O0-NEXT: movq (%rdi), %rax |
| ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax |
| ; CHECK-O0-NEXT: andl $15, %eax |
| -; CHECK-O0-NEXT: # kill: def $rax killed $eax |
| -; CHECK-O0-NEXT: movq %rax, (%rdi) |
| +; CHECK-O0-NEXT: movl %eax, %ecx |
| +; CHECK-O0-NEXT: movq %rcx, (%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: rmw_fold_and1: |
| @@ -2541,8 +2541,9 @@ define i16 @load_i8_anyext_i16(i8* %ptr) { |
| ; CHECK-O0-CUR-LABEL: load_i8_anyext_i16: |
| ; CHECK-O0-CUR: # %bb.0: |
| ; CHECK-O0-CUR-NEXT: movb (%rdi), %al |
| -; CHECK-O0-CUR-NEXT: movzbl %al, %eax |
| -; CHECK-O0-CUR-NEXT: # kill: def $ax killed $ax killed $eax |
| +; CHECK-O0-CUR-NEXT: movzbl %al, %ecx |
| +; CHECK-O0-CUR-NEXT: # kill: def $cx killed $cx killed $ecx |
| +; CHECK-O0-CUR-NEXT: movw %cx, %ax |
| ; CHECK-O0-CUR-NEXT: retq |
| ; |
| ; CHECK-O3-CUR-LABEL: load_i8_anyext_i16: |
| @@ -2670,12 +2671,13 @@ define i16 @load_combine(i8* %p) { |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movb (%rdi), %al |
| ; CHECK-O0-NEXT: movb 1(%rdi), %cl |
| -; CHECK-O0-NEXT: movzbl %al, %eax |
| -; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax |
| -; CHECK-O0-NEXT: movzbl %cl, %ecx |
| -; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx |
| -; CHECK-O0-NEXT: shlw $8, %cx |
| -; CHECK-O0-NEXT: orw %cx, %ax |
| +; CHECK-O0-NEXT: movzbl %al, %edx |
| +; CHECK-O0-NEXT: # kill: def $dx killed $dx killed $edx |
| +; CHECK-O0-NEXT: movzbl %cl, %esi |
| +; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi |
| +; CHECK-O0-NEXT: shlw $8, %si |
| +; CHECK-O0-NEXT: orw %si, %dx |
| +; CHECK-O0-NEXT: movw %dx, %ax |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: load_combine: |
| diff --git a/llvm/test/CodeGen/X86/atomic32.ll b/llvm/test/CodeGen/X86/atomic32.ll |
| index 05a10966a4f..24aebbba60d 100644 |
| --- a/llvm/test/CodeGen/X86/atomic32.ll |
| +++ b/llvm/test/CodeGen/X86/atomic32.ll |
| @@ -70,8 +70,8 @@ define void @atomic_fetch_and32() nounwind { |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: andl $5, %ecx |
| ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) |
| -; X64-NEXT: sete %cl |
| -; X64-NEXT: testb $1, %cl |
| +; X64-NEXT: sete %dl |
| +; X64-NEXT: testb $1, %dl |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| @@ -94,8 +94,8 @@ define void @atomic_fetch_and32() nounwind { |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: andl $5, %ecx |
| ; X86-NEXT: lock cmpxchgl %ecx, sc32 |
| -; X86-NEXT: sete %cl |
| -; X86-NEXT: testb $1, %cl |
| +; X86-NEXT: sete %dl |
| +; X86-NEXT: testb $1, %dl |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill |
| @@ -124,8 +124,8 @@ define void @atomic_fetch_or32() nounwind { |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: orl $5, %ecx |
| ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) |
| -; X64-NEXT: sete %cl |
| -; X64-NEXT: testb $1, %cl |
| +; X64-NEXT: sete %dl |
| +; X64-NEXT: testb $1, %dl |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| @@ -148,8 +148,8 @@ define void @atomic_fetch_or32() nounwind { |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: orl $5, %ecx |
| ; X86-NEXT: lock cmpxchgl %ecx, sc32 |
| -; X86-NEXT: sete %cl |
| -; X86-NEXT: testb $1, %cl |
| +; X86-NEXT: sete %dl |
| +; X86-NEXT: testb $1, %dl |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill |
| @@ -178,8 +178,8 @@ define void @atomic_fetch_xor32() nounwind { |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: xorl $5, %ecx |
| ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) |
| -; X64-NEXT: sete %cl |
| -; X64-NEXT: testb $1, %cl |
| +; X64-NEXT: sete %dl |
| +; X64-NEXT: testb $1, %dl |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| @@ -202,8 +202,8 @@ define void @atomic_fetch_xor32() nounwind { |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: xorl $5, %ecx |
| ; X86-NEXT: lock cmpxchgl %ecx, sc32 |
| -; X86-NEXT: sete %cl |
| -; X86-NEXT: testb $1, %cl |
| +; X86-NEXT: sete %dl |
| +; X86-NEXT: testb $1, %dl |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill |
| @@ -234,8 +234,8 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { |
| ; X64-NEXT: andl %edx, %ecx |
| ; X64-NEXT: notl %ecx |
| ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) |
| -; X64-NEXT: sete %cl |
| -; X64-NEXT: testb $1, %cl |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: jne .LBB5_2 |
| ; X64-NEXT: jmp .LBB5_1 |
| @@ -244,6 +244,7 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { |
| ; |
| ; X86-LABEL: atomic_fetch_nand32: |
| ; X86: # %bb.0: |
| +; X86-NEXT: pushl %ebx |
| ; X86-NEXT: subl $8, %esp |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl sc32, %ecx |
| @@ -257,13 +258,14 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { |
| ; X86-NEXT: andl %edx, %ecx |
| ; X86-NEXT: notl %ecx |
| ; X86-NEXT: lock cmpxchgl %ecx, sc32 |
| -; X86-NEXT: sete %cl |
| -; X86-NEXT: testb $1, %cl |
| +; X86-NEXT: sete %bl |
| +; X86-NEXT: testb $1, %bl |
| ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill |
| ; X86-NEXT: jne .LBB5_2 |
| ; X86-NEXT: jmp .LBB5_1 |
| ; X86-NEXT: .LBB5_2: # %atomicrmw.end |
| ; X86-NEXT: addl $8, %esp |
| +; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| %t1 = atomicrmw nand i32* @sc32, i32 %x acquire |
| ret void |
| @@ -283,8 +285,8 @@ define void @atomic_fetch_max32(i32 %x) nounwind { |
| ; X64-NEXT: subl %edx, %ecx |
| ; X64-NEXT: cmovgl %eax, %edx |
| ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) |
| -; X64-NEXT: sete %dl |
| -; X64-NEXT: testb $1, %dl |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: jne .LBB6_2 |
| @@ -294,6 +296,7 @@ define void @atomic_fetch_max32(i32 %x) nounwind { |
| ; |
| ; X86-CMOV-LABEL: atomic_fetch_max32: |
| ; X86-CMOV: # %bb.0: |
| +; X86-CMOV-NEXT: pushl %ebx |
| ; X86-CMOV-NEXT: subl $12, %esp |
| ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-CMOV-NEXT: movl sc32, %ecx |
| @@ -307,18 +310,20 @@ define void @atomic_fetch_max32(i32 %x) nounwind { |
| ; X86-CMOV-NEXT: subl %edx, %ecx |
| ; X86-CMOV-NEXT: cmovgl %eax, %edx |
| ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-CMOV-NEXT: sete %dl |
| -; X86-CMOV-NEXT: testb $1, %dl |
| +; X86-CMOV-NEXT: sete %bl |
| +; X86-CMOV-NEXT: testb $1, %bl |
| ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill |
| ; X86-CMOV-NEXT: jne .LBB6_2 |
| ; X86-CMOV-NEXT: jmp .LBB6_1 |
| ; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end |
| ; X86-CMOV-NEXT: addl $12, %esp |
| +; X86-CMOV-NEXT: popl %ebx |
| ; X86-CMOV-NEXT: retl |
| ; |
| ; X86-NOCMOV-LABEL: atomic_fetch_max32: |
| ; X86-NOCMOV: # %bb.0: |
| +; X86-NOCMOV-NEXT: pushl %ebx |
| ; X86-NOCMOV-NEXT: pushl %esi |
| ; X86-NOCMOV-NEXT: subl $20, %esp |
| ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax |
| @@ -347,18 +352,20 @@ define void @atomic_fetch_max32(i32 %x) nounwind { |
| ; X86-NOCMOV-NEXT: movl %ecx, %eax |
| ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload |
| ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-NOCMOV-NEXT: sete %dl |
| -; X86-NOCMOV-NEXT: testb $1, %dl |
| +; X86-NOCMOV-NEXT: sete %bl |
| +; X86-NOCMOV-NEXT: testb $1, %bl |
| ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NOCMOV-NEXT: jne .LBB6_2 |
| ; X86-NOCMOV-NEXT: jmp .LBB6_1 |
| ; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end |
| ; X86-NOCMOV-NEXT: addl $20, %esp |
| ; X86-NOCMOV-NEXT: popl %esi |
| +; X86-NOCMOV-NEXT: popl %ebx |
| ; X86-NOCMOV-NEXT: retl |
| ; |
| ; X86-NOX87-LABEL: atomic_fetch_max32: |
| ; X86-NOX87: # %bb.0: |
| +; X86-NOX87-NEXT: pushl %ebx |
| ; X86-NOX87-NEXT: pushl %esi |
| ; X86-NOX87-NEXT: subl $20, %esp |
| ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| @@ -387,14 +394,15 @@ define void @atomic_fetch_max32(i32 %x) nounwind { |
| ; X86-NOX87-NEXT: movl %ecx, %eax |
| ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload |
| ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-NOX87-NEXT: sete %dl |
| -; X86-NOX87-NEXT: testb $1, %dl |
| +; X86-NOX87-NEXT: sete %bl |
| +; X86-NOX87-NEXT: testb $1, %bl |
| ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NOX87-NEXT: jne .LBB6_2 |
| ; X86-NOX87-NEXT: jmp .LBB6_1 |
| ; X86-NOX87-NEXT: .LBB6_2: # %atomicrmw.end |
| ; X86-NOX87-NEXT: addl $20, %esp |
| ; X86-NOX87-NEXT: popl %esi |
| +; X86-NOX87-NEXT: popl %ebx |
| ; X86-NOX87-NEXT: retl |
| %t1 = atomicrmw max i32* @sc32, i32 %x acquire |
| ret void |
| @@ -414,8 +422,8 @@ define void @atomic_fetch_min32(i32 %x) nounwind { |
| ; X64-NEXT: subl %edx, %ecx |
| ; X64-NEXT: cmovlel %eax, %edx |
| ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) |
| -; X64-NEXT: sete %dl |
| -; X64-NEXT: testb $1, %dl |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: jne .LBB7_2 |
| @@ -425,6 +433,7 @@ define void @atomic_fetch_min32(i32 %x) nounwind { |
| ; |
| ; X86-CMOV-LABEL: atomic_fetch_min32: |
| ; X86-CMOV: # %bb.0: |
| +; X86-CMOV-NEXT: pushl %ebx |
| ; X86-CMOV-NEXT: subl $12, %esp |
| ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-CMOV-NEXT: movl sc32, %ecx |
| @@ -438,18 +447,20 @@ define void @atomic_fetch_min32(i32 %x) nounwind { |
| ; X86-CMOV-NEXT: subl %edx, %ecx |
| ; X86-CMOV-NEXT: cmovlel %eax, %edx |
| ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-CMOV-NEXT: sete %dl |
| -; X86-CMOV-NEXT: testb $1, %dl |
| +; X86-CMOV-NEXT: sete %bl |
| +; X86-CMOV-NEXT: testb $1, %bl |
| ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill |
| ; X86-CMOV-NEXT: jne .LBB7_2 |
| ; X86-CMOV-NEXT: jmp .LBB7_1 |
| ; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end |
| ; X86-CMOV-NEXT: addl $12, %esp |
| +; X86-CMOV-NEXT: popl %ebx |
| ; X86-CMOV-NEXT: retl |
| ; |
| ; X86-NOCMOV-LABEL: atomic_fetch_min32: |
| ; X86-NOCMOV: # %bb.0: |
| +; X86-NOCMOV-NEXT: pushl %ebx |
| ; X86-NOCMOV-NEXT: pushl %esi |
| ; X86-NOCMOV-NEXT: subl $20, %esp |
| ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax |
| @@ -478,18 +489,20 @@ define void @atomic_fetch_min32(i32 %x) nounwind { |
| ; X86-NOCMOV-NEXT: movl %ecx, %eax |
| ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload |
| ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-NOCMOV-NEXT: sete %dl |
| -; X86-NOCMOV-NEXT: testb $1, %dl |
| +; X86-NOCMOV-NEXT: sete %bl |
| +; X86-NOCMOV-NEXT: testb $1, %bl |
| ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NOCMOV-NEXT: jne .LBB7_2 |
| ; X86-NOCMOV-NEXT: jmp .LBB7_1 |
| ; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end |
| ; X86-NOCMOV-NEXT: addl $20, %esp |
| ; X86-NOCMOV-NEXT: popl %esi |
| +; X86-NOCMOV-NEXT: popl %ebx |
| ; X86-NOCMOV-NEXT: retl |
| ; |
| ; X86-NOX87-LABEL: atomic_fetch_min32: |
| ; X86-NOX87: # %bb.0: |
| +; X86-NOX87-NEXT: pushl %ebx |
| ; X86-NOX87-NEXT: pushl %esi |
| ; X86-NOX87-NEXT: subl $20, %esp |
| ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| @@ -518,14 +531,15 @@ define void @atomic_fetch_min32(i32 %x) nounwind { |
| ; X86-NOX87-NEXT: movl %ecx, %eax |
| ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload |
| ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-NOX87-NEXT: sete %dl |
| -; X86-NOX87-NEXT: testb $1, %dl |
| +; X86-NOX87-NEXT: sete %bl |
| +; X86-NOX87-NEXT: testb $1, %bl |
| ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NOX87-NEXT: jne .LBB7_2 |
| ; X86-NOX87-NEXT: jmp .LBB7_1 |
| ; X86-NOX87-NEXT: .LBB7_2: # %atomicrmw.end |
| ; X86-NOX87-NEXT: addl $20, %esp |
| ; X86-NOX87-NEXT: popl %esi |
| +; X86-NOX87-NEXT: popl %ebx |
| ; X86-NOX87-NEXT: retl |
| %t1 = atomicrmw min i32* @sc32, i32 %x acquire |
| ret void |
| @@ -545,8 +559,8 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { |
| ; X64-NEXT: subl %edx, %ecx |
| ; X64-NEXT: cmoval %eax, %edx |
| ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) |
| -; X64-NEXT: sete %dl |
| -; X64-NEXT: testb $1, %dl |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: jne .LBB8_2 |
| @@ -556,6 +570,7 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { |
| ; |
| ; X86-CMOV-LABEL: atomic_fetch_umax32: |
| ; X86-CMOV: # %bb.0: |
| +; X86-CMOV-NEXT: pushl %ebx |
| ; X86-CMOV-NEXT: subl $12, %esp |
| ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-CMOV-NEXT: movl sc32, %ecx |
| @@ -569,18 +584,20 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { |
| ; X86-CMOV-NEXT: subl %edx, %ecx |
| ; X86-CMOV-NEXT: cmoval %eax, %edx |
| ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-CMOV-NEXT: sete %dl |
| -; X86-CMOV-NEXT: testb $1, %dl |
| +; X86-CMOV-NEXT: sete %bl |
| +; X86-CMOV-NEXT: testb $1, %bl |
| ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill |
| ; X86-CMOV-NEXT: jne .LBB8_2 |
| ; X86-CMOV-NEXT: jmp .LBB8_1 |
| ; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end |
| ; X86-CMOV-NEXT: addl $12, %esp |
| +; X86-CMOV-NEXT: popl %ebx |
| ; X86-CMOV-NEXT: retl |
| ; |
| ; X86-NOCMOV-LABEL: atomic_fetch_umax32: |
| ; X86-NOCMOV: # %bb.0: |
| +; X86-NOCMOV-NEXT: pushl %ebx |
| ; X86-NOCMOV-NEXT: pushl %esi |
| ; X86-NOCMOV-NEXT: subl $20, %esp |
| ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax |
| @@ -609,18 +626,20 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { |
| ; X86-NOCMOV-NEXT: movl %ecx, %eax |
| ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload |
| ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-NOCMOV-NEXT: sete %dl |
| -; X86-NOCMOV-NEXT: testb $1, %dl |
| +; X86-NOCMOV-NEXT: sete %bl |
| +; X86-NOCMOV-NEXT: testb $1, %bl |
| ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NOCMOV-NEXT: jne .LBB8_2 |
| ; X86-NOCMOV-NEXT: jmp .LBB8_1 |
| ; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end |
| ; X86-NOCMOV-NEXT: addl $20, %esp |
| ; X86-NOCMOV-NEXT: popl %esi |
| +; X86-NOCMOV-NEXT: popl %ebx |
| ; X86-NOCMOV-NEXT: retl |
| ; |
| ; X86-NOX87-LABEL: atomic_fetch_umax32: |
| ; X86-NOX87: # %bb.0: |
| +; X86-NOX87-NEXT: pushl %ebx |
| ; X86-NOX87-NEXT: pushl %esi |
| ; X86-NOX87-NEXT: subl $20, %esp |
| ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| @@ -649,14 +668,15 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { |
| ; X86-NOX87-NEXT: movl %ecx, %eax |
| ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload |
| ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-NOX87-NEXT: sete %dl |
| -; X86-NOX87-NEXT: testb $1, %dl |
| +; X86-NOX87-NEXT: sete %bl |
| +; X86-NOX87-NEXT: testb $1, %bl |
| ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NOX87-NEXT: jne .LBB8_2 |
| ; X86-NOX87-NEXT: jmp .LBB8_1 |
| ; X86-NOX87-NEXT: .LBB8_2: # %atomicrmw.end |
| ; X86-NOX87-NEXT: addl $20, %esp |
| ; X86-NOX87-NEXT: popl %esi |
| +; X86-NOX87-NEXT: popl %ebx |
| ; X86-NOX87-NEXT: retl |
| %t1 = atomicrmw umax i32* @sc32, i32 %x acquire |
| ret void |
| @@ -676,8 +696,8 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { |
| ; X64-NEXT: subl %edx, %ecx |
| ; X64-NEXT: cmovbel %eax, %edx |
| ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) |
| -; X64-NEXT: sete %dl |
| -; X64-NEXT: testb $1, %dl |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; X64-NEXT: jne .LBB9_2 |
| @@ -687,6 +707,7 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { |
| ; |
| ; X86-CMOV-LABEL: atomic_fetch_umin32: |
| ; X86-CMOV: # %bb.0: |
| +; X86-CMOV-NEXT: pushl %ebx |
| ; X86-CMOV-NEXT: subl $12, %esp |
| ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-CMOV-NEXT: movl sc32, %ecx |
| @@ -700,18 +721,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { |
| ; X86-CMOV-NEXT: subl %edx, %ecx |
| ; X86-CMOV-NEXT: cmovbel %eax, %edx |
| ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-CMOV-NEXT: sete %dl |
| -; X86-CMOV-NEXT: testb $1, %dl |
| +; X86-CMOV-NEXT: sete %bl |
| +; X86-CMOV-NEXT: testb $1, %bl |
| ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill |
| ; X86-CMOV-NEXT: jne .LBB9_2 |
| ; X86-CMOV-NEXT: jmp .LBB9_1 |
| ; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end |
| ; X86-CMOV-NEXT: addl $12, %esp |
| +; X86-CMOV-NEXT: popl %ebx |
| ; X86-CMOV-NEXT: retl |
| ; |
| ; X86-NOCMOV-LABEL: atomic_fetch_umin32: |
| ; X86-NOCMOV: # %bb.0: |
| +; X86-NOCMOV-NEXT: pushl %ebx |
| ; X86-NOCMOV-NEXT: pushl %esi |
| ; X86-NOCMOV-NEXT: subl $20, %esp |
| ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax |
| @@ -740,18 +763,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { |
| ; X86-NOCMOV-NEXT: movl %ecx, %eax |
| ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload |
| ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-NOCMOV-NEXT: sete %dl |
| -; X86-NOCMOV-NEXT: testb $1, %dl |
| +; X86-NOCMOV-NEXT: sete %bl |
| +; X86-NOCMOV-NEXT: testb $1, %bl |
| ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NOCMOV-NEXT: jne .LBB9_2 |
| ; X86-NOCMOV-NEXT: jmp .LBB9_1 |
| ; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end |
| ; X86-NOCMOV-NEXT: addl $20, %esp |
| ; X86-NOCMOV-NEXT: popl %esi |
| +; X86-NOCMOV-NEXT: popl %ebx |
| ; X86-NOCMOV-NEXT: retl |
| ; |
| ; X86-NOX87-LABEL: atomic_fetch_umin32: |
| ; X86-NOX87: # %bb.0: |
| +; X86-NOX87-NEXT: pushl %ebx |
| ; X86-NOX87-NEXT: pushl %esi |
| ; X86-NOX87-NEXT: subl $20, %esp |
| ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| @@ -780,14 +805,15 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { |
| ; X86-NOX87-NEXT: movl %ecx, %eax |
| ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload |
| ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 |
| -; X86-NOX87-NEXT: sete %dl |
| -; X86-NOX87-NEXT: testb $1, %dl |
| +; X86-NOX87-NEXT: sete %bl |
| +; X86-NOX87-NEXT: testb $1, %bl |
| ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NOX87-NEXT: jne .LBB9_2 |
| ; X86-NOX87-NEXT: jmp .LBB9_1 |
| ; X86-NOX87-NEXT: .LBB9_2: # %atomicrmw.end |
| ; X86-NOX87-NEXT: addl $20, %esp |
| ; X86-NOX87-NEXT: popl %esi |
| +; X86-NOX87-NEXT: popl %ebx |
| ; X86-NOX87-NEXT: retl |
| %t1 = atomicrmw umin i32* @sc32, i32 %x acquire |
| ret void |
| diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll |
| index 963561dc8de..8b40380afcb 100644 |
| --- a/llvm/test/CodeGen/X86/atomic64.ll |
| +++ b/llvm/test/CodeGen/X86/atomic64.ll |
| @@ -137,12 +137,12 @@ define void @atomic_fetch_and64() nounwind { |
| ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: andl $5, %ecx |
| -; X64-NEXT: # kill: def $rcx killed $ecx |
| -; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) |
| -; X64-NEXT: sete %cl |
| -; X64-NEXT: testb $1, %cl |
| -; X64-NEXT: movq %rax, %rcx |
| -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| +; X64-NEXT: movl %ecx, %edx |
| +; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| +; X64-NEXT: movq %rax, %rdx |
| +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: jne .LBB2_2 |
| ; X64-NEXT: jmp .LBB2_1 |
| @@ -202,8 +202,8 @@ define void @atomic_fetch_or64() nounwind { |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: orq $5, %rcx |
| ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) |
| -; X64-NEXT: sete %cl |
| -; X64-NEXT: testb $1, %cl |
| +; X64-NEXT: sete %dl |
| +; X64-NEXT: testb $1, %dl |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| @@ -265,8 +265,8 @@ define void @atomic_fetch_xor64() nounwind { |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: xorq $5, %rcx |
| ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) |
| -; X64-NEXT: sete %cl |
| -; X64-NEXT: testb $1, %cl |
| +; X64-NEXT: sete %dl |
| +; X64-NEXT: testb $1, %dl |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| @@ -330,8 +330,8 @@ define void @atomic_fetch_nand64(i64 %x) nounwind { |
| ; X64-NEXT: andq %rdx, %rcx |
| ; X64-NEXT: notq %rcx |
| ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) |
| -; X64-NEXT: sete %cl |
| -; X64-NEXT: testb $1, %cl |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: jne .LBB5_2 |
| ; X64-NEXT: jmp .LBB5_1 |
| @@ -373,8 +373,8 @@ define void @atomic_fetch_max64(i64 %x) nounwind { |
| ; X64-NEXT: subq %rdx, %rcx |
| ; X64-NEXT: cmovgq %rax, %rdx |
| ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) |
| -; X64-NEXT: sete %dl |
| -; X64-NEXT: testb $1, %dl |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: jne .LBB6_2 |
| @@ -471,8 +471,8 @@ define void @atomic_fetch_min64(i64 %x) nounwind { |
| ; X64-NEXT: subq %rdx, %rcx |
| ; X64-NEXT: cmovleq %rax, %rdx |
| ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) |
| -; X64-NEXT: sete %dl |
| -; X64-NEXT: testb $1, %dl |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: jne .LBB7_2 |
| @@ -569,8 +569,8 @@ define void @atomic_fetch_umax64(i64 %x) nounwind { |
| ; X64-NEXT: subq %rdx, %rcx |
| ; X64-NEXT: cmovaq %rax, %rdx |
| ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) |
| -; X64-NEXT: sete %dl |
| -; X64-NEXT: testb $1, %dl |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: jne .LBB8_2 |
| @@ -667,8 +667,8 @@ define void @atomic_fetch_umin64(i64 %x) nounwind { |
| ; X64-NEXT: subq %rdx, %rcx |
| ; X64-NEXT: cmovbeq %rax, %rdx |
| ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) |
| -; X64-NEXT: sete %dl |
| -; X64-NEXT: testb $1, %dl |
| +; X64-NEXT: sete %sil |
| +; X64-NEXT: testb $1, %sil |
| ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; X64-NEXT: jne .LBB9_2 |
| diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll |
| index f448bfec2ec..718449d7a77 100644 |
| --- a/llvm/test/CodeGen/X86/avx-load-store.ll |
| +++ b/llvm/test/CodeGen/X86/avx-load-store.ll |
| @@ -175,8 +175,8 @@ define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp |
| ; CHECK_O0: # %bb.0: |
| ; CHECK_O0-NEXT: # implicit-def: $ymm2 |
| ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 |
| -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 |
| -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) |
| +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 |
| +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) |
| ; CHECK_O0-NEXT: vzeroupper |
| ; CHECK_O0-NEXT: retq |
| %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| @@ -197,8 +197,8 @@ define void @double_save_volatile(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nou |
| ; CHECK_O0: # %bb.0: |
| ; CHECK_O0-NEXT: # implicit-def: $ymm2 |
| ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 |
| -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 |
| -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) |
| +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 |
| +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) |
| ; CHECK_O0-NEXT: vzeroupper |
| ; CHECK_O0-NEXT: retq |
| %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| @@ -239,10 +239,10 @@ define void @f_f() nounwind { |
| ; CHECK_O0-NEXT: .LBB9_3: # %cif_mixed_test_all |
| ; CHECK_O0-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967295,0,0,0] |
| ; CHECK_O0-NEXT: vmovdqa %xmm0, %xmm0 |
| -; CHECK_O0-NEXT: # kill: def $ymm0 killed $xmm0 |
| +; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1 |
| ; CHECK_O0-NEXT: # implicit-def: $rax |
| -; CHECK_O0-NEXT: # implicit-def: $ymm1 |
| -; CHECK_O0-NEXT: vmaskmovps %ymm1, %ymm0, (%rax) |
| +; CHECK_O0-NEXT: # implicit-def: $ymm2 |
| +; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rax) |
| ; CHECK_O0-NEXT: .LBB9_4: # %cif_mixed_test_any_check |
| allocas: |
| br i1 undef, label %cif_mask_all, label %cif_mask_mixed |
| @@ -276,8 +276,8 @@ define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind { |
| ; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1 |
| ; CHECK_O0-NEXT: # implicit-def: $ymm2 |
| ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 |
| -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 |
| -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) |
| +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 |
| +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) |
| ; CHECK_O0-NEXT: vzeroupper |
| ; CHECK_O0-NEXT: retq |
| %b = load <8 x i32>, <8 x i32>* %bp, align 1 |
| @@ -321,8 +321,8 @@ define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind { |
| ; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1 |
| ; CHECK_O0-NEXT: # implicit-def: $ymm2 |
| ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 |
| -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 |
| -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) |
| +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 |
| +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) |
| ; CHECK_O0-NEXT: vzeroupper |
| ; CHECK_O0-NEXT: retq |
| %b = load <4 x i64>, <4 x i64>* %bp, align 16 |
| diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll |
| index 186370ca675..c4e009d54ec 100755 |
| --- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll |
| +++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll |
| @@ -40,20 +40,22 @@ define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %f |
| ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload |
| ; CHECK-NEXT: vpmovd2m %xmm0, %k0 |
| ; CHECK-NEXT: kmovq %k0, %k1 |
| -; CHECK-NEXT: kmovd %k0, %ecx |
| -; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx |
| -; CHECK-NEXT: movzbl %cl, %ecx |
| -; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx |
| -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload |
| -; CHECK-NEXT: movl $4, %edx |
| -; CHECK-NEXT: movl %edx, %esi |
| +; CHECK-NEXT: kmovd %k0, %esi |
| +; CHECK-NEXT: ## kill: def $sil killed $sil killed $esi |
| +; CHECK-NEXT: movzbl %sil, %edi |
| +; CHECK-NEXT: ## kill: def $di killed $di killed $edi |
| +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload |
| +; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| +; CHECK-NEXT: movq %rcx, %rdi |
| +; CHECK-NEXT: movl $4, %r8d |
| +; CHECK-NEXT: movl %r8d, %esi |
| +; CHECK-NEXT: movl %r8d, %edx |
| ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| -; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; CHECK-NEXT: callq _calc_expected_mask_val |
| ; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax |
| -; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx ## 2-byte Reload |
| -; CHECK-NEXT: movzwl %cx, %edi |
| +; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %r9w ## 2-byte Reload |
| +; CHECK-NEXT: movzwl %r9w, %edi |
| ; CHECK-NEXT: movzwl %ax, %esi |
| ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload |
| ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload |
| diff --git a/llvm/test/CodeGen/X86/crash-O0.ll b/llvm/test/CodeGen/X86/crash-O0.ll |
| index 9f9e5584d6f..a93d3dd267b 100644 |
| --- a/llvm/test/CodeGen/X86/crash-O0.ll |
| +++ b/llvm/test/CodeGen/X86/crash-O0.ll |
| @@ -79,12 +79,11 @@ define i64 @addressModeWith32bitIndex(i32 %V) { |
| ; CHECK-NEXT: movq %rsp, %rbp |
| ; CHECK-NEXT: .cfi_def_cfa_register %rbp |
| ; CHECK-NEXT: xorl %eax, %eax |
| -; CHECK-NEXT: ## kill: def $rax killed $eax |
| -; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill |
| +; CHECK-NEXT: movl %eax, %ecx |
| +; CHECK-NEXT: movq %rcx, %rax |
| ; CHECK-NEXT: cqto |
| -; CHECK-NEXT: movslq %edi, %rcx |
| -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload |
| -; CHECK-NEXT: idivq (%rsi,%rcx,8) |
| +; CHECK-NEXT: movslq %edi, %rsi |
| +; CHECK-NEXT: idivq (%rcx,%rsi,8) |
| ; CHECK-NEXT: popq %rbp |
| ; CHECK-NEXT: retq |
| %gep = getelementptr i64, i64* null, i32 %V |
| diff --git a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll |
| index 664d9ded1e0..7d05a869be8 100644 |
| --- a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll |
| +++ b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll |
| @@ -7,8 +7,8 @@ define void @foo(i32* %p) !dbg !4 { |
| bb: |
| %tmp = load i32, i32* %p, align 4, !dbg !7 |
| ; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p) |
| - ; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7 |
| - ; CHECK-NEXT: $rcx = MOV64rr $rax, debug-location !7 |
| + ; CHECK-NEXT: $ecx = MOV32rr killed $eax, implicit-def $rcx, debug-location !7 |
| + ; CHECK-NEXT: $rdx = MOV64rr $rcx, debug-location !7 |
| |
| switch i32 %tmp, label %bb7 [ |
| i32 0, label %bb1 |
| diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll |
| index 7fffa21f0d2..5d7c83fa19d 100644 |
| --- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll |
| +++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll |
| @@ -1013,11 +1013,11 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) { |
| ; AVX1-NEXT: vmovaps %xmm0, %xmm1 |
| ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 |
| -; AVX1-NEXT: # implicit-def: $ymm2 |
| -; AVX1-NEXT: vmovaps %xmm1, %xmm2 |
| -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 |
| -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 |
| +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 |
| +; AVX1-NEXT: # implicit-def: $ymm1 |
| +; AVX1-NEXT: vmovaps %xmm2, %xmm1 |
| +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 |
| +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: test_load_nt16xfloat: |
| @@ -1067,11 +1067,11 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) { |
| ; AVX1-NEXT: vmovaps %xmm0, %xmm1 |
| ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 |
| -; AVX1-NEXT: # implicit-def: $ymm2 |
| -; AVX1-NEXT: vmovaps %xmm1, %xmm2 |
| -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 |
| -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 |
| +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 |
| +; AVX1-NEXT: # implicit-def: $ymm1 |
| +; AVX1-NEXT: vmovaps %xmm2, %xmm1 |
| +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 |
| +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: test_load_nt8xdouble: |
| @@ -1121,11 +1121,11 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) { |
| ; AVX1-NEXT: vmovaps %xmm0, %xmm1 |
| ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 |
| -; AVX1-NEXT: # implicit-def: $ymm2 |
| -; AVX1-NEXT: vmovaps %xmm1, %xmm2 |
| -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 |
| -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 |
| +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 |
| +; AVX1-NEXT: # implicit-def: $ymm1 |
| +; AVX1-NEXT: vmovaps %xmm2, %xmm1 |
| +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 |
| +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: test_load_nt64xi8: |
| @@ -1175,11 +1175,11 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) { |
| ; AVX1-NEXT: vmovaps %xmm0, %xmm1 |
| ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 |
| -; AVX1-NEXT: # implicit-def: $ymm2 |
| -; AVX1-NEXT: vmovaps %xmm1, %xmm2 |
| -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 |
| -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 |
| +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 |
| +; AVX1-NEXT: # implicit-def: $ymm1 |
| +; AVX1-NEXT: vmovaps %xmm2, %xmm1 |
| +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 |
| +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: test_load_nt32xi16: |
| @@ -1229,11 +1229,11 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) { |
| ; AVX1-NEXT: vmovaps %xmm0, %xmm1 |
| ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 |
| -; AVX1-NEXT: # implicit-def: $ymm2 |
| -; AVX1-NEXT: vmovaps %xmm1, %xmm2 |
| -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 |
| -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 |
| +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 |
| +; AVX1-NEXT: # implicit-def: $ymm1 |
| +; AVX1-NEXT: vmovaps %xmm2, %xmm1 |
| +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 |
| +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: test_load_nt16xi32: |
| @@ -1283,11 +1283,11 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) { |
| ; AVX1-NEXT: vmovaps %xmm0, %xmm1 |
| ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 |
| -; AVX1-NEXT: # implicit-def: $ymm2 |
| -; AVX1-NEXT: vmovaps %xmm1, %xmm2 |
| -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 |
| -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 |
| +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 |
| +; AVX1-NEXT: # implicit-def: $ymm1 |
| +; AVX1-NEXT: vmovaps %xmm2, %xmm1 |
| +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 |
| +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: test_load_nt8xi64: |
| diff --git a/llvm/test/CodeGen/X86/lvi-hardening-loads.ll b/llvm/test/CodeGen/X86/lvi-hardening-loads.ll |
| index ff8276f6f1c..e660f306ef7 100644 |
| --- a/llvm/test/CodeGen/X86/lvi-hardening-loads.ll |
| +++ b/llvm/test/CodeGen/X86/lvi-hardening-loads.ll |
| @@ -117,9 +117,9 @@ if.then: ; preds = %for.body |
| ; X64-NOOPT-NEXT: lfence |
| ; X64-NOOPT-NEXT: movq (%rax,%rcx,8), %rax |
| ; X64-NOOPT-NEXT: lfence |
| -; X64-NOOPT-NEXT: movl (%rax), %eax |
| +; X64-NOOPT-NEXT: movl (%rax), %edx |
| ; X64-NOOPT-NEXT: lfence |
| -; X64-NOOPT-NEXT: movl %eax, -{{[0-9]+}}(%rsp) |
| +; X64-NOOPT-NEXT: movl %edx, -{{[0-9]+}}(%rsp) |
| |
| if.end: ; preds = %if.then, %for.body |
| br label %for.inc |
| diff --git a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll |
| index ac55e1a1fc6..a1ad7f3c0f5 100644 |
| --- a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll |
| +++ b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll |
| @@ -69,8 +69,8 @@ define dso_local void @test_zero_ext(%struct.Foo* %f, i32 addrspace(271)* %i) { |
| ; CHECK-O0-LABEL: test_zero_ext: |
| ; CHECK-O0: # %bb.0: # %entry |
| ; CHECK-O0-NEXT: movl %edx, %eax |
| -; CHECK-O0-NEXT: # kill: def $rax killed $eax |
| -; CHECK-O0-NEXT: movq %rax, 8(%rcx) |
| +; CHECK-O0-NEXT: movl %eax, %r8d |
| +; CHECK-O0-NEXT: movq %r8, 8(%rcx) |
| ; CHECK-O0-NEXT: jmp use_foo # TAILCALL |
| entry: |
| %0 = addrspacecast i32 addrspace(271)* %i to i32* |
| @@ -125,23 +125,19 @@ entry: |
| |
| ; Test that null can be passed as a 32-bit pointer. |
| define dso_local void @test_null_arg(%struct.Foo* %f) { |
| -; CHECK-LABEL: test_null_arg: |
| -; CHECK: # %bb.0: # %entry |
| -; CHECK-NEXT: subq $40, %rsp |
| -; CHECK: xorl %edx, %edx |
| -; CHECK-NEXT: callq test_noop1 |
| -; CHECK-NEXT: nop |
| -; CHECK-NEXT: addq $40, %rsp |
| -; CHECK-NEXT: retq |
| -; |
| -; CHECK-O0-LABEL: test_null_arg: |
| -; CHECK-O0: # %bb.0: # %entry |
| -; CHECK-O0-NEXT: subq $40, %rsp |
| -; CHECK-O0: xorl %edx, %edx |
| -; CHECK-O0-NEXT: callq test_noop1 |
| -; CHECK-O0-NEXT: nop |
| -; CHECK-O0-NEXT: addq $40, %rsp |
| -; CHECK-O0-NEXT: retq |
| +; ALL-LABEL: test_null_arg: |
| +; ALL: # %bb.0: # %entry |
| +; ALL-NEXT: subq $40, %rsp |
| +; ALL-NEXT: .seh_stackalloc 40 |
| +; ALL-NEXT: .seh_endprologue |
| +; ALL-NEXT: xorl %edx, %edx |
| +; ALL-NEXT: callq test_noop1 |
| +; ALL-NEXT: nop |
| +; ALL-NEXT: addq $40, %rsp |
| +; ALL-NEXT: retq |
| +; ALL-NEXT: .seh_handlerdata |
| +; ALL-NEXT: .text |
| +; ALL-NEXT: .seh_endproc |
| entry: |
| call void @test_noop1(%struct.Foo* %f, i32 addrspace(270)* null) |
| ret void |
| @@ -177,8 +173,8 @@ define void @test_unrecognized2(%struct.Foo* %f, i32 addrspace(271)* %i) { |
| ; CHECK-O0-LABEL: test_unrecognized2: |
| ; CHECK-O0: # %bb.0: # %entry |
| ; CHECK-O0-NEXT: movl %edx, %eax |
| -; CHECK-O0-NEXT: # kill: def $rax killed $eax |
| -; CHECK-O0-NEXT: movq %rax, 16(%rcx) |
| +; CHECK-O0-NEXT: movl %eax, %r8d |
| +; CHECK-O0-NEXT: movq %r8, 16(%rcx) |
| ; CHECK-O0-NEXT: jmp use_foo # TAILCALL |
| entry: |
| %0 = addrspacecast i32 addrspace(271)* %i to i32 addrspace(9)* |
| @@ -189,16 +185,11 @@ entry: |
| } |
| |
| define i32 @test_load_sptr32(i32 addrspace(270)* %i) { |
| -; CHECK-LABEL: test_load_sptr32: |
| -; CHECK: # %bb.0: # %entry |
| -; CHECK-NEXT: movslq %ecx, %rax |
| -; CHECK-NEXT: movl (%rax), %eax |
| -; CHECK-NEXT: retq |
| -; CHECK-O0-LABEL: test_load_sptr32: |
| -; CHECK-O0: # %bb.0: # %entry |
| -; CHECK-O0-NEXT: movslq %ecx, %rax |
| -; CHECK-O0-NEXT: movl (%rax), %eax |
| -; CHECK-O0-NEXT: retq |
| +; ALL-LABEL: test_load_sptr32: |
| +; ALL: # %bb.0: # %entry |
| +; ALL-NEXT: movslq %ecx, %rax |
| +; ALL-NEXT: movl (%rax), %eax |
| +; ALL-NEXT: retq |
| entry: |
| %0 = load i32, i32 addrspace(270)* %i, align 4 |
| ret i32 %0 |
| @@ -210,11 +201,12 @@ define i32 @test_load_uptr32(i32 addrspace(271)* %i) { |
| ; CHECK-NEXT: movl %ecx, %eax |
| ; CHECK-NEXT: movl (%rax), %eax |
| ; CHECK-NEXT: retq |
| +; |
| ; CHECK-O0-LABEL: test_load_uptr32: |
| ; CHECK-O0: # %bb.0: # %entry |
| ; CHECK-O0-NEXT: movl %ecx, %eax |
| -; CHECK-O0-NEXT: # kill: def $rax killed $eax |
| -; CHECK-O0-NEXT: movl (%rax), %eax |
| +; CHECK-O0-NEXT: movl %eax, %edx |
| +; CHECK-O0-NEXT: movl (%rdx), %eax |
| ; CHECK-O0-NEXT: retq |
| entry: |
| %0 = load i32, i32 addrspace(271)* %i, align 4 |
| @@ -222,30 +214,21 @@ entry: |
| } |
| |
| define i32 @test_load_ptr64(i32 addrspace(272)* %i) { |
| -; CHECK-LABEL: test_load_ptr64: |
| -; CHECK: # %bb.0: # %entry |
| -; CHECK-NEXT: movl (%rcx), %eax |
| -; CHECK-NEXT: retq |
| -; CHECK-O0-LABEL: test_load_ptr64: |
| -; CHECK-O0: # %bb.0: # %entry |
| -; CHECK-O0-NEXT: movl (%rcx), %eax |
| -; CHECK-O0-NEXT: retq |
| +; ALL-LABEL: test_load_ptr64: |
| +; ALL: # %bb.0: # %entry |
| +; ALL-NEXT: movl (%rcx), %eax |
| +; ALL-NEXT: retq |
| entry: |
| %0 = load i32, i32 addrspace(272)* %i, align 8 |
| ret i32 %0 |
| } |
| |
| define void @test_store_sptr32(i32 addrspace(270)* %s, i32 %i) { |
| -; CHECK-LABEL: test_store_sptr32: |
| -; CHECK: # %bb.0: # %entry |
| -; CHECK-NEXT: movslq %ecx, %rax |
| -; CHECK-NEXT: movl %edx, (%rax) |
| -; CHECK-NEXT: retq |
| -; CHECK-O0-LABEL: test_store_sptr32: |
| -; CHECK-O0: # %bb.0: # %entry |
| -; CHECK-O0-NEXT: movslq %ecx, %rax |
| -; CHECK-O0-NEXT: movl %edx, (%rax) |
| -; CHECK-O0-NEXT: retq |
| +; ALL-LABEL: test_store_sptr32: |
| +; ALL: # %bb.0: # %entry |
| +; ALL-NEXT: movslq %ecx, %rax |
| +; ALL-NEXT: movl %edx, (%rax) |
| +; ALL-NEXT: retq |
| entry: |
| store i32 %i, i32 addrspace(270)* %s, align 4 |
| ret void |
| @@ -257,11 +240,12 @@ define void @test_store_uptr32(i32 addrspace(271)* %s, i32 %i) { |
| ; CHECK-NEXT: movl %ecx, %eax |
| ; CHECK-NEXT: movl %edx, (%rax) |
| ; CHECK-NEXT: retq |
| +; |
| ; CHECK-O0-LABEL: test_store_uptr32: |
| ; CHECK-O0: # %bb.0: # %entry |
| ; CHECK-O0-NEXT: movl %ecx, %eax |
| -; CHECK-O0-NEXT: # kill: def $rax killed $eax |
| -; CHECK-O0-NEXT: movl %edx, (%rax) |
| +; CHECK-O0-NEXT: movl %eax, %r8d |
| +; CHECK-O0-NEXT: movl %edx, (%r8) |
| ; CHECK-O0-NEXT: retq |
| entry: |
| store i32 %i, i32 addrspace(271)* %s, align 4 |
| @@ -269,14 +253,10 @@ entry: |
| } |
| |
| define void @test_store_ptr64(i32 addrspace(272)* %s, i32 %i) { |
| -; CHECK-LABEL: test_store_ptr64: |
| -; CHECK: # %bb.0: # %entry |
| -; CHECK-NEXT: movl %edx, (%rcx) |
| -; CHECK-NEXT: retq |
| -; CHECK-O0-LABEL: test_store_ptr64: |
| -; CHECK-O0: # %bb.0: # %entry |
| -; CHECK-O0-NEXT: movl %edx, (%rcx) |
| -; CHECK-O0-NEXT: retq |
| +; ALL-LABEL: test_store_ptr64: |
| +; ALL: # %bb.0: # %entry |
| +; ALL-NEXT: movl %edx, (%rcx) |
| +; ALL-NEXT: retq |
| entry: |
| store i32 %i, i32 addrspace(272)* %s, align 8 |
| ret void |
| diff --git a/llvm/test/CodeGen/X86/pr1489.ll b/llvm/test/CodeGen/X86/pr1489.ll |
| index d1148eecb0d..6226ea6caf9 100644 |
| --- a/llvm/test/CodeGen/X86/pr1489.ll |
| +++ b/llvm/test/CodeGen/X86/pr1489.ll |
| @@ -16,9 +16,9 @@ define i32 @quux() nounwind { |
| ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E |
| ; CHECK-NEXT: calll _lrintf |
| ; CHECK-NEXT: cmpl $1, %eax |
| -; CHECK-NEXT: setl %al |
| -; CHECK-NEXT: andb $1, %al |
| -; CHECK-NEXT: movzbl %al, %eax |
| +; CHECK-NEXT: setl %cl |
| +; CHECK-NEXT: andb $1, %cl |
| +; CHECK-NEXT: movzbl %cl, %eax |
| ; CHECK-NEXT: addl $8, %esp |
| ; CHECK-NEXT: popl %ebp |
| ; CHECK-NEXT: retl |
| @@ -42,9 +42,9 @@ define i32 @foo() nounwind { |
| ; CHECK-NEXT: movl $-1236950581, (%eax) ## imm = 0xB645A1CB |
| ; CHECK-NEXT: calll _lrint |
| ; CHECK-NEXT: cmpl $1, %eax |
| -; CHECK-NEXT: setl %al |
| -; CHECK-NEXT: andb $1, %al |
| -; CHECK-NEXT: movzbl %al, %eax |
| +; CHECK-NEXT: setl %cl |
| +; CHECK-NEXT: andb $1, %cl |
| +; CHECK-NEXT: movzbl %cl, %eax |
| ; CHECK-NEXT: addl $8, %esp |
| ; CHECK-NEXT: popl %ebp |
| ; CHECK-NEXT: retl |
| @@ -67,9 +67,9 @@ define i32 @bar() nounwind { |
| ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E |
| ; CHECK-NEXT: calll _lrintf |
| ; CHECK-NEXT: cmpl $1, %eax |
| -; CHECK-NEXT: setl %al |
| -; CHECK-NEXT: andb $1, %al |
| -; CHECK-NEXT: movzbl %al, %eax |
| +; CHECK-NEXT: setl %cl |
| +; CHECK-NEXT: andb $1, %cl |
| +; CHECK-NEXT: movzbl %cl, %eax |
| ; CHECK-NEXT: addl $8, %esp |
| ; CHECK-NEXT: popl %ebp |
| ; CHECK-NEXT: retl |
| @@ -90,9 +90,9 @@ define i32 @baz() nounwind { |
| ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E |
| ; CHECK-NEXT: calll _lrintf |
| ; CHECK-NEXT: cmpl $1, %eax |
| -; CHECK-NEXT: setl %al |
| -; CHECK-NEXT: andb $1, %al |
| -; CHECK-NEXT: movzbl %al, %eax |
| +; CHECK-NEXT: setl %cl |
| +; CHECK-NEXT: andb $1, %cl |
| +; CHECK-NEXT: movzbl %cl, %eax |
| ; CHECK-NEXT: addl $8, %esp |
| ; CHECK-NEXT: popl %ebp |
| ; CHECK-NEXT: retl |
| diff --git a/llvm/test/CodeGen/X86/pr27591.ll b/llvm/test/CodeGen/X86/pr27591.ll |
| index 7455584ac69..97ad6814f19 100644 |
| --- a/llvm/test/CodeGen/X86/pr27591.ll |
| +++ b/llvm/test/CodeGen/X86/pr27591.ll |
| @@ -9,9 +9,9 @@ define void @test1(i32 %x) #0 { |
| ; CHECK-NEXT: pushq %rax |
| ; CHECK-NEXT: cmpl $0, %edi |
| ; CHECK-NEXT: setne %al |
| -; CHECK-NEXT: movzbl %al, %eax |
| -; CHECK-NEXT: andl $1, %eax |
| -; CHECK-NEXT: movl %eax, %edi |
| +; CHECK-NEXT: movzbl %al, %ecx |
| +; CHECK-NEXT: andl $1, %ecx |
| +; CHECK-NEXT: movl %ecx, %edi |
| ; CHECK-NEXT: callq callee1 |
| ; CHECK-NEXT: popq %rax |
| ; CHECK-NEXT: retq |
| @@ -27,10 +27,10 @@ define void @test2(i32 %x) #0 { |
| ; CHECK-NEXT: pushq %rax |
| ; CHECK-NEXT: cmpl $0, %edi |
| ; CHECK-NEXT: setne %al |
| -; CHECK-NEXT: movzbl %al, %eax |
| -; CHECK-NEXT: andl $1, %eax |
| -; CHECK-NEXT: negl %eax |
| -; CHECK-NEXT: movl %eax, %edi |
| +; CHECK-NEXT: movzbl %al, %ecx |
| +; CHECK-NEXT: andl $1, %ecx |
| +; CHECK-NEXT: negl %ecx |
| +; CHECK-NEXT: movl %ecx, %edi |
| ; CHECK-NEXT: callq callee2 |
| ; CHECK-NEXT: popq %rax |
| ; CHECK-NEXT: retq |
| diff --git a/llvm/test/CodeGen/X86/pr30430.ll b/llvm/test/CodeGen/X86/pr30430.ll |
| index e524245daa1..4d40aa09eea 100644 |
| --- a/llvm/test/CodeGen/X86/pr30430.ll |
| +++ b/llvm/test/CodeGen/X86/pr30430.ll |
| @@ -75,28 +75,28 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float |
| ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] |
| ; CHECK-NEXT: # implicit-def: $ymm2 |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm2 |
| -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 |
| +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2 |
| +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] |
| +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] |
| +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] |
| ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero |
| -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] |
| -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero |
| -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] |
| -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero |
| -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] |
| -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero |
| -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] |
| +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3] |
| ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero |
| -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] |
| +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3] |
| ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero |
| -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] |
| +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0] |
| ; CHECK-NEXT: # implicit-def: $ymm3 |
| -; CHECK-NEXT: vmovaps %xmm2, %xmm3 |
| -; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1 |
| -; CHECK-NEXT: # implicit-def: $zmm2 |
| -; CHECK-NEXT: vmovaps %ymm1, %ymm2 |
| -; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0 |
| -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) |
| +; CHECK-NEXT: vmovaps %xmm1, %xmm3 |
| +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3 |
| +; CHECK-NEXT: # implicit-def: $zmm24 |
| +; CHECK-NEXT: vmovaps %zmm3, %zmm24 |
| +; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24 |
| +; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0 |
| ; CHECK-NEXT: movq %rbp, %rsp |
| ; CHECK-NEXT: popq %rbp |
| diff --git a/llvm/test/CodeGen/X86/pr30813.ll b/llvm/test/CodeGen/X86/pr30813.ll |
| index 7266c5bd8d0..e3e096bda6c 100644 |
| --- a/llvm/test/CodeGen/X86/pr30813.ll |
| +++ b/llvm/test/CodeGen/X86/pr30813.ll |
| @@ -1,8 +1,9 @@ |
| ; RUN: llc -mtriple=x86_64-linux-gnu -O0 %s -o - | FileCheck %s |
| ; CHECK: patatino: |
| ; CHECK: .cfi_startproc |
| -; CHECK: movzwl (%rax), %e[[REG0:[abcd]x]] |
| -; CHECK: movq %r[[REG0]], ({{%r[abcd]x}}) |
| +; CHECK: movzwl (%rax), [[REG0:%e[abcd]x]] |
| +; CHECK: movl [[REG0]], %e[[REG1C:[abcd]]]x |
| +; CHECK: movq %r[[REG1C]]x, ({{%r[abcd]x}}) |
| ; CHECK: retq |
| |
| define void @patatino() { |
| diff --git a/llvm/test/CodeGen/X86/pr32241.ll b/llvm/test/CodeGen/X86/pr32241.ll |
| index 1f3d273dfc4..6d628e6962e 100644 |
| --- a/llvm/test/CodeGen/X86/pr32241.ll |
| +++ b/llvm/test/CodeGen/X86/pr32241.ll |
| @@ -23,14 +23,14 @@ define i32 @_Z3foov() { |
| ; CHECK-NEXT: .LBB0_2: # %lor.end |
| ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload |
| ; CHECK-NEXT: andb $1, %al |
| -; CHECK-NEXT: movzbl %al, %eax |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| -; CHECK-NEXT: cmpl %eax, %ecx |
| +; CHECK-NEXT: movzbl %al, %ecx |
| +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| +; CHECK-NEXT: cmpl %ecx, %edx |
| ; CHECK-NEXT: setl %al |
| ; CHECK-NEXT: andb $1, %al |
| -; CHECK-NEXT: movzbl %al, %eax |
| -; CHECK-NEXT: xorl $-1, %eax |
| -; CHECK-NEXT: cmpl $0, %eax |
| +; CHECK-NEXT: movzbl %al, %ecx |
| +; CHECK-NEXT: xorl $-1, %ecx |
| +; CHECK-NEXT: cmpl $0, %ecx |
| ; CHECK-NEXT: movb $1, %al |
| ; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill |
| ; CHECK-NEXT: jne .LBB0_4 |
| @@ -42,9 +42,9 @@ define i32 @_Z3foov() { |
| ; CHECK-NEXT: .LBB0_4: # %lor.end5 |
| ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload |
| ; CHECK-NEXT: andb $1, %al |
| -; CHECK-NEXT: movzbl %al, %eax |
| -; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| -; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) |
| +; CHECK-NEXT: movzbl %al, %ecx |
| +; CHECK-NEXT: # kill: def $cx killed $cx killed $ecx |
| +; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; CHECK-NEXT: addl $16, %esp |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4 |
| diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll |
| index 533473663d7..a1041ab889c 100644 |
| --- a/llvm/test/CodeGen/X86/pr32284.ll |
| +++ b/llvm/test/CodeGen/X86/pr32284.ll |
| @@ -10,28 +10,28 @@ define void @foo() { |
| ; X86-O0-LABEL: foo: |
| ; X86-O0: # %bb.0: # %entry |
| ; X86-O0-NEXT: xorl %eax, %eax |
| -; X86-O0-NEXT: # kill: def $rax killed $eax |
| -; X86-O0-NEXT: xorl %ecx, %ecx |
| +; X86-O0-NEXT: movl %eax, %ecx |
| +; X86-O0-NEXT: xorl %eax, %eax |
| ; X86-O0-NEXT: movzbl c, %edx |
| -; X86-O0-NEXT: subl %edx, %ecx |
| -; X86-O0-NEXT: movslq %ecx, %rcx |
| -; X86-O0-NEXT: subq %rcx, %rax |
| -; X86-O0-NEXT: # kill: def $al killed $al killed $rax |
| -; X86-O0-NEXT: cmpb $0, %al |
| -; X86-O0-NEXT: setne %al |
| -; X86-O0-NEXT: andb $1, %al |
| -; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) |
| +; X86-O0-NEXT: subl %edx, %eax |
| +; X86-O0-NEXT: movslq %eax, %rsi |
| +; X86-O0-NEXT: subq %rsi, %rcx |
| +; X86-O0-NEXT: # kill: def $cl killed $cl killed $rcx |
| +; X86-O0-NEXT: cmpb $0, %cl |
| +; X86-O0-NEXT: setne %cl |
| +; X86-O0-NEXT: andb $1, %cl |
| +; X86-O0-NEXT: movb %cl, -{{[0-9]+}}(%rsp) |
| ; X86-O0-NEXT: cmpb $0, c |
| -; X86-O0-NEXT: setne %al |
| -; X86-O0-NEXT: xorb $-1, %al |
| -; X86-O0-NEXT: xorb $-1, %al |
| -; X86-O0-NEXT: andb $1, %al |
| -; X86-O0-NEXT: movzbl %al, %eax |
| -; X86-O0-NEXT: movzbl c, %ecx |
| -; X86-O0-NEXT: cmpl %ecx, %eax |
| -; X86-O0-NEXT: setle %al |
| -; X86-O0-NEXT: andb $1, %al |
| -; X86-O0-NEXT: movzbl %al, %eax |
| +; X86-O0-NEXT: setne %cl |
| +; X86-O0-NEXT: xorb $-1, %cl |
| +; X86-O0-NEXT: xorb $-1, %cl |
| +; X86-O0-NEXT: andb $1, %cl |
| +; X86-O0-NEXT: movzbl %cl, %eax |
| +; X86-O0-NEXT: movzbl c, %edx |
| +; X86-O0-NEXT: cmpl %edx, %eax |
| +; X86-O0-NEXT: setle %cl |
| +; X86-O0-NEXT: andb $1, %cl |
| +; X86-O0-NEXT: movzbl %cl, %eax |
| ; X86-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp) |
| ; X86-O0-NEXT: retq |
| ; |
| @@ -63,13 +63,13 @@ define void @foo() { |
| ; 686-O0-NEXT: xorb $-1, %al |
| ; 686-O0-NEXT: xorb $-1, %al |
| ; 686-O0-NEXT: andb $1, %al |
| -; 686-O0-NEXT: movzbl %al, %eax |
| -; 686-O0-NEXT: movzbl c, %ecx |
| -; 686-O0-NEXT: cmpl %ecx, %eax |
| +; 686-O0-NEXT: movzbl %al, %ecx |
| +; 686-O0-NEXT: movzbl c, %edx |
| +; 686-O0-NEXT: cmpl %edx, %ecx |
| ; 686-O0-NEXT: setle %al |
| ; 686-O0-NEXT: andb $1, %al |
| -; 686-O0-NEXT: movzbl %al, %eax |
| -; 686-O0-NEXT: movl %eax, (%esp) |
| +; 686-O0-NEXT: movzbl %al, %ecx |
| +; 686-O0-NEXT: movl %ecx, (%esp) |
| ; 686-O0-NEXT: addl $8, %esp |
| ; 686-O0-NEXT: .cfi_def_cfa_offset 4 |
| ; 686-O0-NEXT: retl |
| @@ -126,33 +126,33 @@ define void @f1() { |
| ; X86-O0-NEXT: movabsq $8381627093, %rcx # imm = 0x1F3957AD5 |
| ; X86-O0-NEXT: addq %rcx, %rax |
| ; X86-O0-NEXT: cmpq $0, %rax |
| -; X86-O0-NEXT: setne %al |
| -; X86-O0-NEXT: andb $1, %al |
| -; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) |
| -; X86-O0-NEXT: movl var_5, %eax |
| -; X86-O0-NEXT: xorl $-1, %eax |
| -; X86-O0-NEXT: cmpl $0, %eax |
| -; X86-O0-NEXT: setne %al |
| -; X86-O0-NEXT: xorb $-1, %al |
| -; X86-O0-NEXT: andb $1, %al |
| -; X86-O0-NEXT: movzbl %al, %eax |
| -; X86-O0-NEXT: # kill: def $rax killed $eax |
| +; X86-O0-NEXT: setne %dl |
| +; X86-O0-NEXT: andb $1, %dl |
| +; X86-O0-NEXT: movb %dl, -{{[0-9]+}}(%rsp) |
| +; X86-O0-NEXT: movl var_5, %esi |
| +; X86-O0-NEXT: xorl $-1, %esi |
| +; X86-O0-NEXT: cmpl $0, %esi |
| +; X86-O0-NEXT: setne %dl |
| +; X86-O0-NEXT: xorb $-1, %dl |
| +; X86-O0-NEXT: andb $1, %dl |
| +; X86-O0-NEXT: movzbl %dl, %esi |
| +; X86-O0-NEXT: movl %esi, %eax |
| ; X86-O0-NEXT: movslq var_5, %rcx |
| ; X86-O0-NEXT: addq $7093, %rcx # imm = 0x1BB5 |
| ; X86-O0-NEXT: cmpq %rcx, %rax |
| -; X86-O0-NEXT: setg %al |
| -; X86-O0-NEXT: andb $1, %al |
| -; X86-O0-NEXT: movzbl %al, %eax |
| -; X86-O0-NEXT: # kill: def $rax killed $eax |
| +; X86-O0-NEXT: setg %dl |
| +; X86-O0-NEXT: andb $1, %dl |
| +; X86-O0-NEXT: movzbl %dl, %esi |
| +; X86-O0-NEXT: movl %esi, %eax |
| ; X86-O0-NEXT: movq %rax, var_57 |
| -; X86-O0-NEXT: movl var_5, %eax |
| -; X86-O0-NEXT: xorl $-1, %eax |
| -; X86-O0-NEXT: cmpl $0, %eax |
| -; X86-O0-NEXT: setne %al |
| -; X86-O0-NEXT: xorb $-1, %al |
| -; X86-O0-NEXT: andb $1, %al |
| -; X86-O0-NEXT: movzbl %al, %eax |
| -; X86-O0-NEXT: # kill: def $rax killed $eax |
| +; X86-O0-NEXT: movl var_5, %esi |
| +; X86-O0-NEXT: xorl $-1, %esi |
| +; X86-O0-NEXT: cmpl $0, %esi |
| +; X86-O0-NEXT: setne %dl |
| +; X86-O0-NEXT: xorb $-1, %dl |
| +; X86-O0-NEXT: andb $1, %dl |
| +; X86-O0-NEXT: movzbl %dl, %esi |
| +; X86-O0-NEXT: movl %esi, %eax |
| ; X86-O0-NEXT: movq %rax, _ZN8struct_210member_2_0E |
| ; X86-O0-NEXT: retq |
| ; |
| @@ -178,17 +178,20 @@ define void @f1() { |
| ; |
| ; 686-O0-LABEL: f1: |
| ; 686-O0: # %bb.0: # %entry |
| -; 686-O0-NEXT: pushl %ebx |
| +; 686-O0-NEXT: pushl %ebp |
| ; 686-O0-NEXT: .cfi_def_cfa_offset 8 |
| -; 686-O0-NEXT: pushl %edi |
| +; 686-O0-NEXT: pushl %ebx |
| ; 686-O0-NEXT: .cfi_def_cfa_offset 12 |
| -; 686-O0-NEXT: pushl %esi |
| +; 686-O0-NEXT: pushl %edi |
| ; 686-O0-NEXT: .cfi_def_cfa_offset 16 |
| +; 686-O0-NEXT: pushl %esi |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 20 |
| ; 686-O0-NEXT: subl $1, %esp |
| -; 686-O0-NEXT: .cfi_def_cfa_offset 17 |
| -; 686-O0-NEXT: .cfi_offset %esi, -16 |
| -; 686-O0-NEXT: .cfi_offset %edi, -12 |
| -; 686-O0-NEXT: .cfi_offset %ebx, -8 |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 21 |
| +; 686-O0-NEXT: .cfi_offset %esi, -20 |
| +; 686-O0-NEXT: .cfi_offset %edi, -16 |
| +; 686-O0-NEXT: .cfi_offset %ebx, -12 |
| +; 686-O0-NEXT: .cfi_offset %ebp, -8 |
| ; 686-O0-NEXT: movl var_5, %eax |
| ; 686-O0-NEXT: movl %eax, %ecx |
| ; 686-O0-NEXT: sarl $31, %ecx |
| @@ -214,16 +217,18 @@ define void @f1() { |
| ; 686-O0-NEXT: movl var_5, %edi |
| ; 686-O0-NEXT: subl $-1, %edi |
| ; 686-O0-NEXT: sete %bl |
| -; 686-O0-NEXT: movzbl %bl, %ebx |
| -; 686-O0-NEXT: movl %ebx, _ZN8struct_210member_2_0E |
| +; 686-O0-NEXT: movzbl %bl, %ebp |
| +; 686-O0-NEXT: movl %ebp, _ZN8struct_210member_2_0E |
| ; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4 |
| ; 686-O0-NEXT: addl $1, %esp |
| -; 686-O0-NEXT: .cfi_def_cfa_offset 16 |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 20 |
| ; 686-O0-NEXT: popl %esi |
| -; 686-O0-NEXT: .cfi_def_cfa_offset 12 |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 16 |
| ; 686-O0-NEXT: popl %edi |
| -; 686-O0-NEXT: .cfi_def_cfa_offset 8 |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 12 |
| ; 686-O0-NEXT: popl %ebx |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 8 |
| +; 686-O0-NEXT: popl %ebp |
| ; 686-O0-NEXT: .cfi_def_cfa_offset 4 |
| ; 686-O0-NEXT: retl |
| ; |
| @@ -305,25 +310,25 @@ define void @f2() { |
| ; X86-O0-NEXT: setne %cl |
| ; X86-O0-NEXT: xorb $-1, %cl |
| ; X86-O0-NEXT: andb $1, %cl |
| -; X86-O0-NEXT: movzbl %cl, %ecx |
| -; X86-O0-NEXT: xorl %ecx, %eax |
| +; X86-O0-NEXT: movzbl %cl, %edx |
| +; X86-O0-NEXT: xorl %edx, %eax |
| ; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X86-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp) |
| -; X86-O0-NEXT: movzbl var_7, %eax |
| -; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax |
| -; X86-O0-NEXT: cmpw $0, %ax |
| -; X86-O0-NEXT: setne %al |
| -; X86-O0-NEXT: xorb $-1, %al |
| -; X86-O0-NEXT: andb $1, %al |
| -; X86-O0-NEXT: movzbl %al, %eax |
| -; X86-O0-NEXT: movzbl var_7, %ecx |
| -; X86-O0-NEXT: cmpl %ecx, %eax |
| -; X86-O0-NEXT: sete %al |
| -; X86-O0-NEXT: andb $1, %al |
| -; X86-O0-NEXT: movzbl %al, %eax |
| -; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax |
| -; X86-O0-NEXT: # implicit-def: $rcx |
| -; X86-O0-NEXT: movw %ax, (%rcx) |
| +; X86-O0-NEXT: movzbl var_7, %edx |
| +; X86-O0-NEXT: # kill: def $dx killed $dx killed $edx |
| +; X86-O0-NEXT: cmpw $0, %dx |
| +; X86-O0-NEXT: setne %cl |
| +; X86-O0-NEXT: xorb $-1, %cl |
| +; X86-O0-NEXT: andb $1, %cl |
| +; X86-O0-NEXT: movzbl %cl, %esi |
| +; X86-O0-NEXT: movzbl var_7, %edi |
| +; X86-O0-NEXT: cmpl %edi, %esi |
| +; X86-O0-NEXT: sete %cl |
| +; X86-O0-NEXT: andb $1, %cl |
| +; X86-O0-NEXT: movzbl %cl, %esi |
| +; X86-O0-NEXT: # kill: def $si killed $si killed $esi |
| +; X86-O0-NEXT: # implicit-def: $r8 |
| +; X86-O0-NEXT: movw %si, (%r8) |
| ; X86-O0-NEXT: retq |
| ; |
| ; X64-LABEL: f2: |
| @@ -345,33 +350,43 @@ define void @f2() { |
| ; |
| ; 686-O0-LABEL: f2: |
| ; 686-O0: # %bb.0: # %entry |
| +; 686-O0-NEXT: pushl %edi |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 8 |
| +; 686-O0-NEXT: pushl %esi |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 12 |
| ; 686-O0-NEXT: subl $2, %esp |
| -; 686-O0-NEXT: .cfi_def_cfa_offset 6 |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 14 |
| +; 686-O0-NEXT: .cfi_offset %esi, -12 |
| +; 686-O0-NEXT: .cfi_offset %edi, -8 |
| ; 686-O0-NEXT: movzbl var_7, %eax |
| ; 686-O0-NEXT: cmpb $0, var_7 |
| ; 686-O0-NEXT: setne %cl |
| ; 686-O0-NEXT: xorb $-1, %cl |
| ; 686-O0-NEXT: andb $1, %cl |
| -; 686-O0-NEXT: movzbl %cl, %ecx |
| -; 686-O0-NEXT: xorl %ecx, %eax |
| +; 686-O0-NEXT: movzbl %cl, %edx |
| +; 686-O0-NEXT: xorl %edx, %eax |
| ; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax |
| ; 686-O0-NEXT: movw %ax, (%esp) |
| -; 686-O0-NEXT: movzbl var_7, %eax |
| -; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax |
| -; 686-O0-NEXT: cmpw $0, %ax |
| -; 686-O0-NEXT: setne %al |
| -; 686-O0-NEXT: xorb $-1, %al |
| -; 686-O0-NEXT: andb $1, %al |
| -; 686-O0-NEXT: movzbl %al, %eax |
| -; 686-O0-NEXT: movzbl var_7, %ecx |
| -; 686-O0-NEXT: cmpl %ecx, %eax |
| -; 686-O0-NEXT: sete %al |
| -; 686-O0-NEXT: andb $1, %al |
| -; 686-O0-NEXT: movzbl %al, %eax |
| -; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax |
| -; 686-O0-NEXT: # implicit-def: $ecx |
| -; 686-O0-NEXT: movw %ax, (%ecx) |
| +; 686-O0-NEXT: movzbl var_7, %edx |
| +; 686-O0-NEXT: # kill: def $dx killed $dx killed $edx |
| +; 686-O0-NEXT: cmpw $0, %dx |
| +; 686-O0-NEXT: setne %cl |
| +; 686-O0-NEXT: xorb $-1, %cl |
| +; 686-O0-NEXT: andb $1, %cl |
| +; 686-O0-NEXT: movzbl %cl, %esi |
| +; 686-O0-NEXT: movzbl var_7, %edi |
| +; 686-O0-NEXT: cmpl %edi, %esi |
| +; 686-O0-NEXT: sete %cl |
| +; 686-O0-NEXT: andb $1, %cl |
| +; 686-O0-NEXT: movzbl %cl, %esi |
| +; 686-O0-NEXT: # kill: def $si killed $si killed $esi |
| +; 686-O0-NEXT: # implicit-def: $edi |
| +; 686-O0-NEXT: movw %si, (%edi) |
| ; 686-O0-NEXT: addl $2, %esp |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 12 |
| +; 686-O0-NEXT: popl %esi |
| +; 686-O0-NEXT: .cfi_def_cfa_offset 8 |
| +; 686-O0-NEXT: popl %edi |
| ; 686-O0-NEXT: .cfi_def_cfa_offset 4 |
| ; 686-O0-NEXT: retl |
| ; |
| @@ -431,35 +446,35 @@ define void @f3() #0 { |
| ; X86-O0-NEXT: movl var_13, %eax |
| ; X86-O0-NEXT: xorl $-1, %eax |
| ; X86-O0-NEXT: movl %eax, %eax |
| -; X86-O0-NEXT: # kill: def $rax killed $eax |
| +; X86-O0-NEXT: movl %eax, %ecx |
| ; X86-O0-NEXT: cmpl $0, var_13 |
| -; X86-O0-NEXT: setne %cl |
| -; X86-O0-NEXT: xorb $-1, %cl |
| -; X86-O0-NEXT: andb $1, %cl |
| -; X86-O0-NEXT: movzbl %cl, %ecx |
| -; X86-O0-NEXT: # kill: def $rcx killed $ecx |
| -; X86-O0-NEXT: movl var_13, %edx |
| -; X86-O0-NEXT: xorl $-1, %edx |
| -; X86-O0-NEXT: xorl var_16, %edx |
| -; X86-O0-NEXT: movl %edx, %edx |
| -; X86-O0-NEXT: # kill: def $rdx killed $edx |
| -; X86-O0-NEXT: andq %rdx, %rcx |
| -; X86-O0-NEXT: orq %rcx, %rax |
| -; X86-O0-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| +; X86-O0-NEXT: setne %dl |
| +; X86-O0-NEXT: xorb $-1, %dl |
| +; X86-O0-NEXT: andb $1, %dl |
| +; X86-O0-NEXT: movzbl %dl, %eax |
| +; X86-O0-NEXT: movl %eax, %esi |
| ; X86-O0-NEXT: movl var_13, %eax |
| ; X86-O0-NEXT: xorl $-1, %eax |
| +; X86-O0-NEXT: xorl var_16, %eax |
| ; X86-O0-NEXT: movl %eax, %eax |
| -; X86-O0-NEXT: # kill: def $rax killed $eax |
| +; X86-O0-NEXT: movl %eax, %edi |
| +; X86-O0-NEXT: andq %rdi, %rsi |
| +; X86-O0-NEXT: orq %rsi, %rcx |
| +; X86-O0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| +; X86-O0-NEXT: movl var_13, %eax |
| +; X86-O0-NEXT: xorl $-1, %eax |
| +; X86-O0-NEXT: movl %eax, %eax |
| +; X86-O0-NEXT: movl %eax, %ecx |
| ; X86-O0-NEXT: cmpl $0, var_13 |
| -; X86-O0-NEXT: setne %cl |
| -; X86-O0-NEXT: xorb $-1, %cl |
| -; X86-O0-NEXT: andb $1, %cl |
| -; X86-O0-NEXT: movzbl %cl, %ecx |
| -; X86-O0-NEXT: # kill: def $rcx killed $ecx |
| -; X86-O0-NEXT: andq $0, %rcx |
| -; X86-O0-NEXT: orq %rcx, %rax |
| -; X86-O0-NEXT: # kill: def $eax killed $eax killed $rax |
| -; X86-O0-NEXT: movl %eax, var_46 |
| +; X86-O0-NEXT: setne %dl |
| +; X86-O0-NEXT: xorb $-1, %dl |
| +; X86-O0-NEXT: andb $1, %dl |
| +; X86-O0-NEXT: movzbl %dl, %eax |
| +; X86-O0-NEXT: movl %eax, %esi |
| +; X86-O0-NEXT: andq $0, %rsi |
| +; X86-O0-NEXT: orq %rsi, %rcx |
| +; X86-O0-NEXT: # kill: def $ecx killed $ecx killed $rcx |
| +; X86-O0-NEXT: movl %ecx, var_46 |
| ; X86-O0-NEXT: retq |
| ; |
| ; X64-LABEL: f3: |
| @@ -484,28 +499,31 @@ define void @f3() #0 { |
| ; 686-O0-NEXT: .cfi_offset %ebp, -8 |
| ; 686-O0-NEXT: movl %esp, %ebp |
| ; 686-O0-NEXT: .cfi_def_cfa_register %ebp |
| +; 686-O0-NEXT: pushl %edi |
| ; 686-O0-NEXT: pushl %esi |
| ; 686-O0-NEXT: andl $-8, %esp |
| -; 686-O0-NEXT: subl $16, %esp |
| -; 686-O0-NEXT: .cfi_offset %esi, -12 |
| +; 686-O0-NEXT: subl $8, %esp |
| +; 686-O0-NEXT: .cfi_offset %esi, -16 |
| +; 686-O0-NEXT: .cfi_offset %edi, -12 |
| ; 686-O0-NEXT: movl var_13, %eax |
| ; 686-O0-NEXT: movl %eax, %ecx |
| ; 686-O0-NEXT: notl %ecx |
| ; 686-O0-NEXT: testl %eax, %eax |
| -; 686-O0-NEXT: sete %al |
| -; 686-O0-NEXT: movzbl %al, %eax |
| -; 686-O0-NEXT: movl var_16, %edx |
| -; 686-O0-NEXT: movl %ecx, %esi |
| -; 686-O0-NEXT: xorl %edx, %esi |
| -; 686-O0-NEXT: andl %esi, %eax |
| +; 686-O0-NEXT: sete %dl |
| +; 686-O0-NEXT: movzbl %dl, %eax |
| +; 686-O0-NEXT: movl var_16, %esi |
| +; 686-O0-NEXT: movl %ecx, %edi |
| +; 686-O0-NEXT: xorl %esi, %edi |
| +; 686-O0-NEXT: andl %edi, %eax |
| ; 686-O0-NEXT: orl %eax, %ecx |
| ; 686-O0-NEXT: movl %ecx, (%esp) |
| ; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; 686-O0-NEXT: movl var_13, %eax |
| ; 686-O0-NEXT: notl %eax |
| ; 686-O0-NEXT: movl %eax, var_46 |
| -; 686-O0-NEXT: leal -4(%ebp), %esp |
| +; 686-O0-NEXT: leal -8(%ebp), %esp |
| ; 686-O0-NEXT: popl %esi |
| +; 686-O0-NEXT: popl %edi |
| ; 686-O0-NEXT: popl %ebp |
| ; 686-O0-NEXT: .cfi_def_cfa %esp, 4 |
| ; 686-O0-NEXT: retl |
| diff --git a/llvm/test/CodeGen/X86/pr32340.ll b/llvm/test/CodeGen/X86/pr32340.ll |
| index 98685b959f6..1e428ac7d83 100644 |
| --- a/llvm/test/CodeGen/X86/pr32340.ll |
| +++ b/llvm/test/CodeGen/X86/pr32340.ll |
| @@ -14,37 +14,37 @@ define void @foo() { |
| ; X64-LABEL: foo: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: xorl %eax, %eax |
| -; X64-NEXT: # kill: def $rax killed $eax |
| +; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: movw $0, var_825 |
| -; X64-NEXT: movzwl var_32, %ecx |
| +; X64-NEXT: movzwl var_32, %eax |
| ; X64-NEXT: movzwl var_901, %edx |
| -; X64-NEXT: movl %ecx, %esi |
| +; X64-NEXT: movl %eax, %esi |
| ; X64-NEXT: xorl %edx, %esi |
| -; X64-NEXT: movl %ecx, %edx |
| +; X64-NEXT: movl %eax, %edx |
| ; X64-NEXT: xorl %esi, %edx |
| -; X64-NEXT: addl %ecx, %edx |
| -; X64-NEXT: movslq %edx, %rcx |
| -; X64-NEXT: movq %rcx, var_826 |
| -; X64-NEXT: movzwl var_32, %ecx |
| -; X64-NEXT: # kill: def $rcx killed $ecx |
| -; X64-NEXT: movzwl var_901, %edx |
| -; X64-NEXT: xorl $51981, %edx # imm = 0xCB0D |
| -; X64-NEXT: movslq %edx, %rdx |
| -; X64-NEXT: movabsq $-1142377792914660288, %rsi # imm = 0xF02575732E06E440 |
| -; X64-NEXT: xorq %rsi, %rdx |
| -; X64-NEXT: movq %rcx, %rsi |
| -; X64-NEXT: xorq %rdx, %rsi |
| -; X64-NEXT: xorq $-1, %rsi |
| -; X64-NEXT: xorq %rsi, %rcx |
| -; X64-NEXT: movq %rcx, %rdx |
| -; X64-NEXT: orq var_57, %rdx |
| -; X64-NEXT: orq %rdx, %rcx |
| -; X64-NEXT: # kill: def $cx killed $cx killed $rcx |
| -; X64-NEXT: movw %cx, var_900 |
| -; X64-NEXT: cmpq var_28, %rax |
| -; X64-NEXT: setne %al |
| -; X64-NEXT: andb $1, %al |
| -; X64-NEXT: movzbl %al, %eax |
| +; X64-NEXT: addl %eax, %edx |
| +; X64-NEXT: movslq %edx, %rdi |
| +; X64-NEXT: movq %rdi, var_826 |
| +; X64-NEXT: movzwl var_32, %eax |
| +; X64-NEXT: movl %eax, %edi |
| +; X64-NEXT: movzwl var_901, %eax |
| +; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D |
| +; X64-NEXT: movslq %eax, %r8 |
| +; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440 |
| +; X64-NEXT: xorq %r9, %r8 |
| +; X64-NEXT: movq %rdi, %r9 |
| +; X64-NEXT: xorq %r8, %r9 |
| +; X64-NEXT: xorq $-1, %r9 |
| +; X64-NEXT: xorq %r9, %rdi |
| +; X64-NEXT: movq %rdi, %r8 |
| +; X64-NEXT: orq var_57, %r8 |
| +; X64-NEXT: orq %r8, %rdi |
| +; X64-NEXT: # kill: def $di killed $di killed $rdi |
| +; X64-NEXT: movw %di, var_900 |
| +; X64-NEXT: cmpq var_28, %rcx |
| +; X64-NEXT: setne %r10b |
| +; X64-NEXT: andb $1, %r10b |
| +; X64-NEXT: movzbl %r10b, %eax |
| ; X64-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X64-NEXT: movw %ax, var_827 |
| ; X64-NEXT: retq |
| diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll |
| index 165e0292d46..d5f7fde77f6 100644 |
| --- a/llvm/test/CodeGen/X86/pr32345.ll |
| +++ b/llvm/test/CodeGen/X86/pr32345.ll |
| @@ -15,23 +15,23 @@ define void @foo() { |
| ; X640-NEXT: xorl %ecx, %eax |
| ; X640-NEXT: movzwl var_27, %ecx |
| ; X640-NEXT: xorl %ecx, %eax |
| -; X640-NEXT: cltq |
| -; X640-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| +; X640-NEXT: movslq %eax, %rdx |
| +; X640-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; X640-NEXT: movzwl var_22, %eax |
| ; X640-NEXT: movzwl var_27, %ecx |
| ; X640-NEXT: xorl %ecx, %eax |
| ; X640-NEXT: movzwl var_27, %ecx |
| ; X640-NEXT: xorl %ecx, %eax |
| -; X640-NEXT: cltq |
| -; X640-NEXT: movzwl var_27, %ecx |
| -; X640-NEXT: subl $16610, %ecx # imm = 0x40E2 |
| -; X640-NEXT: movl %ecx, %ecx |
| -; X640-NEXT: # kill: def $rcx killed $ecx |
| +; X640-NEXT: movslq %eax, %rdx |
| +; X640-NEXT: movzwl var_27, %eax |
| +; X640-NEXT: subl $16610, %eax # imm = 0x40E2 |
| +; X640-NEXT: movl %eax, %eax |
| +; X640-NEXT: movl %eax, %ecx |
| ; X640-NEXT: # kill: def $cl killed $rcx |
| -; X640-NEXT: sarq %cl, %rax |
| -; X640-NEXT: # kill: def $al killed $al killed $rax |
| -; X640-NEXT: # implicit-def: $rcx |
| -; X640-NEXT: movb %al, (%rcx) |
| +; X640-NEXT: sarq %cl, %rdx |
| +; X640-NEXT: # kill: def $dl killed $dl killed $rdx |
| +; X640-NEXT: # implicit-def: $rsi |
| +; X640-NEXT: movb %dl, (%rsi) |
| ; X640-NEXT: retq |
| ; |
| ; 6860-LABEL: foo: |
| @@ -41,37 +41,43 @@ define void @foo() { |
| ; 6860-NEXT: .cfi_offset %ebp, -8 |
| ; 6860-NEXT: movl %esp, %ebp |
| ; 6860-NEXT: .cfi_def_cfa_register %ebp |
| +; 6860-NEXT: pushl %ebx |
| +; 6860-NEXT: pushl %edi |
| +; 6860-NEXT: pushl %esi |
| ; 6860-NEXT: andl $-8, %esp |
| -; 6860-NEXT: subl $24, %esp |
| +; 6860-NEXT: subl $32, %esp |
| +; 6860-NEXT: .cfi_offset %esi, -20 |
| +; 6860-NEXT: .cfi_offset %edi, -16 |
| +; 6860-NEXT: .cfi_offset %ebx, -12 |
| ; 6860-NEXT: movw var_22, %ax |
| ; 6860-NEXT: movzwl var_27, %ecx |
| ; 6860-NEXT: movw %cx, %dx |
| ; 6860-NEXT: xorw %dx, %ax |
| -; 6860-NEXT: # implicit-def: $edx |
| -; 6860-NEXT: movw %ax, %dx |
| -; 6860-NEXT: xorl %ecx, %edx |
| -; 6860-NEXT: # kill: def $dx killed $dx killed $edx |
| -; 6860-NEXT: movzwl %dx, %eax |
| -; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| +; 6860-NEXT: # implicit-def: $esi |
| +; 6860-NEXT: movw %ax, %si |
| +; 6860-NEXT: xorl %ecx, %esi |
| +; 6860-NEXT: # kill: def $si killed $si killed $esi |
| +; 6860-NEXT: movzwl %si, %ecx |
| +; 6860-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| ; 6860-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; 6860-NEXT: movw var_22, %ax |
| ; 6860-NEXT: movzwl var_27, %ecx |
| ; 6860-NEXT: movw %cx, %dx |
| ; 6860-NEXT: xorw %dx, %ax |
| -; 6860-NEXT: # implicit-def: $edx |
| -; 6860-NEXT: movw %ax, %dx |
| -; 6860-NEXT: xorl %ecx, %edx |
| -; 6860-NEXT: # kill: def $dx killed $dx killed $edx |
| -; 6860-NEXT: movzwl %dx, %eax |
| +; 6860-NEXT: # implicit-def: $edi |
| +; 6860-NEXT: movw %ax, %di |
| +; 6860-NEXT: xorl %ecx, %edi |
| +; 6860-NEXT: # kill: def $di killed $di killed $edi |
| +; 6860-NEXT: movzwl %di, %ebx |
| ; 6860-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; 6860-NEXT: addb $30, %cl |
| -; 6860-NEXT: xorl %edx, %edx |
| +; 6860-NEXT: xorl %eax, %eax |
| ; 6860-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill |
| -; 6860-NEXT: shrdl %cl, %edx, %eax |
| +; 6860-NEXT: shrdl %cl, %eax, %ebx |
| ; 6860-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload |
| ; 6860-NEXT: testb $32, %cl |
| +; 6860-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| -; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; 6860-NEXT: jne .LBB0_2 |
| ; 6860-NEXT: # %bb.1: # %bb |
| ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| @@ -81,7 +87,10 @@ define void @foo() { |
| ; 6860-NEXT: # kill: def $al killed $al killed $eax |
| ; 6860-NEXT: # implicit-def: $ecx |
| ; 6860-NEXT: movb %al, (%ecx) |
| -; 6860-NEXT: movl %ebp, %esp |
| +; 6860-NEXT: leal -12(%ebp), %esp |
| +; 6860-NEXT: popl %esi |
| +; 6860-NEXT: popl %edi |
| +; 6860-NEXT: popl %ebx |
| ; 6860-NEXT: popl %ebp |
| ; 6860-NEXT: .cfi_def_cfa %esp, 4 |
| ; 6860-NEXT: retl |
| diff --git a/llvm/test/CodeGen/X86/pr32451.ll b/llvm/test/CodeGen/X86/pr32451.ll |
| index 3b1997234ce..4754d8e4cf6 100644 |
| --- a/llvm/test/CodeGen/X86/pr32451.ll |
| +++ b/llvm/test/CodeGen/X86/pr32451.ll |
| @@ -9,24 +9,29 @@ target triple = "x86_64-unknown-linux-gnu" |
| define i8** @japi1_convert_690(i8**, i8***, i32) { |
| ; CHECK-LABEL: japi1_convert_690: |
| ; CHECK: # %bb.0: # %top |
| +; CHECK-NEXT: pushl %ebx |
| +; CHECK-NEXT: .cfi_def_cfa_offset 8 |
| ; CHECK-NEXT: subl $16, %esp |
| -; CHECK-NEXT: .cfi_def_cfa_offset 20 |
| +; CHECK-NEXT: .cfi_def_cfa_offset 24 |
| +; CHECK-NEXT: .cfi_offset %ebx, -8 |
| ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax |
| -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill |
| ; CHECK-NEXT: calll julia.gc_root_decl |
| -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill |
| ; CHECK-NEXT: calll jl_get_ptls_states |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload |
| ; CHECK-NEXT: movl 4(%ecx), %edx |
| -; CHECK-NEXT: movb (%edx), %dl |
| -; CHECK-NEXT: andb $1, %dl |
| -; CHECK-NEXT: movzbl %dl, %edx |
| +; CHECK-NEXT: movb (%edx), %bl |
| +; CHECK-NEXT: andb $1, %bl |
| +; CHECK-NEXT: movzbl %bl, %edx |
| ; CHECK-NEXT: movl %edx, (%esp) |
| -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill |
| ; CHECK-NEXT: calll jl_box_int32 |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload |
| ; CHECK-NEXT: movl %eax, (%ecx) |
| ; CHECK-NEXT: addl $16, %esp |
| +; CHECK-NEXT: .cfi_def_cfa_offset 8 |
| +; CHECK-NEXT: popl %ebx |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4 |
| ; CHECK-NEXT: retl |
| top: |
| diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll |
| index 25b068c8fad..0f73036a4c6 100644 |
| --- a/llvm/test/CodeGen/X86/pr34592.ll |
| +++ b/llvm/test/CodeGen/X86/pr34592.ll |
| @@ -10,7 +10,7 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1 |
| ; CHECK-NEXT: movq %rsp, %rbp |
| ; CHECK-NEXT: .cfi_def_cfa_register %rbp |
| ; CHECK-NEXT: andq $-32, %rsp |
| -; CHECK-NEXT: subq $160, %rsp |
| +; CHECK-NEXT: subq $192, %rsp |
| ; CHECK-NEXT: vmovaps 240(%rbp), %ymm8 |
| ; CHECK-NEXT: vmovaps 208(%rbp), %ymm9 |
| ; CHECK-NEXT: vmovaps 176(%rbp), %ymm10 |
| @@ -27,14 +27,14 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1 |
| ; CHECK-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23] |
| ; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,0] |
| ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7] |
| -; CHECK-NEXT: vmovaps %xmm7, %xmm2 |
| -; CHECK-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] |
| -; CHECK-NEXT: # implicit-def: $ymm9 |
| -; CHECK-NEXT: vmovaps %xmm2, %xmm9 |
| -; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload |
| -; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] |
| -; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3] |
| -; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm11[4,5,6,7] |
| +; CHECK-NEXT: vmovaps %xmm7, %xmm9 |
| +; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7] |
| +; CHECK-NEXT: # implicit-def: $ymm2 |
| +; CHECK-NEXT: vmovaps %xmm9, %xmm2 |
| +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload |
| +; CHECK-NEXT: vpalignr {{.*#+}} ymm9 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] |
| +; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,1,0,3] |
| +; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm9[4,5,6,7] |
| ; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7] |
| ; CHECK-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,1,3] |
| ; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5] |
| @@ -43,11 +43,14 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1 |
| ; CHECK-NEXT: vmovq {{.*#+}} xmm7 = xmm7[0],zero |
| ; CHECK-NEXT: # implicit-def: $ymm8 |
| ; CHECK-NEXT: vmovaps %xmm7, %xmm8 |
| -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm8[0,1],ymm6[0,1] |
| +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm6 = ymm8[0,1],ymm6[0,1] |
| ; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| ; CHECK-NEXT: vmovaps %ymm5, %ymm1 |
| +; CHECK-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill |
| +; CHECK-NEXT: vmovaps %ymm6, %ymm2 |
| +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Reload |
| ; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill |
| -; CHECK-NEXT: vmovaps %ymm9, %ymm3 |
| +; CHECK-NEXT: vmovaps %ymm5, %ymm3 |
| ; CHECK-NEXT: movq %rbp, %rsp |
| ; CHECK-NEXT: popq %rbp |
| ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 |
| diff --git a/llvm/test/CodeGen/X86/pr39733.ll b/llvm/test/CodeGen/X86/pr39733.ll |
| index 31bd5b71d0a..cfe5832d7ad 100644 |
| --- a/llvm/test/CodeGen/X86/pr39733.ll |
| +++ b/llvm/test/CodeGen/X86/pr39733.ll |
| @@ -23,8 +23,8 @@ define void @test55() { |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm2 |
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] |
| ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 |
| -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 |
| -; CHECK-NEXT: vmovdqa %ymm0, (%rsp) |
| +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2 |
| +; CHECK-NEXT: vmovdqa %ymm2, (%rsp) |
| ; CHECK-NEXT: movq %rbp, %rsp |
| ; CHECK-NEXT: popq %rbp |
| ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 |
| diff --git a/llvm/test/CodeGen/X86/pr44749.ll b/llvm/test/CodeGen/X86/pr44749.ll |
| index 1012d8c723b..d465009c7c3 100644 |
| --- a/llvm/test/CodeGen/X86/pr44749.ll |
| +++ b/llvm/test/CodeGen/X86/pr44749.ll |
| @@ -14,22 +14,20 @@ define i32 @a() { |
| ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill |
| ; CHECK-NEXT: callq _b |
| ; CHECK-NEXT: cvtsi2sd %eax, %xmm0 |
| -; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rax |
| -; CHECK-NEXT: subq $-1, %rax |
| -; CHECK-NEXT: setne %cl |
| -; CHECK-NEXT: movzbl %cl, %ecx |
| -; CHECK-NEXT: ## kill: def $rcx killed $ecx |
| -; CHECK-NEXT: leaq {{.*}}(%rip), %rdx |
| +; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rcx |
| +; CHECK-NEXT: subq $-1, %rcx |
| +; CHECK-NEXT: setne %dl |
| +; CHECK-NEXT: movzbl %dl, %eax |
| +; CHECK-NEXT: movl %eax, %esi |
| +; CHECK-NEXT: leaq {{.*}}(%rip), %rdi |
| ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero |
| ; CHECK-NEXT: ucomisd %xmm1, %xmm0 |
| -; CHECK-NEXT: setae %cl |
| -; CHECK-NEXT: movzbl %cl, %ecx |
| -; CHECK-NEXT: ## kill: def $rcx killed $ecx |
| -; CHECK-NEXT: leaq {{.*}}(%rip), %rdx |
| +; CHECK-NEXT: setae %dl |
| +; CHECK-NEXT: movzbl %dl, %eax |
| +; CHECK-NEXT: movl %eax, %esi |
| +; CHECK-NEXT: leaq {{.*}}(%rip), %rdi |
| ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| -; CHECK-NEXT: cvttsd2si %xmm0, %ecx |
| -; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill |
| -; CHECK-NEXT: movl %ecx, %eax |
| +; CHECK-NEXT: cvttsd2si %xmm0, %eax |
| ; CHECK-NEXT: addq $24, %rsp |
| ; CHECK-NEXT: retq |
| entry: |
| diff --git a/llvm/test/CodeGen/X86/pr47000.ll b/llvm/test/CodeGen/X86/pr47000.ll |
| index 083aa780a07..922b6403cc4 100755 |
| --- a/llvm/test/CodeGen/X86/pr47000.ll |
| +++ b/llvm/test/CodeGen/X86/pr47000.ll |
| @@ -12,47 +12,51 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind { |
| ; CHECK-NEXT: pushl %edi |
| ; CHECK-NEXT: pushl %esi |
| ; CHECK-NEXT: subl $124, %esp |
| -; CHECK-NEXT: movl 144(%esp), %eax |
| +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; CHECK-NEXT: movl %eax, %ecx |
| -; CHECK-NEXT: movw 176(%esp), %dx |
| -; CHECK-NEXT: movw 172(%esp), %si |
| -; CHECK-NEXT: movw 168(%esp), %di |
| -; CHECK-NEXT: movw 164(%esp), %bx |
| -; CHECK-NEXT: movw 160(%esp), %bp |
| +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx |
| +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %si |
| +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %di |
| +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bx |
| +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bp |
| +; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill |
| +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx |
| +; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill |
| +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx |
| +; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill |
| +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx |
| +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) |
| +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload |
| +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) |
| +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload |
| +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) |
| +; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp) |
| +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %bp # 2-byte Reload |
| +; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp) |
| +; CHECK-NEXT: movw %si, {{[0-9]+}}(%esp) |
| +; CHECK-NEXT: movw %di, {{[0-9]+}}(%esp) |
| +; CHECK-NEXT: movw %bx, {{[0-9]+}}(%esp) |
| +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %esi |
| +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edi |
| +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ebx |
| ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| -; CHECK-NEXT: movw 156(%esp), %ax |
| -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill |
| -; CHECK-NEXT: movw 152(%esp), %ax |
| -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill |
| -; CHECK-NEXT: movw 148(%esp), %ax |
| -; CHECK-NEXT: movw %ax, 112(%esp) |
| -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload |
| -; CHECK-NEXT: movw %ax, 114(%esp) |
| -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload |
| -; CHECK-NEXT: movw %ax, 116(%esp) |
| -; CHECK-NEXT: movw %bp, 118(%esp) |
| -; CHECK-NEXT: movw %dx, 110(%esp) |
| -; CHECK-NEXT: movw %si, 108(%esp) |
| -; CHECK-NEXT: movw %di, 106(%esp) |
| -; CHECK-NEXT: movw %bx, 104(%esp) |
| -; CHECK-NEXT: movzwl 118(%esp), %edx |
| -; CHECK-NEXT: movzwl 116(%esp), %esi |
| -; CHECK-NEXT: movzwl 114(%esp), %edi |
| -; CHECK-NEXT: movzwl 112(%esp), %ebx |
| -; CHECK-NEXT: movzwl 110(%esp), %ebp |
| -; CHECK-NEXT: movzwl 108(%esp), %eax |
| +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| -; CHECK-NEXT: movzwl 106(%esp), %eax |
| +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| -; CHECK-NEXT: movzwl 104(%esp), %eax |
| +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: movl %ebx, (%eax) |
| ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| -; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| +; CHECK-NEXT: movl %ecx, (%eax) |
| ; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| -; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: calll __gnu_h2f_ieee |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| @@ -68,58 +72,58 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind { |
| ; CHECK-NEXT: fstps (%eax) |
| ; CHECK-NEXT: calll __gnu_f2h_ieee |
| ; CHECK-NEXT: movl %esp, %ecx |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| -; CHECK-NEXT: movl %edx, (%ecx) |
| +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| +; CHECK-NEXT: movl %esi, (%ecx) |
| ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill |
| ; CHECK-NEXT: calll __gnu_h2f_ieee |
| -; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| -; CHECK-NEXT: movl %ecx, (%eax) |
| +; CHECK-NEXT: movl %esp, %ecx |
| +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| +; CHECK-NEXT: movl %esi, (%ecx) |
| ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill |
| ; CHECK-NEXT: calll __gnu_h2f_ieee |
| -; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: fstps 4(%eax) |
| +; CHECK-NEXT: movl %esp, %ecx |
| +; CHECK-NEXT: fstps 4(%ecx) |
| ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload |
| -; CHECK-NEXT: fstps (%eax) |
| +; CHECK-NEXT: fstps (%ecx) |
| ; CHECK-NEXT: calll fmodf |
| -; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: fstps (%eax) |
| +; CHECK-NEXT: movl %esp, %ecx |
| +; CHECK-NEXT: fstps (%ecx) |
| ; CHECK-NEXT: calll __gnu_f2h_ieee |
| ; CHECK-NEXT: movl %esp, %ecx |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| -; CHECK-NEXT: movl %edx, (%ecx) |
| +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| +; CHECK-NEXT: movl %esi, (%ecx) |
| ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill |
| ; CHECK-NEXT: calll __gnu_h2f_ieee |
| -; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| -; CHECK-NEXT: movl %ecx, (%eax) |
| +; CHECK-NEXT: movl %esp, %ecx |
| +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| +; CHECK-NEXT: movl %esi, (%ecx) |
| ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill |
| ; CHECK-NEXT: calll __gnu_h2f_ieee |
| -; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: fstps 4(%eax) |
| +; CHECK-NEXT: movl %esp, %ecx |
| +; CHECK-NEXT: fstps 4(%ecx) |
| ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload |
| -; CHECK-NEXT: fstps (%eax) |
| +; CHECK-NEXT: fstps (%ecx) |
| ; CHECK-NEXT: calll fmodf |
| -; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: fstps (%eax) |
| +; CHECK-NEXT: movl %esp, %ecx |
| +; CHECK-NEXT: fstps (%ecx) |
| ; CHECK-NEXT: calll __gnu_f2h_ieee |
| ; CHECK-NEXT: movl %esp, %ecx |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| -; CHECK-NEXT: movl %edx, (%ecx) |
| +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| +; CHECK-NEXT: movl %esi, (%ecx) |
| ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill |
| ; CHECK-NEXT: calll __gnu_h2f_ieee |
| -; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| -; CHECK-NEXT: movl %ecx, (%eax) |
| +; CHECK-NEXT: movl %esp, %ecx |
| +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| +; CHECK-NEXT: movl %esi, (%ecx) |
| ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill |
| ; CHECK-NEXT: calll __gnu_h2f_ieee |
| -; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: fstps 4(%eax) |
| +; CHECK-NEXT: movl %esp, %ecx |
| +; CHECK-NEXT: fstps 4(%ecx) |
| ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload |
| -; CHECK-NEXT: fstps (%eax) |
| +; CHECK-NEXT: fstps (%ecx) |
| ; CHECK-NEXT: calll fmodf |
| -; CHECK-NEXT: movl %esp, %eax |
| -; CHECK-NEXT: fstps (%eax) |
| +; CHECK-NEXT: movl %esp, %ecx |
| +; CHECK-NEXT: fstps (%ecx) |
| ; CHECK-NEXT: calll __gnu_f2h_ieee |
| ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; CHECK-NEXT: movw %ax, 6(%ecx) |
| @@ -127,9 +131,10 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind { |
| ; CHECK-NEXT: movw %ax, 4(%ecx) |
| ; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload |
| ; CHECK-NEXT: movw %dx, 2(%ecx) |
| -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %si # 2-byte Reload |
| -; CHECK-NEXT: movw %si, (%ecx) |
| -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %bp # 2-byte Reload |
| +; CHECK-NEXT: movw %bp, (%ecx) |
| +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| +; CHECK-NEXT: movl %esi, %eax |
| ; CHECK-NEXT: addl $124, %esp |
| ; CHECK-NEXT: popl %esi |
| ; CHECK-NEXT: popl %edi |
| diff --git a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir |
| index 2821f00940e..0fe9f60897f 100644 |
| --- a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir |
| +++ b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir |
| @@ -23,15 +23,15 @@ body: | |
| ; CHECK: successors: %bb.3(0x80000000) |
| ; CHECK: $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) |
| ; CHECK: renamable $ecx = MOV32r0 implicit-def $eflags |
| - ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit |
| + ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit |
| ; CHECK: MOV64mi32 killed renamable $rax, 1, $noreg, 0, $noreg, 0 :: (volatile store 8) |
| - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.0) |
| + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.0) |
| ; CHECK: bb.3: |
| ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) |
| ; CHECK: renamable $ecx = MOV32r0 implicit-def dead $eflags |
| - ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit |
| - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.1) |
| + ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit |
| + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.1) |
| ; CHECK: JMP64r killed renamable $rax |
| bb.0: |
| liveins: $edi, $rsi |
| diff --git a/llvm/test/CodeGen/X86/swift-return.ll b/llvm/test/CodeGen/X86/swift-return.ll |
| index 4934419055a..c62e92f2cac 100644 |
| --- a/llvm/test/CodeGen/X86/swift-return.ll |
| +++ b/llvm/test/CodeGen/X86/swift-return.ll |
| @@ -28,10 +28,11 @@ define i16 @test(i32 %key) { |
| ; CHECK-O0-NEXT: movl %edi, {{[0-9]+}}(%rsp) |
| ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi |
| ; CHECK-O0-NEXT: callq gen |
| -; CHECK-O0-NEXT: cwtl |
| -; CHECK-O0-NEXT: movsbl %dl, %ecx |
| -; CHECK-O0-NEXT: addl %ecx, %eax |
| -; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax |
| +; CHECK-O0-NEXT: movswl %ax, %ecx |
| +; CHECK-O0-NEXT: movsbl %dl, %esi |
| +; CHECK-O0-NEXT: addl %esi, %ecx |
| +; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx |
| +; CHECK-O0-NEXT: movw %cx, %ax |
| ; CHECK-O0-NEXT: popq %rcx |
| ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 |
| ; CHECK-O0-NEXT: retq |
| @@ -79,16 +80,16 @@ define i32 @test2(i32 %key) #0 { |
| ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi |
| ; CHECK-O0-NEXT: movq %rsp, %rax |
| ; CHECK-O0-NEXT: callq gen2 |
| -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx |
| ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx |
| -; CHECK-O0-NEXT: movl (%rsp), %esi |
| -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi |
| -; CHECK-O0-NEXT: addl %edi, %esi |
| -; CHECK-O0-NEXT: addl %edx, %esi |
| -; CHECK-O0-NEXT: addl %ecx, %esi |
| -; CHECK-O0-NEXT: addl %eax, %esi |
| -; CHECK-O0-NEXT: movl %esi, %eax |
| +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi |
| +; CHECK-O0-NEXT: movl (%rsp), %edi |
| +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %r8d |
| +; CHECK-O0-NEXT: addl %r8d, %edi |
| +; CHECK-O0-NEXT: addl %esi, %edi |
| +; CHECK-O0-NEXT: addl %edx, %edi |
| +; CHECK-O0-NEXT: addl %ecx, %edi |
| +; CHECK-O0-NEXT: movl %edi, %eax |
| ; CHECK-O0-NEXT: addq $24, %rsp |
| ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 |
| ; CHECK-O0-NEXT: retq |
| @@ -263,17 +264,17 @@ define void @consume_i1_ret() { |
| ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-O0-NEXT: callq produce_i1_ret |
| ; CHECK-O0-NEXT: andb $1, %al |
| -; CHECK-O0-NEXT: movzbl %al, %eax |
| -; CHECK-O0-NEXT: movl %eax, var |
| +; CHECK-O0-NEXT: movzbl %al, %esi |
| +; CHECK-O0-NEXT: movl %esi, var |
| ; CHECK-O0-NEXT: andb $1, %dl |
| -; CHECK-O0-NEXT: movzbl %dl, %eax |
| -; CHECK-O0-NEXT: movl %eax, var |
| +; CHECK-O0-NEXT: movzbl %dl, %esi |
| +; CHECK-O0-NEXT: movl %esi, var |
| ; CHECK-O0-NEXT: andb $1, %cl |
| -; CHECK-O0-NEXT: movzbl %cl, %eax |
| -; CHECK-O0-NEXT: movl %eax, var |
| +; CHECK-O0-NEXT: movzbl %cl, %esi |
| +; CHECK-O0-NEXT: movl %esi, var |
| ; CHECK-O0-NEXT: andb $1, %r8b |
| -; CHECK-O0-NEXT: movzbl %r8b, %eax |
| -; CHECK-O0-NEXT: movl %eax, var |
| +; CHECK-O0-NEXT: movzbl %r8b, %esi |
| +; CHECK-O0-NEXT: movl %esi, var |
| ; CHECK-O0-NEXT: popq %rax |
| ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 |
| ; CHECK-O0-NEXT: retq |
| diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll |
| index 1afae31b2b8..1388c61c189 100644 |
| --- a/llvm/test/CodeGen/X86/swifterror.ll |
| +++ b/llvm/test/CodeGen/X86/swifterror.ll |
| @@ -790,8 +790,8 @@ a: |
| ; CHECK-O0-LABEL: testAssign4 |
| ; CHECK-O0: callq _foo2 |
| ; CHECK-O0: xorl %eax, %eax |
| -; CHECK-O0: ## kill: def $rax killed $eax |
| -; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]] |
| +; CHECK-O0: movl %eax, %ecx |
| +; CHECK-O0: movq %rcx, [[SLOT:[-a-z0-9\(\)\%]*]] |
| ; CHECK-O0: movq [[SLOT]], %rax |
| ; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] |
| ; CHECK-O0: movq [[SLOT2]], %r12 |
| diff --git a/llvm/test/DebugInfo/X86/op_deref.ll b/llvm/test/DebugInfo/X86/op_deref.ll |
| index 1b49dc554f7..5de9976d6de 100644 |
| --- a/llvm/test/DebugInfo/X86/op_deref.ll |
| +++ b/llvm/test/DebugInfo/X86/op_deref.ll |
| @@ -6,10 +6,10 @@ |
| ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3 |
| |
| ; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000 |
| -; DWARF4-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref |
| +; DWARF4-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref |
| |
| ; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000 |
| -; DWARF3-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref |
| +; DWARF3-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref |
| |
| ; CHECK-NOT: DW_TAG |
| ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla") |
| @@ -17,8 +17,8 @@ |
| ; Check the DEBUG_VALUE comments for good measure. |
| ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK |
| ; vla should have a register-indirect address at one point. |
| -; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rcx+0] |
| -; ASM-CHECK: DW_OP_breg2 |
| +; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rdx+0] |
| +; ASM-CHECK: DW_OP_breg1 |
| |
| ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT |
| ; PRETTY-PRINT: DIExpression(DW_OP_deref) |