| commit fa31e8f4a0f853848d96549a429083941877bf8d |
| Author: Sergei Trofimovich <siarheit@google.com> |
| Date: Sun Dec 14 14:30:12 2014 +0000 |
| |
| powerpc: fix and enable shared libraries by default on linux |
| |
| Summary: |
| And fix things all the way down to it. Namely: |
| - remove 'r30' from free registers, it's an .LCTOC1 register |
| for gcc. generated .plt stubs expect it to be initialised. |
| - fix PicBase computation, which originally forgot to use 'tmp' |
| reg in 'initializePicBase_ppc.fetchPC' |
| - mark 'ForeighTarget's as implicitly using 'PicBase' register |
| (see comment for details) |
| - add 64-bit MO_Sub and test on alloclimit3/4 regtests |
| - fix dynamic label offsets to match with .LCTOC1 offset |
| |
| Signed-off-by: Sergei Trofimovich <siarheit@google.com> |
| |
| Test Plan: validate passes equal amount of vanilla/dyn tests |
| |
| Reviewers: simonmar, erikd, austin |
| |
| Reviewed By: erikd, austin |
| |
| Subscribers: carter, thomie |
| |
| Differential Revision: https://phabricator.haskell.org/D560 |
| |
| GHC Trac Issues: #8024, #9831 |
| |
| diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs |
| index 0f2c0ae..37b8ada 100644 |
| --- a/compiler/cmm/CLabel.hs |
| +++ b/compiler/cmm/CLabel.hs |
| @@ -1170,7 +1170,8 @@ pprDynamicLinkerAsmLabel platform dllInfo lbl |
| else if osElfTarget (platformOS platform) |
| then if platformArch platform == ArchPPC |
| then case dllInfo of |
| - CodeStub -> ppr lbl <> text "@plt" |
| + CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] |
| + ppr lbl <> text "+32768@plt" |
| SymbolPtr -> text ".LC_" <> ppr lbl |
| _ -> panic "pprDynamicLinkerAsmLabel" |
| else if platformArch platform == ArchX86_64 |
| diff --git a/compiler/nativeGen/PIC.hs b/compiler/nativeGen/PIC.hs |
| index 9b5c080..6326a8b 100644 |
| --- a/compiler/nativeGen/PIC.hs |
| +++ b/compiler/nativeGen/PIC.hs |
| @@ -54,7 +54,6 @@ import qualified X86.Instr as X86 |
| |
| import Platform |
| import Instruction |
| -import Size |
| import Reg |
| import NCGMonad |
| |
| @@ -468,11 +467,8 @@ pprGotDeclaration dflags ArchX86 OSDarwin |
| pprGotDeclaration _ _ OSDarwin |
| = empty |
| |
| --- pprGotDeclaration |
| +-- Emit GOT declaration |
| -- Output whatever needs to be output once per .s file. |
| --- The .LCTOC1 label is defined to point 32768 bytes into the table, |
| --- to make the most of the PPC's 16-bit displacements. |
| --- Only needed for PIC. |
| pprGotDeclaration dflags arch os |
| | osElfTarget os |
| , arch /= ArchPPC_64 |
| @@ -482,6 +478,7 @@ pprGotDeclaration dflags arch os |
| | osElfTarget os |
| , arch /= ArchPPC_64 |
| = vcat [ |
| + -- See Note [.LCTOC1 in PPC PIC code] |
| ptext (sLit ".section \".got2\",\"aw\""), |
| ptext (sLit ".LCTOC1 = .+32768") ] |
| |
| @@ -688,12 +685,7 @@ pprImportedSymbol _ _ _ |
| |
| |
| -- Get a pointer to our own fake GOT, which is defined on a per-module basis. |
| --- This is exactly how GCC does it, and it's quite horrible: |
| --- We first fetch the address of a local label (mkPicBaseLabel). |
| --- Then we add a 16-bit offset to that to get the address of a .long that we |
| --- define in .text space right next to the proc. This .long literal contains |
| --- the (32-bit) offset from our local label to our global offset table |
| --- (.LCTOC1 aka gotOffLabel). |
| +-- This is exactly how GCC does it in linux. |
| |
| initializePicBase_ppc |
| :: Arch -> OS -> Reg |
| @@ -704,18 +696,9 @@ initializePicBase_ppc ArchPPC os picReg |
| (CmmProc info lab live (ListGraph blocks) : statics) |
| | osElfTarget os |
| = do |
| - dflags <- getDynFlags |
| - gotOffLabel <- getNewLabelNat |
| - tmp <- getNewRegNat $ intSize (wordWidth dflags) |
| let |
| - gotOffset = CmmData Text $ Statics gotOffLabel [ |
| - CmmStaticLit (CmmLabelDiffOff gotLabel |
| - mkPicBaseLabel |
| - 0) |
| - ] |
| - offsetToOffset |
| - = PPC.ImmConstantDiff |
| - (PPC.ImmCLbl gotOffLabel) |
| + gotOffset = PPC.ImmConstantDiff |
| + (PPC.ImmCLbl gotLabel) |
| (PPC.ImmCLbl mkPicBaseLabel) |
| |
| blocks' = case blocks of |
| @@ -726,15 +709,23 @@ initializePicBase_ppc ArchPPC os picReg |
| | bID `mapMember` info = fetchPC b |
| | otherwise = b |
| |
| + -- GCC does PIC prologs thusly: |
| + -- bcl 20,31,.L1 |
| + -- .L1: |
| + -- mflr 30 |
| + -- addis 30,30,.LCTOC1-.L1@ha |
| + -- addi 30,30,.LCTOC1-.L1@l |
| + -- TODO: below we use it over temporary register, |
| + -- it can and should be optimised by picking |
| + -- correct PIC reg. |
| fetchPC (BasicBlock bID insns) = |
| BasicBlock bID (PPC.FETCHPC picReg |
| - : PPC.ADDIS tmp picReg (PPC.HI offsetToOffset) |
| - : PPC.LD PPC.archWordSize tmp |
| - (PPC.AddrRegImm tmp (PPC.LO offsetToOffset)) |
| - : PPC.ADD picReg picReg (PPC.RIReg picReg) |
| + : PPC.ADDIS picReg picReg (PPC.HA gotOffset) |
| + : PPC.ADDI picReg picReg (PPC.LO gotOffset) |
| + : PPC.MR PPC.r30 picReg |
| : insns) |
| |
| - return (CmmProc info lab live (ListGraph blocks') : gotOffset : statics) |
| + return (CmmProc info lab live (ListGraph blocks') : statics) |
| |
| |
| initializePicBase_ppc ArchPPC OSDarwin picReg |
| diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs |
| index ddf483a..c1c4a74 100644 |
| --- a/compiler/nativeGen/PPC/CodeGen.hs |
| +++ b/compiler/nativeGen/PPC/CodeGen.hs |
| @@ -54,7 +54,7 @@ import Outputable |
| import Unique |
| import DynFlags |
| |
| -import Control.Monad ( mapAndUnzipM ) |
| +import Control.Monad ( mapAndUnzipM, when ) |
| import Data.Bits |
| import Data.Word |
| |
| @@ -355,6 +355,19 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do |
| ADDE rhi r1hi r2hi ] |
| return (ChildCode64 code rlo) |
| |
| +iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do |
| + ChildCode64 code1 r1lo <- iselExpr64 e1 |
| + ChildCode64 code2 r2lo <- iselExpr64 e2 |
| + (rlo,rhi) <- getNewRegPairNat II32 |
| + let |
| + r1hi = getHiVRegFromLo r1lo |
| + r2hi = getHiVRegFromLo r2lo |
| + code = code1 `appOL` |
| + code2 `appOL` |
| + toOL [ SUBFC rlo r2lo r1lo, |
| + SUBFE rhi r2hi r1hi ] |
| + return (ChildCode64 code rlo) |
| + |
| iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do |
| (expr_reg,expr_code) <- getSomeReg expr |
| (rlo, rhi) <- getNewRegPairNat II32 |
| @@ -918,8 +931,12 @@ genCCall' dflags gcp target dest_regs args0 |
| (toOL []) [] |
| |
| (labelOrExpr, reduceToFF32) <- case target of |
| - ForeignTarget (CmmLit (CmmLabel lbl)) _ -> return (Left lbl, False) |
| - ForeignTarget expr _ -> return (Right expr, False) |
| + ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do |
| + uses_pic_base_implicitly |
| + return (Left lbl, False) |
| + ForeignTarget expr _ -> do |
| + uses_pic_base_implicitly |
| + return (Right expr, False) |
| PrimTarget mop -> outOfLineMachOp mop |
| |
| let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode |
| @@ -940,6 +957,13 @@ genCCall' dflags gcp target dest_regs args0 |
| where |
| platform = targetPlatform dflags |
| |
| + uses_pic_base_implicitly = do |
| + -- See Note [implicit register in PPC PIC code] |
| + -- on why we claim to use PIC register here |
| + when (gopt Opt_PIC dflags) $ do |
| + _ <- getPicBaseNat archWordSize |
| + return () |
| + |
| initialStackOffset = case gcp of |
| GCPDarwin -> 24 |
| GCPLinux -> 8 |
| @@ -1431,3 +1455,21 @@ coerceFP2Int _ toRep x = do |
| -- read low word of value (high word is undefined) |
| LD II32 dst (spRel dflags 3)] |
| return (Any (intSize toRep) code') |
| + |
| +-- Note [.LCTOC1 in PPC PIC code] |
| +-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table |
| +-- to make the most of the PPC's 16-bit displacements. |
| +-- As 16-bit signed offset is used (usually via addi/lwz instructions) |
| +-- first element will have '-32768' offset against .LCTOC1. |
| + |
| +-- Note [implicit register in PPC PIC code] |
| +-- PPC generates calls by labels in assembly |
| +-- in form of: |
| +-- bl puts+32768@plt |
| +-- in this form it's not seen directly (by GHC NCG) |
| +-- that r30 (PicBaseReg) is used, |
| +-- but r30 is a required part of PLT code setup: |
| +-- puts+32768@plt: |
| +-- lwz r11,-30484(r30) ; offset in .LCTOC1 |
| +-- mtctr r11 |
| +-- bctr |
| diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs |
| index f5b9506..b7081f9 100644 |
| --- a/compiler/nativeGen/PPC/Instr.hs |
| +++ b/compiler/nativeGen/PPC/Instr.hs |
| @@ -205,8 +205,11 @@ data Instr |
| | ADD Reg Reg RI -- dst, src1, src2 |
| | ADDC Reg Reg Reg -- (carrying) dst, src1, src2 |
| | ADDE Reg Reg Reg -- (extend) dst, src1, src2 |
| + | ADDI Reg Reg Imm -- Add Immediate dst, src1, src2 |
| | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2 |
| | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1 |
| + | SUBFC Reg Reg Reg -- (carrying) dst, src1, src2 ; dst = src2 - src1 |
| + | SUBFE Reg Reg Reg -- (extend) dst, src1, src2 ; dst = src2 - src1 |
| | MULLW Reg Reg RI |
| | DIVW Reg Reg Reg |
| | DIVWU Reg Reg Reg |
| @@ -284,8 +287,11 @@ ppc_regUsageOfInstr platform instr |
| ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) |
| ADDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) |
| ADDE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) |
| + ADDI reg1 reg2 _ -> usage ([reg2], [reg1]) |
| ADDIS reg1 reg2 _ -> usage ([reg2], [reg1]) |
| SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) |
| + SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) |
| + SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) |
| MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) |
| DIVW reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) |
| DIVWU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) |
| @@ -358,8 +364,11 @@ ppc_patchRegsOfInstr instr env |
| ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri) |
| ADDC reg1 reg2 reg3 -> ADDC (env reg1) (env reg2) (env reg3) |
| ADDE reg1 reg2 reg3 -> ADDE (env reg1) (env reg2) (env reg3) |
| + ADDI reg1 reg2 imm -> ADDI (env reg1) (env reg2) imm |
| ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm |
| SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3) |
| + SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3) |
| + SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3) |
| MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri) |
| DIVW reg1 reg2 reg3 -> DIVW (env reg1) (env reg2) (env reg3) |
| DIVWU reg1 reg2 reg3 -> DIVWU (env reg1) (env reg2) (env reg3) |
| diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs |
| index 6851769..f59d51f 100644 |
| --- a/compiler/nativeGen/PPC/Ppr.hs |
| +++ b/compiler/nativeGen/PPC/Ppr.hs |
| @@ -525,6 +525,16 @@ pprInstr (BCTRL _) = hcat [ |
| ptext (sLit "bctrl") |
| ] |
| pprInstr (ADD reg1 reg2 ri) = pprLogic (sLit "add") reg1 reg2 ri |
| +pprInstr (ADDI reg1 reg2 imm) = hcat [ |
| + char '\t', |
| + ptext (sLit "addi"), |
| + char '\t', |
| + pprReg reg1, |
| + ptext (sLit ", "), |
| + pprReg reg2, |
| + ptext (sLit ", "), |
| + pprImm imm |
| + ] |
| pprInstr (ADDIS reg1 reg2 imm) = hcat [ |
| char '\t', |
| ptext (sLit "addis"), |
| @@ -539,6 +549,8 @@ pprInstr (ADDIS reg1 reg2 imm) = hcat [ |
| pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3) |
| pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3) |
| pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3) |
| +pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3) |
| +pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3) |
| pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mullw") reg1 reg2 ri |
| pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri |
| pprInstr (DIVW reg1 reg2 reg3) = pprLogic (sLit "divw") reg1 reg2 (RIReg reg3) |
| diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs |
| index 0f636bf..69e69c0 100644 |
| --- a/compiler/nativeGen/PPC/Regs.hs |
| +++ b/compiler/nativeGen/PPC/Regs.hs |
| @@ -37,7 +37,8 @@ module PPC.Regs ( |
| fits16Bits, |
| makeImmediate, |
| fReg, |
| - sp, r3, r4, r27, r28, f1, f20, f21, |
| + sp, r3, r4, r27, r28, r30, |
| + f1, f20, f21, |
| |
| allocatableRegs |
| |
| @@ -295,12 +296,13 @@ point registers. |
| fReg :: Int -> RegNo |
| fReg x = (32 + x) |
| |
| -sp, r3, r4, r27, r28, f1, f20, f21 :: Reg |
| +sp, r3, r4, r27, r28, r30, f1, f20, f21 :: Reg |
| sp = regSingle 1 |
| r3 = regSingle 3 |
| r4 = regSingle 4 |
| r27 = regSingle 27 |
| r28 = regSingle 28 |
| +r30 = regSingle 30 |
| f1 = regSingle $ fReg 1 |
| f20 = regSingle $ fReg 20 |
| f21 = regSingle $ fReg 21 |
| diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs |
| index 9916e0e..1d46a01 100644 |
| --- a/includes/CodeGen.Platform.hs |
| +++ b/includes/CodeGen.Platform.hs |
| @@ -881,6 +881,8 @@ freeReg 1 = fastBool False -- The Stack Pointer |
| # if !MACHREGS_darwin |
| -- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that |
| freeReg 2 = fastBool False |
| +-- at least linux in -fPIC relies on r30 in PLT stubs |
| +freeReg 30 = fastBool False |
| # endif |
| # ifdef REG_Base |
| freeReg REG_Base = fastBool False |
| diff --git a/mk/config.mk.in b/mk/config.mk.in |
| index 0f5820f..8f134bc 100644 |
| --- a/mk/config.mk.in |
| +++ b/mk/config.mk.in |
| @@ -95,7 +95,7 @@ TargetElf = YES |
| endif |
| |
| # Some platforms don't support shared libraries |
| -NoSharedLibsPlatformList = powerpc-unknown-linux \ |
| +NoSharedLibsPlatformList = \ |
| x86_64-unknown-mingw32 \ |
| i386-unknown-mingw32 |
| |