CodeGen.hs 74.8 KB
Newer Older
1 2
{-# LANGUAGE CPP, GADTs #-}
{-# OPTIONS_GHC -fno-warn-type-defaults #-}
3 4 5 6 7 8 9
-- ----------------------------------------------------------------------------
-- | Handle conversion of CmmProc to LLVM code.
--
module LlvmCodeGen.CodeGen ( genLlvmProc ) where

#include "HsVersions.h"

10 11
import GhcPrelude

12 13 14 15 16
import Llvm
import LlvmCodeGen.Base
import LlvmCodeGen.Regs

import BlockId
17
import CodeGen.Platform ( activeStgRegs, callerSaves )
18
import CLabel
19 20 21
import Cmm
import PprCmm
import CmmUtils
22
import CmmSwitch
23 24 25
import Hoopl.Block
import Hoopl.Graph
import Hoopl.Collections
26

27
import DynFlags
28 29
import FastString
import ForeignCall
30 31
import Outputable hiding (panic, pprPanic)
import qualified Outputable
32
import Platform
33
import OrdList
34 35
import UniqSupply
import Unique
36
import Util
37

38 39 40
import Control.Monad.Trans.Class
import Control.Monad.Trans.Writer

41
import qualified Data.Semigroup as Semigroup
Peter Wortmann's avatar
Peter Wortmann committed
42 43
import Data.List ( nub )
import Data.Maybe ( catMaybes )
44

45
type Atomic = Bool
46
type LlvmStatements = OrdList LlvmStatement
47

48 49
data Signage = Signed | Unsigned deriving (Eq, Show)

50
-- -----------------------------------------------------------------------------
dterei's avatar
dterei committed
51
-- | Top-level of the LLVM proc Code generator
52
--
Peter Wortmann's avatar
Peter Wortmann committed
53 54
genLlvmProc :: RawCmmDecl -> LlvmM [LlvmCmmDecl]
genLlvmProc (CmmProc infos lbl live graph) = do
55
    let blocks = toBlockListEntryFirstFalseFallthrough graph
Peter Wortmann's avatar
Peter Wortmann committed
56
    (lmblocks, lmdata) <- basicBlocksCodeGen live blocks
57
    let info = mapLookup (g_entry graph) infos
58
        proc = CmmProc info lbl live (ListGraph lmblocks)
Peter Wortmann's avatar
Peter Wortmann committed
59
    return (proc:lmdata)
60

Peter Wortmann's avatar
Peter Wortmann committed
61
genLlvmProc _ = panic "genLlvmProc: case that shouldn't reach here!"
62 63 64 65 66

-- -----------------------------------------------------------------------------
-- * Block code generation
--

Peter Wortmann's avatar
Peter Wortmann committed
67
-- | Generate code for a list of blocks that make up a complete
Gabor Greif's avatar
Gabor Greif committed
68
-- procedure. The first block in the list is expected to be the entry
Peter Wortmann's avatar
Peter Wortmann committed
69 70 71 72 73 74
-- point and will get the prologue.
basicBlocksCodeGen :: LiveGlobalRegs -> [CmmBlock]
                      -> LlvmM ([LlvmBasicBlock], [LlvmCmmDecl])
basicBlocksCodeGen _    []                     = panic "no entry block!"
basicBlocksCodeGen live (entryBlock:cmmBlocks)
  = do (prologue, prologueTops) <- funPrologue live (entryBlock:cmmBlocks)
75

Peter Wortmann's avatar
Peter Wortmann committed
76 77 78
       -- Generate code
       (BasicBlock bid entry, entryTops) <- basicBlockCodeGen entryBlock
       (blocks, topss) <- fmap unzip $ mapM basicBlockCodeGen cmmBlocks
79

Peter Wortmann's avatar
Peter Wortmann committed
80 81 82
       -- Compose
       let entryBlock = BasicBlock bid (fromOL prologue ++ entry)
       return (entryBlock : blocks, prologueTops ++ entryTops ++ concat topss)
83 84 85


-- | Generate code for one block
Peter Wortmann's avatar
Peter Wortmann committed
86 87
basicBlockCodeGen :: CmmBlock -> LlvmM ( LlvmBasicBlock, [LlvmCmmDecl] )
basicBlockCodeGen block
Peter Wortmann's avatar
Peter Wortmann committed
88 89
  = do let (_, nodes, tail)  = blockSplit block
           id = entryLabel block
Peter Wortmann's avatar
Peter Wortmann committed
90 91
       (mid_instrs, top) <- stmtsToInstrs $ blockToList nodes
       (tail_instrs, top')  <- stmtToInstrs tail
92
       let instrs = fromOL (mid_instrs `appOL` tail_instrs)
Peter Wortmann's avatar
Peter Wortmann committed
93
       return (BasicBlock id instrs, top' ++ top)
94 95

-- -----------------------------------------------------------------------------
96
-- * CmmNode code generation
97 98 99
--

-- A statement conversion return data.
dterei's avatar
dterei committed
100
--   * LlvmStatements: The compiled LLVM statements.
Simon Peyton Jones's avatar
Simon Peyton Jones committed
101
--   * LlvmCmmDecl: Any global data needed.
Peter Wortmann's avatar
Peter Wortmann committed
102
type StmtData = (LlvmStatements, [LlvmCmmDecl])
103 104


105
-- | Convert a list of CmmNode's to LlvmStatement's
Peter Wortmann's avatar
Peter Wortmann committed
106 107 108 109
stmtsToInstrs :: [CmmNode e x] -> LlvmM StmtData
stmtsToInstrs stmts
   = do (instrss, topss) <- fmap unzip $ mapM stmtToInstrs stmts
        return (concatOL instrss, concat topss)
110 111


Peter Wortmann's avatar
Peter Wortmann committed
112 113 114
-- | Convert a CmmStmt to a list of LlvmStatement's
stmtToInstrs :: CmmNode e x -> LlvmM StmtData
stmtToInstrs stmt = case stmt of
115

Peter Wortmann's avatar
Peter Wortmann committed
116
    CmmComment _         -> return (nilOL, []) -- nuke comments
Peter Wortmann's avatar
Peter Wortmann committed
117
    CmmTick    _         -> return (nilOL, [])
Peter Wortmann's avatar
Peter Wortmann committed
118
    CmmUnwind  {}        -> return (nilOL, [])
119

Peter Wortmann's avatar
Peter Wortmann committed
120 121
    CmmAssign reg src    -> genAssign reg src
    CmmStore addr src    -> genStore addr src
122

Peter Wortmann's avatar
Peter Wortmann committed
123
    CmmBranch id         -> genBranch id
124 125
    CmmCondBranch arg true false likely
                         -> genCondBranch arg true false likely
Peter Wortmann's avatar
Peter Wortmann committed
126
    CmmSwitch arg ids    -> genSwitch arg ids
127 128

    -- Foreign Call
Peter Wortmann's avatar
Peter Wortmann committed
129 130
    CmmUnsafeForeignCall target res args
        -> genCall target res args
131 132

    -- Tail call
133
    CmmCall { cml_target = arg,
Peter Wortmann's avatar
Peter Wortmann committed
134
              cml_args_regs = live } -> genJump arg live
135

136
    _ -> panic "Llvm.CodeGen.stmtToInstrs"
137

Peter Wortmann's avatar
Peter Wortmann committed
138 139 140 141 142 143 144 145 146 147 148 149
-- | Wrapper function to declare an instrinct function by function type
getInstrinct2 :: LMString -> LlvmType -> LlvmM ExprData
getInstrinct2 fname fty@(LMFunction funSig) = do

    let fv   = LMGlobalVar fname fty (funcLinkage funSig) Nothing Nothing Constant

    fn <- funLookup fname
    tops <- case fn of
      Just _  ->
        return []
      Nothing -> do
        funInsert fname fty
150
        un <- getUniqueM
151 152
        let lbl = mkAsmTempLabel un
        return [CmmData (Section Data lbl) [([],[fty])]]
Peter Wortmann's avatar
Peter Wortmann committed
153 154 155 156 157 158 159 160 161 162 163 164 165

    return (fv, nilOL, tops)

getInstrinct2 _ _ = error "getInstrinct2: Non-function type!"

-- | Declares an instrinct function by return and parameter types
getInstrinct :: LMString -> LlvmType -> [LlvmType] -> LlvmM ExprData
getInstrinct fname retTy parTys =
    let funSig = LlvmFunctionDecl fname ExternallyVisible CC_Ccc retTy
                    FixedArgs (tysToParams parTys) Nothing
        fty = LMFunction funSig
    in getInstrinct2 fname fty

166
-- | Memory barrier instruction for LLVM >= 3.0
Peter Wortmann's avatar
Peter Wortmann committed
167 168
barrier :: LlvmM StmtData
barrier = do
169
    let s = Fence False SyncSeqCst
Peter Wortmann's avatar
Peter Wortmann committed
170
    return (unitOL s, [])
171

172
-- | Foreign Calls
Peter Wortmann's avatar
Peter Wortmann committed
173 174
genCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
              -> LlvmM StmtData
175 176 177

-- Write barrier needs to be handled specially as it is implemented as an LLVM
-- intrinsic function.
Peter Wortmann's avatar
Peter Wortmann committed
178 179
genCall (PrimTarget MO_WriteBarrier) _ _ = do
    platform <- getLlvmPlatform
180 181 182
    if platformArch platform `elem` [ArchX86, ArchX86_64, ArchSPARC]
       then return (nilOL, [])
       else barrier
Peter Wortmann's avatar
Peter Wortmann committed
183 184 185 186

genCall (PrimTarget MO_Touch) _ _
 = return (nilOL, [])

187 188
genCall (PrimTarget (MO_UF_Conv w)) [dst] [e] = runStmtsDecls $ do
    dstV <- getCmmRegW (CmmLocal dst)
Peter Wortmann's avatar
Peter Wortmann committed
189
    let ty = cmmToLlvmType $ localRegType dst
tibbe's avatar
tibbe committed
190
        width = widthToLlvmFloat w
191 192 193 194
    castV <- lift $ mkLocalVar ty
    ve <- exprToVarW e
    statement $ Assignment castV $ Cast LM_Uitofp ve width
    statement $ Store castV dstV
195

Peter Wortmann's avatar
Peter Wortmann committed
196
genCall (PrimTarget (MO_UF_Conv _)) [_] args =
tibbe's avatar
tibbe committed
197 198 199
    panic $ "genCall: Too many arguments to MO_UF_Conv. " ++
    "Can only handle 1, given" ++ show (length args) ++ "."

gmainland's avatar
gmainland committed
200
-- Handle prefetching data
201
genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
202
  | 0 <= localityInt && localityInt <= 3 = runStmtsDecls $ do
203
    let argTy = [i8Ptr, i32, i32, i32]
gmainland's avatar
gmainland committed
204 205 206 207 208
        funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
                             CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing

    let (_, arg_hints) = foreignTargetHints t
    let args_hints' = zip args arg_hints
209 210
    argVars <- arg_varsW args_hints' ([], nilOL, [])
    fptr    <- liftExprData $ getFunPtr funTy t
211
    argVars' <- castVarsW Signed $ zip argVars argTy
gmainland's avatar
gmainland committed
212

213
    doTrashStmts
214
    let argSuffix = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1]
215
    statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
216
  | otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
gmainland's avatar
gmainland committed
217

218 219
-- Handle PopCnt, Clz, Ctz, and BSwap that need to only convert arg
-- and return types
220 221
genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
    genCallSimpleCast w t dsts args
222 223 224 225 226

genCall t@(PrimTarget (MO_Pdep w)) dsts args =
    genCallSimpleCast2 w t dsts args
genCall t@(PrimTarget (MO_Pext w)) dsts args =
    genCallSimpleCast2 w t dsts args
227 228 229 230
genCall t@(PrimTarget (MO_Clz w)) dsts args =
    genCallSimpleCast w t dsts args
genCall t@(PrimTarget (MO_Ctz w)) dsts args =
    genCallSimpleCast w t dsts args
231 232
genCall t@(PrimTarget (MO_BSwap w)) dsts args =
    genCallSimpleCast w t dsts args
dterei's avatar
dterei committed
233

234 235 236
genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ do
    addrVar <- exprToVarW addr
    nVar <- exprToVarW n
237 238
    let targetTy = widthToLlvmInt width
        ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
239 240
    ptrVar <- doExprW (pLift targetTy) ptrExpr
    dstVar <- getCmmRegW (CmmLocal dst)
241 242 243 244 245 246 247
    let op = case amop of
               AMO_Add  -> LAO_Add
               AMO_Sub  -> LAO_Sub
               AMO_And  -> LAO_And
               AMO_Nand -> LAO_Nand
               AMO_Or   -> LAO_Or
               AMO_Xor  -> LAO_Xor
248 249 250 251 252 253 254 255 256 257 258 259 260
    retVar <- doExprW targetTy $ AtomicRMW op ptrVar nVar SyncSeqCst
    statement $ Store retVar dstVar

genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = runStmtsDecls $ do
    dstV <- getCmmRegW (CmmLocal dst)
    v1 <- genLoadW True addr (localRegType dst)
    statement $ Store v1 dstV

genCall (PrimTarget (MO_Cmpxchg _width))
        [dst] [addr, old, new] = runStmtsDecls $ do
    addrVar <- exprToVarW addr
    oldVar <- exprToVarW old
    newVar <- exprToVarW new
261 262
    let targetTy = getVarType oldVar
        ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
263 264 265 266 267 268 269 270 271 272
    ptrVar <- doExprW (pLift targetTy) ptrExpr
    dstVar <- getCmmRegW (CmmLocal dst)
    retVar <- doExprW (LMStructU [targetTy,i1])
              $ CmpXChg ptrVar oldVar newVar SyncSeqCst SyncSeqCst
    retVar' <- doExprW targetTy $ ExtractV retVar 0
    statement $ Store retVar' dstVar

genCall (PrimTarget (MO_AtomicWrite _width)) [] [addr, val] = runStmtsDecls $ do
    addrVar <- exprToVarW addr
    valVar <- exprToVarW val
273 274
    let ptrTy = pLift $ getVarType valVar
        ptrExpr = Cast LM_Inttoptr addrVar ptrTy
275 276
    ptrVar <- doExprW ptrTy ptrExpr
    statement $ Expr $ AtomicRMW LAO_Xchg ptrVar valVar SyncSeqCst
277

278 279
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
280
genCall t@(PrimTarget op) [] args
281
 | Just align <- machOpMemcpyishAlign op = runStmtsDecls $ do
282
    dflags <- getDynFlags
283
    let isVolTy = [i1]
284
        isVolVal = [mkIntLit i1 0]
285 286
        argTy | MO_Memset _ <- op = [i8Ptr, i8,    llvmWord dflags, i32] ++ isVolTy
              | otherwise         = [i8Ptr, i8Ptr, llvmWord dflags, i32] ++ isVolTy
287 288 289
        funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
                             CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing

290 291
    let (_, arg_hints) = foreignTargetHints t
    let args_hints = zip args arg_hints
292 293
    argVars       <- arg_varsW args_hints ([], nilOL, [])
    fptr          <- getFunPtrW funTy t
294
    argVars' <- castVarsW Signed $ zip argVars argTy
295

296
    doTrashStmts
297 298
    let alignVal = mkIntLit i32 align
        arguments = argVars' ++ (alignVal:isVolVal)
299
    statement $ Expr $ Call StdCall fptr arguments []
300

301 302 303 304
-- We handle MO_U_Mul2 by simply using a 'mul' instruction, but with operands
-- twice the width (we first zero-extend them), e.g., on 64-bit arch we will
-- generate 'mul' on 128-bit operands. Then we only need some plumbing to
-- extract the two 64-bit values out of 128-bit result.
305
genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = runStmtsDecls $ do
306 307 308 309 310 311
    let width = widthToLlvmInt w
        bitWidth = widthInBits w
        width2x = LMInt (bitWidth * 2)
    -- First zero-extend the operands ('mul' instruction requires the operands
    -- and the result to be of the same type). Note that we don't use 'castVars'
    -- because it tries to do LM_Sext.
312 313 314 315
    lhsVar <- exprToVarW lhs
    rhsVar <- exprToVarW rhs
    lhsExt <- doExprW width2x $ Cast LM_Zext lhsVar width2x
    rhsExt <- doExprW width2x $ Cast LM_Zext rhsVar width2x
316
    -- Do the actual multiplication (note that the result is also 2x width).
317
    retV <- doExprW width2x $ LlvmOp LM_MO_Mul lhsExt rhsExt
318
    -- Extract the lower bits of the result into retL.
319
    retL <- doExprW width $ Cast LM_Trunc retV width
320 321
    -- Now we right-shift the higher bits by width.
    let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
322
    retShifted <- doExprW width2x $ LlvmOp LM_MO_LShr retV widthLlvmLit
323
    -- And extract them into retH.
324 325 326 327 328
    retH <- doExprW width $ Cast LM_Trunc retShifted width
    dstRegL <- getCmmRegW (CmmLocal dstL)
    dstRegH <- getCmmRegW (CmmLocal dstH)
    statement $ Store retL dstRegL
    statement $ Store retH dstRegH
329

330 331
-- MO_U_QuotRem2 is another case we handle by widening the registers to double
-- the width and use normal LLVM instructions (similarly to the MO_U_Mul2). The
Gabor Greif's avatar
Gabor Greif committed
332
-- main difference here is that we need to combine two words into one register
333
-- and then use both 'udiv' and 'urem' instructions to compute the result.
334 335
genCall (PrimTarget (MO_U_QuotRem2 w))
        [dstQ, dstR] [lhsH, lhsL, rhs] = runStmtsDecls $ do
336 337 338 339 340
    let width = widthToLlvmInt w
        bitWidth = widthInBits w
        width2x = LMInt (bitWidth * 2)
    -- First zero-extend all parameters to double width.
    let zeroExtend expr = do
341
            var <- exprToVarW expr
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
            doExprW width2x $ Cast LM_Zext var width2x
    lhsExtH <- zeroExtend lhsH
    lhsExtL <- zeroExtend lhsL
    rhsExt <- zeroExtend rhs
    -- Now we combine the first two parameters (that represent the high and low
    -- bits of the value). So first left-shift the high bits to their position
    -- and then bit-or them with the low bits.
    let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
    lhsExtHShifted <- doExprW width2x $ LlvmOp LM_MO_Shl lhsExtH widthLlvmLit
    lhsExt <- doExprW width2x $ LlvmOp LM_MO_Or lhsExtHShifted lhsExtL
    -- Finally, we can call 'udiv' and 'urem' to compute the results.
    retExtDiv <- doExprW width2x $ LlvmOp LM_MO_UDiv lhsExt rhsExt
    retExtRem <- doExprW width2x $ LlvmOp LM_MO_URem lhsExt rhsExt
    -- And since everything is in 2x width, we need to truncate the results and
    -- then return them.
    let narrow var = doExprW width $ Cast LM_Trunc var width
    retDiv <- narrow retExtDiv
    retRem <- narrow retExtRem
    dstRegQ <- lift $ getCmmReg (CmmLocal dstQ)
    dstRegR <- lift $ getCmmReg (CmmLocal dstR)
    statement $ Store retDiv dstRegQ
    statement $ Store retRem dstRegR

365 366 367 368 369 370 371 372 373 374 375 376 377 378
-- Handle the MO_{Add,Sub}IntC separately. LLVM versions return a record from
-- which we need to extract the actual values.
genCall t@(PrimTarget (MO_AddIntC w)) [dstV, dstO] [lhs, rhs] =
    genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
genCall t@(PrimTarget (MO_SubIntC w)) [dstV, dstO] [lhs, rhs] =
    genCallWithOverflow t w [dstV, dstO] [lhs, rhs]

-- Similar to MO_{Add,Sub}IntC, but MO_Add2 expects the first element of the
-- return tuple to be the overflow bit and the second element to contain the
-- actual result of the addition. So we still use genCallWithOverflow but swap
-- the return registers.
genCall t@(PrimTarget (MO_Add2 w)) [dstO, dstV] [lhs, rhs] =
    genCallWithOverflow t w [dstV, dstO] [lhs, rhs]

Sebastian Graf's avatar
Sebastian Graf committed
379 380 381
genCall t@(PrimTarget (MO_AddWordC w)) [dstV, dstO] [lhs, rhs] =
    genCallWithOverflow t w [dstV, dstO] [lhs, rhs]

nkaretnikov's avatar
nkaretnikov committed
382 383 384
genCall t@(PrimTarget (MO_SubWordC w)) [dstV, dstO] [lhs, rhs] =
    genCallWithOverflow t w [dstV, dstO] [lhs, rhs]

385
-- Handle all other foreign calls and prim ops.
386
genCall target res args = runStmtsDecls $ do
387
    dflags <- getDynFlags
388

dterei's avatar
dterei committed
389
    -- parameter types
390
    let arg_type (_, AddrHint) = i8Ptr
391
        -- cast pointers to i8*. Llvm equivalent of void*
392
        arg_type (expr, _) = cmmToLlvmType $ cmmExprType dflags expr
393 394

    -- ret type
395 396 397
    let ret_type [] = LMVoid
        ret_type [(_, AddrHint)] = i8Ptr
        ret_type [(reg, _)]      = cmmToLlvmType $ localRegType reg
398 399 400
        ret_type t = panic $ "genCall: Too many return values! Can only handle"
                        ++ " 0 or 1, given " ++ show (length t) ++ "."

401
    -- extract Cmm call convention, and translate to LLVM call convention
402
    platform <- lift $ getLlvmPlatform
403 404 405
    let lmconv = case target of
            ForeignTarget _ (ForeignConvention conv _ _ _) ->
              case conv of
Peter Wortmann's avatar
Peter Wortmann committed
406
                 StdCallConv  -> case platformArch platform of
407 408 409 410 411
                                 ArchX86    -> CC_X86_Stdcc
                                 ArchX86_64 -> CC_X86_Stdcc
                                 _          -> CC_Ccc
                 CCallConv    -> CC_Ccc
                 CApiConv     -> CC_Ccc
Simon Marlow's avatar
Simon Marlow committed
412
                 PrimCallConv -> panic "LlvmCodeGen.CodeGen.genCall: PrimCallConv"
thoughtpolice's avatar
thoughtpolice committed
413
                 JavaScriptCallConv -> panic "LlvmCodeGen.CodeGen.genCall: JavaScriptCallConv"
414 415

            PrimTarget   _ -> CC_Ccc
416 417

    {-
418
        CC_Ccc of the possibilities here are a worry with the use of a custom
419 420 421 422 423 424 425
        calling convention for passing STG args. In practice the more
        dangerous combinations (e.g StdCall + llvmGhcCC) don't occur.

        The native code generator only handles StdCall and CCallConv.
    -}

    -- call attributes
426 427 428 429 430 431
    let fnAttrs | never_returns = NoReturn : llvmStdFunAttrs
                | otherwise     = llvmStdFunAttrs

        never_returns = case target of
             ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns) -> True
             _ -> False
432 433

    -- fun type
434 435 436
    let (res_hints, arg_hints) = foreignTargetHints target
    let args_hints = zip args arg_hints
    let ress_hints = zip res  res_hints
437
    let ccTy  = StdCall -- tail calls should be done through CmmJump
438 439
    let retTy = ret_type ress_hints
    let argTy = tysToParams $ map arg_type args_hints
440
    let funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
441
                             lmconv retTy FixedArgs argTy (llvmFunAlign dflags)
442 443


444 445
    argVars <- arg_varsW args_hints ([], nilOL, [])
    fptr    <- getFunPtrW funTy target
446

447 448 449
    let doReturn | ccTy == TailCall  = statement $ Return Nothing
                 | never_returns     = statement $ Unreachable
                 | otherwise         = return ()
450

451
    doTrashStmts
452

453 454 455
    -- make the actual call
    case retTy of
        LMVoid -> do
456
            statement $ Expr $ Call ccTy fptr argVars fnAttrs
457 458

        _ -> do
459
            v1 <- doExprW retTy $ Call ccTy fptr argVars fnAttrs
460
            -- get the return register
461
            let ret_reg [reg] = reg
462 463
                ret_reg t = panic $ "genCall: Bad number of registers! Can only handle"
                                ++ " 1, given " ++ show (length t) ++ "."
464
            let creg = ret_reg res
465
            vreg <- getCmmRegW (CmmLocal creg)
466 467
            if retTy == pLower (getVarType vreg)
                then do
468 469
                    statement $ Store v1 vreg
                    doReturn
470 471 472 473 474 475 476 477 478
                else do
                    let ty = pLower $ getVarType vreg
                    let op = case ty of
                            vt | isPointer vt -> LM_Bitcast
                               | isInt     vt -> LM_Ptrtoint
                               | otherwise    ->
                                   panic $ "genCall: CmmReg bad match for"
                                        ++ " returned type!"

479 480 481
                    v2 <- doExprW ty $ Cast op v1 ty
                    statement $ Store v2 vreg
                    doReturn
482

483 484 485 486 487 488 489 490
-- | Generate a call to an LLVM intrinsic that performs arithmetic operation
-- with overflow bit (i.e., returns a struct containing the actual result of the
-- operation and an overflow bit). This function will also extract the overflow
-- bit and zero-extend it (all the corresponding Cmm PrimOps represent the
-- overflow "bit" as a usual Int# or Word#).
genCallWithOverflow
  :: ForeignTarget -> Width -> [CmmFormal] -> [CmmActual] -> LlvmM StmtData
genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do
nkaretnikov's avatar
nkaretnikov committed
491
    -- So far this was only tested for the following four CallishMachOps.
492 493 494
    let valid = op `elem`   [ MO_Add2 w
                            , MO_AddIntC w
                            , MO_SubIntC w
Sebastian Graf's avatar
Sebastian Graf committed
495
                            , MO_AddWordC w
496 497 498
                            , MO_SubWordC w
                            ]
    MASSERT(valid)
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520
    let width = widthToLlvmInt w
    -- This will do most of the work of generating the call to the intrinsic and
    -- extracting the values from the struct.
    (value, overflowBit, (stmts, top)) <-
      genCallExtract t w (lhs, rhs) (width, i1)
    -- value is i<width>, but overflowBit is i1, so we need to cast (Cmm expects
    -- both to be i<width>)
    (overflow, zext) <- doExpr width $ Cast LM_Zext overflowBit width
    dstRegV <- getCmmReg (CmmLocal dstV)
    dstRegO <- getCmmReg (CmmLocal dstO)
    let storeV = Store value dstRegV
        storeO = Store overflow dstRegO
    return (stmts `snocOL` zext `snocOL` storeV `snocOL` storeO, top)
genCallWithOverflow _ _ _ _ =
    panic "genCallExtract: wrong ForeignTarget or number of arguments"

-- | A helper function for genCallWithOverflow that handles generating the call
-- to the LLVM intrinsic and extracting the result from the struct to LlvmVars.
genCallExtract
    :: ForeignTarget           -- ^ PrimOp
    -> Width                   -- ^ Width of the operands.
    -> (CmmActual, CmmActual)  -- ^ Actual arguments.
Gabor Greif's avatar
Gabor Greif committed
521
    -> (LlvmType, LlvmType)    -- ^ LLVM types of the returned struct.
522 523 524 525 526 527 528 529 530
    -> LlvmM (LlvmVar, LlvmVar, StmtData)
genCallExtract target@(PrimTarget op) w (argA, argB) (llvmTypeA, llvmTypeB) = do
    let width = widthToLlvmInt w
        argTy = [width, width]
        retTy = LMStructU [llvmTypeA, llvmTypeB]

    -- Process the arguments.
    let args_hints = zip [argA, argB] (snd $ foreignTargetHints target)
    (argsV1, args1, top1) <- arg_vars args_hints ([], nilOL, [])
531
    (argsV2, args2) <- castVars Signed $ zip argsV1 argTy
532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550

    -- Get the function and make the call.
    fname <- cmmPrimOpFunctions op
    (fptr, _, top2) <- getInstrinct fname retTy argTy
    -- We use StdCall for primops. See also the last case of genCall.
    (retV, call) <- doExpr retTy $ Call StdCall fptr argsV2 []

    -- This will result in a two element struct, we need to use "extractvalue"
    -- to get them out of it.
    (res1, ext1) <- doExpr llvmTypeA (ExtractV retV 0)
    (res2, ext2) <- doExpr llvmTypeB (ExtractV retV 1)

    let stmts = args1 `appOL` args2 `snocOL` call `snocOL` ext1 `snocOL` ext2
        tops = top1 ++ top2
    return (res1, res2, (stmts, tops))

genCallExtract _ _ _ _ =
    panic "genCallExtract: unsupported ForeignTarget"

551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566
-- Handle simple function call that only need simple type casting, of the form:
--   truncate arg >>= \a -> call(a) >>= zext
--
-- since GHC only really has i32 and i64 types and things like Word8 are backed
-- by an i32 and just present a logical i8 range. So we must handle conversions
-- from i32 to i8 explicitly as LLVM is strict about types.
genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
              -> LlvmM StmtData
genCallSimpleCast w t@(PrimTarget op) [dst] args = do
    let width = widthToLlvmInt w
        dstTy = cmmToLlvmType $ localRegType dst

    fname                       <- cmmPrimOpFunctions op
    (fptr, _, top3)             <- getInstrinct fname width [width]

    dstV                        <- getCmmReg (CmmLocal dst)
567

568 569 570
    let (_, arg_hints) = foreignTargetHints t
    let args_hints = zip args arg_hints
    (argsV, stmts2, top2)       <- arg_vars args_hints ([], nilOL, [])
571
    (argsV', stmts4)            <- castVars Signed $ zip argsV [width]
572
    (retV, s1)                  <- doExpr width $ Call StdCall fptr argsV' []
573 574
    (retVs', stmts5)            <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
    let retV'                    = singletonPanic "genCallSimpleCast" retVs'
575 576 577 578 579 580 581
    let s2                       = Store retV' dstV

    let stmts = stmts2 `appOL` stmts4 `snocOL`
                s1 `appOL` stmts5 `snocOL` s2
    return (stmts, top2 ++ top3)
genCallSimpleCast _ _ dsts _ =
    panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
ian@well-typed.com's avatar
ian@well-typed.com committed
582

583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604
-- Handle simple function call that only need simple type casting, of the form:
--   truncate arg >>= \a -> call(a) >>= zext
--
-- since GHC only really has i32 and i64 types and things like Word8 are backed
-- by an i32 and just present a logical i8 range. So we must handle conversions
-- from i32 to i8 explicitly as LLVM is strict about types.
genCallSimpleCast2 :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
              -> LlvmM StmtData
genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
    let width = widthToLlvmInt w
        dstTy = cmmToLlvmType $ localRegType dst

    fname                       <- cmmPrimOpFunctions op
    (fptr, _, top3)             <- getInstrinct fname width (const width <$> args)

    dstV                        <- getCmmReg (CmmLocal dst)

    let (_, arg_hints) = foreignTargetHints t
    let args_hints = zip args arg_hints
    (argsV, stmts2, top2)       <- arg_vars args_hints ([], nilOL, [])
    (argsV', stmts4)            <- castVars Signed $ zip argsV (const width <$> argsV)
    (retV, s1)                  <- doExpr width $ Call StdCall fptr argsV' []
605 606
    (retVs', stmts5)             <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
    let retV'                    = singletonPanic "genCallSimpleCast2" retVs'
607 608 609 610 611 612 613 614
    let s2                       = Store retV' dstV

    let stmts = stmts2 `appOL` stmts4 `snocOL`
                s1 `appOL` stmts5 `snocOL` s2
    return (stmts, top2 ++ top3)
genCallSimpleCast2 _ _ dsts _ =
    panic ("genCallSimpleCast2: " ++ show (length dsts) ++ " dsts")

615 616 617 618 619
-- | Create a function pointer from a target.
getFunPtrW :: (LMString -> LlvmType) -> ForeignTarget
           -> WriterT LlvmAccum LlvmM LlvmVar
getFunPtrW funTy targ = liftExprData $ getFunPtr funTy targ

620
-- | Create a function pointer from a target.
Peter Wortmann's avatar
Peter Wortmann committed
621 622 623 624 625 626
getFunPtr :: (LMString -> LlvmType) -> ForeignTarget
          -> LlvmM ExprData
getFunPtr funTy targ = case targ of
    ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
        name <- strCLabel_llvm lbl
        getHsFunc' name (funTy name)
627

628
    ForeignTarget expr _ -> do
Peter Wortmann's avatar
Peter Wortmann committed
629 630
        (v1, stmts, top) <- exprToVar expr
        dflags <- getDynFlags
631 632 633 634 635 636
        let fty = funTy $ fsLit "dynamic"
            cast = case getVarType v1 of
                ty | isPointer ty -> LM_Bitcast
                ty | isInt ty     -> LM_Inttoptr

                ty -> panic $ "genCall: Expr is of bad type for function"
Peter Wortmann's avatar
Peter Wortmann committed
637
                              ++ " call! (" ++ showSDoc dflags (ppr ty) ++ ")"
638 639

        (v2,s1) <- doExpr (pLift fty) $ Cast cast v1 (pLift fty)
Peter Wortmann's avatar
Peter Wortmann committed
640
        return (v2, stmts `snocOL` s1, top)
641

Peter Wortmann's avatar
Peter Wortmann committed
642 643 644 645
    PrimTarget mop -> do
        name <- cmmPrimOpFunctions mop
        let fty = funTy name
        getInstrinct2 name fty
646

647 648 649 650 651 652 653 654 655
-- | Conversion of call arguments.
arg_varsW :: [(CmmActual, ForeignHint)]
          -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
          -> WriterT LlvmAccum LlvmM [LlvmVar]
arg_varsW xs ys = do
    (vars, stmts, decls) <- lift $ arg_vars xs ys
    tell $ LlvmAccum stmts decls
    return vars

656
-- | Conversion of call arguments.
Peter Wortmann's avatar
Peter Wortmann committed
657
arg_vars :: [(CmmActual, ForeignHint)]
Simon Peyton Jones's avatar
Simon Peyton Jones committed
658
         -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
Peter Wortmann's avatar
Peter Wortmann committed
659
         -> LlvmM ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
660

Peter Wortmann's avatar
Peter Wortmann committed
661 662
arg_vars [] (vars, stmts, tops)
  = return (vars, stmts, tops)
663

Peter Wortmann's avatar
Peter Wortmann committed
664 665 666
arg_vars ((e, AddrHint):rest) (vars, stmts, tops)
  = do (v1, stmts', top') <- exprToVar e
       dflags <- getDynFlags
667 668 669 670 671
       let op = case getVarType v1 of
               ty | isPointer ty -> LM_Bitcast
               ty | isInt ty     -> LM_Inttoptr

               a  -> panic $ "genCall: Can't cast llvmType to i8*! ("
Peter Wortmann's avatar
Peter Wortmann committed
672
                           ++ showSDoc dflags (ppr a) ++ ")"
673

dterei's avatar
dterei committed
674
       (v2, s1) <- doExpr i8Ptr $ Cast op v1 i8Ptr
Peter Wortmann's avatar
Peter Wortmann committed
675
       arg_vars rest (vars ++ [v2], stmts `appOL` stmts' `snocOL` s1,
676
                               tops ++ top')
677

Peter Wortmann's avatar
Peter Wortmann committed
678 679 680
arg_vars ((e, _):rest) (vars, stmts, tops)
  = do (v1, stmts', top') <- exprToVar e
       arg_vars rest (vars ++ [v1], stmts `appOL` stmts', tops ++ top')
681

682

683
-- | Cast a collection of LLVM variables to specific types.
684 685
castVarsW :: Signage
          -> [(LlvmVar, LlvmType)]
686
          -> WriterT LlvmAccum LlvmM [LlvmVar]
687 688
castVarsW signage vars = do
    (vars, stmts) <- lift $ castVars signage vars
689 690 691
    tell $ LlvmAccum stmts mempty
    return vars

692
-- | Cast a collection of LLVM variables to specific types.
693
castVars :: Signage -> [(LlvmVar, LlvmType)]
Peter Wortmann's avatar
Peter Wortmann committed
694
         -> LlvmM ([LlvmVar], LlvmStatements)
695 696
castVars signage vars = do
                done <- mapM (uncurry (castVar signage)) vars
697 698 699 700
                let (vars', stmts) = unzip done
                return (vars', toOL stmts)

-- | Cast an LLVM variable to a specific type, panicing if it can't be done.
701 702
castVar :: Signage -> LlvmVar -> LlvmType -> LlvmM (LlvmVar, LlvmStatement)
castVar signage v t | getVarType v == t
703 704 705
            = return (v, Nop)

            | otherwise
Peter Wortmann's avatar
Peter Wortmann committed
706 707
            = do dflags <- getDynFlags
                 let op = case (getVarType v, t) of
708
                      (LMInt n, LMInt m)
709
                          -> if n < m then extend else LM_Trunc
710
                      (vt, _) | isFloat vt && isFloat t
711
                          -> if llvmWidthInBits dflags vt < llvmWidthInBits dflags t
712 713 714 715 716 717
                                then LM_Fpext else LM_Fptrunc
                      (vt, _) | isInt vt && isFloat t       -> LM_Sitofp
                      (vt, _) | isFloat vt && isInt t       -> LM_Fptosi
                      (vt, _) | isInt vt && isPointer t     -> LM_Inttoptr
                      (vt, _) | isPointer vt && isInt t     -> LM_Ptrtoint
                      (vt, _) | isPointer vt && isPointer t -> LM_Bitcast
718
                      (vt, _) | isVector vt && isVector t   -> LM_Bitcast
719 720

                      (vt, _) -> panic $ "castVars: Can't cast this type ("
721
                                  ++ showSDoc dflags (ppr vt) ++ ") to (" ++ showSDoc dflags (ppr t) ++ ")"
Peter Wortmann's avatar
Peter Wortmann committed
722
                 doExpr t $ Cast op v t
723 724 725 726
    where extend = case signage of
            Signed      -> LM_Sext
            Unsigned    -> LM_Zext

727

728 729 730 731 732
cmmPrimOpRetValSignage :: CallishMachOp -> Signage
cmmPrimOpRetValSignage mop = case mop of
    MO_Pdep _   -> Unsigned
    MO_Pext _   -> Unsigned
    _           -> Signed
733

734
-- | Decide what C function to use to implement a CallishMachOp
Peter Wortmann's avatar
Peter Wortmann committed
735 736 737 738
cmmPrimOpFunctions :: CallishMachOp -> LlvmM LMString
cmmPrimOpFunctions mop = do

  dflags <- getDynFlags
739 740
  let intrinTy1 = "p0i8.p0i8." ++ showSDoc dflags (ppr $ llvmWord dflags)
      intrinTy2 = "p0i8." ++ showSDoc dflags (ppr $ llvmWord dflags)
Peter Wortmann's avatar
Peter Wortmann committed
741 742 743 744
      unsupported = panic ("cmmPrimOpFunctions: " ++ show mop
                        ++ " not supported here")

  return $ case mop of
745 746
    MO_F32_Exp    -> fsLit "expf"
    MO_F32_Log    -> fsLit "logf"
747
    MO_F32_Sqrt   -> fsLit "llvm.sqrt.f32"
748
    MO_F32_Fabs   -> fsLit "llvm.fabs.f32"
749
    MO_F32_Pwr    -> fsLit "llvm.pow.f32"
750

751 752
    MO_F32_Sin    -> fsLit "llvm.sin.f32"
    MO_F32_Cos    -> fsLit "llvm.cos.f32"
753 754 755 756 757 758 759 760 761 762
    MO_F32_Tan    -> fsLit "tanf"

    MO_F32_Asin   -> fsLit "asinf"
    MO_F32_Acos   -> fsLit "acosf"
    MO_F32_Atan   -> fsLit "atanf"

    MO_F32_Sinh   -> fsLit "sinhf"
    MO_F32_Cosh   -> fsLit "coshf"
    MO_F32_Tanh   -> fsLit "tanhf"

763 764 765 766
    MO_F32_Asinh  -> fsLit "asinhf"
    MO_F32_Acosh  -> fsLit "acoshf"
    MO_F32_Atanh  -> fsLit "atanhf"

767 768
    MO_F64_Exp    -> fsLit "exp"
    MO_F64_Log    -> fsLit "log"
769
    MO_F64_Sqrt   -> fsLit "llvm.sqrt.f64"
770
    MO_F64_Fabs   -> fsLit "llvm.fabs.f64"
771
    MO_F64_Pwr    -> fsLit "llvm.pow.f64"
772

773 774
    MO_F64_Sin    -> fsLit "llvm.sin.f64"
    MO_F64_Cos    -> fsLit "llvm.cos.f64"
775 776 777 778 779 780 781 782 783 784
    MO_F64_Tan    -> fsLit "tan"

    MO_F64_Asin   -> fsLit "asin"
    MO_F64_Acos   -> fsLit "acos"
    MO_F64_Atan   -> fsLit "atan"

    MO_F64_Sinh   -> fsLit "sinh"
    MO_F64_Cosh   -> fsLit "cosh"
    MO_F64_Tanh   -> fsLit "tanh"

785 786 787 788
    MO_F64_Asinh  -> fsLit "asinh"
    MO_F64_Acosh  -> fsLit "acosh"
    MO_F64_Atanh  -> fsLit "atanh"

789 790 791
    MO_Memcpy _   -> fsLit $ "llvm.memcpy."  ++ intrinTy1
    MO_Memmove _  -> fsLit $ "llvm.memmove." ++ intrinTy1
    MO_Memset _   -> fsLit $ "llvm.memset."  ++ intrinTy2
792
    MO_Memcmp _   -> fsLit $ "memcmp"
793

794
    (MO_PopCnt w) -> fsLit $ "llvm.ctpop."  ++ showSDoc dflags (ppr $ widthToLlvmInt w)
795
    (MO_BSwap w)  -> fsLit $ "llvm.bswap."  ++ showSDoc dflags (ppr $ widthToLlvmInt w)
796 797
    (MO_Clz w)    -> fsLit $ "llvm.ctlz."   ++ showSDoc dflags (ppr $ widthToLlvmInt w)
    (MO_Ctz w)    -> fsLit $ "llvm.cttz."   ++ showSDoc dflags (ppr $ widthToLlvmInt w)
dterei's avatar
dterei committed
798

799 800 801 802 803 804 805 806 807
    (MO_Pdep w)   ->  let w' = showSDoc dflags (ppr $ widthInBits w)
                      in  if isBmi2Enabled dflags
                            then fsLit $ "llvm.x86.bmi.pdep."   ++ w'
                            else fsLit $ "hs_pdep"              ++ w'
    (MO_Pext w)   ->  let w' = showSDoc dflags (ppr $ widthInBits w)
                      in  if isBmi2Enabled dflags
                            then fsLit $ "llvm.x86.bmi.pext."   ++ w'
                            else fsLit $ "hs_pext"              ++ w'

808 809
    (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"

810 811 812 813 814 815
    MO_AddIntC w    -> fsLit $ "llvm.sadd.with.overflow."
                             ++ showSDoc dflags (ppr $ widthToLlvmInt w)
    MO_SubIntC w    -> fsLit $ "llvm.ssub.with.overflow."
                             ++ showSDoc dflags (ppr $ widthToLlvmInt w)
    MO_Add2 w       -> fsLit $ "llvm.uadd.with.overflow."
                             ++ showSDoc dflags (ppr $ widthToLlvmInt w)
816
    MO_AddWordC w   -> fsLit $ "llvm.uadd.with.overflow."
Sebastian Graf's avatar
Sebastian Graf committed
817
                             ++ showSDoc dflags (ppr $ widthToLlvmInt w)
nkaretnikov's avatar
nkaretnikov committed
818 819
    MO_SubWordC w   -> fsLit $ "llvm.usub.with.overflow."
                             ++ showSDoc dflags (ppr $ widthToLlvmInt w)
820

Ian Lynagh's avatar
Ian Lynagh committed
821 822 823
    MO_S_QuotRem {}  -> unsupported
    MO_U_QuotRem {}  -> unsupported
    MO_U_QuotRem2 {} -> unsupported
824 825
    -- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the
    -- appropriate case of genCall.
Ian Lynagh's avatar
Ian Lynagh committed
826 827 828
    MO_U_Mul2 {}     -> unsupported
    MO_WriteBarrier  -> unsupported
    MO_Touch         -> unsupported
tibbe's avatar
tibbe committed
829
    MO_UF_Conv _     -> unsupported
830

831
    MO_AtomicRead _  -> unsupported
832 833 834
    MO_AtomicRMW _ _ -> unsupported
    MO_AtomicWrite _ -> unsupported
    MO_Cmpxchg _     -> unsupported
835

836
-- | Tail function calls
Peter Wortmann's avatar
Peter Wortmann committed
837
genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData
838 839

-- Call to known function
Peter Wortmann's avatar
Peter Wortmann committed
840 841 842
genJump (CmmLit (CmmLabel lbl)) live = do
    (vf, stmts, top) <- getHsFunc live lbl
    (stgRegs, stgStmts) <- funEpilogue live
843 844
    let s1  = Expr $ Call TailCall vf stgRegs llvmStdFunAttrs
    let s2  = Return Nothing
Peter Wortmann's avatar
Peter Wortmann committed
845
    return (stmts `appOL` stgStmts `snocOL` s1 `snocOL` s2, top)
846 847 848


-- Call to unknown function / address
Peter Wortmann's avatar
Peter Wortmann committed
849 850 851 852
genJump expr live = do
    fty <- llvmFunTy live
    (vf, stmts, top) <- exprToVar expr
    dflags <- getDynFlags
853 854 855 856 857 858

    let cast = case getVarType vf of
         ty | isPointer ty -> LM_Bitcast
         ty | isInt ty     -> LM_Inttoptr

         ty -> panic $ "genJump: Expr is of bad type for function call! ("
Peter Wortmann's avatar
Peter Wortmann committed
859
                     ++ showSDoc dflags (ppr ty) ++ ")"
860 861

    (v1, s1) <- doExpr (pLift fty) $ Cast cast vf (pLift fty)
Peter Wortmann's avatar
Peter Wortmann committed
862
    (stgRegs, stgStmts) <- funEpilogue live
863 864
    let s2 = Expr $ Call TailCall v1 stgRegs llvmStdFunAttrs
    let s3 = Return Nothing
Peter Wortmann's avatar
Peter Wortmann committed
865
    return (stmts `snocOL` s1 `appOL` stgStmts `snocOL` s2 `snocOL` s3,
866 867 868 869 870 871 872
            top)


-- | CmmAssign operation
--
-- We use stack allocated variables for CmmReg. The optimiser will replace
-- these with registers when possible.
Peter Wortmann's avatar
Peter Wortmann committed
873 874 875 876 877
genAssign :: CmmReg -> CmmExpr -> LlvmM StmtData
genAssign reg val = do
    vreg <- getCmmReg reg
    (vval, stmts2, top2) <- exprToVar val
    let stmts = stmts2
878 879

    let ty = (pLower . getVarType) vreg
Peter Wortmann's avatar
Peter Wortmann committed
880
    dflags <- getDynFlags
881 882 883 884 885
    case ty of
      -- Some registers are pointer types, so need to cast value to pointer
      LMPointer _ | getVarType vval == llvmWord dflags -> do
          (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
          let s2 = Store v vreg
Peter Wortmann's avatar
Peter Wortmann committed
886
          return (stmts `snocOL` s1 `snocOL` s2, top2)
887

888 889 890
      LMVector _ _ -> do
          (v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
          let s2 = Store v vreg
Peter Wortmann's avatar
Peter Wortmann committed
891
          return (stmts `snocOL` s1 `snocOL` s2, top2)
892 893 894

      _ -> do
          let s1 = Store vval vreg
Peter Wortmann's avatar
Peter Wortmann committed
895
          return (stmts `snocOL` s1, top2)
896 897 898


-- | CmmStore operation
Peter Wortmann's avatar
Peter Wortmann committed
899
genStore :: CmmExpr -> CmmExpr -> LlvmM StmtData
900 901 902 903 904

-- First we try to detect a few common cases and produce better code for
-- these then the default case. We are mostly trying to detect Cmm code
-- like I32[Sp + n] and use 'getelementptr' operations instead of the
-- generic case that uses casts and pointer arithmetic
Peter Wortmann's avatar
Peter Wortmann committed
905 906
genStore addr@(CmmReg (CmmGlobal r)) val
    = genStore_fast addr r 0 val
907

Peter Wortmann's avatar
Peter Wortmann committed
908 909
genStore addr@(CmmRegOff (CmmGlobal r) n) val
    = genStore_fast addr r n val
910

Peter Wortmann's avatar
Peter Wortmann committed
911
genStore addr@(CmmMachOp (MO_Add _) [
912 913 914
                            (CmmReg (CmmGlobal r)),
                            (CmmLit (CmmInt n _))])
                val
Peter Wortmann's avatar
Peter Wortmann committed
915
    = genStore_fast addr r (fromInteger n) val
916

Peter Wortmann's avatar
Peter Wortmann committed
917
genStore addr@(CmmMachOp (MO_Sub _) [
918 919 920
                            (CmmReg (CmmGlobal r)),
                            (CmmLit (CmmInt n _))])
                val
Peter Wortmann's avatar
Peter Wortmann committed
921
    = genStore_fast addr r (negate $ fromInteger n) val
922 923

-- generic case
Peter Wortmann's avatar
Peter Wortmann committed
924
genStore addr val
dobenour's avatar
dobenour committed
925
    = getTBAAMeta topN >>= genStore_slow addr val
926 927 928 929

-- | CmmStore operation
-- This is a special case for storing to a global register pointer
-- offset such as I32[Sp+8].
Peter Wortmann's avatar
Peter Wortmann committed
930 931 932 933 934 935 936 937
genStore_fast :: CmmExpr -> GlobalReg -> Int -> CmmExpr
              -> LlvmM StmtData
genStore_fast addr r n val
  = do dflags <- getDynFlags
       (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
       meta          <- getTBAARegMeta r
       let (ix,rem) = n `divMod` ((llvmWidthInBits dflags . pLower) grt  `div` 8)
       case isPointer grt && rem == 0 of
938
            True -> do
Peter Wortmann's avatar
Peter Wortmann committed
939
                (vval,  stmts, top) <- exprToVar val
940
                (ptr, s2) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
941 942 943 944
                -- We might need a different pointer type, so check
                case pLower grt == getVarType vval of
                     -- were fine
                     True  -> do
945
                         let s3 = MetaStmt meta $ Store vval ptr