{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE FlexibleContexts #-}

-- (c) The University of Glasgow 2002-2006
-- | GHC.StgToByteCode: Generate bytecode from STG
module GHC.StgToByteCode
  ( UnlinkedBCO
  , byteCodeGen
  ) where
import GHC.Driver.DynFlags

import GHC.ByteCode.Instr
import GHC.ByteCode.Asm
import GHC.ByteCode.Types

import GHC.Cmm.CallConv
import GHC.Cmm.Reg ( GlobalArgRegs(..) )

import GHC.Platform.Profile

import GHC.Runtime.Interpreter
import GHCi.RemoteTypes
import GHC.Types.Basic
import GHC.Utils.Outputable

import GHC.Types.ForeignCall
import GHC.Types.Literal
import GHC.Builtin.PrimOps
import GHC.Builtin.PrimOps.Ids (primOpId)

import GHC.Core.TyCo.Compare (eqType)
import GHC.Types.RepType
import GHC.Core.DataCon

import GHC.Utils.Logger
import GHC.Types.Var.Set
import GHC.Builtin.Types.Prim
import GHC.Core.TyCo.Ppr ( pprType )
import GHC.Utils.Error
import GHC.Builtin.Uniques
import GHC.Data.FastString
import GHC.Utils.Panic
import GHC.Utils.Exception (evaluate)
import GHC.StgToCmm.Closure ( NonVoid(..), fromNonVoid, idPrimRepU,
                              addIdReps, addArgReps,
                              assertNonVoidIds, assertNonVoidStgArgs )
import GHC.StgToCmm.Layout
import GHC.Runtime.Heap.Layout hiding (WordOff, ByteOff, wordsToBytes)
import GHC.Data.Bitmap
import GHC.Data.FlatBag as FlatBag
import GHC.Data.OrdList

import GHC.Types.Name.Env (mkNameEnv)
import GHC.Types.Tickish
import GHC.Types.SptEntry

import Data.List ( genericReplicate, intersperse
                 , partition, scanl', sortBy, zip4, zip6 )
import Foreign hiding (shiftL, shiftR)

import GHC.Unit.Module
import GHC.Unit.Home.PackageTable (lookupHpt)

import Data.Coerce (coerce)
import Data.ByteString (ByteString)
#if MIN_VERSION_rts(1,0,3)
import qualified Data.ByteString.Char8 as BS
#endif
import Data.IntMap (IntMap)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified GHC.Data.FiniteMap as Map

import Data.Either ( partitionEithers )

import qualified Data.IntSet as IntSet
import GHC.CoreToIface
-- -----------------------------------------------------------------------------
-- Generating byte code for a complete module
byteCodeGen :: HscEnv
            -> Module
            -> [CgStgTopBinding]
            -> [TyCon]
            -> Maybe ModBreaks
            -> [SptEntry]
            -> IO CompiledByteCode
byteCodeGen hsc_env this_mod binds tycs mb_modBreaks spt_entries
               (text "GHC.StgToByteCode" <+> brackets (ppr this_mod))

        -- Split top-level binds into strings and others.
        -- See Note [Generating code for top-level string literal bindings].
        let (strings, lifted_binds) = partitionEithers $ do  -- list monad
                  StgTopLifted bnd      -> [Right bnd]
                  StgTopStringLit b str -> [Left (b, str)]
            flattenBind (StgNonRec b e) = [(b,e)]
            flattenBind (StgRec bs)     = bs
        stringPtrs <- allocateTopStrings interp strings

        (BcM_State{..}, proto_bcos) <-
           runBc hsc_env this_mod mb_modBreaks $ do
             let flattened_binds = concatMap flattenBind (reverse lifted_binds)
             FlatBag.fromList (fromIntegral $ length flattened_binds) <$> mapM schemeTopBind flattened_binds

          (panic "GHC.StgToByteCode.byteCodeGen: missing final emitBc?")

        putDumpFileMaybe logger Opt_D_dump_BCOs
           "Proto-BCOs" FormatByteCode
           (vcat (intersperse (char ' ') (map ppr $ elemsFlatBag proto_bcos)))

        let mod_breaks = case modBreaks of
             Just mb -> Just mb{ modBreaks_breakInfo = breakInfo }
        cbc <- assembleBCOs interp profile proto_bcos tycs stringPtrs mod_breaks spt_entries

        -- Squash space leaks in the CompiledByteCode.  This is really
        -- important, because when loading a set of modules into GHCi
        -- we don't touch the CompiledByteCode until the end when we
        -- do linking.  Forcing out the thunks here reduces space
        -- usage by more than 50% when loading a large number of
        evaluate (seqCompiledByteCode cbc)

  where dflags  = hsc_dflags hsc_env
        logger  = hsc_logger hsc_env
        interp  = hscInterp hsc_env
        profile = targetProfile dflags
-- | see Note [Generating code for top-level string literal bindings]
                   -> [(Id, ByteString)]
allocateTopStrings interp topStrings = do
  let !(bndrs, strings) = unzip topStrings
  ptrs <- interpCmd interp $ MallocStrings strings
  return $ mkNameEnv (zipWith mk_entry bndrs ptrs)
  where
    mk_entry bndr ptr = let nm = getName bndr
                        in (nm, (nm, AddrPtr ptr))
{- Note [Generating code for top-level string literal bindings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
As described in Note [Compilation plan for top-level string literals]
in GHC.Core, the core-to-core optimizer can introduce top-level Addr#
bindings to represent string literals. This creates two challenges for
the bytecode compiler: (1) compiling the bindings themselves, and
(2) compiling references to such bindings. Here is a summary of how

1. Top-level string literal bindings are separated from the rest of
   the module. Memory for them is allocated immediately, via
   interpCmd, in allocateTopStrings, and the resulting AddrEnv is
   recorded in the bc_strs field of the CompiledByteCode result.

2. When we encounter a reference to a top-level string literal, we
   generate a PUSH_ADDR pseudo-instruction, which is assembled to
   a PUSH_UBX instruction with a BCONPtrAddr argument.

3. The loader accumulates string literal bindings from loaded
   bytecode in the addr_env field of the LinkerEnv.

4. The BCO linker resolves BCONPtrAddr references by searching both
   the addr_env (to find literals defined in bytecode) and the native
   symbol table (to find literals defined in native code).

This strategy works alright, but it does have one significant problem:
we never free the memory that we allocate for the top-level strings.
In theory, we could explicitly free it when BCOs are unloaded, but
this comes with its own complications; see #22400 for why. For now,
we just accept the leak, but it would be nice to find something better. -}
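
-- An illustrative sketch of the situation the Note above describes (the
-- binding name "fooBar" is invented here purely for illustration): after the
-- Core optimiser has run, a module may contain a top-level Core binding like
--
--     fooBar :: Addr#
--     fooBar = "hello"#
--
-- Step 1 above allocates the bytes of "hello" in the interpreter via
-- allocateTopStrings, and step 2 compiles each bytecode occurrence of fooBar
-- to a PUSH_ADDR pseudo-instruction, which the assembler turns into a
-- PUSH_UBX instruction with a BCONPtrAddr argument.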
-- -----------------------------------------------------------------------------
-- Compilation schema for the bytecode generator

type BCInstrList = OrdList BCInstr

wordsToBytes :: Platform -> WordOff -> ByteOff
wordsToBytes platform = fromIntegral . (* platformWordSizeInBytes platform) . fromIntegral
-- Used when we know we have a whole number of words
bytesToWords :: Platform -> ByteOff -> WordOff
bytesToWords platform (ByteOff bytes) =
    let (q, r) = bytes `quotRem` (platformWordSizeInBytes platform)
    in if r == 0
           then fromIntegral q
           else pprPanic "GHC.StgToByteCode.bytesToWords"
                         (text "bytes=" <> ppr bytes)
wordSize :: Platform -> ByteOff
wordSize platform = ByteOff (platformWordSizeInBytes platform)
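
-- A small usage sketch of the conversion helpers above (assuming a 64-bit
-- target, where platformWordSizeInBytes platform == 8):
--
--     wordsToBytes platform 2            == ByteOff 16
--     bytesToWords platform (ByteOff 16) == 2
--     wordSize platform                  == ByteOff 8
--
-- bytesToWords panics if the byte count is not a whole number of words.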
type Sequel = ByteOff -- back off to this depth before ENTER

type StackDepth = ByteOff

-- | Maps Ids to their stack depth. This allows us to avoid having to mess with
-- it after each push/pop.
type BCEnv = Map Id StackDepth -- To find vars on the stack

ppBCEnv :: BCEnv -> SDoc
     $$ nest 4 (vcat (map pp_one (sortBy cmp_snd (Map.toList p))))
     pp_one (var, ByteOff offset) = int offset <> colon <+> ppr var <+> ppr (bcIdArgReps var)
     cmp_snd x y = compare (snd x) (snd y)
-- Create a BCO and do a spot of peephole optimisation on the insns
              -- ^ Just cur_mod <=> label with @BCO_NAME@ instruction
              --   see Note [BCO_NAME]
           -> Either [CgStgAlt] (CgStgRhs)
                -- ^ original expression; for debugging only
           -> WordOff   -- ^ bitmap size
           -> [StgWord] -- ^ bitmap
           -> Bool      -- ^ True <=> is a return point, rather than a function
mkProtoBCO platform _add_bco_name nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffis
        protoBCOInstrs     = maybe_add_bco_name $ maybe_add_stack_check peep_d,
        protoBCOBitmap     = bitmap,
        protoBCOBitmapSize = fromIntegral bitmap_size,
        protoBCOArity      = arity,
        protoBCOExpr       = origin,
#if MIN_VERSION_rts(1,0,3)
     maybe_add_bco_name instrs
       | Just cur_mod <- _add_bco_name =
           let str = BS.pack $ showSDocOneLine defaultSDocContext (pprFullName cur_mod nm)
           in BCO_NAME str : instrs
#endif
     maybe_add_bco_name instrs = instrs
     -- Overestimate the stack usage (in words) of this BCO,
     -- and if >= iNTERP_STACK_CHECK_THRESH, add an explicit
     -- stack check.  (The interpreter always does a stack check
     -- for iNTERP_STACK_CHECK_THRESH words at the start of each
     -- BCO anyway, so we only need to add an explicit one in the
     -- (hopefully rare) cases when the (overestimated) stack use
     -- exceeds iNTERP_STACK_CHECK_THRESH.)
     maybe_add_stack_check instrs
        | is_ret && stack_usage < fromIntegral (pc_AP_STACK_SPLIM (platformConstants platform)) = instrs
             -- don't do stack checks at return points,
             -- everything is aggregated up to the top BCO
             -- (which must be a function).
             -- That is, unless the stack usage is >= AP_STACK_SPLIM,
        | stack_usage >= fromIntegral iNTERP_STACK_CHECK_THRESH
        = STKCHECK stack_usage : instrs
        | otherwise
        = instrs     -- the supposedly common case
     -- We assume that this sum doesn't wrap
     stack_usage = sum (map bciStackUse peep_d)

     -- Merge local pushes
     peep_d = peep (fromOL instrs_ordlist)

     peep (PUSH_L off1 : PUSH_L off2 : PUSH_L off3 : rest)
        = PUSH_LLL off1 (off2-1) (off3-2) : peep rest
     peep (PUSH_L off1 : PUSH_L off2 : rest)
        = PUSH_LL off1 (off2-1) : peep rest
argBits :: Platform -> [ArgRep] -> [Bool]
argBits platform (rep : args)
  | isFollowableArg rep = False : argBits platform args
  | otherwise = replicate (argRepSizeW platform rep) True ++ argBits platform args
-- -----------------------------------------------------------------------------
-- Compile code for the right-hand side of a top-level binding

schemeTopBind :: (Id, CgStgRhs) -> BcM (ProtoBCO Name)
schemeTopBind (id, rhs)
  | Just data_con <- isDataConWorkId_maybe id,
    isNullaryRepDataCon data_con = do
        platform <- profilePlatform <$> getProfile
        add_bco_name <- shouldAddBcoName
        -- Special case for the worker of a nullary data con.
        -- It'll look like this:        Nil = /\a -> Nil a
        -- If we feed it into schemeR, we'll get
        -- because mkConAppCode treats nullary constructor applications
        -- by just re-using the single top-level definition.  So
        -- for the worker itself, we must allocate it directly.
    -- ioToBc (putStrLn $ "top level BCO")
        emitBc (mkProtoBCO platform add_bco_name
                       (getName id) (toOL [PACK data_con 0, RETURN P])
                       (Right rhs) 0 0 [{-no bitmap-}] False{-not alts-})

  | otherwise
  = schemeR [{- No free variables -}] (getName id, rhs)
-- -----------------------------------------------------------------------------

-- Compile code for a right-hand side, to give a BCO that,
-- when executed with the free variables and arguments on top of the stack,
-- will return with a pointer to the result on top of the stack, after
-- removing the free variables and arguments.
--
-- Park the resulting BCO in the monad.  Also requires the
-- name of the variable to which this value was bound,
-- so as to give the resulting BCO a name.
schemeR :: [Id]                -- Free vars of the RHS, ordered as they
                               -- will appear in the thunk.  Empty for
                               -- top-level things, which have no free vars.
        -> BcM (ProtoBCO Name)
schemeR fvs (nm, rhs)
   = schemeR_wrk fvs nm rhs (collect rhs)
-- If an expression is a lambda, return the
-- list of arguments to the lambda (in R-to-L order) and the
-- underlying expression

collect :: CgStgRhs -> ([Var], CgStgExpr)
collect (StgRhsClosure _ _ _ args body _) = (args, body)
collect (StgRhsCon _cc dc cnum _ticks args _typ) = ([], StgConApp dc cnum args [])
            -> CgStgRhs             -- expression e, for debugging only
            -> ([Var], CgStgExpr)   -- result of collect on e
            -> BcM (ProtoBCO Name)
schemeR_wrk fvs nm original_body (args, body)
   = do
     add_bco_name <- shouldAddBcoName
     profile <- getProfile
     let
         platform  = profilePlatform profile
         all_args  = reverse args ++ fvs
         arity     = length all_args
         -- all_args are the args in reverse order.  We're compiling a function
         --  \fv1..fvn x1..xn -> e
         -- i.e. the fvs come first

         -- Stack arguments always take a whole number of words, we never pack
         -- them unlike constructor fields.
         szsb_args     = map (wordsToBytes platform . idSizeW platform) all_args
         sum_szsb_args = sum szsb_args
         p_init        = Map.fromList (zip all_args (mkStackOffsets 0 szsb_args))

         -- make the arg bitmap
         bits        = argBits platform (reverse (map (idArgRep platform) all_args))
         bitmap_size = strictGenericLength bits
         bitmap      = mkBitmap platform bits
     body_code <- schemeER_wrk sum_szsb_args p_init body

     emitBc (mkProtoBCO platform add_bco_name nm body_code (Right original_body)
                 arity bitmap_size bitmap False{-not alts-})
-- | Introduce break instructions for ticked expressions.
-- If no breakpoint information is available, the instruction is omitted.
schemeER_wrk :: StackDepth -> BCEnv -> CgStgExpr -> BcM BCInstrList
schemeER_wrk d p (StgTick (Breakpoint tick_ty tick_no fvs tick_mod) rhs) = do
  code <- schemeE d 0 p rhs
  current_mod <- getCurrentModule
  mb_current_mod_breaks <- getCurrentModBreaks
  case mb_current_mod_breaks of
    -- if we're not generating ModBreaks for this module for some reason, we
    -- can't store breakpoint occurrence information.
    Just current_mod_breaks -> break_info hsc_env tick_mod current_mod mb_current_mod_breaks >>= \case
      Just ModBreaks{modBreaks_flags = breaks, modBreaks_module = tick_mod_ptr, modBreaks_ccs = cc_arr} -> do
        platform <- profilePlatform <$> getProfile
        let idOffSets = getVarOffSets platform d p fvs
            ty_vars   = tyCoVarsOfTypesWellScoped (tick_ty:map idType fvs)
            toWord :: Maybe (Id, WordOff) -> Maybe (Id, Word)
            toWord = fmap (\(i, wo) -> (i, fromIntegral wo))
            breakInfo = dehydrateCgBreakInfo ty_vars (map toWord idOffSets) tick_ty

        let info_mod_ptr = modBreaks_module current_mod_breaks
        infox <- newBreakInfo breakInfo

        let cc | Just interp <- hsc_interp hsc_env
               , interpreterProfiled interp
               | otherwise = toRemotePtr nullPtr
        let -- cast that checks that round-tripping through Word16 doesn't change the value
            toW16 x = let r = fromIntegral x :: Word16
                      in if fromIntegral r == x
                        then r
                        else pprPanic "schemeER_wrk: breakpoint tick/info index too large!" (ppr x)
            breakInstr = BRK_FUN breaks tick_mod_ptr (toW16 tick_no) info_mod_ptr (toW16 infox) cc
        return $ breakInstr `consOL` code
schemeER_wrk d p rhs = schemeE d 0 p rhs
-- | Determine the GHCi-allocated 'BreakArray' and module pointer for the module
-- from which the breakpoint originates.
-- These are stored in 'ModBreaks' as remote pointers in order to allow the BCOs
-- to refer to pointers in GHCi's address space.
-- They are initialized in 'GHC.HsToCore.Breakpoints.mkModBreaks', called by
-- 'GHC.HsToCore.deSugar'.
--
-- Breakpoints might be disabled because we're in TH, because
-- @-fno-break-points@ was specified, or because a module was reloaded without
-- reinitializing 'ModBreaks'.
--
-- If the module stored in the breakpoint is the currently processed module, use
-- the 'ModBreaks' from the state.
-- If that is 'Nothing', consider breakpoints to be disabled and skip the
-- instruction.
--
-- If the breakpoint is inlined from another module, look it up in the home
-- package table.
-- If the module doesn't exist there, or its module pointer is null (which means
-- that the 'ModBreaks' value is uninitialized), skip the instruction.
              BcM (Maybe ModBreaks)
break_info hsc_env mod current_mod current_mod_breaks
  | mod == current_mod
  = pure $ check_mod_ptr =<< current_mod_breaks
  | otherwise
  = ioToBc (lookupHpt (hsc_HPT hsc_env) (moduleName mod)) >>= \case
      Just hp -> pure $ check_mod_ptr (getModBreaks hp)
      Nothing -> pure Nothing
  where
    check_mod_ptr mb
      | mod_ptr <- modBreaks_module mb
      , fromRemotePtr mod_ptr /= nullPtr
      = Just mb
      | otherwise
      = Nothing
getVarOffSets :: Platform -> StackDepth -> BCEnv -> [Id] -> [Maybe (Id, WordOff)]
getVarOffSets platform depth env = map getOffSet
  where
    getOffSet id = case lookupBCEnv_maybe id env of
        Nothing     -> Nothing
        Just offset ->
            -- michalt: I'm not entirely sure why we need the stack
            -- adjustment by 2 here. I initially thought that there's
            -- something off with getIdValFromApStack (the only user of this
            -- value), but it looks ok to me. My current hypothesis is that
            -- this "adjustment" is needed due to stack manipulation for
            -- BRK_FUN in Interpreter.c. In any case, this is used only when
            -- we trigger a breakpoint.
            let !var_depth_ws = bytesToWords platform (depth - offset) + 2
            in Just (id, var_depth_ws)
fvsToEnv :: BCEnv -> CgStgRhs -> [Id]
-- Takes the free variables of a right-hand side, and
-- delivers an ordered list of the local variables that will
-- be captured in the thunk for the RHS
-- The BCEnv argument tells which variables are in the local
-- environment: these are the ones that should be captured
--
-- The code that constructs the thunk, and the code that executes
-- it, have to agree about this layout
fvsToEnv p rhs = [v | v <- dVarSetElems $ freeVarsOfRhs rhs,
                      v `Map.member` p]
-- -----------------------------------------------------------------------------

-- Returning an unlifted value.
-- Heave it on the stack, SLIDE, and RETURN.
returnUnliftedAtom d s p e = do
    let reps = stgArgRep e
    (push, szb) <- pushAtom d p e
    ret <- returnUnliftedReps d s szb reps
    return (push `appOL` ret)

-- return an unlifted value from the top of the stack
                   -> ByteOff    -- size of the thing we're returning
                   -> [PrimRep]  -- representations
returnUnliftedReps d s szb reps = do
    profile <- getProfile
    let platform = profilePlatform profile
    case reps of
         -- use RETURN for nullary/unary representations
         []    -> return (unitOL $ RETURN V)
         [rep] -> return (unitOL $ RETURN (toArgRep platform rep))
         -- otherwise use RETURN_TUPLE with a tuple descriptor
           let (call_info, args_offsets) = layoutNativeCall profile NativeTupleReturn 0 id nv_reps
           tuple_bco <- emitBc (tupleBCO platform call_info args_offsets)
           return $ PUSH_UBX (mkNativeCallInfoLit platform call_info) 1 `consOL`
                    PUSH_BCO tuple_bco `consOL`
    return ( mkSlideB platform szb (d - s) -- clear to sequel

-- construct and return an unboxed tuple
returnUnboxedTuple d s p es = do
    profile <- getProfile
    let platform = profilePlatform profile
        (call_info, tuple_components) = layoutNativeCall profile

        go _ pushes [] = return (reverse pushes)
        go !dd pushes ((a, off):cs) = do (push, szb) <- pushAtom dd p a
                                         massert (off == dd + szb)
                                         go (dd + szb) (push:pushes) cs
    pushes <- go d [] tuple_components

    ret <- returnUnliftedReps d
             (wordsToBytes platform $ nativeCallSize call_info)
    return (mconcat pushes `appOL` ret)
-- Compile code to apply the given expression to the remaining args
-- on the stack, returning a HNF.
schemeE
    :: StackDepth -> Sequel -> BCEnv -> CgStgExpr -> BcM BCInstrList
schemeE d s p (StgLit lit) = returnUnliftedAtom d s p (StgLitArg lit)
schemeE d s p (StgApp x [])
   | isUnliftedType (idType x) = returnUnliftedAtom d s p (StgVarArg x)
-- Delegate tail-calls to schemeT.
schemeE d s p e@(StgApp {})    = schemeT d s p e
schemeE d s p e@(StgConApp {}) = schemeT d s p e
schemeE d s p e@(StgOpApp {})  = schemeT d s p e
schemeE d s p (StgLetNoEscape xlet bnd body)
   = schemeE d s p (StgLet xlet bnd body)
schemeE d s p (StgLet _xlet
                      (StgNonRec x (StgRhsCon _cc data_con _cnum _ticks args _typ))
   = do -- Special case for a non-recursive let whose RHS is a
        -- saturated constructor application.
        -- Just allocate the constructor and carry on
        alloc_code <- mkConAppCode d s p data_con args
        platform <- targetPlatform <$> getDynFlags
        let !d2 = d + wordSize platform
        body_code <- schemeE d2 s (Map.insert x d2 p) body
        return (alloc_code `appOL` body_code)

-- General case for let.  Generates correct, if inefficient, code in
schemeE d s p (StgLet _ext binds body) = do
     platform <- targetPlatform <$> getDynFlags
     let (xs,rhss) = case binds of StgNonRec x rhs  -> ([x],[rhs])
                                   StgRec xs_n_rhss -> unzip xs_n_rhss
         n_binds = strictGenericLength xs

         fvss = map (fvsToEnv p') rhss

         -- Sizes of free vars
         size_w = idSizeW platform
         sizes = map (\rhs_fvs -> sum (map size_w rhs_fvs)) fvss

         -- the arity of each rhs
         arities = map (strictGenericLength . fst . collect) rhss

         -- This p', d' defn is safe because all the items being pushed
         -- are ptrs, so all have size 1 word.  d' and p' reflect the stack
         -- after the closures have been allocated in the heap (but not
         -- filled in), and pointers to them parked on the stack.
         offsets = mkStackOffsets d (genericReplicate n_binds (wordSize platform))
         p' = Map.insertList (zipE xs offsets) p
         d' = d + wordsToBytes platform n_binds
         zipE = zipEqual "schemeE"

         -- ToDo: don't build thunks for things with no free variables
         build_thunk _ [] size bco off arity
            = return (PUSH_BCO bco `consOL` unitOL (mkap (off+size) (fromIntegral size)))
           where
             mkap | arity == 0 = MKAP
                  | otherwise  = MKPAP
         build_thunk dd (fv:fvs) size bco off arity = do
            (push_code, pushed_szb) <- pushAtom dd p' (StgVarArg fv)
            more_push_code <-
                build_thunk (dd + pushed_szb) fvs size bco off arity
            return (push_code `appOL` more_push_code)

         alloc_code = toOL (zipWith mkAlloc sizes arities)
           where mkAlloc sz 0
                    | is_tick   = ALLOC_AP_NOUPD (fromIntegral sz)
                    | otherwise = ALLOC_AP (fromIntegral sz)
                 mkAlloc sz arity = ALLOC_PAP arity (fromIntegral sz)

         is_tick = case binds of
           StgNonRec id _ -> occNameFS (getOccName id) == tickFS
           _              -> False
         compile_bind d' fvs x (rhs::CgStgRhs) size arity off = do
                bco <- schemeR fvs (getName x,rhs)
                build_thunk d' fvs size bco off arity

         compile_binds =
            [ compile_bind d' fvs x rhs size arity n
            | (fvs, x, rhs, size, arity, n) <-
                zip6 fvss xs rhss sizes arities [n_binds, n_binds-1 .. 1]
            ]
     body_code <- schemeE d' s p' body
     thunk_codes <- sequence compile_binds
     return (alloc_code `appOL` concatOL thunk_codes `appOL` body_code)
schemeE _d _s _p (StgTick (Breakpoint _ bp_id _ _) _rhs)
   = panic ("schemeE: Breakpoint without let binding: " ++
            show bp_id ++
            " forgot to run bcPrep?")
-- ignore other kinds of tick
schemeE d s p (StgTick _ rhs) = schemeE d s p rhs

-- no alts: scrut is guaranteed to diverge
schemeE d s p (StgCase scrut _ _ []) = schemeE d s p scrut

schemeE d s p (StgCase scrut bndr _ alts)
   = doCase d s p scrut bndr alts
{-
The idea is that the "breakpoint<n,fvs> E" is really just an annotation on
the code.  When we find such a thing, we pull out the useful information,
and then compile the code as if it was just the expression E.
-}
-- Compile code to do a tail call.  Specifically, push the fn,
-- slide the on-stack app back down to the sequel depth,
-- and enter.  Four cases:
--
--     An application "GHC.Prim.tagToEnum# <type> unboxed-int".
--     The int will be on the stack.  Generate a code sequence
--     to convert it to the relevant constructor, SLIDE and ENTER.
--
-- 1.  The fn denotes a ccall.  Defer to generateCCall.
--
-- 2.  An unboxed tuple: push the components on the top of
--     the stack and return.
--
-- 3.  Application of a constructor, by defn saturated.
--     Split the args into ptrs and non-ptrs, and push the nonptrs,
--     then the ptrs, and then do PACK and RETURN.
--
-- 4.  Otherwise, it must be a function call.  Push the args
--     right to left, SLIDE and ENTER.
schemeT :: StackDepth   -- Stack depth
        -> Sequel       -- Sequel depth
        -> BCEnv        -- stack env
   | Just (arg, constr_names) <- maybe_is_tagToEnum_call app
   = implement_tagToId d s p arg constr_names
schemeT d s p (StgOpApp (StgFCallOp (CCall ccall_spec) _ty) args result_ty)
   = if isSupportedCConv ccall_spec
     then generateCCall d s p ccall_spec result_ty args
     else unsupportedCConvException

schemeT d s p (StgOpApp (StgPrimOp op) args _ty)
   = doTailCall d s p (primOpId op) (reverse args)

schemeT d s p (StgOpApp (StgPrimCallOp (PrimCall label unit)) args result_ty)
   = generatePrimCall d s p label (Just unit) result_ty args

schemeT d s p (StgConApp con _cn args _tys)
   -- Case 2: Unboxed tuple
   | isUnboxedTupleDataCon con || isUnboxedSumDataCon con
   = returnUnboxedTuple d s p args
   -- Case 3: Ordinary data constructor
   | otherwise
   = do alloc_con <- mkConAppCode d s p con args
        platform <- profilePlatform <$> getProfile
        return (alloc_con `appOL`
                mkSlideW 1 (bytesToWords platform $ d - s) `snocOL` RETURN P)
-- Case 4: Tail call of function
schemeT d s p (StgApp fn args)
   = doTailCall d s p fn (reverse args)

schemeT _ _ _ e = pprPanic "GHC.StgToByteCode.schemeT"
                           (pprStgExpr shortStgPprOpts e)

-- -----------------------------------------------------------------------------
-- Generate code to build a constructor application,
-- leaving it on top of the stack
              -> DataCon     -- The data constructor
              -> [StgArg]    -- Args, in *reverse* order
mkConAppCode orig_d _ p con args = app_code
     profile <- getProfile
     let platform = profilePlatform profile

                   addArgReps (assertNonVoidStgArgs args)
         (_, _, args_offsets) =
             mkVirtHeapOffsetsWithPadding profile StdHeader non_voids

         do_pushery !d (arg : args) = do
            (push, arg_bytes) <- case arg of
               (Padding l _)  -> return $! pushPadding (ByteOff l)
               (FieldOff a _) -> pushConstrAtom d p (fromNonVoid a)
            more_push_code <- do_pushery (d + arg_bytes) args
            return (push `appOL` more_push_code)
         do_pushery !d [] = do
            let !n_arg_words = bytesToWords platform (d - orig_d)
            return (unitOL (PACK con n_arg_words))

     -- Push on the stack in the reverse order.
     do_pushery orig_d (reverse args_offsets)
-- -----------------------------------------------------------------------------
-- Generate code for a tail-call

doTailCall init_d s p fn args = do
    platform <- profilePlatform <$> getProfile
    do_pushes init_d args (map (atomRep platform) args)
  where
    do_pushes !d [] reps = do
        assert (null reps) return ()
        (push_fn, sz) <- pushAtom d p (StgVarArg fn)
        platform <- profilePlatform <$> getProfile
        assert (sz == wordSize platform) return ()
        let slide = mkSlideB platform (d - init_d + wordSize platform) (init_d - s)
        return (push_fn `appOL` (slide `appOL` unitOL ENTER))
    do_pushes !d args reps = do
      let (push_apply, n, rest_of_reps) = findPushSeq reps
          (these_args, rest_of_args) = splitAt n args
      (next_d, push_code) <- push_seq d these_args
      platform <- profilePlatform <$> getProfile
      instrs <- do_pushes (next_d + wordSize platform) rest_of_args rest_of_reps
      --                          ^^^ for the PUSH_APPLY_ instruction
      return (push_code `appOL` (push_apply `consOL` instrs))

    push_seq d [] = return (d, nilOL)
    push_seq d (arg:args) = do
      (push_code, sz) <- pushAtom d p arg
      (final_d, more_push_code) <- push_seq (d + sz) args
      return (final_d, push_code `appOL` more_push_code)
-- v. similar to CgStackery.findMatch, ToDo: merge
findPushSeq :: [ArgRep] -> (BCInstr, Int, [ArgRep])
findPushSeq (P: P: P: P: P: P: rest)
  = (PUSH_APPLY_PPPPPP, 6, rest)
findPushSeq (P: P: P: P: P: rest)
  = (PUSH_APPLY_PPPPP, 5, rest)
findPushSeq (P: P: P: P: rest)
  = (PUSH_APPLY_PPPP, 4, rest)
findPushSeq (P: P: P: rest)
  = (PUSH_APPLY_PPP, 3, rest)
findPushSeq (P: P: rest)
  = (PUSH_APPLY_PP, 2, rest)
findPushSeq (P: rest)
  = (PUSH_APPLY_P, 1, rest)
findPushSeq (V: rest)
  = (PUSH_APPLY_V, 1, rest)
findPushSeq (N: rest)
  = (PUSH_APPLY_N, 1, rest)
findPushSeq (F: rest)
  = (PUSH_APPLY_F, 1, rest)
findPushSeq (D: rest)
  = (PUSH_APPLY_D, 1, rest)
findPushSeq (L: rest)
  = (PUSH_APPLY_L, 1, rest)
findPushSeq argReps
  | any (`elem` [V16, V32, V64]) argReps
  = sorry "SIMD vector operations are not available in GHCi"
  | otherwise
  = panic "GHC.StgToByteCode.findPushSeq"

-- -----------------------------------------------------------------------------
doCase d s p scrut bndr alts
  = do
     profile <- getProfile
     let
        platform = profilePlatform profile

        -- Are we dealing with an unboxed tuple with a tuple return frame?
        --
        -- 'Simple' tuples with at most one non-void component,
        -- like (# Word# #) or (# Int#, State# RealWorld #) do not have a
        -- tuple return frame. This is because (# foo #) and (# foo, Void# #)
        -- have the same runtime rep. We have more efficient specialized
        -- return frames for the situations with one non-void element.
        non_void_arg_reps = typeArgReps platform bndr_ty
        ubx_tuple_frame =
          (isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty) &&
          length non_void_arg_reps > 1

        profiling
          | Just interp <- hsc_interp hsc_env
          = interpreterProfiled interp
          | otherwise = False
        -- Top of stack is the return itbl, as usual.
        -- underneath it is the pointer to the alt_code BCO.
        -- When an alt is entered, it assumes the returned value is
        -- on top of the itbl; see Note [Return convention for non-tuple values]

        ret_frame_size_b :: StackDepth
        ret_frame_size_b | ubx_tuple_frame =
                             (if profiling then 5 else 4) * wordSize platform
                         | otherwise = 2 * wordSize platform
        -- The stack space used to save/restore the CCCS when profiling
        save_ccs_size_b | profiling &&
                          not ubx_tuple_frame = 2 * wordSize platform
                        | otherwise = 0

        -- The size of the return frame info table pointer if one exists
        unlifted_itbl_size_b :: StackDepth
        unlifted_itbl_size_b | ubx_tuple_frame = wordSize platform
                             | otherwise       = 0
        (bndr_size, call_info, args_offsets)
           | ubx_tuple_frame =
               let bndr_reps = typePrimRep (idType bndr)
                   (call_info, args_offsets) =
                       layoutNativeCall profile NativeTupleReturn 0 id bndr_reps
               in ( wordsToBytes platform (nativeCallSize call_info)
           | otherwise = ( wordsToBytes platform (idSizeW platform bndr)
                         , voidTupleReturnInfo
        -- depth of stack after the return value has been pushed
        d_bndr =
            d + ret_frame_size_b + bndr_size

        -- depth of stack after the extra info table for an unlifted return
        -- has been pushed, if any.  This is the stack depth at the
        d_alts = d + ret_frame_size_b + bndr_size + unlifted_itbl_size_b

        -- Env in which to compile the alts, not including
        -- any vars bound by the alts themselves
        p_alts = Map.insert bndr d_bndr p

        bndr_ty = idType bndr
        isAlgCase = isAlgType bndr_ty
        -- given an alt, return a discr and code for it.
        codeAlt :: CgStgAlt -> BcM (Discr, BCInstrList)
        codeAlt GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=rhs}
           = do rhs_code <- schemeE d_alts s p_alts rhs
                return (NoDiscr, rhs_code)

        codeAlt alt@GenStgAlt{alt_con=_, alt_bndrs=bndrs, alt_rhs=rhs}
           -- primitive or nullary constructor alt: no need to UNPACK
           | null real_bndrs = do
                rhs_code <- schemeE d_alts s p_alts rhs
                return (my_discr alt, rhs_code)
           | isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty =
             let bndr_ty = idPrimRepU . fromNonVoid
                 (call_info, args_offsets) =
                     layoutNativeCall profile
                                      (assertNonVoidIds bndrs)

                     [ (arg, tuple_start -
                             wordsToBytes platform (nativeCallSize call_info) +
                     | (NonVoid arg, offset) <- args_offsets ]

                rhs_code <- schemeE stack_bot s p' rhs
                return (NoDiscr, rhs_code)
           -- algebraic alt with some binders
             let (tot_wds, _ptrs_wds, args_offsets) =
                     mkVirtHeapOffsets profile NoHeader
                         (addIdReps (assertNonVoidIds real_bndrs))
                 size = WordOff tot_wds

                 stack_bot = d_alts + wordsToBytes platform size

                 -- convert offsets from Sp into offsets into the virtual stack
                     [ (arg, stack_bot - ByteOff offset)
                     | (NonVoid arg, offset) <- args_offsets ]

                rhs_code <- schemeE stack_bot s p' rhs
                return (my_discr alt,
                        unitOL (UNPACK size) `appOL` rhs_code)
        real_bndrs = filterOut isTyVar bndrs

        my_discr alt = case alt_con alt of
           DEFAULT -> NoDiscr {-shouldn't really happen-}
           DataAlt dc
              | isUnboxedTupleDataCon dc || isUnboxedSumDataCon dc

              -> DiscrP (fromIntegral (dataConTag dc - fIRST_TAG))
           LitAlt l -> case l of
              LitNumber LitNumInt i    -> DiscrI (fromInteger i)
              LitNumber LitNumInt8 i   -> DiscrI8 (fromInteger i)
              LitNumber LitNumInt16 i  -> DiscrI16 (fromInteger i)
              LitNumber LitNumInt32 i  -> DiscrI32 (fromInteger i)
              LitNumber LitNumInt64 i  -> DiscrI64 (fromInteger i)
              LitNumber LitNumWord w   -> DiscrW (fromInteger w)
              LitNumber LitNumWord8 w  -> DiscrW8 (fromInteger w)
              LitNumber LitNumWord16 w -> DiscrW16 (fromInteger w)
              LitNumber LitNumWord32 w -> DiscrW32 (fromInteger w)
              LitNumber LitNumWord64 w -> DiscrW64 (fromInteger w)
              LitNumber LitNumBigNat _ -> unsupported
              LitFloat r    -> DiscrF (fromRational r)
              LitDouble r   -> DiscrD (fromRational r)
              LitChar i     -> DiscrI (ord i)
              LitString{}   -> unsupported
              LitRubbish{}  -> unsupported
              LitNullAddr{} -> unsupported
              LitLabel{}    -> unsupported
              where
                unsupported = pprPanic "schemeE(StgCase).my_discr:" (ppr l)
        maybe_ncons
           | not isAlgCase = Nothing
           | otherwise
           = case [dc | DataAlt dc <- alt_con <$> alts] of
               []     -> Nothing
               (dc:_) -> Just (tyConFamilySize (dataConTyCon dc))
        -- the bitmap is relative to stack depth d, i.e. before the
        -- BCO, info table and return value are pushed on.
        -- This bit of code is v. similar to buildLivenessMask in CgBindery,
        -- except that here we build the bitmap from the known bindings of
        -- things that are pointers, whereas in CgBindery the code builds the
        -- bitmap from the free slots and unboxed bindings.

        -- NOTE [7/12/2006] bug #1013, testcase ghci/should_run/ghci002.
        -- The bitmap must cover the portion of the stack up to the sequel only.
        -- Previously we were building a bitmap for the whole depth (d), but we
        -- really want a bitmap up to depth (d-s).  This affects compilation of
        -- case-of-case expressions, which is the only time we can be compiling a
        -- case expression with s /= 0.

        -- unboxed tuples get two more words, the second is a pointer (tuple_bco)
        (extra_pointers, extra_slots)
           | ubx_tuple_frame && profiling = ([1], 3) -- call_info, tuple_BCO, CCCS
           | ubx_tuple_frame              = ([1], 2) -- call_info, tuple_BCO
           | otherwise                    = ([], 0)

        bitmap_size :: WordOff
        bitmap_size = fromIntegral extra_slots +
                      bytesToWords platform (d - s)

        bitmap_size' = fromIntegral bitmap_size

            filter (< bitmap_size') (map (+extra_slots) rel_slots)

        -- NB: unboxed tuple cases bind the scrut binder to the same offset
        -- as one of the alt binders, so we have to remove any duplicates here:
        -- 'toAscList' takes care of sorting the result, which was previously
        -- done after the application of 'filter'.
        rel_slots = IntSet.toAscList $ IntSet.fromList $ Map.elems $ Map.mapMaybeWithKey spread p
        spread id offset | isUnboxedTupleType (idType id) ||
                           isUnboxedSumType (idType id) = Nothing
                         | isFollowableArg (idArgRep platform id) = Just (fromIntegral rel_offset)
                         | otherwise = Nothing
                where rel_offset = bytesToWords platform (d - offset)

        bitmap = intsToReverseBitmap platform bitmap_size' pointers
     alt_stuff <- mapM codeAlt alts
     alt_final0 <- mkMultiBranch maybe_ncons alt_stuff
     let
         alt_final
           | ubx_tuple_frame = SLIDE 0 2 `consOL` alt_final0
           | otherwise       = alt_final0

     add_bco_name <- shouldAddBcoName
     let
         alt_bco_name = getName bndr
         alt_bco = mkProtoBCO platform add_bco_name alt_bco_name alt_final (Left alts)
                       0{-no arity-} bitmap_size bitmap True{-is alts-}
     scrut_code <- schemeE (d + ret_frame_size_b + save_ccs_size_b)
                           (d + ret_frame_size_b + save_ccs_size_b)
     alt_bco' <- emitBc alt_bco
     if ubx_tuple_frame
       then do tuple_bco <- emitBc (tupleBCO platform call_info args_offsets)
               return (PUSH_ALTS_TUPLE alt_bco' call_info tuple_bco
                       `consOL` scrut_code)
       else let scrut_rep = case non_void_arg_reps of
                  _ -> panic "schemeE(StgCase).push_alts"
            in return (PUSH_ALTS alt_bco' scrut_rep `consOL` scrut_code)
-- -----------------------------------------------------------------------------

-- The native calling convention uses registers for tuples, but in the
-- bytecode interpreter, all values live on the stack.
{- Note [GHCi and native call registers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The GHCi bytecode interpreter does not have access to the STG registers
that the native calling convention uses for passing arguments. It uses
helper stack frames to move values between the stack and registers.

If only a single register needs to be moved, GHCi uses a specific stack
frame. For example stg_ctoi_R1p saves a heap pointer value from STG register
R1 and stg_ctoi_D1 saves a double precision floating point value from D1.
In the other direction, helpers stg_ret_p and stg_ret_d move a value from
the stack to the R1 and D1 registers, respectively.

When GHCi needs to move more than one register it cannot use a specific
helper frame. It would simply be impossible to create a helper for all
possible combinations of register values. Instead, there are generic helper
stack frames that use a call_info word that describes the active registers
and the number of stack words used by the arguments of a call.

These helper stack frames are currently:

    - stg_ret_t:    return a tuple to the continuation at the top of
                    the stack
    - stg_ctoi_t:   convert a tuple return value to be used in
                    bytecode
    - stg_primcall: call a function

The call_info word contains a bitmap of the active registers
for the call and a stack offset. The layout is as follows:

    - bit 0-23:  Bitmap of active registers for the call, the
                 order corresponds to the list returned by
                 allArgRegsCover.
                 For example if bit 0 (the least significant bit) is set, the
                 first register in the allArgRegsCover
                 list is active. Bit 1 for the
                 second register in the list and so on.

    - bit 24-31: Unsigned byte indicating the stack offset
                 of the continuation in words. For tuple returns
                 this is the number of words returned on the
                 stack. For primcalls this field is unused, since
                 we don't jump to a continuation.

The upper 32 bits on 64 bit platforms are currently unused.

If a register is smaller than a word on the stack (for example a
single precision float on a 64 bit system), then the stack slot
is padded to a whole word.

If a tuple is returned in three registers and an additional two
words on the stack, then three bits in the register bitmap
(bits 0-23) would be set. And bit 24-31 would be
00000010 (two in binary).

The values on the stack before a call to POP_ARG_REGS would look as follows:

    register_arg_1 <- Sp

A call to POP_ARG_REGS(call_info) would move register_arg_1
to the register corresponding to the lowest set bit in the
call_info word. register_arg_2 would be moved to the register
corresponding to the second lowest set bit, and so on.

After POP_ARG_REGS(call_info), the stack pointer Sp points
to the topmost stack argument, so the stack looks as follows:

At this point all the arguments are in place and we are ready
to jump to the continuation, the location (offset from Sp) of
which is found by inspecting the value of bits 24-31. In this
case the offset is two words.

On x86_64, the double precision (Dn) and single precision
floating (Fn) point registers overlap, e.g. D1 uses the same
physical register as F1. On this platform, the list returned
by allArgRegsCover contains only entries for the double
precision registers. If an argument is passed in register
Fn, the bit corresponding to Dn should be set.

Note: if anything changes in how registers for native calls overlap,
      make sure to also update GHC.StgToByteCode.layoutNativeCall
-}
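
-- A minimal standalone sketch of the call_info packing described in the Note
-- above (illustrative only: the helper names packCallInfo/unpackCallInfo are
-- invented here, and the real encoding is produced by mkNativeCallInfoLit and
-- layoutNativeCall):
--
--     import Data.Bits (shiftL, shiftR, (.&.), (.|.))
--     import Data.Word (Word32, Word64)
--
--     -- bits 0-23: bitmap of active registers; bits 24-31: stack words
--     packCallInfo :: Word32 -> Word32 -> Word64
--     packCallInfo regBitmap stackWords =
--           fromIntegral (regBitmap .&. 0xffffff)
--       .|. (fromIntegral (stackWords .&. 0xff) `shiftL` 24)
--
--     unpackCallInfo :: Word64 -> (Word32, Word32)
--     unpackCallInfo w = ( fromIntegral (w .&. 0xffffff)
--                        , fromIntegral ((w `shiftR` 24) .&. 0xff) )
--
-- For the example above (three registers plus two stack words), the register
-- bitmap would have three bits set and the byte in bits 24-31 would be 2.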
layoutNativeCall :: Profile
                 -> ( NativeCallInfo        -- See Note [GHCi TupleInfo]
                    , [(a, ByteOff)]        -- argument, offset on stack
layoutNativeCall profile call_type start_off arg_rep reps =
    let platform = profilePlatform profile
        arg_ty = primRepCmmType platform . arg_rep
        (orig_stk_bytes, pos) = assignArgumentsPos profile

        -- keep the stack parameters in the same place
        orig_stk_params = [(x, fromIntegral off) | (x, StackParam off) <- pos]

        -- sort the register parameters by register and add them to the stack
        regs_order :: Map.Map GlobalReg Int
        regs_order = Map.fromList $ zip (allArgRegsCover platform SCALAR_ARG_REGS) [0..]

        reg_order :: GlobalReg -> (Int, GlobalReg)
        reg_order reg | Just n <- Map.lookup reg regs_order = (n, reg)
        -- if we don't have a position for a FloatReg then they must be passed
        -- in the equivalent DoubleReg
        reg_order (FloatReg n) = reg_order (DoubleReg n)
        -- one-tuples can be passed in other registers, but then we don't need
        -- to care about the order
        reg_order reg = (0, reg)

          = unzip $ sortBy (comparing fst)
                    [(reg_order reg, x) | (x, RegisterParam reg) <- pos]

        (new_stk_bytes, new_stk_params) = assignStack platform

        regs_set = mkRegSet (map snd regs)
        get_byte_off (x, StackParam y) = (x, fromIntegral y)
        get_byte_off _ =
            panic "GHC.StgToByteCode.layoutTuple get_byte_off"

          { nativeCallType           = call_type
          , nativeCallSize           = bytesToWords platform (ByteOff new_stk_bytes)
          , nativeCallRegs           = regs_set
          , nativeCallStackSpillSize = bytesToWords platform
                                         (ByteOff orig_stk_bytes)

       , sortBy (comparing snd) $
         map (\(x, o) -> (x, o + start_off))
             (orig_stk_params ++ map get_byte_off new_stk_params)
{- Note [Return convention for non-tuple values]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The RETURN and ENTER instructions are used to return values. RETURN directly
returns the value at the top of the stack while ENTER evaluates it first (so
RETURN is only used when the result is already known to be evaluated), but the
end result is the same: control returns to the enclosing stack frame with the
result at the top of the stack.

The PUSH_ALTS instruction pushes a two-word stack frame that receives a single
lifted value. Its payload is a BCO that is executed when control returns, with
the stack set up as if a RETURN instruction had just been executed: the returned
value is at the top of the stack, and beneath it is the two-word frame being
returned to. It is the continuation BCO’s job to pop its own frame off the
stack, so the simplest possible continuation consists of two instructions:

    SLIDE 1 2  -- pop the return frame off the stack, keeping the returned value
    RETURN P   -- return the returned value to our caller

RETURN and PUSH_ALTS are not really instructions but are in fact representation-
polymorphic *families* of instructions indexed by ArgRep. ENTER, however, is a
single real instruction, since it is only used to return lifted values, which
are always pointers.

The RETURN, ENTER, and PUSH_ALTS instructions are only used when the returned
value has nullary or unary representation. Returning/receiving an unboxed
tuple (or, indirectly, an unboxed sum, since unboxed sums have been desugared to
unboxed tuples by Unarise) containing two or more results uses the special
RETURN_TUPLE/PUSH_ALTS_TUPLE instructions, which use a different return
convention. See Note [unboxed tuple bytecodes and tuple_BCO] for details.

Note [unboxed tuple bytecodes and tuple_BCO]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We have the bytecode instructions RETURN_TUPLE and PUSH_ALTS_TUPLE to
return and receive arbitrary unboxed tuples, respectively. These
instructions use the helper data tuple_BCO and call_info.

The helper data is used to convert tuples between GHC's native calling
convention (object code), which uses stack and registers, and the bytecode
calling convention, which only uses the stack. See Note [GHCi TupleInfo]

Bytecode that returns a tuple first pushes all the tuple fields followed
by the appropriate call_info and tuple_BCO onto the stack. It then
executes the RETURN_TUPLE instruction, which causes the interpreter
to push stg_ret_t_info to the top of the stack. The stack (growing down)
then looks as follows:

    stg_ret_t_info <- Sp

If next_frame is bytecode, the interpreter will start executing it. If
it's object code, the interpreter jumps back to the scheduler, which in
turn jumps to stg_ret_t. stg_ret_t converts the tuple to the native
calling convention using the description in call_info, and then jumps
to next_frame.

Bytecode that receives a tuple uses the PUSH_ALTS_TUPLE instruction to
push a continuation, followed by jumping to the code that produces the
tuple. The PUSH_ALTS_TUPLE instruction contains three pieces of data:

    * cont_BCO:  the continuation that receives the tuple
    * call_info: see below
    * tuple_BCO: see below

The interpreter pushes these onto the stack when the PUSH_ALTS_TUPLE
instruction is executed, followed by stg_ctoi_tN_info, with N depending
on the number of stack words used by the tuple in the GHC native calling
convention. N is derived from call_info.

For example if we expect a tuple with three words on the stack, the stack
looks as follows after PUSH_ALTS_TUPLE:

    stg_ctoi_t3_info <- Sp

If the tuple is returned by object code, stg_ctoi_t3 will deal with
adjusting the stack pointer and converting the tuple to the bytecode
calling convention. See Note [GHCi unboxed tuples stack spills] for more
details.

The tuple_BCO is a helper bytecode object. Its main purpose is describing
the contents of the stack frame containing the tuple for the storage
manager. It contains only instructions to immediately return the tuple
that is already on the stack.

The call_info word describes the stack and STG register (e.g. R1..R6,
D1..D6) usage for the tuple. call_info contains enough information to
convert the tuple between the stack-only bytecode and stack+registers
GHC native calling conventions.

See Note [GHCi and native call registers] for more details of how the
data is packed in a single word.
-}
tupleBCO :: Platform -> NativeCallInfo -> [(PrimRep, ByteOff)] -> [FFIInfo] -> ProtoBCO Name
tupleBCO platform args_info args =
  mkProtoBCO platform Nothing invented_name body_code (Left [])
             0{-no arity-} bitmap_size bitmap False{-is alts-}
  where
    {-
      The tuple BCO is never referred to by name, so we can get away
      with using a fake name here. We will need to change this if we want
      to save some memory by sharing the BCO between places that have
      the same tuple shape
    -}
    invented_name = mkSystemVarName (mkPseudoUniqueE 0) (fsLit "tuple")

    -- the first word in the frame is the call_info word,
    -- which is not a pointer
    nptrs_prefix = 1
    (bitmap_size, bitmap) = mkStackBitmap platform nptrs_prefix args_info args

    body_code = mkSlideW 0 1          -- pop frame header
                `snocOL` RETURN_TUPLE -- and add it again
primCallBCO :: Platform -> NativeCallInfo -> [(PrimRep, ByteOff)] -> [FFIInfo] -> ProtoBCO Name
primCallBCO platform args_info args =
  mkProtoBCO platform Nothing invented_name body_code (Left [])
             0{-no arity-} bitmap_size bitmap False{-is alts-}
  where
    {-
      The primcall BCO is never referred to by name, so we can get away
      with using a fake name here. We will need to change this if we want
      to save some memory by sharing the BCO between places that have
      the same tuple shape
    -}
    invented_name = mkSystemVarName (mkPseudoUniqueE 0) (fsLit "primcall")

    -- The first two words in the frame (after the BCO) are the call_info word
    -- and the pointer to the Cmm function being called. Neither of these is a
    -- pointer that should be followed by the garbage collector.
    nptrs_prefix = 2
    (bitmap_size, bitmap) = mkStackBitmap platform nptrs_prefix args_info args

    -- if the primcall BCO is ever run it's a bug, since the BCO should only
    -- be pushed immediately before running the PRIMCALL bytecode instruction,
    -- which immediately leaves the interpreter to jump to the stg_primcall_info
    body_code = unitOL CASEFAIL
-- | Builds a bitmap for a stack layout with a nonpointer prefix followed by
-- some number of arguments.
     -- ^ The number of nonpointer words that prefix the arguments.
  -> [(PrimRep, ByteOff)]
     -- ^ The stack layout of the arguments, where each offset is relative to the
     -- /bottom/ of the stack space they occupy. Their offsets must be word-aligned,
     -- and the list must be sorted in order of ascending offset (i.e. bottom to top).
  -> (WordOff, [StgWord])
mkStackBitmap platform nptrs_prefix args_info args
  = (bitmap_size, bitmap)
  where
    bitmap_size = nptrs_prefix + arg_bottom
    bitmap = intsToReverseBitmap platform (fromIntegral bitmap_size) ptr_offsets

    arg_bottom = nativeCallSize args_info
    ptr_offsets = reverse $ map (fromIntegral . convert_arg_offset)
                $ mapMaybe get_ptr_offset args

    get_ptr_offset :: (PrimRep, ByteOff) -> Maybe ByteOff
    get_ptr_offset (rep, byte_offset)
      | isFollowableArg (toArgRep platform rep) = Just byte_offset
      | otherwise = Nothing

    convert_arg_offset :: ByteOff -> WordOff
    convert_arg_offset arg_offset =
      -- The argument offsets are relative to `arg_bottom`, but
      -- `intsToReverseBitmap` expects offsets from the top, so we need to flip
      nptrs_prefix + (arg_bottom - bytesToWords platform arg_offset)
-- -----------------------------------------------------------------------------
-- Deal with a primitive call to native code.

                 -> CLabelString      -- where to call
                 -> [StgArg]          -- args (atoms)
generatePrimCall d s p target _mb_unit _result_ty args
 = do
     profile <- getProfile
     let
         platform = profilePlatform profile

         non_void VoidRep = False
         non_void _       = True

         nv_args = filter (non_void . stgArgRep1) args

         (args_info, args_offsets) =
             layoutNativeCall profile

         prim_args_offsets = mapFst stgArgRepU args_offsets
         shifted_args_offsets = mapSnd (+ d) args_offsets

         push_target = PUSH_UBX (LitLabel target IsFunction) 1
         push_info = PUSH_UBX (mkNativeCallInfoLit platform args_info) 1
         {-
            compute size to move payload (without stg_primcall_info header)

            size of arguments plus three words for:
                - function pointer to the target
                - BCO to describe the stack frame
          -}
         szb = wordsToBytes platform (nativeCallSize args_info + 3)
         go _ pushes [] = return (reverse pushes)
         go !dd pushes ((a, off):cs) = do (push, szb) <- pushAtom dd p a
                                          massert (off == dd + szb)
                                          go (dd + szb) (push:pushes) cs
     push_args <- go d [] shifted_args_offsets
     args_bco <- emitBc (primCallBCO platform args_info prim_args_offsets)
     return $ mconcat push_args `appOL`
       (push_target `consOL`
        push_info `consOL`
        PUSH_BCO args_bco `consOL`
        (mkSlideB platform szb (d - s) `appOL` unitOL PRIMCALL))
-- -----------------------------------------------------------------------------
-- Deal with a CCall.

-- Taggedly push the args onto the stack R->L,
-- dereferencing ForeignObj#s and adjusting addrs to point to
-- payloads in Ptr/Byte arrays.  Then, generate the marshalling
-- (machine) code for the ccall, and create bytecodes to call that and
-- then return in the right way.
              -> CCallSpec         -- where to call
              -> [StgArg]          -- args (atoms)
generateCCall d0 s p (CCallSpec target PrimCallConv _) result_ty args
  | (StaticTarget _ label mb_unit _) <- target
  = generatePrimCall d0 s p label mb_unit result_ty args
  | otherwise
  = panic "GHC.StgToByteCode.generateCCall: primcall convention only supports static targets"
generateCCall d0 s p (CCallSpec target _ safety) result_ty args
  = do
     profile <- getProfile
     let
         args_r_to_l = reverse args
         platform = profilePlatform profile

         addr_size_b :: ByteOff
         addr_size_b = wordSize platform

         arrayish_rep_hdr_size :: TyCon -> Maybe Int
         arrayish_rep_hdr_size t
           | t == arrayPrimTyCon || t == mutableArrayPrimTyCon
           = Just (arrPtrsHdrSize profile)
           | t == smallArrayPrimTyCon || t == smallMutableArrayPrimTyCon
           = Just (smallArrPtrsHdrSize profile)
           | t == byteArrayPrimTyCon || t == mutableByteArrayPrimTyCon
           = Just (arrWordsHdrSize profile)
           | otherwise
           = Nothing
         -- Get the args on the stack, with tags and suitably
         -- dereferenced for the CCall.  For each arg, return the
         -- depth to the first word of the bits for that arg, and the
         -- ArgRep of what was actually pushed.

         pargs :: ByteOff -> [StgArg] -> BcM [(BCInstrList, PrimOrVoidRep)]
         pargs _ [] = return []
         pargs d (aa@(StgVarArg a):az)
            | Just t <- tyConAppTyCon_maybe (idType a)
            , Just hdr_sz <- arrayish_rep_hdr_size t
            -- Do magic for Ptr/Byte arrays.  Push a ptr to the array on
            -- the stack but then advance it over the headers, so as to
            -- point to the payload.
            = do rest <- pargs (d + addr_size_b) az
                 (push_fo, _) <- pushAtom d p aa
                 -- The ptr points at the header.  Advance it over the
                 -- header and then pretend this is an Addr#.
                 let code = push_fo `snocOL` SWIZZLE 0 (fromIntegral hdr_sz)
                 return ((code, NVRep AddrRep) : rest)
         pargs d (aa:az) = do (code_a, sz_a) <- pushAtom d p aa
                              rest <- pargs (d + sz_a) az
                              return ((code_a, stgArgRep1 aa) : rest)
     code_n_reps <- pargs d0 args_r_to_l
     let
         (pushs_arg, a_reps_pushed_r_to_l) = unzip code_n_reps
         a_reps_sizeW = sum (map (repSizeWords platform) a_reps_pushed_r_to_l)

         push_args = concatOL pushs_arg
         !d_after_args = d0 + wordsToBytes platform a_reps_sizeW
         a_reps_pushed_RAW
            | VoidRep:xs <- a_reps_pushed_r_to_l
            = xs
            | otherwise
            = panic "GHC.StgToByteCode.generateCCall: missing or invalid World token?"
         -- Now: a_reps_pushed_RAW are the reps which are actually on the stack.
         --      push_args is the code to do that.
         --      d_after_args is the stack depth once the args are on.

         -- Get the result rep.
         r_rep = maybe_getCCallReturnRep result_ty
         {-
         Because the Haskell stack grows down, the a_reps refer to
         lowest to highest addresses in that order.  The args for the call
         are on the stack.  Now push an unboxed Addr# indicating
         the C function to call.  Then push a dummy placeholder for the
         result.  Finally, emit a CCALL insn with an offset pointing to the
         Addr# just pushed, and a literal field holding the mallocville
         address of the piece of marshalling code we generate.
         So, just prior to the CCALL insn, the stack looks like this
         (growing down, as usual):

            Addr# address_of_C_fn
            <placeholder-for-result#> (must be an unboxed type)

         The interpreter then calls the marshal code mentioned
         in the CCALL insn, passing it (& <placeholder-for-result#>),
         that is, the addr of the topmost word in the stack.
         When this returns, the placeholder will have been
         filled in.  The placeholder is slid down to the sequel
         depth, and we RETURN.

         This arrangement makes it simple to do f-i-dynamic since the Addr#
         value is the first arg anyway.

         The marshalling code is generated specifically for this
         call site, and so knows exactly the (Haskell) stack
         offsets of the args, fn address and placeholder.  It
         copies the args to the C stack, calls the stacked addr,
         and parks the result back in the placeholder.  The interpreter
         calls it as a normal C call, assuming it has a signature
            void marshal_code ( StgWord* ptr_to_top_of_stack )
         -}
         -- resolve static address
         maybe_static_target :: Maybe Literal
         maybe_static_target =
             case target of
                 DynamicTarget -> Nothing
                 StaticTarget _ _ _ False ->
                   panic "generateCCall: unexpected FFI value import"
                 StaticTarget _ target _ True ->
                   Just (LitLabel target IsFunction)
         is_static = isJust maybe_static_target

         -- Get the arg reps, zapping the leading Addr# in the dynamic case
         a_reps --  | trace (showSDoc (ppr a_reps_pushed_RAW)) False = error "???"
                | is_static = a_reps_pushed_RAW
                | _:xs <- a_reps_pushed_RAW = xs
                | otherwise = panic "GHC.StgToByteCode.generateCCall: dyn with no args"

         (push_Addr, d_after_Addr)
            | Just machlabel <- maybe_static_target
            = (toOL [PUSH_UBX machlabel 1], d_after_args + addr_size_b)
            | otherwise -- is already on the stack
            = (nilOL, d_after_args)
1685 -- Push the return placeholder. For a call returning nothing,
1686 -- this is a V (tag).
1687 r_sizeW
= repSizeWords platform r_rep
1688 d_after_r
= d_after_Addr
+ wordsToBytes platform r_sizeW
1689 push_r
= case r_rep
of
1691 NVRep r
-> unitOL
(PUSH_UBX
(mkDummyLiteral platform r
) r_sizeW
)
1693 -- generate the marshalling code we're going to call
1695 -- Offset of the next stack frame down the stack. The CCALL
1696 -- instruction needs to describe the chunk of stack containing
1697 -- the ccall args to the GC, so it needs to know how large it
1698 -- is. See comment in Interpreter.c with the CCALL instruction.
1699 stk_offset
= bytesToWords platform
(d_after_r
- s
)
     -- the only difference in libffi mode is that we prepare a cif
     -- describing the call type by calling libffi, and we attach the
     -- address of this to the CCALL instruction.

     let ffires  = primRepToFFIType platform r_rep
         ffiargs = map (primRepToFFIType platform) a_reps
     interp <- hscInterp <$> getHscEnv
     token <- ioToBc $ interpCmd interp (PrepFFI ffiargs ffires)
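
     -- Illustrative example: a static call taking one Int# argument and
     -- returning a Double# on a 64-bit platform prepares a cif with
     -- ffiargs = [FFISInt64] and ffires = FFIDouble.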
     let
         -- do the call
         do_call = unitOL (CCALL stk_offset token flags)
           where flags = case safety of
                           PlaySafe          -> 0x0
                           PlayInterruptible -> 0x1
                           PlayRisky         -> 0x2

         -- slide and return
         d_after_r_min_s = bytesToWords platform (d_after_r - s)
         wrapup = mkSlideW r_sizeW (d_after_r_min_s - r_sizeW)
                  `snocOL` RETURN (toArgRepOrV platform r_rep)

     --trace (show (arg1_offW, args_offW , (map argRepSizeW a_reps) )) $
     return (
         push_args `appOL`
         push_Addr `appOL` push_r `appOL` do_call `appOL` wrapup
         )

primRepToFFIType :: Platform -> PrimOrVoidRep -> FFIType
primRepToFFIType _ VoidRep = FFIVoid
primRepToFFIType platform (NVRep r)
  = case r of
      IntRep      -> signed_word
      WordRep     -> unsigned_word
      Int8Rep     -> FFISInt8
      Word8Rep    -> FFIUInt8
      Int16Rep    -> FFISInt16
      Word16Rep   -> FFIUInt16
      Int32Rep    -> FFISInt32
      Word32Rep   -> FFIUInt32
      Int64Rep    -> FFISInt64
      Word64Rep   -> FFIUInt64
      AddrRep     -> FFIPointer
      FloatRep    -> FFIFloat
      DoubleRep   -> FFIDouble
      BoxedRep _  -> FFIPointer
      VecRep{}    -> pprPanic "primRepToFFIType" (ppr r)
  where
    (signed_word, unsigned_word) = case platformWordSize platform of
       PW4 -> (FFISInt32, FFIUInt32)
       PW8 -> (FFISInt64, FFIUInt64)
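
-- For instance, on a 64-bit target (PW8) an argument of rep @NVRep IntRep@ is
-- described to libffi as FFISInt64, while on a 32-bit target (PW4) it becomes
-- FFISInt32; boxed arguments always travel as FFIPointer.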

-- Make a dummy literal, to be used as a placeholder for FFI return
-- values on the stack.
mkDummyLiteral :: Platform -> PrimRep -> Literal
mkDummyLiteral platform pr
  = case pr of
      IntRep     -> mkLitInt platform 0
      WordRep    -> mkLitWord platform 0
      Int8Rep    -> mkLitInt8 0
      Word8Rep   -> mkLitWord8 0
      Int16Rep   -> mkLitInt16 0
      Word16Rep  -> mkLitWord16 0
      Int32Rep   -> mkLitInt32 0
      Word32Rep  -> mkLitWord32 0
      Int64Rep   -> mkLitInt64 0
      Word64Rep  -> mkLitWord64 0
      AddrRep    -> LitNullAddr
      DoubleRep  -> LitDouble 0
      FloatRep   -> LitFloat 0
      BoxedRep _ -> LitNullAddr
      VecRep{}   -> pprPanic "mkDummyLiteral" (ppr pr)
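
-- So, for example, the placeholder pushed ahead of a call returning Double#
-- is @LitDouble 0@ (sized via repSizeWords); the marshalling code called by
-- the interpreter later overwrites it with the real result.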

-- Convert (e.g.)
--      GHC.Prim.Char# -> GHC.Prim.State# GHC.Prim.RealWorld
--                     -> (# GHC.Prim.State# GHC.Prim.RealWorld, GHC.Prim.Int# #)
--
-- and check that an unboxed pair is returned wherein the first arg is V'd.
--
-- Alternatively, for call-targets returning nothing, convert
--
--      GHC.Prim.Char# -> GHC.Prim.State# GHC.Prim.RealWorld
--                     -> (# GHC.Prim.State# GHC.Prim.RealWorld #)

maybe_getCCallReturnRep :: Type -> PrimOrVoidRep
maybe_getCCallReturnRep fn_ty
  = let
        (_a_tys, r_ty) = splitFunTys (dropForAlls fn_ty)
    in
        case typePrimRep r_ty of
          []    -> VoidRep
          [rep] -> NVRep rep

                -- if it was, it would be impossible to create a
                -- valid return value placeholder on the stack
          _     -> pprPanic "maybe_getCCallReturn: can't handle:"
                            (pprType fn_ty)

maybe_is_tagToEnum_call :: CgStgExpr -> Maybe (Id, [Name])
-- Detect and extract relevant info for the tagToEnum kludge.
maybe_is_tagToEnum_call (StgOpApp (StgPrimOp TagToEnumOp) [StgVarArg v] t)
  = Just (v, extract_constr_Names t)
  where
    extract_constr_Names ty
      | rep_ty <- unwrapType ty
      , Just tyc <- tyConAppTyCon_maybe rep_ty
      = map (getName . dataConWorkId) (tyConDataCons tyc)
      -- NOTE: use the worker name, not the source name of
      --       the DataCon.  See "GHC.Core.DataCon" for details.
      | otherwise
      = pprPanic "maybe_is_tagToEnum_call.extract_constr_Ids" (ppr ty)
maybe_is_tagToEnum_call _ = Nothing

{- -----------------------------------------------------------------------------
Note [Implementing tagToEnum#]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
(implement_tagToId arg names) compiles code which takes an argument
'arg', (call it i), and enters the i'th closure in the supplied list
as a consequence.  The [Name] is a list of the constructors of this
(enumeration) type.

The code we generate is this:

                ...
                  PUSH_G <lbl for first data con>
                ...
                  PUSH_G <lbl for second data con>
                ...
        Ln:     TESTEQ_I n L_fail
                  PUSH_G <lbl for last data con>
                ...
-}
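
-- As an illustration (not a verbatim dump): for @tagToEnum# i :: Bool@ the
-- generated chain is roughly TESTEQ_I 0 / PUSH_G False / TESTEQ_I 1 /
-- PUSH_G True, falling through to CASEFAIL, followed by a slide past the
-- argument and an ENTER on whichever constructor closure was pushed.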

-- See Note [Implementing tagToEnum#]
implement_tagToId d s p arg names
  = assert (notNull names) $
    do (push_arg, arg_bytes) <- pushAtom d p (StgVarArg arg)
       labels <- getLabelsBc (strictGenericLength names)
       label_fail <- getLabelBc
       label_exit <- getLabelBc
       dflags <- getDynFlags
       let infos = zip4 labels (tail labels ++ [label_fail])
                        [0 ..] names
           platform = targetPlatform dflags
           steps = map (mkStep label_exit) infos
           slide_ws = bytesToWords platform (d - s + arg_bytes)

       return (push_arg
               `appOL` concatOL steps
               `appOL` toOL [ LABEL label_fail, CASEFAIL,
                              LABEL label_exit ]
               `appOL` mkSlideW 1 slide_ws
               `appOL` unitOL ENTER)
  where
    mkStep l_exit (my_label, next_label, n, name_for_n)
       = toOL [LABEL my_label,
               TESTEQ_I n next_label,
               PUSH_G (getName name_for_n),
               JMP l_exit]

-- -----------------------------------------------------------------------------
-- pushAtom

-- Push an atom onto the stack, returning suitable code & number of
-- stack words used.
--
-- The env p must map each variable to the highest-numbered stack
-- slot for it.  For example, if the stack has depth 4 and we
-- tagged-ly push (v :: Int#) on it, the value will be in stack[4],
-- the tag in stack[5], the stack will have depth 6, and p must map v
-- to 5 and not to 4.  Stack locations are numbered from zero, so a
-- depth 6 stack has valid words 0 .. 5.

pushAtom
    :: StackDepth -> BCEnv -> StgArg -> BcM (BCInstrList, ByteOff)

-- See Note [Empty case alternatives] in GHC.Core
-- and Note [Bottoming expressions] in GHC.Core.Utils:
-- The scrutinee of an empty case evaluates to bottom
pushAtom d p (StgVarArg var)
   | [] <- typePrimRep (idType var)
   = return (nilOL, 0)

   | isFCallId var
   = pprPanic "pushAtom: shouldn't get an FCallId here" (ppr var)

   | Just primop <- isPrimOpId_maybe var
   = do
       platform <- targetPlatform <$> getDynFlags
       return (unitOL (PUSH_PRIMOP primop), wordSize platform)
   | Just d_v <- lookupBCEnv_maybe var p  -- var is a local variable
   = do platform <- targetPlatform <$> getDynFlags

        let !szb = idSizeCon platform var
            with_instr :: (ByteOff -> BCInstr) -> BcM (OrdList BCInstr, ByteOff)
            with_instr instr = do
                let !off_b = d - d_v
                return (unitOL (instr off_b), wordSize platform)

        case szb of
            1 -> with_instr PUSH8_W
            2 -> with_instr PUSH16_W
            4 -> with_instr PUSH32_W
            _ -> do
                let !szw = bytesToWords platform szb
                    !off_w = bytesToWords platform (d - d_v) + szw - 1
                return (toOL (genericReplicate szw (PUSH_L off_w)),
                        wordsToBytes platform szw)
        -- d - d_v           offset from TOS to the first slot of the object
        --
        -- d - d_v + sz - 1  offset from the TOS of the last slot of the object
        --
        -- Having found the last slot, we proceed to copy the right number of
        -- slots on to the top of the stack.
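
        -- A hypothetical worked case: with d - d_v = 16 bytes on a 64-bit
        -- platform and a 2-word object (szw = 2), off_w = 2 + 2 - 1 = 3, so we
        -- emit PUSH_L 3 twice; the same offset works for both copies because
        -- each PUSH_L grows the stack by one word, keeping the next source
        -- slot at the same relative distance from the new top of stack.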
   | otherwise  -- var must be a global variable
   = do platform <- targetPlatform <$> getDynFlags
        let !szb = idSizeCon platform var
        massert (szb == wordSize platform)

        -- PUSH_G doesn't tag constructors. So we use PACK here
        -- if we are dealing with nullary constructor.
        case isDataConWorkId_maybe var of
          Just con -> do
            massert (isNullaryRepDataCon con)
            return (unitOL (PACK con 0), szb)

          Nothing
            -- see Note [Generating code for top-level string literal bindings]
            | idType var `eqType` addrPrimTy ->
              return (unitOL (PUSH_ADDR (getName var)), szb)
            | otherwise -> do
              let varTy = idType var
              massertPpr (definitelyLiftedType varTy) $
                vcat [ text "pushAtom: unhandled unlifted type"
                     , text "var:" <+> ppr var <+> dcolon <+> ppr varTy <> dcolon <+> ppr (typeKind varTy) ]
              return (unitOL (PUSH_G (getName var)), szb)

pushAtom _ _ (StgLitArg lit) = pushLiteral True lit

pushLiteral :: Bool -> Literal -> BcM (BCInstrList, ByteOff)
pushLiteral padded lit =
  do
     platform <- targetPlatform <$> getDynFlags
     let code :: PrimRep -> BcM (BCInstrList, ByteOff)
         code rep =
            return (padding_instr `snocOL` instr, size_bytes + padding_bytes)
          where
            size_bytes = ByteOff $ primRepSizeB platform rep

            -- Here we handle the non-word-width cases specifically since we
            -- must emit different bytecode for them.

            round_to_words (ByteOff bytes) =
              ByteOff (roundUpToWords platform bytes)

            padding_bytes
                | padded    = round_to_words size_bytes - size_bytes
                | otherwise = 0

            (padding_instr, _) = pushPadding padding_bytes

            instr =
              case size_bytes of
                1 -> PUSH_UBX8 lit
                2 -> PUSH_UBX16 lit
                4 -> PUSH_UBX32 lit
                _ -> PUSH_UBX lit (bytesToWords platform size_bytes)

     case lit of
        LitLabel {}      -> code AddrRep
        LitFloat {}      -> code FloatRep
        LitDouble {}     -> code DoubleRep
        LitChar {}       -> code WordRep
        LitNullAddr      -> code AddrRep
        LitString {}     -> code AddrRep
        LitRubbish _ rep -> case runtimeRepPrimRep (text "pushLiteral") rep of
                              [pr] -> code pr
                              _    -> pprPanic "pushLiteral" (ppr lit)
        LitNumber nt _   -> case nt of
          LitNumInt    -> code IntRep
          LitNumWord   -> code WordRep
          LitNumInt8   -> code Int8Rep
          LitNumWord8  -> code Word8Rep
          LitNumInt16  -> code Int16Rep
          LitNumWord16 -> code Word16Rep
          LitNumInt32  -> code Int32Rep
          LitNumWord32 -> code Word32Rep
          LitNumInt64  -> code Int64Rep
          LitNumWord64 -> code Word64Rep
          -- No LitNumBigNat should be left by the time this is called. CorePrep
          -- should have converted them all to a real core representation.
          LitNumBigNat -> panic "pushAtom: LitNumBigNat"

-- | Push an atom for constructor (i.e., PACK instruction) onto the stack.
-- This is slightly different to @pushAtom@ due to the fact that we allow
-- packing constructor fields. See also @mkConAppCode@ and @pushPadding@.
pushConstrAtom
    :: StackDepth -> BCEnv -> StgArg -> BcM (BCInstrList, ByteOff)
pushConstrAtom _ _ (StgLitArg lit) = pushLiteral False lit

pushConstrAtom d p va@(StgVarArg v)
    | Just d_v <- lookupBCEnv_maybe v p = do  -- v is a local variable
        platform <- targetPlatform <$> getDynFlags
        let !szb = idSizeCon platform v
            done instr = do
                let !off = d - d_v
                return (unitOL (instr off), szb)
        case szb of
            1 -> done PUSH8
            2 -> done PUSH16
            4 -> done PUSH32
            _ -> pushAtom d p va

pushConstrAtom d p expr = pushAtom d p expr
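
-- A worked example of the padded/unpadded split: a Word8 literal used as a
-- constructor field goes through pushConstrAtom, i.e. pushLiteral False, and
-- emits just PUSH_UBX8 (1 byte); the same literal pushed as an ordinary
-- argument goes through pushAtom, i.e. pushLiteral True, and on a 64-bit
-- platform is preceded by PUSH_PAD32, PUSH_PAD16 and PUSH_PAD8 so that it
-- occupies a full 8-byte word.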

pushPadding :: ByteOff -> (BCInstrList, ByteOff)
pushPadding (ByteOff n) = go n (nilOL, 0)
  where
    go n acc@(!instrs, !off) = case n of
        0 -> acc
        1 -> (instrs `mappend` unitOL PUSH_PAD8,  off + 1)
        2 -> (instrs `mappend` unitOL PUSH_PAD16, off + 2)
        3 -> go 1 (go 2 acc)
        4 -> (instrs `mappend` unitOL PUSH_PAD32, off + 4)
        _ -> go (n - 4) (go 4 acc)
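
-- For instance, pushPadding 6 decomposes as 4 + 2 and yields
-- (PUSH_PAD32 <> PUSH_PAD16, 6), while pushPadding 3 goes through the 3-case
-- and yields (PUSH_PAD16 <> PUSH_PAD8, 3).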

-- -----------------------------------------------------------------------------
-- Given a bunch of alts code and their discrs, do the donkey work
-- of making a multiway branch using a switch tree.
-- What a load of hassle!

mkMultiBranch :: Maybe Int      -- # datacons in tycon, if alg alt
                                -- a hint; generates better code
                                -- Nothing is always safe
              -> [(Discr, BCInstrList)]
              -> BcM BCInstrList
mkMultiBranch maybe_ncons raw_ways = do
     lbl_default <- getLabelBc

     let
         mkTree :: [(Discr, BCInstrList)] -> Discr -> Discr -> BcM BCInstrList
         mkTree [] _range_lo _range_hi = return (unitOL (JMP lbl_default))
             -- shouldn't happen?

         mkTree [val] range_lo range_hi
            | range_lo == range_hi
            = return (snd val)
            | null defaults -- Note [CASEFAIL]
            = do lbl <- getLabelBc
                 return (testEQ (fst val) lbl
                            `consOL` (snd val
                            `appOL` (LABEL lbl `consOL` unitOL CASEFAIL)))
            | otherwise
            = return (testEQ (fst val) lbl_default `consOL` snd val)

            -- Note [CASEFAIL]
            -- ~~~~~~~~~~~~~~~
            -- It may be that this case has no default
            -- branch, but the alternatives are not exhaustive - this
            -- happens for GADT cases for example, where the types
            -- prove that certain branches are impossible.  We could
            -- just assume that the other cases won't occur, but if
            -- this assumption was wrong (because of a bug in GHC)
            -- then the result would be a segfault.  So instead we
            -- emit an explicit test and a CASEFAIL instruction that
            -- causes the interpreter to barf() if it is ever
            -- executed.

         mkTree vals range_lo range_hi
            = let n = length vals `div` 2
                  (vals_lo, vals_hi) = splitAt n vals
                  v_mid = fst (head vals_hi)
              in do
                 label_geq <- getLabelBc
                 code_lo <- mkTree vals_lo range_lo (dec v_mid)
                 code_hi <- mkTree vals_hi v_mid range_hi
                 return (testLT v_mid label_geq
                         `consOL` (code_lo
                         `appOL`   unitOL (LABEL label_geq)
                         `appOL`   code_hi))

         the_default
            = case defaults of
                []         -> nilOL
                [(_, def)] -> LABEL lbl_default `consOL` def
                _          -> panic "mkMultiBranch/the_default"
     instrs <- mkTree notd_ways init_lo init_hi
     return (instrs `appOL` the_default)

  where
     (defaults, not_defaults) = partition (isNoDiscr.fst) raw_ways
     notd_ways = sortBy (comparing fst) not_defaults
     testLT (DiscrI i)   fail_label = TESTLT_I i fail_label
     testLT (DiscrI8 i)  fail_label = TESTLT_I8 (fromIntegral i) fail_label
     testLT (DiscrI16 i) fail_label = TESTLT_I16 (fromIntegral i) fail_label
     testLT (DiscrI32 i) fail_label = TESTLT_I32 (fromIntegral i) fail_label
     testLT (DiscrI64 i) fail_label = TESTLT_I64 (fromIntegral i) fail_label
     testLT (DiscrW i)   fail_label = TESTLT_W i fail_label
     testLT (DiscrW8 i)  fail_label = TESTLT_W8 (fromIntegral i) fail_label
     testLT (DiscrW16 i) fail_label = TESTLT_W16 (fromIntegral i) fail_label
     testLT (DiscrW32 i) fail_label = TESTLT_W32 (fromIntegral i) fail_label
     testLT (DiscrW64 i) fail_label = TESTLT_W64 (fromIntegral i) fail_label
     testLT (DiscrF i)   fail_label = TESTLT_F i fail_label
     testLT (DiscrD i)   fail_label = TESTLT_D i fail_label
     testLT (DiscrP i)   fail_label = TESTLT_P i fail_label
     testLT NoDiscr      _          = panic "mkMultiBranch NoDiscr"

     testEQ (DiscrI i)   fail_label = TESTEQ_I i fail_label
     testEQ (DiscrI8 i)  fail_label = TESTEQ_I8 (fromIntegral i) fail_label
     testEQ (DiscrI16 i) fail_label = TESTEQ_I16 (fromIntegral i) fail_label
     testEQ (DiscrI32 i) fail_label = TESTEQ_I32 (fromIntegral i) fail_label
     testEQ (DiscrI64 i) fail_label = TESTEQ_I64 (fromIntegral i) fail_label
     testEQ (DiscrW i)   fail_label = TESTEQ_W i fail_label
     testEQ (DiscrW8 i)  fail_label = TESTEQ_W8 (fromIntegral i) fail_label
     testEQ (DiscrW16 i) fail_label = TESTEQ_W16 (fromIntegral i) fail_label
     testEQ (DiscrW32 i) fail_label = TESTEQ_W32 (fromIntegral i) fail_label
     testEQ (DiscrW64 i) fail_label = TESTEQ_W64 (fromIntegral i) fail_label
     testEQ (DiscrF i)   fail_label = TESTEQ_F i fail_label
     testEQ (DiscrD i)   fail_label = TESTEQ_D i fail_label
     testEQ (DiscrP i)   fail_label = TESTEQ_P i fail_label
     testEQ NoDiscr      _          = panic "mkMultiBranch NoDiscr"
     -- None of these will be needed if there are no non-default alts
     (init_lo, init_hi) = case notd_ways of
         []           -> panic "mkMultiBranch: awesome foursome"
         (discr, _):_ -> case discr of
             DiscrI _   -> ( DiscrI minBound,   DiscrI maxBound )
             DiscrI8 _  -> ( DiscrI8 minBound,  DiscrI8 maxBound )
             DiscrI16 _ -> ( DiscrI16 minBound, DiscrI16 maxBound )
             DiscrI32 _ -> ( DiscrI32 minBound, DiscrI32 maxBound )
             DiscrI64 _ -> ( DiscrI64 minBound, DiscrI64 maxBound )
             DiscrW _   -> ( DiscrW minBound,   DiscrW maxBound )
             DiscrW8 _  -> ( DiscrW8 minBound,  DiscrW8 maxBound )
             DiscrW16 _ -> ( DiscrW16 minBound, DiscrW16 maxBound )
             DiscrW32 _ -> ( DiscrW32 minBound, DiscrW32 maxBound )
             DiscrW64 _ -> ( DiscrW64 minBound, DiscrW64 maxBound )
             DiscrF _   -> ( DiscrF minF,       DiscrF maxF )
             DiscrD _   -> ( DiscrD minD,       DiscrD maxD )
             DiscrP _   -> ( DiscrP algMinBound, DiscrP algMaxBound )
             NoDiscr    -> panic "mkMultiBranch NoDiscr"

     (algMinBound, algMaxBound)
        = case maybe_ncons of
             -- XXX What happens when n == 0?
             Just n  -> (0, fromIntegral n - 1)
             Nothing -> (minBound, maxBound)

     isNoDiscr NoDiscr = True
     isNoDiscr _       = False

     dec (DiscrI i) = DiscrI (i-1)
     dec (DiscrW w) = DiscrW (w-1)
     dec (DiscrP i) = DiscrP (i-1)
     dec other      = other        -- not really right, but if you
        -- do cases on floating values, you'll get what you deserve

     -- same snotty comment applies to the following
     minF, maxF :: Float
     minD, maxD :: Double
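
     -- A small illustration (not from an actual dump): for non-default ways at
     -- DiscrI 1, DiscrI 3 and DiscrI 5, mkTree splits at DiscrI 3, emitting
     -- roughly TESTLT_I 3 Lgeq, the test/enter code for 1, then Lgeq:
     -- TESTLT_I 5 Lgeq2, the code for 3, then Lgeq2: the code for 5, with each
     -- leaf guarded by a testEQ against lbl_default (or CASEFAIL when there is
     -- no default).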

-- -----------------------------------------------------------------------------
-- Supporting junk for the compilation schemes

-- Describes case alts
instance Outputable Discr where
   ppr (DiscrI i)   = int i
   ppr (DiscrI8 i)  = text (show i)
   ppr (DiscrI16 i) = text (show i)
   ppr (DiscrI32 i) = text (show i)
   ppr (DiscrI64 i) = text (show i)
   ppr (DiscrW w)   = text (show w)
   ppr (DiscrW8 w)  = text (show w)
   ppr (DiscrW16 w) = text (show w)
   ppr (DiscrW32 w) = text (show w)
   ppr (DiscrW64 w) = text (show w)
   ppr (DiscrF f)   = text (show f)
   ppr (DiscrD d)   = text (show d)
   ppr (DiscrP i)   = ppr i
   ppr NoDiscr      = text "DEF"

lookupBCEnv_maybe :: Id -> BCEnv -> Maybe ByteOff
lookupBCEnv_maybe = Map.lookup

idSizeW :: Platform -> Id -> WordOff
idSizeW platform = WordOff . argRepSizeW platform . idArgRep platform

idSizeCon :: Platform -> Id -> ByteOff
idSizeCon platform var
  -- unboxed tuple components are padded to word size
  | isUnboxedTupleType (idType var) ||
    isUnboxedSumType (idType var) =
      wordsToBytes platform .
      WordOff . sum . map (argRepSizeW platform . toArgRep platform) .
      typePrimRep . idType $ var
  | otherwise = ByteOff (primRepSizeB platform (idPrimRepU var))
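
-- For example, assuming a 64-bit word size: an Id whose type is the unboxed
-- tuple (# Int#, Double# #) has typePrimRep [IntRep, DoubleRep], so idSizeCon
-- reports 2 words = 16 bytes, whereas a plain Int# Id takes the otherwise
-- branch and reports 8 bytes.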

repSizeWords :: Platform -> PrimOrVoidRep -> WordOff
repSizeWords platform rep = WordOff $ argRepSizeW platform (toArgRepOrV platform rep)

isFollowableArg :: ArgRep -> Bool
isFollowableArg P = True
isFollowableArg _ = False

-- | Indicate if the calling convention is supported
isSupportedCConv :: CCallSpec -> Bool
isSupportedCConv (CCallSpec _ cconv _) = case cconv of
   CCallConv          -> True  -- we explicitly pattern match on every
   StdCallConv        -> False -- convention to ensure that a warning
   PrimCallConv       -> True  -- is triggered when a new one is added
   JavaScriptCallConv -> False

unsupportedCConvException :: a
unsupportedCConvException = throwGhcException (ProgramError
  ("Error: bytecode compiler can't handle some foreign calling conventions\n"++
   " Workaround: use -fobject-code, or compile this module to .o separately."))

mkSlideB :: Platform -> ByteOff -> ByteOff -> OrdList BCInstr
mkSlideB platform nb db = mkSlideW n d
  where
    !n = bytesToWords platform nb
    !d = bytesToWords platform db

mkSlideW :: WordOff -> WordOff -> OrdList BCInstr
mkSlideW (WordOff n) ws
  = unitOL (SLIDE n $ fromIntegral ws)

atomRep :: Platform -> StgArg -> ArgRep
atomRep platform e = toArgRepOrV platform (stgArgRep1 e)

-- | Let szsb be the sizes in bytes of some items pushed onto the stack, which
-- has initial depth @original_depth@.  Return the values which the stack
-- environment should map these items to.
mkStackOffsets :: ByteOff -> [ByteOff] -> [ByteOff]
mkStackOffsets original_depth szsb = tail (scanl' (+) original_depth szsb)
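
-- For instance, mkStackOffsets 4 [8, 8, 8] = [12, 20, 28]: each pushed item is
-- mapped to the stack depth reached just after pushing it, which is the
-- convention pushAtom relies on when it computes d - d_v.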

typeArgReps :: Platform -> Type -> [ArgRep]
typeArgReps platform = map (toArgRep platform) . typePrimRep

-- -----------------------------------------------------------------------------
-- The bytecode generator's monad

data BcM_State
   = BcM_State
        { bcm_hsc_env :: HscEnv
        , thisModule  :: Module          -- current module (for breakpoints)
        , nextlabel   :: Word32          -- for generating local labels
        , ffis        :: [FFIInfo]       -- ffi info blocks, to free later
                                         -- Should be free()d when it is GCd
        , modBreaks   :: Maybe ModBreaks -- info about breakpoints

        , breakInfo   :: IntMap CgBreakInfo -- ^ Info at breakpoint occurrence.
                                            --   Indexed with breakpoint *info* index.
                                            --   See Note [Breakpoint identifiers]
                                            --   in GHC.Types.Breakpoint
        , breakInfoIdx :: !Int              -- ^ Next index for breakInfo array
        }

newtype BcM r = BcM (BcM_State -> IO (BcM_State, r)) deriving (Functor)

ioToBc :: IO a -> BcM a
ioToBc io = BcM $ \st -> do
  x <- io
  return (st, x)

runBc :: HscEnv -> Module -> Maybe ModBreaks
      -> BcM r
      -> IO (BcM_State, r)
runBc hsc_env this_mod modBreaks (BcM m)
   = m (BcM_State hsc_env this_mod 0 [] modBreaks IntMap.empty 0)

thenBc :: BcM a -> (a -> BcM b) -> BcM b
thenBc (BcM expr) cont = BcM $ \st0 -> do
  (st1, q) <- expr st0
  let BcM k = cont q
  (st2, r) <- k st1
  return (st2, r)

thenBc_ :: BcM a -> BcM b -> BcM b
thenBc_ (BcM expr) (BcM cont) = BcM $ \st0 -> do
  (st1, _) <- expr st0
  (st2, r) <- cont st1
  return (st2, r)

returnBc :: a -> BcM a
returnBc result = BcM $ \st -> (return (st, result))
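
-- BcM is a hand-rolled state monad threading BcM_State through IO.  A sketch
-- of the same shape in terms of transformers (purely illustrative, not how
-- this module is written) would be
--
--     type BcM' r = StateT BcM_State IO r
--
-- with ioToBc playing the role of lift, getLabelBc and newBreakInfo as state
-- updates, and runBc supplying the initial state built from the HscEnv,
-- module and ModBreaks.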

instance Applicative BcM where
    pure  = returnBc
    (<*>) = ap
    (*>)  = thenBc_

instance Monad BcM where
    (>>=) = thenBc
    (>>)  = (*>)

instance HasDynFlags BcM where
    getDynFlags = BcM $ \st -> return (st, hsc_dflags (bcm_hsc_env st))

getHscEnv :: BcM HscEnv
getHscEnv = BcM $ \st -> return (st, bcm_hsc_env st)

getProfile :: BcM Profile
getProfile = targetProfile <$> getDynFlags

shouldAddBcoName :: BcM (Maybe Module)
shouldAddBcoName = do
  add <- gopt Opt_AddBcoName <$> getDynFlags
  if add
    then Just <$> getCurrentModule
    else return Nothing

emitBc :: ([FFIInfo] -> ProtoBCO Name) -> BcM (ProtoBCO Name)
emitBc bco
  = BcM $ \st -> return (st{ffis=[]}, bco (ffis st))

recordFFIBc :: RemotePtr C_ffi_cif -> BcM ()
recordFFIBc a
  = BcM $ \st -> return (st{ffis = FFIInfo a : ffis st}, ())

getLabelBc :: BcM LocalLabel
getLabelBc
  = BcM $ \st -> do let nl = nextlabel st
                    when (nl == maxBound) $
                        panic "getLabelBc: Ran out of labels"
                    return (st{nextlabel = nl + 1}, LocalLabel nl)

getLabelsBc :: Word32 -> BcM [LocalLabel]
getLabelsBc n
  = BcM $ \st -> let ctr = nextlabel st
                 in return (st{nextlabel = ctr+n}, coerce [ctr .. ctr+n-1])
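
-- For example, with nextlabel = 10, getLabelsBc 3 hands back the labels
-- 10, 11 and 12 (coerced to LocalLabel) and leaves nextlabel at 13.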

newBreakInfo :: CgBreakInfo -> BcM Int
newBreakInfo info = BcM $ \st ->
  let ix  = breakInfoIdx st
      st' = st
              { breakInfo = IntMap.insert ix info (breakInfo st)
              , breakInfoIdx = ix + 1
              }
  in return (st', ix)

getCurrentModule :: BcM Module
getCurrentModule = BcM $ \st -> return (st, thisModule st)

getCurrentModBreaks :: BcM (Maybe ModBreaks)
getCurrentModBreaks = BcM $ \st -> return (st, modBreaks st)

tickFS :: FastString
tickFS = fsLit "ticked"