diff --git a/compiler/GHC/Driver/Config/StgToJS.hs b/compiler/GHC/Driver/Config/StgToJS.hs index b2708494ccbb8091ac7bab44a6cf73b242067918..a737f9a242fde186f2e623265e01a62cc48babba 100644 --- a/compiler/GHC/Driver/Config/StgToJS.hs +++ b/compiler/GHC/Driver/Config/StgToJS.hs @@ -1,11 +1,15 @@ module GHC.Driver.Config.StgToJS ( initStgToJSConfig + , initJSLinkConfig ) where import GHC.StgToJS.Types +import GHC.StgToJS.Linker.Types import GHC.Driver.DynFlags +import GHC.Driver.Config.Linker + import GHC.Platform.Ways import GHC.Utils.Outputable @@ -30,4 +34,19 @@ initStgToJSConfig dflags = StgToJSConfig , csRuntimeAssert = False -- settings , csContext = initSDocContext dflags defaultDumpStyle + , csLinkerConfig = initLinkerConfig dflags + } + +-- | Default linker configuration +initJSLinkConfig :: DynFlags -> JSLinkConfig +initJSLinkConfig dflags = JSLinkConfig + { lcNoJSExecutables = False + , lcNoHsMain = False + , lcNoRts = False + , lcNoStats = False + , lcCombineAll = True + , lcForeignRefs = True + , lcForceEmccRts = False + , lcLinkCsources = not (gopt Opt_DisableJsCsources dflags) } + diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs index 823700678b514ff749d93934966cbed655ff11ca..3b5cfc1a26e4c6a43ce5db3f0eeb4dd270a8d8d6 100644 --- a/compiler/GHC/Driver/Flags.hs +++ b/compiler/GHC/Driver/Flags.hs @@ -328,6 +328,7 @@ data GeneralFlag -- JavaScript opts | Opt_DisableJsMinifier -- ^ render JavaScript pretty-printed instead of minified (compacted) + | Opt_DisableJsCsources -- ^ don't link C sources (compiled to JS) with Haskell code (compiled to JS) -- profiling opts | Opt_AutoSccsOnIndividualCafs diff --git a/compiler/GHC/Driver/Pipeline.hs b/compiler/GHC/Driver/Pipeline.hs index f0eadb239ffe6df5e693bd250025d8a8b59d7a8d..a8f2576c2e27211d301f3f8c554584bb81eec2eb 100644 --- a/compiler/GHC/Driver/Pipeline.hs +++ b/compiler/GHC/Driver/Pipeline.hs @@ -76,7 +76,6 @@ import GHC.Linker.Static.Utils import GHC.Linker.Types import GHC.StgToJS.Linker.Linker -import GHC.StgToJS.Linker.Types (defaultJSLinkConfig) import GHC.Utils.Outputable import GHC.Utils.Error @@ -440,7 +439,7 @@ link' logger tmpfs fc dflags unit_env batch_attempt_linking mHscMessager hpt -- Don't showPass in Batch mode; doLink will do that for us. case ghcLink dflags of LinkBinary - | backendUseJSLinker (backend dflags) -> linkJSBinary logger fc dflags unit_env obj_files pkg_deps + | backendUseJSLinker (backend dflags) -> linkJSBinary logger tmpfs fc dflags unit_env obj_files pkg_deps | otherwise -> linkBinary logger tmpfs dflags unit_env obj_files pkg_deps LinkStaticLib -> linkStaticLib logger dflags unit_env obj_files pkg_deps LinkDynLib -> linkDynLibCheck logger tmpfs dflags unit_env obj_files pkg_deps @@ -457,14 +456,13 @@ link' logger tmpfs fc dflags unit_env batch_attempt_linking mHscMessager hpt return Succeeded -linkJSBinary :: Logger -> FinderCache -> DynFlags -> UnitEnv -> [FilePath] -> [UnitId] -> IO () -linkJSBinary logger fc dflags unit_env obj_files pkg_deps = do +linkJSBinary :: Logger -> TmpFs -> FinderCache -> DynFlags -> UnitEnv -> [FilePath] -> [UnitId] -> IO () +linkJSBinary logger tmpfs fc dflags unit_env obj_files pkg_deps = do -- we use the default configuration for now. In the future we may expose -- settings to the user via DynFlags. - let lc_cfg = defaultJSLinkConfig + let lc_cfg = initJSLinkConfig dflags let cfg = initStgToJSConfig dflags - let extra_js = mempty - jsLinkBinary fc lc_cfg cfg extra_js logger dflags unit_env obj_files pkg_deps + jsLinkBinary fc lc_cfg cfg logger tmpfs dflags unit_env obj_files pkg_deps linkingNeeded :: Logger -> DynFlags -> UnitEnv -> Bool -> [Linkable] -> [UnitId] -> IO RecompileRequired linkingNeeded logger dflags unit_env staticLink linkables pkg_deps = do @@ -582,7 +580,7 @@ doLink hsc_env o_files = do NoLink -> return () LinkBinary | backendUseJSLinker (backend dflags) - -> linkJSBinary logger fc dflags unit_env o_files [] + -> linkJSBinary logger tmpfs fc dflags unit_env o_files [] | otherwise -> linkBinary logger tmpfs dflags unit_env o_files [] LinkStaticLib -> linkStaticLib logger dflags unit_env o_files [] LinkDynLib -> linkDynLibCheck logger tmpfs dflags unit_env o_files [] diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs index f04e1cd8ffaacd212c7be9cf05239cd146e30693..1191b657c705e1d12ddb73ae17caaeba7377c153 100644 --- a/compiler/GHC/Driver/Session.hs +++ b/compiler/GHC/Driver/Session.hs @@ -1904,6 +1904,7 @@ dynamic_flags_deps = [ ------ JavaScript flags ----------------------------------------------- ++ [ make_ord_flag defFlag "ddisable-js-minifier" (NoArg (setGeneralFlag Opt_DisableJsMinifier)) + , make_ord_flag defFlag "ddisable-js-c-sources" (NoArg (setGeneralFlag Opt_DisableJsCsources)) ] ------ Language flags ------------------------------------------------- diff --git a/compiler/GHC/Runtime/Interpreter/JS.hs b/compiler/GHC/Runtime/Interpreter/JS.hs index ede29c57064c82d7762b4151843fa8a6a9bddd3e..e4f6efa43b9de1f41206f102d88b53f252c7ced8 100644 --- a/compiler/GHC/Runtime/Interpreter/JS.hs +++ b/compiler/GHC/Runtime/Interpreter/JS.hs @@ -186,6 +186,18 @@ spawnJSInterp cfg = do , instExtra = extra } + -- TODO: to support incremental linking of wasm modules (e.g. produced from C + -- sources), we should: + -- + -- 1. link the emcc rts without trimming dead code as we don't know what might + -- be needed later by the Wasm modules we will dynamically load (cf + -- -sMAIN_MODULE). + -- 2. make the RUN_SERVER command wait for the emcc rts to be loaded. + -- 3. link wasm modules with -sSIDE_MODULE + -- 4. add a new command to load side modules with Emscripten's dlopen + -- + -- cf https://emscripten.org/docs/compiling/Dynamic-Linking.html + -- link rts and its deps jsLinkRts logger tmpfs tmp_dir codegen_cfg unit_env inst @@ -213,22 +225,26 @@ jsLinkRts logger tmpfs tmp_dir cfg unit_env inst = do , lcForeignRefs = False -- we don't need foreign references , lcNoJSExecutables = True -- we don't need executables , lcNoHsMain = True -- nor HsMain + , lcForceEmccRts = False -- nor the emcc rts + , lcLinkCsources = False -- we know that there are no C sources to load for the RTS } -- link the RTS and its dependencies (things it uses from `base`, etc.) let link_spec = LinkSpec { lks_unit_ids = [rtsUnitId, ghcInternalUnitId, primUnitId] - , lks_obj_files = mempty , lks_obj_root_filter = const False , lks_extra_roots = mempty - , lks_extra_js = mempty + , lks_objs_hs = mempty + , lks_objs_js = mempty + , lks_objs_cc = mempty } let finder_opts = instFinderOpts (instExtra inst) finder_cache = instFinderCache (instExtra inst) - link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache - jsLinkPlan logger tmpfs tmp_dir link_cfg cfg inst link_plan + ar_cache <- newArchiveCache + link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache ar_cache + jsLinkPlan logger tmpfs tmp_dir ar_cache link_cfg cfg inst link_plan -- | Link JS interpreter jsLinkInterp :: Logger -> TmpFs -> TempDir -> StgToJSConfig -> UnitEnv -> ExtInterpInstance JSInterpExtra -> IO () @@ -241,6 +257,8 @@ jsLinkInterp logger tmpfs tmp_dir cfg unit_env inst = do , lcForeignRefs = False -- we don't need foreign references , lcNoJSExecutables = True -- we don't need executables , lcNoHsMain = True -- nor HsMain + , lcForceEmccRts = False -- nor the emcc rts + , lcLinkCsources = True -- enable C sources, if any } let is_root _ = True -- FIXME: we shouldn't consider every function as a root @@ -258,18 +276,19 @@ jsLinkInterp logger tmpfs tmp_dir cfg unit_env inst = do -- link the interpreter and its dependencies let link_spec = LinkSpec { lks_unit_ids = units - , lks_obj_files = mempty , lks_obj_root_filter = is_root , lks_extra_roots = root_deps - , lks_extra_js = mempty + , lks_objs_hs = mempty + , lks_objs_js = mempty + , lks_objs_cc = mempty } let finder_cache = instFinderCache (instExtra inst) finder_opts = instFinderOpts (instExtra inst) - link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache - - jsLinkPlan logger tmpfs tmp_dir link_cfg cfg inst link_plan + ar_cache <- newArchiveCache + link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache ar_cache + jsLinkPlan logger tmpfs tmp_dir ar_cache link_cfg cfg inst link_plan -- | Link object files @@ -282,6 +301,8 @@ jsLinkObjects logger tmpfs tmp_dir cfg unit_env inst objs is_root = do , lcForeignRefs = False -- we don't need foreign references , lcNoJSExecutables = True -- we don't need executables , lcNoHsMain = True -- nor HsMain + , lcForceEmccRts = False -- nor the emcc rts + , lcLinkCsources = True -- enable C sources, if any } let units = preloadUnits (ue_units unit_env) @@ -290,19 +311,19 @@ jsLinkObjects logger tmpfs tmp_dir cfg unit_env inst objs is_root = do -- compute dependencies let link_spec = LinkSpec { lks_unit_ids = units - , lks_obj_files = fmap ObjFile objs , lks_obj_root_filter = is_root , lks_extra_roots = mempty - , lks_extra_js = mempty + , lks_objs_hs = objs + , lks_objs_js = mempty + , lks_objs_cc = mempty } let finder_opts = instFinderOpts (instExtra inst) finder_cache = instFinderCache (instExtra inst) - link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache - - -- link - jsLinkPlan logger tmpfs tmp_dir link_cfg cfg inst link_plan + ar_cache <- newArchiveCache + link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache ar_cache + jsLinkPlan logger tmpfs tmp_dir ar_cache link_cfg cfg inst link_plan @@ -317,8 +338,8 @@ jsLinkObject logger tmpfs tmp_dir cfg unit_env inst obj roots = do -- | Link the given link plan -- -- Perform incremental linking by removing what is already linked from the plan -jsLinkPlan :: Logger -> TmpFs -> TempDir -> JSLinkConfig -> StgToJSConfig -> ExtInterpInstance JSInterpExtra -> LinkPlan -> IO () -jsLinkPlan logger tmpfs tmp_dir link_cfg cfg inst link_plan = do +jsLinkPlan :: Logger -> TmpFs -> TempDir -> ArchiveCache -> JSLinkConfig -> StgToJSConfig -> ExtInterpInstance JSInterpExtra -> LinkPlan -> IO () +jsLinkPlan logger tmpfs tmp_dir ar_cache link_cfg cfg inst link_plan = do ---------------------------------------------------------------- -- Get already linked stuff and compute incremental plan ---------------------------------------------------------------- @@ -333,7 +354,7 @@ jsLinkPlan logger tmpfs tmp_dir link_cfg cfg inst link_plan = do ---------------------------------------------------------------- tmp_out <- newTempSubDir logger tmpfs tmp_dir - void $ jsLink link_cfg cfg logger tmp_out diff_plan + void $ jsLink link_cfg cfg logger tmpfs ar_cache tmp_out diff_plan -- Code has been linked into the following files: -- - generated rts from tmp_out/rts.js (depends on link options) diff --git a/compiler/GHC/StgToJS/Linker/Linker.hs b/compiler/GHC/StgToJS/Linker/Linker.hs index ba8085c0211fc69742e102ff4f2143ae3047f003..939d3b3c4300d0b6b46a4f379348fd72624bbf0a 100644 --- a/compiler/GHC/StgToJS/Linker/Linker.hs +++ b/compiler/GHC/StgToJS/Linker/Linker.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE BlockArguments #-} ----------------------------------------------------------------------------- -- | @@ -32,10 +33,12 @@ module GHC.StgToJS.Linker.Linker , LinkPlan (..) , emptyLinkPlan , incrementLinkPlan + , ArchiveCache + , newArchiveCache ) where -import Prelude +import GHC.Prelude import GHC.Platform.Host (hostPlatformArchOS) @@ -54,6 +57,7 @@ import GHC.SysTools import GHC.Linker.Static.Utils (exeFileName) import GHC.Linker.Types (Unlinked(..), linkableUnlinked) +import GHC.Linker.External import GHC.StgToJS.Linker.Types import GHC.StgToJS.Linker.Utils @@ -78,7 +82,6 @@ import GHC.Utils.Error import GHC.Utils.Logger (Logger, logVerbAtLeast) import GHC.Utils.Binary import qualified GHC.Utils.Ppr as Ppr -import GHC.Utils.Monad import GHC.Utils.TmpFs import GHC.Types.Unique.Set @@ -105,6 +108,7 @@ import Data.Maybe import Data.Set (Set) import qualified Data.Set as S import Data.Word +import Data.Monoid import System.IO import System.FilePath ((<.>), (</>), dropExtension, takeDirectory) @@ -125,10 +129,10 @@ data LinkerStats = LinkerStats , packedMetaDataSize :: !Word64 -- ^ number of bytes for metadata } -newtype ArchiveState = ArchiveState { loadedArchives :: IORef (Map FilePath Ar.Archive) } +newtype ArchiveCache = ArchiveCache { loadedArchives :: IORef (Map FilePath Ar.Archive) } -emptyArchiveState :: IO ArchiveState -emptyArchiveState = ArchiveState <$> newIORef M.empty +newArchiveCache :: IO ArchiveCache +newArchiveCache = ArchiveCache <$> newIORef M.empty defaultJsContext :: SDocContext defaultJsContext = defaultSDocContext{sdocStyle = PprCode} @@ -137,52 +141,70 @@ jsLinkBinary :: FinderCache -> JSLinkConfig -> StgToJSConfig - -> [FilePath] -> Logger + -> TmpFs -> DynFlags -> UnitEnv -> [FilePath] -> [UnitId] -> IO () -jsLinkBinary finder_cache lc_cfg cfg js_srcs logger dflags unit_env objs dep_units +jsLinkBinary finder_cache lc_cfg cfg logger tmpfs dflags unit_env hs_objs dep_units | lcNoJSExecutables lc_cfg = return () | otherwise = do + -- additional objects to link are passed as FileOption ldInputs... let cmdline_objs = [ f | FileOption _ f <- ldInputs dflags ] - -- discriminate JavaScript sources from real object files. - (cmdline_js_srcs, cmdline_js_objs) <- partitionM isJsFile cmdline_objs + + -- cmdline objects: discriminate between the 3 kinds of objects we have + let disc hss jss ccs = \case + [] -> pure (hss, jss, ccs) + (o:os) -> getObjectKind o >>= \case + Just ObjHs -> disc (o:hss) jss ccs os + Just ObjJs -> disc hss (o:jss) ccs os + Just ObjCc -> disc hss jss (o:ccs) os + Nothing -> do + logInfo logger (vcat [text "Ignoring unexpected command-line object: ", text o]) + disc hss jss ccs os + (cmdline_hs_objs, cmdline_js_objs, cmdline_cc_objs) <- disc [] [] [] cmdline_objs + let - objs' = map ObjFile (objs ++ cmdline_js_objs) - js_srcs' = js_srcs ++ cmdline_js_srcs - is_root _ = True -- FIXME: we shouldn't consider every function as a root, - -- but only the program entry point (main), either the - -- generated one or coming from an object - exe = jsExeFileName dflags + exe = jsExeFileName dflags + all_hs_objs = hs_objs ++ cmdline_hs_objs + all_js_objs = cmdline_js_objs + all_cc_objs = cmdline_cc_objs + is_root _ = True + -- FIXME: we shouldn't consider every function as a root, + -- but only the program entry point (main), either the + -- generated one or coming from an object -- compute dependencies let link_spec = LinkSpec { lks_unit_ids = dep_units - , lks_obj_files = objs' , lks_obj_root_filter = is_root , lks_extra_roots = mempty - , lks_extra_js = js_srcs' + , lks_objs_hs = all_hs_objs + , lks_objs_js = all_js_objs + , lks_objs_cc = all_cc_objs } let finder_opts = initFinderOpts dflags + ar_cache <- newArchiveCache - link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache + link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache ar_cache - void $ jsLink lc_cfg cfg logger exe link_plan + void $ jsLink lc_cfg cfg logger tmpfs ar_cache exe link_plan -- | link and write result to disk (jsexe directory) jsLink :: JSLinkConfig -> StgToJSConfig -> Logger + -> TmpFs + -> ArchiveCache -> FilePath -- ^ output file/directory -> LinkPlan -> IO () -jsLink lc_cfg cfg logger out link_plan = do +jsLink lc_cfg cfg logger tmpfs ar_cache out link_plan = do -- create output directory createDirectoryIfMissing False out @@ -194,11 +216,11 @@ jsLink lc_cfg cfg logger out link_plan = do -- link all Haskell code (program + dependencies) into out.js -- retrieve code for Haskell dependencies - mods <- collectModuleCodes link_plan + mods <- collectModuleCodes ar_cache link_plan -- LTO + rendering of JS code link_stats <- withBinaryFile (out </> "out.js") WriteMode $ \h -> - renderLinker h (csPrettyRender cfg) mods (lkp_extra_js link_plan) + renderModules h (csPrettyRender cfg) mods ------------------------------------------------------------- @@ -224,49 +246,128 @@ jsLink lc_cfg cfg logger out link_plan = do void $ hPutJS (csPrettyRender cfg) h (jsOptimize $ runJSM jsm $ jStgStatToJS <$> rts cfg) - -- link dependencies' JS files into lib.js - withBinaryFile (out </> "lib.js") WriteMode $ \h -> do - forM_ (lkp_archives link_plan) $ \archive_file -> do - Ar.Archive entries <- Ar.loadAr archive_file - forM_ entries $ \entry -> do - case getJsArchiveEntry entry of - Nothing -> return () - Just bs -> do - B.hPut h bs + -- link user-provided JS files into lib.js + (emcc_opts,lib_cc_objs) <- withBinaryFile (out </> "lib.js") WriteMode $ \h -> do + + let + tmp_dir = linkerTempDir (csLinkerConfig cfg) + + -- JS objects from dependencies' archives (.a) + go_archives emcc_opts cc_objs = \case + [] -> pure (emcc_opts, cc_objs) + (a:as) -> do + Ar.Archive entries <- loadArchive ar_cache a + (emcc_opts', cc_objs') <- go_entries emcc_opts cc_objs entries + go_archives emcc_opts' cc_objs' as + + -- archive's entries + go_entries emcc_opts cc_objs = \case + [] -> pure (emcc_opts, cc_objs) + (e:es) -> case getObjectKindBS (Ar.filedata e) of + Just ObjHs -> do + -- Nothing to do. HS objects are collected in + -- collectModuleCodes + go_entries emcc_opts cc_objs es + Just ObjCc -> do + -- extract the object file from the archive in a temporary + -- file and return its path + cc_obj_fn <- newTempName logger tmpfs tmp_dir TFL_CurrentModule "o" + B.writeFile cc_obj_fn (Ar.filedata e) + let cc_objs' = cc_obj_fn:cc_objs + go_entries emcc_opts cc_objs' es + Just ObjJs -> do + -- extract the JS code and append it to the `lib.js` file + (opts,bs) <- parseJSObjectBS (Ar.filedata e) + B.hPut h bs + hPutChar h '\n' + let emcc_opts' = emcc_opts <> opts + go_entries emcc_opts' cc_objs es + Nothing -> do + logInfo logger (vcat [text "Ignoring unexpected archive entry: ", text (Ar.filename e)]) + go_entries emcc_opts cc_objs es + + -- additional JS objects (e.g. from the command-line) + go_extra emcc_opts = \case + [] -> pure emcc_opts + (e:es) -> do + (opts,bs) <- readJSObject e + B.hPut h bs hPutChar h '\n' + let emcc_opts' = emcc_opts <> opts + go_extra emcc_opts' es + + -- archives + (emcc_opts0, cc_objs) <- go_archives defaultJSOptions [] (S.toList (lkp_archives link_plan)) + -- extra object files + emcc_opts1 <- go_extra emcc_opts0 (S.toList (lkp_objs_js link_plan)) + pure (emcc_opts1,cc_objs) + + + -- Link Cc objects using emcc's linker + -- + -- Cc objects have been extracted from archives (see above) and are listed + -- in lib_cc_objs. + -- + -- We don't link C sources if there are none (obviously) or if asked + -- explicitly by the user with -ddisable-js-c-sources (mostly used for + -- debugging purpose). + let emcc_objs = lib_cc_objs ++ S.toList (lkp_objs_cc link_plan) + let has_emcc_objs = not (null emcc_objs) + let link_c_sources = lcLinkCsources lc_cfg && has_emcc_objs + + when link_c_sources $ do + + runLink logger tmpfs (csLinkerConfig cfg) $ + [ Option "-o" + , FileOption "" (out </> "clibs.js") + -- Embed wasm files into a single .js file + , Option "-sSINGLE_FILE=1" + -- Enable support for addFunction (callbacks) + , Option "-sALLOW_TABLE_GROWTH" + -- keep some RTS methods and functions (otherwise removed as dead + -- code) + , Option ("-sEXPORTED_RUNTIME_METHODS=" ++ concat (intersperse "," (emccExportedRuntimeMethods emcc_opts))) + , Option ("-sEXPORTED_FUNCTIONS=" ++ concat (intersperse "," (emccExportedFunctions emcc_opts))) + ] + -- pass extra options from JS files' pragmas + ++ map Option (emccExtraOptions emcc_opts) + -- link objects + ++ map (FileOption "") emcc_objs + + -- Don't enable the Emcc rts when not needed (i.e. no Wasm module to link + -- with) and not forced by the caller (e.g. in the future iserv may require + -- incremental linking of Wasm modules, hence the emcc rts even building + -- iserv itself doesn't require the emcc rts) + let use_emcc_rts = UseEmccRts $ link_c_sources || lcForceEmccRts lc_cfg + -- link everything together into a runnable all.js -- only if we link a complete application, -- no incremental linking and no skipped parts when (lcCombineAll lc_cfg && not (lcNoRts lc_cfg)) $ do - _ <- combineFiles lc_cfg out + writeRunMain out use_emcc_rts + _ <- combineFiles lc_cfg link_c_sources out writeHtml out - writeRunMain out writeRunner lc_cfg out writeExterns out data LinkSpec = LinkSpec { lks_unit_ids :: [UnitId] - - , lks_obj_files :: [LinkedObj] - - , lks_obj_root_filter :: ExportedFun -> Bool - -- ^ Predicate for exported functions in objects to declare as root - - , lks_extra_roots :: Set ExportedFun - -- ^ Extra root functions from loaded units - - , lks_extra_js :: [FilePath] - -- ^ Extra JS files to link + , lks_obj_root_filter :: ExportedFun -> Bool -- ^ Predicate for exported functions in objects to declare as root + , lks_extra_roots :: Set ExportedFun -- ^ Extra root functions from loaded units + , lks_objs_hs :: [FilePath] -- ^ HS objects to link + , lks_objs_js :: [FilePath] -- ^ JS objects to link + , lks_objs_cc :: [FilePath] -- ^ Cc objects to link } instance Outputable LinkSpec where ppr s = hang (text "LinkSpec") 2 $ vcat [ hcat [text "Unit ids: ", ppr (lks_unit_ids s)] - , hcat [text "Object files:", ppr (lks_obj_files s)] + , hcat [text "HS objects:", vcat (fmap text (lks_objs_hs s))] + , hang (text "JS objects::") 2 (vcat (fmap text (lks_objs_js s))) + , hang (text "Cc objects::") 2 (vcat (fmap text (lks_objs_cc s))) , text "Object root filter: <function>" , hcat [text "Extra roots: ", ppr (lks_extra_roots s)] - , hang (text "Extra JS:") 2 (vcat (fmap text (lks_extra_js s))) ] emptyLinkPlan :: LinkPlan @@ -274,7 +375,8 @@ emptyLinkPlan = LinkPlan { lkp_block_info = mempty , lkp_dep_blocks = mempty , lkp_archives = mempty - , lkp_extra_js = mempty + , lkp_objs_js = mempty + , lkp_objs_cc = mempty } -- | Given a `base` link plan (assumed to be already linked) and a `new` link @@ -289,13 +391,15 @@ incrementLinkPlan base new = (diff,total) { lkp_block_info = M.union (lkp_block_info base) (lkp_block_info new) , lkp_dep_blocks = S.union (lkp_dep_blocks base) (lkp_dep_blocks new) , lkp_archives = S.union (lkp_archives base) (lkp_archives new) - , lkp_extra_js = S.union (lkp_extra_js base) (lkp_extra_js new) + , lkp_objs_js = S.union (lkp_objs_js base) (lkp_objs_js new) + , lkp_objs_cc = S.union (lkp_objs_cc base) (lkp_objs_cc new) } diff = LinkPlan { lkp_block_info = lkp_block_info new -- block info from "new" contains all we need to load new blocks , lkp_dep_blocks = S.difference (lkp_dep_blocks new) (lkp_dep_blocks base) , lkp_archives = S.difference (lkp_archives new) (lkp_archives base) - , lkp_extra_js = S.difference (lkp_extra_js new) (lkp_extra_js base) + , lkp_objs_js = S.difference (lkp_objs_js new) (lkp_objs_js base) + , lkp_objs_cc = S.difference (lkp_objs_cc new) (lkp_objs_cc base) } @@ -305,11 +409,14 @@ computeLinkDependencies -> LinkSpec -> FinderOpts -> FinderCache + -> ArchiveCache -> IO LinkPlan -computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache = do +computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache ar_cache = do let units = lks_unit_ids link_spec - let obj_files = lks_obj_files link_spec + let hs_objs = lks_objs_hs link_spec + let js_objs = lks_objs_js link_spec + let cc_objs = lks_objs_cc link_spec let extra_roots = lks_extra_roots link_spec let obj_is_root = lks_obj_root_filter link_spec @@ -323,7 +430,7 @@ computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache = do -- find/load linkable on-demand when a module is missing. - (objs_block_info, objs_required_blocks) <- loadObjBlockInfo obj_files + (objs_block_info, objs_required_blocks) <- loadObjBlockInfo hs_objs let obj_roots = S.fromList . filter obj_is_root $ concatMap (M.keys . bi_exports . lbi_info) (M.elems objs_block_info) obj_units = map moduleUnitId $ nub (M.keys objs_block_info) @@ -343,7 +450,7 @@ computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache = do let all_units = fmap unitId all_units_infos dep_archives <- getPackageArchives cfg unit_env all_units - (archives_block_info, archives_required_blocks) <- loadArchiveBlockInfo dep_archives + (archives_block_info, archives_required_blocks) <- loadArchiveBlockInfo ar_cache dep_archives -- compute dependencies let block_info = objs_block_info `M.union` archives_block_info @@ -378,7 +485,7 @@ computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache = do case linkableUnlinked linkable of [DotO p] -> do - (bis, req_b) <- loadObjBlockInfo [ObjFile p] + (bis, req_b) <- loadObjBlockInfo [p] -- Store new required blocks in IORef modifyIORef new_required_blocks_var ((++) req_b) case M.lookup mod bis of @@ -405,7 +512,8 @@ computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache = do { lkp_block_info = updated_block_info , lkp_dep_blocks = all_deps , lkp_archives = S.fromList dep_archives - , lkp_extra_js = S.fromList (lks_extra_js link_spec) + , lkp_objs_js = S.fromList js_objs + , lkp_objs_cc = S.fromList cc_objs } return plan @@ -450,19 +558,17 @@ hPutJS render_pretty h = \case pure $! (after - before) -- | Link modules and pretty-print them into the given Handle -renderLinker +renderModules :: Handle -> Bool -- ^ should we render readable JS for debugging? -> [ModuleCode] -- ^ linked code per module - -> Set FilePath -- ^ additional JS files -> IO LinkerStats -renderLinker h render_pretty mods js_files = do +renderModules h render_pretty mods = do -- link modules let (compacted_mods, meta) = linkModules mods let - putBS = B.hPut h putJS = hPutJS render_pretty h --------------------------------------------------------- @@ -482,13 +588,10 @@ renderLinker h render_pretty mods js_files = do !meta_length <- fromIntegral <$> putJS (jsOptimize meta) -- module exports - mapM_ (putBS . cmc_exports) compacted_mods - - -- explicit additional JS files - mapM_ (\i -> B.readFile i >>= putBS) (S.toList js_files) + mapM_ (B.hPut h . cmc_exports) compacted_mods -- stats - let link_stats = LinkerStats + let !link_stats = LinkerStats { bytesPerModule = M.fromList mod_sizes , packedMetaDataSize = meta_length } @@ -547,15 +650,20 @@ getPackageArchives cfg unit_env units = -- | Combine rts.js, lib.js, out.js to all.js that can be run -- directly with node.js or SpiderMonkey jsshell combineFiles :: JSLinkConfig + -> Bool -- has clibs.js -> FilePath -> IO () -combineFiles cfg fp = do - let files = map (fp </>) ["rts.js", "lib.js", "out.js"] - withBinaryFile (fp </> "all.js") WriteMode $ \h -> do - let cpy i = B.readFile i >>= B.hPut h - mapM_ cpy files - unless (lcNoHsMain cfg) $ do - B.hPut h runMainJS +combineFiles cfg has_clibs fp = do + let files = map (fp </>) $ catMaybes + [ Just "rts.js" + , Just "lib.js" + , Just "out.js" + , if has_clibs then Just "clibs.js" else Nothing + , if lcNoHsMain cfg then Nothing else Just "runmain.js" + ] + withBinaryFile (fp </> "all.js") WriteMode $ \h -> + forM_ files $ \i -> + B.readFile i >>= B.hPut h -- | write the index.html file that loads the program if it does not exit writeHtml @@ -583,15 +691,21 @@ templateHtml = -- index.html is loaded writeRunMain :: FilePath -- ^ output directory + -> UseEmccRts -> IO () -writeRunMain out = do +writeRunMain out use_emcc_rts = do let runMainFile = out </> "runmain.js" - e <- doesFileExist runMainFile - unless e $ - B.writeFile runMainFile runMainJS + B.writeFile runMainFile (runMainJS use_emcc_rts) -runMainJS :: B.ByteString -runMainJS = "h$main(h$mainZCZCMainzimain);\n" +newtype UseEmccRts = UseEmccRts Bool + +runMainJS :: UseEmccRts -> B.ByteString +runMainJS (UseEmccRts use_emcc_rts) = if use_emcc_rts + then "Module['onRuntimeInitialized'] = function() {\n\ + \h$initEmscriptenHeap();\n\ + \h$main(h$mainZCZCMainzimain);\n\ + \}\n" + else "h$main(h$mainZCZCMainzimain);\n" writeRunner :: JSLinkConfig -- ^ Settings -> FilePath -- ^ Output directory @@ -711,8 +825,8 @@ getDeps init_infos load_info root_funs root_blocks = traverse_funs init_infos S. in open `S.union` S.fromList (filter (not . alreadyLinked) new_blocks) -- | collect dependencies for a set of roots -collectModuleCodes :: LinkPlan -> IO [ModuleCode] -collectModuleCodes link_plan = do +collectModuleCodes :: ArchiveCache -> LinkPlan -> IO [ModuleCode] +collectModuleCodes ar_cache link_plan = do let block_info = lkp_block_info link_plan let blocks = lkp_dep_blocks link_plan @@ -738,13 +852,12 @@ collectModuleCodes link_plan = do sorted_module_blocks = sortBy cmp (M.toList module_blocks) -- load blocks - ar_state <- emptyArchiveState forM sorted_module_blocks $ \(mod,bids) -> do case M.lookup mod block_info of Nothing -> pprPanic "collectModuleCodes: couldn't find block info for module" (ppr mod) - Just lbi -> extractBlocks ar_state lbi bids + Just lbi -> extractBlocks ar_cache lbi bids -extractBlocks :: ArchiveState -> LocatedBlockInfo -> BlockIds -> IO ModuleCode +extractBlocks :: ArchiveCache -> LocatedBlockInfo -> BlockIds -> IO ModuleCode extractBlocks ar_state lbi blocks = do case lbi_loc lbi of ObjectFile fp -> do @@ -771,16 +884,22 @@ extractBlocks ar_state lbi blocks = do , mc_frefs = concatMap oiFImports l } -readArObject :: ArchiveState -> Module -> FilePath -> IO Object -readArObject ar_state mod ar_file = do - loaded_ars <- readIORef (loadedArchives ar_state) - (Ar.Archive entries) <- case M.lookup ar_file loaded_ars of +-- | Load an archive in memory and store it in the cache for future loads. +loadArchive :: ArchiveCache -> FilePath -> IO Ar.Archive +loadArchive ar_cache ar_file = do + loaded_ars <- readIORef (loadedArchives ar_cache) + case M.lookup ar_file loaded_ars of Just a -> pure a Nothing -> do a <- Ar.loadAr ar_file - modifyIORef (loadedArchives ar_state) (M.insert ar_file a) + modifyIORef (loadedArchives ar_cache) (M.insert ar_file a) pure a + +readArObject :: ArchiveCache -> Module -> FilePath -> IO Object +readArObject ar_cache mod ar_file = do + Ar.Archive entries <- loadArchive ar_cache ar_file + -- look for the right object in archive let go_entries = \case -- XXX this shouldn't be an exception probably @@ -888,15 +1007,16 @@ mkExportedModFuns mod symbols = map mk_fun symbols mk_fun sym = ExportedFun mod (LexicalFastString sym) -- | read all dependency data from the to-be-linked files -loadObjBlockInfo :: [LinkedObj] -- ^ object files to link - -> IO (Map Module LocatedBlockInfo, [BlockRef]) +loadObjBlockInfo + :: [FilePath] -- ^ object files to link + -> IO (Map Module LocatedBlockInfo, [BlockRef]) loadObjBlockInfo objs = (prepareLoadedDeps . catMaybes) <$> mapM readBlockInfoFromObj objs -- | Load dependencies for the Linker from Ar -loadArchiveBlockInfo :: [FilePath] -> IO (Map Module LocatedBlockInfo, [BlockRef]) -loadArchiveBlockInfo archives = do +loadArchiveBlockInfo :: ArchiveCache -> [FilePath] -> IO (Map Module LocatedBlockInfo, [BlockRef]) +loadArchiveBlockInfo ar_cache archives = do archDeps <- forM archives $ \file -> do - (Ar.Archive entries) <- Ar.loadAr file + (Ar.Archive entries) <- loadArchive ar_cache file catMaybes <$> mapM (readEntry file) entries return (prepareLoadedDeps $ concat archDeps) where @@ -911,34 +1031,6 @@ loadArchiveBlockInfo archives = do let !info = objBlockInfo obj pure $ Just (LocatedBlockInfo (ArchiveFile ar_file) info) --- | Predicate to check that an entry in Ar is a JS source --- and to return it without its header -getJsArchiveEntry :: Ar.ArchiveEntry -> Maybe B.ByteString -getJsArchiveEntry entry = getJsBS (Ar.filedata entry) - --- | Predicate to check that a file is a JS source -isJsFile :: FilePath -> IO Bool -isJsFile fp = withBinaryFile fp ReadMode $ \h -> do - bs <- B.hGet h jsHeaderLength - pure (isJsBS bs) - -isJsBS :: B.ByteString -> Bool -isJsBS bs = isJust (getJsBS bs) - --- | Get JS source with its header (if it's one) -getJsBS :: B.ByteString -> Maybe B.ByteString -getJsBS bs = B.stripPrefix jsHeader bs - --- Header added to JS sources to discriminate them from other object files. --- They all have .o extension but JS sources have this header. -jsHeader :: B.ByteString -jsHeader = "//JavaScript" - -jsHeaderLength :: Int -jsHeaderLength = B.length jsHeader - - - prepareLoadedDeps :: [LocatedBlockInfo] -> (Map Module LocatedBlockInfo, [BlockRef]) prepareLoadedDeps lbis = (module_blocks, must_link) @@ -956,25 +1048,21 @@ requiredBlocks d = map mk_block_ref (IS.toList $ bi_must_link d) -- | read block info from an object that might have already been into memory -- pulls in all Deps from an archive -readBlockInfoFromObj :: LinkedObj -> IO (Maybe LocatedBlockInfo) -readBlockInfoFromObj = \case - ObjLoaded name obj -> do - let !info = objBlockInfo obj - pure $ Just (LocatedBlockInfo (InMemory name obj) info) - ObjFile file -> do - readObjectBlockInfo file >>= \case - Nothing -> pure Nothing - Just info -> pure $ Just (LocatedBlockInfo (ObjectFile file) info) - - --- | Embed a JS file into a .o file --- --- The JS file is merely copied into a .o file with an additional header --- ("//Javascript") in order to be recognized later on. +readBlockInfoFromObj :: FilePath -> IO (Maybe LocatedBlockInfo) +readBlockInfoFromObj file = do + readObjectBlockInfo file >>= \case + Nothing -> pure Nothing + Just info -> pure $ Just (LocatedBlockInfo (ObjectFile file) info) + + +-- | Embed a JS file into a JS object .o file -- -- JS files may contain option pragmas of the form: //#OPTIONS: --- For now, only the CPP option is supported. If the CPP option is set, we --- append some common CPP definitions to the file and call cpp on it. +-- One of those is //#OPTIONS:CPP. When it is set, we append some common CPP +-- definitions to the file and call cpp on it. +-- +-- Other options (e.g. EMCC additional flags for link time) are stored in the +-- JS object header. See JSOptions. embedJsFile :: Logger -> DynFlags -> TmpFs -> UnitEnv -> FilePath -> FilePath -> IO () embedJsFile logger dflags tmpfs unit_env input_fn output_fn = do let profiling = False -- FIXME: add support for profiling way @@ -984,12 +1072,14 @@ embedJsFile logger dflags tmpfs unit_env input_fn output_fn = do -- the header lets the linker recognize processed JavaScript files -- But don't add JavaScript header to object files! - -- header appended to JS files stored as .o to recognize them. - let header = "//JavaScript\n" - jsFileNeedsCpp input_fn >>= \case - False -> copyWithHeader header input_fn output_fn - True -> do + -- read pragmas from JS file + -- we need to store them explicitly as they can be removed by CPP. + opts <- getOptionsFromJsFile input_fn + -- run CPP if needed + cpp_fn <- case enableCPP opts of + False -> pure input_fn + True -> do -- append common CPP definitions to the .js file. -- They define macros that avoid directly wiring zencoded names -- in RTS JS files @@ -1011,13 +1101,11 @@ embedJsFile logger dflags tmpfs unit_env input_fn output_fn = do cpp_opts pp_fn js_fn - -- add header to recognize the object as a JS file - copyWithHeader header js_fn output_fn + pure js_fn -jsFileNeedsCpp :: FilePath -> IO Bool -jsFileNeedsCpp fn = do - opts <- getOptionsFromJsFile fn - pure (CPP `elem` opts) + -- write JS object + cpp_bs <- B.readFile cpp_fn + writeJSObject opts cpp_bs output_fn -- | Link module codes. -- diff --git a/compiler/GHC/StgToJS/Linker/Types.hs b/compiler/GHC/StgToJS/Linker/Types.hs index 7f4cc683b96d49aa58193ecd940d444b49498aa4..fa6d64192b09aac0cb84f1886843198a5c05a46a 100644 --- a/compiler/GHC/StgToJS/Linker/Types.hs +++ b/compiler/GHC/StgToJS/Linker/Types.hs @@ -18,8 +18,6 @@ module GHC.StgToJS.Linker.Types ( JSLinkConfig (..) - , defaultJSLinkConfig - , LinkedObj (..) , LinkPlan (..) ) where @@ -27,7 +25,7 @@ where import GHC.StgToJS.Object import GHC.Unit.Types -import GHC.Utils.Outputable (hsep,Outputable(..),text,ppr, hang, IsDoc (vcat), IsLine (hcat)) +import GHC.Utils.Outputable (Outputable(..),text,ppr, hang, IsDoc (vcat), IsLine (hcat)) import Data.Map.Strict (Map) import Data.Set (Set) @@ -42,23 +40,18 @@ import Prelude -------------------------------------------------------------------------------- data JSLinkConfig = JSLinkConfig - { lcNoJSExecutables :: !Bool -- ^ Dont' build JS executables - , lcNoHsMain :: !Bool -- ^ Don't generate Haskell main entry - , lcNoRts :: !Bool -- ^ Don't dump the generated RTS - , lcNoStats :: !Bool -- ^ Disable .stats file generation - , lcForeignRefs :: !Bool -- ^ Dump .frefs (foreign references) files - , lcCombineAll :: !Bool -- ^ Generate all.js (combined js) + wrappers - } - --- | Default linker configuration -defaultJSLinkConfig :: JSLinkConfig -defaultJSLinkConfig = JSLinkConfig - { lcNoJSExecutables = False - , lcNoHsMain = False - , lcNoRts = False - , lcNoStats = False - , lcCombineAll = True - , lcForeignRefs = True + { lcNoJSExecutables :: !Bool -- ^ Dont' build JS executables + , lcNoHsMain :: !Bool -- ^ Don't generate Haskell main entry + , lcNoRts :: !Bool -- ^ Don't dump the generated RTS + , lcNoStats :: !Bool -- ^ Disable .stats file generation + , lcForeignRefs :: !Bool -- ^ Dump .frefs (foreign references) files + , lcCombineAll :: !Bool -- ^ Generate all.js (combined js) + wrappers + , lcForceEmccRts :: !Bool + -- ^ Force the link with the emcc rts. Use this if you plan to dynamically + -- load wasm modules made from C files (e.g. in iserv). + , lcLinkCsources :: !Bool + -- ^ Link C sources (compiled to JS/Wasm) with Haskell code compiled to + -- JS. This implies the use of the Emscripten RTS to load this code. } data LinkPlan = LinkPlan @@ -68,11 +61,15 @@ data LinkPlan = LinkPlan , lkp_dep_blocks :: Set BlockRef -- ^ Blocks to link - , lkp_archives :: Set FilePath - -- ^ Archives to load JS sources from + , lkp_archives :: !(Set FilePath) + -- ^ Archives to load JS and Cc sources from (JS code corresponding to + -- Haskell code is handled with blocks above) - , lkp_extra_js :: Set FilePath - -- ^ Extra JS files to link + , lkp_objs_js :: !(Set FilePath) + -- ^ JS objects to link + + , lkp_objs_cc :: !(Set FilePath) + -- ^ Cc objects to link } instance Outputable LinkPlan where @@ -81,20 +78,7 @@ instance Outputable LinkPlan where -- plan, just meta info used to retrieve actual block contents -- [ hcat [ text "Block info: ", ppr (lkp_block_info s)] [ hcat [ text "Blocks: ", ppr (S.size (lkp_dep_blocks s))] - , hang (text "JS files from archives:") 2 (vcat (fmap text (S.toList (lkp_archives s)))) - , hang (text "Extra JS:") 2 (vcat (fmap text (S.toList (lkp_extra_js s)))) + , hang (text "Archives:") 2 (vcat (fmap text (S.toList (lkp_archives s)))) + , hang (text "Extra JS objects:") 2 (vcat (fmap text (S.toList (lkp_objs_js s)))) + , hang (text "Extra Cc objects:") 2 (vcat (fmap text (S.toList (lkp_objs_cc s)))) ] - --------------------------------------------------------------------------------- --- Linker Environment --------------------------------------------------------------------------------- - --- | An object file that's either already in memory (with name) or on disk -data LinkedObj - = ObjFile FilePath -- ^ load from this file - | ObjLoaded String Object -- ^ already loaded: description and payload - -instance Outputable LinkedObj where - ppr = \case - ObjFile fp -> hsep [text "ObjFile", text fp] - ObjLoaded s o -> hsep [text "ObjLoaded", text s, ppr (objModuleName o)] diff --git a/compiler/GHC/StgToJS/Linker/Utils.hs b/compiler/GHC/StgToJS/Linker/Utils.hs index 128cb4ad704d2f03f531e3f8045c9164eabb9f2f..94361bb9b915bd4c8cf7ce93a836fa3b8b143f6a 100644 --- a/compiler/GHC/StgToJS/Linker/Utils.hs +++ b/compiler/GHC/StgToJS/Linker/Utils.hs @@ -17,9 +17,7 @@ ----------------------------------------------------------------------------- module GHC.StgToJS.Linker.Utils - ( getOptionsFromJsFile - , JSOption(..) - , jsExeFileName + ( jsExeFileName , getInstalledPackageLibDirs , getInstalledPackageHsLibs , commonCppDefs @@ -41,11 +39,7 @@ import GHC.StgToJS.Types import Prelude import GHC.Platform -import GHC.Utils.Misc import Data.List (isPrefixOf) -import System.IO -import Data.Char (isSpace) -import qualified Control.Exception as Exception import GHC.Builtin.Types import Language.Haskell.Syntax.Basic @@ -253,6 +247,10 @@ genCommonCppDefs profiling = mconcat , "#define RETURN_UBX_TUP9(x1,x2,x3,x4,x5,x6,x7,x8,x9) { h$ret1 = (x2); h$ret2 = (x3); h$ret3 = (x4); h$ret4 = (x5); h$ret5 = (x6); h$ret6 = (x7); h$ret7 = (x8); h$ret8 = (x9); return (x1); }\n" , "#define RETURN_UBX_TUP10(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10) { h$ret1 = (x2); h$ret2 = (x3); h$ret3 = (x4); h$ret4 = (x5); h$ret5 = (x6); h$ret6 = (x7); h$ret7 = (x8); h$ret8 = (x9); h$ret9 = (x10); return (x1); }\n" + , "#define RETURN_INT64(h,l) RETURN_UBX_TUP2((h)|0,(l)>>>0)\n" + , "#define RETURN_WORD64(h,l) RETURN_UBX_TUP2((h)>>>0,(l)>>>0)\n" + , "#define RETURN_ADDR(a,o) RETURN_UBX_TUP2(a,o)\n" + , "#define CALL_UBX_TUP2(r1,r2,c) { (r1) = (c); (r2) = h$ret1; }\n" , "#define CALL_UBX_TUP3(r1,r2,r3,c) { (r1) = (c); (r2) = h$ret1; (r3) = h$ret2; }\n" , "#define CALL_UBX_TUP4(r1,r2,r3,r4,c) { (r1) = (c); (r2) = h$ret1; (r3) = h$ret2; (r4) = h$ret3; }\n" @@ -282,38 +280,3 @@ jsExeFileName dflags dropPrefix prefix xs | prefix `isPrefixOf` xs = drop (length prefix) xs | otherwise = xs - - --- | Parse option pragma in JS file -getOptionsFromJsFile :: FilePath -- ^ Input file - -> IO [JSOption] -- ^ Parsed options, if any. -getOptionsFromJsFile filename - = Exception.bracket - (openBinaryFile filename ReadMode) - hClose - getJsOptions - -data JSOption = CPP deriving (Eq, Ord) - -getJsOptions :: Handle -> IO [JSOption] -getJsOptions handle = do - hSetEncoding handle utf8 - prefix' <- B.hGet handle prefixLen - if prefix == prefix' - then parseJsOptions <$> hGetLine handle - else pure [] - where - prefix :: B.ByteString - prefix = "//#OPTIONS:" - prefixLen = B.length prefix - -parseJsOptions :: String -> [JSOption] -parseJsOptions xs = go xs - where - trim = dropWhileEndLE isSpace . dropWhile isSpace - go [] = [] - go xs = let (tok, rest) = break (== ',') xs - tok' = trim tok - rest' = drop 1 rest - in if | tok' == "CPP" -> CPP : go rest' - | otherwise -> go rest' diff --git a/compiler/GHC/StgToJS/Object.hs b/compiler/GHC/StgToJS/Object.hs index 5ebca241139f9ae785bff0c363025f2d84d37c82..e0b530b605fd9273aa92651240e4cde328adcd68 100644 --- a/compiler/GHC/StgToJS/Object.hs +++ b/compiler/GHC/StgToJS/Object.hs @@ -4,6 +4,8 @@ {-# LANGUAGE Rank2Types #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE MultiWayIf #-} -- only for DB.Binary instances on Module {-# OPTIONS_GHC -fno-warn-orphans #-} @@ -21,28 +23,23 @@ -- Stability : experimental -- -- Serialization/deserialization of binary .o files for the JavaScript backend --- The .o files contain dependency information and generated code. --- All strings are mapped to a central string table, which helps reduce --- file size and gives us efficient hash consing on read --- --- Binary intermediate JavaScript object files: --- serialized [Text] -> ([ClosureInfo], JStat) blocks --- --- file layout: --- - magic "GHCJSOBJ" --- - compiler version tag --- - module name --- - offsets of string table --- - dependencies --- - offset of the index --- - unit infos --- - index --- - string table -- ----------------------------------------------------------------------------- module GHC.StgToJS.Object - ( putObject + ( ObjectKind(..) + , getObjectKind + , getObjectKindBS + -- * JS object + , JSOptions(..) + , defaultJSOptions + , getOptionsFromJsFile + , writeJSObject + , readJSObject + , parseJSObject + , parseJSObjectBS + -- * HS object + , putObject , getObjectHeader , getObjectBody , getObject @@ -51,7 +48,6 @@ module GHC.StgToJS.Object , readObjectBlocks , readObjectBlockInfo , isGlobalBlock - , isJsObjectFile , Object(..) , IndexEntry(..) , LocatedBlockInfo (..) @@ -74,12 +70,14 @@ import Data.Int import Data.IntSet (IntSet) import qualified Data.IntSet as IS import Data.List (sortOn) +import qualified Data.List as List import Data.Map (Map) import qualified Data.Map as M import Data.Word -import Data.Char -import Foreign.Storable -import Foreign.Marshal.Array +import Data.Semigroup +import qualified Data.ByteString as B +import qualified Data.ByteString.Unsafe as B +import Data.Char (isSpace) import System.IO import GHC.Settings.Constants (hiVersion) @@ -97,8 +95,76 @@ import GHC.Types.Unique.Map import GHC.Utils.Binary hiding (SymbolTable) import GHC.Utils.Outputable (ppr, Outputable, hcat, vcat, text, hsep) import GHC.Utils.Monad (mapMaybeM) +import GHC.Utils.Panic +import GHC.Utils.Misc (dropWhileEndLE) +import System.IO.Unsafe +import qualified Control.Exception as Exception + +---------------------------------------------- +-- The JS backend supports 3 kinds of objects: +-- 1. HS objects: produced from Haskell sources +-- 2. JS objects: produced from JS sources +-- 3. Cc objects: produced by emcc (e.g. from C sources) +-- +-- They all have a different header that allows them to be distinguished. +-- See ObjectKind type. +---------------------------------------------- + +-- | Different kinds of object (.o) supported by the JS backend +data ObjectKind + = ObjJs -- ^ JavaScript source embedded in a .o + | ObjHs -- ^ JS backend object for Haskell code + | ObjCc -- ^ Wasm module object as produced by emcc + deriving (Show,Eq,Ord) + +-- | Get the kind of a file object, if any +getObjectKind :: FilePath -> IO (Maybe ObjectKind) +getObjectKind fp = withBinaryFile fp ReadMode $ \h -> do + let !max_header_length = max (B.length jsHeader) + $ max (B.length wasmHeader) + (B.length hsHeader) + + bs <- B.hGet h max_header_length + pure $! getObjectKindBS bs + +-- | Get the kind of an object stored in a bytestring, if any +getObjectKindBS :: B.ByteString -> Maybe ObjectKind +getObjectKindBS bs + | jsHeader `B.isPrefixOf` bs = Just ObjJs + | hsHeader `B.isPrefixOf` bs = Just ObjHs + | wasmHeader `B.isPrefixOf` bs = Just ObjCc + | otherwise = Nothing + +-- Header added to JS sources to discriminate them from other object files. +-- They all have .o extension but JS sources have this header. +jsHeader :: B.ByteString +jsHeader = unsafePerformIO $ B.unsafePackAddressLen 8 "GHCJS_JS"# + +hsHeader :: B.ByteString +hsHeader = unsafePerformIO $ B.unsafePackAddressLen 8 "GHCJS_HS"# + +wasmHeader :: B.ByteString +wasmHeader = unsafePerformIO $ B.unsafePackAddressLen 4 "\0asm"# + + + +------------------------------------------------ +-- HS objects +-- +-- file layout: +-- - magic "GHCJS_HS" +-- - compiler version tag +-- - module name +-- - offsets of string table +-- - dependencies +-- - offset of the index +-- - unit infos +-- - index +-- - string table +-- +------------------------------------------------ --- | An object file +-- | A HS object file data Object = Object { objModuleName :: !ModuleName -- ^ name of the module @@ -217,11 +283,6 @@ getObjBlock syms bh = do } --- | A tag that determines the kind of payload in the .o file. See --- @StgToJS.Linker.Arhive.magic@ for another kind of magic -magic :: String -magic = "GHCJSOBJ" - -- | Serialized block indexes and their exported symbols -- (the first block is module-global) type Index = [IndexEntry] @@ -244,7 +305,7 @@ putObject -> [ObjBlock] -- ^ linkable units and their symbols -> IO () putObject bh mod_name deps os = do - forM_ magic (putByte bh . fromIntegral . ord) + putByteString bh hsHeader put_ bh (show hiVersion) -- we store the module name as a String because we don't want to have to @@ -267,37 +328,12 @@ putObject bh mod_name deps os = do pure (oiSymbols o,p) pure idx --- | Test if the object file is a JS object -isJsObjectFile :: FilePath -> IO Bool -isJsObjectFile fp = do - let !n = length magic - withBinaryFile fp ReadMode $ \hdl -> do - allocaArray n $ \ptr -> do - n' <- hGetBuf hdl ptr n - if (n' /= n) - then pure False - else checkMagic (peekElemOff ptr) - --- | Check magic -checkMagic :: (Int -> IO Word8) -> IO Bool -checkMagic get_byte = do - let go_magic !i = \case - [] -> pure True - (e:es) -> get_byte i >>= \case - c | fromIntegral (ord e) == c -> go_magic (i+1) es - | otherwise -> pure False - go_magic 0 magic - --- | Parse object magic -getCheckMagic :: BinHandle -> IO Bool -getCheckMagic bh = checkMagic (const (getByte bh)) - -- | Parse object header getObjectHeader :: BinHandle -> IO (Either String ModuleName) getObjectHeader bh = do - is_magic <- getCheckMagic bh - case is_magic of - False -> pure (Left "invalid magic header") + magic <- getByteString bh (B.length hsHeader) + case magic == hsHeader of + False -> pure (Left "invalid magic header for HS object") True -> do is_correct_version <- ((== hiVersion) . read) <$> get bh case is_correct_version of @@ -630,3 +666,134 @@ instance Binary StaticLit where 6 -> BinLit <$> get bh 7 -> LabelLit <$> get bh <*> get bh n -> error ("Binary get bh StaticLit: invalid tag " ++ show n) + + +------------------------------------------------ +-- JS objects +------------------------------------------------ + +-- | Options obtained from pragmas in JS files +data JSOptions = JSOptions + { enableCPP :: !Bool -- ^ Enable CPP on the JS file + , emccExtraOptions :: ![String] -- ^ Pass additional options to emcc at link time + , emccExportedFunctions :: ![String] -- ^ Arguments for `-sEXPORTED_FUNCTIONS` + , emccExportedRuntimeMethods :: ![String] -- ^ Arguments for `-sEXPORTED_RUNTIME_METHODS` + } + deriving (Eq, Ord) + + +instance Binary JSOptions where + put_ bh (JSOptions a b c d) = do + put_ bh a + put_ bh b + put_ bh c + put_ bh d + get bh = JSOptions <$> get bh <*> get bh <*> get bh <*> get bh + +instance Semigroup JSOptions where + a <> b = JSOptions + { enableCPP = enableCPP a || enableCPP b + , emccExtraOptions = emccExtraOptions a ++ emccExtraOptions b + , emccExportedFunctions = List.nub (List.sort (emccExportedFunctions a ++ emccExportedFunctions b)) + , emccExportedRuntimeMethods = List.nub (List.sort (emccExportedRuntimeMethods a ++ emccExportedRuntimeMethods b)) + } + +defaultJSOptions :: JSOptions +defaultJSOptions = JSOptions + { enableCPP = False + , emccExtraOptions = [] + , emccExportedRuntimeMethods = [] + , emccExportedFunctions = [] + } + +-- mimics `lines` implementation +splitOnComma :: String -> [String] +splitOnComma s = cons $ case break (== ',') s of + (l, s') -> (l, case s' of + [] -> [] + _:s'' -> splitOnComma s'') + where + cons ~(h, t) = h : t + + + +-- | Get the JS option pragmas from .js files +getJsOptions :: Handle -> IO JSOptions +getJsOptions handle = do + hSetEncoding handle utf8 + let trim = dropWhileEndLE isSpace . dropWhile isSpace + let go opts = do + hIsEOF handle >>= \case + True -> pure opts + False -> do + xs <- hGetLine handle + if not ("//#OPTIONS:" `List.isPrefixOf` xs) + then pure opts + else do + -- drop prefix and spaces + let ys = trim (drop 11 xs) + let opts' = if + | ys == "CPP" + -> opts {enableCPP = True} + + | Just s <- List.stripPrefix "EMCC:EXPORTED_FUNCTIONS=" ys + , fns <- fmap trim (splitOnComma s) + -> opts { emccExportedFunctions = emccExportedFunctions opts ++ fns } + + | Just s <- List.stripPrefix "EMCC:EXPORTED_RUNTIME_METHODS=" ys + , fns <- fmap trim (splitOnComma s) + -> opts { emccExportedRuntimeMethods = emccExportedRuntimeMethods opts ++ fns } + + | Just s <- List.stripPrefix "EMCC:EXTRA=" ys + -> opts { emccExtraOptions = emccExtraOptions opts ++ [s] } + + | otherwise + -> panic ("Unrecognized JS pragma: " ++ ys) + + go opts' + go defaultJSOptions + +-- | Parse option pragma in JS file +getOptionsFromJsFile :: FilePath -- ^ Input file + -> IO JSOptions -- ^ Parsed options. +getOptionsFromJsFile filename + = Exception.bracket + (openBinaryFile filename ReadMode) + hClose + getJsOptions + + +-- | Write a JS object (embed some handwritten JS code) +writeJSObject :: JSOptions -> B.ByteString -> FilePath -> IO () +writeJSObject opts contents output_fn = do + bh <- openBinMem (B.length contents + 1000) + + putByteString bh jsHeader + put_ bh opts + put_ bh contents + + writeBinMem bh output_fn + + +-- | Read a JS object from BinHandle +parseJSObject :: BinHandle -> IO (JSOptions, B.ByteString) +parseJSObject bh = do + magic <- getByteString bh (B.length jsHeader) + case magic == jsHeader of + False -> panic "invalid magic header for JS object" + True -> do + opts <- get bh + contents <- get bh + pure (opts,contents) + +-- | Read a JS object from ByteString +parseJSObjectBS :: B.ByteString -> IO (JSOptions, B.ByteString) +parseJSObjectBS bs = do + bh <- unsafeUnpackBinBuffer bs + parseJSObject bh + +-- | Read a JS object from file +readJSObject :: FilePath -> IO (JSOptions, B.ByteString) +readJSObject input_fn = do + bh <- readBinMem input_fn + parseJSObject bh diff --git a/compiler/GHC/StgToJS/Types.hs b/compiler/GHC/StgToJS/Types.hs index e93e642b860e4a7841520ee7566a9f63dd639272..61f3ec145c3f6da3bc008fb282f179656070c61e 100644 --- a/compiler/GHC/StgToJS/Types.hs +++ b/compiler/GHC/StgToJS/Types.hs @@ -30,6 +30,7 @@ import GHC.JS.Ppr () import GHC.Stg.Syntax import GHC.Core.TyCon +import GHC.Linker.Config import GHC.Types.Unique import GHC.Types.Unique.FM @@ -94,6 +95,7 @@ data StgToJSConfig = StgToJSConfig , csRuntimeAssert :: !Bool -- ^ Enable runtime assertions -- settings , csContext :: !SDocContext + , csLinkerConfig :: !LinkerConfig -- ^ Emscripten linker } -- | Information relevenat to code generation for closures. diff --git a/compiler/GHC/Utils/Binary.hs b/compiler/GHC/Utils/Binary.hs index 6deda6e0fede2c5af510ae61f2dd6c9f03f311dc..fa67e83fda22a08211ff3a4dd7295b517531a75b 100644 --- a/compiler/GHC/Utils/Binary.hs +++ b/compiler/GHC/Utils/Binary.hs @@ -47,6 +47,8 @@ module GHC.Utils.Binary -- * For writing instances putByte, getByte, + putByteString, + getByteString, -- * Variable length encodings putULEB128, @@ -1227,6 +1229,19 @@ getFS bh = do l <- get bh :: IO Int getPrim bh l (\src -> pure $! mkFastStringBytes src l ) +-- | Put a ByteString without its length (can't be read back without knowing the +-- length!) +putByteString :: BinHandle -> ByteString -> IO () +putByteString bh bs = + BS.unsafeUseAsCStringLen bs $ \(ptr, l) -> do + putPrim bh l (\op -> copyBytes op (castPtr ptr) l) + +-- | Get a ByteString whose length is known +getByteString :: BinHandle -> Int -> IO ByteString +getByteString bh l = + BS.create l $ \dest -> do + getPrim bh l (\src -> copyBytes dest src l) + putBS :: BinHandle -> ByteString -> IO () putBS bh bs = BS.unsafeUseAsCStringLen bs $ \(ptr, l) -> do diff --git a/docs/users_guide/9.10.1-notes.rst b/docs/users_guide/9.10.1-notes.rst index d044791565143b3e12e31622c115beee88433f9d..385d4328da44eba13e280c5fe8157f7246874d7b 100644 --- a/docs/users_guide/9.10.1-notes.rst +++ b/docs/users_guide/9.10.1-notes.rst @@ -148,6 +148,16 @@ Compiler This means that if you are using ``-fllvm`` you now need ``llc``, ``opt`` and ``clang`` available. +JavaScript backend +~~~~~~~~~~~~~~~~~~ + +- The JavaScript backend now supports linking with C sources. It uses Emscripten + to compile them to WebAssembly. The resulting JS file embeds and loads these + WebAssembly files. Important note: JavaScript wrappers are required to call + into C functions and pragmas have been added to indicate which C functions are + exported (see the users guide). + + GHCi ~~~~ diff --git a/docs/users_guide/debugging.rst b/docs/users_guide/debugging.rst index 744a1afb5467d760ad4b2332a890bb4130135e5e..f43496a5c58b262a59bee29def0cf68e47425d05 100644 --- a/docs/users_guide/debugging.rst +++ b/docs/users_guide/debugging.rst @@ -785,6 +785,14 @@ JavaScript code generator Include human-readable spacing and indentation when generating JavaScript. +.. ghc-flag:: -ddisable-js-c-sources + :shortdesc: Disable the link with C sources compiled to JavaScript + :type: dynamic + + For debugging it can be useful to avoid linking with C sources compiled to + JavaScript with Emscripten. This also avoids linking with Emcscripten's RTS. + Note that code that calls into this C code or that uses Emscripten's + primitives will fail at runtime (e.g. undefined function errors). Miscellaneous backend dumps ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/users_guide/javascript.rst b/docs/users_guide/javascript.rst index 7ef41ceec76a264bb0c409fb301d236da3c0a8ed..90039cecc95db2da61db7e0a0ab30f1d8cf902b7 100644 --- a/docs/users_guide/javascript.rst +++ b/docs/users_guide/javascript.rst @@ -421,3 +421,116 @@ hand-written JavaScript come from functions with data that stays as JavaScript primitive types for a long time, especially strings. For this, ``JSVal`` allows values to be passed between ``Haskell`` and ``JavaScript`` without a marshalling penalty. + + +Linking with C sources +---------------------- + +GHC supports compiling C sources into JavaScript (using Emscripten) and linking +them with the rest of the JavaScript code (generated from Haskell codes and from +the RTS). + +C functions compiled with Emscripten get a "_" prepended to their name in +JavaScript. For example, C "malloc" becomes "_malloc" in JavaScript. + +EMCC pragmas +~~~~~~~~~~~~ + +By default the EMCC linker drops code considered dead and it has no way to know +which code is alive due to some call from Haskell or from a JavaScript wrapper. +As such, you must explicitly add some pragmas at the top of one of your `.js` +files to indicate which functions are alive: + +``` +//#OPTIONS:EMCC:EXPORTED_RUNTIME_METHODS=foo,bar +``` + +Enable methods `foo` and `bar` from the Emscripten runtime system. This is used +for methods such as `ccall`, `cwrap`, `addFunction`, `removeFunction`... that +are described in Emscripten documentation. + +``` +//#OPTIONS:EMCC:EXPORTED_FUNCTIONS=_foo,_bar +``` + +Enable C functions `foo` and `bar` to be exported respectively as `_foo` and +`_bar` (`_` prepended). This is used for C library functions (e.g. `_malloc`, +`_free`, etc.) and for the C code compiled with your project (e.g. +`_sqlite3_open` and others for the `sqlite` C library). + +You can use both pragmas as many times as you want. Ultimately all the entries +end up in sets of functions passed to the Emscripten linker via +`-sEXPORTED_RUNTIME_METHODS` and `-sEXPORTED_FUNCTIONS` (which can only be +passed once; the latter argument overrides the former ones). + + +``` +//#OPTIONS:EMCC:EXTRA=-foo,-bar +``` + +This pragma allows additional options to be passed to Emscripten if need be. We +already pass: +- `-sSINGLE_FILE=1`: required to create a single `.js` file as artefact + (otherwise `.wasm` files corresponding to C codes need to be present in the + current working directory when invoking the resulting `.js` file). +- `-sALLOW_TABLE_GROWTH`: required to support `addFunction` +- `-sEXPORTED_RUNTIME_METHODS` and `-sEXPORTED_FUNCTIONS`: see above. + +Be careful because some extra arguments may break the build in unsuspected ways. + +Wrappers +~~~~~~~~ + +The JavaScript backend doesn't generate wrappers for foreign imports to call +directly into the compiled C code. I.e. given the following foreign import: + +```haskell +foreign import ccall "foo" foo :: ... +``` + +The JavaScript backend will replace calls to `foo` with calls to the JavaScript +function `h$foo`. It's still up to the programmer to call `_foo` or not from `h$foo` +on a case by case basis. If `h$foo` calls the generated from C function +`_foo`, then we say that `h$foo` is a wrapper function. These wrapper functions +are used to marshal arguments and returned values between the JS heap and the +Emscripten heap. + +On one hand, GHC's JavaScript backend creates a different array of bytes per +allocation (in order to make use of the garbage collector of the JavaScript +engine). On the other hand, Emscripten's C heap consists in a single array of +bytes. To call C functions converted to JavaScript that have pointer arguments, +wrapper functions have to: + +1. allocate some buffer in the Emscripten heap (using `_malloc`) to get a valid + Emscripten pointer +2. copy the bytes from the JS object to the buffer in the Emscripten heap +3. use the Emscripten pointer to make the call to the C function +4. optionally copy back the bytes from the Emscripten heap if the call may have + changed the contents of the buffer +5. free the Emscripten buffer (with `_free`) + +GHC's JavaScript rts provides helper functions for this in `rts/js/mem.js`. See +`h$copyFromHeap`, `h$copyToHeap`, `h$initHeapBuffer`, etc. + +Callbacks +~~~~~~~~~ + +Some C functions take function pointers as arguments (e.g. callbacks). This is +supported by the JavaScript backend but requires some work from the wrapper +functions. + +1. On the Haskell side it is possible to create a pointer to an Haskell function + (a `FunPtr`) by using a "wrapper" foreign import. See the documentation of + `base:Foreign.Ptr.FunPtr`. +2. This `FunPtr` can be passed to a JavaScript wrapper function. However it's + implemented as a `StablePtr` and needs to be converted into a function + pointer that Emscripten understands. This can be done with + `h$registerFunPtrOnHeap`. +3. When a callback is no longer needed, it can be freed with + `h$unregisterFunPtrFromHeap`. + +Note that in some circumstances you may not want to register an Haskell function +directly as a callback. It is perfectly possible to register/free regular JavaScript +function as Emscripten functions using `Module.addFunction` and `Module.removeFunction`. +That's what the helper functions mentioned above do. + diff --git a/rts/js/mem.js b/rts/js/mem.js index 1809e71a2cfee274eef7809c80d343848ddbdbfb..dfcba9474ec876c4e545adfb00e8bcd60234ca96 100644 --- a/rts/js/mem.js +++ b/rts/js/mem.js @@ -1,4 +1,5 @@ -//#OPTIONS: CPP +//#OPTIONS:CPP +//#OPTIONS:EMCC:EXPORTED_RUNTIME_METHODS=addFunction,removeFunction,getEmptyTableSlot // #define GHCJS_TRACE_META 1 @@ -995,6 +996,15 @@ function h$roundUpToMultipleOf(n,m) { function h$newByteArray(len) { var len0 = Math.max(h$roundUpToMultipleOf(len, 8), 8); var buf = new ArrayBuffer(len0); + return h$wrapByteArray(buf,len); +} + +// Create a ByteArray from a given ArrayBuffer +// +// This is useful to wrap pre-existing ArrayBuffer such as Emscripten heap +// (Module.HEAP8). However don't rely on the ByteArray length ("len" field) too +// much in this case because it isn't updated when the heap grows. +function h$wrapByteArray(buf,len) { return { buf: buf , len: len , i3: new Int32Array(buf) @@ -1088,10 +1098,11 @@ function h$compareByteArrays(a1,o1,a2,o2,n) { */ function h$wrapBuffer(buf, unalignedOk, offset, length) { if(!unalignedOk && offset && offset % 8 !== 0) { - throw ("h$wrapBuffer: offset not aligned:" + offset); + throw new Error("h$wrapBuffer: offset not aligned:" + offset); + } + if(!buf || !(buf instanceof ArrayBuffer)) { + throw new Error("h$wrapBuffer: not an ArrayBuffer: " + buf) } - if(!buf || !(buf instanceof ArrayBuffer)) - throw "h$wrapBuffer: not an ArrayBuffer" if(!offset) { offset = 0; } if(!length || length < 0) { length = buf.byteLength - offset; } return { buf: buf @@ -1483,3 +1494,168 @@ function h$checkOverlapByteArray(a1, o1, a2, o2, n) { if (o1 > o2) return o1 - o2 >= n; return true; } + + +///////////////////////////////////////// +// Interface with Emscripten's HEAP +///////////////////////////////////////// + +// The Emscripten Heap is an ArrayBuffer that we wrap as if it was a ByteArray. +// It allows pointers into Emscripten Heap to be representable as our usual +// pointers (ByteArray, Offset). +var h$HEAP = null; + +// Initialize the global h$HEAP variable. This must only be called when linking +// with Emscripten. +function h$initEmscriptenHeap() { + h$HEAP = h$wrapByteArray(Module.HEAP8.buffer, Module.HEAP8.buffer.byteLength); +} + +// Create a pointer in Emscripten's HEAP +function h$mkHeapPtr(offset) { + if (!h$HEAP) { + throw new Error("h$mkHeapPtr: Emscripten h$HEAP not initialized"); + } + return {'array':h$HEAP, 'offset': offset}; +} + +// Copy len bytes from the given buffer to the heap +function h$copyToHeap(buf_d, buf_o, tgt, len) { + if(len === 0) return; + var u8 = buf_d.u8; + for(var i=0;i<len;i++) { + Module.HEAPU8[tgt+i] = u8[buf_o+i]; + } +} + +// Copy len bytes from the heap to the given buffer +function h$copyFromHeap(src, buf_d, buf_o, len) { + var u8 = buf_d.u8; + for(var i=0;i<len;i++) { + u8[buf_o+i] = Module.HEAPU8[src+i]; + } +} + +// malloc and initialize a buffer on the HEAP +function h$initHeapBufferLen(buf_d, buf_o, len) { + var buf_ptr = _malloc(len); + h$copyToHeap(buf_d, buf_o, buf_ptr, len); + return buf_ptr; +} + +// Allocate and copy a JS buffer on the heap +function h$initHeapBuffer(str_d, str_o) { + if(str_d === null) return null; + return ptr = h$initHeapBufferLen(str_d, str_o, str_d.len); +} + + + +// temporarily malloc and initialize a buffer on the HEAP, pass it to the +// continuation, then release the buffer +function h$withOutBufferOnHeap(ptr_d, ptr_o, len, cont) { + var ptr = _malloc(len); + h$copyToHeap(ptr_d, ptr_o, ptr, len); + var ret = cont(ptr); + h$copyFromHeap(ptr, ptr_d, ptr_o, len); + _free(ptr); + return ret; +} + +// Temporarily allocate and initialize a buffer on the heap and pass it to the +// continuation. The buffer is freed from the heap when the continuation +// returns. +function h$withCBufferOnHeap(str_d, str_o, len, cont) { + var str = _malloc(len); + if(str_d !== null) h$copyToHeap(str_d, str_o, str, len); + var ret = cont(str); + _free(str); + return ret; +} + +// Temporarily allocate a CString on the heap and pass it to the continuation. +// The string is freed from the heap when the continuation returns. +function h$withCStringOnHeap(str_d, str_o, cont) { + return h$withCBufferOnHeap(str_d, str_o, str_d === null ? 0 : h$strlen(str_d,str_o), cont); +} + +// Dereference a heap pointer to a heap pointer (a 32-bit offset in the heap) +function h$derefHeapPtr_addr(offset) { + var ptr = h$newByteArray(4); + ptr.u8.set(Module.HEAPU8.subarray(offset, offset+4)); + return ptr.i3[0]; +} + +// Write a heap pointer (h$HEAP,offset) at the given JS pointer +function h$putHeapAddr(a,o,offset) { + if (offset == 0) { + // null pointer in HEAP must become null pointer in JS land + PUT_ADDR(a,o,null,0); + } else { + PUT_ADDR(a,o,h$HEAP,offset); + } +} + +// get a C string (null-terminated) from HEAP +// Convert HEAP null (i.e. 0) into JS null +function h$copyCStringFromHeap(offset) { + if(offset == 0) return null; + var len = 0; + while(HEAPU8[offset+len] !== 0){ len++; }; + var str = h$newByteArray(len+1); + str.u8.set(HEAPU8.subarray(offset,offset+len+1)); + return str; +} + +// get an array of n pointers from HEAP +function h$copyPtrArrayFromHeap(offset,n) { + var ptr = h$newByteArray(4*n); + ptr.u8.set(HEAPU8.subarray(offset, offset+4*n)); + return ptr; +} + +// Given a FunPtr, allocate a function wrapper with Emscripten and register it +// in the HEAP. Return the heap pointer to it. +// +// If `ask_ptr` is true, `mkfn` get passed both the function and the heap +// pointer. This is useful in callbacks which should cleanup themselves from the +// Emscripten heap during their execution. Call h$unregisterFunPtrFromHeap on the +// heap pointer to clean it. +// +// Since Emscripten uses WebAssembly, function types must be known precisely. +// The `ty` serves this purpose. See Emscripten's `addFunction` documentation +// for the syntax. +function h$registerFunPtrOnHeap(funptr_d, funptr_o, ask_ptr, ty, mkfn) { + // TODO: assert funptr_d is the StablePtr array + if (funptr_o == 0) return 0; + + var fun = h$deRefStablePtr(funptr_o); + + // In destroy callbacks we want to call removeFunction on the running + // callback. But it hasn't been registered yet so we don't have its pointer! + // + // So we call getEmptyTableSlot to get the next function slot in advance. + // But this has the side-effect of reserving the next empty slot... so we have + // to release it just after. The following call to addFunction will get the + // same slot. Warning: this hack doesn't work if addFunction is called in + // mkfn, but we check this with an assertion. + if (ask_ptr) { + var cb_ptr = getEmptyTableSlot(); + Module.removeFunction(cb_ptr); + + var cb = mkfn(fun,cb_ptr); + var ptr = Module.addFunction(cb,ty); + + assert(cb_ptr === ptr, "h$registerJSFunPtrOnHeap: got different pointer offsets: " + cb_ptr + " and " + ptr); + return ptr; + } + else { + var cb = mkfn(fun); + return Module.addFunction(cb,ty); + } +} + +// Unregister a function previously registered on the heap with h$registerFunPtrOnHeap +function h$unregisterFunPtrFromHeap(p) { + return Module.removeFunction(p); +} diff --git a/rts/js/rts.js b/rts/js/rts.js index a714c21da6526710a272eaf9a49758cdf6a8d0e4..89aea6607487763f53251cdaf7221f7a79780d9a 100644 --- a/rts/js/rts.js +++ b/rts/js/rts.js @@ -111,25 +111,54 @@ function h$rts_toString(x) { function h$rts_mkPtr(x) { var buf, off = 0; - if(typeof x == 'string') { - // string: UTF-8 encode + // null pointer + if(x === null) { + buf = null; + off = 0; + } + // Haskell pointer + else if(typeof x == 'object' && + typeof x.offset == 'number' && + typeof x.array !== 'undefined') { + buf = x.array; + off = x.offset; + } + // JS string: UTF-8 encode + else if(typeof x == 'string') { buf = h$encodeUtf8(x); off = 0; - } else if(typeof x == 'object' && + } + // Haskell ByteArray + else if(typeof x == 'object' && typeof x.len == 'number' && x.buf instanceof ArrayBuffer) { - // already a Haskell ByteArray buf = x; off = 0; - } else if(x.isView) { - // ArrayBufferView: make ByteArray with the same byteOffset + } + // Offset in the Emscripten heap + else if (typeof x == 'number' && h$HEAP !== null) { + if (x == 0) { + buf = null; + off = 0; + } + else { + buf = h$HEAP; + off = x; + } + } + // ArrayBufferView: make ByteArray with the same byteOffset + else if(x.isView) { buf = h$wrapBuffer(x.buffer, true, 0, x.buffer.byteLength); off = x.byteOffset; - } else { - // plain ArrayBuffer + } + // plain ArrayBuffer + else if (x instanceof ArrayBuffer) { buf = h$wrapBuffer(x, true, 0, x.byteLength); off = 0; } + else { + throw new Error ("h$rts_mkPtr: invalid argument: " + x); + } return MK_PTR(buf, off); } diff --git a/testsuite/tests/driver/all.T b/testsuite/tests/driver/all.T index 5469916c6eeca873e03136a9daec26b26b774645..6525a127d05855f2eb0fd645ced795c5ce1e19d9 100644 --- a/testsuite/tests/driver/all.T +++ b/testsuite/tests/driver/all.T @@ -299,7 +299,7 @@ test('T18369', normal, compile, ['-O']) test('T21682', normal, compile_fail, ['-Werror=unrecognised-warning-flags -Wfoo']) test('FullGHCVersion', normal, compile_and_run, ['-package ghc-boot']) test('OneShotTH', req_th, makefile_test, []) -test('T17481', js_broken(22261), makefile_test, []) +test('T17481', normal, makefile_test, []) test('T20084', normal, makefile_test, []) test('RunMode', [req_interp,extra_files(['RunMode/Test.hs'])], run_command, ['{compiler} --run -iRunMode/ -ignore-dot-ghci RunMode.hs -- hello']) test('T20439', normal, run_command, diff --git a/testsuite/tests/driver/recomp011/all.T b/testsuite/tests/driver/recomp011/all.T index f772aef33cb3c5147396bda26ab6e3dedf5f8360..4cc574d5c53f15661afb3baa36bcede71cac3e73 100644 --- a/testsuite/tests/driver/recomp011/all.T +++ b/testsuite/tests/driver/recomp011/all.T @@ -2,6 +2,5 @@ test('recomp011', [ extra_files(['Main.hs']) - , js_broken(22261) ], makefile_test, []) diff --git a/testsuite/tests/javascript/js-c-sources/js-c-sources01.hs b/testsuite/tests/javascript/js-c-sources/js-c-sources01.hs new file mode 100644 index 0000000000000000000000000000000000000000..5ba5056496a6b7045ca35f559aa39678abf47690 --- /dev/null +++ b/testsuite/tests/javascript/js-c-sources/js-c-sources01.hs @@ -0,0 +1,47 @@ +import Foreign.C.Types +import Foreign.C.String +import Foreign.Ptr +import Data.Char +import Foreign.Marshal.Alloc +import System.IO + +main :: IO () +main = do + -- avoid C and Haskell prints to stdout to be intermingled due to buffering on the Haskell side + hSetBuffering stdout NoBuffering + + -- test sending int values back and forth + -- test printing on stdout in C + print =<< hello_c =<< hello_c =<< hello_c 17 + + -- test printing an Haskell string in C + withCString "Hello from Haskell" write_c + + -- test allocating a CString in C and printing it in Haskell + c_str <- alloc_c + print =<< peekCString c_str + free c_str -- not really needed. The CString lives as an array on the JS heap and will be collected + + -- test modifying Haskell allocated bytes in C + withCString "Hello from Haskell" $ \c_str -> do + modify_c c_str + print =<< peekCString c_str + + -- test calling back into Haskell from C + let to_upper c = fromIntegral (ord (toUpper (chr (fromIntegral c)))) + cb <- mkCallback to_upper + withCString "Hello from Haskell 1234" $ \c_str -> do + callback_c c_str cb + print =<< peekCString c_str + freeHaskellFunPtr cb + + + +foreign import javascript "hello_c_wrapper" hello_c :: Int -> IO Int +foreign import javascript "write_c_wrapper" write_c :: CString -> IO () +foreign import javascript "alloc_c_wrapper" alloc_c :: IO CString +foreign import javascript "modify_c_wrapper" modify_c :: CString -> IO () +foreign import javascript "callback_c_wrapper" callback_c :: CString -> (FunPtr (CChar -> CChar)) -> IO () + +foreign import ccall "wrapper" mkCallback :: (CChar -> CChar) -> IO (FunPtr (CChar -> CChar)) + diff --git a/testsuite/tests/javascript/js-c-sources/js-c-sources01.stdout b/testsuite/tests/javascript/js-c-sources/js-c-sources01.stdout new file mode 100644 index 0000000000000000000000000000000000000000..9c95f47c3f2bcedafe27bc23fd48cd0326e395fc --- /dev/null +++ b/testsuite/tests/javascript/js-c-sources/js-c-sources01.stdout @@ -0,0 +1,8 @@ +Hello from C: 17 +Hello from C: 18 +Hello from C: 19 +20 +Received string: Hello from Haskell +"ghc" +"HELLO FROM HASKELL" +"HELLO FROM HASKELL 1234" diff --git a/testsuite/tests/javascript/js-c-sources/js-c-sources01_c.c b/testsuite/tests/javascript/js-c-sources/js-c-sources01_c.c new file mode 100644 index 0000000000000000000000000000000000000000..80b522d46cab651464fccf5037ed4e11abe89ac7 --- /dev/null +++ b/testsuite/tests/javascript/js-c-sources/js-c-sources01_c.c @@ -0,0 +1,33 @@ +#include<stdio.h> +#include<stdlib.h> + +int hello_c(int a) { + printf("Hello from C: %d\n", a); + return a+1; +} + + +void write_c(char * s) { + printf("Received string: %s\n", s); +} + +char * alloc_c() { + char * s = malloc(4); + s[0] = 'g'; + s[1] = 'h'; + s[2] = 'c'; + s[3] = '\0'; + return s; +} + +void modify_c(char * s) { + for (int i=0;s[i]!=0;i++) { + if (s[i] >= 'a' && s[i] <= 'z') s[i] -= 32; + } +} + +void callback_c(char *s, char (*f)(char)) { + for (int i=0;s[i]!=0;i++) { + s[i] = f(s[i]); + } +} diff --git a/testsuite/tests/javascript/js-c-sources/js-c-sources01_js.js b/testsuite/tests/javascript/js-c-sources/js-c-sources01_js.js new file mode 100644 index 0000000000000000000000000000000000000000..b6d8b818d873dfd1c7e133ba71d56d2f6fe1da6d --- /dev/null +++ b/testsuite/tests/javascript/js-c-sources/js-c-sources01_js.js @@ -0,0 +1,56 @@ +//#OPTIONS:CPP +//#OPTIONS:EMCC:EXPORTED_FUNCTIONS=_hello_c +//#OPTIONS:EMCC:EXPORTED_FUNCTIONS=_write_c,_alloc_c,_modify_c +//#OPTIONS:EMCC:EXPORTED_FUNCTIONS=_callback_c +//#OPTIONS:EMCC:EXPORTED_FUNCTIONS=_free,_strlen,_malloc + +function hello_c_wrapper(a) { + return _hello_c(a); +} + +function write_c_wrapper(a,o) { + h$withCStringOnHeap(a,o, (ptr) => { + _write_c(ptr) + }); +} + +function alloc_c_wrapper() { + const ptr = _alloc_c(); + const a = h$copyCStringFromHeap(ptr); + _free(ptr); + RETURN_ADDR(a,0); +} + +function modify_c_wrapper(a,o) { + const len = h$strlen(a,o); + h$withOutBufferOnHeap(a, o, len, (ptr) => { + _modify_c(ptr); + }); +} + +function callback_c_wrapper(a,o,f_ptr,f_o) { + const cb_c = h$registerFunPtrOnHeap(f_ptr, f_o, false, 'ii', (cb) => { + // we return the function that will actually be called by the C code. + // This is a wrapper to call the Haskell function (cb). + // + // Here it's simple because we only have 'char' arguments and results + // but with other arguments it could have to copy data from/to the heap + // (e.g. if CStrings were involved). + // + // 'ii' is the type of the function, according to Emscripten (return one + // int, take one int as argument). + // + // Finally we pass `false` because we don't need to unregister the + // callback asynchronously as would be the case for a `destructor`-like + // callback. We unregister it below explicitly after its use. + return function(arg) { + return cb(arg); + }; + }); + const len = h$strlen(a,o); + h$withOutBufferOnHeap(a, o, len, (ptr) => { + _callback_c(ptr, cb_c); + }); + + h$unregisterFunPtrFromHeap(cb_c); +}