From 24657f5f9ea239b4a2bcc8c0503c1a495d0972ec Mon Sep 17 00:00:00 2001 From: Cheng Shao <terrorjack@type.dance> Date: Tue, 1 Oct 2024 19:18:16 +0000 Subject: [PATCH] hadrian/ghci: add wasm dyld This commit adds the wasm dynamic linker implementation, as well as ghci logic to call it and hadrian logic to install it to the correct location. See the top-level note in utils/jsffi/dyld.mjs for more details. (cherry picked from commit 549582eff80da6a8c5b7449755eaa726c208c324) --- hadrian/src/Base.hs | 1 + hadrian/src/Rules/Generate.hs | 4 + libraries/ghci/GHCi/ObjLink.hs | 96 +++- libraries/ghci/GHCi/Server.hs | 11 + libraries/ghci/ghci.cabal.in | 3 + utils/jsffi/dyld.mjs | 857 +++++++++++++++++++++++++++++++++ utils/jsffi/post-link.mjs | 6 +- 7 files changed, 966 insertions(+), 12 deletions(-) create mode 100755 utils/jsffi/dyld.mjs diff --git a/hadrian/src/Base.hs b/hadrian/src/Base.hs index 98c64ce1edd..83d9bfc549e 100644 --- a/hadrian/src/Base.hs +++ b/hadrian/src/Base.hs @@ -153,6 +153,7 @@ ghcLibDeps stage iplace = do , "settings" , "ghc-usage.txt" , "ghci-usage.txt" + , "dyld.mjs" , "post-link.mjs" , "prelude.mjs" ] diff --git a/hadrian/src/Rules/Generate.hs b/hadrian/src/Rules/Generate.hs index 5ed31727663..f3a85cfc820 100644 --- a/hadrian/src/Rules/Generate.hs +++ b/hadrian/src/Rules/Generate.hs @@ -212,6 +212,10 @@ copyRules = do prefix -/- "ghc-interp.js" <~ return "." prefix -/- "template-hsc.h" <~ return (pkgPath hsc2hs -/- "data") + prefix -/- "dyld.mjs" %> \file -> do + copyFile ("utils/jsffi" -/- makeRelative prefix file) file + makeExecutable file + prefix -/- "post-link.mjs" %> \file -> do copyFile ("utils/jsffi" -/- makeRelative prefix file) file makeExecutable file diff --git a/libraries/ghci/GHCi/ObjLink.hs b/libraries/ghci/GHCi/ObjLink.hs index 83d3d02912f..a7232f1ccbe 100644 --- a/libraries/ghci/GHCi/ObjLink.hs +++ b/libraries/ghci/GHCi/ObjLink.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, UnboxedTuples, MagicHash #-} +{-# LANGUAGE CPP, UnboxedTuples, MagicHash, ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-name-shadowing #-} -- -- (c) The University of Glasgow 2002-2006 @@ -38,6 +38,10 @@ import GHC.Exts import System.Posix.Internals ( CFilePath, withFilePath, peekFilePath ) import System.FilePath ( dropExtension, normalise ) +#if defined(wasm32_HOST_ARCH) +import Control.Exception (catch, evaluate) +import GHC.Wasm.Prim +#endif -- --------------------------------------------------------------------------- -- RTS Linker Interface @@ -57,6 +61,78 @@ data ShouldRetainCAFs -- frees these StablePtrs, which will allow the CAFs to -- be GC'd and the code to be removed. +#if defined(wasm32_HOST_ARCH) + +-- On wasm, retain_cafs flag is ignored, revertCAFs is a no-op +initObjLinker :: ShouldRetainCAFs -> IO () +initObjLinker _ = pure () + +loadDLL :: String -> IO (Either String (Ptr LoadedDLL)) +loadDLL f = + m `catch` \(err :: JSException) -> + pure $ Left $ "loadDLL failed for " <> f <> ": " <> show err + where + m = do + evaluate =<< js_loadDLL (toJSString f) + pure $ Right nullPtr + +foreign import javascript safe "__exports.__dyld.loadDLL($1)" + js_loadDLL :: JSString -> IO () + +loadArchive :: String -> IO () +loadArchive f = throwIO $ ErrorCall $ "loadArchive: unsupported on wasm for " <> f + +loadObj :: String -> IO () +loadObj f = throwIO $ ErrorCall $ "loadObj: unsupported on wasm for " <> f + +unloadObj :: String -> IO () +unloadObj f = throwIO $ ErrorCall $ "unloadObj: unsupported on wasm for " <> f + +purgeObj :: String -> IO () +purgeObj f = throwIO $ ErrorCall $ "purgeObj: unsupported on wasm for " <> f + +lookupSymbol :: String -> IO (Maybe (Ptr a)) +lookupSymbol sym = do + r <- js_lookupSymbol $ toJSString sym + evaluate $ if r == nullPtr then Nothing else Just r + +foreign import javascript unsafe "__exports.__dyld.lookupSymbol($1)" + js_lookupSymbol :: JSString -> IO (Ptr a) + +lookupSymbolInDLL :: Ptr LoadedDLL -> String -> IO (Maybe (Ptr a)) +lookupSymbolInDLL _ sym = + throwIO $ ErrorCall $ "lookupSymbolInDLL: unsupported on wasm for " <> sym + +resolveObjs :: IO Bool +resolveObjs = pure True + +-- dyld does not maintain unique handles for added search paths, and +-- removeLibrarySearchPath is simply a no-op, so it's fine to return a +-- null pointer as a placeholder +addLibrarySearchPath :: String -> IO (Ptr ()) +addLibrarySearchPath p = do + evaluate =<< js_addLibrarySearchPath (toJSString p) + pure nullPtr + +foreign import javascript safe "__exports.__dyld.addLibrarySearchPath($1)" + js_addLibrarySearchPath :: JSString -> IO () + +removeLibrarySearchPath :: Ptr () -> IO Bool +removeLibrarySearchPath _ = pure True + +findSystemLibrary :: String -> IO (Maybe String) +findSystemLibrary f = m `catch` \(_ :: JSException) -> pure Nothing + where + m = do + p' <- js_findSystemLibrary (toJSString f) + p <- evaluate $ fromJSString p' + pure $ Just p + +foreign import javascript safe "__exports.__dyld.findSystemLibrary($1)" + js_findSystemLibrary :: JSString -> IO JSString + +#else + initObjLinker :: ShouldRetainCAFs -> IO () initObjLinker RetainCAFs = c_initLinker_ 1 initObjLinker _ = c_initLinker_ 0 @@ -79,14 +155,6 @@ lookupSymbolInDLL dll str_in = do then return Nothing else return (Just addr) -lookupClosure :: String -> IO (Maybe HValueRef) -lookupClosure str = do - m <- lookupSymbol str - case m of - Nothing -> return Nothing - Just (Ptr addr) -> case addrToAny# addr of - (# a #) -> Just <$> mkRemoteRef (HValue a) - prefixUnderscore :: String -> String prefixUnderscore | cLeadingUnderscore = ('_':) @@ -205,3 +273,13 @@ isWindowsHost = True #else isWindowsHost = False #endif + +#endif + +lookupClosure :: String -> IO (Maybe HValueRef) +lookupClosure str = do + m <- lookupSymbol str + case m of + Nothing -> return Nothing + Just (Ptr addr) -> case addrToAny# addr of + (# a #) -> Just <$> mkRemoteRef (HValue a) diff --git a/libraries/ghci/GHCi/Server.hs b/libraries/ghci/GHCi/Server.hs index f46060a01ca..69a5ea7b597 100644 --- a/libraries/ghci/GHCi/Server.hs +++ b/libraries/ghci/GHCi/Server.hs @@ -9,7 +9,9 @@ import Prelude import GHCi.Run import GHCi.TH import GHCi.Message +#if !defined(wasm32_HOST_ARCH) import GHCi.Signals +#endif import GHCi.Utils import Control.DeepSeq @@ -120,7 +122,11 @@ defaultServer = do when verbose $ printf "GHC iserv starting (in: %s; out: %s)\n" (show inh) (show outh) + +#if !defined(wasm32_HOST_ARCH) installSignalHandlers +#endif + lo_ref <- newIORef Nothing let pipe = Pipe{pipeRead = inh, pipeWrite = outh, pipeLeftovers = lo_ref} @@ -146,3 +152,8 @@ dieWithUsage = do msg = "usage: iserv <write-fd> <read-fd> [-v]" #endif +#if defined(wasm32_HOST_ARCH) + +foreign export javascript "defaultServer" defaultServer :: IO () + +#endif diff --git a/libraries/ghci/ghci.cabal.in b/libraries/ghci/ghci.cabal.in index c18d7850ce1..ecf6fb701ce 100644 --- a/libraries/ghci/ghci.cabal.in +++ b/libraries/ghci/ghci.cabal.in @@ -104,3 +104,6 @@ library if !os(windows) Build-Depends: unix >= 2.7 && < 2.9 + + if arch(wasm32) + build-depends: ghc-experimental diff --git a/utils/jsffi/dyld.mjs b/utils/jsffi/dyld.mjs new file mode 100755 index 00000000000..298ce6abd32 --- /dev/null +++ b/utils/jsffi/dyld.mjs @@ -0,0 +1,857 @@ +#!/usr/bin/env -S node --disable-warning=ExperimentalWarning --experimental-wasm-type-reflection --no-turbo-fast-api-calls --wasm-lazy-validation + +// Note [The Wasm Dynamic Linker] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// +// This nodejs script implements the wasm dynamic linker to support +// Template Haskell & ghci in the GHC wasm backend. It loads wasm +// shared libraries, resolves symbols and runs code on demand, +// communicating with the host GHC via the standard iserv protocol. +// Below are questions/answers to elaborate the introduction: +// +// *** What works right now and what doesn't work yet? +// +// loadDLL & bytecode interpreter work. Template Haskell & ghci work. +// Profiled dynamic code works. Compiled code and bytecode can all be +// loaded, though the side effects are constrained to what's supported +// by wasi preview1: we map the full host filesystem into wasm cause +// yolo, but things like processes and sockets don't work. +// +// JSFFI is unsupported in bytecode yet. So in ghci you can't type in +// code that contains JSFFI declarations, though you can invoke +// compiled code that uses JSFFI. +// +// loadArchive/loadObj etc are unsupported and will stay that way. The +// only form of compiled code that can be loaded is wasm shared +// library. There's no code unloading logic. The retain_cafs flag is +// ignored and revertCAFs is a no-op. +// +// ghc -j doesn't work yet (#25285). +// +// *** What are implications to end users? +// +// Even if you intend to compile fully static wasm modules, you must +// compile everything with -dynamic-too to ensure shared libraries are +// present, otherwise TH doesn't work. In cabal, this is achieved by +// setting `shared: True` in the global cabal config (or under a +// `package *` stanza in your `cabal.project`). You also need to set +// `library-for-ghci: False` since that's unsupported. +// +// *** Why not extend the RTS linker in C like every other new +// platform? +// +// Aside from all the pain of binary manipulation in C, what you can +// do in C on wasm is fairly limited: for instance, you can't manage +// executable memory regions at all. So you need a lot of back and +// forth between C and JS host, totally not worth the extra effort +// just for the sake of making the original C RTS linker interface +// partially work. +// +// *** What kind of wasm shared library can be loaded? What features +// work to what extent? +// +// We support .so files produced by wasm-ld --shared which conforms to +// https://github.com/WebAssembly/tool-conventions/blob/f44d6c526a06a19eec59003a924e475f57f5a6a1/DynamicLinking.md. +// All .so files in the wasm32-wasi sysroot as well as those produced +// by ghc can be loaded. +// +// For simplicity, we don't have any special treatment for weak +// symbols. Any unresolved symbol at link-time will not produce an +// error, they will only trigger an error when they're used at +// run-time and the data/function definition has not been realized by +// then. +// +// There's no dlopen/dlclose etc exposed to the C/C++ world, the +// interfaces here are directly called by JSFFI imports in ghci. +// There's no so unloading logic yet, but it would be fairly easy to +// add once we need it. +// +// No fancy stuff like LD_PRELOAD, LD_LIBRARY_PATH etc. +// +// *** How does GHC interact with the wasm dynamic linker? +// +// dyld.mjs is tracked as a build dependency and installed in GHC +// libdir with executable perms. When GHC targets wasm and needs to +// start iserv, it starts dyld.mjs and manage the process lifecycle +// through the entire GHC session. nodejs location is not tracked and +// must be present in PATH. +// +// GHC passes the libHSghci*.so location via command line, so dyld.mjs +// will load it as well as all dependent so files, then start the +// default iserv implementation in the ghci library and read/write +// binary messages. nodejs receives pipe file descriptors from GHC, +// though uvwasi doesn't support preopening them as wasi virtual file +// descriptors, therefore we hook a few wasi syscalls and designate +// our own preopen file descriptors for IPC logic. +// +// dyld.mjs inherits default stdin/stdout/stderr from GHC and that's +// how ghci works. Like native external interpreter, you can use the +// -opti GHC flag to pass process arguments, like RTS flags or -opti-v +// to dump the iserv messages. + +import assert from "node:assert/strict"; +import fs from "node:fs/promises"; +import fsSync from "node:fs"; +import path from "node:path"; +import { WASI } from "node:wasi"; +import { JSValManager } from "./prelude.mjs"; +import { parseRecord, parseSections } from "./post-link.mjs"; + +// A simple binary parser +class Parser { + #buf; + #offset = 0; + + constructor(buf) { + this.#buf = buf; + } + + eof() { + return this.#offset === this.#buf.length; + } + + skip(len) { + this.#offset += len; + assert(this.#offset <= this.#buf.length); + } + + readUInt8() { + const r = this.#buf[this.#offset]; + this.#offset += 1; + return r; + } + + readULEB128() { + let acc = 0n, + shift = 0n; + while (true) { + const byte = this.readUInt8(); + acc |= BigInt(byte & 0x7f) << shift; + shift += 7n; + if (byte >> 7 === 0) { + break; + } + } + return Number(acc); + } + + readBuffer() { + const len = this.readULEB128(); + const r = this.#buf.subarray(this.#offset, this.#offset + len); + this.#offset += len; + assert(this.#offset <= this.#buf.length); + return r; + } + + readString() { + return new TextDecoder("utf-8", { fatal: true }).decode(this.readBuffer()); + } +} + +// Parse the dylink.0 section of a wasm module +function parseDyLink0(buf) { + const p0 = new Parser(buf); + // magic, version + p0.skip(8); + // section id + assert(p0.readUInt8() === 0); + const p1 = new Parser(p0.readBuffer()); + // custom section name + assert(p1.readString() === "dylink.0"); + + const r = { neededSos: [], exportInfo: [], importInfo: [] }; + while (!p1.eof()) { + const subsection_type = p1.readUInt8(); + const p2 = new Parser(p1.readBuffer()); + switch (subsection_type) { + case 1: { + // WASM_DYLINK_MEM_INFO + r.memSize = p2.readULEB128(); + r.memP2Align = p2.readULEB128(); + r.tableSize = p2.readULEB128(); + r.tableP2Align = p2.readULEB128(); + break; + } + case 2: { + // WASM_DYLINK_NEEDED + // + // There may be duplicate entries. Not a big deal to not + // dedupe, but why not. + const n = p2.readULEB128(); + const acc = new Set(); + for (let i = 0; i < n; ++i) { + acc.add(p2.readString()); + } + r.neededSos = [...acc]; + break; + } + case 3: { + // WASM_DYLINK_EXPORT_INFO + // + // Not actually used yet, kept for completeness in case of + // future usage. + const n = p2.readULEB128(); + for (let i = 0; i < n; ++i) { + const name = p2.readString(); + const flags = p2.readULEB128(); + r.exportInfo.push({ name, flags }); + } + break; + } + case 4: { + // WASM_DYLINK_IMPORT_INFO + // + // Same. + const n = p2.readULEB128(); + for (let i = 0; i < n; ++i) { + const module = p2.readString(); + const name = p2.readString(); + const flags = p2.readULEB128(); + r.importInfo.push({ module, name, flags }); + } + break; + } + default: { + throw new Error(`unknown subsection type ${subsection_type}`); + } + } + } + + return r; +} + +// The real stuff +class DyLD { + // Wasm page size. + static #pageSize = 0x10000; + + // Placeholder value of GOT.mem addresses that must be imported + // first and later modified to be the correct relocated pointer. + // This value is 0xffffffff subtracts one page, so hopefully any + // memory access near this address will trap immediately. + // + // In JS API i32 is signed, hence this layer of redirection. + static #poison = new WebAssembly.Global( + { value: "i32", mutable: false }, + 0xffffffff - DyLD.#pageSize + ).value; + + // When processing exports, skip the following ones since they're + // generated by wasm-ld. + static #ldGeneratedExportNames = new Set([ + "_initialize", + "__wasm_apply_data_relocs", + "__wasm_apply_global_relocs", + "__wasm_call_ctors", + ]); + + // Virtual file descriptors designated for IPC logic and passed to + // iserv main. uvwasi doesn't support preopening host file + // descriptors as wasi file descriptors so we designate them and + // hook certain wasi syscalls on them, so that the pipe file + // descriptors passed from GHC can be used to communicate with the + // wasm side. + static read_fd = 0x7ffffffe; + static write_fd = 0x7fffffff; + + // The WASI instance to provide wasi imports, shared across all wasm + // instances + #wasi; + + // The actual wasi_snapshot_preview1 import object, after hooking + // the wasi syscalls provided by uvwasi. + #wasiImport; + + // Wasm memory & table + #memory = new WebAssembly.Memory({ initial: 1 }); + #table = new WebAssembly.Table({ element: "anyfunc", initial: 1 }); + + // __stack_pointer + #sp = new WebAssembly.Global( + { + value: "i32", + mutable: true, + }, + DyLD.#pageSize + ); + + // The JSVal manager + #jsvalManager = new JSValManager(); + + // Deduped absolute paths of directories where we lookup .so files + #rpaths = new Set(); + + // sonames of loaded sos. + // + // Note that "soname" is just xxx.so as in file path, not actually + // parsed from a section in .so file. wasm-ld does accept + // --soname=<value>, but it just writes the module name to the name + // section, which can be stripped by wasm-opt and such. We do not + // rely on the name section at all. + // + // Invariant: soname is globally unique! + #loadedSos = new Set(); + + // Mapping from export names to export funcs. It's also passed as + // __exports in JSFFI code, hence the "memory" special field. __dyld + // is used by ghci to call into here. + exportFuncs = { memory: this.#memory, __dyld: this }; + + // The GOT.func table. + #gotFunc = {}; + + // The GOT.mem table. By wasm dylink convention, a wasm global + // exported by .so is always assumed to be a GOT.mem entry, not a + // re-exported actual wasm global. + #gotMem = {}; + + // Global STG registers + #regs = {}; + + constructor({ args, out_fd, in_fd }) { + this.#wasi = new WASI({ + version: "preview1", + args, + env: { PATH: "", PWD: process.cwd() }, + preopens: { "/": "/" }, + }); + + this.#wasiImport = {}; + + // https://gitlab.haskell.org/ghc/wasi-libc/-/blob/master/libc-bottom-half/headers/public/wasi/api.h + for (const k in this.#wasi.wasiImport) { + switch (k) { + case "fd_fdstat_get": { + this.#wasiImport[k] = (fd, retptr0) => { + switch (fd) { + case DyLD.read_fd: { + const fdstat = new DataView(this.#memory.buffer, retptr0, 24); + fdstat.setUint8(0, 6); // __wasi_filetype_t fs_filetype; + fdstat.setUint16(2, 0, true); // __wasi_fdflags_t fs_flags; + fdstat.setBigUint64(8, (1n << 1n) | (1n << 21n), true); // __wasi_rights_t fs_rights_base; + fdstat.setBigUint64(16, (1n << 1n) | (1n << 21n), true); // __wasi_rights_t fs_rights_inheriting; + return 0; + } + case DyLD.write_fd: { + const fdstat = new DataView(this.#memory.buffer, retptr0, 24); + fdstat.setUint8(0, 6); // __wasi_filetype_t fs_filetype; + fdstat.setUint16(2, 0, true); // __wasi_fdflags_t fs_flags; + fdstat.setBigUint64(8, (1n << 6n) | (1n << 21n), true); // __wasi_rights_t fs_rights_base; + fdstat.setBigUint64(16, (1n << 1n) | (1n << 21n), true); // __wasi_rights_t fs_rights_inheriting; + return 0; + } + default: { + return this.#wasi.wasiImport[k](fd, retptr0); + } + } + }; + break; + } + + case "fd_filestat_get": { + this.#wasiImport[k] = (fd, retptr0) => { + switch (fd) { + case DyLD.read_fd: { + const filestat = new DataView(this.#memory.buffer, retptr0, 64); + filestat.setBigUint64(0, 109n, true); // __wasi_device_t dev; + filestat.setBigUint64(8, BigInt(DyLD.read_fd), true); // __wasi_inode_t ino; + filestat.setUint8(16, 6); // __wasi_filetype_t filetype; + filestat.setBigUint64(24, 1n, true); // __wasi_linkcount_t nlink; + filestat.setBigUint64(32, 0n, true); // __wasi_filesize_t size; + filestat.setBigUint64(40, 0n, true); // __wasi_timestamp_t atim; + filestat.setBigUint64(48, 0n, true); // __wasi_timestamp_t mtim; + filestat.setBigUint64(56, 0n, true); // __wasi_timestamp_t ctim; + return 0; + } + case DyLD.write_fd: { + const filestat = new DataView(this.#memory.buffer, retptr0, 64); + filestat.setBigUint64(0, 109n, true); // __wasi_device_t dev; + filestat.setBigUint64(8, BigInt(DyLD.read_fd), true); // __wasi_inode_t ino; + filestat.setUint8(16, 6); // __wasi_filetype_t filetype; + filestat.setBigUint64(24, 1n, true); // __wasi_linkcount_t nlink; + filestat.setBigUint64(32, 0n, true); // __wasi_filesize_t size; + filestat.setBigUint64(40, 0n, true); // __wasi_timestamp_t atim; + filestat.setBigUint64(48, 0n, true); // __wasi_timestamp_t mtim; + filestat.setBigUint64(56, 0n, true); // __wasi_timestamp_t ctim; + return 0; + } + default: { + return this.#wasi.wasiImport[k](fd, retptr0); + } + } + }; + break; + } + + case "fd_read": { + this.#wasiImport[k] = (fd, iovs, iovs_len, retptr0) => { + switch (fd) { + case DyLD.read_fd: { + assert(iovs_len === 1); + const iov = new DataView(this.#memory.buffer, iovs, 8); + const buf = iov.getUint32(0, true), + buf_len = iov.getUint32(4, true); + const bytes_read = fsSync.readSync( + in_fd, + new Uint8Array(this.#memory.buffer, buf, buf_len) + ); + new DataView(this.#memory.buffer, retptr0, 4).setUint32( + 0, + bytes_read, + true + ); + return 0; + } + default: { + return this.#wasi.wasiImport[k](fd, iovs, iovs_len, retptr0); + } + } + }; + break; + } + + case "fd_write": { + this.#wasiImport[k] = (fd, iovs, iovs_len, retptr0) => { + switch (fd) { + case DyLD.write_fd: { + assert(iovs_len === 1); + const iov = new DataView(this.#memory.buffer, iovs, 8); + const buf = iov.getUint32(0, true), + buf_len = iov.getUint32(4, true); + const bytes_written = fsSync.writeSync( + out_fd, + new Uint8Array(this.#memory.buffer, buf, buf_len) + ); + new DataView(this.#memory.buffer, retptr0, 4).setUint32( + 0, + bytes_written, + true + ); + return 0; + } + default: { + return this.#wasi.wasiImport[k](fd, iovs, iovs_len, retptr0); + } + } + }; + break; + } + + default: { + this.#wasiImport[k] = (...args) => this.#wasi.wasiImport[k](...args); + break; + } + } + } + + // Keep this in sync with rts/wasm/Wasm.S! + for (let i = 1; i <= 10; ++i) { + this.#regs[`__R${i}`] = new WebAssembly.Global({ + value: "i32", + mutable: true, + }); + } + + for (let i = 1; i <= 6; ++i) { + this.#regs[`__F${i}`] = new WebAssembly.Global({ + value: "f32", + mutable: true, + }); + } + + for (let i = 1; i <= 6; ++i) { + this.#regs[`__D${i}`] = new WebAssembly.Global({ + value: "f64", + mutable: true, + }); + } + + this.#regs.__L1 = new WebAssembly.Global({ value: "i64", mutable: true }); + + for (const k of ["__Sp", "__SpLim", "__Hp", "__HpLim"]) { + this.#regs[k] = new WebAssembly.Global({ value: "i32", mutable: true }); + } + } + + // removeLibrarySearchPath is a no-op in ghci. If you have a use + // case where it's actually needed, I would like to hear.. + addLibrarySearchPath(p) { + this.#rpaths.add(path.resolve(p)); + } + + // f can be either just soname or an absolute path, will be + // canonicalized and checked for file existence here. Throws if + // non-existent. + async findSystemLibrary(f) { + if (path.isAbsolute(f)) { + await fs.access(f, fs.constants.R_OK); + return f; + } + const r = ( + await Promise.allSettled( + [...this.#rpaths].map(async (p) => { + const r = path.resolve(p, f); + await fs.access(r, fs.constants.R_OK); + return r; + }) + ) + ).find(({ status }) => status === "fulfilled"); + assert(r, `findSystemLibrary(${f}): not found in ${[...this.#rpaths]}`); + return r.value; + } + + // When we do loadDLL, we first perform "downsweep" which return a + // toposorted array of dependencies up to itself, then sequentially + // load the downsweep result. + // + // The rationale of a separate downsweep phase, instead of a simple + // recursive loadDLL function is: V8 delegates async + // WebAssembly.compile to a background worker thread pool. To + // maintain consistent internal linker state, we *must* load each so + // file sequentially, but it's okay to kick off compilation asap, + // store the Promise in downsweep result and await for the actual + // WebAssembly.Module in loadDLL logic. This way we can harness some + // background parallelism. + async #downsweep(p) { + const soname = path.basename(p); + + if (this.#loadedSos.has(soname)) { + return []; + } + + // Do this before loading dependencies to break potential cycles. + this.#loadedSos.add(soname); + + if (path.isAbsolute(p)) { + // GHC may attempt to load libghc_tmp_2.so that needs + // libghc_tmp_1.so in a temporary directory without adding that + // directory via addLibrarySearchPath + this.addLibrarySearchPath(path.dirname(p)); + } else { + p = await this.findSystemLibrary(p); + } + + const buf = await fs.readFile(p); + const modp = WebAssembly.compile(buf); + // Parse dylink.0 from the raw buffer, not via + // WebAssembly.Module.customSections(). At this point we only care + // about WASM_DYLINK_NEEDED, but might as well do the rest of the + // parsing anyway. + const r = parseDyLink0(buf); + r.modp = modp; + r.soname = soname; + let acc = []; + for (const dep of r.neededSos) { + acc.push(...(await this.#downsweep(dep))); + } + acc.push(r); + return acc; + } + + // The real stuff + async loadDLL(p) { + for (const { + memSize, + memP2Align, + tableSize, + tableP2Align, + modp, + soname, + } of await this.#downsweep(p)) { + const import_obj = { + wasi_snapshot_preview1: this.#wasiImport, + env: { + memory: this.#memory, + __indirect_function_table: this.#table, + __stack_pointer: this.#sp, + ...this.exportFuncs, + }, + regs: this.#regs, + // Keep this in sync with post-link.mjs! + ghc_wasm_jsffi: { + newJSVal: (v) => this.#jsvalManager.newJSVal(v), + getJSVal: (k) => this.#jsvalManager.getJSVal(k), + freeJSVal: (k) => this.#jsvalManager.freeJSVal(k), + scheduleWork: () => setImmediate(this.exportFuncs.rts_schedulerLoop), + }, + "GOT.mem": this.#gotMem, + "GOT.func": this.#gotFunc, + }; + + // __memory_base & __table_base, different for each .so + let memory_base; + let table_base = this.#table.grow(tableSize); + assert(tableP2Align === 0); + + // libc.so is always the first one to be ever loaded and has VIP + // treatment. It will never be unloaded even if we support + // unloading in the future. Nor do we support multiple libc.so + // in the same address space. + if (soname === "libc.so") { + // Starting from 0x0: one page of C stack, then global data + // segments of libc.so, then one page space between + // __heap_base/__heap_end so that dlmalloc can initialize + // global state. wasm-ld aligns __heap_base to page sized so + // we follow suit. + assert(memP2Align <= Math.log2(DyLD.#pageSize)); + memory_base = DyLD.#pageSize; + const data_pages = Math.ceil(memSize / DyLD.#pageSize); + this.#memory.grow(data_pages + 1); + + this.#gotMem.__heap_base = new WebAssembly.Global( + { value: "i32", mutable: true }, + DyLD.#pageSize * (1 + data_pages) + ); + this.#gotMem.__heap_end = new WebAssembly.Global( + { value: "i32", mutable: true }, + DyLD.#pageSize * (1 + data_pages + 1) + ); + } else { + // TODO: this would also be __dso_handle@GOT, in case we + // implement so unloading logic in the future. + memory_base = this.exportFuncs.aligned_alloc(1 << memP2Align, memSize); + } + + import_obj.env.__memory_base = new WebAssembly.Global( + { value: "i32", mutable: false }, + memory_base + ); + import_obj.env.__table_base = new WebAssembly.Global( + { value: "i32", mutable: false }, + table_base + ); + + const mod = await modp; + + // Fulfill the ghc_wasm_jsffi imports + Object.assign( + import_obj.ghc_wasm_jsffi, + new Function( + "return (__exports) => ({".concat( + ...parseSections(mod).map( + (rec) => `${rec[0]}: ${parseRecord(rec)}, ` + ), + "});" + ) + )()(this.exportFuncs) + ); + + // Fulfill the rest of the imports + for (const { module, name, kind } of WebAssembly.Module.imports(mod)) { + // Already there, no handling required + if (import_obj[module] && import_obj[module][name]) { + continue; + } + + // Add a lazy function stub in env, but don't put it into + // exportFuncs yet. This lazy binding is only effective for + // the current so, since env is a transient object created on + // the fly. + if (module === "env" && kind === "function") { + import_obj.env[name] = (...args) => { + if (!this.exportFuncs[name]) { + throw new WebAssembly.RuntimeError( + `non-existent function ${name}` + ); + } + return this.exportFuncs[name](...args); + }; + continue; + } + + // Add a lazy GOT.mem entry with poison value, in the hope + // that if they're used before being resolved with real + // addresses, a memory trap will be triggered immediately. + if (module === "GOT.mem" && kind === "global") { + this.#gotMem[name] = new WebAssembly.Global( + { value: "i32", mutable: true }, + DyLD.#poison + ); + continue; + } + + // Missing entry in GOT.func table, could be already defined + // or not + if (module === "GOT.func" && kind === "global") { + // A dependency has exported the function, just create the + // entry on the fly + if (this.exportFuncs[name]) { + this.#gotFunc[name] = new WebAssembly.Global( + { value: "i32", mutable: true }, + this.#table.grow(1, this.exportFuncs[name]) + ); + continue; + } + + // For lazy GOT.func entries we can do better than poison: + // insert a stub in the table, so we at least get an error + // message that includes the missing function's name, not a + // mysterious table trap. The function type is Cmm function + // type as a best effort guess, if there's a type mismatch + // then call_indirect would trap. + // + // Also set a __poison field since we can't compare value + // against DyLD.#poison. + this.#gotFunc[name] = new WebAssembly.Global( + { value: "i32", mutable: true }, + this.#table.grow( + 1, + new WebAssembly.Function( + { parameters: [], results: ["i32"] }, + () => { + throw new WebAssembly.RuntimeError( + `non-existent function ${name}` + ); + } + ) + ) + ); + this.#gotFunc[name].__poison = true; + continue; + } + + throw new Error( + `cannot handle import ${module}.${name} with kind ${kind}` + ); + } + + // Fingers crossed... + const instance = await WebAssembly.instantiate(mod, import_obj); + + // Process the exports + for (const k in instance.exports) { + // Boring stuff + if (DyLD.#ldGeneratedExportNames.has(k)) { + continue; + } + + // Invariant: each function symbol can be defined only once. + // This is incorrect for weak symbols which are allowed to + // appear multiple times but this is sufficient in practice. + assert( + !this.exportFuncs[k], + `duplicate symbol ${k} when loading ${soname}` + ); + + const v = instance.exports[k]; + + if (typeof v === "function") { + this.exportFuncs[k] = v; + // If there's a lazy GOT.func entry, put the function in the + // table and fulfill the entry. Otherwise no need to do + // anything, if it's required later a GOT.func entry will be + // created on demand. + if (this.#gotFunc[k]) { + // ghc-prim/ghc-internal may export functions imported by + // rts + assert(this.#gotFunc[k].__poison); + delete this.#gotFunc[k].__poison; + this.#table.set(this.#gotFunc[k].value, v); + } + continue; + } + + // It's a GOT.mem entry + if (v instanceof WebAssembly.Global) { + const addr = v.value + memory_base; + if (this.#gotMem[k]) { + assert(this.#gotMem[k].value === DyLD.#poison); + this.#gotMem[k].value = addr; + } else { + this.#gotMem[k] = new WebAssembly.Global( + { value: "i32", mutable: true }, + addr + ); + } + continue; + } + + throw new Error(`cannot handle export ${k} ${v}`); + } + + // We call wasi.initialize when loading libc.so, then reuse the + // wasi instance globally. When loading later .so files, just + // manually invoke _initialize(). + if (soname === "libc.so") { + instance.exports.__wasm_apply_data_relocs(); + // wasm-ld forbits --export-memory with --shared, I don't know + // why but this is sufficient to make things work + this.#wasi.initialize({ + exports: { + memory: this.#memory, + _initialize: instance.exports._initialize, + }, + }); + continue; + } + + const init = () => { + // See + // https://github.com/llvm/llvm-project/blob/llvmorg-19.1.1/lld/wasm/Writer.cpp#L1430, + // there's also __wasm_init_memory (not relevant yet, we don't + // use passive segments) & __wasm_apply_global_relocs but + // those are included in the start function and should have + // been called upon instantiation. + instance.exports.__wasm_apply_data_relocs(); + + instance.exports._initialize(); + }; + + // rts init must be deferred until ghc-internal symbols are + // exported. We hard code this hack for now. + if (/libHSrts-\d+(\.\d+)*/i.test(soname)) { + this.rts_init = init; + continue; + } + if (/libHSghc-internal-\d+(\.\d+)*/i.test(soname)) { + this.rts_init(); + delete this.rts_init; + } + init(); + } + } + + lookupSymbol(sym) { + if (this.#gotMem[sym] && this.#gotMem[sym].value !== DyLD.#poison) { + return this.#gotMem[sym].value; + } + if (this.#gotFunc[sym] && !this.#gotFunc[sym].__poison) { + return this.#gotFunc[sym].value; + } + // Not in GOT.func yet, create the entry on demand + if (this.exportFuncs[sym]) { + assert(!this.#gotFunc[sym]); + const addr = this.#table.grow(1, this.exportFuncs[sym]); + this.#gotFunc[sym] = new WebAssembly.Global( + { value: "i32", mutable: true }, + addr + ); + return addr; + } + return 0; + } +} + +function isMain() { + return import.meta.filename === process.argv[1]; +} + +if (isMain()) { + // sysroot libdir that contains libc.so etc + const libdir = process.argv[2], + ghci_so_path = process.argv[3]; + + // Inherited pipe file descriptors from GHC + const out_fd = Number.parseInt(process.argv[4]), + in_fd = Number.parseInt(process.argv[5]); + + const dyld = new DyLD({ + args: ["dyld.so", DyLD.write_fd, DyLD.read_fd, ...process.argv.slice(6)], + out_fd, + in_fd, + }); + dyld.addLibrarySearchPath(libdir); + await dyld.loadDLL(ghci_so_path); + + await dyld.exportFuncs.defaultServer(); +} diff --git a/utils/jsffi/post-link.mjs b/utils/jsffi/post-link.mjs index 5de39fdc400..02fb631f0ac 100755 --- a/utils/jsffi/post-link.mjs +++ b/utils/jsffi/post-link.mjs @@ -15,7 +15,7 @@ import util from "node:util"; // NUL-terminated strings: name, binder, body. We try to parse the // body as an expression and fallback to statements, and return the // completely parsed arrow function source. -function parseRecord([name, binder, body]) { +export function parseRecord([name, binder, body]) { for (const src of [`${binder} => (${body})`, `${binder} => {${body}}`]) { try { new Function(`return ${src};`); @@ -27,13 +27,13 @@ function parseRecord([name, binder, body]) { // Parse ghc_wasm_jsffi custom sections in a WebAssembly.Module object // and return an array of records. -function parseSections(mod) { +export function parseSections(mod) { const recs = []; const dec = new TextDecoder("utf-8", { fatal: true }); const importNames = new Set( WebAssembly.Module.imports(mod) .filter((i) => i.module === "ghc_wasm_jsffi") - .map((i) => i.name), + .map((i) => i.name) ); for (const buf of WebAssembly.Module.customSections(mod, "ghc_wasm_jsffi")) { const ba = new Uint8Array(buf); -- GitLab