From f85f3fdb4646c2fdeba859383fac247beba69c05 Mon Sep 17 00:00:00 2001
From: Cheng Shao <terrorjack@type.dance>
Date: Sat, 16 Dec 2023 23:22:34 +0000
Subject: [PATCH] utils: add JSFFI utility code

This commit adds JavaScript util code to utils to support the wasm
backend's JSFFI functionality:

- jsffi/post-link.mjs, a post-linker to process the linked wasm module
  and emit a small complement JavaScript ESM module to be used with it
  at runtime
- jsffi/prelude.js, a tiny bit of prelude code as the JavaScript side
  of runtime logic
- jsffi/test-runner.mjs, run the jsffi test cases

Co-authored-by: amesgen <amesgen@amesgen.de>
---
 CODEOWNERS                  |   1 +
 utils/jsffi/post-link.mjs   | 100 ++++++++++++++++++++++++++++++++++++
 utils/jsffi/prelude.js      |  91 ++++++++++++++++++++++++++++++++
 utils/jsffi/test-runner.mjs |  55 ++++++++++++++++++++
 4 files changed, 247 insertions(+)
 create mode 100755 utils/jsffi/post-link.mjs
 create mode 100644 utils/jsffi/prelude.js
 create mode 100755 utils/jsffi/test-runner.mjs

diff --git a/CODEOWNERS b/CODEOWNERS
index 71ef05b56727..872965b92b11 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -65,6 +65,7 @@
 /utils/iserv-proxy/               @angerman @simonmar
 /utils/iserv/                     @angerman @simonmar
 /utils/fs/                        @Phyx
+/utils/jsffi                      @TerrorJack
 
 [WinIO related code]
 /libraries/base/GHC/Event/Windows/                   @Phyx
diff --git a/utils/jsffi/post-link.mjs b/utils/jsffi/post-link.mjs
new file mode 100755
index 000000000000..abd55622581b
--- /dev/null
+++ b/utils/jsffi/post-link.mjs
@@ -0,0 +1,100 @@
+#!/usr/bin/env -S node
+
+// This is the post-linker program that processes a wasm module with
+// ghc_wasm_jsffi custom section and outputs an ESM module that
+// exports a function to generate the ghc_wasm_jsffi wasm imports. It
+// has a simple CLI interface: "./post-link.mjs -i foo.wasm -o
+// foo.js", as well as an exported postLink function that takes a
+// WebAssembly.Module object and returns the ESM module content.
+
+import fs from "node:fs/promises";
+import path from "node:path";
+import util from "node:util";
+
+// Each record in the ghc_wasm_jsffi custom section are 3
+// NUL-terminated strings: name, binder, body. We try to parse the
+// body as an expression and fallback to statements, and return the
+// completely parsed arrow function source.
+function parseRecord([name, binder, body]) {
+  for (const src of [`${binder} => (${body})`, `${binder} => {${body}}`]) {
+    try {
+      new Function(`return ${src};`);
+      return src;
+    } catch (_) {}
+  }
+  throw new Error(`parseRecord ${name} ${binder} ${body}`);
+}
+
+// Parse ghc_wasm_jsffi custom sections in a WebAssembly.Module object
+// and return an array of records.
+function parseSections(mod) {
+  const recs = [];
+  const dec = new TextDecoder("utf-8", { fatal: true });
+  const importNames = new Set(
+    WebAssembly.Module.imports(mod)
+      .filter((i) => i.module === "ghc_wasm_jsffi")
+      .map((i) => i.name),
+  );
+  for (const buf of WebAssembly.Module.customSections(mod, "ghc_wasm_jsffi")) {
+    const ba = new Uint8Array(buf);
+    let strs = [];
+    for (let l = 0, r; l < ba.length; l = r + 1) {
+      r = ba.indexOf(0, l);
+      strs.push(dec.decode(ba.subarray(l, r)));
+      if (strs.length === 3) {
+        if (importNames.has(strs[0])) {
+          recs.push(strs);
+        }
+        strs = [];
+      }
+    }
+  }
+  return recs;
+}
+
+
+export async function postLink(mod) {
+  let src = await fs.readFile(path.join(import.meta.dirname, "prelude.js"), {
+    encoding: "utf-8",
+  });
+  src = `${src}\nexport default (__exports) => {`;
+  src = `${src}\nconst __ghc_wasm_jsffi_jsval_manager = new JSValManager();`;
+  src = `${src}\nconst __ghc_wasm_jsffi_finalization_registry = new FinalizationRegistry(sp => __exports.rts_freeStablePtr(sp));`;
+  src = `${src}\nreturn {`;
+  src = `${src}\nnewJSVal: (v) => __ghc_wasm_jsffi_jsval_manager.newJSVal(v),`;
+  src = `${src}\ngetJSVal: (k) => __ghc_wasm_jsffi_jsval_manager.getJSVal(k),`;
+  src = `${src}\nfreeJSVal: (k) => __ghc_wasm_jsffi_jsval_manager.freeJSVal(k),`;
+  src = `${src}\nscheduleWork: () => setImmediate(__exports.rts_schedulerLoop),`;
+  for (const rec of parseSections(mod)) {
+    src = `${src}\n${rec[0]}: ${parseRecord(rec)},`;
+  }
+  return `${src}\n};\n};\n`;
+}
+
+function isMain() {
+  return import.meta.filename === process.argv[1];
+}
+
+async function main() {
+  const { input, output } = util.parseArgs({
+    options: {
+      input: {
+        type: "string",
+        short: "i",
+      },
+      output: {
+        type: "string",
+        short: "o",
+      },
+    },
+  }).values;
+
+  await fs.writeFile(
+    output,
+    await postLink(await WebAssembly.compile(await fs.readFile(input)))
+  );
+}
+
+if (isMain()) {
+  await main();
+}
diff --git a/utils/jsffi/prelude.js b/utils/jsffi/prelude.js
new file mode 100644
index 000000000000..a8a3afd0c2f8
--- /dev/null
+++ b/utils/jsffi/prelude.js
@@ -0,0 +1,91 @@
+// This file implements the JavaScript runtime logic for Haskell
+// modules that use JSFFI. It is not an ESM module, but the template
+// of one; the post-linker script will copy all contents into a new
+// ESM module.
+
+// Manage a mapping from unique 32-bit ids to actual JavaScript
+// values.
+class JSValManager {
+  #lastk = 0;
+  #kv = new Map();
+
+  constructor() {}
+
+  // Maybe just bump this.#lastk? For 64-bit ids that's sufficient,
+  // but better safe than sorry in the 32-bit case.
+  #allocKey() {
+    let k = this.#lastk;
+    while (true) {
+      if (!this.#kv.has(k)) {
+        this.#lastk = k;
+        return k;
+      }
+      k = (k + 1) | 0;
+    }
+  }
+
+  newJSVal(v) {
+    const k = this.#allocKey();
+    this.#kv.set(k, v);
+    return k;
+  }
+
+  // A separate has() call to ensure we can store undefined as a value
+  // too. Also, unconditionally check this since the check is cheap
+  // anyway, if the check fails then there's a use-after-free to be
+  // fixed.
+  getJSVal(k) {
+    if (!this.#kv.has(k)) {
+      throw new WebAssembly.RuntimeError(`getJSVal(${k})`);
+    }
+    return this.#kv.get(k);
+  }
+
+  // Check for double free as well.
+  freeJSVal(k) {
+    if (!this.#kv.delete(k)) {
+      throw new WebAssembly.RuntimeError(`freeJSVal(${k})`);
+    }
+  }
+}
+
+// A simple & fast setImmediate() implementation for browsers. It's
+// not a drop-in replacement for node.js setImmediate() because:
+// 1. There's no clearImmediate(), and setImmediate() doesn't return
+//    anything
+// 2. There's no guarantee that callbacks scheduled by setImmediate()
+//    are executed in the same order (in fact it's the opposite lol),
+//    but you are never supposed to rely on this assumption anyway
+class SetImmediate {
+  #fs = [];
+  #mc = new MessageChannel();
+
+  constructor() {
+    this.#mc.port1.addEventListener("message", () => {
+      this.#fs.pop()();
+    });
+    this.#mc.port1.start();
+  }
+
+  setImmediate(cb, ...args) {
+    this.#fs.push(() => cb(...args));
+    this.#mc.port2.postMessage(undefined);
+  }
+}
+
+// The actual setImmediate() to be used. This is a ESM module top
+// level binding and doesn't pollute the globalThis namespace.
+let setImmediate;
+if (globalThis.setImmediate) {
+  // node.js, bun
+  setImmediate = globalThis.setImmediate;
+} else {
+  try {
+    // deno
+    setImmediate = (await import("node:timers")).setImmediate;
+  } catch {
+    // browsers
+    const sm = new SetImmediate();
+    setImmediate = (cb, ...args) => sm.setImmediate(cb, ...args);
+  }
+}
diff --git a/utils/jsffi/test-runner.mjs b/utils/jsffi/test-runner.mjs
new file mode 100755
index 000000000000..175614f4e173
--- /dev/null
+++ b/utils/jsffi/test-runner.mjs
@@ -0,0 +1,55 @@
+#!/usr/bin/env -S node --expose-gc
+
+import fs from "node:fs/promises";
+import path from "node:path";
+import { WASI } from "wasi";
+import { postLink } from "./post-link.mjs";
+
+// The ESM code returned by post-linker doesn't need to be written to
+// a temporary file first. It can be directly imported from a
+// base64-encoded data URL.
+
+function jsToDataURL(src) {
+  return `data:text/javascript;base64,${Buffer.from(src).toString("base64")}`;
+}
+
+async function evalJSModuleDefault(src) {
+  return (await import(jsToDataURL(src))).default;
+}
+
+const wasm_path = path.resolve(process.argv[2]);
+const js_path = path.join(
+  path.dirname(wasm_path),
+  `${path.basename(wasm_path, ".wasm")}.mjs`
+);
+
+const wasm_module = await WebAssembly.compile(await fs.readFile(wasm_path));
+const js_stub_src = await postLink(wasm_module);
+const wasm_import_factory = await evalJSModuleDefault(js_stub_src);
+
+// Yes, you can pass +RTS and other command line flags to Haskell via
+// "./test-runner.mjs foo.wasm +RTS ..."
+const wasi = new WASI({
+  version: "preview1",
+  args: process.argv.slice(2),
+  env: { PATH: "", PWD: process.cwd() },
+  preopens: { "/": "/" },
+});
+
+// Poor man's tying-the-knot
+let __exports = {};
+
+const wasm_instance = await WebAssembly.instantiate(wasm_module, {
+  ghc_wasm_jsffi: wasm_import_factory(__exports),
+  wasi_snapshot_preview1: wasi.wasiImport,
+});
+
+// Do this immediately before you _initialize()
+Object.assign(__exports, wasm_instance.exports);
+
+// This calls _initialize(). Other wasi implementations may differ,
+// always check their doc/src to be sure
+wasi.initialize(wasm_instance);
+
+const k = (await import(js_path)).default;
+await k(__exports);
-- 
GitLab