From 3f92996570f10fe13bd052344f7412500aa8736a Mon Sep 17 00:00:00 2001
From: sheaf <sam.derbyshire@gmail.com>
Date: Wed, 17 Aug 2022 13:22:02 +0200
Subject: [PATCH] Allow offline bootstrapping of cabal-install

This ports to cabal-install the offline bootstrapping logic which was
introduced for Hadrian in GHC MR !6315.

This adds a "fetch" command to the bootstrap script, which fetches
all the dependency tarballs from Hackage, to be used in an offline
build. See bootstrap/README.md for further information.
---
 .github/workflows/bootstrap.yml |   8 +-
 bootstrap/README.md             |  15 ++-
 bootstrap/bootstrap.py          | 225 +++++++++++++++++++++-----------
 changelog.d/pr-8368             |  10 ++
 4 files changed, 179 insertions(+), 79 deletions(-)
 create mode 100644 changelog.d/pr-8368

diff --git a/.github/workflows/bootstrap.yml b/.github/workflows/bootstrap.yml
index 778cd0aac1..5b0b2d452c 100644
--- a/.github/workflows/bootstrap.yml
+++ b/.github/workflows/bootstrap.yml
@@ -32,8 +32,12 @@ jobs:
           GHC_VERSION=${{ matrix.ghc }}
           ghcup config set cache true
           ghcup install ghc $GHC_VERSION
-          # We use linux dependencies also on macos
-          python3 bootstrap/bootstrap.py -w $(ghcup whereis ghc $GHC_VERSION) -d bootstrap/linux-$GHC_VERSION.json
+
+          # Fetch the bootstrap sources (we use linux dependencies also on macos)
+          python3 bootstrap/bootstrap.py -w $(ghcup whereis ghc $GHC_VERSION) -d bootstrap/linux-$GHC_VERSION.json fetch
+
+          # Bootstrap using the bootstrap sources
+          python3 bootstrap/bootstrap.py -w $(ghcup whereis ghc $GHC_VERSION) --bootstrap-sources bootstrap-sources.tar.gz
 
       - name: Smoke test
         run: |
diff --git a/bootstrap/README.md b/bootstrap/README.md
index 79bcb69bd6..80cc8a463a 100644
--- a/bootstrap/README.md
+++ b/bootstrap/README.md
@@ -5,15 +5,24 @@ on a new platform. If you already have a functional (if dated) cabal-install
 please rather run `cabal v2-install`.
 
 The typical usage is porting to a new linux architecture,
-then the `linux-$GHCVER.json` file is available in `bootstrap/` folder:
+then the `linux-{ghc-ver}.json` file is available in the `bootstrap/` folder:
 
 On a (linux) system you are bootstrapping, run
 
-    ./bootstrap/bootstrap.py -d ./bootstrap/linux-$GHCVER.json -w /path/to-ghc
+    ./bootstrap/bootstrap.py -d ./bootstrap/linux-ghcver.json -w /path/to-ghc
 
 from the top directory of the source checkout.
 
-To generate the `$PLATFORM-$GHCVER` files for other platforms, do:
+For offline builds, you can first run
+
+    ./bootstrap/bootstrap.py -d ./bootstrap/linux-ghcver.json -w /path/to-ghc fetch
+
+to fetch tarballs for all the dependencies. These can then be used by a further
+bootstrap command by way of the `--bootstrap-sources` argument:
+
+    ./bootstrap/bootstrap.py -w /path/to-ghc --bootstrap-sources bootstrap-sources.tar.gz
+
+To generate the `platform-{ghc-ver}` files for other platforms, do:
 
   1. On a system with functional cabal-install, install the same GHC version
      as you will use to bootstrap on the host system.
diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py
index 4145ea4adc..82b792e1e4 100755
--- a/bootstrap/bootstrap.py
+++ b/bootstrap/bootstrap.py
@@ -13,17 +13,20 @@ on a new platform. If you already have a functional (if dated) cabal-install
 please rather run `cabal v2-install .`.
 """
 
+import argparse
 from enum import Enum
 import hashlib
-import logging
 import json
 from pathlib import Path
 import platform
 import shutil
 import subprocess
+import sys
+import tempfile
+import urllib.request
 from textwrap import dedent
-from typing import Set, Optional, Dict, List, Tuple, \
-                   NewType, BinaryIO, NamedTuple, TypeVar
+from typing import Optional, Dict, List, Tuple, \
+                   NewType, BinaryIO, NamedTuple
 
 #logging.basicConfig(level=logging.INFO)
 
@@ -68,6 +71,15 @@ BootstrapInfo = NamedTuple('BootstrapInfo', [
     ('dependencies', List[BootstrapDep]),
 ])
 
+FetchInfo = NamedTuple('FetchInfo', [  # describes one file to download
+    ('url', str),  # URL the file is downloaded from
+    ('sha256', SHA256Hash)  # expected SHA256 of the downloaded file
+])
+
+FetchPlan = Dict[Path, FetchInfo]  # keyed by the file's path relative to the download directory
+
+local_packages: List[PackageName] = ["Cabal-syntax", "Cabal", "cabal-install-solver", "cabal-install"]  # built from same-named directories (relative to the working directory), never fetched
+
 class Compiler:
     def __init__(self, ghc_path: Path):
         if not ghc_path.is_file():
@@ -75,14 +87,17 @@ class Compiler:
 
         self.ghc_path = ghc_path.resolve()
 
+        exe = ''
+        if platform.system() == 'Windows': exe = '.exe'
+
         info = self._get_ghc_info()
         self.version = info['Project version']
         #self.lib_dir = Path(info['LibDir'])
         #self.ghc_pkg_path = (self.lib_dir / 'bin' / 'ghc-pkg').resolve()
-        self.ghc_pkg_path = (self.ghc_path.parent / 'ghc-pkg').resolve()
+        self.ghc_pkg_path = (self.ghc_path.parent / ('ghc-pkg' + exe)).resolve()
         if not self.ghc_pkg_path.is_file():
             raise TypeError(f'ghc-pkg {self.ghc_pkg_path} is not a file')
-        self.hsc2hs_path = (self.ghc_path.parent / 'hsc2hs').resolve()
+        self.hsc2hs_path = (self.ghc_path.parent / ('hsc2hs' + exe)).resolve()
         if not self.hsc2hs_path.is_file():
             raise TypeError(f'hsc2hs {self.hsc2hs_path} is not a file')
 
@@ -118,36 +133,6 @@ def verify_sha256(expected_hash: SHA256Hash, f: Path):
     if h != expected_hash:
         raise BadTarball(f, expected_hash, h)
 
-def fetch_package(package: PackageName,
-                  version: Version,
-                  src_sha256: SHA256Hash,
-                  revision: Optional[int],
-                  cabal_sha256: Optional[SHA256Hash],
-                  ) -> (Path, Path):
-    import urllib.request
-
-    # Download source distribution
-    tarball = TARBALLS / f'{package}-{version}.tar.gz'
-    if not tarball.exists():
-        print(f'Fetching {package}-{version}...')
-        tarball.parent.mkdir(parents=True, exist_ok=True)
-        url = package_url(package, version)
-        with urllib.request.urlopen(url) as resp:
-            shutil.copyfileobj(resp, tarball.open('wb'))
-
-    verify_sha256(src_sha256, tarball)
-
-    # Download revised cabal file
-    cabal_file = TARBALLS / f'{package}.cabal'
-    if revision is not None and not cabal_file.exists():
-        assert cabal_sha256 is not None
-        url = package_cabal_url(package, version, revision)
-        with urllib.request.urlopen(url) as resp:
-            shutil.copyfileobj(resp, cabal_file.open('wb'))
-            verify_sha256(cabal_sha256, cabal_file)
-
-    return (tarball, cabal_file)
-
 def read_bootstrap_info(path: Path) -> BootstrapInfo:
     obj = json.load(path.open())
 
@@ -169,13 +154,15 @@ def check_builtin(dep: BuiltinDep, ghc: Compiler) -> None:
     print(f'Using {dep.package}-{dep.version} from GHC...')
     return
 
-def install_dep(dep: BootstrapDep, ghc: Compiler) -> None:
-    dist_dir = (DISTDIR / f'{dep.package}-{dep.version}').resolve()
-
+def resolve_dep(dep : BootstrapDep) -> Path:
     if dep.source == PackageSource.HACKAGE:
-        assert dep.src_sha256 is not None
-        (tarball, cabal_file) = fetch_package(dep.package, dep.version, dep.src_sha256,
-                                dep.revision, dep.cabal_sha256)
+
+        tarball = TARBALLS / f'{dep.package}-{dep.version}.tar.gz'
+        verify_sha256(dep.src_sha256, tarball)
+
+        cabal_file = TARBALLS / f'{dep.package}.cabal'
+        verify_sha256(dep.cabal_sha256, cabal_file)
+
         UNPACKED.mkdir(parents=True, exist_ok=True)
         shutil.unpack_archive(tarball.resolve(), UNPACKED, 'gztar')
         sdist_dir = UNPACKED / f'{dep.package}-{dep.version}'
@@ -191,16 +178,16 @@ def install_dep(dep: BootstrapDep, ghc: Compiler) -> None:
                 f.write('main = defaultMain\n')
 
     elif dep.source == PackageSource.LOCAL:
-        if dep.package == 'Cabal':
-            sdist_dir = Path('Cabal').resolve()
-        elif dep.package == 'Cabal-syntax':
-            sdist_dir = Path('Cabal-syntax').resolve()
-        elif dep.package == 'cabal-install-solver':
-            sdist_dir = Path('cabal-install-solver').resolve()
-        elif dep.package == 'cabal-install':
-            sdist_dir = Path('cabal-install').resolve()
+        if dep.package in local_packages:
+            sdist_dir = Path(dep.package).resolve()
         else:
             raise ValueError(f'Unknown local package {dep.package}')
+    return sdist_dir
+
+def install_dep(dep: BootstrapDep, ghc: Compiler) -> None:
+    dist_dir = (DISTDIR / f'{dep.package}-{dep.version}').resolve()
+
+    sdist_dir = resolve_dep(dep)
 
     install_sdist(dist_dir, sdist_dir, ghc, dep.flags)
 
@@ -307,7 +294,7 @@ def archive_name(cabalversion):
 
     return f'cabal-install-{cabalversion}-{machine}-{version}'
 
-def make_archive(cabal_path):
+def make_distribution_archive(cabal_path):
     import tempfile
 
     print(f'Creating distribution tarball')
@@ -334,28 +321,62 @@ def make_archive(cabal_path):
 
     return archivename
 
+def fetch_from_plan(plan : FetchPlan, output_dir : Path):
+  """Download each file of plan into output_dir (unless already there) and verify its SHA256."""
+  output_dir = output_dir.resolve()
+  output_dir.mkdir(parents=True, exist_ok=True)
+  for path, fetch_info in plan.items():
+    output_path = output_dir / path
+    if not output_path.exists():
+      print(f'Fetching {fetch_info.url}...')
+      # Close the output file (not only the response) so that everything is
+      # flushed to disk before the hash is checked below.
+      with urllib.request.urlopen(fetch_info.url) as resp, output_path.open('wb') as out:
+        shutil.copyfileobj(resp, out)
+    verify_sha256(fetch_info.sha256, output_path)
+
+def gen_fetch_plan(info : BootstrapInfo) -> FetchPlan :
+    sources_dict: FetchPlan = {}  # file path (relative to the download directory) -> URL and expected SHA256
+    for dep in info.dependencies:
+      if dep.package not in local_packages:  # local packages are not downloaded
+        sources_dict[Path(f"{dep.package}-{dep.version}.tar.gz")] = FetchInfo(package_url(dep.package, dep.version), dep.src_sha256)
+        if dep.revision is not None:  # a revised .cabal file is fetched only when a revision is recorded
+          sources_dict[Path(f"{dep.package}.cabal")] = FetchInfo(package_cabal_url(dep.package, dep.version, dep.revision), dep.cabal_sha256)
+    return sources_dict
+
+def find_ghc(compiler) -> Compiler:
+  """Return a Compiler for the given path, or for the 'ghc' found in PATH when compiler is None."""
+  if compiler is not None:
+      return Compiler(compiler)
+
+  path = shutil.which('ghc')
+  if path is None:
+      raise ValueError("Couldn't find ghc in PATH")
+  return Compiler(Path(path))
+
 def main() -> None:
-    import argparse
     parser = argparse.ArgumentParser(
         description="bootstrapping utility for cabal-install.",
         epilog = USAGE,
         formatter_class = argparse.RawDescriptionHelpFormatter)
-    parser.add_argument('-d', '--deps', type=Path, default='bootstrap-deps.json',
+    parser.add_argument('-d', '--deps', type=Path,
                         help='bootstrap dependency file')
     parser.add_argument('-w', '--with-compiler', type=Path,
                         help='path to GHC')
-    args = parser.parse_args()
+    parser.add_argument('-s', '--bootstrap-sources', type=Path,
+                        help='path to prefetched bootstrap sources archive')
+    parser.add_argument('--archive', dest='want_archive', action='store_true')
+    parser.add_argument('--no-archive', dest='want_archive', action='store_false')
+    parser.set_defaults(want_archive=True)
 
-    # Find compiler
-    if args.with_compiler is None:
-        path = shutil.which('ghc')
-        if path is None:
-            raise ValueError("Couldn't find ghc in PATH")
-        ghc = Compiler(Path(path))
-    else:
-        ghc = Compiler(args.with_compiler)
+    subparsers = parser.add_subparsers(dest="command")
 
-    print(f'Bootstrapping cabal-install with GHC {ghc.version} at {ghc.ghc_path}...')
+    parser_fetch = subparsers.add_parser('build', help='build cabal-install (default)')
+
+    parser_fetch = subparsers.add_parser('fetch', help='fetch all required sources from Hackage (for offline builds)')
+    parser_fetch.add_argument('-o','--output', type=Path, default='bootstrap-sources')
+
+    args = parser.parse_args()
 
     print(dedent("""
         DO NOT use this script if you have another recent cabal-install available.
@@ -363,26 +384,82 @@ def main() -> None:
         architectures.
     """))
 
+    ghc = find_ghc(args.with_compiler)
+
+    sources_fmt = 'gztar'
+    if platform.system() == 'Windows': sources_fmt = 'zip'
+
+    if args.deps is None:
+      # We have a tarball with all the required information, unpack it
+      if args.bootstrap_sources is not None:
+        print(f'Unpacking {args.bootstrap_sources} to {TARBALLS}')
+        shutil.unpack_archive(args.bootstrap_sources.resolve(), TARBALLS, sources_fmt)
+        args.deps = TARBALLS / 'plan-bootstrap.json'
+        print(f"using plan-bootstrap.json ({args.deps}) from {args.bootstrap_sources}")
+      else:
+        print("The bootstrap script requires a bootstrap plan JSON file.")
+        print("See bootstrap/README.md for more information.")
+        sys.exit(1)
+
     info = read_bootstrap_info(args.deps)
-    bootstrap(info, ghc)
-    cabal_path = (BINDIR / 'cabal').resolve()
 
-    archive = make_archive(cabal_path)
+    if args.command == 'fetch':
+        plan = gen_fetch_plan(info)
+
+        print(f'Fetching sources to bootstrap cabal-install with GHC {ghc.version} at {ghc.ghc_path}...')
+
+        # In temporary directory, create a directory which we will archive
+        tmpdir = TMPDIR.resolve()
+        tmpdir.mkdir(parents=True, exist_ok=True)
+
+        rootdir = Path(tempfile.mkdtemp(dir=tmpdir))
+
+        fetch_from_plan(plan, rootdir)
+
+        shutil.copyfile(args.deps, rootdir / 'plan-bootstrap.json')
+
+        archivename = shutil.make_archive(args.output, sources_fmt, root_dir=rootdir)
+
+        print(dedent(f"""
+            Bootstrap sources saved to {archivename}
+
+            Use these with the command:
+
+            bootstrap.py -w {ghc.ghc_path} -s {archivename}
+            """))
+
+    else: # 'build' command (default behaviour)
+
+        print(f'Bootstrapping cabal-install with GHC {ghc.version} at {ghc.ghc_path}...')
+
+        if args.bootstrap_sources is None:
+          plan = gen_fetch_plan(info)
+          fetch_from_plan(plan, TARBALLS)
+
+        bootstrap(info, ghc)
+        cabal_path = (BINDIR / 'cabal').resolve()
+
+        print(dedent(f'''
+            Bootstrapping finished!
 
-    print(dedent(f'''
-        Bootstrapping finished!
+            The resulting cabal-install executable can be found at
 
-        The resulting cabal-install executable can be found at
+              {cabal_path}
+            '''))
 
-            {cabal_path}
+        if args.want_archive:
+            dist_archive = make_distribution_archive(cabal_path)
 
-        It have been archived for distribution in
+            print(dedent(f'''
+                The cabal-install executable has been archived for distribution in
 
-            {archive}
+                  {dist_archive}
+                '''))
 
-        You now should use this to build a full cabal-install distribution
-        using v2-build.
-    '''))
+        print(dedent(f'''
+            You now should use this to build a full cabal-install distribution
+            using v2-build.
+            '''))
 
 def subprocess_run(args, **kwargs):
     "Like subprocess.run, but also print what we run"
diff --git a/changelog.d/pr-8368 b/changelog.d/pr-8368
new file mode 100644
index 0000000000..cf74efb48d
--- /dev/null
+++ b/changelog.d/pr-8368
@@ -0,0 +1,10 @@
+synopsis: Allow offline bootstrapping of cabal-install
+prs: #8368
+packages: cabal-install
+
+description: {
+
+- The bootstrap script for cabal-install now supports fetching the sources of the dependencies in a separate step.
+  One can then copy over the resulting archive and perform offline bootstrapping of cabal-install.
+
+}
\ No newline at end of file
-- 
GitLab