From 3f92996570f10fe13bd052344f7412500aa8736a Mon Sep 17 00:00:00 2001 From: sheaf <sam.derbyshire@gmail.com> Date: Wed, 17 Aug 2022 13:22:02 +0200 Subject: [PATCH] Allow offline bootstrapping of cabal-install This ports to cabal-install the offline bootstrapping logic which was introduced for Hadrian in GHC MR !6315. This adds a "fetch" command to the bootstrap script, which fetches all the dependency tarballs from Hackage, to be used in an offline build. See bootstrap/README.md for further information. --- .github/workflows/bootstrap.yml | 8 +- bootstrap/README.md | 15 ++- bootstrap/bootstrap.py | 225 +++++++++++++++++++++----------- changelog.d/pr-8368 | 10 ++ 4 files changed, 179 insertions(+), 79 deletions(-) create mode 100644 changelog.d/pr-8368 diff --git a/.github/workflows/bootstrap.yml b/.github/workflows/bootstrap.yml index 778cd0aac1..5b0b2d452c 100644 --- a/.github/workflows/bootstrap.yml +++ b/.github/workflows/bootstrap.yml @@ -32,8 +32,12 @@ jobs: GHC_VERSION=${{ matrix.ghc }} ghcup config set cache true ghcup install ghc $GHC_VERSION - # We use linux dependencies also on macos - python3 bootstrap/bootstrap.py -w $(ghcup whereis ghc $GHC_VERSION) -d bootstrap/linux-$GHC_VERSION.json + + # Fetch the bootstrap sources (we use linux dependencies also on macos) + python3 bootstrap/bootstrap.py -w $(ghcup whereis ghc $GHC_VERSION) -d bootstrap/linux-$GHC_VERSION.json fetch + + # Bootstrap using the bootstrap sources + python3 bootstrap/bootstrap.py -w $(ghcup whereis ghc $GHC_VERSION) --bootstrap-sources bootstrap-sources.tar.gz - name: Smoke test run: | diff --git a/bootstrap/README.md b/bootstrap/README.md index 79bcb69bd6..80cc8a463a 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -5,15 +5,24 @@ on a new platform. If you already have a functional (if dated) cabal-install please rather run `cabal v2-install`. 
The typical usage is porting to a new linux architecture, -then the `linux-$GHCVER.json` file is available in `bootstrap/` folder: +then the `linux-{ghc-ver}.json` file is available in the `bootstrap/` folder: On a (linux) system you are bootstrapping, run - ./bootstrap/bootstrap.py -d ./bootstrap/linux-$GHCVER.json -w /path/to-ghc + ./bootstrap/bootstrap.py -d ./bootstrap/linux-{ghc-ver}.json -w /path/to-ghc from the top directory of the source checkout. -To generate the `$PLATFORM-$GHCVER` files for other platforms, do: +For offline builds, you can first run + + ./bootstrap/bootstrap.py -d ./bootstrap/linux-{ghc-ver}.json -w /path/to-ghc fetch + +to fetch tarballs for all the dependencies. These can then be used by a further +bootstrap command by way of the `--bootstrap-sources` argument: + + ./bootstrap/bootstrap.py -w /path/to-ghc --bootstrap-sources bootstrap-sources.tar.gz + +To generate the `platform-{ghc-ver}` files for other platforms, do: 1. On a system with functional cabal-install, install the same GHC version as you will use to bootstrap on the host system. diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 4145ea4adc..82b792e1e4 100755 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -13,17 +13,20 @@ on a new platform. If you already have a functional (if dated) cabal-install please rather run `cabal v2-install .`. 
""" +import argparse from enum import Enum import hashlib -import logging import json from pathlib import Path import platform import shutil import subprocess +import sys +import tempfile +import urllib.request from textwrap import dedent -from typing import Set, Optional, Dict, List, Tuple, \ - NewType, BinaryIO, NamedTuple, TypeVar +from typing import Optional, Dict, List, Tuple, \ + NewType, BinaryIO, NamedTuple #logging.basicConfig(level=logging.INFO) @@ -68,6 +71,15 @@ BootstrapInfo = NamedTuple('BootstrapInfo', [ ('dependencies', List[BootstrapDep]), ]) +FetchInfo = NamedTuple('FetchInfo', [ + ('url', str), + ('sha256', SHA256Hash) +]) + +FetchPlan = Dict[Path, FetchInfo] + +local_packages: List[PackageName] = ["Cabal-syntax", "Cabal", "cabal-install-solver", "cabal-install"] + class Compiler: def __init__(self, ghc_path: Path): if not ghc_path.is_file(): @@ -75,14 +87,17 @@ class Compiler: self.ghc_path = ghc_path.resolve() + exe = '' + if platform.system() == 'Windows': exe = '.exe' + info = self._get_ghc_info() self.version = info['Project version'] #self.lib_dir = Path(info['LibDir']) #self.ghc_pkg_path = (self.lib_dir / 'bin' / 'ghc-pkg').resolve() - self.ghc_pkg_path = (self.ghc_path.parent / 'ghc-pkg').resolve() + self.ghc_pkg_path = (self.ghc_path.parent / ('ghc-pkg' + exe)).resolve() if not self.ghc_pkg_path.is_file(): raise TypeError(f'ghc-pkg {self.ghc_pkg_path} is not a file') - self.hsc2hs_path = (self.ghc_path.parent / 'hsc2hs').resolve() + self.hsc2hs_path = (self.ghc_path.parent / ('hsc2hs' + exe)).resolve() if not self.hsc2hs_path.is_file(): raise TypeError(f'hsc2hs {self.hsc2hs_path} is not a file') @@ -118,36 +133,6 @@ def verify_sha256(expected_hash: SHA256Hash, f: Path): if h != expected_hash: raise BadTarball(f, expected_hash, h) -def fetch_package(package: PackageName, - version: Version, - src_sha256: SHA256Hash, - revision: Optional[int], - cabal_sha256: Optional[SHA256Hash], - ) -> (Path, Path): - import urllib.request - - # Download 
source distribution - tarball = TARBALLS / f'{package}-{version}.tar.gz' - if not tarball.exists(): - print(f'Fetching {package}-{version}...') - tarball.parent.mkdir(parents=True, exist_ok=True) - url = package_url(package, version) - with urllib.request.urlopen(url) as resp: - shutil.copyfileobj(resp, tarball.open('wb')) - - verify_sha256(src_sha256, tarball) - - # Download revised cabal file - cabal_file = TARBALLS / f'{package}.cabal' - if revision is not None and not cabal_file.exists(): - assert cabal_sha256 is not None - url = package_cabal_url(package, version, revision) - with urllib.request.urlopen(url) as resp: - shutil.copyfileobj(resp, cabal_file.open('wb')) - verify_sha256(cabal_sha256, cabal_file) - - return (tarball, cabal_file) - def read_bootstrap_info(path: Path) -> BootstrapInfo: obj = json.load(path.open()) @@ -169,13 +154,15 @@ def check_builtin(dep: BuiltinDep, ghc: Compiler) -> None: print(f'Using {dep.package}-{dep.version} from GHC...') return -def install_dep(dep: BootstrapDep, ghc: Compiler) -> None: - dist_dir = (DISTDIR / f'{dep.package}-{dep.version}').resolve() - +def resolve_dep(dep : BootstrapDep) -> Path: if dep.source == PackageSource.HACKAGE: - assert dep.src_sha256 is not None - (tarball, cabal_file) = fetch_package(dep.package, dep.version, dep.src_sha256, - dep.revision, dep.cabal_sha256) + + tarball = TARBALLS / f'{dep.package}-{dep.version}.tar.gz' + verify_sha256(dep.src_sha256, tarball) + + cabal_file = TARBALLS / f'{dep.package}.cabal' + verify_sha256(dep.cabal_sha256, cabal_file) + UNPACKED.mkdir(parents=True, exist_ok=True) shutil.unpack_archive(tarball.resolve(), UNPACKED, 'gztar') sdist_dir = UNPACKED / f'{dep.package}-{dep.version}' @@ -191,16 +178,16 @@ def install_dep(dep: BootstrapDep, ghc: Compiler) -> None: f.write('main = defaultMain\n') elif dep.source == PackageSource.LOCAL: - if dep.package == 'Cabal': - sdist_dir = Path('Cabal').resolve() - elif dep.package == 'Cabal-syntax': - sdist_dir = 
Path('Cabal-syntax').resolve() - elif dep.package == 'cabal-install-solver': - sdist_dir = Path('cabal-install-solver').resolve() - elif dep.package == 'cabal-install': - sdist_dir = Path('cabal-install').resolve() + if dep.package in local_packages: + sdist_dir = Path(dep.package).resolve() else: raise ValueError(f'Unknown local package {dep.package}') + return sdist_dir + +def install_dep(dep: BootstrapDep, ghc: Compiler) -> None: + dist_dir = (DISTDIR / f'{dep.package}-{dep.version}').resolve() + + sdist_dir = resolve_dep(dep) install_sdist(dist_dir, sdist_dir, ghc, dep.flags) @@ -307,7 +294,7 @@ def archive_name(cabalversion): return f'cabal-install-{cabalversion}-{machine}-{version}' -def make_archive(cabal_path): +def make_distribution_archive(cabal_path): import tempfile print(f'Creating distribution tarball') @@ -334,28 +321,62 @@ def make_archive(cabal_path): return archivename +def fetch_from_plan(plan : FetchPlan, output_dir : Path): + output_dir.resolve() + output_dir.mkdir(parents=True, exist_ok=True) + + for path in plan: + output_path = output_dir / path + url = plan[path].url + sha = plan[path].sha256 + if not output_path.exists(): + print(f'Fetching {url}...') + with urllib.request.urlopen(url) as resp: + shutil.copyfileobj(resp, output_path.open('wb')) + verify_sha256(sha, output_path) + +def gen_fetch_plan(info : BootstrapInfo) -> FetchPlan : + sources_dict = {} + for dep in info.dependencies: + if not(dep.package in local_packages): + sources_dict[f"{dep.package}-{dep.version}.tar.gz"] = FetchInfo(package_url(dep.package, dep.version), dep.src_sha256) + if dep.revision is not None: + sources_dict[f"{dep.package}.cabal"] = FetchInfo(package_cabal_url(dep.package, dep.version, dep.revision), dep.cabal_sha256) + return sources_dict + +def find_ghc(compiler) -> Compiler: + if compiler is None: + path = shutil.which('ghc') + if path is None: + raise ValueError("Couldn't find ghc in PATH") + ghc = Compiler(Path(path)) + else: + ghc = 
Compiler(compiler) + return ghc + def main() -> None: - import argparse parser = argparse.ArgumentParser( description="bootstrapping utility for cabal-install.", epilog = USAGE, formatter_class = argparse.RawDescriptionHelpFormatter) - parser.add_argument('-d', '--deps', type=Path, default='bootstrap-deps.json', + parser.add_argument('-d', '--deps', type=Path, help='bootstrap dependency file') parser.add_argument('-w', '--with-compiler', type=Path, help='path to GHC') - args = parser.parse_args() + parser.add_argument('-s', '--bootstrap-sources', type=Path, + help='path to prefetched bootstrap sources archive') + parser.add_argument('--archive', dest='want_archive', action='store_true') + parser.add_argument('--no-archive', dest='want_archive', action='store_false') + parser.set_defaults(want_archive=True) - # Find compiler - if args.with_compiler is None: - path = shutil.which('ghc') - if path is None: - raise ValueError("Couldn't find ghc in PATH") - ghc = Compiler(Path(path)) - else: - ghc = Compiler(args.with_compiler) + subparsers = parser.add_subparsers(dest="command") - print(f'Bootstrapping cabal-install with GHC {ghc.version} at {ghc.ghc_path}...') + parser_fetch = subparsers.add_parser('build', help='build cabal-install (default)') + + parser_fetch = subparsers.add_parser('fetch', help='fetch all required sources from Hackage (for offline builds)') + parser_fetch.add_argument('-o','--output', type=Path, default='bootstrap-sources') + + args = parser.parse_args() print(dedent(""" DO NOT use this script if you have another recent cabal-install available. @@ -363,26 +384,82 @@ def main() -> None: architectures. 
""")) + ghc = find_ghc(args.with_compiler) + + sources_fmt = 'gztar' + if platform.system() == 'Windows': sources_fmt = 'zip' + + if args.deps is None: + # We have a tarball with all the required information, unpack it + if args.bootstrap_sources is not None: + print(f'Unpacking {args.bootstrap_sources} to {TARBALLS}') + shutil.unpack_archive(args.bootstrap_sources.resolve(), TARBALLS, sources_fmt) + args.deps = TARBALLS / 'plan-bootstrap.json' + print(f"using plan-bootstrap.json ({args.deps}) from {args.bootstrap_sources}") + else: + print("The bootstrap script requires a bootstrap plan JSON file.") + print("See bootstrap/README.md for more information.") + sys.exit(1) + info = read_bootstrap_info(args.deps) - bootstrap(info, ghc) - cabal_path = (BINDIR / 'cabal').resolve() - archive = make_archive(cabal_path) + if args.command == 'fetch': + plan = gen_fetch_plan(info) + + print(f'Fetching sources to bootstrap cabal-install with GHC {ghc.version} at {ghc.ghc_path}...') + + # In temporary directory, create a directory which we will archive + tmpdir = TMPDIR.resolve() + tmpdir.mkdir(parents=True, exist_ok=True) + + rootdir = Path(tempfile.mkdtemp(dir=tmpdir)) + + fetch_from_plan(plan, rootdir) + + shutil.copyfile(args.deps, rootdir / 'plan-bootstrap.json') + + archivename = shutil.make_archive(args.output, sources_fmt, root_dir=rootdir) + + print(dedent(f""" + Bootstrap sources saved to {archivename} + + Use these with the command: + + bootstrap.py -w {ghc.ghc_path} -s {archivename} + """)) + + else: # 'build' command (default behaviour) + + print(f'Bootstrapping cabal-install with GHC {ghc.version} at {ghc.ghc_path}...') + + if args.bootstrap_sources is None: + plan = gen_fetch_plan(info) + fetch_from_plan(plan, TARBALLS) + + bootstrap(info, ghc) + cabal_path = (BINDIR / 'cabal').resolve() + + print(dedent(f''' + Bootstrapping finished! - print(dedent(f''' - Bootstrapping finished! 
+ The resulting cabal-install executable can be found at - The resulting cabal-install executable can be found at + {cabal_path} + ''')) - {cabal_path} + if args.want_archive: + dist_archive = make_distribution_archive(cabal_path) - It have been archived for distribution in + print(dedent(f''' + The cabal-install executable has been archived for distribution in - {archive} + {dist_archive} + ''')) - You now should use this to build a full cabal-install distribution - using v2-build. - ''')) + print(dedent(f''' + You now should use this to build a full cabal-install distribution + using v2-build. + ''')) def subprocess_run(args, **kwargs): "Like subprocess.run, but also print what we run" diff --git a/changelog.d/pr-8368 b/changelog.d/pr-8368 new file mode 100644 index 0000000000..cf74efb48d --- /dev/null +++ b/changelog.d/pr-8368 @@ -0,0 +1,10 @@ +synopsis: Allow offline bootstrapping of cabal-install +prs: #8368 +packages: cabal-install + +description: { + +- The bootstrap script for cabal-install now supports fetching the sources of the dependencies in a separate step. + One can then copy over the resulting archive and perform offline bootstrapping of cabal-install. + +} \ No newline at end of file -- GitLab