diff --git a/testsuite/driver/perf_notes.py b/testsuite/driver/perf_notes.py
index 3e4aab6c05af2c6766f875879dec0a993f026cf2..edf32e1b46dc00db40fdd5dceb87be445ab55b2c 100644
--- a/testsuite/driver/perf_notes.py
+++ b/testsuite/driver/perf_notes.py
@@ -86,6 +86,11 @@ PerfStat = NamedTuple('PerfStat', [('test_env', TestEnv),
 Baseline = NamedTuple('Baseline', [('perfStat', PerfStat),
                                    ('commit', GitHash)])
 
+# The type of exception thrown when computing the current stat value
+# fails.
+class StatsException(Exception):
+    pass
+
 class MetricChange(Enum):
     # The metric appears to have no baseline and is presumably a new test.
     NewMetric = 'NewMetric'
@@ -123,11 +128,6 @@ AllowedPerfChange = NamedTuple('AllowedPerfChange',
                                 ('opts', Dict[str, str])
                                ])
 
-MetricBaselineOracle = Callable[[WayName, GitHash], Baseline]
-MetricDeviationOracle = Callable[[WayName, GitHash], Optional[float]]
-MetricOracles = NamedTuple("MetricOracles", [("baseline", MetricBaselineOracle),
-                                             ("deviation", MetricDeviationOracle)])
-
 def parse_perf_stat(stat_str: str) -> PerfStat:
     field_vals = stat_str.strip('\t').split('\t')
     stat = PerfStat(*field_vals) # type: ignore
diff --git a/testsuite/driver/testglobals.py b/testsuite/driver/testglobals.py
index 611d531faf5811de34e205139db64537dfc1d94d..36f18a8c7f90b23279b90c2ab611501334e6e240 100644
--- a/testsuite/driver/testglobals.py
+++ b/testsuite/driver/testglobals.py
@@ -4,7 +4,7 @@
 
 from my_typing import *
 from pathlib import Path
-from perf_notes import MetricChange, PerfStat, Baseline, MetricOracles, GitRef
+from perf_notes import MetricChange, PerfStat, Baseline, GitRef
 from datetime import datetime
 
 # -----------------------------------------------------------------------------
@@ -378,24 +378,20 @@ class TestOptions:
         # extra files to copy to the testdir
         self.extra_files = [] # type: List[str]
 
-        # Map from metric to (function from way and commit to baseline value, allowed percentage deviation) e.g.
-        #     { 'bytes allocated': (
-        #                  lambda way commit:
-        #                      ...
-        #                      if way1: return None ...
-        #                      elif way2:return 9300000000 ...
-        #                      ...
-        #                  , 10) }
-        # This means no baseline is available for way1. For way 2, allow a 10%
-        # deviation from 9300000000.
-        self.stats_range_fields = {} # type: Dict[MetricName, MetricOracles]
-
         # Is the test testing performance?
         self.is_stats_test = False
 
        # Does this test the compiler's performance as opposed to the generated code.
        self.is_compiler_stats_test = False
 
+        # Map from metric to information about that metric
+        # { metric: { "deviation": <int>
+        #              -- the action to run to get the current value of the metric
+        #           , "current": lambda way: <int>
+        #              -- the action to run to get the baseline value of the metric
+        #           , "baseline": lambda way, commit: <baseline value> } }
+        self.generic_stats_test: Dict = {}
+
         # should we run this test alone, i.e. not run it in parallel with
         # any other threads
         self.alone = False
diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py
index e4bb4154a4fa95480ff066fdad6f8399110dcdab..c23757ebfc52bc6a6c6ac65320c29a3341764869 100644
--- a/testsuite/driver/testlib.py
+++ b/testsuite/driver/testlib.py
@@ -28,7 +28,7 @@ from term_color import Color, colored
 import testutil
 from cpu_features import have_cpu_feature
 import perf_notes as Perf
-from perf_notes import MetricChange, PerfStat, MetricOracles
+from perf_notes import MetricChange, PerfStat, StatsException
 extra_src_files = {'T4198': ['exitminus1.c']} # TODO: See #12223
 
 from my_typing import *
@@ -99,6 +99,10 @@ def isCompilerStatsTest() -> bool:
     opts = getTestOpts()
     return bool(opts.is_compiler_stats_test)
 
+def isGenericStatsTest() -> bool:
+    opts = getTestOpts()
+    return bool(opts.generic_stats_test)
+
 def isStatsTest() -> bool:
     opts = getTestOpts()
     return opts.is_stats_test
@@ -599,6 +603,44 @@ def extra_files(files):
 def _extra_files(name, opts, files):
     opts.extra_files.extend(files)
 
+# Record the size of a specific file
+def collect_size(deviation, path):
+    return collect_generic_stat('size', deviation, lambda way: os.path.getsize(in_testdir(path)))
+
+# Read a number from a specific file
+def stat_from_file(metric, deviation, path):
+    def read_file(way):
+        with open(in_testdir(path)) as f:
+            return int(f.read())
+    return collect_generic_stat(metric, deviation, read_file)
+
+
+# Define a set of generic stat tests
+def collect_generic_stats(metric_info):
+    def f(name, opts, mi=metric_info):
+        return _collect_generic_stat(name, opts, mi)
+    return f
+
+# Define a generic stat test, which computes the statistic by calling the function
+# given as the third argument.
+def collect_generic_stat(metric, deviation, get_stat):
+    return collect_generic_stats({ metric: { 'deviation': deviation, 'current': get_stat } })
+
+def _collect_generic_stat(name: TestName, opts, metric_infos):
+
+
+    # Add new stats to the stat list
+    opts.generic_stats_test.update(metric_infos)
+
+    # Add the function used to determine the baseline
+    for (metric, info) in metric_infos.items():
+        def baselineByWay(way, target_commit, metric=metric):
+            return Perf.baseline_metric( \
+                target_commit, name, config.test_env, metric, way, \
+                config.baseline_commit )
+        opts.generic_stats_test[metric]["baseline"] = baselineByWay
+
+
 # -----
 
 # Defaults to "test everything, and only break on extreme cases"
@@ -619,11 +661,14 @@ def _extra_files(name, opts, files):
 def collect_compiler_stats(metric='all',deviation=20):
     def f(name, opts, m=metric, d=deviation):
         no_lint(name, opts)
-        return _collect_stats(name, opts, m, d, True)
+        return _collect_stats(name, opts, m, d, None, True)
     return f
 
-def collect_stats(metric='all', deviation=20):
-    return lambda name, opts, m=metric, d=deviation: _collect_stats(name, opts, m, d)
+def collect_stats(metric='all', deviation=20, static_stats_file=None):
+    return lambda name, opts, m=metric, d=deviation, s=static_stats_file: _collect_stats(name, opts, m, d, s)
+
+def statsFile(comp_test: bool, name: str) -> str:
+    return name + ('.comp' if comp_test else '') + '.stats'
 
 # This is an internal function that is used only in the implementation.
 # 'is_compiler_stats_test' is somewhat of an unfortunate name.
@@ -631,7 +676,7 @@ def collect_stats(metric='all', deviation=20):
 # measures the performance numbers of the compiler.
 # As this is a fairly rare case in the testsuite, it defaults to false to
 # indicate that it is a 'normal' performance test.
-def _collect_stats(name: TestName, opts, metrics, deviation, is_compiler_stats_test=False):
+def _collect_stats(name: TestName, opts, metrics, deviation, static_stats_file, is_compiler_stats_test=False):
     if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
         failBecause('This test has an invalid name.')
 
@@ -664,15 +709,41 @@ def _collect_stats(name: TestName, opts, metrics, deviation, is_compiler_stats_t
         # The nonmoving collector does not support -G1
         _omit_ways(name, opts, [WayName(name) for name in ['nonmoving', 'nonmoving_thr', 'nonmoving_thr_ghc']])
 
+    # How to read the result of the performance test
+    def read_stats_file(way, metric_name):
+        # Confusingly, compile-time ghci tests are actually runtime tests, so we have
+        # to go and look for the name.stats file rather than the name.comp.stats file.
+        compiler_stats_test = is_compiler_stats_test and not (way == "ghci" or way == "ghci-opt")
+
+        if static_stats_file:
+            stats_file = in_statsdir(static_stats_file)
+        else:
+            stats_file = Path(in_testdir(statsFile(compiler_stats_test, name)))
+
+
+        try:
+            stats_file_contents = stats_file.read_text()
+        except IOError as e:
+            raise StatsException(str(e))
+        field_match = re.search('\\("' + metric_name + '", "([0-9]+)"\\)', stats_file_contents)
+        if field_match is None:
+            print('Failed to find metric: ', metric_name)
+            raise StatsException("No such metric")
+        else:
+            val = field_match.group(1)
+            assert val is not None
+            return int(val)
+
+
+    collect_stat = {}
     for metric_name in metrics:
+        def action_generator(mn):
+            return lambda way: read_stats_file(way, mn)
         metric = '{}/{}'.format(tag, metric_name)
-        def baselineByWay(way, target_commit, metric=metric):
-            return Perf.baseline_metric( \
-                target_commit, name, config.test_env, metric, way, \
-                config.baseline_commit )
+        collect_stat[metric] = { "deviation": deviation
+                               , "current": action_generator(metric_name) }
 
-        opts.stats_range_fields[metric] = MetricOracles(baseline=baselineByWay,
-                                                        deviation=deviation)
+    _collect_generic_stat(name, opts, collect_stat)
 
 # -----
 
@@ -1581,6 +1652,11 @@ async def do_compile(name: TestName,
             diff_file_name.unlink()
             return failBecause('stderr mismatch', stderr=stderr)
 
+    opts = getTestOpts()
+    if isGenericStatsTest():
+        statsResult = check_generic_stats(TestName(name), way, opts.generic_stats_test)
+        if badResult(statsResult):
+            return statsResult
 
     # no problems found, this test passed
     return passed()
@@ -1717,13 +1793,9 @@ async def multi_compile_and_run( name, way, top_mod, extra_mods, extra_hc_opts )
 async def warn_and_run( name, way, extra_hc_opts ):
     return await compile_and_run__( name, way, None, [], extra_hc_opts, compile_stderr = True)
 
-def stats( name, way, stats_file ):
-    opts = getTestOpts()
-    return check_stats(name, way, in_testdir(stats_file), opts.stats_range_fields)
-
-async def static_stats( name, way, stats_file ):
+async def static_stats( name, way ):
     opts = getTestOpts()
-    return check_stats(name, way, in_statsdir(stats_file), opts.stats_range_fields)
+    return check_generic_stats(name, way, opts.generic_stats_test)
 
 def metric_dict(name, way, metric, value) -> PerfStat:
     return Perf.PerfStat(
@@ -1733,75 +1805,57 @@ def metric_dict(name, way, metric, value) -> PerfStat:
         metric   = metric,
         value    = value)
 
-# -----------------------------------------------------------------------------
-# Check test stats. This prints the results for the user.
-# name: name of the test.
-# way: the way.
-# stats_file: the path of the stats_file containing the stats for the test.
-# range_fields: see TestOptions.stats_range_fields
-# Returns a pass/fail object. Passes if the stats are within the expected value ranges.
-# This prints the results for the user.
-def check_stats(name: TestName,
-                way: WayName,
-                stats_file: Path,
-                range_fields: Dict[MetricName, MetricOracles]
-                ) -> PassFail:
+
+
+def check_generic_stats(name, way, get_stats):
+    for (metric, gen_stat) in get_stats.items():
+        res = report_stats(name, way, metric, gen_stat)
+        print(res)
+        if badResult(res):
+            return res
+    return passed()
+
+def report_stats(name, way, metric, gen_stat):
+    # Metrics can exit early by throwing a StatsException with the failure string.
+    try:
+        actual_val = gen_stat['current'](way)
+    except StatsException as e:
+        return failBecause(e.args[0])
+
     head_commit = Perf.commit_hash(GitRef('HEAD')) if Perf.inside_git_repo() else None
     if head_commit is None:
         return passed()
 
     result = passed()
-    if range_fields:
-        try:
-            stats_file_contents = stats_file.read_text()
-        except IOError as e:
-            return failBecause(str(e))
-
-        for (metric, baseline_and_dev) in range_fields.items():
-            # Remove any metric prefix e.g. "runtime/" and "compile_time/"
-            stat_file_metric = metric.split("/")[-1]
-            perf_change = None
-
-            field_match = re.search('\\("' + stat_file_metric + '", "([0-9]+)"\\)', stats_file_contents)
-            if field_match is None:
-                print('Failed to find metric: ', stat_file_metric)
-                result = failBecause('no such stats metric')
-            else:
-                val = field_match.group(1)
-                assert val is not None
-                actual_val = int(val)
-
-                # Store the metric so it can later be stored in a git note.
-                perf_stat = metric_dict(name, way, metric, actual_val)
-
-                # If this is the first time running the benchmark, then pass.
-                baseline = baseline_and_dev.baseline(way, head_commit) \
-                    if Perf.inside_git_repo() else None
-                if baseline is None:
-                    metric_result = passed()
-                    perf_change = MetricChange.NewMetric
-                else:
-                    tolerance_dev = baseline_and_dev.deviation
-                    (perf_change, metric_result) = Perf.check_stats_change(
-                        perf_stat,
-                        baseline,
-                        tolerance_dev,
-                        config.allowed_perf_changes,
-                        config.verbose >= 4)
-
-                t.metrics.append(PerfMetric(change=perf_change, stat=perf_stat, baseline=baseline))
-
-                # If any metric fails then the test fails.
-                # Note, the remaining metrics are still run so that
-                # a complete list of changes can be presented to the user.
-                if not metric_result.passed:
-                    if config.ignore_perf_increases and perf_change == MetricChange.Increase:
-                        metric_result = passed()
-                    elif config.ignore_perf_decreases and perf_change == MetricChange.Decrease:
-                        metric_result = passed()
-
-                    result = metric_result
-
+    # Store the metric so it can later be stored in a git note.
+    perf_stat = metric_dict(name, way, metric, actual_val)
+
+    # If this is the first time running the benchmark, then pass.
+    baseline = gen_stat['baseline'](way, head_commit) \
+        if Perf.inside_git_repo() else None
+    if baseline is None:
+        metric_result = passed()
+        perf_change = MetricChange.NewMetric
+    else:
+        (perf_change, metric_result) = Perf.check_stats_change(
+            perf_stat,
+            baseline,
+            gen_stat["deviation"],
+            config.allowed_perf_changes,
+            config.verbose >= 4)
+
+    t.metrics.append(PerfMetric(change=perf_change, stat=perf_stat, baseline=baseline))
+
+    # If any metric fails then the test fails.
+    # Note, the remaining metrics are still run so that
+    # a complete list of changes can be presented to the user.
+    if not metric_result.passed:
+        if config.ignore_perf_increases and perf_change == MetricChange.Increase:
+            metric_result = passed()
+        elif config.ignore_perf_decreases and perf_change == MetricChange.Decrease:
+            metric_result = passed()
+
+        result = metric_result
     return result
 
 # -----------------------------------------------------------------------------
@@ -1863,8 +1917,8 @@ async def simple_build(name: Union[TestName, str],
     else:
         to_do = '-c' # just compile
 
-    stats_file = name + '.comp.stats'
     if isCompilerStatsTest():
+        stats_file = statsFile(True, name)
         # Set a bigger chunk size to reduce variation due to additional under/overflowing
         # The tests are attempting to test how much work the compiler is doing by proxy of
         # bytes allocated. The additional allocations caused by stack overflow can cause
@@ -1913,10 +1967,6 @@ async def simple_build(name: Union[TestName, str],
             stderr_contents = actual_stderr_path.read_text(encoding='UTF-8', errors='replace')
             return failBecause('exit code non-0', stderr=stderr_contents)
 
-    if isCompilerStatsTest():
-        statsResult = check_stats(TestName(name), way, in_testdir(stats_file), opts.stats_range_fields)
-        if badResult(statsResult):
-            return statsResult
 
     return passed()
 
@@ -1953,7 +2003,7 @@ async def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: st
     # assume we are running a program via ghci. Collect stats
     stats_file = None # type: Optional[str]
     if isStatsTest() and (not isCompilerStatsTest() or way == 'ghci' or way == 'ghci-opt'):
-        stats_file = name + '.stats'
+        stats_file = statsFile(False, name)
         stats_args = ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS'
     else:
         stats_args = ''
@@ -1999,11 +2049,13 @@ async def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: st
     if check_prof and not await check_prof_ok(name, way):
         return failBecause('bad profile')
 
-    # Check runtime stats if desired.
-    if stats_file is not None:
-        return check_stats(name, way, in_testdir(stats_file), opts.stats_range_fields)
-    else:
-        return passed()
+    # Check the results of stats tests
+    if isGenericStatsTest():
+        statsResult = check_generic_stats(TestName(name), way, opts.generic_stats_test)
+        if badResult(statsResult):
+            return statsResult
+
+    return passed()
 
 def rts_flags(way: WayName) -> str:
     args = config.way_rts_flags.get(way, [])
diff --git a/testsuite/tests/count-deps/Makefile b/testsuite/tests/count-deps/Makefile
index 41911c47df05800d731e07dbc959079e00f8da41..554157c845da1f2cfaae71a81fb98ada34b4db0d 100644
--- a/testsuite/tests/count-deps/Makefile
+++ b/testsuite/tests/count-deps/Makefile
@@ -16,8 +16,10 @@ LIBDIR := "`'$(TEST_HC)' $(TEST_HC_OPTS) --print-libdir | tr -d '\r'`"
 
 .PHONY: count-deps-parser
 count-deps-parser:
-	$(COUNT_DEPS) $(LIBDIR) "GHC.Parser"
+	$(COUNT_DEPS) $(LIBDIR) "GHC.Parser" | tee out
+	cat out | tail -n +2 | wc -l > SIZE
 
 .PHONY: count-deps-ast
 count-deps-ast:
-	$(COUNT_DEPS) $(LIBDIR) "Language.Haskell.Syntax"
+	$(COUNT_DEPS) $(LIBDIR) "Language.Haskell.Syntax" | tee out
+	cat out | tail -n +2 | wc -l > SIZE
diff --git a/testsuite/tests/count-deps/all.T b/testsuite/tests/count-deps/all.T
index a7c31b50c9c57e58a829792b11b0510779e46a93..e5a70820c3927cf72f9d072cc998829e48793494 100644
--- a/testsuite/tests/count-deps/all.T
+++ b/testsuite/tests/count-deps/all.T
@@ -1,2 +1,2 @@
-test('CountDepsAst', [req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-ast'])
-test('CountDepsParser', [req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-parser'])
+test('CountDepsAst', [stat_from_file('deps', 100, 'SIZE'), req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-ast'])
+test('CountDepsParser', [stat_from_file('deps', 100, 'SIZE'), req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-parser'])
diff --git a/testsuite/tests/perf/haddock/all.T b/testsuite/tests/perf/haddock/all.T
index e2f3346898cfa6ef68a37a0071c0ac5f446c11a4..9a07635b78d3cc00b4c51dc2b9d1ee51e72a8eaa 100644
--- a/testsuite/tests/perf/haddock/all.T
+++ b/testsuite/tests/perf/haddock/all.T
@@ -19,21 +19,21 @@ test('haddock.base',
     [unless(in_tree_compiler(), skip), req_haddock
-    ,collect_stats('bytes allocated',5)
+    ,collect_stats('bytes allocated',5, static_stats_file='base.t')
     ],
     static_stats,
-    ['base.t'])
+    [])
 
 test('haddock.Cabal',
     [unless(in_tree_compiler(), skip), req_haddock
-    ,collect_stats('bytes allocated',5)
+    ,collect_stats('bytes allocated',5, static_stats_file='Cabal.t')
     ],
     static_stats,
-    ['Cabal.t'])
+    [])
 
 test('haddock.compiler',
     [unless(in_tree_compiler(), skip), req_haddock
-    ,collect_stats('bytes allocated',10)
+    ,collect_stats('bytes allocated',10, static_stats_file='ghc.t')
     ],
     static_stats,
-    ['ghc.t'])
+    [])
 
 
diff --git a/testsuite/tests/perf/size/Makefile b/testsuite/tests/perf/size/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..df71b12da38391c2d550b820253fb5743339440b
--- /dev/null
+++ b/testsuite/tests/perf/size/Makefile
@@ -0,0 +1,7 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+libdir_size:
+	du -s `$(TEST_HC) --print-libdir` | cut -f1 > SIZE
+
diff --git a/testsuite/tests/perf/size/all.T b/testsuite/tests/perf/size/all.T
new file mode 100644
index 0000000000000000000000000000000000000000..0f507e0964e6753430c9199010df85d046903a90
--- /dev/null
+++ b/testsuite/tests/perf/size/all.T
@@ -0,0 +1,3 @@
+test('size_hello_obj', [collect_size(3, 'size_hello_obj.o')], compile, [''])
+
+test('libdir', [stat_from_file('size', 3, 'SIZE')], makefile_test, ['libdir_size'])
diff --git a/testsuite/tests/perf/size/size_hello_obj.hs b/testsuite/tests/perf/size/size_hello_obj.hs
new file mode 100644
index 0000000000000000000000000000000000000000..9824cf5cc53496a88f5ab53c78944dc07195d498
--- /dev/null
+++ b/testsuite/tests/perf/size/size_hello_obj.hs
@@ -0,0 +1,3 @@
+module Main where
+
+main = print "Hello World!"
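
Usage sketch (reviewer note, not part of the patch): how the new generic-stats helpers are intended to compose in an all.T file. The test names, the 'nm-entries' and 'modules' metrics, and the NM_OUT/MODULES files below are hypothetical; only collect_size, stat_from_file and collect_generic_stat come from the driver changes above, and the deviation argument plays the same role as in collect_stats: the allowed percentage drift from the recorded baseline before the test fails.

    # Track the size of a build artifact, allowing 3% deviation from the baseline.
    test('size_hello_obj', [collect_size(3, 'size_hello_obj.o')], compile, [''])

    # A Makefile rule is assumed to write a single integer into NM_OUT; the driver
    # reads it back and records it under the metric name 'nm-entries'.
    test('nm_count', [stat_from_file('nm-entries', 5, 'NM_OUT')], makefile_test, ['nm_count'])

    # Compute a metric directly in the driver with a lambda: here, the number of
    # lines in a (hypothetical) MODULES file produced by the test, with 10% tolerance.
    test('module_count',
         [collect_generic_stat('modules', 10,
                               lambda way: len(Path(in_testdir('MODULES')).read_text().splitlines()))],
         makefile_test, ['module_count'])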