#!/usr/bin/env python3

#
# (c) Jared Weakly 2017
#
# This is a utility to facilitate the comparison of performance metrics across
# arbitrary commits. It produces a table comparing metrics between measurements
# taken for the given commits in the given test environment (which defaults to
# 'local' if not set via --test-env).
#
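# Example usage (illustrative invocations):
#
#     python3 perf_notes.py --test-env local HEAD~10..HEAD
#         Print a table of locally recorded metrics for the last 10 commits.
#
#     python3 perf_notes.py --ci --chart perf.html HEAD~50..HEAD
#         Write a chart of fetched CI metrics to perf.html.
#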

from enum import Enum
import colorsys
import tempfile
import json
import argparse
import re
import subprocess
import time
import sys

from collections import namedtuple
from math import ceil, trunc

from testutil import passed, failBecause, testing_metrics
from term_color import Color, colored

from my_typing import *

# Check if "git rev-parse" can be run successfully.
# True implies the current directory is a git repo.
_inside_git_repo_cache = None # type: Optional[bool]
def inside_git_repo() -> bool:
    global _inside_git_repo_cache
    if _inside_git_repo_cache is None:
        try:
            subprocess.check_call(['git', 'rev-parse', 'HEAD'],
                                stdout=subprocess.DEVNULL)
            _inside_git_repo_cache = True
        except subprocess.CalledProcessError:
            _inside_git_repo_cache = False
    return _inside_git_repo_cache

# Check if the worktree is dirty.
def is_worktree_dirty() -> bool:
    return subprocess.check_output(['git', 'status', '--porcelain']) != b''

#
# Some data access functions. At the moment this uses git notes.
#

NoteNamespace = NewType("NoteNamespace", str)

# The git notes namespace for local results.
LocalNamespace = NoteNamespace("perf")

# The git notes namespace for ci results.
CiNamespace = NoteNamespace("ci/" + LocalNamespace)
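# CI results can be fetched into the "ci/perf" namespace with (see the --ci
# option in main() below):
#     git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/ci/perf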


# The metrics (a.k.a stats) are named tuples, PerfStat, in this form:
#
# ( test_env : 'val',      # Test environment.
#   test     : 'val',      # Name of the test
#   way      : 'val',
#   metric   : 'val',      # Metric being recorded
#   value    : 'val',      # The statistic result e.g. runtime
# )

# All the fields of a metric (excluding commit field).
PerfStat = NamedTuple('PerfStat', [('test_env', TestEnv),
                                   ('test', TestName),
                                   ('way', WayName),
                                   ('metric', MetricName),
                                   ('value', float)])

# A baseline recovered from stored metrics.
Baseline = NamedTuple('Baseline', [('perfStat', PerfStat),
                                   ('commit', GitHash),
                                   ('commitDepth', int)])

class MetricChange(Enum):
    # The metric appears to have no baseline and is presumably a new test.
    NewMetric = 'NewMetric'

    # The metric has not changed.
    NoChange = 'NoChange'

    # The metric increased.
    Increase = 'Increase'

    # The metric decreased.
    Decrease = 'Decrease'

    def __str__(self):
        strings = {
            MetricChange.NewMetric: colored(Color.BLUE,  "new"),
            MetricChange.NoChange:  colored(Color.WHITE, "unchanged"),
            MetricChange.Increase:  colored(Color.RED,   "increased"),
            MetricChange.Decrease:  colored(Color.GREEN, "decreased")
        }
        return strings[self]

AllowedPerfChange = NamedTuple('AllowedPerfChange',
                               [('direction', MetricChange),
                                ('metrics', List[str]),
                                ('opts', Dict[str, str])
                                ])

MetricBaselineOracle = Callable[[WayName, GitHash], Baseline]
MetricDeviationOracle = Callable[[WayName, GitHash], Optional[float]]
MetricOracles = NamedTuple("MetricOracles", [("baseline", MetricBaselineOracle),
                                             ("deviation", MetricDeviationOracle)])

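# Parse a single tab-separated stat line from a git note into a PerfStat.
# For example (illustrative values), the line
#     "x86_64-linux-deb9\tT1234\tnormal\tmax_bytes_used\t81280"
# parses to
#     PerfStat(test_env='x86_64-linux-deb9', test='T1234', way='normal',
#              metric='max_bytes_used', value='81280')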
def parse_perf_stat(stat_str: str) -> PerfStat:
    field_vals = stat_str.strip('\t').split('\t')
    return PerfStat(*field_vals) # type: ignore

# Get all recorded (in a git note) metrics for a given commit.
# Returns an empty list if the note is not found.
def get_perf_stats(commit: Union[GitRef, GitHash]=GitRef('HEAD'),
                   namespace: NoteNamespace = LocalNamespace
                   ) -> List[PerfStat]:
    try:
        log = subprocess.check_output(['git', 'notes', '--ref=' + namespace, 'show', commit], stderr=subprocess.STDOUT).decode('utf-8')
    except subprocess.CalledProcessError:
        return []

    return \
        [ parse_perf_stat(stat_str)
          for stat_str in log.strip('\n').split('\n')
          if stat_str != ''
        ]

# Check if a str is a 40-character git commit hash.
_commit_hash_re = re.compile('[0-9a-f]' * 40)
def is_commit_hash(hash: str) -> bool:
    return _commit_hash_re.fullmatch(hash) != None

# Convert a <ref> to a commit hash code.
def commit_hash(commit: Union[GitHash, GitRef]) -> GitHash:
    if is_commit_hash(commit):
        return GitHash(commit)
    hash = subprocess.check_output(['git', 'rev-parse', commit], \
            stderr=subprocess.STDOUT) \
            .decode() \
            .strip()
    return GitHash(hash)

# Get allowed changes to performance. This is extracted from the commit message of
# the given commit in this form:
#     Metric  (Increase | Decrease)  ['metric' | \['metrics',..\]]  [\((test_env|way)='abc',...\)]: TestName01, TestName02, ...
# Returns a *dictionary* from test name to a *list* of items of the form:
#   {
#           'direction': either 'Increase' or 'Decrease',
#           'metrics': ['metricA', 'metricB', ...],
#           'opts': {
#                   'optionA': 'string value',
#                   'optionB': 'string value',          # e.g. test_env: "x86_64-linux"
#                   ...
#               }
#   }
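#
# For example (an illustrative commit message line):
#
#     Metric Increase 'bytes allocated' (test_env='x86_64-linux-deb9'): T1234
#
# would map 'T1234' to a single AllowedPerfChange with direction=Increase,
# metrics=['bytes allocated'] and opts={'test_env': 'x86_64-linux-deb9'}.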
_get_allowed_perf_changes_cache = {} # type: Dict[GitHash, Dict[TestName, List[AllowedPerfChange]]]
def get_allowed_perf_changes(commit: Union[GitRef, GitHash]=GitRef('HEAD')
                             ) -> Dict[TestName, List[AllowedPerfChange]]:
    global _get_allowed_perf_changes_cache
    chash = commit_hash(commit)
    if not chash in _get_allowed_perf_changes_cache:
        _get_allowed_perf_changes_cache[chash] \
            = parse_allowed_perf_changes(get_commit_message(chash))
    return _get_allowed_perf_changes_cache[chash]

# Get the commit message of any commit <ref>.
# This is cached (keyed on the full commit hash).
_get_commit_message = {} # type: Dict[GitHash, str]
def get_commit_message(commit: Union[GitHash, GitRef]=GitRef('HEAD')) -> str:
    global _get_commit_message
    commit = commit_hash(commit)
    if not commit in _get_commit_message:
        _get_commit_message[commit] = subprocess.check_output(\
            ['git', '--no-pager', 'log', '-n1', '--format=%B', commit]).decode()
    return _get_commit_message[commit]

def parse_allowed_perf_changes(commitMsg: str
                               ) -> Dict[TestName, List[AllowedPerfChange]]:
    # Helper regex. Non-capturing unless postfixed with Cap.
    s = r"(?:\s*\n?\s+)"                                    # Space, possible new line with an indent.
    qstr = r"(?:'(?:[^'\\]|\\.)*')"                         # Quoted string.
    qstrCap = r"(?:'((?:[^'\\]|\\.)*)')"                    # Quoted string. Captures the string without the quotes.
    innerQstrList = r"(?:"+qstr+r"(?:"+s+r"?,"+s+r"?"+qstr+r")*)?"     # Inside of a list of strings.
    qstrList = r"(?:\["+s+r"?"+innerQstrList+s+r"?\])"      # A list of strings (using square brackets).

    exp = (r"^Metric"
        +s+r"(Increase|Decrease)"
        +s+r"?("+qstr+r"|"+qstrList+r")?"                   # Metric or list of metrics.s..
196
        +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?"      # Options surrounded in parenthesis. (allow parenthases in quoted strings)
        +s+r"?:?"                                           # Optional ":"
        +s+r"?((?:(?!\n\n)(?!\n[^\s])(?:.|\n))*)"           # Test names. Stop parsing on empty or non-indented new line.
        )

    matches = re.findall(exp, commitMsg, re.M)
    changes = {} # type: Dict[TestName, List[AllowedPerfChange]]
    for (direction, metrics_str, opts_str, tests_str) in matches:
        tests = tests_str.split()
        for test in tests:
            allowed = AllowedPerfChange(
                direction = MetricChange[direction],
                metrics = re.findall(qstrCap, metrics_str),
                opts = dict(re.findall(r"(\w+)"+s+r"?="+s+r"?"+qstrCap, opts_str))
            )
            changes.setdefault(test, []).append(allowed)

    return changes

# Calculate a suggested string to append to the git commit message in order to
# accept the given changes.
# changes: [(MetricChange, PerfStat)]
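# For example (illustrative): if tests T1 and T2 each show only metric
# increases, the suggested string is:
#
#     Metric Increase:
#         T1
#         T2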
def allow_changes_string(changes: List[Tuple[MetricChange, PerfStat]]
                         ) -> str:
    Dec = MetricChange.Decrease
    Inc = MetricChange.Increase

    # We only care about increase / decrease metrics.
    changes = [change for change in changes if change[0] in [Inc, Dec]]

    # Map tests to a map from change direction to metrics.
    test_to_dir_to_metrics = {} # type: Dict[TestName, Dict[MetricChange, List[MetricName]]]
    for (change, perf_stat) in changes:
        change_dir_to_metrics = test_to_dir_to_metrics.setdefault(perf_stat.test, { Inc: [], Dec: [] })
        change_dir_to_metrics[change].append(perf_stat.metric)

    # Split into 3 groups.
    # Tests where all changes are *increasing*.
    # Tests where all changes are *decreasing*.
    # Tests where changes are *mixed* increasing and decreasing.
    groupDec = []
    groupInc = []
    groupMix = []
    for (test, decsAndIncs) in test_to_dir_to_metrics.items():
        decs = decsAndIncs[Dec]
        incs = decsAndIncs[Inc]
        if decs and incs:
            groupMix.append(test)
        elif not decs:
            groupInc.append(test)
        else:
            groupDec.append(test)

    msgs = []
    nltab = '\n    '

    # Decreasing group.
    if groupDec:
        msgs.append('Metric Decrease:' + nltab + nltab.join(sorted(groupDec)))

    # Increasing group.
    if groupInc:
        msgs.append('Metric Increase:' + nltab + nltab.join(sorted(groupInc)))

    # Mixed group.
    if groupMix:
        # Split mixed group tests by decrease/increase, then by metric.
        dir_to_metric_to_tests = {
            Dec: {},
            Inc: {}
        } # type: Dict[MetricChange, Dict[MetricName, List[TestName]]]
        for test in groupMix:
            for change_dir, metrics in test_to_dir_to_metrics[test].items():
                for metric in metrics:
                    dir_to_metric_to_tests[change_dir].setdefault(metric, []).append(test)

        for change_dir in [Dec, Inc]:
            metric_to_tests = dir_to_metric_to_tests[change_dir]
            for metric in sorted(metric_to_tests.keys()):
                tests = sorted(metric_to_tests[metric])
                msgs.append('Metric ' + change_dir.value + ' \'' + metric + '\':' + nltab + nltab.join(tests))

    return '\n\n'.join(msgs)

# Formats a list of metrics into a string. Used e.g. to save metrics to a file or git note.
def format_perf_stat(stats: Union[PerfStat, List[PerfStat]], delimitor: str = "\t") -> str:
    # If a single stat, convert to a singleton list.
    if not isinstance(stats, list):
        stats = [stats]

    return "\n".join([delimitor.join([str(stat_val) for stat_val in stat]) for stat in stats])

# Appends a list of metrics to the git note of the given commit.
# Tries up to max_tries times to write to git notes should it fail for some reason.
# Each retry will wait 1 second.
# Returns True if the note was successfully appended.
def append_perf_stat(stats: List[PerfStat],
                     commit: GitRef = GitRef('HEAD'),
                     namespace: NoteNamespace = LocalNamespace,
                     max_tries: int=5
                     ) -> bool:
    # Append to git note
    print('Appending ' + str(len(stats)) + ' stats to git notes.')
    stats_str = format_perf_stat(stats)
    def try_append():
            try:
                return subprocess.check_output(['git', 'notes', '--ref=' + namespace, 'append', commit, '-m', stats_str])
            except subprocess.CalledProcessError:
                return b'Git - fatal'

    tries = 0
    while tries < max_tries:
        if not b'Git - fatal' in try_append():
            return True
        tries += 1
        time.sleep(1)

    print("\nAn error occurred while writing the performance metrics to git notes.\n \
            This is usually due to a lock-file existing somewhere in the git repo.")

    return False

#
# Baseline calculation
#

# Max number of ancestor commits to search when compiling a baseline performance metric.
BaselineSearchDepth = 75

# (isCalculated, best fit ci test_env or None)
BestFitCiTestEnv = (False, None) # type: Tuple[bool, Optional[TestEnv]]

# test_env string or None
def best_fit_ci_test_env() -> Optional[TestEnv]:
    global BestFitCiTestEnv
    if not BestFitCiTestEnv[0]:
        platform = sys.platform
        isArch64 = sys.maxsize > 2**32
        arch = "x86_64" if isArch64 else "i386"

        if platform.startswith("linux"):
            test_env = TestEnv(arch + "-linux-deb9")  # type: Optional[TestEnv]
        elif platform.startswith("win32"):
            # There are no windows CI test results.
            test_env = None
        elif isArch64 and platform.startswith("darwin"):
            test_env = TestEnv(arch + "-darwin")
        elif isArch64 and platform.startswith("freebsd"):
            test_env = TestEnv(arch + "-freebsd")
        else:
            test_env = None

        BestFitCiTestEnv = (True, test_env)

    return BestFitCiTestEnv[1]

_baseline_depth_commit_log = {} # type: Dict[GitHash, List[GitHash]]

# Get the commit hashes for the last BaselineSearchDepth commits from and
# including the input commit. The output commits are all commit hashes.
def baseline_commit_log(commit: Union[GitHash,GitRef]) -> List[GitHash]:
    global _baseline_depth_commit_log
    chash = commit_hash(commit)
    if chash not in _baseline_depth_commit_log:
        _baseline_depth_commit_log[chash] = commit_log(chash, BaselineSearchDepth)

    return _baseline_depth_commit_log[chash]

# Get the commit hashes for the last n commits from and
# including the input commit. The output commits are all commit hashes.
# str -> [str]
def commit_log(commitOrRange, n=None):
    nArgs = ['-n' + str(n)] if n != None else []
    output = subprocess.check_output(['git', 'log', '--format=%H'] + nArgs + [commitOrRange]).decode()
    hashes = list(filter(is_commit_hash, output.split('\n')))

    # We only got 10 results (expecting 75) in a CI pipeline (issue #16662).
    # It's unclear from the logs what went wrong. Since no exception was
    # thrown, we can assume the `git log` call above succeeded. The best we
    # can do for now is improve logging.
    actualN = len(hashes)
    if n != None and actualN != n:
        print("Expected " + str(n) + " hashes, but git gave " + str(actualN) + ":\n" + output)
    return hashes

# Cache of baseline values. This is a dict of dicts indexed on:
# (useCiNamespace, commit) -> (test_env, test, metric, way) -> baseline
# (bool          , str   ) -> (str     , str , str   , str) -> float
_commit_metric_cache = {} # type: ignore

# Get the baseline of a test at a given commit. This is the expected value
# *before* the commit is applied (i.e. on the parent commit).
# This searches git notes from older commits for recorded metrics (locally and
# from CI). More recent commits are favoured, and local results are favoured
# over CI results.
#
# commit: str - must be a commit hash (see commit_hash())
# name: str - test name
# test_env: str - test environment (note a best fit test_env will be used
#                      instead when looking for ci results)
# metric: str - test metric
# way: str - test way
# returns: the Baseline or None if no metric was found within
#          BaselineSearchDepth commits and since the last expected change
#          (ignoring any expected change in the given commit).
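# For example (illustrative): a returned Baseline(perfStat, chash, 3) means the
# metric was recovered from the git note of the commit three commits before the
# given one (reported elsewhere as "HEAD~3"), with perfStat.value being the
# average of the values recorded there.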
def baseline_metric(commit: GitHash,
                    name: TestName,
                    test_env: TestEnv,
                    metric: MetricName,
                    way: WayName
                    ) -> Optional[Baseline]:
    # For performance reasons (in order to avoid calling commit_hash), we assert
    # commit is already a commit hash.
    assert is_commit_hash(commit)

    # Get all recent commit hashes.
    commit_hashes = baseline_commit_log(commit)

    def has_expected_change(commit: GitHash) -> bool:
        return get_allowed_perf_changes(commit).get(name) is not None

    # Search backwards through ancestor commits (starting from the parent of the
    # given commit) for the most recent commit with a recorded metric in the
    # given namespace.
    def find_baseline(namespace: NoteNamespace,
                      test_env: TestEnv
                      ) -> Optional[Baseline]:
        for depth, current_commit in list(enumerate(commit_hashes))[1:]:
            # Check for a metric on this commit.
            current_metric = get_commit_metric(namespace, current_commit, test_env, name, metric, way)
            if current_metric is not None:
                return Baseline(current_metric, current_commit, depth)

            # Stop if there is an expected change at this commit. In that case
            # metrics on ancestor commits will not be a valid baseline.
            if has_expected_change(current_commit):
                return None

        return None

    # Test environment to use when comparing against CI namespace
    ci_test_env = best_fit_ci_test_env()

    baseline = find_baseline(LocalNamespace, test_env) # type: Optional[Baseline]
    if baseline is None and ci_test_env is not None:
        baseline = find_baseline(CiNamespace, ci_test_env)

    return baseline

# Same as get_commit_metric(), but converts the result to a string or keeps it
# as None.
def get_commit_metric_value_str_or_none(gitNoteRef,
                                        commit: GitRef,
                                        test_env: TestEnv,
                                        name: TestName,
                                        metric: MetricName,
                                        way: WayName
                                        ) -> Optional[str]:
    result = get_commit_metric(gitNoteRef, commit, test_env, name, metric, way)
    if result is None:
        return None
    return str(result.value)

# Get the average commit metric from git notes.
# gitNoteRef: git notes ref space e.g. "perf" or "ci/perf"
# ref: git commit
# test_env: test environment
# name: test name
# metric: test metric
# way: test way
# returns: PerfStat | None if stats don't exist for the given input
def get_commit_metric(gitNoteRef,
                      ref: Union[GitRef, GitHash],
                      test_env: TestEnv,
                      name: TestName,
                      metric: MetricName,
                      way: WayName
                      ) -> Optional[PerfStat]:
    global _commit_metric_cache
    assert test_env != None
    commit = commit_hash(ref)

    # Check for cached value.
    cacheKeyA = (gitNoteRef, commit)
    cacheKeyB = (test_env, name, metric, way)
    if cacheKeyA in _commit_metric_cache:
        return _commit_metric_cache[cacheKeyA].get(cacheKeyB)

    # Cache miss.
    # Calculate baselines from the current commit's git note.
    # Note that the git note may contain data for other tests. All tests'
    # baselines will be collected and cached for future use.
    allCommitMetrics = get_perf_stats(ref, gitNoteRef)

    # Collect recorded values by cacheKeyB.
    values_by_cache_key_b = {}  # type: Dict[Tuple[TestEnv, TestName, MetricName, WayName], List[float]]
    for perfStat in allCommitMetrics:
        currentCacheKey = (perfStat.test_env, perfStat.test, \
                            perfStat.metric, perfStat.way)
        currentValues = values_by_cache_key_b.setdefault(currentCacheKey, [])
        currentValues.append(float(perfStat.value))

    # Calculate the baseline (average of values) by cacheKeyB.
    baseline_by_cache_key_b = {}
    for currentCacheKey, currentValues in values_by_cache_key_b.items():
        baseline_by_cache_key_b[currentCacheKey] = PerfStat( \
                currentCacheKey[0],
                currentCacheKey[1],
                currentCacheKey[3],
                currentCacheKey[2],
                sum(currentValues) / len(currentValues))

    # Save baselines to the cache.
    _commit_metric_cache[cacheKeyA] = baseline_by_cache_key_b
    return baseline_by_cache_key_b.get(cacheKeyB)

# Check test stats. This prints the results for the user.
# actual: the PerfStat with actual value.
# baseline: the expected Baseline value (this should generally be derived from baseline_metric())
# tolerance_dev: allowed deviation of the actual value from the expected value.
# allowed_perf_changes: allowed changes in stats. This is a dictionary as returned by get_allowed_perf_changes().
# force_print: Print stats even if the test stat was in the tolerance range.
# Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
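# For example (illustrative numbers): with a baseline value of 1000 and
# tolerance_dev = 5, an actual value in roughly [950, 1050] counts as NoChange;
# below that range is a Decrease and above it is an Increase.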
def check_stats_change(actual: PerfStat,
                       baseline: Baseline,
                       tolerance_dev,
                       allowed_perf_changes: Dict[TestName, List[AllowedPerfChange]] = {},
                       force_print = False
                       ) -> Tuple[MetricChange, Any]:
    expected_val = baseline.perfStat.value
    full_name = actual.test + ' (' + actual.way + ')'

    lowerBound = trunc(           int(expected_val) * ((100 - float(tolerance_dev))/100))
    upperBound = trunc(0.5 + ceil(int(expected_val) * ((100 + float(tolerance_dev))/100)))

    actual_dev = round(((float(actual.value) * 100)/ int(expected_val)) - 100, 1)

    # Find the direction of change.
    change = MetricChange.NoChange
    if actual.value < lowerBound:
        change = MetricChange.Decrease
    elif actual.value > upperBound:
        change = MetricChange.Increase

    # Is the change allowed?
    allowed_change_directions =  [MetricChange.NoChange] + [ allow_stmt.direction
            for allow_stmt in allowed_perf_changes.get(actual.test, [])

            # The list of metrics is not specified, or the metric is in the list.
            if not allow_stmt.metrics or actual.metric in allow_stmt.metrics

            # way/test_env are not specified, or match the actual way/test_env.
            if ((not 'way'      in allow_stmt.opts.keys()) or actual.way      == allow_stmt.opts['way'])
            if ((not 'test_env' in allow_stmt.opts.keys()) or actual.test_env == allow_stmt.opts['test_env'])
        ]
    change_allowed = change in allowed_change_directions

    # Print errors and create pass/fail object.
    result = passed()
    if not change_allowed:
        error = str(change) + ' from ' + baseline.perfStat.test_env + \
                ' baseline @ HEAD~' + str(baseline.commitDepth)
        print(actual.metric, error + ':')
        result = failBecause('stat ' + error, tag='stat')

    if not change_allowed or force_print:
        length = max(len(str(x)) for x in [expected_val, lowerBound, upperBound, actual.value])

        def display(descr, val, extra):
            print(descr, str(val).rjust(length), extra)

        display('    Expected    ' + full_name + ' ' + actual.metric + ':', expected_val, '+/-' + str(tolerance_dev) + '%')
        display('    Lower bound ' + full_name + ' ' + actual.metric + ':', lowerBound, '')
        display('    Upper bound ' + full_name + ' ' + actual.metric + ':', upperBound, '')
        display('    Actual      ' + full_name + ' ' + actual.metric + ':', actual.value, '')
        if actual.value != expected_val:
            display('    Deviation   ' + full_name + ' ' + actual.metric + ':', actual_dev, '%')

    return (change, result)

# Generate a CSS color (rgb) string based on the hash of the input.
def hash_rgb_str(x) -> str:
    res = 10000.0
    rgb = colorsys.hsv_to_rgb((abs(int(hash(x))) % res)/res, 1.0, 0.9)
    return "rgb(" + str(int(rgb[0] * 255)) + ", " + str(int(rgb[1] * 255)) + ", " + str(int(rgb[2] * 255)) + ")"

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--add-note", nargs=3,
                        help="Development only. --add-note N commit seed \
                        Adds N fake metrics to the given commit using the random seed.")
    parser.add_argument("--ci", action='store_true',
                        help="Use ci results. You must fetch these with:\n    " \
                            + "$ git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/ci/perf")

    group = parser.add_argument_group(title='Filtering', description="Select which subset of performance metrics to dump")
    group.add_argument("--test-env",
                       help="The given test environment to be compared. Use 'local' for locally run results. If using --ci, see .gitlab-ci file for TEST_ENV settings.")
    group.add_argument("--test-name",
                       help="Filters for tests matching the given regular expression.")
    group.add_argument("--metric",
                       help="Test metric (one of " + str(testing_metrics()) + ").")
    group.add_argument("--way",
                       help="Test way (one of " + str(testing_metrics()) + ").")

    group = parser.add_argument_group(title='Plotting', description="Plot historical performance metrics")
    group.add_argument("--chart", nargs='?', default=None, action='store', const='./PerformanceChart.html',
                       help='Create a chart of the results and save it to the given file. Defaults to "./PerformanceChart.html".')
    group.add_argument("--zero-y", action='store_true',
                       help='When charting, include 0 in y axis')

    parser.add_argument("commits", nargs='+',
                        help="Either a list of commits or a single commit range (e.g. HEAD~10..HEAD).")
    args = parser.parse_args()

    env = 'local'
    name = re.compile('.*')
    CommitAndStat = NamedTuple('CommitAndStat',
                               [('commit', GitHash), ('stat', PerfStat)])
    metrics = [] # type: List[CommitAndStat]
    singleton_commit = len(args.commits) == 1

    #
    # Main logic of the program when called from the command-line.
    #

    ref = NoteNamespace('perf')
    if args.ci:
        ref = NoteNamespace('ci/perf')

    commits = args.commits
    if args.commits:
        # Commit range
        if len(commits) == 1 and ".." in commits[0]:
            commits = list(reversed(commit_log(commits[0])))
        for c in commits:
            metrics += [CommitAndStat(c, stat) for stat in get_perf_stats(c, ref)]

    if args.metric:
        metrics = [test for test in metrics if test.stat.metric == args.metric]

    if args.way:
        metrics = [test for test in metrics if test.stat.way == args.way]

    if args.test_env:
        metrics = [test for test in metrics if test.stat.test_env == args.test_env]

    if args.test_name:
        nameRe = re.compile(args.test_name)
        metrics = [test for test in metrics if nameRe.search(test.stat.test)]

    if args.add_note:
        def note_gen(n, commit, delta=''):
            note = []
            # Generates simple fake data. Likely not comprehensive enough to catch all edge cases.
            if not delta:
                note.extend([PerfStat('local', 'T'+ str(i*100), 'some_way', 'some_field', str(i*1000)) for i in range(1,int(int(n)/2)+1)])
                note.extend([PerfStat('non-local', 'W'+ str(i*100), 'other_way', 'other_field', str(i*100)) for i in range(int(int(n)/2)+1,int(n)+1)])
            if delta:
                hu = abs(hash(delta))
                hv = abs(hash(hu))
                u = int(hu % 100)
                v = int(hv % 10)
                note.extend([PerfStat('local', 'T'+ str(i*100), 'some_way', 'some_field', str(i*u)) for i in range(1,int(int(n)/2)+1)])
                note.extend([PerfStat('non-local', 'W'+ str(i*100), 'other_way', 'other_field', str(i*v)) for i in range(int(int(n)/2)+1,int(n)+1)])

            append_perf_stat(note, commit)

        note_gen(args.add_note[0],args.add_note[1],args.add_note[2])

    #
    # Chart
    #
    def metricAt(commit, testName, testMetric):
        values2 = [float(t.stat.value) for t in metrics if t.commit == commit \
                                                       and t.stat.test == testName \
                                                       and t.stat.metric == testMetric]
        if values2 == []:
            return None
        else:
            return (sum(values2) / len(values2))

    testSeries = list(set([(test.stat.test_env, test.stat.test, test.stat.metric, test.stat.way) for test in metrics]))

    #
    # Use Chart.js to visualize the data.
    #

    if args.chart:
        commitMsgs = dict([(h, get_commit_message(h)) for h in commits])
        chartData = {
                'type': 'line',
                'data': {
                    'labels': [commitMsgs[h].split("\n")[0] + " (" + \
                                    (h[:8] if is_commit_hash(h) else h) + \
                                ")" for h in commits],
                    'datasets': [{
                        'label': name + "(" + way + ") " + metric + " - " + env,
                        'data': [get_commit_metric_value_str_or_none(ref, commit, env, name, metric, way) \
                                        for commit in commits],

                        'fill': 'false',
                        'spanGaps': 'true',
                        'lineTension': 0,
                        'backgroundColor': hash_rgb_str((env, name, metric, way)),
                        'borderColor': hash_rgb_str((env, name, metric, way))
                    } for (env, name, metric, way) in testSeries]
                },
                'options': {
                    'scales': {
                        'yAxes': [{
                            'ticks': { 'beginAtZero': True }
                        }]
                    }
                }
            }


        # Try to use the local Chart.js and tooltip.js files, else fall back to the online Chart.js.
        tooltipjsFilePath = sys.path[0] + "/js/tooltip.js"
        chartjsFilePath = sys.path[0] + "/js/Chart-2.8.0.min.js"
        tooltipjsTag = None
        try:
            tooltipjsFile = open(tooltipjsFilePath, "r")
            tooltipjsTag = '<script>' + tooltipjsFile.read() + '</script>'
            tooltipjsFile.close()
        except:
            print("Failed to load custom tooltip: " + chartjsFilePath + ".")
            tooltipjsTag = None
        try:
            chartjsFile = open(chartjsFilePath, "r")
            chartjsTag = '<script>' + chartjsFile.read() + '</script>'
            chartjsFile.close()
        except:
            print("Failed to load " + chartjsFilePath + ", reverting to online Chart.js.")
            chartjsTag = '<script src="https://cdn.jsdelivr.net/npm/chart.js@2.8.0"></script>'

        file = open(args.chart, "w+t")
        print(\
            "<html>" + \
                '<head>\n' + \
                    (tooltipjsTag if tooltipjsTag is not None else '') + \
                    chartjsTag + \
                '</head>' + \
                '<body style="padding: 20px"><canvas id="myChart"></canvas><script>' + \
                    "var ctx = document.getElementById('myChart').getContext('2d');" + \
                    "var commitMsgs = " + json.dumps(commitMsgs, indent=2) + ";" + \
                    "var chartData = " + json.dumps(chartData, indent=2) + ";" + \
                    (("var chart = new Chart(ctx, setCustomTooltip(chartData, commitMsgs));") \
                        if tooltipjsTag is not None else \
                     ("var chart = new Chart(ctx, chartData);")) + \
                '</script></body>' + \
            "</html>"\
            , file=file)
        file.close()
        exit(0)

    #
    # String utilities for pretty-printing
    #

    #                  T1234                 T1234
    #              max_bytes             max_bytes
    #                 normal                normal
    # commit   x86_64-darwin       i386-linux-deb9
    # --------------------------------------------
    # HEAD              9123                  9123
    # HEAD~1           10023                 10023
    # HEAD~2           21234                 21234
    # HEAD~3           20000                 20000

    # Data is already in column-major format, so calculate the column widths,
    # then transpose and print each row.
    def strMetric(x):
        return '{:.2f}'.format(x.value) if x != None else ""

    headerCols = [ ["","","","Commit"] ] \
                + [ [name, metric, way, env] for (env, name, metric, way) in testSeries ]
    dataCols = [ commits ] \
                + [ [strMetric(get_commit_metric(ref, commit, env, name, metric, way)) \
                        for commit in commits ] \
                        for (env, name, metric, way) in testSeries ]
    colWidths = [max([2+len(cell) for cell in colH + colD]) for (colH,colD) in zip(headerCols, dataCols)]
    col_fmts = ['{:>' + str(w) + '}' for w in colWidths]

    def printCols(cols):
        for row in zip(*cols):
            # print(list(zip(col_fmts, row)))
            print(''.join([f.format(cell) for (f,cell) in zip(col_fmts, row)]))

    printCols(headerCols)
    print('-'*(sum(colWidths)+2))
    printCols(dataCols)

if __name__ == '__main__':
    main()