Commit cc2261d4 authored by David Eichmann, committed by Ben Gamari

Performance tests: recover a baseline from ancestor commits and CI results.

gitlab-ci: push performance metrics as git notes to the "GHC Performance Notes" repository.
parent 513a449c
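For reviewers trying this out locally: the testsuite driver can only recover a CI baseline if the performance notes have been fetched into the ci/perf notes namespace. A minimal sketch, assuming the public notes repository and namespace referenced in the changes below:

    # Fetch the CI performance notes into the namespace the testsuite driver reads (ci/perf).
    git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git \
        refs/notes/perf:refs/notes/ci/perf
    # Inspect the metrics recorded for a commit, if any.
    git notes --ref=ci/perf show HEAD || echo "no CI metrics recorded for HEAD"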
......@@ -6,6 +6,7 @@ before_script:
- git submodule sync --recursive
- git submodule update --init --recursive
- git checkout .gitmodules
- "git fetch origin refs/notes/perf:refs/notes/ci/perf || true"
stages:
- lint
......@@ -75,6 +76,7 @@ validate-x86_64-linux-deb8-hadrian:
- git submodule sync --recursive
- git submodule update --init --recursive
- git checkout .gitmodules
- "git fetch origin refs/notes/perf:refs/notes/ci/perf || true"
tags:
- x86_64-linux
......@@ -96,9 +98,17 @@ validate-x86_64-linux-deb8-hadrian:
- |
make binary-dist TAR_COMP_OPTS="-1"
mv ghc-*.tar.xz ghc.tar.xz
- |
# Prepare to push git notes.
METRICS_FILE=$(mktemp)
git config user.email "ben+ghc-ci@smart-cactus.org"
git config user.name "GHC GitLab CI"
- |
THREADS=`mk/detect-cpu-count.sh`
make $TEST_TYPE THREADS=$THREADS JUNIT_FILE=../../junit.xml
make $TEST_TYPE THREADS=$THREADS JUNIT_FILE=../../junit.xml METRICS_FILE=$METRICS_FILE
- |
# Push git notes.
METRICS_FILE=$METRICS_FILE .gitlab/push-test-metrics.sh
dependencies: []
artifacts:
reports:
......@@ -120,12 +130,14 @@ validate-x86_64-darwin:
ac_cv_func_clock_gettime: "no"
LANG: "en_US.UTF-8"
CONFIGURE_ARGS: --with-intree-gmp
TEST_ENV: "x86_64-darwin"
before_script:
- git clean -xdf && git submodule foreach git clean -xdf
- python3 .gitlab/fix-submodules.py
- git submodule sync --recursive
- git submodule update --init --recursive
- git checkout .gitmodules
- "git fetch origin refs/notes/perf:refs/notes/ci/perf || true"
- bash .gitlab/darwin-init.sh
- PATH="`pwd`/toolchain/bin:$PATH"
......@@ -150,6 +162,7 @@ validate-x86_64-darwin:
- git submodule sync --recursive
- git submodule update --init --recursive
- git checkout .gitmodules
- "git fetch origin refs/notes/perf:refs/notes/ci/perf || true"
- bash .circleci/prepare-system.sh
# workaround for docker permissions
......@@ -167,6 +180,8 @@ validate-aarch64-linux-deb9:
stage: full-build
image: ghcci/aarch64-linux-deb9:0.1
allow_failure: true
variables:
TEST_ENV: "aarch64-linux-deb9"
artifacts:
when: always
expire_in: 2 week
......@@ -191,6 +206,8 @@ validate-i386-linux-deb9:
stage: full-build
image: ghcci/i386-linux-deb9:0.1
allow_failure: true
variables:
TEST_ENV: "i386-linux-deb9"
artifacts:
when: always
expire_in: 2 week
......@@ -204,6 +221,7 @@ nightly-i386-linux-deb9:
allow_failure: true
variables:
TEST_TYPE: slowtest
TEST_ENV: "i386-linux-deb9"
artifacts:
when: always
expire_in: 2 week
......@@ -217,6 +235,8 @@ validate-x86_64-linux-deb9:
extends: .validate-linux
stage: build
image: ghcci/x86_64-linux-deb9:0.2
variables:
TEST_ENV: "x86_64-linux-deb9"
artifacts:
when: always
expire_in: 2 week
......@@ -241,6 +261,7 @@ validate-x86_64-linux-deb9-llvm:
image: ghcci/x86_64-linux-deb9:0.2
variables:
BUILD_FLAVOUR: perf-llvm
TEST_ENV: "x86_64-linux-deb9-llvm"
cache:
key: linux-x86_64-deb9
......@@ -248,6 +269,8 @@ validate-x86_64-linux-deb8:
extends: .validate-linux
stage: full-build
image: ghcci/x86_64-linux-deb8:0.1
variables:
TEST_ENV: "x86_64-linux-deb8"
cache:
key: linux-x86_64-deb8
artifacts:
......@@ -258,6 +281,8 @@ validate-x86_64-linux-fedora27:
extends: .validate-linux
stage: full-build
image: ghcci/x86_64-linux-fedora27:0.1
variables:
TEST_ENV: "x86_64-linux-fedora27"
cache:
key: linux-x86_64-fedora27
artifacts:
......@@ -269,6 +294,7 @@ validate-x86_64-linux-deb9-integer-simple:
stage: full-build
variables:
INTEGER_LIBRARY: integer-simple
TEST_ENV: "x86_64-linux-deb9-integer-simple"
image: ghcci/x86_64-linux-deb9:0.2
cache:
key: linux-x86_64-deb9
......@@ -289,6 +315,7 @@ validate-x86_64-linux-deb9-unreg:
stage: full-build
variables:
CONFIGURE_ARGS: --enable-unregisterised
TEST_ENV: "x86_64-linux-deb9-unreg"
image: ghcci/x86_64-linux-deb9:0.2
cache:
key: linux-x86_64-deb9
......@@ -314,6 +341,7 @@ validate-x86_64-linux-deb9-unreg:
- git submodule sync --recursive
- git submodule update --init --recursive
- git checkout .gitmodules
- "git fetch origin refs/notes/perf:refs/notes/ci/perf || true"
- bash .gitlab/win32-init.sh
after_script:
- rd /s /q tmp
......
#!/usr/bin/env bash
# vim: sw=2 et
set -euo pipefail
NOTES_ORIGIN="git@gitlab.haskell.org:ghc/ghc-performance-notes.git"
REF="perf"
fail() {
echo "ERROR: $*" >&2
exit 1
}
# Check that the private key is available (set on all GitLab protected branches).
if ! [ -v PERF_NOTE_KEY ] || [ "$PERF_NOTE_KEY" = "" ]; then
echo "Not pushing performance git notes: PERF_NOTE_KEY is not set."
exit 0
fi
# TEST_ENV must be set.
if ! [ -v TEST_ENV ] || [ "$TEST_ENV" = "" ]; then
fail "Not pushing performance git notes: TEST_ENV must be set."
fi
# Assert that the METRICS_FILE exists and can be read.
if ! [ -v METRICS_FILE ] || [ "$METRICS_FILE" = "" ]
then
fail "\$METRICS_FILE not set."
fi
if ! [ -r "$METRICS_FILE" ]
then
fail "Metrics file not found: $METRICS_FILE"
fi
# Add gitlab as a known host.
mkdir -p ~/.ssh
echo "|1|+AUrMGS1elvPeLNt+NHGa5+c6pU=|4XvfRsQftO1OgZD4c0JJ7oNaii8= ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDXilA5l4kOZPx0nM6xDATF+t4fS6te0eYPDwBI/jLWD9cJVtCnsrwMl5ar+/NfmcD0jnCYztUiVHuXyTaWPJYSQpwltfpTeqpo9/z/0MxkPtSl1uMP2cLbDiqA01OWveChktOXwU6hRQ+7MmO+dNRS/iXrRmYrGv/p1W811QgLBLS9fefEdF25n+0dP71L7Ov7riOawlDmd0C11FraE/R8HX6gs6lbXta1kisdxGyKojYSiCtobUaJxRoatMfUP0a9rwTAyl8tf56LgB+igjMky879VAbL7eQ/AmfHYPrSGJ/YlWP6Jj23Dnos5nOVlWL/rVTs9Y/NakLpPwMs75KTC0Pd74hdf2e3folDdAi2kLrQgO2SI6so7rOYZ+mFkCM751QdDVy4DzjmDvSgSIVf9SV7RQf7e7unE7pSZ/ILupZqz9KhR1MOwVO+ePa5qJMNSdC204PIsRWkIO5KP0QLl507NI9Ri84+aODoHD7gDIWNhU08J2P8/E6r0wcC8uWaxh+HaOjI9BkHjqRYsrgfn54BAuO9kw1cDvyi3c8n7VFlNtvQP15lANwim3gr9upV+r95KEPJCgZMYWJBDPIVtp4GdYxCfXxWj5oMXbA5pf0tNixwNJjAsY7I6RN2htHbuySH36JybOZk+gCj6mQkxpCT/tKaUn14hBJWLq7Q+Q==" >> ~/.ssh/known_hosts
echo "|1|JZkdAPJmpX6SzGeqhmQLfMWLGQA=|4vTELroOlbFxbCr0WX+PK9EcpD0= ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJknufU+I6A5Nm58lmse4/o11Ai2UzYbYe7782J1+kRk" >> ~/.ssh/known_hosts
# Setup ssh keys.
eval `ssh-agent`
echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDJPR1vrZgeGTXmgJw2PsJfMjf22LcDnVVwt3l0rwTZ+8Q2J0bHaYxMRKBco1sON6LGcZepw0Hy76RQ87v057pTz18SXvnfE7U/B6v9qBk0ILJz+4BOX9sEhxu2XmScp/wMxkG9IoyruMlsxXzd1sz09o+rzzx24U2Rp27PRm08vG0oipve6BWLbYEqYrE4/nCufqOJmGd56fju7OTU0lTpEkGDEDWGMxutaX2CbTbDju7qy07Ld8BjSc9aHfvuQaslUbj3ex3EF8EXahURzGpHQn/UFFzVGMokFumiJCAagHQb7cj6jOkKseZLaysbA/mTBQsOzjWiRmkN23bQf1wF ben+ghc-ci@smart-cactus.org" > ~/.ssh/perf_rsa.pub
touch ~/.ssh/perf_rsa
chmod 0600 ~/.ssh/perf_rsa
echo "$PERF_NOTE_KEY" >> ~/.ssh/perf_rsa
ssh-add ~/.ssh/perf_rsa
# Reset the git notes and append the metrics file to the notes, then push and return the result.
# This is favoured over a git notes merge as it avoids potential data loss/duplication from the merge strategy.
function reset_append_note_push {
git fetch -f $NOTES_ORIGIN refs/notes/$REF:refs/notes/$REF || true
echo "git notes --ref=$REF append -F $METRICS_FILE HEAD"
git notes --ref=$REF append -F $METRICS_FILE HEAD
echo "git push $NOTES_ORIGIN refs/notes/$REF"
git push $NOTES_ORIGIN refs/notes/$REF
}
# Push the metrics file as a git note. This may fail if another task pushes a note first. In that case
# the latest note is fetched and appended.
MAX_RETRY=20
until reset_append_note_push || [ $MAX_RETRY -le 0 ]
do
((MAX_RETRY--))
echo ""
echo "Failed to push git notes. Fetching, appending, and retrying... $MAX_RETRY retries left."
done
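A quick way to confirm that a note actually landed for a given commit is to fetch the notes ref back from the shared repository and show it. A sketch, assuming the read-only HTTPS URL also used in the testsuite hint below:

    # Fetch the shared perf notes (overwriting any stale local copy) and print the metrics attached to the current commit.
    git fetch -f https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf
    git notes --ref=perf show HEAD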
......@@ -379,18 +379,37 @@ else:
new_metrics = [metric for (change, metric) in t.metrics if change == MetricChange.NewMetric]
if any(new_metrics):
if canGitStatus:
reason = 'the previous git commit doesn\'t have recorded metrics for the following tests.' + \
' If the tests exist on the previous commit, then check it out and run the tests to generate the missing metrics.'
reason = 'a baseline (expected value) cannot be recovered from' + \
' previous git commits. This may be due to HEAD having' + \
' new tests, expected changes since the last run of the' + \
' tests, and/or the latest test run being too old.'
fix = 'If the tests exist on the previous' + \
' commit (and are configured to run with the same ways),' + \
' then check out that commit and run the tests to generate' + \
' the missing metrics. Alternatively, a baseline may be' + \
' recovered from CI results once fetched:\n\n' + \
spacing + 'git fetch ' + \
'https://gitlab.haskell.org/ghc/ghc-performance-notes.git' + \
' refs/notes/perf:refs/notes/' + Perf.CiNamespace
else:
reason = 'this is not a git repo so the previous git commit\'s metrics cannot be loaded from git notes:'
reason = "this is not a git repo so the previous git commit's" + \
" metrics cannot be loaded from git notes:"
fix = ""
print()
print(str_warn('New Metrics') + ' these metrics trivially pass because ' + reason)
print(spacing + ('\n' + spacing).join(set([metric.test for metric in new_metrics])))
print(str_warn('Missing Baseline Metrics') + \
' these metrics trivially pass because ' + reason)
print(spacing + (' ').join(set([metric.test for metric in new_metrics])))
if fix != "":
print()
print(fix)
# Inform of how to accept metric changes.
if (len(t.unexpected_stat_failures) > 0):
print()
print(str_info("Some stats have changed") + " If this is expected, allow changes by appending the git commit message with this:")
print(str_info("Some stats have changed") + " If this is expected, " + \
"allow changes by appending the git commit message with this:")
print('-' * 25)
print(Perf.allow_changes_string(t.metrics))
print('-' * 25)
......@@ -406,8 +425,9 @@ else:
elif canGitStatus and any(stats):
if is_worktree_dirty():
print()
print(str_warn('Working Tree is Dirty') + ' performance metrics will not be saved.' + \
' Commit changes or use --metrics-file to save metrics to a file.')
print(str_warn('Performance Metrics NOT Saved') + \
' working tree is dirty. Commit changes or use ' + \
'--metrics-file to save metrics to a file.')
else:
Perf.append_perf_stat(stats)
......
......@@ -235,11 +235,17 @@ class TestOptions:
# extra files to copy to the testdir
self.extra_files = []
# Map from metric to expected value and allowed percentage deviation. e.g.
# { 'bytes allocated': (9300000000, 10) }
# To allow a 10% deviation from 9300000000 for the 'bytes allocated' metric.
# Map from metric to (function from way and commit to baseline value, allowed percentage deviation), e.g.
# { 'bytes allocated': (
#   lambda way, commit: None if way == way1 else 9300000000 if way == way2 else ...,
#   10) }
# This means no baseline is available for way1. For way2, allow a 10%
# deviation from 9300000000.
self.stats_range_fields = {}
# Is the test testing performance?
self.is_stats_test = False
# Does this test measure the compiler's performance (as opposed to that of the generated code)?
self.is_compiler_stats_test = False
......
......@@ -65,7 +65,7 @@ def isCompilerStatsTest():
def isStatsTest():
opts = getTestOpts()
return bool(opts.stats_range_fields)
return opts.is_stats_test
# This can be called at the top of a file of tests, to set default test options
......@@ -348,29 +348,18 @@ def testing_metrics():
# measures the performance numbers of the compiler.
# As this is a fairly rare case in the testsuite, it defaults to false to
# indicate that it is a 'normal' performance test.
def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False):
def _collect_stats(name, opts, metrics, deviation, is_compiler_stats_test=False):
if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
failBecause('This test has an invalid name.')
tests = Perf.get_perf_stats('HEAD^')
# Might have multiple metrics being measured for a single test.
test = [t for t in tests if t.test == name]
if tests == [] or test == []:
# There are no prior metrics for this test.
if isinstance(metric, str):
if metric == 'all':
for field in testing_metrics():
opts.stats_range_fields[field] = None
else:
opts.stats_range_fields[metric] = None
if isinstance(metric, list):
for field in metric:
opts.stats_range_fields[field] = None
return
# Normalize metrics to a list of strings.
if isinstance(metrics, str):
if metrics == 'all':
metrics = testing_metrics()
else:
metrics = [metrics]
opts.is_stats_test = True
if is_compiler_stats_test:
opts.is_compiler_stats_test = True
......@@ -379,24 +368,11 @@ def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False):
if config.compiler_debugged and is_compiler_stats_test:
opts.skip = 1
# get the average value of the given metric from test
def get_avg_val(metric_2):
metric_2_metrics = [float(t.value) for t in test if t.metric == metric_2]
return sum(metric_2_metrics) / len(metric_2_metrics)
# 'all' is a shorthand to test for bytes allocated, peak megabytes allocated, and max bytes used.
if isinstance(metric, str):
if metric == 'all':
for field in testing_metrics():
opts.stats_range_fields[field] = (get_avg_val(field), deviation)
return
else:
opts.stats_range_fields[metric] = (get_avg_val(metric), deviation)
return
for metric in metrics:
# Bind metric as a default argument so each iteration captures its own metric.
baselineByWay = lambda way, target_commit, metric=metric: Perf.baseline_metric( \
target_commit, name, config.test_env, metric, way)
if isinstance(metric, list):
for field in metric:
opts.stats_range_fields[field] = (get_avg_val(field), deviation)
opts.stats_range_fields[metric] = (baselineByWay, deviation)
# -----
......@@ -1164,10 +1140,11 @@ def metric_dict(name, way, metric, value):
# name: name of the test.
# way: the way.
# stats_file: the path of the stats_file containing the stats for the test.
# range_fields
# range_fields: see TestOptions.stats_range_fields
# Returns a pass/fail object. Passes if the stats are within the expected value ranges.
# This prints the results for the user.
def check_stats(name, way, stats_file, range_fields):
head_commit = Perf.commit_hash('HEAD')
result = passed()
if range_fields:
try:
......@@ -1177,7 +1154,7 @@ def check_stats(name, way, stats_file, range_fields):
stats_file_contents = f.read()
f.close()
for (metric, range_val_dev) in range_fields.items():
for (metric, baseline_and_dev) in range_fields.items():
field_match = re.search('\("' + metric + '", "([0-9]+)"\)', stats_file_contents)
if field_match == None:
print('Failed to find metric: ', metric)
......@@ -1190,14 +1167,15 @@ def check_stats(name, way, stats_file, range_fields):
change = None
# If this is the first time running the benchmark, then pass.
if range_val_dev == None:
baseline = baseline_and_dev[0](way, head_commit)
if baseline == None:
metric_result = passed()
change = MetricChange.NewMetric
else:
(expected_val, tolerance_dev) = range_val_dev
tolerance_dev = baseline_and_dev[1]
(change, metric_result) = Perf.check_stats_change(
perf_stat,
expected_val,
baseline,
tolerance_dev,
config.allowed_perf_changes,
config.verbose >= 4)
......@@ -1330,8 +1308,13 @@ def simple_run(name, way, prog, extra_run_opts):
my_rts_flags = rts_flags(way)
# Collect stats if necessary:
# isStatsTest and not isCompilerStatsTest():
# assume we are running a ghc compiled program. Collect stats.
# isStatsTest and way == 'ghci':
# assume we are running a program via ghci. Collect stats.
stats_file = name + '.stats'
if isStatsTest() and not isCompilerStatsTest():
if isStatsTest() and (not isCompilerStatsTest() or way == 'ghci'):
stats_args = ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS'
else:
stats_args = ''
......
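To make the stats-collection hunk above concrete: when it applies, the driver appends RTS flags so the program writes machine-readable statistics to <test>.stats. Roughly, for a hypothetical test named my_test (mirroring stats_args above):

    # Sketch of the resulting invocation when stats collection is enabled.
    ./my_test +RTS -V0 -t my_test.stats --machine-readable -RTS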
......@@ -16,7 +16,7 @@ def strip_quotes(s):
return s.strip('\'"')
def str_fail(s):
return '\033[1m\033[43m\033[31m' + s + '\033[0m'
return '\033[1m\033[31m' + s + '\033[0m'
def str_pass(s):
return '\033[1m\033[32m' + s + '\033[0m'
......