Diffstat (limited to 'scripts/lib/resulttool/regression.py')
-rw-r--r--  scripts/lib/resulttool/regression.py | 281
1 file changed, 271 insertions(+), 10 deletions(-)
diff --git a/scripts/lib/resulttool/regression.py b/scripts/lib/resulttool/regression.py
index 9f952951b3..10e7d13841 100644
--- a/scripts/lib/resulttool/regression.py
+++ b/scripts/lib/resulttool/regression.py
@@ -7,15 +7,209 @@
#
import resulttool.resultutils as resultutils
-import json
from oeqa.utils.git import GitRepo
import oeqa.utils.gitarchive as gitarchive
-def compare_result(logger, base_name, target_name, base_result, target_result):
+METADATA_MATCH_TABLE = {
+ "oeselftest": "OESELFTEST_METADATA"
+}
+
+OESELFTEST_METADATA_GUESS_TABLE={
+ "trigger-build-posttrigger": {
+ "run_all_tests": False,
+ "run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"],
+ "skips": None,
+ "machine": None,
+ "select_tags":None,
+ "exclude_tags": None
+ },
+ "reproducible": {
+ "run_all_tests": False,
+ "run_tests":["reproducible"],
+ "skips": None,
+ "machine": None,
+ "select_tags":None,
+ "exclude_tags": None
+ },
+ "arch-qemu-quick": {
+ "run_all_tests": True,
+ "run_tests":None,
+ "skips": None,
+ "machine": None,
+ "select_tags":["machine"],
+ "exclude_tags": None
+ },
+ "arch-qemu-full-x86-or-x86_64": {
+ "run_all_tests": True,
+ "run_tests":None,
+ "skips": None,
+ "machine": None,
+ "select_tags":["machine", "toolchain-system"],
+ "exclude_tags": None
+ },
+ "arch-qemu-full-others": {
+ "run_all_tests": True,
+ "run_tests":None,
+ "skips": None,
+ "machine": None,
+ "select_tags":["machine", "toolchain-user"],
+ "exclude_tags": None
+ },
+ "selftest": {
+ "run_all_tests": True,
+ "run_tests":None,
+ "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
+ "machine": None,
+ "select_tags":None,
+ "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
+ },
+ "bringup": {
+ "run_all_tests": True,
+ "run_tests":None,
+ "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
+ "machine": None,
+ "select_tags":None,
+ "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
+ }
+}
+
+STATUS_STRINGS = {
+ "None": "No matching test result"
+}
+
+REGRESSIONS_DISPLAY_LIMIT=50
+
+MISSING_TESTS_BANNER = "-------------------------- Missing tests --------------------------"
+ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------"
+
+def test_has_at_least_one_matching_tag(test, tag_list):
+ return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])
+
+def all_tests_have_at_least_one_matching_tag(results, tag_list):
+ return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items())
+
+def any_test_have_any_matching_tag(results, tag_list):
+ return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())
+
+def have_skipped_test(result, test_prefix):
+ return all( result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))
+
+def have_all_tests_skipped(result, test_prefixes_list):
+ return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)
+
+def guess_oeselftest_metadata(results):
+ """
+ When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
+ Check results for specific values (absence/presence of oetags, number and name of executed tests...),
+ and if it matches one of known configuration from autobuilder configuration, apply guessed OSELFTEST_METADATA
+ to it to allow proper test filtering.
+ This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less,
+ as new tests will have OESELFTEST_METADATA properly appended at test reporting time
+ """
+
+ if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
+ return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
+ elif all(result.startswith("reproducible") for result in results):
+ return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
+ elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
+ return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
+ elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
+ return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
+ elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
+ return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
+ elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
+ if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
+ return OESELFTEST_METADATA_GUESS_TABLE['selftest']
+ elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
+ return OESELFTEST_METADATA_GUESS_TABLE['bringup']
+
+ return None
+
+
+def metadata_matches(base_configuration, target_configuration):
+ """
+ For the given base and target configurations, check the test type. If the test
+ type is listed in METADATA_MATCH_TABLE, compare the corresponding metadata,
+ provided it is present in the base. Return True if the metadata matches, or if
+ the base lacks some of the data (either TEST_TYPE or the corresponding metadata).
+ """
+ test_type = base_configuration.get('TEST_TYPE')
+ if test_type not in METADATA_MATCH_TABLE:
+ return True
+
+ metadata_key = METADATA_MATCH_TABLE.get(test_type)
+ if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
+ return False
+
+ return True
+
+
+def machine_matches(base_configuration, target_configuration):
+ return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')
+
+
+def can_be_compared(logger, base, target):
+ """
+ Some test results are not relevant to compare, for example oeselftest runs
+ with different test sets or parameters. Return True if the results can be
+ compared.
+ """
+ ret = True
+ base_configuration = base['configuration']
+ target_configuration = target['configuration']
+
+ # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on the test results.
+ if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
+ guess = guess_oeselftest_metadata(base['result'])
+ if guess is None:
+ logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
+ else:
+ logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
+ base_configuration['OESELFTEST_METADATA'] = guess
+ if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
+ guess = guess_oeselftest_metadata(target['result'])
+ if guess is None:
+ logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
+ else:
+ logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
+ target_configuration['OESELFTEST_METADATA'] = guess
+
+ # Test runs with LTP results in them should only be compared with other runs that also contain LTP tests
+ if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
+ ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])
+
+ return ret and metadata_matches(base_configuration, target_configuration) \
+ and machine_matches(base_configuration, target_configuration)
+
+def get_status_str(raw_status):
+ raw_status_lower = raw_status.lower() if raw_status else "None"
+ return STATUS_STRINGS.get(raw_status_lower, raw_status)
+
+def get_additional_info_line(new_pass_count, new_tests):
+ result=[]
+ if new_tests:
+ result.append(f'+{new_tests} test(s) present')
+ if new_pass_count:
+ result.append(f'+{new_pass_count} test(s) now passing')
+
+ if not result:
+ return ""
+
+ return ' -> ' + ', '.join(result) + '\n'
+
+def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None):
base_result = base_result.get('result')
target_result = target_result.get('result')
result = {}
+ regressions = {}
+ resultstring = ""
+ new_tests = 0
+ new_pass_count = 0
+
+ display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT
+
if base_result and target_result:
for k in base_result:
base_testcase = base_result[k]
@@ -27,12 +221,47 @@ def compare_result(logger, base_name, target_name, base_result, target_result):
result[k] = {'base': base_status, 'target': target_status}
else:
logger.error('Failed to retrieved base test case status: %s' % k)
+
+ # Also count new tests that were not present in the base results: these
+ # could be newly added tests, but they could also highlight test renames
+ # or fixed faulty ptests
+ for k in target_result:
+ if k not in base_result:
+ new_tests += 1
if result:
- resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
- for k in sorted(result):
- resultstring += ' %s: %s -> %s\n' % (k, result[k]['base'], result[k]['target'])
+ new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
+ # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
+ if new_pass_count < len(result):
+ resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
+ for k in sorted(result):
+ if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
+ # Differentiate each ptest kind when listing regressions
+ key_parts = k.split('.')
+ key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0]
+ # Append new regression to corresponding test family
+ regressions[key] = regressions.setdefault(key, []) + [' %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))]
+ resultstring += f" Total: {sum([len(regressions[r]) for r in regressions])} new regression(s):\n"
+ for k in regressions:
+ resultstring += f" {len(regressions[k])} regression(s) for {k}\n"
+ count_to_print=min([display_limit, len(regressions[k])]) if display_limit > 0 else len(regressions[k])
+ resultstring += ''.join(regressions[k][:count_to_print])
+ if count_to_print < len(regressions[k]):
+ resultstring+=' [...]\n'
+ if new_pass_count > 0:
+ resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
+ if new_tests > 0:
+ resultstring += f' Additionally, {new_tests} new test(s) is/are present\n'
+ else:
+ resultstring = "%s\n%s\n" % (base_name, target_name)
+ result = None
else:
- resultstring = "Match: %s\n %s" % (base_name, target_name)
+ resultstring = "%s\n%s\n" % (base_name, target_name)
+
+ if not result:
+ additional_info = get_additional_info_line(new_pass_count, new_tests)
+ if additional_info:
+ resultstring += additional_info
+
return result, resultstring
def get_results(logger, source):
@@ -44,12 +273,38 @@ def regression(args, logger):
regression_common(args, logger, base_results, target_results)
+# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
+# Truncating the test names works since they contain file and line number identifiers
+# which allow us to match them without the random components.
+def fixup_ptest_names(results, logger):
+ for r in results:
+ for i in results[r]:
+ tests = list(results[r][i]['result'].keys())
+ for test in tests:
+ new = None
+ if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
+ new = test.split("_-_")[0]
+ elif test.startswith(("ptestresult.curl.")) and "__" in test:
+ new = test.split("__")[0]
+ elif test.startswith(("ptestresult.dbus.")) and "__" in test:
+ new = test.split("__")[0]
+ elif test.startswith("ptestresult.binutils") and "build-st-" in test:
+ new = test.split(" ")[0]
+ elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
+ new = ".".join(test.split(".")[:2])
+ if new:
+ results[r][i]['result'][new] = results[r][i]['result'][test]
+ del results[r][i]['result'][test]
+
def regression_common(args, logger, base_results, target_results):
if args.base_result_id:
base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
if args.target_result_id:
target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)
+ fixup_ptest_names(base_results, logger)
+ fixup_ptest_names(target_results, logger)
+
matches = []
regressions = []
notfound = []
@@ -62,7 +317,9 @@ def regression_common(args, logger, base_results, target_results):
# removing any pairs which match
for c in base.copy():
for b in target.copy():
- res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
+ if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
+ continue
+ res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
if not res:
matches.append(resstr)
base.remove(c)
@@ -71,15 +328,18 @@ def regression_common(args, logger, base_results, target_results):
# Should only now see regressions, we may not be able to match multiple pairs directly
for c in base:
for b in target:
- res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
+ if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
+ continue
+ res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
if res:
regressions.append(resstr)
else:
notfound.append("%s not found in target" % a)
- print("\n".join(sorted(matches)))
print("\n".join(sorted(regressions)))
+ print("\n" + MISSING_TESTS_BANNER + "\n")
print("\n".join(sorted(notfound)))
-
+ print("\n" + ADDITIONAL_DATA_BANNER + "\n")
+ print("\n".join(sorted(matches)))
return 0
def regression_git(args, logger):
@@ -183,4 +443,5 @@ def register_commands(subparsers):
parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
parser_build.add_argument('--commit2', help="Revision to compare with")
parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
+ parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes")
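
As a usage illustration (not part of the patch): a minimal sketch of how the new guessing and status helpers behave. It assumes it is run from the top of a poky checkout so that scripts/lib and meta/lib can be put on sys.path; the result dictionaries and test names below are hypothetical, trimmed to the fields the helpers actually inspect.

import sys

# Assumption: executed from the top of a poky checkout.
sys.path.insert(0, "meta/lib")
sys.path.insert(0, "scripts/lib")

from resulttool.regression import (
    OESELFTEST_METADATA_GUESS_TABLE,
    get_status_str,
    guess_oeselftest_metadata,
)

# A single-entry result set holding only the source-mirror selftest is mapped
# to the "trigger-build-posttrigger" autobuilder configuration.
results = {
    "buildoptions.SourceMirroring.test_yocto_source_mirror": {"status": "PASSED"},
}
assert guess_oeselftest_metadata(results) == OESELFTEST_METADATA_GUESS_TABLE["trigger-build-posttrigger"]

# When every test carries the "machine" oetag, the guess is "arch-qemu-quick"
# (the first matching entry in the guess table wins). The test name is illustrative.
results = {
    "runqemu.RunqemuTests.test_boot_machine": {"status": "PASSED", "oetags": ["machine"]},
}
assert guess_oeselftest_metadata(results) == OESELFTEST_METADATA_GUESS_TABLE["arch-qemu-quick"]

# Missing statuses are rendered with the friendlier STATUS_STRINGS entry,
# while known statuses pass through unchanged.
assert get_status_str(None) == "No matching test result"
assert get_status_str("FAILED") == "FAILED"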