-rw-r--r-- | .gitlab-ci.yml | 37
-rw-r--r-- | README.md | 15
-rwxr-xr-x | bin/analyze-archive.py | 4
-rwxr-xr-x | bin/analyze-kconfig.py | 23
-rwxr-xr-x | bin/analyze-log.py | 94
-rwxr-xr-x | bin/analyze-trace.py | 471
-rwxr-xr-x | bin/extract-kernel-ws.py | 166
-rwxr-xr-x | bin/extract-speedup-from-log.py | 197
-rwxr-xr-x | bin/kstest.py | 31
-rwxr-xr-x | bin/perf-stat-to-dfatool.py | 40
-rwxr-xr-x | bin/pta-workload.py | 92
-rwxr-xr-x | bin/run-with-rapl | 32
-rwxr-xr-x | bin/ttest-ind.py | 31
-rwxr-xr-x | bin/workload.py | 161
-rw-r--r-- | doc/analysis-logs.md | 6
-rw-r--r-- | lib/behaviour.py | 388
-rw-r--r-- | lib/cli.py | 138
-rw-r--r-- | lib/functions.py | 113
-rw-r--r-- | lib/loader/plain.py | 140
-rw-r--r-- | lib/model.py | 150
-rw-r--r-- | lib/parameters.py | 39
-rw-r--r-- | lib/paramfit.py | 25
-rw-r--r-- | lib/pelt.py | 2
-rw-r--r-- | lib/utils.py | 66
-rw-r--r-- | lib/validation.py | 22
-rwxr-xr-x | libexec/rapl-to-dfatool.py | 27 |
26 files changed, 2279 insertions, 231 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 149653d..d8e53cd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,26 +9,27 @@ run_tests: stage: test variables: GIT_SUBMODULE_STRATEGY: normal + DFATOOL_ULS_FUNCTIONS: linear,logarithmic,logarithmic1,exponential,square,inverse,sqrt script: - mkdir test-data - - wget -qO test-data/20161221_123347_mmparam.tar https://lib.finalrewind.org/energy-models/20161221_123347_mmparam.tar - - wget -qO test-data/20170116_124500_LM75x.tar https://lib.finalrewind.org/energy-models/20170116_124500_LM75x.tar - - wget -qO test-data/20170116_131306_LM75x.tar https://lib.finalrewind.org/energy-models/20170116_131306_LM75x.tar - - wget -qO test-data/20170116_142654_mmstatic.tar https://lib.finalrewind.org/energy-models/20170116_142654_mmstatic.tar - - wget -qO test-data/20170116_143516_mmstatic.tar https://lib.finalrewind.org/energy-models/20170116_143516_mmstatic.tar - - wget -qO test-data/20170116_145420_sharpLS013B4DN.tar https://lib.finalrewind.org/energy-models/20170116_145420_sharpLS013B4DN.tar - - wget -qO test-data/20170116_151348_sharpLS013B4DN.tar https://lib.finalrewind.org/energy-models/20170116_151348_sharpLS013B4DN.tar - - wget -qO test-data/20170220_164723_RF24_int_A.tar https://lib.finalrewind.org/energy-models/20170220_164723_RF24_int_A.tar - - wget -qO test-data/20190815_103347_nRF24_no-rx.json https://lib.finalrewind.org/energy-models/20190815_103347_nRF24_no-rx.json - - wget -qO test-data/20190815_111745_nRF24_no-rx.json https://lib.finalrewind.org/energy-models/20190815_111745_nRF24_no-rx.json - - wget -qO test-data/20190815_122531_nRF24_no-rx.json https://lib.finalrewind.org/energy-models/20190815_122531_nRF24_no-rx.json - - wget -qO test-data/20191024-150723-nrf24l01-var-ack-retry.tar https://lib.finalrewind.org/energy-models/20191024-150723-nrf24l01-var-ack-retry.tar - - wget -qO test-data/20191024-152648-nrf24l01-var-ack.tar https://lib.finalrewind.org/energy-models/20191024-152648-nrf24l01-var-ack.tar - - wget -qO test-data/20200722-113624-timedResistiveLoad.tar https://lib.finalrewind.org/energy-models/20200722-113624-timedResistiveLoad.tar - - wget -qO test-data/20201203-112341-et_la_dco.tar https://lib.finalrewind.org/energy-models/20201203-112341-et_la_dco.tar - - wget -qO test-data/20201203-110526-et_timer_dco.tar https://lib.finalrewind.org/energy-models/20201203-110526-et_timer_dco.tar - - wget -qO test-data/20201203-113313-et_la_hfxt0.tar https://lib.finalrewind.org/energy-models/20201203-113313-et_la_hfxt0.tar - - wget -qO test-data/20201203-114004-et_timer_hfxt0.tar https://lib.finalrewind.org/energy-models/20201203-114004-et_timer_hfxt0.tar + - wget -qO test-data/20161221_123347_mmparam.tar https://ess.cs.uos.de/.private/dfatool/20161221_123347_mmparam.tar + - wget -qO test-data/20170116_124500_LM75x.tar https://ess.cs.uos.de/.private/dfatool/20170116_124500_LM75x.tar + - wget -qO test-data/20170116_131306_LM75x.tar https://ess.cs.uos.de/.private/dfatool/20170116_131306_LM75x.tar + - wget -qO test-data/20170116_142654_mmstatic.tar https://ess.cs.uos.de/.private/dfatool/20170116_142654_mmstatic.tar + - wget -qO test-data/20170116_143516_mmstatic.tar https://ess.cs.uos.de/.private/dfatool/20170116_143516_mmstatic.tar + - wget -qO test-data/20170116_145420_sharpLS013B4DN.tar https://ess.cs.uos.de/.private/dfatool/20170116_145420_sharpLS013B4DN.tar + - wget -qO test-data/20170116_151348_sharpLS013B4DN.tar https://ess.cs.uos.de/.private/dfatool/20170116_151348_sharpLS013B4DN.tar + - wget -qO 
test-data/20170220_164723_RF24_int_A.tar https://ess.cs.uos.de/.private/dfatool/20170220_164723_RF24_int_A.tar + - wget -qO test-data/20190815_103347_nRF24_no-rx.json https://ess.cs.uos.de/.private/dfatool/20190815_103347_nRF24_no-rx.json + - wget -qO test-data/20190815_111745_nRF24_no-rx.json https://ess.cs.uos.de/.private/dfatool/20190815_111745_nRF24_no-rx.json + - wget -qO test-data/20190815_122531_nRF24_no-rx.json https://ess.cs.uos.de/.private/dfatool/20190815_122531_nRF24_no-rx.json + - wget -qO test-data/20191024-150723-nrf24l01-var-ack-retry.tar https://ess.cs.uos.de/.private/dfatool/20191024-150723-nrf24l01-var-ack-retry.tar + - wget -qO test-data/20191024-152648-nrf24l01-var-ack.tar https://ess.cs.uos.de/.private/dfatool/20191024-152648-nrf24l01-var-ack.tar + - wget -qO test-data/20200722-113624-timedResistiveLoad.tar https://ess.cs.uos.de/.private/dfatool/20200722-113624-timedResistiveLoad.tar + - wget -qO test-data/20201203-112341-et_la_dco.tar https://ess.cs.uos.de/.private/dfatool/20201203-112341-et_la_dco.tar + - wget -qO test-data/20201203-110526-et_timer_dco.tar https://ess.cs.uos.de/.private/dfatool/20201203-110526-et_timer_dco.tar + - wget -qO test-data/20201203-113313-et_la_hfxt0.tar https://ess.cs.uos.de/.private/dfatool/20201203-113313-et_la_hfxt0.tar + - wget -qO test-data/20201203-114004-et_timer_hfxt0.tar https://ess.cs.uos.de/.private/dfatool/20201203-114004-et_timer_hfxt0.tar - pytest-3 --cov=lib - python3-coverage html -i artifacts: @@ -133,9 +133,12 @@ The following variables may be set to alter the behaviour of dfatool components. | `DFATOOL_LMT_MIN_SAMPLES_LEAF` | 0.0 .. **0.1** .. 1.0, 3 .. *n* | Minimum samples that each leaf of a split candidate must contain. A value below 1.0 specifies a ratio of the total number of training samples. A value above 1 specifies an absolute number of samples. | | `DFATOOL_LMT_MAX_BINS` | 10 .. **120** | Number of bins used to determine optimal split. LMT default: 25. | | `DFATOOL_LMT_CRITERION` | **mse**, rmse, mae, poisson | Error metric to use when selecting best split. | -| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, mae, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. Least squares regression itself minimzes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. Supports all metrics accepted by `--error-metric`. | +| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, **mae**, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. By default, least squares regression minimzes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. If `DFATOOL_ULS_LOSS_FUNCTION` is set to another value than linear, the default is mean absolute error (mae). Supports all metrics accepted by `--error-metric`. | +| `DFATOOL_ULS_FUNCTIONS` | a,b,… | List of function templates to use in ULS. Default: all supported functions. | +| `DFATOOL_ULS_LOSS_FUNCTION` | **linear**', soft\_l1, … | Loss function for least squares fitting, see `scipy.optimize.least_squares#loss` documentation. | | `DFATOOL_ULS_MIN_DISTINCT_VALUES` | 2 .. **3** .. *n* | Minimum number of unique values a parameter must take to be eligible for ULS | | `DFATOOL_ULS_SKIP_CODEPENDENT_CHECK` | **0**, 1 | Do not detect and remove co-dependent features in ULS. | +| `DFATOOL_ULS_MIN_BOUND` | **-∞** .. 
*n* | Lower bound for ULS regression variables. Setting it to 0 can often be beneficial. | | `DFATOOL_XGB_N_ESTIMATORS` | 1 .. **100** .. *n* | Number of estimators (i.e., trees) for XGBoost. | | `DFATOOL_XGB_MAX_DEPTH` | 2 .. **6** .. *n* | Maximum XGBoost tree depth. | | `DFATOOL_XGB_SUBSAMPLE` | 0.0 .. **1.0** | XGBoost subsampling ratio. | @@ -154,6 +157,8 @@ The following variables may be set to alter the behaviour of dfatool components. | `DFATOOL_RMT_LOSS_IGNORE_SCALAR` | **0**, 1 | Ignore scalar parameters when computing the loss for split node candidates. Instead of computing the loss of a single partition for each `x_i == j`, compute the loss of partitions for `x_i == j` in which non-scalar parameters vary and scalar parameters are constant. This way, scalar parameters do not affect the decision about which non-scalar parameter to use for splitting. | | `DFATOOL_PARAM_CATEGORICAL_TO_SCALAR` | **0**, 1 | Some models (e.g. FOL, sklearn CART, XGBoost) do not support categorical parameters. Ignore them (0) or convert them to scalar indexes (1). Conversion uses lexical order. | | `DFATOOL_FOL_SECOND_ORDER` | **0**, 1 | Add second-order components (interaction of feature pairs) to first-order linear function. | +| `DFATOOL_CSV_IGNORE` | *str1,str2,...* | Ignore the listed fields when loading CSV log files. | +| `DFATOOL_CSV_OBSERVATIONS` | *str1,str2,...* | Treat the listed fields as observations rather than features. | ## Examples @@ -174,3 +179,11 @@ The NFP values should be exactly as described by the selected configuration opti * [Kconfig](https://ess.cs.uos.de/git-build/dfatool/master/x264.kconfig) * [CART](https://ess.cs.uos.de/git-build/dfatool/master/x264-cart.json) * [RMT](https://ess.cs.uos.de/git-build/dfatool/master/x264-rmt.json) + +## References + +Mirrors of this repository are maintained at the following locations: + +* [ESS](https://ess.cs.uos.de/git/software/dfatool) +* [finalrewind.org](https://git.finalrewind.org/dfatool/) +* [GitHub](https://github.com/derf/dfatool) diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py index 53a8c37..0587ce1 100755 --- a/bin/analyze-archive.py +++ b/bin/analyze-archive.py @@ -426,7 +426,9 @@ def main(): dfatool.cli.print_info_by_name(model, by_name) if args.export_csv_unparam: - dfatool.cli.export_csv_unparam(model, args.export_csv_unparam) + dfatool.cli.export_csv_unparam( + model, args.export_csv_unparam, dialect=args.export_csv_dialect + ) if args.export_pgf_unparam: dfatool.cli.export_pgf_unparam(model, args.export_pgf_unparam) diff --git a/bin/analyze-kconfig.py b/bin/analyze-kconfig.py index d148641..8401e68 100755 --- a/bin/analyze-kconfig.py +++ b/bin/analyze-kconfig.py @@ -19,7 +19,6 @@ import time import numpy as np import dfatool.cli -import dfatool.plotter import dfatool.utils import dfatool.functions as df from dfatool.loader.kconfig import KConfigAttributes @@ -354,7 +353,9 @@ def main(): dfatool.cli.print_info_by_name(model, by_name) if args.export_csv_unparam: - dfatool.cli.export_csv_unparam(model, args.export_csv_unparam) + dfatool.cli.export_csv_unparam( + model, args.export_csv_unparam, dialect=args.export_csv_dialect + ) if args.export_pgf_unparam: dfatool.cli.export_pgf_unparam(model, args.export_pgf_unparam) @@ -363,10 +364,12 @@ def main(): dfatool.cli.export_json_unparam(model, args.export_json_unparam) if args.plot_unparam: + import dfatool.plotter as dp + for kv in args.plot_unparam.split(";"): state_or_trans, attribute, ylabel = kv.split(":") fname = "param_y_{}_{}.pdf".format(state_or_trans, 
attribute) - dfatool.plotter.plot_y( + dp.plot_y( model.by_name[state_or_trans][attribute], xlabel="measurement #", ylabel=ylabel, @@ -375,6 +378,8 @@ def main(): ) if args.boxplot_unparam: + import dfatool.plotter as dp + title = None if args.filter_param: title = "filter: " + ", ".join( @@ -382,7 +387,7 @@ def main(): ) for name in model.names: attr_names = sorted(model.attributes(name)) - dfatool.plotter.boxplot( + dp.boxplot( attr_names, [model.by_name[name][attr] for attr in attr_names], xlabel="Attribute", @@ -391,7 +396,7 @@ def main(): show=not args.non_interactive, ) for attribute in attr_names: - dfatool.plotter.boxplot( + dp.boxplot( [attribute], [model.by_name[name][attribute]], output=f"{args.boxplot_unparam}{name}-{attribute}.pdf", @@ -403,6 +408,8 @@ def main(): dfatool.cli.boxplot_param(args, model) if args.plot_param: + import dfatool.plotter as dp + for kv in args.plot_param.split(";"): try: state_or_trans, attribute, param_name, *function = kv.split(":") @@ -416,7 +423,7 @@ def main(): function = gplearn_to_function(" ".join(function)) else: function = None - dfatool.plotter.plot_param( + dp.plot_param( model, state_or_trans, attribute, @@ -568,6 +575,10 @@ def main(): json_model[attribute] = data.copy() if nfpkeys: json_model[attribute].update(nfpkeys[name][attribute]) + if "paramValueToIndex" in json_model[attribute]["modelFunction"]: + json_model[attribute]["paramValueToIndex"] = json_model[attribute][ + "modelFunction" + ].pop("paramValueToIndex") out_model = { "model": json_model, "modelType": "dfatool-kconfig", diff --git a/bin/analyze-log.py b/bin/analyze-log.py index dd32fab..50b5648 100755 --- a/bin/analyze-log.py +++ b/bin/analyze-log.py @@ -46,6 +46,11 @@ def main(): "--export-model", metavar="FILE", type=str, help="Export JSON model to FILE" ) parser.add_argument( + "--export-model-with-lut", + action="store_true", + help="Include LUT in model export", + ) + parser.add_argument( "logfiles", nargs="+", type=str, @@ -65,18 +70,17 @@ def main(): style="{", ) - if args.filter_observation: - args.filter_observation = list( - map(lambda x: tuple(x.split(":")), args.filter_observation.split(",")) - ) - observations = reduce(lambda a, b: a + b, map(parse_logfile, args.logfiles)) by_name, parameter_names = dfatool.utils.observations_to_by_name(observations) del observations if args.ignore_param: args.ignore_param = args.ignore_param.split(",") - dfatool.utils.ignore_param(by_name, parameter_names, args.ignore_param) + + if args.filter_observation: + args.filter_observation = list( + map(lambda x: tuple(x.split(":")), args.filter_observation.split(",")) + ) if args.filter_param: args.filter_param = list( @@ -92,6 +96,7 @@ def main(): dfatool.utils.filter_aggregate_by_param(by_name, parameter_names, args.filter_param) dfatool.utils.filter_aggregate_by_observation(by_name, args.filter_observation) + dfatool.utils.ignore_param(by_name, parameter_names, args.ignore_param) if args.param_shift: param_shift = dfatool.cli.parse_param_shift(args.param_shift) @@ -124,8 +129,13 @@ def main(): if args.info: dfatool.cli.print_info_by_name(model, by_name) + if args.information_gain: + dfatool.cli.print_information_gain_by_name(model, by_name) + if args.export_csv_unparam: - dfatool.cli.export_csv_unparam(model, args.export_csv_unparam) + dfatool.cli.export_csv_unparam( + model, args.export_csv_unparam, dialect=args.export_csv_dialect + ) if args.export_pgf_unparam: dfatool.cli.export_pgf_unparam(model, args.export_pgf_unparam) @@ -199,7 +209,7 @@ def main(): lut_quality = None else: 
ts = time.time() - lut_quality = model.assess(lut_model) + lut_quality = model.assess(lut_model, with_sum=args.add_total_observation) timing["assess lut"] = time.time() - ts ts = time.time() @@ -209,22 +219,36 @@ def main(): ts = time.time() if xv_method == "montecarlo": static_quality, _ = xv.montecarlo( - lambda m: m.get_static(), xv_count, static=True + lambda m: m.get_static(), + xv_count, + static=True, + with_sum=args.add_total_observation, ) xv.export_filename = args.export_xv - analytic_quality, _ = xv.montecarlo(lambda m: m.get_fitted()[0], xv_count) + analytic_quality, _ = xv.montecarlo( + lambda m: m.get_fitted()[0], xv_count, with_sum=args.add_total_observation + ) elif xv_method == "kfold": - static_quality, _ = xv.kfold(lambda m: m.get_static(), xv_count, static=True) + static_quality, _ = xv.kfold( + lambda m: m.get_static(), + xv_count, + static=True, + with_sum=args.add_total_observation, + ) xv.export_filename = args.export_xv - analytic_quality, _ = xv.kfold(lambda m: m.get_fitted()[0], xv_count) + analytic_quality, _ = xv.kfold( + lambda m: m.get_fitted()[0], xv_count, with_sum=args.add_total_observation + ) else: - static_quality = model.assess(static_model) + static_quality = model.assess(static_model, with_sum=args.add_total_observation) if args.export_raw_predictions: analytic_quality, raw_results = model.assess(param_model, return_raw=True) with open(args.export_raw_predictions, "w") as f: json.dump(raw_results, f, cls=dfatool.utils.NpEncoder) else: - analytic_quality = model.assess(param_model) + analytic_quality = model.assess( + param_model, with_sum=args.add_total_observation + ) timing["assess model"] = time.time() - ts if "static" in args.show_model or "all" in args.show_model: @@ -237,6 +261,7 @@ def main(): name, attribute, with_dependence="all" in args.show_model, + precision=args.show_model_precision, ) if "param" in args.show_model or "all" in args.show_model: @@ -244,7 +269,11 @@ def main(): for name in sorted(model.names): for attribute in sorted(model.attributes(name)): info = param_info(name, attribute) - dfatool.cli.print_model(f"{name:10s} {attribute:15s}", info) + dfatool.cli.print_model( + f"{name:10s} {attribute:15s}", + info, + precision=args.show_model_precision, + ) if args.show_model_error: dfatool.cli.model_quality_table( @@ -262,8 +291,8 @@ def main(): dfatool.cli.print_model_complexity(model) if args.export_model: - print(f"Exportding model to {args.export_model}") - json_model = model.to_json() + print(f"Exporting model to {args.export_model}") + json_model = model.to_json(with_by_param=args.export_model_with_lut) with open(args.export_model, "w") as f: json.dump( json_model, f, indent=2, sort_keys=True, cls=dfatool.utils.NpEncoder @@ -272,13 +301,34 @@ def main(): if args.export_dot: dfatool.cli.export_dot(model, args.export_dot) - if args.export_dref: - dref = model.to_dref(static_quality, lut_quality, analytic_quality) + if args.export_dref or args.export_pseudo_dref: + dref = model.to_dref( + static_quality, + lut_quality, + analytic_quality, + with_sum=args.add_total_observation, + ) for key, value in timing.items(): dref[f"timing/{key}"] = (value, r"\second") - dfatool.cli.export_dataref( - args.export_dref, dref, precision=args.dref_precision - ) + + if args.information_gain: + for name in model.names: + for attr in model.attributes(name): + mutual_information = model.mutual_information(name, attr) + for param in model.parameters: + if param in mutual_information: + dref[f"mutual information/{name}/{attr}/{param}"] = ( + 
mutual_information[param] + ) + + if args.export_pseudo_dref: + dfatool.cli.export_pseudo_dref( + args.export_pseudo_dref, dref, precision=args.dref_precision + ) + if args.export_dref: + dfatool.cli.export_dataref( + args.export_dref, dref, precision=args.dref_precision + ) if args.export_json: with open(args.export_json, "w") as f: diff --git a/bin/analyze-trace.py b/bin/analyze-trace.py new file mode 100755 index 0000000..1cc3b89 --- /dev/null +++ b/bin/analyze-trace.py @@ -0,0 +1,471 @@ +#!/usr/bin/env python3 + +""" +analyze-trace - Generate a performance-aware behaviour model from log files + +foo +""" + +import argparse +import dfatool.cli +import dfatool.plotter +import dfatool.utils +import dfatool.functions as df +from dfatool.behaviour import SDKBehaviourModel +from dfatool.loader import Logfile +from dfatool.model import AnalyticModel +from dfatool.validation import CrossValidator +from functools import reduce +import logging +import json +import re +import sys +import time + + +def parse_logfile(filename): + loader = Logfile() + + if filename.endswith("xz"): + import lzma + + with lzma.open(filename, "rt") as f: + return loader.load(f, is_trace=True) + with open(filename, "r") as f: + return loader.load(f, is_trace=True) + + +def join_annotations(ref, base, new): + offset = len(ref) + return base + list(map(lambda x: x.apply_offset(offset), new)) + + +def main(): + timing = dict() + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__ + ) + dfatool.cli.add_standard_arguments(parser) + parser.add_argument( + "logfiles", + nargs="+", + type=str, + help="Path to benchmark output (.txt or .txt.xz)", + ) + args = parser.parse_args() + dfatool.cli.sanity_check(args) + + if args.log_level: + numeric_level = getattr(logging, args.log_level.upper(), None) + if not isinstance(numeric_level, int): + print(f"Invalid log level: {args.log_level}", file=sys.stderr) + sys.exit(1) + logging.basicConfig( + level=numeric_level, + format="{asctime} {levelname}:{name}:{message}", + style="{", + ) + + observations, annotations = reduce( + lambda a, b: (a[0] + b[0], join_annotations(a[0], a[1], b[1])), + map(parse_logfile, args.logfiles), + ) + + bm = SDKBehaviourModel(observations, annotations) + observations += bm.meta_observations + is_loop = bm.is_loop + am_tt_param_names = bm.am_tt_param_names + delta_by_name = bm.delta_by_name + delta_param_by_name = bm.delta_param_by_name + + def format_guard(guard): + return "∧".join(map(lambda kv: f"{kv[0]}={kv[1]}", guard)) + + for name in sorted(delta_by_name.keys()): + for t_from, t_to_set in delta_by_name[name].items(): + i_to_transition = dict() + delta_param_sets = list() + to_names = list() + transition_guard = dict() + + for t_to in sorted(t_to_set): + delta_params = delta_param_by_name[name][(t_from, t_to)] + delta_param_sets.append(delta_params) + to_names.append(t_to) + n_confs = len(delta_params) + if is_loop.get(t_from, False) and is_loop.get(t_to, False): + print(f"{name} {t_from} → {t_to} ⟳") + elif is_loop.get(t_from, False): + print(f"{name} {t_from} → {t_to} →") + else: + print( + f"{name} {t_from} → {t_to} ({' ∨ '.join(map(format_guard, bm.transition_guard[t_from].get(t_to, list()))) or '⊤'})" + ) + + for i in range(len(delta_param_sets)): + for j in range(i + 1, len(delta_param_sets)): + if not delta_param_sets[i].isdisjoint(delta_param_sets[j]): + intersection = delta_param_sets[i].intersection( + delta_param_sets[j] + ) + if is_loop.get(t_from, False): + logging.debug( + f"Loop 
transition <{t_from}>: <{to_names[i]}> and <{to_names[j]}> are both taken for {intersection}" + ) + else: + logging.error( + f"Outbound transitions of <{t_from}> are not deterministic: <{to_names[i]}> and <{to_names[j]}> are both taken for {intersection}" + ) + raise RuntimeError( + f"Outbound transitions of <{t_from}> are not deterministic" + ) + + print("") + + by_name, parameter_names = dfatool.utils.observations_to_by_name(observations) + del observations + + if args.ignore_param: + args.ignore_param = args.ignore_param.split(",") + + if args.filter_observation: + args.filter_observation = list( + map(lambda x: tuple(x.split(":")), args.filter_observation.split(",")) + ) + + if args.filter_param: + args.filter_param = list( + map( + lambda entry: dfatool.cli.parse_filter_string( + entry, parameter_names=parameter_names + ), + args.filter_param.split(";"), + ) + ) + else: + args.filter_param = list() + + dfatool.utils.filter_aggregate_by_param(by_name, parameter_names, args.filter_param) + dfatool.utils.filter_aggregate_by_observation(by_name, args.filter_observation) + dfatool.utils.ignore_param(by_name, parameter_names, args.ignore_param) + + if args.param_shift: + param_shift = dfatool.cli.parse_param_shift(args.param_shift) + dfatool.utils.shift_param_in_aggregate(by_name, parameter_names, param_shift) + + if args.normalize_nfp: + norm = dfatool.cli.parse_nfp_normalization(args.normalize_nfp) + dfatool.utils.normalize_nfp_in_aggregate(by_name, norm) + + function_override = dict() + if args.function_override: + for function_desc in args.function_override.split(";"): + state_or_tran, attribute, function_str = function_desc.split(":") + function_override[(state_or_tran, attribute)] = function_str + + ts = time.time() + if args.load_json: + with open(args.load_json, "r") as f: + model = AnalyticModel.from_json(json.load(f), by_name, parameter_names) + else: + model = AnalyticModel( + by_name, + parameter_names, + force_tree=args.force_tree, + compute_stats=not args.skip_param_stats, + function_override=function_override, + ) + timing["AnalyticModel"] = time.time() - ts + + if args.info: + dfatool.cli.print_info_by_name(model, by_name) + + if args.information_gain: + dfatool.cli.print_information_gain_by_name(model, by_name) + + if args.export_csv_unparam: + dfatool.cli.export_csv_unparam( + model, args.export_csv_unparam, dialect=args.export_csv_dialect + ) + + if args.export_pgf_unparam: + dfatool.cli.export_pgf_unparam(model, args.export_pgf_unparam) + + if args.export_json_unparam: + dfatool.cli.export_json_unparam(model, args.export_json_unparam) + + if args.plot_unparam: + for kv in args.plot_unparam.split(";"): + state_or_trans, attribute, ylabel = kv.split(":") + fname = "param_y_{}_{}.pdf".format(state_or_trans, attribute) + dfatool.plotter.plot_y( + model.by_name[state_or_trans][attribute], + xlabel="measurement #", + ylabel=ylabel, + # output=fname, + show=not args.non_interactive, + ) + + if args.boxplot_unparam: + title = None + if args.filter_param: + title = "filter: " + ", ".join( + map(lambda kv: f"{kv[0]}={kv[1]}", args.filter_param) + ) + for name in model.names: + attr_names = sorted(model.attributes(name)) + dfatool.plotter.boxplot( + attr_names, + [model.by_name[name][attr] for attr in attr_names], + xlabel="Attribute", + output=f"{args.boxplot_unparam}{name}.pdf", + title=title, + show=not args.non_interactive, + ) + for attribute in attr_names: + dfatool.plotter.boxplot( + [attribute], + [model.by_name[name][attribute]], + 
output=f"{args.boxplot_unparam}{name}-{attribute}.pdf", + title=title, + show=not args.non_interactive, + ) + + if args.boxplot_param: + dfatool.cli.boxplot_param(args, model) + + if args.cross_validate: + xv_method, xv_count = args.cross_validate.split(":") + xv_count = int(xv_count) + xv = CrossValidator( + AnalyticModel, + by_name, + parameter_names, + force_tree=args.force_tree, + compute_stats=not args.skip_param_stats, + show_progress=args.progress, + ) + xv.parameter_aware = args.parameter_aware_cross_validation + else: + xv_method = None + xv_count = None + + static_model = model.get_static() + + ts = time.time() + lut_model = model.get_param_lut() + timing["get lut"] = time.time() - ts + + if lut_model is None: + lut_quality = None + else: + ts = time.time() + lut_quality = model.assess(lut_model, with_sum=args.add_total_observation) + timing["assess lut"] = time.time() - ts + + ts = time.time() + param_model, param_info = model.get_fitted() + timing["get model"] = time.time() - ts + + ts = time.time() + if xv_method == "montecarlo": + static_quality, _ = xv.montecarlo( + lambda m: m.get_static(), + xv_count, + static=True, + with_sum=args.add_total_observation, + ) + xv.export_filename = args.export_xv + analytic_quality, _ = xv.montecarlo( + lambda m: m.get_fitted()[0], xv_count, with_sum=args.add_total_observation + ) + elif xv_method == "kfold": + static_quality, _ = xv.kfold( + lambda m: m.get_static(), + xv_count, + static=True, + with_sum=args.add_total_observation, + ) + xv.export_filename = args.export_xv + analytic_quality, _ = xv.kfold( + lambda m: m.get_fitted()[0], xv_count, with_sum=args.add_total_observation + ) + else: + static_quality = model.assess(static_model, with_sum=args.add_total_observation) + if args.export_raw_predictions: + analytic_quality, raw_results = model.assess(param_model, return_raw=True) + with open(args.export_raw_predictions, "w") as f: + json.dump(raw_results, f, cls=dfatool.utils.NpEncoder) + else: + analytic_quality = model.assess( + param_model, with_sum=args.add_total_observation + ) + timing["assess model"] = time.time() - ts + + if "paramdetection" in args.show_model or "all" in args.show_model: + for name in model.names: + for attribute in model.attributes(name): + info = param_info(name, attribute) + print( + "{:10s} {:10s} non-param stddev {:f}".format( + name, + attribute, + model.attr_by_name[name][attribute].stats.std_static, + ) + ) + print( + "{:10s} {:10s} param-lut stddev {:f}".format( + name, + attribute, + model.attr_by_name[name][attribute].stats.std_param_lut, + ) + ) + for param in sorted( + model.attr_by_name[name][attribute].stats.std_by_param.keys() + ): + print( + "{:10s} {:10s} {:10s} stddev {:f}".format( + name, + attribute, + param, + model.attr_by_name[name][attribute].stats.std_by_param[ + param + ], + ) + ) + for arg_index in range(model.attr_by_name[name][attribute].arg_count): + print( + "{:10s} {:10s} {:10s} stddev {:f}".format( + name, + attribute, + f"arg{arg_index}", + model.attr_by_name[name][attribute].stats.std_by_arg[ + arg_index + ], + ) + ) + if type(info) is df.AnalyticFunction: + for param_name in sorted(info.fit_by_param.keys(), key=str): + param_fit = info.fit_by_param[param_name]["results"] + for function_type in sorted(param_fit.keys()): + function_rmsd = param_fit[function_type]["rmsd"] + print( + "{:10s} {:10s} {:10s} mean {:10s} RMSD {:.0f}".format( + name, + attribute, + str(param_name), + function_type, + function_rmsd, + ) + ) + + if "static" in args.show_model or "all" in 
args.show_model: + print("--- static model ---") + for name in sorted(model.names): + for attribute in sorted(model.attributes(name)): + dfatool.cli.print_static( + model, + static_model, + name, + attribute, + with_dependence="all" in args.show_model, + precision=args.show_model_precision, + ) + + if "param" in args.show_model or "all" in args.show_model: + print("--- param model ---") + for name in sorted(model.names): + for attribute in sorted(model.attributes(name)): + info = param_info(name, attribute) + dfatool.cli.print_model( + f"{name:10s} {attribute:15s}", + info, + precision=args.show_model_precision, + ) + + if args.show_model_error: + dfatool.cli.model_quality_table( + lut=lut_quality, + model=analytic_quality, + static=static_quality, + model_info=param_info, + xv_method=xv_method, + xv_count=xv_count, + error_metric=args.error_metric, + load_model=args.load_json, + ) + + if args.show_model_complexity: + dfatool.cli.print_model_complexity(model) + + if args.export_dot: + dfatool.cli.export_dot(model, args.export_dot) + + if args.export_dref or args.export_pseudo_dref: + dref = model.to_dref( + static_quality, + lut_quality, + analytic_quality, + with_sum=args.add_total_observation, + ) + for key, value in timing.items(): + dref[f"timing/{key}"] = (value, r"\second") + + if args.information_gain: + for name in model.names: + for attr in model.attributes(name): + mutual_information = model.mutual_information(name, attr) + for param in model.parameters: + if param in mutual_information: + dref[f"mutual information/{name}/{attr}/{param}"] = ( + mutual_information[param] + ) + + if args.export_pseudo_dref: + dfatool.cli.export_pseudo_dref( + args.export_pseudo_dref, dref, precision=args.dref_precision + ) + if args.export_dref: + dfatool.cli.export_dataref( + args.export_dref, dref, precision=args.dref_precision + ) + + if args.export_json: + with open(args.export_json, "w") as f: + json.dump( + model.to_json( + static_error=static_quality, + lut_error=lut_quality, + model_error=analytic_quality, + ), + f, + sort_keys=True, + cls=dfatool.utils.NpEncoder, + indent=2, + ) + + if args.plot_param: + for kv in args.plot_param.split(";"): + try: + state_or_trans, attribute, param_name = kv.split(":") + except ValueError: + print( + "Usage: --plot-param='state_or_trans:attribute:param_name'", + file=sys.stderr, + ) + sys.exit(1) + dfatool.plotter.plot_param( + model, + state_or_trans, + attribute, + model.param_index(param_name), + title=state_or_trans, + ylabel=attribute, + xlabel=param_name, + output=f"{state_or_trans}-{attribute}-{param_name}.pdf", + show=not args.non_interactive, + ) + + +if __name__ == "__main__": + main() diff --git a/bin/extract-kernel-ws.py b/bin/extract-kernel-ws.py new file mode 100755 index 0000000..9f263e2 --- /dev/null +++ b/bin/extract-kernel-ws.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 + +import argparse +import numpy as np +import sys +import logging + + +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__ + ) + parser.add_argument( + "--log-level", + metavar="LEVEL", + choices=["debug", "info", "warning", "error"], + default="warning", + help="Set log level", + ) + parser.add_argument( + "--output-format", + metavar="FORMAT", + choices=["dfatool", "valgrind-ws"], + default="dfatool", + help="Set output format", + ) + parser.add_argument( + "benchmark_file", + type=str, + help="Benchmark file used to run valgrind-ws", + ) + parser.add_argument( + "ws_output", + type=str, + 
help="valgrind-ws output file", + ) + + args = parser.parse_args() + benchmark_filename = args.benchmark_file.split("/")[-1] + + if args.log_level: + numeric_level = getattr(logging, args.log_level.upper(), None) + if not isinstance(numeric_level, int): + print(f"Invalid log level: {args.log_level}", file=sys.stderr) + sys.exit(1) + logging.basicConfig( + level=numeric_level, + format="{asctime} {levelname}:{name}:{message}", + style="{", + ) + + with open(args.benchmark_file, "r") as f: + start_range = [None, None] + end_range = [None, None] + in_nop = False + for lineno, line in enumerate(f): + line = line.strip() + if line == "#if NOP_SYNC": + in_nop = True + if start_range[0] is None: + start_range[0] = lineno + else: + end_range[0] = lineno + if in_nop and line.startswith("#endif"): + in_nop = False + if start_range[1] is None: + start_range[1] = lineno + else: + end_range[1] = lineno + + logging.debug(f"start_range = {start_range}, end_range = {end_range}") + + page_size = None + ws_log = list() + sample_info = dict() + with open(args.ws_output, "r") as f: + in_ws_log = False + in_sample_info = False + for line in f: + line = line.strip() + if in_ws_log and line == "": + in_ws_log = False + if in_sample_info and line == "": + in_sample_info = False + if page_size is None and line.startswith("Page size:"): + page_size = int(line.split()[2]) + + if in_ws_log: + t, wss_i, wss_d, info_ref = line.split() + ws_log.append((int(t), int(wss_i), int(wss_d), info_ref)) + elif in_sample_info: + _, info_ref, _, locs = line.split() + info_ref = info_ref.removesuffix("]") + locs = locs.removeprefix("loc=") + sample_info[info_ref] = list() + for loc in filter(lambda x: len(x), locs.split("|")): + filename, lineno = loc.split(":") + sample_info[info_ref].append((filename, int(lineno))) + + if line == "t WSS_insn WSS_data info": + in_ws_log = True + if line == "Sample info:": + in_sample_info = True + + if page_size is None: + raise RuntimeError("Unable to determine page size fom {args.ws_output}") + + logging.debug(f"sample_info = {sample_info}") + next_in_kernel = False + in_kernel = False + insn_working_set_sizes = list() + data_working_set_sizes = list() + kernel_range = [None, None] + for t, wss_i, wss_d, info_ref in ws_log: + if next_in_kernel: + next_in_kernel = False + in_kernel = True + kernel_range[0] = t + + if info_ref != "-": + for filename, lineno in sample_info[info_ref]: + if ( + filename == benchmark_filename + and start_range[0] <= lineno <= start_range[1] + ): + next_in_kernel = True + elif ( + filename == benchmark_filename + and end_range[0] <= lineno <= end_range[1] + ): + in_kernel = False + + if in_kernel: + data_working_set_sizes.append(wss_d * page_size) + insn_working_set_sizes.append(wss_i * page_size) + kernel_range[1] = t + + if args.output_format == "dfatool": + print( + f"wss_data_mean_bytes={np.mean(data_working_set_sizes)}" + + f" wss_data_median_bytes={np.median(data_working_set_sizes)}" + + f" wss_data_stddev={np.std(data_working_set_sizes)}" + + f" wss_insn_mean_bytes={np.mean(insn_working_set_sizes)}" + + f" wss_insn_median_bytes={np.median(insn_working_set_sizes)}" + + f" wss_insn_stddev={np.std(insn_working_set_sizes)}" + ) + elif args.output_format == "valgrind-ws": + with open(args.ws_output, "r") as f: + in_ws_log = False + for line in f: + if in_ws_log and line.strip() == "": + in_ws_log = False + + if in_ws_log: + ts = int(line.strip().split()[0]) + if kernel_range[0] <= ts <= kernel_range[1]: + print(line, end="") + else: + print(line, end="") + + if 
line.strip() == "t WSS_insn WSS_data info": + in_ws_log = True + + +if __name__ == "__main__": + main() diff --git a/bin/extract-speedup-from-log.py b/bin/extract-speedup-from-log.py new file mode 100755 index 0000000..3537ec3 --- /dev/null +++ b/bin/extract-speedup-from-log.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 + +"""extract-speedup-from-log - Determine speedup from dfatool log files + +foo +""" + +import argparse +import dfatool.cli +import dfatool.utils +import logging +import numpy as np +import sys +from dfatool.loader import Logfile, CSVfile +from dfatool.model import AnalyticModel +from functools import reduce + + +def parse_logfile(filename): + if ".csv" in filename: + loader = CSVfile() + else: + loader = Logfile() + + if filename.endswith("xz"): + import lzma + + with lzma.open(filename, "rt") as f: + return loader.load(f) + with open(filename, "r") as f: + return loader.load(f) + + +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__ + ) + parser.add_argument( + "--add-param", + metavar="<param>=<value>[ <param>=<value> ...]", + type=str, + help="Add additional parameter specifications to output lines", + ) + parser.add_argument( + "--filter-param", + metavar="<parameter name><condition>[;<parameter name><condition>...]", + type=str, + help="Only consider measurements where <parameter name> satisfies <condition>. " + "<condition> may be <operator><parameter value> with operator being < / <= / = / >= / >, " + "or ∈<parameter value>[,<parameter value>...]. " + "All other measurements (including those where it is None, that is, has not been set yet) are discarded. " + "Note that this may remove entire function calls from the model.", + ) + parser.add_argument( + "--ignore-param", + metavar="<parameter name>[,<parameter name>,...]", + type=str, + help="Ignore listed parameters during model generation", + ) + parser.add_argument( + "--log-level", + metavar="LEVEL", + choices=["debug", "info", "warning", "error"], + default="warning", + help="Set log level", + ) + parser.add_argument( + "numerator", + type=str, + help="numerator parameters", + ) + parser.add_argument( + "denominator", + type=str, + help="denominator parameters", + ) + parser.add_argument( + "observation", + type=str, + help="observation (key:attribute) used for speedup calculation", + ) + parser.add_argument( + "logfiles", + nargs="+", + type=str, + help="Path to benchmark output (.txt or .txt.xz)", + ) + args = parser.parse_args() + + if args.log_level: + numeric_level = getattr(logging, args.log_level.upper(), None) + if not isinstance(numeric_level, int): + print(f"Invalid log level: {args.log_level}", file=sys.stderr) + sys.exit(1) + logging.basicConfig( + level=numeric_level, + format="{asctime} {levelname}:{name}:{message}", + style="{", + ) + + observations = reduce(lambda a, b: a + b, map(parse_logfile, args.logfiles)) + by_name_num, parameter_names_num = dfatool.utils.observations_to_by_name( + observations + ) + by_name_denom, parameter_names_denom = dfatool.utils.observations_to_by_name( + observations + ) + del observations + + if args.filter_param: + args.filter_param = list( + map( + lambda entry: dfatool.cli.parse_filter_string( + entry, parameter_names=parameter_names_num + ), + args.filter_param.split(";"), + ) + ) + else: + args.filter_param = list() + + filter_num = list( + map( + lambda entry: dfatool.cli.parse_filter_string( + entry, parameter_names=parameter_names_num + ), + args.numerator.split(";"), + ) + ) + + 
filter_denom = list( + map( + lambda entry: dfatool.cli.parse_filter_string( + entry, parameter_names=parameter_names_denom + ), + args.denominator.split(";"), + ) + ) + + filter_num += args.filter_param + filter_denom += args.filter_param + + ignore_num = list(map(lambda x: x[0], filter_num)) + ignore_denom = list(map(lambda x: x[0], filter_denom)) + assert ignore_num == ignore_denom + + if args.ignore_param: + args.ignore_param = args.ignore_param.split(";") + ignore_num += args.ignore_param + ignore_denom += args.ignore_param + + dfatool.utils.filter_aggregate_by_param( + by_name_num, parameter_names_num, filter_num + ) + dfatool.utils.filter_aggregate_by_param( + by_name_denom, parameter_names_denom, filter_denom + ) + dfatool.utils.ignore_param(by_name_num, parameter_names_num, ignore_num) + dfatool.utils.ignore_param(by_name_denom, parameter_names_denom, ignore_denom) + + model_num = AnalyticModel( + by_name_num, + parameter_names_num, + compute_stats=False, + ) + + model_denom = AnalyticModel( + by_name_denom, + parameter_names_denom, + compute_stats=False, + ) + + for param_key in model_num.get_by_param().keys(): + name, params = param_key + num_data = model_num.get_by_param().get(param_key).get(args.observation) + try: + denom_data = model_denom.get_by_param().get(param_key).get(args.observation) + except AttributeError: + logging.error(f"Cannot find numerator param {param_key} in denominator") + logging.error(f"Parameter names == {tuple(parameter_names_num)}") + logging.error("You may need to adjust --ignore-param") + sys.exit(1) + if num_data and denom_data: + param_str = " ".join( + map( + lambda i: f"{parameter_names_num[i]}={params[i]}", + range(len(params)), + ) + ) + if args.add_param is not None: + param_str += " " + args.add_param + for speedup in np.array(num_data) / np.array(denom_data): + print(f"[::] {name} | {param_str} | speedup={speedup}") + + +if __name__ == "__main__": + main() diff --git a/bin/kstest.py b/bin/kstest.py new file mode 100755 index 0000000..61951eb --- /dev/null +++ b/bin/kstest.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +import json +import sys +from scipy.stats import kstest + + +def main(pvalue, file1, file2, macro=None): + with open(file1, "r") as f: + data1 = json.load(f) + with open(file2, "r") as f: + data2 = json.load(f) + result = kstest(data1, data2) + + pvalue = float(pvalue) + + print(f"% {result}") + if macro is None: + print(r"\drefset{ttest/pvalue}{" + str(result.pvalue) + "}") + if result.pvalue < pvalue: + if macro: + print("\\def\\" + macro + "{$p < " + f"{pvalue:0.2f}" + "$}") + sys.exit(0) + else: + if macro: + print("\\def\\" + macro + "{$p \\ge " + f"{pvalue:0.2f}" + "$}") + sys.exit(1) + + +if __name__ == "__main__": + main(*sys.argv[1:]) diff --git a/bin/perf-stat-to-dfatool.py b/bin/perf-stat-to-dfatool.py new file mode 100755 index 0000000..01f568f --- /dev/null +++ b/bin/perf-stat-to-dfatool.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + +import argparse +import json +import sys + + +def main(): + metric = dict() + for line in sys.stdin: + line = line.strip() + data = json.loads(line) + + count = int(float(data["counter-value"])) + label = data["event"] + + if data["metric-unit"] != "(null)": + extra = float(data["metric-value"]) + extra_label = data["metric-unit"] + else: + extra = None + extra_label = None + + metric[label] = (count, extra, extra_label) + + buf = "" + for key in sorted(metric.keys()): + count, extra, extra_label = metric[key] + buf += f" {key}={count}" + if extra_label is not None: + if 
extra_label.startswith("of all"): + label = extra_label.replace(" ", "-") + buf += f" {key}-percentage-{label}={extra}" + else: + buf += f" {key}-metric={extra}" + print(buf) + + +if __name__ == "__main__": + main() diff --git a/bin/pta-workload.py b/bin/pta-workload.py new file mode 100755 index 0000000..19a7378 --- /dev/null +++ b/bin/pta-workload.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 + +import sys +from dfatool.automata import PTA +from dfatool.utils import human_readable +from dfatool.lex import TimedSequence, TimedWord, Workload + +args = sys.argv[1:] + +loops = dict() +ptafiles = list() +loop_names = set() + + +def simulate_word(timedword): + prev_state = "UNINITIALIZED" + prev_param = None + ret = dict() + for trace_part in timedword: + print("Trace Part {}".format(trace_part)) + if type(trace_part) is TimedWord: + result = pta.simulate( + trace_part, orig_state=prev_state, orig_param=prev_param + ) + elif type(trace_part) is Workload: + result = pta.simulate( + trace_part.word, orig_state=prev_state, orig_param=prev_param + ) + if prev_state != result.end_state: + print( + "Warning: loop starts in state {}, but terminates in {}".format( + prev_state, result.end_state.name + ) + ) + if prev_param != result.parameters: + print( + "Warning: loop starts with parameters {}, but terminates with {}".format( + prev_param, result.parameters + ) + ) + ret[trace_part.name] = result + loop_names.add(trace_part.name) + + print(" Duration: " + human_readable(result.duration, "s")) + if result.duration_mae: + print( + u" ± {} / {:.0f}%".format( + human_readable(result.duration_mae, "s"), result.duration_mape + ) + ) + print(" Energy: " + human_readable(result.energy, "J")) + if result.energy_mae: + print( + u" ± {} / {:.0f}%".format( + human_readable(result.energy_mae, "J"), result.energy_mape + ) + ) + print(" Mean Power: " + human_readable(result.mean_power, "W")) + print("") + + prev_state = result.end_state + prev_param = result.parameters + + return ret + + +for i in range(len(args) // 2): + ptafile, raw_word = args[i * 2], args[i * 2 + 1] + ptafiles.append(ptafile) + pta = PTA.from_file(ptafile) + timedword = TimedSequence(raw_word) + print("Input: {}\n".format(timedword)) + loops[ptafile] = simulate_word(timedword) + +for loop_name in sorted(loop_names): + result_set = list() + total_power = 0 + for ptafile in sorted(ptafiles): + if loop_name in loops[ptafile]: + result_set.append(loops[ptafile][loop_name]) + total_power += loops[ptafile][loop_name].mean_power + print( + "{}: total mean power is {}".format(loop_name, human_readable(total_power, "W")) + ) + for i, result in enumerate(result_set): + print( + " {:.0f}% {} (period: {})".format( + result.mean_power * 100 / total_power, + ptafiles[i], + human_readable(result.duration, "s"), + ) + ) diff --git a/bin/run-with-rapl b/bin/run-with-rapl new file mode 100755 index 0000000..54d2d9c --- /dev/null +++ b/bin/run-with-rapl @@ -0,0 +1,32 @@ +#!/bin/sh + +DFATOOL="$(dirname "$0")/.." + +if test -z "${COUNTERS}"; then + COUNTERS="$(ls -1 /sys/class/powercap)" +fi + +NAMES= +UJ_FILES= +for counter in ${COUNTERS}; do + if test -e /sys/class/powercap/${counter}/name && test -e /sys/class/powercap/${counter}/energy_uj; then + NAMES="${NAMES} $(cat /sys/class/powercap/${counter}/name)_${counter} " + UJ_FILES="${UJ_FILES} /sys/class/powercap/${counter}/energy_uj" + fi +done + +if ! 
cat ${UJ_FILES} > /dev/null; then + echo "Unable to read all counters (${UJ_FILES})" >&2 + echo "You may need to run sudo chmod a+r /sys/class/powercap/*/energy_uj" >&2 + exit 1 +fi + +OUTPUT=$(mktemp) + +RAPL_START=$(cat ${UJ_FILES}) +3>${OUTPUT} perf stat -x, -e duration_time --log-fd 3 "$@" +RAPL_END=$(cat ${UJ_FILES}) + +"${DFATOOL}/libexec/rapl-to-dfatool.py" "$(cat ${OUTPUT})" "${NAMES}" "${RAPL_START}" "${RAPL_END}" + +rm -f ${OUTPUT} diff --git a/bin/ttest-ind.py b/bin/ttest-ind.py new file mode 100755 index 0000000..ebed6af --- /dev/null +++ b/bin/ttest-ind.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +import json +import sys +from scipy.stats import ttest_ind + + +def main(pvalue, file1, file2, macro=None): + with open(file1, "r") as f: + data1 = json.load(f) + with open(file2, "r") as f: + data2 = json.load(f) + result = ttest_ind(data1, data2) + + pvalue = float(pvalue) + + print(f"% {result}") + if macro is None: + print(r"\drefset{ttest/pvalue}{" + str(result.pvalue) + "}") + if result.pvalue < pvalue: + if macro: + print("\\def\\" + macro + "{$p < " + f"{pvalue:0.2f}" + "$}") + sys.exit(0) + else: + if macro: + print("\\def\\" + macro + "{$p \\ge " + f"{pvalue:0.2f}" + "$}") + sys.exit(1) + + +if __name__ == "__main__": + main(*sys.argv[1:]) diff --git a/bin/workload.py b/bin/workload.py index 19a7378..72b66bb 100755 --- a/bin/workload.py +++ b/bin/workload.py @@ -1,92 +1,93 @@ #!/usr/bin/env python3 +import argparse +import json +import logging import sys -from dfatool.automata import PTA -from dfatool.utils import human_readable -from dfatool.lex import TimedSequence, TimedWord, Workload +import dfatool.cli +import dfatool.utils +from dfatool.behaviour import EventSequenceModel +from dfatool.model import AnalyticModel -args = sys.argv[1:] - -loops = dict() -ptafiles = list() -loop_names = set() +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__ + ) + parser.add_argument("--aggregate", choices=["sum"], default="sum") + parser.add_argument("--aggregate-unit", choices=["s", "B/s"], default="s") + parser.add_argument( + "--aggregate-init", + default=0, + type=float, + ) + parser.add_argument( + "--log-level", + metavar="LEVEL", + choices=["debug", "info", "warning", "error"], + default="warning", + help="Set log level", + ) + parser.add_argument("--normalize-output", type=str) + parser.add_argument( + "--info", + action="store_true", + help="Show benchmark information (number of measurements, parameter values, ...)", + ) + parser.add_argument( + "--models", + nargs="+", + type=str, + help="Path to model file (.json or .json.xz)", + ) + parser.add_argument( + "--use-lut", + action="store_true", + help="Use LUT rather than performance model for prediction", + ) + parser.add_argument("event", nargs="+", type=str) + args = parser.parse_args() -def simulate_word(timedword): - prev_state = "UNINITIALIZED" - prev_param = None - ret = dict() - for trace_part in timedword: - print("Trace Part {}".format(trace_part)) - if type(trace_part) is TimedWord: - result = pta.simulate( - trace_part, orig_state=prev_state, orig_param=prev_param - ) - elif type(trace_part) is Workload: - result = pta.simulate( - trace_part.word, orig_state=prev_state, orig_param=prev_param - ) - if prev_state != result.end_state: - print( - "Warning: loop starts in state {}, but terminates in {}".format( - prev_state, result.end_state.name - ) - ) - if prev_param != result.parameters: - print( - "Warning: loop starts with parameters {}, but 
terminates with {}".format( - prev_param, result.parameters - ) - ) - ret[trace_part.name] = result - loop_names.add(trace_part.name) + if args.log_level: + numeric_level = getattr(logging, args.log_level.upper(), None) + if not isinstance(numeric_level, int): + print(f"Invalid log level: {args.log_level}", file=sys.stderr) + sys.exit(1) + logging.basicConfig( + level=numeric_level, + format="{asctime} {levelname}:{name}:{message}", + style="{", + ) - print(" Duration: " + human_readable(result.duration, "s")) - if result.duration_mae: - print( - u" ± {} / {:.0f}%".format( - human_readable(result.duration_mae, "s"), result.duration_mape - ) - ) - print(" Energy: " + human_readable(result.energy, "J")) - if result.energy_mae: - print( - u" ± {} / {:.0f}%".format( - human_readable(result.energy_mae, "J"), result.energy_mape - ) - ) - print(" Mean Power: " + human_readable(result.mean_power, "W")) - print("") + models = list() + for model_file in args.models: + with open(model_file, "r") as f: + models.append(AnalyticModel.from_json(json.load(f))) - prev_state = result.end_state - prev_param = result.parameters + if args.info: + for i in range(len(models)): + print(f"""{args.models[i]}: {" ".join(models[i].parameters)}""") + _, param_info = models[i].get_fitted() + for name in models[i].names: + for attr in models[i].attributes(name): + print(f" {name}.{attr} {param_info(name, attr)}") - return ret + workload = EventSequenceModel(models) + aggregate = workload.eval_strs( + args.event, + aggregate=args.aggregate, + aggregate_init=args.aggregate_init, + use_lut=args.use_lut, + ) + if args.normalize_output: + sf = dfatool.cli.parse_shift_function( + "--normalize-output", args.normalize_output + ) + print(dfatool.utils.human_readable(sf(aggregate), args.aggregate_unit)) + else: + print(dfatool.utils.human_readable(aggregate, args.aggregate_unit)) -for i in range(len(args) // 2): - ptafile, raw_word = args[i * 2], args[i * 2 + 1] - ptafiles.append(ptafile) - pta = PTA.from_file(ptafile) - timedword = TimedSequence(raw_word) - print("Input: {}\n".format(timedword)) - loops[ptafile] = simulate_word(timedword) -for loop_name in sorted(loop_names): - result_set = list() - total_power = 0 - for ptafile in sorted(ptafiles): - if loop_name in loops[ptafile]: - result_set.append(loops[ptafile][loop_name]) - total_power += loops[ptafile][loop_name].mean_power - print( - "{}: total mean power is {}".format(loop_name, human_readable(total_power, "W")) - ) - for i, result in enumerate(result_set): - print( - " {:.0f}% {} (period: {})".format( - result.mean_power * 100 / total_power, - ptafiles[i], - human_readable(result.duration, "s"), - ) - ) +if __name__ == "__main__": + main() diff --git a/doc/analysis-logs.md b/doc/analysis-logs.md index f787a18..4b58350 100644 --- a/doc/analysis-logs.md +++ b/doc/analysis-logs.md @@ -1,7 +1,7 @@ # Data Analysis and Performance Model Generation from Log Files -Here, dfatool works with lines of the form "`[::]` *Key* *Attribute* | *parameters* | *NFP values*", where *parameters* is a space-separated series of *param=value* entries (i.e., benchmark configuration) and *NFP values* is a space-separate series of *NFP=value* entries (i.e., benchmark output). -All measurements of a given *Key* *Attribute* combination must use the same set of NFP names. -Parameter names may be different -- parameters that are present in other lines of the same *Key* *Attribute* will be treated as undefined in those lines where they are missing. 
+Here, dfatool works with lines of the form "`[::]` *Key* | *parameters* | *NFP values*", where *parameters* is a space-separated series of *param=value* entries (i.e., benchmark configuration) and *NFP values* is a space-separate series of *NFP=value* entries (i.e., benchmark output). +All measurements of a given *Key* combination must use the same set of NFP names. +Parameter names may be different -- parameters that are present in other lines of the same *Key* will be treated as undefined in those lines where they are missing. Use `bin/analyze-log.py file1.txt file2.txt ...` for analysis. diff --git a/lib/behaviour.py b/lib/behaviour.py new file mode 100644 index 0000000..136a55e --- /dev/null +++ b/lib/behaviour.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python3 + +import logging +from . import utils +from .model import AnalyticModel +from . import functions as df + +logger = logging.getLogger(__name__) + + +class SDKBehaviourModel: + + def __init__(self, observations, annotations): + + meta_observations = list() + delta_by_name = dict() + delta_param_by_name = dict() + is_loop = dict() + + for annotation in annotations: + # annotation.start.param may be incomplete, for instance in cases + # where DPUs are allocated before the input file is loadeed (and + # thus before the problem size is known). + # However, annotation.end.param may also differ from annotation.start.param (it should not, but that's how some benchmarks roll). + # So, we use annotation.start.param if it has the same keys as annotation.end.param, and annotation.end.param otherwise + if sorted(annotation.start.param.keys()) == sorted( + annotation.end.param.keys() + ): + am_tt_param_names = sorted(annotation.start.param.keys()) + else: + am_tt_param_names = sorted(annotation.end.param.keys()) + if annotation.name not in delta_by_name: + delta_by_name[annotation.name] = dict() + delta_param_by_name[annotation.name] = dict() + _, _, meta_obs, _is_loop = self.learn_pta( + observations, + annotation, + delta_by_name[annotation.name], + delta_param_by_name[annotation.name], + ) + meta_observations += meta_obs + is_loop.update(_is_loop) + + self.am_tt_param_names = am_tt_param_names + self.delta_by_name = delta_by_name + self.delta_param_by_name = delta_param_by_name + self.meta_observations = meta_observations + self.is_loop = is_loop + + self.build_transition_guards() + + def build_transition_guards(self): + self.transition_guard = dict() + for name in sorted(self.delta_by_name.keys()): + for t_from, t_to_set in self.delta_by_name[name].items(): + i_to_transition = dict() + delta_param_sets = list() + to_names = list() + transition_guard = dict() + + if len(t_to_set) > 1: + am_tt_by_name = { + name: { + "attributes": [t_from], + "param": list(), + t_from: list(), + }, + } + for i, t_to in enumerate(sorted(t_to_set)): + for param in self.delta_param_by_name[name][(t_from, t_to)]: + am_tt_by_name[name]["param"].append( + utils.param_dict_to_list( + utils.param_str_to_dict(param), + self.am_tt_param_names, + ) + ) + am_tt_by_name[name][t_from].append(i) + i_to_transition[i] = t_to + am = AnalyticModel( + am_tt_by_name, self.am_tt_param_names, force_tree=True + ) + model, info = am.get_fitted() + if type(info(name, t_from)) is df.SplitFunction: + flat_model = info(name, t_from).flatten() + else: + flat_model = list() + logger.warning( + f"Model for {name} {t_from} is {info(name, t_from)}, expected SplitFunction" + ) + + for prefix, output in flat_model: + transition_name = i_to_transition[int(output)] + if transition_name not in 
transition_guard: + transition_guard[transition_name] = list() + transition_guard[transition_name].append(prefix) + + self.transition_guard[t_from] = transition_guard + + def get_trace(self, name, param_dict): + delta = self.delta_by_name[name] + current_state = "__init__" + trace = [current_state] + states_seen = set() + while current_state != "__end__": + next_states = delta[current_state] + + states_seen.add(current_state) + next_states = list(filter(lambda q: q not in states_seen, next_states)) + + if len(next_states) == 0: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found infinite loop at {trace}" + ) + + if len(next_states) > 1 and self.transition_guard[current_state]: + matching_next_states = list() + for candidate in next_states: + for condition in self.transition_guard[current_state][candidate]: + valid = True + for key, value in condition: + if param_dict[key] != value: + valid = False + break + if valid: + matching_next_states.append(candidate) + break + next_states = matching_next_states + + if len(next_states) == 0: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found no valid outbound transitions at {trace}, candidates {self.transition_guard[current_state]}" + ) + if len(next_states) > 1: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found non-deterministic outbound transitions {next_states} at {trace}" + ) + + (next_state,) = next_states + + trace.append(next_state) + current_state = next_state + + return trace + + def learn_pta(self, observations, annotation, delta=dict(), delta_param=dict()): + prev_i = annotation.start.offset + prev = "__init__" + prev_non_kernel = prev + meta_observations = list() + n_seen = dict() + + total_latency_us = 0 + + if sorted(annotation.start.param.keys()) == sorted(annotation.end.param.keys()): + param_dict = annotation.start.param + else: + param_dict = annotation.end.param + param_str = utils.param_dict_to_str(param_dict) + + if annotation.kernels: + # ggf. als dict of tuples, für den Fall dass Schleifen verschieden iterieren können? + for i in range(prev_i, annotation.kernels[0].offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if this in n_seen: + if n_seen[this] == 1: + logger.debug( + f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳" + ) + n_seen[this] += 1 + else: + n_seen[this] = 1 + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + prev = this + prev_i = i + 1 + + total_latency_us += observations[i]["attribute"].get("latency_us", 0) + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + prev_non_kernel = prev + + for kernel in annotation.kernels: + prev = prev_non_kernel + for i in range(prev_i, kernel.offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + # The last iteration (next block) contains a single kernel, + # so we do not increase total_latency_us here. 
+ # However, this means that we will only ever get one latency + # value for each set of kernels with a common problem size, + # despite potentially having far more data at our fingertips. + # We could provide one total_latency_us for each kernel + # (by combining start latency + kernel latency + teardown latency), + # but for that we first need to distinguish between kernel + # components and teardown components in the following block. + + prev = this + prev_i = i + 1 + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + + # There is no kernel end signal in the underlying data, so the last iteration also contains a kernel run. + prev = prev_non_kernel + for i in range(prev_i, annotation.end.offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if this in n_seen: + if n_seen[this] == 1: + logger.debug( + f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳" + ) + n_seen[this] += 1 + else: + n_seen[this] = 1 + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + total_latency_us += observations[i]["attribute"].get("latency_us", 0) + + prev = this + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + + if not prev in delta: + delta[prev] = set() + delta[prev].add("__end__") + if not (prev, "__end__") in delta_param: + delta_param[(prev, "__end__")] = set() + delta_param[(prev, "__end__")].add(param_str) + + for transition, count in n_seen.items(): + meta_observations.append( + { + "name": f"__loop__ {transition}", + "param": param_dict, + "attribute": {"n_iterations": count}, + } + ) + + if total_latency_us: + meta_observations.append( + { + "name": annotation.start.name, + "param": param_dict, + "attribute": {"latency_us": total_latency_us}, + } + ) + + is_loop = dict( + map(lambda kv: (kv[0], True), filter(lambda kv: kv[1] > 1, n_seen.items())) + ) + + return delta, delta_param, meta_observations, is_loop + + +class EventSequenceModel: + def __init__(self, models): + self.models = models + + def _event_normalizer(self, event): + event_normalizer = lambda p: p + if "/" in event: + v1, v2 = event.split("/") + if utils.is_numeric(v1): + event = v2.strip() + event_normalizer = lambda p: utils.soft_cast_float(v1) / p + elif utils.is_numeric(v2): + event = v1.strip() + event_normalizer = lambda p: p / utils.soft_cast_float(v2) + else: + raise RuntimeError(f"Cannot parse '{event}'") + return event, event_normalizer + + def eval_strs(self, events, aggregate="sum", aggregate_init=0, use_lut=False): + for event in events: + event, event_normalizer = self._event_normalizer(event) + nn, param = event.split("(") + name, action = nn.split(".") + param_model = None + ref_model = None + + for model in self.models: + if name in model.names and action in model.attributes(name): + ref_model = model + if use_lut: + param_model = model.get_param_lut(allow_none=True) + else: + param_model, param_info = model.get_fitted() + break + + if param_model is None: + raise RuntimeError(f"Did not find a model for {name}.{action}") + + param = param.removesuffix(")") + if param == "": + param = dict() + else: + param = 
utils.parse_conf_str(param) + + param_list = utils.param_dict_to_list(param, ref_model.parameters) + + if not use_lut and not param_info(name, action).is_predictable(param_list): + logger.warning( + f"Cannot predict {name}.{action}({param}), falling back to static model" + ) + + try: + event_output = event_normalizer( + param_model( + name, + action, + param=param_list, + ) + ) + except KeyError: + if use_lut: + logger.error( + f"Cannot predict {name}.{action}({param}) from LUT model" + ) + else: + logger.error(f"Cannot predict {name}.{action}({param}) from model") + raise + except TypeError: + if not use_lut: + logger.error(f"Cannot predict {name}.{action}({param}) from model") + raise + + if aggregate == "sum": + aggregate_init += event_output + else: + raise RuntimeError(f"Unknown aggregate type: {aggregate}") + + return aggregate_init @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import dfatool.functions as df -import dfatool.plotter import logging import numpy as np import os @@ -14,7 +13,11 @@ def sanity_check(args): pass -def print_static(model, static_model, name, attribute, with_dependence=False): +def print_static( + model, static_model, name, attribute, with_dependence=False, precision=2 +): + if precision is None: + precision = 6 unit = " " if attribute == "power": unit = "µW" @@ -23,25 +26,15 @@ def print_static(model, static_model, name, attribute, with_dependence=False): elif attribute == "substate_count": unit = "su" if model.attr_by_name[name][attribute].stats: + ratio = model.attr_by_name[name][ + attribute + ].stats.generic_param_dependence_ratio() print( - "{:10s}: {:28s} : {:.2f} {:s} ({:.2f})".format( - name, - attribute, - static_model(name, attribute), - unit, - model.attr_by_name[name][ - attribute - ].stats.generic_param_dependence_ratio(), - ) + f"{name:10s}: {attribute:28s} : {static_model(name, attribute):.{precision}f} {unit:s} ({ratio:.2f})" ) else: print( - "{:10s}: {:28s} : {:.2f} {:s}".format( - name, - attribute, - static_model(name, attribute), - unit, - ) + f"{name:10s}: {attribute:28s} : {static_model(name, attribute):.{precision}f} {unit:s}" ) if with_dependence: for param in model.parameters: @@ -93,18 +86,38 @@ def print_info_by_name(model, by_name): ) -def print_analyticinfo(prefix, info): +def print_information_gain_by_name(model, by_name): + for name in model.names: + for attr in model.attributes(name): + print(f"{name} {attr}:") + mutual_information = model.mutual_information(name, attr) + for param in model.parameters: + if param in mutual_information: + print(f" Parameter {param} : {mutual_information[param]:5.2f}") + else: + print(f" Parameter {param} : -.--") + + +def print_analyticinfo(prefix, info, ndigits=None): model_function = info.model_function.removeprefix("0 + ") for i in range(len(info.model_args)): - model_function = model_function.replace( - f"regression_arg({i})", str(info.model_args[i]) - ) + if ndigits is not None: + model_function = model_function.replace( + f"regression_arg({i})", str(round(info.model_args[i], ndigits=ndigits)) + ) + else: + model_function = model_function.replace( + f"regression_arg({i})", str(info.model_args[i]) + ) model_function = model_function.replace("+ -", "- ") print(f"{prefix}: {model_function}") -def print_staticinfo(prefix, info): - print(f"{prefix}: {info.value}") +def print_staticinfo(prefix, info, ndigits=None): + if ndigits is not None: + print(f"{prefix}: {round(info.value, ndigits)}") + else: + print(f"{prefix}: {info.value}") def print_symreginfo(prefix, info): @@ -160,7 +173,7 @@ def 
_print_cartinfo(prefix, model): def print_splitinfo(info, prefix=""): if type(info) is df.SplitFunction: - for k, v in info.child.items(): + for k, v in sorted(info.child.items()): print_splitinfo(v, f"{prefix} {info.param_name}={k}") elif type(info) is df.ScalarSplitFunction: print_splitinfo(info.child_le, f"{prefix} {info.param_name}≤{info.threshold}") @@ -175,13 +188,13 @@ def print_splitinfo(info, prefix=""): print(f"{prefix}: UNKNOWN {type(info)}") -def print_model(prefix, info): +def print_model(prefix, info, precision=None): if type(info) is df.StaticFunction: - print_staticinfo(prefix, info) + print_staticinfo(prefix, info, ndigits=precision) elif type(info) is df.AnalyticFunction: - print_analyticinfo(prefix, info) + print_analyticinfo(prefix, info, ndigits=precision) elif type(info) is df.FOLFunction: - print_analyticinfo(prefix, info) + print_analyticinfo(prefix, info, ndigits=precision) elif type(info) is df.CARTFunction: print_cartinfo(prefix, info) elif type(info) is df.SplitFunction: @@ -280,6 +293,7 @@ def model_quality_table( buf = f"{key:>{key_len}s} {attr:>{attr_len}s}" for results, info in ((lut, None), (model, model_info), (static, None)): buf += " " + if results is not None and ( info is None or ( @@ -317,6 +331,23 @@ def model_quality_table( print(buf) +def export_pseudo_dref(dref_file, dref, precision=None): + with open(dref_file, "w") as f: + for k, v in sorted(os.environ.items(), key=lambda kv: kv[0]): + if k.startswith("DFATOOL_"): + print(f"% {k}='{v}'", file=f) + for arg in sys.argv: + print(f"% {arg}", file=f) + for k, v in sorted(dref.items()): + k = k.replace("/", "I").replace("-", "").replace("_", "").replace(" ", "") + if type(v) is tuple: + v = v[0] + if type(v) in (float, np.float64) and precision is not None: + print("\\def\\" + k + "{" + f"{v:.{precision}f}" + "}", file=f) + else: + print("\\def\\" + k + "{" + str(v) + "}", file=f) + + def export_dataref(dref_file, dref, precision=None): with open(dref_file, "w") as f: for k, v in sorted(os.environ.items(), key=lambda kv: kv[0]): @@ -358,13 +389,23 @@ def export_dot(model, dot_prefix): logger.info(f"Dot export of model saved to {filename}") -def export_csv_unparam(model, csv_prefix): +def export_csv_unparam(model, csv_prefix, dialect="excel"): import csv + class ExcelLF(csv.Dialect): + delimiter = "," + quotechar = '"' + doublequote = True + skipinitialspace = False + lineterminator = "\n" + quoting = 0 + + csv.register_dialect("excel-lf", ExcelLF) + for name in sorted(model.names): filename = f"{csv_prefix}{name}.csv" with open(filename, "w") as f: - writer = csv.writer(f) + writer = csv.writer(f, dialect=dialect) writer.writerow( ["measurement"] + model.parameters + sorted(model.attributes(name)) ) @@ -420,6 +461,8 @@ def export_json_unparam(model, filename): def boxplot_param(args, model): + import dfatool.plotter as dp + title = None param_is_filtered = dict() if args.filter_param: @@ -449,7 +492,7 @@ def boxplot_param(args, model): ) ) for attribute in attr_names: - dfatool.plotter.boxplot( + dp.boxplot( param_desc, list(map(lambda k: by_param[(name, k)][attribute], param_keys)), output=f"{args.boxplot_param}{name}-{attribute}.pdf", @@ -467,6 +510,12 @@ def add_standard_arguments(parser): help="Export tree-based model to {PREFIX}{name}-{attribute}.dot", ) parser.add_argument( + "--export-pseudo-dref", + metavar="FILE", + type=str, + help="Export model and model quality to LaTeX def file (sort of like dataref)", + ) + parser.add_argument( "--export-dref", metavar="FILE", type=str, @@ -479,6 +528,14 
@@ def add_standard_arguments(parser): help="Export raw (parameter-independent) observations in CSV format to {PREFIX}{name}-{attribute}.csv", ) parser.add_argument( + "--export-csv-dialect", + metavar="DIALECT", + type=str, + choices=["excel", "excel-lf", "excel-tab", "unix"], + default="excel", + help="CSV dialect to use for --export-csv-unparam", + ) + parser.add_argument( "--export-pgf-unparam", metavar="PREFIX", type=str, @@ -494,7 +551,7 @@ def add_standard_arguments(parser): "--export-json", metavar="FILENAME", type=str, - help="Export model in JSON format to FILENAME", + help="Export model and error metrics in JSON format to FILENAME", ) parser.add_argument( "--load-json", @@ -557,6 +614,11 @@ def add_standard_arguments(parser): help="Show benchmark information (number of measurements, parameter values, ...)", ) parser.add_argument( + "--information-gain", + action="store_true", + help="Show information gain of parameters", + ) + parser.add_argument( "--log-level", metavar="LEVEL", choices=["debug", "info", "warning", "error"], @@ -574,6 +636,13 @@ def add_standard_arguments(parser): "all: all of the above", ) parser.add_argument( + "--show-model-precision", + metavar="NDIG", + type=int, + default=2, + help="Limit precision of model output to NDIG decimals", + ) + parser.add_argument( "--show-model-error", action="store_true", help="Show model error compared to LUT (lower bound) and static (reference) models", @@ -584,6 +653,11 @@ def add_standard_arguments(parser): help="Show model complexity score and details (e.g. regression tree height and node count)", ) parser.add_argument( + "--add-total-observation", + action="store_true", + help="Add a TOTAL observation for each <key> that consists of the sums of its <attribute> entries. This allows for cross-validation of behaviour models vs. non-behaviour-aware models.", + ) + parser.add_argument( "--cross-validate", metavar="<method>:<count>", type=str, diff --git a/lib/functions.py b/lib/functions.py index 32fade0..b76814b 100644 --- a/lib/functions.py +++ b/lib/functions.py @@ -27,6 +27,9 @@ dfatool_rmt_relevance_threshold = float( os.getenv("DFATOOL_RMT_RELEVANCE_THRESHOLD", "0.5") ) +dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear") +dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf)) + if dfatool_preproc_relevance_method == "mi": import sklearn.feature_selection @@ -90,7 +93,15 @@ class ParamFunction: error measure. """ - def __init__(self, param_function, validation_function, num_vars, repr_str=None): + def __init__( + self, + param_function, + validation_function, + num_vars, + repr_str=None, + ini=None, + bounds=((dfatool_uls_min_bound, dfatool_uls_min_bound), (np.inf, np.inf)), + ): """ Create function object suitable for regression analysis. 
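Note on the new least-squares knobs: the `ini` and `bounds` attributes added to ParamFunction here, together with DFATOOL_ULS_LOSS_FUNCTION and DFATOOL_ULS_MIN_BOUND, are later forwarded to scipy.optimize.least_squares (see the AnalyticFunction and paramfit.py hunks below). The following standalone sketch only illustrates that call; the toy data and the β₀ + β₁·x model are invented for the example and are not part of the patch:

    import numpy as np
    from scipy import optimize

    # hypothetical measurements: y ≈ 2 + 3·x, with one outlier at x=5
    x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    y = np.array([5.1, 8.0, 11.2, 13.9, 40.0])

    def residuals(beta, x, y):
        return beta[0] + beta[1] * x - y

    res = optimize.least_squares(
        residuals,
        [1, 1],                             # initial guess, cf. the new "ini" argument
        args=(x, y),
        xtol=2e-15,
        loss="soft_l1",                     # e.g. DFATOOL_ULS_LOSS_FUNCTION=soft_l1
        bounds=((0, 0), (np.inf, np.inf)),  # e.g. DFATOOL_ULS_MIN_BOUND=0
    )
    print(res.x)  # fit is less sensitive to the outlier than with plain linear loss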
@@ -113,6 +124,8 @@ class ParamFunction: self._validation_function = validation_function self._num_variables = num_vars self.repr_str = repr_str + self.ini = ini + self.bounds = bounds def __repr__(self) -> str: if self.repr_str: @@ -452,6 +465,24 @@ class SplitFunction(ModelFunction): or "infty", } ) + return hyper + + # SplitFunction only + def flatten(self): + paths = list() + for param_value, subtree in self.child.items(): + if type(subtree) is SplitFunction: + for path, value in subtree.flatten(): + path = [(self.param_name, param_value)] + path + paths.append((path, value)) + elif type(subtree) is StaticFunction: + path = [(self.param_name, param_value)] + paths.append((path, subtree.value)) + else: + raise RuntimeError( + "flatten is only implemented for RMTs with constant leaves" + ) + return paths @classmethod def from_json(cls, data): @@ -1662,7 +1693,11 @@ class FOLFunction(SKLearnRegressionFunction): self.model_args = list(np.ones((num_vars))) try: res = optimize.least_squares( - error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15 + error_function, + self.model_args, + args=(fit_parameters, data), + xtol=2e-15, + loss=dfatool_uls_loss_fun, ) except ValueError as err: logger.warning(f"Fit failed: {err} (function: {self.model_function})") @@ -1792,6 +1827,7 @@ class AnalyticFunction(ModelFunction): both for function usage and least squares optimization. If unset, defaults to [1, 1, 1, ...] """ + bounds = kwargs.pop("bounds", dict()) super().__init__(value, **kwargs) self._parameter_names = parameters self._num_args = num_args @@ -1800,6 +1836,7 @@ class AnalyticFunction(ModelFunction): self._dependson = [False] * (len(parameters) + num_args) self.fit_success = False self.fit_by_param = fit_by_param + self.bounds = bounds if type(function_str) == str: num_vars_re = re.compile(r"regression_arg\(([0-9]+)\)") @@ -1905,10 +1942,26 @@ class AnalyticFunction(ModelFunction): """ X, Y, num_valid, num_total = self.get_fit_data(by_param) if num_valid > 2: + lower_bounds = list() + upper_bounds = list() + for i in range(len(self.model_args)): + if i in self.bounds and self.bounds[i][0] == "range": + param_index = self._parameter_names.index(self.bounds[i][1]) + lower_bounds.append(np.min(X[param_index])) + upper_bounds.append(np.max(X[param_index])) + self.model_args[i] = np.mean(X[param_index]) + else: + lower_bounds.append(dfatool_uls_min_bound) + upper_bounds.append(np.inf) error_function = lambda P, X, y: self._function(P, X) - y try: res = optimize.least_squares( - error_function, self.model_args, args=(X, Y), xtol=2e-15 + error_function, + self.model_args, + args=(X, Y), + xtol=2e-15, + loss=dfatool_uls_loss_fun, + bounds=(lower_bounds, upper_bounds), ) except ValueError as err: logger.warning(f"Fit failed: {err} (function: {self.model_function})") @@ -2024,6 +2077,7 @@ class analytic: _safe_log = np.vectorize(lambda x: np.log(np.abs(x)) if np.abs(x) > 0.001 else 1.0) _safe_inv = np.vectorize(lambda x: 1 / x if np.abs(x) > 0.001 else 1.0) _safe_sqrt = np.vectorize(lambda x: np.sqrt(np.abs(x))) + _roofline = np.vectorize(lambda x, y: x if x < y else y) _function_map = { "linear": lambda x: x, @@ -2039,6 +2093,7 @@ class analytic: "safe_log": lambda x: np.log(np.abs(x)) if np.abs(x) > 0.001 else 1.0, "safe_inv": lambda x: 1 / x if np.abs(x) > 0.001 else 1.0, "safe_sqrt": lambda x: np.sqrt(np.abs(x)), + "roofline": lambda x, y: x if x < y else y, } @staticmethod @@ -2108,6 +2163,21 @@ class analytic: 2, repr_str="β₀ + β₁ * np.sqrt(x)", ), + "roofline": ParamFunction( 
+ lambda reg_param, model_param: reg_param[0] + + reg_param[1] * analytic._roofline(model_param, reg_param[2]), + lambda model_param: True, + 3, + repr_str="β₀ + β₁ * roofline(x, β₂)", + bounds=( + ( + dfatool_uls_min_bound, + dfatool_uls_min_bound, + dfatool_uls_min_bound, + ), + (np.inf, np.inf, np.inf), + ), + ), # "num0_8": ParamFunction( # lambda reg_param, model_param: reg_param[0] # + reg_param[1] * analytic._num0_8(model_param), @@ -2159,11 +2229,17 @@ class analytic: if os.getenv("DFATOOL_RMT_SUBMODEL", "uls") == "fol": functions = {"linear": functions["linear"]} + elif allowed_functions := os.getenv("DFATOOL_ULS_FUNCTIONS", None): + allowed_functions = allowed_functions.split(",") + all_functions = list(functions.keys()) + for function_name in all_functions: + if function_name not in allowed_functions: + functions.pop(function_name) return functions @staticmethod - def _fmap(reference_type, reference_name, function_type): + def _fmap(reference_type, reference_name, function_type, arg_idx=None): """Map arg/parameter name and best-fit function name to function text suitable for AnalyticFunction.""" ref_str = "{}({})".format(reference_type, reference_name) if function_type == "linear": @@ -2182,6 +2258,8 @@ class analytic: return "1/({})".format(ref_str) if function_type == "sqrt": return "np.sqrt({})".format(ref_str) + if function_type == "roofline": + return "analytic._roofline({}, regression_arg({}))".format(ref_str, arg_idx) return "analytic._{}({})".format(function_type, ref_str) @staticmethod @@ -2206,22 +2284,29 @@ class analytic: """ buf = "0" arg_idx = 0 + bounds = dict() for combination in powerset(fit_results.items()): buf += " + regression_arg({:d})".format(arg_idx) arg_idx += 1 for function_item in combination: if is_numeric(function_item[0]): - buf += " * {}".format( - analytic._fmap( - "function_arg", function_item[0], function_item[1]["best"] - ) - ) + mapkey = "function_arg" else: - buf += " * {}".format( - analytic._fmap( - "parameter", function_item[0], function_item[1]["best"] - ) + mapkey = "parameter" + buf += " * {}".format( + analytic._fmap( + mapkey, function_item[0], function_item[1]["best"], arg_idx ) + ) + if function_item[1]["best"] == "roofline": + bounds[arg_idx] = ("range", function_item[0]) + arg_idx += 1 return AnalyticFunction( - None, buf, parameter_names, num_args, fit_by_param=fit_results, **kwargs + None, + buf, + parameter_names, + num_args, + fit_by_param=fit_results, + bounds=bounds, + **kwargs, ) diff --git a/lib/loader/plain.py b/lib/loader/plain.py index 1818bce..ef0b596 100644 --- a/lib/loader/plain.py +++ b/lib/loader/plain.py @@ -4,6 +4,10 @@ from ..utils import soft_cast_int_or_float, soft_cast_float import os import re +import logging + +logger = logging.getLogger(__name__) + class CSVfile: def __init__(self): @@ -65,10 +69,48 @@ class CSVfile: return observations -class Logfile: - def __init__(self): - pass +class TraceAnnotation: + offset = None + name = None + param = dict() + + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + def apply_offset(self, offset): + self.offset += offset + return self + + def __repr__(self): + param_desc = " ".join(map(lambda kv: f"{kv[0]}={kv[1]}", self.param.items())) + return f"{self.name}<{param_desc} @ {self.offset}>" + + +class RunAnnotation: + name = None + start = None + kernels = list() + end = None + + # start: offset points to first run entry + # kernel: offset points to first kernel run entry + # end: offset points to first non-run entry (i.e., for all run entries: offset < 
end.offset) + + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + def apply_offset(self, offset): + self.start.apply_offset(offset) + for kernel in self.kernels: + kernel.apply_offset(offset) + self.end.apply_offset(offset) + return self + + def __repr__(self): + return f"RunAnnotation<{self.name}, start={self.start}, kernels={self.kernels}, end={self.end}>" + +class Logfile: def kv_to_param(self, kv_str, cast): try: key, value = kv_str.split("=") @@ -84,14 +126,24 @@ class Logfile: def kv_to_param_i(self, kv_str): return self.kv_to_param(kv_str, soft_cast_int_or_float) - def load(self, f): + def load(self, f, is_trace=False): observations = list() + if is_trace: + trace_status = None + trace_start = None + trace_kernels = list() + trace_end = None + annotations = list() + for lineno, line in enumerate(f): - m = re.search(r"\[::\] *([^|]*?) *[|] *([^|]*?) *[|] *(.*)", line) - if m: + if m := re.search(r"\[::\] *([^|]*?) *[|] *([^|]*?) *[|] *(.*)", line): name_str = m.group(1) param_str = m.group(2) attr_str = m.group(3) + if is_trace: + name_str, name_annot = name_str.split("@") + name_str = name_str.strip() + name_annot = name_annot.strip() try: param = dict(map(self.kv_to_param_i, param_str.split())) attr = dict(map(self.kv_to_param_f, attr_str.split())) @@ -102,13 +154,89 @@ class Logfile: "attribute": attr, } ) + if is_trace: + observations[-1]["place"] = name_annot + except ValueError: + logger.warning( + f"Error parsing {f}: invalid key-value pair in line {lineno+1}" + ) + logger.warning(f"Offending entry:\n{line}") + raise + + if not is_trace: + continue + + # only relevant for is_trace == True + if m := re.fullmatch(r"\[>>\] *([^|]*?) *[|] *([^|]*?) *", line): + trace_status = 1 + trace_kernels = list() + name_str = m.group(1) + param_str = m.group(2) + try: + param = dict(map(self.kv_to_param_i, param_str.split())) + except ValueError: + logger.warning( + f"Error parsing {f}: invalid key-value pair in line {lineno+1}" + ) + logger.warning(f"Offending entry:\n{line}") + raise + trace_start = TraceAnnotation( + offset=len(observations), name=name_str, param=param + ) + + if m := re.fullmatch(r"\[--\] *([^|]*?) *[|] *([^|]*?) *", line): + trace_status = 2 + name_str = m.group(1) + param_str = m.group(2) + try: + param = dict(map(self.kv_to_param_i, param_str.split())) except ValueError: logger.warning( f"Error parsing {f}: invalid key-value pair in line {lineno+1}" ) logger.warning(f"Offending entry:\n{line}") raise + trace_kernels.append( + TraceAnnotation( + offset=len(observations), name=name_str, param=param + ) + ) + + if m := re.fullmatch(r"\[<<\] *([^|]*?) *[|] *([^|]*?) 
*", line): + trace_status = None + name_str = m.group(1) + param_str = m.group(2) + try: + param = dict(map(self.kv_to_param_i, param_str.split())) + except ValueError: + logger.warning( + f"Error parsing {f}: invalid key-value pair in line {lineno+1}" + ) + logger.warning(f"Offending entry:\n{line}") + raise + trace_end = TraceAnnotation( + offset=len(observations), name=name_str, param=param + ) + if trace_start is not None: + assert trace_start.name == trace_end.name + for kernel in trace_kernels: + assert trace_start.name == kernel.name + annotations.append( + RunAnnotation( + name=trace_start.name, + start=trace_start, + kernels=trace_kernels, + end=trace_end, + ) + ) + + trace_status = None + trace_start = None + trace_kernels = list() + trace_end = None + if is_trace: + return observations, annotations return observations def dump(self, observations, f): diff --git a/lib/model.py b/lib/model.py index 2452af7..4d1edd5 100644 --- a/lib/model.py +++ b/lib/model.py @@ -14,7 +14,14 @@ from .parameters import ( distinct_param_values, ) from .paramfit import ParamFit -from .utils import is_numeric, soft_cast_int, by_name_to_by_param, regression_measures +from .utils import ( + is_numeric, + soft_cast_int, + by_name_to_by_param, + by_param_to_by_name, + regression_measures, + param_eq_or_none, +) logger = logging.getLogger(__name__) @@ -79,6 +86,7 @@ class AnalyticModel: compute_stats=True, force_tree=False, max_std=None, + by_param=None, from_json=None, ): """ @@ -96,7 +104,7 @@ class AnalyticModel: - attributes: list of keys that should be analyzed, e.g. ['power', 'duration'] - for each attribute mentioned in 'attributes': A list with measurements. - All list except for 'attributes' must have the same length. + All lists except for 'attributes' must have the same length. For example: parameters = ['foo_count', 'irrelevant'] @@ -148,9 +156,18 @@ class AnalyticModel: for name, name_data in from_json["name"].items(): self.attr_by_name[name] = dict() for attr, attr_data in name_data.items(): - self.attr_by_name[name][attr] = ModelAttribute.from_json( - name, attr, attr_data - ) + if by_param: + self.attr_by_name[name][attr] = ModelAttribute.from_json( + name, + attr, + attr_data, + data_values=by_name[name][attr], + param_values=by_name[name]["param"], + ) + else: + self.attr_by_name[name][attr] = ModelAttribute.from_json( + name, attr, attr_data + ) self.fit_done = True return @@ -227,6 +244,9 @@ class AnalyticModel: return self.parameters[param_index] return str(param_index) + def mutual_information(self, name, attr): + return self.attr_by_name[name][attr].mutual_information() + def get_static(self, use_mean=False): """ Get static model function: name, attribute -> model value. @@ -246,7 +266,7 @@ class AnalyticModel: return static_model_getter - def get_param_lut(self, use_mean=False, fallback=False): + def get_param_lut(self, use_mean=False, fallback=False, allow_none=False): """ Get parameter-look-up-table model function: name, attribute, parameter values -> model value. 
@@ -276,7 +296,16 @@ class AnalyticModel: try: return lut_model[name][key][param] except KeyError: - if fallback: + if allow_none: + keys = filter( + lambda p: param_eq_or_none(param, p), + lut_model[name][key].keys(), + ) + values = list(map(lambda p: lut_model[name][key][p], keys)) + if not values: + raise + return np.mean(values) + elif fallback: return static_model[name][key] raise params = kwargs["params"] @@ -446,7 +475,7 @@ class AnalyticModel: return model_getter, info_getter - def assess(self, model_function, ref=None, return_raw=False): + def assess(self, model_function, ref=None, return_raw=False, with_sum=False): """ Calculate MAE, SMAPE, etc. of model_function for each by_name entry. @@ -476,18 +505,36 @@ class AnalyticModel: ) measures = regression_measures(predicted_data, elem[attribute]) detailed_results[name][attribute] = measures - if return_raw: + if return_raw or with_sum: raw_results[name]["attribute"][attribute] = { "groundTruth": elem[attribute], "modelOutput": predicted_data, } + if with_sum: + for name in ref.keys(): + attr_0 = ref[name]["attributes"][0] + gt_sum = np.zeros( + len(raw_results[name]["attribute"][attr_0]["groundTruth"]) + ) + mo_sum = np.zeros( + len(raw_results[name]["attribute"][attr_0]["modelOutput"]) + ) + for attribute in ref[name]["attributes"]: + gt_sum += np.array( + raw_results[name]["attribute"][attribute]["groundTruth"] + ) + mo_sum += np.array( + raw_results[name]["attribute"][attribute]["modelOutput"] + ) + detailed_results[name]["TOTAL"] = regression_measures(mo_sum, gt_sum) + if return_raw: return detailed_results, raw_results return detailed_results def to_dref( - self, static_quality, lut_quality, model_quality, xv_models=None + self, static_quality, lut_quality, model_quality, xv_models=None, with_sum=False ) -> dict: ret = dict() for name in self.names: @@ -519,20 +566,27 @@ class AnalyticModel: ) for k, v in param_data.items(): ret[f"paramcount/{name}/{k}"] = v - for attr_name, attr in self.attr_by_name[name].items(): + + attr_pairs = self.attr_by_name[name].items() + if with_sum: + attr_pairs = list(attr_pairs) + attr_pairs.append(("TOTAL", None)) + + for attr_name, attr in attr_pairs: # attr.data must be the same for all attrs - ret[f"data/{name}/num samples"] = len(attr.data) - unit = None - if "power" in attr.attr: - unit = r"\micro\watt" - elif "energy" in attr.attr: - unit = r"\pico\joule" - elif attr.attr == "duration": - unit = r"\micro\second" - for k, v in attr.to_dref(unit).items(): - ret[f"data/{name}/{attr_name}/{k}"] = v - for k, v in attr.model_function.hyper_to_dref().items(): - ret[f"hyper/{name}/{attr_name}/{k}"] = v + if attr is not None: + ret[f"data/{name}/num samples"] = len(attr.data) + unit = None + if "power" in attr.attr: + unit = r"\micro\watt" + elif "energy" in attr.attr: + unit = r"\pico\joule" + elif attr.attr == "duration": + unit = r"\micro\second" + for k, v in attr.to_dref(unit).items(): + ret[f"data/{name}/{attr_name}/{k}"] = v + for k, v in attr.model_function.hyper_to_dref().items(): + ret[f"hyper/{name}/{attr_name}/{k}"] = v e_static = static_quality[name][attr_name] for metric in "mae p50 p90 p95 p99".split(): ret[f"error/static/{name}/{attr_name}/{metric}"] = ( @@ -555,7 +609,7 @@ class AnalyticModel: except KeyError: logger.warning(f"{name} {attr_name} static model has no MAPE") - if lut_quality is not None: + if lut_quality is not None and attr_name in lut_quality[name]: e_lut = lut_quality[name][attr_name] for metric in "mae p50 p90 p95 p99".split(): 
ret[f"error/lut/{name}/{attr_name}/{metric}"] = ( @@ -615,25 +669,65 @@ class AnalyticModel: ret[f"xv/{name}/{attr_name}/{k}"] = np.mean(entry[k]) return ret - def to_json(self, **kwargs) -> dict: + def to_json( + self, + with_by_param=False, + lut_error=None, + static_error=None, + model_error=None, + **kwargs, + ) -> dict: """ Return JSON encoding of this AnalyticModel. """ ret = { "parameters": self.parameters, "name": dict([[name, dict()] for name in self.names]), + "paramValuesbyName": dict([[name, dict()] for name in self.names]), } + if with_by_param: + by_param = self.get_by_param() + ret["byParam"] = list() + for k, v in by_param.items(): + ret["byParam"].append((k, v)) + for name in self.names: for attr_name, attr in self.attr_by_name[name].items(): ret["name"][name][attr_name] = attr.to_json(**kwargs) + if lut_error: + ret["name"][name][attr_name]["lutError"] = lut_error[name][ + attr_name + ] + if static_error: + ret["name"][name][attr_name]["staticError"] = static_error[name][ + attr_name + ] + if model_error: + ret["name"][name][attr_name]["modelError"] = model_error[name][ + attr_name + ] + attr_name = list(self.attributes(name))[0] + for param_name in self.parameters: + if self.attr_by_name[name][attr_name].stats is not None: + ret["paramValuesbyName"][name][param_name] = self.attr_by_name[ + name + ][attr_name].stats.distinct_values_by_param_name[param_name] return ret @classmethod - def from_json(cls, data, by_name, parameters): - assert data["parameters"] == parameters - return cls(by_name, parameters, from_json=data) + def from_json(cls, data, by_name=None, parameters=None): + if by_name is None and parameters is None: + assert data["byParam"] is not None + by_param = dict() + for (nk, pk), v in data["byParam"]: + by_param[(nk, tuple(pk))] = v + by_name = by_param_to_by_name(by_param) + return cls(by_name, data["parameters"], by_param=by_param, from_json=data) + else: + assert data["parameters"] == parameters + return cls(by_name, parameters, from_json=data) def webconf_function_map(self) -> list: ret = list() diff --git a/lib/parameters.py b/lib/parameters.py index 4047c10..acb044c 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -604,6 +604,9 @@ class ModelAttribute: # The best model we have. May be Static, Split, or Param (and later perhaps Substate) self.model_function = None + # Information gain cache. 
Used for statistical analysis + self.mutual_information_cache = None + self._check_codependent_param() # There must be at least 3 distinct data values (≠ None) if an analytic model @@ -618,13 +621,20 @@ class ModelAttribute: mean = np.mean(self.data) return f"ModelAttribute<{self.name}, {self.attr}, mean={mean}>" - def to_json(self, **kwargs): - return { + def to_json(self, with_lut=False, **kwargs): + ret = { "paramNames": self.param_names, "argCount": self.arg_count, "modelFunction": self.model_function.to_json(**kwargs), } + if with_lut: + ret["LUT"] = list() + for key, value in self.by_param.items(): + ret["LUT"].append((key, value)) + + return ret + def to_dref(self, unit=None): ret = {"mean": (self.mean, unit), "median": (self.median, unit)} @@ -699,12 +709,33 @@ class ModelAttribute: def webconf_function_map(self): return self.model_function.webconf_function_map() + def mutual_information(self): + if self.mutual_information_cache is not None: + return self.mutual_information_cache + + from sklearn.feature_selection import mutual_info_regression + + fit_parameters, _, ignore_index = param_to_ndarray( + self.param_values, with_nan=False, categorical_to_scalar=True + ) + + mutual_info_result = mutual_info_regression(fit_parameters, self.data) + + self.mutual_information_cache = dict() + j = 0 + for i, param_name in enumerate(self.param_names): + if not ignore_index[i]: + self.mutual_information_cache[param_name] = mutual_info_result[j] + j += 1 + + return self.mutual_information_cache + @classmethod - def from_json(cls, name, attr, data): + def from_json(cls, name, attr, data, data_values=None, param_values=None): param_names = data["paramNames"] arg_count = data["argCount"] - self = cls(name, attr, None, None, param_names, arg_count) + self = cls(name, attr, data_values, param_values, param_names, arg_count) self.model_function = df.ModelFunction.from_json(data["modelFunction"]) self.mean = self.model_function.value diff --git a/lib/paramfit.py b/lib/paramfit.py index e6539a4..84eba2b 100644 --- a/lib/paramfit.py +++ b/lib/paramfit.py @@ -16,7 +16,13 @@ from .utils import ( ) logger = logging.getLogger(__name__) -best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr") +dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear") +dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf)) + +if dfatool_uls_loss_fun == "linear": + best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr") +else: + best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "mae") class ParamFit: @@ -205,9 +211,24 @@ def _try_fits( if function_name not in raw_results: raw_results[function_name] = dict() error_function = param_function.error_function + if param_function.ini: + ini = param_function.ini + else: + ini = [0] + [1 for i in range(1, param_function._num_variables)] + if function_name == "roofline": + param_function.bounds = ( + (dfatool_uls_min_bound, dfatool_uls_min_bound, np.min(X)), + (np.inf, np.inf, np.max(X)), + ) + ini[2] = np.mean(X) try: res = optimize.least_squares( - error_function, [0, 1], args=(X, Y), xtol=2e-15 + error_function, + ini, + args=(X, Y), + xtol=2e-15, + loss=dfatool_uls_loss_fun, + bounds=param_function.bounds, ) except FloatingPointError as e: logger.warning( diff --git a/lib/pelt.py b/lib/pelt.py index 29faf5c..8a951c9 100644 --- a/lib/pelt.py +++ b/lib/pelt.py @@ -94,7 +94,7 @@ class PELT: ) algo = algo.fit(self.norm_signal(signal)) - # Empirically, most sub-state detectino results use a penalty + # Empirically, most sub-state 
detection results use a penalty # in the range 30 to 60. If there's no changepoints with a # penalty of 20, there's also no changepoins with any penalty # > 20, so we can safely skip changepoint detection altogether. diff --git a/lib/utils.py b/lib/utils.py index 426b701..fb76367 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -48,6 +48,8 @@ def running_mean(x: np.ndarray, N: int) -> np.ndarray: def human_readable(value, unit): + if value is None: + return value for prefix, factor in ( ("p", 1e-12), ("n", 1e-9), @@ -55,6 +57,8 @@ def human_readable(value, unit): ("m", 1e-3), ("", 1), ("k", 1e3), + ("M", 1e6), + ("G", 1e9), ): if value < 1e3 * factor: return "{:.2f} {}{}".format(value * (1 / factor), prefix, unit) @@ -150,7 +154,7 @@ def parse_conf_str(conf_str): """ conf_dict = dict() for option in conf_str.split(","): - key, value = option.split("=") + key, value = option.strip().split("=") conf_dict[key] = soft_cast_float(value) return conf_dict @@ -205,6 +209,18 @@ def param_slice_eq(a, b, index): return False +def param_eq_or_none(a, b): + """ + Check if by_param keys a and b are identical, allowing a None in a to match any key in b. + """ + set_keys = tuple(filter(lambda i: a[i] is not None, range(len(a)))) + a_not_none = tuple(map(lambda i: a[i], set_keys)) + b_not_none = tuple(map(lambda i: b[i], set_keys)) + if a_not_none == b_not_none: + return True + return False + + def match_parameter_values(input_param: dict, match_param: dict): """ Check whether one of the paramaters in `input_param` has the same value in `match_param`. @@ -302,6 +318,21 @@ def param_dict_to_list(param_dict, parameter_names, default=None): return ret +def param_dict_to_str(param_dict): + ret = list() + for parameter_name in sorted(param_dict.keys()): + ret.append(f"{parameter_name}={param_dict[parameter_name]}") + return " ".join(ret) + + +def param_str_to_dict(param_str): + ret = dict() + for param_pair in param_str.split(): + key, value = param_pair.split("=") + ret[key] = soft_cast_int_or_float(value) + return ret + + def observations_enum_to_bool(observations: list, kconfig=False): """ Convert enum / categorical observations to boolean-only ones. 
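param_dict_to_str and param_str_to_dict (added above) are intended to round-trip benchmark configurations: keys are emitted in sorted order, and values are soft-cast back to int/float when parsing. A short usage sketch, assuming dfatool is importable as a package and parameter values contain neither spaces nor "=":

    from dfatool.utils import param_dict_to_str, param_str_to_dict

    conf = {"n_dpus": 64, "voltage": 3.3, "variant": "baseline"}
    s = param_dict_to_str(conf)
    print(s)                             # n_dpus=64 variant=baseline voltage=3.3
    print(param_str_to_dict(s) == conf)  # True -- values are cast back to int/float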
@@ -345,6 +376,10 @@ def observations_enum_to_bool(observations: list, kconfig=False): def ignore_param(by_name: dict, parameter_names: list, ignored_parameters: list): ignored_indexes = list() unpoppable_params = list() + + if ignored_parameters is None: + return + for param_name in sorted(ignored_parameters): try: ignored_indexes.append(parameter_names.index(param_name)) @@ -560,22 +595,30 @@ def filter_aggregate_by_param(aggregate, parameters, parameter_filter): param_index = parameters.index(param_name) except ValueError: logger.error(f"Unknown parameter '{param_name}'") - return + continue param_value = soft_cast_int(param_value) names_to_remove = set() if condition == "<": - condf = lambda x: x[param_index] < param_value + condf = ( + lambda x: x[param_index] is not None and x[param_index] < param_value + ) elif condition == "≤": - condf = lambda x: x[param_index] <= param_value + condf = ( + lambda x: x[param_index] is not None and x[param_index] <= param_value + ) elif condition == "=": condf = lambda x: x[param_index] == param_value elif condition == "≠": condf = lambda x: x[param_index] != param_value elif condition == "≥": - condf = lambda x: x[param_index] >= param_value + condf = ( + lambda x: x[param_index] is not None and x[param_index] >= param_value + ) elif condition == ">": - condf = lambda x: x[param_index] > param_value + condf = ( + lambda x: x[param_index] is not None and x[param_index] > param_value + ) elif condition == "∈": param_values = tuple(map(soft_cast_int, param_value.split(","))) condf = lambda x: x[param_index] in param_values @@ -685,11 +728,18 @@ def regression_measures(predicted: np.ndarray, ground_truth: np.ndarray): rsq -- R^2 measure, see sklearn.metrics.r2_score count -- Number of values """ - if type(predicted) != np.ndarray: + + if type(predicted) is list: + predicted = np.array(predicted) + + if type(ground_truth) is list: + ground_truth = np.array(ground_truth) + + if type(predicted) is not np.ndarray: raise ValueError( "first arg ('predicted') must be ndarray, is {}".format(type(predicted)) ) - if type(ground_truth) != np.ndarray: + if type(ground_truth) is not np.ndarray: raise ValueError( "second arg ('ground_truth') must be ndarray, is {}".format( type(ground_truth) diff --git a/lib/validation.py b/lib/validation.py index 958a9e0..bf6764d 100644 --- a/lib/validation.py +++ b/lib/validation.py @@ -109,7 +109,7 @@ class CrossValidator: self.args = args self.kwargs = kwargs - def kfold(self, model_getter, k=10, static=False): + def kfold(self, model_getter, k=10, static=False, with_sum=False): """ Perform k-fold cross-validation and return average model quality. @@ -161,10 +161,10 @@ class CrossValidator: training_and_validation_sets[i][name] = subsets_by_name[name][i] return self._generic_xv( - model_getter, training_and_validation_sets, static=static + model_getter, training_and_validation_sets, static=static, with_sum=with_sum ) - def montecarlo(self, model_getter, count=200, static=False): + def montecarlo(self, model_getter, count=200, static=False, with_sum=False): """ Perform Monte Carlo cross-validation and return average model quality. 
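The with_sum flag threaded through kfold and montecarlo here makes _generic_xv (next hunk) score a synthetic TOTAL attribute: ground truth and model output are summed element-wise across all attributes of a name before computing the error measures. A toy sketch of that aggregation, with regression_measures reduced to a plain MAE:

    import numpy as np

    ground_truth = {
        "latency_us": np.array([10.0, 12.0, 11.0]),
        "setup_us": np.array([2.0, 2.5, 2.2]),
    }
    model_output = {
        "latency_us": np.array([10.5, 11.5, 11.0]),
        "setup_us": np.array([2.1, 2.4, 2.3]),
    }

    gt_sum = sum(ground_truth.values())  # element-wise: [12.0, 14.5, 13.2]
    mo_sum = sum(model_output.values())  # element-wise: [12.6, 13.9, 13.3]

    print(np.mean(np.abs(mo_sum - gt_sum)))  # MAE of the TOTAL attribute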
@@ -211,10 +211,12 @@ class CrossValidator: training_and_validation_sets[i][name] = subsets_by_name[name][i] return self._generic_xv( - model_getter, training_and_validation_sets, static=static + model_getter, training_and_validation_sets, static=static, with_sum=with_sum ) - def _generic_xv(self, model_getter, training_and_validation_sets, static=False): + def _generic_xv( + self, model_getter, training_and_validation_sets, static=False, with_sum=False + ): ret = dict() models = list() @@ -268,6 +270,16 @@ class CrossValidator: ) ) + if with_sum: + for name in self.names: + attr_0 = self.by_name[name]["attributes"][0] + gt_sum = np.zeros(len(ret[name][attr_0]["groundTruth"])) + mo_sum = np.zeros(len(ret[name][attr_0]["modelOutput"])) + for attribute in self.by_name[name]["attributes"]: + gt_sum += np.array(ret[name][attribute]["groundTruth"]) + mo_sum += np.array(ret[name][attribute]["modelOutput"]) + ret[name]["TOTAL"] = regression_measures(mo_sum, gt_sum) + return ret, models def _single_xv(self, model_getter, tv_set_dict, static=False): diff --git a/libexec/rapl-to-dfatool.py b/libexec/rapl-to-dfatool.py new file mode 100755 index 0000000..5ab4c38 --- /dev/null +++ b/libexec/rapl-to-dfatool.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 + +import sys + + +def main(perf_line, rapl_names, rapl_start, rapl_stop): + duration_ns = int(perf_line.split(",")[3]) + + rapl_names = rapl_names.split() + rapl_start = rapl_start.split() + rapl_stop = rapl_stop.split() + + buf = [f"duration_ns={duration_ns}"] + + for i in range(len(rapl_names)): + uj_start = int(rapl_start[i]) + uj_stop = int(rapl_stop[i]) + buf.append(f"{rapl_names[i]}_energy_uj={uj_stop - uj_start}") + buf.append( + f"{rapl_names[i]}_power_W={(uj_stop - uj_start) * 1000 / duration_ns}" + ) + + print(" ".join(buf)) + + +if __name__ == "__main__": + main(*sys.argv[1:]) |