-rw-r--r-- | README.md | 3
-rwxr-xr-x | bin/analyze-log.py | 22
-rwxr-xr-x | bin/analyze-trace.py | 471
-rwxr-xr-x | bin/pta-workload.py | 92
-rwxr-xr-x | bin/run-with-rapl | 32
-rwxr-xr-x | bin/workload.py | 161
-rw-r--r-- | lib/behaviour.py | 388
-rw-r--r-- | lib/cli.py | 25
-rw-r--r-- | lib/functions.py | 25
-rw-r--r-- | lib/loader/plain.py | 136
-rw-r--r-- | lib/model.py | 88
-rw-r--r-- | lib/parameters.py | 15
-rw-r--r-- | lib/paramfit.py | 8
-rw-r--r-- | lib/utils.py | 44
-rwxr-xr-x | libexec/rapl-to-dfatool.py | 27 |
15 files changed, 1420 insertions, 117 deletions
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -133,8 +133,9 @@ The following variables may be set to alter the behaviour of dfatool components.
 | `DFATOOL_LMT_MIN_SAMPLES_LEAF` | 0.0 .. **0.1** .. 1.0, 3 .. *n* | Minimum samples that each leaf of a split candidate must contain. A value below 1.0 specifies a ratio of the total number of training samples. A value above 1 specifies an absolute number of samples. |
 | `DFATOOL_LMT_MAX_BINS` | 10 .. **120** | Number of bins used to determine optimal split. LMT default: 25. |
 | `DFATOOL_LMT_CRITERION` | **mse**, rmse, mae, poisson | Error metric to use when selecting best split. |
-| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, mae, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. Least squares regression itself minimzes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. Supports all metrics accepted by `--error-metric`. |
+| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, **mae**, … | Error metric to use when selecting the best-fitting function during unsupervised least squares (ULS) regression. By default, least squares regression minimizes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. If `DFATOOL_ULS_LOSS_FUNCTION` is set to a value other than linear, the default is mean absolute error (mae). Supports all metrics accepted by `--error-metric`. |
 | `DFATOOL_ULS_FUNCTIONS` | a,b,… | List of function templates to use in ULS. Default: all supported functions. |
+| `DFATOOL_ULS_LOSS_FUNCTION` | **linear**, soft\_l1, … | Loss function for least squares fitting, see the `scipy.optimize.least_squares#loss` documentation. |
 | `DFATOOL_ULS_MIN_DISTINCT_VALUES` | 2 .. **3** .. *n* | Minimum number of unique values a parameter must take to be eligible for ULS |
 | `DFATOOL_ULS_SKIP_CODEPENDENT_CHECK` | **0**, 1 | Do not detect and remove co-dependent features in ULS. |
 | `DFATOOL_ULS_MIN_BOUND` | **-∞** .. *n* | Lower bound for ULS regression variables. Setting it to 0 can often be beneficial. |
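The `DFATOOL_ULS_LOSS_FUNCTION` variable documented above is wired through to `scipy.optimize.least_squares` in the `lib/functions.py` and `lib/paramfit.py` hunks further down. A minimal usage sketch, not part of the commit: the log file name is a placeholder, and `--export-model` is the option shown in the `bin/analyze-log.py` hunk below.

```sh
# Hypothetical invocation: fit with a robust soft_l1 loss.
# With a non-linear loss, the ULS error metric defaults to mae
# unless DFATOOL_ULS_ERROR_METRIC is set explicitly.
DFATOOL_ULS_LOSS_FUNCTION=soft_l1 \
    bin/analyze-log.py --export-model model.json benchmark.txt
```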
| diff --git a/bin/analyze-log.py b/bin/analyze-log.py index 901fb0f..50b5648 100755 --- a/bin/analyze-log.py +++ b/bin/analyze-log.py @@ -46,6 +46,11 @@ def main(): "--export-model", metavar="FILE", type=str, help="Export JSON model to FILE" ) parser.add_argument( + "--export-model-with-lut", + action="store_true", + help="Include LUT in model export", + ) + parser.add_argument( "logfiles", nargs="+", type=str, @@ -286,8 +291,8 @@ def main(): dfatool.cli.print_model_complexity(model) if args.export_model: - print(f"Exportding model to {args.export_model}") - json_model = model.to_json() + print(f"Exporting model to {args.export_model}") + json_model = model.to_json(with_by_param=args.export_model_with_lut) with open(args.export_model, "w") as f: json.dump( json_model, f, indent=2, sort_keys=True, cls=dfatool.utils.NpEncoder @@ -296,7 +301,7 @@ def main(): if args.export_dot: dfatool.cli.export_dot(model, args.export_dot) - if args.export_dref: + if args.export_dref or args.export_pseudo_dref: dref = model.to_dref( static_quality, lut_quality, @@ -316,9 +321,14 @@ def main(): mutual_information[param] ) - dfatool.cli.export_dataref( - args.export_dref, dref, precision=args.dref_precision - ) + if args.export_pseudo_dref: + dfatool.cli.export_pseudo_dref( + args.export_pseudo_dref, dref, precision=args.dref_precision + ) + if args.export_dref: + dfatool.cli.export_dataref( + args.export_dref, dref, precision=args.dref_precision + ) if args.export_json: with open(args.export_json, "w") as f: diff --git a/bin/analyze-trace.py b/bin/analyze-trace.py new file mode 100755 index 0000000..1cc3b89 --- /dev/null +++ b/bin/analyze-trace.py @@ -0,0 +1,471 @@ +#!/usr/bin/env python3 + +""" +analyze-trace - Generate a performance-aware behaviour model from log files + +foo +""" + +import argparse +import dfatool.cli +import dfatool.plotter +import dfatool.utils +import dfatool.functions as df +from dfatool.behaviour import SDKBehaviourModel +from dfatool.loader import Logfile +from dfatool.model import AnalyticModel +from dfatool.validation import CrossValidator +from functools import reduce +import logging +import json +import re +import sys +import time + + +def parse_logfile(filename): + loader = Logfile() + + if filename.endswith("xz"): + import lzma + + with lzma.open(filename, "rt") as f: + return loader.load(f, is_trace=True) + with open(filename, "r") as f: + return loader.load(f, is_trace=True) + + +def join_annotations(ref, base, new): + offset = len(ref) + return base + list(map(lambda x: x.apply_offset(offset), new)) + + +def main(): + timing = dict() + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__ + ) + dfatool.cli.add_standard_arguments(parser) + parser.add_argument( + "logfiles", + nargs="+", + type=str, + help="Path to benchmark output (.txt or .txt.xz)", + ) + args = parser.parse_args() + dfatool.cli.sanity_check(args) + + if args.log_level: + numeric_level = getattr(logging, args.log_level.upper(), None) + if not isinstance(numeric_level, int): + print(f"Invalid log level: {args.log_level}", file=sys.stderr) + sys.exit(1) + logging.basicConfig( + level=numeric_level, + format="{asctime} {levelname}:{name}:{message}", + style="{", + ) + + observations, annotations = reduce( + lambda a, b: (a[0] + b[0], join_annotations(a[0], a[1], b[1])), + map(parse_logfile, args.logfiles), + ) + + bm = SDKBehaviourModel(observations, annotations) + observations += bm.meta_observations + is_loop = bm.is_loop + am_tt_param_names = 
bm.am_tt_param_names + delta_by_name = bm.delta_by_name + delta_param_by_name = bm.delta_param_by_name + + def format_guard(guard): + return "∧".join(map(lambda kv: f"{kv[0]}={kv[1]}", guard)) + + for name in sorted(delta_by_name.keys()): + for t_from, t_to_set in delta_by_name[name].items(): + i_to_transition = dict() + delta_param_sets = list() + to_names = list() + transition_guard = dict() + + for t_to in sorted(t_to_set): + delta_params = delta_param_by_name[name][(t_from, t_to)] + delta_param_sets.append(delta_params) + to_names.append(t_to) + n_confs = len(delta_params) + if is_loop.get(t_from, False) and is_loop.get(t_to, False): + print(f"{name} {t_from} → {t_to} ⟳") + elif is_loop.get(t_from, False): + print(f"{name} {t_from} → {t_to} →") + else: + print( + f"{name} {t_from} → {t_to} ({' ∨ '.join(map(format_guard, bm.transition_guard[t_from].get(t_to, list()))) or '⊤'})" + ) + + for i in range(len(delta_param_sets)): + for j in range(i + 1, len(delta_param_sets)): + if not delta_param_sets[i].isdisjoint(delta_param_sets[j]): + intersection = delta_param_sets[i].intersection( + delta_param_sets[j] + ) + if is_loop.get(t_from, False): + logging.debug( + f"Loop transition <{t_from}>: <{to_names[i]}> and <{to_names[j]}> are both taken for {intersection}" + ) + else: + logging.error( + f"Outbound transitions of <{t_from}> are not deterministic: <{to_names[i]}> and <{to_names[j]}> are both taken for {intersection}" + ) + raise RuntimeError( + f"Outbound transitions of <{t_from}> are not deterministic" + ) + + print("") + + by_name, parameter_names = dfatool.utils.observations_to_by_name(observations) + del observations + + if args.ignore_param: + args.ignore_param = args.ignore_param.split(",") + + if args.filter_observation: + args.filter_observation = list( + map(lambda x: tuple(x.split(":")), args.filter_observation.split(",")) + ) + + if args.filter_param: + args.filter_param = list( + map( + lambda entry: dfatool.cli.parse_filter_string( + entry, parameter_names=parameter_names + ), + args.filter_param.split(";"), + ) + ) + else: + args.filter_param = list() + + dfatool.utils.filter_aggregate_by_param(by_name, parameter_names, args.filter_param) + dfatool.utils.filter_aggregate_by_observation(by_name, args.filter_observation) + dfatool.utils.ignore_param(by_name, parameter_names, args.ignore_param) + + if args.param_shift: + param_shift = dfatool.cli.parse_param_shift(args.param_shift) + dfatool.utils.shift_param_in_aggregate(by_name, parameter_names, param_shift) + + if args.normalize_nfp: + norm = dfatool.cli.parse_nfp_normalization(args.normalize_nfp) + dfatool.utils.normalize_nfp_in_aggregate(by_name, norm) + + function_override = dict() + if args.function_override: + for function_desc in args.function_override.split(";"): + state_or_tran, attribute, function_str = function_desc.split(":") + function_override[(state_or_tran, attribute)] = function_str + + ts = time.time() + if args.load_json: + with open(args.load_json, "r") as f: + model = AnalyticModel.from_json(json.load(f), by_name, parameter_names) + else: + model = AnalyticModel( + by_name, + parameter_names, + force_tree=args.force_tree, + compute_stats=not args.skip_param_stats, + function_override=function_override, + ) + timing["AnalyticModel"] = time.time() - ts + + if args.info: + dfatool.cli.print_info_by_name(model, by_name) + + if args.information_gain: + dfatool.cli.print_information_gain_by_name(model, by_name) + + if args.export_csv_unparam: + dfatool.cli.export_csv_unparam( + model, args.export_csv_unparam, 
dialect=args.export_csv_dialect + ) + + if args.export_pgf_unparam: + dfatool.cli.export_pgf_unparam(model, args.export_pgf_unparam) + + if args.export_json_unparam: + dfatool.cli.export_json_unparam(model, args.export_json_unparam) + + if args.plot_unparam: + for kv in args.plot_unparam.split(";"): + state_or_trans, attribute, ylabel = kv.split(":") + fname = "param_y_{}_{}.pdf".format(state_or_trans, attribute) + dfatool.plotter.plot_y( + model.by_name[state_or_trans][attribute], + xlabel="measurement #", + ylabel=ylabel, + # output=fname, + show=not args.non_interactive, + ) + + if args.boxplot_unparam: + title = None + if args.filter_param: + title = "filter: " + ", ".join( + map(lambda kv: f"{kv[0]}={kv[1]}", args.filter_param) + ) + for name in model.names: + attr_names = sorted(model.attributes(name)) + dfatool.plotter.boxplot( + attr_names, + [model.by_name[name][attr] for attr in attr_names], + xlabel="Attribute", + output=f"{args.boxplot_unparam}{name}.pdf", + title=title, + show=not args.non_interactive, + ) + for attribute in attr_names: + dfatool.plotter.boxplot( + [attribute], + [model.by_name[name][attribute]], + output=f"{args.boxplot_unparam}{name}-{attribute}.pdf", + title=title, + show=not args.non_interactive, + ) + + if args.boxplot_param: + dfatool.cli.boxplot_param(args, model) + + if args.cross_validate: + xv_method, xv_count = args.cross_validate.split(":") + xv_count = int(xv_count) + xv = CrossValidator( + AnalyticModel, + by_name, + parameter_names, + force_tree=args.force_tree, + compute_stats=not args.skip_param_stats, + show_progress=args.progress, + ) + xv.parameter_aware = args.parameter_aware_cross_validation + else: + xv_method = None + xv_count = None + + static_model = model.get_static() + + ts = time.time() + lut_model = model.get_param_lut() + timing["get lut"] = time.time() - ts + + if lut_model is None: + lut_quality = None + else: + ts = time.time() + lut_quality = model.assess(lut_model, with_sum=args.add_total_observation) + timing["assess lut"] = time.time() - ts + + ts = time.time() + param_model, param_info = model.get_fitted() + timing["get model"] = time.time() - ts + + ts = time.time() + if xv_method == "montecarlo": + static_quality, _ = xv.montecarlo( + lambda m: m.get_static(), + xv_count, + static=True, + with_sum=args.add_total_observation, + ) + xv.export_filename = args.export_xv + analytic_quality, _ = xv.montecarlo( + lambda m: m.get_fitted()[0], xv_count, with_sum=args.add_total_observation + ) + elif xv_method == "kfold": + static_quality, _ = xv.kfold( + lambda m: m.get_static(), + xv_count, + static=True, + with_sum=args.add_total_observation, + ) + xv.export_filename = args.export_xv + analytic_quality, _ = xv.kfold( + lambda m: m.get_fitted()[0], xv_count, with_sum=args.add_total_observation + ) + else: + static_quality = model.assess(static_model, with_sum=args.add_total_observation) + if args.export_raw_predictions: + analytic_quality, raw_results = model.assess(param_model, return_raw=True) + with open(args.export_raw_predictions, "w") as f: + json.dump(raw_results, f, cls=dfatool.utils.NpEncoder) + else: + analytic_quality = model.assess( + param_model, with_sum=args.add_total_observation + ) + timing["assess model"] = time.time() - ts + + if "paramdetection" in args.show_model or "all" in args.show_model: + for name in model.names: + for attribute in model.attributes(name): + info = param_info(name, attribute) + print( + "{:10s} {:10s} non-param stddev {:f}".format( + name, + attribute, + 
model.attr_by_name[name][attribute].stats.std_static, + ) + ) + print( + "{:10s} {:10s} param-lut stddev {:f}".format( + name, + attribute, + model.attr_by_name[name][attribute].stats.std_param_lut, + ) + ) + for param in sorted( + model.attr_by_name[name][attribute].stats.std_by_param.keys() + ): + print( + "{:10s} {:10s} {:10s} stddev {:f}".format( + name, + attribute, + param, + model.attr_by_name[name][attribute].stats.std_by_param[ + param + ], + ) + ) + for arg_index in range(model.attr_by_name[name][attribute].arg_count): + print( + "{:10s} {:10s} {:10s} stddev {:f}".format( + name, + attribute, + f"arg{arg_index}", + model.attr_by_name[name][attribute].stats.std_by_arg[ + arg_index + ], + ) + ) + if type(info) is df.AnalyticFunction: + for param_name in sorted(info.fit_by_param.keys(), key=str): + param_fit = info.fit_by_param[param_name]["results"] + for function_type in sorted(param_fit.keys()): + function_rmsd = param_fit[function_type]["rmsd"] + print( + "{:10s} {:10s} {:10s} mean {:10s} RMSD {:.0f}".format( + name, + attribute, + str(param_name), + function_type, + function_rmsd, + ) + ) + + if "static" in args.show_model or "all" in args.show_model: + print("--- static model ---") + for name in sorted(model.names): + for attribute in sorted(model.attributes(name)): + dfatool.cli.print_static( + model, + static_model, + name, + attribute, + with_dependence="all" in args.show_model, + precision=args.show_model_precision, + ) + + if "param" in args.show_model or "all" in args.show_model: + print("--- param model ---") + for name in sorted(model.names): + for attribute in sorted(model.attributes(name)): + info = param_info(name, attribute) + dfatool.cli.print_model( + f"{name:10s} {attribute:15s}", + info, + precision=args.show_model_precision, + ) + + if args.show_model_error: + dfatool.cli.model_quality_table( + lut=lut_quality, + model=analytic_quality, + static=static_quality, + model_info=param_info, + xv_method=xv_method, + xv_count=xv_count, + error_metric=args.error_metric, + load_model=args.load_json, + ) + + if args.show_model_complexity: + dfatool.cli.print_model_complexity(model) + + if args.export_dot: + dfatool.cli.export_dot(model, args.export_dot) + + if args.export_dref or args.export_pseudo_dref: + dref = model.to_dref( + static_quality, + lut_quality, + analytic_quality, + with_sum=args.add_total_observation, + ) + for key, value in timing.items(): + dref[f"timing/{key}"] = (value, r"\second") + + if args.information_gain: + for name in model.names: + for attr in model.attributes(name): + mutual_information = model.mutual_information(name, attr) + for param in model.parameters: + if param in mutual_information: + dref[f"mutual information/{name}/{attr}/{param}"] = ( + mutual_information[param] + ) + + if args.export_pseudo_dref: + dfatool.cli.export_pseudo_dref( + args.export_pseudo_dref, dref, precision=args.dref_precision + ) + if args.export_dref: + dfatool.cli.export_dataref( + args.export_dref, dref, precision=args.dref_precision + ) + + if args.export_json: + with open(args.export_json, "w") as f: + json.dump( + model.to_json( + static_error=static_quality, + lut_error=lut_quality, + model_error=analytic_quality, + ), + f, + sort_keys=True, + cls=dfatool.utils.NpEncoder, + indent=2, + ) + + if args.plot_param: + for kv in args.plot_param.split(";"): + try: + state_or_trans, attribute, param_name = kv.split(":") + except ValueError: + print( + "Usage: --plot-param='state_or_trans:attribute:param_name'", + file=sys.stderr, + ) + sys.exit(1) + 
dfatool.plotter.plot_param( + model, + state_or_trans, + attribute, + model.param_index(param_name), + title=state_or_trans, + ylabel=attribute, + xlabel=param_name, + output=f"{state_or_trans}-{attribute}-{param_name}.pdf", + show=not args.non_interactive, + ) + + +if __name__ == "__main__": + main() diff --git a/bin/pta-workload.py b/bin/pta-workload.py new file mode 100755 index 0000000..19a7378 --- /dev/null +++ b/bin/pta-workload.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 + +import sys +from dfatool.automata import PTA +from dfatool.utils import human_readable +from dfatool.lex import TimedSequence, TimedWord, Workload + +args = sys.argv[1:] + +loops = dict() +ptafiles = list() +loop_names = set() + + +def simulate_word(timedword): + prev_state = "UNINITIALIZED" + prev_param = None + ret = dict() + for trace_part in timedword: + print("Trace Part {}".format(trace_part)) + if type(trace_part) is TimedWord: + result = pta.simulate( + trace_part, orig_state=prev_state, orig_param=prev_param + ) + elif type(trace_part) is Workload: + result = pta.simulate( + trace_part.word, orig_state=prev_state, orig_param=prev_param + ) + if prev_state != result.end_state: + print( + "Warning: loop starts in state {}, but terminates in {}".format( + prev_state, result.end_state.name + ) + ) + if prev_param != result.parameters: + print( + "Warning: loop starts with parameters {}, but terminates with {}".format( + prev_param, result.parameters + ) + ) + ret[trace_part.name] = result + loop_names.add(trace_part.name) + + print(" Duration: " + human_readable(result.duration, "s")) + if result.duration_mae: + print( + u" ± {} / {:.0f}%".format( + human_readable(result.duration_mae, "s"), result.duration_mape + ) + ) + print(" Energy: " + human_readable(result.energy, "J")) + if result.energy_mae: + print( + u" ± {} / {:.0f}%".format( + human_readable(result.energy_mae, "J"), result.energy_mape + ) + ) + print(" Mean Power: " + human_readable(result.mean_power, "W")) + print("") + + prev_state = result.end_state + prev_param = result.parameters + + return ret + + +for i in range(len(args) // 2): + ptafile, raw_word = args[i * 2], args[i * 2 + 1] + ptafiles.append(ptafile) + pta = PTA.from_file(ptafile) + timedword = TimedSequence(raw_word) + print("Input: {}\n".format(timedword)) + loops[ptafile] = simulate_word(timedword) + +for loop_name in sorted(loop_names): + result_set = list() + total_power = 0 + for ptafile in sorted(ptafiles): + if loop_name in loops[ptafile]: + result_set.append(loops[ptafile][loop_name]) + total_power += loops[ptafile][loop_name].mean_power + print( + "{}: total mean power is {}".format(loop_name, human_readable(total_power, "W")) + ) + for i, result in enumerate(result_set): + print( + " {:.0f}% {} (period: {})".format( + result.mean_power * 100 / total_power, + ptafiles[i], + human_readable(result.duration, "s"), + ) + ) diff --git a/bin/run-with-rapl b/bin/run-with-rapl new file mode 100755 index 0000000..54d2d9c --- /dev/null +++ b/bin/run-with-rapl @@ -0,0 +1,32 @@ +#!/bin/sh + +DFATOOL="$(dirname "$0")/.." + +if test -z "${COUNTERS}"; then + COUNTERS="$(ls -1 /sys/class/powercap)" +fi + +NAMES= +UJ_FILES= +for counter in ${COUNTERS}; do + if test -e /sys/class/powercap/${counter}/name && test -e /sys/class/powercap/${counter}/energy_uj; then + NAMES="${NAMES} $(cat /sys/class/powercap/${counter}/name)_${counter} " + UJ_FILES="${UJ_FILES} /sys/class/powercap/${counter}/energy_uj" + fi +done + +if ! 
cat ${UJ_FILES} > /dev/null; then + echo "Unable to read all counters (${UJ_FILES})" >&2 + echo "You may need to run sudo chmod a+r /sys/class/powercap/*/energy_uj" >&2 + exit 1 +fi + +OUTPUT=$(mktemp) + +RAPL_START=$(cat ${UJ_FILES}) +3>${OUTPUT} perf stat -x, -e duration_time --log-fd 3 "$@" +RAPL_END=$(cat ${UJ_FILES}) + +"${DFATOOL}/libexec/rapl-to-dfatool.py" "$(cat ${OUTPUT})" "${NAMES}" "${RAPL_START}" "${RAPL_END}" + +rm -f ${OUTPUT} diff --git a/bin/workload.py b/bin/workload.py index 19a7378..72b66bb 100755 --- a/bin/workload.py +++ b/bin/workload.py @@ -1,92 +1,93 @@ #!/usr/bin/env python3 +import argparse +import json +import logging import sys -from dfatool.automata import PTA -from dfatool.utils import human_readable -from dfatool.lex import TimedSequence, TimedWord, Workload +import dfatool.cli +import dfatool.utils +from dfatool.behaviour import EventSequenceModel +from dfatool.model import AnalyticModel -args = sys.argv[1:] - -loops = dict() -ptafiles = list() -loop_names = set() +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__ + ) + parser.add_argument("--aggregate", choices=["sum"], default="sum") + parser.add_argument("--aggregate-unit", choices=["s", "B/s"], default="s") + parser.add_argument( + "--aggregate-init", + default=0, + type=float, + ) + parser.add_argument( + "--log-level", + metavar="LEVEL", + choices=["debug", "info", "warning", "error"], + default="warning", + help="Set log level", + ) + parser.add_argument("--normalize-output", type=str) + parser.add_argument( + "--info", + action="store_true", + help="Show benchmark information (number of measurements, parameter values, ...)", + ) + parser.add_argument( + "--models", + nargs="+", + type=str, + help="Path to model file (.json or .json.xz)", + ) + parser.add_argument( + "--use-lut", + action="store_true", + help="Use LUT rather than performance model for prediction", + ) + parser.add_argument("event", nargs="+", type=str) + args = parser.parse_args() -def simulate_word(timedword): - prev_state = "UNINITIALIZED" - prev_param = None - ret = dict() - for trace_part in timedword: - print("Trace Part {}".format(trace_part)) - if type(trace_part) is TimedWord: - result = pta.simulate( - trace_part, orig_state=prev_state, orig_param=prev_param - ) - elif type(trace_part) is Workload: - result = pta.simulate( - trace_part.word, orig_state=prev_state, orig_param=prev_param - ) - if prev_state != result.end_state: - print( - "Warning: loop starts in state {}, but terminates in {}".format( - prev_state, result.end_state.name - ) - ) - if prev_param != result.parameters: - print( - "Warning: loop starts with parameters {}, but terminates with {}".format( - prev_param, result.parameters - ) - ) - ret[trace_part.name] = result - loop_names.add(trace_part.name) + if args.log_level: + numeric_level = getattr(logging, args.log_level.upper(), None) + if not isinstance(numeric_level, int): + print(f"Invalid log level: {args.log_level}", file=sys.stderr) + sys.exit(1) + logging.basicConfig( + level=numeric_level, + format="{asctime} {levelname}:{name}:{message}", + style="{", + ) - print(" Duration: " + human_readable(result.duration, "s")) - if result.duration_mae: - print( - u" ± {} / {:.0f}%".format( - human_readable(result.duration_mae, "s"), result.duration_mape - ) - ) - print(" Energy: " + human_readable(result.energy, "J")) - if result.energy_mae: - print( - u" ± {} / {:.0f}%".format( - human_readable(result.energy_mae, "J"), 
result.energy_mape - ) - ) - print(" Mean Power: " + human_readable(result.mean_power, "W")) - print("") + models = list() + for model_file in args.models: + with open(model_file, "r") as f: + models.append(AnalyticModel.from_json(json.load(f))) - prev_state = result.end_state - prev_param = result.parameters + if args.info: + for i in range(len(models)): + print(f"""{args.models[i]}: {" ".join(models[i].parameters)}""") + _, param_info = models[i].get_fitted() + for name in models[i].names: + for attr in models[i].attributes(name): + print(f" {name}.{attr} {param_info(name, attr)}") - return ret + workload = EventSequenceModel(models) + aggregate = workload.eval_strs( + args.event, + aggregate=args.aggregate, + aggregate_init=args.aggregate_init, + use_lut=args.use_lut, + ) + if args.normalize_output: + sf = dfatool.cli.parse_shift_function( + "--normalize-output", args.normalize_output + ) + print(dfatool.utils.human_readable(sf(aggregate), args.aggregate_unit)) + else: + print(dfatool.utils.human_readable(aggregate, args.aggregate_unit)) -for i in range(len(args) // 2): - ptafile, raw_word = args[i * 2], args[i * 2 + 1] - ptafiles.append(ptafile) - pta = PTA.from_file(ptafile) - timedword = TimedSequence(raw_word) - print("Input: {}\n".format(timedword)) - loops[ptafile] = simulate_word(timedword) -for loop_name in sorted(loop_names): - result_set = list() - total_power = 0 - for ptafile in sorted(ptafiles): - if loop_name in loops[ptafile]: - result_set.append(loops[ptafile][loop_name]) - total_power += loops[ptafile][loop_name].mean_power - print( - "{}: total mean power is {}".format(loop_name, human_readable(total_power, "W")) - ) - for i, result in enumerate(result_set): - print( - " {:.0f}% {} (period: {})".format( - result.mean_power * 100 / total_power, - ptafiles[i], - human_readable(result.duration, "s"), - ) - ) +if __name__ == "__main__": + main() diff --git a/lib/behaviour.py b/lib/behaviour.py new file mode 100644 index 0000000..136a55e --- /dev/null +++ b/lib/behaviour.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python3 + +import logging +from . import utils +from .model import AnalyticModel +from . import functions as df + +logger = logging.getLogger(__name__) + + +class SDKBehaviourModel: + + def __init__(self, observations, annotations): + + meta_observations = list() + delta_by_name = dict() + delta_param_by_name = dict() + is_loop = dict() + + for annotation in annotations: + # annotation.start.param may be incomplete, for instance in cases + # where DPUs are allocated before the input file is loadeed (and + # thus before the problem size is known). + # However, annotation.end.param may also differ from annotation.start.param (it should not, but that's how some benchmarks roll). 
+ # So, we use annotation.start.param if it has the same keys as annotation.end.param, and annotation.end.param otherwise + if sorted(annotation.start.param.keys()) == sorted( + annotation.end.param.keys() + ): + am_tt_param_names = sorted(annotation.start.param.keys()) + else: + am_tt_param_names = sorted(annotation.end.param.keys()) + if annotation.name not in delta_by_name: + delta_by_name[annotation.name] = dict() + delta_param_by_name[annotation.name] = dict() + _, _, meta_obs, _is_loop = self.learn_pta( + observations, + annotation, + delta_by_name[annotation.name], + delta_param_by_name[annotation.name], + ) + meta_observations += meta_obs + is_loop.update(_is_loop) + + self.am_tt_param_names = am_tt_param_names + self.delta_by_name = delta_by_name + self.delta_param_by_name = delta_param_by_name + self.meta_observations = meta_observations + self.is_loop = is_loop + + self.build_transition_guards() + + def build_transition_guards(self): + self.transition_guard = dict() + for name in sorted(self.delta_by_name.keys()): + for t_from, t_to_set in self.delta_by_name[name].items(): + i_to_transition = dict() + delta_param_sets = list() + to_names = list() + transition_guard = dict() + + if len(t_to_set) > 1: + am_tt_by_name = { + name: { + "attributes": [t_from], + "param": list(), + t_from: list(), + }, + } + for i, t_to in enumerate(sorted(t_to_set)): + for param in self.delta_param_by_name[name][(t_from, t_to)]: + am_tt_by_name[name]["param"].append( + utils.param_dict_to_list( + utils.param_str_to_dict(param), + self.am_tt_param_names, + ) + ) + am_tt_by_name[name][t_from].append(i) + i_to_transition[i] = t_to + am = AnalyticModel( + am_tt_by_name, self.am_tt_param_names, force_tree=True + ) + model, info = am.get_fitted() + if type(info(name, t_from)) is df.SplitFunction: + flat_model = info(name, t_from).flatten() + else: + flat_model = list() + logger.warning( + f"Model for {name} {t_from} is {info(name, t_from)}, expected SplitFunction" + ) + + for prefix, output in flat_model: + transition_name = i_to_transition[int(output)] + if transition_name not in transition_guard: + transition_guard[transition_name] = list() + transition_guard[transition_name].append(prefix) + + self.transition_guard[t_from] = transition_guard + + def get_trace(self, name, param_dict): + delta = self.delta_by_name[name] + current_state = "__init__" + trace = [current_state] + states_seen = set() + while current_state != "__end__": + next_states = delta[current_state] + + states_seen.add(current_state) + next_states = list(filter(lambda q: q not in states_seen, next_states)) + + if len(next_states) == 0: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found infinite loop at {trace}" + ) + + if len(next_states) > 1 and self.transition_guard[current_state]: + matching_next_states = list() + for candidate in next_states: + for condition in self.transition_guard[current_state][candidate]: + valid = True + for key, value in condition: + if param_dict[key] != value: + valid = False + break + if valid: + matching_next_states.append(candidate) + break + next_states = matching_next_states + + if len(next_states) == 0: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found no valid outbound transitions at {trace}, candidates {self.transition_guard[current_state]}" + ) + if len(next_states) > 1: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found non-deterministic outbound transitions {next_states} at {trace}" + ) + + (next_state,) = next_states + + trace.append(next_state) + 
current_state = next_state + + return trace + + def learn_pta(self, observations, annotation, delta=dict(), delta_param=dict()): + prev_i = annotation.start.offset + prev = "__init__" + prev_non_kernel = prev + meta_observations = list() + n_seen = dict() + + total_latency_us = 0 + + if sorted(annotation.start.param.keys()) == sorted(annotation.end.param.keys()): + param_dict = annotation.start.param + else: + param_dict = annotation.end.param + param_str = utils.param_dict_to_str(param_dict) + + if annotation.kernels: + # ggf. als dict of tuples, für den Fall dass Schleifen verschieden iterieren können? + for i in range(prev_i, annotation.kernels[0].offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if this in n_seen: + if n_seen[this] == 1: + logger.debug( + f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳" + ) + n_seen[this] += 1 + else: + n_seen[this] = 1 + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + prev = this + prev_i = i + 1 + + total_latency_us += observations[i]["attribute"].get("latency_us", 0) + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + prev_non_kernel = prev + + for kernel in annotation.kernels: + prev = prev_non_kernel + for i in range(prev_i, kernel.offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + # The last iteration (next block) contains a single kernel, + # so we do not increase total_latency_us here. + # However, this means that we will only ever get one latency + # value for each set of kernels with a common problem size, + # despite potentially having far more data at our fingertips. + # We could provide one total_latency_us for each kernel + # (by combining start latency + kernel latency + teardown latency), + # but for that we first need to distinguish between kernel + # components and teardown components in the following block. + + prev = this + prev_i = i + 1 + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + + # There is no kernel end signal in the underlying data, so the last iteration also contains a kernel run. 
+ prev = prev_non_kernel + for i in range(prev_i, annotation.end.offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if this in n_seen: + if n_seen[this] == 1: + logger.debug( + f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳" + ) + n_seen[this] += 1 + else: + n_seen[this] = 1 + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + total_latency_us += observations[i]["attribute"].get("latency_us", 0) + + prev = this + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + + if not prev in delta: + delta[prev] = set() + delta[prev].add("__end__") + if not (prev, "__end__") in delta_param: + delta_param[(prev, "__end__")] = set() + delta_param[(prev, "__end__")].add(param_str) + + for transition, count in n_seen.items(): + meta_observations.append( + { + "name": f"__loop__ {transition}", + "param": param_dict, + "attribute": {"n_iterations": count}, + } + ) + + if total_latency_us: + meta_observations.append( + { + "name": annotation.start.name, + "param": param_dict, + "attribute": {"latency_us": total_latency_us}, + } + ) + + is_loop = dict( + map(lambda kv: (kv[0], True), filter(lambda kv: kv[1] > 1, n_seen.items())) + ) + + return delta, delta_param, meta_observations, is_loop + + +class EventSequenceModel: + def __init__(self, models): + self.models = models + + def _event_normalizer(self, event): + event_normalizer = lambda p: p + if "/" in event: + v1, v2 = event.split("/") + if utils.is_numeric(v1): + event = v2.strip() + event_normalizer = lambda p: utils.soft_cast_float(v1) / p + elif utils.is_numeric(v2): + event = v1.strip() + event_normalizer = lambda p: p / utils.soft_cast_float(v2) + else: + raise RuntimeError(f"Cannot parse '{event}'") + return event, event_normalizer + + def eval_strs(self, events, aggregate="sum", aggregate_init=0, use_lut=False): + for event in events: + event, event_normalizer = self._event_normalizer(event) + nn, param = event.split("(") + name, action = nn.split(".") + param_model = None + ref_model = None + + for model in self.models: + if name in model.names and action in model.attributes(name): + ref_model = model + if use_lut: + param_model = model.get_param_lut(allow_none=True) + else: + param_model, param_info = model.get_fitted() + break + + if param_model is None: + raise RuntimeError(f"Did not find a model for {name}.{action}") + + param = param.removesuffix(")") + if param == "": + param = dict() + else: + param = utils.parse_conf_str(param) + + param_list = utils.param_dict_to_list(param, ref_model.parameters) + + if not use_lut and not param_info(name, action).is_predictable(param_list): + logger.warning( + f"Cannot predict {name}.{action}({param}), falling back to static model" + ) + + try: + event_output = event_normalizer( + param_model( + name, + action, + param=param_list, + ) + ) + except KeyError: + if use_lut: + logger.error( + f"Cannot predict {name}.{action}({param}) from LUT model" + ) + else: + logger.error(f"Cannot predict {name}.{action}({param}) from model") + raise + except TypeError: + if not use_lut: + logger.error(f"Cannot predict {name}.{action}({param}) from model") + raise + + if aggregate == "sum": + aggregate_init += event_output + else: + raise RuntimeError(f"Unknown aggregate type: 
{aggregate}") + + return aggregate_init @@ -331,6 +331,23 @@ def model_quality_table( print(buf) +def export_pseudo_dref(dref_file, dref, precision=None): + with open(dref_file, "w") as f: + for k, v in sorted(os.environ.items(), key=lambda kv: kv[0]): + if k.startswith("DFATOOL_"): + print(f"% {k}='{v}'", file=f) + for arg in sys.argv: + print(f"% {arg}", file=f) + for k, v in sorted(dref.items()): + k = k.replace("/", "I").replace("-", "").replace("_", "").replace(" ", "") + if type(v) is tuple: + v = v[0] + if type(v) in (float, np.float64) and precision is not None: + print("\\def\\" + k + "{" + f"{v:.{precision}f}" + "}", file=f) + else: + print("\\def\\" + k + "{" + str(v) + "}", file=f) + + def export_dataref(dref_file, dref, precision=None): with open(dref_file, "w") as f: for k, v in sorted(os.environ.items(), key=lambda kv: kv[0]): @@ -493,6 +510,12 @@ def add_standard_arguments(parser): help="Export tree-based model to {PREFIX}{name}-{attribute}.dot", ) parser.add_argument( + "--export-pseudo-dref", + metavar="FILE", + type=str, + help="Export model and model quality to LaTeX def file (sort of like dataref)", + ) + parser.add_argument( "--export-dref", metavar="FILE", type=str, @@ -528,7 +551,7 @@ def add_standard_arguments(parser): "--export-json", metavar="FILENAME", type=str, - help="Export model in JSON format to FILENAME", + help="Export model and error metrics in JSON format to FILENAME", ) parser.add_argument( "--load-json", diff --git a/lib/functions.py b/lib/functions.py index 187e6ff..b76814b 100644 --- a/lib/functions.py +++ b/lib/functions.py @@ -27,6 +27,7 @@ dfatool_rmt_relevance_threshold = float( os.getenv("DFATOOL_RMT_RELEVANCE_THRESHOLD", "0.5") ) +dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear") dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf)) if dfatool_preproc_relevance_method == "mi": @@ -466,6 +467,23 @@ class SplitFunction(ModelFunction): ) return hyper + # SplitFunction only + def flatten(self): + paths = list() + for param_value, subtree in self.child.items(): + if type(subtree) is SplitFunction: + for path, value in subtree.flatten(): + path = [(self.param_name, param_value)] + path + paths.append((path, value)) + elif type(subtree) is StaticFunction: + path = [(self.param_name, param_value)] + paths.append((path, subtree.value)) + else: + raise RuntimeError( + "flatten is only implemented for RMTs with constant leaves" + ) + return paths + @classmethod def from_json(cls, data): assert data["type"] == "split" @@ -1675,7 +1693,11 @@ class FOLFunction(SKLearnRegressionFunction): self.model_args = list(np.ones((num_vars))) try: res = optimize.least_squares( - error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15 + error_function, + self.model_args, + args=(fit_parameters, data), + xtol=2e-15, + loss=dfatool_uls_loss_fun, ) except ValueError as err: logger.warning(f"Fit failed: {err} (function: {self.model_function})") @@ -1938,6 +1960,7 @@ class AnalyticFunction(ModelFunction): self.model_args, args=(X, Y), xtol=2e-15, + loss=dfatool_uls_loss_fun, bounds=(lower_bounds, upper_bounds), ) except ValueError as err: diff --git a/lib/loader/plain.py b/lib/loader/plain.py index 50f3ca6..ef0b596 100644 --- a/lib/loader/plain.py +++ b/lib/loader/plain.py @@ -69,10 +69,48 @@ class CSVfile: return observations -class Logfile: - def __init__(self): - pass +class TraceAnnotation: + offset = None + name = None + param = dict() + + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + def 
apply_offset(self, offset): + self.offset += offset + return self + + def __repr__(self): + param_desc = " ".join(map(lambda kv: f"{kv[0]}={kv[1]}", self.param.items())) + return f"{self.name}<{param_desc} @ {self.offset}>" + +class RunAnnotation: + name = None + start = None + kernels = list() + end = None + + # start: offset points to first run entry + # kernel: offset points to first kernel run entry + # end: offset points to first non-run entry (i.e., for all run entries: offset < end.offset) + + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + def apply_offset(self, offset): + self.start.apply_offset(offset) + for kernel in self.kernels: + kernel.apply_offset(offset) + self.end.apply_offset(offset) + return self + + def __repr__(self): + return f"RunAnnotation<{self.name}, start={self.start}, kernels={self.kernels}, end={self.end}>" + + +class Logfile: def kv_to_param(self, kv_str, cast): try: key, value = kv_str.split("=") @@ -88,14 +126,24 @@ class Logfile: def kv_to_param_i(self, kv_str): return self.kv_to_param(kv_str, soft_cast_int_or_float) - def load(self, f): + def load(self, f, is_trace=False): observations = list() + if is_trace: + trace_status = None + trace_start = None + trace_kernels = list() + trace_end = None + annotations = list() + for lineno, line in enumerate(f): - m = re.search(r"\[::\] *([^|]*?) *[|] *([^|]*?) *[|] *(.*)", line) - if m: + if m := re.search(r"\[::\] *([^|]*?) *[|] *([^|]*?) *[|] *(.*)", line): name_str = m.group(1) param_str = m.group(2) attr_str = m.group(3) + if is_trace: + name_str, name_annot = name_str.split("@") + name_str = name_str.strip() + name_annot = name_annot.strip() try: param = dict(map(self.kv_to_param_i, param_str.split())) attr = dict(map(self.kv_to_param_f, attr_str.split())) @@ -106,13 +154,89 @@ class Logfile: "attribute": attr, } ) + if is_trace: + observations[-1]["place"] = name_annot + except ValueError: + logger.warning( + f"Error parsing {f}: invalid key-value pair in line {lineno+1}" + ) + logger.warning(f"Offending entry:\n{line}") + raise + + if not is_trace: + continue + + # only relevant for is_trace == True + if m := re.fullmatch(r"\[>>\] *([^|]*?) *[|] *([^|]*?) *", line): + trace_status = 1 + trace_kernels = list() + name_str = m.group(1) + param_str = m.group(2) + try: + param = dict(map(self.kv_to_param_i, param_str.split())) + except ValueError: + logger.warning( + f"Error parsing {f}: invalid key-value pair in line {lineno+1}" + ) + logger.warning(f"Offending entry:\n{line}") + raise + trace_start = TraceAnnotation( + offset=len(observations), name=name_str, param=param + ) + + if m := re.fullmatch(r"\[--\] *([^|]*?) *[|] *([^|]*?) *", line): + trace_status = 2 + name_str = m.group(1) + param_str = m.group(2) + try: + param = dict(map(self.kv_to_param_i, param_str.split())) + except ValueError: + logger.warning( + f"Error parsing {f}: invalid key-value pair in line {lineno+1}" + ) + logger.warning(f"Offending entry:\n{line}") + raise + trace_kernels.append( + TraceAnnotation( + offset=len(observations), name=name_str, param=param + ) + ) + + if m := re.fullmatch(r"\[<<\] *([^|]*?) *[|] *([^|]*?) 
*", line): + trace_status = None + name_str = m.group(1) + param_str = m.group(2) + try: + param = dict(map(self.kv_to_param_i, param_str.split())) except ValueError: logger.warning( f"Error parsing {f}: invalid key-value pair in line {lineno+1}" ) logger.warning(f"Offending entry:\n{line}") raise + trace_end = TraceAnnotation( + offset=len(observations), name=name_str, param=param + ) + if trace_start is not None: + assert trace_start.name == trace_end.name + for kernel in trace_kernels: + assert trace_start.name == kernel.name + annotations.append( + RunAnnotation( + name=trace_start.name, + start=trace_start, + kernels=trace_kernels, + end=trace_end, + ) + ) + + trace_status = None + trace_start = None + trace_kernels = list() + trace_end = None + if is_trace: + return observations, annotations return observations def dump(self, observations, f): diff --git a/lib/model.py b/lib/model.py index 58f05a4..4d1edd5 100644 --- a/lib/model.py +++ b/lib/model.py @@ -14,7 +14,14 @@ from .parameters import ( distinct_param_values, ) from .paramfit import ParamFit -from .utils import is_numeric, soft_cast_int, by_name_to_by_param, regression_measures +from .utils import ( + is_numeric, + soft_cast_int, + by_name_to_by_param, + by_param_to_by_name, + regression_measures, + param_eq_or_none, +) logger = logging.getLogger(__name__) @@ -79,6 +86,7 @@ class AnalyticModel: compute_stats=True, force_tree=False, max_std=None, + by_param=None, from_json=None, ): """ @@ -96,7 +104,7 @@ class AnalyticModel: - attributes: list of keys that should be analyzed, e.g. ['power', 'duration'] - for each attribute mentioned in 'attributes': A list with measurements. - All list except for 'attributes' must have the same length. + All lists except for 'attributes' must have the same length. For example: parameters = ['foo_count', 'irrelevant'] @@ -148,9 +156,18 @@ class AnalyticModel: for name, name_data in from_json["name"].items(): self.attr_by_name[name] = dict() for attr, attr_data in name_data.items(): - self.attr_by_name[name][attr] = ModelAttribute.from_json( - name, attr, attr_data - ) + if by_param: + self.attr_by_name[name][attr] = ModelAttribute.from_json( + name, + attr, + attr_data, + data_values=by_name[name][attr], + param_values=by_name[name]["param"], + ) + else: + self.attr_by_name[name][attr] = ModelAttribute.from_json( + name, attr, attr_data + ) self.fit_done = True return @@ -249,7 +266,7 @@ class AnalyticModel: return static_model_getter - def get_param_lut(self, use_mean=False, fallback=False): + def get_param_lut(self, use_mean=False, fallback=False, allow_none=False): """ Get parameter-look-up-table model function: name, attribute, parameter values -> model value. @@ -279,7 +296,16 @@ class AnalyticModel: try: return lut_model[name][key][param] except KeyError: - if fallback: + if allow_none: + keys = filter( + lambda p: param_eq_or_none(param, p), + lut_model[name][key].keys(), + ) + values = list(map(lambda p: lut_model[name][key][p], keys)) + if not values: + raise + return np.mean(values) + elif fallback: return static_model[name][key] raise params = kwargs["params"] @@ -643,7 +669,14 @@ class AnalyticModel: ret[f"xv/{name}/{attr_name}/{k}"] = np.mean(entry[k]) return ret - def to_json(self, **kwargs) -> dict: + def to_json( + self, + with_by_param=False, + lut_error=None, + static_error=None, + model_error=None, + **kwargs, + ) -> dict: """ Return JSON encoding of this AnalyticModel. 
""" @@ -653,21 +686,48 @@ class AnalyticModel: "paramValuesbyName": dict([[name, dict()] for name in self.names]), } + if with_by_param: + by_param = self.get_by_param() + ret["byParam"] = list() + for k, v in by_param.items(): + ret["byParam"].append((k, v)) + for name in self.names: for attr_name, attr in self.attr_by_name[name].items(): ret["name"][name][attr_name] = attr.to_json(**kwargs) + if lut_error: + ret["name"][name][attr_name]["lutError"] = lut_error[name][ + attr_name + ] + if static_error: + ret["name"][name][attr_name]["staticError"] = static_error[name][ + attr_name + ] + if model_error: + ret["name"][name][attr_name]["modelError"] = model_error[name][ + attr_name + ] attr_name = list(self.attributes(name))[0] for param_name in self.parameters: - ret["paramValuesbyName"][name][param_name] = self.attr_by_name[name][ - attr_name - ].stats.distinct_values_by_param_name[param_name] + if self.attr_by_name[name][attr_name].stats is not None: + ret["paramValuesbyName"][name][param_name] = self.attr_by_name[ + name + ][attr_name].stats.distinct_values_by_param_name[param_name] return ret @classmethod - def from_json(cls, data, by_name, parameters): - assert data["parameters"] == parameters - return cls(by_name, parameters, from_json=data) + def from_json(cls, data, by_name=None, parameters=None): + if by_name is None and parameters is None: + assert data["byParam"] is not None + by_param = dict() + for (nk, pk), v in data["byParam"]: + by_param[(nk, tuple(pk))] = v + by_name = by_param_to_by_name(by_param) + return cls(by_name, data["parameters"], by_param=by_param, from_json=data) + else: + assert data["parameters"] == parameters + return cls(by_name, parameters, from_json=data) def webconf_function_map(self) -> list: ret = list() diff --git a/lib/parameters.py b/lib/parameters.py index 0653100..acb044c 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -621,13 +621,20 @@ class ModelAttribute: mean = np.mean(self.data) return f"ModelAttribute<{self.name}, {self.attr}, mean={mean}>" - def to_json(self, **kwargs): - return { + def to_json(self, with_lut=False, **kwargs): + ret = { "paramNames": self.param_names, "argCount": self.arg_count, "modelFunction": self.model_function.to_json(**kwargs), } + if with_lut: + ret["LUT"] = list() + for key, value in self.by_param.items(): + ret["LUT"].append((key, value)) + + return ret + def to_dref(self, unit=None): ret = {"mean": (self.mean, unit), "median": (self.median, unit)} @@ -724,11 +731,11 @@ class ModelAttribute: return self.mutual_information_cache @classmethod - def from_json(cls, name, attr, data): + def from_json(cls, name, attr, data, data_values=None, param_values=None): param_names = data["paramNames"] arg_count = data["argCount"] - self = cls(name, attr, None, None, param_names, arg_count) + self = cls(name, attr, data_values, param_values, param_names, arg_count) self.model_function = df.ModelFunction.from_json(data["modelFunction"]) self.mean = self.model_function.value diff --git a/lib/paramfit.py b/lib/paramfit.py index 000aa9c..84eba2b 100644 --- a/lib/paramfit.py +++ b/lib/paramfit.py @@ -16,9 +16,14 @@ from .utils import ( ) logger = logging.getLogger(__name__) -best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr") +dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear") dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf)) +if dfatool_uls_loss_fun == "linear": + best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr") +else: + best_fit_metric = 
os.getenv("DFATOOL_ULS_ERROR_METRIC", "mae") + class ParamFit: """ @@ -222,6 +227,7 @@ def _try_fits( ini, args=(X, Y), xtol=2e-15, + loss=dfatool_uls_loss_fun, bounds=param_function.bounds, ) except FloatingPointError as e: diff --git a/lib/utils.py b/lib/utils.py index 4850a53..fb76367 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -48,6 +48,8 @@ def running_mean(x: np.ndarray, N: int) -> np.ndarray: def human_readable(value, unit): + if value is None: + return value for prefix, factor in ( ("p", 1e-12), ("n", 1e-9), @@ -55,6 +57,8 @@ def human_readable(value, unit): ("m", 1e-3), ("", 1), ("k", 1e3), + ("M", 1e6), + ("G", 1e9), ): if value < 1e3 * factor: return "{:.2f} {}{}".format(value * (1 / factor), prefix, unit) @@ -150,7 +154,7 @@ def parse_conf_str(conf_str): """ conf_dict = dict() for option in conf_str.split(","): - key, value = option.split("=") + key, value = option.strip().split("=") conf_dict[key] = soft_cast_float(value) return conf_dict @@ -205,6 +209,18 @@ def param_slice_eq(a, b, index): return False +def param_eq_or_none(a, b): + """ + Check if by_param keys a and b are identical, allowing a None in a to match any key in b. + """ + set_keys = tuple(filter(lambda i: a[i] is not None, range(len(a)))) + a_not_none = tuple(map(lambda i: a[i], set_keys)) + b_not_none = tuple(map(lambda i: b[i], set_keys)) + if a_not_none == b_not_none: + return True + return False + + def match_parameter_values(input_param: dict, match_param: dict): """ Check whether one of the paramaters in `input_param` has the same value in `match_param`. @@ -302,6 +318,21 @@ def param_dict_to_list(param_dict, parameter_names, default=None): return ret +def param_dict_to_str(param_dict): + ret = list() + for parameter_name in sorted(param_dict.keys()): + ret.append(f"{parameter_name}={param_dict[parameter_name]}") + return " ".join(ret) + + +def param_str_to_dict(param_str): + ret = dict() + for param_pair in param_str.split(): + key, value = param_pair.split("=") + ret[key] = soft_cast_int_or_float(value) + return ret + + def observations_enum_to_bool(observations: list, kconfig=False): """ Convert enum / categorical observations to boolean-only ones. 
@@ -697,11 +728,18 @@ def regression_measures(predicted: np.ndarray, ground_truth: np.ndarray): rsq -- R^2 measure, see sklearn.metrics.r2_score count -- Number of values """ - if type(predicted) != np.ndarray: + + if type(predicted) is list: + predicted = np.array(predicted) + + if type(ground_truth) is list: + ground_truth = np.array(ground_truth) + + if type(predicted) is not np.ndarray: raise ValueError( "first arg ('predicted') must be ndarray, is {}".format(type(predicted)) ) - if type(ground_truth) != np.ndarray: + if type(ground_truth) is not np.ndarray: raise ValueError( "second arg ('ground_truth') must be ndarray, is {}".format( type(ground_truth) diff --git a/libexec/rapl-to-dfatool.py b/libexec/rapl-to-dfatool.py new file mode 100755 index 0000000..5ab4c38 --- /dev/null +++ b/libexec/rapl-to-dfatool.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 + +import sys + + +def main(perf_line, rapl_names, rapl_start, rapl_stop): + duration_ns = int(perf_line.split(",")[3]) + + rapl_names = rapl_names.split() + rapl_start = rapl_start.split() + rapl_stop = rapl_stop.split() + + buf = [f"duration_ns={duration_ns}"] + + for i in range(len(rapl_names)): + uj_start = int(rapl_start[i]) + uj_stop = int(rapl_stop[i]) + buf.append(f"{rapl_names[i]}_energy_uj={uj_stop - uj_start}") + buf.append( + f"{rapl_names[i]}_power_W={(uj_stop - uj_start) * 1000 / duration_ns}" + ) + + print(" ".join(buf)) + + +if __name__ == "__main__": + main(*sys.argv[1:]) |
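Taken together, `bin/run-with-rapl` and `libexec/rapl-to-dfatool.py` wrap an arbitrary command in `perf stat` plus RAPL powercap counter reads and emit a single space-separated key=value line. A usage sketch under the assumption that intel-rapl counters are readable; the benchmark command, counter name, and all numbers below are illustrative, not measured output.

```sh
# Counters must be readable, e.g.: sudo chmod a+r /sys/class/powercap/*/energy_uj
bin/run-with-rapl ./my-benchmark --size 1024
# Output shape (values illustrative): duration from perf, plus energy and power
# per counter, where power_W = energy_uj * 1000 / duration_ns as computed
# by libexec/rapl-to-dfatool.py:
# duration_ns=2000000000 package-0_intel-rapl:0_energy_uj=10000000 package-0_intel-rapl:0_power_W=5.0
```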