-rw-r--r--  .gitlab-ci.yml                    37
-rw-r--r--  README.md                         15
-rwxr-xr-x  bin/analyze-archive.py             4
-rwxr-xr-x  bin/analyze-kconfig.py            23
-rwxr-xr-x  bin/analyze-log.py                94
-rwxr-xr-x  bin/analyze-trace.py             471
-rwxr-xr-x  bin/extract-kernel-ws.py         166
-rwxr-xr-x  bin/extract-speedup-from-log.py  197
-rwxr-xr-x  bin/kstest.py                     31
-rwxr-xr-x  bin/perf-stat-to-dfatool.py       40
-rwxr-xr-x  bin/pta-workload.py               92
-rwxr-xr-x  bin/run-with-rapl                 32
-rwxr-xr-x  bin/ttest-ind.py                  31
-rwxr-xr-x  bin/workload.py                  161
-rw-r--r--  doc/analysis-logs.md               6
-rw-r--r--  lib/behaviour.py                 388
-rw-r--r--  lib/cli.py                       138
-rw-r--r--  lib/functions.py                 113
-rw-r--r--  lib/loader/plain.py              140
-rw-r--r--  lib/model.py                     150
-rw-r--r--  lib/parameters.py                 39
-rw-r--r--  lib/paramfit.py                   25
-rw-r--r--  lib/pelt.py                        2
-rw-r--r--  lib/utils.py                      66
-rw-r--r--  lib/validation.py                 22
-rwxr-xr-x  libexec/rapl-to-dfatool.py        27
26 files changed, 2279 insertions, 231 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 149653d..d8e53cd 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -9,26 +9,27 @@ run_tests:
stage: test
variables:
GIT_SUBMODULE_STRATEGY: normal
+ DFATOOL_ULS_FUNCTIONS: linear,logarithmic,logarithmic1,exponential,square,inverse,sqrt
script:
- mkdir test-data
- - wget -qO test-data/20161221_123347_mmparam.tar https://lib.finalrewind.org/energy-models/20161221_123347_mmparam.tar
- - wget -qO test-data/20170116_124500_LM75x.tar https://lib.finalrewind.org/energy-models/20170116_124500_LM75x.tar
- - wget -qO test-data/20170116_131306_LM75x.tar https://lib.finalrewind.org/energy-models/20170116_131306_LM75x.tar
- - wget -qO test-data/20170116_142654_mmstatic.tar https://lib.finalrewind.org/energy-models/20170116_142654_mmstatic.tar
- - wget -qO test-data/20170116_143516_mmstatic.tar https://lib.finalrewind.org/energy-models/20170116_143516_mmstatic.tar
- - wget -qO test-data/20170116_145420_sharpLS013B4DN.tar https://lib.finalrewind.org/energy-models/20170116_145420_sharpLS013B4DN.tar
- - wget -qO test-data/20170116_151348_sharpLS013B4DN.tar https://lib.finalrewind.org/energy-models/20170116_151348_sharpLS013B4DN.tar
- - wget -qO test-data/20170220_164723_RF24_int_A.tar https://lib.finalrewind.org/energy-models/20170220_164723_RF24_int_A.tar
- - wget -qO test-data/20190815_103347_nRF24_no-rx.json https://lib.finalrewind.org/energy-models/20190815_103347_nRF24_no-rx.json
- - wget -qO test-data/20190815_111745_nRF24_no-rx.json https://lib.finalrewind.org/energy-models/20190815_111745_nRF24_no-rx.json
- - wget -qO test-data/20190815_122531_nRF24_no-rx.json https://lib.finalrewind.org/energy-models/20190815_122531_nRF24_no-rx.json
- - wget -qO test-data/20191024-150723-nrf24l01-var-ack-retry.tar https://lib.finalrewind.org/energy-models/20191024-150723-nrf24l01-var-ack-retry.tar
- - wget -qO test-data/20191024-152648-nrf24l01-var-ack.tar https://lib.finalrewind.org/energy-models/20191024-152648-nrf24l01-var-ack.tar
- - wget -qO test-data/20200722-113624-timedResistiveLoad.tar https://lib.finalrewind.org/energy-models/20200722-113624-timedResistiveLoad.tar
- - wget -qO test-data/20201203-112341-et_la_dco.tar https://lib.finalrewind.org/energy-models/20201203-112341-et_la_dco.tar
- - wget -qO test-data/20201203-110526-et_timer_dco.tar https://lib.finalrewind.org/energy-models/20201203-110526-et_timer_dco.tar
- - wget -qO test-data/20201203-113313-et_la_hfxt0.tar https://lib.finalrewind.org/energy-models/20201203-113313-et_la_hfxt0.tar
- - wget -qO test-data/20201203-114004-et_timer_hfxt0.tar https://lib.finalrewind.org/energy-models/20201203-114004-et_timer_hfxt0.tar
+ - wget -qO test-data/20161221_123347_mmparam.tar https://ess.cs.uos.de/.private/dfatool/20161221_123347_mmparam.tar
+ - wget -qO test-data/20170116_124500_LM75x.tar https://ess.cs.uos.de/.private/dfatool/20170116_124500_LM75x.tar
+ - wget -qO test-data/20170116_131306_LM75x.tar https://ess.cs.uos.de/.private/dfatool/20170116_131306_LM75x.tar
+ - wget -qO test-data/20170116_142654_mmstatic.tar https://ess.cs.uos.de/.private/dfatool/20170116_142654_mmstatic.tar
+ - wget -qO test-data/20170116_143516_mmstatic.tar https://ess.cs.uos.de/.private/dfatool/20170116_143516_mmstatic.tar
+ - wget -qO test-data/20170116_145420_sharpLS013B4DN.tar https://ess.cs.uos.de/.private/dfatool/20170116_145420_sharpLS013B4DN.tar
+ - wget -qO test-data/20170116_151348_sharpLS013B4DN.tar https://ess.cs.uos.de/.private/dfatool/20170116_151348_sharpLS013B4DN.tar
+ - wget -qO test-data/20170220_164723_RF24_int_A.tar https://ess.cs.uos.de/.private/dfatool/20170220_164723_RF24_int_A.tar
+ - wget -qO test-data/20190815_103347_nRF24_no-rx.json https://ess.cs.uos.de/.private/dfatool/20190815_103347_nRF24_no-rx.json
+ - wget -qO test-data/20190815_111745_nRF24_no-rx.json https://ess.cs.uos.de/.private/dfatool/20190815_111745_nRF24_no-rx.json
+ - wget -qO test-data/20190815_122531_nRF24_no-rx.json https://ess.cs.uos.de/.private/dfatool/20190815_122531_nRF24_no-rx.json
+ - wget -qO test-data/20191024-150723-nrf24l01-var-ack-retry.tar https://ess.cs.uos.de/.private/dfatool/20191024-150723-nrf24l01-var-ack-retry.tar
+ - wget -qO test-data/20191024-152648-nrf24l01-var-ack.tar https://ess.cs.uos.de/.private/dfatool/20191024-152648-nrf24l01-var-ack.tar
+ - wget -qO test-data/20200722-113624-timedResistiveLoad.tar https://ess.cs.uos.de/.private/dfatool/20200722-113624-timedResistiveLoad.tar
+ - wget -qO test-data/20201203-112341-et_la_dco.tar https://ess.cs.uos.de/.private/dfatool/20201203-112341-et_la_dco.tar
+ - wget -qO test-data/20201203-110526-et_timer_dco.tar https://ess.cs.uos.de/.private/dfatool/20201203-110526-et_timer_dco.tar
+ - wget -qO test-data/20201203-113313-et_la_hfxt0.tar https://ess.cs.uos.de/.private/dfatool/20201203-113313-et_la_hfxt0.tar
+ - wget -qO test-data/20201203-114004-et_timer_hfxt0.tar https://ess.cs.uos.de/.private/dfatool/20201203-114004-et_timer_hfxt0.tar
- pytest-3 --cov=lib
- python3-coverage html -i
artifacts:
diff --git a/README.md b/README.md
index 2ba8e89..1a6cbfc 100644
--- a/README.md
+++ b/README.md
@@ -133,9 +133,12 @@ The following variables may be set to alter the behaviour of dfatool components.
| `DFATOOL_LMT_MIN_SAMPLES_LEAF` | 0.0 .. **0.1** .. 1.0, 3 .. *n* | Minimum samples that each leaf of a split candidate must contain. A value below 1.0 specifies a ratio of the total number of training samples. A value above 1 specifies an absolute number of samples. |
| `DFATOOL_LMT_MAX_BINS` | 10 .. **120** | Number of bins used to determine optimal split. LMT default: 25. |
| `DFATOOL_LMT_CRITERION` | **mse**, rmse, mae, poisson | Error metric to use when selecting best split. |
-| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, mae, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. Least squares regression itself minimzes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. Supports all metrics accepted by `--error-metric`. |
+| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, **mae**, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. By default, least squares regression minimizes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. If `DFATOOL_ULS_LOSS_FUNCTION` is set to a value other than linear, the default is mean absolute error (mae). Supports all metrics accepted by `--error-metric`. |
+| `DFATOOL_ULS_FUNCTIONS` | a,b,… | List of function templates to use in ULS. Default: all supported functions. |
+| `DFATOOL_ULS_LOSS_FUNCTION` | **linear**, soft\_l1, … | Loss function for least squares fitting, see the `scipy.optimize.least_squares#loss` documentation. |
| `DFATOOL_ULS_MIN_DISTINCT_VALUES` | 2 .. **3** .. *n* | Minimum number of unique values a parameter must take to be eligible for ULS |
| `DFATOOL_ULS_SKIP_CODEPENDENT_CHECK` | **0**, 1 | Do not detect and remove co-dependent features in ULS. |
+| `DFATOOL_ULS_MIN_BOUND` | **-∞** .. *n* | Lower bound for ULS regression variables. Setting it to 0 can often be beneficial. |
| `DFATOOL_XGB_N_ESTIMATORS` | 1 .. **100** .. *n* | Number of estimators (i.e., trees) for XGBoost. |
| `DFATOOL_XGB_MAX_DEPTH` | 2 .. **6** .. *n* | Maximum XGBoost tree depth. |
| `DFATOOL_XGB_SUBSAMPLE` | 0.0 .. **1.0** | XGBoost subsampling ratio. |
@@ -154,6 +157,8 @@ The following variables may be set to alter the behaviour of dfatool components.
| `DFATOOL_RMT_LOSS_IGNORE_SCALAR` | **0**, 1 | Ignore scalar parameters when computing the loss for split node candidates. Instead of computing the loss of a single partition for each `x_i == j`, compute the loss of partitions for `x_i == j` in which non-scalar parameters vary and scalar parameters are constant. This way, scalar parameters do not affect the decision about which non-scalar parameter to use for splitting. |
| `DFATOOL_PARAM_CATEGORICAL_TO_SCALAR` | **0**, 1 | Some models (e.g. FOL, sklearn CART, XGBoost) do not support categorical parameters. Ignore them (0) or convert them to scalar indexes (1). Conversion uses lexical order. |
| `DFATOOL_FOL_SECOND_ORDER` | **0**, 1 | Add second-order components (interaction of feature pairs) to first-order linear function. |
+| `DFATOOL_CSV_IGNORE` | *str1,str2,...* | Ignore the listed fields when loading CSV log files. |
+| `DFATOOL_CSV_OBSERVATIONS` | *str1,str2,...* | Treat the listed fields as observations rather than features. |
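
For example, the new ULS-related variables can be combined as follows when invoking `analyze-log.py` (an illustrative sketch; `benchmark.txt` is a placeholder log file, and the function and loss choices are arbitrary):

    DFATOOL_ULS_FUNCTIONS=linear,logarithmic,sqrt \
    DFATOOL_ULS_LOSS_FUNCTION=soft_l1 \
    DFATOOL_ULS_MIN_BOUND=0 \
    bin/analyze-log.py benchmark.txt
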
## Examples
@@ -174,3 +179,11 @@ The NFP values should be exactly as described by the selected configuration opti
* [Kconfig](https://ess.cs.uos.de/git-build/dfatool/master/x264.kconfig)
* [CART](https://ess.cs.uos.de/git-build/dfatool/master/x264-cart.json)
* [RMT](https://ess.cs.uos.de/git-build/dfatool/master/x264-rmt.json)
+
+## References
+
+Mirrors of this repository are maintained at the following locations:
+
+* [ESS](https://ess.cs.uos.de/git/software/dfatool)
+* [finalrewind.org](https://git.finalrewind.org/dfatool/)
+* [GitHub](https://github.com/derf/dfatool)
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index 53a8c37..0587ce1 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -426,7 +426,9 @@ def main():
dfatool.cli.print_info_by_name(model, by_name)
if args.export_csv_unparam:
- dfatool.cli.export_csv_unparam(model, args.export_csv_unparam)
+ dfatool.cli.export_csv_unparam(
+ model, args.export_csv_unparam, dialect=args.export_csv_dialect
+ )
if args.export_pgf_unparam:
dfatool.cli.export_pgf_unparam(model, args.export_pgf_unparam)
diff --git a/bin/analyze-kconfig.py b/bin/analyze-kconfig.py
index d148641..8401e68 100755
--- a/bin/analyze-kconfig.py
+++ b/bin/analyze-kconfig.py
@@ -19,7 +19,6 @@ import time
import numpy as np
import dfatool.cli
-import dfatool.plotter
import dfatool.utils
import dfatool.functions as df
from dfatool.loader.kconfig import KConfigAttributes
@@ -354,7 +353,9 @@ def main():
dfatool.cli.print_info_by_name(model, by_name)
if args.export_csv_unparam:
- dfatool.cli.export_csv_unparam(model, args.export_csv_unparam)
+ dfatool.cli.export_csv_unparam(
+ model, args.export_csv_unparam, dialect=args.export_csv_dialect
+ )
if args.export_pgf_unparam:
dfatool.cli.export_pgf_unparam(model, args.export_pgf_unparam)
@@ -363,10 +364,12 @@ def main():
dfatool.cli.export_json_unparam(model, args.export_json_unparam)
if args.plot_unparam:
+ import dfatool.plotter as dp
+
for kv in args.plot_unparam.split(";"):
state_or_trans, attribute, ylabel = kv.split(":")
fname = "param_y_{}_{}.pdf".format(state_or_trans, attribute)
- dfatool.plotter.plot_y(
+ dp.plot_y(
model.by_name[state_or_trans][attribute],
xlabel="measurement #",
ylabel=ylabel,
@@ -375,6 +378,8 @@ def main():
)
if args.boxplot_unparam:
+ import dfatool.plotter as dp
+
title = None
if args.filter_param:
title = "filter: " + ", ".join(
@@ -382,7 +387,7 @@ def main():
)
for name in model.names:
attr_names = sorted(model.attributes(name))
- dfatool.plotter.boxplot(
+ dp.boxplot(
attr_names,
[model.by_name[name][attr] for attr in attr_names],
xlabel="Attribute",
@@ -391,7 +396,7 @@ def main():
show=not args.non_interactive,
)
for attribute in attr_names:
- dfatool.plotter.boxplot(
+ dp.boxplot(
[attribute],
[model.by_name[name][attribute]],
output=f"{args.boxplot_unparam}{name}-{attribute}.pdf",
@@ -403,6 +408,8 @@ def main():
dfatool.cli.boxplot_param(args, model)
if args.plot_param:
+ import dfatool.plotter as dp
+
for kv in args.plot_param.split(";"):
try:
state_or_trans, attribute, param_name, *function = kv.split(":")
@@ -416,7 +423,7 @@ def main():
function = gplearn_to_function(" ".join(function))
else:
function = None
- dfatool.plotter.plot_param(
+ dp.plot_param(
model,
state_or_trans,
attribute,
@@ -568,6 +575,10 @@ def main():
json_model[attribute] = data.copy()
if nfpkeys:
json_model[attribute].update(nfpkeys[name][attribute])
+ if "paramValueToIndex" in json_model[attribute]["modelFunction"]:
+ json_model[attribute]["paramValueToIndex"] = json_model[attribute][
+ "modelFunction"
+ ].pop("paramValueToIndex")
out_model = {
"model": json_model,
"modelType": "dfatool-kconfig",
diff --git a/bin/analyze-log.py b/bin/analyze-log.py
index dd32fab..50b5648 100755
--- a/bin/analyze-log.py
+++ b/bin/analyze-log.py
@@ -46,6 +46,11 @@ def main():
"--export-model", metavar="FILE", type=str, help="Export JSON model to FILE"
)
parser.add_argument(
+ "--export-model-with-lut",
+ action="store_true",
+ help="Include LUT in model export",
+ )
+ parser.add_argument(
"logfiles",
nargs="+",
type=str,
@@ -65,18 +70,17 @@ def main():
style="{",
)
- if args.filter_observation:
- args.filter_observation = list(
- map(lambda x: tuple(x.split(":")), args.filter_observation.split(","))
- )
-
observations = reduce(lambda a, b: a + b, map(parse_logfile, args.logfiles))
by_name, parameter_names = dfatool.utils.observations_to_by_name(observations)
del observations
if args.ignore_param:
args.ignore_param = args.ignore_param.split(",")
- dfatool.utils.ignore_param(by_name, parameter_names, args.ignore_param)
+
+ if args.filter_observation:
+ args.filter_observation = list(
+ map(lambda x: tuple(x.split(":")), args.filter_observation.split(","))
+ )
if args.filter_param:
args.filter_param = list(
@@ -92,6 +96,7 @@ def main():
dfatool.utils.filter_aggregate_by_param(by_name, parameter_names, args.filter_param)
dfatool.utils.filter_aggregate_by_observation(by_name, args.filter_observation)
+ dfatool.utils.ignore_param(by_name, parameter_names, args.ignore_param)
if args.param_shift:
param_shift = dfatool.cli.parse_param_shift(args.param_shift)
@@ -124,8 +129,13 @@ def main():
if args.info:
dfatool.cli.print_info_by_name(model, by_name)
+ if args.information_gain:
+ dfatool.cli.print_information_gain_by_name(model, by_name)
+
if args.export_csv_unparam:
- dfatool.cli.export_csv_unparam(model, args.export_csv_unparam)
+ dfatool.cli.export_csv_unparam(
+ model, args.export_csv_unparam, dialect=args.export_csv_dialect
+ )
if args.export_pgf_unparam:
dfatool.cli.export_pgf_unparam(model, args.export_pgf_unparam)
@@ -199,7 +209,7 @@ def main():
lut_quality = None
else:
ts = time.time()
- lut_quality = model.assess(lut_model)
+ lut_quality = model.assess(lut_model, with_sum=args.add_total_observation)
timing["assess lut"] = time.time() - ts
ts = time.time()
@@ -209,22 +219,36 @@ def main():
ts = time.time()
if xv_method == "montecarlo":
static_quality, _ = xv.montecarlo(
- lambda m: m.get_static(), xv_count, static=True
+ lambda m: m.get_static(),
+ xv_count,
+ static=True,
+ with_sum=args.add_total_observation,
)
xv.export_filename = args.export_xv
- analytic_quality, _ = xv.montecarlo(lambda m: m.get_fitted()[0], xv_count)
+ analytic_quality, _ = xv.montecarlo(
+ lambda m: m.get_fitted()[0], xv_count, with_sum=args.add_total_observation
+ )
elif xv_method == "kfold":
- static_quality, _ = xv.kfold(lambda m: m.get_static(), xv_count, static=True)
+ static_quality, _ = xv.kfold(
+ lambda m: m.get_static(),
+ xv_count,
+ static=True,
+ with_sum=args.add_total_observation,
+ )
xv.export_filename = args.export_xv
- analytic_quality, _ = xv.kfold(lambda m: m.get_fitted()[0], xv_count)
+ analytic_quality, _ = xv.kfold(
+ lambda m: m.get_fitted()[0], xv_count, with_sum=args.add_total_observation
+ )
else:
- static_quality = model.assess(static_model)
+ static_quality = model.assess(static_model, with_sum=args.add_total_observation)
if args.export_raw_predictions:
analytic_quality, raw_results = model.assess(param_model, return_raw=True)
with open(args.export_raw_predictions, "w") as f:
json.dump(raw_results, f, cls=dfatool.utils.NpEncoder)
else:
- analytic_quality = model.assess(param_model)
+ analytic_quality = model.assess(
+ param_model, with_sum=args.add_total_observation
+ )
timing["assess model"] = time.time() - ts
if "static" in args.show_model or "all" in args.show_model:
@@ -237,6 +261,7 @@ def main():
name,
attribute,
with_dependence="all" in args.show_model,
+ precision=args.show_model_precision,
)
if "param" in args.show_model or "all" in args.show_model:
@@ -244,7 +269,11 @@ def main():
for name in sorted(model.names):
for attribute in sorted(model.attributes(name)):
info = param_info(name, attribute)
- dfatool.cli.print_model(f"{name:10s} {attribute:15s}", info)
+ dfatool.cli.print_model(
+ f"{name:10s} {attribute:15s}",
+ info,
+ precision=args.show_model_precision,
+ )
if args.show_model_error:
dfatool.cli.model_quality_table(
@@ -262,8 +291,8 @@ def main():
dfatool.cli.print_model_complexity(model)
if args.export_model:
- print(f"Exportding model to {args.export_model}")
- json_model = model.to_json()
+ print(f"Exporting model to {args.export_model}")
+ json_model = model.to_json(with_by_param=args.export_model_with_lut)
with open(args.export_model, "w") as f:
json.dump(
json_model, f, indent=2, sort_keys=True, cls=dfatool.utils.NpEncoder
@@ -272,13 +301,34 @@ def main():
if args.export_dot:
dfatool.cli.export_dot(model, args.export_dot)
- if args.export_dref:
- dref = model.to_dref(static_quality, lut_quality, analytic_quality)
+ if args.export_dref or args.export_pseudo_dref:
+ dref = model.to_dref(
+ static_quality,
+ lut_quality,
+ analytic_quality,
+ with_sum=args.add_total_observation,
+ )
for key, value in timing.items():
dref[f"timing/{key}"] = (value, r"\second")
- dfatool.cli.export_dataref(
- args.export_dref, dref, precision=args.dref_precision
- )
+
+ if args.information_gain:
+ for name in model.names:
+ for attr in model.attributes(name):
+ mutual_information = model.mutual_information(name, attr)
+ for param in model.parameters:
+ if param in mutual_information:
+ dref[f"mutual information/{name}/{attr}/{param}"] = (
+ mutual_information[param]
+ )
+
+ if args.export_pseudo_dref:
+ dfatool.cli.export_pseudo_dref(
+ args.export_pseudo_dref, dref, precision=args.dref_precision
+ )
+ if args.export_dref:
+ dfatool.cli.export_dataref(
+ args.export_dref, dref, precision=args.dref_precision
+ )
if args.export_json:
with open(args.export_json, "w") as f:
diff --git a/bin/analyze-trace.py b/bin/analyze-trace.py
new file mode 100755
index 0000000..1cc3b89
--- /dev/null
+++ b/bin/analyze-trace.py
@@ -0,0 +1,471 @@
+#!/usr/bin/env python3
+
+"""
+analyze-trace - Generate a performance-aware behaviour model from log files
+
+Parses benchmark log files that contain trace annotations, reconstructs the observed
+behaviour (states, transitions, and transition guards) as an SDKBehaviourModel, and
+fits an AnalyticModel to the corresponding performance observations.
+"""
+
+import argparse
+import dfatool.cli
+import dfatool.plotter
+import dfatool.utils
+import dfatool.functions as df
+from dfatool.behaviour import SDKBehaviourModel
+from dfatool.loader import Logfile
+from dfatool.model import AnalyticModel
+from dfatool.validation import CrossValidator
+from functools import reduce
+import logging
+import json
+import re
+import sys
+import time
+
+
+def parse_logfile(filename):
+ loader = Logfile()
+
+ if filename.endswith("xz"):
+ import lzma
+
+ with lzma.open(filename, "rt") as f:
+ return loader.load(f, is_trace=True)
+ with open(filename, "r") as f:
+ return loader.load(f, is_trace=True)
+
+
+def join_annotations(ref, base, new):
+ offset = len(ref)
+ return base + list(map(lambda x: x.apply_offset(offset), new))
+
+
+def main():
+ timing = dict()
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__
+ )
+ dfatool.cli.add_standard_arguments(parser)
+ parser.add_argument(
+ "logfiles",
+ nargs="+",
+ type=str,
+ help="Path to benchmark output (.txt or .txt.xz)",
+ )
+ args = parser.parse_args()
+ dfatool.cli.sanity_check(args)
+
+ if args.log_level:
+ numeric_level = getattr(logging, args.log_level.upper(), None)
+ if not isinstance(numeric_level, int):
+ print(f"Invalid log level: {args.log_level}", file=sys.stderr)
+ sys.exit(1)
+ logging.basicConfig(
+ level=numeric_level,
+ format="{asctime} {levelname}:{name}:{message}",
+ style="{",
+ )
+
+ observations, annotations = reduce(
+ lambda a, b: (a[0] + b[0], join_annotations(a[0], a[1], b[1])),
+ map(parse_logfile, args.logfiles),
+ )
+
+ bm = SDKBehaviourModel(observations, annotations)
+ observations += bm.meta_observations
+ is_loop = bm.is_loop
+ am_tt_param_names = bm.am_tt_param_names
+ delta_by_name = bm.delta_by_name
+ delta_param_by_name = bm.delta_param_by_name
+
+ def format_guard(guard):
+ return "∧".join(map(lambda kv: f"{kv[0]}={kv[1]}", guard))
+
+ for name in sorted(delta_by_name.keys()):
+ for t_from, t_to_set in delta_by_name[name].items():
+ i_to_transition = dict()
+ delta_param_sets = list()
+ to_names = list()
+ transition_guard = dict()
+
+ for t_to in sorted(t_to_set):
+ delta_params = delta_param_by_name[name][(t_from, t_to)]
+ delta_param_sets.append(delta_params)
+ to_names.append(t_to)
+ n_confs = len(delta_params)
+ if is_loop.get(t_from, False) and is_loop.get(t_to, False):
+ print(f"{name} {t_from} → {t_to} ⟳")
+ elif is_loop.get(t_from, False):
+ print(f"{name} {t_from} → {t_to} →")
+ else:
+ print(
+ f"{name} {t_from} → {t_to} ({' ∨ '.join(map(format_guard, bm.transition_guard[t_from].get(t_to, list()))) or '⊤'})"
+ )
+
+ for i in range(len(delta_param_sets)):
+ for j in range(i + 1, len(delta_param_sets)):
+ if not delta_param_sets[i].isdisjoint(delta_param_sets[j]):
+ intersection = delta_param_sets[i].intersection(
+ delta_param_sets[j]
+ )
+ if is_loop.get(t_from, False):
+ logging.debug(
+ f"Loop transition <{t_from}>: <{to_names[i]}> and <{to_names[j]}> are both taken for {intersection}"
+ )
+ else:
+ logging.error(
+ f"Outbound transitions of <{t_from}> are not deterministic: <{to_names[i]}> and <{to_names[j]}> are both taken for {intersection}"
+ )
+ raise RuntimeError(
+ f"Outbound transitions of <{t_from}> are not deterministic"
+ )
+
+ print("")
+
+ by_name, parameter_names = dfatool.utils.observations_to_by_name(observations)
+ del observations
+
+ if args.ignore_param:
+ args.ignore_param = args.ignore_param.split(",")
+
+ if args.filter_observation:
+ args.filter_observation = list(
+ map(lambda x: tuple(x.split(":")), args.filter_observation.split(","))
+ )
+
+ if args.filter_param:
+ args.filter_param = list(
+ map(
+ lambda entry: dfatool.cli.parse_filter_string(
+ entry, parameter_names=parameter_names
+ ),
+ args.filter_param.split(";"),
+ )
+ )
+ else:
+ args.filter_param = list()
+
+ dfatool.utils.filter_aggregate_by_param(by_name, parameter_names, args.filter_param)
+ dfatool.utils.filter_aggregate_by_observation(by_name, args.filter_observation)
+ dfatool.utils.ignore_param(by_name, parameter_names, args.ignore_param)
+
+ if args.param_shift:
+ param_shift = dfatool.cli.parse_param_shift(args.param_shift)
+ dfatool.utils.shift_param_in_aggregate(by_name, parameter_names, param_shift)
+
+ if args.normalize_nfp:
+ norm = dfatool.cli.parse_nfp_normalization(args.normalize_nfp)
+ dfatool.utils.normalize_nfp_in_aggregate(by_name, norm)
+
+ function_override = dict()
+ if args.function_override:
+ for function_desc in args.function_override.split(";"):
+ state_or_tran, attribute, function_str = function_desc.split(":")
+ function_override[(state_or_tran, attribute)] = function_str
+
+ ts = time.time()
+ if args.load_json:
+ with open(args.load_json, "r") as f:
+ model = AnalyticModel.from_json(json.load(f), by_name, parameter_names)
+ else:
+ model = AnalyticModel(
+ by_name,
+ parameter_names,
+ force_tree=args.force_tree,
+ compute_stats=not args.skip_param_stats,
+ function_override=function_override,
+ )
+ timing["AnalyticModel"] = time.time() - ts
+
+ if args.info:
+ dfatool.cli.print_info_by_name(model, by_name)
+
+ if args.information_gain:
+ dfatool.cli.print_information_gain_by_name(model, by_name)
+
+ if args.export_csv_unparam:
+ dfatool.cli.export_csv_unparam(
+ model, args.export_csv_unparam, dialect=args.export_csv_dialect
+ )
+
+ if args.export_pgf_unparam:
+ dfatool.cli.export_pgf_unparam(model, args.export_pgf_unparam)
+
+ if args.export_json_unparam:
+ dfatool.cli.export_json_unparam(model, args.export_json_unparam)
+
+ if args.plot_unparam:
+ for kv in args.plot_unparam.split(";"):
+ state_or_trans, attribute, ylabel = kv.split(":")
+ fname = "param_y_{}_{}.pdf".format(state_or_trans, attribute)
+ dfatool.plotter.plot_y(
+ model.by_name[state_or_trans][attribute],
+ xlabel="measurement #",
+ ylabel=ylabel,
+ # output=fname,
+ show=not args.non_interactive,
+ )
+
+ if args.boxplot_unparam:
+ title = None
+ if args.filter_param:
+ title = "filter: " + ", ".join(
+ map(lambda kv: f"{kv[0]}={kv[1]}", args.filter_param)
+ )
+ for name in model.names:
+ attr_names = sorted(model.attributes(name))
+ dfatool.plotter.boxplot(
+ attr_names,
+ [model.by_name[name][attr] for attr in attr_names],
+ xlabel="Attribute",
+ output=f"{args.boxplot_unparam}{name}.pdf",
+ title=title,
+ show=not args.non_interactive,
+ )
+ for attribute in attr_names:
+ dfatool.plotter.boxplot(
+ [attribute],
+ [model.by_name[name][attribute]],
+ output=f"{args.boxplot_unparam}{name}-{attribute}.pdf",
+ title=title,
+ show=not args.non_interactive,
+ )
+
+ if args.boxplot_param:
+ dfatool.cli.boxplot_param(args, model)
+
+ if args.cross_validate:
+ xv_method, xv_count = args.cross_validate.split(":")
+ xv_count = int(xv_count)
+ xv = CrossValidator(
+ AnalyticModel,
+ by_name,
+ parameter_names,
+ force_tree=args.force_tree,
+ compute_stats=not args.skip_param_stats,
+ show_progress=args.progress,
+ )
+ xv.parameter_aware = args.parameter_aware_cross_validation
+ else:
+ xv_method = None
+ xv_count = None
+
+ static_model = model.get_static()
+
+ ts = time.time()
+ lut_model = model.get_param_lut()
+ timing["get lut"] = time.time() - ts
+
+ if lut_model is None:
+ lut_quality = None
+ else:
+ ts = time.time()
+ lut_quality = model.assess(lut_model, with_sum=args.add_total_observation)
+ timing["assess lut"] = time.time() - ts
+
+ ts = time.time()
+ param_model, param_info = model.get_fitted()
+ timing["get model"] = time.time() - ts
+
+ ts = time.time()
+ if xv_method == "montecarlo":
+ static_quality, _ = xv.montecarlo(
+ lambda m: m.get_static(),
+ xv_count,
+ static=True,
+ with_sum=args.add_total_observation,
+ )
+ xv.export_filename = args.export_xv
+ analytic_quality, _ = xv.montecarlo(
+ lambda m: m.get_fitted()[0], xv_count, with_sum=args.add_total_observation
+ )
+ elif xv_method == "kfold":
+ static_quality, _ = xv.kfold(
+ lambda m: m.get_static(),
+ xv_count,
+ static=True,
+ with_sum=args.add_total_observation,
+ )
+ xv.export_filename = args.export_xv
+ analytic_quality, _ = xv.kfold(
+ lambda m: m.get_fitted()[0], xv_count, with_sum=args.add_total_observation
+ )
+ else:
+ static_quality = model.assess(static_model, with_sum=args.add_total_observation)
+ if args.export_raw_predictions:
+ analytic_quality, raw_results = model.assess(param_model, return_raw=True)
+ with open(args.export_raw_predictions, "w") as f:
+ json.dump(raw_results, f, cls=dfatool.utils.NpEncoder)
+ else:
+ analytic_quality = model.assess(
+ param_model, with_sum=args.add_total_observation
+ )
+ timing["assess model"] = time.time() - ts
+
+ if "paramdetection" in args.show_model or "all" in args.show_model:
+ for name in model.names:
+ for attribute in model.attributes(name):
+ info = param_info(name, attribute)
+ print(
+ "{:10s} {:10s} non-param stddev {:f}".format(
+ name,
+ attribute,
+ model.attr_by_name[name][attribute].stats.std_static,
+ )
+ )
+ print(
+ "{:10s} {:10s} param-lut stddev {:f}".format(
+ name,
+ attribute,
+ model.attr_by_name[name][attribute].stats.std_param_lut,
+ )
+ )
+ for param in sorted(
+ model.attr_by_name[name][attribute].stats.std_by_param.keys()
+ ):
+ print(
+ "{:10s} {:10s} {:10s} stddev {:f}".format(
+ name,
+ attribute,
+ param,
+ model.attr_by_name[name][attribute].stats.std_by_param[
+ param
+ ],
+ )
+ )
+ for arg_index in range(model.attr_by_name[name][attribute].arg_count):
+ print(
+ "{:10s} {:10s} {:10s} stddev {:f}".format(
+ name,
+ attribute,
+ f"arg{arg_index}",
+ model.attr_by_name[name][attribute].stats.std_by_arg[
+ arg_index
+ ],
+ )
+ )
+ if type(info) is df.AnalyticFunction:
+ for param_name in sorted(info.fit_by_param.keys(), key=str):
+ param_fit = info.fit_by_param[param_name]["results"]
+ for function_type in sorted(param_fit.keys()):
+ function_rmsd = param_fit[function_type]["rmsd"]
+ print(
+ "{:10s} {:10s} {:10s} mean {:10s} RMSD {:.0f}".format(
+ name,
+ attribute,
+ str(param_name),
+ function_type,
+ function_rmsd,
+ )
+ )
+
+ if "static" in args.show_model or "all" in args.show_model:
+ print("--- static model ---")
+ for name in sorted(model.names):
+ for attribute in sorted(model.attributes(name)):
+ dfatool.cli.print_static(
+ model,
+ static_model,
+ name,
+ attribute,
+ with_dependence="all" in args.show_model,
+ precision=args.show_model_precision,
+ )
+
+ if "param" in args.show_model or "all" in args.show_model:
+ print("--- param model ---")
+ for name in sorted(model.names):
+ for attribute in sorted(model.attributes(name)):
+ info = param_info(name, attribute)
+ dfatool.cli.print_model(
+ f"{name:10s} {attribute:15s}",
+ info,
+ precision=args.show_model_precision,
+ )
+
+ if args.show_model_error:
+ dfatool.cli.model_quality_table(
+ lut=lut_quality,
+ model=analytic_quality,
+ static=static_quality,
+ model_info=param_info,
+ xv_method=xv_method,
+ xv_count=xv_count,
+ error_metric=args.error_metric,
+ load_model=args.load_json,
+ )
+
+ if args.show_model_complexity:
+ dfatool.cli.print_model_complexity(model)
+
+ if args.export_dot:
+ dfatool.cli.export_dot(model, args.export_dot)
+
+ if args.export_dref or args.export_pseudo_dref:
+ dref = model.to_dref(
+ static_quality,
+ lut_quality,
+ analytic_quality,
+ with_sum=args.add_total_observation,
+ )
+ for key, value in timing.items():
+ dref[f"timing/{key}"] = (value, r"\second")
+
+ if args.information_gain:
+ for name in model.names:
+ for attr in model.attributes(name):
+ mutual_information = model.mutual_information(name, attr)
+ for param in model.parameters:
+ if param in mutual_information:
+ dref[f"mutual information/{name}/{attr}/{param}"] = (
+ mutual_information[param]
+ )
+
+ if args.export_pseudo_dref:
+ dfatool.cli.export_pseudo_dref(
+ args.export_pseudo_dref, dref, precision=args.dref_precision
+ )
+ if args.export_dref:
+ dfatool.cli.export_dataref(
+ args.export_dref, dref, precision=args.dref_precision
+ )
+
+ if args.export_json:
+ with open(args.export_json, "w") as f:
+ json.dump(
+ model.to_json(
+ static_error=static_quality,
+ lut_error=lut_quality,
+ model_error=analytic_quality,
+ ),
+ f,
+ sort_keys=True,
+ cls=dfatool.utils.NpEncoder,
+ indent=2,
+ )
+
+ if args.plot_param:
+ for kv in args.plot_param.split(";"):
+ try:
+ state_or_trans, attribute, param_name = kv.split(":")
+ except ValueError:
+ print(
+ "Usage: --plot-param='state_or_trans:attribute:param_name'",
+ file=sys.stderr,
+ )
+ sys.exit(1)
+ dfatool.plotter.plot_param(
+ model,
+ state_or_trans,
+ attribute,
+ model.param_index(param_name),
+ title=state_or_trans,
+ ylabel=attribute,
+ xlabel=param_name,
+ output=f"{state_or_trans}-{attribute}-{param_name}.pdf",
+ show=not args.non_interactive,
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/bin/extract-kernel-ws.py b/bin/extract-kernel-ws.py
new file mode 100755
index 0000000..9f263e2
--- /dev/null
+++ b/bin/extract-kernel-ws.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+import argparse
+import numpy as np
+import sys
+import logging
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__
+ )
+ parser.add_argument(
+ "--log-level",
+ metavar="LEVEL",
+ choices=["debug", "info", "warning", "error"],
+ default="warning",
+ help="Set log level",
+ )
+ parser.add_argument(
+ "--output-format",
+ metavar="FORMAT",
+ choices=["dfatool", "valgrind-ws"],
+ default="dfatool",
+ help="Set output format",
+ )
+ parser.add_argument(
+ "benchmark_file",
+ type=str,
+ help="Benchmark file used to run valgrind-ws",
+ )
+ parser.add_argument(
+ "ws_output",
+ type=str,
+ help="valgrind-ws output file",
+ )
+
+ args = parser.parse_args()
+ benchmark_filename = args.benchmark_file.split("/")[-1]
+
+ if args.log_level:
+ numeric_level = getattr(logging, args.log_level.upper(), None)
+ if not isinstance(numeric_level, int):
+ print(f"Invalid log level: {args.log_level}", file=sys.stderr)
+ sys.exit(1)
+ logging.basicConfig(
+ level=numeric_level,
+ format="{asctime} {levelname}:{name}:{message}",
+ style="{",
+ )
+
+ with open(args.benchmark_file, "r") as f:
+ start_range = [None, None]
+ end_range = [None, None]
+ in_nop = False
+ for lineno, line in enumerate(f):
+ line = line.strip()
+ if line == "#if NOP_SYNC":
+ in_nop = True
+ if start_range[0] is None:
+ start_range[0] = lineno
+ else:
+ end_range[0] = lineno
+ if in_nop and line.startswith("#endif"):
+ in_nop = False
+ if start_range[1] is None:
+ start_range[1] = lineno
+ else:
+ end_range[1] = lineno
+
+ logging.debug(f"start_range = {start_range}, end_range = {end_range}")
+
+ page_size = None
+ ws_log = list()
+ sample_info = dict()
+ with open(args.ws_output, "r") as f:
+ in_ws_log = False
+ in_sample_info = False
+ for line in f:
+ line = line.strip()
+ if in_ws_log and line == "":
+ in_ws_log = False
+ if in_sample_info and line == "":
+ in_sample_info = False
+ if page_size is None and line.startswith("Page size:"):
+ page_size = int(line.split()[2])
+
+ if in_ws_log:
+ t, wss_i, wss_d, info_ref = line.split()
+ ws_log.append((int(t), int(wss_i), int(wss_d), info_ref))
+ elif in_sample_info:
+ _, info_ref, _, locs = line.split()
+ info_ref = info_ref.removesuffix("]")
+ locs = locs.removeprefix("loc=")
+ sample_info[info_ref] = list()
+ for loc in filter(lambda x: len(x), locs.split("|")):
+ filename, lineno = loc.split(":")
+ sample_info[info_ref].append((filename, int(lineno)))
+
+ if line == "t WSS_insn WSS_data info":
+ in_ws_log = True
+ if line == "Sample info:":
+ in_sample_info = True
+
+ if page_size is None:
+ raise RuntimeError(f"Unable to determine page size from {args.ws_output}")
+
+ logging.debug(f"sample_info = {sample_info}")
+ next_in_kernel = False
+ in_kernel = False
+ insn_working_set_sizes = list()
+ data_working_set_sizes = list()
+ kernel_range = [None, None]
+ for t, wss_i, wss_d, info_ref in ws_log:
+ if next_in_kernel:
+ next_in_kernel = False
+ in_kernel = True
+ kernel_range[0] = t
+
+ if info_ref != "-":
+ for filename, lineno in sample_info[info_ref]:
+ if (
+ filename == benchmark_filename
+ and start_range[0] <= lineno <= start_range[1]
+ ):
+ next_in_kernel = True
+ elif (
+ filename == benchmark_filename
+ and end_range[0] <= lineno <= end_range[1]
+ ):
+ in_kernel = False
+
+ if in_kernel:
+ data_working_set_sizes.append(wss_d * page_size)
+ insn_working_set_sizes.append(wss_i * page_size)
+ kernel_range[1] = t
+
+ if args.output_format == "dfatool":
+ print(
+ f"wss_data_mean_bytes={np.mean(data_working_set_sizes)}"
+ + f" wss_data_median_bytes={np.median(data_working_set_sizes)}"
+ + f" wss_data_stddev={np.std(data_working_set_sizes)}"
+ + f" wss_insn_mean_bytes={np.mean(insn_working_set_sizes)}"
+ + f" wss_insn_median_bytes={np.median(insn_working_set_sizes)}"
+ + f" wss_insn_stddev={np.std(insn_working_set_sizes)}"
+ )
+ elif args.output_format == "valgrind-ws":
+ with open(args.ws_output, "r") as f:
+ in_ws_log = False
+ for line in f:
+ if in_ws_log and line.strip() == "":
+ in_ws_log = False
+
+ if in_ws_log:
+ ts = int(line.strip().split()[0])
+ if kernel_range[0] <= ts <= kernel_range[1]:
+ print(line, end="")
+ else:
+ print(line, end="")
+
+ if line.strip() == "t WSS_insn WSS_data info":
+ in_ws_log = True
+
+
+if __name__ == "__main__":
+ main()
diff --git a/bin/extract-speedup-from-log.py b/bin/extract-speedup-from-log.py
new file mode 100755
index 0000000..3537ec3
--- /dev/null
+++ b/bin/extract-speedup-from-log.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python3
+
+"""extract-speedup-from-log - Determine speedup from dfatool log files
+
+For each parameter combination, the speedup is computed as the ratio of the given
+observation between measurements matching the numerator parameter filter and
+measurements matching the denominator parameter filter. Results are printed as
+dfatool log lines ("[::] <name> | <parameters> | speedup=<value>").
+"""
+
+import argparse
+import dfatool.cli
+import dfatool.utils
+import logging
+import numpy as np
+import sys
+from dfatool.loader import Logfile, CSVfile
+from dfatool.model import AnalyticModel
+from functools import reduce
+
+
+def parse_logfile(filename):
+ if ".csv" in filename:
+ loader = CSVfile()
+ else:
+ loader = Logfile()
+
+ if filename.endswith("xz"):
+ import lzma
+
+ with lzma.open(filename, "rt") as f:
+ return loader.load(f)
+ with open(filename, "r") as f:
+ return loader.load(f)
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__
+ )
+ parser.add_argument(
+ "--add-param",
+ metavar="<param>=<value>[ <param>=<value> ...]",
+ type=str,
+ help="Add additional parameter specifications to output lines",
+ )
+ parser.add_argument(
+ "--filter-param",
+ metavar="<parameter name><condition>[;<parameter name><condition>...]",
+ type=str,
+ help="Only consider measurements where <parameter name> satisfies <condition>. "
+ "<condition> may be <operator><parameter value> with operator being < / <= / = / >= / >, "
+ "or ∈<parameter value>[,<parameter value>...]. "
+ "All other measurements (including those where it is None, that is, has not been set yet) are discarded. "
+ "Note that this may remove entire function calls from the model.",
+ )
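+ # Example (hypothetical parameter names): --filter-param 'n>=64;variant∈cpu,gpu'
+ # keeps only measurements where n is at least 64 and variant is either cpu or gpu.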
+ parser.add_argument(
+ "--ignore-param",
+ metavar="<parameter name>[,<parameter name>,...]",
+ type=str,
+ help="Ignore listed parameters during model generation",
+ )
+ parser.add_argument(
+ "--log-level",
+ metavar="LEVEL",
+ choices=["debug", "info", "warning", "error"],
+ default="warning",
+ help="Set log level",
+ )
+ parser.add_argument(
+ "numerator",
+ type=str,
+ help="numerator parameters",
+ )
+ parser.add_argument(
+ "denominator",
+ type=str,
+ help="denominator parameters",
+ )
+ parser.add_argument(
+ "observation",
+ type=str,
+ help="observation (key:attribute) used for speedup calculation",
+ )
+ parser.add_argument(
+ "logfiles",
+ nargs="+",
+ type=str,
+ help="Path to benchmark output (.txt or .txt.xz)",
+ )
+ args = parser.parse_args()
+
+ if args.log_level:
+ numeric_level = getattr(logging, args.log_level.upper(), None)
+ if not isinstance(numeric_level, int):
+ print(f"Invalid log level: {args.log_level}", file=sys.stderr)
+ sys.exit(1)
+ logging.basicConfig(
+ level=numeric_level,
+ format="{asctime} {levelname}:{name}:{message}",
+ style="{",
+ )
+
+ observations = reduce(lambda a, b: a + b, map(parse_logfile, args.logfiles))
+ by_name_num, parameter_names_num = dfatool.utils.observations_to_by_name(
+ observations
+ )
+ by_name_denom, parameter_names_denom = dfatool.utils.observations_to_by_name(
+ observations
+ )
+ del observations
+
+ if args.filter_param:
+ args.filter_param = list(
+ map(
+ lambda entry: dfatool.cli.parse_filter_string(
+ entry, parameter_names=parameter_names_num
+ ),
+ args.filter_param.split(";"),
+ )
+ )
+ else:
+ args.filter_param = list()
+
+ filter_num = list(
+ map(
+ lambda entry: dfatool.cli.parse_filter_string(
+ entry, parameter_names=parameter_names_num
+ ),
+ args.numerator.split(";"),
+ )
+ )
+
+ filter_denom = list(
+ map(
+ lambda entry: dfatool.cli.parse_filter_string(
+ entry, parameter_names=parameter_names_denom
+ ),
+ args.denominator.split(";"),
+ )
+ )
+
+ filter_num += args.filter_param
+ filter_denom += args.filter_param
+
+ ignore_num = list(map(lambda x: x[0], filter_num))
+ ignore_denom = list(map(lambda x: x[0], filter_denom))
+ assert ignore_num == ignore_denom
+
+ if args.ignore_param:
+ args.ignore_param = args.ignore_param.split(";")
+ ignore_num += args.ignore_param
+ ignore_denom += args.ignore_param
+
+ dfatool.utils.filter_aggregate_by_param(
+ by_name_num, parameter_names_num, filter_num
+ )
+ dfatool.utils.filter_aggregate_by_param(
+ by_name_denom, parameter_names_denom, filter_denom
+ )
+ dfatool.utils.ignore_param(by_name_num, parameter_names_num, ignore_num)
+ dfatool.utils.ignore_param(by_name_denom, parameter_names_denom, ignore_denom)
+
+ model_num = AnalyticModel(
+ by_name_num,
+ parameter_names_num,
+ compute_stats=False,
+ )
+
+ model_denom = AnalyticModel(
+ by_name_denom,
+ parameter_names_denom,
+ compute_stats=False,
+ )
+
+ for param_key in model_num.get_by_param().keys():
+ name, params = param_key
+ num_data = model_num.get_by_param().get(param_key).get(args.observation)
+ try:
+ denom_data = model_denom.get_by_param().get(param_key).get(args.observation)
+ except AttributeError:
+ logging.error(f"Cannot find numerator param {param_key} in denominator")
+ logging.error(f"Parameter names == {tuple(parameter_names_num)}")
+ logging.error("You may need to adjust --ignore-param")
+ sys.exit(1)
+ if num_data and denom_data:
+ param_str = " ".join(
+ map(
+ lambda i: f"{parameter_names_num[i]}={params[i]}",
+ range(len(params)),
+ )
+ )
+ if args.add_param is not None:
+ param_str += " " + args.add_param
+ for speedup in np.array(num_data) / np.array(denom_data):
+ print(f"[::] {name} | {param_str} | speedup={speedup}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/bin/kstest.py b/bin/kstest.py
new file mode 100755
index 0000000..61951eb
--- /dev/null
+++ b/bin/kstest.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+import json
+import sys
+from scipy.stats import kstest
+
+
+def main(pvalue, file1, file2, macro=None):
+ with open(file1, "r") as f:
+ data1 = json.load(f)
+ with open(file2, "r") as f:
+ data2 = json.load(f)
+ result = kstest(data1, data2)
+
+ pvalue = float(pvalue)
+
+ print(f"% {result}")
+ if macro is None:
+ print(r"\drefset{ttest/pvalue}{" + str(result.pvalue) + "}")
+ if result.pvalue < pvalue:
+ if macro:
+ print("\\def\\" + macro + "{$p < " + f"{pvalue:0.2f}" + "$}")
+ sys.exit(0)
+ else:
+ if macro:
+ print("\\def\\" + macro + "{$p \\ge " + f"{pvalue:0.2f}" + "$}")
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main(*sys.argv[1:])
diff --git a/bin/perf-stat-to-dfatool.py b/bin/perf-stat-to-dfatool.py
new file mode 100755
index 0000000..01f568f
--- /dev/null
+++ b/bin/perf-stat-to-dfatool.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import sys
+
+
+def main():
+ metric = dict()
+ for line in sys.stdin:
+ line = line.strip()
+ data = json.loads(line)
+
+ count = int(float(data["counter-value"]))
+ label = data["event"]
+
+ if data["metric-unit"] != "(null)":
+ extra = float(data["metric-value"])
+ extra_label = data["metric-unit"]
+ else:
+ extra = None
+ extra_label = None
+
+ metric[label] = (count, extra, extra_label)
+
+ buf = ""
+ for key in sorted(metric.keys()):
+ count, extra, extra_label = metric[key]
+ buf += f" {key}={count}"
+ if extra_label is not None:
+ if extra_label.startswith("of all"):
+ label = extra_label.replace(" ", "-")
+ buf += f" {key}-percentage-{label}={extra}"
+ else:
+ buf += f" {key}-metric={extra}"
+ print(buf)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/bin/pta-workload.py b/bin/pta-workload.py
new file mode 100755
index 0000000..19a7378
--- /dev/null
+++ b/bin/pta-workload.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+
+import sys
+from dfatool.automata import PTA
+from dfatool.utils import human_readable
+from dfatool.lex import TimedSequence, TimedWord, Workload
+
+args = sys.argv[1:]
+
+loops = dict()
+ptafiles = list()
+loop_names = set()
+
+
+def simulate_word(timedword):
+ prev_state = "UNINITIALIZED"
+ prev_param = None
+ ret = dict()
+ for trace_part in timedword:
+ print("Trace Part {}".format(trace_part))
+ if type(trace_part) is TimedWord:
+ result = pta.simulate(
+ trace_part, orig_state=prev_state, orig_param=prev_param
+ )
+ elif type(trace_part) is Workload:
+ result = pta.simulate(
+ trace_part.word, orig_state=prev_state, orig_param=prev_param
+ )
+ if prev_state != result.end_state:
+ print(
+ "Warning: loop starts in state {}, but terminates in {}".format(
+ prev_state, result.end_state.name
+ )
+ )
+ if prev_param != result.parameters:
+ print(
+ "Warning: loop starts with parameters {}, but terminates with {}".format(
+ prev_param, result.parameters
+ )
+ )
+ ret[trace_part.name] = result
+ loop_names.add(trace_part.name)
+
+ print(" Duration: " + human_readable(result.duration, "s"))
+ if result.duration_mae:
+ print(
+ u" ± {} / {:.0f}%".format(
+ human_readable(result.duration_mae, "s"), result.duration_mape
+ )
+ )
+ print(" Energy: " + human_readable(result.energy, "J"))
+ if result.energy_mae:
+ print(
+ u" ± {} / {:.0f}%".format(
+ human_readable(result.energy_mae, "J"), result.energy_mape
+ )
+ )
+ print(" Mean Power: " + human_readable(result.mean_power, "W"))
+ print("")
+
+ prev_state = result.end_state
+ prev_param = result.parameters
+
+ return ret
+
+
+for i in range(len(args) // 2):
+ ptafile, raw_word = args[i * 2], args[i * 2 + 1]
+ ptafiles.append(ptafile)
+ pta = PTA.from_file(ptafile)
+ timedword = TimedSequence(raw_word)
+ print("Input: {}\n".format(timedword))
+ loops[ptafile] = simulate_word(timedword)
+
+for loop_name in sorted(loop_names):
+ result_set = list()
+ total_power = 0
+ for ptafile in sorted(ptafiles):
+ if loop_name in loops[ptafile]:
+ result_set.append(loops[ptafile][loop_name])
+ total_power += loops[ptafile][loop_name].mean_power
+ print(
+ "{}: total mean power is {}".format(loop_name, human_readable(total_power, "W"))
+ )
+ for i, result in enumerate(result_set):
+ print(
+ " {:.0f}% {} (period: {})".format(
+ result.mean_power * 100 / total_power,
+ ptafiles[i],
+ human_readable(result.duration, "s"),
+ )
+ )
diff --git a/bin/run-with-rapl b/bin/run-with-rapl
new file mode 100755
index 0000000..54d2d9c
--- /dev/null
+++ b/bin/run-with-rapl
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+DFATOOL="$(dirname "$0")/.."
+
+if test -z "${COUNTERS}"; then
+ COUNTERS="$(ls -1 /sys/class/powercap)"
+fi
+
+NAMES=
+UJ_FILES=
+for counter in ${COUNTERS}; do
+ if test -e /sys/class/powercap/${counter}/name && test -e /sys/class/powercap/${counter}/energy_uj; then
+ NAMES="${NAMES} $(cat /sys/class/powercap/${counter}/name)_${counter} "
+ UJ_FILES="${UJ_FILES} /sys/class/powercap/${counter}/energy_uj"
+ fi
+done
+
+if ! cat ${UJ_FILES} > /dev/null; then
+ echo "Unable to read all counters (${UJ_FILES})" >&2
+ echo "You may need to run sudo chmod a+r /sys/class/powercap/*/energy_uj" >&2
+ exit 1
+fi
+
+OUTPUT=$(mktemp)
+
+RAPL_START=$(cat ${UJ_FILES})
+3>${OUTPUT} perf stat -x, -e duration_time --log-fd 3 "$@"
+RAPL_END=$(cat ${UJ_FILES})
+
+"${DFATOOL}/libexec/rapl-to-dfatool.py" "$(cat ${OUTPUT})" "${NAMES}" "${RAPL_START}" "${RAPL_END}"
+
+rm -f ${OUTPUT}
diff --git a/bin/ttest-ind.py b/bin/ttest-ind.py
new file mode 100755
index 0000000..ebed6af
--- /dev/null
+++ b/bin/ttest-ind.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+import json
+import sys
+from scipy.stats import ttest_ind
+
+
+def main(pvalue, file1, file2, macro=None):
+ with open(file1, "r") as f:
+ data1 = json.load(f)
+ with open(file2, "r") as f:
+ data2 = json.load(f)
+ result = ttest_ind(data1, data2)
+
+ pvalue = float(pvalue)
+
+ print(f"% {result}")
+ if macro is None:
+ print(r"\drefset{ttest/pvalue}{" + str(result.pvalue) + "}")
+ if result.pvalue < pvalue:
+ if macro:
+ print("\\def\\" + macro + "{$p < " + f"{pvalue:0.2f}" + "$}")
+ sys.exit(0)
+ else:
+ if macro:
+ print("\\def\\" + macro + "{$p \\ge " + f"{pvalue:0.2f}" + "$}")
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main(*sys.argv[1:])
diff --git a/bin/workload.py b/bin/workload.py
index 19a7378..72b66bb 100755
--- a/bin/workload.py
+++ b/bin/workload.py
@@ -1,92 +1,93 @@
#!/usr/bin/env python3
+import argparse
+import json
+import logging
import sys
-from dfatool.automata import PTA
-from dfatool.utils import human_readable
-from dfatool.lex import TimedSequence, TimedWord, Workload
+import dfatool.cli
+import dfatool.utils
+from dfatool.behaviour import EventSequenceModel
+from dfatool.model import AnalyticModel
-args = sys.argv[1:]
-
-loops = dict()
-ptafiles = list()
-loop_names = set()
+def main():
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__
+ )
+ parser.add_argument("--aggregate", choices=["sum"], default="sum")
+ parser.add_argument("--aggregate-unit", choices=["s", "B/s"], default="s")
+ parser.add_argument(
+ "--aggregate-init",
+ default=0,
+ type=float,
+ )
+ parser.add_argument(
+ "--log-level",
+ metavar="LEVEL",
+ choices=["debug", "info", "warning", "error"],
+ default="warning",
+ help="Set log level",
+ )
+ parser.add_argument("--normalize-output", type=str)
+ parser.add_argument(
+ "--info",
+ action="store_true",
+ help="Show benchmark information (number of measurements, parameter values, ...)",
+ )
+ parser.add_argument(
+ "--models",
+ nargs="+",
+ type=str,
+ help="Path to model file (.json or .json.xz)",
+ )
+ parser.add_argument(
+ "--use-lut",
+ action="store_true",
+ help="Use LUT rather than performance model for prediction",
+ )
+ parser.add_argument("event", nargs="+", type=str)
+ args = parser.parse_args()
-def simulate_word(timedword):
- prev_state = "UNINITIALIZED"
- prev_param = None
- ret = dict()
- for trace_part in timedword:
- print("Trace Part {}".format(trace_part))
- if type(trace_part) is TimedWord:
- result = pta.simulate(
- trace_part, orig_state=prev_state, orig_param=prev_param
- )
- elif type(trace_part) is Workload:
- result = pta.simulate(
- trace_part.word, orig_state=prev_state, orig_param=prev_param
- )
- if prev_state != result.end_state:
- print(
- "Warning: loop starts in state {}, but terminates in {}".format(
- prev_state, result.end_state.name
- )
- )
- if prev_param != result.parameters:
- print(
- "Warning: loop starts with parameters {}, but terminates with {}".format(
- prev_param, result.parameters
- )
- )
- ret[trace_part.name] = result
- loop_names.add(trace_part.name)
+ if args.log_level:
+ numeric_level = getattr(logging, args.log_level.upper(), None)
+ if not isinstance(numeric_level, int):
+ print(f"Invalid log level: {args.log_level}", file=sys.stderr)
+ sys.exit(1)
+ logging.basicConfig(
+ level=numeric_level,
+ format="{asctime} {levelname}:{name}:{message}",
+ style="{",
+ )
- print(" Duration: " + human_readable(result.duration, "s"))
- if result.duration_mae:
- print(
- u" ± {} / {:.0f}%".format(
- human_readable(result.duration_mae, "s"), result.duration_mape
- )
- )
- print(" Energy: " + human_readable(result.energy, "J"))
- if result.energy_mae:
- print(
- u" ± {} / {:.0f}%".format(
- human_readable(result.energy_mae, "J"), result.energy_mape
- )
- )
- print(" Mean Power: " + human_readable(result.mean_power, "W"))
- print("")
+ models = list()
+ for model_file in args.models:
+ with open(model_file, "r") as f:
+ models.append(AnalyticModel.from_json(json.load(f)))
- prev_state = result.end_state
- prev_param = result.parameters
+ if args.info:
+ for i in range(len(models)):
+ print(f"""{args.models[i]}: {" ".join(models[i].parameters)}""")
+ _, param_info = models[i].get_fitted()
+ for name in models[i].names:
+ for attr in models[i].attributes(name):
+ print(f" {name}.{attr} {param_info(name, attr)}")
- return ret
+ workload = EventSequenceModel(models)
+ aggregate = workload.eval_strs(
+ args.event,
+ aggregate=args.aggregate,
+ aggregate_init=args.aggregate_init,
+ use_lut=args.use_lut,
+ )
+ if args.normalize_output:
+ sf = dfatool.cli.parse_shift_function(
+ "--normalize-output", args.normalize_output
+ )
+ print(dfatool.utils.human_readable(sf(aggregate), args.aggregate_unit))
+ else:
+ print(dfatool.utils.human_readable(aggregate, args.aggregate_unit))
-for i in range(len(args) // 2):
- ptafile, raw_word = args[i * 2], args[i * 2 + 1]
- ptafiles.append(ptafile)
- pta = PTA.from_file(ptafile)
- timedword = TimedSequence(raw_word)
- print("Input: {}\n".format(timedword))
- loops[ptafile] = simulate_word(timedword)
-for loop_name in sorted(loop_names):
- result_set = list()
- total_power = 0
- for ptafile in sorted(ptafiles):
- if loop_name in loops[ptafile]:
- result_set.append(loops[ptafile][loop_name])
- total_power += loops[ptafile][loop_name].mean_power
- print(
- "{}: total mean power is {}".format(loop_name, human_readable(total_power, "W"))
- )
- for i, result in enumerate(result_set):
- print(
- " {:.0f}% {} (period: {})".format(
- result.mean_power * 100 / total_power,
- ptafiles[i],
- human_readable(result.duration, "s"),
- )
- )
+if __name__ == "__main__":
+ main()
diff --git a/doc/analysis-logs.md b/doc/analysis-logs.md
index f787a18..4b58350 100644
--- a/doc/analysis-logs.md
+++ b/doc/analysis-logs.md
@@ -1,7 +1,7 @@
# Data Analysis and Performance Model Generation from Log Files
-Here, dfatool works with lines of the form "`[::]` *Key* *Attribute* | *parameters* | *NFP values*", where *parameters* is a space-separated series of *param=value* entries (i.e., benchmark configuration) and *NFP values* is a space-separate series of *NFP=value* entries (i.e., benchmark output).
-All measurements of a given *Key* *Attribute* combination must use the same set of NFP names.
-Parameter names may be different -- parameters that are present in other lines of the same *Key* *Attribute* will be treated as undefined in those lines where they are missing.
+Here, dfatool works with lines of the form "`[::]` *Key* | *parameters* | *NFP values*", where *parameters* is a space-separated series of *param=value* entries (i.e., benchmark configuration) and *NFP values* is a space-separated series of *NFP=value* entries (i.e., benchmark output).
+All measurements of a given *Key* must use the same set of NFP names.
+Parameter names may be different -- parameters that are present in other lines of the same *Key* will be treated as undefined in those lines where they are missing.
Use `bin/analyze-log.py file1.txt file2.txt ...` for analysis.
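
For illustration, a log line in this format might look as follows (the key, parameter, and NFP names are hypothetical):

    [::] kernel | n=1024 variant=cpu | latency_us=4223 energy_uj=912
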
diff --git a/lib/behaviour.py b/lib/behaviour.py
new file mode 100644
index 0000000..136a55e
--- /dev/null
+++ b/lib/behaviour.py
@@ -0,0 +1,388 @@
+#!/usr/bin/env python3
+
+import logging
+from . import utils
+from .model import AnalyticModel
+from . import functions as df
+
+logger = logging.getLogger(__name__)
+
+
+class SDKBehaviourModel:
+
+ def __init__(self, observations, annotations):
+
+ meta_observations = list()
+ delta_by_name = dict()
+ delta_param_by_name = dict()
+ is_loop = dict()
+
+ for annotation in annotations:
+ # annotation.start.param may be incomplete, for instance in cases
+ # where DPUs are allocated before the input file is loaded (and
+ # thus before the problem size is known).
+ # However, annotation.end.param may also differ from annotation.start.param (it should not, but that's how some benchmarks roll).
+ # So, we use annotation.start.param if it has the same keys as annotation.end.param, and annotation.end.param otherwise
+ if sorted(annotation.start.param.keys()) == sorted(
+ annotation.end.param.keys()
+ ):
+ am_tt_param_names = sorted(annotation.start.param.keys())
+ else:
+ am_tt_param_names = sorted(annotation.end.param.keys())
+ if annotation.name not in delta_by_name:
+ delta_by_name[annotation.name] = dict()
+ delta_param_by_name[annotation.name] = dict()
+ _, _, meta_obs, _is_loop = self.learn_pta(
+ observations,
+ annotation,
+ delta_by_name[annotation.name],
+ delta_param_by_name[annotation.name],
+ )
+ meta_observations += meta_obs
+ is_loop.update(_is_loop)
+
+ self.am_tt_param_names = am_tt_param_names
+ self.delta_by_name = delta_by_name
+ self.delta_param_by_name = delta_param_by_name
+ self.meta_observations = meta_observations
+ self.is_loop = is_loop
+
+ self.build_transition_guards()
+
+ def build_transition_guards(self):
+ self.transition_guard = dict()
+ for name in sorted(self.delta_by_name.keys()):
+ for t_from, t_to_set in self.delta_by_name[name].items():
+ i_to_transition = dict()
+ delta_param_sets = list()
+ to_names = list()
+ transition_guard = dict()
+
+ if len(t_to_set) > 1:
+ am_tt_by_name = {
+ name: {
+ "attributes": [t_from],
+ "param": list(),
+ t_from: list(),
+ },
+ }
+ for i, t_to in enumerate(sorted(t_to_set)):
+ for param in self.delta_param_by_name[name][(t_from, t_to)]:
+ am_tt_by_name[name]["param"].append(
+ utils.param_dict_to_list(
+ utils.param_str_to_dict(param),
+ self.am_tt_param_names,
+ )
+ )
+ am_tt_by_name[name][t_from].append(i)
+ i_to_transition[i] = t_to
+ am = AnalyticModel(
+ am_tt_by_name, self.am_tt_param_names, force_tree=True
+ )
+ model, info = am.get_fitted()
+ if type(info(name, t_from)) is df.SplitFunction:
+ flat_model = info(name, t_from).flatten()
+ else:
+ flat_model = list()
+ logger.warning(
+ f"Model for {name} {t_from} is {info(name, t_from)}, expected SplitFunction"
+ )
+
+ for prefix, output in flat_model:
+ transition_name = i_to_transition[int(output)]
+ if transition_name not in transition_guard:
+ transition_guard[transition_name] = list()
+ transition_guard[transition_name].append(prefix)
+
+ self.transition_guard[t_from] = transition_guard
+
+ def get_trace(self, name, param_dict):
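+        """
+        Return the state sequence ("__init__", ..., "__end__") taken for the given
+        parameter values, using self.transition_guard to resolve branches.
+
+        Raises RuntimeError if the trace does not terminate or cannot be resolved
+        deterministically.
+        """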
+ delta = self.delta_by_name[name]
+ current_state = "__init__"
+ trace = [current_state]
+ states_seen = set()
+ while current_state != "__end__":
+ next_states = delta[current_state]
+
+ states_seen.add(current_state)
+ next_states = list(filter(lambda q: q not in states_seen, next_states))
+
+ if len(next_states) == 0:
+ raise RuntimeError(
+ f"get_trace({name}, {param_dict}): found infinite loop at {trace}"
+ )
+
+ if len(next_states) > 1 and self.transition_guard[current_state]:
+ matching_next_states = list()
+ for candidate in next_states:
+ for condition in self.transition_guard[current_state][candidate]:
+ valid = True
+ for key, value in condition:
+ if param_dict[key] != value:
+ valid = False
+ break
+ if valid:
+ matching_next_states.append(candidate)
+ break
+ next_states = matching_next_states
+
+ if len(next_states) == 0:
+ raise RuntimeError(
+ f"get_trace({name}, {param_dict}): found no valid outbound transitions at {trace}, candidates {self.transition_guard[current_state]}"
+ )
+ if len(next_states) > 1:
+ raise RuntimeError(
+ f"get_trace({name}, {param_dict}): found non-deterministic outbound transitions {next_states} at {trace}"
+ )
+
+ (next_state,) = next_states
+
+ trace.append(next_state)
+ current_state = next_state
+
+ return trace
+
+ def learn_pta(self, observations, annotation, delta=dict(), delta_param=dict()):
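+        """
+        Extract automaton structure from the observations covered by a single run annotation.
+
+        Updates delta (state -> successor states) and delta_param ((state, successor) ->
+        parameter strings) in-place and returns them together with per-run meta
+        observations (__trace__, __loop__, latency) and a dict of states that were
+        visited more than once.
+        """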
+ prev_i = annotation.start.offset
+ prev = "__init__"
+ prev_non_kernel = prev
+ meta_observations = list()
+ n_seen = dict()
+
+ total_latency_us = 0
+
+ if sorted(annotation.start.param.keys()) == sorted(annotation.end.param.keys()):
+ param_dict = annotation.start.param
+ else:
+ param_dict = annotation.end.param
+ param_str = utils.param_dict_to_str(param_dict)
+
+ if annotation.kernels:
+            # possibly as a dict of tuples, in case loops can iterate differently?
+ for i in range(prev_i, annotation.kernels[0].offset):
+ this = observations[i]["name"] + " @ " + observations[i]["place"]
+
+ if this in n_seen:
+ if n_seen[this] == 1:
+ logger.debug(
+ f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳"
+ )
+ n_seen[this] += 1
+ else:
+ n_seen[this] = 1
+
+ if not prev in delta:
+ delta[prev] = set()
+ delta[prev].add(this)
+
+ if not (prev, this) in delta_param:
+ delta_param[(prev, this)] = set()
+ delta_param[(prev, this)].add(param_str)
+
+ prev = this
+ prev_i = i + 1
+
+ total_latency_us += observations[i]["attribute"].get("latency_us", 0)
+
+ meta_observations.append(
+ {
+ "name": f"__trace__ {this}",
+ "param": param_dict,
+ "attribute": dict(
+ filter(
+ lambda kv: not kv[0].startswith("e_"),
+ observations[i]["param"].items(),
+ )
+ ),
+ }
+ )
+ prev_non_kernel = prev
+
+ for kernel in annotation.kernels:
+ prev = prev_non_kernel
+ for i in range(prev_i, kernel.offset):
+ this = observations[i]["name"] + " @ " + observations[i]["place"]
+
+ if not prev in delta:
+ delta[prev] = set()
+ delta[prev].add(this)
+
+ if not (prev, this) in delta_param:
+ delta_param[(prev, this)] = set()
+ delta_param[(prev, this)].add(param_str)
+
+ # The last iteration (next block) contains a single kernel,
+ # so we do not increase total_latency_us here.
+ # However, this means that we will only ever get one latency
+ # value for each set of kernels with a common problem size,
+ # despite potentially having far more data at our fingertips.
+ # We could provide one total_latency_us for each kernel
+ # (by combining start latency + kernel latency + teardown latency),
+ # but for that we first need to distinguish between kernel
+ # components and teardown components in the following block.
+
+ prev = this
+ prev_i = i + 1
+
+ meta_observations.append(
+ {
+ "name": f"__trace__ {this}",
+ "param": param_dict,
+ "attribute": dict(
+ filter(
+ lambda kv: not kv[0].startswith("e_"),
+ observations[i]["param"].items(),
+ )
+ ),
+ }
+ )
+
+ # There is no kernel end signal in the underlying data, so the last iteration also contains a kernel run.
+ prev = prev_non_kernel
+ for i in range(prev_i, annotation.end.offset):
+ this = observations[i]["name"] + " @ " + observations[i]["place"]
+
+ if this in n_seen:
+ if n_seen[this] == 1:
+ logger.debug(
+ f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳"
+ )
+ n_seen[this] += 1
+ else:
+ n_seen[this] = 1
+
+ if not prev in delta:
+ delta[prev] = set()
+ delta[prev].add(this)
+
+ if not (prev, this) in delta_param:
+ delta_param[(prev, this)] = set()
+ delta_param[(prev, this)].add(param_str)
+
+ total_latency_us += observations[i]["attribute"].get("latency_us", 0)
+
+ prev = this
+
+ meta_observations.append(
+ {
+ "name": f"__trace__ {this}",
+ "param": param_dict,
+ "attribute": dict(
+ filter(
+ lambda kv: not kv[0].startswith("e_"),
+ observations[i]["param"].items(),
+ )
+ ),
+ }
+ )
+
+ if not prev in delta:
+ delta[prev] = set()
+ delta[prev].add("__end__")
+ if not (prev, "__end__") in delta_param:
+ delta_param[(prev, "__end__")] = set()
+ delta_param[(prev, "__end__")].add(param_str)
+
+ for transition, count in n_seen.items():
+ meta_observations.append(
+ {
+ "name": f"__loop__ {transition}",
+ "param": param_dict,
+ "attribute": {"n_iterations": count},
+ }
+ )
+
+ if total_latency_us:
+ meta_observations.append(
+ {
+ "name": annotation.start.name,
+ "param": param_dict,
+ "attribute": {"latency_us": total_latency_us},
+ }
+ )
+
+ is_loop = dict(
+ map(lambda kv: (kv[0], True), filter(lambda kv: kv[1] > 1, n_seen.items()))
+ )
+
+ return delta, delta_param, meta_observations, is_loop
+
+
+class EventSequenceModel:
+ def __init__(self, models):
+ self.models = models
+
+ def _event_normalizer(self, event):
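+        """
+        Split a scaled event such as "1000/name.action(...)" or "name.action(...)/1000"
+        into the bare event string and a normalization function for the model output.
+        """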
+ event_normalizer = lambda p: p
+ if "/" in event:
+ v1, v2 = event.split("/")
+ if utils.is_numeric(v1):
+ event = v2.strip()
+ event_normalizer = lambda p: utils.soft_cast_float(v1) / p
+ elif utils.is_numeric(v2):
+ event = v1.strip()
+ event_normalizer = lambda p: p / utils.soft_cast_float(v2)
+ else:
+ raise RuntimeError(f"Cannot parse '{event}'")
+ return event, event_normalizer
+
+ def eval_strs(self, events, aggregate="sum", aggregate_init=0, use_lut=False):
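+        """
+        Evaluate event strings of the form "name.action(param=value, ...)" (optionally
+        scaled, see _event_normalizer) against the loaded models and aggregate the
+        resulting predictions. Only aggregate="sum" is supported at the moment.
+        """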
+ for event in events:
+ event, event_normalizer = self._event_normalizer(event)
+ nn, param = event.split("(")
+ name, action = nn.split(".")
+ param_model = None
+ ref_model = None
+
+ for model in self.models:
+ if name in model.names and action in model.attributes(name):
+ ref_model = model
+ if use_lut:
+ param_model = model.get_param_lut(allow_none=True)
+ else:
+ param_model, param_info = model.get_fitted()
+ break
+
+ if param_model is None:
+ raise RuntimeError(f"Did not find a model for {name}.{action}")
+
+ param = param.removesuffix(")")
+ if param == "":
+ param = dict()
+ else:
+ param = utils.parse_conf_str(param)
+
+ param_list = utils.param_dict_to_list(param, ref_model.parameters)
+
+ if not use_lut and not param_info(name, action).is_predictable(param_list):
+ logger.warning(
+ f"Cannot predict {name}.{action}({param}), falling back to static model"
+ )
+
+ try:
+ event_output = event_normalizer(
+ param_model(
+ name,
+ action,
+ param=param_list,
+ )
+ )
+ except KeyError:
+ if use_lut:
+ logger.error(
+ f"Cannot predict {name}.{action}({param}) from LUT model"
+ )
+ else:
+ logger.error(f"Cannot predict {name}.{action}({param}) from model")
+ raise
+ except TypeError:
+ if not use_lut:
+ logger.error(f"Cannot predict {name}.{action}({param}) from model")
+ raise
+
+ if aggregate == "sum":
+ aggregate_init += event_output
+ else:
+ raise RuntimeError(f"Unknown aggregate type: {aggregate}")
+
+ return aggregate_init
diff --git a/lib/cli.py b/lib/cli.py
index 310314b..b68f548 100644
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
import dfatool.functions as df
-import dfatool.plotter
import logging
import numpy as np
import os
@@ -14,7 +13,11 @@ def sanity_check(args):
pass
-def print_static(model, static_model, name, attribute, with_dependence=False):
+def print_static(
+ model, static_model, name, attribute, with_dependence=False, precision=2
+):
+ if precision is None:
+ precision = 6
unit = " "
if attribute == "power":
unit = "µW"
@@ -23,25 +26,15 @@ def print_static(model, static_model, name, attribute, with_dependence=False):
elif attribute == "substate_count":
unit = "su"
if model.attr_by_name[name][attribute].stats:
+ ratio = model.attr_by_name[name][
+ attribute
+ ].stats.generic_param_dependence_ratio()
print(
- "{:10s}: {:28s} : {:.2f} {:s} ({:.2f})".format(
- name,
- attribute,
- static_model(name, attribute),
- unit,
- model.attr_by_name[name][
- attribute
- ].stats.generic_param_dependence_ratio(),
- )
+ f"{name:10s}: {attribute:28s} : {static_model(name, attribute):.{precision}f} {unit:s} ({ratio:.2f})"
)
else:
print(
- "{:10s}: {:28s} : {:.2f} {:s}".format(
- name,
- attribute,
- static_model(name, attribute),
- unit,
- )
+ f"{name:10s}: {attribute:28s} : {static_model(name, attribute):.{precision}f} {unit:s}"
)
if with_dependence:
for param in model.parameters:
@@ -93,18 +86,38 @@ def print_info_by_name(model, by_name):
)
-def print_analyticinfo(prefix, info):
+def print_information_gain_by_name(model, by_name):
+ for name in model.names:
+ for attr in model.attributes(name):
+ print(f"{name} {attr}:")
+ mutual_information = model.mutual_information(name, attr)
+ for param in model.parameters:
+ if param in mutual_information:
+ print(f" Parameter {param} : {mutual_information[param]:5.2f}")
+ else:
+ print(f" Parameter {param} : -.--")
+
+
+def print_analyticinfo(prefix, info, ndigits=None):
model_function = info.model_function.removeprefix("0 + ")
for i in range(len(info.model_args)):
- model_function = model_function.replace(
- f"regression_arg({i})", str(info.model_args[i])
- )
+ if ndigits is not None:
+ model_function = model_function.replace(
+ f"regression_arg({i})", str(round(info.model_args[i], ndigits=ndigits))
+ )
+ else:
+ model_function = model_function.replace(
+ f"regression_arg({i})", str(info.model_args[i])
+ )
model_function = model_function.replace("+ -", "- ")
print(f"{prefix}: {model_function}")
-def print_staticinfo(prefix, info):
- print(f"{prefix}: {info.value}")
+def print_staticinfo(prefix, info, ndigits=None):
+ if ndigits is not None:
+ print(f"{prefix}: {round(info.value, ndigits)}")
+ else:
+ print(f"{prefix}: {info.value}")
def print_symreginfo(prefix, info):
@@ -160,7 +173,7 @@ def _print_cartinfo(prefix, model):
def print_splitinfo(info, prefix=""):
if type(info) is df.SplitFunction:
- for k, v in info.child.items():
+ for k, v in sorted(info.child.items()):
print_splitinfo(v, f"{prefix} {info.param_name}={k}")
elif type(info) is df.ScalarSplitFunction:
print_splitinfo(info.child_le, f"{prefix} {info.param_name}≤{info.threshold}")
@@ -175,13 +188,13 @@ def print_splitinfo(info, prefix=""):
print(f"{prefix}: UNKNOWN {type(info)}")
-def print_model(prefix, info):
+def print_model(prefix, info, precision=None):
if type(info) is df.StaticFunction:
- print_staticinfo(prefix, info)
+ print_staticinfo(prefix, info, ndigits=precision)
elif type(info) is df.AnalyticFunction:
- print_analyticinfo(prefix, info)
+ print_analyticinfo(prefix, info, ndigits=precision)
elif type(info) is df.FOLFunction:
- print_analyticinfo(prefix, info)
+ print_analyticinfo(prefix, info, ndigits=precision)
elif type(info) is df.CARTFunction:
print_cartinfo(prefix, info)
elif type(info) is df.SplitFunction:
@@ -280,6 +293,7 @@ def model_quality_table(
buf = f"{key:>{key_len}s} {attr:>{attr_len}s}"
for results, info in ((lut, None), (model, model_info), (static, None)):
buf += " "
+
if results is not None and (
info is None
or (
@@ -317,6 +331,23 @@ def model_quality_table(
print(buf)
+def export_pseudo_dref(dref_file, dref, precision=None):
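+    """
+    Export model and model quality as plain LaTeX macro definitions, one def per key.
+
+    Key names are sanitized so that they form valid LaTeX macro names; float values
+    are formatted with `precision` decimal places if given.
+    """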
+ with open(dref_file, "w") as f:
+ for k, v in sorted(os.environ.items(), key=lambda kv: kv[0]):
+ if k.startswith("DFATOOL_"):
+ print(f"% {k}='{v}'", file=f)
+ for arg in sys.argv:
+ print(f"% {arg}", file=f)
+ for k, v in sorted(dref.items()):
+ k = k.replace("/", "I").replace("-", "").replace("_", "").replace(" ", "")
+ if type(v) is tuple:
+ v = v[0]
+ if type(v) in (float, np.float64) and precision is not None:
+ print("\\def\\" + k + "{" + f"{v:.{precision}f}" + "}", file=f)
+ else:
+ print("\\def\\" + k + "{" + str(v) + "}", file=f)
+
+
def export_dataref(dref_file, dref, precision=None):
with open(dref_file, "w") as f:
for k, v in sorted(os.environ.items(), key=lambda kv: kv[0]):
@@ -358,13 +389,23 @@ def export_dot(model, dot_prefix):
logger.info(f"Dot export of model saved to {filename}")
-def export_csv_unparam(model, csv_prefix):
+def export_csv_unparam(model, csv_prefix, dialect="excel"):
import csv
+ class ExcelLF(csv.Dialect):
+ delimiter = ","
+ quotechar = '"'
+ doublequote = True
+ skipinitialspace = False
+ lineterminator = "\n"
+ quoting = 0
+
+ csv.register_dialect("excel-lf", ExcelLF)
+
for name in sorted(model.names):
filename = f"{csv_prefix}{name}.csv"
with open(filename, "w") as f:
- writer = csv.writer(f)
+ writer = csv.writer(f, dialect=dialect)
writer.writerow(
["measurement"] + model.parameters + sorted(model.attributes(name))
)
@@ -420,6 +461,8 @@ def export_json_unparam(model, filename):
def boxplot_param(args, model):
+ import dfatool.plotter as dp
+
title = None
param_is_filtered = dict()
if args.filter_param:
@@ -449,7 +492,7 @@ def boxplot_param(args, model):
)
)
for attribute in attr_names:
- dfatool.plotter.boxplot(
+ dp.boxplot(
param_desc,
list(map(lambda k: by_param[(name, k)][attribute], param_keys)),
output=f"{args.boxplot_param}{name}-{attribute}.pdf",
@@ -467,6 +510,12 @@ def add_standard_arguments(parser):
help="Export tree-based model to {PREFIX}{name}-{attribute}.dot",
)
parser.add_argument(
+ "--export-pseudo-dref",
+ metavar="FILE",
+ type=str,
+ help="Export model and model quality to LaTeX def file (sort of like dataref)",
+ )
+ parser.add_argument(
"--export-dref",
metavar="FILE",
type=str,
@@ -479,6 +528,14 @@ def add_standard_arguments(parser):
help="Export raw (parameter-independent) observations in CSV format to {PREFIX}{name}-{attribute}.csv",
)
parser.add_argument(
+ "--export-csv-dialect",
+ metavar="DIALECT",
+ type=str,
+ choices=["excel", "excel-lf", "excel-tab", "unix"],
+ default="excel",
+ help="CSV dialect to use for --export-csv-unparam",
+ )
+ parser.add_argument(
"--export-pgf-unparam",
metavar="PREFIX",
type=str,
@@ -494,7 +551,7 @@ def add_standard_arguments(parser):
"--export-json",
metavar="FILENAME",
type=str,
- help="Export model in JSON format to FILENAME",
+ help="Export model and error metrics in JSON format to FILENAME",
)
parser.add_argument(
"--load-json",
@@ -557,6 +614,11 @@ def add_standard_arguments(parser):
help="Show benchmark information (number of measurements, parameter values, ...)",
)
parser.add_argument(
+ "--information-gain",
+ action="store_true",
+ help="Show information gain of parameters",
+ )
+ parser.add_argument(
"--log-level",
metavar="LEVEL",
choices=["debug", "info", "warning", "error"],
@@ -574,6 +636,13 @@ def add_standard_arguments(parser):
"all: all of the above",
)
parser.add_argument(
+ "--show-model-precision",
+ metavar="NDIG",
+ type=int,
+ default=2,
+ help="Limit precision of model output to NDIG decimals",
+ )
+ parser.add_argument(
"--show-model-error",
action="store_true",
help="Show model error compared to LUT (lower bound) and static (reference) models",
@@ -584,6 +653,11 @@ def add_standard_arguments(parser):
help="Show model complexity score and details (e.g. regression tree height and node count)",
)
parser.add_argument(
+ "--add-total-observation",
+ action="store_true",
+ help="Add a TOTAL observation for each <key> that consists of the sums of its <attribute> entries. This allows for cross-validation of behaviour models vs. non-behaviour-aware models.",
+ )
+ parser.add_argument(
"--cross-validate",
metavar="<method>:<count>",
type=str,
diff --git a/lib/functions.py b/lib/functions.py
index 32fade0..b76814b 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -27,6 +27,9 @@ dfatool_rmt_relevance_threshold = float(
os.getenv("DFATOOL_RMT_RELEVANCE_THRESHOLD", "0.5")
)
+dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear")
+dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf))
+
if dfatool_preproc_relevance_method == "mi":
import sklearn.feature_selection
@@ -90,7 +93,15 @@ class ParamFunction:
error measure.
"""
- def __init__(self, param_function, validation_function, num_vars, repr_str=None):
+ def __init__(
+ self,
+ param_function,
+ validation_function,
+ num_vars,
+ repr_str=None,
+ ini=None,
+ bounds=((dfatool_uls_min_bound, dfatool_uls_min_bound), (np.inf, np.inf)),
+ ):
"""
Create function object suitable for regression analysis.
@@ -113,6 +124,8 @@ class ParamFunction:
self._validation_function = validation_function
self._num_variables = num_vars
self.repr_str = repr_str
+ self.ini = ini
+ self.bounds = bounds
def __repr__(self) -> str:
if self.repr_str:
@@ -452,6 +465,24 @@ class SplitFunction(ModelFunction):
or "infty",
}
)
+ return hyper
+
+ # SplitFunction only
+ def flatten(self):
+ paths = list()
+ for param_value, subtree in self.child.items():
+ if type(subtree) is SplitFunction:
+ for path, value in subtree.flatten():
+ path = [(self.param_name, param_value)] + path
+ paths.append((path, value))
+ elif type(subtree) is StaticFunction:
+ path = [(self.param_name, param_value)]
+ paths.append((path, subtree.value))
+ else:
+ raise RuntimeError(
+ "flatten is only implemented for RMTs with constant leaves"
+ )
+ return paths
@classmethod
def from_json(cls, data):
@@ -1662,7 +1693,11 @@ class FOLFunction(SKLearnRegressionFunction):
self.model_args = list(np.ones((num_vars)))
try:
res = optimize.least_squares(
- error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15
+ error_function,
+ self.model_args,
+ args=(fit_parameters, data),
+ xtol=2e-15,
+ loss=dfatool_uls_loss_fun,
)
except ValueError as err:
logger.warning(f"Fit failed: {err} (function: {self.model_function})")
@@ -1792,6 +1827,7 @@ class AnalyticFunction(ModelFunction):
both for function usage and least squares optimization.
If unset, defaults to [1, 1, 1, ...]
"""
+ bounds = kwargs.pop("bounds", dict())
super().__init__(value, **kwargs)
self._parameter_names = parameters
self._num_args = num_args
@@ -1800,6 +1836,7 @@ class AnalyticFunction(ModelFunction):
self._dependson = [False] * (len(parameters) + num_args)
self.fit_success = False
self.fit_by_param = fit_by_param
+ self.bounds = bounds
if type(function_str) == str:
num_vars_re = re.compile(r"regression_arg\(([0-9]+)\)")
@@ -1905,10 +1942,26 @@ class AnalyticFunction(ModelFunction):
"""
X, Y, num_valid, num_total = self.get_fit_data(by_param)
if num_valid > 2:
+ lower_bounds = list()
+ upper_bounds = list()
+ for i in range(len(self.model_args)):
+ if i in self.bounds and self.bounds[i][0] == "range":
+ param_index = self._parameter_names.index(self.bounds[i][1])
+ lower_bounds.append(np.min(X[param_index]))
+ upper_bounds.append(np.max(X[param_index]))
+ self.model_args[i] = np.mean(X[param_index])
+ else:
+ lower_bounds.append(dfatool_uls_min_bound)
+ upper_bounds.append(np.inf)
error_function = lambda P, X, y: self._function(P, X) - y
try:
res = optimize.least_squares(
- error_function, self.model_args, args=(X, Y), xtol=2e-15
+ error_function,
+ self.model_args,
+ args=(X, Y),
+ xtol=2e-15,
+ loss=dfatool_uls_loss_fun,
+ bounds=(lower_bounds, upper_bounds),
)
except ValueError as err:
logger.warning(f"Fit failed: {err} (function: {self.model_function})")
@@ -2024,6 +2077,7 @@ class analytic:
_safe_log = np.vectorize(lambda x: np.log(np.abs(x)) if np.abs(x) > 0.001 else 1.0)
_safe_inv = np.vectorize(lambda x: 1 / x if np.abs(x) > 0.001 else 1.0)
_safe_sqrt = np.vectorize(lambda x: np.sqrt(np.abs(x)))
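+    # roofline(x, y) = min(x, y): linear in x up to the breakpoint y, constant afterwards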
+ _roofline = np.vectorize(lambda x, y: x if x < y else y)
_function_map = {
"linear": lambda x: x,
@@ -2039,6 +2093,7 @@ class analytic:
"safe_log": lambda x: np.log(np.abs(x)) if np.abs(x) > 0.001 else 1.0,
"safe_inv": lambda x: 1 / x if np.abs(x) > 0.001 else 1.0,
"safe_sqrt": lambda x: np.sqrt(np.abs(x)),
+ "roofline": lambda x, y: x if x < y else y,
}
@staticmethod
@@ -2108,6 +2163,21 @@ class analytic:
2,
repr_str="β₀ + β₁ * np.sqrt(x)",
),
+ "roofline": ParamFunction(
+ lambda reg_param, model_param: reg_param[0]
+ + reg_param[1] * analytic._roofline(model_param, reg_param[2]),
+ lambda model_param: True,
+ 3,
+ repr_str="β₀ + β₁ * roofline(x, β₂)",
+ bounds=(
+ (
+ dfatool_uls_min_bound,
+ dfatool_uls_min_bound,
+ dfatool_uls_min_bound,
+ ),
+ (np.inf, np.inf, np.inf),
+ ),
+ ),
# "num0_8": ParamFunction(
# lambda reg_param, model_param: reg_param[0]
# + reg_param[1] * analytic._num0_8(model_param),
@@ -2159,11 +2229,17 @@ class analytic:
if os.getenv("DFATOOL_RMT_SUBMODEL", "uls") == "fol":
functions = {"linear": functions["linear"]}
+ elif allowed_functions := os.getenv("DFATOOL_ULS_FUNCTIONS", None):
+ allowed_functions = allowed_functions.split(",")
+ all_functions = list(functions.keys())
+ for function_name in all_functions:
+ if function_name not in allowed_functions:
+ functions.pop(function_name)
return functions
@staticmethod
- def _fmap(reference_type, reference_name, function_type):
+ def _fmap(reference_type, reference_name, function_type, arg_idx=None):
"""Map arg/parameter name and best-fit function name to function text suitable for AnalyticFunction."""
ref_str = "{}({})".format(reference_type, reference_name)
if function_type == "linear":
@@ -2182,6 +2258,8 @@ class analytic:
return "1/({})".format(ref_str)
if function_type == "sqrt":
return "np.sqrt({})".format(ref_str)
+ if function_type == "roofline":
+ return "analytic._roofline({}, regression_arg({}))".format(ref_str, arg_idx)
return "analytic._{}({})".format(function_type, ref_str)
@staticmethod
@@ -2206,22 +2284,29 @@ class analytic:
"""
buf = "0"
arg_idx = 0
+ bounds = dict()
for combination in powerset(fit_results.items()):
buf += " + regression_arg({:d})".format(arg_idx)
arg_idx += 1
for function_item in combination:
if is_numeric(function_item[0]):
- buf += " * {}".format(
- analytic._fmap(
- "function_arg", function_item[0], function_item[1]["best"]
- )
- )
+ mapkey = "function_arg"
else:
- buf += " * {}".format(
- analytic._fmap(
- "parameter", function_item[0], function_item[1]["best"]
- )
+ mapkey = "parameter"
+ buf += " * {}".format(
+ analytic._fmap(
+ mapkey, function_item[0], function_item[1]["best"], arg_idx
)
+ )
+ if function_item[1]["best"] == "roofline":
+ bounds[arg_idx] = ("range", function_item[0])
+ arg_idx += 1
return AnalyticFunction(
- None, buf, parameter_names, num_args, fit_by_param=fit_results, **kwargs
+ None,
+ buf,
+ parameter_names,
+ num_args,
+ fit_by_param=fit_results,
+ bounds=bounds,
+ **kwargs,
)
diff --git a/lib/loader/plain.py b/lib/loader/plain.py
index 1818bce..ef0b596 100644
--- a/lib/loader/plain.py
+++ b/lib/loader/plain.py
@@ -4,6 +4,10 @@ from ..utils import soft_cast_int_or_float, soft_cast_float
import os
import re
+import logging
+
+logger = logging.getLogger(__name__)
+
class CSVfile:
def __init__(self):
@@ -65,10 +69,48 @@ class CSVfile:
return observations
-class Logfile:
- def __init__(self):
- pass
+class TraceAnnotation:
+ offset = None
+ name = None
+ param = dict()
+
+ def __init__(self, **kwargs):
+ self.__dict__.update(kwargs)
+
+ def apply_offset(self, offset):
+ self.offset += offset
+ return self
+
+ def __repr__(self):
+ param_desc = " ".join(map(lambda kv: f"{kv[0]}={kv[1]}", self.param.items()))
+ return f"{self.name}<{param_desc} @ {self.offset}>"
+
+
+class RunAnnotation:
+ name = None
+ start = None
+ kernels = list()
+ end = None
+
+ # start: offset points to first run entry
+ # kernel: offset points to first kernel run entry
+ # end: offset points to first non-run entry (i.e., for all run entries: offset < end.offset)
+
+ def __init__(self, **kwargs):
+ self.__dict__.update(kwargs)
+
+ def apply_offset(self, offset):
+ self.start.apply_offset(offset)
+ for kernel in self.kernels:
+ kernel.apply_offset(offset)
+ self.end.apply_offset(offset)
+ return self
+
+ def __repr__(self):
+ return f"RunAnnotation<{self.name}, start={self.start}, kernels={self.kernels}, end={self.end}>"
+
+class Logfile:
def kv_to_param(self, kv_str, cast):
try:
key, value = kv_str.split("=")
@@ -84,14 +126,24 @@ class Logfile:
def kv_to_param_i(self, kv_str):
return self.kv_to_param(kv_str, soft_cast_int_or_float)
- def load(self, f):
+ def load(self, f, is_trace=False):
observations = list()
+ if is_trace:
+ trace_status = None
+ trace_start = None
+ trace_kernels = list()
+ trace_end = None
+ annotations = list()
+
for lineno, line in enumerate(f):
- m = re.search(r"\[::\] *([^|]*?) *[|] *([^|]*?) *[|] *(.*)", line)
- if m:
+ if m := re.search(r"\[::\] *([^|]*?) *[|] *([^|]*?) *[|] *(.*)", line):
name_str = m.group(1)
param_str = m.group(2)
attr_str = m.group(3)
+ if is_trace:
+ name_str, name_annot = name_str.split("@")
+ name_str = name_str.strip()
+ name_annot = name_annot.strip()
try:
param = dict(map(self.kv_to_param_i, param_str.split()))
attr = dict(map(self.kv_to_param_f, attr_str.split()))
@@ -102,13 +154,89 @@ class Logfile:
"attribute": attr,
}
)
+ if is_trace:
+ observations[-1]["place"] = name_annot
+ except ValueError:
+ logger.warning(
+ f"Error parsing {f}: invalid key-value pair in line {lineno+1}"
+ )
+ logger.warning(f"Offending entry:\n{line}")
+ raise
+
+ if not is_trace:
+ continue
+
+ # only relevant for is_trace == True
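+            # Trace annotations use dedicated markers:
+            #   [>>] name | params  -> start of an annotated run
+            #   [--] name | params  -> kernel invocation within the current run
+            #   [<<] name | params  -> end of the run (completes a RunAnnotation)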
+ if m := re.fullmatch(r"\[>>\] *([^|]*?) *[|] *([^|]*?) *", line):
+ trace_status = 1
+ trace_kernels = list()
+ name_str = m.group(1)
+ param_str = m.group(2)
+ try:
+ param = dict(map(self.kv_to_param_i, param_str.split()))
+ except ValueError:
+ logger.warning(
+ f"Error parsing {f}: invalid key-value pair in line {lineno+1}"
+ )
+ logger.warning(f"Offending entry:\n{line}")
+ raise
+ trace_start = TraceAnnotation(
+ offset=len(observations), name=name_str, param=param
+ )
+
+ if m := re.fullmatch(r"\[--\] *([^|]*?) *[|] *([^|]*?) *", line):
+ trace_status = 2
+ name_str = m.group(1)
+ param_str = m.group(2)
+ try:
+ param = dict(map(self.kv_to_param_i, param_str.split()))
except ValueError:
logger.warning(
f"Error parsing {f}: invalid key-value pair in line {lineno+1}"
)
logger.warning(f"Offending entry:\n{line}")
raise
+ trace_kernels.append(
+ TraceAnnotation(
+ offset=len(observations), name=name_str, param=param
+ )
+ )
+
+ if m := re.fullmatch(r"\[<<\] *([^|]*?) *[|] *([^|]*?) *", line):
+ trace_status = None
+ name_str = m.group(1)
+ param_str = m.group(2)
+ try:
+ param = dict(map(self.kv_to_param_i, param_str.split()))
+ except ValueError:
+ logger.warning(
+ f"Error parsing {f}: invalid key-value pair in line {lineno+1}"
+ )
+ logger.warning(f"Offending entry:\n{line}")
+ raise
+ trace_end = TraceAnnotation(
+ offset=len(observations), name=name_str, param=param
+ )
+ if trace_start is not None:
+ assert trace_start.name == trace_end.name
+ for kernel in trace_kernels:
+ assert trace_start.name == kernel.name
+ annotations.append(
+ RunAnnotation(
+ name=trace_start.name,
+ start=trace_start,
+ kernels=trace_kernels,
+ end=trace_end,
+ )
+ )
+
+ trace_status = None
+ trace_start = None
+ trace_kernels = list()
+ trace_end = None
+ if is_trace:
+ return observations, annotations
return observations
def dump(self, observations, f):
diff --git a/lib/model.py b/lib/model.py
index 2452af7..4d1edd5 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -14,7 +14,14 @@ from .parameters import (
distinct_param_values,
)
from .paramfit import ParamFit
-from .utils import is_numeric, soft_cast_int, by_name_to_by_param, regression_measures
+from .utils import (
+ is_numeric,
+ soft_cast_int,
+ by_name_to_by_param,
+ by_param_to_by_name,
+ regression_measures,
+ param_eq_or_none,
+)
logger = logging.getLogger(__name__)
@@ -79,6 +86,7 @@ class AnalyticModel:
compute_stats=True,
force_tree=False,
max_std=None,
+ by_param=None,
from_json=None,
):
"""
@@ -96,7 +104,7 @@ class AnalyticModel:
- attributes: list of keys that should be analyzed,
e.g. ['power', 'duration']
- for each attribute mentioned in 'attributes': A list with measurements.
- All list except for 'attributes' must have the same length.
+ All lists except for 'attributes' must have the same length.
For example:
parameters = ['foo_count', 'irrelevant']
@@ -148,9 +156,18 @@ class AnalyticModel:
for name, name_data in from_json["name"].items():
self.attr_by_name[name] = dict()
for attr, attr_data in name_data.items():
- self.attr_by_name[name][attr] = ModelAttribute.from_json(
- name, attr, attr_data
- )
+ if by_param:
+ self.attr_by_name[name][attr] = ModelAttribute.from_json(
+ name,
+ attr,
+ attr_data,
+ data_values=by_name[name][attr],
+ param_values=by_name[name]["param"],
+ )
+ else:
+ self.attr_by_name[name][attr] = ModelAttribute.from_json(
+ name, attr, attr_data
+ )
self.fit_done = True
return
@@ -227,6 +244,9 @@ class AnalyticModel:
return self.parameters[param_index]
return str(param_index)
+ def mutual_information(self, name, attr):
+ return self.attr_by_name[name][attr].mutual_information()
+
def get_static(self, use_mean=False):
"""
Get static model function: name, attribute -> model value.
@@ -246,7 +266,7 @@ class AnalyticModel:
return static_model_getter
- def get_param_lut(self, use_mean=False, fallback=False):
+ def get_param_lut(self, use_mean=False, fallback=False, allow_none=False):
"""
Get parameter-look-up-table model function: name, attribute, parameter values -> model value.
@@ -276,7 +296,16 @@ class AnalyticModel:
try:
return lut_model[name][key][param]
except KeyError:
- if fallback:
+ if allow_none:
+ keys = filter(
+ lambda p: param_eq_or_none(param, p),
+ lut_model[name][key].keys(),
+ )
+ values = list(map(lambda p: lut_model[name][key][p], keys))
+ if not values:
+ raise
+ return np.mean(values)
+ elif fallback:
return static_model[name][key]
raise
params = kwargs["params"]
@@ -446,7 +475,7 @@ class AnalyticModel:
return model_getter, info_getter
- def assess(self, model_function, ref=None, return_raw=False):
+ def assess(self, model_function, ref=None, return_raw=False, with_sum=False):
"""
Calculate MAE, SMAPE, etc. of model_function for each by_name entry.
@@ -476,18 +505,36 @@ class AnalyticModel:
)
measures = regression_measures(predicted_data, elem[attribute])
detailed_results[name][attribute] = measures
- if return_raw:
+ if return_raw or with_sum:
raw_results[name]["attribute"][attribute] = {
"groundTruth": elem[attribute],
"modelOutput": predicted_data,
}
+ if with_sum:
+ for name in ref.keys():
+ attr_0 = ref[name]["attributes"][0]
+ gt_sum = np.zeros(
+ len(raw_results[name]["attribute"][attr_0]["groundTruth"])
+ )
+ mo_sum = np.zeros(
+ len(raw_results[name]["attribute"][attr_0]["modelOutput"])
+ )
+ for attribute in ref[name]["attributes"]:
+ gt_sum += np.array(
+ raw_results[name]["attribute"][attribute]["groundTruth"]
+ )
+ mo_sum += np.array(
+ raw_results[name]["attribute"][attribute]["modelOutput"]
+ )
+ detailed_results[name]["TOTAL"] = regression_measures(mo_sum, gt_sum)
+
if return_raw:
return detailed_results, raw_results
return detailed_results
def to_dref(
- self, static_quality, lut_quality, model_quality, xv_models=None
+ self, static_quality, lut_quality, model_quality, xv_models=None, with_sum=False
) -> dict:
ret = dict()
for name in self.names:
@@ -519,20 +566,27 @@ class AnalyticModel:
)
for k, v in param_data.items():
ret[f"paramcount/{name}/{k}"] = v
- for attr_name, attr in self.attr_by_name[name].items():
+
+ attr_pairs = self.attr_by_name[name].items()
+ if with_sum:
+ attr_pairs = list(attr_pairs)
+ attr_pairs.append(("TOTAL", None))
+
+ for attr_name, attr in attr_pairs:
# attr.data must be the same for all attrs
- ret[f"data/{name}/num samples"] = len(attr.data)
- unit = None
- if "power" in attr.attr:
- unit = r"\micro\watt"
- elif "energy" in attr.attr:
- unit = r"\pico\joule"
- elif attr.attr == "duration":
- unit = r"\micro\second"
- for k, v in attr.to_dref(unit).items():
- ret[f"data/{name}/{attr_name}/{k}"] = v
- for k, v in attr.model_function.hyper_to_dref().items():
- ret[f"hyper/{name}/{attr_name}/{k}"] = v
+ if attr is not None:
+ ret[f"data/{name}/num samples"] = len(attr.data)
+ unit = None
+ if "power" in attr.attr:
+ unit = r"\micro\watt"
+ elif "energy" in attr.attr:
+ unit = r"\pico\joule"
+ elif attr.attr == "duration":
+ unit = r"\micro\second"
+ for k, v in attr.to_dref(unit).items():
+ ret[f"data/{name}/{attr_name}/{k}"] = v
+ for k, v in attr.model_function.hyper_to_dref().items():
+ ret[f"hyper/{name}/{attr_name}/{k}"] = v
e_static = static_quality[name][attr_name]
for metric in "mae p50 p90 p95 p99".split():
ret[f"error/static/{name}/{attr_name}/{metric}"] = (
@@ -555,7 +609,7 @@ class AnalyticModel:
except KeyError:
logger.warning(f"{name} {attr_name} static model has no MAPE")
- if lut_quality is not None:
+ if lut_quality is not None and attr_name in lut_quality[name]:
e_lut = lut_quality[name][attr_name]
for metric in "mae p50 p90 p95 p99".split():
ret[f"error/lut/{name}/{attr_name}/{metric}"] = (
@@ -615,25 +669,65 @@ class AnalyticModel:
ret[f"xv/{name}/{attr_name}/{k}"] = np.mean(entry[k])
return ret
- def to_json(self, **kwargs) -> dict:
+ def to_json(
+ self,
+ with_by_param=False,
+ lut_error=None,
+ static_error=None,
+ model_error=None,
+ **kwargs,
+ ) -> dict:
"""
Return JSON encoding of this AnalyticModel.
"""
ret = {
"parameters": self.parameters,
"name": dict([[name, dict()] for name in self.names]),
+ "paramValuesbyName": dict([[name, dict()] for name in self.names]),
}
+ if with_by_param:
+ by_param = self.get_by_param()
+ ret["byParam"] = list()
+ for k, v in by_param.items():
+ ret["byParam"].append((k, v))
+
for name in self.names:
for attr_name, attr in self.attr_by_name[name].items():
ret["name"][name][attr_name] = attr.to_json(**kwargs)
+ if lut_error:
+ ret["name"][name][attr_name]["lutError"] = lut_error[name][
+ attr_name
+ ]
+ if static_error:
+ ret["name"][name][attr_name]["staticError"] = static_error[name][
+ attr_name
+ ]
+ if model_error:
+ ret["name"][name][attr_name]["modelError"] = model_error[name][
+ attr_name
+ ]
+ attr_name = list(self.attributes(name))[0]
+ for param_name in self.parameters:
+ if self.attr_by_name[name][attr_name].stats is not None:
+ ret["paramValuesbyName"][name][param_name] = self.attr_by_name[
+ name
+ ][attr_name].stats.distinct_values_by_param_name[param_name]
return ret
@classmethod
- def from_json(cls, data, by_name, parameters):
- assert data["parameters"] == parameters
- return cls(by_name, parameters, from_json=data)
+ def from_json(cls, data, by_name=None, parameters=None):
+ if by_name is None and parameters is None:
+ assert data["byParam"] is not None
+ by_param = dict()
+ for (nk, pk), v in data["byParam"]:
+ by_param[(nk, tuple(pk))] = v
+ by_name = by_param_to_by_name(by_param)
+ return cls(by_name, data["parameters"], by_param=by_param, from_json=data)
+ else:
+ assert data["parameters"] == parameters
+ return cls(by_name, parameters, from_json=data)
def webconf_function_map(self) -> list:
ret = list()
diff --git a/lib/parameters.py b/lib/parameters.py
index 4047c10..acb044c 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -604,6 +604,9 @@ class ModelAttribute:
# The best model we have. May be Static, Split, or Param (and later perhaps Substate)
self.model_function = None
+ # Information gain cache. Used for statistical analysis
+ self.mutual_information_cache = None
+
self._check_codependent_param()
# There must be at least 3 distinct data values (≠ None) if an analytic model
@@ -618,13 +621,20 @@ class ModelAttribute:
mean = np.mean(self.data)
return f"ModelAttribute<{self.name}, {self.attr}, mean={mean}>"
- def to_json(self, **kwargs):
- return {
+ def to_json(self, with_lut=False, **kwargs):
+ ret = {
"paramNames": self.param_names,
"argCount": self.arg_count,
"modelFunction": self.model_function.to_json(**kwargs),
}
+ if with_lut:
+ ret["LUT"] = list()
+ for key, value in self.by_param.items():
+ ret["LUT"].append((key, value))
+
+ return ret
+
def to_dref(self, unit=None):
ret = {"mean": (self.mean, unit), "median": (self.median, unit)}
@@ -699,12 +709,33 @@ class ModelAttribute:
def webconf_function_map(self):
return self.model_function.webconf_function_map()
+ def mutual_information(self):
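+        """
+        Return the mutual information between each parameter and this attribute's data.
+
+        Uses sklearn's mutual_info_regression; results are cached after the first call.
+        """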
+ if self.mutual_information_cache is not None:
+ return self.mutual_information_cache
+
+ from sklearn.feature_selection import mutual_info_regression
+
+ fit_parameters, _, ignore_index = param_to_ndarray(
+ self.param_values, with_nan=False, categorical_to_scalar=True
+ )
+
+ mutual_info_result = mutual_info_regression(fit_parameters, self.data)
+
+ self.mutual_information_cache = dict()
+ j = 0
+ for i, param_name in enumerate(self.param_names):
+ if not ignore_index[i]:
+ self.mutual_information_cache[param_name] = mutual_info_result[j]
+ j += 1
+
+ return self.mutual_information_cache
+
@classmethod
- def from_json(cls, name, attr, data):
+ def from_json(cls, name, attr, data, data_values=None, param_values=None):
param_names = data["paramNames"]
arg_count = data["argCount"]
- self = cls(name, attr, None, None, param_names, arg_count)
+ self = cls(name, attr, data_values, param_values, param_names, arg_count)
self.model_function = df.ModelFunction.from_json(data["modelFunction"])
self.mean = self.model_function.value
diff --git a/lib/paramfit.py b/lib/paramfit.py
index e6539a4..84eba2b 100644
--- a/lib/paramfit.py
+++ b/lib/paramfit.py
@@ -16,7 +16,13 @@ from .utils import (
)
logger = logging.getLogger(__name__)
-best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr")
+dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear")
+dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf))
+
+if dfatool_uls_loss_fun == "linear":
+ best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr")
+else:
+ best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "mae")
class ParamFit:
@@ -205,9 +211,24 @@ def _try_fits(
if function_name not in raw_results:
raw_results[function_name] = dict()
error_function = param_function.error_function
+ if param_function.ini:
+ ini = param_function.ini
+ else:
+ ini = [0] + [1 for i in range(1, param_function._num_variables)]
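+            # The roofline breakpoint (third regression argument) is only meaningful
+            # within the range of observed parameter values, so constrain it accordingly
+            # and start the optimization at the mean.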
+ if function_name == "roofline":
+ param_function.bounds = (
+ (dfatool_uls_min_bound, dfatool_uls_min_bound, np.min(X)),
+ (np.inf, np.inf, np.max(X)),
+ )
+ ini[2] = np.mean(X)
try:
res = optimize.least_squares(
- error_function, [0, 1], args=(X, Y), xtol=2e-15
+ error_function,
+ ini,
+ args=(X, Y),
+ xtol=2e-15,
+ loss=dfatool_uls_loss_fun,
+ bounds=param_function.bounds,
)
except FloatingPointError as e:
logger.warning(
diff --git a/lib/pelt.py b/lib/pelt.py
index 29faf5c..8a951c9 100644
--- a/lib/pelt.py
+++ b/lib/pelt.py
@@ -94,7 +94,7 @@ class PELT:
)
algo = algo.fit(self.norm_signal(signal))
- # Empirically, most sub-state detectino results use a penalty
+ # Empirically, most sub-state detection results use a penalty
# in the range 30 to 60. If there's no changepoints with a
# penalty of 20, there's also no changepoins with any penalty
# > 20, so we can safely skip changepoint detection altogether.
diff --git a/lib/utils.py b/lib/utils.py
index 426b701..fb76367 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -48,6 +48,8 @@ def running_mean(x: np.ndarray, N: int) -> np.ndarray:
def human_readable(value, unit):
+ if value is None:
+ return value
for prefix, factor in (
("p", 1e-12),
("n", 1e-9),
@@ -55,6 +57,8 @@ def human_readable(value, unit):
("m", 1e-3),
("", 1),
("k", 1e3),
+ ("M", 1e6),
+ ("G", 1e9),
):
if value < 1e3 * factor:
return "{:.2f} {}{}".format(value * (1 / factor), prefix, unit)
@@ -150,7 +154,7 @@ def parse_conf_str(conf_str):
"""
conf_dict = dict()
for option in conf_str.split(","):
- key, value = option.split("=")
+ key, value = option.strip().split("=")
conf_dict[key] = soft_cast_float(value)
return conf_dict
@@ -205,6 +209,18 @@ def param_slice_eq(a, b, index):
return False
+def param_eq_or_none(a, b):
+ """
+    Check if by_param keys a and b are identical, allowing None entries in a to match any value in b.
+ """
+ set_keys = tuple(filter(lambda i: a[i] is not None, range(len(a))))
+ a_not_none = tuple(map(lambda i: a[i], set_keys))
+ b_not_none = tuple(map(lambda i: b[i], set_keys))
+ if a_not_none == b_not_none:
+ return True
+ return False
+
+
def match_parameter_values(input_param: dict, match_param: dict):
"""
Check whether one of the paramaters in `input_param` has the same value in `match_param`.
@@ -302,6 +318,21 @@ def param_dict_to_list(param_dict, parameter_names, default=None):
return ret
+def param_dict_to_str(param_dict):
+ ret = list()
+ for parameter_name in sorted(param_dict.keys()):
+ ret.append(f"{parameter_name}={param_dict[parameter_name]}")
+ return " ".join(ret)
+
+
+def param_str_to_dict(param_str):
+ ret = dict()
+ for param_pair in param_str.split():
+ key, value = param_pair.split("=")
+ ret[key] = soft_cast_int_or_float(value)
+ return ret
+
+
def observations_enum_to_bool(observations: list, kconfig=False):
"""
Convert enum / categorical observations to boolean-only ones.
@@ -345,6 +376,10 @@ def observations_enum_to_bool(observations: list, kconfig=False):
def ignore_param(by_name: dict, parameter_names: list, ignored_parameters: list):
ignored_indexes = list()
unpoppable_params = list()
+
+ if ignored_parameters is None:
+ return
+
for param_name in sorted(ignored_parameters):
try:
ignored_indexes.append(parameter_names.index(param_name))
@@ -560,22 +595,30 @@ def filter_aggregate_by_param(aggregate, parameters, parameter_filter):
param_index = parameters.index(param_name)
except ValueError:
logger.error(f"Unknown parameter '{param_name}'")
- return
+ continue
param_value = soft_cast_int(param_value)
names_to_remove = set()
if condition == "<":
- condf = lambda x: x[param_index] < param_value
+ condf = (
+ lambda x: x[param_index] is not None and x[param_index] < param_value
+ )
elif condition == "≤":
- condf = lambda x: x[param_index] <= param_value
+ condf = (
+ lambda x: x[param_index] is not None and x[param_index] <= param_value
+ )
elif condition == "=":
condf = lambda x: x[param_index] == param_value
elif condition == "≠":
condf = lambda x: x[param_index] != param_value
elif condition == "≥":
- condf = lambda x: x[param_index] >= param_value
+ condf = (
+ lambda x: x[param_index] is not None and x[param_index] >= param_value
+ )
elif condition == ">":
- condf = lambda x: x[param_index] > param_value
+ condf = (
+ lambda x: x[param_index] is not None and x[param_index] > param_value
+ )
elif condition == "∈":
param_values = tuple(map(soft_cast_int, param_value.split(",")))
condf = lambda x: x[param_index] in param_values
@@ -685,11 +728,18 @@ def regression_measures(predicted: np.ndarray, ground_truth: np.ndarray):
rsq -- R^2 measure, see sklearn.metrics.r2_score
count -- Number of values
"""
- if type(predicted) != np.ndarray:
+
+ if type(predicted) is list:
+ predicted = np.array(predicted)
+
+ if type(ground_truth) is list:
+ ground_truth = np.array(ground_truth)
+
+ if type(predicted) is not np.ndarray:
raise ValueError(
"first arg ('predicted') must be ndarray, is {}".format(type(predicted))
)
- if type(ground_truth) != np.ndarray:
+ if type(ground_truth) is not np.ndarray:
raise ValueError(
"second arg ('ground_truth') must be ndarray, is {}".format(
type(ground_truth)
diff --git a/lib/validation.py b/lib/validation.py
index 958a9e0..bf6764d 100644
--- a/lib/validation.py
+++ b/lib/validation.py
@@ -109,7 +109,7 @@ class CrossValidator:
self.args = args
self.kwargs = kwargs
- def kfold(self, model_getter, k=10, static=False):
+ def kfold(self, model_getter, k=10, static=False, with_sum=False):
"""
Perform k-fold cross-validation and return average model quality.
@@ -161,10 +161,10 @@ class CrossValidator:
training_and_validation_sets[i][name] = subsets_by_name[name][i]
return self._generic_xv(
- model_getter, training_and_validation_sets, static=static
+ model_getter, training_and_validation_sets, static=static, with_sum=with_sum
)
- def montecarlo(self, model_getter, count=200, static=False):
+ def montecarlo(self, model_getter, count=200, static=False, with_sum=False):
"""
Perform Monte Carlo cross-validation and return average model quality.
@@ -211,10 +211,12 @@ class CrossValidator:
training_and_validation_sets[i][name] = subsets_by_name[name][i]
return self._generic_xv(
- model_getter, training_and_validation_sets, static=static
+ model_getter, training_and_validation_sets, static=static, with_sum=with_sum
)
- def _generic_xv(self, model_getter, training_and_validation_sets, static=False):
+ def _generic_xv(
+ self, model_getter, training_and_validation_sets, static=False, with_sum=False
+ ):
ret = dict()
models = list()
@@ -268,6 +270,16 @@ class CrossValidator:
)
)
+ if with_sum:
+ for name in self.names:
+ attr_0 = self.by_name[name]["attributes"][0]
+ gt_sum = np.zeros(len(ret[name][attr_0]["groundTruth"]))
+ mo_sum = np.zeros(len(ret[name][attr_0]["modelOutput"]))
+ for attribute in self.by_name[name]["attributes"]:
+ gt_sum += np.array(ret[name][attribute]["groundTruth"])
+ mo_sum += np.array(ret[name][attribute]["modelOutput"])
+ ret[name]["TOTAL"] = regression_measures(mo_sum, gt_sum)
+
return ret, models
def _single_xv(self, model_getter, tv_set_dict, static=False):
diff --git a/libexec/rapl-to-dfatool.py b/libexec/rapl-to-dfatool.py
new file mode 100755
index 0000000..5ab4c38
--- /dev/null
+++ b/libexec/rapl-to-dfatool.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+
+import sys
+
+
+def main(perf_line, rapl_names, rapl_start, rapl_stop):
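+    """
+    Convert a comma-separated perf output line (fourth field: duration in ns) and RAPL
+    energy counter readings in µJ, taken before and after the measurement, into a
+    dfatool log line with per-counter energy and power attributes.
+    """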
+ duration_ns = int(perf_line.split(",")[3])
+
+ rapl_names = rapl_names.split()
+ rapl_start = rapl_start.split()
+ rapl_stop = rapl_stop.split()
+
+ buf = [f"duration_ns={duration_ns}"]
+
+ for i in range(len(rapl_names)):
+ uj_start = int(rapl_start[i])
+ uj_stop = int(rapl_stop[i])
+ buf.append(f"{rapl_names[i]}_energy_uj={uj_stop - uj_start}")
+ buf.append(
+ f"{rapl_names[i]}_power_W={(uj_stop - uj_start) * 1000 / duration_ns}"
+ )
+
+ print(" ".join(buf))
+
+
+if __name__ == "__main__":
+ main(*sys.argv[1:])