| -rw-r--r-- | README.md | 11 |
| -rwxr-xr-x | bin/analyze-trace.py | 291 |
| -rw-r--r-- | lib/behaviour.py | 310 |
| -rw-r--r-- | lib/cli.py | 2 |
| -rw-r--r-- | lib/functions.py | 8 |
| -rw-r--r-- | lib/loader/plain.py | 3 |
| -rw-r--r-- | lib/model.py | 30 |
| -rw-r--r-- | lib/paramfit.py | 8 |
| -rw-r--r-- | lib/utils.py | 11 |
9 files changed, 434 insertions, 240 deletions
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -55,11 +55,11 @@ Least-Squares Regression is essentially a subset of RMT with just a single tree
 LMT and RMT differ significantly, as LMT uses a learning algorithm that starts out with a DECART and uses bottom-up pruning to turn it into an LMT, whereas RMT builds a DECART that only considers parameters that are not suitable for least-squares regression and then uses least-squares regression to find and fit leaf functions.
 By default, dfatool uses heuristics to determine whether it should generate a simple least-squares regression function or a fully-fledged RMT.
-Arguments such as `--force-tree` and environment variables (below) can be used to generate a different flavour of performance model; see [Modeling Method Selection](doc/modeling-method.md).
+Arguments such as `--force-tree` and environment variables (below) can be used to generate a different flavour of performance model; see [Modelling Method Selection](doc/modeling-method.md).
 
 Again, most of the options and methods documented here work for all three scripts: analyze-archive, analyze-kconfig, and analyze-log.
 
 * [Model Visualization and Export](doc/model-visual.md)
-* [Modeling Method Selection](doc/modeling-method.md)
+* [Modelling Method Selection](doc/modeling-method.md)
 * [Assessing Model Quality](doc/model-assessment.md)
 
 ## Model Application
@@ -112,9 +112,9 @@ The following variables may be set to alter the behaviour of dfatool components.
 | `DFATOOL_KCONF_WITH_CHOICE_NODES` | 0, **1** | Treat kconfig choices (e.g. "choice Model → MobileNet / ResNet / Inception") as enum parameters. If enabled, the corresponding boolean kconfig variables (e.g. "Model\_MobileNet") are not converted to parameters. If disabled, all (and only) boolean kconfig variables are treated as parameters. Mostly relevant for analyze-kconfig, eval-kconfig |
 | `DFATOOL_COMPENSATE_DRIFT` | **0**, 1 | Perform drift compensation for loaders without sync input (e.g. EnergyTrace or Keysight) |
 | `DFATOOL_DRIFT_COMPENSATION_PENALTY` | 0 .. 100 (default: majority vote over several penalties) | Specify penalty for ruptures.py PELT changepoint detection |
-| `DFATOOL_MODEL` | cart, decart, fol, lgbm, lmt, **rmt**, symreg, uls, xgb | Modeling method. See below for method-specific configuration options. |
+| `DFATOOL_MODEL` | cart, decart, fol, lgbm, lmt, **rmt**, symreg, uls, xgb | Modelling method. See below for method-specific configuration options. |
 | `DFATOOL_RMT_MAX_DEPTH` | **0** .. *n* | Maximum depth for RMT. Default (0): unlimited. |
-| `DFATOOL_RMT_SUBMODEL` | cart, fol, static, symreg, **uls** | Modeling method for RMT leaf functions. |
+| `DFATOOL_RMT_SUBMODEL` | cart, fol, static, symreg, **uls** | Modelling method for RMT leaf functions. |
 | `DFATOOL_PREPROCESSING_RELEVANCE_METHOD` | **none**, mi | Ignore parameters deemed irrelevant by the specified heuristic before passing them on to `DFATOOL_MODEL`. |
 | `DFATOOL_PREPROCESSING_RELEVANCE_THRESHOLD` | .. **0.1** .. | Threshold for relevance heuristic. |
 | `DFATOOL_CART_MAX_DEPTH` | **0** .. *n* | Maximum depth for sklearn CART. Default (0): unlimited. |
@@ -133,8 +133,9 @@ The following variables may be set to alter the behaviour of dfatool components.
 | `DFATOOL_LMT_MIN_SAMPLES_LEAF` | 0.0 .. **0.1** .. 1.0, 3 .. *n* | Minimum samples that each leaf of a split candidate must contain. A value below 1.0 specifies a ratio of the total number of training samples. A value above 1 specifies an absolute number of samples. |
 | `DFATOOL_LMT_MAX_BINS` | 10 .. **120** | Number of bins used to determine optimal split. LMT default: 25. |
 | `DFATOOL_LMT_CRITERION` | **mse**, rmse, mae, poisson | Error metric to use when selecting best split. |
-| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, mae, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. Least squares regression itself minimzes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. Supports all metrics accepted by `--error-metric`. |
+| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, **mae**, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. By default, least squares regression minimizes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. If `DFATOOL_ULS_LOSS_FUNCTION` is set to a value other than linear, the default is mean absolute error (mae) instead. Supports all metrics accepted by `--error-metric`. |
 | `DFATOOL_ULS_FUNCTIONS` | a,b,… | List of function templates to use in ULS. Default: all supported functions. |
+| `DFATOOL_ULS_LOSS_FUNCTION` | **linear**, soft\_l1, … | Loss function for least squares fitting; see the `loss` parameter of `scipy.optimize.least_squares`. |
 | `DFATOOL_ULS_MIN_DISTINCT_VALUES` | 2 .. **3** .. *n* | Minimum number of unique values a parameter must take to be eligible for ULS |
 | `DFATOOL_ULS_SKIP_CODEPENDENT_CHECK` | **0**, 1 | Do not detect and remove co-dependent features in ULS. |
 | `DFATOOL_ULS_MIN_BOUND` | **-∞** .. *n* | Lower bound for ULS regression variables. Setting it to 0 can often be beneficial. |
diff --git a/bin/analyze-trace.py b/bin/analyze-trace.py
index 3fe7e28..1cc3b89 100755
--- a/bin/analyze-trace.py
+++ b/bin/analyze-trace.py
@@ -11,6 +11,7 @@ import dfatool.cli
 import dfatool.plotter
 import dfatool.utils
 import dfatool.functions as df
+from dfatool.behaviour import SDKBehaviourModel
 from dfatool.loader import Logfile
 from dfatool.model import AnalyticModel
 from dfatool.validation import CrossValidator
@@ -34,174 +35,6 @@ def parse_logfile(filename):
         return loader.load(f, is_trace=True)
 
 
-def learn_pta(observations, annotation, delta=dict(), delta_param=dict()):
-    prev_i = annotation.start.offset
-    prev = "__init__"
-    prev_non_kernel = prev
-    meta_observations = list()
-    n_seen = dict()
-
-    total_latency_us = 0
-
-    if annotation.kernels:
-        # possibly as a dict of tuples, in case loops can iterate differently?
-        for i in range(prev_i, annotation.kernels[0].offset):
-            this = observations[i]["name"] + " @ " + observations[i]["place"]
-
-            if this in n_seen:
-                if n_seen[this] == 1:
-                    logging.debug(
-                        f"Loop found in {annotation.start.name} {annotation.start.param}: {this} ⟳"
-                    )
-                n_seen[this] += 1
-            else:
-                n_seen[this] = 1
-
-            if not prev in delta:
-                delta[prev] = set()
-            delta[prev].add(this)
-
-            if not (prev, this) in delta_param:
-                delta_param[(prev, this)] = set()
-            delta_param[(prev, this)].add(
-                dfatool.utils.param_dict_to_str(annotation.start.param)
-            )
-
-            prev = this
-            prev_i = i + 1
-
-            total_latency_us += observations[i]["attribute"].get("latency_us", 0)
-
-            meta_observations.append(
-                {
-                    "name": f"__trace__ {this}",
-                    "param": annotation.start.param,
-                    "attribute": dict(
-                        filter(
-                            lambda kv: not kv[0].startswith("e_"),
-                            observations[i]["param"].items(),
-                        )
-                    ),
-                }
-            )
-        prev_non_kernel = prev
-
-    for kernel in annotation.kernels:
-        prev = prev_non_kernel
-        for i in range(prev_i, kernel.offset):
-            this = observations[i]["name"] + " @ " + observations[i]["place"]
-
-            if not prev in delta:
-                delta[prev] = set()
-            delta[prev].add(this)
-
-            if not (prev, this) in delta_param:
-                delta_param[(prev, this)] = set()
-            delta_param[(prev, this)].add(
-                dfatool.utils.param_dict_to_str(annotation.start.param)
-            )
-
-            # The last iteration (next block) contains a single kernel,
-            # so we do not increase total_latency_us here.
-            # However, this means that we will only ever get one latency
-            # value for each set of kernels with a common problem size,
-            # despite potentially having far more data at our fingertips.
-            # We could provide one total_latency_us for each kernel
-            # (by combining start latency + kernel latency + teardown latency),
-            # but for that we first need to distinguish between kernel
-            # components and teardown components in the following block.
-
-            prev = this
-            prev_i = i + 1
-
-            meta_observations.append(
-                {
-                    "name": f"__trace__ {this}",
-                    "param": annotation.start.param,
-                    "attribute": dict(
-                        filter(
-                            lambda kv: not kv[0].startswith("e_"),
-                            observations[i]["param"].items(),
-                        )
-                    ),
-                }
-            )
-
-    # There is no kernel end signal in the underlying data, so the last iteration also contains a kernel run.
-    prev = prev_non_kernel
-    for i in range(prev_i, annotation.end.offset):
-        this = observations[i]["name"] + " @ " + observations[i]["place"]
-
-        if this in n_seen:
-            if n_seen[this] == 1:
-                logging.debug(
-                    f"Loop found in {annotation.start.name} {annotation.start.param}: {this} ⟳"
-                )
-            n_seen[this] += 1
-        else:
-            n_seen[this] = 1
-
-        if not prev in delta:
-            delta[prev] = set()
-        delta[prev].add(this)
-
-        if not (prev, this) in delta_param:
-            delta_param[(prev, this)] = set()
-        delta_param[(prev, this)].add(
-            dfatool.utils.param_dict_to_str(annotation.start.param)
-        )
-
-        total_latency_us += observations[i]["attribute"].get("latency_us", 0)
-
-        prev = this
-
-        meta_observations.append(
-            {
-                "name": f"__trace__ {this}",
-                "param": annotation.start.param,
-                "attribute": dict(
-                    filter(
-                        lambda kv: not kv[0].startswith("e_"),
-                        observations[i]["param"].items(),
-                    )
-                ),
-            }
-        )
-
-    if not prev in delta:
-        delta[prev] = set()
-    delta[prev].add("__end__")
-    if not (prev, "__end__") in delta_param:
-        delta_param[(prev, "__end__")] = set()
-    delta_param[(prev, "__end__")].add(
-        dfatool.utils.param_dict_to_str(annotation.start.param)
-    )
-
-    for transition, count in n_seen.items():
-        meta_observations.append(
-            {
-                "name": f"__loop__ {transition}",
-                "param": annotation.start.param,
-                "attribute": {"n_iterations": count},
-            }
-        )
-
-    if total_latency_us:
-        meta_observations.append(
-            {
-                "name": annotation.start.name,
-                "param": annotation.start.param,
-                "attribute": {"latency_us": total_latency_us},
-            }
-        )
-
-    is_loop = dict(
-        map(lambda kv: (kv[0], True), filter(lambda kv: kv[1] > 1, n_seen.items()))
-    )
-
-    return delta, delta_param, meta_observations, is_loop
-
-
 def join_annotations(ref, base, new):
     offset = len(ref)
     return base + list(map(lambda x: x.apply_offset(offset), new))
@@ -238,68 +71,23 @@ def main():
         map(parse_logfile, args.logfiles),
     )
 
-    delta_by_name = dict()
-    delta_param_by_name = dict()
-    is_loop = dict()
-    for annotation in annotations:
-        am_tt_param_names = sorted(annotation.start.param.keys())
-        if annotation.name not in delta_by_name:
-            delta_by_name[annotation.name] = dict()
-            delta_param_by_name[annotation.name] = dict()
-        _, _, meta_obs, _is_loop = learn_pta(
-            observations,
-            annotation,
-            delta_by_name[annotation.name],
-            delta_param_by_name[annotation.name],
-        )
-        observations += meta_obs
-        is_loop.update(_is_loop)
+    bm = SDKBehaviourModel(observations, annotations)
+    observations += bm.meta_observations
+    is_loop = bm.is_loop
+    am_tt_param_names = bm.am_tt_param_names
+    delta_by_name = bm.delta_by_name
+    delta_param_by_name = bm.delta_param_by_name
 
     def format_guard(guard):
        return "∧".join(map(lambda kv: f"{kv[0]}={kv[1]}", guard))
 
     for name in sorted(delta_by_name.keys()):
-        delta_cond = dict()
         for t_from, t_to_set in delta_by_name[name].items():
             i_to_transition = dict()
             delta_param_sets = list()
             to_names = list()
             transition_guard = dict()
 
-            if len(t_to_set) > 1:
-                am_tt_by_name = {
-                    name: {
-                        "attributes": [t_from],
-                        "param": list(),
-                        t_from: list(),
-                    },
-                }
-                for i, t_to in enumerate(sorted(t_to_set)):
-                    for param in delta_param_by_name[name][(t_from, t_to)]:
-                        am_tt_by_name[name]["param"].append(
-                            dfatool.utils.param_dict_to_list(
-                                dfatool.utils.param_str_to_dict(param),
-                                am_tt_param_names,
-                            )
-                        )
-                        am_tt_by_name[name][t_from].append(i)
-                    i_to_transition[i] = t_to
-                am = AnalyticModel(am_tt_by_name, am_tt_param_names, force_tree=True)
-                model, info = am.get_fitted()
-                if type(info(name, t_from)) is df.SplitFunction:
-                    flat_model = info(name, t_from).flatten()
-                else:
-                    flat_model = list()
-                    logging.warning(
-                        f"Model for {name} {t_from} is {info(name, t_from)}, expected SplitFunction"
-                    )
-
-                for prefix, output in flat_model:
-                    transition_name = i_to_transition[int(output)]
-                    if transition_name not in transition_guard:
-                        transition_guard[transition_name] = list()
-                    transition_guard[transition_name].append(prefix)
-
             for t_to in sorted(t_to_set):
                 delta_params = delta_param_by_name[name][(t_from, t_to)]
                 delta_param_sets.append(delta_params)
                 to_names.append(t_to)
@@ -311,7 +99,7 @@ def main():
                 print(f"{name} {t_from} → {t_to} →")
             else:
                 print(
-                    f"{name} {t_from} → {t_to} ({' ∨ '.join(map(format_guard, transition_guard.get(t_to, list()))) or '⊤'})"
+                    f"{name} {t_from} → {t_to} ({' ∨ '.join(map(format_guard, bm.transition_guard[t_from].get(t_to, list()))) or '⊤'})"
                 )
 
             for i in range(len(delta_param_sets)):
@@ -514,6 +302,63 @@ def main():
         )
         timing["assess model"] = time.time() - ts
 
+    if "paramdetection" in args.show_model or "all" in args.show_model:
+        for name in model.names:
+            for attribute in model.attributes(name):
+                info = param_info(name, attribute)
+                print(
+                    "{:10s} {:10s} non-param stddev {:f}".format(
+                        name,
+                        attribute,
+                        model.attr_by_name[name][attribute].stats.std_static,
+                    )
+                )
+                print(
+                    "{:10s} {:10s} param-lut stddev {:f}".format(
+                        name,
+                        attribute,
+                        model.attr_by_name[name][attribute].stats.std_param_lut,
+                    )
+                )
+                for param in sorted(
+                    model.attr_by_name[name][attribute].stats.std_by_param.keys()
+                ):
+                    print(
+                        "{:10s} {:10s} {:10s} stddev {:f}".format(
+                            name,
+                            attribute,
+                            param,
+                            model.attr_by_name[name][attribute].stats.std_by_param[
+                                param
+                            ],
+                        )
+                    )
+                for arg_index in range(model.attr_by_name[name][attribute].arg_count):
+                    print(
+                        "{:10s} {:10s} {:10s} stddev {:f}".format(
+                            name,
+                            attribute,
+                            f"arg{arg_index}",
+                            model.attr_by_name[name][attribute].stats.std_by_arg[
+                                arg_index
+                            ],
+                        )
+                    )
+                if type(info) is df.AnalyticFunction:
+                    for param_name in sorted(info.fit_by_param.keys(), key=str):
+                        param_fit = info.fit_by_param[param_name]["results"]
+                        for function_type in sorted(param_fit.keys()):
+                            function_rmsd = param_fit[function_type]["rmsd"]
+                            print(
+                                "{:10s} {:10s} {:10s} mean {:10s} RMSD {:.0f}".format(
+                                    name,
+                                    attribute,
+                                    str(param_name),
+                                    function_type,
+                                    function_rmsd,
+                                )
+                            )
+
     if "static" in args.show_model or "all" in args.show_model:
         print("--- static model ---")
         for name in sorted(model.names):
@@ -588,7 +433,11 @@ def main():
     if args.export_json:
         with open(args.export_json, "w") as f:
             json.dump(
-                model.to_json(),
+                model.to_json(
+                    static_error=static_quality,
+                    lut_error=lut_quality,
+                    model_error=analytic_quality,
+                ),
                 f,
                 sort_keys=True,
                 cls=dfatool.utils.NpEncoder,
diff --git a/lib/behaviour.py b/lib/behaviour.py
index 1e59d20..136a55e 100644
--- a/lib/behaviour.py
+++ b/lib/behaviour.py
@@ -2,10 +2,312 @@ import logging
 
 from . import utils
+from .model import AnalyticModel
+from . import functions as df
 
 logger = logging.getLogger(__name__)
 
 
+class SDKBehaviourModel:
+
+    def __init__(self, observations, annotations):
+
+        meta_observations = list()
+        delta_by_name = dict()
+        delta_param_by_name = dict()
+        is_loop = dict()
+
+        for annotation in annotations:
+            # annotation.start.param may be incomplete, for instance in cases
+            # where DPUs are allocated before the input file is loaded (and
+            # thus before the problem size is known).
+            # However, annotation.end.param may also differ from annotation.start.param (it should not, but that's how some benchmarks roll).
+            # So, we use annotation.start.param if it has the same keys as annotation.end.param, and annotation.end.param otherwise.
+            if sorted(annotation.start.param.keys()) == sorted(
+                annotation.end.param.keys()
+            ):
+                am_tt_param_names = sorted(annotation.start.param.keys())
+            else:
+                am_tt_param_names = sorted(annotation.end.param.keys())
+            if annotation.name not in delta_by_name:
+                delta_by_name[annotation.name] = dict()
+                delta_param_by_name[annotation.name] = dict()
+            _, _, meta_obs, _is_loop = self.learn_pta(
+                observations,
+                annotation,
+                delta_by_name[annotation.name],
+                delta_param_by_name[annotation.name],
+            )
+            meta_observations += meta_obs
+            is_loop.update(_is_loop)
+
+        self.am_tt_param_names = am_tt_param_names
+        self.delta_by_name = delta_by_name
+        self.delta_param_by_name = delta_param_by_name
+        self.meta_observations = meta_observations
+        self.is_loop = is_loop
+
+        self.build_transition_guards()
+
+    def build_transition_guards(self):
+        self.transition_guard = dict()
+        for name in sorted(self.delta_by_name.keys()):
+            for t_from, t_to_set in self.delta_by_name[name].items():
+                i_to_transition = dict()
+                delta_param_sets = list()
+                to_names = list()
+                transition_guard = dict()
+
+                if len(t_to_set) > 1:
+                    am_tt_by_name = {
+                        name: {
+                            "attributes": [t_from],
+                            "param": list(),
+                            t_from: list(),
+                        },
+                    }
+                    for i, t_to in enumerate(sorted(t_to_set)):
+                        for param in self.delta_param_by_name[name][(t_from, t_to)]:
+                            am_tt_by_name[name]["param"].append(
+                                utils.param_dict_to_list(
+                                    utils.param_str_to_dict(param),
+                                    self.am_tt_param_names,
+                                )
+                            )
+                            am_tt_by_name[name][t_from].append(i)
+                        i_to_transition[i] = t_to
+                    am = AnalyticModel(
+                        am_tt_by_name, self.am_tt_param_names, force_tree=True
+                    )
+                    model, info = am.get_fitted()
+                    if type(info(name, t_from)) is df.SplitFunction:
+                        flat_model = info(name, t_from).flatten()
+                    else:
+                        flat_model = list()
+                        logger.warning(
+                            f"Model for {name} {t_from} is {info(name, t_from)}, expected SplitFunction"
+                        )
+
+                    for prefix, output in flat_model:
+                        transition_name = i_to_transition[int(output)]
+                        if transition_name not in transition_guard:
+                            transition_guard[transition_name] = list()
+                        transition_guard[transition_name].append(prefix)
+
+                self.transition_guard[t_from] = transition_guard
+
+    def get_trace(self, name, param_dict):
+        delta = self.delta_by_name[name]
+        current_state = "__init__"
+        trace = [current_state]
+        states_seen = set()
+        while current_state != "__end__":
+            next_states = delta[current_state]
+
+            states_seen.add(current_state)
+            next_states = list(filter(lambda q: q not in states_seen, next_states))
+
+            if len(next_states) == 0:
+                raise RuntimeError(
+                    f"get_trace({name}, {param_dict}): found infinite loop at {trace}"
+                )
+
+            if len(next_states) > 1 and self.transition_guard[current_state]:
+                matching_next_states = list()
+                for candidate in next_states:
+                    for condition in self.transition_guard[current_state][candidate]:
+                        valid = True
+                        for key, value in condition:
+                            if param_dict[key] != value:
+                                valid = False
+                                break
+                        if valid:
+                            matching_next_states.append(candidate)
+                            break
+                next_states = matching_next_states
+
+            if len(next_states) == 0:
+                raise RuntimeError(
+                    f"get_trace({name}, {param_dict}): found no valid outbound transitions at {trace}, candidates {self.transition_guard[current_state]}"
+                )
+            if len(next_states) > 1:
+                raise RuntimeError(
+                    f"get_trace({name}, {param_dict}): found non-deterministic outbound transitions {next_states} at {trace}"
+                )
+
+            (next_state,) = next_states
+
+            trace.append(next_state)
+            current_state = next_state
+
+        return trace
+
+    def learn_pta(self, observations, annotation, delta=dict(), delta_param=dict()):
+        prev_i = annotation.start.offset
+        prev = "__init__"
+        prev_non_kernel = prev
+        meta_observations = list()
+        n_seen = dict()
+
+        total_latency_us = 0
+
+        if sorted(annotation.start.param.keys()) == sorted(annotation.end.param.keys()):
+            param_dict = annotation.start.param
+        else:
+            param_dict = annotation.end.param
+        param_str = utils.param_dict_to_str(param_dict)
+
+        if annotation.kernels:
+            # possibly as a dict of tuples, in case loops can iterate differently?
+            for i in range(prev_i, annotation.kernels[0].offset):
+                this = observations[i]["name"] + " @ " + observations[i]["place"]
+
+                if this in n_seen:
+                    if n_seen[this] == 1:
+                        logger.debug(
+                            f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳"
+                        )
+                    n_seen[this] += 1
+                else:
+                    n_seen[this] = 1
+
+                if not prev in delta:
+                    delta[prev] = set()
+                delta[prev].add(this)
+
+                if not (prev, this) in delta_param:
+                    delta_param[(prev, this)] = set()
+                delta_param[(prev, this)].add(param_str)
+
+                prev = this
+                prev_i = i + 1
+
+                total_latency_us += observations[i]["attribute"].get("latency_us", 0)
+
+                meta_observations.append(
+                    {
+                        "name": f"__trace__ {this}",
+                        "param": param_dict,
+                        "attribute": dict(
+                            filter(
+                                lambda kv: not kv[0].startswith("e_"),
+                                observations[i]["param"].items(),
+                            )
+                        ),
+                    }
+                )
+            prev_non_kernel = prev
+
+        for kernel in annotation.kernels:
+            prev = prev_non_kernel
+            for i in range(prev_i, kernel.offset):
+                this = observations[i]["name"] + " @ " + observations[i]["place"]
+
+                if not prev in delta:
+                    delta[prev] = set()
+                delta[prev].add(this)
+
+                if not (prev, this) in delta_param:
+                    delta_param[(prev, this)] = set()
+                delta_param[(prev, this)].add(param_str)
+
+                # The last iteration (next block) contains a single kernel,
+                # so we do not increase total_latency_us here.
+                # However, this means that we will only ever get one latency
+                # value for each set of kernels with a common problem size,
+                # despite potentially having far more data at our fingertips.
+                # We could provide one total_latency_us for each kernel
+                # (by combining start latency + kernel latency + teardown latency),
+                # but for that we first need to distinguish between kernel
+                # components and teardown components in the following block.
+
+                prev = this
+                prev_i = i + 1
+
+                meta_observations.append(
+                    {
+                        "name": f"__trace__ {this}",
+                        "param": param_dict,
+                        "attribute": dict(
+                            filter(
+                                lambda kv: not kv[0].startswith("e_"),
+                                observations[i]["param"].items(),
+                            )
+                        ),
+                    }
+                )
+
+        # There is no kernel end signal in the underlying data, so the last iteration also contains a kernel run.
+        prev = prev_non_kernel
+        for i in range(prev_i, annotation.end.offset):
+            this = observations[i]["name"] + " @ " + observations[i]["place"]
+
+            if this in n_seen:
+                if n_seen[this] == 1:
+                    logger.debug(
+                        f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳"
+                    )
+                n_seen[this] += 1
+            else:
+                n_seen[this] = 1
+
+            if not prev in delta:
+                delta[prev] = set()
+            delta[prev].add(this)
+
+            if not (prev, this) in delta_param:
+                delta_param[(prev, this)] = set()
+            delta_param[(prev, this)].add(param_str)
+
+            total_latency_us += observations[i]["attribute"].get("latency_us", 0)
+
+            prev = this
+
+            meta_observations.append(
+                {
+                    "name": f"__trace__ {this}",
+                    "param": param_dict,
+                    "attribute": dict(
+                        filter(
+                            lambda kv: not kv[0].startswith("e_"),
+                            observations[i]["param"].items(),
+                        )
+                    ),
+                }
+            )
+
+        if not prev in delta:
+            delta[prev] = set()
+        delta[prev].add("__end__")
+        if not (prev, "__end__") in delta_param:
+            delta_param[(prev, "__end__")] = set()
+        delta_param[(prev, "__end__")].add(param_str)
+
+        for transition, count in n_seen.items():
+            meta_observations.append(
+                {
+                    "name": f"__loop__ {transition}",
+                    "param": param_dict,
+                    "attribute": {"n_iterations": count},
+                }
+            )
+
+        if total_latency_us:
+            meta_observations.append(
+                {
+                    "name": annotation.start.name,
+                    "param": param_dict,
+                    "attribute": {"latency_us": total_latency_us},
+                }
+            )
+
+        is_loop = dict(
+            map(lambda kv: (kv[0], True), filter(lambda kv: kv[1] > 1, n_seen.items()))
+        )
+
+        return delta, delta_param, meta_observations, is_loop
+
+
 class EventSequenceModel:
     def __init__(self, models):
         self.models = models
@@ -53,7 +355,7 @@ class EventSequenceModel:
             param_list = utils.param_dict_to_list(param, ref_model.parameters)
 
             if not use_lut and not param_info(name, action).is_predictable(param_list):
-                logging.warning(
+                logger.warning(
                    f"Cannot predict {name}.{action}({param}), falling back to static model"
                )
 
@@ -67,15 +369,15 @@ class EventSequenceModel:
                )
            except KeyError:
                if use_lut:
-                    logging.error(
+                    logger.error(
                        f"Cannot predict {name}.{action}({param}) from LUT model"
                    )
                else:
-                    logging.error(f"Cannot predict {name}.{action}({param}) from model")
+                    logger.error(f"Cannot predict {name}.{action}({param}) from model")
                raise
            except TypeError:
                if not use_lut:
-                    logging.error(f"Cannot predict {name}.{action}({param}) from model")
+                    logger.error(f"Cannot predict {name}.{action}({param}) from model")
                raise
 
        if aggregate == "sum":
diff --git a/lib/cli.py b/lib/cli.py
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -551,7 +551,7 @@ def add_standard_arguments(parser):
        "--export-json",
        metavar="FILENAME",
        type=str,
-        help="Export model in JSON format to FILENAME",
+        help="Export model and error metrics in JSON format to FILENAME",
    )
    parser.add_argument(
        "--load-json",
diff --git a/lib/functions.py b/lib/functions.py
index 35b04ef..b76814b 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -27,6 +27,7 @@ dfatool_rmt_relevance_threshold = float(
     os.getenv("DFATOOL_RMT_RELEVANCE_THRESHOLD", "0.5")
 )
 
+dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear")
 dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf))
 
 if dfatool_preproc_relevance_method == "mi":
@@ -1692,7 +1693,11 @@ class FOLFunction(SKLearnRegressionFunction):
         self.model_args = list(np.ones((num_vars)))
         try:
             res = optimize.least_squares(
-                error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15
+                error_function,
+                self.model_args,
+                args=(fit_parameters, data),
+                xtol=2e-15,
+                loss=dfatool_uls_loss_fun,
             )
         except ValueError as err:
             logger.warning(f"Fit failed: {err} (function: {self.model_function})")
@@ -1955,6 +1960,7 @@ class AnalyticFunction(ModelFunction):
                     self.model_args,
                     args=(X, Y),
                     xtol=2e-15,
+                    loss=dfatool_uls_loss_fun,
                     bounds=(lower_bounds, upper_bounds),
                 )
             except ValueError as err:
diff --git a/lib/loader/plain.py b/lib/loader/plain.py
index 488dd2a..ef0b596 100644
--- a/lib/loader/plain.py
+++ b/lib/loader/plain.py
@@ -163,6 +163,9 @@ class Logfile:
                     logger.warning(f"Offending entry:\n{line}")
                     raise
 
+            if not is_trace:
+                continue
+
             # only relevant for is_trace == True
             if m := re.fullmatch(r"\[>>\] *([^|]*?) *[|] *([^|]*?) *", line):
                 trace_status = 1
diff --git a/lib/model.py b/lib/model.py
index dbe05aa..4d1edd5 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -104,7 +104,7 @@ class AnalyticModel:
     - attributes: list of keys that should be analyzed, e.g. ['power', 'duration']
     - for each attribute mentioned in 'attributes': A list with measurements.
-      All list except for 'attributes' must have the same length.
+      All lists except for 'attributes' must have the same length.
 
     For example:
 
     parameters = ['foo_count', 'irrelevant']
@@ -669,7 +669,14 @@ class AnalyticModel:
                     ret[f"xv/{name}/{attr_name}/{k}"] = np.mean(entry[k])
         return ret
 
-    def to_json(self, with_by_param=False, **kwargs) -> dict:
+    def to_json(
+        self,
+        with_by_param=False,
+        lut_error=None,
+        static_error=None,
+        model_error=None,
+        **kwargs,
+    ) -> dict:
         """
         Return JSON encoding of this AnalyticModel.
         """
@@ -688,11 +695,24 @@ class AnalyticModel:
         for name in self.names:
             for attr_name, attr in self.attr_by_name[name].items():
                 ret["name"][name][attr_name] = attr.to_json(**kwargs)
+                if lut_error:
+                    ret["name"][name][attr_name]["lutError"] = lut_error[name][
+                        attr_name
+                    ]
+                if static_error:
+                    ret["name"][name][attr_name]["staticError"] = static_error[name][
+                        attr_name
+                    ]
+                if model_error:
+                    ret["name"][name][attr_name]["modelError"] = model_error[name][
+                        attr_name
+                    ]
             attr_name = list(self.attributes(name))[0]
             for param_name in self.parameters:
-                ret["paramValuesbyName"][name][param_name] = self.attr_by_name[name][
-                    attr_name
-                ].stats.distinct_values_by_param_name[param_name]
+                if self.attr_by_name[name][attr_name].stats is not None:
+                    ret["paramValuesbyName"][name][param_name] = self.attr_by_name[
+                        name
+                    ][attr_name].stats.distinct_values_by_param_name[param_name]
 
         return ret
diff --git a/lib/paramfit.py b/lib/paramfit.py
index 000aa9c..84eba2b 100644
--- a/lib/paramfit.py
+++ b/lib/paramfit.py
@@ -16,9 +16,14 @@ from .utils import (
 )
 
 logger = logging.getLogger(__name__)
-best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr")
+dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear")
 dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf))
 
+if dfatool_uls_loss_fun == "linear":
+    best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr")
+else:
+    best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "mae")
+
 
 class ParamFit:
     """
@@ -222,6 +227,7 @@ def _try_fits(
                     ini,
                     args=(X, Y),
                     xtol=2e-15,
+                    loss=dfatool_uls_loss_fun,
                     bounds=param_function.bounds,
                 )
             except FloatingPointError as e:
diff --git a/lib/utils.py b/lib/utils.py
index 48a29d8..fb76367 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -728,11 +728,18 @@ def regression_measures(predicted: np.ndarray, ground_truth: np.ndarray):
     rsq -- R^2 measure, see sklearn.metrics.r2_score
     count -- Number of values
     """
-    if type(predicted) != np.ndarray:
+
+    if type(predicted) is list:
+        predicted = np.array(predicted)
+
+    if type(ground_truth) is list:
+        ground_truth = np.array(ground_truth)
+
+    if type(predicted) is not np.ndarray:
         raise ValueError(
             "first arg ('predicted') must be ndarray, is {}".format(type(predicted))
         )
-    if type(ground_truth) != np.ndarray:
+    if type(ground_truth) is not np.ndarray:
         raise ValueError(
             "second arg ('ground_truth') must be ndarray, is {}".format(
                 type(ground_truth)
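The new `SDKBehaviourModel` bundles the PTA-learning logic that previously lived in bin/analyze-trace.py. A minimal usage sketch follows; it assumes that `Logfile.load(..., is_trace=True)` returns an (observations, annotations) pair as `parse_logfile()` above suggests, and the log file name, annotation name, and parameter values are made up for illustration:

```python
from dfatool.behaviour import SDKBehaviourModel
from dfatool.loader import Logfile

# Hypothetical trace log; real invocations go through bin/analyze-trace.py.
with open("benchmark.txt") as f:
    observations, annotations = Logfile().load(f, is_trace=True)

bm = SDKBehaviourModel(observations, annotations)

# Inferred outbound transitions and the parameter-dependent guards per source state.
for t_from, guards in bm.transition_guard.items():
    print(t_from, guards)

# Expected state sequence for one (hypothetical) parameter assignment.
print(" → ".join(bm.get_trace("nmc_example", {"n_dpus": 8})))
```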

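`DFATOOL_ULS_LOSS_FUNCTION` is passed straight through to `scipy.optimize.least_squares` in lib/functions.py and lib/paramfit.py. The standalone sketch below (not dfatool code) illustrates the effect: with a robust loss such as soft_l1, a single outlier barely shifts the fitted coefficients, which is also why the default best-fit error metric switches from ssr to mae in that case.

```python
import numpy as np
from scipy import optimize

rng = np.random.default_rng(0)
x = np.linspace(1, 10, 50)
y = 3.0 * x + 2.0 + rng.normal(0, 0.1, x.size)
y[10] += 40  # a single outlier

def residuals(coef, x, y):
    # Residuals of a linear model coef[0] * x + coef[1].
    return coef[0] * x + coef[1] - y

for loss in ("linear", "soft_l1"):
    res = optimize.least_squares(residuals, [1.0, 1.0], args=(x, y), loss=loss)
    print(loss, res.x)  # soft_l1 stays close to the true coefficients (3, 2)
```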