diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/behaviour.py | 310 | ||||
| -rw-r--r-- | lib/cli.py | 2 | ||||
| -rw-r--r-- | lib/functions.py | 8 | ||||
| -rw-r--r-- | lib/loader/plain.py | 3 | ||||
| -rw-r--r-- | lib/model.py | 30 | ||||
| -rw-r--r-- | lib/paramfit.py | 8 | ||||
| -rw-r--r-- | lib/utils.py | 11 |
7 files changed, 358 insertions, 14 deletions
diff --git a/lib/behaviour.py b/lib/behaviour.py index 1e59d20..136a55e 100644 --- a/lib/behaviour.py +++ b/lib/behaviour.py @@ -2,10 +2,312 @@ import logging from . import utils +from .model import AnalyticModel +from . import functions as df logger = logging.getLogger(__name__) +class SDKBehaviourModel: + + def __init__(self, observations, annotations): + + meta_observations = list() + delta_by_name = dict() + delta_param_by_name = dict() + is_loop = dict() + + for annotation in annotations: + # annotation.start.param may be incomplete, for instance in cases + # where DPUs are allocated before the input file is loadeed (and + # thus before the problem size is known). + # However, annotation.end.param may also differ from annotation.start.param (it should not, but that's how some benchmarks roll). + # So, we use annotation.start.param if it has the same keys as annotation.end.param, and annotation.end.param otherwise + if sorted(annotation.start.param.keys()) == sorted( + annotation.end.param.keys() + ): + am_tt_param_names = sorted(annotation.start.param.keys()) + else: + am_tt_param_names = sorted(annotation.end.param.keys()) + if annotation.name not in delta_by_name: + delta_by_name[annotation.name] = dict() + delta_param_by_name[annotation.name] = dict() + _, _, meta_obs, _is_loop = self.learn_pta( + observations, + annotation, + delta_by_name[annotation.name], + delta_param_by_name[annotation.name], + ) + meta_observations += meta_obs + is_loop.update(_is_loop) + + self.am_tt_param_names = am_tt_param_names + self.delta_by_name = delta_by_name + self.delta_param_by_name = delta_param_by_name + self.meta_observations = meta_observations + self.is_loop = is_loop + + self.build_transition_guards() + + def build_transition_guards(self): + self.transition_guard = dict() + for name in sorted(self.delta_by_name.keys()): + for t_from, t_to_set in self.delta_by_name[name].items(): + i_to_transition = dict() + delta_param_sets = list() + to_names = list() + transition_guard = dict() + + if len(t_to_set) > 1: + am_tt_by_name = { + name: { + "attributes": [t_from], + "param": list(), + t_from: list(), + }, + } + for i, t_to in enumerate(sorted(t_to_set)): + for param in self.delta_param_by_name[name][(t_from, t_to)]: + am_tt_by_name[name]["param"].append( + utils.param_dict_to_list( + utils.param_str_to_dict(param), + self.am_tt_param_names, + ) + ) + am_tt_by_name[name][t_from].append(i) + i_to_transition[i] = t_to + am = AnalyticModel( + am_tt_by_name, self.am_tt_param_names, force_tree=True + ) + model, info = am.get_fitted() + if type(info(name, t_from)) is df.SplitFunction: + flat_model = info(name, t_from).flatten() + else: + flat_model = list() + logger.warning( + f"Model for {name} {t_from} is {info(name, t_from)}, expected SplitFunction" + ) + + for prefix, output in flat_model: + transition_name = i_to_transition[int(output)] + if transition_name not in transition_guard: + transition_guard[transition_name] = list() + transition_guard[transition_name].append(prefix) + + self.transition_guard[t_from] = transition_guard + + def get_trace(self, name, param_dict): + delta = self.delta_by_name[name] + current_state = "__init__" + trace = [current_state] + states_seen = set() + while current_state != "__end__": + next_states = delta[current_state] + + states_seen.add(current_state) + next_states = list(filter(lambda q: q not in states_seen, next_states)) + + if len(next_states) == 0: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found infinite loop at {trace}" + ) + + if len(next_states) > 1 and self.transition_guard[current_state]: + matching_next_states = list() + for candidate in next_states: + for condition in self.transition_guard[current_state][candidate]: + valid = True + for key, value in condition: + if param_dict[key] != value: + valid = False + break + if valid: + matching_next_states.append(candidate) + break + next_states = matching_next_states + + if len(next_states) == 0: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found no valid outbound transitions at {trace}, candidates {self.transition_guard[current_state]}" + ) + if len(next_states) > 1: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found non-deterministic outbound transitions {next_states} at {trace}" + ) + + (next_state,) = next_states + + trace.append(next_state) + current_state = next_state + + return trace + + def learn_pta(self, observations, annotation, delta=dict(), delta_param=dict()): + prev_i = annotation.start.offset + prev = "__init__" + prev_non_kernel = prev + meta_observations = list() + n_seen = dict() + + total_latency_us = 0 + + if sorted(annotation.start.param.keys()) == sorted(annotation.end.param.keys()): + param_dict = annotation.start.param + else: + param_dict = annotation.end.param + param_str = utils.param_dict_to_str(param_dict) + + if annotation.kernels: + # ggf. als dict of tuples, für den Fall dass Schleifen verschieden iterieren können? + for i in range(prev_i, annotation.kernels[0].offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if this in n_seen: + if n_seen[this] == 1: + logger.debug( + f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳" + ) + n_seen[this] += 1 + else: + n_seen[this] = 1 + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + prev = this + prev_i = i + 1 + + total_latency_us += observations[i]["attribute"].get("latency_us", 0) + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + prev_non_kernel = prev + + for kernel in annotation.kernels: + prev = prev_non_kernel + for i in range(prev_i, kernel.offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + # The last iteration (next block) contains a single kernel, + # so we do not increase total_latency_us here. + # However, this means that we will only ever get one latency + # value for each set of kernels with a common problem size, + # despite potentially having far more data at our fingertips. + # We could provide one total_latency_us for each kernel + # (by combining start latency + kernel latency + teardown latency), + # but for that we first need to distinguish between kernel + # components and teardown components in the following block. + + prev = this + prev_i = i + 1 + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + + # There is no kernel end signal in the underlying data, so the last iteration also contains a kernel run. + prev = prev_non_kernel + for i in range(prev_i, annotation.end.offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if this in n_seen: + if n_seen[this] == 1: + logger.debug( + f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳" + ) + n_seen[this] += 1 + else: + n_seen[this] = 1 + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + total_latency_us += observations[i]["attribute"].get("latency_us", 0) + + prev = this + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + + if not prev in delta: + delta[prev] = set() + delta[prev].add("__end__") + if not (prev, "__end__") in delta_param: + delta_param[(prev, "__end__")] = set() + delta_param[(prev, "__end__")].add(param_str) + + for transition, count in n_seen.items(): + meta_observations.append( + { + "name": f"__loop__ {transition}", + "param": param_dict, + "attribute": {"n_iterations": count}, + } + ) + + if total_latency_us: + meta_observations.append( + { + "name": annotation.start.name, + "param": param_dict, + "attribute": {"latency_us": total_latency_us}, + } + ) + + is_loop = dict( + map(lambda kv: (kv[0], True), filter(lambda kv: kv[1] > 1, n_seen.items())) + ) + + return delta, delta_param, meta_observations, is_loop + + class EventSequenceModel: def __init__(self, models): self.models = models @@ -53,7 +355,7 @@ class EventSequenceModel: param_list = utils.param_dict_to_list(param, ref_model.parameters) if not use_lut and not param_info(name, action).is_predictable(param_list): - logging.warning( + logger.warning( f"Cannot predict {name}.{action}({param}), falling back to static model" ) @@ -67,15 +369,15 @@ class EventSequenceModel: ) except KeyError: if use_lut: - logging.error( + logger.error( f"Cannot predict {name}.{action}({param}) from LUT model" ) else: - logging.error(f"Cannot predict {name}.{action}({param}) from model") + logger.error(f"Cannot predict {name}.{action}({param}) from model") raise except TypeError: if not use_lut: - logging.error(f"Cannot predict {name}.{action}({param}) from model") + logger.error(f"Cannot predict {name}.{action}({param}) from model") raise if aggregate == "sum": @@ -551,7 +551,7 @@ def add_standard_arguments(parser): "--export-json", metavar="FILENAME", type=str, - help="Export model in JSON format to FILENAME", + help="Export model and error metrics in JSON format to FILENAME", ) parser.add_argument( "--load-json", diff --git a/lib/functions.py b/lib/functions.py index 35b04ef..b76814b 100644 --- a/lib/functions.py +++ b/lib/functions.py @@ -27,6 +27,7 @@ dfatool_rmt_relevance_threshold = float( os.getenv("DFATOOL_RMT_RELEVANCE_THRESHOLD", "0.5") ) +dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear") dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf)) if dfatool_preproc_relevance_method == "mi": @@ -1692,7 +1693,11 @@ class FOLFunction(SKLearnRegressionFunction): self.model_args = list(np.ones((num_vars))) try: res = optimize.least_squares( - error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15 + error_function, + self.model_args, + args=(fit_parameters, data), + xtol=2e-15, + loss=dfatool_uls_loss_fun, ) except ValueError as err: logger.warning(f"Fit failed: {err} (function: {self.model_function})") @@ -1955,6 +1960,7 @@ class AnalyticFunction(ModelFunction): self.model_args, args=(X, Y), xtol=2e-15, + loss=dfatool_uls_loss_fun, bounds=(lower_bounds, upper_bounds), ) except ValueError as err: diff --git a/lib/loader/plain.py b/lib/loader/plain.py index 488dd2a..ef0b596 100644 --- a/lib/loader/plain.py +++ b/lib/loader/plain.py @@ -163,6 +163,9 @@ class Logfile: logger.warning(f"Offending entry:\n{line}") raise + if not is_trace: + continue + # only relevant for is_trace == True if m := re.fullmatch(r"\[>>\] *([^|]*?) *[|] *([^|]*?) *", line): trace_status = 1 diff --git a/lib/model.py b/lib/model.py index dbe05aa..4d1edd5 100644 --- a/lib/model.py +++ b/lib/model.py @@ -104,7 +104,7 @@ class AnalyticModel: - attributes: list of keys that should be analyzed, e.g. ['power', 'duration'] - for each attribute mentioned in 'attributes': A list with measurements. - All list except for 'attributes' must have the same length. + All lists except for 'attributes' must have the same length. For example: parameters = ['foo_count', 'irrelevant'] @@ -669,7 +669,14 @@ class AnalyticModel: ret[f"xv/{name}/{attr_name}/{k}"] = np.mean(entry[k]) return ret - def to_json(self, with_by_param=False, **kwargs) -> dict: + def to_json( + self, + with_by_param=False, + lut_error=None, + static_error=None, + model_error=None, + **kwargs, + ) -> dict: """ Return JSON encoding of this AnalyticModel. """ @@ -688,11 +695,24 @@ class AnalyticModel: for name in self.names: for attr_name, attr in self.attr_by_name[name].items(): ret["name"][name][attr_name] = attr.to_json(**kwargs) + if lut_error: + ret["name"][name][attr_name]["lutError"] = lut_error[name][ + attr_name + ] + if static_error: + ret["name"][name][attr_name]["staticError"] = static_error[name][ + attr_name + ] + if model_error: + ret["name"][name][attr_name]["modelError"] = model_error[name][ + attr_name + ] attr_name = list(self.attributes(name))[0] for param_name in self.parameters: - ret["paramValuesbyName"][name][param_name] = self.attr_by_name[name][ - attr_name - ].stats.distinct_values_by_param_name[param_name] + if self.attr_by_name[name][attr_name].stats is not None: + ret["paramValuesbyName"][name][param_name] = self.attr_by_name[ + name + ][attr_name].stats.distinct_values_by_param_name[param_name] return ret diff --git a/lib/paramfit.py b/lib/paramfit.py index 000aa9c..84eba2b 100644 --- a/lib/paramfit.py +++ b/lib/paramfit.py @@ -16,9 +16,14 @@ from .utils import ( ) logger = logging.getLogger(__name__) -best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr") +dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear") dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf)) +if dfatool_uls_loss_fun == "linear": + best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr") +else: + best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "mae") + class ParamFit: """ @@ -222,6 +227,7 @@ def _try_fits( ini, args=(X, Y), xtol=2e-15, + loss=dfatool_uls_loss_fun, bounds=param_function.bounds, ) except FloatingPointError as e: diff --git a/lib/utils.py b/lib/utils.py index 48a29d8..fb76367 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -728,11 +728,18 @@ def regression_measures(predicted: np.ndarray, ground_truth: np.ndarray): rsq -- R^2 measure, see sklearn.metrics.r2_score count -- Number of values """ - if type(predicted) != np.ndarray: + + if type(predicted) is list: + predicted = np.array(predicted) + + if type(ground_truth) is list: + ground_truth = np.array(ground_truth) + + if type(predicted) is not np.ndarray: raise ValueError( "first arg ('predicted') must be ndarray, is {}".format(type(predicted)) ) - if type(ground_truth) != np.ndarray: + if type(ground_truth) is not np.ndarray: raise ValueError( "second arg ('ground_truth') must be ndarray, is {}".format( type(ground_truth) |
