From 813c1508c200c6fc64a671b521af2eec522e9487 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Fri, 5 Mar 2021 15:35:44 +0100 Subject: promote sub-state models to a first-class modeling citizen get_fitted_sub is no longer present, sub-state models are part of get_fitted now --- bin/analyze-archive.py | 43 +++++++++++-------------- lib/functions.py | 58 +++++++++++++++++++++++++++++++-- lib/model.py | 87 +++++++++++++------------------------------------- 3 files changed, 97 insertions(+), 91 deletions(-) diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py index d7e6a59..a9ee5cf 100755 --- a/bin/analyze-archive.py +++ b/bin/analyze-archive.py @@ -47,6 +47,7 @@ from dfatool.functions import ( gplearn_to_function, SplitFunction, AnalyticFunction, + SubstateFunction, StaticFunction, ) from dfatool.model import PTAModel @@ -866,15 +867,6 @@ if __name__ == "__main__": safe_functions_enabled=safe_functions_enabled ) - if args.with_substates: - sub_model, sub_info = model.get_fitted_sub( - safe_functions_enabled=safe_functions_enabled, - state_duration=raw_data.setup_by_fileno[0]["state_duration"] * 1e3, - ) - - # substate_model = model.get_substates() - # print(model.assess(substate_model, ref=model.sc_by_name)) - if "paramdetection" in show_models or "all" in show_models: for state in model.states_and_transitions: for attribute in model.attributes(state): @@ -931,6 +923,8 @@ if __name__ == "__main__": print_splitinfo( model.parameters, info, f"{state:10s} {attribute:15s}" ) + elif type(info) is SubstateFunction: + print(f"{state:10s} {attribute:15s}: Substate (TODO)") for trans in model.transitions: for attribute in model.attributes(trans): info = param_info(trans, attribute) @@ -940,6 +934,8 @@ if __name__ == "__main__": print_splitinfo( model.parameters, info, f"{trans:10s} {attribute:15s}" ) + elif type(info) is SubstateFunction: + print(f"{state:10s} {attribute:15s}: Substate (TODO)") if args.with_substates: for submodel in model.submodel_by_name.values(): sub_param_model, sub_param_info = submodel.get_fitted() @@ -949,14 +945,21 @@ if __name__ == "__main__": if type(info) is AnalyticFunction: print( "{:10s} {:15s}: {}".format( - substate, subattribute, info.function.model_function - ) - ) - print( - "{:10s} {:15s} {}".format( - "", "", info.function.model_args + substate, subattribute, info.model_function ) ) + print("{:10s} {:15s} {}".format("", "", info.model_args)) + + if args.with_substates: + for state in model.states: + if ( + type(model.attr_by_name[state]["power"].model_function) + is SubstateFunction + ): + # sub-state models need to know the duration of the state / transition. only needed for eval. + model.attr_by_name[state]["power"].model_function.static_duration = ( + raw_data.setup_by_fileno[0]["state_duration"] * 1e3 + ) if xv_method == "montecarlo": analytic_quality = xv.montecarlo(lambda m: m.get_fitted()[0], xv_count) @@ -965,9 +968,6 @@ if __name__ == "__main__": else: analytic_quality = model.assess(param_model) - if args.with_substates: - sub_quality = model.assess(sub_model) - if "tex" in show_models or "tex" in show_quality: print_text_model_data( model, @@ -1013,13 +1013,6 @@ if __name__ == "__main__": [None, sub_param_info, None], ) - if ("table" in show_quality or "all" in show_quality) and args.with_substates: - model_quality_table( - ["parameterized", "sub-states", "LUT"], - [analytic_quality, sub_quality, lut_quality], - [param_info, sub_info, None], - ) - if "overall" in show_quality or "all" in show_quality: print("overall state static/param/lut MAE assuming equal state distribution:") print( diff --git a/lib/functions.py b/lib/functions.py index 52d110b..4e0e8f7 100644 --- a/lib/functions.py +++ b/lib/functions.py @@ -284,7 +284,6 @@ class SplitFunction(ModelFunction): ret.update( { "type": "split", - "value": self.value, "paramIndex": self.param_index, "child": dict([[k, v.to_json()] for k, v in self.child.items()]), } @@ -303,6 +302,62 @@ class SplitFunction(ModelFunction): return f"SplitFunction<{self.value}, param_index={self.param_index}>" +class SubstateFunction(ModelFunction): + def __init__(self, value, sequence_by_count, count_model, sub_model): + super().__init__(value) + self.sequence_by_count = sequence_by_count + self.count_model = count_model + self.sub_model = sub_model + + # only used by analyze-archive model quality evaluation. Not serialized. + self.static_duration = None + + def is_predictable(self, param_list): + substate_count = round(self.count_model.eval(param_list)) + return substate_count in self.sequence_by_count + + def eval(self, param_list, duration=None): + substate_count = round(self.count_model.eval(param_list)) + cumulative_energy = 0 + total_duration = 0 + substate_model, _ = self.sub_model.get_fitted() + substate_sequence = self.sequence_by_count[substate_count] + for i, sub_name in enumerate(substate_sequence): + sub_duration = substate_model(sub_name, "duration", param=param_list) + sub_power = substate_model(sub_name, "power", param=param_list) + + if i == substate_count - 1: + if duration is not None: + sub_duration = duration - total_duration + elif self.static_duration is not None: + sub_duration = self.static_duration - total_duration + + cumulative_energy += sub_power * sub_duration + total_duration += sub_duration + + return cumulative_energy / total_duration + + def to_json(self): + ret = super().to_json() + ret.update( + { + "type": "substate", + "sequence": self.sequence_by_count, + "countModel": self.count_model.to_json(), + "subModel": self.sub_model.to_json(), + } + ) + return ret + + @classmethod + def from_json(cls, data): + assert data["type"] == "substate" + raise NotImplementedError + + def __repr__(self): + return "SubstateFunction" + + class AnalyticFunction(ModelFunction): """ A multi-dimensional model function, generated from a string, which can be optimized using regression. @@ -500,7 +555,6 @@ class AnalyticFunction(ModelFunction): ret.update( { "type": "analytic", - "value": self.value, "functionStr": self.model_function, "argCount": self._num_args, "parameterNames": self._parameter_names, diff --git a/lib/model.py b/lib/model.py index 4c4c226..527a19e 100644 --- a/lib/model.py +++ b/lib/model.py @@ -4,7 +4,7 @@ import logging import numpy as np import os from .automata import PTA, ModelAttribute -from .functions import StaticFunction +from .functions import StaticFunction, SubstateFunction from .parameters import ParallelParamStats from .paramfit import ParallelParamFit from .utils import soft_cast_int, by_name_to_by_param, regression_measures @@ -439,6 +439,7 @@ class PTAModel(AnalyticModel): from .pelt import PELT self.pelt = PELT(**pelt) + # must run before _compute_stats so that _compute_stats produces a "substate_count" model self.find_substates() else: self.pelt = None @@ -448,6 +449,9 @@ class PTAModel(AnalyticModel): self._compute_stats(by_name) if self.pelt is not None: + # cluster_substates uses self.attr_by_name[*]["power"].param_values, which is set by _compute_stats + # cluster_substates relies on fitted "substate_count" models, which are generated by get_fitted. + self.get_fitted() # cluster_substates alters submodel_by_name, so we cannot use its keys() iterator. names_with_submodel = list(self.submodel_by_name.keys()) for name in names_with_submodel: @@ -465,60 +469,6 @@ class PTAModel(AnalyticModel): for key in elem["attributes"]: elem[key] = np.array(elem[key]) - def get_fitted_sub( - self, use_mean=False, safe_functions_enabled=False, state_duration=None - ): - - param_model_getter, param_info_getter = self.get_fitted( - use_mean=use_mean, safe_functions_enabled=safe_functions_enabled - ) - - def model_getter(name, key, **kwargs): - if key != "power": - return param_model_getter(name, key, **kwargs) - - try: - substate_count = round(param_model_getter(name, "substate_count")) - except KeyError: - return param_model_getter(name, key, **kwargs) - if substate_count == 1: - return param_model_getter(name, key, **kwargs) - - cumulative_energy = 0 - total_duration = 0 - substate_model, _ = self.submodel_by_name[name].get_fitted() - substate_sequence = self.substate_sequence_by_nc[(name, substate_count)] - for i, sub_name in enumerate(substate_sequence): - sub_duration = substate_model(sub_name, "duration", **kwargs) - sub_power = substate_model(sub_name, "power", **kwargs) - - if i == substate_count - 1: - if "duration" in kwargs: - sub_duration = kwargs["duration"] - total_duration - elif name in self.states and state_duration is not None: - sub_duration = state_duration - total_duration - - cumulative_energy += sub_power * sub_duration - total_duration += sub_duration - - return cumulative_energy / total_duration - - def info_getter(name, key, **kwargs): - if key != "power": - return None - - try: - substate_count = round(param_model_getter(name, "substate_count")) - except KeyError: - return None - if substate_count == 1: - return None - - # TODO - return True - - return model_getter, info_getter - # This heuristic is very similar to the "function is not much better than # median" checks in get_fitted. So far, doing it here as well is mostly # a performance and not an algorithm quality decision. @@ -669,13 +619,10 @@ class PTAModel(AnalyticModel): # Schwankungen, die beim separaten Fitting zu unterschiedlichen Funktionen führen würden. p_attr = self.attr_by_name[p_name]["power"] p_params = list(set(map(tuple, p_attr.param_values))) - p_param_index = dict() - for i, p_param in enumerate(p_params): - p_param_index[p_param] = i sub_attr_by_function = dict() static = submodel.get_static() lut = submodel.get_param_lut(fallback=True) - values_to_cluster = np.zeros((len(submodel.names), len(p_param_index))) + values_to_cluster = np.zeros((len(submodel.names), len(p_params))) for i, name in enumerate(submodel.names): for j, param in enumerate(p_params): values_to_cluster[i, j] = lut(name, "duration", param=param) @@ -699,7 +646,7 @@ class PTAModel(AnalyticModel): if len(cl_substates) == 1: clusters.append(cl_substates) continue - values_to_cluster = np.zeros((len(cl_substates), len(p_param_index))) + values_to_cluster = np.zeros((len(cl_substates), len(p_params))) for i, name in enumerate(cl_substates): for j, param in enumerate(p_params): values_to_cluster[i, j] = lut(name, "power", param=param) @@ -742,10 +689,22 @@ class PTAModel(AnalyticModel): "power": powers, } self.submodel_by_name[p_name] = PTAModel(by_name, self.parameters, dict()) - for k in self.substate_sequence_by_nc.keys(): - self.substate_sequence_by_nc[k] = list( - map(lambda x: new_subname_by_old[x], self.substate_sequence_by_nc[k]) - ) + sequence_by_count = dict() + for name, count in self.substate_sequence_by_nc.keys(): + if name == p_name: + sequence_by_count[int(count)] = list( + map( + lambda x: new_subname_by_old[x], + self.substate_sequence_by_nc[(name, count)], + ) + ) + + self.attr_by_name[p_name]["power"].model_function = SubstateFunction( + self.attr_by_name[p_name]["power"].get_static(), + sequence_by_count, + self.attr_by_name[p_name]["substate_count"].model_function, + self.submodel_by_name[p_name], + ) # data[0] = [first sub-state, second sub-state, ...] # data[1] = [first sub-state, second sub-state, ...] -- cgit v1.2.3