-rwxr-xr-x | bin/analyze-archive.py |   5
-rw-r--r-- | lib/model.py           | 184
-rw-r--r-- | lib/parameters.py      |   4
-rw-r--r-- | lib/utils.py           |   1
4 files changed, 191 insertions, 3 deletions
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index 65d80b3..5a6b8f0 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -115,7 +115,7 @@ import sys
 from dfatool import plotter
 from dfatool.loader import RawData, pta_trace_to_aggregate
 from dfatool.functions import gplearn_to_function
-from dfatool.model import PTAModel
+from dfatool.model import PTAModel, DecisionTreeModel
 from dfatool.validation import CrossValidator
 from dfatool.utils import filter_aggregate_by_param
 from dfatool.automata import PTA
@@ -473,6 +473,9 @@ if __name__ == "__main__":
         pta=pta,
     )
 
+    dtreemodel = DecisionTreeModel(by_name, parameters)
+    print(dtreemodel.get_tree())
+
     if xv_method:
         xv = CrossValidator(PTAModel, by_name, parameters, arg_count)
 
diff --git a/lib/model.py b/lib/model.py
index bb4a45b..41cf726 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -11,6 +11,7 @@ from .functions import AnalyticFunction
 from .parameters import ParamStats
 from .utils import is_numeric, soft_cast_int, param_slice_eq, remove_index_from_tuple
 from .utils import by_name_to_by_param, match_parameter_values
+from .utils import filter_aggregate_by_param
 
 logger = logging.getLogger(__name__)
 arg_support_enabled = True
@@ -375,7 +376,7 @@ def _num_args_from_by_name(by_name):
 
 
 class AnalyticModel:
-    u"""
+    """
     Parameter-aware analytic energy/data size/... model.
 
     Supports both static and parameter-based model attributes, and automatic detection of parameter-dependence.
@@ -662,8 +663,187 @@ class AnalyticModel:
         pass
 
 
+def grep_aggregate_by_state_and_param(aggregate, name, param_index, param_value):
+    new_aggregate = dict()
+    new_aggregate[name] = {
+        "isa": aggregate[name]["isa"],
+        "attributes": aggregate[name]["attributes"],
+    }
+
+    param_index = soft_cast_int(param_index)
+    indices_to_copy = list(
+        map(lambda x: x[param_index] == param_value, aggregate[name]["param"])
+    )
+
+    if len(indices_to_copy) == 0:
+        raise RuntimeError("empty result")
+
+    new_aggregate[name]["param"] = list(
+        map(
+            lambda iv: iv[1],
+            filter(
+                lambda iv: indices_to_copy[iv[0]], enumerate(aggregate[name]["param"]),
+            ),
+        )
+    )
+
+    for attribute in aggregate[name]["attributes"]:
+        new_aggregate[name][attribute] = aggregate[name][attribute][indices_to_copy]
+
+    return new_aggregate
+
+
+class DecisionTreeModel:
+    def __init__(self, by_name, parameters):
+        self.by_name = by_name
+        self.by_param = by_name_to_by_param(by_name)
+        self.parameters = sorted(parameters)
+
+        self.dtree = None
+
+        # Dtree concept: for each parameter, split on == value1 / == value2 (-> two partitions of the data).
+        # If the set of influencing parameters differs between the two partitions,
+        # or if the type of dependency differs, the dtree must branch on this parameter.
+        # Select the first level based on TODO (the parameter whose two partitions have the lowest absolute static model error?
+        # Or the one with the largest number of differing parameters / dependency types? A fitness function for this would be nice.)
+
+        stats = ParamStats(self.by_name, self.by_param, self.parameters, dict(), False)
+        pre_candidates = list()
+        for state in self.states():
+            for i, param in enumerate(self.parameters):
+                if len(stats.distinct_values[state][param]) == 2:
+                    logger.debug(f"{state} has binary param {param}")
+                    pre_candidates.append(
+                        (state, i, param, stats.distinct_values[state][param])
+                    )
+
+        candidates = list()
+        for state, param_index, param_name, param_values in pre_candidates:
+            by_name_sub1 = grep_aggregate_by_state_and_param(
+                by_name, state, param_index, param_values[0]
+            )
+            by_param_sub1 = by_name_to_by_param(by_name_sub1)
+            by_name_sub2 = grep_aggregate_by_state_and_param(
+                by_name, state, param_index, param_values[1]
+            )
+            by_param_sub2 = by_name_to_by_param(by_name_sub2)
+            stats_sub1 = ParamStats(
+                by_name_sub1, by_param_sub1, self.parameters, dict(), False
+            )
+            stats_sub2 = ParamStats(
+                by_name_sub2, by_param_sub2, self.parameters, dict(), False
+            )
+            for attribute in by_name[state]["attributes"]:
+                param1 = stats_sub1.stats[state][attribute]["depends_on_params"]
+                param2 = stats_sub2.stats[state][attribute]["depends_on_params"]
+                if param1 != param2:
+                    logger.debug(
+                        f"{state} {attribute} by {param_name} is dtree candidate"
+                    )
+                    logger.debug(
+                        " {} == {} -> depends on {}".format(
+                            param_name, param_values[0], param1
+                        )
+                    )
+                    logger.debug(
+                        " {} == {} -> depends on {}".format(
+                            param_name, param_values[1], param2
+                        )
+                    )
+                    candidates.append(
+                        (state, attribute, param_index, param_name, param_values)
+                    )
+                    # TODO first figure out which parameter the recursive descent should use -- it should always be just one.
+                    # logger.debug('>>> recursive descent for {}/{} with {} = {}'.format(state, attribute, param_name, param_values[0]))
+                    # __class__(by_name_sub1, self.parameters)
+                    # logger.debug('<<< recursive descent for {}/{} with {} = {}'.format(state, attribute, param_name, param_values[0]))
+                    # logger.debug('>>> recursive descent for {}/{} with {} = {}'.format(state, attribute, param_name, param_values[1]))
+                    # __class__(by_name_sub2, self.parameters)
+                    # logger.debug('<<< recursive descent for {}/{} with {} = {}'.format(state, attribute, param_name, param_values[1]))
+
+        candidates_by_state_attribute = dict()
+        for state, attribute, param_index, param_name, param_values in candidates:
+            if (state, attribute) not in candidates_by_state_attribute:
+                candidates_by_state_attribute[(state, attribute)] = list()
+            candidates_by_state_attribute[(state, attribute)].append(
+                (param_index, param_name, param_values)
+            )
+
+        for k, params in candidates_by_state_attribute.items():
+            state, attribute = k
+            min_mae_global = np.inf
+            min_mae_param = None
+            for param_index, param_name, param_values in params:
+                by_name_sub1 = grep_aggregate_by_state_and_param(
+                    by_name, state, param_index, param_values[0]
+                )
+                by_name_sub2 = grep_aggregate_by_state_and_param(
+                    by_name, state, param_index, param_values[1]
+                )
+                m1 = PTAModel(by_name_sub1, self.parameters, dict())
+                m2 = PTAModel(by_name_sub2, self.parameters, dict())
+                pm1, pi1 = m1.get_fitted()
+                pm2, pi2 = m2.get_fitted()
+                mae1 = m1.assess(pm1)["by_name"][state][attribute]["mae"]
+                mae2 = m2.assess(pm2)["by_name"][state][attribute]["mae"]
+                min_mae = min(mae1, mae2)
+                if min_mae < min_mae_global:
+                    min_mae_global = min_mae
+                    min_mae_param = (
+                        param_index,
+                        param_name,
+                        param_values,
+                        [by_name_sub1, by_name_sub2],
+                    )
+            logger.debug(
+                f"{state} {attribute}: splitting on {min_mae_param[1]} gives lowest MAE ({min_mae_global})"
+            )
+            dtree = {
+                "param_index": min_mae_param[0],
+                "param_name": min_mae_param[1],
+                "min_mae": min_mae_global,
+                "param_value_subtree": dict(),
+            }
+            logger.debug(
+                f">>> recursive descent for {state} {attribute} with {min_mae_param[1]}={min_mae_param[2][0]}"
+            )
+            dtree["param_value_subtree"][min_mae_param[2][0]] = __class__(
+                min_mae_param[3][0], self.parameters
+            ).get_tree(state, attribute)
+            logger.debug(
+                f"<<< recursive descent for {state} {attribute} with {min_mae_param[1]}={min_mae_param[2][0]}"
+            )
+            logger.debug(
+                f">>> recursive descent for {state} {attribute} with {min_mae_param[1]}={min_mae_param[2][1]}"
+            )
+            dtree["param_value_subtree"][min_mae_param[2][1]] = __class__(
+                min_mae_param[3][1], self.parameters
+            ).get_tree(state, attribute)
+            logger.debug(
+                f"<<< recursive descent for {state} {attribute} with {min_mae_param[1]}={min_mae_param[2][1]}"
+            )
+            if self.dtree is None:
+                self.dtree = dict()
+            self.dtree[(state, attribute)] = dtree
+
+    def get_tree(self, state=None, attribute=None):
+        if state is None or attribute is None:
+            return self.dtree
+        if self.dtree is not None and (state, attribute) in self.dtree:
+            return self.dtree[(state, attribute)]
+        return None
+
+    def states(self):
+        """Return sorted list of state names."""
+        return sorted(
+            list(
+                filter(lambda k: self.by_name[k]["isa"] == "state", self.by_name.keys())
+            )
+        )
+
+
 class PTAModel:
-    u"""
+    """
     Parameter-aware PTA-based energy model.
 
     Supports both static and parameter-based model attributes, and automatic detection of parameter-dependence.
diff --git a/lib/parameters.py b/lib/parameters.py
index 5c6b978..3a393f6 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -272,6 +272,7 @@ def _compute_param_statistics(
         "corr_by_arg": [],
         "depends_on_param": {},
         "depends_on_arg": [],
+        "depends_on_params": list(),
     }
 
     np.seterr("raise")
@@ -297,6 +298,9 @@ def _compute_param_statistics(
             ret["std_param_lut"],
         )
 
+        if ret["depends_on_param"][param]:
+            ret["depends_on_params"].append(param)
+
     if state_or_trans in arg_count:
         for arg_index in range(arg_count[state_or_trans]):
             std_matrix, mean_std, lut_matrix = _std_by_param(
diff --git a/lib/utils.py b/lib/utils.py
index d28ecda..990ad01 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -204,6 +204,7 @@ def filter_aggregate_by_param(aggregate, parameters, parameter_filter):
             indices_to_keep = list(
                 map(lambda x: x[param_index] == param_value, aggregate[name]["param"])
            )
+            # ["param"] is not a numpy array, so we cannot index it as ["param"][indices_to_keep]; rely on this map/filter expression instead
            aggregate[name]["param"] = list(
                map(
                    lambda iv: iv[1],
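For reference, the nested dict built by DecisionTreeModel above can be walked like this. This is a minimal sketch and not part of the commit: lookup_split_chain is a hypothetical helper, and it assumes that parameter tuples are ordered like the sorted parameter list passed to DecisionTreeModel and that a leaf node is None (as get_tree() returns when no further split candidate was found).

    def lookup_split_chain(dtree, param_tuple):
        # Walk one (state, attribute) entry of DecisionTreeModel.get_tree():
        # each node is a dict with "param_index", "param_name", "min_mae" and
        # "param_value_subtree"; a leaf is None.
        chain = []
        node = dtree
        while node is not None:
            value = param_tuple[node["param_index"]]
            chain.append((node["param_name"], value, node["min_mae"]))
            # descend into the partition that matches this parameter value;
            # .get() returns None for values the tree has not split on
            node = node["param_value_subtree"].get(value)
        return chain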