-rwxr-xr-x  bin/analyze-archive.py    5
-rw-r--r--  lib/model.py            184
-rw-r--r--  lib/parameters.py         4
-rw-r--r--  lib/utils.py              1
4 files changed, 191 insertions, 3 deletions
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index 65d80b3..5a6b8f0 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -115,7 +115,7 @@ import sys
from dfatool import plotter
from dfatool.loader import RawData, pta_trace_to_aggregate
from dfatool.functions import gplearn_to_function
-from dfatool.model import PTAModel
+from dfatool.model import PTAModel, DecisionTreeModel
from dfatool.validation import CrossValidator
from dfatool.utils import filter_aggregate_by_param
from dfatool.automata import PTA
@@ -473,6 +473,9 @@ if __name__ == "__main__":
pta=pta,
)
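+    # get_tree() returns a dict mapping (state, attribute) to the chosen split:
+    # param_name, param_index, min_mae and per-value subtrees (see DecisionTreeModel in lib/model.py)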
+    dtreemodel = DecisionTreeModel(by_name, parameters)
+ print(dtreemodel.get_tree())
+
if xv_method:
xv = CrossValidator(PTAModel, by_name, parameters, arg_count)
diff --git a/lib/model.py b/lib/model.py
index bb4a45b..41cf726 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -11,6 +11,7 @@ from .functions import AnalyticFunction
from .parameters import ParamStats
from .utils import is_numeric, soft_cast_int, param_slice_eq, remove_index_from_tuple
from .utils import by_name_to_by_param, match_parameter_values
+from .utils import filter_aggregate_by_param
logger = logging.getLogger(__name__)
arg_support_enabled = True
@@ -375,7 +376,7 @@ def _num_args_from_by_name(by_name):
class AnalyticModel:
- u"""
+ """
Parameter-aware analytic energy/data size/... model.
Supports both static and parameter-based model attributes, and automatic detection of parameter-dependence.
@@ -662,8 +663,187 @@ class AnalyticModel:
pass
+def grep_aggregate_by_state_and_param(aggregate, name, param_index, param_value):
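+    """
+    Return a copy of `aggregate` reduced to the measurements of `name` whose
+    parameter at position `param_index` equals `param_value`.
+    """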
+ new_aggregate = dict()
+ new_aggregate[name] = {
+ "isa": aggregate[name]["isa"],
+ "attributes": aggregate[name]["attributes"],
+ }
+
+ param_index = soft_cast_int(param_index)
+ indices_to_copy = list(
+ map(lambda x: x[param_index] == param_value, aggregate[name]["param"])
+ )
+
+    if not any(indices_to_copy):
+ raise RuntimeError("empty result")
+
+ new_aggregate[name]["param"] = list(
+ map(
+ lambda iv: iv[1],
+ filter(
+ lambda iv: indices_to_copy[iv[0]], enumerate(aggregate[name]["param"]),
+ ),
+ )
+ )
+
+ for attribute in aggregate[name]["attributes"]:
+ new_aggregate[name][attribute] = aggregate[name][attribute][indices_to_copy]
+
+ return new_aggregate
+
+
+class DecisionTreeModel:
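+    """
+    Experimental decision-tree model.
+
+    For each state attribute, look for binary parameters whose two values lead
+    to different sets of influencing parameters, and recursively split the
+    measurements on the candidate parameter whose partitions give the lowest
+    model error (MAE).
+    """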
+ def __init__(self, by_name, parameters):
+ self.by_name = by_name
+ self.by_param = by_name_to_by_param(by_name)
+ self.parameters = sorted(parameters)
+
+ self.dtree = None
+
+        # Dtree concept: For each parameter, split on == value1 / == value2 (-> two partitions of the data).
+        # If the set of influencing parameters or the kind of dependence differs between
+        # the two partitions, the DTree must branch on this parameter.
+        # Selection of the first level based on TODO (the parameter for which the two partitions have the lowest
+        # absolute static model error? Or the one with the largest number of differing parameters / dependence
+        # types? A quality function like that would be nice here.)
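+        # Illustrative example (hypothetical parameter names): if the binary parameter "txpower"
+        # takes the values 10 and 30, the data is partitioned into {txpower == 10} and
+        # {txpower == 30}. If an attribute depends on "datarate" in only one of the two
+        # partitions, the tree should branch on "txpower" first.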
+
+ stats = ParamStats(self.by_name, self.by_param, self.parameters, dict(), False)
+ pre_candidates = list()
+ for state in self.states():
+ for i, param in enumerate(self.parameters):
+ if len(stats.distinct_values[state][param]) == 2:
+ logger.debug(f"{state} has binary param {param}")
+ pre_candidates.append(
+ (state, i, param, stats.distinct_values[state][param])
+ )
+
+ candidates = list()
+ for state, param_index, param_name, param_values in pre_candidates:
+ by_name_sub1 = grep_aggregate_by_state_and_param(
+ by_name, state, param_index, param_values[0]
+ )
+ by_param_sub1 = by_name_to_by_param(by_name_sub1)
+ by_name_sub2 = grep_aggregate_by_state_and_param(
+ by_name, state, param_index, param_values[1]
+ )
+ by_param_sub2 = by_name_to_by_param(by_name_sub2)
+ stats_sub1 = ParamStats(
+ by_name_sub1, by_param_sub1, self.parameters, dict(), False
+ )
+ stats_sub2 = ParamStats(
+ by_name_sub2, by_param_sub2, self.parameters, dict(), False
+ )
+ for attribute in by_name[state]["attributes"]:
+ param1 = stats_sub1.stats[state][attribute]["depends_on_params"]
+ param2 = stats_sub2.stats[state][attribute]["depends_on_params"]
+ if param1 != param2:
+ logger.debug(
+ f"{state} {attribute} by {param_name} is dtree candidate"
+ )
+ logger.debug(
+ " {} == {} -> depends on {}".format(
+ param_name, param_values[0], param1
+ )
+ )
+ logger.debug(
+ " {} == {} -> depends on {}".format(
+ param_name, param_values[1], param2
+ )
+ )
+ candidates.append(
+ (state, attribute, param_index, param_name, param_values)
+ )
+                # TODO: first figure out which parameter the recursive descent should use. There should only ever be one.
+ # logger.debug('>>> recursive descent for {}/{} with {} = {}'.format(state, attribute, param_name, param_values[0]))
+ # __class__(by_name_sub1, self.parameters)
+ # logger.debug('<<< recursive descent for {}/{} with {} = {}'.format(state, attribute, param_name, param_values[0]))
+ # logger.debug('>>> recursive descent for {}/{} with {} = {}'.format(state, attribute, param_name, param_values[1]))
+ # __class__(by_name_sub2, self.parameters)
+ # logger.debug('<<< recursive descent for {}/{} with {} = {}'.format(state, attribute, param_name, param_values[1]))
+
+ candidates_by_state_attribute = dict()
+ for state, attribute, param_index, param_name, param_values in candidates:
+ if (state, attribute) not in candidates_by_state_attribute:
+ candidates_by_state_attribute[(state, attribute)] = list()
+ candidates_by_state_attribute[(state, attribute)].append(
+ (param_index, param_name, param_values)
+ )
+
+ for k, params in candidates_by_state_attribute.items():
+ state, attribute = k
+ min_mae_global = np.inf
+ min_mae_param = None
+ for param_index, param_name, param_values in params:
+ by_name_sub1 = grep_aggregate_by_state_and_param(
+ by_name, state, param_index, param_values[0]
+ )
+ by_name_sub2 = grep_aggregate_by_state_and_param(
+ by_name, state, param_index, param_values[1]
+ )
+ m1 = PTAModel(by_name_sub1, self.parameters, dict())
+ m2 = PTAModel(by_name_sub2, self.parameters, dict())
+ pm1, pi1 = m1.get_fitted()
+ pm2, pi2 = m2.get_fitted()
+ mae1 = m1.assess(pm1)["by_name"][state][attribute]["mae"]
+                mae2 = m2.assess(pm2)["by_name"][state][attribute]["mae"]
+ min_mae = min(mae1, mae2)
+ if min_mae < min_mae_global:
+ min_mae_global = min_mae
+ min_mae_param = (
+ param_index,
+ param_name,
+ param_values,
+ [by_name_sub1, by_name_sub2],
+ )
+ logger.debug(
+ f"{state} {attribute}: splitting on {min_mae_param[1]} gives lowest MAE ({min_mae_global})"
+ )
+ dtree = {
+ "param_index": min_mae_param[0],
+ "param_name": min_mae_param[1],
+ "min_mae": min_mae_global,
+ "param_value_subtree": dict(),
+ }
+ logger.debug(
+ f">>> recursive descent for {state} {attribute} with {min_mae_param[1]}={min_mae_param[2][0]}"
+ )
+ dtree["param_value_subtree"][min_mae_param[2][0]] = __class__(
+ min_mae_param[3][0], self.parameters
+ ).get_tree(state, attribute)
+ logger.debug(
+ f"<<< recursive descent for {state} {attribute} with {min_mae_param[1]}={min_mae_param[2][0]}"
+ )
+ logger.debug(
+ f">>> recursive descent for {state} {attribute} with {min_mae_param[1]}={min_mae_param[2][1]}"
+ )
+ dtree["param_value_subtree"][min_mae_param[2][1]] = __class__(
+ min_mae_param[3][1], self.parameters
+ ).get_tree(state, attribute)
+ logger.debug(
+ f"<<< recursive descent for {state} {attribute} with {min_mae_param[1]}={min_mae_param[2][1]}"
+ )
+ if self.dtree is None:
+ self.dtree = dict()
+ self.dtree[(state, attribute)] = dtree
+
+ def get_tree(self, state=None, attribute=None):
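+        """
+        Return the decision tree for (state, attribute), or all trees keyed by
+        (state, attribute) if no arguments are given.
+        """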
+ if state is None or attribute is None:
+ return self.dtree
+ if self.dtree is not None and (state, attribute) in self.dtree:
+ return self.dtree[(state, attribute)]
+ return None
+
+ def states(self):
+ """Return sorted list of state names."""
+ return sorted(
+ list(
+ filter(lambda k: self.by_name[k]["isa"] == "state", self.by_name.keys())
+ )
+ )
+
+
class PTAModel:
- u"""
+ """
Parameter-aware PTA-based energy model.
Supports both static and parameter-based model attributes, and automatic detection of parameter-dependence.
diff --git a/lib/parameters.py b/lib/parameters.py
index 5c6b978..3a393f6 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -272,6 +272,7 @@ def _compute_param_statistics(
"corr_by_arg": [],
"depends_on_param": {},
"depends_on_arg": [],
+        "depends_on_params": [],
}
np.seterr("raise")
@@ -297,6 +298,9 @@ def _compute_param_statistics(
ret["std_param_lut"],
)
+ if ret["depends_on_param"][param]:
+ ret["depends_on_params"].append(param)
+
if state_or_trans in arg_count:
for arg_index in range(arg_count[state_or_trans]):
std_matrix, mean_std, lut_matrix = _std_by_param(
diff --git a/lib/utils.py b/lib/utils.py
index d28ecda..990ad01 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -204,6 +204,7 @@ def filter_aggregate_by_param(aggregate, parameters, parameter_filter):
indices_to_keep = list(
map(lambda x: x[param_index] == param_value, aggregate[name]["param"])
)
+            # aggregate[name]["param"] is a plain list, not a numpy array, so boolean-mask
+            # indexing (["param"][indices_to_keep]) does not work; use this map/filter expression instead
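+            # e.g. indices_to_keep == [True, False, True] keeps the param tuples at positions 0 and 2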
aggregate[name]["param"] = list(
map(
lambda iv: iv[1],