summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Friesel <daniel.friesel@uos.de>2020-07-09 14:55:08 +0200
committerDaniel Friesel <daniel.friesel@uos.de>2020-07-09 14:55:08 +0200
commit016210b6c64881a683b45a91af213a1d1a1128ab (patch)
tree48f700bf5c1039ca79cd7f3e69c1f48fdd4595e3
parent2f5fa87125ccf44f1c6e208ed736070274ea6e2e (diff)
working candidate detection for decision trees
-rwxr-xr-xbin/analyze-archive.py4
-rw-r--r--lib/model.py112
-rw-r--r--lib/parameters.py4
-rw-r--r--lib/utils.py1
4 files changed, 118 insertions, 3 deletions
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index c531bb6..4dd2f90 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -112,7 +112,7 @@ import sys
from dfatool import plotter
from dfatool.loader import RawData, pta_trace_to_aggregate
from dfatool.functions import gplearn_to_function
-from dfatool.model import PTAModel
+from dfatool.model import PTAModel, DecisionTreeModel
from dfatool.validation import CrossValidator
from dfatool.utils import filter_aggregate_by_param
from dfatool.automata import PTA
@@ -454,6 +454,8 @@ if __name__ == "__main__":
pta=pta,
)
+ dtreemodel = DecisionTreeModel(by_name, parameters,)
+
if xv_method:
xv = CrossValidator(PTAModel, by_name, parameters, arg_count)
diff --git a/lib/model.py b/lib/model.py
index 082fe8a..68286c9 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -10,6 +10,7 @@ from .functions import AnalyticFunction
from .parameters import ParamStats
from .utils import is_numeric, soft_cast_int, param_slice_eq, remove_index_from_tuple
from .utils import by_name_to_by_param, match_parameter_values
+from .utils import filter_aggregate_by_param
logger = logging.getLogger(__name__)
arg_support_enabled = True
@@ -374,7 +375,7 @@ def _num_args_from_by_name(by_name):
class AnalyticModel:
- u"""
+ """
Parameter-aware analytic energy/data size/... model.
Supports both static and parameter-based model attributes, and automatic detection of parameter-dependence.
@@ -661,8 +662,115 @@ class AnalyticModel:
pass
+def grep_aggregate_by_state_and_param(aggregate, name, param_index, param_value):
+ new_aggregate = dict()
+ new_aggregate[name] = {
+ "attributes": aggregate[name]["attributes"],
+ }
+
+ param_index = soft_cast_int(param_index)
+ indices_to_copy = list(
+ map(lambda x: x[param_index] == param_value, aggregate[name]["param"])
+ )
+
+ if len(indices_to_copy) == 0:
+ raise RuntimeError("empty result")
+
+ new_aggregate[name]["param"] = list(
+ map(
+ lambda iv: iv[1],
+ filter(
+ lambda iv: indices_to_copy[iv[0]], enumerate(aggregate[name]["param"]),
+ ),
+ )
+ )
+
+ for attribute in aggregate[name]["attributes"]:
+ new_aggregate[name][attribute] = aggregate[name][attribute][indices_to_copy]
+
+ return new_aggregate
+
+
+class DecisionTreeModel:
+ def __init__(self, by_name, parameters):
+ self.by_name = by_name
+ self.by_param = by_name_to_by_param(by_name)
+ self.parameters = sorted(parameters)
+
+ # Dtree-Konzept: Für jeden Param: Split auf ==wert1 / == wert2 (-> zwei Partitionen der Daten)
+ # Falls Menge beeinflussender Parameter in den beiden Partitionen unterschiedlich oder
+ # Art der Abhängigkeit unterschiedlich: Hiernach muss der DTree unterscheiden.
+ # Auswahl der ersten Ebene anhand TODO (Parameter, bei dem die beiden Partitionen den niedrigsten absolute static model error haben?
+ # Oder bei dem es die größte Anzahl unterschiedlciher Parameter / Abhängigkeitstypen gibt? So eine Gütefunktion wäre dafür nice)
+
+ stats = ParamStats(self.by_name, self.by_param, self.parameters, dict(), False)
+ pre_candidates = list()
+ for state in self.states():
+ for i, param in enumerate(self.parameters):
+ if len(stats.distinct_values[state][param]) == 2:
+ logger.debug(f"{state} has binary param {param}")
+ pre_candidates.append(
+ (state, i, param, stats.distinct_values[state][param])
+ )
+
+ candidates = list()
+ for state, param_index, param_name, param_values in pre_candidates:
+ by_name_sub1 = grep_aggregate_by_state_and_param(
+ by_name, state, param_index, param_values[0]
+ )
+ by_param_sub1 = by_name_to_by_param(by_name_sub1)
+ by_name_sub2 = grep_aggregate_by_state_and_param(
+ by_name, state, param_index, param_values[1]
+ )
+ by_param_sub2 = by_name_to_by_param(by_name_sub2)
+ stats_sub1 = ParamStats(
+ by_name_sub1, by_param_sub1, self.parameters, dict(), False
+ )
+ stats_sub2 = ParamStats(
+ by_name_sub2, by_param_sub2, self.parameters, dict(), False
+ )
+ for attribute in by_name[state]["attributes"]:
+ param1 = stats_sub1.stats[state][attribute]["depends_on_params"]
+ param2 = stats_sub2.stats[state][attribute]["depends_on_params"]
+ if param1 != param2:
+ logger.debug(
+ f"{state} {attribute} by {param_name} is dtree candidate"
+ )
+ logger.debug(
+ " {} == {} -> depends on {}".format(
+ param_name, param_values[0], param1
+ )
+ )
+ logger.debug(
+ " {} == {} -> depends on {}".format(
+ param_name, param_values[1], param2
+ )
+ )
+ candidates.append(
+ (state, attribute, param_index, param_name, param_values)
+ )
+
+ candidates_by_state_attribute = dict()
+ for state, attribute, param_index, param_name, param_values in candidates:
+ if (state, attribute) not in candidates_by_state_attribute:
+ candidates_by_state_attribute[(state, attribute)] = list()
+ candidates_by_state_attribute[(state, attribute)].append(
+ (param_index, param_name, param_values)
+ )
+
+ print(candidates_by_state_attribute)
+
+ def states(self):
+ """Return sorted list of state names."""
+ return sorted(
+ list(
+ filter(lambda k: self.by_name[k]["isa"] == "state", self.by_name.keys())
+ )
+ )
+
+
class PTAModel:
- u"""
+ """
Parameter-aware PTA-based energy model.
Supports both static and parameter-based model attributes, and automatic detection of parameter-dependence.
diff --git a/lib/parameters.py b/lib/parameters.py
index 5c6b978..3a393f6 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -272,6 +272,7 @@ def _compute_param_statistics(
"corr_by_arg": [],
"depends_on_param": {},
"depends_on_arg": [],
+ "depends_on_params": list(),
}
np.seterr("raise")
@@ -297,6 +298,9 @@ def _compute_param_statistics(
ret["std_param_lut"],
)
+ if ret["depends_on_param"][param]:
+ ret["depends_on_params"].append(param)
+
if state_or_trans in arg_count:
for arg_index in range(arg_count[state_or_trans]):
std_matrix, mean_std, lut_matrix = _std_by_param(
diff --git a/lib/utils.py b/lib/utils.py
index d28ecda..990ad01 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -204,6 +204,7 @@ def filter_aggregate_by_param(aggregate, parameters, parameter_filter):
indices_to_keep = list(
map(lambda x: x[param_index] == param_value, aggregate[name]["param"])
)
+ # ["param"] is not a numpy array, so we can't use ["param"][indices_to_keep] and rely on this map-filter-expression instead
aggregate[name]["param"] = list(
map(
lambda iv: iv[1],