summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Daniel Friesel <daniel.friesel@uos.de> 2021-08-23 16:29:24 +0200
committer Daniel Friesel <daniel.friesel@uos.de> 2021-08-23 16:29:24 +0200
commit 43736e37f7b1806ec1500f0cf242324be311673c (patch)
tree 49adc86d556de15b7404a9e2d9b4ba34f13328bc
parent 693fa1b637b4cb2eb3796a4ac1376baf0552b8d4 (diff)
move build_dtree method back to ModelAttribute class
-rw-r--r-- lib/model.py 115
-rw-r--r-- lib/parameters.py 130
2 files changed, 131 insertions, 114 deletions
diff --git a/lib/model.py b/lib/model.py
index dc0cb4d..76444b0 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -346,126 +346,13 @@ class AnalyticModel:
):
with_function_leaves = False
- self.attr_by_name[name][attribute].model_function = self._build_dtree(
+ self.attr_by_name[name][attribute].build_dtree(
self.by_name[name]["param"],
self.by_name[name][attribute],
with_function_leaves,
threshold,
)
- def _build_dtree(
- self, parameters, data, with_function_leaves=False, threshold=100, level=0
- ):
- """
- Build a Decision Tree on `param` / `data` for kconfig models.
-
- :param this_symbols: parameter names
- :param this_data: list of measurements. Each entry is a (param vector, mearusements vector) tuple.
- param vector holds parameter values (same order as parameter names). mearuserements vector holds measurements.
- :param data_index: Index in measurements vector to use for model generation. Default 0.
- :param threshold: Return a StaticFunction leaf node if std(data[data_index]) < threshold. Default 100.
-
- :returns: SplitFunction or StaticFunction
- """
-
- # TODO remove data entries which are None (and remove corresponding parameters, too!)
-
- parameter_names = self.parameters
- if len(parameter_names) == 0 or np.std(data) < threshold:
- return StaticFunction(np.mean(data))
- # sf.value_error["std"] = np.std(data)
-
- mean_stds = list()
- for param_index, param in enumerate(parameter_names):
-
- unique_values = list(set(map(lambda p: p[param_index], parameters)))
-
- if None in unique_values:
- # param is a choice and undefined in some configs. Do not split on it.
- mean_stds.append(np.inf)
- continue
-
- if (
- with_function_leaves
- and len(unique_values) > 3
- and all(map(lambda x: type(x) is int, unique_values))
- ):
- # param can be modeled as a function. Do not split on it.
- mean_stds.append(np.inf)
- continue
-
- child_indexes = list()
- for value in unique_values:
- child_indexes.append(
- list(
- filter(
- lambda i: parameters[i][param_index] == value,
- range(len(parameters)),
- )
- )
- )
-
- if len(list(filter(len, child_indexes))) < 2:
- # this param only has a single value. there's no point in splitting.
- mean_stds.append(np.inf)
- continue
-
- children = list()
- for child in child_indexes:
- children.append(np.std(list(map(lambda i: data[i], child))))
-
- if np.any(np.isnan(children)):
- mean_stds.append(np.inf)
- else:
- mean_stds.append(np.mean(children))
-
- if np.all(np.isinf(mean_stds)):
- # all children have the same configuration. We shouldn't get here due to the threshold check above...
- if with_function_leaves:
- # try generating a function. if it fails, model_function is a StaticFunction.
- ma = ModelAttribute("tmp", "tmp", data, parameters, self.parameters, 0)
- ParamStats.compute_for_attr(ma)
- paramfit = ParamFit(parallel=False)
- for key, param, args, kwargs in ma.get_data_for_paramfit():
- paramfit.enqueue(key, param, args, kwargs)
- paramfit.fit()
- ma.set_data_from_paramfit(paramfit)
- return ma.model_function
- else:
- logging.warning(
- f"While building DTree for configurations {parameters}: Children have identical configuration, but high stddev ({np.std(data)}). Falling back to Staticfunction"
- )
- return StaticFunction(np.mean(data))
-
- symbol_index = np.argmin(mean_stds)
- symbol = parameter_names[symbol_index]
-
- unique_values = list(set(map(lambda p: p[symbol_index], parameters)))
-
- child = dict()
-
- for value in unique_values:
- indexes = list(
- filter(
- lambda i: parameters[i][symbol_index] == value,
- range(len(parameters)),
- )
- )
- child_parameters = list(map(lambda i: parameters[i], indexes))
- child_data = list(map(lambda i: data[i], indexes))
- if len(child_data):
- child[value] = self._build_dtree(
- child_parameters,
- child_data,
- with_function_leaves,
- threshold,
- level + 1,
- )
-
- assert len(child.values()) >= 2
-
- return SplitFunction(np.mean(data), symbol_index, child)
-
def to_dref(self, static_quality, lut_quality, model_quality) -> dict:
ret = dict()
for name in self.names:
diff --git a/lib/parameters.py b/lib/parameters.py
index e516926..238f496 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -7,6 +7,7 @@ from collections import OrderedDict
from copy import deepcopy
from multiprocessing import Pool
import dfatool.functions as df
+from .paramfit import ParamFit
from .utils import remove_index_from_tuple, is_numeric
from .utils import filter_aggregate_by_param, partition_by_param
@@ -723,3 +724,132 @@ class ModelAttribute:
if x.fit_success:
self.model_function = x
+
+ def build_dtree(self, parameters, data, with_function_leaves=False, threshold=100):
+ """
+ Build a Decision Tree on `parameters` / `data` for kconfig models and store it in `self.model_function`.
+
+ :param parameters: list of parameter vectors. Each vector holds parameter values (same order as `self.param_names`).
+ :param data: list of measurements. `data[i]` is the measurement belonging to `parameters[i]`.
+ :param with_function_leaves: If True, integer parameters with more than three distinct values are not split on; once no splittable parameter is left, a function is fitted via ParamFit instead. Default False.
+ :param threshold: Return a StaticFunction leaf node if std(data) < threshold. Default 100.
+ All arguments are passed on to `_build_dtree` unchanged.
+
+ :returns: None. The model function built by `_build_dtree` is assigned to `self.model_function`.
+ """
+ self.model_function = self._build_dtree(
+ parameters, data, with_function_leaves, threshold
+ )
+
+ def _build_dtree(
+ self, parameters, data, with_function_leaves=False, threshold=100, level=0
+ ):
+ """
+ Recursively build a Decision Tree on `parameters` / `data` for kconfig models.
+
+ :param parameters: list of parameter vectors. Each vector holds parameter values (same order as `self.param_names`).
+ :param data: list of measurements. `data[i]` is the measurement belonging to `parameters[i]`.
+ :param with_function_leaves: If True, integer parameters with more than three distinct values are not split on; once no splittable parameter is left, a function is fitted via ParamFit instead. Default False.
+ :param threshold: Return a StaticFunction leaf node if std(data) < threshold. Default 100.
+ :param level: recursion depth. Incremented by one for each child call and not used otherwise. Default 0.
+
+ :returns: SplitFunction or StaticFunction (or the model function fitted by ParamFit if `with_function_leaves` is set)
+ """
+
+ # TODO remove data entries which are None (and remove corresponding parameters, too!)
+
+ parameter_names = self.param_names
+ if len(parameter_names) == 0 or np.std(data) < threshold:
+ return df.StaticFunction(np.mean(data))
+ # sf.value_error["std"] = np.std(data)
+
+ mean_stds = list()
+ for param_index, param in enumerate(parameter_names):
+
+ unique_values = list(set(map(lambda p: p[param_index], parameters)))
+
+ if None in unique_values:
+ # param is a choice and undefined in some configs. Do not split on it.
+ mean_stds.append(np.inf)
+ continue
+
+ if (
+ with_function_leaves
+ and len(unique_values) > 3
+ and all(map(lambda x: type(x) is int, unique_values))
+ ):
+ # param can be modeled as a function. Do not split on it.
+ mean_stds.append(np.inf)
+ continue
+
+ child_indexes = list()
+ for value in unique_values:
+ child_indexes.append(
+ list(
+ filter(
+ lambda i: parameters[i][param_index] == value,
+ range(len(parameters)),
+ )
+ )
+ )
+
+ if len(list(filter(len, child_indexes))) < 2:
+ # this param only has a single value. there's no point in splitting.
+ mean_stds.append(np.inf)
+ continue
+
+ children = list()
+ for child in child_indexes:
+ children.append(np.std(list(map(lambda i: data[i], child))))
+
+ if np.any(np.isnan(children)):
+ mean_stds.append(np.inf)
+ else:
+ mean_stds.append(np.mean(children))
+
+ if np.all(np.isinf(mean_stds)):
+ # all children have the same configuration. We shouldn't get here due to the threshold check above...
+ if with_function_leaves:
+ # try generating a function. if it fails, model_function is a StaticFunction.
+ ma = ModelAttribute("tmp", "tmp", data, parameters, self.param_names, 0)
+ ParamStats.compute_for_attr(ma)
+ paramfit = ParamFit(parallel=False)
+ for key, param, args, kwargs in ma.get_data_for_paramfit():
+ paramfit.enqueue(key, param, args, kwargs)
+ paramfit.fit()
+ ma.set_data_from_paramfit(paramfit)
+ return ma.model_function
+ else:
+ logging.warning(
+ f"While building DTree for configurations {parameters}: Children have identical configuration, but high stddev ({np.std(data)}). Falling back to Staticfunction"
+ )
+ return df.StaticFunction(np.mean(data))
+
+ symbol_index = np.argmin(mean_stds)
+ symbol = parameter_names[symbol_index]
+
+ unique_values = list(set(map(lambda p: p[symbol_index], parameters)))
+
+ child = dict()
+
+ for value in unique_values:
+ indexes = list(
+ filter(
+ lambda i: parameters[i][symbol_index] == value,
+ range(len(parameters)),
+ )
+ )
+ child_parameters = list(map(lambda i: parameters[i], indexes))
+ child_data = list(map(lambda i: data[i], indexes))
+ if len(child_data):
+ child[value] = self._build_dtree(
+ child_parameters,
+ child_data,
+ with_function_leaves,
+ threshold,
+ level + 1,
+ )
+
+ assert len(child.values()) >= 2
+
+ return df.SplitFunction(np.mean(data), symbol_index, child)