diff options
Diffstat (limited to 'lib/parameters.py')
-rw-r--r-- | lib/parameters.py | 130 |
1 files changed, 130 insertions, 0 deletions
# Methods of class ModelAttribute in lib/parameters.py, written at column 0
# because the enclosing class body is not part of this excerpt.
# The same change adds the module-level import `from .paramfit import ParamFit`,
# which the function-leaf branch of _build_dtree relies on.


def build_dtree(self, parameters, data, with_function_leaves=False, threshold=100):
    """
    Build a decision tree on `parameters` / `data` for kconfig models.

    The resulting tree is stored in `self.model_function`.

    :param parameters: list of parameter vectors, one per entry of `data`.
        Values are looked up by position, in the same order as `self.param_names`.
    :param data: list of measurements; `data[i]` belongs to `parameters[i]`.
    :param with_function_leaves: if True, parameters with more than three distinct
        values that are all plain `int` are not used for splitting, and nodes that
        cannot be split any further are handed to ParamFit instead of becoming a
        StaticFunction. Default False.
    :param threshold: a (sub)tree becomes a StaticFunction leaf if
        std(data) < threshold. Default 100.

    :returns: None
    """
    self.model_function = self._build_dtree(
        parameters, data, with_function_leaves, threshold
    )


def _build_dtree(
    self, parameters, data, with_function_leaves=False, threshold=100, level=0
):
    """
    Recursively build a decision tree on `parameters` / `data` for kconfig models.

    At each node, the parameter whose partitions have the lowest mean standard
    deviation is used for splitting.

    :param parameters: list of parameter vectors, one per entry of `data`.
        Values are looked up by position, in the same order as `self.param_names`.
        Parameter values must be hashable.
    :param data: list of measurements; `data[i]` belongs to `parameters[i]`.
    :param with_function_leaves: if True, parameters with more than three distinct
        values that are all plain `int` are not used for splitting, and nodes that
        cannot be split any further are handed to ParamFit instead of becoming a
        StaticFunction. Default False.
    :param threshold: return a StaticFunction leaf node if std(data) < threshold.
        Default 100.
    :param level: recursion depth of this node (0 for the root). It is only
        forwarded, incremented by one, to the recursive calls.

    :returns: SplitFunction or StaticFunction; with `with_function_leaves`,
        possibly the model function determined by ParamFit.
    """

    # TODO remove data entries which are None (and remove corresponding parameters, too!)

    parameter_names = self.param_names
    if len(parameter_names) == 0 or np.std(data) < threshold:
        return df.StaticFunction(np.mean(data))

    def indexes_by_value(param_index):
        # Map each distinct value of the parameter at `param_index` to the list
        # of indexes (into `parameters` / `data`) at which it occurs.
        # Single pass; keys are ordered by first appearance.
        groups = dict()
        for i, param_vector in enumerate(parameters):
            groups.setdefault(param_vector[param_index], list()).append(i)
        return groups

    # mean_stds[i]: mean standard deviation of the partitions obtained by
    # splitting on parameter i, or inf if parameter i must not be split on.
    mean_stds = list()
    for param_index, _ in enumerate(parameter_names):
        groups = indexes_by_value(param_index)

        if None in groups:
            # param is a choice and undefined in some configs. Do not split on it.
            mean_stds.append(np.inf)
            continue

        if (
            with_function_leaves
            and len(groups) > 3
            # exact type check on purpose: bool is a subclass of int and must
            # not count as a numeric parameter here
            and all(type(value) is int for value in groups)
        ):
            # param can be modeled as a function. Do not split on it.
            mean_stds.append(np.inf)
            continue

        if len(groups) < 2:
            # this param only has a single value. there's no point in splitting.
            mean_stds.append(np.inf)
            continue

        child_stds = [
            np.std([data[i] for i in indexes]) for indexes in groups.values()
        ]

        if np.any(np.isnan(child_stds)):
            mean_stds.append(np.inf)
        else:
            mean_stds.append(np.mean(child_stds))

    if np.all(np.isinf(mean_stds)):
        # all children have the same configuration. We shouldn't get here due to the threshold check above...
        if with_function_leaves:
            # try generating a function. if it fails, model_function is a StaticFunction.
            ma = ModelAttribute("tmp", "tmp", data, parameters, self.param_names, 0)
            ParamStats.compute_for_attr(ma)
            paramfit = ParamFit(parallel=False)
            for key, param, args, kwargs in ma.get_data_for_paramfit():
                paramfit.enqueue(key, param, args, kwargs)
            paramfit.fit()
            ma.set_data_from_paramfit(paramfit)
            return ma.model_function
        else:
            logging.warning(
                f"While building DTree for configurations {parameters}: Children have identical configuration, but high stddev ({np.std(data)}). Falling back to Staticfunction"
            )
            return df.StaticFunction(np.mean(data))

    # Split on the parameter with the lowest mean standard deviation.
    symbol_index = np.argmin(mean_stds)

    child = dict()
    for value, indexes in indexes_by_value(symbol_index).items():
        child[value] = self._build_dtree(
            [parameters[i] for i in indexes],
            [data[i] for i in indexes],
            with_function_leaves,
            threshold,
            level + 1,
        )

    # A parameter with a finite mean_stds entry has at least two distinct values.
    assert len(child) >= 2

    return df.SplitFunction(np.mean(data), symbol_index, child)