diff options
-rwxr-xr-x | bin/analyze-config.py | 81 | ||||
-rw-r--r-- | lib/model.py | 79 |
2 files changed, 80 insertions, 80 deletions
diff --git a/bin/analyze-config.py b/bin/analyze-config.py index 06c1bfe..2a92601 100755 --- a/bin/analyze-config.py +++ b/bin/analyze-config.py @@ -142,83 +142,6 @@ def main(): model = AnalyticModel(by_name, params, compute_stats=False) - def get_min(this_symbols, this_data, data_index=0, threshold=100, level=0): - """ - Build a Decision Tree on `this_data`. - - :param this_symbols: parameter names - :param this_data: list of measurements. Each entry is a (param vector, mearusements vector) tuple. - param vector holds parameter values (same order as parameter names). mearuserements vector holds measurements. - :param data_index: Index in measurements vector to use for model generation. Default 0. - :param threshold: Return a StaticFunction leaf node if std(data[data_index]) < threshold. Default 100. - - :returns: SplitFunction or StaticFunction - """ - - rom_sizes = list(map(lambda x: x[1][data_index], this_data)) - - if np.std(rom_sizes) < threshold or len(this_symbols) == 0: - return StaticFunction(np.mean(rom_sizes)) - # sf.value_error["std"] = np.std(rom_sizes) - - mean_stds = list() - for i, param in enumerate(this_symbols): - - unique_values = list(set(map(lambda vrr: vrr[0][i], this_data))) - - if None in unique_values: - # param is a choice and undefined in some configs. Do not split on it. - mean_stds.append(np.inf) - continue - - child_values = list() - for value in unique_values: - child_values.append( - list(filter(lambda vrr: vrr[0][i] == value, this_data)) - ) - - if len(list(filter(len, child_values))) < 2: - # this param only has a single value. there's no point in splitting. - mean_stds.append(np.inf) - continue - - children = list() - for child in child_values: - children.append(np.std(list(map(lambda x: x[1][data_index], child)))) - - if np.any(np.isnan(children)): - mean_stds.append(np.inf) - else: - mean_stds.append(np.mean(children)) - - if np.all(np.isinf(mean_stds)): - # all children have the same configuration. 
We shouldn't get here due to the threshold check above... - logging.warning("Waht") - return StaticFunction(np.mean(rom_sizes)) - - symbol_index = np.argmin(mean_stds) - symbol = this_symbols[symbol_index] - - unique_values = list(set(map(lambda vrr: vrr[0][symbol_index], this_data))) - - child = dict() - - for value in unique_values: - children = list( - filter(lambda vrr: vrr[0][symbol_index] == value, this_data) - ) - if len(children): - print( - f"Level {level} split on {symbol} == {value} has {len(children)} children" - ) - child[value] = get_min( - this_symbols, children, data_index, threshold, level + 1 - ) - - assert len(child.values()) >= 2 - - return SplitFunction(np.mean(rom_sizes), symbol_index, child) - model.attr_by_name["multipass"] = dict() model.attr_by_name["multipass"]["rom_usage"] = ModelAttribute( "multipass", @@ -235,10 +158,10 @@ def main(): params, ) - model.attr_by_name["multipass"]["rom_usage"].model_function = get_min( + model.attr_by_name["multipass"]["rom_usage"].model_function = model.build_tree( params, data, 0, 100 ) - model.attr_by_name["multipass"]["ram_usage"].model_function = get_min( + model.attr_by_name["multipass"]["ram_usage"].model_function = model.build_tree( params, data, 1, 20 ) diff --git a/lib/model.py b/lib/model.py index 1e6bb28..8c00fd2 100644 --- a/lib/model.py +++ b/lib/model.py @@ -4,7 +4,7 @@ import logging import numpy as np import os from .automata import PTA, ModelAttribute -from .functions import StaticFunction, SubstateFunction +from .functions import StaticFunction, SubstateFunction, SplitFunction from .parameters import ParallelParamStats, codependent_param_dict from .paramfit import ParallelParamFit from .utils import soft_cast_int, by_name_to_by_param, regression_measures @@ -316,6 +316,83 @@ class AnalyticModel: return detailed_results + def build_tree(self, this_symbols, this_data, data_index=0, threshold=100, level=0): + """ + Build a Decision Tree on `this_data` for kconfig models. 
+ + :param this_symbols: parameter names + :param this_data: list of measurements. Each entry is a (param vector, measurements vector) tuple. + param vector holds parameter values (same order as parameter names). measurements vector holds measurements. + :param data_index: Index in measurements vector to use for model generation. Default 0. + :param threshold: Return a StaticFunction leaf node if std(data[data_index]) < threshold. Default 100. + + :returns: SplitFunction or StaticFunction + """ + + rom_sizes = list(map(lambda x: x[1][data_index], this_data)) + + if np.std(rom_sizes) < threshold or len(this_symbols) == 0: + return StaticFunction(np.mean(rom_sizes)) + # sf.value_error["std"] = np.std(rom_sizes) + + mean_stds = list() + for i, param in enumerate(this_symbols): + + unique_values = list(set(map(lambda vrr: vrr[0][i], this_data))) + + if None in unique_values: + # param is a choice and undefined in some configs. Do not split on it. + mean_stds.append(np.inf) + continue + + child_values = list() + for value in unique_values: + child_values.append( + list(filter(lambda vrr: vrr[0][i] == value, this_data)) + ) + + if len(list(filter(len, child_values))) < 2: + # this param only has a single value. there's no point in splitting. + mean_stds.append(np.inf) + continue + + children = list() + for child in child_values: + children.append(np.std(list(map(lambda x: x[1][data_index], child)))) + + if np.any(np.isnan(children)): + mean_stds.append(np.inf) + else: + mean_stds.append(np.mean(children)) + + if np.all(np.isinf(mean_stds)): + # all children have the same configuration. We shouldn't get here due to the threshold check above... 
+ logging.warning("Waht") + return StaticFunction(np.mean(rom_sizes)) + + symbol_index = np.argmin(mean_stds) + symbol = this_symbols[symbol_index] + + unique_values = list(set(map(lambda vrr: vrr[0][symbol_index], this_data))) + + child = dict() + + for value in unique_values: + children = list( + filter(lambda vrr: vrr[0][symbol_index] == value, this_data) + ) + if len(children): + print( + f"Level {level} split on {symbol} == {value} has {len(children)} children" + ) + child[value] = self.build_tree( + this_symbols, children, data_index, threshold, level + 1 + ) + + assert len(child.values()) >= 2 + + return SplitFunction(np.mean(rom_sizes), symbol_index, child) + def to_dref(self, static_quality, lut_quality, model_quality) -> dict: ret = dict() for name in self.names: |