-rw-r--r--   README.md              |  3
-rwxr-xr-x   bin/analyze-kconfig.py |  2
-rw-r--r--   lib/cli.py             |  7
-rw-r--r--   lib/functions.py       | 85
-rw-r--r--   lib/model.py           |  3
-rw-r--r--   lib/parameters.py      | 60
-rw-r--r--   lib/utils.py           | 51
7 files changed, 157 insertions(+), 54 deletions(-)
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -110,7 +110,8 @@ The following variables may be set to alter the behaviour of dfatool components.
 | `DFATOOL_DTREE_NONBINARY_NODES` | 0, **1** | Enable non-binary nodes (i.e., nodes with more than two children corresponding to enum variables) in decision trees |
 | `DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS` | 0, **1** | Ignore parameters deemed irrelevant by stddev heuristic during regression tree generation |
 | `DFATOOL_DTREE_LOSS_IGNORE_SCALAR` | **0**, 1 | Ignore scalar parameters when computing the loss for split node candidates. Instead of computing the loss of a single partition for each `x_i == j`, compute the loss of partitions for `x_i == j` in which non-scalar parameters vary and scalar parameters are constant. This way, scalar parameters do not affect the decision about which non-scalar parameter to use for splitting. |
-| `DFATOOL_PARAM_CATEGORIAL_TO_SCALAR` | **0**, 1 | Some models (e.g. sklearn CART, XGBoost) do not support categorial parameters. Ignore them (0) or convert them to scalar indexes (1). |
+| `DFATOOL_PARAM_CATEGORIAL_TO_SCALAR` | **0**, 1 | Some models (e.g. FOL, sklearn CART, XGBoost) do not support categorial parameters. Ignore them (0) or convert them to scalar indexes (1). |
+| `DFATOOL_FIT_FOL` | **0**, 1 | Build a first-order linear function (i.e., a * param1 + b * param2 + ...) instead of more complex functions or tree structures. |
 
 ## Examples
 
diff --git a/bin/analyze-kconfig.py b/bin/analyze-kconfig.py
index 9c3d814..c340901 100755
--- a/bin/analyze-kconfig.py
+++ b/bin/analyze-kconfig.py
@@ -293,6 +293,8 @@ def main():
             info = param_info(name, attribute)
             if type(info) is dfatool.cli.AnalyticFunction:
                 dfatool.cli.print_analyticinfo(f"{name:20s} {attribute:15s}", info)
+            elif type(info) is dfatool.cli.FOLFunction:
+                dfatool.cli.print_analyticinfo(f"{name:20s} {attribute:15s}", info)
             elif type(info) is dfatool.cli.SplitFunction:
                 dfatool.cli.print_splitinfo(
                     model.parameters, info, f"{name:20s} {attribute:15s}"
diff --git a/lib/cli.py b/lib/cli.py
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -1,6 +1,11 @@
 #!/usr/bin/env python3
 
-from dfatool.functions import SplitFunction, AnalyticFunction, StaticFunction
+from dfatool.functions import (
+    SplitFunction,
+    AnalyticFunction,
+    StaticFunction,
+    FOLFunction,
+)
 
 
 def print_static(model, static_model, name, attribute):
diff --git a/lib/functions.py b/lib/functions.py
index 3c2b424..0b0044b 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -11,7 +11,7 @@ import numpy as np
 import os
 import re
 from scipy import optimize
-from .utils import is_numeric
+from .utils import is_numeric, param_to_ndarray
 
 logger = logging.getLogger(__name__)
 
@@ -600,6 +600,89 @@ class XGBoostFunction(SKLearnRegressionFunction):
         return 1 + max(ret)
 
 
+# first-order linear function (no feature interaction)
+class FOLFunction(ModelFunction):
+    def __init__(self, value, parameters, num_args=0):
+        super().__init__(value)
+        self.parameter_names = parameters
+        self._num_args = num_args
+        self.fit_success = False
+
+    def fit(self, param_values, data):
+        categorial_to_scalar = bool(
+            int(os.getenv("DFATOOL_PARAM_CATEGORIAL_TO_SCALAR", "0"))
+        )
+        fit_parameters, categorial_to_index, ignore_index = param_to_ndarray(
+            param_values,
+            with_nan=False,
+            categorial_to_scalar=categorial_to_scalar,
+        )
+        self.categorial_to_index = categorial_to_index
+        self.ignore_index = ignore_index
+        fit_parameters = fit_parameters.swapaxes(0, 1)
+        num_vars = fit_parameters.shape[0]
+        funbuf = "lambda reg_param, model_param: 0"
+        for i in range(num_vars):
+            funbuf += f" + reg_param[{i}] * model_param[{i}]"
+        self._function_str = self.model_function = funbuf
+        self._function = eval(funbuf)
+
+        error_function = lambda P, X, y: self._function(P, X) - y
+        self.model_args = list(np.ones((num_vars)))
+        try:
+            res = optimize.least_squares(
+                error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15
+            )
+        except ValueError as err:
+            logger.warning(f"Fit failed: {err} (function: {self.model_function})")
+            return
+        if res.status > 0:
+            self.model_args = res.x
+            self.fit_success = True
+        else:
+            logger.warning(
+                f"Fit failed: {res.message} (function: {self.model_function})"
+            )
+
+    def is_predictable(self, param_list=None):
+        """
+        Return whether the model function can be evaluated on the given parameter values.
+        """
+        return True
+
+    def eval(self, param_list=None):
+        """
+        Evaluate model function with specified param/arg values.
+
+        If param_list is None, the static value is returned instead.
+
+        """
+        if param_list is None:
+            return self.value
+        actual_param_list = list()
+        for i, param in enumerate(param_list):
+            if not self.ignore_index[i]:
+                if i in self.categorial_to_index:
+                    try:
+                        actual_param_list.append(self.categorial_to_index[i][param])
+                    except KeyError:
+                        # param was not part of training data. Substitute an unused scalar.
+                        # Note that all param values which were not part of training data
+                        # map to the same scalar this way. This should be harmless.
+                        actual_param_list.append(
+                            max(self.categorial_to_index[i].values()) + 1
+                        )
+                else:
+                    actual_param_list.append(param)
+        try:
+            return self._function(self.model_args, actual_param_list)
+        except FloatingPointError as e:
+            logger.error(
+                f"{e} when predicting {self._function_str}({param_list}), returning static value"
+            )
+            return self.value
+
+
 class AnalyticFunction(ModelFunction):
     """
     A multi-dimensional model function, generated from a string, which can be optimized using regression.
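For illustration, here is a minimal standalone sketch (not part of this commit) of the fit that FOLFunction.fit performs, assuming purely numeric parameters; the param_to_ndarray preprocessing and categorial handling are omitted, and the training data is made up. Like the generated funbuf, the model has no intercept and no interaction terms:

    import numpy as np
    from scipy import optimize

    # Made-up observations following value = 2*param0 + 5*param1.
    # Shape (num_vars, n_samples), matching fit_parameters after swapaxes(0, 1).
    X = np.array([[1.0, 2.0, 1.0, 3.0, 2.0],
                  [1.0, 1.0, 2.0, 2.0, 3.0]])
    y = np.array([7.0, 9.0, 12.0, 16.0, 19.0])

    # Same shape of function as the eval()'d funbuf: a plain linear combination.
    model = lambda reg_param, model_param: (
        reg_param[0] * model_param[0] + reg_param[1] * model_param[1]
    )
    residuals = lambda P, X, y: model(P, X) - y

    res = optimize.least_squares(residuals, np.ones(2), args=(X, y), xtol=2e-15)
    print(res.x)  # approximately [2. 5.]

Because the generated lambda starts at 0 and only adds reg_param[i] * model_param[i] terms, the fitted function passes through the origin; a constant offset in the data cannot be modelled directly and shows up as biased coefficients or residual error.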
diff --git a/lib/model.py b/lib/model.py
index 34bb564..c97ecf0 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -271,6 +271,7 @@ class AnalyticModel:
 
         paramfit = ParamFit()
         tree_allowed = bool(int(os.getenv("DFATOOL_DTREE_ENABLED", "1")))
+        use_fol = bool(int(os.getenv("DFATOOL_FIT_FOL", "0")))
         tree_required = dict()
 
         for name in self.names:
@@ -278,6 +279,8 @@ class AnalyticModel:
             for attr in self.attr_by_name[name].keys():
                 if self.attr_by_name[name][attr].function_override is not None:
                     self.attr_by_name[name][attr].fit_override_function()
+                elif use_fol:
+                    self.attr_by_name[name][attr].build_fol_model()
                 elif self.attr_by_name[name][
                     attr
                 ].all_relevant_parameters_are_none_or_numeric():
diff --git a/lib/parameters.py b/lib/parameters.py
index 300bb6f..ed56bdd 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -10,6 +10,7 @@ import dfatool.functions as df
 from .paramfit import ParamFit
 from .utils import remove_index_from_tuple, is_numeric
 from .utils import filter_aggregate_by_param, partition_by_param
+from .utils import param_to_ndarray
 
 logger = logging.getLogger(__name__)
 
@@ -38,57 +39,6 @@ def distinct_param_values(param_tuples):
     return distinct_values
 
 
-def param_to_ndarray(
-    param_tuples, with_nan=True, categorial_to_scalar=False, ignore_indexes=list()
-):
-    has_nan = dict()
-    has_non_numeric = dict()
-    distinct_values = dict()
-    category_to_scalar = dict()
-
-    for param_tuple in param_tuples:
-        for i, param in enumerate(param_tuple):
-            if not is_numeric(param):
-                if param is None:
-                    has_nan[i] = True
-                else:
-                    has_non_numeric[i] = True
-                if categorial_to_scalar and param is not None:
-                    if not i in distinct_values:
-                        distinct_values[i] = set()
-                    distinct_values[i].add(param)
-
-    for i, paramset in distinct_values.items():
-        distinct_values[i] = sorted(paramset)
-        category_to_scalar[i] = dict()
-        for j, param in enumerate(distinct_values[i]):
-            category_to_scalar[i][param] = j
-
-    ignore_index = dict()
-    for i in range(len(param_tuples[0])):
-        if has_non_numeric.get(i, False) and not categorial_to_scalar:
-            ignore_index[i] = True
-        elif not with_nan and has_nan.get(i, False):
-            ignore_index[i] = True
-        else:
-            ignore_index[i] = False
-
-    for i in ignore_indexes:
-        ignore_index[i] = True
-
-    ret_tuples = list()
-    for param_tuple in param_tuples:
-        ret_tuple = list()
-        for i, param in enumerate(param_tuple):
-            if not ignore_index[i]:
-                if i in category_to_scalar:
-                    ret_tuple.append(category_to_scalar[i][param])
-                else:
-                    ret_tuple.append(param)
-        ret_tuples.append(ret_tuple)
-    return np.asarray(ret_tuples), category_to_scalar, ignore_index
-
-
 def _depends_on_param(corr_param, std_param, std_lut):
     # if self.use_corrcoef:
     if False:
@@ -929,6 +879,14 @@ class ModelAttribute:
 
         return ret
 
+    def build_fol_model(self):
+        x = df.FOLFunction(self.median, self.param_names)
+        x.fit(self.param_values, self.data)
+        if x.fit_success:
+            self.model_function = x
+        else:
+            logger.warning("Fit of first-order linear model function failed.")
+
     def fit_override_function(self):
         function_str = self.function_override
         x = df.AnalyticFunction(
diff --git a/lib/utils.py b/lib/utils.py
index 7d5b5b9..a8acb51 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -201,6 +201,57 @@ def partition_by_param(data, param_values, ignore_parameters=list()):
     return ret
 
 
+def param_to_ndarray(
+    param_tuples, with_nan=True, categorial_to_scalar=False, ignore_indexes=list()
+):
+    has_nan = dict()
+    has_non_numeric = dict()
+    distinct_values = dict()
+    category_to_scalar = dict()
+
+    for param_tuple in param_tuples:
+        for i, param in enumerate(param_tuple):
+            if not is_numeric(param):
+                if param is None:
+                    has_nan[i] = True
+                else:
+                    has_non_numeric[i] = True
+                if categorial_to_scalar and param is not None:
+                    if not i in distinct_values:
+                        distinct_values[i] = set()
+                    distinct_values[i].add(param)
+
+    for i, paramset in distinct_values.items():
+        distinct_values[i] = sorted(paramset)
+        category_to_scalar[i] = dict()
+        for j, param in enumerate(distinct_values[i]):
+            category_to_scalar[i][param] = j
+
+    ignore_index = dict()
+    for i in range(len(param_tuples[0])):
+        if has_non_numeric.get(i, False) and not categorial_to_scalar:
+            ignore_index[i] = True
+        elif not with_nan and has_nan.get(i, False):
+            ignore_index[i] = True
+        else:
+            ignore_index[i] = False
+
+    for i in ignore_indexes:
+        ignore_index[i] = True
+
+    ret_tuples = list()
+    for param_tuple in param_tuples:
+        ret_tuple = list()
+        for i, param in enumerate(param_tuple):
+            if not ignore_index[i]:
+                if i in category_to_scalar:
+                    ret_tuple.append(category_to_scalar[i][param])
+                else:
+                    ret_tuple.append(param)
+        ret_tuples.append(ret_tuple)
+    return np.asarray(ret_tuples), category_to_scalar, ignore_index
+
+
 def param_dict_to_list(param_dict, parameter_names, default=None):
     """
     Convert {"foo": 1, "bar": 2}, ["bar", "foo", "quux"] to [2, 1, None]
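A hypothetical usage sketch for the relocated param_to_ndarray helper (not part of this commit; the parameter tuples are made up, and the import assumes dfatool is importable as a package):

    from dfatool.utils import param_to_ndarray

    # Index 0 is scalar, index 1 is categorial.
    params = [(10, "lzma"), (20, "zstd"), (30, "lzma")]

    # Default: the categorial column is dropped entirely.
    arr, mapping, ignore = param_to_ndarray(params)
    print(arr.tolist())  # [[10], [20], [30]]
    print(ignore)        # {0: False, 1: True}

    # With categorial_to_scalar=True, distinct values map to sorted indexes,
    # as consumed by FOLFunction above.
    arr, mapping, ignore = param_to_ndarray(params, categorial_to_scalar=True)
    print(arr.tolist())  # [[10, 0], [20, 1], [30, 0]]
    print(mapping)       # {1: {'lzma': 0, 'zstd': 1}}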