diff options
author | Daniel Friesel <daniel.friesel@uos.de> | 2022-06-03 13:03:05 +0200 |
---|---|---|
committer | Daniel Friesel <daniel.friesel@uos.de> | 2022-06-03 13:03:05 +0200 |
commit | 5ba5bc244d5e9d967279fa9da4900d1063447da6 (patch) | |
tree | c890c50b9f9b446d6d71091f5bdd766f2ded0c7f /lib | |
parent | 509629740f782e11d22d7cf0c000bc0423782d7f (diff) |
add first order linear functions for evaluation purposes
Diffstat (limited to 'lib')
-rw-r--r-- | lib/cli.py | 7 | ||||
-rw-r--r-- | lib/functions.py | 85 | ||||
-rw-r--r-- | lib/model.py | 3 | ||||
-rw-r--r-- | lib/parameters.py | 60 | ||||
-rw-r--r-- | lib/utils.py | 51 |
5 files changed, 153 insertions, 53 deletions
@@ -1,6 +1,11 @@ #!/usr/bin/env python3 -from dfatool.functions import SplitFunction, AnalyticFunction, StaticFunction +from dfatool.functions import ( + SplitFunction, + AnalyticFunction, + StaticFunction, + FOLFunction, +) def print_static(model, static_model, name, attribute): diff --git a/lib/functions.py b/lib/functions.py index 3c2b424..0b0044b 100644 --- a/lib/functions.py +++ b/lib/functions.py @@ -11,7 +11,7 @@ import numpy as np import os import re from scipy import optimize -from .utils import is_numeric +from .utils import is_numeric, param_to_ndarray logger = logging.getLogger(__name__) @@ -600,6 +600,89 @@ class XGBoostFunction(SKLearnRegressionFunction): return 1 + max(ret) +# first-order linear function (no feature interaction) +class FOLFunction(ModelFunction): + def __init__(self, value, parameters, num_args=0): + super().__init__(value) + self.parameter_names = parameters + self._num_args = num_args + self.fit_success = False + + def fit(self, param_values, data): + categorial_to_scalar = bool( + int(os.getenv("DFATOOL_PARAM_CATEGORIAL_TO_SCALAR", "0")) + ) + fit_parameters, categorial_to_index, ignore_index = param_to_ndarray( + param_values, + with_nan=False, + categorial_to_scalar=categorial_to_scalar, + ) + self.categorial_to_index = categorial_to_index + self.ignore_index = ignore_index + fit_parameters = fit_parameters.swapaxes(0, 1) + num_vars = fit_parameters.shape[0] + funbuf = "lambda reg_param, model_param: 0" + for i in range(num_vars): + funbuf += f" + reg_param[{i}] * model_param[{i}]" + self._function_str = self.model_function = funbuf + self._function = eval(funbuf) + + error_function = lambda P, X, y: self._function(P, X) - y + self.model_args = list(np.ones((num_vars))) + try: + res = optimize.least_squares( + error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15 + ) + except ValueError as err: + logger.warning(f"Fit failed: {err} (function: {self.model_function})") + return + if res.status > 0: + 
self.model_args = res.x + self.fit_success = True + else: + logger.warning( + f"Fit failed: {res.message} (function: {self.model_function})" + ) + + def is_predictable(self, param_list=None): + """ + Return whether the model function can be evaluated on the given parameter values. + """ + return True + + def eval(self, param_list=None): + """ + Evaluate model function with specified param/arg values. + + For a StaticFunction, this is just the static value + + """ + if param_list is None: + return self.value + actual_param_list = list() + for i, param in enumerate(param_list): + if not self.ignore_index[i]: + if i in self.categorial_to_index: + try: + actual_param_list.append(self.categorial_to_index[i][param]) + except KeyError: + # param was not part of training data. substitute an unused scalar. + # Note that all param values which were not part of training data map to the same scalar this way. + # This should be harmless. + actual_param_list.append( + max(self.categorial_to_index[i].values()) + 1 + ) + else: + actual_param_list.append(param) + try: + return self._function(self.model_args, actual_param_list) + except FloatingPointError as e: + logger.error( + f"{e} when predicting {self._function_str}({param_list}), returning static value" + ) + return self.value + + class AnalyticFunction(ModelFunction): """ A multi-dimensional model function, generated from a string, which can be optimized using regression. 
diff --git a/lib/model.py b/lib/model.py index 34bb564..c97ecf0 100644 --- a/lib/model.py +++ b/lib/model.py @@ -271,6 +271,7 @@ class AnalyticModel: paramfit = ParamFit() tree_allowed = bool(int(os.getenv("DFATOOL_DTREE_ENABLED", "1"))) + use_fol = bool(int(os.getenv("DFATOOL_FIT_FOL", "0"))) tree_required = dict() for name in self.names: @@ -278,6 +279,8 @@ class AnalyticModel: for attr in self.attr_by_name[name].keys(): if self.attr_by_name[name][attr].function_override is not None: self.attr_by_name[name][attr].fit_override_function() + elif use_fol: + self.attr_by_name[name][attr].build_fol_model() elif self.attr_by_name[name][ attr ].all_relevant_parameters_are_none_or_numeric(): diff --git a/lib/parameters.py b/lib/parameters.py index 300bb6f..ed56bdd 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -10,6 +10,7 @@ import dfatool.functions as df from .paramfit import ParamFit from .utils import remove_index_from_tuple, is_numeric from .utils import filter_aggregate_by_param, partition_by_param +from .utils import param_to_ndarray logger = logging.getLogger(__name__) @@ -38,57 +39,6 @@ def distinct_param_values(param_tuples): return distinct_values -def param_to_ndarray( - param_tuples, with_nan=True, categorial_to_scalar=False, ignore_indexes=list() -): - has_nan = dict() - has_non_numeric = dict() - distinct_values = dict() - category_to_scalar = dict() - - for param_tuple in param_tuples: - for i, param in enumerate(param_tuple): - if not is_numeric(param): - if param is None: - has_nan[i] = True - else: - has_non_numeric[i] = True - if categorial_to_scalar and param is not None: - if not i in distinct_values: - distinct_values[i] = set() - distinct_values[i].add(param) - - for i, paramset in distinct_values.items(): - distinct_values[i] = sorted(paramset) - category_to_scalar[i] = dict() - for j, param in enumerate(distinct_values[i]): - category_to_scalar[i][param] = j - - ignore_index = dict() - for i in range(len(param_tuples[0])): - if 
has_non_numeric.get(i, False) and not categorial_to_scalar: - ignore_index[i] = True - elif not with_nan and has_nan.get(i, False): - ignore_index[i] = True - else: - ignore_index[i] = False - - for i in ignore_indexes: - ignore_index[i] = True - - ret_tuples = list() - for param_tuple in param_tuples: - ret_tuple = list() - for i, param in enumerate(param_tuple): - if not ignore_index[i]: - if i in category_to_scalar: - ret_tuple.append(category_to_scalar[i][param]) - else: - ret_tuple.append(param) - ret_tuples.append(ret_tuple) - return np.asarray(ret_tuples), category_to_scalar, ignore_index - - def _depends_on_param(corr_param, std_param, std_lut): # if self.use_corrcoef: if False: @@ -929,6 +879,14 @@ class ModelAttribute: return ret + def build_fol_model(self): + x = df.FOLFunction(self.median, self.param_names) + x.fit(self.param_values, self.data) + if x.fit_success: + self.model_function = x + else: + logger.warning(f"Fit of first-order linear model function failed.") + def fit_override_function(self): function_str = self.function_override x = df.AnalyticFunction( diff --git a/lib/utils.py b/lib/utils.py index 7d5b5b9..a8acb51 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -201,6 +201,57 @@ def partition_by_param(data, param_values, ignore_parameters=list()): return ret +def param_to_ndarray( + param_tuples, with_nan=True, categorial_to_scalar=False, ignore_indexes=list() +): + has_nan = dict() + has_non_numeric = dict() + distinct_values = dict() + category_to_scalar = dict() + + for param_tuple in param_tuples: + for i, param in enumerate(param_tuple): + if not is_numeric(param): + if param is None: + has_nan[i] = True + else: + has_non_numeric[i] = True + if categorial_to_scalar and param is not None: + if not i in distinct_values: + distinct_values[i] = set() + distinct_values[i].add(param) + + for i, paramset in distinct_values.items(): + distinct_values[i] = sorted(paramset) + category_to_scalar[i] = dict() + for j, param in 
enumerate(distinct_values[i]): + category_to_scalar[i][param] = j + + ignore_index = dict() + for i in range(len(param_tuples[0])): + if has_non_numeric.get(i, False) and not categorial_to_scalar: + ignore_index[i] = True + elif not with_nan and has_nan.get(i, False): + ignore_index[i] = True + else: + ignore_index[i] = False + + for i in ignore_indexes: + ignore_index[i] = True + + ret_tuples = list() + for param_tuple in param_tuples: + ret_tuple = list() + for i, param in enumerate(param_tuple): + if not ignore_index[i]: + if i in category_to_scalar: + ret_tuple.append(category_to_scalar[i][param]) + else: + ret_tuple.append(param) + ret_tuples.append(ret_tuple) + return np.asarray(ret_tuples), category_to_scalar, ignore_index + + def param_dict_to_list(param_dict, parameter_names, default=None): """ Convert {"foo": 1, "bar": 2}, ["bar", "foo", "quux"] to [2, 1, None] |