diff options
Diffstat (limited to 'lib')
-rw-r--r--  lib/functions.py  | 95
-rw-r--r--  lib/model.py      |  5
-rw-r--r--  lib/parameters.py | 50
3 files changed, 38 insertions, 112 deletions
diff --git a/lib/functions.py b/lib/functions.py index 13897bb..07d3e2f 100644 --- a/lib/functions.py +++ b/lib/functions.py @@ -923,7 +923,7 @@ class LMTFunction(SKLearnRegressionFunction): except np.linalg.LinAlgError as e: logger.error(f"LMT generation failed: {e}") self.fit_success = False - return + return self logger.debug("Fitted LMT") self.regressor = lmt @@ -1175,107 +1175,29 @@ class XGBoostFunction(SKLearnRegressionFunction): } -class SymbolicRegressionFunction(ModelFunction): - def __init__(self, value, parameters, num_args=0, **kwargs): - super().__init__(value, **kwargs) - self.parameter_names = parameters - self._num_args = num_args - self.fit_success = False - +class SymbolicRegressionFunction(SKLearnRegressionFunction): def fit(self, param_values, data, ignore_param_indexes=None): - self.categorical_to_scalar = bool( - int(os.getenv("DFATOOL_PARAM_CATEGORICAL_TO_SCALAR", "0")) - ) - fit_parameters, categorical_to_index, ignore_index = param_to_ndarray( + fit_parameters, self.categorical_to_index, self.ignore_index = param_to_ndarray( param_values, with_nan=False, categorical_to_scalar=self.categorical_to_scalar, ignore_indexes=ignore_param_indexes, ) - self.categorical_to_index = categorical_to_index - self.ignore_index = ignore_index if fit_parameters.shape[1] == 0: logger.debug( f"Cannot use Symbolic Regression due to lack of parameters: parameter shape is {np.array(param_values).shape}, fit_parameter shape is {fit_parameters.shape}" ) - return + self.fit_success = False + return self from dfatool.gplearn.genetic import SymbolicRegressor self.regressor = SymbolicRegressor() self.regressor.fit(fit_parameters, data) + self._build_feature_names() self.fit_success = True - - # TODO inherit from SKLearnRegressionFunction, making this obsolete. - # requires SKLearnRegressionFunction to provide .fit and refactoring - # build_tree to move .fit into SKLearnRegressionFunction descendants. 
- def is_predictable(self, param_list=None): - """ - Return whether the model function can be evaluated on the given parameter values. - - For a StaticFunction, this is always the case (i.e., this function always returns true). - """ - return True - - # TODO inherit from SKLearnRegressionFunction, making this obsolete. - # requires SKLearnRegressionFunction to provide .fit and refactoring - # build_tree to move .fit into SKLearnRegressionFunction descendants. - def eval(self, param_list=None): - """ - Evaluate model function with specified param/arg values. - - Far a Staticfunction, this is just the static value - - """ - if param_list is None: - return self.value - actual_param_list = list() - for i, param in enumerate(param_list): - if not self.ignore_index[i]: - if i in self.categorical_to_index: - try: - actual_param_list.append(self.categorical_to_index[i][param]) - except KeyError: - # param was not part of training data. substitute an unused scalar. - # Note that all param values which were not part of training data map to the same scalar this way. - # This should be harmless. - actual_param_list.append( - max(self.categorical_to_index[i].values()) + 1 - ) - else: - actual_param_list.append(int(param)) - predictions = self.regressor.predict(np.array([actual_param_list])) - if predictions.shape == (1,): - return predictions[0] - return predictions - - # TODO inherit from SKLearnRegressionFunction, making this obsolete. - # requires SKLearnRegressionFunction to provide .fit and refactoring - # build_tree to move .fit into SKLearnRegressionFunction descendants. - def eval_arr(self, params): - actual_params = list() - for param_tuple in params: - actual_param_list = list() - for i, param in enumerate(param_tuple): - if not self.ignore_index[i]: - if i in self.categorical_to_index: - try: - actual_param_list.append( - self.categorical_to_index[i][param] - ) - except KeyError: - # param was not part of training data. substitute an unused scalar. 
- # Note that all param values which were not part of training data map to the same scalar this way. - # This should be harmless. - actual_param_list.append( - max(self.categorical_to_index[i].values()) + 1 - ) - else: - actual_param_list.append(int(param)) - actual_params.append(actual_param_list) - predictions = self.regressor.predict(np.array(actual_params)) - return predictions + return self # first-order linear function (no feature interaction) @@ -1356,7 +1278,7 @@ class FOLFunction(ModelFunction): ) except ValueError as err: logger.warning(f"Fit failed: {err} (function: {self.model_function})") - return + return self if res.status > 0: self.model_args = res.x self.fit_success = True @@ -1364,6 +1286,7 @@ class FOLFunction(ModelFunction): logger.warning( f"Fit failed: {res.message} (function: {self.model_function})" ) + return self def is_predictable(self, param_list=None): """ diff --git a/lib/model.py b/lib/model.py index 26c4646..b92a50c 100644 --- a/lib/model.py +++ b/lib/model.py @@ -308,6 +308,8 @@ class AnalyticModel: self.attr_by_name[name][attr].build_fol() elif model_type == "lmt": self.attr_by_name[name][attr].build_lmt() + elif model_type == "symreg": + self.attr_by_name[name][attr].build_symreg() elif model_type == "xgb": self.attr_by_name[name][attr].build_xgb() else: @@ -332,7 +334,6 @@ class AnalyticModel: else: paramfit = ParamFit() tree_allowed = bool(int(os.getenv("DFATOOL_RMT_ENABLED", "1"))) - use_symreg = bool(int(os.getenv("DFATOOL_FIT_SYMREG", "0"))) tree_required = dict() for name in self.names: @@ -340,8 +341,6 @@ class AnalyticModel: for attr in self.attr_by_name[name].keys(): if self.attr_by_name[name][attr].function_override is not None: self.attr_by_name[name][attr].fit_override_function() - elif use_symreg: - self.attr_by_name[name][attr].build_symreg_model() elif self.attr_by_name[name][ attr ].all_relevant_parameters_are_none_or_numeric(): diff --git a/lib/parameters.py b/lib/parameters.py index 1ae4e4c..bafc2a5 100644 --- 
a/lib/parameters.py +++ b/lib/parameters.py @@ -830,29 +830,6 @@ class ModelAttribute: return False return True - def build_symreg_model(self): - ignore_irrelevant = bool( - int(os.getenv("DFATOOL_RMT_IGNORE_IRRELEVANT_PARAMS", "0")) - ) - ignore_param_indexes = list() - if ignore_irrelevant: - for param_index, param in enumerate(self.param_names): - if not self.stats.depends_on_param(param): - ignore_param_indexes.append(param_index) - x = df.SymbolicRegressionFunction( - self.median, - self.param_names, - n_samples=self.data.shape[0], - num_args=self.arg_count, - ) - x.fit(self.param_values, self.data, ignore_param_indexes=ignore_param_indexes) - if x.fit_success: - self.model_function = x - else: - logger.debug( - f"Symbolic Regression model generation for {self.name} {self.attr} failed." - ) - def fit_override_function(self): function_str = self.function_override x = df.AnalyticFunction( @@ -986,6 +963,33 @@ class ModelAttribute: ) return False + def build_symreg(self): + ignore_irrelevant = bool( + int(os.getenv("DFATOOL_RMT_IGNORE_IRRELEVANT_PARAMS", "0")) + ) + ignore_param_indexes = list() + if ignore_irrelevant: + for param_index, param in enumerate(self.param_names): + if not self.stats.depends_on_param(param): + ignore_param_indexes.append(param_index) + x = df.SymbolicRegressionFunction( + np.mean(self.data), + n_samples=self.data.shape[0], + param_names=self.param_names, + arg_count=self.arg_count, + ).fit(self.param_values, self.data, ignore_param_indexes=ignore_param_indexes) + if x.fit_success: + self.model_function = x + return True + else: + logger.debug( + f"Symbolic Regression model generation for {self.name} {self.attr} failed." + ) + self.model_function = df.StaticFunction( + np.mean(self.data), n_samples=len(self.data) + ) + return False + def build_xgb(self): mf = df.XGBoostFunction( np.mean(self.data), |