author     Birte Kristina Friesel <birte.friesel@uos.de>  2024-02-21 13:52:18 +0100
committer  Birte Kristina Friesel <birte.friesel@uos.de>  2024-02-21 13:52:18 +0100
commit     e7ee25bfc59495e076265577638b8f58733f8912 (patch)
tree       dd556df68ad78dc2971d5a015adbba13f4cd36b3 /lib
parent     761df90cebc0e0fb443c1333f7d97347cd5ebd98 (diff)
make SymbolicRegressionFunction inherit from SKLearnRegressionFunction
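
The refactoring pattern in a nutshell: SKLearnRegressionFunction provides
is_predictable/eval/eval_arr once, descendants only implement fit, and fit
returns self so that construction and fitting can be chained. A minimal
self-contained sketch of that pattern (toy class names, with sklearn's
LinearRegression as a stand-in for gplearn's SymbolicRegressor; this is not
dfatool's actual class hierarchy):

    import numpy as np
    from sklearn.linear_model import LinearRegression  # stand-in regressor

    class RegressionFunctionBase:
        # plays the role of SKLearnRegressionFunction: shared behaviour
        def __init__(self, value):
            self.value = value
            self.fit_success = False
            self.regressor = None

        def is_predictable(self, param_list=None):
            return True

        def eval(self, param_list=None):
            if param_list is None or self.regressor is None:
                return self.value
            return self.regressor.predict(np.array([param_list]))[0]

    class ChainableRegressionFunction(RegressionFunctionBase):
        # plays the role of SymbolicRegressionFunction: only fit() is specific
        def fit(self, X, y):
            self.regressor = LinearRegression().fit(np.array(X), np.array(y))
            self.fit_success = True
            return self  # enables Klass(...).fit(...) chaining

    mf = ChainableRegressionFunction(value=0.0).fit([[1], [2], [3]], [2.0, 4.0, 6.0])
    print(mf.fit_success, mf.eval([4]))  # True 8.0 (up to float precision)
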
Diffstat (limited to 'lib')
-rw-r--r--  lib/functions.py   95
-rw-r--r--  lib/model.py        5
-rw-r--r--  lib/parameters.py  50
3 files changed, 38 insertions, 112 deletions
diff --git a/lib/functions.py b/lib/functions.py
index 13897bb..07d3e2f 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -923,7 +923,7 @@ class LMTFunction(SKLearnRegressionFunction):
except np.linalg.LinAlgError as e:
logger.error(f"LMT generation failed: {e}")
self.fit_success = False
- return
+ return self
logger.debug("Fitted LMT")
self.regressor = lmt
@@ -1175,107 +1175,29 @@ class XGBoostFunction(SKLearnRegressionFunction):
}
-class SymbolicRegressionFunction(ModelFunction):
- def __init__(self, value, parameters, num_args=0, **kwargs):
- super().__init__(value, **kwargs)
- self.parameter_names = parameters
- self._num_args = num_args
- self.fit_success = False
-
+class SymbolicRegressionFunction(SKLearnRegressionFunction):
def fit(self, param_values, data, ignore_param_indexes=None):
- self.categorical_to_scalar = bool(
- int(os.getenv("DFATOOL_PARAM_CATEGORICAL_TO_SCALAR", "0"))
- )
- fit_parameters, categorical_to_index, ignore_index = param_to_ndarray(
+ fit_parameters, self.categorical_to_index, self.ignore_index = param_to_ndarray(
param_values,
with_nan=False,
categorical_to_scalar=self.categorical_to_scalar,
ignore_indexes=ignore_param_indexes,
)
- self.categorical_to_index = categorical_to_index
- self.ignore_index = ignore_index
if fit_parameters.shape[1] == 0:
logger.debug(
f"Cannot use Symbolic Regression due to lack of parameters: parameter shape is {np.array(param_values).shape}, fit_parameter shape is {fit_parameters.shape}"
)
- return
+ self.fit_success = False
+ return self
from dfatool.gplearn.genetic import SymbolicRegressor
self.regressor = SymbolicRegressor()
self.regressor.fit(fit_parameters, data)
+ self._build_feature_names()
self.fit_success = True
-
- # TODO inherit from SKLearnRegressionFunction, making this obsolete.
- # requires SKLearnRegressionFunction to provide .fit and refactoring
- # build_tree to move .fit into SKLearnRegressionFunction descendants.
- def is_predictable(self, param_list=None):
- """
- Return whether the model function can be evaluated on the given parameter values.
-
- For a StaticFunction, this is always the case (i.e., this function always returns true).
- """
- return True
-
- # TODO inherit from SKLearnRegressionFunction, making this obsolete.
- # requires SKLearnRegressionFunction to provide .fit and refactoring
- # build_tree to move .fit into SKLearnRegressionFunction descendants.
- def eval(self, param_list=None):
- """
- Evaluate model function with specified param/arg values.
-
- For a StaticFunction, this is just the static value
-
- """
- if param_list is None:
- return self.value
- actual_param_list = list()
- for i, param in enumerate(param_list):
- if not self.ignore_index[i]:
- if i in self.categorical_to_index:
- try:
- actual_param_list.append(self.categorical_to_index[i][param])
- except KeyError:
- # param was not part of training data. substitute an unused scalar.
- # Note that all param values which were not part of training data map to the same scalar this way.
- # This should be harmless.
- actual_param_list.append(
- max(self.categorical_to_index[i].values()) + 1
- )
- else:
- actual_param_list.append(int(param))
- predictions = self.regressor.predict(np.array([actual_param_list]))
- if predictions.shape == (1,):
- return predictions[0]
- return predictions
-
- # TODO inherit from SKLearnRegressionFunction, making this obsolete.
- # requires SKLearnRegressionFunction to provide .fit and refactoring
- # build_tree to move .fit into SKLearnRegressionFunction descendants.
- def eval_arr(self, params):
- actual_params = list()
- for param_tuple in params:
- actual_param_list = list()
- for i, param in enumerate(param_tuple):
- if not self.ignore_index[i]:
- if i in self.categorical_to_index:
- try:
- actual_param_list.append(
- self.categorical_to_index[i][param]
- )
- except KeyError:
- # param was not part of training data. substitute an unused scalar.
- # Note that all param values which were not part of training data map to the same scalar this way.
- # This should be harmless.
- actual_param_list.append(
- max(self.categorical_to_index[i].values()) + 1
- )
- else:
- actual_param_list.append(int(param))
- actual_params.append(actual_param_list)
- predictions = self.regressor.predict(np.array(actual_params))
- return predictions
+ return self
# first-order linear function (no feature interaction)
@@ -1356,7 +1278,7 @@ class FOLFunction(ModelFunction):
)
except ValueError as err:
logger.warning(f"Fit failed: {err} (function: {self.model_function})")
- return
+ return self
if res.status > 0:
self.model_args = res.x
self.fit_success = True
@@ -1364,6 +1286,7 @@ class FOLFunction(ModelFunction):
logger.warning(
f"Fit failed: {res.message} (function: {self.model_function})"
)
+ return self
def is_predictable(self, param_list=None):
"""
diff --git a/lib/model.py b/lib/model.py
index 26c4646..b92a50c 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -308,6 +308,8 @@ class AnalyticModel:
self.attr_by_name[name][attr].build_fol()
elif model_type == "lmt":
self.attr_by_name[name][attr].build_lmt()
+ elif model_type == "symreg":
+ self.attr_by_name[name][attr].build_symreg()
elif model_type == "xgb":
self.attr_by_name[name][attr].build_xgb()
else:
@@ -332,7 +334,6 @@ class AnalyticModel:
else:
paramfit = ParamFit()
tree_allowed = bool(int(os.getenv("DFATOOL_RMT_ENABLED", "1")))
- use_symreg = bool(int(os.getenv("DFATOOL_FIT_SYMREG", "0")))
tree_required = dict()
for name in self.names:
@@ -340,8 +341,6 @@ class AnalyticModel:
for attr in self.attr_by_name[name].keys():
if self.attr_by_name[name][attr].function_override is not None:
self.attr_by_name[name][attr].fit_override_function()
- elif use_symreg:
- self.attr_by_name[name][attr].build_symreg_model()
elif self.attr_by_name[name][
attr
].all_relevant_parameters_are_none_or_numeric():
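
With the dedicated DFATOOL_FIT_SYMREG toggle gone, symbolic regression is now
selected like the other sklearn-style backends, via the model_type string
handled above (in dfatool this is presumably driven by the DFATOOL_MODEL
environment variable). A minimal sketch of the dispatch shape, not dfatool's
actual code:

    import os

    builders = {
        "fol": "build_fol",
        "lmt": "build_lmt",
        "symreg": "build_symreg",  # new: added by this commit
        "xgb": "build_xgb",
    }
    model_type = os.getenv("DFATOOL_MODEL", "rmt")
    print(builders.get(model_type))  # "build_symreg" for DFATOOL_MODEL=symreg
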
diff --git a/lib/parameters.py b/lib/parameters.py
index 1ae4e4c..bafc2a5 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -830,29 +830,6 @@ class ModelAttribute:
return False
return True
- def build_symreg_model(self):
- ignore_irrelevant = bool(
- int(os.getenv("DFATOOL_RMT_IGNORE_IRRELEVANT_PARAMS", "0"))
- )
- ignore_param_indexes = list()
- if ignore_irrelevant:
- for param_index, param in enumerate(self.param_names):
- if not self.stats.depends_on_param(param):
- ignore_param_indexes.append(param_index)
- x = df.SymbolicRegressionFunction(
- self.median,
- self.param_names,
- n_samples=self.data.shape[0],
- num_args=self.arg_count,
- )
- x.fit(self.param_values, self.data, ignore_param_indexes=ignore_param_indexes)
- if x.fit_success:
- self.model_function = x
- else:
- logger.debug(
- f"Symbolic Regression model generation for {self.name} {self.attr} failed."
- )
-
def fit_override_function(self):
function_str = self.function_override
x = df.AnalyticFunction(
@@ -986,6 +963,33 @@ class ModelAttribute:
)
return False
+ def build_symreg(self):
+ ignore_irrelevant = bool(
+ int(os.getenv("DFATOOL_RMT_IGNORE_IRRELEVANT_PARAMS", "0"))
+ )
+ ignore_param_indexes = list()
+ if ignore_irrelevant:
+ for param_index, param in enumerate(self.param_names):
+ if not self.stats.depends_on_param(param):
+ ignore_param_indexes.append(param_index)
+ x = df.SymbolicRegressionFunction(
+ np.mean(self.data),
+ n_samples=self.data.shape[0],
+ param_names=self.param_names,
+ arg_count=self.arg_count,
+ ).fit(self.param_values, self.data, ignore_param_indexes=ignore_param_indexes)
+ if x.fit_success:
+ self.model_function = x
+ return True
+ else:
+ logger.debug(
+ f"Symbolic Regression model generation for {self.name} {self.attr} failed."
+ )
+ self.model_function = df.StaticFunction(
+ np.mean(self.data), n_samples=len(self.data)
+ )
+ return False
+
def build_xgb(self):
mf = df.XGBoostFunction(
np.mean(self.data),