summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author Daniel Friesel <daniel.friesel@uos.de> 2022-06-03 13:03:05 +0200
committer Daniel Friesel <daniel.friesel@uos.de> 2022-06-03 13:03:05 +0200
commit 5ba5bc244d5e9d967279fa9da4900d1063447da6 (patch)
tree c890c50b9f9b446d6d71091f5bdd766f2ded0c7f /lib
parent 509629740f782e11d22d7cf0c000bc0423782d7f (diff)
add first order linear functions for evaluation purposes
Diffstat (limited to 'lib')
-rw-r--r-- lib/cli.py 7
-rw-r--r-- lib/functions.py 85
-rw-r--r-- lib/model.py 3
-rw-r--r-- lib/parameters.py 60
-rw-r--r-- lib/utils.py 51
5 files changed, 153 insertions, 53 deletions
diff --git a/lib/cli.py b/lib/cli.py
index b6ccbc2..4c56e73 100644
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -1,6 +1,11 @@
#!/usr/bin/env python3
-from dfatool.functions import SplitFunction, AnalyticFunction, StaticFunction
+from dfatool.functions import (
+ SplitFunction,
+ AnalyticFunction,
+ StaticFunction,
+ FOLFunction,
+)
def print_static(model, static_model, name, attribute):
diff --git a/lib/functions.py b/lib/functions.py
index 3c2b424..0b0044b 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -11,7 +11,7 @@ import numpy as np
import os
import re
from scipy import optimize
-from .utils import is_numeric
+from .utils import is_numeric, param_to_ndarray
logger = logging.getLogger(__name__)
@@ -600,6 +600,89 @@ class XGBoostFunction(SKLearnRegressionFunction):
return 1 + max(ret)
+# first-order linear function (no feature interaction)
+class FOLFunction(ModelFunction):
+ def __init__(self, value, parameters, num_args=0):
+ super().__init__(value)
+ self.parameter_names = parameters
+ self._num_args = num_args
+ self.fit_success = False
+
+ def fit(self, param_values, data):
+ categorial_to_scalar = bool(
+ int(os.getenv("DFATOOL_PARAM_CATEGORIAL_TO_SCALAR", "0"))
+ )
+ fit_parameters, categorial_to_index, ignore_index = param_to_ndarray(
+ param_values,
+ with_nan=False,
+ categorial_to_scalar=categorial_to_scalar,
+ )
+ self.categorial_to_index = categorial_to_index
+ self.ignore_index = ignore_index
+ fit_parameters = fit_parameters.swapaxes(0, 1)
+ num_vars = fit_parameters.shape[0]
+ funbuf = "lambda reg_param, model_param: 0"
+ for i in range(num_vars):
+ funbuf += f" + reg_param[{i}] * model_param[{i}]"
+ self._function_str = self.model_function = funbuf
+ self._function = eval(funbuf)
+
+ error_function = lambda P, X, y: self._function(P, X) - y
+ self.model_args = list(np.ones((num_vars)))
+ try:
+ res = optimize.least_squares(
+ error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15
+ )
+ except ValueError as err:
+ logger.warning(f"Fit failed: {err} (function: {self.model_function})")
+ return
+ if res.status > 0:
+ self.model_args = res.x
+ self.fit_success = True
+ else:
+ logger.warning(
+ f"Fit failed: {res.message} (function: {self.model_function})"
+ )
+
+ def is_predictable(self, param_list=None):
+ """
+ Return whether the model function can be evaluated on the given parameter values.
+ """
+ return True
+
+ def eval(self, param_list=None):
+ """
+ Evaluate model function with specified param/arg values.
+
+ If param_list is None, this is just the static value (as for a StaticFunction).
+
+ """
+ if param_list is None:
+ return self.value
+ actual_param_list = list()
+ for i, param in enumerate(param_list):
+ if not self.ignore_index[i]:
+ if i in self.categorial_to_index:
+ try:
+ actual_param_list.append(self.categorial_to_index[i][param])
+ except KeyError:
+ # param was not part of training data. substitute an unused scalar.
+ # Note that all param values which were not part of training data map to the same scalar this way.
+ # This should be harmless.
+ actual_param_list.append(
+ max(self.categorial_to_index[i].values()) + 1
+ )
+ else:
+ actual_param_list.append(param)
+ try:
+ return self._function(self.model_args, actual_param_list)
+ except FloatingPointError as e:
+ logger.error(
+ f"{e} when predicting {self._function_str}({param_list}), returning static value"
+ )
+ return self.value
+
+
class AnalyticFunction(ModelFunction):
"""
A multi-dimensional model function, generated from a string, which can be optimized using regression.
diff --git a/lib/model.py b/lib/model.py
index 34bb564..c97ecf0 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -271,6 +271,7 @@ class AnalyticModel:
paramfit = ParamFit()
tree_allowed = bool(int(os.getenv("DFATOOL_DTREE_ENABLED", "1")))
+ use_fol = bool(int(os.getenv("DFATOOL_FIT_FOL", "0")))
tree_required = dict()
for name in self.names:
@@ -278,6 +279,8 @@ class AnalyticModel:
for attr in self.attr_by_name[name].keys():
if self.attr_by_name[name][attr].function_override is not None:
self.attr_by_name[name][attr].fit_override_function()
+ elif use_fol:
+ self.attr_by_name[name][attr].build_fol_model()
elif self.attr_by_name[name][
attr
].all_relevant_parameters_are_none_or_numeric():
diff --git a/lib/parameters.py b/lib/parameters.py
index 300bb6f..ed56bdd 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -10,6 +10,7 @@ import dfatool.functions as df
from .paramfit import ParamFit
from .utils import remove_index_from_tuple, is_numeric
from .utils import filter_aggregate_by_param, partition_by_param
+from .utils import param_to_ndarray
logger = logging.getLogger(__name__)
@@ -38,57 +39,6 @@ def distinct_param_values(param_tuples):
return distinct_values
-def param_to_ndarray(
- param_tuples, with_nan=True, categorial_to_scalar=False, ignore_indexes=list()
-):
- has_nan = dict()
- has_non_numeric = dict()
- distinct_values = dict()
- category_to_scalar = dict()
-
- for param_tuple in param_tuples:
- for i, param in enumerate(param_tuple):
- if not is_numeric(param):
- if param is None:
- has_nan[i] = True
- else:
- has_non_numeric[i] = True
- if categorial_to_scalar and param is not None:
- if not i in distinct_values:
- distinct_values[i] = set()
- distinct_values[i].add(param)
-
- for i, paramset in distinct_values.items():
- distinct_values[i] = sorted(paramset)
- category_to_scalar[i] = dict()
- for j, param in enumerate(distinct_values[i]):
- category_to_scalar[i][param] = j
-
- ignore_index = dict()
- for i in range(len(param_tuples[0])):
- if has_non_numeric.get(i, False) and not categorial_to_scalar:
- ignore_index[i] = True
- elif not with_nan and has_nan.get(i, False):
- ignore_index[i] = True
- else:
- ignore_index[i] = False
-
- for i in ignore_indexes:
- ignore_index[i] = True
-
- ret_tuples = list()
- for param_tuple in param_tuples:
- ret_tuple = list()
- for i, param in enumerate(param_tuple):
- if not ignore_index[i]:
- if i in category_to_scalar:
- ret_tuple.append(category_to_scalar[i][param])
- else:
- ret_tuple.append(param)
- ret_tuples.append(ret_tuple)
- return np.asarray(ret_tuples), category_to_scalar, ignore_index
-
-
def _depends_on_param(corr_param, std_param, std_lut):
# if self.use_corrcoef:
if False:
@@ -929,6 +879,14 @@ class ModelAttribute:
return ret
+ def build_fol_model(self):
+ x = df.FOLFunction(self.median, self.param_names)
+ x.fit(self.param_values, self.data)
+ if x.fit_success:
+ self.model_function = x
+ else:
+ logger.warning(f"Fit of first-order linear model function failed.")
+
def fit_override_function(self):
function_str = self.function_override
x = df.AnalyticFunction(
diff --git a/lib/utils.py b/lib/utils.py
index 7d5b5b9..a8acb51 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -201,6 +201,57 @@ def partition_by_param(data, param_values, ignore_parameters=list()):
return ret
+def param_to_ndarray(
+ param_tuples, with_nan=True, categorial_to_scalar=False, ignore_indexes=list()
+):
+ has_nan = dict()
+ has_non_numeric = dict()
+ distinct_values = dict()
+ category_to_scalar = dict()
+
+ for param_tuple in param_tuples:
+ for i, param in enumerate(param_tuple):
+ if not is_numeric(param):
+ if param is None:
+ has_nan[i] = True
+ else:
+ has_non_numeric[i] = True
+ if categorial_to_scalar and param is not None:
+ if not i in distinct_values:
+ distinct_values[i] = set()
+ distinct_values[i].add(param)
+
+ for i, paramset in distinct_values.items():
+ distinct_values[i] = sorted(paramset)
+ category_to_scalar[i] = dict()
+ for j, param in enumerate(distinct_values[i]):
+ category_to_scalar[i][param] = j
+
+ ignore_index = dict()
+ for i in range(len(param_tuples[0])):
+ if has_non_numeric.get(i, False) and not categorial_to_scalar:
+ ignore_index[i] = True
+ elif not with_nan and has_nan.get(i, False):
+ ignore_index[i] = True
+ else:
+ ignore_index[i] = False
+
+ for i in ignore_indexes:
+ ignore_index[i] = True
+
+ ret_tuples = list()
+ for param_tuple in param_tuples:
+ ret_tuple = list()
+ for i, param in enumerate(param_tuple):
+ if not ignore_index[i]:
+ if i in category_to_scalar:
+ ret_tuple.append(category_to_scalar[i][param])
+ else:
+ ret_tuple.append(param)
+ ret_tuples.append(ret_tuple)
+ return np.asarray(ret_tuples), category_to_scalar, ignore_index
+
+
def param_dict_to_list(param_dict, parameter_names, default=None):
"""
Convert {"foo": 1, "bar": 2}, ["bar", "foo", "quux"] to [2, 1, None]