summary refs log tree commit diff
diff options
context:
space:
mode:
author Daniel Friesel <daniel.friesel@uos.de> 2022-09-22 10:18:42 +0200
committer Daniel Friesel <daniel.friesel@uos.de> 2022-09-22 10:18:42 +0200
commit 0a20c63666d23d1e125c3fb54dfb854bd272d3c7 (patch)
tree d1f55a17bc49942dc8036004e3a9836451fb9084
parent bb0344e587b7093c807aae4a614316ad6bcefe97 (diff)
CART, XGB: Improve eval speed by predicting everything at once
-rw-r--r-- lib/functions.py 38
-rw-r--r-- lib/model.py 70
2 files changed, 84 insertions, 24 deletions
diff --git a/lib/functions.py b/lib/functions.py
index 9a99c58..9e5639a 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -160,6 +160,8 @@ class NormalizationFunction:
class ModelFunction:
+ always_predictable = False
+ has_eval_arr = False
"""
Encapsulates the behaviour of a single model attribute, e.g. TX power or write duration.
@@ -192,6 +194,9 @@ class ModelFunction:
def eval(self, param_list):
raise NotImplementedError
+ def eval_arr(self, params):
+ raise NotImplementedError
+
def eval_mae(self, param_list):
"""Return model Mean Absolute Error (MAE) for `param_list`."""
if self.is_predictable(param_list):
@@ -254,6 +259,9 @@ class ModelFunction:
class StaticFunction(ModelFunction):
+ always_predictable = True
+ has_eval_arr = True
+
def is_predictable(self, param_list=None):
"""
Return whether the model function can be evaluated on the given parameter values.
@@ -271,6 +279,9 @@ class StaticFunction(ModelFunction):
"""
return self.value
+ def eval_arr(self, params):
+ return [self.value for p in params]
+
def to_json(self, **kwargs):
ret = super().to_json(**kwargs)
ret.update({"type": "static", "value": self.value})
@@ -447,6 +458,9 @@ class SubstateFunction(ModelFunction):
class SKLearnRegressionFunction(ModelFunction):
+ always_predictable = True
+ has_eval_arr = True
+
def __init__(self, value, regressor, categorial_to_index, ignore_index):
super().__init__(value)
self.regressor = regressor
@@ -490,6 +504,28 @@ class SKLearnRegressionFunction(ModelFunction):
return predictions[0]
return predictions
+ def eval_arr(self, params):
+ actual_params = list()
+ for param_tuple in params:
+ actual_param_list = list()
+ for i, param in enumerate(param_tuple):
+ if not self.ignore_index[i]:
+ if i in self.categorial_to_index:
+ try:
+ actual_param_list.append(self.categorial_to_index[i][param])
+ except KeyError:
+ # param was not part of training data. substitute an unused scalar.
+ # Note that all param values which were not part of training data map to the same scalar this way.
+ # This should be harmless.
+ actual_param_list.append(
+ max(self.categorial_to_index[i].values()) + 1
+ )
+ else:
+ actual_param_list.append(int(param))
+ actual_params.append(actual_param_list)
+ predictions = self.regressor.predict(np.array(actual_params))
+ return predictions
+
class CARTFunction(SKLearnRegressionFunction):
def get_number_of_nodes(self):
@@ -602,6 +638,8 @@ class XGBoostFunction(SKLearnRegressionFunction):
# first-order linear function (no feature interaction)
class FOLFunction(ModelFunction):
+ always_predictable = True
+
def __init__(self, value, parameters, num_args=0):
super().__init__(value)
self.parameter_names = parameters
diff --git a/lib/model.py b/lib/model.py
index e77db01..427b5ec 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -237,6 +237,8 @@ class AnalyticModel:
model[name][k] = v.get_static(use_mean=use_mean)
def static_model_getter(name, key, **kwargs):
+ if "params" in kwargs:
+ return [model[name][key] for p in kwargs["params"]]
return model[name][key]
return static_model_getter
@@ -266,18 +268,27 @@ class AnalyticModel:
for param, model_value in v.by_param.items():
lut_model[name][k][param] = v.get_lut(param, use_mean=use_mean)
- def lut_median_getter(name, key, param, arg=list(), **kwargs):
- if arg:
- if type(param) is tuple:
- param = list(param)
- param.extend(map(soft_cast_int, arg))
- param = tuple(param)
- try:
- return lut_model[name][key][param]
- except KeyError:
- if fallback:
- return static_model[name][key]
- raise
+ def lut_median_getter(name, key, **kwargs):
+ if "param" in kwargs:
+ param = tuple(kwargs["param"])
+ try:
+ return lut_model[name][key][param]
+ except KeyError:
+ if fallback:
+ return static_model[name][key]
+ raise
+ params = kwargs["params"]
+ if fallback:
+ return list(
+ map(
+ lambda p: lut_model[name][key][tuple(p)]
+ if tuple(p) in lut_model[name][key]
+ else static_model[name][key],
+ params,
+ )
+ )
+ else:
+ return list(map(lambda p: lut_model[name][key][tuple(p)], params))
return lut_median_getter
@@ -351,14 +362,32 @@ class AnalyticModel:
# shortcut
if type(model_info) is StaticFunction:
+ if "params" in kwargs:
+ return [static_model[name][key] for p in kwargs["params"]]
return static_model[name][key]
- if "arg" in kwargs and "param" in kwargs:
- kwargs["param"].extend(map(soft_cast_int, kwargs["arg"]))
-
- if model_function.is_predictable(kwargs["param"]):
+ if "param" in kwargs and model_function.is_predictable(kwargs["param"]):
return model_function.eval(kwargs["param"])
+ if "params" in kwargs:
+ if model_function.has_eval_arr and (
+ model_function.always_predictable
+ or all(
+ map(
+ lambda p: model_function.is_predictable(p), kwargs["params"]
+ )
+ )
+ ):
+ return model_function.eval_arr(kwargs["params"])
+ return list(
+ map(
+ lambda p: model_function.eval(p)
+ if model_function.is_predictable(p)
+ else static_model[name][key],
+ kwargs["params"],
+ )
+ )
+
return static_model[name][key]
def info_getter(name, key):
@@ -395,14 +424,7 @@ class AnalyticModel:
}
for attribute in elem["attributes"]:
predicted_data = np.array(
- list(
- map(
- lambda i: model_function(
- name, attribute, param=elem["param"][i]
- ),
- range(len(elem[attribute])),
- )
- )
+ model_function(name, attribute, params=elem["param"])
)
measures = regression_measures(predicted_data, elem[attribute])
detailed_results[name][attribute] = measures