diff options
author | Daniel Friesel <daniel.friesel@uos.de> | 2022-09-22 10:18:42 +0200 |
---|---|---|
committer | Daniel Friesel <daniel.friesel@uos.de> | 2022-09-22 10:18:42 +0200 |
commit | 0a20c63666d23d1e125c3fb54dfb854bd272d3c7 (patch) | |
tree | d1f55a17bc49942dc8036004e3a9836451fb9084 | |
parent | bb0344e587b7093c807aae4a614316ad6bcefe97 (diff) |
CART, XGB: Improve eval speed by predicting everything at once
-rw-r--r-- | lib/functions.py | 38 | +38 / -0 |
-rw-r--r-- | lib/model.py | 70 | +46 / -24 |
2 files changed, 84 insertions, 24 deletions
diff --git a/lib/functions.py b/lib/functions.py index 9a99c58..9e5639a 100644 --- a/lib/functions.py +++ b/lib/functions.py @@ -160,6 +160,8 @@ class NormalizationFunction: class ModelFunction: + always_predictable = False + has_eval_arr = False """ Encapsulates the behaviour of a single model attribute, e.g. TX power or write duration. @@ -192,6 +194,9 @@ class ModelFunction: def eval(self, param_list): raise NotImplementedError + def eval_arr(self, params): + raise NotImplementedError + def eval_mae(self, param_list): """Return model Mean Absolute Error (MAE) for `param_list`.""" if self.is_predictable(param_list): @@ -254,6 +259,9 @@ class ModelFunction: class StaticFunction(ModelFunction): + always_predictable = True + has_eval_arr = True + def is_predictable(self, param_list=None): """ Return whether the model function can be evaluated on the given parameter values. @@ -271,6 +279,9 @@ class StaticFunction(ModelFunction): """ return self.value + def eval_arr(self, params): + return [self.value for p in params] + def to_json(self, **kwargs): ret = super().to_json(**kwargs) ret.update({"type": "static", "value": self.value}) @@ -447,6 +458,9 @@ class SubstateFunction(ModelFunction): class SKLearnRegressionFunction(ModelFunction): + always_predictable = True + has_eval_arr = True + def __init__(self, value, regressor, categorial_to_index, ignore_index): super().__init__(value) self.regressor = regressor @@ -490,6 +504,28 @@ class SKLearnRegressionFunction(ModelFunction): return predictions[0] return predictions + def eval_arr(self, params): + actual_params = list() + for param_tuple in params: + actual_param_list = list() + for i, param in enumerate(param_tuple): + if not self.ignore_index[i]: + if i in self.categorial_to_index: + try: + actual_param_list.append(self.categorial_to_index[i][param]) + except KeyError: + # param was not part of training data. substitute an unused scalar. 
+ # Note that all param values which were not part of training data map to the same scalar this way. + # This should be harmless. + actual_param_list.append( + max(self.categorial_to_index[i].values()) + 1 + ) + else: + actual_param_list.append(int(param)) + actual_params.append(actual_param_list) + predictions = self.regressor.predict(np.array(actual_params)) + return predictions + class CARTFunction(SKLearnRegressionFunction): def get_number_of_nodes(self): @@ -602,6 +638,8 @@ class XGBoostFunction(SKLearnRegressionFunction): # first-order linear function (no feature interaction) class FOLFunction(ModelFunction): + always_predictable = True + def __init__(self, value, parameters, num_args=0): super().__init__(value) self.parameter_names = parameters diff --git a/lib/model.py b/lib/model.py index e77db01..427b5ec 100644 --- a/lib/model.py +++ b/lib/model.py @@ -237,6 +237,8 @@ class AnalyticModel: model[name][k] = v.get_static(use_mean=use_mean) def static_model_getter(name, key, **kwargs): + if "params" in kwargs: + return [model[name][key] for p in kwargs["params"]] return model[name][key] return static_model_getter @@ -266,18 +268,27 @@ class AnalyticModel: for param, model_value in v.by_param.items(): lut_model[name][k][param] = v.get_lut(param, use_mean=use_mean) - def lut_median_getter(name, key, param, arg=list(), **kwargs): - if arg: - if type(param) is tuple: - param = list(param) - param.extend(map(soft_cast_int, arg)) - param = tuple(param) - try: - return lut_model[name][key][param] - except KeyError: - if fallback: - return static_model[name][key] - raise + def lut_median_getter(name, key, **kwargs): + if "param" in kwargs: + param = tuple(kwargs["param"]) + try: + return lut_model[name][key][param] + except KeyError: + if fallback: + return static_model[name][key] + raise + params = kwargs["params"] + if fallback: + return list( + map( + lambda p: lut_model[name][key][tuple(p)] + if tuple(p) in lut_model[name][key] + else static_model[name][key], + 
params, + ) + ) + else: + return list(map(lambda p: lut_model[name][key][tuple(p)], params)) return lut_median_getter @@ -351,14 +362,32 @@ class AnalyticModel: # shortcut if type(model_info) is StaticFunction: + if "params" in kwargs: + return [static_model[name][key] for p in kwargs["params"]] return static_model[name][key] - if "arg" in kwargs and "param" in kwargs: - kwargs["param"].extend(map(soft_cast_int, kwargs["arg"])) - - if model_function.is_predictable(kwargs["param"]): + if "param" in kwargs and model_function.is_predictable(kwargs["param"]): return model_function.eval(kwargs["param"]) + if "params" in kwargs: + if model_function.has_eval_arr and ( + model_function.always_predictable + or all( + map( + lambda p: model_function.is_predictable(p), kwargs["params"] + ) + ) + ): + return model_function.eval_arr(kwargs["params"]) + return list( + map( + lambda p: model_function.eval(p) + if model_function.is_predictable(p) + else static_model[name][key], + kwargs["params"], + ) + ) + return static_model[name][key] def info_getter(name, key): @@ -395,14 +424,7 @@ class AnalyticModel: } for attribute in elem["attributes"]: predicted_data = np.array( - list( - map( - lambda i: model_function( - name, attribute, param=elem["param"][i] - ), - range(len(elem[attribute])), - ) - ) + model_function(name, attribute, params=elem["param"]) ) measures = regression_measures(predicted_data, elem[attribute]) detailed_results[name][attribute] = measures |