| author | Daniel Friesel <daniel.friesel@uos.de> | 2022-01-06 11:58:09 +0100 |
|---|---|---|
| committer | Daniel Friesel <daniel.friesel@uos.de> | 2022-01-06 11:58:09 +0100 |
| commit | 01860ccf2addcb1dd84418887f76b88c4acdf53a (patch) | |
| tree | 36fd3ba51210caab7315891c24d019eeb069378d | |
| parent | 09a1140ee677f633c28fb887692b417874402356 (diff) | |
sklearn (CART, XGBoost): support mapping of categorical parameter values to scalar indexes
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/functions.py | 8 |
| -rw-r--r-- | lib/parameters.py | 38 |

2 files changed, 36 insertions(+), 10 deletions(-)
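
The patch teaches `param_to_ndarray` to encode categorical (non-numeric) parameter values as scalar indexes instead of dropping those columns: per column, the distinct values are sorted and each value is replaced by its position in that order. A minimal standalone sketch of this encoding (the helper name and example tuples below are illustrative, not part of the patch):

```python
# Illustrative sketch of the categorical-to-scalar encoding added by this
# commit; build_category_to_scalar and the example tuples are made up here.
def build_category_to_scalar(param_tuples):
    distinct = dict()
    for params in param_tuples:
        for i, value in enumerate(params):
            if value is not None:
                distinct.setdefault(i, set()).add(value)
    # Sort each column's distinct values and map value -> sorted position,
    # mirroring what param_to_ndarray does when categorial_to_scalar=True.
    return {
        i: {value: j for j, value in enumerate(sorted(values))}
        for i, values in distinct.items()
    }

mapping = build_category_to_scalar([("BLE", 100), ("LoRa", 100), ("ZigBee", 200)])
# mapping == {0: {"BLE": 0, "LoRa": 1, "ZigBee": 2}, 1: {100: 0, 200: 1}}
encoded = [[mapping[i][v] for i, v in enumerate(t)] for t in [("LoRa", 200)]]
# encoded == [[1, 1]]
```

Note that the patch collects every non-None value, so purely numeric columns are also remapped to their sorted rank whenever `categorial_to_scalar=True` is passed.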
```diff
diff --git a/lib/functions.py b/lib/functions.py
index f4e1709..4a6dac2 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -417,9 +417,10 @@ class SubstateFunction(ModelFunction):
 
 
 class SKLearnRegressionFunction(ModelFunction):
-    def __init__(self, value, regressor, ignore_index):
+    def __init__(self, value, regressor, categorial_to_index, ignore_index):
         super().__init__(value)
         self.regressor = regressor
+        self.categorial_to_index = categorial_to_index
         self.ignore_index = ignore_index
 
     def is_predictable(self, param_list=None):
@@ -442,7 +443,10 @@ class SKLearnRegressionFunction(ModelFunction):
         actual_param_list = list()
         for i, param in enumerate(param_list):
             if not self.ignore_index[i]:
-                actual_param_list.append(param)
+                if i in self.categorial_to_index:
+                    actual_param_list.append(self.categorial_to_index[i][param])
+                else:
+                    actual_param_list.append(param)
         return self.regressor.predict(np.array([actual_param_list]))
 
 
diff --git a/lib/parameters.py b/lib/parameters.py
index ca28cbb..b66c7b4 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -38,9 +38,11 @@ def distinct_param_values(param_tuples):
     return distinct_values
 
 
-def param_to_ndarray(param_tuples, with_nan=True):
+def param_to_ndarray(param_tuples, with_nan=True, categorial_to_scalar=False):
     has_nan = dict()
     has_non_numeric = dict()
+    distinct_values = dict()
+    category_to_scalar = dict()
 
     for param_tuple in param_tuples:
         for i, param in enumerate(param_tuple):
@@ -49,24 +51,40 @@
                     has_nan[i] = True
                 else:
                     has_non_numeric[i] = True
+            if categorial_to_scalar and param is not None:
+                if not i in distinct_values:
+                    distinct_values[i] = set()
+                distinct_values[i].add(param)
+
+    for i, paramset in distinct_values.items():
+        distinct_values[i] = sorted(paramset)
+        category_to_scalar[i] = dict()
+        for j, param in enumerate(distinct_values[i]):
+            category_to_scalar[i][param] = j
 
     ignore_index = dict()
     for i in range(len(param_tuples[0])):
-        if has_non_numeric.get(i, False):
+        if has_non_numeric.get(i, False) and not categorial_to_scalar:
             ignore_index[i] = True
         elif not with_nan and has_nan.get(i, False):
             ignore_index[i] = True
         else:
             ignore_index[i] = False
 
+    print(category_to_scalar, ignore_index)
+
     ret_tuples = list()
     for param_tuple in param_tuples:
         ret_tuple = list()
         for i, param in enumerate(param_tuple):
             if not ignore_index[i]:
-                ret_tuple.append(param)
+                if i in category_to_scalar:
+                    ret_tuple.append(category_to_scalar[i][param])
+                else:
+                    ret_tuple.append(param)
+        print(ret_tuple)
         ret_tuples.append(ret_tuple)
-    return np.asarray(ret_tuples), ignore_index
+    return np.asarray(ret_tuples), category_to_scalar, ignore_index
 
 
 def _depends_on_param(corr_param, std_param, std_lut):
@@ -902,10 +920,12 @@ class ModelAttribute:
             if max_depth == 0:
                 max_depth = None
             cart = DecisionTreeRegressor(max_depth=max_depth)
-            fit_parameters, ignore_index = param_to_ndarray(parameters, with_nan=False)
+            fit_parameters, category_to_index, ignore_index = param_to_ndarray(
+                parameters, with_nan=False, categorial_to_scalar=True
+            )
             cart.fit(fit_parameters, data)
             self.model_function = df.SKLearnRegressionFunction(
-                np.mean(data), cart, ignore_index
+                np.mean(data), cart, category_to_index, ignore_index
             )
             return
 
@@ -921,10 +941,12 @@ class ModelAttribute:
                 gamma=0.01,
                 alpha=0.0006,
             )
-            fit_parameters, ignore_index = param_to_ndarray(parameters, with_nan=False)
+            fit_parameters, category_to_index, ignore_index = param_to_ndarray(
+                parameters, with_nan=False, categorial_to_scalar=True
+            )
             xgb.fit(fit_parameters, data)
             self.model_function = df.SKLearnRegressionFunction(
-                np.mean(data), xgb, ignore_index
+                np.mean(data), xgb, category_to_index, ignore_index
             )
             return
 
```
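
At prediction time, the updated `SKLearnRegressionFunction.eval` translates a query through the same `categorial_to_index` mapping before calling the regressor, so callers can keep passing the original (string) parameter values. A hedged end-to-end sketch of that flow with scikit-learn's `DecisionTreeRegressor` (the mapping, training data, and query below are invented for illustration):

```python
# Hypothetical usage sketch: fit a CART on index-encoded parameters and
# query it with raw categorical values, mirroring the patched eval() logic.
import numpy as np
from sklearn.tree import DecisionTreeRegressor

categorial_to_index = {0: {"BLE": 0, "LoRa": 1, "ZigBee": 2}}  # as built above
fit_parameters = np.array([[0, 100], [1, 100], [2, 200]])      # encoded params
data = np.array([1.5, 2.5, 4.0])                               # made-up targets

cart = DecisionTreeRegressor(max_depth=None)
cart.fit(fit_parameters, data)

# Translate the raw query the same way eval() does before predicting.
param_list = ["LoRa", 100]
actual_param_list = [
    categorial_to_index[i][p] if i in categorial_to_index else p
    for i, p in enumerate(param_list)
]
print(cart.predict(np.array([actual_param_list])))  # -> [2.5]
```

The real `eval()` additionally skips columns flagged in `ignore_index`; the sketch omits that step for brevity.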