diff options
author | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-02-19 10:32:39 +0100 |
---|---|---|
committer | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-02-19 10:32:39 +0100 |
commit | cdf3f2ffe49d9836be74e60a83365d26f378fc88 (patch) | |
tree | f67b8e1f35ebdd4edf67402f6bf0bfe0eda6be32 /lib | |
parent | 01ddab4bc7cff2c06b67e6327c848baa8141ed5c (diff) |
categorial → categorical
Diffstat (limited to 'lib')
-rw-r--r-- | lib/cli.py | 4 | ||||
-rw-r--r-- | lib/functions.py | 40 | ||||
-rw-r--r-- | lib/parameters.py | 14 | ||||
-rw-r--r-- | lib/utils.py | 12 |
4 files changed, 36 insertions, 34 deletions
@@ -576,7 +576,7 @@ def add_standard_arguments(parser): ) parser.add_argument( "--param-shift", - metavar="<key>=<+|-|*|/><value>|none-to-0|categorial;...", + metavar="<key>=<+|-|*|/><value>|none-to-0|categorical;...", type=str, help="Adjust parameter values before passing them to model generation", ) @@ -695,7 +695,7 @@ def parse_shift_function(param_name, param_shift): elif param_shift.startswith("/"): param_shift_value = float(param_shift[1:]) return lambda p: p / param_shift_value - elif param_shift == "categorial": + elif param_shift == "categorical": return lambda p: "=" + str(p) elif param_shift == "none-to-0": return lambda p: p or 0 diff --git a/lib/functions.py b/lib/functions.py index 6366f0a..4940956 100644 --- a/lib/functions.py +++ b/lib/functions.py @@ -590,7 +590,7 @@ class SKLearnRegressionFunction(ModelFunction): always_predictable = True has_eval_arr = True - def __init__(self, value, regressor, categorial_to_index, ignore_index, **kwargs): + def __init__(self, value, regressor, categorical_to_index, ignore_index, **kwargs): # Needed for JSON export self.param_names = kwargs.pop("param_names") self.arg_count = kwargs.pop("arg_count") @@ -601,7 +601,7 @@ class SKLearnRegressionFunction(ModelFunction): super().__init__(value, **kwargs) self.regressor = regressor - self.categorial_to_index = categorial_to_index + self.categorical_to_index = categorical_to_index self.ignore_index = ignore_index # SKLearnRegressionFunction descendants use self.param_names \ self.ignore_index as features. @@ -649,15 +649,15 @@ class SKLearnRegressionFunction(ModelFunction): actual_param_list = list() for i, param in enumerate(param_list): if not self.ignore_index[i]: - if i in self.categorial_to_index: + if i in self.categorical_to_index: try: - actual_param_list.append(self.categorial_to_index[i][param]) + actual_param_list.append(self.categorical_to_index[i][param]) except KeyError: # param was not part of training data. substitute an unused scalar. # Note that all param values which were not part of training data map to the same scalar this way. # This should be harmless. actual_param_list.append( - max(self.categorial_to_index[i].values()) + 1 + max(self.categorical_to_index[i].values()) + 1 ) else: actual_param_list.append(int(param)) @@ -672,15 +672,17 @@ class SKLearnRegressionFunction(ModelFunction): actual_param_list = list() for i, param in enumerate(param_tuple): if not self.ignore_index[i]: - if i in self.categorial_to_index: + if i in self.categorical_to_index: try: - actual_param_list.append(self.categorial_to_index[i][param]) + actual_param_list.append( + self.categorical_to_index[i][param] + ) except KeyError: # param was not part of training data. substitute an unused scalar. # Note that all param values which were not part of training data map to the same scalar this way. # This should be harmless. actual_param_list.append( - max(self.categorial_to_index[i].values()) + 1 + max(self.categorical_to_index[i].values()) + 1 ) else: actual_param_list.append(int(param)) @@ -691,7 +693,7 @@ class SKLearnRegressionFunction(ModelFunction): def to_json(self, **kwargs): ret = super().to_json(**kwargs) - # Note: categorial_to_index uses param_names, not feature_names + # Note: categorical_to_index uses param_names, not feature_names param_names = self.param_names + list( map( lambda i: f"arg{i-len(self.param_names)}", @@ -704,7 +706,7 @@ class SKLearnRegressionFunction(ModelFunction): ret["paramValueToIndex"] = dict( map( lambda kv: (param_names[kv[0]], kv[1]), - self.categorial_to_index.items(), + self.categorical_to_index.items(), ) ) @@ -958,17 +960,17 @@ class FOLFunction(ModelFunction): self.fit_success = False def fit(self, param_values, data, ignore_param_indexes=None): - self.categorial_to_scalar = bool( - int(os.getenv("DFATOOL_PARAM_CATEGORIAL_TO_SCALAR", "0")) + self.categorical_to_scalar = bool( + int(os.getenv("DFATOOL_PARAM_CATEGORICAL_TO_SCALAR", "0")) ) second_order = int(os.getenv("DFATOOL_FOL_SECOND_ORDER", "0")) - fit_parameters, categorial_to_index, ignore_index = param_to_ndarray( + fit_parameters, categorical_to_index, ignore_index = param_to_ndarray( param_values, with_nan=False, - categorial_to_scalar=self.categorial_to_scalar, + categorical_to_scalar=self.categorical_to_scalar, ignore_indexes=ignore_param_indexes, ) - self.categorial_to_index = categorial_to_index + self.categorical_to_index = categorical_to_index self.ignore_index = ignore_index fit_parameters = fit_parameters.swapaxes(0, 1) @@ -1052,15 +1054,15 @@ class FOLFunction(ModelFunction): actual_param_list = list() for i, param in enumerate(param_list): if not self.ignore_index[i]: - if i in self.categorial_to_index: + if i in self.categorical_to_index: try: - actual_param_list.append(self.categorial_to_index[i][param]) + actual_param_list.append(self.categorical_to_index[i][param]) except KeyError: # param was not part of training data. substitute an unused scalar. # Note that all param values which were not part of training data map to the same scalar this way. # This should be harmless. actual_param_list.append( - max(self.categorial_to_index[i].values()) + 1 + max(self.categorical_to_index[i].values()) + 1 ) else: actual_param_list.append(int(param)) @@ -1105,7 +1107,7 @@ class FOLFunction(ModelFunction): def hyper_to_dref(self): return { - "fol/categorial to scalar": int(self.categorial_to_scalar), + "fol/categorical to scalar": int(self.categorical_to_scalar), } diff --git a/lib/parameters.py b/lib/parameters.py index 2e3878f..f367eb9 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -918,7 +918,7 @@ class ModelAttribute: :param data: Measurements. [data 1, data 2, data 3, ...] :param with_function_leaves: Use fitted function sets to generate function leaves for scalar parameters :param with_nonbinary_nodes: Allow non-binary nodes for enum and scalar parameters (i.e., nodes with more than two children) - :param with_sklearn_cart: Use `sklearn.tree.DecisionTreeRegressor` CART implementation for tree generation. Does not support categorial (enum) + :param with_sklearn_cart: Use `sklearn.tree.DecisionTreeRegressor` CART implementation for tree generation. Does not support categorical (enum) and sparse parameters. Both are ignored during fitting. All other options are ignored as well. :param with_sklearn_decart: Use `sklearn.tree.DecisionTreeRegressor` CART implementation in DECART mode for tree generation. CART limitations apply; additionaly, scalar parameters are ignored during fitting. @@ -928,8 +928,8 @@ class ModelAttribute: :returns: SplitFunction or StaticFunction """ - categorial_to_scalar = bool( - int(os.getenv("DFATOOL_PARAM_CATEGORIAL_TO_SCALAR", "0")) + categorical_to_scalar = bool( + int(os.getenv("DFATOOL_PARAM_CATEGORICAL_TO_SCALAR", "0")) ) if with_function_leaves is None: with_function_leaves = bool( @@ -969,13 +969,13 @@ class ModelAttribute: fit_parameters, category_to_index, ignore_index = param_to_ndarray( parameters, with_nan=False, - categorial_to_scalar=categorial_to_scalar, + categorical_to_scalar=categorical_to_scalar, ) elif with_sklearn_decart: fit_parameters, category_to_index, ignore_index = param_to_ndarray( parameters, with_nan=False, - categorial_to_scalar=categorial_to_scalar, + categorical_to_scalar=categorical_to_scalar, ignore_indexes=self.scalar_param_indexes, ) if fit_parameters.shape[1] == 0: @@ -1071,7 +1071,7 @@ class ModelAttribute: reg_lambda=reg_lambda, ) fit_parameters, category_to_index, ignore_index = param_to_ndarray( - parameters, with_nan=False, categorial_to_scalar=categorial_to_scalar + parameters, with_nan=False, categorical_to_scalar=categorical_to_scalar ) if fit_parameters.shape[1] == 0: logger.warning( @@ -1159,7 +1159,7 @@ class ModelAttribute: criterion=criterion, ) fit_parameters, category_to_index, ignore_index = param_to_ndarray( - parameters, with_nan=False, categorial_to_scalar=categorial_to_scalar + parameters, with_nan=False, categorical_to_scalar=categorical_to_scalar ) if fit_parameters.shape[1] == 0: logger.warning( diff --git a/lib/utils.py b/lib/utils.py index d6cdfc5..c16e419 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -289,7 +289,7 @@ def partition_by_param(data, param_values, ignore_parameters=list()): def param_to_ndarray( - param_tuples, with_nan=True, categorial_to_scalar=False, ignore_indexes=list() + param_tuples, with_nan=True, categorical_to_scalar=False, ignore_indexes=list() ): has_nan = dict() has_non_numeric = dict() @@ -297,7 +297,7 @@ def param_to_ndarray( category_to_scalar = dict() logger.debug( - f"converting param_to_ndarray(with_nan={with_nan}, categorial_to_scalar={categorial_to_scalar}, ignore_indexes={ignore_indexes})" + f"converting param_to_ndarray(with_nan={with_nan}, categorical_to_scalar={categorical_to_scalar}, ignore_indexes={ignore_indexes})" ) for param_tuple in param_tuples: @@ -307,7 +307,7 @@ def param_to_ndarray( has_nan[i] = True else: has_non_numeric[i] = True - if categorial_to_scalar and param is not None: + if categorical_to_scalar and param is not None: if not i in distinct_values: distinct_values[i] = set() distinct_values[i].add(param) @@ -320,7 +320,7 @@ def param_to_ndarray( ignore_index = dict() for i in range(len(param_tuples[0])): - if has_non_numeric.get(i, False) and not categorial_to_scalar: + if has_non_numeric.get(i, False) and not categorical_to_scalar: ignore_index[i] = True elif not with_nan and has_nan.get(i, False): ignore_index[i] = True @@ -337,7 +337,7 @@ def param_to_ndarray( if not ignore_index[i]: if i in category_to_scalar and not is_numeric(param): ret_tuple.append(category_to_scalar[i][param]) - elif categorial_to_scalar: + elif categorical_to_scalar: ret_tuple.append(soft_cast_int(param)) else: ret_tuple.append(param) @@ -357,7 +357,7 @@ def param_dict_to_list(param_dict, parameter_names, default=None): def observations_enum_to_bool(observations: list, kconfig=False): """ - Convert enum / categorial observations to boolean-only ones. + Convert enum / categorical observations to boolean-only ones. 'observations' is altered in-place. DEPRECATED. |