author    Birte Kristina Friesel <birte.friesel@uos.de>  2024-02-19 10:32:39 +0100
committer Birte Kristina Friesel <birte.friesel@uos.de>  2024-02-19 10:32:39 +0100
commit    cdf3f2ffe49d9836be74e60a83365d26f378fc88 (patch)
tree      f67b8e1f35ebdd4edf67402f6bf0bfe0eda6be32 /lib
parent    01ddab4bc7cff2c06b67e6327c848baa8141ed5c (diff)
categorial → categorical
Diffstat (limited to 'lib')
-rw-r--r--  lib/cli.py         4
-rw-r--r--  lib/functions.py  40
-rw-r--r--  lib/parameters.py 14
-rw-r--r--  lib/utils.py      12
4 files changed, 36 insertions(+), 34 deletions(-)
diff --git a/lib/cli.py b/lib/cli.py
index 3da6fce..f289d4a 100644
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -576,7 +576,7 @@ def add_standard_arguments(parser):
)
parser.add_argument(
"--param-shift",
- metavar="<key>=<+|-|*|/><value>|none-to-0|categorial;...",
+ metavar="<key>=<+|-|*|/><value>|none-to-0|categorical;...",
type=str,
help="Adjust parameter values before passing them to model generation",
)
@@ -695,7 +695,7 @@ def parse_shift_function(param_name, param_shift):
elif param_shift.startswith("/"):
param_shift_value = float(param_shift[1:])
return lambda p: p / param_shift_value
- elif param_shift == "categorial":
+ elif param_shift == "categorical":
return lambda p: "=" + str(p)
elif param_shift == "none-to-0":
return lambda p: p or 0
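
The renamed keyword is part of the --param-shift grammar shown in the metavar above. As a minimal sketch of what parse_shift_function produces (the parameter names "txpower", "mode", and "retries" are invented for illustration), each shift spec becomes a callable; "categorical" prefixes the value with "=" so downstream model generation treats it as an enum level rather than a number:

```python
# Hypothetical shift functions as parse_shift_function would return them;
# names and values are assumptions for this sketch.
shifts = {
    "txpower": lambda p: p * 2,      # from e.g. --param-shift "txpower=*2"
    "mode": lambda p: "=" + str(p),  # from e.g. --param-shift "mode=categorical"
    "retries": lambda p: p or 0,     # from e.g. --param-shift "retries=none-to-0"
}
print(shifts["txpower"](21))    # -> 42
print(shifts["mode"](3))        # -> '=3'
print(shifts["retries"](None))  # -> 0
```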
diff --git a/lib/functions.py b/lib/functions.py
index 6366f0a..4940956 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -590,7 +590,7 @@ class SKLearnRegressionFunction(ModelFunction):
always_predictable = True
has_eval_arr = True
- def __init__(self, value, regressor, categorial_to_index, ignore_index, **kwargs):
+ def __init__(self, value, regressor, categorical_to_index, ignore_index, **kwargs):
# Needed for JSON export
self.param_names = kwargs.pop("param_names")
self.arg_count = kwargs.pop("arg_count")
@@ -601,7 +601,7 @@ class SKLearnRegressionFunction(ModelFunction):
super().__init__(value, **kwargs)
self.regressor = regressor
- self.categorial_to_index = categorial_to_index
+ self.categorical_to_index = categorical_to_index
self.ignore_index = ignore_index
# SKLearnRegressionFunction descendants use self.param_names \ self.ignore_index as features.
@@ -649,15 +649,15 @@ class SKLearnRegressionFunction(ModelFunction):
actual_param_list = list()
for i, param in enumerate(param_list):
if not self.ignore_index[i]:
- if i in self.categorial_to_index:
+ if i in self.categorical_to_index:
try:
- actual_param_list.append(self.categorial_to_index[i][param])
+ actual_param_list.append(self.categorical_to_index[i][param])
except KeyError:
# param was not part of training data. substitute an unused scalar.
# Note that all param values which were not part of training data map to the same scalar this way.
# This should be harmless.
actual_param_list.append(
- max(self.categorial_to_index[i].values()) + 1
+ max(self.categorical_to_index[i].values()) + 1
)
else:
actual_param_list.append(int(param))
@@ -672,15 +672,17 @@ class SKLearnRegressionFunction(ModelFunction):
actual_param_list = list()
for i, param in enumerate(param_tuple):
if not self.ignore_index[i]:
- if i in self.categorial_to_index:
+ if i in self.categorical_to_index:
try:
- actual_param_list.append(self.categorial_to_index[i][param])
+ actual_param_list.append(
+ self.categorical_to_index[i][param]
+ )
except KeyError:
# param was not part of training data. substitute an unused scalar.
# Note that all param values which were not part of training data map to the same scalar this way.
# This should be harmless.
actual_param_list.append(
- max(self.categorial_to_index[i].values()) + 1
+ max(self.categorical_to_index[i].values()) + 1
)
else:
actual_param_list.append(int(param))
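
The lookup-with-fallback pattern appears in both of the hunks above (eval and the array variant). A self-contained sketch of its behavior, with an invented mapping in place of the categorical_to_index built from training data:

```python
# Invented example: parameter index 1 is categorical, index 0 is scalar.
categorical_to_index = {1: {"LPM": 0, "RUN": 1}}
ignore_index = {0: False, 1: False}

def encode(param_list):
    actual_param_list = []
    for i, param in enumerate(param_list):
        if ignore_index[i]:
            continue
        if i in categorical_to_index:
            try:
                actual_param_list.append(categorical_to_index[i][param])
            except KeyError:
                # every value unseen during training maps to the same unused scalar
                actual_param_list.append(max(categorical_to_index[i].values()) + 1)
        else:
            actual_param_list.append(int(param))
    return actual_param_list

print(encode([100, "RUN"]))    # -> [100, 1]
print(encode([100, "SLEEP"]))  # -> [100, 2]  (shared fallback scalar)
```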
@@ -691,7 +693,7 @@ class SKLearnRegressionFunction(ModelFunction):
def to_json(self, **kwargs):
ret = super().to_json(**kwargs)
- # Note: categorial_to_index uses param_names, not feature_names
+ # Note: categorical_to_index uses param_names, not feature_names
param_names = self.param_names + list(
map(
lambda i: f"arg{i-len(self.param_names)}",
@@ -704,7 +706,7 @@ class SKLearnRegressionFunction(ModelFunction):
ret["paramValueToIndex"] = dict(
map(
lambda kv: (param_names[kv[0]], kv[1]),
- self.categorial_to_index.items(),
+ self.categorical_to_index.items(),
)
)
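
Assuming a model with parameters ["voltage", "mode"] where index 1 is categorical, the mapping exported by to_json looks like this (a sketch of the logic above, not verified dfatool output):

```python
param_names = ["voltage", "mode"]  # assumed parameter names
categorical_to_index = {1: {"LPM": 0, "RUN": 1}}
ret = {}
ret["paramValueToIndex"] = dict(
    map(lambda kv: (param_names[kv[0]], kv[1]), categorical_to_index.items())
)
print(ret)  # {'paramValueToIndex': {'mode': {'LPM': 0, 'RUN': 1}}}
```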
@@ -958,17 +960,17 @@ class FOLFunction(ModelFunction):
self.fit_success = False
def fit(self, param_values, data, ignore_param_indexes=None):
- self.categorial_to_scalar = bool(
- int(os.getenv("DFATOOL_PARAM_CATEGORIAL_TO_SCALAR", "0"))
+ self.categorical_to_scalar = bool(
+ int(os.getenv("DFATOOL_PARAM_CATEGORICAL_TO_SCALAR", "0"))
)
second_order = int(os.getenv("DFATOOL_FOL_SECOND_ORDER", "0"))
- fit_parameters, categorial_to_index, ignore_index = param_to_ndarray(
+ fit_parameters, categorical_to_index, ignore_index = param_to_ndarray(
param_values,
with_nan=False,
- categorial_to_scalar=self.categorial_to_scalar,
+ categorical_to_scalar=self.categorical_to_scalar,
ignore_indexes=ignore_param_indexes,
)
- self.categorial_to_index = categorial_to_index
+ self.categorical_to_index = categorical_to_index
self.ignore_index = ignore_index
fit_parameters = fit_parameters.swapaxes(0, 1)
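
Both toggles read in fit() are environment variables; a quick sketch of how they evaluate (the value set here is an assumption for the demo, not a documented default):

```python
import os

os.environ["DFATOOL_PARAM_CATEGORICAL_TO_SCALAR"] = "1"  # assumed for demo

categorical_to_scalar = bool(
    int(os.getenv("DFATOOL_PARAM_CATEGORICAL_TO_SCALAR", "0"))
)
second_order = int(os.getenv("DFATOOL_FOL_SECOND_ORDER", "0"))
print(categorical_to_scalar, second_order)  # True 0
```

Note that the rename also affects this environment variable: scripts that still set DFATOOL_PARAM_CATEGORIAL_TO_SCALAR will silently fall back to the default after this commit.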
@@ -1052,15 +1054,15 @@ class FOLFunction(ModelFunction):
actual_param_list = list()
for i, param in enumerate(param_list):
if not self.ignore_index[i]:
- if i in self.categorial_to_index:
+ if i in self.categorical_to_index:
try:
- actual_param_list.append(self.categorial_to_index[i][param])
+ actual_param_list.append(self.categorical_to_index[i][param])
except KeyError:
# param was not part of training data. substitute an unused scalar.
# Note that all param values which were not part of training data map to the same scalar this way.
# This should be harmless.
actual_param_list.append(
- max(self.categorial_to_index[i].values()) + 1
+ max(self.categorical_to_index[i].values()) + 1
)
else:
actual_param_list.append(int(param))
@@ -1105,7 +1107,7 @@ class FOLFunction(ModelFunction):
def hyper_to_dref(self):
return {
- "fol/categorial to scalar": int(self.categorial_to_scalar),
+ "fol/categorical to scalar": int(self.categorical_to_scalar),
}
diff --git a/lib/parameters.py b/lib/parameters.py
index 2e3878f..f367eb9 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -918,7 +918,7 @@ class ModelAttribute:
:param data: Measurements. [data 1, data 2, data 3, ...]
:param with_function_leaves: Use fitted function sets to generate function leaves for scalar parameters
:param with_nonbinary_nodes: Allow non-binary nodes for enum and scalar parameters (i.e., nodes with more than two children)
- :param with_sklearn_cart: Use `sklearn.tree.DecisionTreeRegressor` CART implementation for tree generation. Does not support categorial (enum)
+ :param with_sklearn_cart: Use `sklearn.tree.DecisionTreeRegressor` CART implementation for tree generation. Does not support categorical (enum)
and sparse parameters. Both are ignored during fitting. All other options are ignored as well.
:param with_sklearn_decart: Use `sklearn.tree.DecisionTreeRegressor` CART implementation in DECART mode for tree generation. CART limitations
apply; additionally, scalar parameters are ignored during fitting.
@@ -928,8 +928,8 @@ class ModelAttribute:
:returns: SplitFunction or StaticFunction
"""
- categorial_to_scalar = bool(
- int(os.getenv("DFATOOL_PARAM_CATEGORIAL_TO_SCALAR", "0"))
+ categorical_to_scalar = bool(
+ int(os.getenv("DFATOOL_PARAM_CATEGORICAL_TO_SCALAR", "0"))
)
if with_function_leaves is None:
with_function_leaves = bool(
@@ -969,13 +969,13 @@ class ModelAttribute:
fit_parameters, category_to_index, ignore_index = param_to_ndarray(
parameters,
with_nan=False,
- categorial_to_scalar=categorial_to_scalar,
+ categorical_to_scalar=categorical_to_scalar,
)
elif with_sklearn_decart:
fit_parameters, category_to_index, ignore_index = param_to_ndarray(
parameters,
with_nan=False,
- categorial_to_scalar=categorial_to_scalar,
+ categorical_to_scalar=categorical_to_scalar,
ignore_indexes=self.scalar_param_indexes,
)
if fit_parameters.shape[1] == 0:
@@ -1071,7 +1071,7 @@ class ModelAttribute:
reg_lambda=reg_lambda,
)
fit_parameters, category_to_index, ignore_index = param_to_ndarray(
- parameters, with_nan=False, categorial_to_scalar=categorial_to_scalar
+ parameters, with_nan=False, categorical_to_scalar=categorical_to_scalar
)
if fit_parameters.shape[1] == 0:
logger.warning(
@@ -1159,7 +1159,7 @@ class ModelAttribute:
criterion=criterion,
)
fit_parameters, category_to_index, ignore_index = param_to_ndarray(
- parameters, with_nan=False, categorial_to_scalar=categorial_to_scalar
+ parameters, with_nan=False, categorical_to_scalar=categorical_to_scalar
)
if fit_parameters.shape[1] == 0:
logger.warning(
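
The reason these call sites need param_to_ndarray at all is that sklearn's tree learners only accept numeric feature matrices. A minimal sketch (feature values invented):

```python
import numpy as np
from sklearn.tree import DecisionTreeRegressor

# Already-encoded parameters: column 0 is scalar, column 1 is a categorical
# parameter mapped to integers as param_to_ndarray does.
fit_parameters = np.array([[1, 0], [1, 1], [2, 0], [2, 1]])
data = np.array([10.0, 12.0, 20.0, 24.0])
cart = DecisionTreeRegressor().fit(fit_parameters, data)
print(cart.predict([[2, 1]]))  # -> [24.]
```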
diff --git a/lib/utils.py b/lib/utils.py
index d6cdfc5..c16e419 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -289,7 +289,7 @@ def partition_by_param(data, param_values, ignore_parameters=list()):
def param_to_ndarray(
- param_tuples, with_nan=True, categorial_to_scalar=False, ignore_indexes=list()
+ param_tuples, with_nan=True, categorical_to_scalar=False, ignore_indexes=list()
):
has_nan = dict()
has_non_numeric = dict()
@@ -297,7 +297,7 @@ def param_to_ndarray(
category_to_scalar = dict()
logger.debug(
- f"converting param_to_ndarray(with_nan={with_nan}, categorial_to_scalar={categorial_to_scalar}, ignore_indexes={ignore_indexes})"
+ f"converting param_to_ndarray(with_nan={with_nan}, categorical_to_scalar={categorical_to_scalar}, ignore_indexes={ignore_indexes})"
)
for param_tuple in param_tuples:
@@ -307,7 +307,7 @@ def param_to_ndarray(
has_nan[i] = True
else:
has_non_numeric[i] = True
- if categorial_to_scalar and param is not None:
+ if categorical_to_scalar and param is not None:
if not i in distinct_values:
distinct_values[i] = set()
distinct_values[i].add(param)
@@ -320,7 +320,7 @@ def param_to_ndarray(
ignore_index = dict()
for i in range(len(param_tuples[0])):
- if has_non_numeric.get(i, False) and not categorial_to_scalar:
+ if has_non_numeric.get(i, False) and not categorical_to_scalar:
ignore_index[i] = True
elif not with_nan and has_nan.get(i, False):
ignore_index[i] = True
@@ -337,7 +337,7 @@ def param_to_ndarray(
if not ignore_index[i]:
if i in category_to_scalar and not is_numeric(param):
ret_tuple.append(category_to_scalar[i][param])
- elif categorial_to_scalar:
+ elif categorical_to_scalar:
ret_tuple.append(soft_cast_int(param))
else:
ret_tuple.append(param)
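
A standalone sketch of the category-to-scalar mapping assembled above (simplified: the real function also handles NaN, ignore_indexes, and per-column bookkeeping, and the enumeration order shown here is one plausible choice, not necessarily dfatool's):

```python
param_tuples = [(100, "LPM"), (200, "RUN"), (200, "LPM")]  # invented data
# Column 1 is non-numeric; with categorical_to_scalar=True its distinct
# values are enumerated and substituted into the returned tuples.
distinct_values = sorted({t[1] for t in param_tuples})
category_to_scalar = {1: {v: j for j, v in enumerate(distinct_values)}}
encoded = [(t[0], category_to_scalar[1][t[1]]) for t in param_tuples]
print(encoded)  # -> [(100, 0), (200, 1), (200, 0)]
```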
@@ -357,7 +357,7 @@ def param_dict_to_list(param_dict, parameter_names, default=None):
def observations_enum_to_bool(observations: list, kconfig=False):
"""
- Convert enum / categorial observations to boolean-only ones.
+ Convert enum / categorical observations to boolean-only ones.
'observations' is altered in-place.
DEPRECATED.
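
Only the docstring is visible in this hunk, but the intent, expanding each enum parameter into per-value boolean parameters, can be sketched as follows (the observation layout and key names are assumptions, not dfatool's actual structure):

```python
# Assumed observation layout; the real in-place mutation may differ.
observation = {"param": {"mode": "LPM", "voltage": 3}}
enum_values = {"mode": ("LPM", "RUN")}

for name, values in enum_values.items():
    level = observation["param"].pop(name)
    for v in values:
        observation["param"][f"{name}={v}"] = level == v

print(observation["param"])
# -> {'voltage': 3, 'mode=LPM': True, 'mode=RUN': False}
```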