author      Birte Kristina Friesel <birte.friesel@uos.de>  2024-02-12 10:51:42 +0100
committer   Birte Kristina Friesel <birte.friesel@uos.de>  2024-02-12 10:51:42 +0100
commit      486690f31dfe8da33fbd0711137844424d0321eb (patch)
tree        5d392998eec345c0583b2fc796a995d655b5cb44 /lib/parameters.py
parent      152e8c6c99d1791d0dcd78c25d2a20a43f55247d (diff)
unfuck param_names / feature_names handling
Diffstat (limited to 'lib/parameters.py')
-rw-r--r--  lib/parameters.py  112
1 file changed, 29 insertions(+), 83 deletions(-)
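
The change below drops the duplicated inline construction of feature_names from ModelAttribute.to_json and ModelAttribute.to_dot; instead, param_names and arg_count are passed to the model-function constructors (FOLFunction, CARTFunction, XGBoostFunction, SplitFunction), and to_dot reads self.model_function.feature_names. As a rough sketch of what that presumably looks like on the model-function side, assuming ignore_index maps feature indexes to booleans as in the removed code (class name and constructor signature are illustrative, not dfatool's actual API):

class TreeModelFunction:
    # Illustrative stand-in for dfatool's tree-based model functions; only the
    # feature_names derivation mirrors the code removed from parameters.py.
    def __init__(self, value, param_names, arg_count=0, ignore_index=None, **kwargs):
        self.value = value
        self.param_names = param_names
        self.arg_count = arg_count
        # assumption: ignore_index maps feature index -> bool, default False
        self.ignore_index = ignore_index or dict()
        # Named parameters first, then positional arguments as "arg0", "arg1", ...,
        # skipping indexes flagged in ignore_index -- the same derivation that
        # to_json and to_dot used to duplicate.
        self.feature_names = [
            name
            for i, name in enumerate(self.param_names)
            if not self.ignore_index.get(i, False)
        ] + [
            f"arg{i - len(self.param_names)}"
            for i in range(
                len(self.param_names), len(self.param_names) + self.arg_count
            )
            if not self.ignore_index.get(i, False)
        ]
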
diff --git a/lib/parameters.py b/lib/parameters.py
index ae4fffb..2e3878f 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -609,65 +609,11 @@ class ModelAttribute:
return f"ModelAttribute<{self.name}, {self.attr}, mean={mean}>"
def to_json(self, **kwargs):
- if type(self.model_function) in (
- df.CARTFunction,
- df.LMTFunction,
- df.XGBoostFunction,
- ):
- import sklearn.tree
-
- feature_names = list(
- map(
- lambda i: self.param_names[i],
- filter(
- lambda i: not self.model_function.ignore_index[i],
- range(len(self.param_names)),
- ),
- )
- )
- feature_names += list(
- map(
- lambda i: f"arg{i-len(self.param_names)}",
- filter(
- lambda i: not self.model_function.ignore_index[i],
- range(
- len(self.param_names),
- len(self.param_names) + self.arg_count,
- ),
- ),
- )
- )
- kwargs["feature_names"] = feature_names
- ret = {
+ return {
"paramNames": self.param_names,
"argCount": self.arg_count,
"modelFunction": self.model_function.to_json(**kwargs),
}
- if type(self.model_function) in (
- df.CARTFunction,
- df.FOLFunction,
- df.XGBoostFunction,
- ):
- feature_names = self.param_names
- feature_names += list(
- map(
- lambda i: f"arg{i-len(self.param_names)}",
- filter(
- lambda i: not self.model_function.ignore_index[i],
- range(
- len(self.param_names),
- len(self.param_names) + self.arg_count,
- ),
- ),
- )
- )
- ret["paramValueToIndex"] = dict(
- map(
- lambda kv: (feature_names[kv[0]], kv[1]),
- self.model_function.categorial_to_index.items(),
- )
- )
- return ret
def to_dref(self, unit=None):
ret = {"mean": (self.mean, unit), "median": (self.median, unit)}
@@ -710,47 +656,27 @@ class ModelAttribute:
self.model_function.to_dot(pydot, graph, self.param_names)
return graph
- feature_names = list(
- map(
- lambda i: self.param_names[i],
- filter(
- lambda i: not self.model_function.ignore_index[i],
- range(len(self.param_names)),
- ),
- )
- )
- feature_names += list(
- map(
- lambda i: f"arg{i-len(self.param_names)}",
- filter(
- lambda i: not self.model_function.ignore_index[i],
- range(
- len(self.param_names),
- len(self.param_names) + self.arg_count,
- ),
- ),
- )
- )
-
if type(self.model_function) == df.CARTFunction:
import sklearn.tree
return sklearn.tree.export_graphviz(
self.model_function.regressor,
out_file=None,
- feature_names=feature_names,
+ feature_names=self.model_function.feature_names,
)
if type(self.model_function) == df.XGBoostFunction:
import xgboost
- self.model_function.regressor.get_booster().feature_names = feature_names
+ self.model_function.regressor.get_booster().feature_names = (
+ self.model_function.feature_names
+ )
return [
xgboost.to_graphviz(self.model_function.regressor, num_trees=i)
for i in range(self.model_function.regressor.n_estimators)
]
if type(self.model_function) == df.LMTFunction:
return self.model_function.regressor.model_to_dot(
- feature_names=feature_names
+ feature_names=self.model_function.feature_names
)
return None
@@ -921,7 +847,13 @@ class ModelAttribute:
for param_index, _ in enumerate(self.param_names):
if len(self.stats.distinct_values_by_param_index[param_index]) < 2:
ignore_param_indexes.append(param_index)
- x = df.FOLFunction(self.median, self.param_names, n_samples=self.data.shape[0])
+ x = df.FOLFunction(
+ self.median,
+ self.param_names,
+ n_samples=self.data.shape[0],
+ param_names=self.param_names,
+ arg_count=self.arg_count,
+ )
x.fit(self.param_values, self.data, ignore_param_indexes=ignore_param_indexes)
if x.fit_success:
self.model_function = x
@@ -1063,6 +995,7 @@ class ModelAttribute:
ignore_index,
n_samples=len(data),
param_names=self.param_names,
+ arg_count=self.arg_count,
)
logger.debug("Fitted sklearn CART")
return
@@ -1150,7 +1083,13 @@ class ModelAttribute:
return
xgb.fit(fit_parameters, np.reshape(data, (-1, 1)))
self.model_function = df.XGBoostFunction(
- np.mean(data), xgb, category_to_index, ignore_index, n_samples=len(data)
+ np.mean(data),
+ xgb,
+ category_to_index,
+ ignore_index,
+ n_samples=len(data),
+ param_names=self.param_names,
+ arg_count=self.arg_count,
)
output_filename = os.getenv("DFATOOL_XGB_DUMP_MODEL", None)
if output_filename:
@@ -1247,6 +1186,7 @@ class ModelAttribute:
ignore_index,
n_samples=len(data),
param_names=self.param_names,
+ arg_count=self.arg_count,
)
return
@@ -1510,4 +1450,10 @@ class ModelAttribute:
assert len(child.values()) >= 2
- return df.SplitFunction(np.mean(data), symbol_index, child, n_samples=len(data))
+ return df.SplitFunction(
+ np.mean(data),
+ symbol_index,
+ self.log_param_names[symbol_index],
+ child,
+ n_samples=len(data),
+ )
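
The removed paramValueToIndex block in to_json renamed the keys of categorial_to_index from feature index to feature name before serialising them. With feature_names now owned by the model function, that mapping can presumably live in its own to_json; a minimal standalone sketch of the renaming step (helper name and example data are made up for illustration):

def param_value_to_index(feature_names, categorial_to_index):
    # Rename categorial_to_index keys from feature index to feature name,
    # mirroring the mapping the removed ModelAttribute.to_json stored under
    # "paramValueToIndex".
    return {
        feature_names[index]: value_map
        for index, value_map in categorial_to_index.items()
    }

# Hypothetical example: feature 1 ("voltage") is categorical and its values
# were mapped to integers during fitting.
# param_value_to_index(["rate", "voltage"], {1: {"low": 0, "high": 1}})
# -> {"voltage": {"low": 0, "high": 1}}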