-rw-r--r--  README.md          | 1
-rw-r--r--  lib/parameters.py  | 8
2 files changed, 7 insertions, 2 deletions
diff --git a/README.md b/README.md
index 58d235b..9d5a4b1 100644
--- a/README.md
+++ b/README.md
@@ -40,3 +40,4 @@ The following variables may be set to alter the behaviour of dfatool components.
| `DFATOOL_REGRESSION_SAFE_FUNCTIONS` | **0**, 1 | Use safe functions only (e.g. 1/x returning 1 for x==0) |
| `DFATOOL_DTREE_NONBINARY_NODES` | 0, **1** | Enable non-binary nodes (i.e., nodes with more than two children corresponding to enum variables) in decision trees |
| `DFATOOL_DTREE_LOSS_IGNORE_SCALAR` | **0**, 1 | Ignore scalar parameters when computing the loss for split node candidates. Instead of computing the loss of a single partition for each `x_i == j`, compute the loss of partitions for `x_i == j` in which non-scalar parameters vary and scalar parameters are constant. This way, scalar parameters do not affect the decision about which non-scalar parameter to use for splitting. |
+| `DFATOOL_PARAM_CATEGORIAL_TO_SCALAR` | **0**, 1 | Some models (e.g. sklearn CART, XGBoost) do not support categorial parameters. Ignore them (0) or convert them to scalar indexes (1). |
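For illustration, a minimal sketch of how the new flag might be enabled from Python before model generation (an assumed usage pattern, not part of this commit): since lib/parameters.py reads the variable lazily via os.getenv(), it only has to be present in the environment before the sklearn CART or XGBoost regressor is fitted.

    import os

    # Hypothetical usage sketch: convert categorial (enum) parameters to
    # scalar indexes for the sklearn CART / XGBoost backends instead of
    # ignoring them. Set this before any model fitting is triggered.
    os.environ["DFATOOL_PARAM_CATEGORIAL_TO_SCALAR"] = "1"

    # ... build models as usual; ModelAttribute picks the flag up via
    # os.getenv() when it fits the regressor (see lib/parameters.py below).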
diff --git a/lib/parameters.py b/lib/parameters.py
index b66c7b4..5781bee 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -913,6 +913,10 @@ class ModelAttribute:
         :returns: SplitFunction or StaticFunction
         """
+        categorial_to_scalar = bool(
+            int(os.getenv("DFATOOL_PARAM_CATEGORIAL_TO_SCALAR", "0"))
+        )
+
         if with_sklearn_cart:
             from sklearn.tree import DecisionTreeRegressor
@@ -921,7 +925,7 @@
                 max_depth = None
             cart = DecisionTreeRegressor(max_depth=max_depth)
             fit_parameters, category_to_index, ignore_index = param_to_ndarray(
-                parameters, with_nan=False, categorial_to_scalar=True
+                parameters, with_nan=False, categorial_to_scalar=categorial_to_scalar
             )
             cart.fit(fit_parameters, data)
             self.model_function = df.SKLearnRegressionFunction(
@@ -942,7 +946,7 @@
                 alpha=0.0006,
             )
             fit_parameters, category_to_index, ignore_index = param_to_ndarray(
-                parameters, with_nan=False, categorial_to_scalar=True
+                parameters, with_nan=False, categorial_to_scalar=categorial_to_scalar
            )
             xgb.fit(fit_parameters, data)
             self.model_function = df.SKLearnRegressionFunction(
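For context, here is a rough sketch of what the `categorial_to_scalar` conversion amounts to. This is an illustration only, not dfatool's actual `param_to_ndarray` implementation: each non-numeric parameter value is mapped to a per-column integer index so that sklearn CART and XGBoost can consume it, and the mapping is returned alongside the converted data.

    # Illustrative sketch only -- not dfatool's actual param_to_ndarray.
    def categorial_to_scalar_sketch(parameters):
        """Map non-numeric parameter values to per-column integer indexes."""
        category_to_index = dict()  # (column, value) -> index
        next_index = dict()         # column -> next free index
        converted = list()
        for row in parameters:
            converted_row = list()
            for column, value in enumerate(row):
                if isinstance(value, (int, float)):
                    # scalar parameters pass through unchanged
                    converted_row.append(value)
                    continue
                key = (column, value)
                if key not in category_to_index:
                    # assign the next free index for an unseen categorial value
                    category_to_index[key] = next_index.get(column, 0)
                    next_index[column] = category_to_index[key] + 1
                converted_row.append(category_to_index[key])
            converted.append(converted_row)
        return converted, category_to_index

    # e.g. [["LoRa", 10], ["FSK", 20]]  ->  [[0, 10], [1, 20]]

With `DFATOOL_PARAM_CATEGORIAL_TO_SCALAR=0`, such columns are instead dropped (the `ignore_index` return value above marks them), which was previously the only behaviour exposed to these backends.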