diff options
-rw-r--r-- | README.md | 1 | ||||
-rw-r--r-- | lib/parameters.py | 8 |
2 files changed, 7 insertions, 2 deletions
@@ -40,3 +40,4 @@ The following variables may be set to alter the behaviour of dfatool components. | `DFATOOL_REGRESSION_SAFE_FUNCTIONS` | **0**, 1 | Use safe functions only (e.g. 1/x returning 1 for x==0) | | `DFATOOL_DTREE_NONBINARY_NODES` | 0, **1** | Enable non-binary nodes (i.e., nodes with more than two children corresponding to enum variables) in decision trees | | `DFATOOL_DTREE_LOSS_IGNORE_SCALAR` | **0**, 1 | Ignore scalar parameters when computing the loss for split node candidates. Instead of computing the loss of a single partition for each `x_i == j`, compute the loss of partitions for `x_i == j` in which non-scalar parameters vary and scalar parameters are constant. This way, scalar parameters do not affect the decision about which non-scalar parameter to use for splitting. | +| `DFATOOL_PARAM_CATEGORIAL_TO_SCALAR` | **0**, 1 | Some models (e.g. sklearn CART, XGBoost) do not support categorial parameters. Ignore them (0) or convert them to scalar indexes (1). | diff --git a/lib/parameters.py b/lib/parameters.py index b66c7b4..5781bee 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -913,6 +913,10 @@ class ModelAttribute: :returns: SplitFunction or StaticFunction """ + categorial_to_scalar = bool( + int(os.getenv("DFATOOL_PARAM_CATEGORIAL_TO_SCALAR", "0")) + ) + if with_sklearn_cart: from sklearn.tree import DecisionTreeRegressor @@ -921,7 +925,7 @@ class ModelAttribute: max_depth = None cart = DecisionTreeRegressor(max_depth=max_depth) fit_parameters, category_to_index, ignore_index = param_to_ndarray( - parameters, with_nan=False, categorial_to_scalar=True + parameters, with_nan=False, categorial_to_scalar=categorial_to_scalar ) cart.fit(fit_parameters, data) self.model_function = df.SKLearnRegressionFunction( @@ -942,7 +946,7 @@ class ModelAttribute: alpha=0.0006, ) fit_parameters, category_to_index, ignore_index = param_to_ndarray( - parameters, with_nan=False, categorial_to_scalar=True + parameters, with_nan=False, 
categorial_to_scalar=categorial_to_scalar ) xgb.fit(fit_parameters, data) self.model_function = df.SKLearnRegressionFunction( |