summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Friesel <daniel.friesel@uos.de>2022-01-17 08:43:04 +0100
committerDaniel Friesel <daniel.friesel@uos.de>2022-01-17 08:44:55 +0100
commit3340bab4196c2737236216ad3845afa74d0b7f39 (patch)
treeeedf6095b75a21906b267533a41ee9f4ae83ff85
parent5ad2c5ef6d84763b579c22121052e875e434a119 (diff)
XGBoost: add env variables for num regressors and max depth
-rw-r--r--README.md6
-rw-r--r--lib/parameters.py16
2 files changed, 14 insertions, 8 deletions
diff --git a/README.md b/README.md
index c4103e3..50c29e3 100644
--- a/README.md
+++ b/README.md
@@ -32,8 +32,10 @@ The following variables may be set to alter the behaviour of dfatool components.
| `DFATOOL_DTREE_ENABLED` | 0, **1** | Use decision trees in get\_fitted |
| `DFATOOL_DTREE_FUNCTION_LEAVES` | 0, **1** | Use functions (fitted via linear regression) in decision tree leaves when modeling numeric parameters with at least three distinct values. If 0, integer parameters are treated as enums instead. |
| `DFATOOL_DTREE_SKLEARN_CART` | **0**, 1 | Use sklearn CART ("Decision Tree Regression") algorithm for decision tree generation. Uses binary nodes and supports splits on scalar variables. Overrides `FUNCTION_LEAVES` (=0) and `NONBINARY_NODES` (=0). |
-| `DFATOOL_CART_MAX_DEPTH` | **0** ... *n* | maximum depth for sklearn CART. Default: unlimited. |
-| `DFATOOL_USE_XGBOOST` | **0**, 1 | Use Extreme Gradient Boosting algorithm for decision tree generation. |
+| `DFATOOL_CART_MAX_DEPTH` | **0** .. *n* | maximum depth for sklearn CART. Default: unlimited. |
+| `DFATOOL_USE_XGBOOST` | **0**, 1 | Use Extreme Gradient Boosting algorithm for decision forest generation. |
+| `DFATOOL_XGB_N_ESTIMATORS` | 1 .. **100** .. *n* | Number of estimators (i.e., trees) for XGBoost. |
+| `DFATOOL_XGB_MAX_DEPTH` | 2 .. **10** .. *n* | Maximum XGBoost tree depth. |
| `DFATOOL_KCONF_WITH_CHOICE_NODES` | 0, **1** | Generate enum parameters from kconfig choice nodes; ignore corresponding boolean config options. |
| `DFATOOL_KCONF_IGNORE_NUMERIC` | **0**, 1 | Ignore numeric (int/hex) configuration options. Useful for comparison with CART/DECART. |
| `DFATOOL_KCONF_IGNORE_STRING` | **0**, 1 | Ignore string configuration options. Useful for comparison with CART/DECART. |
diff --git a/lib/parameters.py b/lib/parameters.py
index 401c7c6..1239046 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -936,16 +936,20 @@ class ModelAttribute:
return
if with_xgboost:
- from xgboost import XGBRegressor
+ import xgboost
# TODO retrieve parameters from env
- xgb = XGBRegressor(
- n_estimators=100,
- max_depth=10,
- eta=0.2,
+ # <https://xgboost.readthedocs.io/en/stable/python/python_api.html#module-xgboost.sklearn>
+ # n_estimators := number of trees in forest
+ # max_depth := maximum tree depth
+ # eta <=> learning_rate
+ xgb = xgboost.XGBRegressor(
+ n_estimators=int(os.getenv("DFATOOL_XGB_N_ESTIMATORS", "100")),
+ max_depth=int(os.getenv("DFATOOL_XGB_MAX_DEPTH", "10")),
+ learning_rate=0.2,
subsample=0.7,
gamma=0.01,
- alpha=0.0006,
+ reg_alpha=0.0006,
)
fit_parameters, category_to_index, ignore_index = param_to_ndarray(
parameters, with_nan=False, categorial_to_scalar=categorial_to_scalar