diff options
author | Daniel Friesel <daniel.friesel@uos.de> | 2022-01-28 13:16:46 +0100 |
---|---|---|
committer | Daniel Friesel <daniel.friesel@uos.de> | 2022-01-28 13:16:46 +0100 |
commit | 1f391139530f8051a1ece4fcd9de46afe6afde06 (patch) | |
tree | b69e3d60b9c07ae252651bc236f22f5bb84224d5 /lib | |
parent | f886ffd67396c2a512bfd71dc62e8bbc72f1ef8b (diff) |
PTAModel: Add force_tree option
Diffstat (limited to 'lib')
-rw-r--r-- | lib/functions.py | 6 | ||||
-rw-r--r-- | lib/model.py | 44 | ||||
-rw-r--r-- | lib/parameters.py | 1 |
3 files changed, 50 insertions, 1 deletion
diff --git a/lib/functions.py b/lib/functions.py index 3630215..399072b 100644 --- a/lib/functions.py +++ b/lib/functions.py @@ -470,7 +470,11 @@ class CARTFunction(SKLearnRegressionFunction): class LMTFunction(SKLearnRegressionFunction): - pass + def get_number_of_nodes(self): + return self.regressor.node_count + + def get_max_depth(self): + return self.regressor.max_depth class XGBoostFunction(SKLearnRegressionFunction): diff --git a/lib/model.py b/lib/model.py index 7a28197..3b1279f 100644 --- a/lib/model.py +++ b/lib/model.py @@ -652,6 +652,7 @@ class PTAModel(AnalyticModel): pelt=None, compute_stats=True, dtree_max_std=None, + force_tree=False, ): """ Prepare a new PTA energy model. @@ -734,6 +735,49 @@ class PTAModel(AnalyticModel): if compute_stats: self._compute_stats(by_name) + if force_tree: + for name in self.names: + for attr in self.by_name[name]["attributes"]: + if ( + dtree_max_std + and name in dtree_max_std + and attr in dtree_max_std[name] + ): + threshold = dtree_max_std[name][attr] + elif compute_stats: + threshold = (self.attr_by_name[name][attr].stats.std_param_lut,) + else: + threshold = 0 + with_function_leaves = bool( + int(os.getenv("DFATOOL_DTREE_FUNCTION_LEAVES", "1")) + ) + with_nonbinary_nodes = bool( + int(os.getenv("DFATOOL_DTREE_NONBINARY_NODES", "1")) + ) + with_sklearn_cart = bool( + int(os.getenv("DFATOOL_DTREE_SKLEARN_CART", "0")) + ) + with_lmt = bool(int(os.getenv("DFATOOL_DTREE_LMT", "0"))) + with_xgboost = bool(int(os.getenv("DFATOOL_USE_XGBOOST", "0"))) + loss_ignore_scalar = bool( + int(os.getenv("DFATOOL_DTREE_LOSS_IGNORE_SCALAR", "0")) + ) + logger.debug( + f"build_dtree({name}, {attr}, threshold={threshold}, with_function_leaves={with_function_leaves}, with_nonbinary_nodes={with_nonbinary_nodes}, loss_ignore_scalar={loss_ignore_scalar})" + ) + self.build_dtree( + name, + attr, + threshold=threshold, + with_function_leaves=with_function_leaves, + with_nonbinary_nodes=with_nonbinary_nodes, + 
with_sklearn_cart=with_sklearn_cart, + with_lmt=with_lmt, + with_xgboost=with_xgboost, + loss_ignore_scalar=loss_ignore_scalar, + ) + self.fit_done = True + if self.pelt is not None: # cluster_substates uses self.attr_by_name[*]["power"].param_values, which is set by _compute_stats # cluster_substates relies on fitted "substate_count" models, which are generated by get_fitted. diff --git a/lib/parameters.py b/lib/parameters.py index 51ff680..1f72700 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -670,6 +670,7 @@ class ModelAttribute: df.SplitFunction, df.CARTFunction, df.XGBoostFunction, + df.LMTFunction, ): ret["decision tree/nodes"] = self.model_function.get_number_of_nodes() ret["decision tree/max depth"] = self.model_function.get_max_depth() |