author    Daniel Friesel <daniel.friesel@uos.de>    2022-03-01 09:24:18 +0100
committer Daniel Friesel <daniel.friesel@uos.de>    2022-03-01 09:24:18 +0100
commit    e1ae769e8be67adb8c264f3d7aad652c86190eaf (patch)
tree      9beae88977820c12a0aad1c0b7b5fbf64aaf4022
parent    9d42e811a1cd70dc87cc96f4d847fb239ae88d64 (diff)
refactor tree generation overrides into build_dtree
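
This moves the environment-variable handling for regression-tree construction
(DFATOOL_DTREE_FUNCTION_LEAVES, DFATOOL_DTREE_NONBINARY_NODES,
DFATOOL_DTREE_SKLEARN_CART, DFATOOL_DTREE_SKLEARN_DECART, DFATOOL_DTREE_LMT,
DFATOOL_USE_XGBOOST, DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS,
DFATOOL_DTREE_LOSS_IGNORE_SCALAR) out of the AnalyticModel and PTAModel call
sites and into ModelAttribute.build_dtree. The corresponding keyword arguments
now default to None and are filled from the environment only when the caller
leaves them unset, so explicitly passed values keep precedence. A minimal
sketch of the convention, with a simplified signature rather than the actual
dfatool implementation:

    import os

    def build_dtree(parameters, data, threshold=100, with_sklearn_cart=None):
        # None means "not set by the caller": fall back to the DFATOOL_*
        # environment variable; an explicit True/False is used as-is.
        if with_sklearn_cart is None:
            with_sklearn_cart = bool(int(os.getenv("DFATOOL_DTREE_SKLEARN_CART", "0")))
        return with_sklearn_cart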
-rw-r--r--  lib/model.py       126
-rw-r--r--  lib/parameters.py   42
2 files changed, 40 insertions(+), 128 deletions(-)
diff --git a/lib/model.py b/lib/model.py
index baa22da..1c19f14 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -151,41 +151,11 @@ class AnalyticModel:
threshold = (self.attr_by_name[name][attr].stats.std_param_lut,)
else:
threshold = 0
- with_function_leaves = bool(
- int(os.getenv("DFATOOL_DTREE_FUNCTION_LEAVES", "1"))
- )
- with_nonbinary_nodes = bool(
- int(os.getenv("DFATOOL_DTREE_NONBINARY_NODES", "1"))
- )
- with_sklearn_cart = bool(
- int(os.getenv("DFATOOL_DTREE_SKLEARN_CART", "0"))
- )
- with_sklearn_decart = bool(
- int(os.getenv("DFATOOL_DTREE_SKLEARN_DECART", "0"))
- )
- with_lmt = bool(int(os.getenv("DFATOOL_DTREE_LMT", "0")))
- with_xgboost = bool(int(os.getenv("DFATOOL_USE_XGBOOST", "0")))
- ignore_irrelevant_parameters = bool(
- int(os.getenv("DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS", "1"))
- )
- loss_ignore_scalar = bool(
- int(os.getenv("DFATOOL_DTREE_LOSS_IGNORE_SCALAR", "0"))
- )
- logger.debug(
- f"build_dtree({name}, {attr}, threshold={threshold}, with_function_leaves={with_function_leaves}, with_nonbinary_nodes={with_nonbinary_nodes}, ignore_irrelevant_parameters={ignore_irrelevant_parameters}, loss_ignore_scalar={loss_ignore_scalar})"
- )
+ logger.debug(f"build_dtree({name}, {attr}, threshold={threshold})")
self.build_dtree(
name,
attr,
threshold=threshold,
- with_function_leaves=with_function_leaves,
- with_nonbinary_nodes=with_nonbinary_nodes,
- with_sklearn_cart=with_sklearn_cart,
- with_sklearn_decart=with_sklearn_decart,
- with_lmt=with_lmt,
- with_xgboost=with_xgboost,
- ignore_irrelevant_parameters=ignore_irrelevant_parameters,
- loss_ignore_scalar=loss_ignore_scalar,
)
self.fit_done = True
@@ -327,28 +297,6 @@ class AnalyticModel:
for name in self.names:
for attr in self.attr_by_name[name].keys():
if tree_required[name].get(attr, False):
- with_function_leaves = bool(
- int(os.getenv("DFATOOL_DTREE_FUNCTION_LEAVES", "1"))
- )
- with_nonbinary_nodes = bool(
- int(os.getenv("DFATOOL_DTREE_NONBINARY_NODES", "1"))
- )
- with_sklearn_cart = bool(
- int(os.getenv("DFATOOL_DTREE_SKLEARN_CART", "0"))
- )
- with_sklearn_decart = bool(
- int(os.getenv("DFATOOL_DTREE_SKLEARN_DECART", "0"))
- )
- with_lmt = bool(int(os.getenv("DFATOOL_DTREE_LMT", "0")))
- with_xgboost = bool(int(os.getenv("DFATOOL_USE_XGBOOST", "0")))
- ignore_irrelevant_parameters = bool(
- int(
- os.getenv("DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS", "1")
- )
- )
- loss_ignore_scalar = bool(
- int(os.getenv("DFATOOL_DTREE_LOSS_IGNORE_SCALAR", "0"))
- )
threshold = self.attr_by_name[name][attr].stats.std_param_lut
if (
self.dtree_max_std
@@ -357,21 +305,9 @@ class AnalyticModel:
):
threshold = self.dtree_max_std[name][attr]
logger.debug(
- f"build_dtree({name}, {attr}, threshold={threshold}, with_function_leaves={with_function_leaves}, ignore_irrelevant_parameters={ignore_irrelevant_parameters}, with_nonbinary_nodes={with_nonbinary_nodes}, loss_ignore_scalar={loss_ignore_scalar})"
- )
- self.build_dtree(
- name,
- attr,
- threshold=threshold,
- with_function_leaves=with_function_leaves,
- with_nonbinary_nodes=with_nonbinary_nodes,
- with_sklearn_cart=with_sklearn_cart,
- with_sklearn_decart=with_sklearn_decart,
- with_lmt=with_lmt,
- with_xgboost=with_xgboost,
- ignore_irrelevant_parameters=ignore_irrelevant_parameters,
- loss_ignore_scalar=loss_ignore_scalar,
+ f"build_dtree({name}, {attr}, threshold={threshold})"
)
+ self.build_dtree(name, attr, threshold=threshold)
else:
self.attr_by_name[name][attr].set_data_from_paramfit(paramfit)
@@ -450,20 +386,7 @@ class AnalyticModel:
return detailed_results, raw_results
return detailed_results
- def build_dtree(
- self,
- name,
- attribute,
- threshold=100,
- with_function_leaves=False,
- with_nonbinary_nodes=True,
- with_sklearn_cart=False,
- with_sklearn_decart=False,
- with_lmt=False,
- with_xgboost=False,
- ignore_irrelevant_parameters=True,
- loss_ignore_scalar=False,
- ):
+ def build_dtree(self, name, attribute, threshold=100, **kwargs):
if name not in self.attr_by_name:
self.attr_by_name[name] = dict()
@@ -481,15 +404,8 @@ class AnalyticModel:
self.attr_by_name[name][attribute].build_dtree(
self.by_name[name]["param"],
self.by_name[name][attribute],
- with_function_leaves=with_function_leaves,
- with_nonbinary_nodes=with_nonbinary_nodes,
- with_sklearn_cart=with_sklearn_cart,
- with_sklearn_decart=with_sklearn_decart,
- with_lmt=with_lmt,
- with_xgboost=with_xgboost,
- ignore_irrelevant_parameters=ignore_irrelevant_parameters,
- loss_ignore_scalar=loss_ignore_scalar,
threshold=threshold,
+ **kwargs,
)
def to_dref(
@@ -779,41 +695,11 @@ class PTAModel(AnalyticModel):
threshold = (self.attr_by_name[name][attr].stats.std_param_lut,)
else:
threshold = 0
- with_function_leaves = bool(
- int(os.getenv("DFATOOL_DTREE_FUNCTION_LEAVES", "1"))
- )
- with_nonbinary_nodes = bool(
- int(os.getenv("DFATOOL_DTREE_NONBINARY_NODES", "1"))
- )
- with_sklearn_cart = bool(
- int(os.getenv("DFATOOL_DTREE_SKLEARN_CART", "0"))
- )
- with_sklearn_decart = bool(
- int(os.getenv("DFATOOL_DTREE_SKLEARN_DECART", "0"))
- )
- with_lmt = bool(int(os.getenv("DFATOOL_DTREE_LMT", "0")))
- with_xgboost = bool(int(os.getenv("DFATOOL_USE_XGBOOST", "0")))
- ignore_irrelevant_parameters = bool(
- int(os.getenv("DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS", "1"))
- )
- loss_ignore_scalar = bool(
- int(os.getenv("DFATOOL_DTREE_LOSS_IGNORE_SCALAR", "0"))
- )
- logger.debug(
- f"build_dtree({name}, {attr}, threshold={threshold}, with_function_leaves={with_function_leaves}, with_nonbinary_nodes={with_nonbinary_nodes}, ignore_irrelevant_parameters={ignore_irrelevant_parameters}, loss_ignore_scalar={loss_ignore_scalar})"
- )
+ logger.debug(f"build_dtree({name}, {attr}, threshold={threshold})")
self.build_dtree(
name,
attr,
threshold=threshold,
- with_function_leaves=with_function_leaves,
- with_nonbinary_nodes=with_nonbinary_nodes,
- with_sklearn_cart=with_sklearn_cart,
- with_sklearn_decart=with_sklearn_decart,
- with_lmt=with_lmt,
- with_xgboost=with_xgboost,
- ignore_irrelevant_parameters=ignore_irrelevant_parameters,
- loss_ignore_scalar=loss_ignore_scalar,
)
self.fit_done = True
diff --git a/lib/parameters.py b/lib/parameters.py
index fc6512f..91548f9 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -930,14 +930,14 @@ class ModelAttribute:
self,
parameters,
data,
- with_function_leaves=False,
- with_nonbinary_nodes=True,
- with_sklearn_cart=False,
- with_sklearn_decart=False,
- with_xgboost=False,
- with_lmt=False,
- ignore_irrelevant_parameters=True,
- loss_ignore_scalar=False,
+ with_function_leaves=None,
+ with_nonbinary_nodes=None,
+ with_sklearn_cart=None,
+ with_sklearn_decart=None,
+ with_lmt=None,
+ with_xgboost=None,
+ ignore_irrelevant_parameters=None,
+ loss_ignore_scalar=None,
threshold=100,
):
"""
@@ -960,6 +960,32 @@ class ModelAttribute:
categorial_to_scalar = bool(
int(os.getenv("DFATOOL_PARAM_CATEGORIAL_TO_SCALAR", "0"))
)
+ if with_function_leaves is None:
+ with_function_leaves = bool(
+ int(os.getenv("DFATOOL_DTREE_FUNCTION_LEAVES", "1"))
+ )
+ if with_nonbinary_nodes is None:
+ with_nonbinary_nodes = bool(
+ int(os.getenv("DFATOOL_DTREE_NONBINARY_NODES", "1"))
+ )
+ if with_sklearn_cart is None:
+ with_sklearn_cart = bool(int(os.getenv("DFATOOL_DTREE_SKLEARN_CART", "0")))
+ if with_sklearn_decart is None:
+ with_sklearn_decart = bool(
+ int(os.getenv("DFATOOL_DTREE_SKLEARN_DECART", "0"))
+ )
+ if with_lmt is None:
+ with_lmt = bool(int(os.getenv("DFATOOL_DTREE_LMT", "0")))
+ if with_xgboost is None:
+ with_xgboost = bool(int(os.getenv("DFATOOL_USE_XGBOOST", "0")))
+ if ignore_irrelevant_parameters is None:
+ ignore_irrelevant_parameters = bool(
+ int(os.getenv("DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS", "1"))
+ )
+ if loss_ignore_scalar is None:
+ loss_ignore_scalar = bool(
+ int(os.getenv("DFATOOL_DTREE_LOSS_IGNORE_SCALAR", "0"))
+ )
if with_sklearn_cart or with_sklearn_decart:
from sklearn.tree import DecisionTreeRegressor
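
With this change, explicitly passed keyword arguments still take precedence;
the environment is only consulted for arguments left at None. A hypothetical
driver snippet (the variable names are the ones handled above, the values are
examples only):

    import os

    # Configure the next model fit via the DFATOOL_* variables resolved in
    # lib/parameters.py above, instead of passing keyword arguments.
    os.environ["DFATOOL_DTREE_SKLEARN_CART"] = "1"
    os.environ["DFATOOL_DTREE_FUNCTION_LEAVES"] = "0"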