summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Daniel Friesel <daniel.friesel@uos.de> 2022-01-28 13:16:46 +0100
committer Daniel Friesel <daniel.friesel@uos.de> 2022-01-28 13:16:46 +0100
commit 1f391139530f8051a1ece4fcd9de46afe6afde06 (patch)
tree b69e3d60b9c07ae252651bc236f22f5bb84224d5
parent f886ffd67396c2a512bfd71dc62e8bbc72f1ef8b (diff)
PTAModel: Add force_tree option
-rwxr-xr-x bin/analyze-archive.py 10
-rw-r--r-- lib/functions.py 6
-rw-r--r-- lib/model.py 44
-rw-r--r-- lib/parameters.py 1
4 files changed, 59 insertions, 2 deletions
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index 5cd7ef7..54ba1ef 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -500,6 +500,11 @@ if __name__ == "__main__":
type=str,
help="Perform substate analysis",
)
+ parser.add_argument(
+ "--force-tree",
+ action="store_true",
+ help="Build regression tree without checking whether static/analytic functions are sufficient.",
+ )
parser.add_argument("measurement", nargs="+")
args = parser.parse_args()
@@ -638,11 +643,14 @@ if __name__ == "__main__":
function_override=function_override,
pta=pta,
pelt=args.with_substates,
+ force_tree=args.force_tree,
)
constructor_duration = time.time() - constructor_start
if xv_method:
- xv = CrossValidator(PTAModel, by_name, parameters, arg_count)
+ xv = CrossValidator(
+ PTAModel, by_name, parameters, arg_count, force_tree=args.force_tree
+ )
xv.parameter_aware = args.parameter_aware_cross_validation
if args.info:
diff --git a/lib/functions.py b/lib/functions.py
index 3630215..399072b 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -470,7 +470,11 @@ class CARTFunction(SKLearnRegressionFunction):
class LMTFunction(SKLearnRegressionFunction):
- pass
+ def get_number_of_nodes(self):
+ return self.regressor.node_count
+
+ def get_max_depth(self):
+ return self.regressor.max_depth
class XGBoostFunction(SKLearnRegressionFunction):
diff --git a/lib/model.py b/lib/model.py
index 7a28197..3b1279f 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -652,6 +652,7 @@ class PTAModel(AnalyticModel):
pelt=None,
compute_stats=True,
dtree_max_std=None,
+ force_tree=False,
):
"""
Prepare a new PTA energy model.
@@ -734,6 +735,49 @@ class PTAModel(AnalyticModel):
if compute_stats:
self._compute_stats(by_name)
+ if force_tree:
+     # Build a decision tree for every (name, attribute) pair right away,
+     # without first checking whether static/analytic functions suffice.
+     #
+     # The DFATOOL_* switches do not depend on name or attribute, so they
+     # are read from the environment once instead of once per attribute.
+     with_function_leaves = bool(
+         int(os.getenv("DFATOOL_DTREE_FUNCTION_LEAVES", "1"))
+     )
+     with_nonbinary_nodes = bool(
+         int(os.getenv("DFATOOL_DTREE_NONBINARY_NODES", "1"))
+     )
+     with_sklearn_cart = bool(int(os.getenv("DFATOOL_DTREE_SKLEARN_CART", "0")))
+     with_lmt = bool(int(os.getenv("DFATOOL_DTREE_LMT", "0")))
+     with_xgboost = bool(int(os.getenv("DFATOOL_USE_XGBOOST", "0")))
+     loss_ignore_scalar = bool(
+         int(os.getenv("DFATOOL_DTREE_LOSS_IGNORE_SCALAR", "0"))
+     )
+     for name in self.names:
+         for attr in self.by_name[name]["attributes"]:
+             # Threshold priority: an explicit dtree_max_std[name][attr]
+             # entry, then the std_param_lut statistic (only available
+             # when compute_stats is set), then 0.
+             if (
+                 dtree_max_std
+                 and name in dtree_max_std
+                 and attr in dtree_max_std[name]
+             ):
+                 threshold = dtree_max_std[name][attr]
+             elif compute_stats:
+                 # Must be the bare value: a trailing comma inside the
+                 # parentheses would turn it into a one-element tuple,
+                 # unlike the scalars produced by the other two branches.
+                 threshold = self.attr_by_name[name][attr].stats.std_param_lut
+             else:
+                 threshold = 0
+             logger.debug(
+                 f"build_dtree({name}, {attr}, threshold={threshold}, with_function_leaves={with_function_leaves}, with_nonbinary_nodes={with_nonbinary_nodes}, loss_ignore_scalar={loss_ignore_scalar})"
+             )
+             self.build_dtree(
+                 name,
+                 attr,
+                 threshold=threshold,
+                 with_function_leaves=with_function_leaves,
+                 with_nonbinary_nodes=with_nonbinary_nodes,
+                 with_sklearn_cart=with_sklearn_cart,
+                 with_lmt=with_lmt,
+                 with_xgboost=with_xgboost,
+                 loss_ignore_scalar=loss_ignore_scalar,
+             )
+     # Set only on the forced-tree path, after all trees have been built.
+     self.fit_done = True
+
if self.pelt is not None:
# cluster_substates uses self.attr_by_name[*]["power"].param_values, which is set by _compute_stats
# cluster_substates relies on fitted "substate_count" models, which are generated by get_fitted.
diff --git a/lib/parameters.py b/lib/parameters.py
index 51ff680..1f72700 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -670,6 +670,7 @@ class ModelAttribute:
df.SplitFunction,
df.CARTFunction,
df.XGBoostFunction,
+ df.LMTFunction,
):
ret["decision tree/nodes"] = self.model_function.get_number_of_nodes()
ret["decision tree/max depth"] = self.model_function.get_max_depth()