From 242d09e9e03cd630fb56a26cac5abcb212d75426 Mon Sep 17 00:00:00 2001 From: Birte Kristina Friesel Date: Thu, 25 Jan 2024 14:10:23 +0100 Subject: Use LMT algorithm defaults for max depth and min samples leaf. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit max bins remains at 120 (≠25), but that one should only affect run-time and not accuracy/complexity --- lib/parameters.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/parameters.py b/lib/parameters.py index 86f2338..c3af6b0 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -1163,7 +1163,7 @@ class ModelAttribute: # max_depth : int, default=5 # The maximum depth of the tree considering only the splitting nodes. # A higher value implies a higher training time. - max_depth = int(os.getenv("DFATOOL_LMT_MAX_DEPTH", "20")) + max_depth = int(os.getenv("DFATOOL_LMT_MAX_DEPTH", "5")) # min_samples_split : int or float, default=6 # The minimum number of samples required to split an internal node. @@ -1189,10 +1189,10 @@ class ModelAttribute: # - If float, then `min_samples_leaf` is a fraction and # `ceil(min_samples_leaf * n_samples)` are the minimum # number of samples for each node. - if "." in os.getenv("DFATOOL_LMT_MIN_SAMPLES_LEAF", ""): + if "." in os.getenv("DFATOOL_LMT_MIN_SAMPLES_LEAF", "0.1"): min_samples_leaf = float(os.getenv("DFATOOL_LMT_MIN_SAMPLES_LEAF")) else: - min_samples_leaf = int(os.getenv("DFATOOL_LMT_MIN_SAMPLES_LEAF", "3")) + min_samples_leaf = int(os.getenv("DFATOOL_LMT_MIN_SAMPLES_LEAF")) # max_bins : int, default=25 # The maximum number of bins to use to search the optimal split in each -- cgit v1.2.3