remove unused legacy attr.build_dtree code

model.build_dtree does belong in attr, but that's for a different commit
author: Daniel Friesel <daniel.friesel@uos.de> 2021-08-23 16:18:30 +0200
committer: Daniel Friesel <daniel.friesel@uos.de> 2021-08-23 16:18:30 +0200
commit: 693fa1b637b4cb2eb3796a4ac1376baf0552b8d4 (patch)
tree: 0f86ef4fa53edbd1113195727659f109fccdfe34
parent: 971182ab8a74d6c545aba344ad57c9da835e7e7c (diff)
2 files changed, 2 insertions, 123 deletions
diff --git a/lib/model.py b/lib/model.py
index c9b0e33..dc0cb4d 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -151,11 +151,6 @@ class AnalyticModel:
 
         paramstats.compute()
 
-        if not os.getenv("DFATOOL_NO_DECISIONTREES"):
-            for name in self.names:
-                for attr in self.attr_by_name[name].values():
-                    attr.build_dtree()
-
     def attributes(self, name):
         return self.attr_by_name[name].keys()
 
diff --git a/lib/parameters.py b/lib/parameters.py
index 9ecd7e9..e516926 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -587,124 +587,6 @@ class ModelAttribute:
             return np.mean(self.by_param[param])
         return np.median(self.by_param[param])
 
-    def build_dtree(self):
-        split_param_index = self.get_split_param_index()
-        if split_param_index is None:
-            return
-
-        distinct_values = self.stats.distinct_values_by_param_index[split_param_index]
-        tt1 = list(
-            map(
-                lambda i: self.param_values[i][split_param_index] == distinct_values[0],
-                range(len(self.param_values)),
-            )
-        )
-        tt2 = np.invert(tt1)
-
-        pv1 = list()
-        pv2 = list()
-
-        for i, param_tuple in enumerate(self.param_values):
-            if tt1[i]:
-                pv1.append(param_tuple)
-            else:
-                pv2.append(param_tuple)
-
-        # print(
-        #    f">>> split {self.name} {self.attr} by param #{split_param_index}"
-        # )
-
-        child1 = ModelAttribute(
-            self.name,
-            self.attr,
-            self.data[tt1],
-            pv1,
-            self.param_names,
-            self.arg_count,
-            codependent_param_dict(pv1),
-        )
-        child2 = ModelAttribute(
-            self.name,
-            self.attr,
-            self.data[tt2],
-            pv2,
-            self.param_names,
-            self.arg_count,
-            codependent_param_dict(pv2),
-        )
-
-        ParamStats.compute_for_attr(child1)
-        ParamStats.compute_for_attr(child2)
-
-        child1.build_dtree()
-        child2.build_dtree()
-
-        self.split = (
-            split_param_index,
-            {distinct_values[0]: child1, distinct_values[1]: child2},
-        )
-
-        # print(
-        #    f"<<< split {self.name} {self.attr} by param #{split_param_index}"
-        # )
-
-    # None -> kein split notwendig
-    # andernfalls: Parameter-Index, anhand dessen eine Decision Tree-Ebene aufgespannt wird
-    # (Kinder sind wiederum ModelAttributes, in denen dieser Parameter konstant ist)
-    def get_split_param_index(self):
-        if not self.param_names:
-            return None
-        std_by_param = list()
-        for param_index, param_name in enumerate(self.param_names):
-            distinct_values = self.stats.distinct_values_by_param_index[param_index]
-            if (
-                self.stats.depends_on_param(param_name)
-                and len(distinct_values) == 2
-                and not param_index in self.ignore_param
-            ):
-                val1 = list(
-                    map(
-                        lambda i: self.param_values[i][param_index]
-                        == distinct_values[0],
-                        range(len(self.param_values)),
-                    )
-                )
-                val2 = np.invert(val1)
-                val1_std = np.std(self.data[val1])
-                val2_std = np.std(self.data[val2])
-                std_by_param.append(np.mean([val1_std, val2_std]))
-            else:
-                std_by_param.append(np.inf)
-        for arg_index in range(self.arg_count):
-            distinct_values = self.stats.distinct_values_by_param_index[
-                len(self.param_names) + arg_index
-            ]
-            if (
-                self.stats.depends_on_arg(arg_index)
-                and len(distinct_values) == 2
-                and not len(self.param_names) + arg_index in self.ignore_param
-            ):
-                val1 = list(
-                    map(
-                        lambda i: self.param_values[i][
-                            len(self.param_names) + arg_index
-                        ]
-                        == distinct_values[0],
-                        range(len(self.param_values)),
-                    )
-                )
-                val2 = np.invert(val1)
-                val1_std = np.std(self.data[val1])
-                val2_std = np.std(self.data[val2])
-                std_by_param.append(np.mean([val1_std, val2_std]))
-            else:
-                std_by_param.append(np.inf)
-        split_param_index = np.argmin(std_by_param)
-        split_std = std_by_param[split_param_index]
-        if split_std == np.inf:
-            return None
-        return split_param_index
-
     def get_data_for_paramfit(self, safe_functions_enabled=False):
         if self.split:
             return self.get_data_for_paramfit_split(
@@ -716,6 +598,7 @@ class ModelAttribute:
             )
 
     def get_data_for_paramfit_split(self, safe_functions_enabled=False):
+        # currently unused
         split_param_index, child_by_param_value = self.split
         ret = list()
         for param_value, child in child_by_param_value.items():
@@ -798,6 +681,7 @@ class ModelAttribute:
             self.set_data_from_paramfit_this(paramfit, prefix)
 
     def set_data_from_paramfit_split(self, paramfit, prefix):
+        # currently unused
         split_param_index, child_by_param_value = self.split
         function_map = {
             "split_by": split_param_index,
author	Daniel Friesel <daniel.friesel@uos.de>	2021-08-23 16:18:30 +0200
committer	Daniel Friesel <daniel.friesel@uos.de>	2021-08-23 16:18:30 +0200
commit	693fa1b637b4cb2eb3796a4ac1376baf0552b8d4 (patch)
tree	0f86ef4fa53edbd1113195727659f109fccdfe34
parent	971182ab8a74d6c545aba344ad57c9da835e7e7c (diff)