From 31a87322198be21b2db3e71fd3070e198d368333 Mon Sep 17 00:00:00 2001
From: Daniel Friesel <daniel.friesel@uos.de>
Date: Wed, 23 Jun 2021 15:44:50 +0200
Subject: move kconfig-dtree builder to AnalyticModel. WIP.

---
 lib/model.py | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

(limited to 'lib/model.py')

diff --git a/lib/model.py b/lib/model.py
index 1e6bb28..8c00fd2 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -4,7 +4,7 @@ import logging
 import numpy as np
 import os
 from .automata import PTA, ModelAttribute
-from .functions import StaticFunction, SubstateFunction
+from .functions import StaticFunction, SubstateFunction, SplitFunction
 from .parameters import ParallelParamStats, codependent_param_dict
 from .paramfit import ParallelParamFit
 from .utils import soft_cast_int, by_name_to_by_param, regression_measures
@@ -316,6 +316,83 @@ class AnalyticModel:
 
         return detailed_results
 
+    def build_tree(self, this_symbols, this_data, data_index=0, threshold=100, level=0):
+        """
+        Build a Decision Tree on `this_data` for kconfig models.
+
+        :param this_symbols: parameter names
+        :param this_data: list of measurements. Each entry is a (param vector, mearusements vector) tuple.
+            param vector holds parameter values (same order as parameter names). mearuserements vector holds measurements.
+        :param data_index: Index in measurements vector to use for model generation. Default 0.
+        :param threshold: Return a StaticFunction leaf node if std(data[data_index]) < threshold. Default 100.
+
+        :returns: SplitFunction or StaticFunction
+        """
+
+        rom_sizes = list(map(lambda x: x[1][data_index], this_data))
+
+        if np.std(rom_sizes) < threshold or len(this_symbols) == 0:
+            return StaticFunction(np.mean(rom_sizes))
+            # sf.value_error["std"] = np.std(rom_sizes)
+
+        mean_stds = list()
+        for i, param in enumerate(this_symbols):
+
+            unique_values = list(set(map(lambda vrr: vrr[0][i], this_data)))
+
+            if None in unique_values:
+                # param is a choice and undefined in some configs. Do not split on it.
+                mean_stds.append(np.inf)
+                continue
+
+            child_values = list()
+            for value in unique_values:
+                child_values.append(
+                    list(filter(lambda vrr: vrr[0][i] == value, this_data))
+                )
+
+            if len(list(filter(len, child_values))) < 2:
+                # this param only has a single value. there's no point in splitting.
+                mean_stds.append(np.inf)
+                continue
+
+            children = list()
+            for child in child_values:
+                children.append(np.std(list(map(lambda x: x[1][data_index], child))))
+
+            if np.any(np.isnan(children)):
+                mean_stds.append(np.inf)
+            else:
+                mean_stds.append(np.mean(children))
+
+        if np.all(np.isinf(mean_stds)):
+            # all children have the same configuration. We shouldn't get here due to the threshold check above...
+            logging.warning("Waht")
+            return StaticFunction(np.mean(rom_sizes))
+
+        symbol_index = np.argmin(mean_stds)
+        symbol = this_symbols[symbol_index]
+
+        unique_values = list(set(map(lambda vrr: vrr[0][symbol_index], this_data)))
+
+        child = dict()
+
+        for value in unique_values:
+            children = list(
+                filter(lambda vrr: vrr[0][symbol_index] == value, this_data)
+            )
+            if len(children):
+                print(
+                    f"Level {level} split on {symbol} == {value} has {len(children)} children"
+                )
+                child[value] = self.build_tree(
+                    this_symbols, children, data_index, threshold, level + 1
+                )
+
+        assert len(child.values()) >= 2
+
+        return SplitFunction(np.mean(rom_sizes), symbol_index, child)
+
     def to_dref(self, static_quality, lut_quality, model_quality) -> dict:
         ret = dict()
         for name in self.names:
-- 
cgit v1.2.3