From f0a48e3115938809f0ceec133f3c850a707e3f58 Mon Sep 17 00:00:00 2001
From: Daniel Friesel <daniel.friesel@uos.de>
Date: Wed, 16 Sep 2020 09:04:26 +0200
Subject: kconfig model: json import

---
 bin/analyze-kconfig.py |  50 +++++++++----
 lib/loader.py          |  23 +++---
 lib/model.py           | 188 +++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 202 insertions(+), 59 deletions(-)

diff --git a/bin/analyze-kconfig.py b/bin/analyze-kconfig.py
index f532a5b..87c05f7 100755
--- a/bin/analyze-kconfig.py
+++ b/bin/analyze-kconfig.py
@@ -11,6 +11,7 @@ import argparse
 import json
 import kconfiglib
 import logging
+import os
 
 from dfatool.loader import KConfigAttributes
 from dfatool.model import KConfigModel
@@ -48,7 +49,11 @@ def main():
         help="Set log level",
     )
     parser.add_argument("kconfig_path", type=str, help="Path to Kconfig file")
-    parser.add_argument("experiment_root", type=str, help="Path to experiment results")
+    parser.add_argument(
+        "model",
+        type=str,
+        help="Path to experiment results directory or model.json file",
+    )
 
     args = parser.parse_args()
 
@@ -57,13 +62,16 @@ def main():
     else:
         print(f"Invalid log level. Setting log level to INFO.", file=sys.stderr)
 
-    data = KConfigAttributes(args.kconfig_path, args.experiment_root)
-    model = KConfigModel(data, args.attribute)
+    if os.path.isdir(args.model):
+        data = KConfigAttributes(args.kconfig_path, args.model)
+        model = KConfigModel.from_benchmark(data, args.attribute)
+        if args.max_stddev:
+            model.max_stddev = args.max_stddev
+        model.build_tree()
 
-    if args.max_stddev:
-        model.max_stddev = args.max_stddev
-
-    model.build_tree()
+    else:
+        with open(args.model, "r") as f:
+            model = KConfigModel.from_json(json.load(f))
 
     if args.export_tree:
         with open(args.export_tree, "w") as f:
@@ -104,15 +112,27 @@ def main():
             if skip:
                 continue
 
-            model_diff = model.value_for_config(kconf2) - model.value_for_config(kconf)
-            if kconf_sym.choice:
-                print(
-                    f"Setting {kconf_sym.choice.name} to {kconf_sym.name} changes {num_changes:2d} symbols, model change: {model_diff:+5.0f}"
-                )
-            else:
-                print(
-                    f"Setting {symbol} to {kconf_sym.str_value} changes {num_changes:2d} symbols, model change: {model_diff:+5.0f}"
+            try:
+                model_diff = model.value_for_config(kconf2) - model.value_for_config(
+                    kconf
                 )
+                if kconf_sym.choice:
+                    print(
+                        f"Setting {kconf_sym.choice.name} to {kconf_sym.name} changes {num_changes:2d} symbols, model change: {model_diff:+5.0f}"
+                    )
+                else:
+                    print(
+                        f"Setting {symbol} to {kconf_sym.str_value} changes {num_changes:2d} symbols, model change: {model_diff:+5.0f}"
+                    )
+            except TypeError:
+                if kconf_sym.choice:
+                    print(
+                        f"Setting {kconf_sym.choice.name} to {kconf_sym.name} changes {num_changes:2d} symbols, model is undefined"
+                    )
+                else:
+                    print(
+                        f"Setting {symbol} to {kconf_sym.str_value} changes {num_changes:2d} symbols, model is undefined"
+                    )
             for changed_symbol in changed_symbols:
                 print(
                     f"    {changed_symbol:30s} -> {kconf2.syms[changed_symbol].str_value}"
diff --git a/lib/loader.py b/lib/loader.py
index 3f0662e..14b7853 100644
--- a/lib/loader.py
+++ b/lib/loader.py
@@ -13,6 +13,7 @@ import hashlib
 import kconfiglib
 from multiprocessing import Pool
 from .utils import running_mean, soft_cast_int
+from frozendict import frozendict
 
 logger = logging.getLogger(__name__)
 
@@ -1953,8 +1954,9 @@ class KConfigAttributes:
                 experiments.append((config_path, attr_path))
 
         kconf = kconfiglib.Kconfig(kconfig_path)
+        self.kconf = kconf
 
-        self.symbols = sorted(
+        self.symbol_names = sorted(
             map(
                 lambda sym: sym.name,
                 filter(
@@ -1964,17 +1966,22 @@ class KConfigAttributes:
             )
         )
 
-        self.data = list()
+        self.choice_names = sorted(map(lambda choice: choice.name, kconf.choices))
+
+        self.symbol = kconf.syms
+        self.choice = dict()
+        for choice in kconf.choices:
+            self.choice[choice.name] = choice
 
-        config_vectors = set()
+        self.data = list()
 
         for config_path, attr_path in experiments:
             kconf.load_config(config_path)
             with open(attr_path, "r") as f:
                 attr = json.load(f)
 
-            config_vector = tuple(
-                map(lambda sym: kconf.syms[sym].tri_value == 2, self.symbols)
-            )
-            config_vectors.add(config_vector)
-            self.data.append((config_vector, attr))
+            config = dict.fromkeys(self.symbol_names, False)
+            for sym in self.symbol_names:
+                if kconf.syms[sym].tri_value == 2:
+                    config[sym] = True
+            self.data.append((frozendict(config), attr))
diff --git a/lib/model.py b/lib/model.py
index c74ebd1..348c541 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -1165,6 +1165,11 @@ class KConfigModel:
             self.value = value
             self.stddev = stddev
 
+        @classmethod
+        def from_json(cls, json_node):
+            """Create a Leaf from a dict holding "value" and "stddev"."""
+            return cls(json_node["value"], json_node["stddev"])
+
         def model(self, kconf):
             return self.value
 
@@ -1174,12 +1179,21 @@ class KConfigModel:
         def to_json(self):
             return {"value": self.value, "stddev": self.stddev}
 
-    class Node:
+    class BoolNode:
         def __init__(self, symbol):
             self.symbol = symbol
             self.child_n = None
             self.child_y = None
 
+        @classmethod
+        def from_json(cls, outer_cls, json_node):
+            """Rebuild a BoolNode; outer_cls decodes the "n" and "y" subtrees."""
+            node = cls(json_node["symbol"])
+            for key, attach in (("n", node.set_child_n), ("y", node.set_child_y)):
+                if json_node[key]:
+                    attach(outer_cls._node_from_json(json_node[key]))
+            return node
+
         def set_child_n(self, child_node):
             self.child_n = child_node
 
@@ -1194,7 +1208,7 @@ class KConfigModel:
             return None
 
         def __repr__(self):
-            return f"<Node(n={self.child_n}, y={self.child_y})>"
+            return f"<BoolNode {self.symbol}, n={self.child_n}, y={self.child_y}>"
 
         def to_json(self):
             ret = {"symbol": self.symbol}
@@ -1208,9 +1222,48 @@ class KConfigModel:
                 ret["y"] = None
             return ret
 
-    def __init__(self, kconfig_benchmark, attribute):
+    class ChoiceNode:
+        """Tree node which dispatches on the selected symbol of a Kconfig choice."""
+
+        def __init__(self, symbol):
+            self.symbol = symbol  # name of the choice this node splits on
+            self.choice = dict()  # choice symbol name -> child node
+
+        @classmethod
+        def from_json(cls, outer_cls, json_node):
+            """Rebuild a node from to_json() output; outer_cls decodes the children."""
+            node = cls(json_node["symbol"])
+            for choice_name, choice_json in json_node["choice"].items():
+                node.set_child(choice_name, outer_cls._node_from_json(choice_json))
+            return node
+
+        def set_child(self, choice, node):
+            self.choice[choice] = node
+
+        def model(self, kconf):
+            """Return the selected child's prediction, or None if it is unknown."""
+            kchoice = next((c for c in kconf.choices if c.name == self.symbol), None)
+            # getattr() maps a missing choice / empty selection to a None lookup key
+            selected = getattr(kchoice, "selection", None)
+            child = self.choice.get(getattr(selected, "name", None))
+            return None if child is None else child.model(kconf)
+
+        def __repr__(self):
+            children = (f"{name}={self.choice[name]}" for name in sorted(self.choice))
+            return f"<ChoiceNode {self.symbol}, {', '.join(children)}>"
+
+        def to_json(self):
+            children = {name: node.to_json() for name, node in self.choice.items()}
+            return {"symbol": self.symbol, "choice": children}
+
+    @classmethod
+    def from_benchmark(cls, kconfig_benchmark, attribute):
+        self = cls()
         self.data = kconfig_benchmark.data
-        self.symbols = kconfig_benchmark.symbols
+        self.symbols = kconfig_benchmark.symbol_names
+        self.choices = kconfig_benchmark.choice_names
+        self.symbol = kconfig_benchmark.symbol
+        self.choice = kconfig_benchmark.choice
         self.max_stddev = 10
         if callable(attribute):
             self.attribute = "custom"
@@ -1223,9 +1276,39 @@ class KConfigModel:
             self.attr_function = lambda x: x[1]["total"]["RAM"]
         else:
             raise ValueError("attribute must be a a function, 'rom', or 'ram'")
+        return self
+
+    @classmethod
+    def from_json(cls, json_input: dict):
+        """Create a model from a dict with attribute, symbols and model keys."""
+        instance = cls()
+        instance.attribute, instance.symbols = json_input["attribute"], json_input["symbols"]
+        instance.model = cls._node_from_json(json_input["model"])
+        return instance
+
+    @classmethod
+    def _node_from_json(cls, json_node):
+        """Decode one tree node; the keys present select the node class."""
+        for key, node_cls in (("choice", cls.ChoiceNode), ("n", cls.BoolNode)):
+            if key in json_node:
+                return node_cls.from_json(cls, json_node)
+        return cls.Leaf.from_json(json_node)
 
     def build_tree(self):
-        self.model = self._build_tree(self.symbols, self.data, 0)
+        """Build self.model from standalone symbols and non-optional choices."""
+        # without ChoiceNode:
+        # self.model = self._build_tree(self.symbols, list(), self.data, 0)
+
+        def is_standalone(name):
+            # a symbol stays a bool split unless a non-optional choice owns it
+            owner = self.symbol[name].choice
+            return owner is None or owner.is_optional
+
+        standalone_symbols = [name for name in self.symbols if is_standalone(name)]
+        tree_choices = [
+            name for name in self.choices if not self.choice[name].is_optional
+        ]
+        self.model = self._build_tree(standalone_symbols, tree_choices, self.data, 0)
 
     def value_for_config(self, kconf):
         return self.model.model(kconf)
@@ -1238,53 +1321,86 @@ class KConfigModel:
         }
         return output
 
-    def _build_tree(self, this_symbols, this_data, level):
+    def _build_tree(self, this_symbols, this_choices, this_data, level):
 
         rom_sizes = list(map(self.attr_function, this_data))
 
         if np.std(rom_sizes) < self.max_stddev or len(this_symbols) == 0:
             return self.Leaf(np.mean(rom_sizes), np.std(rom_sizes))
 
-        mean_stds = list()
-        for i, param in enumerate(this_symbols):
-            enabled = list(filter(lambda vrr: vrr[0][i] == True, this_data))
-            disabled = list(filter(lambda vrr: vrr[0][i] == False, this_data))
+        sym_stds = list()
+        for symbol_name in this_symbols:
+            enabled = list(filter(lambda vrr: vrr[0][symbol_name] == True, this_data))
+            disabled = list(filter(lambda vrr: vrr[0][symbol_name] == False, this_data))
 
             enabled_std_rom = np.std(list(map(self.attr_function, enabled)))
             disabled_std_rom = np.std(list(map(self.attr_function, disabled)))
             children = [enabled_std_rom, disabled_std_rom]
 
             if np.any(np.isnan(children)):
-                mean_stds.append(np.inf)
+                sym_stds.append(np.inf)
             else:
-                mean_stds.append(np.mean(children))
+                sym_stds.append(np.mean(children))
+
+        choice_stds = list()
+        for choice in this_choices:
+            choice_foo = list()
+            choice_std = list()
+            num_configs = 0
+            for symbol in self.choice[choice].syms:
+                sym_enabled = list(
+                    filter(lambda vrr: vrr[0][symbol.name] == True, this_data)
+                )
+                num_configs += len(sym_enabled)
+                choice_foo.append(sym_enabled)
+                choice_std.append(np.std(list(map(self.attr_function, sym_enabled))))
 
-        symbol_index = np.argmin(mean_stds)
-        symbol = this_symbols[symbol_index]
-        enabled = list(filter(lambda vrr: vrr[0][symbol_index] == True, this_data))
-        disabled = list(filter(lambda vrr: vrr[0][symbol_index] == False, this_data))
+            # only split on a choice if it is present in _all_ configurations
+            if np.any(np.isnan(choice_std)) or num_configs != len(this_data):
+                choice_stds.append(np.inf)
+            else:
+                choice_stds.append(np.mean(choice_std))
 
-        node = self.Node(symbol)
+        min_index = np.argmin(sym_stds + choice_stds)
 
-        new_symbols = this_symbols[:symbol_index] + this_symbols[symbol_index + 1 :]
-        enabled = list(
-            map(
-                lambda x: (x[0][:symbol_index] + x[0][symbol_index + 1 :], x[1]),
-                enabled,
-            )
-        )
-        disabled = list(
-            map(
-                lambda x: (x[0][:symbol_index] + x[0][symbol_index + 1 :], x[1]),
-                disabled,
+        if min_index < len(sym_stds):
+            symbol_index = min_index
+            symbol = this_symbols[symbol_index]
+
+            node = self.BoolNode(symbol)
+
+            new_symbols = this_symbols[:symbol_index] + this_symbols[symbol_index + 1 :]
+
+            enabled = list(filter(lambda vrr: vrr[0][symbol] == True, this_data))
+            disabled = list(filter(lambda vrr: vrr[0][symbol] == False, this_data))
+
+            logger.debug(
+                f"Level {level} split on {symbol} (mean std={sym_stds[symbol_index]}) has {len(enabled)} children when enabled and {len(disabled)} children when disabled"
             )
-        )
-        logger.debug(
-            f"Level {level} split on {symbol} has {len(enabled)} children when enabled and {len(disabled)} children when disabled"
-        )
-        if len(enabled):
-            node.set_child_y(self._build_tree(new_symbols, enabled, level + 1))
-        if len(disabled):
-            node.set_child_n(self._build_tree(new_symbols, disabled, level + 1))
+            if len(enabled):
+                node.set_child_y(
+                    self._build_tree(new_symbols, this_choices, enabled, level + 1)
+                )
+            if len(disabled):
+                node.set_child_n(
+                    self._build_tree(new_symbols, this_choices, disabled, level + 1)
+                )
+        else:
+            choice_index = min_index - len(sym_stds)
+            choice = this_choices[choice_index]
+            node = self.ChoiceNode(choice)
+
+            new_choices = this_choices[:choice_index] + this_choices[choice_index + 1 :]
+
+            for sym in self.choice[choice].syms:
+                enabled = list(filter(lambda vrr: vrr[0][sym.name] == True, this_data))
+                logger.debug(
+                    f"Level {level} split on {choice} (mean std={choice_stds[choice_index]}) has {len(enabled)} children for {sym.name}"
+                )
+                if len(enabled):
+                    node.set_child(
+                        sym.name,
+                        self._build_tree(this_symbols, new_choices, enabled, level + 1),
+                    )
 
         return node
-- 
cgit v1.2.3