add basic decisiontree model for binary kconfig files

author: Daniel Friesel <daniel.friesel@uos.de> 2020-09-09 11:22:17 +0200
committer: Daniel Friesel <daniel.friesel@uos.de> 2020-09-09 11:22:17 +0200
commit: 963be43fe272e330df2f49eb55957aea1b90e579 (patch)
tree: 3b00f1a9406e1cbd91233f7fc1bdd77829fb4205
parent: 8b969f4945e97d811b7a5b27c99b76cf2dd2840b (diff)
2 files changed, 281 insertions, 0 deletions
diff --git a/bin/analyze-config.py b/bin/analyze-config.py
new file mode 100755
index 0000000..56a2716
--- /dev/null
+++ b/bin/analyze-config.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+import json
+import kconfiglib
+import logging
+import os
+
+import numpy as np
+
+loglevel = "DEBUG"  # log level name; the error message below used it undefined
+numeric_level = getattr(logging, loglevel, None)
+if not isinstance(numeric_level, int):
+    raise SystemExit(f"Invalid log level: {loglevel}")  # stderr + exit status 1
+logging.basicConfig(level=numeric_level)
+
+# Hard-coded input locations: the Kconfig description and the result directories.
+kconfig_path = "/tmp/multipass/Kconfig"
+configs_base = "/tmp/multipass-model"
+
+# Collect one (.config, attributes.json) pair per directory whose name contains
+# "Multipass-"; directories without an attributes.json file are skipped.
+experiments = []
+for direntry in os.listdir(configs_base):
+    run_dir = f"{configs_base}/{direntry}"
+    if "Multipass-" in direntry and os.path.exists(f"{run_dir}/attributes.json"):
+        experiments.append((f"{run_dir}/.config", f"{run_dir}/attributes.json"))
+
+# Parse the Kconfig description located at kconfig_path.
+kconf = kconfiglib.Kconfig(kconfig_path)
+
+# Names of all bool-typed symbols, sorted so that every configuration vector
+# built from this list uses the same symbol order.
+symbols = sorted(
+    sym.name
+    for sym in kconf.syms.values()
+    if kconfiglib.TYPE_TO_STR[sym.type] == "bool"
+)
+
+# Per-attribute measurement lists; "param" holds one configuration vector per
+# experiment. This dict is only filled in below, the script never reads it.
+by_name = {
+    "multipass": {
+        "isa": "state",
+        "attributes": ["rom_usage", "ram_usage"],
+        "rom_usage": [],
+        "ram_usage": [],
+        "param": [],
+    }
+}
+# One (configuration vector, ROM total, RAM total) tuple per experiment.
+data = []
+# Distinct configuration vectors, used to report how many are unique.
+config_vectors = set()
+
+for config_path, attr_path in experiments:
+    kconf.load_config(config_path)
+    with open(attr_path, "r") as f:
+        attr = json.load(f)
+    totals = attr["total"]
+
+    # One entry per bool symbol: True if its tri_value is 2 (set to y).
+    config_vector = tuple(kconf.syms[name].tri_value == 2 for name in symbols)
+    config_vectors.add(config_vector)
+    by_name["multipass"]["rom_usage"].append(totals["ROM"])
+    by_name["multipass"]["ram_usage"].append(totals["RAM"])
+    by_name["multipass"]["param"].append(config_vector)
+    data.append((config_vector, totals["ROM"], totals["RAM"]))
+
+print("Processing {:d} unique configurations of {:d} total".format(len(config_vectors), len(experiments)))
+print("std of all data: {:5.0f} Bytes".format(np.std([entry[1] for entry in data])))
+
+
+class DTreeLeaf:
+    """Leaf of the decision tree: a value and its standard deviation."""
+    def __init__(self, value, stddev):
+        self.value, self.stddev = value, stddev
+
+    def __repr__(self):
+        return f"<DTreeLeaf({self.value}, {self.stddev})>"
+
+    def to_json(self):
+        return dict(value=self.value, stddev=self.stddev)
+
+
+class DTreeNode:
+    """Inner decision tree node that branches on one boolean symbol.
+
+    Either child stays None until a subtree is attached via the setters.
+    """
+
+    def __init__(self, symbol):
+        self.symbol = symbol
+        self.false_child = self.true_child = None
+
+    def set_false_child(self, child_node):
+        self.false_child = child_node
+
+    def set_true_child(self, child_node):
+        self.true_child = child_node
+
+    def __repr__(self):
+        return f"<DTreeNode({self.false_child}, {self.true_child})>"
+
+    def to_json(self):
+        """Return a nested dict; a missing child is serialized as None."""
+        return {
+            "symbol": self.symbol,
+            "false": self.false_child.to_json() if self.false_child else None,
+            "true": self.true_child.to_json() if self.true_child else None,
+        }
+
+
+def get_min(this_symbols, this_data, level):
+    """Recursively build a decision tree that models the ROM size.
+
+    this_symbols: names of the symbols that may still be split on.
+    this_data: (config_vector, rom, ram) tuples; config_vector[i] belongs to
+    this_symbols[i]. level: recursion depth, only used in the progress output.
+    Returns a DTreeLeaf or a DTreeNode.
+    """
+    rom_sizes = [entry[1] for entry in this_data]
+
+    # Stop once the data is homogeneous enough or nothing is left to split on.
+    if np.std(rom_sizes) < 100 or len(this_symbols) == 0:
+        return DTreeLeaf(np.mean(rom_sizes), np.std(rom_sizes))
+
+    # Score each symbol by the mean ROM stddev of its two partitions. A symbol
+    # that leaves one partition empty cannot split the data and scores inf
+    # (this also avoids calling np.std on an empty list).
+    mean_stds = []
+    for i, _ in enumerate(this_symbols):
+        enabled = [entry[1] for entry in this_data if entry[0][i]]
+        disabled = [entry[1] for entry in this_data if not entry[0][i]]
+        if enabled and disabled:
+            mean_stds.append(np.mean([np.std(enabled), np.std(disabled)]))
+        else:
+            mean_stds.append(np.inf)
+
+    symbol_index = np.argmin(mean_stds)
+    # No symbol separates the data: return a leaf instead of emitting a chain
+    # of single-child nodes (one recursion level per remaining symbol).
+    if np.isinf(mean_stds[symbol_index]):
+        return DTreeLeaf(np.mean(rom_sizes), np.std(rom_sizes))
+
+    def without_symbol(entry):
+        return (entry[0][:symbol_index] + entry[0][symbol_index + 1 :], *entry[1:])
+
+    symbol = this_symbols[symbol_index]
+    new_symbols = this_symbols[:symbol_index] + this_symbols[symbol_index + 1 :]
+    enabled = [without_symbol(e) for e in this_data if e[0][symbol_index]]
+    disabled = [without_symbol(e) for e in this_data if not e[0][symbol_index]]
+    print(
+        f"Level {level} split on {symbol} has {len(enabled)} children when enabled and {len(disabled)} children when disabled"
+    )
+    node = DTreeNode(symbol)
+    node.set_true_child(get_min(new_symbols, enabled, level + 1))
+    node.set_false_child(get_min(new_symbols, disabled, level + 1))
+    return node
+
+
+# Build the ROM size tree from all bool symbols and all measurements (level 0).
+model = get_min(symbols, data, 0)
+# Keep the symbol list next to the tree; the tree was built against it.
+output = dict(model=model.to_json(), symbols=symbols)
+with open("kconfigmodel.json", "w") as f:
+    json.dump(output, f)
diff --git a/bin/model-config.py b/bin/model-config.py
new file mode 100755
index 0000000..9e86d4a
--- /dev/null
+++ b/bin/model-config.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+
+import json
+import kconfiglib
+import logging
+import os
+import sys
+
+import numpy as np
+
+loglevel = "DEBUG"  # log level name; the error message below used it undefined
+numeric_level = getattr(logging, loglevel, None)
+if not isinstance(numeric_level, int):
+    raise SystemExit(f"Invalid log level: {loglevel}")  # stderr + exit status 1
+logging.basicConfig(level=numeric_level)
+
+kconfig_path = "/tmp/multipass/Kconfig"
+configs_base = "/tmp/multipass-model"
+
+kconf = kconfiglib.Kconfig(kconfig_path)
+# Load the configuration under test first: everything below (the reference
+# vector and the model evaluation) must describe it, not the Kconfig defaults.
+kconf.load_config(sys.argv[1])
+
+# Names of all bool-typed symbols in sorted order.
+symbols = sorted(
+    sym.name
+    for sym in kconf.syms.values()
+    if kconfiglib.TYPE_TO_STR[sym.type] == "bool"
+)
+# Reference vector of the loaded configuration: True for symbols set to y.
+config_vector = tuple(kconf.syms[name].tri_value == 2 for name in symbols)
+
+with open("kconfigmodel.json", "r") as f:
+    data = json.load(f)
+
+# The model is only usable with the exact symbol list it was stored with.
+assert symbols == data["symbols"]
+model = data["model"]
+
+
+class DTreeLeaf:
+    """Leaf of the decision tree: a value and its standard deviation."""
+    def __init__(self, value, stddev):
+        self.value, self.stddev = value, stddev
+
+    def __repr__(self):
+        return f"<DTreeLeaf({self.value}, {self.stddev})>"
+
+    def model(self, kconf):
+        return self.value  # a leaf yields its value regardless of kconf
+
+
+class DTreeNode:
+    """Inner decision tree node that branches on one boolean symbol."""
+    def __init__(self, symbol, false_child=None, true_child=None):
+        self.symbol = symbol
+        self.false_child, self.true_child = false_child, true_child
+
+    def set_false_child(self, child_node):
+        self.false_child = child_node
+
+    def set_true_child(self, child_node):
+        self.true_child = child_node
+
+    def __repr__(self):
+        return f"<DTreeNode({self.false_child}, {self.true_child})>"
+
+    def model(self, kconf):
+        """Evaluate the subtree matching the symbol's tri_value in kconf (2: true
+        child, 0: false child); return None if there is no such subtree."""
+        branches = {2: self.true_child, 0: self.false_child}
+        child = branches.get(kconf.syms[self.symbol].tri_value)
+        return child.model(kconf) if child else None
+
+
+def load_model(tree):
+    """Rebuild DTreeLeaf / DTreeNode objects from the nested JSON dict *tree*."""
+    if tree is None:
+        return None  # a None entry stays None
+    if "value" not in tree:
+        false_child = load_model(tree["false"])
+        return DTreeNode(tree["symbol"], false_child, load_model(tree["true"]))
+    return DTreeLeaf(tree["value"], tree["stddev"])
+
+
+root = load_model(model)  # object tree rebuilt from the JSON "model" entry
+
+
+def vector_diff(v1, v2):  # number of positions at which v1 and v2 differ
+    return sum(a != b for a, b in zip(v1, v2))
+
+
+current_model = root.model(kconf)
+print(f"Model result: {current_model}")
+
+# Toggle each bool symbol that can be toggled and report how many symbols
+# change with it and how the model result moves.
+for symbol in symbols:
+    kconf_sym = kconf.syms[symbol]
+    if kconf_sym.tri_value == 0 and 2 in kconf_sym.assignable:
+        new_value, label = 2, "y"
+    elif kconf_sym.tri_value == 2 and 0 in kconf_sym.assignable:
+        new_value, label = 0, "n"
+    else:
+        continue
+    kconf_sym.set_value(new_value)
+    new_vector = tuple(kconf.syms[sym].tri_value == 2 for sym in symbols)
+    num_changes = vector_diff(config_vector, new_vector)
+    new_model = root.model(kconf)
+    # The tree yields None for configurations it has no branch for.
+    known = current_model is not None and new_model is not None
+    model_diff = f"{new_model - current_model:+5.0f}" if known else "  n/a"
+    print(f"Setting {symbol:30s} to {label} changes {num_changes:2d} symbols, model change: {model_diff}")
+    kconf.load_config(sys.argv[1])  # undo the change before the next symbol
author	Daniel Friesel <daniel.friesel@uos.de>	2020-09-09 11:22:17 +0200
committer	Daniel Friesel <daniel.friesel@uos.de>	2020-09-09 11:22:17 +0200
commit	963be43fe272e330df2f49eb55957aea1b90e579 (patch)
tree	3b00f1a9406e1cbd91233f7fc1bdd77829fb4205
parent	8b969f4945e97d811b7a5b27c99b76cf2dd2840b (diff)