summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel Friesel <daniel.friesel@uos.de> 2020-09-16 12:55:48 +0200
committer: Daniel Friesel <daniel.friesel@uos.de> 2020-09-16 12:55:56 +0200
commit: ee389fc21e87a373d2d7d3ed1c4047165344bad8 (patch)
tree: 32b9c72ad3aa9ecb053f3736194ed87fece01d28
parent: 656d6bb9b39e0aa15b09dc4d69375b71cbaea9ec (diff)
switch to sum of squared residuals as loss function
This is in line with DECART by Guo et al., 2017
-rwxr-xr-x bin/analyze-kconfig.py 8
-rw-r--r-- lib/model.py 80
2 files changed, 51 insertions, 37 deletions
diff --git a/bin/analyze-kconfig.py b/bin/analyze-kconfig.py
index 87c05f7..f7ae448 100755
--- a/bin/analyze-kconfig.py
+++ b/bin/analyze-kconfig.py
@@ -37,9 +37,9 @@ def main():
"--attribute", choices=["rom", "ram"], default="rom", help="Model attribute"
)
parser.add_argument(
- "--max-stddev",
+ "--max-loss",
type=float,
- help="Maximum acceptable model standard deviation for DecisionTree Leaves",
+ help="Maximum acceptable model loss for DecisionTree Leaves",
default=10,
)
parser.add_argument(
@@ -65,8 +65,8 @@ def main():
if os.path.isdir(args.model):
data = KConfigAttributes(args.kconfig_path, args.model)
model = KConfigModel.from_benchmark(data, args.attribute)
- if args.max_stddev:
- model.max_stddev = args.max_stddev
+ if args.max_loss:
+ model.max_loss = args.max_loss
model.build_tree()
else:
diff --git a/lib/model.py b/lib/model.py
index e0ce056..f330327 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -1170,27 +1170,27 @@ class KConfigModel:
:param value: A model value, e.g. expected ROM or RAM usage in Byte
:type value: float
- :param stddev: Standard deviation of benchmark data used to generate this leaf node
- :type stddev: float
+ :param loss: Loss (sum of squared residuals) for the benchmark data used to generate this leaf node vs. `value`. Lower is better.
+ :type loss: float
"""
- def __init__(self, value, stddev):
+ def __init__(self, value, loss):
self.value = value
- self.stddev = stddev
+ self.loss = loss
@classmethod
def from_json(cls, json_node):
- node = cls(json_node["value"], json_node["stddev"])
+ node = cls(json_node["value"], json_node["loss"])
return node
def model(self, kconf):
return self.value
def __repr__(self):
- return f"<Leaf({self.value}, {self.stddev})>"
+ return f"<Leaf({self.value}, {self.loss})>"
def to_json(self):
- return {"value": self.value, "stddev": self.stddev}
+ return {"value": self.value, "loss": self.loss}
class BoolNode(Node):
"""
@@ -1280,7 +1280,10 @@ class KConfigModel:
kconf_choice = next(
filter(lambda choice: choice.name == self.symbol, kconf.choices)
)
- return self.choice[kconf_choice.selection.name].model(kconf)
+ selection = kconf_choice.selection.name
+ if selection in self.choice:
+ return self.choice[selection].model(kconf)
+ return None
def __repr__(self):
choice_names = sorted(self.choice.keys())
@@ -1303,7 +1306,7 @@ class KConfigModel:
self.choices = kconfig_benchmark.choice_names
self.symbol = kconfig_benchmark.symbol
self.choice = kconfig_benchmark.choice
- self.max_stddev = 10
+ self.max_loss = 10
if callable(attribute):
self.attribute = "custom"
self.attr_function = lambda x: attribute(x[1])
@@ -1334,6 +1337,13 @@ class KConfigModel:
return cls.BoolNode.from_json(cls, json_node)
return cls.Leaf.from_json(json_node)
+ def loss(self, values, model_value=None):
+ if type(values) is list:
+ values = np.array(values)
+ if model_value is None:
+ model_value = np.mean(values)
+ return np.sum((model_value - values) ** 2, dtype=np.float64)
+
def build_tree(self):
# without ChoiceNode:
# self.model = self._build_tree(self.symbols, list(), self.data, 0)
@@ -1366,45 +1376,49 @@ class KConfigModel:
rom_sizes = list(map(self.attr_function, this_data))
- if np.std(rom_sizes) < self.max_stddev or len(this_symbols) == 0:
- return self.Leaf(np.mean(rom_sizes), np.std(rom_sizes))
+ if self.loss(rom_sizes) < self.max_loss or len(this_symbols) == 0:
+ return self.Leaf(np.mean(rom_sizes), self.loss(rom_sizes))
- sym_stds = list()
+ sym_losses = list()
for symbol_name in this_symbols:
enabled = list(filter(lambda vrr: vrr[0][symbol_name] == True, this_data))
disabled = list(filter(lambda vrr: vrr[0][symbol_name] == False, this_data))
- enabled_std_rom = np.std(list(map(self.attr_function, enabled)))
- disabled_std_rom = np.std(list(map(self.attr_function, disabled)))
- children = [enabled_std_rom, disabled_std_rom]
+ if len(enabled) == 0 or len(disabled) == 0:
+ sym_losses.append(np.inf)
+ continue
- if np.any(np.isnan(children)):
- sym_stds.append(np.inf)
- else:
- sym_stds.append(np.mean(children))
+ enabled_attr = list(map(self.attr_function, enabled))
+ disabled_attr = list(map(self.attr_function, disabled))
- choice_stds = list()
+ children = [self.loss(enabled_attr), self.loss(disabled_attr)]
+
+ sym_losses.append(np.sum(children))
+
+ choice_losses = list()
for choice in this_choices:
- choice_foo = list()
- choice_std = list()
+ choice_loss = list()
num_configs = 0
for symbol in self.choice[choice].syms:
sym_enabled = list(
filter(lambda vrr: vrr[0][symbol.name] == True, this_data)
)
- num_configs += len(sym_enabled)
- choice_foo.append(sym_enabled)
- choice_std.append(np.std(list(map(self.attr_function, sym_enabled))))
+ enabled_attr = list(map(self.attr_function, sym_enabled))
+ if len(enabled_attr) == 0:
+ continue
+
+ num_configs += len(enabled_attr)
+ choice_loss.append(self.loss(enabled_attr))
# only split on a choice if it is present in _all_ configurations
- if np.any(np.isnan(choice_std)) or num_configs != len(this_data):
- choice_stds.append(np.inf)
+ if num_configs != len(this_data):
+ choice_losses.append(np.inf)
else:
- choice_stds.append(np.mean(choice_std))
+ choice_losses.append(np.sum(choice_loss))
- min_index = np.argmin(sym_stds + choice_stds)
+ min_index = np.argmin(sym_losses + choice_losses)
- if min_index < len(sym_stds):
+ if min_index < len(sym_losses):
symbol_index = min_index
symbol = this_symbols[symbol_index]
@@ -1416,7 +1430,7 @@ class KConfigModel:
disabled = list(filter(lambda vrr: vrr[0][symbol] == False, this_data))
logger.debug(
- f"Level {level} split on {symbol} (mean std={sym_stds[symbol_index]}) has {len(enabled)} children when enabled and {len(disabled)} children when disabled"
+ f"Level {level} split on {symbol} (loss={sym_losses[symbol_index]}) has {len(enabled)} children when enabled and {len(disabled)} children when disabled"
)
if len(enabled):
node.set_child_y(
@@ -1427,7 +1441,7 @@ class KConfigModel:
self._build_tree(new_symbols, this_choices, disabled, level + 1)
)
else:
- choice_index = min_index - len(sym_stds)
+ choice_index = min_index - len(sym_losses)
choice = this_choices[choice_index]
node = self.ChoiceNode(choice)
@@ -1436,7 +1450,7 @@ class KConfigModel:
for sym in self.choice[choice].syms:
enabled = list(filter(lambda vrr: vrr[0][sym.name] == True, this_data))
logger.debug(
- f"Level {level} split on {choice} (mean std={choice_stds[choice_index]}) has {len(enabled)} children for {sym.name}"
+ f"Level {level} split on {choice} (loss={choice_losses[choice_index]}) has {len(enabled)} children for {sym.name}"
)
if len(enabled):
node.set_child(