From ee389fc21e87a373d2d7d3ed1c4047165344bad8 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Wed, 16 Sep 2020 12:55:48 +0200 Subject: switch to sum of squared residuals as loss function This is in line with DECART by Guo et al., 2017 --- lib/model.py | 80 +++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 33 deletions(-) (limited to 'lib/model.py') diff --git a/lib/model.py b/lib/model.py index e0ce056..f330327 100644 --- a/lib/model.py +++ b/lib/model.py @@ -1170,27 +1170,27 @@ class KConfigModel: :param value: A model value, e.g. expected ROM or RAM usage in Byte :type value: float - :param stddev: Standard deviation of benchmark data used to generate this leaf node - :type stddev: float + :param loss: Loss (sum of squared residuals) for the benchmark data used to generate this leaf node vs. `value`. Lower is better. + :type loss: float """ - def __init__(self, value, stddev): + def __init__(self, value, loss): self.value = value - self.stddev = stddev + self.loss = loss @classmethod def from_json(cls, json_node): - node = cls(json_node["value"], json_node["stddev"]) + node = cls(json_node["value"], json_node["loss"]) return node def model(self, kconf): return self.value def __repr__(self): - return f"" + return f"" def to_json(self): - return {"value": self.value, "stddev": self.stddev} + return {"value": self.value, "loss": self.loss} class BoolNode(Node): """ @@ -1280,7 +1280,10 @@ class KConfigModel: kconf_choice = next( filter(lambda choice: choice.name == self.symbol, kconf.choices) ) - return self.choice[kconf_choice.selection.name].model(kconf) + selection = kconf_choice.selection.name + if selection in self.choice: + return self.choice[selection].model(kconf) + return None def __repr__(self): choice_names = sorted(self.choice.keys()) @@ -1303,7 +1306,7 @@ class KConfigModel: self.choices = kconfig_benchmark.choice_names self.symbol = kconfig_benchmark.symbol self.choice = kconfig_benchmark.choice - self.max_stddev = 10 + self.max_loss = 10 if callable(attribute): self.attribute = "custom" self.attr_function = lambda x: attribute(x[1]) @@ -1334,6 +1337,13 @@ class KConfigModel: return cls.BoolNode.from_json(cls, json_node) return cls.Leaf.from_json(json_node) + def loss(self, values, model_value=None): + if type(values) is list: + values = np.array(values) + if model_value is None: + model_value = np.mean(values) + return np.sum((model_value - values) ** 2, dtype=np.float64) + def build_tree(self): # without ChoiceNode: # self.model = self._build_tree(self.symbols, list(), self.data, 0) @@ -1366,45 +1376,49 @@ class KConfigModel: rom_sizes = list(map(self.attr_function, this_data)) - if np.std(rom_sizes) < self.max_stddev or len(this_symbols) == 0: - return self.Leaf(np.mean(rom_sizes), np.std(rom_sizes)) + if self.loss(rom_sizes) < self.max_loss or len(this_symbols) == 0: + return self.Leaf(np.mean(rom_sizes), self.loss(rom_sizes)) - sym_stds = list() + sym_losses = list() for symbol_name in this_symbols: enabled = list(filter(lambda vrr: vrr[0][symbol_name] == True, this_data)) disabled = list(filter(lambda vrr: vrr[0][symbol_name] == False, this_data)) - enabled_std_rom = np.std(list(map(self.attr_function, enabled))) - disabled_std_rom = np.std(list(map(self.attr_function, disabled))) - children = [enabled_std_rom, disabled_std_rom] + if len(enabled) == 0 or len(disabled) == 0: + sym_losses.append(np.inf) + continue - if np.any(np.isnan(children)): - sym_stds.append(np.inf) - else: - sym_stds.append(np.mean(children)) + enabled_attr = list(map(self.attr_function, enabled)) + disabled_attr = list(map(self.attr_function, disabled)) - choice_stds = list() + children = [self.loss(enabled_attr), self.loss(disabled_attr)] + + sym_losses.append(np.sum(children)) + + choice_losses = list() for choice in this_choices: - choice_foo = list() - choice_std = list() + choice_loss = list() num_configs = 0 for symbol in self.choice[choice].syms: sym_enabled = list( filter(lambda vrr: vrr[0][symbol.name] == True, this_data) ) - num_configs += len(sym_enabled) - choice_foo.append(sym_enabled) - choice_std.append(np.std(list(map(self.attr_function, sym_enabled)))) + enabled_attr = list(map(self.attr_function, sym_enabled)) + if len(enabled_attr) == 0: + continue + + num_configs += len(enabled_attr) + choice_loss.append(self.loss(enabled_attr)) # only split on a choice if it is present in _all_ configurations - if np.any(np.isnan(choice_std)) or num_configs != len(this_data): - choice_stds.append(np.inf) + if num_configs != len(this_data): + choice_losses.append(np.inf) else: - choice_stds.append(np.mean(choice_std)) + choice_losses.append(np.sum(choice_loss)) - min_index = np.argmin(sym_stds + choice_stds) + min_index = np.argmin(sym_losses + choice_losses) - if min_index < len(sym_stds): + if min_index < len(sym_losses): symbol_index = min_index symbol = this_symbols[symbol_index] @@ -1416,7 +1430,7 @@ class KConfigModel: disabled = list(filter(lambda vrr: vrr[0][symbol] == False, this_data)) logger.debug( - f"Level {level} split on {symbol} (mean std={sym_stds[symbol_index]}) has {len(enabled)} children when enabled and {len(disabled)} children when disabled" + f"Level {level} split on {symbol} (loss={sym_losses[symbol_index]}) has {len(enabled)} children when enabled and {len(disabled)} children when disabled" ) if len(enabled): node.set_child_y( @@ -1427,7 +1441,7 @@ class KConfigModel: self._build_tree(new_symbols, this_choices, disabled, level + 1) ) else: - choice_index = min_index - len(sym_stds) + choice_index = min_index - len(sym_losses) choice = this_choices[choice_index] node = self.ChoiceNode(choice) @@ -1436,7 +1450,7 @@ class KConfigModel: for sym in self.choice[choice].syms: enabled = list(filter(lambda vrr: vrr[0][sym.name] == True, this_data)) logger.debug( - f"Level {level} split on {choice} (mean std={choice_stds[choice_index]}) has {len(enabled)} children for {sym.name}" + f"Level {level} split on {choice} (loss={choice_losses[choice_index]}) has {len(enabled)} children for {sym.name}" ) if len(enabled): node.set_child( -- cgit v1.2.3