diff options
author | Daniel Friesel <daniel.friesel@uos.de> | 2020-09-16 12:55:48 +0200 |
---|---|---|
committer | Daniel Friesel <daniel.friesel@uos.de> | 2020-09-16 12:55:56 +0200 |
commit | ee389fc21e87a373d2d7d3ed1c4047165344bad8 (patch) | |
tree | 32b9c72ad3aa9ecb053f3736194ed87fece01d28 | |
parent | 656d6bb9b39e0aa15b09dc4d69375b71cbaea9ec (diff) |
Switch to sum of squared residuals as loss function
This is in line with DECART by Guo et al. (2017).
-rwxr-xr-x | bin/analyze-kconfig.py | 8 | ||||
-rw-r--r-- | lib/model.py | 80 |
2 files changed, 51 insertions, 37 deletions
diff --git a/bin/analyze-kconfig.py b/bin/analyze-kconfig.py index 87c05f7..f7ae448 100755 --- a/bin/analyze-kconfig.py +++ b/bin/analyze-kconfig.py @@ -37,9 +37,9 @@ def main(): "--attribute", choices=["rom", "ram"], default="rom", help="Model attribute" ) parser.add_argument( - "--max-stddev", + "--max-loss", type=float, - help="Maximum acceptable model standard deviation for DecisionTree Leaves", + help="Maximum acceptable model loss for DecisionTree Leaves", default=10, ) parser.add_argument( @@ -65,8 +65,8 @@ def main(): if os.path.isdir(args.model): data = KConfigAttributes(args.kconfig_path, args.model) model = KConfigModel.from_benchmark(data, args.attribute) - if args.max_stddev: - model.max_stddev = args.max_stddev + if args.max_loss: + model.max_loss = args.max_loss model.build_tree() else: diff --git a/lib/model.py b/lib/model.py index e0ce056..f330327 100644 --- a/lib/model.py +++ b/lib/model.py @@ -1170,27 +1170,27 @@ class KConfigModel: :param value: A model value, e.g. expected ROM or RAM usage in Byte :type value: float - :param stddev: Standard deviation of benchmark data used to generate this leaf node - :type stddev: float + :param loss: Loss (sum of squared residuals) for the benchmark data used to generate this leaf node vs. `value`. Lower is better. 
+ :type loss: float """ - def __init__(self, value, stddev): + def __init__(self, value, loss): self.value = value - self.stddev = stddev + self.loss = loss @classmethod def from_json(cls, json_node): - node = cls(json_node["value"], json_node["stddev"]) + node = cls(json_node["value"], json_node["loss"]) return node def model(self, kconf): return self.value def __repr__(self): - return f"<Leaf({self.value}, {self.stddev})>" + return f"<Leaf({self.value}, {self.loss})>" def to_json(self): - return {"value": self.value, "stddev": self.stddev} + return {"value": self.value, "loss": self.loss} class BoolNode(Node): """ @@ -1280,7 +1280,10 @@ class KConfigModel: kconf_choice = next( filter(lambda choice: choice.name == self.symbol, kconf.choices) ) - return self.choice[kconf_choice.selection.name].model(kconf) + selection = kconf_choice.selection.name + if selection in self.choice: + return self.choice[selection].model(kconf) + return None def __repr__(self): choice_names = sorted(self.choice.keys()) @@ -1303,7 +1306,7 @@ class KConfigModel: self.choices = kconfig_benchmark.choice_names self.symbol = kconfig_benchmark.symbol self.choice = kconfig_benchmark.choice - self.max_stddev = 10 + self.max_loss = 10 if callable(attribute): self.attribute = "custom" self.attr_function = lambda x: attribute(x[1]) @@ -1334,6 +1337,13 @@ class KConfigModel: return cls.BoolNode.from_json(cls, json_node) return cls.Leaf.from_json(json_node) + def loss(self, values, model_value=None): + if type(values) is list: + values = np.array(values) + if model_value is None: + model_value = np.mean(values) + return np.sum((model_value - values) ** 2, dtype=np.float64) + def build_tree(self): # without ChoiceNode: # self.model = self._build_tree(self.symbols, list(), self.data, 0) @@ -1366,45 +1376,49 @@ class KConfigModel: rom_sizes = list(map(self.attr_function, this_data)) - if np.std(rom_sizes) < self.max_stddev or len(this_symbols) == 0: - return self.Leaf(np.mean(rom_sizes), 
np.std(rom_sizes)) + if self.loss(rom_sizes) < self.max_loss or len(this_symbols) == 0: + return self.Leaf(np.mean(rom_sizes), self.loss(rom_sizes)) - sym_stds = list() + sym_losses = list() for symbol_name in this_symbols: enabled = list(filter(lambda vrr: vrr[0][symbol_name] == True, this_data)) disabled = list(filter(lambda vrr: vrr[0][symbol_name] == False, this_data)) - enabled_std_rom = np.std(list(map(self.attr_function, enabled))) - disabled_std_rom = np.std(list(map(self.attr_function, disabled))) - children = [enabled_std_rom, disabled_std_rom] + if len(enabled) == 0 or len(disabled) == 0: + sym_losses.append(np.inf) + continue - if np.any(np.isnan(children)): - sym_stds.append(np.inf) - else: - sym_stds.append(np.mean(children)) + enabled_attr = list(map(self.attr_function, enabled)) + disabled_attr = list(map(self.attr_function, disabled)) - choice_stds = list() + children = [self.loss(enabled_attr), self.loss(disabled_attr)] + + sym_losses.append(np.sum(children)) + + choice_losses = list() for choice in this_choices: - choice_foo = list() - choice_std = list() + choice_loss = list() num_configs = 0 for symbol in self.choice[choice].syms: sym_enabled = list( filter(lambda vrr: vrr[0][symbol.name] == True, this_data) ) - num_configs += len(sym_enabled) - choice_foo.append(sym_enabled) - choice_std.append(np.std(list(map(self.attr_function, sym_enabled)))) + enabled_attr = list(map(self.attr_function, sym_enabled)) + if len(enabled_attr) == 0: + continue + + num_configs += len(enabled_attr) + choice_loss.append(self.loss(enabled_attr)) # only split on a choice if it is present in _all_ configurations - if np.any(np.isnan(choice_std)) or num_configs != len(this_data): - choice_stds.append(np.inf) + if num_configs != len(this_data): + choice_losses.append(np.inf) else: - choice_stds.append(np.mean(choice_std)) + choice_losses.append(np.sum(choice_loss)) - min_index = np.argmin(sym_stds + choice_stds) + min_index = np.argmin(sym_losses + choice_losses) - if 
min_index < len(sym_stds): + if min_index < len(sym_losses): symbol_index = min_index symbol = this_symbols[symbol_index] @@ -1416,7 +1430,7 @@ class KConfigModel: disabled = list(filter(lambda vrr: vrr[0][symbol] == False, this_data)) logger.debug( - f"Level {level} split on {symbol} (mean std={sym_stds[symbol_index]}) has {len(enabled)} children when enabled and {len(disabled)} children when disabled" + f"Level {level} split on {symbol} (loss={sym_losses[symbol_index]}) has {len(enabled)} children when enabled and {len(disabled)} children when disabled" ) if len(enabled): node.set_child_y( @@ -1427,7 +1441,7 @@ class KConfigModel: self._build_tree(new_symbols, this_choices, disabled, level + 1) ) else: - choice_index = min_index - len(sym_stds) + choice_index = min_index - len(sym_losses) choice = this_choices[choice_index] node = self.ChoiceNode(choice) @@ -1436,7 +1450,7 @@ class KConfigModel: for sym in self.choice[choice].syms: enabled = list(filter(lambda vrr: vrr[0][sym.name] == True, this_data)) logger.debug( - f"Level {level} split on {choice} (mean std={choice_stds[choice_index]}) has {len(enabled)} children for {sym.name}" + f"Level {level} split on {choice} (loss={choice_losses[choice_index]}) has {len(enabled)} children for {sym.name}" ) if len(enabled): node.set_child( |