From ce11d3f77c7fb718124d54f6456b7a0d8f2ceaf0 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Thu, 24 Sep 2020 09:43:09 +0200 Subject: kconfig: model generation and validation with limited sample size eval-kconfig estimates the generalization error in this case --- bin/analyze-kconfig.py | 24 +++++++++++++++++++++++- bin/eval-kconfig.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/bin/analyze-kconfig.py b/bin/analyze-kconfig.py index f7ae448..ff220b0 100755 --- a/bin/analyze-kconfig.py +++ b/bin/analyze-kconfig.py @@ -13,6 +13,8 @@ import kconfiglib import logging import os +import numpy as np + from dfatool.loader import KConfigAttributes from dfatool.model import KConfigModel @@ -48,6 +50,15 @@ def main(): type=lambda level: getattr(logging, level.upper()), help="Set log level", ) + parser.add_argument( + "--info", action="store_true", help="Show Kconfig and benchmark information" + ) + parser.add_argument( + "--sample-size", + type=int, + help="Restrict model generation to N random samples", + metavar="N", + ) parser.add_argument("kconfig_path", type=str, help="Path to Kconfig file") parser.add_argument( "model", @@ -64,7 +75,15 @@ def main(): if os.path.isdir(args.model): data = KConfigAttributes(args.kconfig_path, args.model) - model = KConfigModel.from_benchmark(data, args.attribute) + + if args.sample_size: + shuffled_data_indices = np.random.permutation(np.arange(len(data.data))) + sample_indices = shuffled_data_indices[: args.sample_size] + model = KConfigModel.from_benchmark( + data, args.attribute, indices=sample_indices + ) + else: + model = KConfigModel.from_benchmark(data, args.attribute) if args.max_loss: model.max_loss = args.max_loss model.build_tree() @@ -73,6 +92,9 @@ def main(): with open(args.model, "r") as f: model = KConfigModel.from_json(json.load(f)) + if args.info: + print("TODO") + if args.export_tree: with open(args.export_tree, "w") as f: 
json.dump(model.to_json(), f) diff --git a/bin/eval-kconfig.py b/bin/eval-kconfig.py index 7f48b52..7bc0c41 100755 --- a/bin/eval-kconfig.py +++ b/bin/eval-kconfig.py @@ -37,6 +37,19 @@ def main(): parser.add_argument( "--with-choice-node", action="store_true", help="Use non-binary Choice Nodes" ) + parser.add_argument( + "--max-loss", + type=float, + help="Maximum acceptable model loss for DecisionTree Leaves", + default=10, + ) + # If the population is exhaustive, the generalization error can now be computed + parser.add_argument( + "--sample-size", + type=int, + help="Perform model generation and validation with N random samples from the population", + metavar="N", + ) parser.add_argument("kconfig_path", type=str, help="Path to Kconfig file") parser.add_argument( "experiment_root", type=str, help="Experiment results directory" @@ -54,11 +67,26 @@ def main(): k = 10 - partition_pairs = validation._xv_partitions_kfold(len(data.data), k) + if args.sample_size: + shuffled_data_indices = np.random.permutation(np.arange(len(data.data))) + sample_indices = shuffled_data_indices[: args.sample_size] + nonsample_indices = shuffled_data_indices[args.sample_size :] + partition_pairs = validation._xv_partitions_kfold(args.sample_size, k) + partition_pairs = list( + map( + lambda tv: (shuffled_data_indices[tv[0]], shuffled_data_indices[tv[1]]), + partition_pairs, + ) + ) + else: + partition_pairs = validation._xv_partitions_kfold(len(data.data), k) + measures = list() for training_set, validation_set in partition_pairs: model = KConfigModel.from_benchmark(data, args.attribute, indices=training_set) model.with_choice_node = args.with_choice_node + if args.max_loss: + model.max_loss = args.max_loss model.build_tree() measures.append(model.assess_benchmark(data, indices=validation_set)) @@ -71,11 +99,27 @@ def main(): print("10-fold Cross Validation:") print(f"MAE: {aggregate['mae']:.0f} B") - print(f"SMAPE: 
{aggregate['smape']:.1f} %") print(f"Unpredictable Configurations: {aggregate['unpredictable_count']}") print(aggregate) + if args.sample_size: + print("Estimated Generalization Error") + model = KConfigModel.from_benchmark( + data, args.attribute, indices=sample_indices + ) + model.with_choice_node = args.with_choice_node + if args.max_loss: + model.max_loss = args.max_loss + model.build_tree() + generalization_measure = model.assess_benchmark(data, indices=nonsample_indices) + print(f"MAE: {generalization_measure['mae']:.0f} B") + print(f"SMAPE: {generalization_measure['smape']:.1f} %") + print( + f"Unpredictable Configurations: {generalization_measure['unpredictable_count']}" + ) + """ if args.model: with open(args.model, "r") as f: -- cgit v1.2.3