summary | refs | log | tree | commit | diff
path: root/bin/eval-kconfig.py
diff options
context:
space:
mode:
author Daniel Friesel <daniel.friesel@uos.de> 2020-09-24 09:43:09 +0200
committer Daniel Friesel <daniel.friesel@uos.de> 2020-09-24 09:43:09 +0200
commit ce11d3f77c7fb718124d54f6456b7a0d8f2ceaf0 (patch)
tree 52668a7b4025c43a40fc3a3d3e65831a8f890f0e /bin/eval-kconfig.py
parent 1ed7a66d836977ae9689f59b6dc5fca0f4637587 (diff)
kconfig: model generation and validation with limited sample size
eval-kconfig estimates the generalization error in this case
Diffstat (limited to 'bin/eval-kconfig.py')
-rwxr-xr-x bin/eval-kconfig.py 48
1 files changed, 46 insertions, 2 deletions
diff --git a/bin/eval-kconfig.py b/bin/eval-kconfig.py
index 7f48b52..7bc0c41 100755
--- a/bin/eval-kconfig.py
+++ b/bin/eval-kconfig.py
@@ -37,6 +37,19 @@ def main():
parser.add_argument(
"--with-choice-node", action="store_true", help="Use non-binary Choice Nodes"
)
+ parser.add_argument(
+ "--max-loss",
+ type=float,
+ help="Maximum acceptable model loss for DecisionTree Leaves",
+ default=10,
+ )
+ # Falls die population exhaustive ist, kann man nun den generalization error berechnen
+ parser.add_argument(
+ "--sample-size",
+ type=int,
+ help="Perform model generation and validation with N random samples from the population",
+ metavar="N",
+ )
parser.add_argument("kconfig_path", type=str, help="Path to Kconfig file")
parser.add_argument(
"experiment_root", type=str, help="Experiment results directory"
@@ -54,11 +67,26 @@ def main():
k = 10
- partition_pairs = validation._xv_partitions_kfold(len(data.data), k)
+ if args.sample_size:
+ shuffled_data_indices = np.random.permutation(np.arange(len(data.data)))
+ sample_indices = shuffled_data_indices[: args.sample_size]
+ nonsample_indices = shuffled_data_indices[args.sample_size :]
+ partition_pairs = validation._xv_partitions_kfold(args.sample_size, k)
+ partition_pairs = list(
+ map(
+ lambda tv: (shuffled_data_indices[tv[0]], shuffled_data_indices[tv[1]]),
+ partition_pairs,
+ )
+ )
+ else:
+ partition_pairs = validation._xv_partitions_kfold(len(data.data), k)
+
measures = list()
for training_set, validation_set in partition_pairs:
model = KConfigModel.from_benchmark(data, args.attribute, indices=training_set)
model.with_choice_node = args.with_choice_node
+ if args.max_loss:
+ model.max_loss = args.max_loss
model.build_tree()
measures.append(model.assess_benchmark(data, indices=validation_set))
@@ -71,11 +99,27 @@ def main():
print("10-fold Cross Validation:")
print(f"MAE: {aggregate['mae']:.0f} B")
- print(f"SMAPE: {aggregate['smape']:.0f} %")
+ print(f"SMAPE: {aggregate['smape']:.1f} %")
print(f"Unpredictable Configurations: {aggregate['unpredictable_count']}")
print(aggregate)
+ if args.sample_size:
+ print("Estimated Generalization Error")
+ model = KConfigModel.from_benchmark(
+ data, args.attribute, indices=sample_indices
+ )
+ model.with_choice_node = args.with_choice_node
+ if args.max_loss:
+ model.max_loss = args.max_loss
+ model.build_tree()
+ generalization_measure = model.assess_benchmark(data, indices=nonsample_indices)
+ print(f"MAE: {generalization_measure['mae']:.0f} B")
+ print(f"SMAPE: {generalization_measure['smape']:.1f} %")
+ print(
+ f"Unpredictable Configurations: {generalization_measure['unpredictable_count']}"
+ )
+
"""
if args.model:
with open(args.model, "r") as f: