From 3012dc4ab4d72254262e1fbb15a7cb566cd57be5 Mon Sep 17 00:00:00 2001
From: Daniel Friesel
Date: Tue, 12 Jan 2021 11:06:04 +0100
Subject: add z-score based outlier detection and optional removal

Note that outliers are removed from both training and evaluation data
---
 bin/analyze-archive.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'bin/analyze-archive.py')

diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index da60212..c369cf8 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -46,7 +46,7 @@ from dfatool.loader import RawData, pta_trace_to_aggregate
 from dfatool.functions import gplearn_to_function
 from dfatool.model import PTAModel
 from dfatool.validation import CrossValidator
-from dfatool.utils import filter_aggregate_by_param
+from dfatool.utils import filter_aggregate_by_param, detect_outliers_in_aggregate
 from dfatool.automata import PTA
 
 
@@ -311,6 +311,17 @@ if __name__ == "__main__":
         type=str,
         help="Plot power trace for state or transition NAME. X axis is wrong for non-MIMOSA measurements",
     )
+    parser.add_argument(
+        "--remove-outliers",
+        action="store_true",
+        help="Remove outliers exceeding the configured z score (default: 10)",
+    )
+    parser.add_argument(
+        "--z-score",
+        type=int,
+        default=10,
+        help="Configure z score for outlier detection (and optional removel)",
+    )
     parser.add_argument(
         "--show-models",
         choices=["static", "paramdetection", "param", "all", "tex", "html"],
@@ -522,6 +533,9 @@ if __name__ == "__main__":
         )
 
     filter_aggregate_by_param(by_name, parameters, args.filter_param)
+    detect_outliers_in_aggregate(
+        by_name, z_limit=args.z_score, remove_outliers=args.remove_outliers
+    )
 
     model = PTAModel(
         by_name,
-- 
cgit v1.2.3