-rwxr-xr-x   bin/analyze-archive.py   16
-rw-r--r--   lib/utils.py             42
2 files changed, 56 insertions, 2 deletions
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index da60212..c369cf8 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -46,7 +46,7 @@ from dfatool.loader import RawData, pta_trace_to_aggregate
 from dfatool.functions import gplearn_to_function
 from dfatool.model import PTAModel
 from dfatool.validation import CrossValidator
-from dfatool.utils import filter_aggregate_by_param
+from dfatool.utils import filter_aggregate_by_param, detect_outliers_in_aggregate
 from dfatool.automata import PTA
@@ -312,6 +312,17 @@ if __name__ == "__main__":
         help="Plot power trace for state or transition NAME. X axis is wrong for non-MIMOSA measurements",
     )
     parser.add_argument(
+        "--remove-outliers",
+        action="store_true",
+        help="Remove outliers exceeding the configured z score (default: 10)",
+    )
+    parser.add_argument(
+        "--z-score",
+        type=int,
+        default=10,
+        help="Configure z score for outlier detection (and optional removal)",
+    )
+    parser.add_argument(
         "--show-models",
         choices=["static", "paramdetection", "param", "all", "tex", "html"],
         help="static: show static model values as well as parameter detection heuristic.\n"
@@ -522,6 +533,9 @@ if __name__ == "__main__":
     )

     filter_aggregate_by_param(by_name, parameters, args.filter_param)
+    detect_outliers_in_aggregate(
+        by_name, z_limit=args.z_score, remove_outliers=args.remove_outliers
+    )

     model = PTAModel(
         by_name,
diff --git a/lib/utils.py b/lib/utils.py
index c8f31c2..560ab79 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -37,7 +37,7 @@ def human_readable(value, unit):
     for prefix, factor in (
         ("p", 1e-12),
         ("n", 1e-9),
-        (u"µ", 1e-6),
+        ("µ", 1e-6),
         ("m", 1e-3),
         ("", 1),
         ("k", 1e3),
@@ -268,6 +268,46 @@ def filter_aggregate_by_param(aggregate, parameters, parameter_filter):
             aggregate.pop(name)


+def detect_outliers_in_aggregate(aggregate, z_limit=10, remove_outliers=False):
+    for name in aggregate.keys():
+        indices_to_remove = set()
+        attributes = list()
+        for attribute in aggregate[name]["attributes"]:
+            data = aggregate[name][attribute]
+            z_scores = (data - np.mean(data)) / np.std(data)
+            outliers = np.abs(z_scores) > z_limit
+            if np.any(outliers) and remove_outliers:
+                indices_to_remove = indices_to_remove.union(
+                    np.arange(len(outliers))[outliers]
+                )
+                attributes.append(attribute)
+            elif np.any(outliers):
+                logger.info(
+                    f"{name} {attribute} has {len(z_scores[outliers])} outliers"
+                )
+        if indices_to_remove:
+            # Assumption: len(aggregate[name][attribute]) is the same for each
+            # attribute.
+            logger.info(
+                f"Removing outliers {indices_to_remove} from {name}. Affected attributes: {attributes}"
+            )
+            indices_to_keep = map(
+                lambda x: x not in indices_to_remove, np.arange(len(outliers))
+            )
+            indices_to_keep = np.array(list(indices_to_keep))
+            for attribute in aggregate[name]["attributes"]:
+                aggregate[name][attribute] = aggregate[name][attribute][indices_to_keep]
+            aggregate[name]["param"] = list(
+                map(
+                    lambda iv: iv[1],
+                    filter(
+                        lambda iv: indices_to_keep[iv[0]],
+                        enumerate(aggregate[name]["param"]),
+                    ),
+                )
+            )
+
+
 class OptionalTimingAnalysis:
     def __init__(self, enabled=True):
         self.enabled = enabled
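
For readers unfamiliar with z-score filtering, here is a minimal, self-contained sketch of the technique the new detect_outliers_in_aggregate helper applies. Everything in it is illustrative: the state name "TX", the attribute "power", the measurement values, the param entries, and the z limit of 3 are made up and do not come from the patch, and the snippet is a simplified stand-in rather than the dfatool implementation itself.

import numpy as np

# Toy stand-in for dfatool's by_name aggregate: one state ("TX") with a
# single attribute ("power"). The real structure carries more fields.
aggregate = {
    "TX": {
        "attributes": ["power"],
        "power": np.array(
            [10.1, 9.9, 10.0, 10.2, 9.8, 10.1, 9.9, 10.0, 10.3, 9.7, 10.0, 10.1, 250.0]
        ),
        "param": [[i] for i in range(13)],
    }
}

z_limit = 3  # the patch defaults to 10, configurable via --z-score

for name, entry in aggregate.items():
    # Start with a keep-everything mask; every attribute may veto indices.
    keep = np.ones(len(entry[entry["attributes"][0]]), dtype=bool)
    for attribute in entry["attributes"]:
        data = entry[attribute]
        # z-score: distance from the mean in units of the standard deviation
        z_scores = (data - np.mean(data)) / np.std(data)
        keep &= np.abs(z_scores) <= z_limit
    # Drop flagged measurements from every attribute and from "param" so the
    # per-index correspondence between them stays intact.
    for attribute in entry["attributes"]:
        entry[attribute] = entry[attribute][keep]
    entry["param"] = [p for p, k in zip(entry["param"], keep) if k]

print(aggregate["TX"]["power"])  # the 250.0 reading is gone

With the patch applied, the equivalent behaviour is requested on the command line via --z-score and --remove-outliers; if --remove-outliers is not given, detected outliers are only reported through logger.info.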