diff options
-rwxr-xr-x | bin/analyze-archive.py | 8 | ||||
-rwxr-xr-x | lib/dfatool.py | 48 | ||||
-rw-r--r-- | lib/parameters.py | 241 | ||||
-rw-r--r-- | lib/utils.py | 49 |
4 files changed, 211 insertions, 135 deletions
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py index a4af02a..69eab6e 100755 --- a/bin/analyze-archive.py +++ b/bin/analyze-archive.py @@ -79,9 +79,10 @@ import json import plotter import re import sys -from dfatool import PTAModel, RawData, pta_trace_to_aggregate, filter_aggregate_by_param +from dfatool import PTAModel, RawData, pta_trace_to_aggregate from dfatool import soft_cast_int, is_numeric, gplearn_to_function from dfatool import CrossValidator +from utils import filter_aggregate_by_param opts = {} @@ -301,6 +302,11 @@ if __name__ == '__main__': '', param, model.stats.param_dependence_ratio(state, 'power', param))) + if model.depends_on_param(state, 'power', param) and len(model.stats.stats[state]['power']['param_data'][param]['codependent_parameters']): + print('{:24s} co-dependencies: {:s}'.format('', ', '.join(model.stats.stats[state]['power']['param_data'][param]['codependent_parameters']))) + for combi, depends in model.stats.stats[state]['power']['param_data'][param]['depends_for_codependent_value'].items(): + print('{} -> {}'.format(combi, depends)) + for trans in model.transitions(): # Mean power is not a typical transition attribute, but may be present for debugging or analysis purposes try: diff --git a/lib/dfatool.py b/lib/dfatool.py index 3151c65..cc07026 100755 --- a/lib/dfatool.py +++ b/lib/dfatool.py @@ -16,7 +16,8 @@ from automata import PTA from functions import analytic from functions import AnalyticFunction from parameters import ParamStats -from utils import vprint, is_numeric, soft_cast_int, param_slice_eq, remove_index_from_tuple, is_power_of_two +from utils import vprint, is_numeric, soft_cast_int, param_slice_eq, remove_index_from_tuple +from utils import by_name_to_by_param arg_support_enabled = True @@ -180,26 +181,6 @@ class KeysightCSV: currents[i] = float(row[2]) * -1 return timestamps, currents -def by_name_to_by_param(by_name: dict): - """ - Convert aggregation by name to aggregation by name and parameter values. - """ - by_param = dict() - for name in by_name.keys(): - for i, parameters in enumerate(by_name[name]['param']): - param_key = (name, tuple(parameters)) - if param_key not in by_param: - by_param[param_key] = dict() - for key in by_name[name].keys(): - by_param[param_key][key] = list() - by_param[param_key]['attributes'] = by_name[name]['attributes'] - # special case for PTA models - if 'isa' in by_name[name]: - by_param[param_key]['isa'] = by_name[name]['isa'] - for attribute in by_name[name]['attributes']: - by_param[param_key][attribute].append(by_name[name][attribute][i]) - return by_param - def _xv_partitions_kfold(length, num_slices): pairs = [] @@ -1403,31 +1384,6 @@ def _add_trace_data_to_aggregate(aggregate, key, element): for datakey, dataval in element['offline_aggregates'].items(): aggregate[key][datakey].extend(dataval) -def filter_aggregate_by_param(aggregate, parameters, parameter_filter): - """ - Remove entries which do not have certain parameter values from `aggregate`. - - :param aggregate: aggregated measurement data, must be a dict conforming to - aggregate[state or transition name]['param'] = (first parameter value, second parameter value, ...) - and - aggregate[state or transition name]['attributes'] = [list of keys with measurement data, e.g. 'power' or 'duration'] - :param parameters: list of parameters, used to map parameter index to parameter name. parameters=['foo', ...] means 'foo' is the first parameter - :param parameter_filter: [[name, value], [name, value], ...] list of parameter values to keep, all others are removed. Values refer to normalizad parameter data. - """ - for param_name_and_value in parameter_filter: - param_index = parameters.index(param_name_and_value[0]) - param_value = soft_cast_int(param_name_and_value[1]) - names_to_remove = set() - for name in aggregate.keys(): - indices_to_keep = list(map(lambda x: x[param_index] == param_value, aggregate[name]['param'])) - aggregate[name]['param'] = list(map(lambda iv: iv[1], filter(lambda iv: indices_to_keep[iv[0]], enumerate(aggregate[name]['param'])))) - for attribute in aggregate[name]['attributes']: - aggregate[name][attribute] = aggregate[name][attribute][indices_to_keep] - if len(aggregate[name][attribute]) == 0: - names_to_remove.add(name) - for name in names_to_remove: - aggregate.pop(name) - def pta_trace_to_aggregate(traces, ignore_trace_indexes = []): u""" diff --git a/lib/parameters.py b/lib/parameters.py index 4dc9c3a..1518076 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -1,77 +1,9 @@ import itertools import numpy as np -from utils import remove_index_from_tuple, is_numeric - -def compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_trans, attribute, verbose = False): - """ - Compute standard deviation and correlation coefficient for various data partitions. - - It is strongly recommended to vary all parameter values evenly across partitions. - For instance, given two parameters, providing only the combinations - (1, 1), (5, 1), (7, 1,) (10, 1), (1, 2), (1, 6) will lead to bogus results. - It is better to provide (1, 1), (5, 1), (1, 2), (5, 2), ... (i.e. a cross product of all individual parameter values) - - :param by_name: ground truth partitioned by state/transition name. - by_name[state_or_trans][attribute] must be a list or 1-D numpy array. - by_name[state_or_trans]['param'] must be a list of parameter values - corresponding to the ground truth, e.g. [[1, 2, 3], ...] if the - first ground truth element has the (lexically) first parameter set to 1, - the second to 2 and the third to 3. - :param by_param: ground truth partitioned by state/transition name and parameters. - by_name[(state_or_trans, *)][attribute] must be a list or 1-D numpy array. - :param parameter_names: list of parameter names, must have the same order as the parameter - values in by_param (lexical sorting is recommended). - :param arg_count: dict providing the number of functions args ("local parameters") for each function. - :param state_or_trans: state or transition name, e.g. 'send' or 'TX' - :param attribute: model attribute, e.g. 'power' or 'duration' - :param verbose: print warning if some parameter partitions are too small for fitting - - :returns: a dict with the following content: - std_static -- static parameter-unaware model error: stddev of by_name[state_or_trans][attribute] - std_param_lut -- static parameter-aware model error: mean stddev of by_param[(state_or_trans, *)][attribute] - std_by_param -- static parameter-aware model error ignoring a single parameter. - dictionary with one key per parameter. The value is the mean stddev - of measurements where all other parameters are fixed and the parameter - in question is variable. E.g. std_by_param['X'] is the mean stddev of - by_param[(state_or_trans, (X=*, Y=..., Z=...))][attribute]. - std_by_arg -- same, but ignoring a single function argument - Only set if state_or_trans appears in arg_count, empty dict otherwise. - corr_by_param -- correlation coefficient - corr_by_arg -- same, but ignoring a single function argument - Only set if state_or_trans appears in arg_count, empty dict otherwise. - """ - ret = { - 'std_static' : np.std(by_name[state_or_trans][attribute]), - 'std_param_lut' : np.mean([np.std(by_param[x][attribute]) for x in by_param.keys() if x[0] == state_or_trans]), - 'std_by_param' : {}, - 'std_by_param_values' : {}, - 'lut_by_param_values' : {}, - 'std_by_arg' : [], - 'std_by_arg_values' : [], - 'lut_by_arg_values' : [], - 'corr_by_param' : {}, - 'corr_by_arg' : [], - } - - np.seterr('raise') - - param_values = distinct_param_values(by_name, state_or_trans) - - for param_idx, param in enumerate(parameter_names): - std_matrix, mean_std, lut_matrix = _std_by_param(by_param, param_values, state_or_trans, attribute, param_idx, verbose) - ret['std_by_param'][param] = mean_std - ret['std_by_param_values'][param] = std_matrix - ret['lut_by_param_values'][param] = lut_matrix - ret['corr_by_param'][param] = _corr_by_param(by_name, state_or_trans, attribute, param_idx) - if state_or_trans in arg_count: - for arg_index in range(arg_count[state_or_trans]): - std_matrix, mean_std, lut_matrix = _std_by_param(by_param, param_values, state_or_trans, attribute, len(parameter_names) + arg_index, verbose) - ret['std_by_arg'].append(mean_std) - ret['std_by_arg_values'].append(std_matrix) - ret['lut_by_arg_values'].append(lut_matrix) - ret['corr_by_arg'].append(_corr_by_param(by_name, state_or_trans, attribute, len(parameter_names) + arg_index)) - - return ret +from collections import OrderedDict +from copy import deepcopy +from utils import remove_index_from_tuple, is_numeric, is_power_of_two +from utils import filter_aggregate_by_param, by_name_to_by_param def distinct_param_values(by_name, state_or_tran): """ @@ -79,9 +11,8 @@ def distinct_param_values(by_name, state_or_tran): E.g. if by_name[state_or_tran]['param'] contains the distinct entries (1, 1), (1, 2), (1, 3), (0, 3), this function returns [[1, 0], [1, 2, 3]]. - Note that the order is not guaranteed to be deterministic at the moment. - Also note that this function deliberately also consider None + Note that this function deliberately also consider None (uninitialized parameter with unknown value) as a distinct value. Benchmarks and drivers must ensure that a parameter is only None when its value is not important yet, e.g. a packet length parameter must only be None when @@ -90,13 +21,13 @@ def distinct_param_values(by_name, state_or_tran): """ # TODO a set() is an _unordered_ collection, so this must be converted to # an OrderedDict or a list with a duplicate-pruning step - distinct_values = [set() for i in range(len(by_name[state_or_tran]['param'][0]))] + distinct_values = [OrderedDict() for i in range(len(by_name[state_or_tran]['param'][0]))] for param_tuple in by_name[state_or_tran]['param']: for i in range(len(param_tuple)): - distinct_values[i].add(param_tuple[i]) + distinct_values[i][param_tuple[i]] = True # Convert sets to lists - distinct_values = list(map(list, distinct_values)) + distinct_values = list(map(lambda x: list(x.keys()), distinct_values)) return distinct_values def _std_by_param(by_param, all_param_values, state_or_tran, attribute, param_index, verbose = False): @@ -255,6 +186,11 @@ def remove_parameters_by_indices(by_name, parameter_names, parameter_indices_to_ parameter_names.pop(parameter_index) class ParamStats: + """ + :param stats: `stats[state_or_tran][attribute]` = std_static, std_param_lut, ... (see `compute_param_statistics`) + :param distinct_values: `distinct_values[state_or_tran][param]` = [distinct values in aggregate] + :param distinct_values_by_param_index: `distinct_values[state_or_tran][i]` = [distinct values in aggregate] + """ def __init__(self, by_name, by_param, parameter_names, arg_count, use_corrcoef = False, verbose = False): """ @@ -280,6 +216,8 @@ class ParamStats: use_corrcoef -- use correlation coefficient instead of stddev heuristic for parameter detection """ self.stats = dict() + self.distinct_values = dict() + self.distinct_values_by_param_index = dict() self.use_corrcoef = use_corrcoef self._parameter_names = parameter_names # Note: This is deliberately single-threaded. The overhead incurred @@ -287,8 +225,127 @@ class ParamStats: # computation of statistics measures. for state_or_tran in by_name.keys(): self.stats[state_or_tran] = dict() + self.distinct_values_by_param_index[state_or_tran] = distinct_param_values(by_name, state_or_tran) + self.distinct_values[state_or_tran] = dict() + for i, param in enumerate(parameter_names): + self.distinct_values[state_or_tran][param] = self.distinct_values_by_param_index[state_or_tran][i] for attribute in by_name[state_or_tran]['attributes']: - self.stats[state_or_tran][attribute] = compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_tran, attribute, verbose = verbose) + self.stats[state_or_tran][attribute] = self.compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_tran, attribute, verbose = verbose) + + def compute_param_statistics(self, by_name, by_param, parameter_names, arg_count, state_or_trans, attribute, verbose = False): + """ + Compute standard deviation and correlation coefficient for various data partitions. + + It is strongly recommended to vary all parameter values evenly across partitions. + For instance, given two parameters, providing only the combinations + (1, 1), (5, 1), (7, 1,) (10, 1), (1, 2), (1, 6) will lead to bogus results. + It is better to provide (1, 1), (5, 1), (1, 2), (5, 2), ... (i.e. a cross product of all individual parameter values) + + :param by_name: ground truth partitioned by state/transition name. + by_name[state_or_trans][attribute] must be a list or 1-D numpy array. + by_name[state_or_trans]['param'] must be a list of parameter values + corresponding to the ground truth, e.g. [[1, 2, 3], ...] if the + first ground truth element has the (lexically) first parameter set to 1, + the second to 2 and the third to 3. + :param by_param: ground truth partitioned by state/transition name and parameters. + by_name[(state_or_trans, *)][attribute] must be a list or 1-D numpy array. + :param parameter_names: list of parameter names, must have the same order as the parameter + values in by_param (lexical sorting is recommended). + :param arg_count: dict providing the number of functions args ("local parameters") for each function. + :param state_or_trans: state or transition name, e.g. 'send' or 'TX' + :param attribute: model attribute, e.g. 'power' or 'duration' + :param verbose: print warning if some parameter partitions are too small for fitting + + :returns: a dict with the following content: + std_static -- static parameter-unaware model error: stddev of by_name[state_or_trans][attribute] + std_param_lut -- static parameter-aware model error: mean stddev of by_param[(state_or_trans, *)][attribute] + std_by_param -- static parameter-aware model error ignoring a single parameter. + dictionary with one key per parameter. The value is the mean stddev + of measurements where all other parameters are fixed and the parameter + in question is variable. E.g. std_by_param['X'] is the mean stddev of + by_param[(state_or_trans, (X=*, Y=..., Z=...))][attribute]. + std_by_arg -- same, but ignoring a single function argument + Only set if state_or_trans appears in arg_count, empty dict otherwise. + corr_by_param -- correlation coefficient + corr_by_arg -- same, but ignoring a single function argument + Only set if state_or_trans appears in arg_count, empty dict otherwise. + """ + ret = { + 'std_static' : np.std(by_name[state_or_trans][attribute]), + 'std_param_lut' : np.mean([np.std(by_param[x][attribute]) for x in by_param.keys() if x[0] == state_or_trans]), + 'std_by_param' : {}, + 'std_by_param_values' : {}, + 'lut_by_param_values' : {}, + 'std_by_arg' : [], + 'std_by_arg_values' : [], + 'lut_by_arg_values' : [], + 'corr_by_param' : {}, + 'corr_by_arg' : [], + 'depends_on_param' : {}, + 'depends_on_arg' : [], + 'param_data' : {}, + } + + np.seterr('raise') + + for param_idx, param in enumerate(parameter_names): + std_matrix, mean_std, lut_matrix = _std_by_param(by_param, self.distinct_values_by_param_index[state_or_trans], state_or_trans, attribute, param_idx, verbose) + ret['std_by_param'][param] = mean_std + ret['std_by_param_values'][param] = std_matrix + ret['lut_by_param_values'][param] = lut_matrix + ret['corr_by_param'][param] = _corr_by_param(by_name, state_or_trans, attribute, param_idx) + + ret['depends_on_param'][param] = self._depends_on_param(ret['corr_by_param'][param], ret['std_by_param'][param], ret['std_param_lut']) + + if ret['depends_on_param'][param]: + ret['param_data'][param] = { + 'codependent_parameters': self._codependent_parameters(param, lut_matrix, std_matrix), + 'depends_for_codependent_value': dict() + } + + # calculate parameter dependence for individual values of codependent parameters + codependent_param_values = list() + for codependent_param in ret['param_data'][param]['codependent_parameters']: + codependent_param_values.append(self.distinct_values[state_or_trans][codependent_param]) + for combi in itertools.product(*codependent_param_values): + by_name_part = deepcopy(by_name) + filter_list = list(zip(ret['param_data'][param]['codependent_parameters'], combi)) + filter_aggregate_by_param(by_name_part, parameter_names, filter_list) + by_param_part = by_name_to_by_param(by_name_part) + # there may be no data for this specific parameter value combination + if state_or_trans in by_name_part: + part_corr = _corr_by_param(by_name_part, state_or_trans, attribute, param_idx) + part_std_lut = np.mean([np.std(by_param_part[x][attribute]) for x in by_param_part.keys() if x[0] == state_or_trans]) + _, part_std_param, _ = _std_by_param(by_param_part, self.distinct_values_by_param_index[state_or_trans], state_or_trans, attribute, param_idx, verbose) + ret['param_data'][param]['depends_for_codependent_value'][combi] = self._depends_on_param(part_corr, part_std_param, part_std_lut) + + if state_or_trans in arg_count: + for arg_index in range(arg_count[state_or_trans]): + std_matrix, mean_std, lut_matrix = _std_by_param(by_param, self.distinct_values_by_param_index[state_or_trans], state_or_trans, attribute, len(parameter_names) + arg_index, verbose) + ret['std_by_arg'].append(mean_std) + ret['std_by_arg_values'].append(std_matrix) + ret['lut_by_arg_values'].append(lut_matrix) + ret['corr_by_arg'].append(_corr_by_param(by_name, state_or_trans, attribute, len(parameter_names) + arg_index)) + + if self.use_corrcoef: + ret['depends_on_arg'].append(ret['corr_by_arg'][arg_index] > 0.1) + elif ret['std_by_arg'][arg_index] == 0: + # In general, std_param_lut < std_by_arg. So, if std_by_arg == 0, std_param_lut == 0 follows. + # This means that the variation of arg does not affect the model quality -> no influence + ret['depends_on_arg'].append(False) + else: + ret['depends_on_arg'].append(ret['std_param_lut'] / ret['std_by_arg'][arg_index] < 0.5) + + return ret + + def _depends_on_param(self, corr_param, std_param, std_lut): + if self.use_corrcoef: + return corr_param > 0.1 + elif std_param == 0: + # In general, std_param_lut < std_by_param. So, if std_by_param == 0, std_param_lut == 0 follows. + # This means that the variation of param does not affect the model quality -> no influence + return False + return std_lut / std_param < 0.5 def _generic_param_independence_ratio(self, state_or_trans, attribute): """ @@ -342,6 +399,20 @@ class ParamStats: return list() + def _codependent_parameters(self, param, lut_by_param_values, std_by_param_values): + """ + Return list of parameters which affect whether a parameter affects a model attribute or not. + """ + safe_div = np.vectorize(lambda x,y: 0. if x == 0 else 1 - x/y) + ratio_by_value = safe_div(lut_by_param_values, std_by_param_values) + err_mode = np.seterr('ignore') + dep_by_value = ratio_by_value > 0.5 + np.seterr(**err_mode) + + other_param_list = list(filter(lambda x: x != param, self._parameter_names)) + influencer_parameters = self._reduce_param_matrix(dep_by_value, other_param_list) + return influencer_parameters + def _get_codependent_parameters(self, stats, param): """ Return list of parameters which affect whether `param` influences the model attribute described in `stats` or not. @@ -392,8 +463,8 @@ class ParamStats: """ Return parameters whose value influences whether `attribute` of `state_or_trans` depends on `param` or not. - For example, a radio's TX POWER is only influenced by the packet length if dynamically sized payloads are enabled. - So reverse_dependent_parameters('TX', 'POWER', 'packet_length') == ['dynamic_payload_size']. + For example, a radio's TX power is only influenced by the packet length if dynamically sized payloads are enabled. + So reverse_dependent_parameters('TX', 'power', 'packet_length') == ['dynamic_payload_size']. :param state_or_trans: state or transition name :param attribute: model attribute @@ -424,16 +495,10 @@ class ParamStats: # --df, 2018-04-18 def depends_on_param(self, state_or_trans, attribute, param): """Return whether attribute of state_or_trans depens on param.""" - if self.use_corrcoef: - return self.param_dependence_ratio(state_or_trans, attribute, param) > 0.1 - else: - return self.param_dependence_ratio(state_or_trans, attribute, param) > 0.5 + return self.stats[state_or_trans][attribute]['depends_on_param'][param] # See notes on depends_on_param def depends_on_arg(self, state_or_trans, attribute, arg_index): """Return whether attribute of state_or_trans depens on arg_index.""" - if self.use_corrcoef: - return self.arg_dependence_ratio(state_or_trans, attribute, arg_index) > 0.1 - else: - return self.arg_dependence_ratio(state_or_trans, attribute, arg_index) > 0.5 + return self.stats[state_or_trans][attribute]['depends_on_arg'][arg_index] diff --git a/lib/utils.py b/lib/utils.py index 5daa7cf..e5699da 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -114,6 +114,55 @@ def param_slice_eq(a, b, index): return True return False +def by_name_to_by_param(by_name: dict): + """ + Convert aggregation by name to aggregation by name and parameter values. + """ + by_param = dict() + for name in by_name.keys(): + for i, parameters in enumerate(by_name[name]['param']): + param_key = (name, tuple(parameters)) + if param_key not in by_param: + by_param[param_key] = dict() + for key in by_name[name].keys(): + by_param[param_key][key] = list() + by_param[param_key]['attributes'] = by_name[name]['attributes'] + # special case for PTA models + if 'isa' in by_name[name]: + by_param[param_key]['isa'] = by_name[name]['isa'] + for attribute in by_name[name]['attributes']: + by_param[param_key][attribute].append(by_name[name][attribute][i]) + return by_param + +def filter_aggregate_by_param(aggregate, parameters, parameter_filter): + """ + Remove entries which do not have certain parameter values from `aggregate`. + + :param aggregate: aggregated measurement data, must be a dict conforming to + aggregate[state or transition name]['param'] = (first parameter value, second parameter value, ...) + and + aggregate[state or transition name]['attributes'] = [list of keys with measurement data, e.g. 'power' or 'duration'] + :param parameters: list of parameters, used to map parameter index to parameter name. parameters=['foo', ...] means 'foo' is the first parameter + :param parameter_filter: [[name, value], [name, value], ...] list of parameter values to keep, all others are removed. Values refer to normalizad parameter data. + """ + for param_name_and_value in parameter_filter: + param_index = parameters.index(param_name_and_value[0]) + param_value = soft_cast_int(param_name_and_value[1]) + names_to_remove = set() + for name in aggregate.keys(): + indices_to_keep = list(map(lambda x: x[param_index] == param_value, aggregate[name]['param'])) + aggregate[name]['param'] = list(map(lambda iv: iv[1], filter(lambda iv: indices_to_keep[iv[0]], enumerate(aggregate[name]['param'])))) + if len(indices_to_keep) == 0: + print('??? {}->{}'.format(parameter_filter, name)) + names_to_remove.add(name) + else: + for attribute in aggregate[name]['attributes']: + aggregate[name][attribute] = aggregate[name][attribute][indices_to_keep] + if len(aggregate[name][attribute]) == 0: + names_to_remove.add(name) + for name in names_to_remove: + aggregate.pop(name) + class OptionalTimingAnalysis: def __init__(self, enabled = True): self.enabled = enabled |