summaryrefslogtreecommitdiff
path: root/lib/utils.py
diff options
context:
space:
mode:
authorDaniel Friesel <daniel.friesel@uos.de>2019-10-07 09:48:37 +0200
committerDaniel Friesel <daniel.friesel@uos.de>2019-10-07 09:48:37 +0200
commiteb634401bfa4f93154eeb6265f100fd9db2bf7d4 (patch)
tree6adfbf12d55d6f3f925dfa672207352734b30186 /lib/utils.py
parent8b496797773a95bac66d76acc0d4dfee53f70ff7 (diff)
move ParamStats and helper functions to lib/parameters.py
Diffstat (limited to 'lib/utils.py')
-rw-r--r--lib/utils.py253
1 files changed, 0 insertions, 253 deletions
diff --git a/lib/utils.py b/lib/utils.py
index 549b673..5daa7cf 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -1,4 +1,3 @@
-import itertools
import numpy as np
import re
@@ -115,258 +114,6 @@ def param_slice_eq(a, b, index):
return True
return False
-def prune_dependent_parameters(by_name, parameter_names, correlation_threshold = 0.5):
- """
- Remove dependent parameters from aggregate.
-
- :param by_name: measurements partitioned by state/transition/... name and attribute, edited in-place.
- by_name[name][attribute] must be a list or 1-D numpy array.
- by_name[stanamete_or_trans]['param'] must be a list of parameter values.
- Other dict members are left as-is
- :param parameter_names: List of parameter names in the order they are used in by_name[name]['param'], edited in-place.
- :param correlation_threshold: Remove parameter if absolute correlation exceeds this threshold (default: 0.5)
-
- Model generation (and its components, such as relevant parameter detection and least squares optimization) only works if input variables (i.e., parameters)
- are independent of each other. This function computes the correlation coefficient for each pair of parameters and removes those which depend on each other.
- For each pair of dependent parameters, the lexically greater one is removed (e.g. "a" and "b" -> "b" is removed).
- """
-
- parameter_indices_to_remove = list()
- for parameter_combination in itertools.product(range(len(parameter_names)), range(len(parameter_names))):
- index_1, index_2 = parameter_combination
- if index_1 >= index_2:
- continue
- parameter_values = [list(), list()] # both parameters have a value
- parameter_values_1 = list() # parameter 1 has a value
- parameter_values_2 = list() # parameter 2 has a value
- for name in by_name:
- for measurement in by_name[name]['param']:
- value_1 = measurement[index_1]
- value_2 = measurement[index_2]
- if is_numeric(value_1):
- parameter_values_1.append(value_1)
- if is_numeric(value_2):
- parameter_values_2.append(value_2)
- if is_numeric(value_1) and is_numeric(value_2):
- parameter_values[0].append(value_1)
- parameter_values[1].append(value_2)
- if len(parameter_values[0]):
- # Calculating the correlation coefficient only makes sense when neither value is constant
- if np.std(parameter_values_1) != 0 and np.std(parameter_values_2) != 0:
- correlation = np.corrcoef(parameter_values)[0][1]
- if correlation != np.nan and np.abs(correlation) > correlation_threshold:
- print('[!] Parameters {} <-> {} are correlated with coefficcient {}'.format(parameter_names[index_1], parameter_names[index_2], correlation))
- if len(parameter_values_1) < len(parameter_values_2):
- index_to_remove = index_1
- else:
- index_to_remove = index_2
- print(' Removing parameter {}'.format(parameter_names[index_to_remove]))
- parameter_indices_to_remove.append(index_to_remove)
- remove_parameters_by_indices(by_name, parameter_names, parameter_indices_to_remove)
-
-def remove_parameters_by_indices(by_name, parameter_names, parameter_indices_to_remove):
- """
- Remove parameters listed in `parameter_indices` from aggregate `by_name` and `parameter_names`.
-
- :param by_name: measurements partitioned by state/transition/... name and attribute, edited in-place.
- by_name[name][attribute] must be a list or 1-D numpy array.
- by_name[stanamete_or_trans]['param'] must be a list of parameter values.
- Other dict members are left as-is
- :param parameter_names: List of parameter names in the order they are used in by_name[name]['param'], edited in-place.
- :param parameter_indices_to_remove: List of parameter indices to be removed
- """
-
- # Start removal from the end of the list to avoid renumbering of list elemenets
- for parameter_index in sorted(parameter_indices_to_remove, reverse = True):
- for name in by_name:
- for measurement in by_name[name]['param']:
- measurement.pop(parameter_index)
- parameter_names.pop(parameter_index)
-
-def compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_trans, attribute, verbose = False):
- """
- Compute standard deviation and correlation coefficient for various data partitions.
-
- It is strongly recommended to vary all parameter values evenly across partitions.
- For instance, given two parameters, providing only the combinations
- (1, 1), (5, 1), (7, 1,) (10, 1), (1, 2), (1, 6) will lead to bogus results.
- It is better to provide (1, 1), (5, 1), (1, 2), (5, 2), ... (i.e. a cross product of all individual parameter values)
-
- :param by_name: ground truth partitioned by state/transition name.
- by_name[state_or_trans][attribute] must be a list or 1-D numpy array.
- by_name[state_or_trans]['param'] must be a list of parameter values
- corresponding to the ground truth, e.g. [[1, 2, 3], ...] if the
- first ground truth element has the (lexically) first parameter set to 1,
- the second to 2 and the third to 3.
- :param by_param: ground truth partitioned by state/transition name and parameters.
- by_name[(state_or_trans, *)][attribute] must be a list or 1-D numpy array.
- :param parameter_names: list of parameter names, must have the same order as the parameter
- values in by_param (lexical sorting is recommended).
- :param arg_count: dict providing the number of functions args ("local parameters") for each function.
- :param state_or_trans: state or transition name, e.g. 'send' or 'TX'
- :param attribute: model attribute, e.g. 'power' or 'duration'
- :param verbose: print warning if some parameter partitions are too small for fitting
-
- :returns: a dict with the following content:
- std_static -- static parameter-unaware model error: stddev of by_name[state_or_trans][attribute]
- std_param_lut -- static parameter-aware model error: mean stddev of by_param[(state_or_trans, *)][attribute]
- std_by_param -- static parameter-aware model error ignoring a single parameter.
- dictionary with one key per parameter. The value is the mean stddev
- of measurements where all other parameters are fixed and the parameter
- in question is variable. E.g. std_by_param['X'] is the mean stddev of
- by_param[(state_or_trans, (X=*, Y=..., Z=...))][attribute].
- std_by_arg -- same, but ignoring a single function argument
- Only set if state_or_trans appears in arg_count, empty dict otherwise.
- corr_by_param -- correlation coefficient
- corr_by_arg -- same, but ignoring a single function argument
- Only set if state_or_trans appears in arg_count, empty dict otherwise.
- """
- ret = {
- 'std_static' : np.std(by_name[state_or_trans][attribute]),
- 'std_param_lut' : np.mean([np.std(by_param[x][attribute]) for x in by_param.keys() if x[0] == state_or_trans]),
- 'std_by_param' : {},
- 'std_by_param_values' : {},
- 'lut_by_param_values' : {},
- 'std_by_arg' : [],
- 'std_by_arg_values' : [],
- 'lut_by_arg_values' : [],
- 'corr_by_param' : {},
- 'corr_by_arg' : [],
- }
-
- np.seterr('raise')
-
- param_values = distinct_param_values(by_name, state_or_trans)
-
- for param_idx, param in enumerate(parameter_names):
- std_matrix, mean_std, lut_matrix = _std_by_param(by_param, param_values, state_or_trans, attribute, param_idx, verbose)
- ret['std_by_param'][param] = mean_std
- ret['std_by_param_values'][param] = std_matrix
- ret['lut_by_param_values'][param] = lut_matrix
- ret['corr_by_param'][param] = _corr_by_param(by_name, state_or_trans, attribute, param_idx)
- if arg_support_enabled and state_or_trans in arg_count:
- for arg_index in range(arg_count[state_or_trans]):
- std_matrix, mean_std, lut_matrix = _std_by_param(by_param, param_values, state_or_trans, attribute, len(parameter_names) + arg_index, verbose)
- ret['std_by_arg'].append(mean_std)
- ret['std_by_arg_values'].append(std_matrix)
- ret['lut_by_arg_values'].append(lut_matrix)
- ret['corr_by_arg'].append(_corr_by_param(by_name, state_or_trans, attribute, len(parameter_names) + arg_index))
-
- return ret
-
-def distinct_param_values(by_name, state_or_tran):
- """
- Return the distinct values of each parameter in by_name[state_or_tran].
-
- E.g. if by_name[state_or_tran]['param'] contains the distinct entries (1, 1), (1, 2), (1, 3), (0, 3),
- this function returns [[1, 0], [1, 2, 3]].
- Note that the order is not guaranteed to be deterministic at the moment.
-
- Also note that this function deliberately also consider None
- (uninitialized parameter with unknown value) as a distinct value. Benchmarks
- and drivers must ensure that a parameter is only None when its value is
- not important yet, e.g. a packet length parameter must only be None when
- write() or similar has not been called yet. Other parameters should always
- be initialized when leaving UNINITIALIZED.
- """
- # TODO a set() is an _unordered_ collection, so this must be converted to
- # an OrderedDict or a list with a duplicate-pruning step
- distinct_values = [set() for i in range(len(by_name[state_or_tran]['param'][0]))]
- for param_tuple in by_name[state_or_tran]['param']:
- for i in range(len(param_tuple)):
- distinct_values[i].add(param_tuple[i])
-
- # Convert sets to lists
- distinct_values = list(map(list, distinct_values))
- return distinct_values
-
-def _std_by_param(by_param, all_param_values, state_or_tran, attribute, param_index, verbose = False):
- u"""
- Calculate standard deviations for a static model where all parameters but param_index are constant.
-
- :param by_param: measurements sorted by key/transition name and parameter values
- :param state_or_tran: state or transition name (-> by_param[(state_or_tran, *)])
- :param attribute: model attribute, e.g. 'power' or 'duration'
- (-> by_param[(state_or_tran, *)][attribute])
- :param param_index: index of variable parameter
- :returns: (stddev matrix, mean stddev)
-
- Returns the mean standard deviation of all measurements of 'attribute'
- (e.g. power consumption or timeout) for state/transition 'state_or_tran' where
- parameter 'param_index' is dynamic and all other parameters are fixed.
- I.e., if parameters are a, b, c ∈ {1,2,3} and 'index' corresponds to b, then
- this function returns the mean of the standard deviations of (a=1, b=*, c=1),
- (a=1, b=*, c=2), and so on.
- Also returns an (n-1)-dimensional array (where n is the number of parameters)
- giving the standard deviation of each individual partition. E.g. for
- param_index == 2 and 4 parameters, array[a][b][d] is the
- stddev of measurements with param0 == a, param1 == b, param2 variable,
- and param3 == d.
- """
- param_values = list(remove_index_from_tuple(all_param_values, param_index))
- info_shape = tuple(map(len, param_values))
-
- # We will calculate the mean over the entire matrix later on. We cannot
- # guarantee that each entry will be filled in this loop (e.g. transitions
- # whose arguments are combined using 'zip' rather than 'cartesian' always
- # have missing parameter combinations), we pre-fill it with NaN and use
- # np.nanmean to skip those when calculating the mean.
- stddev_matrix = np.full(info_shape, np.nan)
- lut_matrix = np.full(info_shape, np.nan)
-
- for param_value in itertools.product(*param_values):
- param_partition = list()
- std_list = list()
- for k, v in by_param.items():
- if k[0] == state_or_tran and (*k[1][:param_index], *k[1][param_index+1:]) == param_value:
- param_partition.extend(v[attribute])
- std_list.append(np.std(v[attribute]))
-
- if len(param_partition) > 1:
- matrix_index = list(range(len(param_value)))
- for i in range(len(param_value)):
- matrix_index[i] = param_values[i].index(param_value[i])
- matrix_index = tuple(matrix_index)
- stddev_matrix[matrix_index] = np.std(param_partition)
- lut_matrix[matrix_index] = np.mean(std_list)
- # This can (and will) happen in normal operation, e.g. when a transition's
- # arguments are combined using 'zip' rather than 'cartesian'.
- #elif len(param_partition) == 1:
- # vprint(verbose, '[W] parameter value partition for {} contains only one element -- skipping'.format(param_value))
- #else:
- # vprint(verbose, '[W] parameter value partition for {} is empty'.format(param_value))
-
- if np.all(np.isnan(stddev_matrix)):
- vprint(verbose, '[W] {}/{} parameter #{} has no data partitions -- how did this even happen?'.format(state_or_tran, attribute, param_index))
- vprint(verbose, 'stddev_matrix = {}'.format(stddev_matrix))
- return stddev_matrix, 0.
-
- return stddev_matrix, np.nanmean(stddev_matrix), lut_matrix #np.mean([np.std(partition) for partition in partitions])
-
-def _corr_by_param(by_name, state_or_trans, attribute, param_index):
- if _all_params_are_numeric(by_name[state_or_trans], param_index):
- param_values = np.array(list((map(lambda x: x[param_index], by_name[state_or_trans]['param']))))
- try:
- return np.corrcoef(by_name[state_or_trans][attribute], param_values)[0, 1]
- except FloatingPointError:
- # Typically happens when all parameter values are identical.
- # Building a correlation coefficient is pointless in this case
- # -> assume no correlation
- return 0.
- except ValueError:
- print('[!] Exception in _corr_by_param(by_name, state_or_trans={}, attribute={}, param_index={})'.format(state_or_trans, attribute, param_index))
- print('[!] while executing np.corrcoef(by_name[{}][{}]={}, {}))'.format(state_or_trans, attribute, by_name[state_or_trans][attribute], param_values))
- raise
- else:
- return 0.
-
-def _all_params_are_numeric(data, param_idx):
- """Check if all `data['param'][*][param_idx]` elements are numeric, as reported by `utils.is_numeric`."""
- param_values = list(map(lambda x: x[param_idx], data['param']))
- if len(list(filter(is_numeric, param_values))) == len(param_values):
- return True
- return False
-
class OptionalTimingAnalysis:
def __init__(self, enabled = True):
self.enabled = enabled