summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x bin/analyze-archive.py 8
-rwxr-xr-x lib/dfatool.py 48
-rw-r--r-- lib/parameters.py 241
-rw-r--r-- lib/utils.py 49
4 files changed, 211 insertions, 135 deletions
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index a4af02a..69eab6e 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -79,9 +79,10 @@ import json
import plotter
import re
import sys
-from dfatool import PTAModel, RawData, pta_trace_to_aggregate, filter_aggregate_by_param
+from dfatool import PTAModel, RawData, pta_trace_to_aggregate
from dfatool import soft_cast_int, is_numeric, gplearn_to_function
from dfatool import CrossValidator
+from utils import filter_aggregate_by_param
opts = {}
@@ -301,6 +302,11 @@ if __name__ == '__main__':
'',
param,
model.stats.param_dependence_ratio(state, 'power', param)))
+ if model.depends_on_param(state, 'power', param) and len(model.stats.stats[state]['power']['param_data'][param]['codependent_parameters']):
+ print('{:24s} co-dependencies: {:s}'.format('', ', '.join(model.stats.stats[state]['power']['param_data'][param]['codependent_parameters'])))
+ for combi, depends in model.stats.stats[state]['power']['param_data'][param]['depends_for_codependent_value'].items():
+ print('{} -> {}'.format(combi, depends))
+
for trans in model.transitions():
# Mean power is not a typical transition attribute, but may be present for debugging or analysis purposes
try:
diff --git a/lib/dfatool.py b/lib/dfatool.py
index 3151c65..cc07026 100755
--- a/lib/dfatool.py
+++ b/lib/dfatool.py
@@ -16,7 +16,8 @@ from automata import PTA
from functions import analytic
from functions import AnalyticFunction
from parameters import ParamStats
-from utils import vprint, is_numeric, soft_cast_int, param_slice_eq, remove_index_from_tuple, is_power_of_two
+from utils import vprint, is_numeric, soft_cast_int, param_slice_eq, remove_index_from_tuple
+from utils import by_name_to_by_param
arg_support_enabled = True
@@ -180,26 +181,6 @@ class KeysightCSV:
currents[i] = float(row[2]) * -1
return timestamps, currents
-def by_name_to_by_param(by_name: dict):
- """
- Convert aggregation by name to aggregation by name and parameter values.
- """
- by_param = dict()
- for name in by_name.keys():
- for i, parameters in enumerate(by_name[name]['param']):
- param_key = (name, tuple(parameters))
- if param_key not in by_param:
- by_param[param_key] = dict()
- for key in by_name[name].keys():
- by_param[param_key][key] = list()
- by_param[param_key]['attributes'] = by_name[name]['attributes']
- # special case for PTA models
- if 'isa' in by_name[name]:
- by_param[param_key]['isa'] = by_name[name]['isa']
- for attribute in by_name[name]['attributes']:
- by_param[param_key][attribute].append(by_name[name][attribute][i])
- return by_param
-
def _xv_partitions_kfold(length, num_slices):
pairs = []
@@ -1403,31 +1384,6 @@ def _add_trace_data_to_aggregate(aggregate, key, element):
for datakey, dataval in element['offline_aggregates'].items():
aggregate[key][datakey].extend(dataval)
-def filter_aggregate_by_param(aggregate, parameters, parameter_filter):
- """
- Remove entries which do not have certain parameter values from `aggregate`.
-
- :param aggregate: aggregated measurement data, must be a dict conforming to
- aggregate[state or transition name]['param'] = (first parameter value, second parameter value, ...)
- and
- aggregate[state or transition name]['attributes'] = [list of keys with measurement data, e.g. 'power' or 'duration']
- :param parameters: list of parameters, used to map parameter index to parameter name. parameters=['foo', ...] means 'foo' is the first parameter
- :param parameter_filter: [[name, value], [name, value], ...] list of parameter values to keep, all others are removed. Values refer to normalizad parameter data.
- """
- for param_name_and_value in parameter_filter:
- param_index = parameters.index(param_name_and_value[0])
- param_value = soft_cast_int(param_name_and_value[1])
- names_to_remove = set()
- for name in aggregate.keys():
- indices_to_keep = list(map(lambda x: x[param_index] == param_value, aggregate[name]['param']))
- aggregate[name]['param'] = list(map(lambda iv: iv[1], filter(lambda iv: indices_to_keep[iv[0]], enumerate(aggregate[name]['param']))))
- for attribute in aggregate[name]['attributes']:
- aggregate[name][attribute] = aggregate[name][attribute][indices_to_keep]
- if len(aggregate[name][attribute]) == 0:
- names_to_remove.add(name)
- for name in names_to_remove:
- aggregate.pop(name)
-
def pta_trace_to_aggregate(traces, ignore_trace_indexes = []):
u"""
diff --git a/lib/parameters.py b/lib/parameters.py
index 4dc9c3a..1518076 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -1,77 +1,9 @@
import itertools
import numpy as np
-from utils import remove_index_from_tuple, is_numeric
-
-def compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_trans, attribute, verbose = False):
- """
- Compute standard deviation and correlation coefficient for various data partitions.
-
- It is strongly recommended to vary all parameter values evenly across partitions.
- For instance, given two parameters, providing only the combinations
- (1, 1), (5, 1), (7, 1,) (10, 1), (1, 2), (1, 6) will lead to bogus results.
- It is better to provide (1, 1), (5, 1), (1, 2), (5, 2), ... (i.e. a cross product of all individual parameter values)
-
- :param by_name: ground truth partitioned by state/transition name.
- by_name[state_or_trans][attribute] must be a list or 1-D numpy array.
- by_name[state_or_trans]['param'] must be a list of parameter values
- corresponding to the ground truth, e.g. [[1, 2, 3], ...] if the
- first ground truth element has the (lexically) first parameter set to 1,
- the second to 2 and the third to 3.
- :param by_param: ground truth partitioned by state/transition name and parameters.
- by_name[(state_or_trans, *)][attribute] must be a list or 1-D numpy array.
- :param parameter_names: list of parameter names, must have the same order as the parameter
- values in by_param (lexical sorting is recommended).
- :param arg_count: dict providing the number of functions args ("local parameters") for each function.
- :param state_or_trans: state or transition name, e.g. 'send' or 'TX'
- :param attribute: model attribute, e.g. 'power' or 'duration'
- :param verbose: print warning if some parameter partitions are too small for fitting
-
- :returns: a dict with the following content:
- std_static -- static parameter-unaware model error: stddev of by_name[state_or_trans][attribute]
- std_param_lut -- static parameter-aware model error: mean stddev of by_param[(state_or_trans, *)][attribute]
- std_by_param -- static parameter-aware model error ignoring a single parameter.
- dictionary with one key per parameter. The value is the mean stddev
- of measurements where all other parameters are fixed and the parameter
- in question is variable. E.g. std_by_param['X'] is the mean stddev of
- by_param[(state_or_trans, (X=*, Y=..., Z=...))][attribute].
- std_by_arg -- same, but ignoring a single function argument
- Only set if state_or_trans appears in arg_count, empty dict otherwise.
- corr_by_param -- correlation coefficient
- corr_by_arg -- same, but ignoring a single function argument
- Only set if state_or_trans appears in arg_count, empty dict otherwise.
- """
- ret = {
- 'std_static' : np.std(by_name[state_or_trans][attribute]),
- 'std_param_lut' : np.mean([np.std(by_param[x][attribute]) for x in by_param.keys() if x[0] == state_or_trans]),
- 'std_by_param' : {},
- 'std_by_param_values' : {},
- 'lut_by_param_values' : {},
- 'std_by_arg' : [],
- 'std_by_arg_values' : [],
- 'lut_by_arg_values' : [],
- 'corr_by_param' : {},
- 'corr_by_arg' : [],
- }
-
- np.seterr('raise')
-
- param_values = distinct_param_values(by_name, state_or_trans)
-
- for param_idx, param in enumerate(parameter_names):
- std_matrix, mean_std, lut_matrix = _std_by_param(by_param, param_values, state_or_trans, attribute, param_idx, verbose)
- ret['std_by_param'][param] = mean_std
- ret['std_by_param_values'][param] = std_matrix
- ret['lut_by_param_values'][param] = lut_matrix
- ret['corr_by_param'][param] = _corr_by_param(by_name, state_or_trans, attribute, param_idx)
- if state_or_trans in arg_count:
- for arg_index in range(arg_count[state_or_trans]):
- std_matrix, mean_std, lut_matrix = _std_by_param(by_param, param_values, state_or_trans, attribute, len(parameter_names) + arg_index, verbose)
- ret['std_by_arg'].append(mean_std)
- ret['std_by_arg_values'].append(std_matrix)
- ret['lut_by_arg_values'].append(lut_matrix)
- ret['corr_by_arg'].append(_corr_by_param(by_name, state_or_trans, attribute, len(parameter_names) + arg_index))
-
- return ret
+from collections import OrderedDict
+from copy import deepcopy
+from utils import remove_index_from_tuple, is_numeric, is_power_of_two
+from utils import filter_aggregate_by_param, by_name_to_by_param
def distinct_param_values(by_name, state_or_tran):
"""
@@ -79,9 +11,8 @@ def distinct_param_values(by_name, state_or_tran):
E.g. if by_name[state_or_tran]['param'] contains the distinct entries (1, 1), (1, 2), (1, 3), (0, 3),
this function returns [[1, 0], [1, 2, 3]].
- Note that the order is not guaranteed to be deterministic at the moment.
- Also note that this function deliberately also consider None
+ Note that this function deliberately also considers None
(uninitialized parameter with unknown value) as a distinct value. Benchmarks
and drivers must ensure that a parameter is only None when its value is
not important yet, e.g. a packet length parameter must only be None when
@@ -90,13 +21,13 @@ def distinct_param_values(by_name, state_or_tran):
"""
# TODO a set() is an _unordered_ collection, so this must be converted to
# an OrderedDict or a list with a duplicate-pruning step
- distinct_values = [set() for i in range(len(by_name[state_or_tran]['param'][0]))]
+ distinct_values = [OrderedDict() for i in range(len(by_name[state_or_tran]['param'][0]))]
for param_tuple in by_name[state_or_tran]['param']:
for i in range(len(param_tuple)):
- distinct_values[i].add(param_tuple[i])
+ distinct_values[i][param_tuple[i]] = True
# Convert sets to lists
- distinct_values = list(map(list, distinct_values))
+ distinct_values = list(map(lambda x: list(x.keys()), distinct_values))
return distinct_values
def _std_by_param(by_param, all_param_values, state_or_tran, attribute, param_index, verbose = False):
@@ -255,6 +186,11 @@ def remove_parameters_by_indices(by_name, parameter_names, parameter_indices_to_
parameter_names.pop(parameter_index)
class ParamStats:
+ """
+ :param stats: `stats[state_or_tran][attribute]` = std_static, std_param_lut, ... (see `compute_param_statistics`)
+ :param distinct_values: `distinct_values[state_or_tran][param]` = [distinct values in aggregate]
+ :param distinct_values_by_param_index: `distinct_values_by_param_index[state_or_tran][i]` = [distinct values in aggregate]
+ """
def __init__(self, by_name, by_param, parameter_names, arg_count, use_corrcoef = False, verbose = False):
"""
@@ -280,6 +216,8 @@ class ParamStats:
use_corrcoef -- use correlation coefficient instead of stddev heuristic for parameter detection
"""
self.stats = dict()
+ self.distinct_values = dict()
+ self.distinct_values_by_param_index = dict()
self.use_corrcoef = use_corrcoef
self._parameter_names = parameter_names
# Note: This is deliberately single-threaded. The overhead incurred
@@ -287,8 +225,127 @@ class ParamStats:
# computation of statistics measures.
for state_or_tran in by_name.keys():
self.stats[state_or_tran] = dict()
+ self.distinct_values_by_param_index[state_or_tran] = distinct_param_values(by_name, state_or_tran)
+ self.distinct_values[state_or_tran] = dict()
+ for i, param in enumerate(parameter_names):
+ self.distinct_values[state_or_tran][param] = self.distinct_values_by_param_index[state_or_tran][i]
for attribute in by_name[state_or_tran]['attributes']:
- self.stats[state_or_tran][attribute] = compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_tran, attribute, verbose = verbose)
+ self.stats[state_or_tran][attribute] = self.compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_tran, attribute, verbose = verbose)
+
+ def compute_param_statistics(self, by_name, by_param, parameter_names, arg_count, state_or_trans, attribute, verbose = False):
+ """
+ Compute standard deviation and correlation coefficient for various data partitions.
+
+ It is strongly recommended to vary all parameter values evenly across partitions.
+ For instance, given two parameters, providing only the combinations
+ (1, 1), (5, 1), (7, 1), (10, 1), (1, 2), (1, 6) will lead to bogus results.
+ It is better to provide (1, 1), (5, 1), (1, 2), (5, 2), ... (i.e. a cross product of all individual parameter values)
+
+ :param by_name: ground truth partitioned by state/transition name.
+ by_name[state_or_trans][attribute] must be a list or 1-D numpy array.
+ by_name[state_or_trans]['param'] must be a list of parameter values
+ corresponding to the ground truth, e.g. [[1, 2, 3], ...] if the
+ first ground truth element has the (lexically) first parameter set to 1,
+ the second to 2 and the third to 3.
+ :param by_param: ground truth partitioned by state/transition name and parameters.
+ by_param[(state_or_trans, *)][attribute] must be a list or 1-D numpy array.
+ :param parameter_names: list of parameter names, must have the same order as the parameter
+ values in by_param (lexical sorting is recommended).
+ :param arg_count: dict providing the number of functions args ("local parameters") for each function.
+ :param state_or_trans: state or transition name, e.g. 'send' or 'TX'
+ :param attribute: model attribute, e.g. 'power' or 'duration'
+ :param verbose: print warning if some parameter partitions are too small for fitting
+
+ :returns: a dict with the following content:
+ std_static -- static parameter-unaware model error: stddev of by_name[state_or_trans][attribute]
+ std_param_lut -- static parameter-aware model error: mean stddev of by_param[(state_or_trans, *)][attribute]
+ std_by_param -- static parameter-aware model error ignoring a single parameter.
+ dictionary with one key per parameter. The value is the mean stddev
+ of measurements where all other parameters are fixed and the parameter
+ in question is variable. E.g. std_by_param['X'] is the mean stddev of
+ by_param[(state_or_trans, (X=*, Y=..., Z=...))][attribute].
+ std_by_arg -- same, but ignoring a single function argument
+ Only set if state_or_trans appears in arg_count, empty list otherwise.
+ corr_by_param -- correlation coefficient
+ corr_by_arg -- same, but ignoring a single function argument
+ Only set if state_or_trans appears in arg_count, empty list otherwise.
+ """
+ ret = {
+ 'std_static' : np.std(by_name[state_or_trans][attribute]),
+ 'std_param_lut' : np.mean([np.std(by_param[x][attribute]) for x in by_param.keys() if x[0] == state_or_trans]),
+ 'std_by_param' : {},
+ 'std_by_param_values' : {},
+ 'lut_by_param_values' : {},
+ 'std_by_arg' : [],
+ 'std_by_arg_values' : [],
+ 'lut_by_arg_values' : [],
+ 'corr_by_param' : {},
+ 'corr_by_arg' : [],
+ 'depends_on_param' : {},
+ 'depends_on_arg' : [],
+ 'param_data' : {},
+ }
+
+ np.seterr('raise')
+
+ for param_idx, param in enumerate(parameter_names):
+ std_matrix, mean_std, lut_matrix = _std_by_param(by_param, self.distinct_values_by_param_index[state_or_trans], state_or_trans, attribute, param_idx, verbose)
+ ret['std_by_param'][param] = mean_std
+ ret['std_by_param_values'][param] = std_matrix
+ ret['lut_by_param_values'][param] = lut_matrix
+ ret['corr_by_param'][param] = _corr_by_param(by_name, state_or_trans, attribute, param_idx)
+
+ ret['depends_on_param'][param] = self._depends_on_param(ret['corr_by_param'][param], ret['std_by_param'][param], ret['std_param_lut'])
+
+ if ret['depends_on_param'][param]:
+ ret['param_data'][param] = {
+ 'codependent_parameters': self._codependent_parameters(param, lut_matrix, std_matrix),
+ 'depends_for_codependent_value': dict()
+ }
+
+ # calculate parameter dependence for individual values of codependent parameters
+ codependent_param_values = list()
+ for codependent_param in ret['param_data'][param]['codependent_parameters']:
+ codependent_param_values.append(self.distinct_values[state_or_trans][codependent_param])
+ for combi in itertools.product(*codependent_param_values):
+ by_name_part = deepcopy(by_name)
+ filter_list = list(zip(ret['param_data'][param]['codependent_parameters'], combi))
+ filter_aggregate_by_param(by_name_part, parameter_names, filter_list)
+ by_param_part = by_name_to_by_param(by_name_part)
+ # there may be no data for this specific parameter value combination
+ if state_or_trans in by_name_part:
+ part_corr = _corr_by_param(by_name_part, state_or_trans, attribute, param_idx)
+ part_std_lut = np.mean([np.std(by_param_part[x][attribute]) for x in by_param_part.keys() if x[0] == state_or_trans])
+ _, part_std_param, _ = _std_by_param(by_param_part, self.distinct_values_by_param_index[state_or_trans], state_or_trans, attribute, param_idx, verbose)
+ ret['param_data'][param]['depends_for_codependent_value'][combi] = self._depends_on_param(part_corr, part_std_param, part_std_lut)
+
+ if state_or_trans in arg_count:
+ for arg_index in range(arg_count[state_or_trans]):
+ std_matrix, mean_std, lut_matrix = _std_by_param(by_param, self.distinct_values_by_param_index[state_or_trans], state_or_trans, attribute, len(parameter_names) + arg_index, verbose)
+ ret['std_by_arg'].append(mean_std)
+ ret['std_by_arg_values'].append(std_matrix)
+ ret['lut_by_arg_values'].append(lut_matrix)
+ ret['corr_by_arg'].append(_corr_by_param(by_name, state_or_trans, attribute, len(parameter_names) + arg_index))
+
+ if self.use_corrcoef:
+ ret['depends_on_arg'].append(ret['corr_by_arg'][arg_index] > 0.1)
+ elif ret['std_by_arg'][arg_index] == 0:
+ # In general, std_param_lut < std_by_arg. So, if std_by_arg == 0, std_param_lut == 0 follows.
+ # This means that the variation of arg does not affect the model quality -> no influence
+ ret['depends_on_arg'].append(False)
+ else:
+ ret['depends_on_arg'].append(ret['std_param_lut'] / ret['std_by_arg'][arg_index] < 0.5)
+
+ return ret
+
+ def _depends_on_param(self, corr_param, std_param, std_lut):
+ if self.use_corrcoef:
+ return corr_param > 0.1
+ elif std_param == 0:
+ # In general, std_param_lut < std_by_param. So, if std_by_param == 0, std_param_lut == 0 follows.
+ # This means that the variation of param does not affect the model quality -> no influence
+ return False
+ return std_lut / std_param < 0.5
def _generic_param_independence_ratio(self, state_or_trans, attribute):
"""
@@ -342,6 +399,20 @@ class ParamStats:
return list()
+ def _codependent_parameters(self, param, lut_by_param_values, std_by_param_values):
+ """
+ Return list of parameters which affect whether `param` affects a model attribute or not.
+ """
+ safe_div = np.vectorize(lambda x,y: 0. if x == 0 else 1 - x/y)
+ ratio_by_value = safe_div(lut_by_param_values, std_by_param_values)
+ err_mode = np.seterr('ignore')
+ dep_by_value = ratio_by_value > 0.5
+ np.seterr(**err_mode)
+
+ other_param_list = list(filter(lambda x: x != param, self._parameter_names))
+ influencer_parameters = self._reduce_param_matrix(dep_by_value, other_param_list)
+ return influencer_parameters
+
def _get_codependent_parameters(self, stats, param):
"""
Return list of parameters which affect whether `param` influences the model attribute described in `stats` or not.
@@ -392,8 +463,8 @@ class ParamStats:
"""
Return parameters whose value influences whether `attribute` of `state_or_trans` depends on `param` or not.
- For example, a radio's TX POWER is only influenced by the packet length if dynamically sized payloads are enabled.
- So reverse_dependent_parameters('TX', 'POWER', 'packet_length') == ['dynamic_payload_size'].
+ For example, a radio's TX power is only influenced by the packet length if dynamically sized payloads are enabled.
+ So reverse_dependent_parameters('TX', 'power', 'packet_length') == ['dynamic_payload_size'].
:param state_or_trans: state or transition name
:param attribute: model attribute
@@ -424,16 +495,10 @@ class ParamStats:
# --df, 2018-04-18
def depends_on_param(self, state_or_trans, attribute, param):
"""Return whether attribute of state_or_trans depens on param."""
- if self.use_corrcoef:
- return self.param_dependence_ratio(state_or_trans, attribute, param) > 0.1
- else:
- return self.param_dependence_ratio(state_or_trans, attribute, param) > 0.5
+ return self.stats[state_or_trans][attribute]['depends_on_param'][param]
# See notes on depends_on_param
def depends_on_arg(self, state_or_trans, attribute, arg_index):
"""Return whether attribute of state_or_trans depens on arg_index."""
- if self.use_corrcoef:
- return self.arg_dependence_ratio(state_or_trans, attribute, arg_index) > 0.1
- else:
- return self.arg_dependence_ratio(state_or_trans, attribute, arg_index) > 0.5
+ return self.stats[state_or_trans][attribute]['depends_on_arg'][arg_index]
diff --git a/lib/utils.py b/lib/utils.py
index 5daa7cf..e5699da 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -114,6 +114,55 @@ def param_slice_eq(a, b, index):
return True
return False
+def by_name_to_by_param(by_name: dict):
+ """
+ Convert aggregation by name to aggregation by name and parameter values.
+ """
+ by_param = dict()
+ for name in by_name.keys():
+ for i, parameters in enumerate(by_name[name]['param']):
+ param_key = (name, tuple(parameters))
+ if param_key not in by_param:
+ by_param[param_key] = dict()
+ for key in by_name[name].keys():
+ by_param[param_key][key] = list()
+ by_param[param_key]['attributes'] = by_name[name]['attributes']
+ # special case for PTA models
+ if 'isa' in by_name[name]:
+ by_param[param_key]['isa'] = by_name[name]['isa']
+ for attribute in by_name[name]['attributes']:
+ by_param[param_key][attribute].append(by_name[name][attribute][i])
+ return by_param
+
+def filter_aggregate_by_param(aggregate, parameters, parameter_filter):
+ """
+ Remove entries which do not have certain parameter values from `aggregate`.
+
+ :param aggregate: aggregated measurement data, must be a dict conforming to
+ aggregate[state or transition name]['param'] = [(first parameter value, second parameter value, ...), ...]
+ and
+ aggregate[state or transition name]['attributes'] = [list of keys with measurement data, e.g. 'power' or 'duration']
+ :param parameters: list of parameters, used to map parameter index to parameter name. parameters=['foo', ...] means 'foo' is the first parameter
+ :param parameter_filter: [[name, value], [name, value], ...] list of parameter values to keep, all others are removed. Values refer to normalized parameter data.
+ """
+ for param_name_and_value in parameter_filter:
+ param_index = parameters.index(param_name_and_value[0])
+ param_value = soft_cast_int(param_name_and_value[1])
+ names_to_remove = set()
+ for name in aggregate.keys():
+ indices_to_keep = list(map(lambda x: x[param_index] == param_value, aggregate[name]['param']))
+ aggregate[name]['param'] = list(map(lambda iv: iv[1], filter(lambda iv: indices_to_keep[iv[0]], enumerate(aggregate[name]['param']))))
+ if len(indices_to_keep) == 0:
+ print('??? {}->{}'.format(parameter_filter, name))
+ names_to_remove.add(name)
+ else:
+ for attribute in aggregate[name]['attributes']:
+ aggregate[name][attribute] = aggregate[name][attribute][indices_to_keep]
+ if len(aggregate[name][attribute]) == 0:
+ names_to_remove.add(name)
+ for name in names_to_remove:
+ aggregate.pop(name)
+
class OptionalTimingAnalysis:
def __init__(self, enabled = True):
self.enabled = enabled