summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Daniel Friesel <daniel.friesel@uos.de> 2019-10-11 14:40:48 +0200
committer Daniel Friesel <daniel.friesel@uos.de> 2019-10-11 14:40:48 +0200
commit a2adf6a90246110fcae4e6a6dcc049d8d69fcb48 (patch)
tree ec94d9e77e067e2e168467947f5fd390ed66c569 /lib
parent 2a75bc679e2970f9ae938a418af5acea9106e61d (diff)
add helper functions for co-dependent parameter detection
Diffstat (limited to 'lib')
-rw-r--r-- lib/dfatool.py 40
-rw-r--r-- lib/parameters.py 212
-rw-r--r-- lib/utils.py 17
3 files changed, 203 insertions, 66 deletions
diff --git a/lib/dfatool.py b/lib/dfatool.py
index 478f800..a3d5c0f 100644
--- a/lib/dfatool.py
+++ b/lib/dfatool.py
@@ -18,7 +18,7 @@ from functions import analytic
from functions import AnalyticFunction
from parameters import ParamStats
from utils import vprint, is_numeric, soft_cast_int, param_slice_eq, remove_index_from_tuple
-from utils import by_name_to_by_param
+from utils import by_name_to_by_param, match_parameter_values
arg_support_enabled = True
@@ -505,12 +505,10 @@ class RawData:
self.cache_file = '{}/{}.json'.format(self.cache_dir, cache_key)
def load_cache(self):
- print('checking {}...'.format(self.cache_file))
if os.path.exists(self.cache_file):
with open(self.cache_file, 'r') as f:
self.traces = json.load(f)
self.preprocessed = True
- print('loaded cache')
def save_cache(self):
try:
@@ -902,15 +900,15 @@ class ParallelParamFit:
self.fit_queue = []
self.by_param = by_param
- def enqueue(self, state_or_tran, attribute, param_index, param_name, safe_functions_enabled = False):
+ def enqueue(self, state_or_tran, attribute, param_index, param_name, safe_functions_enabled = False, param_filter = None):
"""
Add state_or_tran/attribute/param_name to fit queue.
This causes fit() to compute the best-fitting function for this model part.
"""
self.fit_queue.append({
- 'key' : [state_or_tran, attribute, param_name],
- 'args' : [self.by_param, state_or_tran, attribute, param_index, safe_functions_enabled]
+ 'key' : [state_or_tran, attribute, param_name, param_filter],
+ 'args' : [self.by_param, state_or_tran, attribute, param_index, safe_functions_enabled, param_filter]
})
def fit(self):
@@ -935,16 +933,17 @@ def _try_fits_parallel(arg):
'result' : _try_fits(*arg['args'])
}
-
-def _try_fits(by_param, state_or_tran, model_attribute, param_index, safe_functions_enabled = False):
+def _try_fits(by_param, state_or_tran, model_attribute, param_index, safe_functions_enabled = False, param_filter: dict = None):
"""
Determine goodness-of-fit for prediction of `by_param[(state_or_tran, *)][model_attribute]` dependence on `param_index` using various functions.
This is done by varying `param_index` while keeping all other parameters constant and doing one least squares optimization for each function and for each combination of the remaining parameters.
The value of the parameter corresponding to `param_index` (e.g. txpower or packet length) is the sole input to the model function.
+ Only numeric parameter values (as determined by `utils.is_numeric`) are used for fitting, non-numeric values such as None or enum strings are ignored.
+ Fitting is only performed if at least three distinct parameter values exist in `by_param[(state_or_tran, *)]`.
- :return: a dictionary with the following elements:
- best -- name of the best-fitting function (see `analytic.functions`)
+ :returns: a dictionary with the following elements:
+ best -- name of the best-fitting function (see `analytic.functions`). `None` in case of insufficient data.
best_rmsd -- mean Root Mean Square Deviation of best-fitting function over all combinations of the remaining parameters
mean_rmsd -- mean Root Mean Square Deviation of a reference model using the mean of its respective input data as model value
median_rmsd -- mean Root Mean Square Deviation of a reference model using the median of its respective input data as model value
@@ -961,6 +960,7 @@ def _try_fits(by_param, state_or_tran, model_attribute, param_index, safe_functi
:param param_index: index of the parameter used as model input
:param safe_functions_enabled: Include "safe" variants of functions with limited argument range.
+ :param param_filter: Only use measurements whose parameters match param_filter for fitting.
"""
functions = analytic.functions(safe_functions_enabled = safe_functions_enabled)
@@ -987,7 +987,7 @@ def _try_fits(by_param, state_or_tran, model_attribute, param_index, safe_functi
seen_parameter_combinations = set()
# for each parameter combination:
- for param_key in filter(lambda x: x[0] == state_or_tran and remove_index_from_tuple(x[1], param_index) not in seen_parameter_combinations, by_param.keys()):
+ for param_key in filter(lambda x: x[0] == state_or_tran and remove_index_from_tuple(x[1], param_index) not in seen_parameter_combinations and len(by_param[x]['param']) and match_parameter_values(by_param[x]['param'][0], param_filter), by_param.keys()):
X = []
Y = []
num_valid = 0
@@ -1087,7 +1087,7 @@ def _num_args_from_by_name(by_name):
num_args[key] = len(value['args'][0])
return num_args
-def get_fit_result(results, name, attribute, verbose = False):
+def get_fit_result(results, name, attribute, verbose = False, param_filter: dict = None):
"""
Parse and sanitize fit results for state/transition/... 'name' and model attribute 'attribute'.
@@ -1097,10 +1097,12 @@ def get_fit_result(results, name, attribute, verbose = False):
:param name: state/transition/... name, e.g. 'TX'
:param attribute: model attribute, e.g. 'duration'
:param verbose: print debug message to stdout when deliberately not using a determined fit function
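+ :param param_filter: Only consider fit results which were enqueued with this parameter filter. None (the default) selects fit results enqueued without a filter.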
+ :returns: dict with fit result (see `_try_fits`) for each successfully fitted parameter. E.g. {'param 1': {'best' : 'function name', ...} }
"""
fit_result = dict()
for result in results:
- if result['key'][0] == name and result['key'][1] == attribute and result['result']['best'] != None:
+ if result['key'][0] == name and result['key'][1] == attribute and result['key'][3] == param_filter and result['result']['best'] != None: # probably caused by the ['best'] != None check -> does the fit for filtered data fail?
this_result = result['result']
if this_result['best_rmsd'] >= min(this_result['mean_rmsd'], this_result['median_rmsd']):
vprint(verbose, '[I] Not modeling {} {} as function of {}: best ({:.0f}) is worse than ref ({:.0f}, {:.0f})'.format(
@@ -1583,7 +1585,7 @@ class PTAModel:
"""
Get static model function: name, attribute -> model value.
- Uses the median of by_name for modeling.
+ Uses the median of by_name for modeling, unless `use_mean` is set.
"""
getter_function = np.median
@@ -1633,7 +1635,7 @@ class PTAModel:
def get_fitted(self, safe_functions_enabled = False):
"""
- Get paramete-aware model function and model information function.
+ Get parameter-aware model function and model information function.
Returns two functions:
model_function(name, attribute, param=parameter values) -> model value.
@@ -1651,6 +1653,8 @@ class PTAModel:
for parameter_index, parameter_name in enumerate(self._parameter_names):
if self.depends_on_param(state_or_tran, model_attribute, parameter_name):
paramfit.enqueue(state_or_tran, model_attribute, parameter_index, parameter_name, safe_functions_enabled)
+ for codependent_param_dict in self.stats.codependent_parameter_value_dicts(state_or_tran, model_attribute, parameter_name):
+ paramfit.enqueue(state_or_tran, model_attribute, parameter_index, parameter_name, safe_functions_enabled, codependent_param_dict)
if arg_support_enabled and self.by_name[state_or_tran]['isa'] == 'transition':
for arg_index in range(self._num_args[state_or_tran]):
if self.depends_on_arg(state_or_tran, model_attribute, arg_index):
@@ -1664,6 +1668,12 @@ class PTAModel:
for model_attribute in self.by_name[state_or_tran]['attributes']:
fit_results = get_fit_result(paramfit.results, state_or_tran, model_attribute, self.verbose)
+ for parameter_name in self._parameter_names:
+ if self.depends_on_param(state_or_tran, model_attribute, parameter_name):
+ for codependent_param_dict in self.stats.codependent_parameter_value_dicts(state_or_tran, model_attribute, parameter_name):
+ pass
+ # FIXME get_fit_result does not even take a parameter as an argument...
+
if (state_or_tran, model_attribute) in self.function_override:
function_str = self.function_override[(state_or_tran, model_attribute)]
x = AnalyticFunction(function_str, self._parameter_names, num_args)
diff --git a/lib/parameters.py b/lib/parameters.py
index 1518076..bc26643 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -19,8 +19,6 @@ def distinct_param_values(by_name, state_or_tran):
write() or similar has not been called yet. Other parameters should always
be initialized when leaving UNINITIALIZED.
"""
- # TODO a set() is an _unordered_ collection, so this must be converted to
- # an OrderedDict or a list with a duplicate-pruning step
distinct_values = [OrderedDict() for i in range(len(by_name[state_or_tran]['param'][0]))]
for param_tuple in by_name[state_or_tran]['param']:
for i in range(len(param_tuple)):
@@ -32,31 +30,37 @@ def distinct_param_values(by_name, state_or_tran):
def _std_by_param(by_param, all_param_values, state_or_tran, attribute, param_index, verbose = False):
u"""
- Calculate standard deviations for a static model where all parameters but param_index are constant.
+ Calculate standard deviations for a static model where all parameters but `param_index` are constant.
:param by_param: measurements sorted by key/transition name and parameter values
+ :param all_param_values: distinct values of each parameter in `state_or_tran`.
+ E.g. for two parameters, the first being None, FOO, or BAR, and the second being 1, 2, 3, or 4, the argument is
+ `[[None, 'FOO', 'BAR'], [1, 2, 3, 4]]`.
:param state_or_tran: state or transition name (-> by_param[(state_or_tran, *)])
:param attribute: model attribute, e.g. 'power' or 'duration'
(-> by_param[(state_or_tran, *)][attribute])
:param param_index: index of variable parameter
- :returns: (stddev matrix, mean stddev)
-
- Returns the mean standard deviation of all measurements of 'attribute'
- (e.g. power consumption or timeout) for state/transition 'state_or_tran' where
- parameter 'param_index' is dynamic and all other parameters are fixed.
- I.e., if parameters are a, b, c ∈ {1,2,3} and 'index' corresponds to b, then
- this function returns the mean of the standard deviations of (a=1, b=*, c=1),
- (a=1, b=*, c=2), and so on.
- Also returns an (n-1)-dimensional array (where n is the number of parameters)
- giving the standard deviation of each individual partition. E.g. for
- param_index == 2 and 4 parameters, array[a][b][d] is the
- stddev of measurements with param0 == a, param1 == b, param2 variable,
- and param3 == d.
+ :returns: (stddev matrix, mean stddev, LUT matrix)
+ *stddev matrix* is an ((number of parameters)-1)-dimensional matrix giving the standard deviation of each individual parameter variation partition.
+ E.g. for param_index == 2 and 4 parameters, stddev matrix[a][b][d] is the stddev of
+ measurements with param0 == all_param_values[0][a],
+ param1 == all_param_values[1][b], param2 variable, and
+ param3 == all_param_values[3][d].
+ *mean stddev* is the mean standard deviation of all measurements of `attribute`
+ for `state_or_tran` where parameter `param_index` is dynamic and all other parameters are fixed.
+ E.g., if parameters are a, b, c ∈ {1,2,3} and 'index' corresponds to b, then
+ this function returns the mean of the standard deviations of (a=1, b=*, c=1),
+ (a=1, b=*, c=2), and so on.
+ *LUT matrix* is an ((number of parameters)-1)-dimensional matrix giving the mean standard deviation of individual partitions with entirely constant parameters.
+ E.g. for param_index == 2 and 4 parameters, LUT matrix[a][b][d] is the mean of
+ stddev(param0 -> a, param1 -> b, param2 -> first distinct value, param3 -> d),
+ stddev(param0 -> a, param1 -> b, param2 -> second distinct value, param3 -> d),
+ and so on.
"""
param_values = list(remove_index_from_tuple(all_param_values, param_index))
info_shape = tuple(map(len, param_values))
- # We will calculate the mean over the entire matrix later on. We cannot
+ # We will calculate the mean over the entire matrix later on. As we cannot
# guarantee that each entry will be filled in this loop (e.g. transitions
# whose arguments are combined using 'zip' rather than 'cartesian' always
# have missing parameter combinations), we pre-fill it with NaN and use
@@ -87,13 +91,25 @@ def _std_by_param(by_param, all_param_values, state_or_tran, attribute, param_in
# vprint(verbose, '[W] parameter value partition for {} is empty'.format(param_value))
if np.all(np.isnan(stddev_matrix)):
- vprint(verbose, '[W] {}/{} parameter #{} has no data partitions -- how did this even happen?'.format(state_or_tran, attribute, param_index))
- vprint(verbose, 'stddev_matrix = {}'.format(stddev_matrix))
+ print('[W] {}/{} parameter #{} has no data partitions -- how did this even happen?'.format(state_or_tran, attribute, param_index))
+ print('stddev_matrix = {}'.format(stddev_matrix))
return stddev_matrix, 0.
return stddev_matrix, np.nanmean(stddev_matrix), lut_matrix #np.mean([np.std(partition) for partition in partitions])
def _corr_by_param(by_name, state_or_trans, attribute, param_index):
+ """
+ Return correlation coefficient (`np.corrcoef`) of `by_name[state_or_trans][attribute][:]` <-> `by_name[state_or_trans]['param'][:][param_index]`
+
+ A correlation coefficient close to 1 indicates that the attribute likely depends on the value of the parameter denoted by `param_index`; if it is nearly 0, the attribute likely does not depend on it.
+
+ If any value of `param_index` is not numeric (i.e., cannot be parsed as float), this function returns 0.
+
+ :param by_name: measurements partitioned by state/transition name
+ :param state_or_trans: state or transition name
+ :param attribute: model attribute
+ :param param_index: index of parameter in `by_name[state_or_trans]['param']`
+ """
if _all_params_are_numeric(by_name[state_or_trans], param_index):
param_values = np.array(list((map(lambda x: x[param_index], by_name[state_or_trans]['param']))))
try:
@@ -232,6 +248,24 @@ class ParamStats:
for attribute in by_name[state_or_tran]['attributes']:
self.stats[state_or_tran][attribute] = self.compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_tran, attribute, verbose = verbose)
+ def can_be_fitted(self, state_or_tran = None) -> bool:
+ """
+ Return whether a sufficient amount of distinct numeric parameter values is available, allowing a parameter-aware model to be generated.
+
+ :param state_or_tran: state or transition. If unset, returns whether any state or transition can be fitted.
+ """
+ if state_or_tran is None:
+ keys = self.stats.keys()
+ else:
+ keys = [state_or_tran]
+
+ for key in keys:
+ for param in self._parameter_names:
+ if len(list(filter(lambda n: is_numeric(n), self.distinct_values[key][param]))) > 2:
+ print(key, param, list(filter(lambda n: is_numeric(n), self.distinct_values[key][param])))
+ return True
+ return False
+
def compute_param_statistics(self, by_name, by_param, parameter_names, arg_count, state_or_trans, attribute, verbose = False):
"""
Compute standard deviation and correlation coefficient for various data partitions.
@@ -338,6 +372,108 @@ class ParamStats:
return ret
+ def static_submodel_params(self, state_or_tran, attribute):
+ """
+ Return the union of all parameter values which decide whether another parameter influences the model or not.
+
+ I.e., the returned list of dicts contains one entry for each parameter value combination which (probably) does not have any parameter influencing the model.
+ If the current parameters match one of these, a static sub-model built based on this subset of parameters can likely be used.
+ """
+ # TODO
+ pass
+
+ def has_codependent_parameters(self, state_or_tran: str, attribute: str, param: str) -> bool:
+ """
+ Return whether there are parameters which determine whether `param` influences `state_or_tran` `attribute` or not.
+
+ :param state_or_tran: model state or transition
+ :param attribute: model attribute
+ :param param: parameter name
+ """
+ if len(self.codependent_parameters(state_or_tran, attribute, param)):
+ return True
+ return False
+
+ def codependent_parameters(self, state_or_tran: str, attribute: str, param: str) -> list:
+ """
+ Return list of parameters which determine whether `param` influences `state_or_tran` `attribute` or not.
+
+ :param state_or_tran: model state or transition
+ :param attribute: model attribute
+ :param param: parameter name
+ """
+ if self.stats[state_or_tran][attribute]['depends_on_param'][param]:
+ return self.stats[state_or_tran][attribute]['param_data'][param]['codependent_parameters']
+ return list()
+
+
+ def has_codependent_parameters_union(self, state_or_tran: str, attribute: str) -> bool:
+ """
+ Return whether there is a subset of parameters which decides whether `state_or_tran` `attribute` is static or parameter-dependent
+
+ :param state_or_tran: model state or transition
+ :param attribute: model attribute
+ """
+ depends_on_a_parameter = False
+ for param in self._parameter_names:
+ if self.stats[state_or_tran][attribute]['depends_on_param'][param]:
+ print('{}/{} depends on {}'.format(state_or_tran, attribute, param))
+ depends_on_a_parameter = True
+ if len(self.codependent_parameters(state_or_tran, attribute, param)) == 0:
+ print('has no codependent parameters')
+ # Always depends on this parameter, regardless of other parameters' values
+ return False
+ return depends_on_a_parameter
+
+ def codependent_parameters_union(self, state_or_tran: str, attribute: str) -> list:
+ """
+ Return list of parameters which determine whether any parameter influences `state_or_tran` `attribute`.
+
+ :param state_or_tran: model state or transition
+ :param attribute: model attribute
+ """
+ codependent_parameters = set()
+ for param in self._parameter_names:
+ if self.stats[state_or_tran][attribute]['depends_on_param'][param]:
+ if len(self.codependent_parameters(state_or_tran, attribute, param)) == 0:
+ return list(self._parameter_names)
+ for codependent_param in self.codependent_parameters(state_or_tran, attribute, param):
+ codependent_parameters.add(codependent_param)
+ return sorted(codependent_parameters)
+
+ def codependence_by_codependent_param_values(self, state_or_tran: str, attribute: str, param: str) -> dict:
+ """
+ Return dict mapping codependent parameter values to a boolean indicating whether `param` influences `state_or_tran` `attribute`.
+
+ If a dict value is true, `attribute` depends on `param` for the corresponding codependent parameter values, otherwise it does not.
+
+ :param state_or_tran: model state or transition
+ :param attribute: model attribute
+ :param param: parameter name
+ """
+ if self.stats[state_or_tran][attribute]['depends_on_param'][param]:
+ return self.stats[state_or_tran][attribute]['param_data'][param]['depends_for_codependent_value']
+ return dict()
+
+ def codependent_parameter_value_dicts(self, state_or_tran: str, attribute: str, param: str, kind='dynamic'):
+ """
+ Return dicts of codependent parameter key-value mappings for which `param` influences (or does not influence) `state_or_tran` `attribute`.
+
+ :param state_or_tran: model state or transition
+ :param attribute: model attribute
+ :param param: parameter name
+ :param kind: 'static' or 'dynamic'. If 'dynamic' (the default), returns codependent parameter values for which `param` influences `attribute`. If 'static', returns codependent parameter values for which `param` does not influence `attribute`
+ """
+ codependent_parameters = self.stats[state_or_tran][attribute]['param_data'][param]['codependent_parameters']
+ codependence_info = self.stats[state_or_tran][attribute]['param_data'][param]['depends_for_codependent_value']
+ if len(codependent_parameters) == 0:
+ return
+ else:
+ for param_values, is_dynamic in codependence_info.items():
+ if (is_dynamic and kind == 'dynamic') or (not is_dynamic and kind == 'static'):
+ yield dict(zip(codependent_parameters, param_values))
+
+
def _depends_on_param(self, corr_param, std_param, std_lut):
if self.use_corrcoef:
return corr_param > 0.1
@@ -381,9 +517,12 @@ class ParamStats:
if np.all(matrix == True) or np.all(matrix == False):
return list()
- if not is_power_of_two(np.count_nonzero(matrix)):
- # cannot be reliably reduced to a list of parameters
- return list()
+ # This termination condition does not seem to be very clever yet...
+ # With it, too much is filtered out (e.g. auto_ack! -> max_retry_count is not detected in "bin/analyze-timing.py ../data/20190815_122531_nRF24_no-rx.json")
+ # Without it, too little is filtered out (many dependencies are detected as well for which a parameter dependency always exists, regardless of the values of the other parameters)
+ #if not is_power_of_two(np.count_nonzero(matrix)):
+ # # cannot be reliably reduced to a list of parameters
+ # return list()
if np.count_nonzero(matrix) == 1:
influential_parameters = list()
@@ -413,20 +552,6 @@ class ParamStats:
influencer_parameters = self._reduce_param_matrix(dep_by_value, other_param_list)
return influencer_parameters
- def _get_codependent_parameters(self, stats, param):
- """
- Return list of parameters which affect whether `param` influences the model attribute described in `stats` or not.
- """
- safe_div = np.vectorize(lambda x,y: 0. if x == 0 else 1 - x/y)
- ratio_by_value = safe_div(stats['lut_by_param_values'][param], stats['std_by_param_values'][param])
- err_mode = np.seterr('ignore')
- dep_by_value = ratio_by_value > 0.5
- np.seterr(**err_mode)
-
- other_param_list = list(filter(lambda x: x != param, self._parameter_names))
- influencer_parameters = self._reduce_param_matrix(dep_by_value, other_param_list)
- return influencer_parameters
-
def _param_independence_ratio(self, state_or_trans: str, attribute: str, param: str) -> float:
"""
Return the heuristic ratio of parameter independence for state_or_trans, attribute, and param.
@@ -459,21 +584,6 @@ class ParamStats:
"""
return 1 - self._param_independence_ratio(state_or_trans, attribute, param)
- def reverse_dependent_parameters(self, state_or_trans: str, attribute: str, param: str) -> list:
- """
- Return parameters whose value influences whether `attribute` of `state_or_trans` depends on `param` or not.
-
- For example, a radio's TX power is only influenced by the packet length if dynamically sized payloads are enabled.
- So reverse_dependent_parameters('TX', 'power', 'packet_length') == ['dynamic_payload_size'].
-
- :param state_or_trans: state or transition name
- :param attribute: model attribute
- :param param: parameter name
-
- :returns: list of parameters
- """
- return self._get_codependent_parameters(self.stats[state_or_trans][attribute], param)
-
def _arg_independence_ratio(self, state_or_trans, attribute, arg_index):
statistics = self.stats[state_or_trans][attribute]
if self.use_corrcoef:
diff --git a/lib/utils.py b/lib/utils.py
index e5699da..f040638 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -114,6 +114,21 @@ def param_slice_eq(a, b, index):
return True
return False
+def match_parameter_values(input_param: dict, match_param: dict):
+ """
+ Check whether all parameters in `match_param` which are also present in `input_param` have the same value in both.
+
+ :param input_param: parameter dict of a state/transition/... measurement
+ :param match_param: parameter value filter
+ :returns: True if for all parameters k in match_param: input_param[k] == match_param[k], or if match_param is None.
+ """
+ if match_param is None:
+ return True
+ for k, v in match_param.items():
+ if k in input_param and input_param[k] != v:
+ return False
+ return True
+
def by_name_to_by_param(by_name: dict):
"""
Convert aggregation by name to aggregation by name and parameter values.
@@ -132,6 +147,8 @@ def by_name_to_by_param(by_name: dict):
by_param[param_key]['isa'] = by_name[name]['isa']
for attribute in by_name[name]['attributes']:
by_param[param_key][attribute].append(by_name[name][attribute][i])
+ # Required for match_parameter_values in _try_fits
+ by_param[param_key]['param'].append(by_name[name]['param'][i])
return by_param
def filter_aggregate_by_param(aggregate, parameters, parameter_filter):