summaryrefslogtreecommitdiff
path: root/lib/dfatool.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/dfatool.py')
-rwxr-xr-xlib/dfatool.py186
1 files changed, 2 insertions, 184 deletions
diff --git a/lib/dfatool.py b/lib/dfatool.py
index cc6b32e..3151c65 100755
--- a/lib/dfatool.py
+++ b/lib/dfatool.py
@@ -15,7 +15,8 @@ from multiprocessing import Pool
from automata import PTA
from functions import analytic
from functions import AnalyticFunction
-from utils import vprint, is_numeric, soft_cast_int, param_slice_eq, compute_param_statistics, remove_index_from_tuple, is_power_of_two, distinct_param_values
+from parameters import ParamStats
+from utils import vprint, is_numeric, soft_cast_int, param_slice_eq, remove_index_from_tuple, is_power_of_two
arg_support_enabled = True
@@ -370,189 +371,6 @@ def _preprocess_measurement(measurement):
return processed_data
-class ParamStats:
-
- def __init__(self, by_name, by_param, parameter_names, arg_count, use_corrcoef = False, verbose = False):
- """
- Compute standard deviation and correlation coefficient on parameterized data partitions.
-
- It is strongly recommended to vary all parameter values evenly.
- For instance, given two parameters, providing only the combinations
- (1, 1), (5, 1), (7, 1), (10, 1), (1, 2), (1, 6) will lead to bogus results.
- It is better to provide (1, 1), (5, 1), (1, 2), (5, 2), ... (i.e. a cross product of all individual parameter values)
-
- arguments:
- by_name -- ground truth partitioned by state/transition name.
- by_name[state_or_trans][attribute] must be a list or 1-D numpy array.
- by_name[state_or_trans]['param'] must be a list of parameter values
- corresponding to the ground truth, e.g. [[1, 2, 3], ...] if the
- first ground truth element has the (lexically) first parameter set to 1,
- the second to 2 and the third to 3.
- by_param -- ground truth partitioned by state/transition name and parameters.
- by_param[(state_or_trans, *)][attribute] must be a list or 1-D numpy array.
- parameter_names -- list of parameter names, must have the same order as the parameter
- values in by_param (lexical sorting is recommended).
- arg_count -- dict providing the number of function args ("local parameters") for each function.
- use_corrcoef -- use correlation coefficient instead of stddev heuristic for parameter detection
- """
- self.stats = dict()
- self.use_corrcoef = use_corrcoef
- self._parameter_names = parameter_names
- # Note: This is deliberately single-threaded. The overhead incurred
- # by multiprocessing is higher than the speed gained by parallel
- # computation of statistics measures.
- for state_or_tran in by_name.keys():
- self.stats[state_or_tran] = dict()
- for attribute in by_name[state_or_tran]['attributes']:
- self.stats[state_or_tran][attribute] = compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_tran, attribute, verbose = verbose)
-
- def _generic_param_independence_ratio(self, state_or_trans, attribute):
- """
- Return the heuristic ratio of parameter independence for state_or_trans and attribute.
-
- This is not supported if the correlation coefficient is used.
- A value close to 1 means no influence, a value close to 0 means high probability of influence.
- """
- statistics = self.stats[state_or_trans][attribute]
- if self.use_corrcoef:
- # not supported
- raise ValueError
- if statistics['std_static'] == 0:
- return 0
- return statistics['std_param_lut'] / statistics['std_static']
-
- def generic_param_dependence_ratio(self, state_or_trans, attribute):
- """
- Return the heuristic ratio of parameter dependence for state_or_trans and attribute.
-
- This is not supported if the correlation coefficient is used.
- A value close to 0 means no influence, a value close to 1 means high probability of influence.
- """
- return 1 - self._generic_param_independence_ratio(state_or_trans, attribute)
-
- def _reduce_param_matrix(self, matrix: np.ndarray, parameter_names: list) -> list:
- """
- :param matrix: parameter dependence matrix, M[(...)] == 1 iff (model attribute) is influenced by (parameter) for other parameter value index == (...)
- :param parameter_names: names of parameters in the order in which they appear in the matrix index. The first entry corresponds to the first axis, etc.
- :returns: parameters which determine whether (parameter) has an effect on (model attribute). If a parameter is not part of this list, its value does not
- affect (parameter)'s influence on (model attribute) -- it either always or never has an influence
- """
- if np.all(matrix == True) or np.all(matrix == False):
- return list()
-
- if not is_power_of_two(np.count_nonzero(matrix)):
- # cannot be reliably reduced to a list of parameters
- return list()
-
- if np.count_nonzero(matrix) == 1:
- influential_parameters = list()
- for i, parameter_name in enumerate(parameter_names):
- if matrix.shape[i] > 1:
- influential_parameters.append(parameter_name)
- return influential_parameters
-
- for axis in range(matrix.ndim):
- candidate = self._reduce_param_matrix(np.all(matrix, axis=axis), remove_index_from_tuple(parameter_names, axis))
- if len(candidate):
- return candidate
-
- return list()
-
- def _get_codependent_parameters(self, stats, param):
- """
- Return list of parameters which affect whether `param` influences the model attribute described in `stats` or not.
- """
- safe_div = np.vectorize(lambda x,y: 0. if x == 0 else 1 - x/y)
- ratio_by_value = safe_div(stats['lut_by_param_values'][param], stats['std_by_param_values'][param])
- err_mode = np.seterr('ignore')
- dep_by_value = ratio_by_value > 0.5
- np.seterr(**err_mode)
-
- other_param_list = list(filter(lambda x: x != param, self._parameter_names))
- influencer_parameters = self._reduce_param_matrix(dep_by_value, other_param_list)
- return influencer_parameters
-
- def _param_independence_ratio(self, state_or_trans: str, attribute: str, param: str) -> float:
- """
- Return the heuristic ratio of parameter independence for state_or_trans, attribute, and param.
-
- A value close to 1 means no influence, a value close to 0 means high probability of influence.
- """
- statistics = self.stats[state_or_trans][attribute]
- if self.use_corrcoef:
- return 1 - np.abs(statistics['corr_by_param'][param])
- if statistics['std_by_param'][param] == 0:
- if statistics['std_param_lut'] != 0:
- raise RuntimeError("wat")
- # In general, std_param_lut < std_by_param. So, if std_by_param == 0, std_param_lut == 0 follows.
- # This means that the variation of param does not affect the model quality -> no influence, return 1
- return 1.
-
- return statistics['std_param_lut'] / statistics['std_by_param'][param]
-
- def param_dependence_ratio(self, state_or_trans: str, attribute: str, param: str) -> float:
- """
- Return the heuristic ratio of parameter dependence for state_or_trans, attribute, and param.
-
- A value close to 0 means no influence, a value close to 1 means high probability of influence.
-
- :param state_or_trans: state or transition name
- :param attribute: model attribute
- :param param: parameter name
-
- :returns: parameter dependence (float between 0 == no influence and 1 == high probability of influence)
- """
- return 1 - self._param_independence_ratio(state_or_trans, attribute, param)
-
- def reverse_dependent_parameters(self, state_or_trans: str, attribute: str, param: str) -> list:
- """
- Return parameters whose value influences whether `attribute` of `state_or_trans` depends on `param` or not.
-
- For example, a radio's TX POWER is only influenced by the packet length if dynamically sized payloads are enabled.
- So reverse_dependent_parameters('TX', 'POWER', 'packet_length') == ['dynamic_payload_size'].
-
- :param state_or_trans: state or transition name
- :param attribute: model attribute
- :param param: parameter name
-
- :returns: list of parameters
- """
- return self._get_codependent_parameters(self.stats[state_or_trans][attribute], param)
-
- def _arg_independence_ratio(self, state_or_trans, attribute, arg_index):
- statistics = self.stats[state_or_trans][attribute]
- if self.use_corrcoef:
- return 1 - np.abs(statistics['corr_by_arg'][arg_index])
- if statistics['std_by_arg'][arg_index] == 0:
- if statistics['std_param_lut'] != 0:
- raise RuntimeError("wat")
- # In general, std_param_lut < std_by_arg. So, if std_by_arg == 0, std_param_lut == 0 follows.
- # This means that the variation of arg does not affect the model quality -> no influence, return 1
- return 1
- return statistics['std_param_lut'] / statistics['std_by_arg'][arg_index]
-
- def arg_dependence_ratio(self, state_or_trans: str, attribute: str, arg_index: int) -> float:
- return 1 - self._arg_independence_ratio(state_or_trans, attribute, arg_index)
-
- # This heuristic is very similar to the "function is not much better than
- # median" checks in get_fitted. So far, doing it here as well is mostly
- # a performance and not an algorithm quality decision.
- # --df, 2018-04-18
- def depends_on_param(self, state_or_trans, attribute, param):
- """Return whether attribute of state_or_trans depends on param."""
- if self.use_corrcoef:
- return self.param_dependence_ratio(state_or_trans, attribute, param) > 0.1
- else:
- return self.param_dependence_ratio(state_or_trans, attribute, param) > 0.5
-
- # See notes on depends_on_param
- def depends_on_arg(self, state_or_trans, attribute, arg_index):
- """Return whether attribute of state_or_trans depends on arg_index."""
- if self.use_corrcoef:
- return self.arg_dependence_ratio(state_or_trans, attribute, arg_index) > 0.1
- else:
- return self.arg_dependence_ratio(state_or_trans, attribute, arg_index) > 0.5
-
class TimingData:
"""
Loader for timing model traces measured with on-board timers using `harness.OnboardTimerHarness`.