| field     | value                                    | date                      |
|-----------|------------------------------------------|---------------------------|
| author    | Daniel Friesel <daniel.friesel@uos.de>   | 2019-08-16 11:24:34 +0200 |
| committer | Daniel Friesel <daniel.friesel@uos.de>   | 2019-08-16 11:24:34 +0200 |
| commit    | c1b9509b588412b8045f5d838bf8c6bca0fa9b77 |                           |
| tree      | 7222935694b8be94d0cf2ae41c08e82a872e52be |                           |
| parent    | f619692a4601cdb13a45f47c988d76563a16ba0d |                           |
optionally prune dependent parameters before analysis
| mode       | file                       | lines changed |
|------------|----------------------------|---------------|
| -rwxr-xr-x | bin/analyze-timing.py      | 3             |
| -rwxr-xr-x | lib/dfatool.py             | 3             |
| -rw-r--r-- | lib/utils.py               | 66            |
| -rwxr-xr-x | test/test_timingharness.py | 30            |

4 files changed, 101 insertions(+), 1 deletion(-)
diff --git a/bin/analyze-timing.py b/bin/analyze-timing.py
index 465932b..1c27533 100755
--- a/bin/analyze-timing.py
+++ b/bin/analyze-timing.py
@@ -78,6 +78,7 @@ import sys
 from dfatool import AnalyticModel, TimingData, pta_trace_to_aggregate
 from dfatool import soft_cast_int, is_numeric, gplearn_to_function
 from dfatool import CrossValidator
+import utils
 
 opts = {}
 
@@ -205,6 +206,8 @@ if __name__ == '__main__':
     preprocessed_data = raw_data.get_preprocessed_data()
     by_name, parameters, arg_count = pta_trace_to_aggregate(preprocessed_data, ignored_trace_indexes)
 
+    utils.prune_dependent_parameters(by_name, parameters)
+
     for param_name_and_value in opts['filter-param']:
         param_index = parameters.index(param_name_and_value[0])
         param_value = soft_cast_int(param_name_and_value[1])
diff --git a/lib/dfatool.py b/lib/dfatool.py
index 528eabc..8990aed 100755
--- a/lib/dfatool.py
+++ b/lib/dfatool.py
@@ -1014,6 +1014,7 @@ def _try_fits(by_param, state_or_tran, model_attribute, param_index, safe_functi
 
     if not len(ref_results['mean']):
         # Insufficient data for fitting
+        #print('[W] Insufficient data for fitting {}/{}/{}'.format(state_or_tran, model_attribute, param_index))
         return {
             'best' : None,
             'best_rmsd' : np.inf,
@@ -1089,7 +1090,7 @@ def get_fit_result(results, name, attribute, verbose = False):
                     this_result['mean_rmsd'], this_result['median_rmsd']))
             # See notes on depends_on_param
             elif this_result['best_rmsd'] >= 0.8 * min(this_result['mean_rmsd'], this_result['median_rmsd']):
-                vprint(verbose, '[I] Not modeling {} {} as function of {}: best ({:.0f}) is not much better than ({:.0f}, {:.0f})'.format(
+                vprint(verbose, '[I] Not modeling {} {} as function of {}: best ({:.0f}) is not much better than ref ({:.0f}, {:.0f})'.format(
                     name, attribute, result['key'][2], this_result['best_rmsd'],
                     this_result['mean_rmsd'], this_result['median_rmsd']))
             else:
diff --git a/lib/utils.py b/lib/utils.py
index b748007..8d1b817 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -1,3 +1,4 @@
+import itertools
 import numpy as np
 import re
 
@@ -98,6 +99,71 @@ def param_slice_eq(a, b, index):
         return True
     return False
 
+def prune_dependent_parameters(by_name, parameter_names):
+    """
+    Remove dependent parameters from aggregate.
+
+    :param by_name: measurements partitioned by state/transition/... name and attribute, edited in-place.
+        by_name[name][attribute] must be a list or 1-D numpy array.
+        by_name[state_or_trans]['param'] must be a list of parameter values.
+        Other dict members are left as-is.
+    :param parameter_names: List of parameter names in the order they are used in by_name[name]['param'], edited in-place.
+
+    Model generation (and its components, such as relevant parameter detection and least squares optimization) only works if input variables (i.e., parameters)
+    are independent of each other. This function computes the correlation coefficient for each pair of parameters and removes those which depend on each other.
+    For each pair of dependent parameters, the lexically greater one is removed (e.g. "a" and "b" -> "b" is removed).
+    """
+
+    parameter_indices_to_remove = list()
+    for parameter_combination in itertools.product(range(len(parameter_names)), range(len(parameter_names))):
+        index_1, index_2 = parameter_combination
+        if index_1 >= index_2:
+            continue
+        parameter_values = [list(), list()] # both parameters have a value
+        parameter_values_1 = list() # parameter 1 has a value
+        parameter_values_2 = list() # parameter 2 has a value
+        for name in by_name:
+            for measurement in by_name[name]['param']:
+                value_1 = measurement[index_1]
+                value_2 = measurement[index_2]
+                if is_numeric(value_1):
+                    parameter_values_1.append(value_1)
+                if is_numeric(value_2):
+                    parameter_values_2.append(value_2)
+                if is_numeric(value_1) and is_numeric(value_2):
+                    parameter_values[0].append(value_1)
+                    parameter_values[1].append(value_2)
+        if len(parameter_values[0]):
+            correlation = np.corrcoef(parameter_values)[0][1]
+            if not np.isnan(correlation) and np.abs(correlation) > 0.5:
+                print('[!] Parameters {} <-> {} are correlated with coefficient {}'.format(parameter_names[index_1], parameter_names[index_2], correlation))
+                if len(parameter_values_1) < len(parameter_values_2):
+                    index_to_remove = index_1
+                else:
+                    index_to_remove = index_2
+                print('    Removing parameter {}'.format(parameter_names[index_to_remove]))
+                parameter_indices_to_remove.append(index_to_remove)
+    remove_parameters_by_indices(by_name, parameter_names, parameter_indices_to_remove)
+
+def remove_parameters_by_indices(by_name, parameter_names, parameter_indices_to_remove):
+    """
+    Remove parameters listed in `parameter_indices_to_remove` from aggregate `by_name` and from `parameter_names`.
+
+    :param by_name: measurements partitioned by state/transition/... name and attribute, edited in-place.
+        by_name[name][attribute] must be a list or 1-D numpy array.
+        by_name[state_or_trans]['param'] must be a list of parameter values.
+        Other dict members are left as-is.
+    :param parameter_names: List of parameter names in the order they are used in by_name[name]['param'], edited in-place.
+    :param parameter_indices_to_remove: List of parameter indices to be removed.
+    """
+
+    # Start removal from the end of the list to avoid renumbering of list elements
+    for parameter_index in sorted(parameter_indices_to_remove, reverse = True):
+        for name in by_name:
+            for measurement in by_name[name]['param']:
+                measurement.pop(parameter_index)
+        parameter_names.pop(parameter_index)
+
 def compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_trans, attribute, verbose = False):
     """
     Compute standard deviation and correlation coefficient for various data partitions.
diff --git a/test/test_timingharness.py b/test/test_timingharness.py
index 6479f0a..b5937ad 100755
--- a/test/test_timingharness.py
+++ b/test/test_timingharness.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 from dfatool import AnalyticModel, TimingData, pta_trace_to_aggregate
+from utils import prune_dependent_parameters
 import unittest
 
 class TestModels(unittest.TestCase):
@@ -30,5 +31,34 @@ class TestModels(unittest.TestCase):
         self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[2], 1, places=0)
         self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[3], 1, places=0)
 
+    def test_dependent_parameter_pruning(self):
+        raw_data = TimingData(['test-data/20190815_103347_nRF24_no-rx.json'])
+        preprocessed_data = raw_data.get_preprocessed_data(verbose = False)
+        by_name, parameters, arg_count = pta_trace_to_aggregate(preprocessed_data)
+        prune_dependent_parameters(by_name, parameters)
+        model = AnalyticModel(by_name, parameters, arg_count, verbose = False)
+        self.assertEqual(model.names, 'getObserveTx setPALevel setRetries setup write'.split(' '))
+        static_model = model.get_static()
+        self.assertAlmostEqual(static_model('getObserveTx', 'duration'), 75, places=0)
+        self.assertAlmostEqual(static_model('setPALevel', 'duration'), 146, places=0)
+        self.assertAlmostEqual(static_model('setRetries', 'duration'), 73, places=0)
+        self.assertAlmostEqual(static_model('setup', 'duration'), 6533, places=0)
+        self.assertAlmostEqual(static_model('write', 'duration'), 12634, places=0)
+
+        for transition in 'getObserveTx setPALevel setRetries setup write'.split(' '):
+            self.assertAlmostEqual(model.stats.param_dependence_ratio(transition, 'duration', 'channel'), 0, places=2)
+
+        param_model, param_info = model.get_fitted()
+        self.assertEqual(param_info('setPALevel', 'duration'), None)
+        self.assertEqual(param_info('setRetries', 'duration'), None)
+        self.assertEqual(param_info('setup', 'duration'), None)
+        self.assertEqual(param_info('write', 'duration')['function']._model_str, '0 + regression_arg(0) + regression_arg(1) * parameter(max_retry_count) + regression_arg(2) * parameter(retry_delay) + regression_arg(3) * parameter(max_retry_count) * parameter(retry_delay)')
+
+        self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[0], 1163, places=0)
+        self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[1], 464, places=0)
+        self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[2], 1, places=0)
+        self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[3], 1, places=0)
+
+
 if __name__ == '__main__':
     unittest.main()
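The new test exercises the pruning end-to-end against recorded nRF24 timing data. To see the in-place effect in isolation, here is a minimal toy sketch; the transition name, parameter names, and values are made up, and it assumes lib/ is on the import path (as in the test suite):

```python
from utils import prune_dependent_parameters

# Hypothetical aggregate: one transition ('TX') with three parameters per measurement.
by_name = {
    'TX': {
        'param': [
            [1, 10, 5],
            [2, 20, 1],
            [3, 30, 4],
            [4, 40, 2],
        ],
        'duration': [100, 110, 120, 130],
    }
}
parameter_names = ['payload_len', 'payload_bits', 'channel']

# 'payload_len' and 'payload_bits' are perfectly correlated (|r| = 1 > 0.5),
# so one of them is removed in-place; 'channel' is kept.
prune_dependent_parameters(by_name, parameter_names)

print(parameter_names)          # e.g. ['payload_len', 'channel']
print(by_name['TX']['param'])   # each measurement now holds two parameter values
```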