author     Daniel Friesel <daniel.friesel@uos.de>    2019-08-16 11:24:34 +0200
committer  Daniel Friesel <daniel.friesel@uos.de>    2019-08-16 11:24:34 +0200
commit     c1b9509b588412b8045f5d838bf8c6bca0fa9b77
tree       7222935694b8be94d0cf2ae41c08e82a872e52be
parent     f619692a4601cdb13a45f47c988d76563a16ba0d
optionally prune dependent parameters before analysis
-rwxr-xr-x  bin/analyze-timing.py        3
-rwxr-xr-x  lib/dfatool.py               3
-rw-r--r--  lib/utils.py                66
-rwxr-xr-x  test/test_timingharness.py  30
4 files changed, 101 insertions(+), 1 deletion(-)
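The new pruning step slots in between trace aggregation and model generation. A minimal usage sketch, mirroring the call sequence added in bin/analyze-timing.py and test/test_timingharness.py below (the trace file path is only a placeholder, and lib/ is assumed to be on the Python path):

from dfatool import AnalyticModel, TimingData, pta_trace_to_aggregate
from utils import prune_dependent_parameters

raw_data = TimingData(['path/to/traces.json'])  # placeholder trace file
preprocessed_data = raw_data.get_preprocessed_data()
by_name, parameters, arg_count = pta_trace_to_aggregate(preprocessed_data)

# Drop parameters that strongly correlate with another parameter, so that
# parameter detection and least-squares fitting see independent inputs.
prune_dependent_parameters(by_name, parameters)

model = AnalyticModel(by_name, parameters, arg_count)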
diff --git a/bin/analyze-timing.py b/bin/analyze-timing.py
index 465932b..1c27533 100755
--- a/bin/analyze-timing.py
+++ b/bin/analyze-timing.py
@@ -78,6 +78,7 @@ import sys
from dfatool import AnalyticModel, TimingData, pta_trace_to_aggregate
from dfatool import soft_cast_int, is_numeric, gplearn_to_function
from dfatool import CrossValidator
+import utils
opts = {}
@@ -205,6 +206,8 @@ if __name__ == '__main__':
preprocessed_data = raw_data.get_preprocessed_data()
by_name, parameters, arg_count = pta_trace_to_aggregate(preprocessed_data, ignored_trace_indexes)
+ utils.prune_dependent_parameters(by_name, parameters)
+
for param_name_and_value in opts['filter-param']:
param_index = parameters.index(param_name_and_value[0])
param_value = soft_cast_int(param_name_and_value[1])
diff --git a/lib/dfatool.py b/lib/dfatool.py
index 528eabc..8990aed 100755
--- a/lib/dfatool.py
+++ b/lib/dfatool.py
@@ -1014,6 +1014,7 @@ def _try_fits(by_param, state_or_tran, model_attribute, param_index, safe_functi
if not len(ref_results['mean']):
# Insufficient data for fitting
+ #print('[W] Insufficient data for fitting {}/{}/{}'.format(state_or_tran, model_attribute, param_index))
return {
'best' : None,
'best_rmsd' : np.inf,
@@ -1089,7 +1090,7 @@ def get_fit_result(results, name, attribute, verbose = False):
this_result['mean_rmsd'], this_result['median_rmsd']))
# See notes on depends_on_param
elif this_result['best_rmsd'] >= 0.8 * min(this_result['mean_rmsd'], this_result['median_rmsd']):
- vprint(verbose, '[I] Not modeling {} {} as function of {}: best ({:.0f}) is not much better than ({:.0f}, {:.0f})'.format(
+ vprint(verbose, '[I] Not modeling {} {} as function of {}: best ({:.0f}) is not much better than ref ({:.0f}, {:.0f})'.format(
name, attribute, result['key'][2], this_result['best_rmsd'],
this_result['mean_rmsd'], this_result['median_rmsd']))
else:
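(For context on the reworded message: judging from the surrounding code, get_fit_result only models an attribute as a function of a parameter when the best fit's RMSD is clearly below both reference RMSDs. For example, with best_rmsd = 95, mean_rmsd = 110 and median_rmsd = 100, the check 95 >= 0.8 * min(110, 100) = 80 holds, so the parameter-aware fit is rejected and the message above is printed.)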
diff --git a/lib/utils.py b/lib/utils.py
index b748007..8d1b817 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -1,3 +1,4 @@
+import itertools
import numpy as np
import re
@@ -98,6 +99,71 @@ def param_slice_eq(a, b, index):
return True
return False
+def prune_dependent_parameters(by_name, parameter_names):
+ """
+ Remove dependent parameters from aggregate.
+
+ :param by_name: measurements partitioned by state/transition/... name and attribute, edited in-place.
+ by_name[name][attribute] must be a list or 1-D numpy array.
+    by_name[name]['param'] must be a list of parameter values.
+    Other dict members are left as-is.
+ :param parameter_names: List of parameter names in the order they are used in by_name[name]['param'], edited in-place.
+
+    Model generation (and its components, such as relevant parameter detection and least squares optimization) only works if the input variables (i.e., parameters)
+    are independent of each other. This function computes the correlation coefficient for each pair of parameters and removes those which depend on each other.
+    For each correlated pair, the parameter with fewer numeric values is removed; if both have the same number of values, the second one is removed (e.g. "a" and "b" -> "b" is removed).
+ """
+
+ parameter_indices_to_remove = list()
+ for parameter_combination in itertools.product(range(len(parameter_names)), range(len(parameter_names))):
+ index_1, index_2 = parameter_combination
+ if index_1 >= index_2:
+ continue
+ parameter_values = [list(), list()] # both parameters have a value
+ parameter_values_1 = list() # parameter 1 has a value
+ parameter_values_2 = list() # parameter 2 has a value
+ for name in by_name:
+ for measurement in by_name[name]['param']:
+ value_1 = measurement[index_1]
+ value_2 = measurement[index_2]
+ if is_numeric(value_1):
+ parameter_values_1.append(value_1)
+ if is_numeric(value_2):
+ parameter_values_2.append(value_2)
+ if is_numeric(value_1) and is_numeric(value_2):
+ parameter_values[0].append(value_1)
+ parameter_values[1].append(value_2)
+ if len(parameter_values[0]):
+ correlation = np.corrcoef(parameter_values)[0][1]
+            if not np.isnan(correlation) and np.abs(correlation) > 0.5:
+                print('[!] Parameters {} <-> {} are correlated with coefficient {}'.format(parameter_names[index_1], parameter_names[index_2], correlation))
+ if len(parameter_values_1) < len(parameter_values_2):
+ index_to_remove = index_1
+ else:
+ index_to_remove = index_2
+ print(' Removing parameter {}'.format(parameter_names[index_to_remove]))
+ parameter_indices_to_remove.append(index_to_remove)
+ remove_parameters_by_indices(by_name, parameter_names, parameter_indices_to_remove)
+
+def remove_parameters_by_indices(by_name, parameter_names, parameter_indices_to_remove):
+ """
+ Remove parameters listed in `parameter_indices` from aggregate `by_name` and `parameter_names`.
+
+ :param by_name: measurements partitioned by state/transition/... name and attribute, edited in-place.
+ by_name[name][attribute] must be a list or 1-D numpy array.
+    by_name[name]['param'] must be a list of parameter values.
+    Other dict members are left as-is.
+ :param parameter_names: List of parameter names in the order they are used in by_name[name]['param'], edited in-place.
+ :param parameter_indices_to_remove: List of parameter indices to be removed
+ """
+
+    # Remove parameters from the end of the list first and deduplicate the index list,
+    # so that earlier removals do not shift the indices of later ones.
+    for parameter_index in sorted(set(parameter_indices_to_remove), reverse = True):
+ for name in by_name:
+ for measurement in by_name[name]['param']:
+ measurement.pop(parameter_index)
+ parameter_names.pop(parameter_index)
+
def compute_param_statistics(by_name, by_param, parameter_names, arg_count, state_or_trans, attribute, verbose = False):
"""
Compute standard deviation and correlation coefficient for various data partitions.
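To make the intended behaviour of the two helpers above concrete, here is a minimal, self-contained sketch with hypothetical parameter names and values (not taken from the repository's test data; it assumes lib/ is on the Python path, as in the test suite). packet_len is a deterministic function of payload_len, so their correlation exceeds the 0.5 threshold and that column is pruned, while txpower is kept:

from utils import prune_dependent_parameters

parameter_names = ['payload_len', 'packet_len', 'txpower']
by_name = {
    'send': {
        # one parameter tuple per measurement: [payload_len, packet_len, txpower]
        'param': [[8, 13, 0], [16, 21, 12], [32, 37, 4], [64, 69, 8]],
        'duration': [210, 340, 605, 1130],  # hypothetical attribute data, left untouched
    }
}

prune_dependent_parameters(by_name, parameter_names)

# parameter_names is now ['payload_len', 'txpower'] (edited in-place) and the
# packet_len column has been removed from every by_name['send']['param'] entry.
# The function also prints which pair was correlated and which parameter it removed.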
diff --git a/test/test_timingharness.py b/test/test_timingharness.py
index 6479f0a..b5937ad 100755
--- a/test/test_timingharness.py
+++ b/test/test_timingharness.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
from dfatool import AnalyticModel, TimingData, pta_trace_to_aggregate
+from utils import prune_dependent_parameters
import unittest
class TestModels(unittest.TestCase):
@@ -30,5 +31,34 @@ class TestModels(unittest.TestCase):
self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[2], 1, places=0)
self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[3], 1, places=0)
+ def test_dependent_parameter_pruning(self):
+ raw_data = TimingData(['test-data/20190815_103347_nRF24_no-rx.json'])
+ preprocessed_data = raw_data.get_preprocessed_data(verbose = False)
+ by_name, parameters, arg_count = pta_trace_to_aggregate(preprocessed_data)
+ prune_dependent_parameters(by_name, parameters)
+ model = AnalyticModel(by_name, parameters, arg_count, verbose = False)
+ self.assertEqual(model.names, 'getObserveTx setPALevel setRetries setup write'.split(' '))
+ static_model = model.get_static()
+ self.assertAlmostEqual(static_model('getObserveTx', 'duration'), 75, places=0)
+ self.assertAlmostEqual(static_model('setPALevel', 'duration'), 146, places=0)
+ self.assertAlmostEqual(static_model('setRetries', 'duration'), 73, places=0)
+ self.assertAlmostEqual(static_model('setup', 'duration'), 6533, places=0)
+ self.assertAlmostEqual(static_model('write', 'duration'), 12634, places=0)
+
+ for transition in 'getObserveTx setPALevel setRetries setup write'.split(' '):
+ self.assertAlmostEqual(model.stats.param_dependence_ratio(transition, 'duration', 'channel'), 0, places=2)
+
+ param_model, param_info = model.get_fitted()
+ self.assertEqual(param_info('setPALevel', 'duration'), None)
+ self.assertEqual(param_info('setRetries', 'duration'), None)
+ self.assertEqual(param_info('setup', 'duration'), None)
+ self.assertEqual(param_info('write', 'duration')['function']._model_str, '0 + regression_arg(0) + regression_arg(1) * parameter(max_retry_count) + regression_arg(2) * parameter(retry_delay) + regression_arg(3) * parameter(max_retry_count) * parameter(retry_delay)')
+
+ self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[0], 1163, places=0)
+ self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[1], 464, places=0)
+ self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[2], 1, places=0)
+ self.assertAlmostEqual(param_info('write', 'duration')['function']._regression_args[3], 1, places=0)
+
+
if __name__ == '__main__':
unittest.main()
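The new test case presumably runs like the existing ones, e.g. along the lines of PYTHONPATH=lib python3 test/test_timingharness.py, provided the referenced test-data/20190815_103347_nRF24_no-rx.json trace is available; the exact invocation depends on the project's test setup.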