diff options
author | Daniel Friesel <daniel.friesel@uos.de> | 2019-10-01 14:37:12 +0200 |
---|---|---|
committer | Daniel Friesel <daniel.friesel@uos.de> | 2019-10-01 14:37:12 +0200 |
commit | d720c861939bdd27388971a51ed59a0eea3cf594 (patch) | |
tree | 4da1a6c8d812089962299ade5ac08996563756bb /lib/dfatool.py | |
parent | a566aab2c15ea1a6760f4261810cace1741e168f (diff) |
initial check for boolean parameters' influence on other parameters
Diffstat (limited to 'lib/dfatool.py')
-rwxr-xr-x | lib/dfatool.py | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/lib/dfatool.py b/lib/dfatool.py
index 7f6a890..616e6fd 100755
--- a/lib/dfatool.py
+++ b/lib/dfatool.py
@@ -397,6 +397,7 @@ class ParamStats:
         """
         self.stats = dict()
         self.use_corrcoef = use_corrcoef
+        self._parameter_names = parameter_names
         # Note: This is deliberately single-threaded. The overhead incurred
         # by multiprocessing is higher than the speed gained by parallel
         # computation of statistics measures.
@@ -443,7 +444,20 @@ class ParamStats:
                 raise RuntimeError("wat")
             # In general, std_param_lut < std_by_param. So, if std_by_param == 0, std_param_lut == 0 follows.
             # This means that the variation of param does not affect the model quality -> no influence, return 1
-            return 1
+            return 1.
+
+        safe_div = np.vectorize(lambda x,y: 1. if x == 0 else x/y)
+        std_by_value = safe_div(statistics['lut_by_param_values'][param], statistics['std_by_param_values'][param])
+
+        i = 0
+        for other_param in self._parameter_names:
+            if param != other_param and not np.any(np.isnan(std_by_value)) and std_by_value.shape[i] > 1:
+                dep1 = np.all(std_by_value < 0.5, axis=i)
+                dep2 = np.all(std_by_value >= 0.5, axis=i)
+                if np.any(dep1 | dep2 == False):
+                    print('possible correlation {}/{} {} <-> {}'.format(state_or_trans, attribute, param, other_param))
+            i += 1
+
         return statistics['std_param_lut'] / statistics['std_by_param'][param]
 
     def param_dependence_ratio(self, state_or_trans, attribute, param):