initial check for boolean parameters' influence on other parameters

author: Daniel Friesel <daniel.friesel@uos.de> 2019-10-01 14:37:12 +0200
committer: Daniel Friesel <daniel.friesel@uos.de> 2019-10-01 14:37:12 +0200
commit: d720c861939bdd27388971a51ed59a0eea3cf594 (patch)
tree: 4da1a6c8d812089962299ade5ac08996563756bb /lib/dfatool.py
parent: a566aab2c15ea1a6760f4261810cace1741e168f (diff)
1 files changed, 15 insertions, 1 deletions
diff --git a/lib/dfatool.py b/lib/dfatool.py
index 7f6a890..616e6fd 100755
--- a/lib/dfatool.py
+++ b/lib/dfatool.py
@@ -397,6 +397,7 @@ class ParamStats:
         """
         self.stats = dict()
         self.use_corrcoef = use_corrcoef
+        self._parameter_names = parameter_names
         # Note: This is deliberately single-threaded. The overhead incurred
         # by multiprocessing is higher than the speed gained by parallel
         # computation of statistics measures.
@@ -443,7 +444,20 @@ class ParamStats:
                 raise RuntimeError("wat")
             # In general, std_param_lut < std_by_param. So, if std_by_param == 0, std_param_lut == 0 follows.
             # This means that the variation of param does not affect the model quality -> no influence, return 1
-            return 1
+            return 1.
+
+        safe_div = np.vectorize(lambda x,y: 1. if x == 0 else x/y)
+        std_by_value = safe_div(statistics['lut_by_param_values'][param], statistics['std_by_param_values'][param])
+
+        i = 0
+        for other_param in self._parameter_names:
+            if param != other_param and not np.any(np.isnan(std_by_value)) and std_by_value.shape[i] > 1:
+                dep1 = np.all(std_by_value < 0.5, axis=i)
+                dep2 = np.all(std_by_value >= 0.5, axis=i)
+                if np.any(dep1 | dep2 == False):
+                    print('possible correlation {}/{}  {} <-> {}'.format(state_or_trans, attribute, param, other_param))
+                i += 1
+
         return statistics['std_param_lut'] / statistics['std_by_param'][param]
 
     def param_dependence_ratio(self, state_or_trans, attribute, param):
author	Daniel Friesel <daniel.friesel@uos.de>	2019-10-01 14:37:12 +0200
committer	Daniel Friesel <daniel.friesel@uos.de>	2019-10-01 14:37:12 +0200
commit	d720c861939bdd27388971a51ed59a0eea3cf594 (patch)
tree	4da1a6c8d812089962299ade5ac08996563756bb /lib/dfatool.py
parent	a566aab2c15ea1a6760f4261810cace1741e168f (diff)