summary | refs | log | tree | commit | diff
path: root/lib/parameters.py
diff options
context:
space:
mode:
author: Birte Kristina Friesel <birte.friesel@uos.de>, 2024-06-28 16:38:40 +0200
committer: Birte Kristina Friesel <birte.friesel@uos.de>, 2024-06-28 16:38:40 +0200
commit: 7d6d1984b010cd2ddb47f04065c528837b53bfba (patch)
tree: eb70595f3e509e9fb1556d1142baf028a5e9542d /lib/parameters.py
parent: 31a4e67625730f765943e5b60c401d8027a50a96 (diff)
analyze-log: add --information-gain
Diffstat (limited to 'lib/parameters.py')
-rw-r--r-- lib/parameters.py | 26
1 files changed, 26 insertions, 0 deletions
diff --git a/lib/parameters.py b/lib/parameters.py
index 4047c10..352e7c7 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -604,6 +604,9 @@ class ModelAttribute:
# The best model we have. May be Static, Split, or Param (and later perhaps Substate)
self.model_function = None
+ # Information gain cache. Used for statistical analysis
+ self.mutual_information_cache = None
+
self._check_codependent_param()
# There must be at least 3 distinct data values (≠ None) if an analytic model
@@ -699,6 +702,29 @@ class ModelAttribute:
def webconf_function_map(self):
    """Delegate to self.model_function and return its webconf_function_map() result."""
    model_function = self.model_function
    return model_function.webconf_function_map()
+    def mutual_information(self):
+        """
+        Return the mutual information between each fit parameter and self.data.
+
+        Computed once via sklearn's mutual_info_regression and then served from
+        self.mutual_information_cache. Entries follow the column order of the
+        array returned by param_to_ndarray -- presumably parameters flagged in
+        its ignore_index are left out; confirm before indexing by param_names.
+        """
+        if self.mutual_information_cache is None:
+            # Function-scope import: sklearn is loaded only when this method runs.
+            from sklearn.feature_selection import mutual_info_regression
+
+            # The third return value (ignore_index) is not needed for the scores.
+            fit_parameters, _, _ = param_to_ndarray(
+                self.param_values, with_nan=False, categorical_to_scalar=True
+            )
+            self.mutual_information_cache = mutual_info_regression(
+                fit_parameters, self.data
+            )
+
+        return self.mutual_information_cache
+
@classmethod
def from_json(cls, name, attr, data):
param_names = data["paramNames"]