diff options
author | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-06-28 16:38:40 +0200 |
---|---|---|
committer | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-06-28 16:38:40 +0200 |
commit | 7d6d1984b010cd2ddb47f04065c528837b53bfba (patch) | |
tree | eb70595f3e509e9fb1556d1142baf028a5e9542d /lib/parameters.py | |
parent | 31a4e67625730f765943e5b60c401d8027a50a96 (diff) |
analyze-log: add --information-gain
Diffstat (limited to 'lib/parameters.py')
-rw-r--r-- | lib/parameters.py | 26 |
1 files changed, 26 insertions, 0 deletions
```diff
diff --git a/lib/parameters.py b/lib/parameters.py
index 4047c10..352e7c7 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -604,6 +604,9 @@ class ModelAttribute:
         # The best model we have. May be Static, Split, or Param (and later perhaps Substate)
         self.model_function = None
 
+        # Information gain cache. Used for statistical analysis
+        self.mutual_information_cache = None
+
         self._check_codependent_param()
 
         # There must be at least 3 distinct data values (≠ None) if an analytic model
@@ -699,6 +702,29 @@ class ModelAttribute:
     def webconf_function_map(self):
         return self.model_function.webconf_function_map()
 
+    def mutual_information(self):
+        if self.mutual_information_cache is not None:
+            return self.mutual_information_cache
+
+        from sklearn.feature_selection import mutual_info_regression
+
+        fit_parameters, _, ignore_index = param_to_ndarray(
+            self.param_values, with_nan=False, categorical_to_scalar=True
+        )
+
+        param_to_fit_param = dict()
+        j = 0
+        for i in range(len(self.param_names)):
+            if not ignore_index[i]:
+                param_to_fit_param[i] = j
+                j += 1
+
+        self.mutual_information_cache = mutual_info_regression(
+            fit_parameters, self.data
+        )
+
+        return self.mutual_information_cache
+
     @classmethod
     def from_json(cls, name, attr, data):
         param_names = data["paramNames"]
```