diff options
author | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-06-28 16:38:40 +0200 |
---|---|---|
committer | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-06-28 16:38:40 +0200 |
commit | 7d6d1984b010cd2ddb47f04065c528837b53bfba (patch) | |
tree | eb70595f3e509e9fb1556d1142baf028a5e9542d /lib | |
parent | 31a4e67625730f765943e5b60c401d8027a50a96 (diff) |
analyze-log: add --information-gain
Diffstat (limited to 'lib')
-rw-r--r-- | lib/cli.py | 14 | ||||
-rw-r--r-- | lib/parameters.py | 26 |
2 files changed, 40 insertions, 0 deletions
@@ -92,6 +92,15 @@ def print_info_by_name(model, by_name):
     )
 
 
+def print_information_gain_by_name(model, by_name):
+    for name in model.names:
+        for attr in model.attributes(name):
+            print(f"{name} {attr}:")
+            mutual_information = model.mutual_information(name, attr)
+            for i, param in enumerate(model.parameters):
+                print(f" Parameter {param} : {mutual_information[i]:5.2f}")
+
+
 def print_analyticinfo(prefix, info):
     model_function = info.model_function.removeprefix("0 + ")
     for i in range(len(info.model_args)):
@@ -576,6 +585,11 @@ def add_standard_arguments(parser):
         help="Show benchmark information (number of measurements, parameter values, ...)",
     )
     parser.add_argument(
+        "--information-gain",
+        action="store_true",
+        help="Show information gain of parameters",
+    )
+    parser.add_argument(
         "--log-level",
         metavar="LEVEL",
         choices=["debug", "info", "warning", "error"],
diff --git a/lib/parameters.py b/lib/parameters.py
index 4047c10..352e7c7 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -604,6 +604,9 @@ class ModelAttribute:
         # The best model we have. May be Static, Split, or Param (and later perhaps Substate)
         self.model_function = None
 
+        # Information gain cache. Used for statistical analysis
+        self.mutual_information_cache = None
+
         self._check_codependent_param()
 
         # There must be at least 3 distinct data values (≠ None) if an analytic model
@@ -699,6 +702,29 @@ class ModelAttribute:
     def webconf_function_map(self):
         return self.model_function.webconf_function_map()
 
+    def mutual_information(self):
+        if self.mutual_information_cache is not None:
+            return self.mutual_information_cache
+
+        from sklearn.feature_selection import mutual_info_regression
+
+        fit_parameters, _, ignore_index = param_to_ndarray(
+            self.param_values, with_nan=False, categorical_to_scalar=True
+        )
+
+        param_to_fit_param = dict()
+        j = 0
+        for i in range(len(self.param_names)):
+            if not ignore_index[i]:
+                param_to_fit_param[i] = j
+                j += 1
+
+        self.mutual_information_cache = mutual_info_regression(
+            fit_parameters, self.data
+        )
+
+        return self.mutual_information_cache
+
     @classmethod
     def from_json(cls, name, attr, data):
         param_names = data["paramNames"]