diff options
author | Daniel Friesel <daniel.friesel@uos.de> | 2021-03-25 15:50:25 +0100 |
---|---|---|
committer | Daniel Friesel <daniel.friesel@uos.de> | 2021-03-25 15:50:25 +0100 |
commit | ec6ba6a3ce9ffb142644bdddbcc42def853bc68a (patch) | |
tree | 668811625d6a6bb66b554602d0aadd7d0eac3809 | |
parent | 1eb4e7f47a6aa444fc24f17f2c06c2d419705df2 (diff) |
ParamStats: Remove unused std matrix / lut matrix
This speeds up analysis and adds support for models with more than 32
parameters.
-rw-r--r-- | lib/parameters.py | 85 |
1 file changed, 15 insertions, 70 deletions
diff --git a/lib/parameters.py b/lib/parameters.py index 99510b2..a8a8451 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -48,9 +48,9 @@ def _depends_on_param(corr_param, std_param, std_lut): return std_lut / std_param < 0.5 -def _std_by_param(n_by_param, all_param_values, param_index): +def _mean_std_by_param(n_by_param, all_param_values, param_index): """ - Calculate standard deviations for a static model where all parameters but `param_index` are constant. + Calculate the mean standard deviation for a static model where all parameters but `param_index` are constant. :param n_by_param: measurements of a specific model attribute partitioned by parameter values. Example: `{(0, 2): [2], (0, 4): [4], (0, 6): [6]}` @@ -58,32 +58,14 @@ def _std_by_param(n_by_param, all_param_values, param_index): E.g. for two parameters, the first being None, FOO, or BAR, and the second being 1, 2, 3, or 4, the argument is `[[None, 'FOO', 'BAR'], [1, 2, 3, 4]]`. :param param_index: index of variable parameter - :returns: (stddev matrix, mean stddev, LUT matrix) - *stddev matrix* is an ((number of parameters)-1)-dimensional matrix giving the standard deviation of each individual parameter variation partition. - E.g. for param_index == 2 and 4 parameters, stddev matrix[a, b, d] is the stddev of - measurements with param0 == all_param_values[0][a], - param1 == all_param_values[1][b], param2 variable, and - param3 == all_param_values[3][d]. + :returns: mean stddev *mean stddev* is the mean standard deviation of all measurements where parameter `param_index` is dynamic and all other parameters are fixed. E.g., if parameters are a, b, c ∈ {1,2,3} and 'index' corresponds to b, then this function returns the mean of the standard deviations of (a=1, b=*, c=1), (a=1, b=*, c=2), and so on. - *LUT matrix* is an ((number of parameters)-1)-dimensional matrix giving the mean standard deviation of individual partitions with entirely constant parameters. - E.g. 
for param_index == 2 and 4 parameters, LUT matrix[a][b][d] is the mean of - stddev(param0 -> a, param1 -> b, param2 -> first distinct value, param3 -> d), - stddev(param0 -> a, param1 -> b, param2 -> second distinct value, param3 -> d), - and so on. """ param_values = list(remove_index_from_tuple(all_param_values, param_index)) - info_shape = tuple(map(len, param_values)) - - # We will calculate the mean over the entire matrix later on. As we cannot - # guarantee that each entry will be filled in this loop (e.g. transitions - # whose arguments are combined using 'zip' rather than 'cartesian' always - # have missing parameter combinations), we pre-fill it with NaN and use - # np.nanmean to skip those when calculating the mean. - stddev_matrix = np.full(info_shape, np.nan) - lut_matrix = np.full(info_shape, np.nan) + partitions = list() for param_value in itertools.product(*param_values): param_partition = list() @@ -91,33 +73,14 @@ def _std_by_param(n_by_param, all_param_values, param_index): for k, v in n_by_param.items(): if (*k[:param_index], *k[param_index + 1 :]) == param_value: param_partition.extend(v) - std_list.append(np.std(v)) if len(param_partition) > 1: - matrix_index = list(range(len(param_value))) - for i in range(len(param_value)): - matrix_index[i] = param_values[i].index(param_value[i]) - matrix_index = tuple(matrix_index) - stddev_matrix[matrix_index] = np.std(param_partition) - lut_matrix[matrix_index] = np.mean(std_list) - # This can (and will) happen in normal operation, e.g. when a transition's - # arguments are combined using 'zip' rather than 'cartesian'. - # elif len(param_partition) == 1: - # vprint(verbose, '[W] parameter value partition for {} contains only one element -- skipping'.format(param_value)) - # else: - # vprint(verbose, '[W] parameter value partition for {} is empty'.format(param_value)) - - if np.all(np.isnan(stddev_matrix)): - logger.warning( - f"parameter #{param_index} has no data partitions. 
All stddev_matrix entries are NaN." - ) - return stddev_matrix, 0.0, lut_matrix + partitions.append(param_partition) + + if len(partitions) == 0: + return 0.0 - return ( - stddev_matrix, - np.nanmean(stddev_matrix), - lut_matrix, - ) # np.mean([np.std(partition) for partition in partitions]) + return np.mean([np.std(partition) for partition in partitions]) def _corr_by_param(attribute_data, param_values, param_index): @@ -216,12 +179,8 @@ def _compute_param_statistics( ret["std_param_lut"] = np.mean([np.std(v) for v in by_param.values()]) ret["std_by_param"] = dict() - ret["std_by_param_values"] = dict() - ret["lut_by_param_values"] = dict() ret["std_by_arg"] = list() - ret["std_by_arg_values"] = list() - ret["lut_by_arg_values"] = list() ret["corr_by_param"] = dict() ret["corr_by_arg"] = list() @@ -242,12 +201,8 @@ def _compute_param_statistics( else: by_param = ret["by_param"] distinct_values = ret["distinct_values_by_param_index"] - std_matrix, mean_std, lut_matrix = _std_by_param( - by_param, distinct_values, param_idx - ) + mean_std = _mean_std_by_param(by_param, distinct_values, param_idx) ret["std_by_param"][param] = mean_std - ret["std_by_param_values"][param] = std_matrix - ret["lut_by_param_values"][param] = lut_matrix ret["corr_by_param"][param] = _corr_by_param(data, param_tuples, param_idx) ret["_depends_on_param"][param] = _depends_on_param( @@ -269,14 +224,8 @@ def _compute_param_statistics( else: by_param = ret["by_param"] distinct_values = ret["distinct_values_by_param_index"] - std_matrix, mean_std, lut_matrix = _std_by_param( - by_param, - distinct_values, - param_idx, - ) + mean_std = _mean_std_by_param(by_param, distinct_values, param_idx) ret["std_by_arg"].append(mean_std) - ret["std_by_arg_values"].append(std_matrix) - ret["lut_by_arg_values"].append(lut_matrix) ret["corr_by_arg"].append(_corr_by_param(data, param_tuples, param_idx)) if False: @@ -601,9 +550,9 @@ class ModelAttribute: def _check_codependent_param(self): for ( - param1_index, 
- param2_index, - ), is_codependent in self.codependent_param_pair.items(): + (param1_index, param2_index), + is_codependent, + ) in self.codependent_param_pair.items(): if not is_codependent: continue param1_values = map(lambda pv: pv[param1_index], self.param_values) @@ -810,11 +759,7 @@ class ModelAttribute: ( (self.name, self.attr), arg_index, - ( - by_param, - param_index, - safe_functions_enabled, - ), + (by_param, param_index, safe_functions_enabled), dict(), ) ) |