diff options
-rwxr-xr-x | lib/dfatool.py | 16 | ||||
-rw-r--r-- | lib/utils.py | 18 |
2 files changed, 30 insertions, 4 deletions
diff --git a/lib/dfatool.py b/lib/dfatool.py index 7f6a890..616e6fd 100755 --- a/lib/dfatool.py +++ b/lib/dfatool.py @@ -397,6 +397,7 @@ class ParamStats: """ self.stats = dict() self.use_corrcoef = use_corrcoef + self._parameter_names = parameter_names # Note: This is deliberately single-threaded. The overhead incurred # by multiprocessing is higher than the speed gained by parallel # computation of statistics measures. @@ -443,7 +444,20 @@ class ParamStats: raise RuntimeError("wat") # In general, std_param_lut < std_by_param. So, if std_by_param == 0, std_param_lut == 0 follows. # This means that the variation of param does not affect the model quality -> no influence, return 1 - return 1 + return 1. + + safe_div = np.vectorize(lambda x,y: 1. if x == 0 else x/y) + std_by_value = safe_div(statistics['lut_by_param_values'][param], statistics['std_by_param_values'][param]) + + i = 0 + for other_param in self._parameter_names: + if param != other_param and not np.any(np.isnan(std_by_value)) and std_by_value.shape[i] > 1: + dep1 = np.all(std_by_value < 0.5, axis=i) + dep2 = np.all(std_by_value >= 0.5, axis=i) + if np.any(dep1 | dep2 == False): + print('possible correlation {}/{} {} <-> {}'.format(state_or_trans, attribute, param, other_param)) + i += 1 + return statistics['std_param_lut'] / statistics['std_by_param'][param] def param_dependence_ratio(self, state_or_trans, attribute, param): diff --git a/lib/utils.py b/lib/utils.py index 3cd75b0..0910d8a 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -221,7 +221,11 @@ def compute_param_statistics(by_name, by_param, parameter_names, arg_count, stat 'std_static' : np.std(by_name[state_or_trans][attribute]), 'std_param_lut' : np.mean([np.std(by_param[x][attribute]) for x in by_param.keys() if x[0] == state_or_trans]), 'std_by_param' : {}, + 'std_by_param_values' : {}, + 'lut_by_param_values' : {}, 'std_by_arg' : [], + 'std_by_arg_values' : [], + 'lut_by_arg_values' : [], 'corr_by_param' : {}, 'corr_by_arg' : [], } @@ -229,13 +233,17 @@ def compute_param_statistics(by_name, by_param, parameter_names, arg_count, stat np.seterr('raise') for param_idx, param in enumerate(parameter_names): - std_matrix, mean_std = _std_by_param(by_param, state_or_trans, attribute, param_idx, verbose) + std_matrix, mean_std, lut_matrix = _std_by_param(by_param, state_or_trans, attribute, param_idx, verbose) ret['std_by_param'][param] = mean_std + ret['std_by_param_values'][param] = std_matrix + ret['lut_by_param_values'][param] = lut_matrix ret['corr_by_param'][param] = _corr_by_param(by_name, state_or_trans, attribute, param_idx) if arg_support_enabled and state_or_trans in arg_count: for arg_index in range(arg_count[state_or_trans]): - std_matrix, mean_std = _std_by_param(by_param, state_or_trans, attribute, len(parameter_names) + arg_index, verbose) + std_matrix, mean_std, lut_matrix = _std_by_param(by_param, state_or_trans, attribute, len(parameter_names) + arg_index, verbose) ret['std_by_arg'].append(mean_std) + ret['std_by_arg_values'].append(std_matrix) + ret['lut_by_arg_values'].append(lut_matrix) ret['corr_by_arg'].append(_corr_by_param(by_name, state_or_trans, attribute, len(parameter_names) + arg_index)) return ret @@ -301,12 +309,15 @@ def _std_by_param(by_param, state_or_tran, attribute, param_index, verbose = Fal # have missing parameter combinations), we pre-fill it with NaN and use # np.nanmean to skip those when calculating the mean. stddev_matrix = np.full(info_shape, np.nan) + lut_matrix = np.full(info_shape, np.nan) for param_value in itertools.product(*param_values): param_partition = list() + std_list = list() for k, v in by_param.items(): if k[0] == state_or_tran and (*k[1][:param_index], *k[1][param_index+1:]) == param_value: param_partition.extend(v[attribute]) + std_list.append(np.std(v[attribute])) if len(param_partition) > 1: matrix_index = list(range(len(param_value))) @@ -314,6 +325,7 @@ def _std_by_param(by_param, state_or_tran, attribute, param_index, verbose = Fal matrix_index[i] = param_values[i].index(param_value[i]) matrix_index = tuple(matrix_index) stddev_matrix[matrix_index] = np.std(param_partition) + lut_matrix[matrix_index] = np.mean(std_list) # This can (and will) happen in normal operation, e.g. when a transition's # arguments are combined using 'zip' rather than 'cartesian'. #elif len(param_partition) == 1: @@ -326,7 +338,7 @@ def _std_by_param(by_param, state_or_tran, attribute, param_index, verbose = Fal vprint(verbose, 'stddev_matrix = {}'.format(stddev_matrix)) return stddev_matrix, 0. - return stddev_matrix, np.nanmean(stddev_matrix) #np.mean([np.std(partition) for partition in partitions]) + return stddev_matrix, np.nanmean(stddev_matrix), lut_matrix #np.mean([np.std(partition) for partition in partitions]) def _corr_by_param(by_name, state_or_trans, attribute, param_index): if _all_params_are_numeric(by_name[state_or_trans], param_index): |