summaryrefslogtreecommitdiff
path: root/lib/parameters.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/parameters.py')
-rw-r--r--lib/parameters.py191
1 files changed, 80 insertions, 111 deletions
diff --git a/lib/parameters.py b/lib/parameters.py
index ea14ad2..99510b2 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -3,7 +3,6 @@ import itertools
import logging
import numpy as np
import os
-import warnings
from collections import OrderedDict
from copy import deepcopy
from multiprocessing import Pool
@@ -109,12 +108,10 @@ def _std_by_param(n_by_param, all_param_values, param_index):
# vprint(verbose, '[W] parameter value partition for {} is empty'.format(param_value))
if np.all(np.isnan(stddev_matrix)):
- warnings.warn(
- "parameter #{} has no data partitions. stddev_matrix = {}".format(
- param_index, stddev_matrix
- )
+ logger.warning(
+ f"parameter #{param_index} has no data partitions. All stddev_matrix entries are NaN."
)
- return stddev_matrix, 0.0
+ return stddev_matrix, 0.0, lut_matrix
return (
stddev_matrix,
@@ -159,7 +156,12 @@ def _corr_by_param(attribute_data, param_values, param_index):
def _compute_param_statistics(
- data, param_names, param_tuples, arg_count=None, use_corrcoef=False
+ data,
+ param_names,
+ param_tuples,
+ arg_count=None,
+ use_corrcoef=False,
+ codependent_params=list(),
):
"""
Compute standard deviation and correlation coefficient on parameterized data partitions.
@@ -230,8 +232,18 @@ def _compute_param_statistics(
np.seterr("raise")
for param_idx, param in enumerate(param_names):
+ if param_idx < len(codependent_params) and codependent_params[param_idx]:
+ by_param = partition_by_param(
+ data, param_tuples, ignore_parameters=codependent_params[param_idx]
+ )
+ distinct_values = ret["distinct_values_by_param_index"].copy()
+ for codependent_param_index in codependent_params[param_idx]:
+ distinct_values[codependent_param_index] = [None]
+ else:
+ by_param = ret["by_param"]
+ distinct_values = ret["distinct_values_by_param_index"]
std_matrix, mean_std, lut_matrix = _std_by_param(
- by_param, ret["distinct_values_by_param_index"], param_idx
+ by_param, distinct_values, param_idx
)
ret["std_by_param"][param] = mean_std
ret["std_by_param_values"][param] = std_matrix
@@ -246,17 +258,26 @@ def _compute_param_statistics(
if arg_count:
for arg_index in range(arg_count):
+ param_idx = len(param_names) + arg_index
+ if param_idx < len(codependent_params) and codependent_params[param_idx]:
+ by_param = partition_by_param(
+ data, param_tuples, ignore_parameters=codependent_params[param_idx]
+ )
+ distinct_values = ret["distinct_values_by_param_index"].copy()
+ for codependent_param_index in codependent_params[param_idx]:
+ distinct_values[codependent_param_index] = [None]
+ else:
+ by_param = ret["by_param"]
+ distinct_values = ret["distinct_values_by_param_index"]
std_matrix, mean_std, lut_matrix = _std_by_param(
by_param,
- ret["distinct_values_by_param_index"],
- len(param_names) + arg_index,
+ distinct_values,
+ param_idx,
)
ret["std_by_arg"].append(mean_std)
ret["std_by_arg_values"].append(std_matrix)
ret["lut_by_arg_values"].append(lut_matrix)
- ret["corr_by_arg"].append(
- _corr_by_param(data, param_tuples, len(param_names) + arg_index)
- )
+ ret["corr_by_arg"].append(_corr_by_param(data, param_tuples, param_idx))
if False:
ret["_depends_on_arg"].append(ret["corr_by_arg"][arg_index] > 0.1)
@@ -326,91 +347,6 @@ def _all_params_are_numeric(data, param_idx):
return False
-def prune_dependent_parameters(by_name, parameter_names, correlation_threshold=0.5):
- """
- Remove dependent parameters from aggregate.
-
- :param by_name: measurements partitioned by state/transition/... name and attribute, edited in-place.
- by_name[name][attribute] must be a list or 1-D numpy array.
- by_name[stanamete_or_trans]['param'] must be a list of parameter values.
- Other dict members are left as-is
- :param parameter_names: List of parameter names in the order they are used in by_name[name]['param'], edited in-place.
- :param correlation_threshold: Remove parameter if absolute correlation exceeds this threshold (default: 0.5)
-
- Model generation (and its components, such as relevant parameter detection and least squares optimization) only works if input variables (i.e., parameters)
- are independent of each other. This function computes the correlation coefficient for each pair of parameters and removes those which depend on each other.
- For each pair of dependent parameters, the lexically greater one is removed (e.g. "a" and "b" -> "b" is removed).
- """
-
- parameter_indices_to_remove = list()
- for parameter_combination in itertools.product(
- range(len(parameter_names)), range(len(parameter_names))
- ):
- index_1, index_2 = parameter_combination
- if index_1 >= index_2:
- continue
- parameter_values = [list(), list()] # both parameters have a value
- parameter_values_1 = list() # parameter 1 has a value
- parameter_values_2 = list() # parameter 2 has a value
- for name in by_name:
- for measurement in by_name[name]["param"]:
- value_1 = measurement[index_1]
- value_2 = measurement[index_2]
- if is_numeric(value_1):
- parameter_values_1.append(value_1)
- if is_numeric(value_2):
- parameter_values_2.append(value_2)
- if is_numeric(value_1) and is_numeric(value_2):
- parameter_values[0].append(value_1)
- parameter_values[1].append(value_2)
- if len(parameter_values[0]):
- # Calculating the correlation coefficient only makes sense when neither value is constant
- if np.std(parameter_values_1) != 0 and np.std(parameter_values_2) != 0:
- correlation = np.corrcoef(parameter_values)[0][1]
- if (
- correlation != np.nan
- and np.abs(correlation) > correlation_threshold
- ):
- logger.debug(
- "Parameters {} <-> {} are correlated with coefficcient {}".format(
- parameter_names[index_1],
- parameter_names[index_2],
- correlation,
- )
- )
- if len(parameter_values_1) < len(parameter_values_2):
- index_to_remove = index_1
- else:
- index_to_remove = index_2
- logger.debug(
- " Removing parameter {}".format(
- parameter_names[index_to_remove]
- )
- )
- parameter_indices_to_remove.append(index_to_remove)
- remove_parameters_by_indices(by_name, parameter_names, parameter_indices_to_remove)
-
-
-def remove_parameters_by_indices(by_name, parameter_names, parameter_indices_to_remove):
- """
- Remove parameters listed in `parameter_indices` from aggregate `by_name` and `parameter_names`.
-
- :param by_name: measurements partitioned by state/transition/... name and attribute, edited in-place.
- by_name[name][attribute] must be a list or 1-D numpy array.
- by_name[stanamete_or_trans]['param'] must be a list of parameter values.
- Other dict members are left as-is
- :param parameter_names: List of parameter names in the order they are used in by_name[name]['param'], edited in-place.
- :param parameter_indices_to_remove: List of parameter indices to be removed
- """
-
- # Start removal from the end of the list to avoid renumbering of list elemenets
- for parameter_index in sorted(parameter_indices_to_remove, reverse=True):
- for name in by_name:
- for measurement in by_name[name]["param"]:
- measurement.pop(parameter_index)
- parameter_names.pop(parameter_index)
-
-
class ParallelParamStats:
def __init__(self):
self.queue = list()
@@ -425,6 +361,8 @@ class ParallelParamStats:
attr.param_names,
attr.param_values,
attr.arg_count,
+ False,
+ attr.codependent_params,
],
}
)
@@ -458,6 +396,7 @@ class ParamStats:
attr.param_values,
arg_count=attr.arg_count,
use_corrcoef=use_corrcoef,
+ codependent_params=attr.codependent_params,
)
attr.by_param = res.pop("by_param")
attr.stats = cls(res)
@@ -609,9 +548,10 @@ class ModelAttribute:
map(lambda i: f"arg{i}", range(arg_count))
)
- # Co-dependent parameters. If (paam1_index, param2_index) in codependent_param, they are codependent.
+ # Co-dependent parameters. If (param1_index, param2_index) in codependent_param, they are codependent.
# In this case, only one of them must be used for parameter-dependent model attribute detection and modeling
- self.codependent_param = codependent_param
+ self.codependent_param_pair = codependent_param
+ self.codependent_params = [list() for x in self.log_param_names]
self.ignore_param = dict()
# Static model used as lower bound of model accuracy
@@ -663,7 +603,7 @@ class ModelAttribute:
for (
param1_index,
param2_index,
- ), is_codependent in self.codependent_param.items():
+ ), is_codependent in self.codependent_param_pair.items():
if not is_codependent:
continue
param1_values = map(lambda pv: pv[param1_index], self.param_values)
@@ -672,12 +612,14 @@ class ModelAttribute:
param2_numeric_count = sum(map(is_numeric, param2_values))
if param1_numeric_count >= param2_numeric_count:
self.ignore_param[param2_index] = True
- logger.warning(
+ self.codependent_params[param1_index].append(param2_index)
+ logger.info(
f"{self.name} {self.attr}: parameters ({self.log_param_names[param1_index]}, {self.log_param_names[param2_index]}) are codependent. Ignoring {self.log_param_names[param2_index]}"
)
else:
self.ignore_param[param1_index] = True
- logger.warning(
+ self.codependent_params[param2_index].append(param1_index)
+ logger.info(
f"{self.name} {self.attr}: parameters ({self.log_param_names[param1_index]}, {self.log_param_names[param2_index]}) are codependent. Ignoring {self.log_param_names[param1_index]}"
)
@@ -719,10 +661,22 @@ class ModelAttribute:
# )
child1 = ModelAttribute(
- self.name, self.attr, self.data[tt1], pv1, self.param_names, self.arg_count
+ self.name,
+ self.attr,
+ self.data[tt1],
+ pv1,
+ self.param_names,
+ self.arg_count,
+ codependent_param_dict(pv1),
)
child2 = ModelAttribute(
- self.name, self.attr, self.data[tt2], pv2, self.param_names, self.arg_count
+ self.name,
+ self.attr,
+ self.data[tt2],
+ pv2,
+ self.param_names,
+ self.arg_count,
+ codependent_param_dict(pv2),
)
ParamStats.compute_for_attr(child1)
@@ -814,10 +768,20 @@ class ModelAttribute:
child_ret = child.get_data_for_paramfit(
safe_functions_enabled=safe_functions_enabled
)
- for key, param, val in child_ret:
- ret.append((key[:2] + (param_value,) + key[2:], param, val))
+ for key, param, args, kwargs in child_ret:
+ ret.append((key[:2] + (param_value,) + key[2:], param, args, kwargs))
return ret
+ def _by_param_for_index(self, param_index):
+ if not self.codependent_params[param_index]:
+ return self.by_param
+ new_param_values = list()
+ for param_tuple in self.param_values:
+ for i in self.codependent_params[param_index]:
+ param_tuple[i] = None
+ new_param_values.append(param_tuple)
+ return partition_by_param(self.data, new_param_values)
+
def get_data_for_paramfit_this(self, safe_functions_enabled=False):
ret = list()
for param_index, param_name in enumerate(self.param_names):
@@ -825,28 +789,33 @@ class ModelAttribute:
self.stats.depends_on_param(param_name)
and not param_index in self.ignore_param
):
+ by_param = self._by_param_for_index(param_index)
ret.append(
(
(self.name, self.attr),
param_name,
- (self.by_param, param_index, safe_functions_enabled),
+ (by_param, param_index, safe_functions_enabled),
+ dict(),
)
)
if self.arg_count:
for arg_index in range(self.arg_count):
+ param_index = len(self.param_names) + arg_index
if (
self.stats.depends_on_arg(arg_index)
- and not arg_index + len(self.param_names) in self.ignore_param
+ and not param_index in self.ignore_param
):
+ by_param = self._by_param_for_index(param_index)
ret.append(
(
(self.name, self.attr),
arg_index,
(
- self.by_param,
- len(self.param_names) + arg_index,
+ by_param,
+ param_index,
safe_functions_enabled,
),
+ dict(),
)
)