Do not pass the entirety of by_nane/by_param to subprocesses

This should speed up analysis quite a bit and also reduce memory usage significantly
author: Daniel Friesel <daniel.friesel@uos.de> 2020-11-02 16:04:12 +0100
committer: Daniel Friesel <daniel.friesel@uos.de> 2020-11-02 16:04:12 +0100
commit: ea627ab6d9b47c53e6b1e34837e928c9d599db51 (patch)
tree: 40f706c314cbe62b7858848915a43bbe200342ef /lib/model.py
parent: 69b4f98dc1de99971b3f5fb56fb07c04fa229e9c (diff)
1 files changed, 27 insertions, 41 deletions
diff --git a/lib/model.py b/lib/model.py
index 192cea3..1190fb0 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -114,17 +114,17 @@ class ParallelParamFit:
 
         This causes fit() to compute the best-fitting function for this model part.
         """
+        # Transform by_param[(state_or_tran, param_value)][attribute] = ...
+        # into n_by_param[param_value] = ...
+        # (param_value is dynamic, the rest is fixed)
+        n_by_param = dict()
+        for k, v in self.by_param.items():
+            if k[0] == state_or_tran:
+                n_by_param[k[1]] = v[attribute]
         self.fit_queue.append(
             {
                 "key": [state_or_tran, attribute, param_name, param_filter],
-                "args": [
-                    self.by_param,
-                    state_or_tran,
-                    attribute,
-                    param_index,
-                    safe_functions_enabled,
-                    param_filter,
-                ],
+                "args": [n_by_param, param_index, safe_functions_enabled, param_filter],
             }
         )
 
@@ -201,20 +201,15 @@ def _try_fits_parallel(arg):
 
 
 def _try_fits(
-    by_param,
-    state_or_tran,
-    model_attribute,
-    param_index,
-    safe_functions_enabled=False,
-    param_filter: dict = None,
+    n_by_param, param_index, safe_functions_enabled=False, param_filter: dict = None
 ):
     """
-    Determine goodness-of-fit for prediction of `by_param[(state_or_tran, *)][model_attribute]` dependence on `param_index` using various functions.
+    Determine goodness-of-fit for prediction of `n_by_param[(param1_value, param2_value, ...)]` dependence on `param_index` using various functions.
 
     This is done by varying `param_index` while keeping all other parameters constant and doing one least squares optimization for each function and for each combination of the remaining parameters.
     The value of the parameter corresponding to `param_index` (e.g. txpower or packet length) is the sole input to the model function.
     Only numeric parameter values (as determined by `utils.is_numeric`) are used for fitting, non-numeric values such as None or enum strings are ignored.
-    Fitting is only performed if at least three distinct parameter values exist in `by_param[(state_or_tran, *)]`.
+    Fitting is only performed if at least three distinct parameter values exist in `by_param[*]`.
 
     :returns:  a dictionary with the following elements:
         best -- name of the best-fitting function (see `analytic.functions`). `None` in case of insufficient data.
@@ -223,14 +218,8 @@ def _try_fits(
         median_rmsd -- mean Root Mean Square Deviation of a reference model using the median of its respective input data as model value
         results -- mean goodness-of-fit measures for the individual functions. See `analytic.functions` for keys and `aggregate_measures` for values
 
-    :param by_param: measurements partitioned by state/transition/... name and parameter values.
-    Example: `{('foo', (0, 2)): {'bar': [2]}, ('foo', (0, 4)): {'bar': [4]}, ('foo', (0, 6)): {'bar': [6]}}`
-
-    :param state_or_tran: state/transition/... name for which goodness-of-fit will be calculated (first element of by_param key tuple).
-    Example: `'foo'`
-
-    :param model_attribute: attribute for which goodness-of-fit will be calculated.
-    Example: `'bar'`
+    :param n_by_param: measurements of a specific model attribute partitioned by parameter values.
+        Example: `{(0, 2): [2], (0, 4): [4], (0, 6): [6]}`
 
     :param param_index: index of the parameter used as model input
     :param safe_functions_enabled: Include "safe" variants of functions with limited argument range.
@@ -239,15 +228,15 @@ def _try_fits(
 
     functions = analytic.functions(safe_functions_enabled=safe_functions_enabled)
 
-    for param_key in filter(lambda x: x[0] == state_or_tran, by_param.keys()):
+    for param_key in n_by_param.keys():
         # We might remove elements from 'functions' while iterating over
         # its keys. A generator will not allow this, so we need to
         # convert to a list.
         function_names = list(functions.keys())
         for function_name in function_names:
             function_object = functions[function_name]
-            if is_numeric(param_key[1][param_index]) and not function_object.is_valid(
-                param_key[1][param_index]
+            if is_numeric(param_key[param_index]) and not function_object.is_valid(
+                param_key[param_index]
             ):
                 functions.pop(function_name, None)
 
@@ -261,12 +250,11 @@ def _try_fits(
 
     # for each parameter combination:
     for param_key in filter(
-        lambda x: x[0] == state_or_tran
-        and remove_index_from_tuple(x[1], param_index)
+        lambda x: remove_index_from_tuple(x, param_index)
         not in seen_parameter_combinations
-        and len(by_param[x]["param"])
-        and match_parameter_values(by_param[x]["param"][0], param_filter),
-        by_param.keys(),
+        and len(n_by_param[x])
+        and match_parameter_values(n_by_param[x][0], param_filter),
+        n_by_param.keys(),
     ):
         X = []
         Y = []
@@ -275,24 +263,22 @@ def _try_fits(
 
         # Ensure that each parameter combination is only optimized once. Otherwise, with parameters (1, 2, 5), (1, 3, 5), (1, 4, 5) and param_index == 1,
         # the parameter combination (1, *, 5) would be optimized three times, both wasting time and biasing results towards more frequently occuring combinations of non-param_index parameters
-        seen_parameter_combinations.add(
-            remove_index_from_tuple(param_key[1], param_index)
-        )
+        seen_parameter_combinations.add(remove_index_from_tuple(param_key, param_index))
 
         # for each value of the parameter denoted by param_index (all other parameters remain the same):
         for k, v in filter(
-            lambda kv: param_slice_eq(kv[0], param_key, param_index), by_param.items()
+            lambda kv: param_slice_eq(kv[0], param_key, param_index), n_by_param.items()
         ):
             num_total += 1
-            if is_numeric(k[1][param_index]):
+            if is_numeric(k[param_index]):
                 num_valid += 1
-                X.extend([float(k[1][param_index])] * len(v[model_attribute]))
-                Y.extend(v[model_attribute])
+                X.extend([float(k[param_index])] * len(v))
+                Y.extend(v)
 
         if num_valid > 2:
             X = np.array(X)
             Y = np.array(Y)
-            other_parameters = remove_index_from_tuple(k[1], param_index)
+            other_parameters = remove_index_from_tuple(k, param_index)
             raw_results_by_param[other_parameters] = dict()
             results_by_param[other_parameters] = dict()
             for function_name, param_function in functions.items():
@@ -318,7 +304,7 @@ def _try_fits(
 
     if not len(ref_results["mean"]):
         # Insufficient data for fitting
-        # print('[W] Insufficient data for fitting {}/{}/{}'.format(state_or_tran, model_attribute, param_index))
+        # print('[W] Insufficient data for fitting {}'.format(param_index))
         return {"best": None, "best_rmsd": np.inf, "results": results}
 
     for (
author	Daniel Friesel <daniel.friesel@uos.de>	2020-11-02 16:04:12 +0100
committer	Daniel Friesel <daniel.friesel@uos.de>	2020-11-02 16:04:12 +0100
commit	ea627ab6d9b47c53e6b1e34837e928c9d599db51 (patch)
tree	40f706c314cbe62b7858848915a43bbe200342ef /lib/model.py
parent	69b4f98dc1de99971b3f5fb56fb07c04fa229e9c (diff)