move parameter detection / statistics methods to utils (for now)

author: Daniel Friesel <derf@finalrewind.org> 2019-02-01 08:27:19 +0100
committer: Daniel Friesel <derf@finalrewind.org> 2019-02-01 08:27:19 +0100
commit: 8ebb9fc80fb9becf9951fdc8502eefa3eb3868c5 (patch)
tree: 5081c1602889ee6a3516222f51552441832efd80 /lib
parent: e488f05cb5b57cbfb33a92198bd3b269300558bd (diff)
3 files changed, 103 insertions, 93 deletions
diff --git a/lib/dfatool.py b/lib/dfatool.py
index 535fb30..5c55993 100755
--- a/lib/dfatool.py
+++ b/lib/dfatool.py
@@ -15,7 +15,7 @@ from multiprocessing import Pool
 from automata import PTA
 from functions import analytic
 from functions import AnalyticFunction
-from utils import is_numeric
+from utils import *
 
 arg_support_enabled = True
 
@@ -528,25 +528,6 @@ class RawData:
             'num_valid' : num_valid
         }
 
-def _param_slice_eq(a, b, index):
-    """
-    Check if by_param keys a and b are identical, ignoring the parameter at index.
-
-    parameters:
-    a, b -- (state/transition name, [parameter0 value, parameter1 value, ...])
-    index -- parameter index to ignore (0 -> parameter0, 1 -> parameter1, etc.)
-
-    Returns True iff a and b have the same state/transition name, and all
-    parameters at positions != index are identical.
-
-    example:
-    ('foo', [1, 4]), ('foo', [2, 4]), 0 -> True
-    ('foo', [1, 4]), ('foo', [2, 4]), 1 -> False
-    """
-    if (*a[1][:index], *a[1][index+1:]) == (*b[1][:index], *b[1][index+1:]) and a[0] == b[0]:
-        return True
-    return False
-
 
 def _try_fits_parallel(arg):
     return {
@@ -582,7 +563,7 @@ def _try_fits(by_param, state_or_tran, model_attribute, param_index, safe_functi
         num_valid = 0
         num_total = 0
         for k, v in by_param.items():
-            if _param_slice_eq(k, param_key, param_index):
+            if param_slice_eq(k, param_key, param_index):
                 num_total += 1
                 if is_numeric(k[1][param_index]):
                     num_valid += 1
@@ -627,75 +608,6 @@ def _try_fits(by_param, state_or_tran, model_attribute, param_index, safe_functi
         'results' : results
     }
 
-def _all_params_are_numeric(data, param_idx):
-    param_values = list(map(lambda x: x[param_idx], data['param']))
-    if len(list(filter(is_numeric, param_values))) == len(param_values):
-        return True
-    return False
-
-def _compute_param_statistics(by_name, by_param, parameter_names, num_args, state_or_trans, key):
-    ret = {
-        'std_static' : np.std(by_name[state_or_trans][key]),
-        'std_param_lut' : np.mean([np.std(by_param[x][key]) for x in by_param.keys() if x[0] == state_or_trans]),
-        'std_by_param' : {},
-        'std_by_arg' : [],
-        'corr_by_param' : {},
-        'corr_by_arg' : [],
-    }
-
-    for param_idx, param in enumerate(parameter_names):
-        ret['std_by_param'][param] = _mean_std_by_param(by_param, state_or_trans, key, param_idx)
-        ret['corr_by_param'][param] = _corr_by_param(by_name, state_or_trans, key, param_idx)
-    if arg_support_enabled and state_or_trans in num_args:
-        for arg_index in range(num_args[state_or_trans]):
-            ret['std_by_arg'].append(_mean_std_by_param(by_param, state_or_trans, key, len(parameter_names) + arg_index))
-            ret['corr_by_arg'].append(_corr_by_param(by_name, state_or_trans, key, len(parameter_names) + arg_index))
-
-    return ret
-
-def _mean_std_by_param(by_param, state_or_tran, key, param_index):
-    u"""
-    Calculate the mean standard deviation for a static model where all parameters but param_index are constant.
-
-    arguments:
-    by_param -- measurements sorted by key/transition name and parameter values
-    state_or_tran -- state or transition name (-> by_param[(state_or_tran, *)])
-    key -- model attribute, e.g. 'power' or 'duration'
-           (-> by_param[(state_or_tran, *)][key])
-    param_index -- index of variable parameter
-
-    Returns the mean standard deviation of all measurements of 'key'
-    (e.g. power consumption or timeout) for state/transition 'state_or_tran' where
-    parameter 'param_index' is dynamic and all other parameters are fixed.
-    I.e., if parameters are a, b, c ∈ {1,2,3} and 'index' corresponds to b, then
-    this function returns the mean of the standard deviations of (a=1, b=*, c=1),
-    (a=1, b=*, c=2), and so on.
-    """
-    partitions = []
-    for param_value in filter(lambda x: x[0] == state_or_tran, by_param.keys()):
-        param_partition = []
-        for k, v in by_param.items():
-            if _param_slice_eq(k, param_value, param_index):
-                param_partition.extend(v[key])
-        if len(param_partition):
-            partitions.append(param_partition)
-        else:
-            print('[W] parameter value partition for {} is empty'.format(param_value))
-    return np.mean([np.std(partition) for partition in partitions])
-
-def _corr_by_param(by_name, state_or_trans, key, param_index):
-    if _all_params_are_numeric(by_name[state_or_trans], param_index):
-        param_values = np.array(list((map(lambda x: x[param_index], by_name[state_or_trans]['param']))))
-        try:
-            return np.corrcoef(by_name[state_or_trans][key], param_values)[0, 1]
-        except FloatingPointError as fpe:
-            # Typically happens when all parameter values are identical.
-            # Building a correlation coefficient is pointless in this case
-            # -> assume no correlation
-            return 0.
-    else:
-        return 0.
-
 class EnergyModel:
     u"""
     parameter-aware PTA-based energy model.
@@ -877,7 +789,7 @@ class EnergyModel:
             self.stats[state_or_trans] = {}
             for key in self.by_name[state_or_trans]['attributes']:
                 if key in self.by_name[state_or_trans]:
-                    self.stats[state_or_trans][key] = _compute_param_statistics(self.by_name, self.by_param, self._parameter_names, self._num_args, state_or_trans, key)
+                    self.stats[state_or_trans][key] = compute_param_statistics(self.by_name, self.by_param, self._parameter_names, self._num_args, state_or_trans, key)
 
     @classmethod
     def from_model(self, model_data, parameter_names):
diff --git a/lib/functions.py b/lib/functions.py
index 22d7e46..c58db4f 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -162,7 +162,10 @@ class AnalyticFunction:
             which must be a list or 1-D NumPy array containing the ground truth.
             The parameter values in (state_or_tran, *) must be numeric for
             all parameters this function depends on -- otherwise, the
-            corresponding data will be left out.
+            corresponding data will be left out. Parameter values must be
+            ordered according to the order of parameter names used in
+            the ParamFunction constructor. Argument values (if any) always come after
+            parameters, in the order of their index in the function signature.
         state_or_tran -- state or transition name, e.g. "TX" or "send"
         model_attribute -- model attribute name, e.g. "power" or "duration"
 
@@ -214,7 +217,10 @@ class AnalyticFunction:
         model_attribute -- model attribute name, e.g. "power" or "duration"
 
         The ground truth is read from by_param[(state_or_tran, *)][model_attribute],
-        which must be a list or 1-D NumPy array.
+        which must be a list or 1-D NumPy array. Parameter values must be
+        ordered according to the parameter names in the constructor. If
+        argument values are present, they must come after parameter values
+        in the order of their appearance in the function signature.
         """
         X, Y, num_valid, num_total = self.get_fit_data(by_param, state_or_tran, model_attribute)
         if num_valid > 2:
diff --git a/lib/utils.py b/lib/utils.py
index 405d148..b496a7b 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -1,3 +1,7 @@
+import numpy as np
+
+arg_support_enabled = True
+
 def is_numeric(n):
     if n == None:
         return False
@@ -6,3 +10,91 @@ def is_numeric(n):
         return True
     except ValueError:
         return False
+
+def param_slice_eq(a, b, index):
+    """
+    Check if by_param keys a and b are identical, ignoring the parameter at index.
+
+    parameters:
+    a, b -- (state/transition name, [parameter0 value, parameter1 value, ...])
+    index -- parameter index to ignore (0 -> parameter0, 1 -> parameter1, etc.)
+
+    Returns True iff a and b have the same state/transition name, and all
+    parameters at positions != index are identical.
+
+    example:
+    ('foo', [1, 4]), ('foo', [2, 4]), 0 -> True
+    ('foo', [1, 4]), ('foo', [2, 4]), 1 -> False
+    """
+    if (*a[1][:index], *a[1][index+1:]) == (*b[1][:index], *b[1][index+1:]) and a[0] == b[0]:
+        return True
+    return False
+
+def compute_param_statistics(by_name, by_param, parameter_names, num_args, state_or_trans, key):
+    ret = {
+        'std_static' : np.std(by_name[state_or_trans][key]),
+        'std_param_lut' : np.mean([np.std(by_param[x][key]) for x in by_param.keys() if x[0] == state_or_trans]),
+        'std_by_param' : {},
+        'std_by_arg' : [],
+        'corr_by_param' : {},
+        'corr_by_arg' : [],
+    }
+
+    for param_idx, param in enumerate(parameter_names):
+        ret['std_by_param'][param] = _mean_std_by_param(by_param, state_or_trans, key, param_idx)
+        ret['corr_by_param'][param] = _corr_by_param(by_name, state_or_trans, key, param_idx)
+    if arg_support_enabled and state_or_trans in num_args:
+        for arg_index in range(num_args[state_or_trans]):
+            ret['std_by_arg'].append(_mean_std_by_param(by_param, state_or_trans, key, len(parameter_names) + arg_index))
+            ret['corr_by_arg'].append(_corr_by_param(by_name, state_or_trans, key, len(parameter_names) + arg_index))
+
+    return ret
+
+def _mean_std_by_param(by_param, state_or_tran, key, param_index):
+    u"""
+    Calculate the mean standard deviation for a static model where all parameters but param_index are constant.
+
+    arguments:
+    by_param -- measurements sorted by state/transition name and parameter values
+    state_or_tran -- state or transition name (-> by_param[(state_or_tran, *)])
+    key -- model attribute, e.g. 'power' or 'duration'
+           (-> by_param[(state_or_tran, *)][key])
+    param_index -- index of variable parameter
+
+    Returns the mean standard deviation of all measurements of 'key'
+    (e.g. power consumption or timeout) for state/transition 'state_or_tran' where
+    parameter 'param_index' is dynamic and all other parameters are fixed.
+    I.e., if parameters are a, b, c ∈ {1,2,3} and 'param_index' corresponds to b, then
+    this function returns the mean of the standard deviations of (a=1, b=*, c=1),
+    (a=1, b=*, c=2), and so on.
+    """
+    partitions = []
+    for param_value in filter(lambda x: x[0] == state_or_tran, by_param.keys()):
+        param_partition = []
+        for k, v in by_param.items():
+            if param_slice_eq(k, param_value, param_index):
+                param_partition.extend(v[key])
+        if len(param_partition):
+            partitions.append(param_partition)
+        else:
+            print('[W] parameter value partition for {} is empty'.format(param_value))
+    return np.mean([np.std(partition) for partition in partitions])
+
+def _corr_by_param(by_name, state_or_trans, key, param_index):
+    if _all_params_are_numeric(by_name[state_or_trans], param_index):
+        param_values = np.array(list((map(lambda x: x[param_index], by_name[state_or_trans]['param']))))
+        try:
+            return np.corrcoef(by_name[state_or_trans][key], param_values)[0, 1]
+        except FloatingPointError as fpe:
+            # Typically happens when all parameter values are identical.
+            # Building a correlation coefficient is pointless in this case
+            # -> assume no correlation
+            return 0.
+    else:
+        return 0.
+
+def _all_params_are_numeric(data, param_idx):
+    param_values = list(map(lambda x: x[param_idx], data['param']))
+    if len(list(filter(is_numeric, param_values))) == len(param_values):
+        return True
+    return False
author	Daniel Friesel <derf@finalrewind.org>	2019-02-01 08:27:19 +0100
committer	Daniel Friesel <derf@finalrewind.org>	2019-02-01 08:27:19 +0100
commit	8ebb9fc80fb9becf9951fdc8502eefa3eb3868c5 (patch)
tree	5081c1602889ee6a3516222f51552441832efd80 /lib
parent	e488f05cb5b57cbfb33a92198bd3b269300558bd (diff)