author | Daniel Friesel <daniel.friesel@uos.de> | 2021-03-02 14:05:43 +0100
committer | Daniel Friesel <daniel.friesel@uos.de> | 2021-03-02 14:05:43 +0100
commit | 6e999370180f4060d1c9cb4cef4500570cb6d787 (patch)
tree | b43f8e8463bff8bbff6813cf451503c371660c75 /lib/utils.py
parent | 6383ef088e91273c68cd77e8215ad3920dc50ea1 (diff)
Move ParallelParamFit to a separate module
Diffstat (limited to 'lib/utils.py')
-rw-r--r-- | lib/utils.py | 74
1 file changed, 74 insertions, 0 deletions
diff --git a/lib/utils.py b/lib/utils.py
index b38a359..d3334c9 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -1,7 +1,10 @@
+#!/usr/bin/env python3
+
 import json
 import numpy as np
 import re
 import logging
+from sklearn.metrics import r2_score
 
 arg_support_enabled = True
 logger = logging.getLogger(__name__)
@@ -318,6 +321,77 @@ def detect_outliers_in_aggregate(aggregate, z_limit=10, remove_outliers=False):
     )
 
 
+def aggregate_measures(aggregate: float, actual: list) -> dict:
+    """
+    Calculate error measures for model value on data list.
+
+    arguments:
+    aggregate -- model value (float or int)
+    actual -- real-world / reference values (list of float or int)
+
+    return value:
+    See regression_measures
+    """
+    aggregate_array = np.array([aggregate] * len(actual))
+    return regression_measures(aggregate_array, np.array(actual))
+
+
+def regression_measures(predicted: np.ndarray, actual: np.ndarray):
+    """
+    Calculate error measures by comparing model values to reference values.
+
+    arguments:
+    predicted -- model values (np.ndarray)
+    actual -- real-world / reference values (np.ndarray)
+
+    Returns a dict containing the following measures:
+    mae -- Mean Absolute Error
+    mape -- Mean Absolute Percentage Error,
+            if all items in actual are non-zero (NaN otherwise)
+    smape -- Symmetric Mean Absolute Percentage Error,
+             if no 0,0-pairs are present in actual and predicted (NaN otherwise)
+    msd -- Mean Square Deviation
+    rmsd -- Root Mean Square Deviation
+    ssr -- Sum of Squared Residuals
+    rsq -- R^2 measure, see sklearn.metrics.r2_score
+    count -- Number of values
+    """
+    if type(predicted) != np.ndarray:
+        raise ValueError("first arg must be ndarray, is {}".format(type(predicted)))
+    if type(actual) != np.ndarray:
+        raise ValueError("second arg must be ndarray, is {}".format(type(actual)))
+    deviations = predicted - actual
+    # mean = np.mean(actual)
+    if len(deviations) == 0:
+        return {}
+    measures = {
+        "mae": np.mean(np.abs(deviations), dtype=np.float64),
+        "msd": np.mean(deviations ** 2, dtype=np.float64),
+        "rmsd": np.sqrt(np.mean(deviations ** 2), dtype=np.float64),
+        "ssr": np.sum(deviations ** 2, dtype=np.float64),
+        "rsq": r2_score(actual, predicted),
+        "count": len(actual),
+    }
+
+    # rsq_quotient = np.sum((actual - mean)**2, dtype=np.float64) * np.sum((predicted - mean)**2, dtype=np.float64)
+
+    if np.all(actual != 0):
+        measures["mape"] = np.mean(np.abs(deviations / actual)) * 100  # bad measure
+    else:
+        measures["mape"] = np.nan
+    if np.all(np.abs(predicted) + np.abs(actual) != 0):
+        measures["smape"] = (
+            np.mean(np.abs(deviations) / ((np.abs(predicted) + np.abs(actual)) / 2))
+            * 100
+        )
+    else:
+        measures["smape"] = np.nan
+    # if np.all(rsq_quotient != 0):
+    #     measures['rsq'] = (np.sum((actual - mean) * (predicted - mean), dtype=np.float64)**2) / rsq_quotient
+
+    return measures
+
+
 class OptionalTimingAnalysis:
     def __init__(self, enabled=True):
         self.enabled = enabled
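For context, here is a short usage sketch of the two helpers this commit adds. It is not part of the commit itself and assumes that `lib/` is on the Python path (so `utils` is importable) and that numpy and scikit-learn are installed:

```python
import numpy as np

from utils import aggregate_measures, regression_measures

predicted = np.array([10.0, 20.0, 30.0])
actual = np.array([11.0, 19.0, 33.0])

# Compare per-sample model output against reference values.
# deviations = predicted - actual = [-1, 1, -3]
measures = regression_measures(predicted, actual)
print(measures["mae"])   # mean(|[-1, 1, -3]|) = 5/3 ≈ 1.67
print(measures["rmsd"])  # sqrt(mean([1, 1, 9])) = sqrt(11/3) ≈ 1.91

# Evaluate a static model that always predicts 20.0 for the same data.
static = aggregate_measures(20.0, [11.0, 19.0, 33.0])
print(static["mae"])     # mean(|[9, 1, -13]|) = 23/3 ≈ 7.67
```

Per the docstring, `mape` is only computed when every entry of `actual` is non-zero, and `smape` only when no predicted/actual pair is (0, 0); otherwise those keys are set to NaN, which callers should be prepared to handle.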