author    Daniel Friesel <daniel.friesel@uos.de>    2021-03-02 14:05:43 +0100
committer Daniel Friesel <daniel.friesel@uos.de>    2021-03-02 14:05:43 +0100
commit    6e999370180f4060d1c9cb4cef4500570cb6d787 (patch)
tree      b43f8e8463bff8bbff6813cf451503c371660c75 /lib/utils.py
parent    6383ef088e91273c68cd77e8215ad3920dc50ea1 (diff)
Move ParallelParamFit to a separate module
Diffstat (limited to 'lib/utils.py')
-rw-r--r--  lib/utils.py  74
1 file changed, 74 insertions, 0 deletions
diff --git a/lib/utils.py b/lib/utils.py
index b38a359..d3334c9 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -1,7 +1,10 @@
+#!/usr/bin/env python3
+
import json
import numpy as np
import re
import logging
+from sklearn.metrics import r2_score
arg_support_enabled = True
logger = logging.getLogger(__name__)
@@ -318,6 +321,77 @@ def detect_outliers_in_aggregate(aggregate, z_limit=10, remove_outliers=False):
)
+def aggregate_measures(aggregate: float, actual: list) -> dict:
+    """
+    Calculate error measures for a single model value compared against a list of reference values.
+
+    arguments:
+    aggregate -- model value (float or int)
+    actual -- real-world / reference values (list of float or int)
+
+    return value:
+    See regression_measures
+    """
+    aggregate_array = np.array([aggregate] * len(actual))
+    return regression_measures(aggregate_array, np.array(actual))
+
+
+def regression_measures(predicted: np.ndarray, actual: np.ndarray) -> dict:
+    """
+    Calculate error measures by comparing model values to reference values.
+
+    arguments:
+    predicted -- model values (np.ndarray)
+    actual -- real-world / reference values (np.ndarray)
+
+    Returns a dict containing the following measures:
+    mae -- Mean Absolute Error
+    mape -- Mean Absolute Percentage Error,
+            if all items in actual are non-zero (NaN otherwise)
+    smape -- Symmetric Mean Absolute Percentage Error,
+             if no 0,0-pairs are present in actual and predicted (NaN otherwise)
+    msd -- Mean Square Deviation
+    rmsd -- Root Mean Square Deviation
+    ssr -- Sum of Squared Residuals
+    rsq -- R^2 measure, see sklearn.metrics.r2_score
+    count -- Number of values
+    """
+    if type(predicted) is not np.ndarray:
+        raise ValueError("first arg must be ndarray, is {}".format(type(predicted)))
+    if type(actual) is not np.ndarray:
+        raise ValueError("second arg must be ndarray, is {}".format(type(actual)))
+    deviations = predicted - actual
+    # mean = np.mean(actual)
+    if len(deviations) == 0:
+        return {}
+    measures = {
+        "mae": np.mean(np.abs(deviations), dtype=np.float64),
+        "msd": np.mean(deviations ** 2, dtype=np.float64),
+        "rmsd": np.sqrt(np.mean(deviations ** 2, dtype=np.float64)),
+        "ssr": np.sum(deviations ** 2, dtype=np.float64),
+        "rsq": r2_score(actual, predicted),
+        "count": len(actual),
+    }
+
+    # rsq_quotient = np.sum((actual - mean)**2, dtype=np.float64) * np.sum((predicted - mean)**2, dtype=np.float64)
+
+    if np.all(actual != 0):
+        measures["mape"] = np.mean(np.abs(deviations / actual)) * 100  # unreliable: blows up for reference values close to zero
+    else:
+        measures["mape"] = np.nan
+    if np.all(np.abs(predicted) + np.abs(actual) != 0):
+        measures["smape"] = (
+            np.mean(np.abs(deviations) / ((np.abs(predicted) + np.abs(actual)) / 2))
+            * 100
+        )
+    else:
+        measures["smape"] = np.nan
+    # if np.all(rsq_quotient != 0):
+    #     measures['rsq'] = (np.sum((actual - mean) * (predicted - mean), dtype=np.float64)**2) / rsq_quotient
+
+    return measures
+
+
class OptionalTimingAnalysis:
    def __init__(self, enabled=True):
        self.enabled = enabled
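
For illustration, a minimal usage sketch of the two helpers added by this commit. This is not part of the diff; it assumes lib/ is importable as a package and uses made-up measurement values:

    import numpy as np
    from lib.utils import aggregate_measures, regression_measures

    # Compare per-sample model predictions against reference measurements.
    predicted = np.array([1.1, 1.9, 3.2])
    actual = np.array([1.0, 2.0, 3.0])
    measures = regression_measures(predicted, actual)
    print(measures["mae"], measures["smape"], measures["rsq"])

    # aggregate_measures models the entire dataset with a single constant
    # value (e.g. its mean) and reports the same error measures.
    print(aggregate_measures(2.0, [1.0, 2.0, 3.0])["mae"])

Since actual contains no zeros and no 0,0-pairs here, both mape and smape are defined; with a zero reference value they would come back as NaN, as documented in the docstring.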