summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Birte Kristina Friesel <birte.friesel@uos.de> 2023-12-14 09:31:55 +0100
committer: Birte Kristina Friesel <birte.friesel@uos.de> 2023-12-14 09:31:55 +0100
commit: 3f18526e01e7e2320355d12aae331143f4441256 (patch)
tree: 349eac65f6af98ac093617d71e8dcdb7df5d1875
parent: a224eb21e30b7d11cc532e7f7bd344bf8900c5f9 (diff)
add median and 90/95/99th percentile absolute errors to metrics
-rw-r--r-- lib/model.py 21
-rw-r--r-- lib/utils.py 10
-rw-r--r-- lib/validation.py 14
3 files changed, 31 insertions, 14 deletions
diff --git a/lib/model.py b/lib/model.py
index 71d3367..05ea855 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -187,7 +187,6 @@ class AnalyticModel:
return f"AnalyticModel<names=[{names}]>"
def _compute_stats(self, by_name):
-
paramstats = ParallelParamStats()
for name, data in by_name.items():
@@ -302,7 +301,6 @@ class AnalyticModel:
"""
if not self.fit_done:
-
paramfit = ParamFit()
tree_allowed = bool(int(os.getenv("DFATOOL_DTREE_ENABLED", "1")))
use_fol = bool(int(os.getenv("DFATOOL_FIT_FOL", "0")))
@@ -439,7 +437,6 @@ class AnalyticModel:
return detailed_results
def build_dtree(self, name, attribute, threshold=100, **kwargs):
-
if name not in self.attr_by_name:
self.attr_by_name[name] = dict()
@@ -506,7 +503,11 @@ class AnalyticModel:
for k, v in attr.to_dref(unit).items():
ret[f"data/{name}/{attr_name}/{k}"] = v
e_static = static_quality[name][attr_name]
- ret[f"error/static/{name}/{attr_name}/mae"] = (e_static["mae"], unit)
+ for metric in "mae p50 p90 p95 p99".split():
+ ret[f"error/static/{name}/{attr_name}/{metric}"] = (
+ e_static[metric],
+ unit,
+ )
ret[f"error/static/{name}/{attr_name}/mape"] = (
e_static["mape"],
r"\percent",
@@ -525,7 +526,11 @@ class AnalyticModel:
if lut_quality is not None:
e_lut = lut_quality[name][attr_name]
- ret[f"error/lut/{name}/{attr_name}/mae"] = (e_lut["mae"], unit)
+ for metric in "mae p50 p90 p95 p99".split():
+ ret[f"error/lut/{name}/{attr_name}/{metric}"] = (
+ e_lut[metric],
+ unit,
+ )
ret[f"error/lut/{name}/{attr_name}/mape"] = (
e_lut["mape"],
r"\percent",
@@ -543,7 +548,11 @@ class AnalyticModel:
logger.warning(f"{name} {attr_name} LUT model has no MAPE")
e_model = model_quality[name][attr_name]
- ret[f"error/model/{name}/{attr_name}/mae"] = (e_model["mae"], unit)
+ for metric in "mae p50 p90 p95 p99".split():
+ ret[f"error/model/{name}/{attr_name}/{metric}"] = (
+ e_model[metric],
+ unit,
+ )
ret[f"error/model/{name}/{attr_name}/mape"] = (
e_model["mape"],
r"\percent",
diff --git a/lib/utils.py b/lib/utils.py
index 390a198..e7cf968 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -673,6 +673,11 @@ def regression_measures(predicted: np.ndarray, actual: np.ndarray):
if all items in actual are non-zero (NaN otherwise)
smape -- Symmetric Mean Absolute Percentage Error,
if no 0,0-pairs are present in actual and predicted (NaN otherwise)
+ p50 -- Median Absolute Error (as in: the median of the list of absolute
+ prediction errors aka. 50th percentile error)
+ p90 -- 90th percentile absolute error
+ p95 -- 95th percentile absolute error
+ p99 -- 99th percentile absolute error
msd -- Mean Square Deviation
rmsd -- Root Mean Square Deviation
ssr -- Sum of Squared Residuals
@@ -687,8 +692,13 @@ def regression_measures(predicted: np.ndarray, actual: np.ndarray):
# mean = np.mean(actual)
if len(deviations) == 0:
return {}
+ p50, p90, p95, p99 = np.percentile(np.abs(deviations), (50, 90, 95, 99))
measures = {
"mae": np.mean(np.abs(deviations), dtype=np.float64),
+ "p50": p50,
+ "p90": p90,
+ "p95": p95,
+ "p99": p99,
"msd": np.mean(deviations**2, dtype=np.float64),
"rmsd": np.sqrt(np.mean(deviations**2), dtype=np.float64),
"ssr": np.sum(deviations**2, dtype=np.float64),
diff --git a/lib/validation.py b/lib/validation.py
index 3fc5c1a..68f4ddb 100644
--- a/lib/validation.py
+++ b/lib/validation.py
@@ -129,10 +129,9 @@ class CrossValidator:
'by_name' : {
for each name: {
for each attribute: {
- 'mae' : mean of all mean absolute errors
- 'mae_list' : list of the individual MAE values encountered during cross-validation
- 'smape' : mean of all symmetric mean absolute percentage errors
- 'smape_list' : list of the individual SMAPE values encountered during cross-validation
+ 'groundTruth': [...]
+ 'modelOutput': [...]
+ see dfatool.utils.regression_measures
}
}
}
@@ -186,10 +185,9 @@ class CrossValidator:
'by_name' : {
for each name: {
for each attribute: {
- 'mae' : mean of all mean absolute errors
- 'mae_list' : list of the individual MAE values encountered during cross-validation
- 'smape' : mean of all symmetric mean absolute percentage errors
- 'smape_list' : list of the individual SMAPE values encountered during cross-validation
+ 'groundTruth': [...]
+ 'modelOutput': [...]
+ see dfatool.utils.regression_measures for additional items
}
}
}