diff options

| field | value | date |
|---|---|---|
| author | Birte Kristina Friesel <birte.friesel@uos.de> | 2023-12-14 09:31:55 +0100 |
| committer | Birte Kristina Friesel <birte.friesel@uos.de> | 2023-12-14 09:31:55 +0100 |
| commit | 3f18526e01e7e2320355d12aae331143f4441256 (patch) | |
| tree | 349eac65f6af98ac093617d71e8dcdb7df5d1875 | |
| parent | a224eb21e30b7d11cc532e7f7bd344bf8900c5f9 (diff) | |

add median and 90/95/99th percentile absolute errors to metrics

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/model.py | 21 |
| -rw-r--r-- | lib/utils.py | 10 |
| -rw-r--r-- | lib/validation.py | 14 |

3 files changed, 31 insertions, 14 deletions
diff --git a/lib/model.py b/lib/model.py index 71d3367..05ea855 100644 --- a/lib/model.py +++ b/lib/model.py @@ -187,7 +187,6 @@ class AnalyticModel: return f"AnalyticModel<names=[{names}]>" def _compute_stats(self, by_name): - paramstats = ParallelParamStats() for name, data in by_name.items(): @@ -302,7 +301,6 @@ class AnalyticModel: """ if not self.fit_done: - paramfit = ParamFit() tree_allowed = bool(int(os.getenv("DFATOOL_DTREE_ENABLED", "1"))) use_fol = bool(int(os.getenv("DFATOOL_FIT_FOL", "0"))) @@ -439,7 +437,6 @@ class AnalyticModel: return detailed_results def build_dtree(self, name, attribute, threshold=100, **kwargs): - if name not in self.attr_by_name: self.attr_by_name[name] = dict() @@ -506,7 +503,11 @@ class AnalyticModel: for k, v in attr.to_dref(unit).items(): ret[f"data/{name}/{attr_name}/{k}"] = v e_static = static_quality[name][attr_name] - ret[f"error/static/{name}/{attr_name}/mae"] = (e_static["mae"], unit) + for metric in "mae p50 p90 p95 p99".split(): + ret[f"error/static/{name}/{attr_name}/{metric}"] = ( + e_static[metric], + unit, + ) ret[f"error/static/{name}/{attr_name}/mape"] = ( e_static["mape"], r"\percent", @@ -525,7 +526,11 @@ class AnalyticModel: if lut_quality is not None: e_lut = lut_quality[name][attr_name] - ret[f"error/lut/{name}/{attr_name}/mae"] = (e_lut["mae"], unit) + for metric in "mae p50 p90 p95 p99".split(): + ret[f"error/lut/{name}/{attr_name}/{metric}"] = ( + e_lut[metric], + unit, + ) ret[f"error/lut/{name}/{attr_name}/mape"] = ( e_lut["mape"], r"\percent", @@ -543,7 +548,11 @@ class AnalyticModel: logger.warning(f"{name} {attr_name} LUT model has no MAPE") e_model = model_quality[name][attr_name] - ret[f"error/model/{name}/{attr_name}/mae"] = (e_model["mae"], unit) + for metric in "mae p50 p90 p95 p99".split(): + ret[f"error/model/{name}/{attr_name}/{metric}"] = ( + e_model[metric], + unit, + ) ret[f"error/model/{name}/{attr_name}/mape"] = ( e_model["mape"], r"\percent", diff --git a/lib/utils.py b/lib/utils.py 
index 390a198..e7cf968 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -673,6 +673,11 @@ def regression_measures(predicted: np.ndarray, actual: np.ndarray): if all items in actual are non-zero (NaN otherwise) smape -- Symmetric Mean Absolute Percentage Error, if no 0,0-pairs are present in actual and predicted (NaN otherwise) + p50 -- Median Absolute Error (as in: the median of the list of absolute + prediction errors aka. 50th percentile error) + p90 -- 90th percentile absolute error + p95 -- 95th percentile absolute error + p99 -- 99th percentile absolute error msd -- Mean Square Deviation rmsd -- Root Mean Square Deviation ssr -- Sum of Squared Residuals @@ -687,8 +692,13 @@ def regression_measures(predicted: np.ndarray, actual: np.ndarray): # mean = np.mean(actual) if len(deviations) == 0: return {} + p50, p90, p95, p99 = np.percentile(np.abs(deviations), (50, 90, 95, 99)) measures = { "mae": np.mean(np.abs(deviations), dtype=np.float64), + "p50": p50, + "p90": p90, + "p95": p95, + "p99": p99, "msd": np.mean(deviations**2, dtype=np.float64), "rmsd": np.sqrt(np.mean(deviations**2), dtype=np.float64), "ssr": np.sum(deviations**2, dtype=np.float64), diff --git a/lib/validation.py b/lib/validation.py index 3fc5c1a..68f4ddb 100644 --- a/lib/validation.py +++ b/lib/validation.py @@ -129,10 +129,9 @@ class CrossValidator: 'by_name' : { for each name: { for each attribute: { - 'mae' : mean of all mean absolute errors - 'mae_list' : list of the individual MAE values encountered during cross-validation - 'smape' : mean of all symmetric mean absolute percentage errors - 'smape_list' : list of the individual SMAPE values encountered during cross-validation + 'groundTruth': [...] + 'modelOutput': [...] 
+ see dfatool.utils.regression_measures } } } @@ -186,10 +185,9 @@ class CrossValidator: 'by_name' : { for each name: { for each attribute: { - 'mae' : mean of all mean absolute errors - 'mae_list' : list of the individual MAE values encountered during cross-validation - 'smape' : mean of all symmetric mean absolute percentage errors - 'smape_list' : list of the individual SMAPE values encountered during cross-validation + 'groundTruth': [...] + 'modelOutput': [...] + see dfatool.utils.regression_measures for additional items } } } |