From 2c3e1ddceb88fd7ea6c3090fc48d27407ce751b1 Mon Sep 17 00:00:00 2001
From: Daniel Friesel
Date: Fri, 25 Feb 2022 16:36:13 +0100
Subject: add --export-raw-predictions

---
 bin/analyze-archive.py |  7 ++++++-
 bin/analyze-kconfig.py |  7 ++++++-
 lib/cli.py             |  6 ++++++
 lib/model.py           | 17 +++++++++++++----
 4 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index a725833..4aab326 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -910,7 +910,12 @@ if __name__ == "__main__":
                 lambda m: m.get_fitted()[0], xv_count
             )
         else:
-            analytic_quality = model.assess(param_model)
+            if args.export_raw_predictions:
+                analytic_quality, raw_results = model.assess(param_model, return_raw=True)
+                with open(args.export_raw_predictions, "w") as f:
+                    json.dump(raw_results, f, cls=NpEncoder)
+            else:
+                analytic_quality = model.assess(param_model)
         xv_analytic_models = [model]
 
     if "tex" in show_models or "tex" in show_quality:
diff --git a/bin/analyze-kconfig.py b/bin/analyze-kconfig.py
index 1d03839..aab71e8 100755
--- a/bin/analyze-kconfig.py
+++ b/bin/analyze-kconfig.py
@@ -258,7 +258,12 @@ def main():
             )
         else:
             static_quality = model.assess(static_model)
-            analytic_quality = model.assess(param_model)
+            if args.export_raw_predictions:
+                analytic_quality, raw_results = model.assess(param_model, return_raw=True)
+                with open(args.export_raw_predictions, "w") as f:
+                    json.dump(raw_results, f, cls=dfatool.utils.NpEncoder)
+            else:
+                analytic_quality = model.assess(param_model)
         xv_analytic_models = [model]
     if lut_model:
         lut_quality = model.assess(lut_model)
diff --git a/lib/cli.py b/lib/cli.py
index 3af8cc1..f988421 100644
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -183,6 +183,12 @@ def add_standard_arguments(parser):
         type=str,
         help="Export raw cross-validation results to FILE for later analysis (e.g. to compare different modeling approaches by means of a t-test)",
     )
+    parser.add_argument(
+        "--export-raw-predictions",
+        metavar="FILE",
+        type=str,
+        help="Export raw model error data (i.e., ground truth vs. model output) to FILE for later analysis (e.g. to compare different modeling approaches by means of a t-test)",
+    )
     parser.add_argument(
         "--info",
         action="store_true",
diff --git a/lib/model.py b/lib/model.py
index 227a323..558f049 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -399,7 +399,7 @@ class AnalyticModel:
 
         return model_getter, info_getter
 
-    def assess(self, model_function, ref=None):
+    def assess(self, model_function, ref=None, return_raw=False):
         """
         Calculate MAE, SMAPE, etc. of model_function for each by_name entry.
 
@@ -412,11 +412,13 @@ class AnalyticModel:
         exclusive (e.g. by performing cross validation). Otherwise,
         overfitting cannot be detected.
         """
-        detailed_results = {}
+        detailed_results = dict()
+        raw_results = dict()
         if ref is None:
             ref = self.by_name
         for name, elem in sorted(ref.items()):
-            detailed_results[name] = {}
+            detailed_results[name] = dict()
+            raw_results[name] = dict()
             for attribute in elem["attributes"]:
                 predicted_data = np.array(
                     list(
@@ -430,7 +432,14 @@ class AnalyticModel:
                 )
                 measures = regression_measures(predicted_data, elem[attribute])
                 detailed_results[name][attribute] = measures
-
+                if return_raw:
+                    raw_results[name][attribute] = {
+                        "ground_truth": list(elem[attribute]),
+                        "model_output": list(predicted_data),
+                    }
+
+        if return_raw:
+            return detailed_results, raw_results
         return detailed_results
 
     def build_dtree(
-- 
cgit v1.2.3
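
[Editor's note, not part of the patch] For reference, a minimal sketch of how an exported file might be consumed downstream, in the t-test spirit of the --help text above. The file names model-a.json and model-b.json are hypothetical, and the sketch assumes both exports cover the same name/attribute entries in the same sample order (i.e., both models were assessed on identical benchmark data), so that a paired t-test over per-sample absolute errors is meaningful. The JSON layout follows raw_results as built in AnalyticModel.assess() above:

    { "<name>": { "<attribute>": { "ground_truth": [...], "model_output": [...] } } }

#!/usr/bin/env python3
import json

import numpy as np
from scipy import stats


def absolute_errors(filename):
    # Flatten one --export-raw-predictions file into a single vector of
    # per-sample absolute errors, iterating in deterministic (sorted) order
    # so that both files yield comparable vectors.
    with open(filename) as f:
        raw = json.load(f)
    errors = list()
    for name in sorted(raw.keys()):
        for attribute in sorted(raw[name].keys()):
            ground_truth = np.array(raw[name][attribute]["ground_truth"])
            model_output = np.array(raw[name][attribute]["model_output"])
            errors.extend(np.abs(ground_truth - model_output))
    return np.array(errors)


# Hypothetical exports from two modeling approaches, assessed on the same data.
errors_a = absolute_errors("model-a.json")
errors_b = absolute_errors("model-b.json")

# Both error vectors refer to the same observations, so a paired (dependent)
# t-test (ttest_rel) is appropriate rather than an independent-samples test.
t_statistic, p_value = stats.ttest_rel(errors_a, errors_b)
print(f"t = {t_statistic:.3f}, p = {p_value:.4g}")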