1 files changed, 245 insertions, 201 deletions
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index 10fe304..ca36745 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -1,73 +1,11 @@
 #!/usr/bin/env python3
 """
-analyze-archive -- generate PTA energy model from annotated legacy MIMOSA traces.
-
-Usage:
-PYTHONPATH=lib bin/analyze-archive.py [options] <tracefiles ...>
+analyze-archive - generate PTA energy model from dfatool benchmark traces
 
 analyze-archive generates a PTA energy model from one or more annotated
-traces generated by MIMOSA/dfatool-legacy. By default, it does nothing else --
-use one of the --plot-* or --show-* options to examine the generated model.
-
-Options:
---plot-unparam=<name>:<attribute>:<Y axis label>[;<name>:<attribute>:<label>;...]
-    Plot all mesurements for <name> <attribute> without regard for parameter values.
-    X axis is measurement number/id.
-
---plot-param=<name> <attribute> <parameter> [gplearn function][;<name> <attribute> <parameter> [function];...]
-    Plot measurements for <name> <attribute> by <parameter>.
-    X axis is parameter value.
-    Plots the model function as one solid line for each combination of non-<parameter>
-    parameters. Also plots the corresponding measurements.
-    If gplearn function is set, it is plotted using dashed lines.
-
---plot-traces=<name>
-    Plot power trace for state or transition <name>.
-
---export-traces=<directory>
-    Export power traces of all states and transitions to <directory>.
-    Creates a JSON file for each state and transition. Each JSON file
-    lists all occurences of the corresponding state/transition in the
-    benchmark's PTA trace. Each occurence contains the corresponding PTA
-    parameters (if any) in 'parameter' and measurement results in 'offline'.
-    As measurements are typically run repeatedly, 'offline' is in turn a list
-    of measurements: offline[0]['uW'] is the power trace of the first
-    measurement of this state/transition, offline[1]['uW'] corresponds t the
-    second measurement, etc. Values are provided in microwatts.
-    For example, TX.json[0].offline[0].uW corresponds to the first measurement
-    of the first TX state in the benchmark, and TX.json[5].offline[2].uW
-    corresponds to the third measurement of the sixth TX state in the benchmark.
-    WARNING: Several GB of RAM and disk space are required for complex measurements.
-             (JSON files may grow very large -- we trade efficiency for easy handling)
-
---info
-    Show state duration and (for each state and transition) number of measurements and parameter values
-
---show-models=<static|paramdetection|param|all|tex|html>
-    static: show static model values as well as parameter detection heuristic
-    paramdetection: show stddev of static/lut/fitted model
-    param: show parameterized model functions and regression variable values
-    all: all of the above
-    tex: print tex/pgfplots-compatible model data on stdout
-    html: print model and quality data as HTML table on stdout
-
---show-quality=<table|summary|all|tex|html>
-    table: show static/fitted/lut SMAPE and MAE for each name and attribute
-    summary: show static/fitted/lut SMAPE and MAE for each attribute, averaged over all states/transitions
-    all: all of the above
-    tex: print tex/pgfplots-compatible model quality data on stdout
-
---ignored-trace-indexes=<i1,i2,...>
-    Specify traces which should be ignored due to bogus data. 1 is the first
-    trace, 2 the second, and so on.
-
---discard-outliers=
-    not supported at the moment
-
---cross-validate=<method>:<count>
-    Perform cross validation when computing model quality.
-    Only works with --show-quality=table at the moment.
+traces generated by dfatool. By default, it does nothing else.
 
+Cross-Validation help:
     If <method> is "montecarlo": Randomly divide data into 2/3 training and 1/3
     validation, <count> times. Reported model quality is the average of all
     validation runs. Data is partitioned without regard for parameter values,
@@ -83,37 +21,25 @@ Options:
     so a specific parameter combination may be present in both training and
     validation sets or just one of them.
 
---function-override=<name attribute function>[;<name> <attribute> <function>;...]
-    Manually specify the function to fit for <name> <attribute>. A function
-    specified this way bypasses parameter detection: It is always assigned,
-    even if the model seems to be independent of the parameters it references.
-
---with-safe-functions
-    If set, include "safe" functions (safe_log, safe_inv, safe_sqrt) which are
-    also defined for cases such as safe_inv(0) or safe_sqrt(-1). This allows
-    a greater range of functions to be tried during fitting.
-
---filter-param=<parameter name>=<parameter value>[,<parameter name>=<parameter value>...]
-    Only consider measurements where <parameter name> is <parameter value>
-    All other measurements (including those where it is None, that is, has
-    not been set yet) are discarded. Note that this may remove entire
-    function calls from the model.
-
---hwmodel=<hwmodel.json|hwmodel.dfa>
-    Load DFA hardware model from JSON or YAML
-
---export-energymodel=<model.json>
-    Export energy model. Works out of the box for v1 and v2 logfiles. Requires --hwmodel for v0 logfiles.
-
---no-cache
-    Do not load cached measurement results
+Trace Export:
+    Each JSON file lists all occurrences of the corresponding state/transition in the
+    benchmark's PTA trace. Each occurrence contains the corresponding PTA
+    parameters (if any) in 'parameter' and measurement results in 'offline'.
+    As measurements are typically run repeatedly, 'offline' is in turn a list
+    of measurements: offline[0]['uW'] is the power trace of the first
+    measurement of this state/transition, offline[1]['uW'] corresponds to the
+    second measurement, etc. Values are provided in microwatts.
+    For example, TX.json[0].offline[0].uW corresponds to the first measurement
+    of the first TX state in the benchmark, and TX.json[5].offline[2].uW
+    corresponds to the third measurement of the sixth TX state in the benchmark.
+    WARNING: Several GB of RAM and disk space are required for complex measurements.
+             (JSON files may grow very large -- we trade efficiency for easy handling)
 """
 
-import getopt
+import argparse
 import json
 import logging
 import random
-import re
 import sys
 from dfatool import plotter
 from dfatool.loader import RawData, pta_trace_to_aggregate
@@ -123,8 +49,6 @@ from dfatool.validation import CrossValidator
 from dfatool.utils import filter_aggregate_by_param
 from dfatool.automata import PTA
 
-opt = dict()
-
 
 def print_model_quality(results):
     for state_or_tran in results.keys():
@@ -148,6 +72,15 @@ def format_quality_measures(result):
 
 
 def model_quality_table(result_lists, info_list):
+    print(
+        "{:20s} {:15s}       {:19s}       {:19s}       {:19s}".format(
+            "key",
+            "attribute",
+            "static".center(19),
+            "parameterized".center(19),
+            "LUT".center(19),
+        )
+    )
     for state_or_tran in result_lists[0]["by_name"].keys():
         for key in result_lists[0]["by_name"][state_or_tran].keys():
             buf = "{:20s} {:15s}".format(state_or_tran, key)
@@ -158,7 +91,7 @@ def model_quality_table(result_lists, info_list):
                     result = results["by_name"][state_or_tran][key]
                     buf += format_quality_measures(result)
                 else:
-                    buf += "{:6}----{:9}".format("", "")
+                    buf += "{:7}----{:8}".format("", "")
             print(buf)
 
 
@@ -290,11 +223,36 @@ def print_html_model_data(model, pm, pq, lm, lq, am, ai, aq):
         print("</tr>")
     print("</table>")
 
+def plot_traces(preprocessed_data, sot_name):
+    traces = list()
+    for trace in preprocessed_data:
+        for state_or_transition in trace["trace"]:
+            if state_or_transition["name"] == sot_name:
+                traces.extend(
+                    map(lambda x: x["uW"], state_or_transition["offline"])
+                )
+    if len(traces) == 0:
+        print(
+            f"""Did not find traces for state or transition {sot_name}. Abort.""",
+            file=sys.stderr,
+        )
+        sys.exit(2)
+
+    if len(traces) > 40:
+        print(f"""Truncating plot to 40 of {len(traces)} traces (random sample)""")
+        traces = random.sample(traces, 40)
+
+    plotter.plot_y(
+        traces,
+        xlabel="t [1e-5 s]",
+        ylabel="P [uW]",
+        title=sot_name,
+        family=True,
+    )
 
 if __name__ == "__main__":
 
     ignored_trace_indexes = []
-    discard_outliers = None
     safe_functions_enabled = False
     function_override = {}
     show_models = []
@@ -305,80 +263,176 @@ if __name__ == "__main__":
     xv_method = None
     xv_count = 10
 
-    try:
-        optspec = (
-            "info no-cache "
-            "plot-unparam= plot-param= plot-traces= show-models= show-quality= "
-            "ignored-trace-indexes= discard-outliers= function-override= "
-            "export-traces= "
-            "filter-param= "
-            "log-level= "
-            "cross-validate= "
-            "with-safe-functions hwmodel= export-energymodel="
-        )
-        raw_opts, args = getopt.getopt(sys.argv[1:], "", optspec.split(" "))
-
-        for option, parameter in raw_opts:
-            optname = re.sub(r"^--", "", option)
-            opt[optname] = parameter
-
-        if "ignored-trace-indexes" in opt:
-            ignored_trace_indexes = list(
-                map(int, opt["ignored-trace-indexes"].split(","))
-            )
-            if 0 in ignored_trace_indexes:
-                print("[E] arguments to --ignored-trace-indexes start from 1")
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__
+    )
+    parser.add_argument(
+        "--info",
+        action="store_true",
+        help="Show state duration and (for each state and transition) number of measurements and parameter values",
+    )
+    parser.add_argument(
+        "--no-cache", action="store_true", help="Do not load cached measurement results"
+    )
+    parser.add_argument(
+        "--plot-unparam",
+        metavar="<name>:<attribute>:<Y axis label>[;<name>:<attribute>:<label>;...]",
+        type=str,
+        help="Plot all measurements for <name> <attribute> without regard for parameter values. "
+        "X axis is measurement number/id.",
+    )
+    parser.add_argument(
+        "--plot-param",
+        metavar="<name> <attribute> <parameter> [gplearn function][;<name> <attribute> <parameter> [function];...]",
+        type=str,
+        help="Plot measurements for <name> <attribute> by <parameter>. "
+        "X axis is parameter value. "
+        "Plots the model function as one solid line for each combination of non-<parameter> parameters. "
+        "Also plots the corresponding measurements. "
+        "If gplearn function is set, it is plotted using dashed lines.",
+    )
+    parser.add_argument(
+        "--plot-traces",
+        metavar="NAME",
+        type=str,
+        help="Plot power trace for state or transition NAME",
+    )
+    parser.add_argument(
+        "--show-models",
+        choices=["static", "paramdetection", "param", "all", "tex", "html"],
+        help="static: show static model values as well as parameter detection heuristic.\n"
+        "paramdetection: show stddev of static/lut/fitted model\n"
+        "param: show parameterized model functions and regression variable values\n"
+        "all: all of the above\n"
+        "tex: print tex/pgfplots-compatible model data on stdout\n"
+        "html: print model and quality data as HTML table on stdout",
+    )
+    parser.add_argument(
+        "--show-quality",
+        choices=["table", "summary", "all", "tex", "html"],
+        help="table: show static/fitted/lut SMAPE and MAE for each name and attribute.\n"
+        "summary: show static/fitted/lut SMAPE and MAE for each attribute, averaged over all states/transitions.\n"
+        "all: all of the above.\n"
+        "tex: print tex/pgfplots-compatible model quality data on stdout.",
+    )
+    parser.add_argument(
+        "--ignored-trace-indexes",
+        metavar="<i1,i2,...>",
+        type=str,
+        help="Specify traces which should be ignored due to bogus data. "
+        "1 is the first trace, 2 the second, and so on.",
+    )
+    parser.add_argument(
+        "--function-override",
+        metavar="<name> <attribute> <function>[;<name> <attribute> <function>;...]",
+        type=str,
+        help="Manually specify the function to fit for <name> <attribute>. "
+        "A function specified this way bypasses parameter detection: "
+        "It is always assigned, even if the model seems to be independent of the parameters it references.",
+    )
+    parser.add_argument(
+        "--export-traces",
+        metavar="DIRECTORY",
+        type=str,
+        help="Export power traces of all states and transitions to DIRECTORY. "
+        "Creates a JSON file for each state and transition.",
+    )
+    parser.add_argument(
+        "--filter-param",
+        metavar="<parameter name>=<parameter value>[,<parameter name>=<parameter value>...]",
+        type=str,
+        help="Only consider measurements where <parameter name> is <parameter value>. "
+        "All other measurements (including those where it is None, that is, has not been set yet) are discarded. "
+        "Note that this may remove entire function calls from the model.",
+    )
+    parser.add_argument(
+        "--log-level",
+        metavar="LEVEL",
+        choices=["debug", "info", "warning", "error"],
+        help="Set log level",
+    )
+    parser.add_argument(
+        "--cross-validate",
+        metavar="<method>:<count>",
+        type=str,
+        help="Perform cross validation when computing model quality. "
+        "Only works with --show-quality=table at the moment.",
+    )
+    parser.add_argument(
+        "--with-safe-functions",
+        action="store_true",
+        help="Include 'safe' functions (safe_log, safe_inv, safe_sqrt) which are also defined for 0 and -1. "
+        "This allows a greater range of functions to be tried during fitting.",
+    )
+    parser.add_argument(
+        "--hwmodel",
+        metavar="FILE",
+        type=str,
+        help="Load DFA hardware model from JSON or YAML FILE",
+    )
+    parser.add_argument(
+        "--export-energymodel",
+        metavar="FILE",
+        type=str,
+        help="Export JSON energy model to FILE. Works out of the box for v1 and v2, requires --hwmodel for v0",
+    )
+    parser.add_argument("measurement", nargs="+")
 
-        if "discard-outliers" in opt:
-            discard_outliers = float(opt["discard-outliers"])
+    args = parser.parse_args()
 
-        if "function-override" in opt:
-            for function_desc in opt["function-override"].split(";"):
-                state_or_tran, attribute, *function_str = function_desc.split(" ")
-                function_override[(state_or_tran, attribute)] = " ".join(function_str)
+    if args.log_level:
+        numeric_level = getattr(logging, args.log_level.upper(), None)
+        if not isinstance(numeric_level, int):
+            print(f"Invalid log level: {args.log_level}", file=sys.stderr)
+            sys.exit(1)
+        logging.basicConfig(level=numeric_level)
 
-        if "show-models" in opt:
-            show_models = opt["show-models"].split(",")
+    if args.ignored_trace_indexes:
+        ignored_trace_indexes = list(map(int, args.ignored_trace_indexes.split(",")))
+        if 0 in ignored_trace_indexes:
+            logging.error("arguments to --ignored-trace-indexes start from 1")
 
-        if "show-quality" in opt:
-            show_quality = opt["show-quality"].split(",")
+    if args.function_override:
+        for function_desc in args.function_override.split(";"):
+            state_or_tran, attribute, *function_str = function_desc.split(" ")
+            function_override[(state_or_tran, attribute)] = " ".join(function_str)
 
-        if "cross-validate" in opt:
-            xv_method, xv_count = opt["cross-validate"].split(":")
-            xv_count = int(xv_count)
+    if args.show_models:
+        show_models = args.show_models.split(",")
 
-        if "filter-param" in opt:
-            opt["filter-param"] = list(
-                map(lambda x: x.split("="), opt["filter-param"].split(","))
-            )
-        else:
-            opt["filter-param"] = list()
+    if args.show_quality:
+        show_quality = args.show_quality.split(",")
 
-        if "with-safe-functions" in opt:
-            safe_functions_enabled = True
+    if args.cross_validate:
+        xv_method, xv_count = args.cross_validate.split(":")
+        xv_count = int(xv_count)
 
-        if "hwmodel" in opt:
-            pta = PTA.from_file(opt["hwmodel"])
+    if args.filter_param:
+        args.filter_param = list(
+            map(lambda x: x.split("="), args.filter_param.split(","))
+        )
+    else:
+        args.filter_param = list()
 
-        if "log-level" in opt:
-            numeric_level = getattr(logging, opt["log-level"].upper(), None)
-            if not isinstance(numeric_level, int):
-                print(f"Invalid log level: {loglevel}", file=sys.stderr)
-                sys.exit(1)
-            logging.basicConfig(level=numeric_level)
+    if args.with_safe_functions:  # store_true yields False/True, never None -- test truthiness
+        safe_functions_enabled = True
 
-    except getopt.GetoptError as err:
-        print(err, file=sys.stderr)
-        sys.exit(2)
+    if args.hwmodel:
+        pta = PTA.from_file(args.hwmodel)
 
     raw_data = RawData(
-        args,
-        with_traces=("export-traces" in opt or "plot-traces" in opt),
-        skip_cache=("no-cache" in opt),
+        args.measurement,
+        with_traces=(args.export_traces is not None or args.plot_traces is not None),
+        skip_cache=args.no_cache,
     )
 
-    if "info" in opt:
+    if args.info:
         print(" ".join(raw_data.filenames) + ":")
+        if raw_data.ptalog:
+            options = " --".join(
+                map(lambda kv: f"{kv[0]}={str(kv[1])}", raw_data.ptalog["opt"].items())
+            )
+            print(f"    Options: --{options}")
         if raw_data.version <= 1:
             data_source = "MIMOSA"
         elif raw_data.version == 2:
@@ -392,7 +446,7 @@ if __name__ == "__main__":
 
     preprocessed_data = raw_data.get_preprocessed_data()
 
-    if "info" in opt:
+    if args.info:
         print(
             f"""    Valid Runs: {raw_data.preprocessing_stats["num_valid"]}/{raw_data.preprocessing_stats["num_runs"]}"""
         )
@@ -401,7 +455,7 @@ if __name__ == "__main__":
         )
         print(f"""    State Duration: {" / ".join(state_durations)} ms""")
 
-    if "export-traces" in opt:
+    if args.export_traces:
         uw_per_sot = dict()
         for trace in preprocessed_data:
             for state_or_transition in trace["trace"]:
@@ -412,37 +466,13 @@ if __name__ == "__main__":
                     elem["uW"] = list(elem["uW"])
                 uw_per_sot[name].append(state_or_transition)
         for name, data in uw_per_sot.items():
-            target = f"{opt['export-traces']}/{name}.json"
+            target = f"{args.export_traces}/{name}.json"
             print(f"exporting {target} ...")
             with open(target, "w") as f:
                 json.dump(data, f)
 
-    if "plot-traces" in opt:
-        traces = list()
-        for trace in preprocessed_data:
-            for state_or_transition in trace["trace"]:
-                if state_or_transition["name"] == opt["plot-traces"]:
-                    traces.extend(
-                        map(lambda x: x["uW"], state_or_transition["offline"])
-                    )
-        if len(traces) == 0:
-            print(
-                f"""Did not find traces for state or transition {opt["plot-traces"]}. Abort.""",
-                file=sys.stderr,
-            )
-            sys.exit(2)
-
-        if len(traces) > 20:
-            print(f"""Truncating plot to 40 of {len(traces)} traces (random sample)""")
-            traces = random.sample(traces, 40)
-
-        plotter.plot_y(
-            traces,
-            xlabel="t [1e-5 s]",
-            ylabel="P [uW]",
-            title=opt["plot-traces"],
-            family=True,
-        )
+    if args.plot_traces:
+        plot_traces(preprocessed_data, args.plot_traces)
 
     if raw_data.preprocessing_stats["num_valid"] == 0:
         print("No valid data available. Abort.", file=sys.stderr)
@@ -455,14 +485,13 @@ if __name__ == "__main__":
         preprocessed_data, ignored_trace_indexes
     )
 
-    filter_aggregate_by_param(by_name, parameters, opt["filter-param"])
+    filter_aggregate_by_param(by_name, parameters, args.filter_param)
 
     model = PTAModel(
         by_name,
         parameters,
         arg_count,
         traces=preprocessed_data,
-        discard_outliers=discard_outliers,
         function_override=function_override,
         pta=pta,
     )
@@ -470,7 +499,7 @@ if __name__ == "__main__":
     if xv_method:
         xv = CrossValidator(PTAModel, by_name, parameters, arg_count)
 
-    if "info" in opt:
+    if args.info:
         for state in model.states():
             print("{}:".format(state))
             print(f"""    Number of Measurements: {len(by_name[state]["power"])}""")
@@ -492,8 +521,8 @@ if __name__ == "__main__":
                     )
                 )
 
-    if "plot-unparam" in opt:
-        for kv in opt["plot-unparam"].split(";"):
+    if args.plot_unparam:
+        for kv in args.plot_unparam.split(";"):
             state_or_trans, attribute, ylabel = kv.split(":")
             fname = "param_y_{}_{}.pdf".format(state_or_trans, attribute)
             plotter.plot_y(
@@ -703,7 +732,7 @@ if __name__ == "__main__":
         )
 
     if "overall" in show_quality or "all" in show_quality:
-        print("overall static/param/lut MAE assuming equal state distribution:")
+        print("overall state static/param/lut MAE assuming equal state distribution:")
         print(
             "    {:6.1f}  /  {:6.1f}  /  {:6.1f}  µW".format(
                 model.assess_states(static_model),
@@ -711,15 +740,30 @@ if __name__ == "__main__":
                 model.assess_states(lut_model),
             )
         )
-        print("overall static/param/lut MAE assuming 95% STANDBY1:")
-        distrib = {"STANDBY1": 0.95, "POWERDOWN": 0.03, "TX": 0.01, "RX": 0.01}
-        print(
-            "    {:6.1f}  /  {:6.1f}  /  {:6.1f}  µW".format(
-                model.assess_states(static_model, distribution=distrib),
-                model.assess_states(param_model, distribution=distrib),
-                model.assess_states(lut_model, distribution=distrib),
+        distrib = dict()
+        num_states = len(model.states())
+        p95_state = None
+        for state in model.states():
+            distrib[state] = 1.0 / num_states
+
+        if "STANDBY1" in model.states():
+            p95_state = "STANDBY1"
+        elif "SLEEP" in model.states():
+            p95_state = "SLEEP"
+
+        if p95_state is not None and num_states > 1:  # single state: no remainder to split, avoids 0.05 / 0
+            for state in distrib.keys():
+                distrib[state] = 0.05 / (num_states - 1)
+            distrib[p95_state] = 0.95
+
+            print(f"overall state static/param/lut MAE assuming 95% {p95_state}:")
+            print(
+                "    {:6.1f}  /  {:6.1f}  /  {:6.1f}  µW".format(
+                    model.assess_states(static_model, distribution=distrib),
+                    model.assess_states(param_model, distribution=distrib),
+                    model.assess_states(lut_model, distribution=distrib),
+                )
+            )
-        )
 
     if "summary" in show_quality or "all" in show_quality:
         model_summary_table(
@@ -730,8 +774,8 @@ if __name__ == "__main__":
             ]
         )
 
-    if "plot-param" in opt:
-        for kv in opt["plot-param"].split(";"):
+    if args.plot_param:
+        for kv in args.plot_param.split(";"):
             try:
                 state_or_trans, attribute, param_name, *function = kv.split(" ")
             except ValueError:
@@ -752,14 +796,14 @@ if __name__ == "__main__":
                 extra_function=function,
             )
 
-    if "export-energymodel" in opt:
+    if args.export_energymodel:
         if not pta:
             print(
                 "[E] --export-energymodel requires --hwmodel to be set", file=sys.stderr
             )
             sys.exit(1)
         json_model = model.to_json()
-        with open(opt["export-energymodel"], "w") as f:
+        with open(args.export_energymodel, "w") as f:
             json.dump(json_model, f, indent=2, sort_keys=True)
 
     sys.exit(0)