Diffstat (limited to 'lib/loader/__init__.py')
-rw-r--r-- | lib/loader/__init__.py | 878
1 file changed, 878 insertions, 0 deletions
diff --git a/lib/loader/__init__.py b/lib/loader/__init__.py new file mode 100644 index 0000000..1b9b18f --- /dev/null +++ b/lib/loader/__init__.py @@ -0,0 +1,878 @@ +#!/usr/bin/env python3 + +import io +import json +import logging +import numpy as np +import os +import re +import struct +import tarfile +import hashlib +from multiprocessing import Pool + +from dfatool.utils import NpEncoder, running_mean, soft_cast_int + +from .energytrace import ( + EnergyTrace, + EnergyTraceWithBarcode, + EnergyTraceWithLogicAnalyzer, + EnergyTraceWithTimer, +) +from .keysight import KeysightCSV +from .mimosa import MIMOSA + +logger = logging.getLogger(__name__) + +try: + from .pubcode import Code128 + import zbar + + zbar_available = True +except ImportError: + zbar_available = False + + +arg_support_enabled = True + + +def _preprocess_mimosa(measurement): + setup = measurement["setup"] + mim = MIMOSA( + float(setup["mimosa_voltage"]), + int(setup["mimosa_shunt"]), + with_traces=measurement["with_traces"], + ) + try: + charges, triggers = mim.load_data(measurement["content"]) + trigidx = mim.trigger_edges(triggers) + except EOFError as e: + mim.errors.append("MIMOSA logfile error: {}".format(e)) + trigidx = list() + + if len(trigidx) == 0: + mim.errors.append("MIMOSA log has no triggers") + return { + "fileno": measurement["fileno"], + "info": measurement["info"], + "errors": mim.errors, + "repeat_id": measurement["repeat_id"], + "valid": False, + } + + cal_edges = mim.calibration_edges( + running_mean(mim.currents_nocal(charges[0 : trigidx[0]]), 10) + ) + calfunc, caldata = mim.calibration_function(charges, cal_edges) + vcalfunc = np.vectorize(calfunc, otypes=[np.float64]) + traces = mim.analyze_states(charges, trigidx, vcalfunc) + + # the last (v0) / first (v1) state is not part of the benchmark + traces.pop(measurement["pop"]) + + mim.validate( + len(trigidx), traces, measurement["expected_trace"], setup["state_duration"] + ) + + processed_data = { + "triggers": len(trigidx), + "first_trig": trigidx[0] * 10, + "calibration": caldata, + "energy_trace": traces, + "errors": mim.errors, + "valid": len(mim.errors) == 0, + } + + for key in ["fileno", "info", "repeat_id"]: + processed_data[key] = measurement[key] + + return processed_data + + +def _preprocess_etlog(measurement): + setup = measurement["setup"] + + energytrace_class = EnergyTraceWithBarcode + if measurement["sync_mode"] == "la": + energytrace_class = EnergyTraceWithLogicAnalyzer + elif measurement["sync_mode"] == "timer": + energytrace_class = EnergyTraceWithTimer + + etlog = energytrace_class( + float(setup["voltage"]), + int(setup["state_duration"]), + measurement["transition_names"], + with_traces=measurement["with_traces"], + ) + states_and_transitions = list() + try: + etlog.load_data(measurement["content"]) + states_and_transitions = etlog.analyze_states( + measurement["expected_trace"], measurement["repeat_id"] + ) + except EOFError as e: + etlog.errors.append("EnergyTrace logfile error: {}".format(e)) + except RuntimeError as e: + etlog.errors.append("EnergyTrace loader error: {}".format(e)) + + processed_data = { + "fileno": measurement["fileno"], + "repeat_id": measurement["repeat_id"], + "info": measurement["info"], + "energy_trace": states_and_transitions, + "valid": len(etlog.errors) == 0, + "errors": etlog.errors, + } + + return processed_data + + +class TimingData: + """ + Loader for timing model traces measured with on-board timers using `harness.OnboardTimerHarness`. 
+
+    Expects a specific trace format and UART log output (as produced by
+    generate-dfa-benchmark.py). States are pruned from the output. (TODO)
+    """
+
+    def __init__(self, filenames):
+        """
+        Create a new TimingData object.
+
+        Each filenames element corresponds to a measurement run.
+        """
+        self.filenames = filenames.copy()
+        # holds the benchmark plan (dfa traces) for each series of benchmark runs.
+        # Note that a single entry typically has more than one corresponding mimosa/energytrace benchmark file,
+        # as benchmarks are run repeatedly to distinguish between random and parameter-dependent measurement effects.
+        self.traces_by_fileno = []
+        self.setup_by_fileno = []
+        self.preprocessed = False
+        self.version = 0
+
+    def _concatenate_analyzed_traces(self):
+        self.traces = []
+        for trace_group in self.traces_by_fileno:
+            for trace in trace_group:
+                # TimingHarness logs states, but does not aggregate any data for them at the moment -> throw all states away
+                transitions = list(
+                    filter(lambda x: x["isa"] == "transition", trace["trace"])
+                )
+                self.traces.append({"id": trace["id"], "trace": transitions})
+        for i, trace in enumerate(self.traces):
+            trace["orig_id"] = trace["id"]
+            trace["id"] = i
+            for log_entry in trace["trace"]:
+                paramkeys = sorted(log_entry["parameter"].keys())
+                if "param" not in log_entry["offline_aggregates"]:
+                    log_entry["offline_aggregates"]["param"] = list()
+                if "duration" in log_entry["offline_aggregates"]:
+                    # use j here to avoid shadowing the enumerate index i
+                    for j in range(len(log_entry["offline_aggregates"]["duration"])):
+                        paramvalues = list()
+                        for paramkey in paramkeys:
+                            if type(log_entry["parameter"][paramkey]) is list:
+                                paramvalues.append(
+                                    soft_cast_int(log_entry["parameter"][paramkey][j])
+                                )
+                            else:
+                                paramvalues.append(
+                                    soft_cast_int(log_entry["parameter"][paramkey])
+                                )
+                        if arg_support_enabled and "args" in log_entry:
+                            paramvalues.extend(map(soft_cast_int, log_entry["args"]))
+                        log_entry["offline_aggregates"]["param"].append(paramvalues)
+
+    def _preprocess_0(self):
+        for filename in self.filenames:
+            with open(filename, "r") as f:
+                log_data = json.load(f)
+                self.traces_by_fileno.extend(log_data["traces"])
+        self._concatenate_analyzed_traces()
+
+    def get_preprocessed_data(self):
+        """
+        Return a list of DFA traces annotated with timing and parameter data.
+
+        Suitable for the PTAModel constructor.
+        See PTAModel(...) docstring for format details.
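+
+        A minimal usage sketch (the filename is invented; assumes the log
+        provides per-transition durations):
+
+            data = TimingData(["timing-benchmark.json"])
+            for trace in data.get_preprocessed_data():
+                for transition in trace["trace"]:
+                    print(transition["name"], transition["offline_aggregates"]["duration"])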
+ """ + if self.preprocessed: + return self.traces + if self.version == 0: + self._preprocess_0() + self.preprocessed = True + return self.traces + + +def sanity_check_aggregate(aggregate): + for key in aggregate: + if "param" not in aggregate[key]: + raise RuntimeError("aggregate[{}][param] does not exist".format(key)) + if "attributes" not in aggregate[key]: + raise RuntimeError("aggregate[{}][attributes] does not exist".format(key)) + for attribute in aggregate[key]["attributes"]: + if attribute not in aggregate[key]: + raise RuntimeError( + "aggregate[{}][{}] does not exist, even though it is contained in aggregate[{}][attributes]".format( + key, attribute, key + ) + ) + param_len = len(aggregate[key]["param"]) + attr_len = len(aggregate[key][attribute]) + if param_len != attr_len: + raise RuntimeError( + "parameter mismatch: len(aggregate[{}][param]) == {} != len(aggregate[{}][{}]) == {}".format( + key, param_len, key, attribute, attr_len + ) + ) + + +def assert_legacy_compatibility(f1, t1, f2, t2): + expected_param_names = sorted( + t1["expected_trace"][0]["trace"][0]["parameter"].keys() + ) + for run in t2["expected_trace"]: + for state_or_trans in run["trace"]: + actual_param_names = sorted(state_or_trans["parameter"].keys()) + if actual_param_names != expected_param_names: + err = f"parameters in {f1} and {f2} are incompatible: {expected_param_names} ≠ {actual_param_names}" + logger.error(err) + raise ValueError(err) + + +def assert_ptalog_compatibility(f1, pl1, f2, pl2): + param1 = pl1["pta"]["parameters"] + param2 = pl2["pta"]["parameters"] + if param1 != param2: + err = f"parameters in {f1} and {f2} are incompatible: {param1} ≠ {param2}" + logger.error(err) + raise ValueError(err) + + states1 = list(sorted(pl1["pta"]["state"].keys())) + states2 = list(sorted(pl2["pta"]["state"].keys())) + if states1 != states2: + err = f"states in {f1} and {f2} differ: {states1} ≠ {states2}" + logger.warning(err) + + transitions1 = list(sorted(map(lambda t: t["name"], pl1["pta"]["transitions"]))) + transitions2 = list(sorted(map(lambda t: t["name"], pl1["pta"]["transitions"]))) + if transitions1 != transitions2: + err = f"transitions in {f1} and {f2} differ: {transitions1} ≠ {transitions2}" + logger.warning(err) + + +class RawData: + """ + Loader for hardware model traces measured with MIMOSA. + + Expects a specific trace format and UART log output (as produced by the + dfatool benchmark generator). Loads data, prunes bogus measurements, and + provides preprocessed data suitable for PTAModel. Results are cached on the + file system, making subsequent loads near-instant. + """ + + def __init__(self, filenames, with_traces=False, skip_cache=False): + """ + Create a new RawData object. + + Each filename element corresponds to a measurement run. + It must be a tar archive with the following contents: + + Version 0: + + * `setup.json`: measurement setup. Must contain the keys `state_duration` (how long each state is active, in ms), + `mimosa_voltage` (voltage applied to dut, in V), and `mimosa_shunt` (shunt value, in Ohm) + * `src/apps/DriverEval/DriverLog.json`: PTA traces and parameters for this benchmark. + Layout: List of traces, each trace has an 'id' (numeric, starting with 1) and 'trace' (list of states and transitions) element. + Each trace has an even number of elements, starting with the first state (usually `UNINITIALIZED`) and ending with a transition. 
+          Each state/transition must have the members `.parameter` (parameter values, empty string or None if unknown), `.isa` ("state" or "transition") and `.name`.
+          Each transition must additionally contain `.plan.level` ("user" or "epilogue").
+          Example: `[ {"id": 1, "trace": [ {"parameter": {...}, "isa": "state", "name": "UNINITIALIZED"}, ...] }, ... ]`
+        * At least one `*.mim` file. Each file corresponds to a single execution of the entire benchmark (i.e., all runs described in DriverLog.json) and starts with a MIMOSA Autocal calibration sequence.
+          MIMOSA files are parsed by the `MIMOSA` class.
+
+        Version 1:
+
+        * `ptalog.json`: measurement setup and traces. Contents:
+          `.opt.sleep`: state duration
+          `.opt.pta`: PTA
+          `.opt.traces`: list of sub-benchmark traces (the benchmark may have been split due to code size limitations). Each item is a list of traces as returned by `harness.traces`:
+          `.opt.traces[]`: List of traces. Each trace has an 'id' (numeric, starting with 1) and 'trace' (list of states and transitions) element.
+            Each state/transition must have the members `.parameter` (dict with normalized parameter values), `.isa` ("state" or "transition") and `.name`
+            Each transition must additionally contain `.args`
+          `.opt.files`: list of corresponding MIMOSA measurements.
+            `.opt.files[]` = ['abc123.mim', ...]
+          `.opt.configs`: ....
+        * MIMOSA log files (`*.mim`) as specified in `.opt.files`
+
+        Version 2:
+
+        * `ptalog.json`: measurement setup and traces. Contents:
+          `.opt.sleep`: state duration
+          `.opt.pta`: PTA
+          `.opt.traces`: list of sub-benchmark traces (the benchmark may have been split due to code size limitations). Each item is a list of traces as returned by `harness.traces`:
+          `.opt.traces[]`: List of traces. Each trace has an 'id' (numeric, starting with 1) and 'trace' (list of states and transitions) element.
+            Each state/transition must have the members `.parameter` (dict with normalized parameter values), `.isa` ("state" or "transition") and `.name`
+            Each transition must additionally contain `.args` and `.duration`
+            * `.duration`: list of durations, one per repetition
+          `.opt.files`: list of corresponding EnergyTrace measurements.
+            `.opt.files[]` = ['abc123.etlog', ...]
+          `.opt.configs`: ....
+        * EnergyTrace log files (`*.etlog`) as specified in `.opt.files`
+
+        If a cached result for a file is available, it is loaded and the file
+        is not preprocessed, unless `with_traces` is set.
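+
+        A minimal usage sketch (the archive name is invented for illustration):
+
+            raw = RawData(["v1-benchmark.tar"])
+            traces = raw.get_preprocessed_data()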
+ + tbd + """ + self.with_traces = with_traces + self.input_filenames = filenames.copy() + self.filenames = list() + self.traces_by_fileno = list() + self.setup_by_fileno = list() + self.version = 0 + self.preprocessed = False + self._parameter_names = None + self.ignore_clipping = False + self.pta = None + self.ptalog = None + + with tarfile.open(filenames[0]) as tf: + for member in tf.getmembers(): + if member.name == "ptalog.json" and self.version == 0: + self.version = 1 + # might also be version 2 + # depends on whether *.etlog exists or not + elif ".etlog" in member.name: + self.version = 2 + break + if self.version >= 1: + self.ptalog = json.load(tf.extractfile(tf.getmember("ptalog.json"))) + self.pta = self.ptalog["pta"] + + if self.ptalog and len(filenames) > 1: + for filename in filenames[1:]: + with tarfile.open(filename) as tf: + new_ptalog = json.load(tf.extractfile(tf.getmember("ptalog.json"))) + assert_ptalog_compatibility( + filenames[0], self.ptalog, filename, new_ptalog + ) + self.ptalog["files"].extend(new_ptalog["files"]) + + self.set_cache_file() + if not with_traces and not skip_cache: + self.load_cache() + + def set_cache_file(self): + cache_key = hashlib.sha256("!".join(self.input_filenames).encode()).hexdigest() + self.cache_dir = os.path.dirname(self.input_filenames[0]) + "/cache" + self.cache_file = "{}/{}.json".format(self.cache_dir, cache_key) + + def load_cache(self): + if os.path.exists(self.cache_file): + with open(self.cache_file, "r") as f: + try: + cache_data = json.load(f) + self.filenames = cache_data["filenames"] + self.traces = cache_data["traces"] + self.preprocessing_stats = cache_data["preprocessing_stats"] + if "pta" in cache_data: + self.pta = cache_data["pta"] + if "ptalog" in cache_data: + self.ptalog = cache_data["ptalog"] + self.setup_by_fileno = cache_data["setup_by_fileno"] + self.preprocessed = True + except json.decoder.JSONDecodeError as e: + logger.info(f"Skipping cache entry {self.cache_file}: {e}") + + def save_cache(self): + if self.with_traces: + return + try: + os.mkdir(self.cache_dir) + except FileExistsError: + pass + with open(self.cache_file, "w") as f: + cache_data = { + "filenames": self.filenames, + "traces": self.traces, + "preprocessing_stats": self.preprocessing_stats, + "pta": self.pta, + "ptalog": self.ptalog, + "setup_by_fileno": self.setup_by_fileno, + } + json.dump(cache_data, f) + + def to_dref(self) -> dict: + return { + "raw measurements/valid": self.preprocessing_stats["num_valid"], + "raw measurements/total": self.preprocessing_stats["num_runs"], + "static state duration/mean": ( + np.mean(list(map(lambda x: x["state_duration"], self.setup_by_fileno))), + r"\milli\second", + ), + } + + def _concatenate_traces(self, list_of_traces): + """ + Concatenate `list_of_traces` (list of lists) into a single trace while adjusting trace IDs. + + :param list_of_traces: List of list of traces. + :returns: List of traces with ['id'] in ascending order and ['orig_id'] as previous ['id'] + """ + + trace_output = list() + for trace in list_of_traces: + trace_output.extend(trace.copy()) + for i, trace in enumerate(trace_output): + trace["orig_id"] = trace["id"] + trace["id"] = i + return trace_output + + def get_preprocessed_data(self): + """ + Return a list of DFA traces annotated with energy, timing, and parameter data. + The list is cached on disk, unless the constructor was called with `with_traces` set. 
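+
+        For illustration, a returned trace may look like this (abridged;
+        values invented, full format described below):
+
+            {"id": 1, "orig_id": 1, "total_energy": 123456.7, "trace": [
+                {"isa": "state", "name": "UNINITIALIZED", "parameter": {...}, ...},
+                {"isa": "transition", "name": "init", "parameter": {...}, ...},
+            ]}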
+
+        Each DFA trace contains the following elements:
+        * `id`: Numeric ID, starting with 1
+        * `total_energy`: Total amount of energy (as measured by MIMOSA) in the entire trace
+        * `orig_id`: Original trace ID. May differ when concatenating multiple (different) benchmarks into one analysis, i.e., when calling RawData() with more than one file argument.
+        * `trace`: List of the individual states and transitions in this trace. Always contains an even number of elements, starting with the first state (typically "UNINITIALIZED") and ending with a transition.
+
+        Each trace element (that is, an entry of the `trace` list mentioned above) contains the following elements:
+        * `isa`: "state" or "transition"
+        * `name`: name
+        * `offline`: List of offline measurements for this state/transition. Each entry contains a result for this state/transition during one benchmark execution.
+          Entry contents:
+          - `clip_rate`: rate of clipped energy measurements, 0 .. 1
+          - `raw_mean`: mean raw MIMOSA value
+          - `raw_std`: standard deviation of raw MIMOSA value
+          - `uW_mean`: mean power draw, uW
+          - `uW_std`: standard deviation of power draw, uW
+          - `us`: state/transition duration, us
+          - `uW_mean_delta_prev`: (only for transitions) difference between uW_mean of this transition and uW_mean of previous state
+          - `uW_mean_delta_next`: (only for transitions) difference between uW_mean of this transition and uW_mean of next state
+          - `timeout`: (only for transitions) duration of previous state, us
+        * `offline_aggregates`: Aggregate of `offline` entries. dict of lists, each list entry has the same length
+          - `duration`: state/transition durations ("us"), us
+          - `energy`: state/transition energy ("us * uW_mean"), pJ
+          - `power`: mean power draw ("uW_mean"), uW
+          - `power_std`: standard deviations of power draw ("uW_std"), uW^2
+          - `paramkeys`: List of lists, each sub-list contains the parameter names corresponding to the `param` entries
+          - `param`: List of lists, each sub-list contains the parameter values for this measurement. Typically, all sub-lists are the same.
+          - `rel_energy_prev`: (only for transitions) transition energy relative to previous state mean power, pJ
+          - `rel_energy_next`: (only for transitions) transition energy relative to next state mean power, pJ
+          - `rel_power_prev`: (only for transitions) power relative to previous state mean power, µW
+          - `rel_power_next`: (only for transitions) power relative to next state mean power, µW
+          - `timeout`: (only for transitions) duration of previous state, us
+        * `offline_attributes`: List containing the keys of `offline_aggregates` which are meant to be part of the model.
+          This list ultimately decides which hardware/software attributes the model describes.
+          If isa == state, it contains power, duration, energy
+          If isa == transition, it contains power, rel_power_prev, rel_power_next, duration, timeout
+        * `online`: List of online estimations for this state/transition. Each entry contains a result for this state/transition during one benchmark execution.
+          Entry contents for isa == state:
+          - `time`: state duration, us
+          Entry contents for isa == transition:
+          - `timeout`: Duration of previous state, measured using on-board timers
+        * `parameter`: dictionary describing parameter values for this state/transition. Parameter values refer to the beginning of the state/transition and do not account for changes made by the transition.
+        * `plan`: Dictionary describing expected behaviour according to schedule / offline model.
+          Contents for isa == state: `energy`, `power`, `time`
+          Contents for isa == transition: `energy`, `timeout`, `level`.
+          If level is "user", the transition is part of the regular driver API. If level is "epilogue", it is an interrupt service routine and not called explicitly.
+        Each transition also contains:
+        * `args`: List of arguments the corresponding function call was called with. args entries are strings which are not necessarily numeric
+        * `code`: List of function name (first entry) and arguments (remaining entries) of the corresponding function call
+        """
+        if self.preprocessed:
+            return self.traces
+        if self.version <= 2:
+            self._preprocess_012(self.version)
+        else:
+            raise ValueError(f"Unsupported raw data version: {self.version}")
+        self.preprocessed = True
+        self.save_cache()
+        return self.traces
+
+    def _preprocess_012(self, version):
+        """Load raw MIMOSA or EnergyTrace data and turn it into measurements which are ready to be analyzed."""
+        offline_data = []
+        for i, filename in enumerate(self.input_filenames):
+
+            if version == 0:
+
+                self.filenames = self.input_filenames
+                with tarfile.open(filename) as tf:
+                    self.setup_by_fileno.append(json.load(tf.extractfile("setup.json")))
+                    traces = json.load(
+                        tf.extractfile("src/apps/DriverEval/DriverLog.json")
+                    )
+                    self.traces_by_fileno.append(traces)
+                    for member in tf.getmembers():
+                        _, extension = os.path.splitext(member.name)
+                        if extension == ".mim":
+                            offline_data.append(
+                                {
+                                    "content": tf.extractfile(member).read(),
+                                    # only for validation
+                                    "expected_trace": traces,
+                                    "fileno": i,
+                                    # For debug output and warnings
+                                    "info": member,
+                                    # Strip the last state (it is not part of the scheduled measurement)
+                                    "pop": -1,
+                                    "repeat_id": 0,  # needed to add runtime "return_value.apply_from" parameters to offline_aggregates. Irrelevant in v0.
+                                    "setup": self.setup_by_fileno[i],
+                                    "with_traces": self.with_traces,
+                                }
+                            )
+
+            elif version == 1:
+
+                with tarfile.open(filename) as tf:
+                    ptalog = json.load(tf.extractfile(tf.getmember("ptalog.json")))
+
+                    # Benchmark code may be too large to be executed in a single
+                    # run, so benchmarks (a benchmark is basically a list of DFA runs)
+                    # may be split up. To accommodate this, ptalog['traces'] is
+                    # a list of lists: ptalog['traces'][0] corresponds to the
+                    # first benchmark part, ptalog['traces'][1] to the
+                    # second, and so on. ptalog['traces'][0][0] is the first
+                    # trace (a sequence of states and transitions) in the
+                    # first benchmark part, ptalog['traces'][0][1] the second, etc.
+                    #
+                    # As traces are typically repeated to minimize the effect
+                    # of random noise, observations for each benchmark part
+                    # are also lists. Here, this shows up in two places:
+                    # traces[i][j]['parameter'][some_param] is either
+                    # a value (if the parameter is controlled by software)
+                    # or a list (if the parameter is only known a posteriori, e.g.
+                    # "how many retransmissions did this packet take?").
+                    #
+                    # The second place is the MIMOSA energy measurements, which
+                    # are listed in ptalog['files']. ptalog['files'][0]
+                    # contains a list of files for the first benchmark part,
+                    # ptalog['files'][0][0] is its first iteration/repetition,
+                    # ptalog['files'][0][1] the second, etc.
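+
+                    # For illustration (invented names/values): a benchmark split
+                    # into two parts, repeated twice each, might look like
+                    #   ptalog["traces"] = [[trace_a, trace_b], [trace_c]]
+                    #   ptalog["files"]  = [["a0.mim", "a1.mim"], ["b0.mim", "b1.mim"]]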
+
+                    for j, traces in enumerate(ptalog["traces"]):
+                        self.filenames.append("{}#{}".format(filename, j))
+                        self.traces_by_fileno.append(traces)
+                        self.setup_by_fileno.append(
+                            {
+                                "mimosa_voltage": ptalog["configs"][j]["voltage"],
+                                "mimosa_shunt": ptalog["configs"][j]["shunt"],
+                                "state_duration": ptalog["opt"]["sleep"],
+                            }
+                        )
+                        for repeat_id, mim_file in enumerate(ptalog["files"][j]):
+                            # MIMOSA benchmarks always use a single .mim file per benchmark run.
+                            # However, depending on the dfatool version used to run the
+                            # benchmark, ptalog["files"][j] is either "foo.mim" (before Oct 2020)
+                            # or ["foo.mim"] (from Oct 2020 onwards).
+                            if type(mim_file) is list:
+                                mim_file = mim_file[0]
+                            member = tf.getmember(mim_file)
+                            offline_data.append(
+                                {
+                                    "content": tf.extractfile(member).read(),
+                                    # only for validation
+                                    "expected_trace": traces,
+                                    "fileno": len(self.traces_by_fileno) - 1,
+                                    # For debug output and warnings
+                                    "info": member,
+                                    # The first online measurement is the UNINITIALIZED state. In v1,
+                                    # it is not part of the expected PTA trace -> remove it.
+                                    "pop": 0,
+                                    "setup": self.setup_by_fileno[-1],
+                                    "repeat_id": repeat_id,  # needed to add runtime "return_value.apply_from" parameters to offline_aggregates.
+                                    "with_traces": self.with_traces,
+                                }
+                            )
+
+            elif version == 2:
+
+                with tarfile.open(filename) as tf:
+                    ptalog = json.load(tf.extractfile(tf.getmember("ptalog.json")))
+                    if "sync" in ptalog["opt"]["energytrace"]:
+                        sync_mode = ptalog["opt"]["energytrace"]["sync"]
+                    else:
+                        sync_mode = "bar"
+
+                    # Benchmark code may be too large to be executed in a single
+                    # run, so benchmarks (a benchmark is basically a list of DFA runs)
+                    # may be split up. To accommodate this, ptalog['traces'] is
+                    # a list of lists: ptalog['traces'][0] corresponds to the
+                    # first benchmark part, ptalog['traces'][1] to the
+                    # second, and so on. ptalog['traces'][0][0] is the first
+                    # trace (a sequence of states and transitions) in the
+                    # first benchmark part, ptalog['traces'][0][1] the second, etc.
+                    #
+                    # As traces are typically repeated to minimize the effect
+                    # of random noise, observations for each benchmark part
+                    # are also lists. Here, this shows up in two places:
+                    # traces[i][j]['parameter'][some_param] is either
+                    # a value (if the parameter is controlled by software)
+                    # or a list (if the parameter is only known a posteriori, e.g.
+                    # "how many retransmissions did this packet take?").
+                    #
+                    # The second place is the EnergyTrace measurements, which
+                    # are listed in ptalog['files']. ptalog['files'][0]
+                    # contains a list of files for the first benchmark part,
+                    # ptalog['files'][0][0] is its first iteration/repetition,
+                    # ptalog['files'][0][1] the second, etc.
+
+                    # generate-dfa-benchmark uses TimingHarness to obtain timing data.
+                    # Data is placed in 'offline_aggregates', which is also
+                    # where we are going to store power/energy data.
+                    # In case of invalid measurements, this can lead to a
+                    # mismatch between duration and power/energy data, e.g.
+                    # where duration = [A, B, C], power = [a, b], B belonging
+                    # to an invalid measurement and thus power[b] corresponding
+                    # to duration[C]. At the moment, this is harmless, but in the
+                    # future it might not be.
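+
+                    # The block below moves on-board timer data recorded by
+                    # TimingHarness out of the way: e.g. (invented values)
+                    #   {"offline_aggregates": {"duration": [123, 125]}}
+                    # becomes
+                    #   {"online_aggregates": {"duration": [123, 125]}}
+                    # so that the power/energy data extracted from *.etlog files
+                    # can be stored under 'offline_aggregates' without clashing.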
+ if "offline_aggregates" in ptalog["traces"][0][0]["trace"][0]: + for trace_group in ptalog["traces"]: + for trace in trace_group: + for state_or_transition in trace["trace"]: + offline_aggregates = state_or_transition.pop( + "offline_aggregates", None + ) + if offline_aggregates: + state_or_transition[ + "online_aggregates" + ] = offline_aggregates + + for j, traces in enumerate(ptalog["traces"]): + self.filenames.append("{}#{}".format(filename, j)) + self.traces_by_fileno.append(traces) + self.setup_by_fileno.append( + { + "voltage": ptalog["configs"][j]["voltage"], + "state_duration": ptalog["opt"]["sleep"], + } + ) + for repeat_id, etlog_files in enumerate(ptalog["files"][j]): + # legacy measurements supported only one file per run + if type(etlog_files) is not list: + etlog_files = [etlog_files] + members = list(map(tf.getmember, etlog_files)) + offline_data.append( + { + "content": list( + map(lambda f: tf.extractfile(f).read(), members) + ), + # used to determine EnergyTrace class for analysis + "sync_mode": sync_mode, + "fileno": len(self.traces_by_fileno) - 1, + # For debug output and warnings + "info": members[0], + "setup": self.setup_by_fileno[-1], + # needed to add runtime "return_value.apply_from" parameters to offline_aggregates, also for EnergyTraceWithBarcode + "repeat_id": repeat_id, + # only for validation + "expected_trace": traces, + "with_traces": self.with_traces, + # only for EnergyTraceWithBarcode + "transition_names": list( + map( + lambda x: x["name"], + ptalog["pta"]["transitions"], + ) + ), + } + ) + # TODO remove 'offline_aggregates' from pre-parse data and place + # it under 'online_aggregates' or similar instead. This way, if + # a .etlog file fails to parse, its corresponding duration data + # will not linger in 'offline_aggregates' and confuse the hell + # out of other code paths + + if self.version == 0 and len(self.input_filenames) > 1: + for entry in offline_data: + assert_legacy_compatibility( + self.input_filenames[0], + offline_data[0], + self.input_filenames[entry["fileno"]], + entry, + ) + + with Pool() as pool: + if self.version <= 1: + measurements = pool.map(_preprocess_mimosa, offline_data) + elif self.version == 2: + measurements = pool.map(_preprocess_etlog, offline_data) + + num_valid = 0 + for measurement in measurements: + + if "energy_trace" not in measurement: + logger.warning( + "Skipping {ar:s}/{m:s}: {e:s}".format( + ar=self.filenames[measurement["fileno"]], + m=measurement["info"].name, + e="; ".join(measurement["errors"]), + ) + ) + continue + + if version == 0 or version == 1: + if measurement["valid"]: + MIMOSA.add_offline_aggregates( + self.traces_by_fileno[measurement["fileno"]], + measurement["energy_trace"], + measurement["repeat_id"], + ) + num_valid += 1 + else: + logger.warning( + "Skipping {ar:s}/{m:s}: {e:s}".format( + ar=self.filenames[measurement["fileno"]], + m=measurement["info"].name, + e="; ".join(measurement["errors"]), + ) + ) + elif version == 2: + if measurement["valid"]: + try: + EnergyTrace.add_offline_aggregates( + self.traces_by_fileno[measurement["fileno"]], + measurement["energy_trace"], + measurement["repeat_id"], + ) + num_valid += 1 + except Exception as e: + logger.warning( + f"Skipping #{measurement['fileno']} {measurement['info']}:\n{e}" + ) + else: + logger.warning( + "Skipping {ar:s}/{m:s}: {e:s}".format( + ar=self.filenames[measurement["fileno"]], + m=measurement["info"].name, + e="; ".join(measurement["errors"]), + ) + ) + logger.info( + "{num_valid:d}/{num_total:d} measurements are 
valid".format( + num_valid=num_valid, num_total=len(measurements) + ) + ) + if version == 0: + self.traces = self._concatenate_traces(self.traces_by_fileno) + elif version == 1: + self.traces = self._concatenate_traces(self.traces_by_fileno) + elif version == 2: + self.traces = self._concatenate_traces(self.traces_by_fileno) + self.preprocessing_stats = { + "num_runs": len(measurements), + "num_valid": num_valid, + } + + +def _add_trace_data_to_aggregate(aggregate, key, element): + # Only cares about element['isa'], element['offline_aggregates'], and + # element['plan']['level'] + if key not in aggregate: + aggregate[key] = {"isa": element["isa"]} + for datakey in element["offline_aggregates"].keys(): + aggregate[key][datakey] = [] + if element["isa"] == "state": + aggregate[key]["attributes"] = ["power"] + else: + # TODO do not hardcode values + aggregate[key]["attributes"] = [ + "duration", + "power", + "rel_power_prev", + "rel_power_next", + "energy", + "rel_energy_prev", + "rel_energy_next", + ] + if "plan" in element and element["plan"]["level"] == "epilogue": + aggregate[key]["attributes"].insert(0, "timeout") + attributes = aggregate[key]["attributes"].copy() + for attribute in attributes: + if attribute not in element["offline_aggregates"]: + aggregate[key]["attributes"].remove(attribute) + if "offline_support" in element: + aggregate[key]["supports"] = element["offline_support"] + else: + aggregate[key]["supports"] = list() + for datakey, dataval in element["offline_aggregates"].items(): + aggregate[key][datakey].extend(dataval) + + +def pta_trace_to_aggregate(traces, ignore_trace_indexes=[]): + """ + Convert preprocessed DFA traces from peripherals/drivers to by_name aggregate for PTAModel. + + arguments: + traces -- [ ... Liste von einzelnen Läufen (d.h. eine Zustands- und Transitionsfolge UNINITIALIZED -> foo -> FOO -> bar -> BAR -> ...) + Jeder Lauf: + - id: int Nummer des Laufs, beginnend bei 1 + - trace: [ ... Liste von Zuständen und Transitionen + Jeweils: + - name: str Name + - isa: str state // transition + - parameter: { ... globaler Parameter: aktueller wert. null falls noch nicht eingestellt } + - args: [ Funktionsargumente, falls isa == 'transition' ] + - offline_aggregates: + - power: [float(uW)] Mittlere Leistung während Zustand/Transitions + - power_std: [float(uW^2)] Standardabweichung der Leistung + - duration: [int(us)] Dauer + - energy: [float(pJ)] Energieaufnahme des Zustands / der Transition + - clip_rate: [float(0..1)] Clipping + - paramkeys: [[str]] Name der berücksichtigten Parameter + - param: [int // str] Parameterwerte. Quasi-Duplikat von 'parameter' oben + Falls isa == 'transition': + - timeout: [int(us)] Dauer des vorherigen Zustands + - rel_energy_prev: [int(pJ)] + - rel_energy_next: [int(pJ)] + - rel_power_prev: [int(µW)] + - rel_power_next: [int(µW)] + ] + ] + ignore_trace_indexes -- list of trace indexes. The corresponding taces will be ignored. + + returns a tuple of three elements: + by_name -- measurements aggregated by state/transition name, annotated with parameter values + parameter_names -- list of parameter names + arg_count -- dict mapping transition names to the number of arguments of their corresponding driver function + + by_name layout: + Dictionary with one key per state/transition ('send', 'TX', ...). 
+    Each element is in turn a dict with the following elements:
+    - isa: 'state' or 'transition'
+    - power: list of mean power measurements in µW
+    - duration: list of durations in µs
+    - power_std: list of stddev of power per state/transition
+    - energy: consumed energy (power*duration) in pJ
+    - paramkeys: list of parameter names in each measurement (-> list of lists)
+    - param: list of parameter values in each measurement (-> list of lists)
+    - attributes: list of keys that should be analyzed,
+      e.g. ['power', 'duration']
+    additionally, only if isa == 'transition':
+    - timeout: list of durations of the previous state in µs
+    - rel_energy_prev: transition energy relative to previous state mean power in pJ
+    - rel_energy_next: transition energy relative to next state mean power in pJ
+    """
+    arg_count = dict()
+    by_name = dict()
+    parameter_names = sorted(traces[0]["trace"][0]["parameter"].keys())
+    for run in traces:
+        if run["id"] not in ignore_trace_indexes:
+            for elem in run["trace"]:
+                if (
+                    elem["isa"] == "transition"
+                    and elem["name"] not in arg_count
+                    and "args" in elem
+                ):
+                    arg_count[elem["name"]] = len(elem["args"])
+                if elem["name"] != "UNINITIALIZED":
+                    _add_trace_data_to_aggregate(by_name, elem["name"], elem)
+    for elem in by_name.values():
+        for key in elem["attributes"]:
+            elem[key] = np.array(elem[key])
+    return by_name, parameter_names, arg_count
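
A minimal end-to-end sketch of how these pieces fit together (archive name invented; assuming lib/ is on PYTHONPATH so that this package is importable as `loader`):

    from loader import RawData, pta_trace_to_aggregate, sanity_check_aggregate

    raw = RawData(["v1-benchmark.tar"])
    traces = raw.get_preprocessed_data()
    by_name, parameter_names, arg_count = pta_trace_to_aggregate(traces)
    sanity_check_aggregate(by_name)
    # by_name["TX"]["power"] now holds a numpy array of mean power values (µW),
    # with matching parameter values in by_name["TX"]["param"].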