diff options
Diffstat (limited to 'lib/dfatool.py')
-rw-r--r-- | lib/dfatool.py | 321 |
1 files changed, 304 insertions, 17 deletions
def _preprocess_etlog(measurement):
    """
    Load and analyze a single EnergyTrace log file.

    Worker function for `multiprocessing.Pool.map`; it only accesses its
    argument and the `EnergyTraceLog` class.

    :param measurement: dict with the following entries:
        * `setup`: dict containing `voltage` and `state_duration`
        * `content`: raw content of the EnergyTrace log file (bytes)
        * `expected_trace`: traces which were expected during this measurement
        * `fileno`, `info`: passed through to the result unchanged
    :returns: dict with the entries `fileno`, `info`, `expected_trace`,
        `energy_trace` (analysis result, empty list if it is unavailable),
        `has_mimosa_error` and `mimosa_errors`.
    """
    setup = measurement['setup']
    etlog = EnergyTraceLog(float(setup['voltage']), int(setup['state_duration']))

    try:
        timestamps, durations, mean_power = etlog.load_data(measurement['content'])
        states_and_transitions = etlog.analyze_states(timestamps, durations, mean_power, measurement['expected_trace'])
    except EOFError as e:
        etlog.is_error = True
        etlog.errors.append('EnergyTrace logfile error: {}'.format(e))
        states_and_transitions = None

    # analyze_states does not necessarily provide a trace (it has no return
    # statement at the time of writing). Hand out an empty list in that case
    # so that consumers always receive a list.
    if states_and_transitions is None:
        states_and_transitions = list()

    # NOTE(review): the *_mimosa_* key names are kept as they are; presumably
    # they are shared with the MIMOSA post-processing code -- confirm.
    processed_data = {
        'fileno' : measurement['fileno'],
        'info' : measurement['info'],
        'expected_trace' : measurement['expected_trace'],
        'energy_trace' : states_and_transitions,
        'has_mimosa_error' : etlog.is_error,
        'mimosa_errors' : etlog.errors,
    }

    return processed_data
+ * MIMOSA log files (`*.mim`) as specified in `.opt.files` + + Version 2: + + * `ptalog.json`: measurement setup and traces. Contents: + `.opt.sleep`: state duration + `.opt.pta`: PTA + `.opt.traces`: list of sub-benchmark traces (the benchmark may have been split due to code size limitations). Each item is a list of traces as returned by `harness.traces`: + `.opt.traces[]`: List of traces. Each trace has an 'id' (numeric, starting with 1) and 'trace' (list of states and transitions) element. + Each state/transition must have the members '`parameter` (dict with normalized parameter values), `.isa` ("state" or "transition") and `.name` + Each transition must additionally contain `.args` and `.duration` + * `.duration`: list of durations, one per repetition + `.opt.files`: list of coresponding EnergyTrace measurements. + `.opt.files[]` = ['abc123.etlog', ...] `.opt.configs`: .... + * EnergyTrace log files (`*.etlog`) as specified in `.opt.files` tbd """ @@ -513,6 +552,10 @@ class RawData: for member in tf.getmembers(): if member.name == 'ptalog.json': self.version = 1 + # might also be version 2 + # depends on whether *.etlog exists or not + elif '.etlog' in member.name: + self.version = 2 break self.set_cache_file() @@ -815,16 +858,18 @@ class RawData: if self.preprocessed: return self.traces if self.version == 0: - self._preprocess_01(0) + self._preprocess_012(0) elif self.version == 1: - self._preprocess_01(1) + self._preprocess_012(1) + elif self.version == 2: + self._preprocess_012(2) self.preprocessed = True self.save_cache() return self.traces - def _preprocess_01(self, version): + def _preprocess_012(self, version): """Load raw MIMOSA data and turn it into measurements which are ready to be analyzed.""" - mim_files = [] + offline_data = [] for i, filename in enumerate(self.filenames): if version == 0: @@ -835,7 +880,7 @@ class RawData: for member in tf.getmembers(): _, extension = os.path.splitext(member.name) if extension == '.mim': - mim_files.append({ + 
offline_data.append({ 'content' : tf.extractfile(member).read(), 'fileno' : i, 'info' : member, @@ -848,13 +893,28 @@ class RawData: with tarfile.open(filename) as tf: ptalog = json.load(tf.extractfile(tf.getmember('ptalog.json'))) - # ptalog['traces'] is a list of lists. - # The first level corresponds to the individual .mim files: - # ptalog['traces'][0] contains all traces belonging to the - # first .mim file in the archive. - # The second level holds the individual runs in this - # sub-benchmark, so ptalog['traces'][0][0] is the first - # run, ptalog['traces'][0][1] the second, and so on + # Benchmark code may be too large to be executed in a single + # run, so benchmarks (a benchmark is basically a list of DFA runs) + # may be split up. To accomodate this, ptalog['traces'] is + # a list of lists: ptalog['traces'][0] corresponds to the + # first benchmark part, ptalog['traces'][1] to the + # second, and so on. ptalog['traces'][0][0] is the first + # trace (a sequence of states and transitions) in the + # first benchmark part, ptalog['traces'][0][1] the second, etc. + # + # As traces are typically repeated to minimize the effect + # of random noise, observations for each benchmark part + # are also lists. In this case, this applies in two + # cases: traces[i][j]['parameter'][some_param] is either + # a value (if the parameter is controlld by software) + # or a list (if the parameter is known a posteriori, e.g. + # "how many retransmissions did this packet take?"). + # + # The second case is the MIMOSA energy measurements, which + # are listed in ptalog['files']. ptalog['files'][0] + # contains a list of files for the first benchmark part, + # ptalog['files'][0][0] is its first iteration/repetition, + # ptalog['files'][0][1] the second, etc. 
for j, traces in enumerate(ptalog['traces']): new_filenames.append('{}#{}'.format(filename, j)) @@ -866,7 +926,55 @@ class RawData: }) for repeat_id, mim_file in enumerate(ptalog['files'][j]): member = tf.getmember(mim_file) - mim_files.append({ + offline_data.append({ + 'content' : tf.extractfile(member).read(), + 'fileno' : j, + 'info' : member, + 'setup' : self.setup_by_fileno[j], + 'repeat_id' : repeat_id, + 'expected_trace' : ptalog['traces'][j], + }) + self.filenames = new_filenames + + elif version == 2: + + new_filenames = list() + with tarfile.open(filename) as tf: + ptalog = json.load(tf.extractfile(tf.getmember('ptalog.json'))) + + # Benchmark code may be too large to be executed in a single + # run, so benchmarks (a benchmark is basically a list of DFA runs) + # may be split up. To accomodate this, ptalog['traces'] is + # a list of lists: ptalog['traces'][0] corresponds to the + # first benchmark part, ptalog['traces'][1] to the + # second, and so on. ptalog['traces'][0][0] is the first + # trace (a sequence of states and transitions) in the + # first benchmark part, ptalog['traces'][0][1] the second, etc. + # + # As traces are typically repeated to minimize the effect + # of random noise, observations for each benchmark part + # are also lists. In this case, this applies in two + # cases: traces[i][j]['parameter'][some_param] is either + # a value (if the parameter is controlld by software) + # or a list (if the parameter is known a posteriori, e.g. + # "how many retransmissions did this packet take?"). + # + # The second case is the MIMOSA energy measurements, which + # are listed in ptalog['files']. ptalog['files'][0] + # contains a list of files for the first benchmark part, + # ptalog['files'][0][0] is its first iteration/repetition, + # ptalog['files'][0][1] the second, etc. 
class EnergyTraceLog:
    """
    EnergyTrace log loader for DFA traces.

    Expects an EnergyTrace log file generated via msp430-etv / energytrace-util
    and a dfatool-generated benchmark. An EnergyTrace log consists of a series
    of measurements. Each measurement has a timestamp, mean current, voltage,
    and cumulative energy since start of measurement.

    Problems encountered during analysis are reported via `is_error` (bool)
    and `errors` (list of messages).
    """

    # Number of samples by which a detected sync / barcode area is extended
    # to the left of the first sample exceeding the power threshold
    # (presumably to make sure that the rising edge is part of the area).
    _SYNC_LEAD_SAMPLES = 300

    def __init__(self, voltage: float, state_duration: int):
        """
        Create a new EnergyTraceLog object.

        :param voltage: supply voltage, stored as `self.voltage`
        :param state_duration: state duration, stored as `self.state_duration`
        """
        self.voltage = voltage
        self.state_duration = state_duration
        self.is_error = False
        self.errors = list()

    def load_data(self, log_data):
        """
        Parse an EnergyTrace log and convert it to per-interval power data.

        Empty lines and lines starting with '#' are ignored. Each remaining
        line must consist of four space-separated integers (timestamp,
        current, voltage, cumulative energy), optionally preceded by a fifth
        field (CPU state), which is not used. Timestamps are scaled by 1e-6
        and energy values by 1e-9, i.e., they are treated as µs and nJ.

        Also sets `self.sample_rate` (samples per second).

        :param log_data: content of the log file (ASCII-encoded bytes)
        :returns: tuple (interval_start_timestamp, interval_duration, interval_power).
            Each entry is an array with one element per interval between two
            consecutive measurements: start timestamp [s], duration [s], and
            mean power [W].
        :raises EOFError: if the log contains less than two measurements
        :raises RuntimeError: if a line has an unexpected number of fields or
            the log does not cover a positive time span
        """
        data_lines = [line for line in log_data.decode('ascii').split('\n') if line and line[0] != '#']
        data_count = len(data_lines)

        # At least two measurements are needed to build a single interval
        if data_count < 2:
            raise EOFError('expected at least two measurements, got {}'.format(data_count))

        data = np.empty((data_count, 4))

        for i, line in enumerate(data_lines):
            fields = line.split(' ')
            if len(fields) == 5:
                # drop the leading CPU state column
                fields = fields[1:]
            if len(fields) != 4:
                raise RuntimeError('cannot parse line "{}"'.format(line))
            # columns: timestamp, current, voltage, total_energy
            data[i] = [int(field) for field in fields]

        m_duration_us = data[-1, 0] - data[0, 0]
        if m_duration_us <= 0:
            raise RuntimeError('log data does not cover a positive time span')

        interval_us = data[1:, 0] - data[:-1, 0]

        interval_start_timestamp = data[:-1, 0] * 1e-6
        interval_duration = interval_us * 1e-6
        # mean power = energy difference / time difference
        interval_power = ((data[1:, 3] - data[:-1, 3]) * 1e-9) / (interval_us * 1e-6)

        self.sample_rate = data_count / (m_duration_us * 1e-6)

        print('got {} samples with {} seconds of log data ({} Hz)'.format(data_count, m_duration_us * 1e-6, self.sample_rate))

        return interval_start_timestamp, interval_duration, interval_power

    def analyze_states(self, interval_start_timestamp, interval_duration, interval_power, traces):
        """
        Locate the initial sync pulse and the first two barcodes in the log data.

        Work in progress: the log data is not split into states and
        transitions yet. The method only prints the detected barcode areas
        and always returns None. If the sync pulse or one of the barcodes
        cannot be found, `self.is_error` is set and a message is appended to
        `self.errors`.

        :param interval_start_timestamp: start timestamp of each measurement interval [s]
        :param interval_duration: duration of each measurement interval [s] (currently unused)
        :param interval_power: mean power of each measurement interval [W]
        :param traces: expected traces (currently unused)
        :returns: None
        """
        first_sync = self.find_first_sync(interval_start_timestamp, interval_power)
        if first_sync is None:
            self.is_error = True
            self.errors.append('unable to find initial sync pulse')
            return None

        bc, start, stop = self.find_barcode(interval_start_timestamp, interval_power, interval_start_timestamp[first_sync])
        if bc is None:
            self.is_error = True
            self.errors.append('unable to find first barcode')
            return None
        print('barcode "{}" area: {} .... {} seconds'.format(bc, interval_start_timestamp[start], interval_start_timestamp[stop]))

        # TODO combine transition duration + sleep duration to estimate
        # start of next barcode (instead of hardcoded 0.4)
        bc, start, stop = self.find_barcode(interval_start_timestamp, interval_power, interval_start_timestamp[stop] + 0.4)
        if bc is None:
            self.is_error = True
            self.errors.append('unable to find second barcode')
            return None
        print('barcode "{}" area: {:0.2f} .... {:0.2f} seconds'.format(bc, interval_start_timestamp[start], interval_start_timestamp[stop]))

        return None

    def find_first_sync(self, interval_ts, interval_power):
        """
        Return the index at which the first sync pulse area starts.

        The pulse is the first interval later than 2 s whose power exceeds
        the median of the first three seconds of data by more than 5 mW.
        Requires `self.sample_rate` (set by `load_data`).

        :param interval_ts: start timestamp of each measurement interval [s]
        :param interval_power: mean power of each measurement interval [W]
        :returns: index of the pulse minus `_SYNC_LEAD_SAMPLES` (clamped to 0),
            or None if there is no such pulse
        """
        interval_ts = np.asarray(interval_ts)
        interval_power = np.asarray(interval_power)

        # LED Power is approx. 10 mW, use 5 mW above surrounding median as threshold
        sync_threshold_power = np.median(interval_power[: int(3 * self.sample_rate)]) + 5e-3

        candidates = np.flatnonzero((interval_ts > 2) & (interval_power > sync_threshold_power))
        if candidates.size == 0:
            return None

        # Clamp to 0: a negative value would address the end of the log
        return max(0, int(candidates[0]) - self._SYNC_LEAD_SAMPLES)

    def find_barcode(self, interval_ts, interval_power, start_ts):
        """
        Return absolute position and content of the next barcode following `start_ts`.

        Requires `self.sample_rate` (set by `load_data`).

        :param interval_ts: list of start timestamps (one per measurement interval) [s]
        :param interval_power: mean power per measurement interval [W]
        :param start_ts: timestamp at which to start looking for a barcode [s]
        :returns: tuple (barcode content, start index, stop index). Indexes
            refer to `interval_ts` / `interval_power`. All three elements are
            None if no barcode was found.
        """
        interval_ts = np.asarray(interval_ts)
        interval_power = np.asarray(interval_power)

        after_start = np.flatnonzero(interval_ts >= start_ts)
        if after_start.size == 0:
            # start_ts is located behind the end of the log
            print('unable to find barcode')
            return None, None, None
        start_position = int(after_start[0])

        # Lookaround: 100 ms in both directions, at least one sample.
        # The window must not start at a negative index, as this would
        # address the end of the log (or yield an empty slice).
        lookaround = int(0.1 * self.sample_rate)
        window_start = max(0, start_position - lookaround)
        window_end = max(start_position + lookaround, start_position + 1)

        # LED Power is approx. 30 mW, use 15 mW above surrounding median as threshold
        sync_threshold_power = np.median(interval_power[window_start : window_end]) + 15e-3

        print('looking for barcode starting at {:0.2f} s, threshold is {:0.1f} mW'.format(start_ts, sync_threshold_power * 1e3))

        rising = np.flatnonzero((interval_ts >= start_ts) & (interval_power > sync_threshold_power))
        if rising.size == 0:
            print('unable to find barcode')
            return None, None, None

        first_high = int(rising[0])
        sync_start_ts = interval_ts[first_high]
        sync_area_start = max(0, first_high - self._SYNC_LEAD_SAMPLES)

        # minimum barcode duration is 600ms. After that, the area ends as
        # soon as power deviates from the threshold by more than 30 mW, or
        # after one second at the latest.
        tail_ts = interval_ts[first_high:]
        tail_power = interval_power[first_high:]
        is_end = (tail_ts > sync_start_ts + 0.6) & (
            (tail_ts > sync_start_ts + 1) | (np.abs(sync_threshold_power - tail_power) > 30e-3))
        end_candidates = np.flatnonzero(is_end)

        if end_candidates.size:
            sync_area_end = first_high + int(end_candidates[0])
            sync_end_ts = interval_ts[sync_area_end]
        else:
            # The log ends inside the barcode area: use all remaining data
            sync_area_end = len(interval_power)
            sync_end_ts = interval_ts[-1]

        barcode_data = interval_power[sync_area_start : sync_area_end]

        print('barcode search area: {:0.2f} ... {:0.2f} seconds ({} samples)'.format(sync_start_ts, sync_end_ts, len(barcode_data)))

        bc, start, stop = self.find_barcode_in_power_data(barcode_data)

        if bc is None:
            return bc, start, stop

        return bc, sync_area_start + start, sync_area_start + stop

    def find_barcode_in_power_data(self, barcode_data):
        """
        Decode a barcode contained in power data using zbar.

        :param barcode_data: mean power per measurement interval
        :returns: tuple (barcode content, start index, stop index). Indexes
            refer to `barcode_data`. All three elements are None if no
            barcode was found.
        """
        barcode_data = np.asarray(barcode_data)

        if barcode_data.size == 0:
            print('unable to find barcode')
            return None, None, None

        min_power = np.min(barcode_data)
        max_power = np.max(barcode_data)

        if max_power == min_power:
            # Constant power: there is no barcode, and the normalization
            # below would divide by zero.
            print('unable to find barcode')
            return None, None, None

        # zbar seems to be confused by measurement (and thus image) noise
        # inside of barcodes. As our barcodes are only 1px high, this is
        # likely not trivial to fix.
        # -> Create a black and white (not grayscale) image to avoid this.
        # Unfortunately, this decreases resilience against background noise
        # (e.g. a not-exactly-idle peripheral device or CPU interrupts).
        image_data = np.around(1 - ((barcode_data - min_power) / (max_power - min_power)))
        image_data *= 255

        # zbar only returns the complete barcode position if it is at least
        # two pixels high. For a 1px barcode, it only returns its right border.

        width = len(image_data)
        height = 2

        image_data = bytes(map(int, image_data)) * height

        zbimg = zbar.Image(width, height, 'Y800', image_data)
        scanner = zbar.ImageScanner()
        scanner.parse_config('enable')

        if scanner.scan(zbimg):
            # Expects exactly one symbol; the unpacking raises ValueError otherwise
            sym, = zbimg.symbols
            sym_start = sym.location[1][0]
            sym_end = sym.location[0][0]
            return sym.data, sym_start, sym_end
        else:
            print('unable to find barcode')
            return None, None, None