lib/dfatool.py | 321 ++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 304 insertions(+), 17 deletions(-)
diff --git a/lib/dfatool.py b/lib/dfatool.py
index 2f5fa29..81b9525 100644
--- a/lib/dfatool.py
+++ b/lib/dfatool.py
@@ -12,6 +12,7 @@ import struct
import sys
import tarfile
import hashlib
+import zbar
from multiprocessing import Pool
from automata import PTA
from functions import analytic
@@ -327,7 +328,7 @@ class CrossValidator:
return validation_data.assess(training_model)
-def _preprocess_measurement(measurement):
+def _preprocess_mimosa(measurement):
setup = measurement['setup']
mim = MIMOSA(float(setup['mimosa_voltage']), int(setup['mimosa_shunt']))
try:
@@ -371,6 +372,28 @@ def _preprocess_measurement(measurement):
return processed_data
+def _preprocess_etlog(measurement):
+ setup = measurement['setup']
+ etlog = EnergyTraceLog(float(setup['voltage']), int(setup['state_duration']))
+ try:
+ timestamps, durations, mean_power = etlog.load_data(measurement['content'])
+ states_and_transitions = etlog.analyze_states(timestamps, durations, mean_power, measurement['expected_trace'])
+ except EOFError as e:
+ etlog.is_error = True
+ etlog.errors.append('EnergyTrace logfile error: {}'.format(e))
+ states_and_transitions = list()
+
+ processed_data = {
+ 'fileno' : measurement['fileno'],
+ 'info' : measurement['info'],
+ 'expected_trace' : measurement['expected_trace'],
+ 'energy_trace' : states_and_transitions,
+ # the mimosa_* key names are shared with _preprocess_mimosa output
+ 'has_mimosa_error' : etlog.is_error,
+ 'mimosa_errors' : etlog.errors,
+ }
+
+ return processed_data
+
class TimingData:
"""
Loader for timing model traces measured with on-board timers using `harness.OnboardTimerHarness`.
@@ -496,8 +519,24 @@ class RawData:
Each state/transition must have the members `.parameter` (dict with normalized parameter values), `.isa` ("state" or "transition") and `.name`
Each transition must additionally contain `.args`
`.opt.files`: list of corresponding MIMOSA measurements.
- `.opt.files[]` = ['abc123.mim']
+ `.opt.files[]` = ['abc123.mim', ...]
+ `.opt.configs`: ....
+ * MIMOSA log files (`*.mim`) as specified in `.opt.files`
+
+ Version 2:
+
+ * `ptalog.json`: measurement setup and traces (see the example skeleton below). Contents:
+ `.opt.sleep`: state duration
+ `.opt.pta`: PTA
+ `.opt.traces`: list of sub-benchmark traces (the benchmark may have been split due to code size limitations). Each item is a list of traces as returned by `harness.traces`:
+ `.opt.traces[]`: List of traces. Each trace has an 'id' (numeric, starting with 1) and 'trace' (list of states and transitions) element.
+ Each state/transition must have the members `.parameter` (dict with normalized parameter values), `.isa` ("state" or "transition") and `.name`
+ Each transition must additionally contain `.args` and `.duration`
+ * `.duration`: list of durations, one per repetition
+ `.opt.files`: list of corresponding EnergyTrace measurements.
+ `.opt.files[]` = ['abc123.etlog', ...]
`.opt.configs`: ....
+ * EnergyTrace log files (`*.etlog`) as specified in `.opt.files`
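+
+ A minimal, hypothetical example of the version 2 fields described
+ above (all values are illustrative only):
+
+ .opt.sleep = 250
+ .opt.pta = { ... }
+ .opt.traces = [ [ { "id": 1, "trace": [ ... ] }, ... ], ... ]
+ .opt.files = [ [ "abc123.etlog", "def456.etlog" ], ... ]
+ .opt.configs = [ { "voltage": 3.3, ... }, ... ]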
tbd
"""
@@ -513,6 +552,10 @@ class RawData:
for member in tf.getmembers():
if member.name == 'ptalog.json':
self.version = 1
+ # may actually be version 2; this is decided below, based
+ # on whether the archive also contains *.etlog files
+ elif member.name.endswith('.etlog'):
+ self.version = 2
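+ # e.g. an archive with ptalog.json and *.mim files is
+ # version 1, while one with ptalog.json and *.etlog
+ # files is version 2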
break
self.set_cache_file()
@@ -815,16 +858,18 @@ class RawData:
if self.preprocessed:
return self.traces
if self.version == 0:
- self._preprocess_01(0)
+ self._preprocess_012(0)
elif self.version == 1:
- self._preprocess_01(1)
+ self._preprocess_012(1)
+ elif self.version == 2:
+ self._preprocess_012(2)
self.preprocessed = True
self.save_cache()
return self.traces
- def _preprocess_01(self, version):
+ def _preprocess_012(self, version):
"""Load raw MIMOSA data and turn it into measurements which are ready to be analyzed."""
- mim_files = []
+ offline_data = []
for i, filename in enumerate(self.filenames):
if version == 0:
@@ -835,7 +880,7 @@ class RawData:
for member in tf.getmembers():
_, extension = os.path.splitext(member.name)
if extension == '.mim':
- mim_files.append({
+ offline_data.append({
'content' : tf.extractfile(member).read(),
'fileno' : i,
'info' : member,
@@ -848,13 +893,28 @@ class RawData:
with tarfile.open(filename) as tf:
ptalog = json.load(tf.extractfile(tf.getmember('ptalog.json')))
- # ptalog['traces'] is a list of lists.
- # The first level corresponds to the individual .mim files:
- # ptalog['traces'][0] contains all traces belonging to the
- # first .mim file in the archive.
- # The second level holds the individual runs in this
- # sub-benchmark, so ptalog['traces'][0][0] is the first
- # run, ptalog['traces'][0][1] the second, and so on
+ # Benchmark code may be too large to be executed in a single
+ # run, so benchmarks (a benchmark is basically a list of DFA runs)
+ # may be split up. To accommodate this, ptalog['traces'] is
+ # a list of lists: ptalog['traces'][0] corresponds to the
+ # first benchmark part, ptalog['traces'][1] to the
+ # second, and so on. ptalog['traces'][0][0] is the first
+ # trace (a sequence of states and transitions) in the
+ # first benchmark part, ptalog['traces'][0][1] the second, etc.
+ #
+ # As traces are typically repeated to minimize the effect
+ # of random noise, observations for each benchmark part
+ # are also lists. This applies in two places:
+ # traces[i][j]['parameter'][some_param] is either
+ # a value (if the parameter is controlled by software)
+ # or a list (if the parameter is only known a posteriori,
+ # e.g. "how many retransmissions did this packet take?").
+ #
+ # The second place is the MIMOSA energy measurements,
+ # which are listed in ptalog['files']. ptalog['files'][0]
+ # contains a list of files for the first benchmark part,
+ # ptalog['files'][0][0] is its first iteration/repetition,
+ # ptalog['files'][0][1] the second, etc.
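+ #
+ # A hypothetical example of this layout (names and counts
+ # are illustrative only):
+ # ptalog['traces'] = [[trace_a, trace_b], [trace_c]]
+ # ptalog['files'] = [['00.mim', '01.mim'], # part 0, 2 repetitions
+ # ['02.mim']] # part 1, 1 repetition
+ # trace_a[0]['parameter']['datarate'] = 250 # set by software
+ # trace_a[0]['parameter']['retries'] = [0, 2] # one value per repetition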
for j, traces in enumerate(ptalog['traces']):
new_filenames.append('{}#{}'.format(filename, j))
@@ -866,7 +926,55 @@ class RawData:
})
for repeat_id, mim_file in enumerate(ptalog['files'][j]):
member = tf.getmember(mim_file)
- mim_files.append({
+ offline_data.append({
+ 'content' : tf.extractfile(member).read(),
+ 'fileno' : j,
+ 'info' : member,
+ 'setup' : self.setup_by_fileno[j],
+ 'repeat_id' : repeat_id,
+ 'expected_trace' : ptalog['traces'][j],
+ })
+ self.filenames = new_filenames
+
+ elif version == 2:
+
+ new_filenames = list()
+ with tarfile.open(filename) as tf:
+ ptalog = json.load(tf.extractfile(tf.getmember('ptalog.json')))
+
+ # ptalog['traces'] and ptalog['files'] use the same
+ # nested-list layout as in version 1, see the comment
+ # above. The only difference is that the measurement
+ # files are EnergyTrace logs (*.etlog) instead of
+ # MIMOSA logs (*.mim).
+
+ for j, traces in enumerate(ptalog['traces']):
+ new_filenames.append('{}#{}'.format(filename, j))
+ self.traces_by_fileno.append(traces)
+ self.setup_by_fileno.append({
+ 'voltage' : ptalog['configs'][j]['voltage'],
+ 'state_duration' : ptalog['opt']['sleep'],
+ })
+ for repeat_id, etlog_file in enumerate(ptalog['files'][j]):
+ member = tf.getmember(etlog_file)
+ offline_data.append({
'content' : tf.extractfile(member).read(),
'fileno' : j,
'info' : member,
@@ -877,7 +985,10 @@ class RawData:
self.filenames = new_filenames
with Pool() as pool:
- measurements = pool.map(_preprocess_measurement, mim_files)
+ if version <= 1:
+ measurements = pool.map(_preprocess_mimosa, offline_data)
+ elif version == 2:
+ measurements = pool.map(_preprocess_etlog, offline_data)
num_valid = 0
valid_traces = list()
@@ -894,7 +1005,7 @@ class RawData:
# Strip the last state (it is not part of the scheduled measurement)
measurement['energy_trace'].pop()
repeat = 0
- elif version == 1:
+ elif version == 1 or version == 2:
- # The first online measurement is the UNINITIALIZED state. In v1,
- # it is not part of the expected PTA trace -> remove it.
+ # The first online measurement is the UNINITIALIZED state. In v1
+ # and v2, it is not part of the expected PTA trace -> remove it.
measurement['energy_trace'].pop(0)
@@ -1897,6 +2008,182 @@ class PTAModel:
'state_energy_by_trace' : regression_measures(np.array(model_state_energy_list), np.array(real_energy_list)),
}
+class EnergyTraceLog:
+ """
+ EnergyTrace log loader for DFA traces.
+
+ Expects an EnergyTrace log file generated via msp430-etv / energytrace-util
+ and a dfatool-generated benchmark. An EnergyTrace log consists of a series
+ of measurements. Each measurement has a timestamp, mean current, voltage,
+ and cumulative energy since start of measurement.
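+
+ A log line consists of four whitespace-separated integer fields,
+ optionally preceded by a CPU state field. Judging from `load_data`,
+ timestamps are in us and cumulative energy is in nJ; current and
+ voltage are not used here. Hypothetical example line:
+
+ 130304 9 3300 1612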
+ """
+
+ def __init__(self, voltage: float, state_duration: int):
+ self.voltage = voltage
+ self.state_duration = state_duration
+ self.is_error = False
+ self.errors = list()
+
+ def load_data(self, log_data):
+ lines = log_data.decode('ascii').split('\n')
+ data_count = sum(map(lambda x: len(x) > 0 and x[0] != '#', lines))
+ data_lines = filter(lambda x: len(x) > 0 and x[0] != '#', lines)
+
+ data = np.empty((data_count, 4))
+
+ for i, line in enumerate(data_lines):
+ fields = line.split(' ')
+ if len(fields) == 4:
+ timestamp, current, voltage, total_energy = map(int, fields)
+ elif len(fields) == 5:
+ cpustate = fields[0]
+ timestamp, current, voltage, total_energy = map(int, fields[1:])
+ else:
+ raise RuntimeError('cannot parse line "{}"'.format(line))
+ data[i] = [timestamp, current, voltage, total_energy]
+
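+ # Convert the absolute samples into per-interval data: interval i
+ # spans samples i and i+1. Mean power per interval is the energy
+ # delta (1e-9 J) divided by the time delta (1e-6 s), i.e. Watts.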
+ interval_start_timestamp = data[:-1, 0] * 1e-6
+ interval_duration = (data[1:, 0] - data[:-1, 0]) * 1e-6
+ interval_power = ((data[1:, 3] - data[:-1, 3]) * 1e-9) / ((data[1:, 0] - data[:-1, 0]) * 1e-6)
+
+ m_duration_us = data[-1, 0] - data[0, 0]
+ m_energy_nj = data[-1, 3] - data[0, 3]
+
+ self.sample_rate = data_count / (m_duration_us * 1e-6)
+
+ print('got {} samples with {} seconds of log data ({} Hz)'.format(data_count, m_duration_us * 1e-6, self.sample_rate))
+
+ return interval_start_timestamp, interval_duration, interval_power
+
+ def analyze_states(self, interval_start_timestamp, interval_duration, interval_power, traces):
+ u"""
+ Split log data into states and transitions and return duration, energy, and mean power for each element.
+
+ :param interval_start_timestamp: start timestamp of each measurement interval [s]
+ :param interval_duration: duration of each measurement interval [s]
+ :param interval_power: mean power of each measurement interval [W]
+ :param traces: expected traces (lists of states and transitions) as described in `RawData`
+
+ :returns: list of states and transitions, both starting and ending with a state.
+ Each element is a dict containing:
+ * `isa`: 'state' or 'transition'
+ * `clip_rate`: range(0..1) fraction of clipping in the energy measurement
+ * `raw_mean`: mean of raw samples
+ * `raw_std`: standard deviation of raw samples
+ * `uW_mean`: mean of (calibrated) power consumption
+ * `uW_std`: standard deviation of (calibrated) power consumption
+ * `us`: duration
+ If isa == 'transition', it also contains:
+ * `timeout`: duration of the preceding state
+ * `uW_mean_delta_prev`: difference between uW_mean and the preceding state's uW_mean
+ * `uW_mean_delta_next`: difference between uW_mean and the following state's uW_mean
+ (Note: splitting into states and transitions is not implemented yet;
+ the method currently only locates the sync barcodes.)
+ """
+
+ first_sync = self.find_first_sync(interval_start_timestamp, interval_power)
+
+ bc, start, stop = self.find_barcode(interval_start_timestamp, interval_power, interval_start_timestamp[first_sync])
+ print('barcode "{}" area: {:0.2f} .... {:0.2f} seconds'.format(bc, interval_start_timestamp[start], interval_start_timestamp[stop]))
+
+ # TODO combine transition duration + sleep duration to estimate
+ # start of next barcode (instead of hardcoded 0.4)
+ bc, start, stop = self.find_barcode(interval_start_timestamp, interval_power, interval_start_timestamp[stop] + 0.4)
+ print('barcode "{}" area: {:0.2f} .... {:0.2f} seconds'.format(bc, interval_start_timestamp[start], interval_start_timestamp[stop]))
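+
+ # A hypothetical sketch of the TODO above (assumes the expected
+ # trace provides the transition duration in seconds and that
+ # self.state_duration is given in ms):
+ # next_barcode_ts = interval_start_timestamp[stop] \
+ # + transition_duration + self.state_duration * 1e-3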
+
+ def find_first_sync(self, interval_ts, interval_power):
+ # LED Power is approx. 10 mW, use 5 mW above surrounding median as threshold
+ sync_threshold_power = np.median(interval_power[: int(3 * self.sample_rate)]) + 5e-3
+ # ignore the first two seconds of data
+ for i, ts in enumerate(interval_ts):
+ if ts > 2 and interval_power[i] > sync_threshold_power:
+ # back up 300 samples to add some margin before the detected edge
+ return i - 300
+ return None
+
+ def find_barcode(self, interval_ts, interval_power, start_ts):
+ """
+ Return absolute position and content of the next barcode following `start_ts`.
+
+ :param interval_ts: list of start timestamps (one per measurement interval) [s]
+ :param interval_power: mean power per measurement interval [W]
+ :param start_ts: timestamp at which to start looking for a barcode [s]
+ """
+
+ # default to the last sample in case start_ts lies beyond the data
+ start_position = len(interval_ts) - 1
+ for i, ts in enumerate(interval_ts):
+ if ts >= start_ts:
+ start_position = i
+ break
+
+ # Lookaround: 100 ms in both directions
+ lookaround = int(0.1 * self.sample_rate)
+
+ # LED Power is approx. 30 mW, use 15 mW above surrounding median as threshold
+ sync_threshold_power = np.median(interval_power[start_position - lookaround : start_position + lookaround]) + 15e-3
+
+ print('looking for barcode starting at {:0.2f} s, threshold is {:0.1f} mW'.format(start_ts, sync_threshold_power * 1e3))
+
+ sync_area_start = None
+ sync_start_ts = None
+ sync_area_end = None
+ sync_end_ts = None
+ for i, ts in enumerate(interval_ts):
+ if sync_area_start is None and ts >= start_ts and interval_power[i] > sync_threshold_power:
+ # 300 samples of margin before the first above-threshold sample
+ sync_area_start = i - 300
+ sync_start_ts = ts
+ # minimum barcode duration is 600ms
+ if sync_area_start is not None and sync_area_end is None and ts > sync_start_ts + 0.6 and (ts > sync_start_ts + 1 or abs(sync_threshold_power - interval_power[i]) > 30e-3):
+ sync_area_end = i
+ sync_end_ts = ts
+ break
+
+ barcode_data = interval_power[sync_area_start : sync_area_end]
+
+ print('barcode search area: {:0.2f} ... {:0.2f} seconds ({} samples)'.format(sync_start_ts, sync_end_ts, len(barcode_data)))
+
+ bc, start, stop = self.find_barcode_in_power_data(barcode_data)
+
+ if bc is None:
+ return bc, start, stop
+
+ return bc, sync_area_start + start, sync_area_start + stop
+
+ def find_barcode_in_power_data(self, barcode_data):
+
+ min_power = np.min(barcode_data)
+ max_power = np.max(barcode_data)
+
+ # zbar seems to be confused by measurement (and thus image) noise
+ # inside of barcodes. As our barcodes are only 1px high, this is
+ # likely not trivial to fix.
+ # -> Create a black and white (not grayscale) image to avoid this.
+ # Unfortunately, this decreases resilience against background noise
+ # (e.g. a not-exactly-idle peripheral device or CPU interrupts).
+ image_data = np.around(1 - ((barcode_data - min_power) / (max_power - min_power)))
+ image_data *= 255
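+ # each sample is now one pixel: 0 (black) for samples closer to
+ # max_power, 255 (white) for samples closer to min_power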
+
+ # zbar only returns the complete barcode position if it is at least
+ # two pixels high. For a 1px barcode, it only returns its right border.
+
+ width = len(image_data)
+ height = 2
+
+ image_data = bytes(map(int, image_data)) * height
+
+ #img = Image.frombytes('L', (width, height), image_data).resize((width, 100))
+ #img.save('/tmp/test-{}.png'.format(sync_area_start))
+
+ zbimg = zbar.Image(width, height, 'Y800', image_data)
+ scanner = zbar.ImageScanner()
+ scanner.parse_config('enable')
+
+ if scanner.scan(zbimg):
+ sym, = zbimg.symbols
+ sym_start = sym.location[1][0]
+ sym_end = sym.location[0][0]
+ return sym.data, sym_start, sym_end
+ else:
+ print('unable to find barcode')
+ return None, None, None
+
+
class MIMOSA:
"""