implement autogeneration and data aggregation of timing benchmarks

author: Daniel Friesel <daniel.friesel@uos.de> 2019-07-22 16:58:00 +0200
committer: Daniel Friesel <daniel.friesel@uos.de> 2019-07-22 16:58:00 +0200
commit: 825ee24544c18be6c98c5869d89d7f59290b38ac (patch)
tree: 2fbe83e619981b23ebdc0e73b1027038a7a9d643
parent: ac49f53902d5b5229d691165762a8f419e07e687 (diff)
5 files changed, 296 insertions, 103 deletions
diff --git a/.env b/.env
new file mode 100644
index 0000000..9cadaab
--- /dev/null
+++ b/.env
@@ -0,0 +1 @@
+PYTHONPATH=/home/derf/var/ess/aemr/dfatool/lib
diff --git a/.gitignore b/.gitignore
index 1de64cd..896fa10 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 /lib/__pycache__/
 *.pyc
+/htmlcov/
diff --git a/bin/generate-dfa-benchmark.py b/bin/generate-dfa-benchmark.py
index b9790ed..8e3c227 100755
--- a/bin/generate-dfa-benchmark.py
+++ b/bin/generate-dfa-benchmark.py
@@ -15,6 +15,10 @@ Options:
 --depth=<depth> (default: 3)
     Maximum number of function calls per run
 
+--repeat=<count> (default: 0)
+    Repeat benchmark runs <count> times. When 0, benchmark runs are repeated
+    indefinitely and must be explicitly terminated with Ctrl+C / SIGINT
+
 --instance=<name>
     Override the name of the class instance used for benchmarking
 
@@ -44,6 +48,132 @@ def trace_matches_filter(trace: list, trace_filter: list) -> bool:
             return True
     return False
 
+def benchmark_from_runs(pta: PTA, runs: list, harness: object, benchmark_id: int = 0) -> io.StringIO:
+    outbuf = io.StringIO()
+
+    outbuf.write('#include "arch.h"\n')
+    if 'includes' in pta.codegen:
+        for include in pta.codegen['includes']:
+            outbuf.write('#include "{}"\n'.format(include))
+    outbuf.write(harness.global_code())
+
+    outbuf.write('int main(void)\n')
+    outbuf.write('{\n')
+    for driver in ('arch', 'gpio', 'kout'):
+        outbuf.write('{}.setup();\n'.format(driver))
+    if 'setup' in pta.codegen:
+        for call in pta.codegen['setup']:
+            outbuf.write(call)
+
+    # There is a race condition between flashing the code and starting the UART log.
+    # When starting the log before flashing, output from a previous benchmark may cause bogus data to be added.
+    # When flashing first and then starting the log, the first log lines may be lost.
+    # To work around this, we flash first, then start the log, and use this delay statement to ensure that no output is lost.
+    # This is also useful to faciliate MIMOSA calibration after flashing.
+    outbuf.write('arch.delay_ms(10000);\n')
+
+    outbuf.write('while (1) {\n')
+    outbuf.write(harness.start_benchmark())
+
+    class_prefix = ''
+    if 'instance' in opt:
+        class_prefix = '{}.'.format(opt['instance'])
+    elif 'instance' in pta.codegen:
+        class_prefix = '{}.'.format(pta.codegen['instance'])
+
+    num_transitions = 0
+    num_traces = 0
+    for run in runs:
+        outbuf.write(harness.start_run())
+        harness.start_trace()
+        param = pta.get_initial_param_dict()
+        for transition, arguments, parameter in run:
+            num_transitions += 1
+            harness.append_transition(transition.name, param, arguments)
+            harness.append_state(transition.destination.name, parameter)
+            outbuf.write('// {} -> {}\n'.format(transition.origin.name, transition.destination.name))
+            if transition.is_interrupt:
+                outbuf.write('// wait for {} interrupt\n'.format(transition.name))
+                transition_code = '// TODO add startTransition / stopTransition calls to interrupt routine'
+            else:
+                transition_code = '{}{}({});'.format(class_prefix, transition.name, ', '.join(map(str, arguments)))
+            outbuf.write(harness.pass_transition(pta.get_transition_id(transition), transition_code, transition = transition))
+
+            param = parameter
+
+            outbuf.write('// current parameters: {}\n'.format(', '.join(map(lambda kv: '{}={}'.format(*kv), param.items()))))
+
+            if opt['sleep']:
+                outbuf.write('arch.delay_ms({:d}); // {}\n'.format(opt['sleep'], transition.destination.name))
+
+        outbuf.write(harness.stop_run(num_traces))
+        outbuf.write('\n')
+        num_traces += 1
+
+    outbuf.write(harness.stop_benchmark())
+    outbuf.write('}\n')
+    outbuf.write('return 0;\n')
+    outbuf.write('}\n')
+
+    return outbuf
+
+def run_benchmark(application_file: str, pta: PTA, runs: list, arch: str, app: str, run_args: list, harness: object, sleep: int = 0, repeat: int = 0, run_offset: int = 0, runs_total: int = 0):
+    outbuf = benchmark_from_runs(pta, runs, harness)
+    with open(application_file, 'w') as f:
+        f.write(outbuf.getvalue())
+        print('[MAKE] building benchmark with {:d} runs'.format(len(runs)))
+
+    # assume an average of 10ms per transition. Mind the 10s start delay.
+    run_timeout = 10 + num_transitions * (sleep+10) / 1000
+
+    if repeat:
+        run_timeout *= repeat
+
+    needs_split = False
+    try:
+        runner.build(arch, app, run_args)
+    except RuntimeError:
+        if len(runs) > 50:
+            # Application is too large -> split up runs
+            needs_split = True
+        else:
+            # Unknown error
+            raise
+
+    # This has been deliberately taken out of the except clause to avoid nested exception handlers
+    # (they lead to pretty interesting tracebacks which are probably more confusing than helpful)
+    if needs_split:
+        print('[MAKE] benchmark code is too large, splitting up')
+        mid = len(runs) // 2
+        results = run_benchmark(application_file, pta, runs[:mid], arch, app, run_args, harness, sleep, repeat, run_offset = run_offset, runs_total = runs_total)
+        harness.reset()
+        results.extend(run_benchmark(application_file, pta, runs[mid:], arch, app, run_args, harness, sleep, repeat, run_offset = run_offset + mid, runs_total = runs_total))
+        return results
+
+    runner.flash(arch, app, run_args)
+    monitor = runner.get_monitor(arch, callback = harness.parser_cb)
+
+    if arch == 'posix':
+        print('[RUN] Will run benchmark for {:.0f} seconds'.format(run_timeout))
+        lines = monitor.run(int(run_timeout))
+        return [(runs, harness, lines)]
+
+    # TODO Benchmark bei zu vielen Transitionen in mehrere Programme
+    # aufteilen und diese nacheinander bis zu X % completion (220%)
+    # laufen lassen, zwischendurch jeweils automatisch neu bauen etc.
+    try:
+        slept = 0
+        while repeat == 0 or slept / run_timeout < 1:
+            time.sleep(5)
+            slept += 5
+            print('[RUN] {:d}/{:d} ({:.0f}%), current benchmark at {:.0f}%'.format(run_offset, runs_total, run_offset * 100 / runs_total, slept * 100 / run_timeout))
+    except KeyboardInterrupt:
+        pass
+    lines = monitor.get_lines()
+    monitor.close()
+
+    return [(runs, harness, lines)]
+
 
 if __name__ == '__main__':
 
@@ -53,6 +183,7 @@ if __name__ == '__main__':
             'app= '
             'depth= '
             'instance= '
+            'repeat= '
             'run= '
             'sleep= '
             'timer-pin= '
@@ -69,8 +200,15 @@ if __name__ == '__main__':
         else:
             opt['depth'] = 3
 
+        if 'repeat' in opt:
+            opt['repeat'] = int(opt['repeat'])
+        else:
+            opt['repeat'] = 0
+
         if 'sleep' in opt:
             opt['sleep'] = int(opt['sleep'])
+        else:
+            opt['sleep'] = 0
 
         if 'trace-filter' in opt:
             trace_filter = []
@@ -95,100 +233,33 @@ if __name__ == '__main__':
     else:
         timer_pin = 'GPIO::p1_0'
 
-    harness = OnboardTimerHarness(timer_pin)
-
-    outbuf = io.StringIO()
-
-    outbuf.write('#include "arch.h"\n')
-    if 'includes' in pta.codegen:
-        for include in pta.codegen['includes']:
-            outbuf.write('#include "{}"\n'.format(include))
-    outbuf.write(harness.global_code())
-
-    outbuf.write('int main(void)\n')
-    outbuf.write('{\n')
-    for driver in ('arch', 'gpio', 'kout'):
-        outbuf.write('{}.setup();\n'.format(driver))
-    if 'setup' in pta.codegen:
-        for call in pta.codegen['setup']:
-            outbuf.write(call)
-
-    outbuf.write('while (1) {\n')
-    outbuf.write(harness.start_benchmark())
-
-    class_prefix = ''
-    if 'instance' in opt:
-        class_prefix = '{}.'.format(opt['instance'])
-    elif 'instance' in pta.codegen:
-        class_prefix = '{}.'.format(pta.codegen['instance'])
+    runs = list()
 
-    num_transitions = 0
-    num_traces = 0
     for run in pta.dfs(opt['depth'], with_arguments = True, with_parameters = True):
         if 'trace-filter' in opt and not trace_matches_filter(run, opt['trace-filter']):
             continue
-        outbuf.write(harness.start_run())
-        harness.start_trace()
-        param = pta.get_initial_param_dict()
-        for transition, arguments, parameter in run:
-            num_transitions += 1
-            harness.append_state(transition.origin.name, param)
-            harness.append_transition(transition.name, param, arguments)
-            param = transition.get_params_after_transition(param, arguments)
-            outbuf.write('// {} -> {}\n'.format(transition.origin.name, transition.destination.name))
-            if transition.is_interrupt:
-                outbuf.write('// wait for {} interrupt\n'.format(transition.name))
-                transition_code = '// TODO add startTransition / stopTransition calls to interrupt routine'
-            else:
-                transition_code = '{}{}({});'.format(class_prefix, transition.name, ', '.join(map(str, arguments)))
-            outbuf.write(harness.pass_transition(pta.get_transition_id(transition), transition_code, transition = transition, parameter = parameter))
-
-            param = parameter
-
-            if 'sleep' in opt:
-                outbuf.write('arch.delay_ms({:d});\n'.format(opt['sleep']))
+        runs.append(run)
 
-        outbuf.write(harness.stop_run(num_traces))
-        outbuf.write('\n')
-        num_traces += 1
+    num_transitions = len(runs)
 
-    if num_transitions == 0:
+    if len(runs) == 0:
         print('DFS returned no traces -- perhaps your trace-filter is too restrictive?', file=sys.stderr)
         sys.exit(1)
 
-    outbuf.write(harness.stop_benchmark())
-    outbuf.write('}\n')
-    outbuf.write('return 0;\n')
-    outbuf.write('}\n')
+    harness = OnboardTimerHarness(gpio_pin = timer_pin, pta = pta, counter_limits = runner.get_counter_limits_us(opt['arch']))
 
     if len(args) > 1:
-        with open(args[1], 'w') as f:
-            f.write(outbuf.getvalue())
+        results = run_benchmark(args[1], pta, runs, opt['arch'], opt['app'], opt['run'].split(), harness, opt['sleep'], opt['repeat'], runs_total = len(runs))
+        json_out = {
+            'opt' : opt,
+            'pta' : pta.to_json(),
+            'traces' : harness.traces,
+            'raw_output' : list(map(lambda x: x[2], results)),
+        }
+        with open(time.strftime('ptalog-%Y%m%d-%H%M%S.json'), 'w') as f:
+            json.dump(json_out, f)
     else:
+        outbuf = benchmark_from_runs(pta, runs, harness)
         print(outbuf.getvalue())
 
-    if 'run' in opt:
-        if 'sleep' in opt:
-            run_timeout = num_transitions * opt['sleep'] / 1000
-        else:
-            run_timeout = num_transitions * 10 / 1000
-        monitor = runner.get_monitor(opt['arch'], callback = harness.parser_cb)
-        runner.build(opt['arch'], opt['app'], opt['run'].split())
-        runner.flash(opt['arch'], opt['app'], opt['run'].split())
-        if opt['arch'] != 'posix':
-            try:
-                slept = 0
-                while True:
-                    time.sleep(5)
-                    slept += 5
-                    print('[MON] approx. {:.0f}% done'.format(slept * 100 / run_timeout))
-            except KeyboardInterrupt:
-                pass
-            lines = monitor.get_lines()
-            monitor.close()
-        else:
-            print('[MON] Will run benchmark for {:.0f} seconds'.format(2 * run_timeout))
-            lines = monitor.run(int(2 * run_timeout))
-        print(lines)
-
     sys.exit(0)
diff --git a/lib/harness.py b/lib/harness.py
index 529a77e..1390e03 100644
--- a/lib/harness.py
+++ b/lib/harness.py
@@ -11,18 +11,19 @@ import re
 # generated otherwise and it should also work with AnalyticModel (which does
 # not have states)
 class TransitionHarness:
-    def __init__(self, gpio_pin = None):
+    def __init__(self, gpio_pin = None, pta = None):
         self.gpio_pin = gpio_pin
+        self.pta = pta
+        self.reset()
+
+    def reset(self):
         self.traces = []
         self.trace_id = 1
-        pass
+        self.synced = False
 
     def start_benchmark(self):
         pass
 
-    def stop_benchmark(self):
-        pass
-
     def global_code(self):
         ret = ''
         if self.gpio_pin != None:
@@ -34,8 +35,8 @@ class TransitionHarness:
             ret += 'PTALog ptalog;\n'
         return ret
 
-    def start_benchmark(self):
-        return 'ptalog.startBenchmark(0);\n'
+    def start_benchmark(self, benchmark_id = 0):
+        return 'ptalog.startBenchmark({:d});\n'.format(benchmark_id)
 
     def start_trace(self):
         self.traces.append({
@@ -62,15 +63,15 @@ class TransitionHarness:
     def start_run(self):
         return 'ptalog.reset();\n'
 
-    def pass_transition(self, transition_id, transition_code, transition: object = None, parameter: dict = dict()):
+    def pass_transition(self, transition_id, transition_code, transition: object = None):
         ret = 'ptalog.passTransition({:d});\n'.format(transition_id)
         ret += 'ptalog.startTransition();\n'
         ret += '{}\n'.format(transition_code)
         ret += 'ptalog.stopTransition();\n'
         return ret
 
-    def stop_run(self, trace_id = 0):
-        return 'ptalog.dump({:d});\n'.format(trace_id)
+    def stop_run(self, num_traces = 0):
+        return 'ptalog.dump({:d});\n'.format(num_traces)
 
     def stop_benchmark(self):
         return ''
@@ -96,8 +97,11 @@ class TransitionHarness:
                 pass
 
 class OnboardTimerHarness(TransitionHarness):
-    def __init__(self, gpio_pin = None):
-        super().__init__(gpio_pin = gpio_pin)
+    def __init__(self, counter_limits, **kwargs):
+        super().__init__(**kwargs)
+        self.trace_id = 0
+        self.trace_length = 0
+        self.one_cycle_in_us, self.one_overflow_in_us, self.counter_max_overflow = counter_limits
 
     def global_code(self):
         ret = '#include "driver/counter.h"\n'
@@ -105,18 +109,63 @@ class OnboardTimerHarness(TransitionHarness):
         ret += super().global_code()
         return ret
 
-    def start_benchmark(self):
+    def start_benchmark(self, benchmark_id = 0):
         ret = 'counter.start();\n'
         ret += 'counter.stop();\n'
         ret += 'ptalog.passNop(counter);\n'
-        ret += super().start_benchmark()
+        ret += super().start_benchmark(benchmark_id)
         return ret
 
-    def pass_transition(self, transition_id, transition_code, transition: object = None, parameter: dict = dict()):
+    def pass_transition(self, transition_id, transition_code, transition: object = None):
         ret = 'ptalog.passTransition({:d});\n'.format(transition_id)
         ret += 'ptalog.startTransition();\n'
         ret += 'counter.start();\n'
         ret += '{}\n'.format(transition_code)
         ret += 'counter.stop();\n'
         ret += 'ptalog.stopTransition(counter);\n'
-        return ret
-\ No newline at end of file
+        return ret
+
+    def parser_cb(self, line):
+        #print('[HARNESS] got line {}'.format(line))
+        if re.match(r'\[PTA\] benchmark start, id=(.*)', line):
+            self.synced = True
+            print('[HARNESS] synced')
+        if self.synced:
+            res = re.match(r'\[PTA\] trace=(.*) count=(.*)', line)
+            if res:
+                self.trace_id = int(res.group(1))
+                self.trace_length = int(res.group(2))
+                self.current_transition_in_trace = 0
+                #print('[HARNESS] trace {:d} contains {:d} transitions. Expecting {:d} transitions.'.format(self.trace_id, self.trace_length, len(self.traces[self.trace_id]['trace']) // 2))
+            res = re.match(r'\[PTA\] transition=(.*) cycles=(.*)/(.*)', line)
+            if res:
+                transition_id = int(res.group(1))
+                # TODO Handle Overflows (requires knowledge of arch-specific max cycle value)
+                cycles = int(res.group(2))
+                overflow = int(res.group(3))
+                if overflow >= self.counter_max_overflow:
+                    raise RuntimeError('Counter overflow ({:d}/{:d}) in benchmark id={:d} trace={:d}: transition #{:d} (ID {:d})'.format(cycles, overflow, 0, self.trace_id, self.current_transition_in_trace, transition_id))
+                duration_us = cycles * self.one_cycle_in_us + overflow * self.one_overflow_in_us
+                # self.traces contains transitions and states, UART output only contains trnasitions -> use index * 2
+                try:
+                    log_data_target = self.traces[self.trace_id]['trace'][self.current_transition_in_trace * 2]
+                except IndexError:
+                    transition_name = None
+                    if self.pta:
+                        transition_name = self.pta.transitions[transition_id].name
+                    print('[HARNESS] benchmark id={:d} trace={:d}: transition #{:d} (ID {:d}, name {}) is out of bounds'.format(0, self.trace_id, self.current_transition_in_trace, transition_id, transition_name))
+                    print('          Offending line: {}'.format(line))
+                    return
+                if log_data_target['isa'] != 'transition':
+                    raise RuntimeError('Log mismatch: Expected transition, got {:s}'.format(log_data_target['isa']))
+                if self.pta:
+                    transition = self.pta.transitions[transition_id]
+                    if transition.name != log_data_target['name']:
+                        raise RuntimeError('Log mismatch: Expected transition {:s}, got transition {:s}'.format(log_data_target['name'], transition.name))
+                #print('[HARNESS] Logging data for transition {}'.format(log_data_target['name']))
+                if 'offline_aggregates' not in log_data_target:
+                    log_data_target['offline_aggregates'] = {
+                        'duration' : list()
+                    }
+                log_data_target['offline_aggregates']['duration'].append(duration_us)
+                self.current_transition_in_trace += 1
+\ No newline at end of file
diff --git a/lib/runner.py b/lib/runner.py
index baf4d16..fe76459 100644
--- a/lib/runner.py
+++ b/lib/runner.py
@@ -10,6 +10,7 @@ Functions:
     get_counter_limits -- return arch-specific multipass counter limits (max value, max overflow)
 """
 
+import os
 import re
 import serial
 import serial.threaded
@@ -39,12 +40,16 @@ class SerialReader(serial.threaded.Protocol):
             str_data = data.decode('UTF-8')
             self.recv_buf += str_data
 
-            lines = list(map(str.strip, self.recv_buf.split('\n')))
+            # We may get anything between \r\n, \n\r and simple \n newlines.
+            # We assume that \n is always present and use str.strip to remove leading/trailing \r symbols
+            # Note: Do not call str.strip on lines[-1]! Otherwise, lines may be mangled
+            lines = self.recv_buf.split('\n')
             if len(lines) > 1:
-                self.lines.extend(lines[:-1])
+                self.lines.extend(map(str.strip, lines[:-1]))
                 self.recv_buf = lines[-1]
                 if self.callback:
-                    self.callback(lines[:-1])
+                    for line in lines[:-1]:
+                        self.callback(str.strip(line))
 
         except UnicodeDecodeError:
             pass
@@ -117,6 +122,51 @@ class SerialMonitor:
         self.worker.stop()
         self.ser.close()
 
+
+class MIMOSAMonitor(SerialMonitor):
+    def __init__(self, port: str, baud: int, callback = None, offset = 130, shunt = 330, voltage = 3.3):
+        super().__init__(port = port, baud = baud, callback = callback)
+        self._offset = offset
+        self._shunt = shunt
+        self._voltage = voltage
+        self._start_mimosa()
+
+    def _mimosacmd(self, opts):
+        cmd = ['MimosaCMD']
+        cmd.extend(opts)
+        res = subprocess.run(cmd)
+        if res.returncode != 0:
+            raise RuntimeError('MimosaCMD returned ' + res.returncode)
+
+    def _start_mimosa(self):
+        self._mimosacmd(['--start'])
+        self._mimosacmd(['--parameter', 'offset', str(self._offset)])
+        self._mimosacmd(['--parameter', 'shunt', str(self._shunt)])
+        self._mimosacmd(['--parameter', 'voltage', str(self._voltage)])
+        self._mimosacmd(['--mimosa-start'])
+
+    def _stop_mimosa(self):
+        self._mimosacmd(['--mimosa-stop'])
+        mtime_changed = True
+        mim_file = None
+        time.sleep(1)
+        for filename in os.listdir():
+            if re.search(r'[.]mim$', filename):
+                mim_file = filename
+                break
+        while mtime_changed:
+            mtime_changed = False
+            if time.time() - os.stat(mim_file).st_mtime < 3:
+                mtime_changed = True
+            time.sleep(1)
+        self._mimosacmd(['--stop'])
+        return mim_file
+
+    def close(self):
+        super().close()
+        mim_file = self._stop_mimosa()
+        os.remove(mim_file)
+
 class ShellMonitor:
     """SerialMonitor runs a program and captures its output for a specific amount of time."""
     def __init__(self, script: str, callback = None):
@@ -140,7 +190,8 @@ class ShellMonitor:
             stdout = subprocess.PIPE, stderr = subprocess.PIPE,
             universal_newlines = True)
         if self.callback:
-            self.callback(res.stdout.split('\n'))
+            for line in res.stdout.split('\n'):
+                self.callback(line)
         return res.stdout.split('\n')
 
     def monitor(self):
@@ -193,12 +244,15 @@ def get_info(arch, opts: list = []) -> list:
     return res.stdout.split('\n')
 
 def get_monitor(arch: str, **kwargs) -> object:
-    """Return a SerialMonitor or ShellMonitor."""
+    """Return a SerialMonitor or ShellMonitor, depending on "make info" output of arch."""
     for line in get_info(arch):
         if 'Monitor:' in line:
             _, port, arg = line.split(' ')
             if port == 'run':
                 return ShellMonitor(arg, **kwargs)
+            elif 'mimosa' in kwargs:
+                mimosa_kwargs = kwargs.pop('mimosa')
+                return MIMOSAMonitor(port, arg, **mimosa_kwargs, **kwargs)
             else:
                 return SerialMonitor(port, arg, **kwargs)
     raise RuntimeError('Monitor failure')
@@ -212,3 +266,20 @@ def get_counter_limits(arch: str) -> tuple:
             max_overflow = int(match.group(2))
             return overflow_value, max_overflow
     raise RuntimeError('Did not find Counter Overflow limits')
+
+def get_counter_limits_us(arch: str) -> tuple:
+    """Return duration of one counter step and one counter overflow in us."""
+    cpu_freq = 0
+    overflow_value = 0
+    max_overflow = 0
+    for line in get_info(arch):
+        match = re.match(r'CPU\s+Freq:\s+(.*)\s+Hz', line)
+        if match:
+            cpu_freq = int(match.group(1))
+        match = re.match(r'Counter Overflow:\s+([^/]*)/(.*)', line)
+        if match:
+            overflow_value = int(match.group(1))
+            max_overflow = int(match.group(2))
+    if cpu_freq and overflow_value:
+        return 1000000 / cpu_freq, overflow_value * 1000000 / cpu_freq, max_overflow
+    raise RuntimeError('Did not find Counter Overflow limits')
author	Daniel Friesel <daniel.friesel@uos.de>	2019-07-22 16:58:00 +0200
committer	Daniel Friesel <daniel.friesel@uos.de>	2019-07-22 16:58:00 +0200
commit	825ee24544c18be6c98c5869d89d7f59290b38ac (patch)
tree	2fbe83e619981b23ebdc0e73b1027038a7a9d643
parent	ac49f53902d5b5229d691165762a8f419e07e687 (diff)