diff options
author | Daniel Friesel <daniel.friesel@uos.de> | 2019-07-22 16:58:00 +0200 |
---|---|---|
committer | Daniel Friesel <daniel.friesel@uos.de> | 2019-07-22 16:58:00 +0200 |
commit | 825ee24544c18be6c98c5869d89d7f59290b38ac (patch) | |
tree | 2fbe83e619981b23ebdc0e73b1027038a7a9d643 | |
parent | ac49f53902d5b5229d691165762a8f419e07e687 (diff) |
implement autogeneration and data aggregation of timing benchmarks
-rw-r--r-- | .env | 1 | ||||
-rw-r--r-- | .gitignore | 1 | ||||
-rwxr-xr-x | bin/generate-dfa-benchmark.py | 235 | ||||
-rw-r--r-- | lib/harness.py | 81 | ||||
-rw-r--r-- | lib/runner.py | 81 |
5 files changed, 296 insertions, 103 deletions
@@ -0,0 +1 @@ +PYTHONPATH=/home/derf/var/ess/aemr/dfatool/lib @@ -1,2 +1,3 @@ /lib/__pycache__/ *.pyc +/htmlcov/ diff --git a/bin/generate-dfa-benchmark.py b/bin/generate-dfa-benchmark.py index b9790ed..8e3c227 100755 --- a/bin/generate-dfa-benchmark.py +++ b/bin/generate-dfa-benchmark.py @@ -15,6 +15,10 @@ Options: --depth=<depth> (default: 3) Maximum number of function calls per run +--repeat=<count> (default: 0) + Repeat benchmark runs <count> times. When 0, benchmark runs are repeated + indefinitely and must be explicitly terminated with Ctrl+C / SIGINT + --instance=<name> Override the name of the class instance used for benchmarking @@ -44,6 +48,132 @@ def trace_matches_filter(trace: list, trace_filter: list) -> bool: return True return False +def benchmark_from_runs(pta: PTA, runs: list, harness: object, benchmark_id: int = 0) -> io.StringIO: + outbuf = io.StringIO() + + outbuf.write('#include "arch.h"\n') + if 'includes' in pta.codegen: + for include in pta.codegen['includes']: + outbuf.write('#include "{}"\n'.format(include)) + outbuf.write(harness.global_code()) + + outbuf.write('int main(void)\n') + outbuf.write('{\n') + for driver in ('arch', 'gpio', 'kout'): + outbuf.write('{}.setup();\n'.format(driver)) + if 'setup' in pta.codegen: + for call in pta.codegen['setup']: + outbuf.write(call) + + # There is a race condition between flashing the code and starting the UART log. + # When starting the log before flashing, output from a previous benchmark may cause bogus data to be added. + # When flashing first and then starting the log, the first log lines may be lost. + # To work around this, we flash first, then start the log, and use this delay statement to ensure that no output is lost. + # This is also useful to faciliate MIMOSA calibration after flashing. + outbuf.write('arch.delay_ms(10000);\n') + + outbuf.write('while (1) {\n') + outbuf.write(harness.start_benchmark()) + + class_prefix = '' + if 'instance' in opt: + class_prefix = '{}.'.format(opt['instance']) + elif 'instance' in pta.codegen: + class_prefix = '{}.'.format(pta.codegen['instance']) + + num_transitions = 0 + num_traces = 0 + for run in runs: + outbuf.write(harness.start_run()) + harness.start_trace() + param = pta.get_initial_param_dict() + for transition, arguments, parameter in run: + num_transitions += 1 + harness.append_transition(transition.name, param, arguments) + harness.append_state(transition.destination.name, parameter) + outbuf.write('// {} -> {}\n'.format(transition.origin.name, transition.destination.name)) + if transition.is_interrupt: + outbuf.write('// wait for {} interrupt\n'.format(transition.name)) + transition_code = '// TODO add startTransition / stopTransition calls to interrupt routine' + else: + transition_code = '{}{}({});'.format(class_prefix, transition.name, ', '.join(map(str, arguments))) + outbuf.write(harness.pass_transition(pta.get_transition_id(transition), transition_code, transition = transition)) + + param = parameter + + outbuf.write('// current parameters: {}\n'.format(', '.join(map(lambda kv: '{}={}'.format(*kv), param.items())))) + + if opt['sleep']: + outbuf.write('arch.delay_ms({:d}); // {}\n'.format(opt['sleep'], transition.destination.name)) + + outbuf.write(harness.stop_run(num_traces)) + outbuf.write('\n') + num_traces += 1 + + outbuf.write(harness.stop_benchmark()) + outbuf.write('}\n') + outbuf.write('return 0;\n') + outbuf.write('}\n') + + return outbuf + +def run_benchmark(application_file: str, pta: PTA, runs: list, arch: str, app: str, run_args: list, harness: object, sleep: int = 0, repeat: int = 0, run_offset: int = 0, runs_total: int = 0): + outbuf = benchmark_from_runs(pta, runs, harness) + with open(application_file, 'w') as f: + f.write(outbuf.getvalue()) + print('[MAKE] building benchmark with {:d} runs'.format(len(runs))) + + # assume an average of 10ms per transition. Mind the 10s start delay. + run_timeout = 10 + num_transitions * (sleep+10) / 1000 + + if repeat: + run_timeout *= repeat + + needs_split = False + try: + runner.build(arch, app, run_args) + except RuntimeError: + if len(runs) > 50: + # Application is too large -> split up runs + needs_split = True + else: + # Unknown error + raise + + # This has been deliberately taken out of the except clause to avoid nested exception handlers + # (they lead to pretty interesting tracebacks which are probably more confusing than helpful) + if needs_split: + print('[MAKE] benchmark code is too large, splitting up') + mid = len(runs) // 2 + results = run_benchmark(application_file, pta, runs[:mid], arch, app, run_args, harness, sleep, repeat, run_offset = run_offset, runs_total = runs_total) + harness.reset() + results.extend(run_benchmark(application_file, pta, runs[mid:], arch, app, run_args, harness, sleep, repeat, run_offset = run_offset + mid, runs_total = runs_total)) + return results + + runner.flash(arch, app, run_args) + monitor = runner.get_monitor(arch, callback = harness.parser_cb) + + if arch == 'posix': + print('[RUN] Will run benchmark for {:.0f} seconds'.format(run_timeout)) + lines = monitor.run(int(run_timeout)) + return [(runs, harness, lines)] + + # TODO Benchmark bei zu vielen Transitionen in mehrere Programme + # aufteilen und diese nacheinander bis zu X % completion (220%) + # laufen lassen, zwischendurch jeweils automatisch neu bauen etc. + try: + slept = 0 + while repeat == 0 or slept / run_timeout < 1: + time.sleep(5) + slept += 5 + print('[RUN] {:d}/{:d} ({:.0f}%), current benchmark at {:.0f}%'.format(run_offset, runs_total, run_offset * 100 / runs_total, slept * 100 / run_timeout)) + except KeyboardInterrupt: + pass + lines = monitor.get_lines() + monitor.close() + + return [(runs, harness, lines)] + if __name__ == '__main__': @@ -53,6 +183,7 @@ if __name__ == '__main__': 'app= ' 'depth= ' 'instance= ' + 'repeat= ' 'run= ' 'sleep= ' 'timer-pin= ' @@ -69,8 +200,15 @@ if __name__ == '__main__': else: opt['depth'] = 3 + if 'repeat' in opt: + opt['repeat'] = int(opt['repeat']) + else: + opt['repeat'] = 0 + if 'sleep' in opt: opt['sleep'] = int(opt['sleep']) + else: + opt['sleep'] = 0 if 'trace-filter' in opt: trace_filter = [] @@ -95,100 +233,33 @@ if __name__ == '__main__': else: timer_pin = 'GPIO::p1_0' - harness = OnboardTimerHarness(timer_pin) - - outbuf = io.StringIO() - - outbuf.write('#include "arch.h"\n') - if 'includes' in pta.codegen: - for include in pta.codegen['includes']: - outbuf.write('#include "{}"\n'.format(include)) - outbuf.write(harness.global_code()) - - outbuf.write('int main(void)\n') - outbuf.write('{\n') - for driver in ('arch', 'gpio', 'kout'): - outbuf.write('{}.setup();\n'.format(driver)) - if 'setup' in pta.codegen: - for call in pta.codegen['setup']: - outbuf.write(call) - - outbuf.write('while (1) {\n') - outbuf.write(harness.start_benchmark()) - - class_prefix = '' - if 'instance' in opt: - class_prefix = '{}.'.format(opt['instance']) - elif 'instance' in pta.codegen: - class_prefix = '{}.'.format(pta.codegen['instance']) + runs = list() - num_transitions = 0 - num_traces = 0 for run in pta.dfs(opt['depth'], with_arguments = True, with_parameters = True): if 'trace-filter' in opt and not trace_matches_filter(run, opt['trace-filter']): continue - outbuf.write(harness.start_run()) - harness.start_trace() - param = pta.get_initial_param_dict() - for transition, arguments, parameter in run: - num_transitions += 1 - harness.append_state(transition.origin.name, param) - harness.append_transition(transition.name, param, arguments) - param = transition.get_params_after_transition(param, arguments) - outbuf.write('// {} -> {}\n'.format(transition.origin.name, transition.destination.name)) - if transition.is_interrupt: - outbuf.write('// wait for {} interrupt\n'.format(transition.name)) - transition_code = '// TODO add startTransition / stopTransition calls to interrupt routine' - else: - transition_code = '{}{}({});'.format(class_prefix, transition.name, ', '.join(map(str, arguments))) - outbuf.write(harness.pass_transition(pta.get_transition_id(transition), transition_code, transition = transition, parameter = parameter)) - - param = parameter - - if 'sleep' in opt: - outbuf.write('arch.delay_ms({:d});\n'.format(opt['sleep'])) + runs.append(run) - outbuf.write(harness.stop_run(num_traces)) - outbuf.write('\n') - num_traces += 1 + num_transitions = len(runs) - if num_transitions == 0: + if len(runs) == 0: print('DFS returned no traces -- perhaps your trace-filter is too restrictive?', file=sys.stderr) sys.exit(1) - outbuf.write(harness.stop_benchmark()) - outbuf.write('}\n') - outbuf.write('return 0;\n') - outbuf.write('}\n') + harness = OnboardTimerHarness(gpio_pin = timer_pin, pta = pta, counter_limits = runner.get_counter_limits_us(opt['arch'])) if len(args) > 1: - with open(args[1], 'w') as f: - f.write(outbuf.getvalue()) + results = run_benchmark(args[1], pta, runs, opt['arch'], opt['app'], opt['run'].split(), harness, opt['sleep'], opt['repeat'], runs_total = len(runs)) + json_out = { + 'opt' : opt, + 'pta' : pta.to_json(), + 'traces' : harness.traces, + 'raw_output' : list(map(lambda x: x[2], results)), + } + with open(time.strftime('ptalog-%Y%m%d-%H%M%S.json'), 'w') as f: + json.dump(json_out, f) else: + outbuf = benchmark_from_runs(pta, runs, harness) print(outbuf.getvalue()) - if 'run' in opt: - if 'sleep' in opt: - run_timeout = num_transitions * opt['sleep'] / 1000 - else: - run_timeout = num_transitions * 10 / 1000 - monitor = runner.get_monitor(opt['arch'], callback = harness.parser_cb) - runner.build(opt['arch'], opt['app'], opt['run'].split()) - runner.flash(opt['arch'], opt['app'], opt['run'].split()) - if opt['arch'] != 'posix': - try: - slept = 0 - while True: - time.sleep(5) - slept += 5 - print('[MON] approx. {:.0f}% done'.format(slept * 100 / run_timeout)) - except KeyboardInterrupt: - pass - lines = monitor.get_lines() - monitor.close() - else: - print('[MON] Will run benchmark for {:.0f} seconds'.format(2 * run_timeout)) - lines = monitor.run(int(2 * run_timeout)) - print(lines) - sys.exit(0) diff --git a/lib/harness.py b/lib/harness.py index 529a77e..1390e03 100644 --- a/lib/harness.py +++ b/lib/harness.py @@ -11,18 +11,19 @@ import re # generated otherwise and it should also work with AnalyticModel (which does # not have states) class TransitionHarness: - def __init__(self, gpio_pin = None): + def __init__(self, gpio_pin = None, pta = None): self.gpio_pin = gpio_pin + self.pta = pta + self.reset() + + def reset(self): self.traces = [] self.trace_id = 1 - pass + self.synced = False def start_benchmark(self): pass - def stop_benchmark(self): - pass - def global_code(self): ret = '' if self.gpio_pin != None: @@ -34,8 +35,8 @@ class TransitionHarness: ret += 'PTALog ptalog;\n' return ret - def start_benchmark(self): - return 'ptalog.startBenchmark(0);\n' + def start_benchmark(self, benchmark_id = 0): + return 'ptalog.startBenchmark({:d});\n'.format(benchmark_id) def start_trace(self): self.traces.append({ @@ -62,15 +63,15 @@ class TransitionHarness: def start_run(self): return 'ptalog.reset();\n' - def pass_transition(self, transition_id, transition_code, transition: object = None, parameter: dict = dict()): + def pass_transition(self, transition_id, transition_code, transition: object = None): ret = 'ptalog.passTransition({:d});\n'.format(transition_id) ret += 'ptalog.startTransition();\n' ret += '{}\n'.format(transition_code) ret += 'ptalog.stopTransition();\n' return ret - def stop_run(self, trace_id = 0): - return 'ptalog.dump({:d});\n'.format(trace_id) + def stop_run(self, num_traces = 0): + return 'ptalog.dump({:d});\n'.format(num_traces) def stop_benchmark(self): return '' @@ -96,8 +97,11 @@ class TransitionHarness: pass class OnboardTimerHarness(TransitionHarness): - def __init__(self, gpio_pin = None): - super().__init__(gpio_pin = gpio_pin) + def __init__(self, counter_limits, **kwargs): + super().__init__(**kwargs) + self.trace_id = 0 + self.trace_length = 0 + self.one_cycle_in_us, self.one_overflow_in_us, self.counter_max_overflow = counter_limits def global_code(self): ret = '#include "driver/counter.h"\n' @@ -105,18 +109,63 @@ class OnboardTimerHarness(TransitionHarness): ret += super().global_code() return ret - def start_benchmark(self): + def start_benchmark(self, benchmark_id = 0): ret = 'counter.start();\n' ret += 'counter.stop();\n' ret += 'ptalog.passNop(counter);\n' - ret += super().start_benchmark() + ret += super().start_benchmark(benchmark_id) return ret - def pass_transition(self, transition_id, transition_code, transition: object = None, parameter: dict = dict()): + def pass_transition(self, transition_id, transition_code, transition: object = None): ret = 'ptalog.passTransition({:d});\n'.format(transition_id) ret += 'ptalog.startTransition();\n' ret += 'counter.start();\n' ret += '{}\n'.format(transition_code) ret += 'counter.stop();\n' ret += 'ptalog.stopTransition(counter);\n' - return ret
\ No newline at end of file + return ret + + def parser_cb(self, line): + #print('[HARNESS] got line {}'.format(line)) + if re.match(r'\[PTA\] benchmark start, id=(.*)', line): + self.synced = True + print('[HARNESS] synced') + if self.synced: + res = re.match(r'\[PTA\] trace=(.*) count=(.*)', line) + if res: + self.trace_id = int(res.group(1)) + self.trace_length = int(res.group(2)) + self.current_transition_in_trace = 0 + #print('[HARNESS] trace {:d} contains {:d} transitions. Expecting {:d} transitions.'.format(self.trace_id, self.trace_length, len(self.traces[self.trace_id]['trace']) // 2)) + res = re.match(r'\[PTA\] transition=(.*) cycles=(.*)/(.*)', line) + if res: + transition_id = int(res.group(1)) + # TODO Handle Overflows (requires knowledge of arch-specific max cycle value) + cycles = int(res.group(2)) + overflow = int(res.group(3)) + if overflow >= self.counter_max_overflow: + raise RuntimeError('Counter overflow ({:d}/{:d}) in benchmark id={:d} trace={:d}: transition #{:d} (ID {:d})'.format(cycles, overflow, 0, self.trace_id, self.current_transition_in_trace, transition_id)) + duration_us = cycles * self.one_cycle_in_us + overflow * self.one_overflow_in_us + # self.traces contains transitions and states, UART output only contains trnasitions -> use index * 2 + try: + log_data_target = self.traces[self.trace_id]['trace'][self.current_transition_in_trace * 2] + except IndexError: + transition_name = None + if self.pta: + transition_name = self.pta.transitions[transition_id].name + print('[HARNESS] benchmark id={:d} trace={:d}: transition #{:d} (ID {:d}, name {}) is out of bounds'.format(0, self.trace_id, self.current_transition_in_trace, transition_id, transition_name)) + print(' Offending line: {}'.format(line)) + return + if log_data_target['isa'] != 'transition': + raise RuntimeError('Log mismatch: Expected transition, got {:s}'.format(log_data_target['isa'])) + if self.pta: + transition = self.pta.transitions[transition_id] + if transition.name != log_data_target['name']: + raise RuntimeError('Log mismatch: Expected transition {:s}, got transition {:s}'.format(log_data_target['name'], transition.name)) + #print('[HARNESS] Logging data for transition {}'.format(log_data_target['name'])) + if 'offline_aggregates' not in log_data_target: + log_data_target['offline_aggregates'] = { + 'duration' : list() + } + log_data_target['offline_aggregates']['duration'].append(duration_us) + self.current_transition_in_trace += 1
\ No newline at end of file diff --git a/lib/runner.py b/lib/runner.py index baf4d16..fe76459 100644 --- a/lib/runner.py +++ b/lib/runner.py @@ -10,6 +10,7 @@ Functions: get_counter_limits -- return arch-specific multipass counter limits (max value, max overflow) """ +import os import re import serial import serial.threaded @@ -39,12 +40,16 @@ class SerialReader(serial.threaded.Protocol): str_data = data.decode('UTF-8') self.recv_buf += str_data - lines = list(map(str.strip, self.recv_buf.split('\n'))) + # We may get anything between \r\n, \n\r and simple \n newlines. + # We assume that \n is always present and use str.strip to remove leading/trailing \r symbols + # Note: Do not call str.strip on lines[-1]! Otherwise, lines may be mangled + lines = self.recv_buf.split('\n') if len(lines) > 1: - self.lines.extend(lines[:-1]) + self.lines.extend(map(str.strip, lines[:-1])) self.recv_buf = lines[-1] if self.callback: - self.callback(lines[:-1]) + for line in lines[:-1]: + self.callback(str.strip(line)) except UnicodeDecodeError: pass @@ -117,6 +122,51 @@ class SerialMonitor: self.worker.stop() self.ser.close() + +class MIMOSAMonitor(SerialMonitor): + def __init__(self, port: str, baud: int, callback = None, offset = 130, shunt = 330, voltage = 3.3): + super().__init__(port = port, baud = baud, callback = callback) + self._offset = offset + self._shunt = shunt + self._voltage = voltage + self._start_mimosa() + + def _mimosacmd(self, opts): + cmd = ['MimosaCMD'] + cmd.extend(opts) + res = subprocess.run(cmd) + if res.returncode != 0: + raise RuntimeError('MimosaCMD returned ' + res.returncode) + + def _start_mimosa(self): + self._mimosacmd(['--start']) + self._mimosacmd(['--parameter', 'offset', str(self._offset)]) + self._mimosacmd(['--parameter', 'shunt', str(self._shunt)]) + self._mimosacmd(['--parameter', 'voltage', str(self._voltage)]) + self._mimosacmd(['--mimosa-start']) + + def _stop_mimosa(self): + self._mimosacmd(['--mimosa-stop']) + mtime_changed = True + mim_file = None + time.sleep(1) + for filename in os.listdir(): + if re.search(r'[.]mim$', filename): + mim_file = filename + break + while mtime_changed: + mtime_changed = False + if time.time() - os.stat(mim_file).st_mtime < 3: + mtime_changed = True + time.sleep(1) + self._mimosacmd(['--stop']) + return mim_file + + def close(self): + super().close() + mim_file = self._stop_mimosa() + os.remove(mim_file) + class ShellMonitor: """SerialMonitor runs a program and captures its output for a specific amount of time.""" def __init__(self, script: str, callback = None): @@ -140,7 +190,8 @@ class ShellMonitor: stdout = subprocess.PIPE, stderr = subprocess.PIPE, universal_newlines = True) if self.callback: - self.callback(res.stdout.split('\n')) + for line in res.stdout.split('\n'): + self.callback(line) return res.stdout.split('\n') def monitor(self): @@ -193,12 +244,15 @@ def get_info(arch, opts: list = []) -> list: return res.stdout.split('\n') def get_monitor(arch: str, **kwargs) -> object: - """Return a SerialMonitor or ShellMonitor.""" + """Return a SerialMonitor or ShellMonitor, depending on "make info" output of arch.""" for line in get_info(arch): if 'Monitor:' in line: _, port, arg = line.split(' ') if port == 'run': return ShellMonitor(arg, **kwargs) + elif 'mimosa' in kwargs: + mimosa_kwargs = kwargs.pop('mimosa') + return MIMOSAMonitor(port, arg, **mimosa_kwargs, **kwargs) else: return SerialMonitor(port, arg, **kwargs) raise RuntimeError('Monitor failure') @@ -212,3 +266,20 @@ def get_counter_limits(arch: str) -> tuple: max_overflow = int(match.group(2)) return overflow_value, max_overflow raise RuntimeError('Did not find Counter Overflow limits') + +def get_counter_limits_us(arch: str) -> tuple: + """Return duration of one counter step and one counter overflow in us.""" + cpu_freq = 0 + overflow_value = 0 + max_overflow = 0 + for line in get_info(arch): + match = re.match(r'CPU\s+Freq:\s+(.*)\s+Hz', line) + if match: + cpu_freq = int(match.group(1)) + match = re.match(r'Counter Overflow:\s+([^/]*)/(.*)', line) + if match: + overflow_value = int(match.group(1)) + max_overflow = int(match.group(2)) + if cpu_freq and overflow_value: + return 1000000 / cpu_freq, overflow_value * 1000000 / cpu_freq, max_overflow + raise RuntimeError('Did not find Counter Overflow limits') |