diff options
Diffstat (limited to 'lib/data_parameters.py')
-rw-r--r-- | lib/data_parameters.py | 51 |
1 files changed, 50 insertions, 1 deletions
diff --git a/lib/data_parameters.py b/lib/data_parameters.py index 6e64011..7392cbd 100644 --- a/lib/data_parameters.py +++ b/lib/data_parameters.py @@ -65,7 +65,6 @@ def _num_of_type(json, wanted_type): def json_to_param(json): """Return numeric parameters describing the structure of JSON data.""" - ret = dict() ret['strlen_keys'] = _string_key_length(json) @@ -79,12 +78,50 @@ def json_to_param(json): class Protolog: + """ + Loader and postprocessor for raw protobench (protocol-modeling/benchmark.py) data. + + Converts data sorted by (arch,lib)/benchmark/index/attribute + to data sorted by (benchmark,index)/arch/lib/attribute. + + Once constructed, a class object provides three members: + + libraries -- array of library:config elements found in the benchmark results + architectures -- array of multipass architecture names + aggregate -- enriched log data, ordered by benchmark: { + ('benchmark name', 'sub-benchmark index') : { + 'architecture' : { + 'library:options' : { + 'attribute' : value (usually int or array) + } + } + } + } + + aggregate attributes: + bss_{nop,ser,serdes} : whole-program BSS ("Block Started by Symbol", zero-initialized data) segment size + callcycles_raw : { 'C++ statement' : [CPU cycles for execution] ... }. + Not adjusted for 'nop' cycles -> values are a few cycles higher than true duration + cycles_{ser,des,enc,dec,encser,desdec} : cycles for complete (de)serialization step, + measured using just one counter start/stop (not a sum of callcycles_raw entries). + Adjusted for 'nop' cycles -> should give accurate function call duration + data_{nop,ser,serdes} : whole-program Data Segment size + heap_{ser,des} : Maximum heap usage during step + serialized_size : Size (Bytes) of serialized data + stack_alloc_{ser,des} : Maximum stack usage (Bytes) during step. + Based on online analysis (comparison of memory dumps) + stack_set_{ser,des} : Number of stack bytes modified during step. 
+ Based on online analysis (comparison of memory dumps), should be + smaller than the corresponding stack_alloc_ value + text_{nop,ser,serdes} : whole-program Text Segment (code/Flash) size + """ idem = lambda x: x datamap = [ ['bss_nop', 'bss_size_nop', idem], ['bss_ser', 'bss_size_ser', idem], ['bss_serdes', 'bss_size_serdes', idem], + ['callcycles_raw', 'callcycles', idem], ['cycles_ser', 'cycles', lambda x: max(0, int(np.mean(x['ser']) - np.mean(x['nop'])))], ['cycles_des', 'cycles', lambda x: max(0, int(np.mean(x['des']) - np.mean(x['nop'])))], ['cycles_enc', 'cycles', lambda x: max(0, int(np.mean(x['enc']) - np.mean(x['nop'])))], @@ -115,6 +152,12 @@ class Protolog: ] def __init__(self, logfile): + """ + Load and enrich raw protobench log data. + + The enriched data can be accessed via the .aggregate class member, + see the class documentation for details. + """ with open(logfile, 'rb') as f: self.data = ubjson.load(f) self.libraries = set() @@ -172,6 +215,12 @@ class Protolog: # pass def add_datapoint(self, arch, lib, key, value, aggregate_label, data_label, getter): + """ + Set self.aggregate[key][arch][lib][aggregate_label] = getter(value[data_label]['v']). + + Additionally, add lib to self.libraries and arch to self.architectures. + key usually is ('benchmark name', 'sub-benchmark index'). + """ if data_label in value and 'v' in value[data_label]: self.architectures.add(arch) self.libraries.add(lib) |