1 files changed, 50 insertions, 1 deletions
diff --git a/lib/data_parameters.py b/lib/data_parameters.py
index 6e64011..7392cbd 100644
--- a/lib/data_parameters.py
+++ b/lib/data_parameters.py
@@ -65,7 +65,6 @@ def _num_of_type(json, wanted_type):
 
 def json_to_param(json):
     """Return numeric parameters describing the structure of JSON data."""
-
     ret = dict()
 
     ret['strlen_keys'] = _string_key_length(json)
@@ -79,12 +78,50 @@ def json_to_param(json):
 
 
 class Protolog:
+    """
+    Loader and postprocessor for raw protobench (protocol-modeling/benchmark.py) data.
+
+    Converts data sorted by (arch,lib)/benchmark/index/attribute
+    to data sorted by (benchmark,index)/arch/lib/attribute.
+
+    Once constructed, a class object provides three members:
+
+    libraries -- array of library:config elements found in the benchmark results
+    architectures -- array of multipass architecture names
+    aggregate -- enriched log data, ordered by benchmark: {
+        ('benchmark name', 'sub-benchmark index') : {
+            'architecture' : {
+                'library:options' : {
+                    'attribute' : value (usually int or array)
+                }
+            }
+        }
+    }
+
+    aggregate attributes:
+    bss_{nop,ser,serdes} : whole-program Block Storage Segment (BSS) size
+    callcycles_raw : { 'C++ statement' : [CPU cycles for execution] ... }.
+        Not adjusted for 'nop' cycles -> values are a few cycles higher than true duration
+    cycles_{ser,des,enc,dec,encser,desdec} : cycles for complete (de)serialization step,
+        measured using just one counter start/stop (not a sum of callcycles_raw entries).
+        Adjusted for 'nop' cycles -> should give accurate function call duration
+    data_{secnop,ser,serdes} : whole-program Data Segment size
+    heap_{ser,des} : Maximum heap usage during step
+    serialized_size : Size (Bytes) of serialized data
+    stack_alloc_{ser,des} : Maximum stack usage (Bytes) during step.
+        Based on online analysis (comparison of memory dumps)
+    stack_set_{ser,des} : Number of stack bytes modified during step.
+        Based on online analysis (comparison of memory dumps), should be
+        smaller than the corresponding stack_alloc_ value
+    text_{nop,ser,serdes} : whole-program Text Segment (code/Flash) size
+    """
 
     idem = lambda x: x
     datamap = [
         ['bss_nop', 'bss_size_nop', idem],
         ['bss_ser', 'bss_size_ser', idem],
         ['bss_serdes', 'bss_size_serdes', idem],
+        ['callcycles_raw', 'callcycles', idem],
         ['cycles_ser', 'cycles', lambda x: max(0, int(np.mean(x['ser']) - np.mean(x['nop'])))],
         ['cycles_des', 'cycles', lambda x: max(0, int(np.mean(x['des']) - np.mean(x['nop'])))],
         ['cycles_enc', 'cycles', lambda x: max(0, int(np.mean(x['enc']) - np.mean(x['nop'])))],
@@ -115,6 +152,12 @@ class Protolog:
     ]
 
     def __init__(self, logfile):
+        """
+        Load and enrich raw protobench log data.
+
+        The enriched data can be accessed via the .aggregate class member,
+        see the class documentation for details.
+        """
         with open(logfile, 'rb') as f:
             self.data = ubjson.load(f)
         self.libraries = set()
@@ -172,6 +215,12 @@ class Protolog:
                     #    pass
 
     def add_datapoint(self, arch, lib, key, value, aggregate_label, data_label, getter):
+        """
+        Set self.aggregate[key][arch][lib][aggregate_Label] = getter(value[data_label]['v']).
+
+        Additionally, add lib to self.libraries and arch to self.architectures
+        key usually is ('benchmark name', 'sub-benchmark index').
+        """
         if data_label in value and 'v' in value[data_label]:
             self.architectures.add(arch)
             self.libraries.add(lib)