Diffstat (limited to 'lib/data_parameters.py')
-rw-r--r--  lib/data_parameters.py  337
1 file changed, 236 insertions, 101 deletions
diff --git a/lib/data_parameters.py b/lib/data_parameters.py
index 3b7a148..1150b71 100644
--- a/lib/data_parameters.py
+++ b/lib/data_parameters.py
@@ -10,6 +10,7 @@ from . import cycles_to_energy, size_to_radio_energy, utils
import numpy as np
import ubjson
+
def _string_value_length(json):
if type(json) == str:
return len(json)
@@ -22,6 +23,7 @@ def _string_value_length(json):
return 0
+
# TODO distinguish between int and uint, which is not visible from the
# data value alone
def _int_value_length(json):
@@ -40,18 +42,21 @@ def _int_value_length(json):
return 0
+
def _string_key_length(json):
if type(json) == dict:
return sum(map(len, json.keys())) + sum(map(_string_key_length, json.values()))
return 0
+
def _num_keys(json):
if type(json) == dict:
return len(json.keys()) + sum(map(_num_keys, json.values()))
return 0
+
def _num_of_type(json, wanted_type):
ret = 0
if type(json) == wanted_type:
@@ -65,16 +70,17 @@ def _num_of_type(json, wanted_type):
return ret
+
def json_to_param(json):
"""Return numeric parameters describing the structure of JSON data."""
ret = dict()
- ret['strlen_keys'] = _string_key_length(json)
- ret['strlen_values'] = _string_value_length(json)
- ret['bytelen_int'] = _int_value_length(json)
- ret['num_int'] = _num_of_type(json, int)
- ret['num_float'] = _num_of_type(json, float)
- ret['num_str'] = _num_of_type(json, str)
+ ret["strlen_keys"] = _string_key_length(json)
+ ret["strlen_values"] = _string_value_length(json)
+ ret["bytelen_int"] = _int_value_length(json)
+ ret["num_int"] = _num_of_type(json, int)
+ ret["num_float"] = _num_of_type(json, float)
+ ret["num_str"] = _num_of_type(json, str)
return ret
@@ -127,16 +133,16 @@ class Protolog:
# bogus data
if val > 10_000_000:
return np.nan
- for val in data['nop']:
+ for val in data["nop"]:
# bogus data
if val > 10_000_000:
return np.nan
# All measurements in data[key] cover the same instructions, so they
# should be identical -> it's safe to take the median.
# However, we leave out the first measurement as it is often bogus.
- if key == 'nop':
- return np.median(data['nop'][1:])
- return max(0, int(np.median(data[key][1:]) - np.median(data['nop'][1:])))
+ if key == "nop":
+ return np.median(data["nop"][1:])
+ return max(0, int(np.median(data[key][1:]) - np.median(data["nop"][1:])))
def _median_callcycles(data):
ret = dict()
@@ -146,37 +152,44 @@ class Protolog:
idem = lambda x: x
datamap = [
- ['bss_nop', 'bss_size_nop', idem],
- ['bss_ser', 'bss_size_ser', idem],
- ['bss_serdes', 'bss_size_serdes', idem],
- ['callcycles_raw', 'callcycles', idem],
- ['callcycles_median', 'callcycles', _median_callcycles],
+ ["bss_nop", "bss_size_nop", idem],
+ ["bss_ser", "bss_size_ser", idem],
+ ["bss_serdes", "bss_size_serdes", idem],
+ ["callcycles_raw", "callcycles", idem],
+ ["callcycles_median", "callcycles", _median_callcycles],
# Used to remove nop cycles from callcycles_median
- ['cycles_nop', 'cycles', lambda x: Protolog._median_cycles(x, 'nop')],
- ['cycles_ser', 'cycles', lambda x: Protolog._median_cycles(x, 'ser')],
- ['cycles_des', 'cycles', lambda x: Protolog._median_cycles(x, 'des')],
- ['cycles_enc', 'cycles', lambda x: Protolog._median_cycles(x, 'enc')],
- ['cycles_dec', 'cycles', lambda x: Protolog._median_cycles(x, 'dec')],
- #['cycles_ser_arr', 'cycles', lambda x: np.array(x['ser'][1:]) - np.mean(x['nop'][1:])],
- #['cycles_des_arr', 'cycles', lambda x: np.array(x['des'][1:]) - np.mean(x['nop'][1:])],
- #['cycles_enc_arr', 'cycles', lambda x: np.array(x['enc'][1:]) - np.mean(x['nop'][1:])],
- #['cycles_dec_arr', 'cycles', lambda x: np.array(x['dec'][1:]) - np.mean(x['nop'][1:])],
- ['data_nop', 'data_size_nop', idem],
- ['data_ser', 'data_size_ser', idem],
- ['data_serdes', 'data_size_serdes', idem],
- ['heap_ser', 'heap_usage_ser', idem],
- ['heap_des', 'heap_usage_des', idem],
- ['serialized_size', 'serialized_size', idem],
- ['stack_alloc_ser', 'stack_online_ser', lambda x: x['allocated']],
- ['stack_set_ser', 'stack_online_ser', lambda x: x['used']],
- ['stack_alloc_des', 'stack_online_des', lambda x: x['allocated']],
- ['stack_set_des', 'stack_online_des', lambda x: x['used']],
- ['text_nop', 'text_size_nop', idem],
- ['text_ser', 'text_size_ser', idem],
- ['text_serdes', 'text_size_serdes', idem],
+ ["cycles_nop", "cycles", lambda x: Protolog._median_cycles(x, "nop")],
+ ["cycles_ser", "cycles", lambda x: Protolog._median_cycles(x, "ser")],
+ ["cycles_des", "cycles", lambda x: Protolog._median_cycles(x, "des")],
+ ["cycles_enc", "cycles", lambda x: Protolog._median_cycles(x, "enc")],
+ ["cycles_dec", "cycles", lambda x: Protolog._median_cycles(x, "dec")],
+ # ['cycles_ser_arr', 'cycles', lambda x: np.array(x['ser'][1:]) - np.mean(x['nop'][1:])],
+ # ['cycles_des_arr', 'cycles', lambda x: np.array(x['des'][1:]) - np.mean(x['nop'][1:])],
+ # ['cycles_enc_arr', 'cycles', lambda x: np.array(x['enc'][1:]) - np.mean(x['nop'][1:])],
+ # ['cycles_dec_arr', 'cycles', lambda x: np.array(x['dec'][1:]) - np.mean(x['nop'][1:])],
+ ["data_nop", "data_size_nop", idem],
+ ["data_ser", "data_size_ser", idem],
+ ["data_serdes", "data_size_serdes", idem],
+ ["heap_ser", "heap_usage_ser", idem],
+ ["heap_des", "heap_usage_des", idem],
+ ["serialized_size", "serialized_size", idem],
+ ["stack_alloc_ser", "stack_online_ser", lambda x: x["allocated"]],
+ ["stack_set_ser", "stack_online_ser", lambda x: x["used"]],
+ ["stack_alloc_des", "stack_online_des", lambda x: x["allocated"]],
+ ["stack_set_des", "stack_online_des", lambda x: x["used"]],
+ ["text_nop", "text_size_nop", idem],
+ ["text_ser", "text_size_ser", idem],
+ ["text_serdes", "text_size_serdes", idem],
]
- def __init__(self, logfile, cpu_conf = None, cpu_conf_str = None, radio_conf = None, radio_conf_str = None):
+ def __init__(
+ self,
+ logfile,
+ cpu_conf=None,
+ cpu_conf_str=None,
+ radio_conf=None,
+ radio_conf_str=None,
+ ):
"""
Load and enrich raw protobench log data.
@@ -185,116 +198,177 @@ class Protolog:
"""
self.cpu = None
self.radio = None
- with open(logfile, 'rb') as f:
+ with open(logfile, "rb") as f:
self.data = ubjson.load(f)
self.libraries = set()
self.architectures = set()
self.aggregate = dict()
for arch_lib in self.data.keys():
- arch, lib, libopts = arch_lib.split(':')
- library = lib + ':' + libopts
+ arch, lib, libopts = arch_lib.split(":")
+ library = lib + ":" + libopts
for benchmark in self.data[arch_lib].keys():
for benchmark_item in self.data[arch_lib][benchmark].keys():
subv = self.data[arch_lib][benchmark][benchmark_item]
for aggregate_label, data_label, getter in Protolog.datamap:
try:
- self.add_datapoint(arch, library, (benchmark, benchmark_item), subv, aggregate_label, data_label, getter)
+ self.add_datapoint(
+ arch,
+ library,
+ (benchmark, benchmark_item),
+ subv,
+ aggregate_label,
+ data_label,
+ getter,
+ )
except KeyError:
pass
except TypeError as e:
- print('TypeError in {} {} {} {}: {} -> {}'.format(
- arch_lib, benchmark, benchmark_item, aggregate_label,
- subv[data_label]['v'], str(e)))
+ print(
+ "TypeError in {} {} {} {}: {} -> {}".format(
+ arch_lib,
+ benchmark,
+ benchmark_item,
+ aggregate_label,
+ subv[data_label]["v"],
+ str(e),
+ )
+ )
pass
try:
- codegen = codegen_for_lib(lib, libopts.split(','), subv['data'])
+ codegen = codegen_for_lib(lib, libopts.split(","), subv["data"])
if codegen.max_serialized_bytes != None:
- self.add_datapoint(arch, library, (benchmark, benchmark_item), subv, 'buffer_size', data_label, lambda x: codegen.max_serialized_bytes)
+ self.add_datapoint(
+ arch,
+ library,
+ (benchmark, benchmark_item),
+ subv,
+ "buffer_size",
+ data_label,
+ lambda x: codegen.max_serialized_bytes,
+ )
else:
- self.add_datapoint(arch, library, (benchmark, benchmark_item), subv, 'buffer_size', data_label, lambda x: 0)
+ self.add_datapoint(
+ arch,
+ library,
+ (benchmark, benchmark_item),
+ subv,
+ "buffer_size",
+ data_label,
+ lambda x: 0,
+ )
except:
# avro's codegen will raise RuntimeError("Unsupported Schema") on unsupported data. Other libraries may just silently ignore it.
- self.add_datapoint(arch, library, (benchmark, benchmark_item), subv, 'buffer_size', data_label, lambda x: 0)
- #self.aggregate[(benchmark, benchmark_item)][arch][lib][aggregate_label] = getter(value[data_label]['v'])
-
+ self.add_datapoint(
+ arch,
+ library,
+ (benchmark, benchmark_item),
+ subv,
+ "buffer_size",
+ data_label,
+ lambda x: 0,
+ )
+ # self.aggregate[(benchmark, benchmark_item)][arch][lib][aggregate_label] = getter(value[data_label]['v'])
for key in self.aggregate.keys():
for arch in self.aggregate[key].keys():
for lib, val in self.aggregate[key][arch].items():
try:
- val['cycles_encser'] = val['cycles_enc'] + val['cycles_ser']
+ val["cycles_encser"] = val["cycles_enc"] + val["cycles_ser"]
except KeyError:
pass
try:
- val['cycles_desdec'] = val['cycles_des'] + val['cycles_dec']
+ val["cycles_desdec"] = val["cycles_des"] + val["cycles_dec"]
except KeyError:
pass
try:
- for line in val['callcycles_median'].keys():
- val['callcycles_median'][line] -= val['cycles_nop']
+ for line in val["callcycles_median"].keys():
+ val["callcycles_median"][line] -= val["cycles_nop"]
except KeyError:
pass
try:
- val['data_serdes_delta'] = val['data_serdes'] - val['data_nop']
+ val["data_serdes_delta"] = val["data_serdes"] - val["data_nop"]
except KeyError:
pass
try:
- val['data_serdes_delta_nobuf'] = val['data_serdes'] - val['data_nop'] - val['buffer_size']
+ val["data_serdes_delta_nobuf"] = (
+ val["data_serdes"] - val["data_nop"] - val["buffer_size"]
+ )
except KeyError:
pass
try:
- val['bss_serdes_delta'] = val['bss_serdes'] - val['bss_nop']
+ val["bss_serdes_delta"] = val["bss_serdes"] - val["bss_nop"]
except KeyError:
pass
try:
- val['bss_serdes_delta_nobuf'] = val['bss_serdes'] - val['bss_nop'] - val['buffer_size']
+ val["bss_serdes_delta_nobuf"] = (
+ val["bss_serdes"] - val["bss_nop"] - val["buffer_size"]
+ )
except KeyError:
pass
try:
- val['text_serdes_delta'] = val['text_serdes'] - val['text_nop']
+ val["text_serdes_delta"] = val["text_serdes"] - val["text_nop"]
except KeyError:
pass
try:
- val['total_dmem_ser'] = val['stack_alloc_ser']
- val['written_dmem_ser'] = val['stack_set_ser']
- val['total_dmem_ser'] += val['heap_ser']
- val['written_dmem_ser'] += val['heap_ser']
+ val["total_dmem_ser"] = val["stack_alloc_ser"]
+ val["written_dmem_ser"] = val["stack_set_ser"]
+ val["total_dmem_ser"] += val["heap_ser"]
+ val["written_dmem_ser"] += val["heap_ser"]
except KeyError:
pass
try:
- val['total_dmem_des'] = val['stack_alloc_des']
- val['written_dmem_des'] = val['stack_set_des']
- val['total_dmem_des'] += val['heap_des']
- val['written_dmem_des'] += val['heap_des']
+ val["total_dmem_des"] = val["stack_alloc_des"]
+ val["written_dmem_des"] = val["stack_set_des"]
+ val["total_dmem_des"] += val["heap_des"]
+ val["written_dmem_des"] += val["heap_des"]
except KeyError:
pass
try:
- val['total_dmem_serdes'] = max(val['total_dmem_ser'], val['total_dmem_des'])
+ val["total_dmem_serdes"] = max(
+ val["total_dmem_ser"], val["total_dmem_des"]
+ )
except KeyError:
pass
try:
- val['text_ser_delta'] = val['text_ser'] - val['text_nop']
- val['text_serdes_delta'] = val['text_serdes'] - val['text_nop']
+ val["text_ser_delta"] = val["text_ser"] - val["text_nop"]
+ val["text_serdes_delta"] = val["text_serdes"] - val["text_nop"]
except KeyError:
pass
try:
- val['bss_ser_delta'] = val['bss_ser'] - val['bss_nop']
- val['bss_serdes_delta'] = val['bss_serdes'] - val['bss_nop']
+ val["bss_ser_delta"] = val["bss_ser"] - val["bss_nop"]
+ val["bss_serdes_delta"] = val["bss_serdes"] - val["bss_nop"]
except KeyError:
pass
try:
- val['data_ser_delta'] = val['data_ser'] - val['data_nop']
- val['data_serdes_delta'] = val['data_serdes'] - val['data_nop']
+ val["data_ser_delta"] = val["data_ser"] - val["data_nop"]
+ val["data_serdes_delta"] = val["data_serdes"] - val["data_nop"]
except KeyError:
pass
try:
- val['allmem_ser'] = val['text_ser'] + val['data_ser'] + val['bss_ser'] + val['total_dmem_ser'] - val['buffer_size']
- val['allmem_serdes'] = val['text_serdes'] + val['data_serdes'] + val['bss_serdes'] + val['total_dmem_serdes'] - val['buffer_size']
+ val["allmem_ser"] = (
+ val["text_ser"]
+ + val["data_ser"]
+ + val["bss_ser"]
+ + val["total_dmem_ser"]
+ - val["buffer_size"]
+ )
+ val["allmem_serdes"] = (
+ val["text_serdes"]
+ + val["data_serdes"]
+ + val["bss_serdes"]
+ + val["total_dmem_serdes"]
+ - val["buffer_size"]
+ )
except KeyError:
pass
try:
- val['smem_serdes'] = val['text_serdes'] + val['data_serdes'] + val['bss_serdes'] - val['buffer_size']
+ val["smem_serdes"] = (
+ val["text_serdes"]
+ + val["data_serdes"]
+ + val["bss_serdes"]
+ - val["buffer_size"]
+ )
except KeyError:
pass
@@ -303,7 +377,7 @@ class Protolog:
if cpu_conf:
self.cpu_conf = cpu_conf
- cpu = self.cpu = cycles_to_energy.get_class(cpu_conf['model'])
+ cpu = self.cpu = cycles_to_energy.get_class(cpu_conf["model"])
for key, value in cpu.default_params.items():
if not key in cpu_conf:
cpu_conf[key] = value
@@ -312,48 +386,102 @@ class Protolog:
for lib, val in self.aggregate[key][arch].items():
# All energy data is stored in nanojoules (nJ)
try:
- val['energy_enc'] = int(val['cycles_enc'] * cpu.get_power(cpu_conf) / cpu_conf['cpu_freq'] * 1e9)
+ val["energy_enc"] = int(
+ val["cycles_enc"]
+ * cpu.get_power(cpu_conf)
+ / cpu_conf["cpu_freq"]
+ * 1e9
+ )
except KeyError:
pass
except ValueError:
- print('cycles_enc is NaN for {} -> {} -> {}'.format(arch, lib, key))
+ print(
+ "cycles_enc is NaN for {} -> {} -> {}".format(
+ arch, lib, key
+ )
+ )
try:
- val['energy_ser'] = int(val['cycles_ser'] * cpu.get_power(cpu_conf) / cpu_conf['cpu_freq'] * 1e9)
+ val["energy_ser"] = int(
+ val["cycles_ser"]
+ * cpu.get_power(cpu_conf)
+ / cpu_conf["cpu_freq"]
+ * 1e9
+ )
except KeyError:
pass
except ValueError:
- print('cycles_ser is NaN for {} -> {} -> {}'.format(arch, lib, key))
+ print(
+ "cycles_ser is NaN for {} -> {} -> {}".format(
+ arch, lib, key
+ )
+ )
try:
- val['energy_encser'] = int(val['cycles_encser'] * cpu.get_power(cpu_conf) / cpu_conf['cpu_freq'] * 1e9)
+ val["energy_encser"] = int(
+ val["cycles_encser"]
+ * cpu.get_power(cpu_conf)
+ / cpu_conf["cpu_freq"]
+ * 1e9
+ )
except KeyError:
pass
except ValueError:
- print('cycles_encser is NaN for {} -> {} -> {}'.format(arch, lib, key))
+ print(
+ "cycles_encser is NaN for {} -> {} -> {}".format(
+ arch, lib, key
+ )
+ )
try:
- val['energy_des'] = int(val['cycles_des'] * cpu.get_power(cpu_conf) / cpu_conf['cpu_freq'] * 1e9)
+ val["energy_des"] = int(
+ val["cycles_des"]
+ * cpu.get_power(cpu_conf)
+ / cpu_conf["cpu_freq"]
+ * 1e9
+ )
except KeyError:
pass
except ValueError:
- print('cycles_des is NaN for {} -> {} -> {}'.format(arch, lib, key))
+ print(
+ "cycles_des is NaN for {} -> {} -> {}".format(
+ arch, lib, key
+ )
+ )
try:
- val['energy_dec'] = int(val['cycles_dec'] * cpu.get_power(cpu_conf) / cpu_conf['cpu_freq'] * 1e9)
+ val["energy_dec"] = int(
+ val["cycles_dec"]
+ * cpu.get_power(cpu_conf)
+ / cpu_conf["cpu_freq"]
+ * 1e9
+ )
except KeyError:
pass
except ValueError:
- print('cycles_dec is NaN for {} -> {} -> {}'.format(arch, lib, key))
+ print(
+ "cycles_dec is NaN for {} -> {} -> {}".format(
+ arch, lib, key
+ )
+ )
try:
- val['energy_desdec'] = int(val['cycles_desdec'] * cpu.get_power(cpu_conf) / cpu_conf['cpu_freq'] * 1e9)
+ val["energy_desdec"] = int(
+ val["cycles_desdec"]
+ * cpu.get_power(cpu_conf)
+ / cpu_conf["cpu_freq"]
+ * 1e9
+ )
except KeyError:
pass
except ValueError:
- print('cycles_desdec is NaN for {} -> {} -> {}'.format(arch, lib, key))
+ print(
+ "cycles_desdec is NaN for {} -> {} -> {}".format(
+ arch, lib, key
+ )
+ )
if radio_conf_str:
radio_conf = utils.parse_conf_str(radio_conf_str)
if radio_conf:
self.radio_conf = radio_conf
- radio = self.radio = size_to_radio_energy.get_class(radio_conf['model'])
+ radio = self.radio = size_to_radio_energy.get_class(radio_conf["model"])
for key, value in radio.default_params.items():
if not key in radio_conf:
radio_conf[key] = value
@@ -361,17 +489,22 @@ class Protolog:
for arch in self.aggregate[key].keys():
for lib, val in self.aggregate[key][arch].items():
try:
- radio_conf['txbytes'] = val['serialized_size']
- if radio_conf['txbytes'] > 0:
- val['energy_tx'] = int(radio.get_energy(radio_conf) * 1e9)
+ radio_conf["txbytes"] = val["serialized_size"]
+ if radio_conf["txbytes"] > 0:
+ val["energy_tx"] = int(
+ radio.get_energy(radio_conf) * 1e9
+ )
else:
- val['energy_tx'] = 0
- val['energy_encsertx'] = val['energy_encser'] + val['energy_tx']
- val['energy_desdecrx'] = val['energy_desdec'] + val['energy_tx']
+ val["energy_tx"] = 0
+ val["energy_encsertx"] = (
+ val["energy_encser"] + val["energy_tx"]
+ )
+ val["energy_desdecrx"] = (
+ val["energy_desdec"] + val["energy_tx"]
+ )
except KeyError:
pass
-
def add_datapoint(self, arch, lib, key, value, aggregate_label, data_label, getter):
"""
        Set self.aggregate[key][arch][lib][aggregate_label] = getter(value[data_label]['v']).
@@ -379,7 +512,7 @@ class Protolog:
Additionally, add lib to self.libraries and arch to self.architectures
key usually is ('benchmark name', 'sub-benchmark index').
"""
- if data_label in value and 'v' in value[data_label]:
+ if data_label in value and "v" in value[data_label]:
self.architectures.add(arch)
self.libraries.add(lib)
if not key in self.aggregate:
@@ -388,4 +521,6 @@ class Protolog:
self.aggregate[key][arch] = dict()
if not lib in self.aggregate[key][arch]:
self.aggregate[key][arch][lib] = dict()
- self.aggregate[key][arch][lib][aggregate_label] = getter(value[data_label]['v'])
+ self.aggregate[key][arch][lib][aggregate_label] = getter(
+ value[data_label]["v"]
+ )
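For context, the reformatted module is typically driven as in the following minimal usage sketch; it is not part of the commit. The log file path and the CPU/radio model names are placeholders, and only the "model", "cpu_freq", and "txbytes" configuration keys are taken from the code above.

# Minimal usage sketch (assumptions: file path and model names are placeholders).
from lib.data_parameters import Protolog, json_to_param

# Structural parameters of a JSON-like payload, as returned by json_to_param().
params = json_to_param({"sensor": {"id": 7, "temp": 23.5, "unit": "degC"}})
# -> dict with 'strlen_keys', 'strlen_values', 'bytelen_int',
#    'num_int', 'num_float', 'num_str'

# Load a raw protobench log and enrich it with cycle and energy aggregates.
log = Protolog(
    "benchmark.ubjson",                                    # placeholder path
    cpu_conf={"model": "msp430", "cpu_freq": 16_000_000},  # placeholder CPU model
    radio_conf={"model": "nrf24l01"},                      # placeholder radio model
)

# aggregate is keyed by (benchmark, benchmark_item) -> architecture -> library.
for (benchmark, item), per_arch in log.aggregate.items():
    for arch, per_lib in per_arch.items():
        for lib, val in per_lib.items():
            print(benchmark, item, arch, lib, val.get("cycles_ser"))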