summaryrefslogtreecommitdiff
path: root/bin/merge.py
diff options
context:
space:
mode:
authorDaniel Friesel <derf@finalrewind.org>2017-04-03 15:04:15 +0200
committerDaniel Friesel <derf@finalrewind.org>2017-04-03 15:04:15 +0200
commit00e57331b1c7ef2b1f402f41e1223308e0d8ce61 (patch)
tree05e9b4223072582a5a6843de6d9845213a94f341 /bin/merge.py
initial commit
Diffstat (limited to 'bin/merge.py')
-rwxr-xr-xbin/merge.py1033
1 files changed, 1033 insertions, 0 deletions
diff --git a/bin/merge.py b/bin/merge.py
new file mode 100755
index 0000000..1ff7e74
--- /dev/null
+++ b/bin/merge.py
@@ -0,0 +1,1033 @@
+#!/usr/bin/env python3
+
+import getopt
+import json
+import numpy as np
+import os
+import re
+import sys
+import plotter
+from copy import deepcopy
+from dfatool import aggregate_measures, regression_measures, is_numeric, powerset
+from matplotlib.patches import Polygon
+from scipy import optimize
+
# Global option flags (e.g. 'fit' enables plot output in fit_function).
# Presumably populated by a command-line parser outside this excerpt -- TODO confirm.
opts = {}
+
def load_json(filename):
    """Read *filename* and return the decoded JSON object."""
    with open(filename, "r") as json_file:
        return json.load(json_file)
+
def save_json(data, filename):
    """Serialize *data* as JSON into *filename*."""
    with open(filename, "w") as sink:
        return json.dump(data, sink)
+
def print_data(aggregate):
    """Print median, quartiles, mean std and sample count per aggregate key."""
    for key in sorted(aggregate.keys()):
        name, params = key
        entry = aggregate[key]
        power = entry['means']
        print("%s @ %s : ~ = %.f (%.f, %.f) µ_σ_outer = %.f n = %d" %
            (name, params, np.median(power), np.percentile(power, 25),
            np.percentile(power, 75), np.mean(entry['stds']), len(power)))
+
def flatten(somelist):
    """Concatenate a list of lists into one flat list."""
    flat = []
    for sublist in somelist:
        flat.extend(sublist)
    return flat
+
def mimosa_data(elem):
    """Extract per-measurement MIMOSA statistics from a log element.

    Returns (means, stds, durations, energies, rel_energies, clips,
    timeouts, substate_thresholds), each a list with one entry per offline
    measurement. timeouts / rel_energies are empty when the corresponding
    keys are absent from the offline records.

    Fix: the previous version also collected a `substate_data` list that was
    never returned or used; it has been removed.
    """
    offline = elem['offline']
    means = [x['uW_mean'] for x in offline]
    # 20 µs are subtracted from each duration -- assumed measurement overhead,
    # TODO confirm against the MIMOSA setup
    durations = [x['us'] - 20 for x in offline]
    stds = [x['uW_std'] for x in offline]
    energies = [x['uW_mean'] * (x['us'] - 20) for x in offline]
    clips = [x['clip_rate'] for x in offline]
    substate_thresholds = []
    timeouts = []
    rel_energies = []
    if 'timeout' in offline[0]:
        timeouts = [x['timeout'] for x in offline]
    if 'uW_mean_delta' in offline[0]:
        rel_energies = [x['uW_mean_delta'] * (x['us'] - 20) for x in offline]
    for x in offline:
        if 'substates' in x:
            substate_thresholds.append(x['substates']['threshold'])

    return means, stds, durations, energies, rel_energies, clips, timeouts, substate_thresholds
+
def online_data(elem):
    """Return (power means, durations) as ints from the element's online log."""
    online = elem['online']
    means = [int(entry['power']) for entry in online]
    durations = [int(entry['time']) for entry in online]
    return means, durations
+
def str_to_param_function(function_string, parameters, variables):
    """Compile a model-function string into a python callable.

    parameters -- statistic variables (txpower, bitrate, ...) referenced in
                  the string as global(<name>); they become arg[i]
    variables  -- regression coefficients referenced as param(<i>); they
                  become param[i]

    Returns (fitfunc, dependson): fitfunc(param, arg) evaluates the model,
    dependson[i] is True iff the string references parameters[i].

    Fix: removed a stray trailing semicolon; documented the eval.
    """
    rawfunction = function_string
    dependson = [False] * len(parameters)

    for i in range(len(parameters)):
        if rawfunction.find("global(%s)" % (parameters[i])) >= 0:
            dependson[i] = True
            rawfunction = rawfunction.replace("global(%s)" % (parameters[i]), "arg[%d]" % (i))
    for i in range(len(variables)):
        rawfunction = rawfunction.replace("param(%d)" % (i), "param[%d]" % (i))
    # NOTE: eval of a model string -- only safe for trusted input files
    fitfunc = eval("lambda param, arg: " + rawfunction)

    return fitfunc, dependson
+
def mk_function_data(name, paramdata, parameters, dependson, datatype):
    """Assemble fit data for state/transition *name* from by-param aggregates.

    Returns (X, Y, Xm, Ym, num_valid, num_total): X/Y are raw samples per
    parameter column, Xm/Ym per-key medians. Keys whose depended-on
    parameters are non-numeric are skipped; other non-numeric columns are
    stored as 0.
    """
    num_params = len(parameters)
    X = [[] for _ in range(num_params)]
    Xm = [[] for _ in range(num_params)]
    Y = []
    Ym = []
    num_valid = 0
    num_total = 0

    for key, val in paramdata.items():
        if key[0] != name or len(key[1]) != num_params:
            continue
        num_total += 1
        # reject entries with non-numeric values in depended-on columns
        if any(dependson[i] and not is_numeric(key[1][i]) for i in range(num_params)):
            continue
        num_valid += 1
        samples = val[datatype]
        Y.extend(samples)
        Ym.append(np.median(samples))
        for i in range(num_params):
            if dependson[i] or is_numeric(key[1][i]):
                column_value = int(key[1][i])
            else:
                column_value = 0
            X[i].extend([column_value] * len(samples))
            Xm[i].append(column_value)

    X = tuple(np.array(column) for column in X)
    Xm = tuple(np.array(column) for column in Xm)

    return X, np.array(Y), Xm, np.array(Ym), num_valid, num_total
+
def raw_num0_8(num):
    """Number of zero bits in the 8-bit representation of int(num).

    Fix: previously called the np.vectorize wrapper `num1` from this scalar
    helper, which is itself wrapped by np.vectorize below -- double
    vectorization. Count the one-bits directly instead.
    """
    return 8 - bin(int(num)).count("1")
+
def raw_num0_16(num):
    """Number of zero bits in the 16-bit representation of int(num).

    Fix: previously called the np.vectorize wrapper `num1` from this scalar
    helper (double vectorization); count the one-bits directly.
    """
    return 16 - bin(int(num)).count("1")
+
def raw_num1(num):
    """Population count: number of one-bits in int(num)."""
    return format(int(num), "b").count("1")
+
# Vectorized bit-count helpers usable on numpy arrays inside model functions.
num0_8 = np.vectorize(raw_num0_8)
num0_16 = np.vectorize(raw_num0_16)
num1 = np.vectorize(raw_num1)
+
def try_fits(name, datatype, paramidx, paramdata):
    """Fit several elementary function shapes against parameter *paramidx*.

    For each combination of the remaining parameter values, fits every
    candidate shape to (parameter value -> measurement) data and collects
    regression error measures. Returns {function name: {measure: mean}}.

    Shapes with an undefined domain for an observed value (e.g. log of a
    non-positive parameter) are dropped from the candidate set.

    Fix: previously `results[kind]` was reset to {} for every parameter
    combination, so the final means only reflected the last combination;
    measures are now accumulated across all combinations as the trailing
    np.mean loop expects. Also narrowed the bare `except:`.
    """
    functions = {
        'linear' : lambda param, arg: param[0] + param[1] * arg,
        'logarithmic' : lambda param, arg: param[0] + param[1] * np.log(arg),
        'logarithmic1' : lambda param, arg: param[0] + param[1] * np.log(arg + 1),
        'exponential' : lambda param, arg: param[0] + param[1] * np.exp(arg),
        #'polynomial' : lambda param, arg: param[0] + param[1] * arg + param[2] * arg ** 2,
        'square' : lambda param, arg: param[0] + param[1] * arg ** 2,
        'fractional' : lambda param, arg: param[0] + param[1] / arg,
        'sqrt' : lambda param, arg: param[0] + param[1] * np.sqrt(arg),
        #'num0_8' : lambda param, arg: param[0] + param[1] * num0_8(arg),
        #'num0_16' : lambda param, arg: param[0] + param[1] * num0_16(arg),
        #'num1' : lambda param, arg: param[0] + param[1] * num1(arg),
    }
    # per-shape accumulator: {measure: [value per parameter combination]}
    results = dict([[key, {}] for key in functions.keys()])
    errors = {}

    # all combinations of the parameters excluding paramidx
    allvalues = [(*key[1][:paramidx], *key[1][paramidx+1:]) for key in paramdata.keys() if key[0] == name]
    allvalues = list(set(allvalues))

    for value in allvalues:
        X = []
        Xm = []
        Y = []
        Ym = []
        num_valid = 0
        num_total = 0
        for key, val in paramdata.items():
            if key[0] == name and len(key[1]) > paramidx and (*key[1][:paramidx], *key[1][paramidx+1:]) == value:
                num_total += 1
                if is_numeric(key[1][paramidx]):
                    num_valid += 1
                    Y.extend(val[datatype])
                    Ym.append(np.median(val[datatype]))
                    X.extend([int(key[1][paramidx])] * len(val[datatype]))
                    Xm.append(int(key[1][paramidx]))
                    # drop shapes whose domain excludes this value
                    if int(key[1][paramidx]) == 0:
                        functions.pop('fractional', None)
                    if int(key[1][paramidx]) <= 0:
                        functions.pop('logarithmic', None)
                    if int(key[1][paramidx]) < 0:
                        functions.pop('logarithmic1', None)
                        functions.pop('sqrt', None)
                    if int(key[1][paramidx]) > 64:
                        functions.pop('exponential', None)

        # there should be -at least- two values when fitting
        if num_valid > 1:
            Y = np.array(Y)
            Ym = np.array(Ym)
            X = np.array(X)
            Xm = np.array(Xm)
            for kind, function in functions.items():
                errfunc = lambda P, X, y: function(P, X) - y
                try:
                    res = optimize.least_squares(errfunc, [0, 1], args=(X, Y), xtol=2e-15)
                    measures = regression_measures(function(res.x, X), Y)
                    for k, v in measures.items():
                        if not k in results[kind]:
                            results[kind][k] = []
                        results[kind][k].append(v)
                except Exception:
                    # fits may fail to converge or overflow; skip that shape here
                    pass

    for function_name, result in results.items():
        if len(result) > 0 and function_name in functions:
            errors[function_name] = {}
            for measure in result.keys():
                errors[function_name][measure] = np.mean(result[measure])

    return errors
+
def fit_function(function, name, datatype, parameters, paramdata, xaxis=None, yaxis=None):
    """Fit the model string function['raw'] to the measurements in paramdata.

    On success, function['params'] is replaced with the fitted coefficients
    and function['fit'] receives regression error measures; on failure,
    function['error'] is set instead. If the global `opts` contains 'fit',
    a per-parameter plot is drawn via the `plotter` module.

    function   -- dict with 'raw' (model string) and 'params' (start values)
    datatype   -- measurement key, e.g. 'means' or 'durations'
    parameters -- parameter names the model string may reference
    paramdata  -- by-parameter aggregate data
    """

    # start values for the least-squares fit
    variables = list(map(lambda x: float(x), function['params']))
    fitfunc, dependson = str_to_param_function(function['raw'], parameters, variables)

    X, Y, Xm, Ym, num_valid, num_total = mk_function_data(name, paramdata, parameters, dependson, datatype)

    if num_valid > 0:

        if num_valid != num_total:
            num_invalid = num_total - num_valid
            # NOTE(review): the message prints len(paramdata), not num_total --
            # the two counts can disagree; confirm which is intended
            print("Warning: fit(%s): %d of %d states had incomplete parameter hashes" % (name, num_invalid, len(paramdata)))

        errfunc = lambda P, X, y: fitfunc(P, X) - y
        try:
            res = optimize.least_squares(errfunc, variables, args=(X, Y), xtol=2e-15) # loss='cauchy'
        except ValueError as err:
            function['error'] = str(err)
            return
        #x1 = optimize.curve_fit(lambda param, *arg: fitfunc(param, arg), X, Y, functionparams)
        measures = regression_measures(fitfunc(res.x, X), Y)

        # status <= 0 means the solver did not terminate successfully
        if res.status <= 0:
            function['error'] = res.message
            return

        if 'fit' in opts:
            for i in range(len(parameters)):
                plotter.plot_param_fit(function['raw'], name, fitfunc, res.x, parameters, datatype, i, X, Y, xaxis, yaxis)

        function['params'] = list(res.x)
        function['fit'] = measures

    else:
        function['error'] = 'log contained no numeric parameters'
+
def assess_function(function, name, datatype, parameters, paramdata):
    """Evaluate an already-fitted function against *paramdata* measurements.

    Returns regression error measures, or None when no numeric data exists.
    """
    coefficients = list(map(lambda x: float(x), function['params']))
    fitfunc, dependson = str_to_param_function(function['raw'], parameters, coefficients)
    X, Y, Xm, Ym, num_valid, num_total = mk_function_data(name, paramdata, parameters, dependson, datatype)

    if num_valid == 0:
        return None
    return regression_measures(fitfunc(coefficients, X), Y)
+
def xv_assess_function(name, funbase, what, validation, mae, smape):
    """Assess *funbase* on *validation* data, appending errors to mae/smape.

    mae/smape are dicts of per-name error lists, extended in place.
    NOTE(review): relies on a module-global `parameters` defined elsewhere.
    """
    goodness = assess_function(funbase, name, what, parameters, validation)
    if goodness is None:
        return
    mae.setdefault(name, []).append(goodness['mae'])
    smape.setdefault(name, []).append(goodness['smape'])
+
def xv2_assess_function(name, funbase, what, validation, mae, smape, rmsd):
    """Assess *funbase* on *validation*, appending to the mae/smape/rmsd lists.

    Results with an absurd MAE (>= 10e9) are reported and discarded.
    NOTE(review): relies on a module-global `parameters` defined elsewhere.
    """
    goodness = assess_function(funbase, name, what, parameters, validation)
    if goodness is None:
        return
    if not goodness['mae'] < 10e9:
        print('[!] Ignoring MAE of %d (SMAPE %.f)' % (goodness['mae'], goodness['smape']))
        return
    mae.append(goodness['mae'])
    rmsd.append(goodness['rmsd'])
    smape.append(goodness['smape'])
+
# Returns the values used for each parameter in the measurement, e.g.
# { 'txpower' : [1, 2, 4, 8], 'length' : [16] }
# non-numeric values such as '' are skipped
def param_values(parameters, by_param):
    """Collect the set of numeric values observed for every parameter."""
    paramvalues = {param: set() for param in parameters}

    for i, param in enumerate(parameters):
        for _, paramvalue in by_param.keys():
            if is_numeric(paramvalue[i]):
                paramvalues[param].add(paramvalue[i])

    return paramvalues
+
def param_key(elem):
    """Return the (name, parameter value tuple) aggregate key for an element.

    Values are ordered by sorted parameter name; elements without a
    'parameter' entry yield an empty tuple.
    """
    if 'parameter' in elem:
        values = tuple(elem['parameter'][k] for k in sorted(elem['parameter'].keys()))
    else:
        values = ()

    return (elem['name'], values)
+
#def param_arg_key(elem):
# # Argument-based parametrization is out of scope for now
# #if 'args' in elem:
# # argtuple = tuple(elem['args'])
#
# return (name, paramtuple, argtuple)
+
def add_data_to_aggregate(aggregate, key, isa, data):
    """Merge *data* (lists keyed by measurement type) into aggregate[key].

    A new entry is initialized with the element's isa ('state'/'transition')
    plus one empty list per data key; existing lists are extended.
    """
    if key not in aggregate:
        entry = {'isa': isa}
        for datakey in data.keys():
            entry[datakey] = []
        aggregate[key] = entry
    for datakey, dataval in data.items():
        aggregate[key][datakey].extend(dataval)
+
def fake_add_data_to_aggregate(aggregate, key, isa, database, idx):
    """Copy measurement *idx* of *database* into *aggregate* as a one-sample entry.

    timeouts / rel_energies may be absent (empty lists) for states, hence
    the guards.
    """
    have_timeout = len(database['timeouts']) > 0
    have_rel_energy = len(database['rel_energies']) > 0
    sample = {
        'means' : [database['means'][idx]],
        'stds' : [database['stds'][idx]],
        'durations' : [database['durations'][idx]],
        'energies' : [database['energies'][idx]],
        'rel_energies' : [database['rel_energies'][idx]] if have_rel_energy else [],
        'clip_rate' : [database['clip_rate'][idx]],
        'timeouts' : [database['timeouts'][idx]] if have_timeout else [],
    }
    add_data_to_aggregate(aggregate, key, isa, sample)
+
def weight_by_name(aggdata):
    """Weight each aggregate key by its share of samples for the same name.

    Keys are (name, ...) tuples; weight[key] = samples(key) / samples(name).
    """
    total = {}
    count = {}
    for key, val in aggdata.items():
        n = len(val['means'])
        count[key] = n
        total[key[0]] = total.get(key[0], 0) + n
    return dict([[key, float(count[key]) / total[key[0]]] for key in aggdata.keys()])
+
# returns the mean standard deviation of all measurements of 'what'
# (e.g. power consumption or timeout) for state/transition 'name' where
# parameter 'index' is dynamic and all other parameters are fixed.
# I.e., if parameters are a, b, c ∈ {1,2,3} and 'index' corresponds to b', then
# this function returns the mean of the standard deviations of (a=1, b=*, c=1),
# (a=1, b=*, c=2), and so on
def mean_std_by_param(data, keys, name, what, index):
    """Mean of per-partition stds; *keys* are parameter tuples with *index* removed."""
    stds = []
    for reduced_key in keys:
        partition = []
        for k, v in data.items():
            if k[0] == name and (*k[1][:index], *k[1][index+1:]) == reduced_key:
                partition.extend(v[what])
        stds.append(np.std(partition))
    return np.mean(stds)
+
# returns the mean standard deviation of all measurements of 'what'
# (e.g. power consumption or timeout) for state/transition 'name' where the
# trace of previous transitions is fixed except for a single transition,
# whose occurence or absence is silently ignored.
# this is done separately for each transition (-> returns a dictionary)
def mean_std_by_trace_part(data, transitions, name, what):
    ret = {}
    for transition in transitions:
        # all distinct keys with *transition* filtered out of the trace part
        keys = set(map(lambda x : (x[0], x[1], tuple([y for y in x[2] if y != transition])), data.keys()))
        ret[transition] = {}  # NOTE(review): placeholder, overwritten below
        partitions = []
        for key in keys:
            partition = []
            for k, v in data.items():
                # group entries whose key matches modulo this transition
                key_without_transition = (k[0], k[1], tuple([y for y in k[2] if y != transition]))
                if key[0] == name and key == key_without_transition:
                    partition.extend(v[what])
            if len(partition):
                partitions.append(partition)
        # NOTE(review): if no partition matched, np.mean([]) yields nan (with a
        # runtime warning) -- confirm callers tolerate that
        ret[transition] = np.mean([np.std(partition) for partition in partitions])
    return ret
+
+
def load_run_elem(index, element, trace, by_name, by_param, by_trace):
    """Aggregate one trace element's measurements into by_name/by_param/by_trace.

    index/trace determine the predecessor-transition tuple used for the
    by_trace key. Online data is only available for states.

    Fix: the identical ten-key measurement payload was written out three
    times; it is now built once and shared (add_data_to_aggregate copies
    the lists, so no aliasing occurs).
    """
    means, stds, durations, energies, rel_energies, clips, timeouts, sub_thresholds = mimosa_data(element)

    online_means = []
    online_durations = []
    if element['isa'] == 'state':
        online_means, online_durations = online_data(element)

    key = param_key(element)
    # names of transitions preceding this element in the trace
    pre_trace = tuple(map(lambda x : x['name'], trace[1:index:2]))
    trace_key = (*key, pre_trace)
    name = element['name']

    # common measurement payload for all three aggregates
    payload = {
        'means' : means,
        'stds' : stds,
        'durations' : durations,
        'energies' : energies,
        'rel_energies' : rel_energies,
        'clip_rate' : clips,
        'timeouts' : timeouts,
        'sub_thresholds' : sub_thresholds,
        'online_means' : online_means,
        'online_durations' : online_durations,
    }
    # by_name additionally records the parameter tuple of every sample
    add_data_to_aggregate(by_name, name, element['isa'],
                          dict(payload, param=[key[1]] * len(means)))
    add_data_to_aggregate(by_param, key, element['isa'], payload)
    add_data_to_aggregate(by_trace, trace_key, element['isa'], payload)
+
def fmap(name, funtype):
    """Map a fit-function type to its raw model-string term over global(*name*)."""
    templates = {
        'linear' : "global(%s)",
        'logarithmic' : "np.log(global(%s))",
        'logarithmic1' : "np.log(global(%s) + 1)",
        'exponential' : "np.exp(global(%s))",
        'square' : "global(%s)**2",
        'fractional' : "1 / global(%s)",
        'sqrt' : "np.sqrt(global(%s))",
        'num0_8' : "num0_8(global(%s))",
        'num0_16' : "num0_16(global(%s))",
        'num1' : "num1(global(%s))",
    }
    if funtype not in templates:
        return "ERROR"
    return templates[funtype] % name
+
def fguess_to_function(name, datatype, aggdata, parameters, paramdata, yaxis):
    """Build and fit an 'estimate' model from per-parameter fit guesses.

    For each parameter present in aggdata['fit_guess'], the function shape
    with the lowest RMSD is selected; the candidate model string is the sum
    over the powerset of the selected shapes, with one regression variable
    per term. The result is stored in aggdata['function']['estimate'] and
    fitted in place via fit_function.
    """
    best_fit = {}
    fitguess = aggdata['fit_guess']
    params = list(filter(lambda x : x in fitguess, parameters))
    if len(params) > 0:
        # pick the best-RMSD shape per parameter
        for param in params:
            best_fit_val = np.inf
            for func_name, fit_val in fitguess[param].items():
                if fit_val['rmsd'] < best_fit_val:
                    best_fit_val = fit_val['rmsd']
                    best_fit[param] = func_name
        # sum over all subsets of the chosen shapes: param(i) * shape1 * shape2 ...
        buf = '0'
        pidx = 0
        for elem in powerset(best_fit.items()):
            buf += " + param(%d)" % pidx
            pidx += 1
            for fun in elem:
                buf += " * %s" % fmap(*fun)
        aggdata['function']['estimate'] = {
            'raw' : buf,
            'params' : list(np.ones((pidx))),
            'base' : [best_fit[param] for param in params]
        }
        fit_function(
            aggdata['function']['estimate'], name, datatype, parameters,
            paramdata, yaxis=yaxis)
+
def param_measures(name, paramdata, key, fun):
    """Mean goodness of predicting each parameter combination of *name* by fun(values).

    fun is typically np.mean or np.median. Returns {'mae', 'rmsd'} plus
    'smape' when available.
    """
    mae = []
    smape = []
    rmsd = []
    for pkey, pval in paramdata.items():
        if pkey[0] != name:
            continue
        # Median is the better predictor for MAE / SMAPE, mean for SSR.
        # Since least_squares optimizes SSR, mean is used here as well.
        goodness = aggregate_measures(fun(pval[key]), pval[key])
        mae.append(goodness['mae'])
        rmsd.append(goodness['rmsd'])
        if 'smape' in goodness:
            smape.append(goodness['smape'])

    ret = {
        'mae' : np.mean(mae),
        'rmsd' : np.mean(rmsd),
    }
    if len(smape):
        ret['smape'] = np.mean(smape)

    return ret
+
def keydata(name, val, paramdata, tracedata, key):
    """Initial statistics record for measurement type *key* of *name*.

    Combines overall median/mean goodness, per-parameter and per-trace
    standard deviations, and empty slots filled in later by the analysis
    (std_by_param, fit_guess, function).
    """
    samples = val[key]
    param_stds = [np.std(paramdata[x][key]) for x in paramdata.keys() if x[0] == name]
    trace_stds = [np.std(tracedata[x][key]) for x in tracedata.keys() if x[0] == name]

    return {
        'count' : len(samples),
        'median' : np.median(samples),
        'mean' : np.mean(samples),
        'mean_goodness' : aggregate_measures(np.mean(samples), samples),
        'median_goodness' : aggregate_measures(np.median(samples), samples),
        'param_mean_goodness' : param_measures(name, paramdata, key, np.mean),
        'param_median_goodness' : param_measures(name, paramdata, key, np.median),
        'std_inner' : np.std(samples),
        'std_param' : np.mean(param_stds),
        'std_trace' : np.mean(trace_stds),
        'std_by_param' : {},
        'fit_guess' : {},
        'function' : {},
    }
+
def splitidx_kfold(length, num_slices):
    """Systematic k-fold split: fold i validates indexes i, i+k, i+2k, ...

    Returns a list of (training, validation) index-array pairs.
    """
    indexes = np.arange(length)
    pairs = []
    for fold in range(num_slices):
        validation = indexes[fold::num_slices]
        training = np.delete(indexes, slice(fold, None, num_slices))
        pairs.append((training, validation))
    return pairs
+
def splitidx_srs(length, num_slices):
    """num_slices random splits: 2/3 training, 1/3 validation (simple random sampling)."""
    pairs = []
    for _ in range(num_slices):
        shuffled = np.random.permutation(np.arange(length))
        border = int(length * float(2) / 3)
        pairs.append((shuffled[:border], shuffled[border:]))
    return pairs
+
def val_run(aggdata, split_fun, count):
    """Cross-validate the static (median) model of a sample array.

    aggdata   -- numpy array of measurements
    split_fun -- splitidx_kfold or splitidx_srs
    Returns (mean MAE, mean SMAPE or -1 if unavailable, mean RMSD).
    """
    mae = []
    smape = []
    rmsd = []
    pairs = split_fun(len(aggdata), count)
    for i in range(0, count):
        training_samples = aggdata[pairs[i][0]]
        validation_samples = aggdata[pairs[i][1]]
        goodness = aggregate_measures(np.median(training_samples), validation_samples)
        mae.append(goodness['mae'])
        rmsd.append(goodness['rmsd'])
        if 'smape' in goodness:
            smape.append(goodness['smape'])

    smape_mean = np.mean(smape) if len(smape) else -1
    return np.mean(mae), smape_mean, np.mean(rmsd)
+
# by_trace is not part of the cross-validation process
def val_run_fun(aggdata, by_trace, name, key, funtype1, funtype2, splitfun, count):
    """Sample-wise cross-validation of a fitted function model.

    aggdata  -- by_name aggregate (indexed by name below)
    key      -- measurement key, e.g. 'means' or 'timeouts'
    funtype1 -- model attribute, e.g. 'power' or 'duration'
    funtype2 -- 'user' or 'estimate'
    splitfun -- splitidx_kfold or splitidx_srs
    Returns (mae, smape, rmsd, estimates) lists, one entry per split;
    estimates records the chosen base shapes for 'estimate' models.
    NOTE(review): relies on module-globals `parameters` and `analyze`
    (defined later in this file / in __main__).
    """
    aggdata = aggdata[name]
    isa = aggdata['isa']
    mae = []
    smape = []
    rmsd = []
    estimates = []
    pairs = splitfun(len(aggdata[key]), count)
    for i in range(0, count):
        # rebuild per-parameter aggregates from the selected sample indexes
        bpa_training = {}
        bpa_validation = {}

        for idx in pairs[i][0]:
            bpa_key = (name, aggdata['param'][idx])
            fake_add_data_to_aggregate(bpa_training, bpa_key, isa, aggdata, idx)
        for idx in pairs[i][1]:
            bpa_key = (name, aggdata['param'][idx])
            fake_add_data_to_aggregate(bpa_validation, bpa_key, isa, aggdata, idx)

        fake_by_name = { name : aggdata }
        ares = analyze(fake_by_name, bpa_training, by_trace, parameters)
        if name in ares[isa] and funtype2 in ares[isa][name][funtype1]['function']:
            xv2_assess_function(name, ares[isa][name][funtype1]['function'][funtype2], key, bpa_validation, mae, smape, rmsd)
            if funtype2 == 'estimate':
                if 'base' in ares[isa][name][funtype1]['function'][funtype2]:
                    estimates.append(tuple(ares[isa][name][funtype1]['function'][funtype2]['base']))
                else:
                    estimates.append(None)
    return mae, smape, rmsd, estimates
+
# by_trace is not part of the cross-validation process
def val_run_fun_p(aggdata, by_trace, name, key, funtype1, funtype2, splitfun, count):
    """Parameter-aware cross-validation of a fitted function model.

    Unlike val_run_fun, the split is over whole by_param entries (parameter
    combinations), not individual samples, so a validation combination is
    never seen during training.
    Returns (mae, smape, rmsd, estimates) lists, one entry per split.
    NOTE(review): relies on module-globals `parameters` and `analyze`.
    """
    # restrict to entries belonging to this state/transition
    aggdata = dict([[x, aggdata[x]] for x in aggdata if x[0] == name])
    isa = aggdata[list(aggdata.keys())[0]]['isa']
    mae = []
    smape = []
    rmsd = []
    estimates = []
    pairs = splitfun(len(aggdata.keys()), count) # pairs are by_param index arrays
    keys = sorted(aggdata.keys())
    for i in range(0, count):
        bpa_training = dict([[keys[x], aggdata[keys[x]]] for x in pairs[i][0]])
        bpa_validation = dict([[keys[x], aggdata[keys[x]]] for x in pairs[i][1]])
        # rebuild the by_name aggregate from the training combinations
        bna_training = {}
        for val in bpa_training.values():
            for idx in range(0, len(val[key])):
                fake_add_data_to_aggregate(bna_training, name, isa, val, idx)

        ares = analyze(bna_training, bpa_training, by_trace, parameters)
        if name in ares[isa] and funtype2 in ares[isa][name][funtype1]['function']:
            xv2_assess_function(name, ares[isa][name][funtype1]['function'][funtype2], key, bpa_validation, mae, smape, rmsd)
            if funtype2 == 'estimate':
                if 'base' in ares[isa][name][funtype1]['function'][funtype2]:
                    estimates.append(tuple(ares[isa][name][funtype1]['function'][funtype2]['base']))
                else:
                    estimates.append(None)
    return mae, smape, rmsd, estimates
+
def crossvalidate(by_name, by_param, by_trace, model, parameters):
    """Run and print cross-validation results for static and fitted models.

    For every state/transition: Monte-Carlo (random 2/3-1/3) and systematic
    10-fold validation of the static median model, then -- where the loaded
    model defines user/estimate functions -- sample-wise and parameter-aware
    validation of those functions via val_run_fun / val_run_fun_p.

    NOTE(review): the bare `return` after the per-name loop makes the
    per-parameter cross-validation code below it unreachable -- presumably
    disabled on purpose, confirm before removing.
    """
    param_mc_count = 200
    paramv = param_values(parameters, by_param)
    for name in sorted(by_name.keys()):
        isa = by_name[name]['isa']
        # arrays allow fancy-indexing by the split index arrays
        by_name[name]['means'] = np.array(by_name[name]['means'])
        by_name[name]['energies'] = np.array(by_name[name]['energies'])
        by_name[name]['rel_energies'] = np.array(by_name[name]['rel_energies'])
        by_name[name]['durations'] = np.array(by_name[name]['durations'])

        if isa == 'state':
            mae_mean, smape_mean, rms_mean = val_run(by_name[name]['means'], splitidx_srs, 200)
            print('%16s, static power, Monte Carlo: MAE %8.f µW, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean))
            mae_mean, smape_mean, rms_mean = val_run(by_name[name]['means'], splitidx_kfold, 10)
            print('%16s, static power, 10-fold sys: MAE %8.f µW, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean))
        else:
            mae_mean, smape_mean, rms_mean = val_run(by_name[name]['energies'], splitidx_srs, 200)
            print('%16s, static energy, Monte Carlo: MAE %8.f pJ, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean))
            mae_mean, smape_mean, rms_mean = val_run(by_name[name]['energies'], splitidx_kfold, 10)
            print('%16s, static energy, 10-fold sys: MAE %8.f pJ, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean))
            mae_mean, smape_mean, rms_mean = val_run(by_name[name]['rel_energies'], splitidx_srs, 200)
            print('%16s, static rel_energy, Monte Carlo: MAE %8.f pJ, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean))
            mae_mean, smape_mean, rms_mean = val_run(by_name[name]['rel_energies'], splitidx_kfold, 10)
            print('%16s, static rel_energy, 10-fold sys: MAE %8.f pJ, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean))
            mae_mean, smape_mean, rms_mean = val_run(by_name[name]['durations'], splitidx_srs, 200)
            print('%16s, static duration, Monte Carlo: MAE %8.f µs, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean))
            mae_mean, smape_mean, rms_mean = val_run(by_name[name]['durations'], splitidx_kfold, 10)
            print('%16s, static duration, 10-fold sys: MAE %8.f µs, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean))

    # histogram of which base-shape tuples were chosen across splits
    def print_estimates(estimates, total):
        histogram = {}
        buf = '     '
        for estimate in estimates:
            if not estimate in histogram:
                histogram[estimate] = 1
            else:
                histogram[estimate] += 1
        for estimate, count in sorted(histogram.items(), key=lambda kv: kv[1], reverse=True):
            buf += ' %.f%% %s' % (count * 100 / total, estimate)
        if len(estimates):
            print(buf)

    # run all four validation variants for one (model attribute, function type)
    def val_run_funs(by_name, by_trace, name, key1, key2, key3, unit):
        mae, smape, rmsd, estimates = val_run_fun(by_name, by_trace, name, key1, key2, key3, splitidx_srs, param_mc_count)
        print('%16s, %8s %10s, Monte Carlo: MAE %8.f %s, SMAPE %6.2f%%, RMS %d' % (
            name, key3, key2, np.mean(mae), unit, np.mean(smape), np.mean(rmsd)))
        print_estimates(estimates, param_mc_count)
        mae, smape, rmsd, estimates = val_run_fun(by_name, by_trace, name, key1, key2, key3, splitidx_kfold, 10)
        print('%16s, %8s %10s, 10-fold sys: MAE %8.f %s, SMAPE %6.2f%%, RMS %d' % (
            name, key3, key2, np.mean(mae), unit, np.mean(smape), np.mean(rmsd)))
        print_estimates(estimates, 10)
        mae, smape, rmsd, estimates = val_run_fun_p(by_param, by_trace, name, key1, key2, key3, splitidx_srs, param_mc_count)
        print('%16s, %8s %10s, param-aware Monte Carlo: MAE %8.f %s, SMAPE %6.2f%%, RMS %d' % (
            name, key3, key2, np.mean(mae), unit, np.mean(smape), np.mean(rmsd)))
        print_estimates(estimates, param_mc_count)
        mae, smape, rmsd, estimates = val_run_fun_p(by_param, by_trace, name, key1, key2, key3, splitidx_kfold, 10)
        print('%16s, %8s %10s, param-aware 10-fold sys: MAE %8.f %s, SMAPE %6.2f%%, RMS %d' % (
            name, key3, key2, np.mean(mae), unit, np.mean(smape), np.mean(rmsd)))
        print_estimates(estimates, 10)

        if 'power' in model[isa][name] and 'function' in model[isa][name]['power']:
            if 'user' in model[isa][name]['power']['function']:
                val_run_funs(by_name, by_trace, name, 'means', 'power', 'user', 'µW')
            if 'estimate' in model[isa][name]['power']['function']:
                val_run_funs(by_name, by_trace, name, 'means', 'power', 'estimate', 'µW')
        if 'timeout' in model[isa][name] and 'function' in model[isa][name]['timeout']:
            if 'user' in model[isa][name]['timeout']['function']:
                val_run_funs(by_name, by_trace, name, 'timeouts', 'timeout', 'user', 'µs')
            if 'estimate' in model[isa][name]['timeout']['function']:
                val_run_funs(by_name, by_trace, name, 'timeouts', 'timeout', 'estimate', 'µs')
        if 'duration' in model[isa][name] and 'function' in model[isa][name]['duration']:
            if 'user' in model[isa][name]['duration']['function']:
                val_run_funs(by_name, by_trace, name, 'durations', 'duration', 'user', 'µs')
            if 'estimate' in model[isa][name]['duration']['function']:
                val_run_funs(by_name, by_trace, name, 'durations', 'duration', 'estimate', 'µs')
        if 'energy' in model[isa][name] and 'function' in model[isa][name]['energy']:
            if 'user' in model[isa][name]['energy']['function']:
                val_run_funs(by_name, by_trace, name, 'energies', 'energy', 'user', 'pJ')
            if 'estimate' in model[isa][name]['energy']['function']:
                val_run_funs(by_name, by_trace, name, 'energies', 'energy', 'estimate', 'pJ')
        if 'rel_energy' in model[isa][name] and 'function' in model[isa][name]['rel_energy']:
            if 'user' in model[isa][name]['rel_energy']['function']:
                val_run_funs(by_name, by_trace, name, 'rel_energies', 'rel_energy', 'user', 'pJ')
            if 'estimate' in model[isa][name]['rel_energy']['function']:
                val_run_funs(by_name, by_trace, name, 'rel_energies', 'rel_energy', 'estimate', 'pJ')

    return
    # NOTE(review): everything below is unreachable due to the return above
    for i, param in enumerate(parameters):
        user_mae = {}
        user_smape = {}
        estimate_mae = {}
        estimate_smape = {}
        for val in paramv[param]:
            # leave-one-value-out: train on all combinations where parameter i != val
            bpa_training = dict([[x, by_param[x]] for x in by_param if x[1][i] != val])
            bpa_validation = dict([[x, by_param[x]] for x in by_param if x[1][i] == val])
            to_pop = []
            for name in by_name.keys():
                if not any(map(lambda x : x[0] == name, bpa_training.keys())):
                    to_pop.append(name)
            for name in to_pop:
                by_name.pop(name, None)
            ares = analyze(by_name, bpa_training, by_trace, parameters)
            for name in sorted(ares['state'].keys()):
                state = ares['state'][name]
                if 'function' in state['power']:
                    if 'user' in state['power']['function']:
                        xv_assess_function(name, state['power']['function']['user'], 'means', bpa_validation, user_mae, user_smape)
                    if 'estimate' in state['power']['function']:
                        xv_assess_function(name, state['power']['function']['estimate'], 'means', bpa_validation, estimate_mae, estimate_smape)
            for name in sorted(ares['transition'].keys()):
                trans = ares['transition'][name]
                if 'timeout' in trans and 'function' in trans['timeout']:
                    if 'user' in trans['timeout']['function']:
                        xv_assess_function(name, trans['timeout']['function']['user'], 'timeouts', bpa_validation, user_mae, user_smape)
                    if 'estimate' in trans['timeout']['function']:
                        xv_assess_function(name, trans['timeout']['function']['estimate'], 'timeouts', bpa_validation, estimate_mae, estimate_smape)

        for name in sorted(user_mae.keys()):
            if by_name[name]['isa'] == 'state':
                print('user function %s power by %s: MAE %.f µW, SMAPE %.2f%%' % (
                    name, param, np.mean(user_mae[name]), np.mean(user_smape[name])))
            else:
                print('user function %s timeout by %s: MAE %.f µs, SMAPE %.2f%%' % (
                    name, param, np.mean(user_mae[name]), np.mean(user_smape[name])))
        for name in sorted(estimate_mae.keys()):
            if by_name[name]['isa'] == 'state':
                print('estimate function %s power by %s: MAE %.f µW, SMAPE %.2f%%' % (
                    name, param, np.mean(estimate_mae[name]), np.mean(estimate_smape[name])))
            else:
                print('estimate function %s timeout by %s: MAE %.f µs, SMAPE %.2f%%' % (
                    name, param, np.mean(estimate_mae[name]), np.mean(estimate_smape[name])))
+
def validate(by_name, by_param, parameters):
    """Compare measurements against the stored model.

    Builds {'state': {...}, 'transition': {...}} with goodness-of-fit
    statistics for static model values, online (benchmark-reported)
    estimates, and fitted user/estimate functions.
    NOTE(review): reads the module-global `data` (the loaded model file),
    which is defined outside this excerpt.
    """
    aggdata = {
        'state' : {},
        'transition' : {},
    }
    for key, val in by_name.items():
        name = key
        isa = val['isa']
        model = data['model'][isa][name]

        if isa == 'state':
            aggdata[isa][name] = {
                # static model power vs. measured means
                'power' : {
                    'goodness' : aggregate_measures(model['power']['static'], val['means']),
                    'median' : np.median(val['means']),
                    'mean' : np.mean(val['means']),
                    'std_inner' : np.std(val['means']),
                    'function' : {},
                },
                # online benchmark readings vs. MIMOSA measurements
                'online_power' : {
                    'goodness' : regression_measures(np.array(val['online_means']), np.array(val['means'])),
                    'median' : np.median(val['online_means']),
                    'mean' : np.mean(val['online_means']),
                    'std_inner' : np.std(val['online_means']),
                    'function' : {},
                },
                'online_duration' : {
                    'goodness' : regression_measures(np.array(val['online_durations']), np.array(val['durations'])),
                    'median' : np.median(val['online_durations']),
                    'mean' : np.mean(val['online_durations']),
                    'std_inner' : np.std(val['online_durations']),
                    'function' : {},
                },
                'clip' : {
                    'mean' : np.mean(val['clip_rate']),
                    'max' : max(val['clip_rate']),
                },
                'timeout' : {},
            }
            if 'function' in model['power']:
                aggdata[isa][name]['power']['function'] = {
                    'estimate' : {
                        'fit' : assess_function(model['power']['function']['estimate'],
                            name, 'means', parameters, by_param),
                    },
                    'user': {
                        'fit' : assess_function(model['power']['function']['user'],
                            name, 'means', parameters, by_param),
                    },
                }
        if isa == 'transition':
            aggdata[isa][name] = {
                'duration' : {
                    'goodness' : aggregate_measures(model['duration']['static'], val['durations']),
                    'median' : np.median(val['durations']),
                    'mean' : np.mean(val['durations']),
                    'std_inner' : np.std(val['durations']),
                    'function' : {},
                },
                'energy' : {
                    'goodness' : aggregate_measures(model['energy']['static'], val['energies']),
                    'median' : np.median(val['energies']),
                    'mean' : np.mean(val['energies']),
                    'std_inner' : np.std(val['energies']),
                    'function' : {},
                },
                'rel_energy' : {
                    'goodness' : aggregate_measures(model['rel_energy']['static'], val['rel_energies']),
                    'median' : np.median(val['rel_energies']),
                    'mean' : np.mean(val['rel_energies']),
                    'std_inner' : np.std(val['rel_energies']),
                    'function' : {},
                },
                'clip' : {
                    'mean' : np.mean(val['clip_rate']),
                    'max' : max(val['clip_rate']),
                },
                'timeout' : {},
            }
            if 'function' in model['timeout']:
                aggdata[isa][name]['timeout'] = {
                    'median' : np.median(val['timeouts']),
                    'mean' : np.mean(val['timeouts']),
                    'function': {
                        'estimate' : {
                            'fit' : assess_function(model['timeout']['function']['estimate'],
                                name, 'timeouts', parameters, by_param),
                        },
                        'user': {
                            'fit' : assess_function(model['timeout']['function']['user'],
                                name, 'timeouts', parameters, by_param),
                        },
                    },
                }
    return aggdata
+
+def analyze(by_name, by_param, by_trace, parameters):
+ aggdata = {
+ 'state' : {},
+ 'transition' : {},
+ }
+ transition_names = list(map(lambda x: x[0], filter(lambda x: x[1]['isa'] == 'transition', by_name.items())))
+ for name, val in by_name.items():
+ isa = val['isa']
+ model = data['model'][isa][name]
+
+ aggdata[isa][name] = {
+ 'power' : keydata(name, val, by_param, by_trace, 'means'),
+ 'duration' : keydata(name, val, by_param, by_trace, 'durations'),
+ 'energy' : keydata(name, val, by_param, by_trace, 'energies'),
+ 'clip' : {
+ 'mean' : np.mean(val['clip_rate']),
+ 'max' : max(val['clip_rate']),
+ },
+ 'timeout' : {},
+ }
+
+ aggval = aggdata[isa][name]
+ aggval['power']['std_outer'] = np.mean(val['stds'])
+
+ if isa == 'transition':
+ aggval['rel_energy'] = keydata(name, val, by_param, by_trace, 'rel_energies')
+
+ if isa == 'transition' and 'function' in data['model']['transition'][name]['timeout']:
+ aggval['timeout'] = keydata(name, val, by_param, by_trace, 'timeouts')
+
+ for i, param in enumerate(sorted(data['model']['parameter'].keys())):
+ values = list(set([key[1][i] for key in by_param.keys() if key[0] == name and key[1][i] != '']))
+ allvalues = [(*key[1][:i], *key[1][i+1:]) for key in by_param.keys() if key[0] == name]
+ #allvalues = list(set(allvalues))
+ if len(values) > 1:
+ if isa == 'state':
+ aggval['power']['std_by_param'][param] = mean_std_by_param(
+ by_param, allvalues, name, 'means', i)
+ if aggval['power']['std_by_param'][param] > 0 and aggval['power']['std_param'] / aggval['power']['std_by_param'][param] < 0.6:
+ aggval['power']['fit_guess'][param] = try_fits(name, 'means', i, by_param)
+ else:
+ aggval['duration']['std_by_param'][param] = mean_std_by_param(
+ by_param, allvalues, name, 'durations', i)
+ if aggval['duration']['std_by_param'][param] > 0 and aggval['duration']['std_param'] / aggval['duration']['std_by_param'][param] < 0.6:
+ aggval['duration']['fit_guess'][param] = try_fits(name, 'durations', i, by_param)
+ aggval['energy']['std_by_param'][param] = mean_std_by_param(
+ by_param, allvalues, name, 'energies', i)
+ if aggval['energy']['std_by_param'][param] > 0 and aggval['energy']['std_param'] / aggval['energy']['std_by_param'][param] < 0.6:
+ aggval['energy']['fit_guess'][param] = try_fits(name, 'energies', i, by_param)
+ aggval['rel_energy']['std_by_param'][param] = mean_std_by_param(
+ by_param, allvalues, name, 'rel_energies', i)
+ if aggval['rel_energy']['std_by_param'][param] > 0 and aggval['rel_energy']['std_param'] / aggval['rel_energy']['std_by_param'][param] < 0.6:
+ aggval['rel_energy']['fit_guess'][param] = try_fits(name, 'rel_energies', i, by_param)
+ if isa == 'transition' and 'function' in data['model']['transition'][name]['timeout']:
+ aggval['timeout']['std_by_param'][param] = mean_std_by_param(
+ by_param, allvalues, name, 'timeouts', i)
+ if aggval['timeout']['std_by_param'][param] > 0 and aggval['timeout']['std_param'] / aggval['timeout']['std_by_param'][param] < 0.6:
+ aggval['timeout']['fit_guess'][param] = try_fits(name, 'timeouts', i, by_param)
+
+ if isa == 'state':
+ fguess_to_function(name, 'means', aggval['power'], parameters, by_param,
+ 'estimated %s power [µW]' % name)
+ if 'function' in model['power'] and 'user' in model['power']['function']:
+ aggval['power']['function']['user'] = {
+ 'raw' : model['power']['function']['user']['raw'],
+ 'params' : model['power']['function']['user']['params'],
+ }
+ fit_function(
+ aggval['power']['function']['user'], name, 'means', parameters, by_param,
+ yaxis='%s power [µW]' % name)
+ if aggval['power']['std_param'] > 0 and aggval['power']['std_trace'] / aggval['power']['std_param'] < 0.5:
+ aggval['power']['std_by_trace'] = mean_std_by_trace_part(by_trace, transition_names, name, 'means')
+ else:
+ fguess_to_function(name, 'durations', aggval['duration'], parameters, by_param,
+ 'estimated %s duration [µs]' % name)
+ fguess_to_function(name, 'energies', aggval['energy'], parameters, by_param,
+ 'estimated %s energy [pJ]' % name)
+ fguess_to_function(name, 'rel_energies', aggval['rel_energy'], parameters, by_param,
+ 'estimated relative %s energy [pJ]' % name)
+ if 'function' in model['duration'] and 'user' in model['duration']['function']:
+ aggval['duration']['function']['user'] = {
+ 'raw' : model['duration']['function']['user']['raw'],
+ 'params' : model['duration']['function']['user']['params'],
+ }
+ fit_function(
+ aggval['duration']['function']['user'], name, 'durations', parameters, by_param,
+ yaxis='%s duration [µs]' % name)
+ if 'function' in model['energy'] and 'user' in model['energy']['function']:
+ aggval['energy']['function']['user'] = {
+ 'raw' : model['energy']['function']['user']['raw'],
+ 'params' : model['energy']['function']['user']['params'],
+ }
+ fit_function(
+ aggval['energy']['function']['user'], name, 'energies', parameters, by_param,
+ yaxis='%s energy [pJ]' % name)
+ if 'function' in model['rel_energy'] and 'user' in model['rel_energy']['function']:
+ aggval['rel_energy']['function']['user'] = {
+ 'raw' : model['rel_energy']['function']['user']['raw'],
+ 'params' : model['rel_energy']['function']['user']['params'],
+ }
+ fit_function(
+ aggval['rel_energy']['function']['user'], name, 'rel_energies', parameters, by_param,
+ yaxis='%s rel_energy [pJ]' % name)
+ if 'function' in model['timeout'] and 'user' in model['timeout']['function']:
+ fguess_to_function(name, 'timeouts', aggval['timeout'], parameters, by_param,
+ 'estimated %s timeout [µs]' % name)
+ aggval['timeout']['function']['user'] = {
+ 'raw' : model['timeout']['function']['user']['raw'],
+ 'params' : model['timeout']['function']['user']['params'],
+ }
+ fit_function(
+ aggval['timeout']['function']['user'], name, 'timeouts', parameters,
+ by_param, yaxis='%s timeout [µs]' % name)
+ if aggval['timeout']['std_param'] > 0 and aggval['timeout']['std_trace'] / aggval['timeout']['std_param'] < 0.5:
+ aggval['timeout']['std_by_trace'] = mean_std_by_trace_part(by_trace, transition_names, name, 'timeouts')
+
+ return aggdata
+
# ---------------------------------------------------------------------------
# Command-line interface. All options are long options; most are boolean
# flags whose mere presence in `opts` enables a feature. Only
# --ignore-trace-idx takes a (numeric) value.
# ---------------------------------------------------------------------------
try:
    raw_opts, args = getopt.getopt(sys.argv[1:], "", [
        "fit", "states", "transitions", "params", "clipping", "timing",
        "histogram", "substates", "validate", "crossvalidate", "ignore-trace-idx="])
    for option, parameter in raw_opts:
        # Strip the leading "--" so opts keys are bare option names.
        optname = re.sub(r'^--', '', option)
        opts[optname] = parameter
    if 'ignore-trace-idx' in opts:
        opts['ignore-trace-idx'] = int(opts['ignore-trace-idx'])
except getopt.GetoptError as err:
    print(err)
    sys.exit(2)

# The first positional argument supplies the model description; note that its
# traces are read again in the aggregation loop below (args includes args[0]).
data = load_json(args[0])
by_name = {}
by_param = {}
by_trace = {}
parameters = sorted(data['model']['parameter'].keys())

# Aggregate trace elements from all measurement files, skipping the
# UNINITIALIZED pseudo-state and, if requested, one trace index.
for arg in args:
    mdata = load_json(arg)
    for runidx, run in enumerate(mdata['traces']):
        if 'ignore-trace-idx' not in opts or opts['ignore-trace-idx'] != runidx:
            for i, elem in enumerate(run['trace']):
                if elem['name'] != 'UNINITIALIZED':
                    load_run_elem(i, elem, run['trace'], by_name, by_param, by_trace)

# Optional plotting, selected by command-line flags.
if 'states' in opts:
    if 'params' in opts:
        plotter.plot_states_param(data['model'], by_param)
    else:
        plotter.plot_states(data['model'], by_name)
    if 'timing' in opts:
        plotter.plot_states_duration(data['model'], by_name)
        plotter.plot_states_duration(data['model'], by_param)
    if 'clipping' in opts:
        plotter.plot_states_clips(data['model'], by_name)
if 'transitions' in opts:
    plotter.plot_transitions(data['model'], by_name)
    if 'timing' in opts:
        plotter.plot_transitions_duration(data['model'], by_name)
        plotter.plot_transitions_timeout(data['model'], by_param)
    if 'clipping' in opts:
        plotter.plot_transitions_clips(data['model'], by_name)
if 'histogram' in opts:
    for key in sorted(by_name.keys()):
        plotter.plot_histogram(by_name[key]['means'])
if 'substates' in opts:
    if 'params' in opts:
        plotter.plot_substate_thresholds_p(data['model'], by_param)
    else:
        plotter.plot_substate_thresholds(data['model'], by_name)

# Main processing mode: validate against the existing model, cross-validate,
# or (default) compute aggregate statistics via analyze().
if 'validate' in opts:
    data['aggregate'] = validate(by_name, by_param, parameters)
elif 'crossvalidate' in opts:
    crossvalidate(by_name, by_param, by_trace, data['model'], parameters)
else:
    data['aggregate'] = analyze(by_name, by_param, by_trace, parameters)

# TODO optionally also plot data points for states/transitions which do not have
# a function, but may depend on a parameter (visualization is always good!)

# NOTE(review): this overwrites the first input file in place, adding the
# 'aggregate' key computed above (crossvalidate mode saves no new data).
save_json(data, args[0])