diff options
author | Daniel Friesel <daniel.friesel@uos.de> | 2020-04-29 13:01:31 +0200 |
---|---|---|
committer | Daniel Friesel <daniel.friesel@uos.de> | 2020-04-29 13:01:31 +0200 |
commit | 36d02c1227374b107aa351388c0b5e3df65e4fa9 (patch) | |
tree | 14ccf8e77c2203a8ca775c1f1ffe9c7cc997c320 /bin/merge.py | |
parent | 4b79b253d268652a1ae7239b564aaff9c2871589 (diff) |
Remove most unused perl scripts and modules
Diffstat (limited to 'bin/merge.py')
-rwxr-xr-x | bin/merge.py | 1053 |
1 files changed, 0 insertions, 1053 deletions
diff --git a/bin/merge.py b/bin/merge.py deleted file mode 100755 index 551bc9e..0000000 --- a/bin/merge.py +++ /dev/null @@ -1,1053 +0,0 @@ -#!/usr/bin/env python3 - -import getopt -import json -import numpy as np -import os -import re -import sys -import plotter -from copy import deepcopy -from dfatool import aggregate_measures, regression_measures, is_numeric, powerset -from dfatool import append_if_set, mean_or_none, float_or_nan -from matplotlib.patches import Polygon -from scipy import optimize, stats -#import pickle - -opts = {} - -def load_json(filename): - with open(filename, "r") as f: - return json.load(f) - -def save_json(data, filename): - with open(filename, "w") as f: - return json.dump(data, f) - -def print_data(aggregate): - for key in sorted(aggregate.keys()): - data = aggregate[key] - name, params = key - print("%s @ %s : ~ = %.f (%.f, %.f) µ_σ_outer = %.f n = %d" % - (name, params, np.median(data['means']), np.percentile(data['means'], 25), - np.percentile(data['means'], 75), np.mean(data['stds']), len(data['means']))) - -def flatten(somelist): - return [item for sublist in somelist for item in sublist] - -def mimosa_data(elem): - means = [x['uW_mean'] for x in elem['offline']] - durations = [x['us'] - 20 for x in elem['offline']] - stds = [x['uW_std'] for x in elem['offline']] - energies = [x['uW_mean'] * (x['us'] - 20) for x in elem['offline']] - clips = [x['clip_rate'] for x in elem['offline']] - substate_thresholds = [] - substate_data = [] - timeouts = [] - rel_energies_prev = [] - rel_energies_next = [] - if 'timeout' in elem['offline'][0]: - timeouts = [x['timeout'] for x in elem['offline']] - if 'uW_mean_delta_prev' in elem['offline'][0]: - rel_energies_prev = [x['uW_mean_delta_prev'] * (x['us'] - 20) for x in elem['offline']] - if 'uW_mean_delta_next' in elem['offline'][0]: - rel_energies_next = [x['uW_mean_delta_next'] * (x['us'] - 20) for x in elem['offline']] - for x in elem['offline']: - if 'substates' in x: - substate_thresholds.append(x['substates']['threshold']) - substate_data.append(x['substates']['states']) - - return (means, stds, durations, energies, rel_energies_prev, - rel_energies_next, clips, timeouts, substate_thresholds) - -def online_data(elem): - means = [int(x['power']) for x in elem['online']] - durations = [int(x['time']) for x in elem['online']] - - return means, durations - -# parameters = statistic variables such as txpower, bitrate etc. -# variables = function variables/parameters set by linear regression -def str_to_param_function(function_string, parameters, variables): - rawfunction = function_string - dependson = [False] * len(parameters) - - for i in range(len(parameters)): - if rawfunction.find("global(%s)" % (parameters[i])) >= 0: - dependson[i] = True - rawfunction = rawfunction.replace("global(%s)" % (parameters[i]), "arg[%d]" % (i)) - if rawfunction.find("local(%s)" % (parameters[i])) >= 0: - dependson[i] = True - rawfunction = rawfunction.replace("local(%s)" % (parameters[i]), "arg[%d]" % (i)) - for i in range(len(variables)): - rawfunction = rawfunction.replace("param(%d)" % (i), "param[%d]" % (i)) - fitfunc = eval("lambda param, arg: " + rawfunction); - - return fitfunc, dependson - -def mk_function_data(name, paramdata, parameters, dependson, datatype): - X = [[] for i in range(len(parameters))] - Xm = [[] for i in range(len(parameters))] - Y = [] - Ym = [] - - num_valid = 0 - num_total = 0 - - for key, val in paramdata.items(): - if key[0] == name and len(key[1]) == len(parameters): - valid = True - num_total += 1 - for i in range(len(parameters)): - if dependson[i] and not is_numeric(key[1][i]): - valid = False - if valid: - num_valid += 1 - Y.extend(val[datatype]) - Ym.append(np.median(val[datatype])) - for i in range(len(parameters)): - if dependson[i] or is_numeric(key[1][i]): - X[i].extend([float(key[1][i])] * len(val[datatype])) - Xm[i].append(float(key[1][i])) - else: - X[i].extend([0] * len(val[datatype])) - Xm[i].append(0) - - for i in range(len(parameters)): - X[i] = np.array(X[i]) - Xm[i] = np.array(Xm[i]) - X = tuple(X) - Xm = tuple(Xm) - Y = np.array(Y) - Ym = np.array(Ym) - - return X, Y, Xm, Ym, num_valid, num_total - -def raw_num0_8(num): - return (8 - num1(num)) - -def raw_num0_16(num): - return (16 - num1(num)) - -def raw_num1(num): - return bin(int(num)).count("1") - -num0_8 = np.vectorize(raw_num0_8) -num0_16 = np.vectorize(raw_num0_16) -num1 = np.vectorize(raw_num1) - -def try_fits(name, datatype, paramidx, paramdata): - functions = { - 'linear' : lambda param, arg: param[0] + param[1] * arg, - 'logarithmic' : lambda param, arg: param[0] + param[1] * np.log(arg), - 'logarithmic1' : lambda param, arg: param[0] + param[1] * np.log(arg + 1), - 'exponential' : lambda param, arg: param[0] + param[1] * np.exp(arg), - #'polynomial' : lambda param, arg: param[0] + param[1] * arg + param[2] * arg ** 2, - 'square' : lambda param, arg: param[0] + param[1] * arg ** 2, - 'fractional' : lambda param, arg: param[0] + param[1] / arg, - 'sqrt' : lambda param, arg: param[0] + param[1] * np.sqrt(arg), - 'num0_8' : lambda param, arg: param[0] + param[1] * num0_8(arg), - 'num0_16' : lambda param, arg: param[0] + param[1] * num0_16(arg), - 'num1' : lambda param, arg: param[0] + param[1] * num1(arg), - } - results = dict([[key, []] for key in functions.keys()]) - errors = {} - - allvalues = [(*key[1][:paramidx], *key[1][paramidx+1:]) for key in paramdata.keys() if key[0] == name] - allvalues = list(set(allvalues)) - - for value in allvalues: - X = [] - Xm = [] - Y = [] - Ym = [] - num_valid = 0 - num_total = 0 - for key, val in paramdata.items(): - if key[0] == name and len(key[1]) > paramidx and (*key[1][:paramidx], *key[1][paramidx+1:]) == value: - num_total += 1 - if is_numeric(key[1][paramidx]): - num_valid += 1 - Y.extend(val[datatype]) - Ym.append(np.median(val[datatype])) - X.extend([float(key[1][paramidx])] * len(val[datatype])) - Xm.append(float(key[1][paramidx])) - if float(key[1][paramidx]) == 0: - functions.pop('fractional', None) - if float(key[1][paramidx]) <= 0: - functions.pop('logarithmic', None) - if float(key[1][paramidx]) < 0: - functions.pop('logarithmic1', None) - functions.pop('sqrt', None) - if float(key[1][paramidx]) > 64: - functions.pop('exponential', None) - - # there should be -at least- two values when fitting - if num_valid > 1: - Y = np.array(Y) - Ym = np.array(Ym) - X = np.array(X) - Xm = np.array(Xm) - for kind, function in functions.items(): - results[kind] = {} - errfunc = lambda P, X, y: function(P, X) - y - try: - res = optimize.least_squares(errfunc, [0, 1], args=(X, Y), xtol=2e-15) - measures = regression_measures(function(res.x, X), Y) - for k, v in measures.items(): - if not k in results[kind]: - results[kind][k] = [] - results[kind][k].append(v) - except: - pass - - for function_name, result in results.items(): - if len(result) > 0 and function_name in functions: - errors[function_name] = {} - for measure in result.keys(): - errors[function_name][measure] = np.mean(result[measure]) - - return errors - -def fit_function(function, name, datatype, parameters, paramdata, xaxis=None, yaxis=None): - - variables = list(map(lambda x: float(x), function['params'])) - fitfunc, dependson = str_to_param_function(function['raw'], parameters, variables) - - X, Y, Xm, Ym, num_valid, num_total = mk_function_data(name, paramdata, parameters, dependson, datatype) - - if num_valid > 0: - - if num_valid != num_total: - num_invalid = num_total - num_valid - print("Warning: fit(%s): %d of %d states had incomplete parameter hashes" % (name, num_invalid, len(paramdata))) - - errfunc = lambda P, X, y: fitfunc(P, X) - y - try: - res = optimize.least_squares(errfunc, variables, args=(X, Y), xtol=2e-15) # loss='cauchy' - except ValueError as err: - function['error'] = str(err) - return - #x1 = optimize.curve_fit(lambda param, *arg: fitfunc(param, arg), X, Y, functionparams) - measures = regression_measures(fitfunc(res.x, X), Y) - - if res.status <= 0: - function['error'] = res.message - return - - if 'fit' in opts: - for i in range(len(parameters)): - plotter.plot_param_fit(function['raw'], name, fitfunc, res.x, parameters, datatype, i, X, Y, xaxis, yaxis) - - function['params'] = list(res.x) - function['fit'] = measures - - else: - function['error'] = 'log contained no numeric parameters' - -def assess_function(function, name, datatype, parameters, paramdata): - - variables = list(map(lambda x: float(x), function['params'])) - fitfunc, dependson = str_to_param_function(function['raw'], parameters, variables) - X, Y, Xm, Ym, num_valid, num_total = mk_function_data(name, paramdata, parameters, dependson, datatype) - - if num_valid > 0: - return regression_measures(fitfunc(variables, X), Y) - else: - return None - -def xv_assess_function(name, funbase, what, validation, mae, smape): - goodness = assess_function(funbase, name, what, parameters, validation) - if goodness != None: - if not name in mae: - mae[name] = [] - if not name in smape: - smape[name] = [] - append_if_set(mae, goodness, 'mae') - append_if_set(smape, goodness, 'smape') - -def xv2_assess_function(name, funbase, what, validation, mae, smape, rmsd): - goodness = assess_function(funbase, name, what, parameters, validation) - if goodness != None: - if goodness['mae'] < 10e9: - mae.append(goodness['mae']) - rmsd.append(goodness['rmsd']) - smape.append(goodness['smape']) - else: - print('[!] Ignoring MAE of %d (SMAPE %.f)' % (goodness['mae'], goodness['smape'])) - -# Returns the values used for each parameter in the measurement, e.g. -# { 'txpower' : [1, 2, 4, 8], 'length' : [16] } -# non-numeric values such as '' are skipped -def param_values(parameters, by_param): - paramvalues = dict([[param, set()] for param in parameters]) - - for _, paramvalue in by_param.keys(): - for i, param in enumerate(parameters): - if is_numeric(paramvalue[i]): - paramvalues[param].add(paramvalue[i]) - - return paramvalues - -def param_hash(values): - ret = {} - - for i, param in enumerate(parameters): - ret[param] = values[i] - - return ret - -# Returns the values used for each function argument in the measurement, e.g. -# { 'data': [], 'length' : [16, 31, 32] } -# non-numeric values such as '' or 'long_test_string' are skipped -def arg_values(name, by_arg): - TODO - argvalues = dict([[arg, set()] for arg in parameters]) - - for _, paramvalue in by_param.keys(): - for i, param in enumerate(parameters): - if is_numeric(paramvalue[i]): - paramvalues[param].add(paramvalue[i]) - - return paramvalues - -def mk_param_key(elem): - name = elem['name'] - paramtuple = () - - if 'parameter' in elem: - paramkeys = sorted(elem['parameter'].keys()) - paramtuple = tuple([elem['parameter'][x] for x in paramkeys]) - - return (name, paramtuple) - -def mk_arg_key(elem): - name = elem['name'] - argtuple = () - - if 'args' in elem: - argtuple = tuple(elem['args']) - - return (name, argtuple) - -def add_data_to_aggregate(aggregate, key, isa, data): - if not key in aggregate: - aggregate[key] = { - 'isa' : isa, - } - for datakey in data.keys(): - aggregate[key][datakey] = [] - for datakey, dataval in data.items(): - aggregate[key][datakey].extend(dataval) - -def fake_add_data_to_aggregate(aggregate, key, isa, database, idx): - timeout_val = [] - if len(database['timeouts']): - timeout_val = [database['timeouts'][idx]] - rel_energy_p_val = [] - if len(database['rel_energies_prev']): - rel_energy_p_val = [database['rel_energies_prev'][idx]] - rel_energy_n_val = [] - if len(database['rel_energies_next']): - rel_energy_n_val = [database['rel_energies_next'][idx]] - add_data_to_aggregate(aggregate, key, isa, { - 'means' : [database['means'][idx]], - 'stds' : [database['stds'][idx]], - 'durations' : [database['durations'][idx]], - 'energies' : [database['energies'][idx]], - 'rel_energies_prev' : rel_energy_p_val, - 'rel_energies_next' : rel_energy_n_val, - 'clip_rate' : [database['clip_rate'][idx]], - 'timeouts' : timeout_val, - }) - -def weight_by_name(aggdata): - total = {} - count = {} - weight = {} - for key in aggdata.keys(): - if not key[0] in total: - total[key[0]] = 0 - total[key[0]] += len(aggdata[key]['means']) - count[key] = len(aggdata[key]['means']) - for key in aggdata.keys(): - weight[key] = float(count[key]) / total[key[0]] - return weight - -# returns the mean standard deviation of all measurements of 'what' -# (e.g. power consumption or timeout) for state/transition 'name' where -# parameter 'index' is dynamic and all other parameters are fixed. -# I.e., if parameters are a, b, c ∈ {1,2,3} and 'index' corresponds to b', then -# this function returns the mean of the standard deviations of (a=1, b=*, c=1), -# (a=1, b=*, c=2), and so on -def mean_std_by_param(data, keys, name, what, index): - partitions = [] - for key in keys: - partition = [] - for k, v in data.items(): - if (*k[1][:index], *k[1][index+1:]) == key and k[0] == name: - partition.extend(v[what]) - partitions.append(partition) - return np.mean([np.std(partition) for partition in partitions]) - -def spearmanr_by_param(name, what, index): - sr = stats.spearmanr(by_name[name][what], list(map(lambda x : float_or_nan(x[index]), by_name[name]['param'])))[0] - if sr == np.nan: - return None - return sr - -# returns the mean standard deviation of all measurements of 'what' -# (e.g. energy or duration) for transition 'name' where -# the 'index'th argumetn is dynamic and all other arguments are fixed. -# I.e., if arguments are a, b, c ∈ {1,2,3} and 'index' is 1, then -# this function returns the mean of the standard deviations of (a=1, b=*, c=1), -# (a=1, b=*, c=2), and so on -def mean_std_by_arg(data, keys, name, what, index): - return mean_std_by_param(data, keys, name, what, index) - -# returns the mean standard deviation of all measurements of 'what' -# (e.g. power consumption or timeout) for state/transition 'name' where the -# trace of previous transitions is fixed except for a single transition, -# whose occurence or absence is silently ignored. -# this is done separately for each transition (-> returns a dictionary) -def mean_std_by_trace_part(data, transitions, name, what): - ret = {} - for transition in transitions: - keys = set(map(lambda x : (x[0], x[1], tuple([y for y in x[2] if y != transition])), data.keys())) - ret[transition] = {} - partitions = [] - for key in keys: - partition = [] - for k, v in data.items(): - key_without_transition = (k[0], k[1], tuple([y for y in k[2] if y != transition])) - if key[0] == name and key == key_without_transition: - partition.extend(v[what]) - if len(partition): - partitions.append(partition) - ret[transition] = np.mean([np.std(partition) for partition in partitions]) - return ret - - -def load_run_elem(index, element, trace, by_name, by_arg, by_param, by_trace): - means, stds, durations, energies, rel_energies_prev, rel_energies_next, clips, timeouts, sub_thresholds = mimosa_data(element) - - online_means = [] - online_durations = [] - if element['isa'] == 'state': - online_means, online_durations = online_data(element) - - if 'voltage' in opts: - element['parameter']['voltage'] = opts['voltage'] - - arg_key = mk_arg_key(element) - param_key = mk_param_key(element) - pre_trace = tuple(map(lambda x : x['name'], trace[1:index:2])) - trace_key = (*param_key, pre_trace) - name = element['name'] - - elem_data = { - 'means' : means, - 'stds' : stds, - 'durations' : durations, - 'energies' : energies, - 'rel_energies_prev' : rel_energies_prev, - 'rel_energies_next' : rel_energies_next, - 'clip_rate' : clips, - 'timeouts' : timeouts, - 'sub_thresholds' : sub_thresholds, - 'param' : [param_key[1]] * len(means), - 'online_means' : online_means, - 'online_durations' : online_durations, - } - add_data_to_aggregate(by_name, name, element['isa'], elem_data) - add_data_to_aggregate(by_arg, arg_key, element['isa'], elem_data) - add_data_to_aggregate(by_param, param_key, element['isa'], elem_data) - add_data_to_aggregate(by_trace, trace_key, element['isa'], elem_data) - -def fmap(reftype, name, funtype): - if funtype == 'linear': - return "%s(%s)" % (reftype, name) - if funtype == 'logarithmic': - return "np.log(%s(%s))" % (reftype, name) - if funtype == 'logarithmic1': - return "np.log(%s(%s) + 1)" % (reftype, name) - if funtype == 'exponential': - return "np.exp(%s(%s))" % (reftype, name) - if funtype == 'square': - return "%s(%s)**2" % (reftype, name) - if funtype == 'fractional': - return "1 / %s(%s)" % (reftype, name) - if funtype == 'sqrt': - return "np.sqrt(%s(%s))" % (reftype, name) - if funtype == 'num0_8': - return "num0_8(%s(%s))" % (reftype, name) - if funtype == 'num0_16': - return "num0_16(%s(%s))" % (reftype, name) - if funtype == 'num1': - return "num1(%s(%s))" % (reftype, name) - return "ERROR" - -def fguess_to_function(name, datatype, aggdata, parameters, paramdata, yaxis): - best_fit = {} - fitguess = aggdata['fit_guess'] - params = list(filter(lambda x : x in fitguess, parameters)) - if len(params) > 0: - for param in params: - best_fit_val = np.inf - for func_name, fit_val in fitguess[param].items(): - if fit_val['rmsd'] < best_fit_val: - best_fit_val = fit_val['rmsd'] - best_fit[param] = func_name - buf = '0' - pidx = 0 - for elem in powerset(best_fit.items()): - buf += " + param(%d)" % pidx - pidx += 1 - for fun in elem: - buf += " * %s" % fmap('global', *fun) - aggdata['function']['estimate'] = { - 'raw' : buf, - 'params' : list(np.ones((pidx))), - 'base' : [best_fit[param] for param in params] - } - fit_function( - aggdata['function']['estimate'], name, datatype, parameters, - paramdata, yaxis=yaxis) - -def arg_fguess_to_function(name, datatype, aggdata, arguments, argdata, yaxis): - best_fit = {} - fitguess = aggdata['arg_fit_guess'] - args = list(filter(lambda x : x in fitguess, arguments)) - if len(args) > 0: - for arg in args: - best_fit_val = np.inf - for func_name, fit_val in fitguess[arg].items(): - if fit_val['rmsd'] < best_fit_val: - best_fit_val = fit_val['rmsd'] - best_fit[arg] = func_name - buf = '0' - pidx = 0 - for elem in powerset(best_fit.items()): - buf += " + param(%d)" % pidx - pidx += 1 - for fun in elem: - buf += " * %s" % fmap('local', *fun) - aggdata['function']['estimate_arg'] = { - 'raw' : buf, - 'params' : list(np.ones((pidx))), - 'base' : [best_fit[arg] for arg in args] - } - fit_function( - aggdata['function']['estimate_arg'], name, datatype, arguments, - argdata, yaxis=yaxis) - -def param_measures(name, paramdata, key, fun): - mae = [] - smape = [] - rmsd = [] - for pkey, pval in paramdata.items(): - if pkey[0] == name: - # Median ist besseres Maß für MAE / SMAPE, - # Mean ist besseres für SSR. Da least_squares SSR optimiert - # nutzen wir hier auch Mean. - goodness = aggregate_measures(fun(pval[key]), pval[key]) - append_if_set(mae, goodness, 'mae') - append_if_set(rmsd, goodness, 'rmsd') - append_if_set(smape, goodness, 'smape') - ret = { - 'mae' : mean_or_none(mae), - 'rmsd' : mean_or_none(rmsd), - 'smape' : mean_or_none(smape) - } - - return ret - -def arg_measures(name, argdata, key, fun): - return param_measures(name, argdata, key, fun) - -def lookup_table(name, paramdata, key, fun, keyfun): - lut = [] - - for pkey, pval in paramdata.items(): - if pkey[0] == name: - lut.append({ - 'key': keyfun(pkey[1]), - 'value': fun(pval[key]), - }) - - return lut - -def keydata(name, val, argdata, paramdata, tracedata, key): - ret = { - 'count' : len(val[key]), - 'median' : np.median(val[key]), - 'mean' : np.mean(val[key]), - 'median_by_param' : lookup_table(name, paramdata, key, np.median, param_hash), - 'mean_goodness' : aggregate_measures(np.mean(val[key]), val[key]), - 'median_goodness' : aggregate_measures(np.median(val[key]), val[key]), - 'param_mean_goodness' : param_measures(name, paramdata, key, np.mean), - 'param_median_goodness' : param_measures(name, paramdata, key, np.median), - 'std_inner' : np.std(val[key]), - 'std_param' : np.mean([np.std(paramdata[x][key]) for x in paramdata.keys() if x[0] == name]), - 'std_trace' : np.mean([np.std(tracedata[x][key]) for x in tracedata.keys() if x[0] == name]), - 'std_by_param' : {}, - 'spearmanr_by_param' : {}, - 'fit_guess' : {}, - 'function' : {}, - } - - if val['isa'] == 'transition': - ret['arg_mean_goodness'] = arg_measures(name, argdata, key, np.mean) - ret['arg_median_goodness'] = arg_measures(name, argdata, key, np.median) - ret['median_by_arg'] = lookup_table(name, argdata, key, np.median, list) - ret['std_arg'] = np.mean([np.std(argdata[x][key]) for x in argdata.keys() if x[0] == name]) - ret['std_by_arg'] = {} - ret['arg_fit_guess'] = {} - - return ret - -def splitidx_kfold(length, num_slices): - pairs = [] - indexes = np.arange(length) - for i in range(0, num_slices): - training = np.delete(indexes, slice(i, None, num_slices)) - validation = indexes[i::num_slices] - pairs.append((training, validation)) - return pairs - -def splitidx_srs(length, num_slices): - pairs = [] - for i in range(0, num_slices): - shuffled = np.random.permutation(np.arange(length)) - border = int(length * float(2) / 3) - training = shuffled[:border] - validation = shuffled[border:] - pairs.append((training, validation)) - return pairs - -def val_run(aggdata, split_fun, count): - mae = [] - smape = [] - rmsd = [] - pairs = split_fun(len(aggdata), count) - for i in range(0, count): - training = aggdata[pairs[i][0]] - validation = aggdata[pairs[i][1]] - median = np.median(training) - goodness = aggregate_measures(median, validation) - append_if_set(mae, goodness, 'mae') - append_if_set(rmsd, goodness, 'rmsd') - append_if_set(smape, goodness, 'smape') - - mae_mean = np.mean(mae) - rmsd_mean = np.mean(rmsd) - if len(smape): - smape_mean = np.mean(smape) - else: - smape_mean = -1 - - return mae_mean, smape_mean, rmsd_mean - -# by_trace is not part of the cross-validation process -def val_run_fun(aggdata, by_trace, name, key, funtype1, funtype2, splitfun, count): - aggdata = aggdata[name] - isa = aggdata['isa'] - mae = [] - smape = [] - rmsd = [] - estimates = [] - pairs = splitfun(len(aggdata[key]), count) - for i in range(0, count): - bpa_training = {} - bpa_validation = {} - - for idx in pairs[i][0]: - bpa_key = (name, aggdata['param'][idx]) - fake_add_data_to_aggregate(bpa_training, bpa_key, isa, aggdata, idx) - for idx in pairs[i][1]: - bpa_key = (name, aggdata['param'][idx]) - fake_add_data_to_aggregate(bpa_validation, bpa_key, isa, aggdata, idx) - - fake_by_name = { name : aggdata } - ares = analyze(fake_by_name, {}, bpa_training, by_trace, parameters) - if name in ares[isa] and funtype2 in ares[isa][name][funtype1]['function']: - xv2_assess_function(name, ares[isa][name][funtype1]['function'][funtype2], key, bpa_validation, mae, smape, rmsd) - if funtype2 == 'estimate': - if 'base' in ares[isa][name][funtype1]['function'][funtype2]: - estimates.append(tuple(ares[isa][name][funtype1]['function'][funtype2]['base'])) - else: - estimates.append(None) - return mae, smape, rmsd, estimates - -# by_trace is not part of the cross-validation process -def val_run_fun_p(aggdata, by_trace, name, key, funtype1, funtype2, splitfun, count): - aggdata = dict([[x, aggdata[x]] for x in aggdata if x[0] == name]) - isa = aggdata[list(aggdata.keys())[0]]['isa'] - mae = [] - smape = [] - rmsd = [] - estimates = [] - pairs = splitfun(len(aggdata.keys()), count) # pairs are by_param index arrays - keys = sorted(aggdata.keys()) - for i in range(0, count): - bpa_training = dict([[keys[x], aggdata[keys[x]]] for x in pairs[i][0]]) - bpa_validation = dict([[keys[x], aggdata[keys[x]]] for x in pairs[i][1]]) - bna_training = {} - for val in bpa_training.values(): - for idx in range(0, len(val[key])): - fake_add_data_to_aggregate(bna_training, name, isa, val, idx) - - ares = analyze(bna_training, {}, bpa_training, by_trace, parameters) - if name in ares[isa] and funtype2 in ares[isa][name][funtype1]['function']: - xv2_assess_function(name, ares[isa][name][funtype1]['function'][funtype2], key, bpa_validation, mae, smape, rmsd) - if funtype2 == 'estimate': - if 'base' in ares[isa][name][funtype1]['function'][funtype2]: - estimates.append(tuple(ares[isa][name][funtype1]['function'][funtype2]['base'])) - else: - estimates.append(None) - return mae, smape, rmsd, estimates - -def crossvalidate(by_name, by_param, by_trace, model, parameters): - param_mc_count = 200 - paramv = param_values(parameters, by_param) - for name in sorted(by_name.keys()): - isa = by_name[name]['isa'] - by_name[name]['means'] = np.array(by_name[name]['means']) - by_name[name]['energies'] = np.array(by_name[name]['energies']) - by_name[name]['rel_energies_prev'] = np.array(by_name[name]['rel_energies_prev']) - by_name[name]['rel_energies_next'] = np.array(by_name[name]['rel_energies_next']) - by_name[name]['durations'] = np.array(by_name[name]['durations']) - - if isa == 'state': - mae_mean, smape_mean, rms_mean = val_run(by_name[name]['means'], splitidx_srs, 200) - print('%16s, static power, Monte Carlo: MAE %8.f µW, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean)) - mae_mean, smape_mean, rms_mean = val_run(by_name[name]['means'], splitidx_kfold, 10) - print('%16s, static power, 10-fold sys: MAE %8.f µW, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean)) - else: - mae_mean, smape_mean, rms_mean = val_run(by_name[name]['energies'], splitidx_srs, 200) - print('%16s, static energy, Monte Carlo: MAE %8.f pJ, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean)) - mae_mean, smape_mean, rms_mean = val_run(by_name[name]['energies'], splitidx_kfold, 10) - print('%16s, static energy, 10-fold sys: MAE %8.f pJ, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean)) - mae_mean, smape_mean, rms_mean = val_run(by_name[name]['rel_energies_prev'], splitidx_srs, 200) - print('%16s, static rel_energy_p, Monte Carlo: MAE %8.f pJ, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean)) - mae_mean, smape_mean, rms_mean = val_run(by_name[name]['rel_energies_prev'], splitidx_kfold, 10) - print('%16s, static rel_energy_p, 10-fold sys: MAE %8.f pJ, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean)) - mae_mean, smape_mean, rms_mean = val_run(by_name[name]['rel_energies_next'], splitidx_srs, 200) - print('%16s, static rel_energy_n, Monte Carlo: MAE %8.f pJ, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean)) - mae_mean, smape_mean, rms_mean = val_run(by_name[name]['rel_energies_next'], splitidx_kfold, 10) - print('%16s, static rel_energy_n, 10-fold sys: MAE %8.f pJ, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean)) - mae_mean, smape_mean, rms_mean = val_run(by_name[name]['durations'], splitidx_srs, 200) - print('%16s, static duration, Monte Carlo: MAE %8.f µs, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean)) - mae_mean, smape_mean, rms_mean = val_run(by_name[name]['durations'], splitidx_kfold, 10) - print('%16s, static duration, 10-fold sys: MAE %8.f µs, SMAPE %6.2f%%, RMS %d' % (name, mae_mean, smape_mean, rms_mean)) - - def print_estimates(estimates, total): - histogram = {} - buf = ' ' - for estimate in estimates: - if not estimate in histogram: - histogram[estimate] = 1 - else: - histogram[estimate] += 1 - for estimate, count in sorted(histogram.items(), key=lambda kv: kv[1], reverse=True): - buf += ' %.f%% %s' % (count * 100 / total, estimate) - if len(estimates): - print(buf) - - def val_run_funs(by_name, by_trace, name, key1, key2, key3, unit): - mae, smape, rmsd, estimates = val_run_fun(by_name, by_trace, name, key1, key2, key3, splitidx_srs, param_mc_count) - print('%16s, %8s %12s, Monte Carlo: MAE %8.f %s, SMAPE %6.2f%%, RMS %d' % ( - name, key3, key2, np.mean(mae), unit, np.mean(smape), np.mean(rmsd))) - print_estimates(estimates, param_mc_count) - mae, smape, rmsd, estimates = val_run_fun(by_name, by_trace, name, key1, key2, key3, splitidx_kfold, 10) - print('%16s, %8s %12s, 10-fold sys: MAE %8.f %s, SMAPE %6.2f%%, RMS %d' % ( - name, key3, key2, np.mean(mae), unit, np.mean(smape), np.mean(rmsd))) - print_estimates(estimates, 10) - mae, smape, rmsd, estimates = val_run_fun_p(by_param, by_trace, name, key1, key2, key3, splitidx_srs, param_mc_count) - print('%16s, %8s %12s, param-aware Monte Carlo: MAE %8.f %s, SMAPE %6.2f%%, RMS %d' % ( - name, key3, key2, np.mean(mae), unit, np.mean(smape), np.mean(rmsd))) - print_estimates(estimates, param_mc_count) - mae, smape, rmsd, estimates = val_run_fun_p(by_param, by_trace, name, key1, key2, key3, splitidx_kfold, 10) - print('%16s, %8s %12s, param-aware 10-fold sys: MAE %8.f %s, SMAPE %6.2f%%, RMS %d' % ( - name, key3, key2, np.mean(mae), unit, np.mean(smape), np.mean(rmsd))) - print_estimates(estimates, 10) - - if 'power' in model[isa][name] and 'function' in model[isa][name]['power']: - if 'user' in model[isa][name]['power']['function']: - val_run_funs(by_name, by_trace, name, 'means', 'power', 'user', 'µW') - if 'estimate' in model[isa][name]['power']['function']: - val_run_funs(by_name, by_trace, name, 'means', 'power', 'estimate', 'µW') - if 'timeout' in model[isa][name] and 'function' in model[isa][name]['timeout']: - if 'user' in model[isa][name]['timeout']['function']: - val_run_funs(by_name, by_trace, name, 'timeouts', 'timeout', 'user', 'µs') - if 'estimate' in model[isa][name]['timeout']['function']: - val_run_funs(by_name, by_trace, name, 'timeouts', 'timeout', 'estimate', 'µs') - if 'duration' in model[isa][name] and 'function' in model[isa][name]['duration']: - if 'user' in model[isa][name]['duration']['function']: - val_run_funs(by_name, by_trace, name, 'durations', 'duration', 'user', 'µs') - if 'estimate' in model[isa][name]['duration']['function']: - val_run_funs(by_name, by_trace, name, 'durations', 'duration', 'estimate', 'µs') - if 'energy' in model[isa][name] and 'function' in model[isa][name]['energy']: - if 'user' in model[isa][name]['energy']['function']: - val_run_funs(by_name, by_trace, name, 'energies', 'energy', 'user', 'pJ') - if 'estimate' in model[isa][name]['energy']['function']: - val_run_funs(by_name, by_trace, name, 'energies', 'energy', 'estimate', 'pJ') - if 'rel_energy_prev' in model[isa][name] and 'function' in model[isa][name]['rel_energy_prev']: - if 'user' in model[isa][name]['rel_energy_prev']['function']: - val_run_funs(by_name, by_trace, name, 'rel_energies_prev', 'rel_energy_prev', 'user', 'pJ') - if 'estimate' in model[isa][name]['rel_energy_prev']['function']: - val_run_funs(by_name, by_trace, name, 'rel_energies_prev', 'rel_energy_prev', 'estimate', 'pJ') - if 'rel_energy_next' in model[isa][name] and 'function' in model[isa][name]['rel_energy_next']: - if 'user' in model[isa][name]['rel_energy_next']['function']: - val_run_funs(by_name, by_trace, name, 'rel_energies_next', 'rel_energy_next', 'user', 'pJ') - if 'estimate' in model[isa][name]['rel_energy_next']['function']: - val_run_funs(by_name, by_trace, name, 'rel_energies_next', 'rel_energy_next', 'estimate', 'pJ') - - return - for i, param in enumerate(parameters): - user_mae = {} - user_smape = {} - estimate_mae = {} - estimate_smape = {} - for val in paramv[param]: - bpa_training = dict([[x, by_param[x]] for x in by_param if x[1][i] != val]) - bpa_validation = dict([[x, by_param[x]] for x in by_param if x[1][i] == val]) - to_pop = [] - for name in by_name.keys(): - if not any(map(lambda x : x[0] == name, bpa_training.keys())): - to_pop.append(name) - for name in to_pop: - by_name.pop(name, None) - ares = analyze(by_name, {}, bpa_training, by_trace, parameters) - for name in sorted(ares['state'].keys()): - state = ares['state'][name] - if 'function' in state['power']: - if 'user' in state['power']['function']: - xv_assess_function(name, state['power']['function']['user'], 'means', bpa_validation, user_mae, user_smape) - if 'estimate' in state['power']['function']: - xv_assess_function(name, state['power']['function']['estimate'], 'means', bpa_validation, estimate_mae, estimate_smape) - for name in sorted(ares['transition'].keys()): - trans = ares['transition'][name] - if 'timeout' in trans and 'function' in trans['timeout']: - if 'user' in trans['timeout']['function']: - xv_assess_function(name, trans['timeout']['function']['user'], 'timeouts', bpa_validation, user_mae, user_smape) - if 'estimate' in trans['timeout']['function']: - xv_assess_function(name, trans['timeout']['function']['estimate'], 'timeouts', bpa_validation, estimate_mae, estimate_smape) - - for name in sorted(user_mae.keys()): - if by_name[name]['isa'] == 'state': - print('user function %s power by %s: MAE %.f µW, SMAPE %.2f%%' % ( - name, param, np.mean(user_mae[name]), np.mean(user_smape[name]))) - else: - print('user function %s timeout by %s: MAE %.f µs, SMAPE %.2f%%' % ( - name, param, np.mean(user_mae[name]), np.mean(user_smape[name]))) - for name in sorted(estimate_mae.keys()): - if by_name[name]['isa'] == 'state': - print('estimate function %s power by %s: MAE %.f µW, SMAPE %.2f%%' % ( - name, param, np.mean(estimate_mae[name]), np.mean(estimate_smape[name]))) - else: - print('estimate function %s timeout by %s: MAE %.f µs, SMAPE %.2f%%' % ( - name, param, np.mean(estimate_mae[name]), np.mean(estimate_smape[name]))) - -def analyze_by_param(aggval, by_param, allvalues, name, key1, key2, param, param_idx): - aggval[key1]['std_by_param'][param] = mean_std_by_param( - by_param, allvalues, name, key2, param_idx) - aggval[key1]['spearmanr_by_param'][param] = spearmanr_by_param(name, key2, param_idx) - if aggval[key1]['std_by_param'][param] > 0 and aggval[key1]['std_param'] / aggval[key1]['std_by_param'][param] < 0.6: - aggval[key1]['fit_guess'][param] = try_fits(name, key2, param_idx, by_param) - -def analyze_by_arg(aggval, by_arg, allvalues, name, key1, key2, arg_name, arg_idx): - aggval[key1]['std_by_arg'][arg_name] = mean_std_by_arg( - by_arg, allvalues, name, key2, arg_idx) - if aggval[key1]['std_by_arg'][arg_name] > 0 and aggval[key1]['std_arg'] / aggval[key1]['std_by_arg'][arg_name] < 0.6: - aggval[key1]['arg_fit_guess'][arg_name] = try_fits(name, key2, arg_idx, by_arg) - -def maybe_fit_function(aggval, model, by_param, parameters, name, key1, key2, unit): - if 'function' in model[key1] and 'user' in model[key1]['function']: - aggval[key1]['function']['user'] = { - 'raw' : model[key1]['function']['user']['raw'], - 'params' : model[key1]['function']['user']['params'], - } - fit_function( - aggval[key1]['function']['user'], name, key2, parameters, by_param, - yaxis='%s %s by param [%s]' % (name, key1, unit)) - -def analyze(by_name, by_arg, by_param, by_trace, parameters): - aggdata = { - 'state' : {}, - 'transition' : {}, - 'min_voltage' : min_voltage, - 'max_voltage' : max_voltage, - } - transition_names = list(map(lambda x: x[0], filter(lambda x: x[1]['isa'] == 'transition', by_name.items()))) - for name, val in by_name.items(): - isa = val['isa'] - model = data['model'][isa][name] - - aggdata[isa][name] = { - 'power' : keydata(name, val, by_arg, by_param, by_trace, 'means'), - 'duration' : keydata(name, val, by_arg, by_param, by_trace, 'durations'), - 'energy' : keydata(name, val, by_arg, by_param, by_trace, 'energies'), - 'clip' : { - 'mean' : np.mean(val['clip_rate']), - 'max' : max(val['clip_rate']), - }, - 'timeout' : {}, - } - - aggval = aggdata[isa][name] - aggval['power']['std_outer'] = np.mean(val['stds']) - - if isa == 'transition': - aggval['rel_energy_prev'] = keydata(name, val, by_arg, by_param, by_trace, 'rel_energies_prev') - aggval['rel_energy_next'] = keydata(name, val, by_arg, by_param, by_trace, 'rel_energies_next') - aggval['timeout'] = keydata(name, val, by_arg, by_param, by_trace, 'timeouts') - - for i, param in enumerate(parameters): - values = list(set([key[1][i] for key in by_param.keys() if key[0] == name and key[1][i] != ''])) - allvalues = [(*key[1][:i], *key[1][i+1:]) for key in by_param.keys() if key[0] == name] - #allvalues = list(set(allvalues)) - if len(values) > 1: - if isa == 'state': - analyze_by_param(aggval, by_param, allvalues, name, 'power', 'means', param, i) - else: - analyze_by_param(aggval, by_param, allvalues, name, 'duration', 'durations', param, i) - analyze_by_param(aggval, by_param, allvalues, name, 'energy', 'energies', param, i) - analyze_by_param(aggval, by_param, allvalues, name, 'rel_energy_prev', 'rel_energies_prev', param, i) - analyze_by_param(aggval, by_param, allvalues, name, 'rel_energy_next', 'rel_energies_next', param, i) - analyze_by_param(aggval, by_param, allvalues, name, 'timeout', 'timeouts', param, i) - - if isa == 'state': - fguess_to_function(name, 'means', aggval['power'], parameters, by_param, - 'estimated %s power by param [µW]' % name) - maybe_fit_function(aggval, model, by_param, parameters, name, 'power', 'means', 'µW') - if aggval['power']['std_param'] > 0 and aggval['power']['std_trace'] / aggval['power']['std_param'] < 0.5: - aggval['power']['std_by_trace'] = mean_std_by_trace_part(by_trace, transition_names, name, 'means') - else: - fguess_to_function(name, 'durations', aggval['duration'], parameters, by_param, - 'estimated %s duration by param [µs]' % name) - fguess_to_function(name, 'energies', aggval['energy'], parameters, by_param, - 'estimated %s energy by param [pJ]' % name) - fguess_to_function(name, 'rel_energies_prev', aggval['rel_energy_prev'], parameters, by_param, - 'estimated relative_prev %s energy by param [pJ]' % name) - fguess_to_function(name, 'rel_energies_next', aggval['rel_energy_next'], parameters, by_param, - 'estimated relative_next %s energy by param [pJ]' % name) - fguess_to_function(name, 'timeouts', aggval['timeout'], parameters, by_param, - 'estimated %s timeout by param [µs]' % name) - maybe_fit_function(aggval, model, by_param, parameters, name, 'duration', 'durations', 'µs') - maybe_fit_function(aggval, model, by_param, parameters, name, 'energy', 'energies', 'pJ') - maybe_fit_function(aggval, model, by_param, parameters, name, 'rel_energy_prev', 'rel_energies_prev', 'pJ') - maybe_fit_function(aggval, model, by_param, parameters, name, 'rel_energy_next', 'rel_energies_next', 'pJ') - maybe_fit_function(aggval, model, by_param, parameters, name, 'timeout', 'timeouts', 'µs') - - for i, arg in enumerate(model['parameters']): - values = list(set([key[1][i] for key in by_arg.keys() if key[0] == name and is_numeric(key[1][i])])) - allvalues = [(*key[1][:i], *key[1][i+1:]) for key in by_arg.keys() if key[0] == name] - analyze_by_arg(aggval, by_arg, allvalues, name, 'duration', 'durations', arg['name'], i) - analyze_by_arg(aggval, by_arg, allvalues, name, 'energy', 'energies', arg['name'], i) - analyze_by_arg(aggval, by_arg, allvalues, name, 'rel_energy_prev', 'rel_energies_prev', arg['name'], i) - analyze_by_arg(aggval, by_arg, allvalues, name, 'rel_energy_next', 'rel_energies_next', arg['name'], i) - analyze_by_arg(aggval, by_arg, allvalues, name, 'timeout', 'timeouts', arg['name'], i) - - arguments = list(map(lambda x: x['name'], model['parameters'])) - arg_fguess_to_function(name, 'durations', aggval['duration'], arguments, by_arg, - 'estimated %s duration by arg [µs]' % name) - arg_fguess_to_function(name, 'energies', aggval['energy'], arguments, by_arg, - 'estimated %s energy by arg [pJ]' % name) - arg_fguess_to_function(name, 'rel_energies_prev', aggval['rel_energy_prev'], arguments, by_arg, - 'estimated relative_prev %s energy by arg [pJ]' % name) - arg_fguess_to_function(name, 'rel_energies_next', aggval['rel_energy_next'], arguments, by_arg, - 'estimated relative_next %s energy by arg [pJ]' % name) - arg_fguess_to_function(name, 'timeouts', aggval['timeout'], arguments, by_arg, - 'estimated %s timeout by arg [µs]' % name) - - return aggdata - -try: - raw_opts, args = getopt.getopt(sys.argv[1:], "", [ - "fit", "states", "transitions", "params", "clipping", "timing", - "histogram", "substates", "validate", "crossvalidate", "ignore-trace-idx=", "voltage"]) - for option, parameter in raw_opts: - optname = re.sub(r'^--', '', option) - opts[optname] = parameter - if 'ignore-trace-idx' in opts: - opts['ignore-trace-idx'] = int(opts['ignore-trace-idx']) -except getopt.GetoptError as err: - print(err) - sys.exit(2) - -data = load_json(args[0]) -by_name = {} -by_arg = {} -by_param = {} -by_trace = {} - -if 'voltage' in opts: - data['model']['parameter']['voltage'] = { - 'default' : float(data['setup']['mimosa_voltage']), - 'function' : None, - 'arg_name' : None, - } - -min_voltage = float(data['setup']['mimosa_voltage']) -max_voltage = float(data['setup']['mimosa_voltage']) - -parameters = sorted(data['model']['parameter'].keys()) - -for arg in args: - mdata = load_json(arg) - this_voltage = float(mdata['setup']['mimosa_voltage']) - if this_voltage > max_voltage: - max_voltage = this_voltage - if this_voltage < min_voltage: - min_voltage = this_voltage - if 'voltage' in opts: - opts['voltage'] = this_voltage - for runidx, run in enumerate(mdata['traces']): - if 'ignore-trace-idx' not in opts or opts['ignore-trace-idx'] != runidx: - for i, elem in enumerate(run['trace']): - if elem['name'] != 'UNINITIALIZED': - load_run_elem(i, elem, run['trace'], by_name, by_arg, by_param, by_trace) - -#with open('/tmp/by_name.pickle', 'wb') as f: -# pickle.dump(by_name, f, pickle.HIGHEST_PROTOCOL) -#with open('/tmp/by_arg.pickle', 'wb') as f: -# pickle.dump(by_arg, f, pickle.HIGHEST_PROTOCOL) -#with open('/tmp/by_param.pickle', 'wb') as f: -# pickle.dump(by_param, f, pickle.HIGHEST_PROTOCOL) - -if 'states' in opts: - if 'params' in opts: - plotter.plot_states_param(data['model'], by_param) - else: - plotter.plot_states(data['model'], by_name) - if 'timing' in opts: - plotter.plot_states_duration(data['model'], by_name) - plotter.plot_states_duration(data['model'], by_param) - if 'clipping' in opts: - plotter.plot_states_clips(data['model'], by_name) -if 'transitions' in opts: - plotter.plot_transitions(data['model'], by_name) - if 'timing' in opts: - plotter.plot_transitions_duration(data['model'], by_name) - plotter.plot_transitions_timeout(data['model'], by_param) - if 'clipping' in opts: - plotter.plot_transitions_clips(data['model'], by_name) -if 'histogram' in opts: - for key in sorted(by_name.keys()): - plotter.plot_histogram(by_name[key]['means']) -if 'substates' in opts: - if 'params' in opts: - plotter.plot_substate_thresholds_p(data['model'], by_param) - else: - plotter.plot_substate_thresholds(data['model'], by_name) - -if 'crossvalidate' in opts: - crossvalidate(by_name, by_param, by_trace, data['model'], parameters) -else: - data['aggregate'] = analyze(by_name, by_arg, by_param, by_trace, parameters) - -# TODO optionally also plot data points for states/transitions which do not have -# a function, but may depend on a parameter (visualization is always good!) - -save_json(data, args[0]) |