diff options
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/dfatool.py | 216 |
1 files changed, 204 insertions, 12 deletions
diff --git a/lib/dfatool.py b/lib/dfatool.py index 3cc46dc..657a7b1 100755 --- a/lib/dfatool.py +++ b/lib/dfatool.py @@ -20,10 +20,23 @@ from utils import is_numeric arg_support_enabled = True def running_mean(x, N): + """ + Compute running average. + + arguments: + x -- NumPy array + N -- how many items to average + """ cumsum = np.cumsum(np.insert(x, 0, 0)) return (cumsum[N:] - cumsum[:-N]) / N def soft_cast_int(n): + """ + Convert to int, if possible. + + If it is empty, returns None. + If it is not numeric, it is left unchanged. + """ if n == None or n == '': return None try: @@ -32,6 +45,7 @@ def soft_cast_int(n): return n def float_or_nan(n): + """Convert to float (if numeric) or NaN.""" if n == None: return np.nan try: @@ -40,9 +54,16 @@ def float_or_nan(n): return np.nan def vprint(verbose, string): + """ + Print string if verbose. + + Prints string if verbose is a True value + """ if verbose: print(string) + # I don't recall what these are for. + # --df, 2019-01-29 def _gplearn_add_(x, y): return x + y @@ -94,19 +115,55 @@ def _arg_name(arg_index): return '~arg{:02}'.format(arg_index) def append_if_set(aggregate, data, key): + """Append data[key] to aggregate if key in data.""" if key in data: aggregate.append(data[key]) def mean_or_none(arr): + """ + Compute mean of NumPy array arr. + + Return -1 if arr is empty. + """ if len(arr): return np.mean(arr) return -1 def aggregate_measures(aggregate, actual): + """ + Calculate error measures for model value on data list. + + arguments: + aggregate -- model value (float or int) + actual -- real-world / reference values (list of float or int) + + return value: + See regression_measures + """ aggregate_array = np.array([aggregate] * len(actual)) return regression_measures(aggregate_array, np.array(actual)) def regression_measures(predicted, actual): + """ + Calculate error measures by comparing model values to reference values. 
+ + arguments: + predicted -- model values (np.ndarray) + actual -- real-world / reference values (np.ndarray) + + Returns a dict containing the following measures: + mae -- Mean Absolute Error + mape -- Mean Absolute Percentage Error, + if all items in actual are non-zero (NaN otherwise) + smape -- Symmetric Mean Absolute Percentage Error, + if no 0,0-pairs are present in actual and predicted (NaN otherwise) + msd -- Mean Square Deviation + rmsd -- Root Mean Square Deviation + ssr -- Sum of Squared Residuals + rsq -- R^2 measure, see sklearn.metrics.r2_score + count -- Number of values + """ + if type(predicted) != np.ndarray: raise ValueError('first arg must be ndarray, is {}'.format(type(predicted))) if type(actual) != np.ndarray: @@ -209,8 +266,20 @@ def _preprocess_measurement(measurement): return processed_data class RawData: + """ + Loader for hardware model traces measured with MIMOSA. + + Expects a specific trace format and UART log output (as produced by the + dfatool benchmark generator). Loads data, prunes bogus measurements, and + provides preprocessed data suitable for EnergyModel. + """ def __init__(self, filenames): + """ + Create a new RawData object. + + Each filename element corresponds to a measurement run. + """ self.filenames = filenames.copy() self.traces_by_fileno = [] self.setup_by_fileno = [] @@ -388,17 +457,22 @@ class RawData: trace['id'] = i def get_preprocessed_data(self, verbose = True): + """ + Return a list of DFA traces annotated with energy, timing, and parameter data. + + Suitable for the EnergyModel constructor. + See EnergyModel(...) docstring for format details. + """ self.verbose = verbose if self.preprocessed: return self.traces if self.version == 0: - self.preprocess_0() + self._preprocess_0() self.preprocessed = True return self.traces - # Loads raw MIMOSA data and turns it into measurements which are ready to - # be analyzed. 
- def preprocess_0(self): + def _preprocess_0(self): + """Load raw MIMOSA data and turn it into measurements which are ready to be analyzed.""" mim_files = [] for i, filename in enumerate(self.filenames): with tarfile.open(filename) as tf: @@ -437,6 +511,20 @@ class RawData: } def _param_slice_eq(a, b, index): + """ + Check if by_param keys a and b are identical, ignoring the parameter at index. + + parameters: + a, b -- (state/transition name, [parameter0 value, parameter1 value, ...]) + index -- parameter index to ignore (0 -> parameter0, 1 -> parameter1, etc.) + + Returns True iff a and b have the same state/transition name, and all + parameters at positions != index are identical. + + example: + ('foo', [1, 4]), ('foo', [2, 4]), 0 -> True + ('foo', [1, 4]), ('foo', [2, 4]), 1 -> False + """ if (*a[1][:index], *a[1][index+1:]) == (*b[1][:index], *b[1][index+1:]) and a[0] == b[0]: return True return False @@ -528,7 +616,7 @@ def _compute_param_statistics_parallel(args): 'result' : _compute_param_statistics(*args['args']) } -def all_params_are_numeric(data, param_idx): +def _all_params_are_numeric(data, param_idx): param_values = list(map(lambda x: x[param_idx], data['param'])) if len(list(filter(is_numeric, param_values))) == len(param_values): return True @@ -554,13 +642,24 @@ def _compute_param_statistics(by_name, by_param, parameter_names, num_args, stat return ret -# returns the mean standard deviation of all measurements of 'what' -# (e.g. power consumption or timeout) for state/transition 'name' where -# parameter 'index' is dynamic and all other parameters are fixed. -# I.e., if parameters are a, b, c ∈ {1,2,3} and 'index' corresponds to b', then -# this function returns the mean of the standard deviations of (a=1, b=*, c=1), -# (a=1, b=*, c=2), and so on def _mean_std_by_param(by_param, state_or_tran, key, param_index): + u""" + Calculate the mean standard deviation for a static model where all parameters but param_index are constant. 
+ + arguments: + by_param -- measurements sorted by state/transition name and parameter values + state_or_tran -- state or transition name (-> by_param[(state_or_tran, *)]) + key -- model attribute, e.g. 'power' or 'duration' + (-> by_param[(state_or_tran, *)][key]) + param_index -- index of variable parameter + + Returns the mean standard deviation of all measurements of 'key' + (e.g. power consumption or timeout) for state/transition 'state_or_tran' where + parameter 'param_index' is dynamic and all other parameters are fixed. + I.e., if parameters are a, b, c ∈ {1,2,3} and 'param_index' corresponds to b, then + this function returns the mean of the standard deviations of (a=1, b=*, c=1), + (a=1, b=*, c=2), and so on. + """ partitions = [] for param_value in filter(lambda x: x[0] == state_or_tran, by_param.keys()): param_partition = [] @@ -574,7 +673,7 @@ def _mean_std_by_param(by_param, state_or_tran, key, param_index): return np.mean([np.std(partition) for partition in partitions]) def _corr_by_param(by_name, state_or_trans, key, param_index): - if all_params_are_numeric(by_name[state_or_trans], param_index): + if _all_params_are_numeric(by_name[state_or_trans], param_index): param_values = np.array(list((map(lambda x: x[param_index], by_name[state_or_trans]['param'])))) try: return np.corrcoef(by_name[state_or_trans][key], param_values)[0, 1] @@ -587,8 +686,101 @@ def _corr_by_param(by_name, state_or_trans, key, param_index): return 0. class EnergyModel: + """ + Parameter-aware PTA-based energy model. + + Supports both static and parameter-based model attributes, and automatic detection of parameter-dependence. + """ def __init__(self, preprocessed_data, ignore_trace_indexes = [], discard_outliers = None, function_override = {}, verbose = True, use_corrcoef = False, hwmodel = None): + """ + Prepare a new PTA energy model. + + Actual model generation is done on-demand by calling the respective functions.
+ + arguments: + preprocessed_data -- list of preprocessed DFA traces. + ignore_trace_indexes -- list of trace indexes. The corresponding traces will be ignored. + discard_outliers -- experimental: threshold for outlier detection and removal (float). + Outlier detection is performed individually for each state/transition in each trace, + so it only works if the benchmark ran several times. + Given "data" (a set of measurements of the same thing, e.g. TX duration in the third benchmark trace), + "m" (the median of all attribute measurements with the same parameters, which may include data from other traces), + a data point X is considered an outlier if + | 0.6745 * (X - m) / median(|data - m|) | > discard_outliers . + function_override -- dict of overrides for automatic parameter function generation. + If (state or transition name, model attribute) is present in function_override, + the corresponding text string is the function used for analytic (parameter-aware/fitted) + modeling of this attribute. It is passed to AnalyticFunction, see + there for the required format. Note that this happens regardless of + parameter dependency detection: The provided analytic function will be assigned + even if it seems like the model attribute is static / parameter-independent. + verbose -- print informative output, e.g. when removing an outlier + use_corrcoef -- use correlation coefficient instead of stddev comparison + to detect whether a model attribute depends on a parameter + hwmodel -- hardware model suitable for PTA.from_hwmodel + + Detailed layout of preprocessed_data: + [ ... Liste von einzelnen Läufen (d.h. eine Zustands- und Transitionsfolge UNINITIALIZED -> foo -> FOO -> bar -> BAR -> ...) + Jeder Lauf: + - id: int Nummer des Laufs, beginnend bei 1 + - trace: [ ... Liste von Zuständen und Transitionen + Jeweils: + - name: str Name + - isa: str state // transition + - parameter: { ... globaler Parameter: aktueller wert.
null falls noch nicht eingestellt } + - plan: + Falls isa == 'state': + - power: int(uW?) + - time: int(us) geplante Dauer + - energy: int(pJ?) + Falls isa == 'transition': + - timeout: int(us) oder null + - energy: int (pJ?) + - level: str 'user' 'epilogue' + - offline_attributes: [ ... Namen der in offline_aggregates gespeicherten Modellattribute, z.B. param, duration, energy, timeout ] + - offline_aggregates: + - power: [float(uW)] Mittlere Leistung während Zustand/Transitions + - power_std: [float(uW^2)] Standardabweichung der Leistung + - duration: [int(us)] Dauer + - energy: [float(pJ)] Energieaufnahme des Zustands / der Transition + - clip_rate: [float(0..1)] Clipping + - paramkeys: [[str]] Name der berücksichtigten Parameter + - param: [int // str] Parameterwerte. Quasi-Duplikat von 'parameter' oben + Falls isa == 'transition': + - timeout: [int(us)] Dauer des vorherigen Zustands + - rel_energy_prev: [int(pJ)] + - rel_energy_next: [int(pJ)] + - offline: [ ... Während der Messung von MIMOSA o.ä. gemessene Werte + -> siehe doc/MIMOSA analyze_states + - isa: 'state' oder 'transition' + - clip_rate: range(0..1) Anteil an Clipping im Energieverbrauch + - raw_mean: Mittelwert der Rohwerte + - raw_std: Standardabweichung der Rohwerte + - uW_mean: Mittelwert der (kalibrierten) Leistungsaufnahme + - uW_std: Standardabweichung der (kalibrierten) Leistungsaufnahme + - us: Dauer + Nur falls isa 'transition': + - timeout: Dauer des vorherigen Zustands + - uW_mean_delta_prev + - uW_mean_delta_next + ] + - online: [ ... Während der Messung vom Betriebssystem bestimmte Daten + Falls isa == 'state': + - power: int(uW?) + - time: int(us) geplante Dauer + - energy: int(pJ?) + Falls isa == 'transition': + - timeout: int(us) oder null + - energy: int (pJ?) 
+ - level: str ('user' oder 'epilogue') + ] + Falls isa == 'transition': + - code: [str] Name und Argumente der aufgerufenen Funktion + - args: [str] Argumente der aufgerufenen Funktion + ] + ] + """ self.traces = preprocessed_data self.by_name = {} self.by_param = {} |