| author | Daniel Friesel <daniel.friesel@uos.de> | 2020-11-03 14:44:55 +0100 |
|---|---|---|
| committer | Daniel Friesel <daniel.friesel@uos.de> | 2020-11-03 14:44:55 +0100 |
| commit | c3a92f7255398f6500b868c20c0afd538dae09bf (patch) | |
| tree | dce092da93b5ff7039b723457ef3e4836fed8b5b | |
| parent | ea627ab6d9b47c53e6b1e34837e928c9d599db51 (diff) | |
analyze number of substates per state
| -rwxr-xr-x | bin/analyze-archive.py | 3 |
| -rw-r--r-- | lib/loader.py | 9 |
| -rw-r--r-- | lib/model.py | 107 |
| -rw-r--r-- | lib/pelt.py | 130 |
| -rw-r--r-- | lib/utils.py | 27 |
5 files changed, 183 insertions, 93 deletions
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index eca98c4..35beae8 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -643,7 +643,8 @@ if __name__ == "__main__":
         )

     if args.with_substates is not None:
-        substate_model, substate_info = model.get_substates()
+        substate_model = model.get_substates()
+        print(model.assess(substate_model, ref=model.sc_by_name))

     if "paramdetection" in show_models or "all" in show_models:
         for state in model.states_and_transitions():
diff --git a/lib/loader.py b/lib/loader.py
index fff515f..4cb5dc0 100644
--- a/lib/loader.py
+++ b/lib/loader.py
@@ -651,8 +651,12 @@ class RawData:
                     online_trace_part["offline_aggregates"]["rel_energy_next"] = []
                     online_trace_part["offline_aggregates"]["timeout"] = []
             elif "plot" in offline_trace_part:
-                online_trace_part["offline_support"] = ["power_traces"]
+                online_trace_part["offline_support"] = [
+                    "power_traces",
+                    "timestamps",
+                ]
                 online_trace_part["offline_aggregates"]["power_traces"] = list()
+                online_trace_part["offline_aggregates"]["timestamps"] = list()

             # Note: All state/transitions are 20us "too long" due to injected
             # active wait states. These are needed to work around MIMOSA's
@@ -688,6 +692,9 @@ class RawData:
                 online_trace_part["offline_aggregates"]["power_traces"].append(
                     offline_trace_part["plot"][1]
                 )
+                online_trace_part["offline_aggregates"]["timestamps"].append(
+                    offline_trace_part["plot"][0]
+                )

     def _merge_online_and_etlog(self, measurement):
         # Edits self.traces_by_fileno[measurement['fileno']][*]['trace'][*]['offline']
diff --git a/lib/model.py b/lib/model.py
index 1190fb0..ab46dc7 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -10,7 +10,7 @@ from .functions import analytic
 from .functions import AnalyticFunction
 from .parameters import ParamStats
 from .utils import is_numeric, soft_cast_int, param_slice_eq, remove_index_from_tuple
-from .utils import by_name_to_by_param, match_parameter_values
+from .utils import by_name_to_by_param, by_param_to_by_name, match_parameter_values

 logger = logging.getLogger(__name__)
 arg_support_enabled = True
@@ -921,29 +921,92 @@ class PTAModel:

         return model_getter, info_getter

+    def pelt_refine(self, by_param_key):
+        logger.debug(f"PELT: {by_param_key} needs refinement")
+        # Assumption: All power traces for this parameter setting
+        # are similar, so determining the penalty for the first one
+        # is sufficient.
+        penalty, changepoints = self.pelt.get_penalty_and_changepoints(
+            self.by_param[by_param_key]["power_traces"][0]
+        )
+        if len(changepoints) == 0:
+            logger.debug(f"    we found no changepoints with penalty {penalty}")
+            substate_counts = [1 for i in self.by_param[by_param_key]["param"]]
+            substate_data = {
+                "duration": self.by_param[by_param_key]["duration"],
+                "power": self.by_param[by_param_key]["power"],
+                "power_std": self.by_param[by_param_key]["power_std"],
+            }
+            return (substate_counts, substate_data)
+        logger.debug(
+            f"    we found {len(changepoints)} changepoints with penalty {penalty}"
+        )
+        return self.pelt.calc_raw_states(
+            self.by_param[by_param_key]["timestamps"],
+            self.by_param[by_param_key]["power_traces"],
+            penalty,
+        )
+
     def get_substates(self):
         states = self.states()
+
+        substates_by_param = dict()
         for k in self.by_param.keys():
             if k[0] in states:
+                state_name = k[0]
                 if self.pelt.needs_refinement(self.by_param[k]["power_traces"]):
-                    logger.debug(f"PELT: {k} needs refinement")
-                    # Assumption: All power traces for this parameter setting
-                    # are similar, so determining the penalty for the first one
-                    # is sufficient.
-                    penalty, changepoints = self.pelt.get_penalty_and_changepoints(
-                        self.by_param[k]["power_traces"][0]
-                    )
-                    if len(changepoints):
-                        logger.debug(
-                            f"    we found {len(changepoints)} changepoints with penalty {penalty}"
-                        )
-                        self.pelt.calc_raw_states(
-                            self.by_param[k]["power_traces"], penalty
-                        )
-                    else:
-                        logger.debug(
-                            f"    we found no changepoints with penalty {penalty}"
-                        )
+                    substates_by_param[k] = self.pelt_refine(k)
+                else:
+                    substate_counts = [1 for i in self.by_param[k]["param"]]
+                    substate_data = {
+                        "duration": self.by_param[k]["duration"],
+                        "power": self.by_param[k]["power"],
+                        "power_std": self.by_param[k]["power_std"],
+                    }
+                    substates_by_param[k] = (substate_counts, substate_data)
+
+        # suitable for AEMR modeling
+        sc_by_param = dict()
+        for param_key, (substate_counts, _) in substates_by_param.items():
+            sc_by_param[param_key] = {
+                "attributes": ["substate_count"],
+                "isa": "state",
+                "substate_count": substate_counts,
+                "param": self.by_param[param_key]["param"],
+            }
+
+        sc_by_name = by_param_to_by_name(sc_by_param)
+        self.sc_by_name = sc_by_name
+        self.sc_by_param = sc_by_param
+        static_model = self._get_model_from_dict(self.sc_by_name, np.median)
+
+        def static_model_getter(name, key, **kwargs):
+            return static_model[name][key]
+
+        return static_model_getter
+
+        """
+        for k in self.by_param.keys():
+            if k[0] in states:
+                state_name = k[0]
+                if state_name not in pelt_by_name:
+                    pelt_by_name[state_name] = dict()
+                if self.pelt.needs_refinement(self.by_param[k]["power_traces"]):
+                    res = self.pelt_refine(k)
+                    for substate_index, substate in enumerate(res):
+                        if substate_index not in pelt_by_name[state_name]:
+                            pelt_by_name[state_name][substate_index] = {
+                                "attribute": ["power", "duration"],
+                                "isa": "state",
+                                "param": list(),
+                                "power": list(),
+                                "duration": list()
+                            }
+                        pelt_by_name[state_name][substate_index]["param"].extend(self.by_param[k]["param"][:len(substate["power"])])
+                        pelt_by_name[state_name][substate_index]["power"].extend(substate["power"])
+                        pelt_by_name[state_name][substate_index]["duration"].extend(substate["duration"])
+        print(pelt_by_name)
+        """

         return None, None
@@ -994,7 +1057,7 @@ class PTAModel:
     def attributes(self, state_or_trans):
         return self.by_name[state_or_trans]["attributes"]

-    def assess(self, model_function):
+    def assess(self, model_function, ref=None):
         """
         Calculate MAE, SMAPE, etc. of model_function for each by_name entry.

@@ -1008,7 +1071,9 @@ class PTAModel:
         overfitting cannot be detected.
         """
         detailed_results = {}
-        for name, elem in sorted(self.by_name.items()):
+        if ref is None:
+            ref = self.by_name
+        for name, elem in sorted(ref.items()):
             detailed_results[name] = {}
             for key in elem["attributes"]:
                 predicted_data = np.array(
diff --git a/lib/pelt.py b/lib/pelt.py
index a215b28..518bef7 100644
--- a/lib/pelt.py
+++ b/lib/pelt.py
@@ -1,6 +1,9 @@
+import logging
 import numpy as np
 from multiprocessing import Pool

+logger = logging.getLogger(__name__)
+

 def PELT_get_changepoints(algo, penalty):
     res = (penalty, algo.predict(pen=penalty))
@@ -10,42 +13,21 @@
 # calculates the raw_states for measurement measurement. num_measurement is used to identify the
 # return value
 # penalty, model and jump are directly passed to pelt
-def PELT_get_raw_states(num_measurement, algo, signal, penalty):
-    bkpts = algo.predict(pen=penalty)
-    calced_states = list()
-    start_time = 0
-    end_time = 0
+def PELT_get_raw_states(num_measurement, algo, penalty):
+    changepoints = algo.predict(pen=penalty)
+    substates = list()
+    start_index = 0
+    end_index = 0
     # calc metrics for all states
-    for bkpt in bkpts:
-        # start_time of state is end_time of previous one
+    for changepoint in changepoints:
+        # start_index of state is end_index of previous one
         # (Transitions are instantaneous)
-        start_time = end_time
-        end_time = bkpt
-        power_vals = signal[start_time:end_time]
-        mean_power = np.mean(power_vals)
-        std_dev = np.std(power_vals)
-        calced_state = (start_time, end_time, mean_power, std_dev)
-        calced_states.append(calced_state)
-    num = 0
-    new_avg_std = 0
-    # calc avg std for all states from this measurement
-    for s in calced_states:
-        # print_info("State " + str(num) + " starts at t=" + str(s[0])
-        #            + " and ends at t=" + str(s[1])
-        #            + " while using " + str(s[2])
-        #            + "uW with sigma=" + str(s[3]))
-        num = num + 1
-        new_avg_std = new_avg_std + s[3]
-    # check case if no state has been found to avoid crashing
-    if len(calced_states) != 0:
-        new_avg_std = new_avg_std / len(calced_states)
-    else:
-        new_avg_std = 0
-    change_avg_std = None  # measurement["uW_std"] - new_avg_std
-    # print_info("The average standard deviation for the newly found states is "
-    #            + str(new_avg_std))
-    # print_info("That is a reduction of " + str(change_avg_std))
-    return num_measurement, calced_states, new_avg_std, change_avg_std
+        start_index = end_index
+        end_index = changepoint - 1
+        substate = (start_index, end_index)
+        substates.append(substate)
+
+    return num_measurement, substates


 class PELT:
@@ -54,7 +36,7 @@
         self.jump = 1
         self.min_dist = 10
         self.num_samples = None
-        self.refinement_threshold = 200e-6  # µW
+        self.refinement_threshold = 200e-6  # 200 µW
         self.range_min = 0
         self.range_max = 100
         self.__dict__.update(kwargs)
@@ -89,7 +71,6 @@

         if self.num_samples is not None and len(signal) > self.num_samples:
             self.jump = len(signal) // int(self.num_samples)
-            print(f"jump = {self.jump}")
         else:
             self.jump = 1

@@ -106,29 +87,29 @@
             if len(res[1]) > 0 and res[1][-1] == len(signal):
                 res[1].pop()
             changepoints_by_penalty[res[0]] = res[1]

-        num_changepoints = list()
+        changepoint_counts = list()
         for i in range(0, 100):
-            num_changepoints.append(len(changepoints_by_penalty[i]))
+            changepoint_counts.append(len(changepoints_by_penalty[i]))
         start_index = -1
         end_index = -1
         longest_start = -1
         longest_end = -1
         prev_val = -1
-        for i, num_bkpts in enumerate(num_changepoints):
-            if num_bkpts != prev_val:
+        for i, num_changepoints in enumerate(changepoint_counts):
+            if num_changepoints != prev_val:
                 end_index = i - 1
                 if end_index - start_index > longest_end - longest_start:
                     longest_start = start_index
                     longest_end = end_index
                 start_index = i
-            if i == len(num_changepoints) - 1:
+            if i == len(changepoint_counts) - 1:
                 end_index = i
                 if end_index - start_index > longest_end - longest_start:
                     longest_start = start_index
                     longest_end = end_index
                 start_index = i
-            prev_val = num_bkpts
+            prev_val = num_changepoints
         middle_of_plateau = longest_start + (longest_start - longest_start) // 2
         changepoints = np.array(changepoints_by_penalty[middle_of_plateau])
         return middle_of_plateau, changepoints
@@ -141,48 +122,57 @@
         penalty, _ = self.get_penalty_and_changepoints(signal)
         return penalty

-    def calc_raw_states(self, signals, penalty, opt_model=None):
+    def calc_raw_states(self, timestamps, signals, penalty, opt_model=None):
+        """
+        Calculate substates for signals (assumed to be long to a single parameter configuration).
+
+        :returns: List of substates with duration and mean power: [(substate 1 duration, substate 1 power), ...]
+        """
+
         # imported here as ruptures is only used for changepoint detection.
         # This way, dfatool can be used without having ruptures installed as
         # long as --pelt isn't active.
         import ruptures

+        substate_data = list()
+
         raw_states_calc_args = list()
         for num_measurement, measurement in enumerate(signals):
             normed_signal = self.norm_signal(measurement)
             algo = ruptures.Pelt(
                 model=self.model, jump=self.jump, min_size=self.min_dist
             ).fit(normed_signal)
-            raw_states_calc_args.append((num_measurement, algo, normed_signal, penalty))
+            raw_states_calc_args.append((num_measurement, algo, penalty))

         raw_states_list = [None] * len(signals)
         with Pool() as pool:
             raw_states_res = pool.starmap(PELT_get_raw_states, raw_states_calc_args)

-        # extracting result and putting it in correct order -> index of raw_states_list
-        # entry still corresponds with index of measurement in measurements_by_states
-        # -> If measurements are discarded the used ones are easily recognized
-        for ret_val in raw_states_res:
-            num_measurement = ret_val[0]
-            raw_states = ret_val[1]
-            avg_std = ret_val[2]
-            change_avg_std = ret_val[3]
-            # FIXME: Wieso gibt mir meine IDE hier eine Warning aus? Der Index müsste doch
-            # int sein oder nicht? Es scheint auch vernünftig zu klappen...
-            raw_states_list[num_measurement] = raw_states
-            # print(
-            #     "The average standard deviation for the newly found states in "
-            #     + "measurement No. "
-            #     + str(num_measurement)
-            #     + " is "
-            #     + str(avg_std)
-            # )
-            # print("That is a reduction of " + str(change_avg_std))
-            for i, raw_state in enumerate(raw_states):
-                print(
-                    f"Measurement #{num_measurement} sub-state #{i}: {raw_state[0]} -> {raw_state[1]}, mean {raw_state[2]}"
+        substate_counts = list(map(lambda x: len(x[1]), raw_states_res))
+        expected_substate_count = np.argmax(np.bincount(substate_counts))
+        usable_measurements = list(
+            filter(lambda x: len(x[1]) == expected_substate_count, raw_states_res)
+        )
+        logger.debug(
+            f"    There are {expected_substate_count} substates (std = {np.std(substate_counts)}, {len(usable_measurements)}/{len(raw_states_res)} results are usable)"
+        )
+
+        for i in range(expected_substate_count):
+            substate_data.append(
+                {"duration": list(), "power": list(), "power_std": list()}
+            )
+
+        for num_measurement, substates in usable_measurements:
+            for i, substate in enumerate(substates):
+                power_trace = signals[num_measurement][substate[0] : substate[1]]
+                mean_power = np.mean(power_trace)
+                std_power = np.std(power_trace)
+                duration = (
+                    timestamps[num_measurement][substate[1]]
+                    - timestamps[num_measurement][substate[0]]
                 )
-                # l_signal = measurements_by_config['offline'][num_measurement]['uW']
-                # l_bkpts = [s[1] for s in raw_states]
-                # fig, ax = rpt.display(np.array(l_signal), l_bkpts)
-                # plt.show()
+                substate_data[i]["duration"].append(duration)
+                substate_data[i]["power"].append(mean_power)
+                substate_data[i]["power_std"].append(std_power)
+
+        return substate_counts, substate_data
diff --git a/lib/utils.py b/lib/utils.py
index 2ed3d6e..c8f31c2 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -199,6 +199,33 @@ def by_name_to_by_param(by_name: dict):
     return by_param


+def by_param_to_by_name(by_param: dict) -> dict:
+    """
+    Convert aggregation by name and parameter values to aggregation by name only.
+    """
+    by_name = dict()
+    for param_key in by_param.keys():
+        name, _ = param_key
+        if name not in by_name:
+            by_name[name] = dict()
+            for key in by_param[param_key].keys():
+                by_name[name][key] = list()
+            by_name[name]["attributes"] = by_param[param_key]["attributes"]
+            # special case for PTA models
+            if "isa" in by_param[param_key]:
+                by_name[name]["isa"] = by_param[param_key]["isa"]
+        for attribute in by_name[name]["attributes"]:
+            by_name[name][attribute].extend(by_param[param_key][attribute])
+        if "supports" in by_param[param_key]:
+            for support in by_param[param_key]["supports"]:
+                by_name[name][support].extend(by_param[param_key][support])
+        by_name[name]["param"].extend(by_param[param_key]["param"])
+    for name in by_name.keys():
+        for attribute in by_name[name]["attributes"]:
+            by_name[name][attribute] = np.array(by_name[name][attribute])
+    return by_name
+
+
 def filter_aggregate_by_param(aggregate, parameters, parameter_filter):
     """
     Remove entries which do not have certain parameter values from `aggregate`.