summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbin/Proof_Of_Concept_PELT.py1368
-rwxr-xr-xbin/analyze-archive.py24
-rw-r--r--bin/plot_generator.py136
-rw-r--r--lib/loader.py17
-rw-r--r--lib/model.py30
-rw-r--r--lib/pelt.py393
-rw-r--r--lib/utils.py3
7 files changed, 1967 insertions, 4 deletions
diff --git a/bin/Proof_Of_Concept_PELT.py b/bin/Proof_Of_Concept_PELT.py
new file mode 100755
index 0000000..3206b68
--- /dev/null
+++ b/bin/Proof_Of_Concept_PELT.py
@@ -0,0 +1,1368 @@
+#!/usr/bin/env python3
+import json
+import os
+import time
+import sys
+import getopt
+import re
+import pprint
+from multiprocessing import Pool, Manager, cpu_count
+from kneed import KneeLocator
+from sklearn.cluster import AgglomerativeClustering
+import matplotlib.pyplot as plt
+import ruptures as rpt
+import numpy as np
+
+from dfatool.functions import analytic
+from dfatool.loader import RawData
+from dfatool import parameters
+from dfatool.model import ParallelParamFit, PTAModel
+from dfatool.utils import by_name_to_by_param
+
+# from scipy.cluster.hierarchy import dendrogram, linkage # for graphical display
+
+# py bin\Proof_Of_Concept_PELT.py --filename="..\data\TX.json" --jump=1 --pen_override=28 --refinement_thresh=100
+# py bin\Proof_Of_Concept_PELT.py --filename="..\data\TX.json" --jump=1 --pen_override=28 --refinement_thresh=100 --cache_dicts --cache_loc="..\data\TX2_cache"
+from dfatool.validation import CrossValidator
+
+
+# returns the found changepoints by algo for the specific penalty pen.
+# algo should be the return value of Pelt(...).fit(signal)
+# Also puts a token in container q to let the progressmeter know the changepoints for penalty pen
+# have been calculated.
+# used for parallel calculation of changepoints vs penalty
+def get_bkps(algo, pen, q):
+ res = pen, len(algo.predict(pen=pen))
+ q.put(pen)
+ return res
+
+
+# Wrapper for kneedle
+def find_knee_point(data_x, data_y, S=1.0, curve="convex", direction="decreasing"):
+ kneedle = KneeLocator(data_x, data_y, S=S, curve=curve, direction=direction)
+ kneepoint = (kneedle.knee, kneedle.knee_y)
+ return kneepoint
+
+
+# returns the changepoints found on signal with penalty penalty.
+# model, jump and min_dist are directly passed to PELT
+def calc_pelt(signal, penalty, model="l1", jump=5, min_dist=2, plotting=False):
+ # default params in Function
+ if model is None:
+ model = "l1"
+ if jump is None:
+ jump = 5
+ if min_dist is None:
+ min_dist = 2
+ if plotting is None:
+ plotting = False
+ # change point detection. best fit seemingly with l1. rbf prods. RuntimeErr for pen > 30
+ # https://ctruong.perso.math.cnrs.fr/ruptures-docs/build/html/costs/index.html
+ # model = "l1" #"l1" # "l2", "rbf"
+ algo = rpt.Pelt(model=model, jump=jump, min_size=min_dist).fit(signal)
+
+ if penalty is not None:
+ bkps = algo.predict(pen=penalty)
+ if plotting:
+ fig, ax = rpt.display(signal, bkps)
+ plt.show()
+ return bkps
+
+ print_error("No Penalty specified.")
+ sys.exit(-1)
+
+
+# calculates and returns the necessary penalty for signal. Parallel execution with num_processes many processes
+# jump, min_dist are passed directly to PELT. S is directly passed to kneedle.
+# pen_modifier is used as a factor on the resulting penalty.
+# the interval [range_min, range_max] is used for searching.
+# refresh_delay and refresh_thresh are used to configure the progress "bar".
+def calculate_penalty_value(
+ signal,
+ model="l1",
+ jump=5,
+ min_dist=2,
+ range_min=0,
+ range_max=50,
+ num_processes=8,
+ refresh_delay=1,
+ refresh_thresh=5,
+ S=1.0,
+ pen_modifier=None,
+ show_plots=False,
+):
+ # default params in Function
+ if model is None:
+ model = "l1"
+ if jump is None:
+ jump = 5
+ if min_dist is None:
+ min_dist = 2
+ if range_min is None:
+ range_min = 0
+ if range_max is None:
+ range_max = 50
+ if num_processes is None:
+ num_processes = 8
+ if refresh_delay is None:
+ refresh_delay = 1
+ if refresh_thresh is None:
+ refresh_thresh = 5
+ if S is None:
+ S = 1.0
+ if pen_modifier is None:
+ pen_modifier = 1
+ # change point detection. best fit seemingly with l1. rbf prods. RuntimeErr for pen > 30
+ # https://ctruong.perso.math.cnrs.fr/ruptures-docs/build/html/costs/index.html
+ # model = "l1" #"l1" # "l2", "rbf"
+ algo = rpt.Pelt(model=model, jump=jump, min_size=min_dist).fit(signal)
+
+ ### CALC BKPS WITH DIFF PENALTYS
+ if range_max != range_min:
+ # building args array for parallelizing
+ args = []
+ # for displaying progression
+ m = Manager()
+ q = m.Queue()
+
+ for i in range(range_min, range_max + 1):
+ # same calculation for all except other penalty
+ args.append((algo, i, q))
+
+ print_info("starting kneepoint calculation.")
+ # init Pool with num_proesses
+ with Pool(min(num_processes, len(args))) as p:
+ # collect results from pool
+ result = p.starmap_async(get_bkps, args)
+ # monitor loop
+ percentage = -100 # Force display of 0%
+ i = 0
+ while True:
+ if result.ready():
+ break
+
+ size = q.qsize()
+ last_percentage = percentage
+ percentage = round(size / (range_max - range_min) * 100, 2)
+ if percentage >= last_percentage + 2 or i >= refresh_thresh:
+ print_info("Current progress: " + str(percentage) + "%")
+ i = 0
+ else:
+ i += 1
+ time.sleep(refresh_delay)
+ res = result.get()
+ print_info("Finished kneepoint calculation.")
+ # DECIDE WHICH PENALTY VALUE TO CHOOSE ACCORDING TO ELBOW/KNEE APPROACH
+ # split x and y coords to pass to kneedle
+ pen_val = [x[0] for x in res]
+ fitted_bkps_val = [x[1] for x in res]
+ # # plot to look at res
+ knee = find_knee_point(pen_val, fitted_bkps_val, S=S)
+
+ # TODO: Find plateau on pen_val vs fitted_bkps_val
+ # scipy.find_peaks() does not find plateaus if they extend through the end of the data.
+ # to counter that, add one extremely large value to the right side of the data
+ # after negating it is extremely small -> Almost certainly smaller than the
+ # found plateau therefore the plateau does not extend through the border
+ # -> scipy.find_peaks finds it. Choose value from within that plateau.
+ # fitted_bkps_val.append(100000000)
+ # TODO: Approaching over find_peaks might not work if the initial decrease step to the
+ # "correct" number of changepoints and additional decrease steps e.g. underfitting
+ # take place within the given penalty interval. find_peak only finds plateaus
+ # of peaks. If the number of chpts decreases after the wanted plateau the condition
+ # for local peaks is not satisfied anymore. Therefore this approach will only work
+ # if the plateau extends over the right border of the penalty interval.
+ # peaks, peak_plateaus = find_peaks(- np.array(fitted_bkps_val), plateau_size=1)
+ # Since the data is monotonously decreasing only one plateau can be found.
+
+ # assuming the plateau is constant, i.e. no noise. OK to assume this here, since num_bkpts
+ # is monotonously decreasing. If the number of bkpts decreases inside a considered
+ # plateau, it means that the stable configuration is not yet met. -> Search further
+ start_index = -1
+ end_index = -1
+ longest_start = -1
+ longest_end = -1
+ prev_val = -1
+ for i, num_bkpts in enumerate(fitted_bkps_val[knee[0] :]):
+ if num_bkpts != prev_val:
+ end_index = i - 1
+ if end_index - start_index > longest_end - longest_start:
+ # currently found sequence is the longest found yet
+ longest_start = start_index
+ longest_end = end_index
+ start_index = i
+ if i == len(fitted_bkps_val[knee[0] :]) - 1:
+ # end sequence with last value
+ end_index = i
+ # # since it is not guaranteed that this is the end of the plateau, assume the mid
+ # # of the plateau was hit.
+ # size = end_index - start_index
+ # end_index = end_index + size
+ # However this is not the clean solution. Better if search interval is widened
+ # with range_min and range_max
+ if end_index - start_index > longest_end - longest_start:
+ # last found sequence is the longest found yet
+ longest_start = start_index
+ longest_end = end_index
+ start_index = i
+ prev_val = num_bkpts
+ if show_plots:
+ plt.xlabel("Penalty")
+ plt.ylabel("Number of Changepoints")
+ plt.plot(pen_val, fitted_bkps_val)
+ plt.vlines(
+ longest_start + knee[0], 0, max(fitted_bkps_val), linestyles="dashed"
+ )
+ plt.vlines(
+ longest_end + knee[0], 0, max(fitted_bkps_val), linestyles="dashed"
+ )
+ plt.show()
+ # choosing pen from plateau
+ mid_of_plat = longest_start + (longest_end - longest_start) // 2
+ knee = (mid_of_plat + knee[0], fitted_bkps_val[mid_of_plat + knee[0]])
+
+ # modify knee according to options. Defaults to 1 * knee
+ knee = (knee[0] * pen_modifier, knee[1])
+
+ else:
+ # range_min == range_max. has the same effect as pen_override
+ knee = (range_min, None)
+ print_info(str(knee[0]) + " has been selected as penalty.")
+ if knee[0] is not None:
+ return knee
+
+ print_error(
+ "With the current thresh-hold S="
+ + str(S)
+ + " it is not possible to select a penalty value."
+ )
+ sys.exit(-1)
+
+
+# calculates the raw_states for measurement measurement. num_measurement is used to identify the
+# return value
+# penalty, model and jump are directly passed to pelt
+def calc_raw_states_func(num_measurement, measurement, penalty, model, jump):
+ # extract signal
+ signal = np.array(measurement["uW"])
+ # norm signal to remove dependency on absolute values
+ normed_signal = norm_signal(signal)
+ # calculate the breakpoints
+ bkpts = calc_pelt(normed_signal, penalty, model=model, jump=jump)
+ calced_states = list()
+ start_time = 0
+ end_time = 0
+ # calc metrics for all states
+ for bkpt in bkpts:
+ # start_time of state is end_time of previous one
+ # (Transitions are instantaneous)
+ start_time = end_time
+ end_time = bkpt
+ power_vals = signal[start_time:end_time]
+ mean_power = np.mean(power_vals)
+ std_dev = np.std(power_vals)
+ calced_state = (start_time, end_time, mean_power, std_dev)
+ calced_states.append(calced_state)
+ num = 0
+ new_avg_std = 0
+ # calc avg std for all states from this measurement
+ for s in calced_states:
+ # print_info("State " + str(num) + " starts at t=" + str(s[0])
+ # + " and ends at t=" + str(s[1])
+ # + " while using " + str(s[2])
+ # + "uW with sigma=" + str(s[3]))
+ num = num + 1
+ new_avg_std = new_avg_std + s[3]
+ # check case if no state has been found to avoid crashing
+ if len(calced_states) != 0:
+ new_avg_std = new_avg_std / len(calced_states)
+ else:
+ new_avg_std = 0
+ change_avg_std = measurement["uW_std"] - new_avg_std
+ # print_info("The average standard deviation for the newly found states is "
+ # + str(new_avg_std))
+ # print_info("That is a reduction of " + str(change_avg_std))
+ return num_measurement, calced_states, new_avg_std, change_avg_std
+
+
+# parallelize calc over all measurements
+def calc_raw_states(arg_list, num_processes=8):
+ m = Manager()
+ with Pool(processes=min(num_processes, len(arg_list))) as p:
+ # collect results from pool
+ result = p.starmap(calc_raw_states_func, arg_list)
+ return result
+
+
+# Very short benchmark yielded approx. 3 times the speed of solution not using sort
+# checks the percentiles if refinement is necessary
+def needs_refinement(signal, thresh):
+ sorted_signal = sorted(signal)
+ length_of_signal = len(signal)
+ percentile_size = int()
+ percentile_size = length_of_signal // 100
+ lower_percentile = sorted_signal[0:percentile_size]
+ upper_percentile = sorted_signal[
+ length_of_signal - percentile_size : length_of_signal
+ ]
+ lower_percentile_mean = np.mean(lower_percentile)
+ upper_percentile_mean = np.mean(upper_percentile)
+ median = np.median(sorted_signal)
+ dist = median - lower_percentile_mean
+ if dist > thresh:
+ return True
+ dist = upper_percentile_mean - median
+ if dist > thresh:
+ return True
+ return False
+
+
+# helper functions for user output
+# TODO: maybe switch with python logging feature
+def print_info(str_to_prt):
+ str_lst = str_to_prt.split(sep="\n")
+ for str_prt in str_lst:
+ print("[INFO]" + str_prt)
+
+
+def print_warning(str_to_prt):
+ str_lst = str_to_prt.split(sep="\n")
+ for str_prt in str_lst:
+ print("[WARNING]" + str_prt)
+
+
+def print_error(str_to_prt):
+ str_lst = str_to_prt.split(sep="\n")
+ for str_prt in str_lst:
+ print("[ERROR]" + str_prt, file=sys.stderr)
+
+
+# norms the signal and apply scaler to all values as a factor
+def norm_signal(signal, scaler=25):
+ max_val = max(signal)
+ normed_signal = np.zeros(shape=len(signal))
+ for i, signal_i in enumerate(signal):
+ normed_signal[i] = signal_i / max_val
+ normed_signal[i] = normed_signal[i] * scaler
+ return normed_signal
+
+
+# norms the values to prepare them for clustering
+def norm_values_to_cluster(values_to_cluster):
+ new_vals = np.array(values_to_cluster)
+ num_samples = len(values_to_cluster)
+ num_params = len(values_to_cluster[0])
+ for i in range(num_params):
+ param_vals = []
+ for sample in new_vals:
+ param_vals.append(sample[i])
+ max_val = np.max(np.abs(param_vals))
+ for num_sample, sample in enumerate(new_vals):
+ values_to_cluster[num_sample][i] = sample[i] / max_val
+ return new_vals
+
+
+# finds state_num using state name
+def get_state_num(state_name, distinct_states):
+ for state_num, states in enumerate(distinct_states):
+ if state_name in states:
+ return state_num
+ return -1
+
+
+if __name__ == "__main__":
+ # OPTION RECOGNITION
+ opt = dict()
+
+ optspec = (
+ "filename= "
+ "v "
+ "model= "
+ "jump= "
+ "min_dist= "
+ "range_min= "
+ "range_max= "
+ "num_processes= "
+ "refresh_delay= "
+ "refresh_thresh= "
+ "S= "
+ "pen_override= "
+ "pen_modifier= "
+ "plotting= "
+ "refinement_thresh= "
+ "cache_dicts "
+ "cache_loc= "
+ )
+ opt_filename = None
+ opt_verbose = False
+ opt_model = None
+ opt_jump = None
+ opt_min_dist = None
+ opt_range_min = None
+ opt_range_max = None
+ opt_num_processes = cpu_count()
+ opt_refresh_delay = None
+ opt_refresh_thresh = None
+ opt_S = None
+ opt_pen_override = None
+ opt_pen_modifier = None
+ opt_plotting = False
+ opt_refinement_thresh = None
+ opt_cache_loc = None
+ try:
+ raw_opts, args = getopt.getopt(sys.argv[1:], "", optspec.split(" "))
+
+ for option, parameter in raw_opts:
+ optname = re.sub(r"^--", "", option)
+ opt[optname] = parameter
+
+ if "filename" not in opt:
+ print_error("No file specified!")
+ sys.exit(-1)
+ else:
+ opt_filename = opt["filename"]
+ if "v" in opt:
+ opt_verbose = True
+ opt_plotting = True
+ if "model" in opt:
+ opt_model = opt["model"]
+ if "jump" in opt:
+ try:
+ opt_jump = int(opt["jump"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "min_dist" in opt:
+ try:
+ opt_min_dist = int(opt["min_dist"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "range_min" in opt:
+ try:
+ opt_range_min = int(opt["range_min"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "range_max" in opt:
+ try:
+ opt_range_max = int(opt["range_max"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "num_processes" in opt:
+ try:
+ opt_num_processes = int(opt["num_processes"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "refresh_delay" in opt:
+ try:
+ opt_refresh_delay = int(opt["refresh_delay"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "refresh_thresh" in opt:
+ try:
+ opt_refresh_thresh = int(opt["refresh_thresh"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "S" in opt:
+ try:
+ opt_S = float(opt["S"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "pen_override" in opt:
+ try:
+ opt_pen_override = int(opt["pen_override"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "pen_modifier" in opt:
+ try:
+ opt_pen_modifier = float(opt["pen_modifier"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "refinement_thresh" in opt:
+ try:
+ opt_refinement_thresh = int(opt["refinement_thresh"])
+ except ValueError as verr:
+ print(verr, file=sys.stderr)
+ sys.exit(-1)
+ if "cache_dicts" in opt:
+ if "cache_loc" in opt:
+ opt_cache_loc = opt["cache_loc"]
+ else:
+ print_error('If "cache_dicts" is set, "cache_loc" must be provided.')
+ sys.exit(-1)
+ except getopt.GetoptError as err:
+ print(err, file=sys.stderr)
+ sys.exit(-1)
+ filepath = os.path.dirname(opt_filename)
+ # OPENING DATA
+ if ".json" in opt_filename:
+ # open file with trace data from json
+ print_info(
+ "Will only refine the state which is present in "
+ + opt_filename
+ + " if necessary."
+ )
+ with open(opt_filename, "r") as f:
+ configurations = json.load(f)
+
+ # for i in range(0, 7):
+ # signal = np.array(configurations[i]['offline'][0]['uW'])
+ # plt.plot(signal)
+ # plt.xlabel('Time [us]')
+ # plt.ylabel('Power [mW]')
+ # plt.show()
+ # sys.exit()
+
+ # resulting_sequence_list = []
+ # search for param_names, by_param and by_name files
+ # cachingopts
+ by_param_file = None
+ by_name_file = None
+ param_names_file = None
+ from_cache = False
+ not_accurate = False
+ if opt_cache_loc is not None:
+ flag = False
+ by_name_loc = os.path.join(opt_cache_loc, "by_name.txt")
+ by_param_loc = os.path.join(opt_cache_loc, "by_param.txt")
+ param_names_loc = os.path.join(opt_cache_loc, "param_names.txt")
+ if os.path.isfile(by_name_loc) and os.path.getsize(by_name_loc) > 0:
+ by_name_file = open(by_name_loc, "r")
+ else:
+ print_error("In " + opt_cache_loc + " is no by_name.txt.")
+ flag = True
+ if os.path.isfile(by_param_loc) and os.path.getsize(by_param_loc) > 0:
+ by_param_file = open(by_param_loc, "r")
+ else:
+ print_error("In " + opt_cache_loc + " is no by_param.txt.")
+ flag = True
+ if os.path.isfile(param_names_loc) and os.path.getsize(param_names_loc) > 0:
+ param_names_file = open(param_names_loc, "r")
+ else:
+ print_error("In " + opt_cache_loc + " is no param_names.txt.")
+ flag = True
+ if flag:
+ print_info("The cache will be build.")
+ else:
+ print_warning(
+ 'THE OPTION "cache_dicts" IS FOR DEBUGGING PURPOSES ONLY! '
+ "\nDO NOT USE FOR REGULAR APPLICATIONS!"
+ "\nThis will possibly not be maintained in further development."
+ )
+ from_cache = True
+ big_state_name = configurations[0]["name"]
+ if None in (by_param_file, by_name_file, param_names_file):
+ state_durations_by_config = []
+ state_consumptions_by_config = []
+ # loop through all traces check if refinement is necessary and if necessary refine it.
+ for num_config, measurements_by_config in enumerate(configurations):
+ # loop through all occurrences of the looked at state
+ print_info(
+ "Looking at state '"
+ + measurements_by_config["name"]
+ + "' with params: "
+ + str(measurements_by_config["parameter"])
+ + "("
+ + str(num_config + 1)
+ + "/"
+ + str(len(configurations))
+ + ")"
+ )
+ num_needs_refine = 0
+ print_info("Checking if refinement is necessary...")
+ for measurement in measurements_by_config["offline"]:
+ # loop through measurements of particular state
+ # an check if state needs refinement
+ signal = measurement["uW"]
+ # mean = measurement['uW_mean']
+ if needs_refinement(signal, opt_refinement_thresh):
+ num_needs_refine = num_needs_refine + 1
+ if num_needs_refine == 0:
+ print_info(
+ "No refinement necessary for state '"
+ + measurements_by_config["name"]
+ + "' with params: "
+ + str(measurements_by_config["parameter"])
+ )
+ elif num_needs_refine < len(measurements_by_config["offline"]) / 2:
+ print_info(
+ "No refinement necessary for state '"
+ + measurements_by_config["name"]
+ + "' with params: "
+ + str(measurements_by_config["parameter"])
+ )
+ print_warning(
+ "However this decision was not unanimously. This could hint a poor"
+ "measurement quality."
+ )
+ else:
+ if num_needs_refine != len(measurements_by_config["parameter"]):
+ print_warning(
+ "However this decision was not unanimously. This could hint a poor"
+ "measurement quality."
+ )
+ # assume that all measurements of the same param configuration are fundamentally
+ # similar -> calculate penalty for first measurement, use it for all
+ if opt_pen_override is None:
+ signal = np.array(measurements_by_config["offline"][0]["uW"])
+ normed_signal = norm_signal(signal)
+ penalty = calculate_penalty_value(
+ normed_signal,
+ model=opt_model,
+ range_min=opt_range_min,
+ range_max=opt_range_max,
+ num_processes=opt_num_processes,
+ jump=opt_jump,
+ S=opt_S,
+ pen_modifier=opt_pen_modifier,
+ )
+ penalty = penalty[0]
+ else:
+ penalty = opt_pen_override
+ # build arguments for parallel excecution
+ print_info("Starting raw_states calculation.")
+ ### YOU ARE HERE
+ raw_states_calc_args = []
+ for num_measurement, measurement in enumerate(
+ measurements_by_config["offline"]
+ ):
+ raw_states_calc_args.append(
+ (num_measurement, measurement, penalty, opt_model, opt_jump)
+ )
+
+ raw_states_list = [None] * len(measurements_by_config["offline"])
+ raw_states_res = calc_raw_states(
+ raw_states_calc_args, opt_num_processes
+ )
+ # extracting result and putting it in correct order -> index of raw_states_list
+ # entry still corresponds with index of measurement in measurements_by_states
+ # -> If measurements are discarded the used ones are easily recognized
+ for ret_val in raw_states_res:
+ num_measurement = ret_val[0]
+ raw_states = ret_val[1]
+ avg_std = ret_val[2]
+ change_avg_std = ret_val[3]
+ # FIXME: Wieso gibt mir meine IDE hier eine Warning aus? Der Index müsste doch
+ # int sein oder nicht? Es scheint auch vernünftig zu klappen...
+ raw_states_list[num_measurement] = raw_states
+ print_info(
+ "The average standard deviation for the newly found states in "
+ + "measurement No. "
+ + str(num_measurement)
+ + " is "
+ + str(avg_std)
+ )
+ print_info("That is a reduction of " + str(change_avg_std))
+ # l_signal = measurements_by_config['offline'][num_measurement]['uW']
+ # l_bkpts = [s[1] for s in raw_states]
+ # fig, ax = rpt.display(np.array(l_signal), l_bkpts)
+ # plt.show()
+ print_info("Finished raw_states calculation.")
+ num_states_array = [int()] * len(raw_states_list)
+ i = 0
+ for i, x in enumerate(raw_states_list):
+ num_states_array[i] = len(x)
+ avg_num_states = np.mean(num_states_array)
+ num_states_dev = np.std(num_states_array)
+ print_info(
+ "On average "
+ + str(avg_num_states)
+ + " States have been found. The standard deviation"
+ + " is "
+ + str(num_states_dev)
+ )
+ # TODO: MAGIC NUMBER
+ if num_states_dev > 1:
+ print_warning(
+ "The number of states varies strongly across measurements."
+ " Consider choosing a larger range for penalty detection."
+ " It is also possible, that the processed data is not accurate"
+ " enough to produce proper results."
+ )
+ time.sleep(5)
+ # TODO: Wie bekomme ich da jetzt raus, was die Wahrheit ist?
+ # Einfach Durchschnitt nehmen?
+ # Preliminary decision: Further on only use the traces, which have the most
+ # frequent state count
+ counts = np.bincount(num_states_array)
+ num_raw_states = np.argmax(counts)
+ print_info(
+ "Choose " + str(num_raw_states) + " as number of raw_states."
+ )
+ if num_raw_states == 1:
+ print_info(
+ "Upon further inspection it is clear that no refinement is necessary."
+ " The macromodel is usable for this configuration."
+ )
+ continue
+ # iterate through all found breakpoints and determine start and end points as well
+ # as power consumption
+ num_measurements = len(raw_states_list)
+ states_duration_list = [list()] * num_raw_states
+ states_consumption_list = [list()] * num_raw_states
+ for num_elem, _ in enumerate(states_duration_list):
+ states_duration_list[num_elem] = [0] * num_measurements
+ states_consumption_list[num_elem] = [0] * num_measurements
+ num_used_measurements = 0
+ for num_measurement, raw_states in enumerate(raw_states_list):
+ if len(raw_states) == num_raw_states:
+ num_used_measurements = num_used_measurements + 1
+ for num_state, s in enumerate(raw_states):
+ states_duration_list[num_state][num_measurement] = (
+ s[1] - s[0]
+ )
+ states_consumption_list[num_state][num_measurement] = s[
+ 2
+ ]
+ # calced_state = (start_time, end_time, mean_power, std_dev)
+ # for num_state, s in enumerate(raw_states):
+ # state_duration = s[1] - s[0]
+ # state_consumption = s[2]
+ # states_duration_list[num_state] = \
+ # states_duration_list[num_state] + state_duration
+ # states_consumption_list[num_state] = \
+ # states_consumption_list[num_state] + state_consumption
+ else:
+ print_info(
+ "Discarding measurement No. "
+ + str(num_measurement)
+ + " because it did not recognize the number of "
+ "raw_states correctly."
+ )
+ # l_signal = measurements_by_config['offline'][num_measurement]['uW']
+ # l_bkpts = [s[1] for s in raw_states]
+ # fig, ax = rpt.display(np.array(l_signal), l_bkpts)
+ # plt.show()
+ # for i, x in enumerate(states_duration_list):
+ # states_duration_list[i] = x / num_used_measurements
+ # for i, x in enumerate(states_consumption_list):
+ # states_consumption_list[i] = x / num_used_measurements
+ if num_used_measurements != len(raw_states_list):
+ if num_used_measurements / len(raw_states_list) <= 0.5:
+ print_warning(
+ "Only used "
+ + str(num_used_measurements)
+ + "/"
+ + str(len(raw_states_list))
+ + " Measurements for refinement. "
+ + "Others did not recognize number of states correctly."
+ + "\nYou should verify the integrity of the measurements."
+ )
+ else:
+ print_info(
+ "Used "
+ + str(num_used_measurements)
+ + "/"
+ + str(len(raw_states_list))
+ + " Measurements for refinement."
+ + " Others did not recognize number of states correctly."
+ )
+ num_used_measurements = i
+ else:
+ print_info("Used all available measurements.")
+
+ state_durations_by_config.append((num_config, states_duration_list))
+ state_consumptions_by_config.append(
+ (num_config, states_consumption_list)
+ )
+
+ # combine all state durations and consumptions to parametrized model
+ if len(state_durations_by_config) == 0:
+ print(
+ "No refinement necessary for this state. The macromodel is usable."
+ )
+ sys.exit(1)
+ if len(state_durations_by_config) / len(configurations) > 1 / 2 and len(
+ state_durations_by_config
+ ) != len(configurations):
+ print_warning(
+ "Some measurements(>50%) need to be refined, however that is not true for"
+ " all measurements. This hints a correlation between the structure of"
+ " the underlying automaton and parameters. Only the ones which need to"
+ " be refined will be refined. THE RESULT WILL NOT ACCURATELY DEPICT "
+ " THE REAL WORLD."
+ )
+ not_accurate = True
+ if len(state_durations_by_config) / len(configurations) < 1 / 2:
+ print_warning(
+ "Some measurements(<50%) need to be refined, however that is not true for"
+ " all measurements. This hints a correlation between the structure of"
+ " the underlying automaton and parameters. Or a poor quality of measurements."
+ " No Refinement will be done."
+ )
+ sys.exit(-1)
+ # this is only necessary because at this state only linear automatons can be modeled.
+ num_states_array = [int()] * len(state_consumptions_by_config)
+ for i, (_, states_consumption_list) in enumerate(
+ state_consumptions_by_config
+ ):
+ num_states_array[i] = len(states_consumption_list)
+ counts = np.bincount(num_states_array)
+ num_raw_states = np.argmax(counts)
+ usable_configs = len(state_consumptions_by_config)
+ # param_list identical for each raw_state
+ param_list = []
+ param_names = configurations[0]["offline_aggregates"]["paramkeys"][0]
+ print_info("param_names: " + str(param_names))
+ for num_config, states_consumption_list in state_consumptions_by_config:
+ if len(states_consumption_list) != num_raw_states:
+ print_warning(
+ "Config No."
+ + str(num_config)
+ + " not usable yet due to different "
+ + "number of states. This hints a correlation between parameters and "
+ + "the structure of the resulting automaton. This will be possibly"
+ + " supported in a future version of this tool. HOWEVER AT THE MOMENT"
+ " THIS WILL LEAD TO INACCURATE RESULTS!"
+ )
+ not_accurate = True
+ usable_configs = usable_configs - 1
+ else:
+ param_list.extend(
+ configurations[num_config]["offline_aggregates"]["param"]
+ )
+ print_info("param_list: " + str(param_list))
+
+ if usable_configs == len(state_consumptions_by_config):
+ print_info("All configs usable.")
+ else:
+ print_info("Using only " + str(usable_configs) + " Configs.")
+ # build by_name
+ by_name = {}
+ usable_configs_2 = len(state_consumptions_by_config)
+ for i in range(num_raw_states):
+ consumptions_for_state = []
+ durations_for_state = []
+ for j, (_, states_consumption_list) in enumerate(
+ state_consumptions_by_config
+ ):
+ if len(states_consumption_list) == num_raw_states:
+ consumptions_for_state.extend(states_consumption_list[i])
+ durations_for_state.extend(state_durations_by_config[j][1][i])
+ else:
+ not_accurate = True
+ usable_configs_2 = usable_configs_2 - 1
+ if usable_configs_2 != usable_configs:
+ print_error(
+ "an zwei unterschiedlichen Stellen wurden unterschiedlich viele "
+ "Messungen rausgeworfen. Bei Janis beschweren."
+ )
+ state_name = "state_" + str(i)
+ state_dict = {
+ "param": param_list,
+ "power": consumptions_for_state,
+ "duration": durations_for_state,
+ "attributes": ["power", "duration"],
+ # Da kein "richtiger" Automat generiert wird, gibt es auch keine Transitionen
+ "isa": "state",
+ }
+ by_name[state_name] = state_dict
+ by_param = by_name_to_by_param(by_name)
+ if opt_cache_loc is not None:
+ by_name_loc = os.path.join(opt_cache_loc, "by_name.txt")
+ by_param_loc = os.path.join(opt_cache_loc, "by_param.txt")
+ param_names_loc = os.path.join(opt_cache_loc, "param_names.txt")
+ f = open(by_name_loc, "w")
+ f.write(str(by_name))
+ f.close()
+ f = open(by_param_loc, "w")
+ f.write(str(by_param))
+ f.close()
+ f = open(param_names_loc, "w")
+ f.write(str(param_names))
+ f.close()
+ else:
+ by_name_text = str(by_name_file.read())
+ by_name = eval(by_name_text)
+ by_param_text = str(by_param_file.read())
+ by_param = eval(by_param_text)
+ param_names_text = str(param_names_file.read())
+ param_names = eval(param_names_text)
+
+ # t = 0
+ # last_pow = 0
+ # for key in by_name.keys():
+ # end_t = t + np.mean(by_name[key]["duration"])
+ # power = np.mean(by_name[key]["power"])
+ # plt.vlines(t, min(last_pow, power), max(last_pow, power))
+ # plt.hlines(power, t, end_t)
+ # t = end_t
+ # last_pow = power
+ # plt.show()
+ stats = parameters.ParamStats(by_name, by_param, param_names, dict())
+ paramfit = ParallelParamFit(by_param)
+ for state_name in by_name.keys():
+ for num_param, param_name in enumerate(param_names):
+ if stats.depends_on_param(state_name, "power", param_name):
+ paramfit.enqueue(state_name, "power", num_param, param_name)
+ if stats.depends_on_param(state_name, "duration", param_name):
+ paramfit.enqueue(state_name, "duration", num_param, param_name)
+ print_info(
+ "State "
+ + state_name
+ + "s power depends on param "
+ + param_name
+ + ":"
+ + str(stats.depends_on_param(state_name, "power", param_name))
+ )
+ print_info(
+ "State "
+ + state_name
+ + "s duration depends on param "
+ + param_name
+ + ":"
+ + str(stats.depends_on_param(state_name, "duration", param_name))
+ )
+ paramfit.fit()
+ fit_res_dur_dict = {}
+ fit_res_pow_dict = {}
+ # fit functions and check if successful
+ for state_name in by_name.keys():
+ fit_power = paramfit.get_result(state_name, "power")
+ fit_duration = paramfit.get_result(state_name, "duration")
+ combined_fit_power = analytic.function_powerset(fit_power, param_names, 0)
+ combined_fit_duration = analytic.function_powerset(
+ fit_duration, param_names, 0
+ )
+ combined_fit_power.fit(by_param, state_name, "power")
+ if not combined_fit_power.fit_success:
+ print_warning(
+ "Fitting(power) for state " + state_name + " was not succesful!"
+ )
+ combined_fit_duration.fit(by_param, state_name, "duration")
+ if not combined_fit_duration.fit_success:
+ print_warning(
+ "Fitting(duration) for state " + state_name + " was not succesful!"
+ )
+ fit_res_pow_dict[state_name] = combined_fit_power
+ fit_res_dur_dict[state_name] = combined_fit_duration
+ # only raw_states with the same number of function parameters can be similar
+ num_param_pow_dict = {}
+ num_param_dur_dict = {}
+ # print found substate_results
+ for state_name in by_name.keys():
+ model_function = str(fit_res_pow_dict[state_name].model_function)
+ model_args = fit_res_pow_dict[state_name].model_args
+ num_param_pow_dict[state_name] = len(model_args)
+ for num_arg, arg in enumerate(model_args):
+ replace_string = "regression_arg(" + str(num_arg) + ")"
+ model_function = model_function.replace(replace_string, str(arg))
+ print_info("Power-Function for state " + state_name + ": " + model_function)
+ for state_name in by_name.keys():
+ model_function = str(fit_res_dur_dict[state_name].model_function)
+ model_args = fit_res_dur_dict[state_name].model_args
+ num_param_dur_dict[state_name] = len(model_args)
+ for num_arg, arg in enumerate(model_args):
+ replace_string = "regression_arg(" + str(num_arg) + ")"
+ model_function = model_function.replace(replace_string, str(arg))
+ print_info(
+ "Duration-Function for state " + state_name + ": " + model_function
+ )
+ # sort states in buckets for clustering
+ similar_raw_state_buckets = {}
+ for state_name in by_name.keys():
+ pow_model_function = str(fit_res_pow_dict[state_name].model_function)
+ dur_model_function = str(fit_res_dur_dict[state_name].model_function)
+ key_tuple = (pow_model_function, dur_model_function)
+ if key_tuple not in similar_raw_state_buckets:
+ similar_raw_state_buckets[key_tuple] = []
+ similar_raw_state_buckets[key_tuple].append(state_name)
+
+ # cluster for each Key-Tuple using the function parameters
+ distinct_states = []
+ for key_tuple in similar_raw_state_buckets.keys():
+ print_info(
+ "Key-Tuple "
+ + str(key_tuple)
+ + ": "
+ + str(similar_raw_state_buckets[key_tuple])
+ )
+ similar_states = similar_raw_state_buckets[key_tuple]
+ if len(similar_states) > 1:
+ # only necessary to cluster if more than one raw_state has the same function
+ # configuration
+ # functions are identical -> num_params and used params are identical
+ num_params = (
+ num_param_dur_dict[similar_states[0]]
+ + num_param_pow_dict[similar_states[0]]
+ )
+ values_to_cluster = np.zeros((len(similar_states), num_params))
+ for num_state, state_name in enumerate(similar_states):
+ dur_params = fit_res_dur_dict[state_name].model_args
+ pow_params = fit_res_pow_dict[state_name].model_args
+ j = 0
+ for param in pow_params:
+ values_to_cluster[num_state][j] = param
+ j = j + 1
+ for param in dur_params:
+ values_to_cluster[num_state][j] = param
+ j = j + 1
+ normed_vals_to_cluster = norm_values_to_cluster(values_to_cluster)
+ cluster = AgglomerativeClustering(
+ n_clusters=None,
+ compute_full_tree=True,
+ affinity="euclidean",
+ linkage="ward",
+ # TODO: Magic Number. Beim Evaluieren finetunen
+ distance_threshold=1,
+ )
+ cluster.fit_predict(values_to_cluster)
+ cluster_labels = cluster.labels_
+ print_info("Cluster labels:\n" + str(cluster_labels))
+ if cluster.n_clusters_ > 1:
+ # more than one distinct state found -> seperation of raw_states necessary
+ distinct_state_dict = {}
+ for num_state, label in enumerate(cluster_labels):
+ if label not in distinct_state_dict.keys():
+ distinct_state_dict[label] = []
+ distinct_state_dict[label].append(similar_states[num_state])
+ for distinct_state_key in distinct_state_dict.keys():
+ distinct_states.append(distinct_state_dict[distinct_state_key])
+ else:
+ # all raw_states make up this state
+ distinct_states.append(similar_states)
+ else:
+ distinct_states.append(similar_states)
+ for num_state, distinct_state in enumerate(distinct_states):
+ print("State " + str(num_state) + ": " + str(distinct_state))
+ num_raw_states = len(by_name.keys())
+ resulting_sequence = [int] * num_raw_states
+ for i in range(num_raw_states):
+ # apply the projection from raw_states to states
+ state_name = "state_" + str(i)
+ state_num = get_state_num(state_name, distinct_states)
+ if state_num == -1:
+ print_error(
+ "Critical Error when creating the resulting sequence. raw_state state_"
+ + str(i)
+ + " could not be mapped to a state."
+ )
+ sys.exit(-1)
+ resulting_sequence[i] = state_num
+ print("Resulting sequence is: " + str(resulting_sequence))
+ # if from_cache:
+ # print_warning(
+ # "YOU USED THE OPTION \"cache_dicts\". THIS IS FOR DEBUGGING PURPOSES ONLY!"
+ # "\nTHE SCRIPT WILL NOW STOP PREMATURELY,"
+ # "SINCE DATA FOR FURTHER COMPUTATION IS MISSING!")
+ # sys.exit(0)
+ # parameterize all new states
+ new_by_name = {}
+ for num_state, distinct_state in enumerate(distinct_states):
+ state_name = "State_" + str(num_state)
+ consumptions_for_state = []
+ durations_for_state = []
+ param_list = []
+ for raw_state in distinct_state:
+ original_state_dict = by_name[raw_state]
+ param_list.extend(original_state_dict["param"])
+ consumptions_for_state.extend(original_state_dict["power"])
+ durations_for_state.extend(original_state_dict["duration"])
+ new_state_dict = {
+ "param": param_list,
+ "power": consumptions_for_state,
+ "duration": durations_for_state,
+ "attributes": ["power", "duration"],
+ # Da kein richtiger Automat generiert wird, gibt es auch keine Transitionen
+ "isa": "state",
+ }
+ new_by_name[state_name] = new_state_dict
+ new_by_param = by_name_to_by_param(new_by_name)
+ new_stats = parameters.ParamStats(
+ new_by_name, new_by_param, param_names, dict()
+ )
+ new_paramfit = ParallelParamFit(new_by_param)
+ for state_name in new_by_name.keys():
+ for num_param, param_name in enumerate(param_names):
+ if new_stats.depends_on_param(state_name, "power", param_name):
+ new_paramfit.enqueue(state_name, "power", num_param, param_name)
+ if new_stats.depends_on_param(state_name, "duration", param_name):
+ new_paramfit.enqueue(state_name, "duration", num_param, param_name)
+ print_info(
+ "State "
+ + state_name
+ + "s power depends on param "
+ + param_name
+ + ":"
+ + str(new_stats.depends_on_param(state_name, "power", param_name))
+ )
+ print_info(
+ "State "
+ + state_name
+ + "s duration depends on param "
+ + param_name
+ + ":"
+ + str(
+ new_stats.depends_on_param(state_name, "duration", param_name)
+ )
+ )
+ new_paramfit.fit()
+ new_fit_res_dur_dict = {}
+ new_fit_res_pow_dict = {}
+ for state_name in new_by_name.keys():
+ fit_power = new_paramfit.get_result(state_name, "power")
+ fit_duration = new_paramfit.get_result(state_name, "duration")
+ combined_fit_power = analytic.function_powerset(fit_power, param_names, 0)
+ combined_fit_duration = analytic.function_powerset(
+ fit_duration, param_names, 0
+ )
+ combined_fit_power.fit(new_by_param, state_name, "power")
+ if not combined_fit_power.fit_success:
+ print_warning(
+ "Fitting(power) for state " + state_name + " was not succesful!"
+ )
+ combined_fit_duration.fit(new_by_param, state_name, "duration")
+ if not combined_fit_duration.fit_success:
+ print_warning(
+ "Fitting(duration) for state " + state_name + " was not succesful!"
+ )
+ new_fit_res_pow_dict[state_name] = combined_fit_power
+ new_fit_res_dur_dict[state_name] = combined_fit_duration
+ # output results
+ result_loc = os.path.join(filepath, "result" + big_state_name + ".txt")
+ with open(result_loc, "w") as f:
+ f.write("Resulting Sequence: " + str(resulting_sequence))
+ f.write("\n\n")
+ for state_name in new_by_name.keys():
+ model_function = str(new_fit_res_pow_dict[state_name].model_function)
+ model_args = new_fit_res_pow_dict[state_name].model_args
+ for num_arg, arg in enumerate(model_args):
+ replace_string = "regression_arg(" + str(num_arg) + ")"
+ model_function = model_function.replace(replace_string, str(arg))
+ print("Power-Function for state " + state_name + ": " + model_function)
+ f.write(
+ "Power-Function for state "
+ + state_name
+ + ": "
+ + model_function
+ + "\n"
+ )
+ f.write("\n\n")
+ for state_name in new_by_name.keys():
+ model_function = str(new_fit_res_dur_dict[state_name].model_function)
+ model_args = new_fit_res_dur_dict[state_name].model_args
+ for num_arg, arg in enumerate(model_args):
+ replace_string = "regression_arg(" + str(num_arg) + ")"
+ model_function = model_function.replace(replace_string, str(arg))
+ print(
+ "Duration-Function for state " + state_name + ": " + model_function
+ )
+ f.write(
+ "Duration-Function for state "
+ + state_name
+ + ": "
+ + model_function
+ + "\n"
+ )
+ if not_accurate:
+ print_warning(
+ "THIS RESULT IS NOT ACCURATE. SEE WARNINGLOG TO GET A BETTER UNDERSTANDING"
+ " WHY."
+ )
+ f.write(
+ "THIS RESULT IS NOT ACCURATE. SEE WARNINGLOG TO GET A BETTER UNDERSTANDING"
+ " WHY."
+ )
+
+ # Removed clustering at this point, since it provided too much difficulties
+ # at the current state. Clustering is still used, but at another point of execution.
+ # Now parametrization is done first. raw_states are grouped by their using a dict
+ # where the key is [power_function, duration_dunction]. Then all raw_states from
+ # each bucket are clustered by their parameters
+ # i = 0
+ # cluster_labels_list = []
+ # num_cluster_list = []
+ # for num_trace, raw_states in enumerate(raw_states_list):
+ # # iterate through raw states from measurements
+ # if len(raw_states) == num_raw_states:
+ # # build array with power values to cluster these
+ # value_to_cluster = np.zeros((num_raw_states, 2))
+ # j = 0
+ # for s in raw_states:
+ # value_to_cluster[j][0] = s[2]
+ # value_to_cluster[j][1] = 0
+ # j = j + 1
+ # # linked = linkage(value_to_cluster, 'single')
+ # #
+ # # labelList = range(1, 11)
+ # #
+ # # plt.figure(figsize=(10, 7))
+ # # dendrogram(linked,
+ # # orientation='top',
+ # # distance_sort='descending',
+ # # show_leaf_counts=True)
+ # # plt.show()
+ # # TODO: Automatic detection of number of clusters. Aktuell noch MAGIC NUMBER
+ # # im distance_threshold
+ # cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True,
+ # affinity='euclidean',
+ # linkage='ward',
+ # distance_threshold=opt_refinement_thresh * 100)
+ # # cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean',
+ # # linkage='ward')
+ # cluster.fit_predict(value_to_cluster)
+ # # print_info("Cluster labels:\n" + str(cluster.labels_))
+ # # plt.scatter(value_to_cluster[:, 0], value_to_cluster[:, 1], c=cluster.labels_, cmap='rainbow')
+ # # plt.show()
+ # cluster_labels_list.append((num_trace, cluster.labels_))
+ # num_cluster_list.append((num_trace, cluster.n_clusters_))
+ # i = i + 1
+ # else:
+ # print_info("Discarding measurement No. " + str(num_trace) + " because it "
+ # + "did not recognize the number of raw_states correctly.")
+ # num_used_measurements = len(raw_states_list)
+ # if i != len(raw_states_list):
+ # if i / len(raw_states_list) <= 0.5:
+ # print_warning("Only used " + str(i) + "/" + str(len(raw_states_list))
+ # + " Measurements for refinement. "
+ # "Others did not recognize number of states correctly."
+ # "\nYou should verify the integrity of the measurements.")
+ # else:
+ # print_info("Used " + str(i) + "/" + str(len(raw_states_list))
+ # + " Measurements for refinement. "
+ # "Others did not recognize number of states correctly.")
+ # num_used_measurements = i
+ # # TODO: DEBUG Kram
+ # sys.exit(0)
+ # else:
+ # print_info("Used all available measurements.")
+ #
+ # num_states = np.argmax(np.bincount([elem[1] for elem in num_cluster_list]))
+ # avg_per_state_list = [None] * len(cluster_labels_list)
+ # used_clusters = 0
+ # for number, (num_trace, labels) in enumerate(cluster_labels_list):
+ # if num_cluster_list[number][1] == num_states:
+ # avg_per_state = [0] * num_states
+ # count_per_state = [0] * num_states
+ # raw_states = raw_states_list[num_trace]
+ # for num_label, label in enumerate(labels):
+ # count_per_state[label] = count_per_state[label] + 1
+ # avg_per_state[label] = avg_per_state[label] + raw_states[num_label][2]
+ # for i, _ in enumerate(avg_per_state):
+ # avg_per_state[i] = avg_per_state[i] / count_per_state[i]
+ # avg_per_state_list[number] = avg_per_state
+ # used_clusters = used_clusters + 1
+ # else:
+ # # hopefully this does not happen regularly
+ # print_info("Discarding measurement " + str(number)
+ # + " because the clustering yielded not matching results.")
+ # num_used_measurements = num_used_measurements - 1
+ # if num_used_measurements == 0:
+ # print_error("Something went terribly wrong. Discarded all measurements.")
+ # # continue
+ # sys.exit(-1)
+ # # flattend version for clustering:
+ # values_to_cluster = np.zeros((num_states * used_clusters, 2))
+ # index = 0
+ # for avg_per_state in avg_per_state_list:
+ # if avg_per_state is not None:
+ # for avg in avg_per_state:
+ # values_to_cluster[index][0] = avg
+ # values_to_cluster[index][1] = 0
+ # index = index + 1
+ # # plt.scatter(values_to_cluster[:, 0], values_to_cluster[:, 1])
+ # # plt.show()
+ # cluster = AgglomerativeClustering(n_clusters=num_states)
+ # cluster.fit_predict(values_to_cluster)
+ # # Aktuell hast du hier ein plattes Array mit labels. Jetzt also das wieder auf die
+ # # ursprünglichen Labels abbilden, die dann verändern mit den hier gefundenen Labels.
+ # # Alle identischen Zustände haben identische Labels. Dann vllt bei resulting
+ # # sequence ausgeben, wie groß die übereinstimmung bei der Stateabfolge ist.
+ # new_labels_list = []
+ # new_labels = []
+ # i = 0
+ # for label in cluster.labels_:
+ # new_labels.append(label)
+ # i = i + 1
+ # if i == num_states:
+ # new_labels_list.append(new_labels)
+ # new_labels = []
+ # i = 0
+ # # only the selected measurements are present in new_labels.
+ # # new_labels_index should not be incremented, if not selected_measurement is skipped
+ # new_labels_index = 0
+ # # cluster_labels_list contains all measurements -> if measurement is skipped
+ # # still increment the index
+ # index = 0
+ # for elem in avg_per_state_list:
+ # if elem is not None:
+ # for number, label in enumerate(cluster_labels_list[index][1]):
+ # cluster_labels_list[index][1][number] = \
+ # new_labels_list[new_labels_index][label]
+ # new_labels_index = new_labels_index + 1
+ # else:
+ # # override not selected measurement labels to avoid choosing the wrong ones.
+ # for number, label in enumerate(cluster_labels_list[index][1]):
+ # cluster_labels_list[index][1][number] = -1
+ # index = index + 1
+ # resulting_sequence = [None] * num_raw_states
+ # i = 0
+ # confidence = 0
+ # for x in resulting_sequence:
+ # j = 0
+ # test_list = []
+ # for arr in [elem[1] for elem in cluster_labels_list]:
+ # if num_cluster_list[j][1] != num_states:
+ # j = j + 1
+ # else:
+ # if -1 in arr:
+ # print_error("Bei Janis beschweren! Fehler beim Umbenennen der"
+ # " Zustände wahrscheinlich.")
+ # sys.exit(-1)
+ # test_list.append(arr[i])
+ # j = j + 1
+ # bincount = np.bincount(test_list)
+ # resulting_sequence[i] = np.argmax(bincount)
+ # confidence = confidence + bincount[resulting_sequence[i]] / np.sum(bincount)
+ # i = i + 1
+ # confidence = confidence / len(resulting_sequence)
+ # print_info("Confidence of resulting sequence is " + str(confidence)
+ # + " while using " + str(num_used_measurements) + "/"
+ # + str(len(raw_states_list)) + " measurements.")
+ # #print(resulting_sequence)
+ # resulting_sequence_list.append((num_config, resulting_sequence))
+ # # TODO: Was jetzt? Hier habe ich jetzt pro Konfiguration eine Zustandsfolge. Daraus Automat
+ # # erzeugen. Aber wie? Oder erst parametrisieren? Eigentlich brauche ich vorher die
+ # # Loops. Wie erkenne ich die? Es können beliebig viele Loops an beliebigen Stellen
+ # # auftreten.
+ # # TODO: Die Zustandsfolgen werden sich nicht einfach in isomorphe(-einzelne wegfallende bzw.
+ # # hinzukommende Zustände) Automaten übersetzten lassen. Basiert alles auf dem Problem:
+ # # wie erkenne ich, dass zwei Zustände die selben sind und nicht nur einfach eine ähnliche
+ # # Leistungsaufnahme haben?! Vllt Zustände 2D clustern? 1Dim = Leistungsaufnahme,
+ # # 2Dim=Dauer? Zumindest innerhalb einer Paramkonfiguration sollte sich die Dauer eines
+ # # Zustands ja nicht mehr ändern. Kann sicherlich immernoch Falschclustering erzeugen...
+ # for num_config, sequence in resulting_sequence_list:
+ # print_info("NO. config:" + str(num_config))
+ # print_info(sequence)
+ #
+ #
+ #
+ #
+
+ elif ".tar" in opt_filename:
+ # open with dfatool
+ raw_data_args = list()
+ raw_data_args.append(opt_filename)
+ raw_data = RawData(raw_data_args, with_traces=True)
+ print_info(
+ "Preprocessing file. Depending on its size, this could take a while."
+ )
+ preprocessed_data = raw_data.get_preprocessed_data()
+ print_info("File fully preprocessed")
+ # TODO: Mal schauen, wie ich das mache. Erstmal nur mit json. Ist erstmal raus. Wird nicht
+ # umgesetzt.
+ print_error(
+ "Not implemented yet. Please generate .json files first with dfatool and use"
+ " those."
+ )
+ else:
+ print_error("Unknown dataformat")
+ sys.exit(-1)
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index 5d4411b..fec5620 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -385,6 +385,12 @@ if __name__ == "__main__":
type=str,
help="Export JSON energy modle to FILE. Works out of the box for v1 and v2, requires --hwmodel for v0",
)
+ parser.add_argument(
+ "--with-substates",
+ metavar="PELT_CONFIG",
+ type=str,
+ help="Perform substate analysis",
+ )
parser.add_argument("measurement", nargs="+")
args = parser.parse_args()
@@ -431,7 +437,11 @@ if __name__ == "__main__":
raw_data = RawData(
args.measurement,
- with_traces=(args.export_traces is not None or args.plot_traces is not None),
+ with_traces=(
+ args.export_traces is not None
+ or args.plot_traces is not None
+ or args.with_substates is not None
+ ),
skip_cache=args.no_cache,
)
@@ -480,6 +490,14 @@ if __name__ == "__main__":
with open(target, "w") as f:
json.dump(data, f)
+ if args.with_substates is not None:
+ arg_dict = dict()
+ if args.with_substates != "":
+ for kv in args.with_substates.split(","):
+ k, v = kv.split("=")
+ arg_dict[k] = v
+ args.with_substates = arg_dict
+
if args.plot_traces:
plot_traces(preprocessed_data, args.plot_traces)
@@ -503,6 +521,7 @@ if __name__ == "__main__":
traces=preprocessed_data,
function_override=function_override,
pta=pta,
+ pelt=args.with_substates,
)
if xv_method:
@@ -625,6 +644,9 @@ if __name__ == "__main__":
safe_functions_enabled=safe_functions_enabled
)
+ if args.with_substates is not None:
+ substate_model, substate_info = model.get_substates()
+
if "paramdetection" in show_models or "all" in show_models:
for state in model.states_and_transitions():
for attribute in model.attributes(state):
diff --git a/bin/plot_generator.py b/bin/plot_generator.py
new file mode 100644
index 0000000..908213d
--- /dev/null
+++ b/bin/plot_generator.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+import getopt
+import sys
+import re
+import os
+import numpy as np
+import pprint
+import json
+import matplotlib.pyplot as plt
+
+if __name__ == "__main__":
+ # OPTION RECOGNITION
+ opt = dict()
+
+ optspec = "bench_filename= " "result_filename= "
+ opt_bench_filename = None
+ opt_result_filename = None
+ try:
+ raw_opts, args = getopt.getopt(sys.argv[1:], "", optspec.split(" "))
+
+ for option, parameter in raw_opts:
+ optname = re.sub(r"^--", "", option)
+ opt[optname] = parameter
+ except getopt.GetoptError as err:
+ print(err, file=sys.stderr)
+ sys.exit(-1)
+
+ if "bench_filename" in opt:
+ opt_bench_filename = opt["bench_filename"]
+ else:
+
+ sys.exit(-1)
+ if "result_filename" in opt:
+ opt_result_filename = opt["result_filename"]
+ else:
+ print("wth")
+ sys.exit(-1)
+
+ with open(opt_bench_filename, "r") as f:
+ configurations = json.load(f)
+ with open(opt_result_filename, "r") as f:
+ sequence_line = f.readline()
+ begin_sequence = sequence_line.rfind("Resulting Sequence: ") + 20
+
+ if begin_sequence < 20:
+ print("nicht gefunden!")
+ sys.exit(-1)
+ sequence_substr = sequence_line[begin_sequence:]
+ resulting_sequence = eval(sequence_substr)
+ new_line = f.readline()
+ while new_line == "\n":
+ new_line = f.readline()
+ function_line = new_line
+ pow_function_dict = dict()
+ while function_line != "\n":
+ state_name_pos = function_line.find("Power-Function for state ") + 25
+ state_name_end = function_line.find(":")
+ state_name = function_line[state_name_pos:state_name_end]
+ function_string = function_line[state_name_end + 1 : -1]
+ pow_function_dict[state_name] = function_string
+ function_line = f.readline()
+ new_line = "\n"
+ while new_line == "\n":
+ new_line = f.readline()
+ function_line = new_line
+ dur_function_dict = dict()
+ while (
+ function_line != "\n"
+ and function_line != ""
+ and "THIS RESULT IS NOT ACCURATE." not in function_line
+ ):
+ state_name_pos = function_line.find("Duration-Function for state ") + 28
+ state_name_end = function_line.find(":")
+ state_name = function_line[state_name_pos:state_name_end]
+ function_string = function_line[state_name_end + 1 : -1]
+ dur_function_dict[state_name] = function_string
+ function_line = f.readline()
+
+ param_names = configurations[0]["offline_aggregates"]["paramkeys"][0]
+
+ for num_fig in range(0, min(4, len(configurations))):
+ rand_config_no = np.random.randint(0, len(configurations), 1)[0]
+ rand_conf = configurations[rand_config_no]
+ rand_signal = np.array(rand_conf["offline"][0]["uW"])
+ rand_param = rand_conf["offline_aggregates"]["param"][0]
+ rand_max_pow = max(rand_signal)
+ # pprint.pprint(rand_param)
+ pretty_rand_param = pprint.pformat(rand_param)
+ print(
+ str(param_names)
+ + "("
+ + str(rand_config_no)
+ + ")"
+ + "\n"
+ + pretty_rand_param
+ )
+ time = 0
+ next_time = 0
+ rand_stepper = 0
+ pow = 0
+ resulting_coords = list()
+ while rand_stepper < len(resulting_sequence):
+ curr_state = resulting_sequence[rand_stepper]
+ curr_state_name = "State_" + str(curr_state)
+ curr_pow_func = pow_function_dict[curr_state_name]
+ curr_dur_func = dur_function_dict[curr_state_name]
+ for num_param, name in enumerate(param_names):
+ replace_string = "parameter(" + name + ")"
+ curr_pow_func = curr_pow_func.replace(
+ replace_string, str(rand_param[num_param])
+ )
+ curr_dur_func = curr_dur_func.replace(
+ replace_string, str(rand_param[num_param])
+ )
+ pow = eval(curr_pow_func)
+ dur = eval(curr_dur_func)
+ next_time = time + dur
+ start_coord = (time, pow)
+ end_coord = (next_time, pow)
+ resulting_coords.append(start_coord)
+ resulting_coords.append(end_coord)
+ rand_stepper = rand_stepper + 1
+ time = next_time
+
+ with open("res_conf_" + str(num_fig) + "_signal.txt", "w") as f:
+ f.write("x,y\n")
+ for x, y in enumerate(rand_signal):
+ f.write(str(x) + "," + str(y) + "\n")
+ with open("res_conf_" + str(num_fig) + "_fit.txt", "w") as f:
+ f.write("x,y\n")
+ for x, y in resulting_coords:
+ f.write(str(x) + "," + str(y) + "\n")
+ plt.plot(rand_signal)
+ plt.plot([x for x, y in resulting_coords], [y for x, y in resulting_coords])
+ plt.savefig("res_conf_" + str(num_fig) + "_pic.pdf", format="pdf", dpi=300)
+ plt.clf()
diff --git a/lib/loader.py b/lib/loader.py
index 0c7ad91..b9a2930 100644
--- a/lib/loader.py
+++ b/lib/loader.py
@@ -583,6 +583,7 @@ class RawData:
# and self.traces_by_fileno[measurement['fileno']][*]['trace'][*]['offline_aggregates'] in place
# (appends data from measurement['energy_trace'])
# If measurement['expected_trace'] exists, it is edited in place instead
+ # "offline_aggregates" is the only data used later on by model.py's by_name / by_param dicts
online_datapoints = []
if "expected_trace" in measurement:
traces = measurement["expected_trace"]
@@ -626,12 +627,14 @@ class RawData:
if arg_support_enabled and "args" in online_trace_part:
paramvalues.extend(map(soft_cast_int, online_trace_part["args"]))
+ # TODO rename offline_aggregates to make it clear that this is what ends up in by_name / by_param and model.py
if "offline_aggregates" not in online_trace_part:
online_trace_part["offline_attributes"] = [
"power",
"duration",
"energy",
]
+ # this is what ends up in by_name / by_param and is used by model.py
online_trace_part["offline_aggregates"] = {
"power": [],
"duration": [],
@@ -647,6 +650,9 @@ class RawData:
online_trace_part["offline_aggregates"]["rel_energy_prev"] = []
online_trace_part["offline_aggregates"]["rel_energy_next"] = []
online_trace_part["offline_aggregates"]["timeout"] = []
+ elif "uW" in offline_trace_part:
+ online_trace_part["offline_support"] = ["power_traces"]
+ online_trace_part["offline_aggregates"]["power_traces"] = list()
# Note: All state/transitions are 20us "too long" due to injected
# active wait states. These are needed to work around MIMOSA's
@@ -678,6 +684,11 @@ class RawData:
offline_trace_part["timeout"]
)
+ if online_trace_part["isa"] == "state" and "uW" in offline_trace_part:
+ online_trace_part["offline_aggregates"]["power_traces"].append(
+ offline_trace_part["uW"]
+ )
+
def _merge_online_and_etlog(self, measurement):
# Edits self.traces_by_fileno[measurement['fileno']][*]['trace'][*]['offline']
# and self.traces_by_fileno[measurement['fileno']][*]['trace'][*]['offline_aggregates'] in place
@@ -1110,13 +1121,17 @@ def _add_trace_data_to_aggregate(aggregate, key, element):
"rel_energy_next",
]
# Uncomment this line if you also want to analyze mean transition power
- # aggrgate[key]['attributes'].append('power')
+ # aggregate[key]['attributes'].append('power')
if "plan" in element and element["plan"]["level"] == "epilogue":
aggregate[key]["attributes"].insert(0, "timeout")
attributes = aggregate[key]["attributes"].copy()
for attribute in attributes:
if attribute not in element["offline_aggregates"]:
aggregate[key]["attributes"].remove(attribute)
+ if "offline_support" in element:
+ aggregate[key]["supports"] = element["offline_support"]
+ else:
+ aggregate[key]["supports"] = list()
for datakey, dataval in element["offline_aggregates"].items():
aggregate[key][datakey].extend(dataval)
diff --git a/lib/model.py b/lib/model.py
index bb4a45b..57507e7 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -375,7 +375,7 @@ def _num_args_from_by_name(by_name):
class AnalyticModel:
- u"""
+ """
Parameter-aware analytic energy/data size/... model.
Supports both static and parameter-based model attributes, and automatic detection of parameter-dependence.
@@ -663,7 +663,7 @@ class AnalyticModel:
class PTAModel:
- u"""
+ """
Parameter-aware PTA-based energy model.
Supports both static and parameter-based model attributes, and automatic detection of parameter-dependence.
@@ -704,6 +704,7 @@ class PTAModel:
function_override={},
use_corrcoef=False,
pta=None,
+ pelt=None,
):
"""
Prepare a new PTA energy model.
@@ -726,6 +727,7 @@ class PTAModel:
use_corrcoef -- use correlation coefficient instead of stddev comparison
to detect whether a model attribute depends on a parameter
pta -- hardware model as `PTA` object
+ pelt -- perform sub-state detection via PELT and model sub-states as well. Requires traces to be set.
"""
self.by_name = by_name
self.by_param = by_name_to_by_param(by_name)
@@ -733,6 +735,12 @@ class PTAModel:
self._num_args = arg_count
self._use_corrcoef = use_corrcoef
self.traces = traces
+ if traces is not None and pelt is not None:
+ from .pelt import PELT
+
+ self.pelt = PELT(**pelt)
+ else:
+ self.pelt = None
self.stats = ParamStats(
self.by_name,
self.by_param,
@@ -927,6 +935,24 @@ class PTAModel:
return model_getter, info_getter
+ def get_substates(self):
+ states = self.states()
+ for k in self.by_param.keys():
+ if k[0] == "TX":
+ if self.pelt.needs_refinement(self.by_param[k]["power_traces"]):
+ print(f"PELT: {k} needs refinement")
+ penalty = self.pelt.get_penalty_value(
+ self.by_param[k]["power_traces"]
+ )
+ print(
+ f" we found {penalty[1]} changepoints with penalty {penalty[0]}"
+ )
+ self.pelt.calc_raw_states(
+ self.by_param[k]["power_traces"], penalty[0]
+ )
+
+ return None, None
+
def to_json(self):
static_model = self.get_static()
static_quality = self.assess(static_model)
diff --git a/lib/pelt.py b/lib/pelt.py
new file mode 100644
index 0000000..7f2e922
--- /dev/null
+++ b/lib/pelt.py
@@ -0,0 +1,393 @@
+#!/usr/bin/env python3
+
+import numpy as np
+import ruptures
+from multiprocessing import Pool
+
+# returns the found changepoints by algo for the specific penalty pen.
+# algo should be the return value of Pelt(...).fit(signal)
+# Also puts a token in container q to let the progressmeter know the changepoints for penalty pen
+# have been calculated.
+# used for parallel calculation of changepoints vs penalty
+def _get_breakpoints(algo, pen):
+ return pen, len(algo.predict(pen=pen))
+
+
+def find_knee_point(data_x, data_y, S=1.0, curve="convex", direction="decreasing"):
+ kneedle = kneed.KneeLocator(data_x, data_y, S=S, curve=curve, direction=direction)
+ kneepoint = (kneedle.knee, kneedle.knee_y)
+ return kneepoint
+
+
+def norm_signal(signal, scaler=25):
+ max_val = max(signal)
+ normed_signal = np.zeros(shape=len(signal))
+ for i, signal_i in enumerate(signal):
+ normed_signal[i] = signal_i / max_val
+ normed_signal[i] = normed_signal[i] * scaler
+ return normed_signal
+
+
+# Scheint Einfluss auf die gefundene Anzahl CHangepoints zu haben. Hrmpf.
+# Kann aber auch shitty downsampling sein. Diese Funktion ist _sehr_ quick&dirty.
+def coarse_signal(signal, divisor=10):
+ ret = list()
+ for i in range((len(signal) // divisor)):
+ ret.append(np.mean(signal[i * divisor : (i + 1) * divisor]))
+ return np.array(ret)
+
+
+# returns the changepoints found on signal with penalty penalty.
+# model, jump and min_dist are directly passed to PELT
+def calc_pelt(signal, penalty, model="l1", jump=5, min_dist=2, plotting=False):
+ # default params in Function
+ if model is None:
+ model = "l1"
+ if jump is None:
+ jump = 5
+ if min_dist is None:
+ min_dist = 2
+ if plotting is None:
+ plotting = False
+ # change point detection. best fit seemingly with l1. rbf prods. RuntimeErr for pen > 30
+ # https://ctruong.perso.math.cnrs.fr/ruptures-docs/build/html/costs/index.html
+ # model = "l1" #"l1" # "l2", "rbf"
+ algo = ruptures.Pelt(model=model, jump=jump, min_size=min_dist).fit(signal)
+
+ if penalty is not None:
+ bkps = algo.predict(pen=penalty)
+ if plotting:
+ fig, ax = ruptures.display(signal, bkps)
+ plt.show()
+ return bkps
+
+ print_error("No Penalty specified.")
+ sys.exit(-1)
+
+
+# calculates the raw_states for measurement measurement. num_measurement is used to identify the
+# return value
+# penalty, model and jump are directly passed to pelt
+def calc_raw_states_func(num_measurement, measurement, penalty, model, jump):
+ # extract signal
+ signal = np.array(measurement)
+ # norm signal to remove dependency on absolute values
+ normed_signal = norm_signal(signal)
+ # calculate the breakpoints
+ bkpts = calc_pelt(normed_signal, penalty, model=model, jump=jump)
+ calced_states = list()
+ start_time = 0
+ end_time = 0
+ # calc metrics for all states
+ for bkpt in bkpts:
+ # start_time of state is end_time of previous one
+ # (Transitions are instantaneous)
+ start_time = end_time
+ end_time = bkpt
+ power_vals = signal[start_time:end_time]
+ mean_power = np.mean(power_vals)
+ std_dev = np.std(power_vals)
+ calced_state = (start_time, end_time, mean_power, std_dev)
+ calced_states.append(calced_state)
+ num = 0
+ new_avg_std = 0
+ # calc avg std for all states from this measurement
+ for s in calced_states:
+ # print_info("State " + str(num) + " starts at t=" + str(s[0])
+ # + " and ends at t=" + str(s[1])
+ # + " while using " + str(s[2])
+ # + "uW with sigma=" + str(s[3]))
+ num = num + 1
+ new_avg_std = new_avg_std + s[3]
+ # check case if no state has been found to avoid crashing
+ if len(calced_states) != 0:
+ new_avg_std = new_avg_std / len(calced_states)
+ else:
+ new_avg_std = 0
+ change_avg_std = None # measurement["uW_std"] - new_avg_std
+ # print_info("The average standard deviation for the newly found states is "
+ # + str(new_avg_std))
+ # print_info("That is a reduction of " + str(change_avg_std))
+ return num_measurement, calced_states, new_avg_std, change_avg_std
+
+
+# parallelize calc over all measurements
+def calc_raw_states(arg_list):
+ with Pool() as pool:
+ # collect results from pool
+ result = pool.starmap(calc_raw_states_func, arg_list)
+ return result
+
+
+class PELT:
+ def __init__(self, **kwargs):
+ # Defaults von Janis
+ self.jump = 1
+ self.refinement_threshold = 100
+ self.range_min = 0
+ self.range_max = 100
+ self.__dict__.update(kwargs)
+
+ # signals: a set of uW measurements belonging to a single parameter configuration (i.e., a single by_param entry)
+ def needs_refinement(self, signals):
+ count = 0
+ for signal in signals:
+ p1, median, p99 = np.percentile(signal, (1, 50, 99))
+
+ if median - p1 > self.refinement_threshold:
+ count += 1
+ elif p99 - median > self.refinement_threshold:
+ count += 1
+ refinement_ratio = count / len(signals)
+ return refinement_ratio > 0.3
+
+ def get_penalty_value(
+ self, signals, model="l1", min_dist=2, range_min=0, range_max=100, S=1.0
+ ):
+ # Janis macht hier noch kein norm_signal. Mit sieht es aber genau so brauchbar aus.
+ # TODO vor der Penaltybestimmung die Auflösung der Daten auf z.B. 1 kHz
+ # verringern. Dann geht's deutlich schneller und superkurze
+ # Substates interessieren uns ohnehin weniger
+ signal = coarse_signal(norm_signal(signals[0]))
+ algo = ruptures.Pelt(model=model, jump=self.jump, min_size=min_dist).fit(signal)
+ queue = list()
+ for i in range(range_min, range_max + 1):
+ queue.append((algo, i))
+ with Pool() as pool:
+ results = pool.starmap(_get_breakpoints, queue)
+ pen_val = [x[0] for x in results]
+ changepoint_counts = [x[1] for x in results]
+ # Scheint unnötig zu sein, da wir ohnehin plateau detection durchführen
+ # knee = find_knee_point(pen_val, changepoint_counts, S=S)
+ knee = (0,)
+
+ start_index = -1
+ end_index = -1
+ longest_start = -1
+ longest_end = -1
+ prev_val = -1
+ for i, num_bkpts in enumerate(changepoint_counts[knee[0] :]):
+ if num_bkpts != prev_val:
+ end_index = i - 1
+ if end_index - start_index > longest_end - longest_start:
+ # currently found sequence is the longest found yet
+ longest_start = start_index
+ longest_end = end_index
+ start_index = i
+ if i == len(changepoint_counts[knee[0] :]) - 1:
+ # end sequence with last value
+ end_index = i
+ # # since it is not guaranteed that this is the end of the plateau, assume the mid
+ # # of the plateau was hit.
+ # size = end_index - start_index
+ # end_index = end_index + size
+ # However this is not the clean solution. Better if search interval is widened
+ # with range_min and range_max
+ if end_index - start_index > longest_end - longest_start:
+ # last found sequence is the longest found yet
+ longest_start = start_index
+ longest_end = end_index
+ start_index = i
+ prev_val = num_bkpts
+ mid_of_plat = longest_start + (longest_end - longest_start) // 2
+ knee = (mid_of_plat + knee[0], changepoint_counts[mid_of_plat + knee[0]])
+
+ # modify knee according to options. Defaults to 1 * knee
+ knee = (knee[0] * 1, knee[1])
+ return knee
+
+ def calc_raw_states(self, signals, penalty, opt_model=None):
+ raw_states_calc_args = list()
+ for num_measurement, measurement in enumerate(signals):
+ raw_states_calc_args.append(
+ (num_measurement, measurement, penalty, opt_model, self.jump)
+ )
+
+ raw_states_list = [None] * len(signals)
+ raw_states_res = calc_raw_states(raw_states_calc_args)
+
+ # extracting result and putting it in correct order -> index of raw_states_list
+ # entry still corresponds with index of measurement in measurements_by_states
+ # -> If measurements are discarded the used ones are easily recognized
+ for ret_val in raw_states_res:
+ num_measurement = ret_val[0]
+ raw_states = ret_val[1]
+ avg_std = ret_val[2]
+ change_avg_std = ret_val[3]
+ # FIXME: Wieso gibt mir meine IDE hier eine Warning aus? Der Index müsste doch
+ # int sein oder nicht? Es scheint auch vernünftig zu klappen...
+ raw_states_list[num_measurement] = raw_states
+ # print(
+ # "The average standard deviation for the newly found states in "
+ # + "measurement No. "
+ # + str(num_measurement)
+ # + " is "
+ # + str(avg_std)
+ # )
+ # print("That is a reduction of " + str(change_avg_std))
+ for i, raw_state in enumerate(raw_states):
+ print(
+ f"Measurement #{num_measurement} sub-state #{i}: {raw_state[0]} -> {raw_state[1]}, mean {raw_state[2]}"
+ )
+ # l_signal = measurements_by_config['offline'][num_measurement]['uW']
+ # l_bkpts = [s[1] for s in raw_states]
+ # fig, ax = rpt.display(np.array(l_signal), l_bkpts)
+ # plt.show()
+
+ """
+ # calculates and returns the necessary penalty for signal. Parallel execution with num_processes many processes
+ # jump, min_dist are passed directly to PELT. S is directly passed to kneedle.
+ # pen_modifier is used as a factor on the resulting penalty.
+ # the interval [range_min, range_max] is used for searching.
+ def calculate_penalty_value(
+ signal,
+ model="l1",
+ jump=5,
+ min_dist=2,
+ range_min=0,
+ range_max=100,
+ S=1.0,
+ pen_modifier=None,
+ show_plots=False,
+ ):
+ # default params in Function
+ if model is None:
+ model = "l1"
+ if jump is None:
+ jump = 5
+ if min_dist is None:
+ min_dist = 2
+ if range_min is None:
+ range_min = 0
+ if range_max is None:
+ range_max = 50
+ if S is None:
+ S = 1.0
+ if pen_modifier is None:
+ pen_modifier = 1
+ # change point detection. best fit seemingly with l1. rbf prods. RuntimeErr for pen > 30
+ # https://ctruong.perso.math.cnrs.fr/ruptures-docs/build/html/costs/index.html
+ # model = "l1" #"l1" # "l2", "rbf"
+ algo = ruptures.Pelt(model=model, jump=jump, min_size=min_dist).fit(signal)
+
+ ### CALC BKPS WITH DIFF PENALTYS
+ if range_max != range_min:
+ # building args array for parallelizing
+ args = []
+ # for displaying progression
+ m = Manager()
+ q = m.Queue()
+
+ for i in range(range_min, range_max + 1):
+ # same calculation for all except other penalty
+ args.append((algo, i, q))
+
+ print_info("starting kneepoint calculation.")
+ # init Pool with num_proesses
+ with Pool() as p:
+ # collect results from pool
+ result = p.starmap_async(get_bkps, args)
+ # monitor loop
+ percentage = -100 # Force display of 0%
+ i = 0
+ while True:
+ if result.ready():
+ break
+
+ size = q.qsize()
+ last_percentage = percentage
+ percentage = round(size / (range_max - range_min) * 100, 2)
+ if percentage >= last_percentage + 2 or i >= refresh_thresh:
+ print_info("Current progress: " + str(percentage) + "%")
+ i = 0
+ else:
+ i += 1
+ time.sleep(refresh_delay)
+ res = result.get()
+ print_info("Finished kneepoint calculation.")
+ # DECIDE WHICH PENALTY VALUE TO CHOOSE ACCORDING TO ELBOW/KNEE APPROACH
+ # split x and y coords to pass to kneedle
+ pen_val = [x[0] for x in res]
+ fitted_bkps_val = [x[1] for x in res]
+ # # plot to look at res
+ knee = find_knee_point(pen_val, fitted_bkps_val, S=S)
+
+ # TODO: Find plateau on pen_val vs fitted_bkps_val
+ # scipy.find_peaks() does not find plateaus if they extend through the end of the data.
+ # to counter that, add one extremely large value to the right side of the data
+ # after negating it is extremely small -> Almost certainly smaller than the
+ # found plateau therefore the plateau does not extend through the border
+ # -> scipy.find_peaks finds it. Choose value from within that plateau.
+ # fitted_bkps_val.append(100000000)
+ # TODO: Approaching over find_peaks might not work if the initial decrease step to the
+ # "correct" number of changepoints and additional decrease steps e.g. underfitting
+ # take place within the given penalty interval. find_peak only finds plateaus
+ # of peaks. If the number of chpts decreases after the wanted plateau the condition
+ # for local peaks is not satisfied anymore. Therefore this approach will only work
+ # if the plateau extends over the right border of the penalty interval.
+ # peaks, peak_plateaus = find_peaks(- np.array(fitted_bkps_val), plateau_size=1)
+ # Since the data is monotonously decreasing only one plateau can be found.
+
+ # assuming the plateau is constant, i.e. no noise. OK to assume this here, since num_bkpts
+ # is monotonously decreasing. If the number of bkpts decreases inside a considered
+ # plateau, it means that the stable configuration is not yet met. -> Search further
+ start_index = -1
+ end_index = -1
+ longest_start = -1
+ longest_end = -1
+ prev_val = -1
+ for i, num_bkpts in enumerate(fitted_bkps_val[knee[0] :]):
+ if num_bkpts != prev_val:
+ end_index = i - 1
+ if end_index - start_index > longest_end - longest_start:
+ # currently found sequence is the longest found yet
+ longest_start = start_index
+ longest_end = end_index
+ start_index = i
+ if i == len(fitted_bkps_val[knee[0] :]) - 1:
+ # end sequence with last value
+ end_index = i
+ # # since it is not guaranteed that this is the end of the plateau, assume the mid
+ # # of the plateau was hit.
+ # size = end_index - start_index
+ # end_index = end_index + size
+ # However this is not the clean solution. Better if search interval is widened
+ # with range_min and range_max
+ if end_index - start_index > longest_end - longest_start:
+ # last found sequence is the longest found yet
+ longest_start = start_index
+ longest_end = end_index
+ start_index = i
+ prev_val = num_bkpts
+ if show_plots:
+ plt.xlabel("Penalty")
+ plt.ylabel("Number of Changepoints")
+ plt.plot(pen_val, fitted_bkps_val)
+ plt.vlines(
+ longest_start + knee[0], 0, max(fitted_bkps_val), linestyles="dashed"
+ )
+ plt.vlines(
+ longest_end + knee[0], 0, max(fitted_bkps_val), linestyles="dashed"
+ )
+ plt.show()
+ # choosing pen from plateau
+ mid_of_plat = longest_start + (longest_end - longest_start) // 2
+ knee = (mid_of_plat + knee[0], fitted_bkps_val[mid_of_plat + knee[0]])
+
+ # modify knee according to options. Defaults to 1 * knee
+ knee = (knee[0] * pen_modifier, knee[1])
+
+ else:
+ # range_min == range_max. has the same effect as pen_override
+ knee = (range_min, None)
+ print_info(str(knee[0]) + " has been selected as penalty.")
+ if knee[0] is not None:
+ return knee
+
+ print_error(
+ "With the current thresh-hold S="
+ + str(S)
+ + " it is not possible to select a penalty value."
+ )
+ sys.exit(-1)
+ """
diff --git a/lib/utils.py b/lib/utils.py
index adcb534..31fedcf 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -193,6 +193,9 @@ def by_name_to_by_param(by_name: dict):
by_param[param_key]["isa"] = by_name[name]["isa"]
for attribute in by_name[name]["attributes"]:
by_param[param_key][attribute].append(by_name[name][attribute][i])
+ if "supports" in by_name[name]:
+ for support in by_name[name]["supports"]:
+ by_param[param_key][support].append(by_name[name][support][i])
# Required for match_parameter_valuse in _try_fits
by_param[param_key]["param"].append(by_name[name]["param"][i])
return by_param