diff options
author | jfalkenhagen <jfalkenhagen@uos.de> | 2020-07-08 17:29:56 +0200 |
---|---|---|
committer | jfalkenhagen <jfalkenhagen@uos.de> | 2020-07-08 17:29:56 +0200 |
commit | 56c0cd63af5e34fc2e3da64018155f715825a343 (patch) | |
tree | 69379087947a46870c0d1c32e1f0ea04339adc6c /bin | |
parent | 7dc6363cd7f17cf5a09f678da612e15a0e6bfbac (diff) |
bin/Proof_Of_Concept_PELT: Trennen von Penalty-Berechnung und PELT -> Pro Messreihe/Paramkonig. nur noch einmaliges bestimmen der Penalty. Bestimmen der Penalty über KNEEDLE Ab Kneepoint dann suche nach Plateau -> Wahl der Mitte des Plateaus. Automatisches Clustern funktioniert jetzt auch scheinbar für alle Messreihen aus TX.json. Mit anderen nicht getestet.
Diffstat (limited to 'bin')
-rw-r--r-- | bin/Proof_Of_Concept_PELT.py | 193 |
1 files changed, 139 insertions, 54 deletions
diff --git a/bin/Proof_Of_Concept_PELT.py b/bin/Proof_Of_Concept_PELT.py index dbcc7c1..0e63b78 100644 --- a/bin/Proof_Of_Concept_PELT.py +++ b/bin/Proof_Of_Concept_PELT.py @@ -1,16 +1,16 @@ -import matplotlib.pyplot as plt import json -from kneed import KneeLocator -import ruptures as rpt import time -from multiprocessing import Pool, Manager -import numpy as np import sys import getopt import re -from dfatool.dfatool import RawData - +from multiprocessing import Pool, Manager +from kneed import KneeLocator from sklearn.cluster import AgglomerativeClustering +from scipy.signal import find_peaks +import matplotlib.pyplot as plt +import ruptures as rpt +import numpy as np +from dfatool.dfatool import RawData # from scipy.cluster.hierarchy import dendrogram, linkage # for graphical display @@ -19,8 +19,8 @@ from sklearn.cluster import AgglomerativeClustering def plot_data_from_json(filename, trace_num, x_axis, y_axis): - with open(filename, 'r') as f: - tx_data = json.load(f) + with open(filename, 'r') as file: + tx_data = json.load(file) print(tx_data[trace_num]['parameter']) plt.plot(tx_data[trace_num]['offline'][0]['uW']) plt.xlabel(x_axis) @@ -64,12 +64,38 @@ def find_knee_point(data_x, data_y, S=1.0, curve='convex', direction='decreasing return kneepoint -def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50, num_processes=8, - refresh_delay=1, refresh_thresh=5, S=1.0, pen_override=None, pen_modifier=None, - plotting=False): +def calc_pelt(signal, penalty, model="l1", jump=5, min_dist=2, plotting=False): + # default params in Function + if model is None: + model = "l1" + if jump is None: + jump = 5 + if min_dist is None: + min_dist = 2 + if plotting is None: + plotting = False + # change point detection. best fit seemingly with l1. rbf prods. RuntimeErr for pen > 30 + # https://ctruong.perso.math.cnrs.fr/ruptures-docs/build/html/costs/index.html + # model = "l1" #"l1" # "l2", "rbf" + algo = rpt.Pelt(model=model, jump=jump, min_size=min_dist).fit(signal) + + if penalty is not None: + bkps = algo.predict(pen=penalty) + if plotting: + fig, ax = rpt.display(signal, bkps) + plt.show() + return bkps + + print_error("No Penalty specified.") + sys.exit() + + +def calculate_penalty_value(signal, model="l1", jump=5, min_dist=2, range_min=0, range_max=50, + num_processes=8, refresh_delay=1, refresh_thresh=5, S=1.0, + pen_modifier=None): # default params in Function if model is None: - model = 'l1' + model = "l1" if jump is None: jump = 5 if min_dist is None: @@ -86,8 +112,6 @@ def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50, refresh_thresh = 5 if S is None: S = 1.0 - if plotting is None: - plotting = False if pen_modifier is None: pen_modifier = 1 # change point detection. best fit seemingly with l1. rbf prods. RuntimeErr for pen > 30 @@ -96,7 +120,7 @@ def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50, algo = rpt.Pelt(model=model, jump=jump, min_size=min_dist).fit(signal) ### CALC BKPS WITH DIFF PENALTYS - if pen_override is None and range_max != range_min: + if range_max != range_min: # building args array for parallelizing args = [] # for displaying progression @@ -106,7 +130,7 @@ def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50, for i in range(range_min, range_max + 1): args.append((algo, i, q)) - print_info('starting kneepoint calculation.') + print_info("starting kneepoint calculation.") # init Pool with num_proesses with Pool(num_processes) as p: # collect results from pool @@ -122,7 +146,7 @@ def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50, last_percentage = percentage percentage = round(size / (range_max - range_min) * 100, 2) if percentage >= last_percentage + 2 or i >= refresh_thresh: - print_info('Current progress: ' + str(percentage) + '%') + print_info("Current progress: " + str(percentage) + "%") i = 0 else: i += 1 @@ -135,31 +159,68 @@ def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50, fitted_bkps_val = [x[1] for x in res] # # plot to look at res knee = find_knee_point(pen_val, fitted_bkps_val, S=S) + + # TODO: Find plateau on pen_val vs fitted_bkps_val + # scipy.find_peaks() does not find plateaus if they extend through the end of the data. + # to counter that, add one extremely large value to the right side of the data + # after negating it is extremely small -> Almost certainly smaller than the + # found plateau therefore the plateau does not extend through the border -> scipy.find_peaks + # finds it. Choose value from within that plateau. + # fitted_bkps_val.append(100000000) + # TODO: Approaching over find_peaks might not work if the initial decrease step to the + # "correct" number of changepoints and additional decrease steps e.g. underfitting + # take place within the given penalty interval. find_peak only finds plateaus + # of peaks. If the number of chpts decreases after the wanted plateau the condition + # for local peaks is not satisfied anymore. Therefore this approach will only work + # if the plateau extends over the right border of the penalty interval. + # peaks, peak_plateaus = find_peaks(- np.array(fitted_bkps_val), plateau_size=1) + # Since the data is monotonously decreasing only one plateau can be found. + + # assuming the plateau is constant + start_index = -1 + end_index = -1 + longest_start = -1 + longest_end = -1 + prev_val = -1 + for i, num_bkpts in enumerate(fitted_bkps_val[knee[0]:]): + if num_bkpts != prev_val: + end_index = i - 1 + if end_index - start_index > longest_end - longest_start: + # currently found sequence is the longest found yet + longest_start = start_index + longest_end = end_index + start_index = i + if i == len(fitted_bkps_val[knee[0]:]) - 1: + # end sequence with last value + end_index = i + if end_index - start_index > longest_end - longest_start: + # last found sequence is the longest found yet + longest_start = start_index + longest_end = end_index + start_index = i + prev_val = num_bkpts # plt.xlabel('Penalty') # plt.ylabel('Number of Changepoints') # plt.plot(pen_val, fitted_bkps_val) - # plt.vlines(knee[0], 0, max(fitted_bkps_val), linestyles='dashed') - # print("knee: " + str(knee[0])) + # plt.vlines(longest_start + knee[0], 0, max(fitted_bkps_val), linestyles='dashed') + # plt.vlines(longest_end + knee[0], 0, max(fitted_bkps_val), linestyles='dashed') # plt.show() + # choosing pen from plateau + mid_of_plat = longest_start + (longest_end - longest_start) // 2 + knee = (mid_of_plat + knee[0], fitted_bkps_val[mid_of_plat + knee[0]]) + # modify knee according to options. Defaults to 1 * knee knee = (knee[0] * pen_modifier, knee[1]) + else: - # use forced pen value for plotting if specified. Else use only pen in range - if pen_override is not None: - knee = (pen_override, None) - else: - knee = (range_min, None) - print_info("" + str(knee[0]) + " has been selected as kneepoint.") - # plt.plot(pen_val, fittet_bkps_val) + # range_min == range_max. has the same effect as pen_override + knee = (range_min, None) + print_info(str(knee[0]) + " has been selected as kneepoint.") if knee[0] is not None: - bkps = algo.predict(pen=knee[0]) - if plotting: - fig, ax = rpt.display(signal, bkps) - plt.show() - return bkps + return knee - print_error('With the current thresh-hold S=' + str(S) - + ' it is not possible to select a penalty value.') + print_error("With the current thresh-hold S=" + str(S) + + " it is not possible to select a penalty value.") sys.exit() @@ -265,6 +326,14 @@ def print_error(str_to_prt): print("[ERROR]" + str_prt, file=sys.stderr) +def norm_signal(signal): + # TODO: maybe refine normalisation of signal + normed_signal = np.zeros(shape=len(signal)) + for i, signal_i in enumerate(signal): + normed_signal[i] = signal_i / 1000 + return normed_signal + + if __name__ == '__main__': # OPTION RECOGNITION opt = dict() @@ -414,19 +483,28 @@ if __name__ == '__main__': break if not refine: print_info("No refinement necessary for state '" + measurements_by_state['name'] - + "'") + + "' with params: " + str(measurements_by_state['parameter'])) else: + # assume that all measurements of the same param configuration are fundamentally + # similar -> calculate penalty for first measurement, use it for all + if opt_pen_override is None: + signal = np.array(measurements_by_state['offline'][0]['uW']) + normed_signal = norm_signal(signal) + penalty = calculate_penalty_value(normed_signal, model=opt_model, + range_min=opt_range_min, + range_max=opt_range_max, + num_processes=opt_num_processes, + jump=opt_jump, S=opt_S, + pen_modifier=opt_pen_modifier) + penalty = penalty[0] + else: + penalty = opt_pen_override # calc and save all bkpts for the given state and param config raw_states_list = list() for measurement in measurements_by_state['offline']: signal = np.array(measurement['uW']) - normed_signal = np.zeros(shape=len(signal)) - for i in range(0, len(signal)): - normed_signal[i] = signal[i] / 1000 - bkpts = calc_pelt(normed_signal, model=opt_model, range_min=opt_range_min, - range_max=opt_range_max, num_processes=opt_num_processes, - jump=opt_jump, S=opt_S, pen_override=opt_pen_override, - pen_modifier=opt_pen_modifier) + normed_signal = norm_signal(signal) + bkpts = calc_pelt(normed_signal, penalty, model=opt_model, jump=opt_jump) calced_states = list() start_time = 0 end_time = 0 @@ -468,8 +546,8 @@ if __name__ == '__main__': # TODO: MAGIC NUMBER if num_states_dev > 1: print_warning("The number of states varies strongly across measurements." - " Consider choosing a larger value for S or using the pen_modifier" - " option.") + " Consider choosing a larger value for S or using the " + "pen_modifier option.") time.sleep(5) # TODO: Wie bekomme ich da jetzt raus, was die Wahrheit ist? # Einfach Durchschnitt nehmen? @@ -501,10 +579,10 @@ if __name__ == '__main__': # show_leaf_counts=True) # plt.show() # TODO: Automatic detection of number of clusters. Aktuell noch MAGIC NUMBER - # cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True, affinity='euclidean', - # linkage='ward', distance_threshold=opt_refinement_thresh) - cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean', - linkage='ward') + cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True, affinity='euclidean', + linkage='ward', distance_threshold=opt_refinement_thresh*100) + # cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean', + # linkage='ward') cluster.fit_predict(value_to_cluster) print_info("Cluster labels:\n" + str(cluster.labels_)) # plt.scatter(value_to_cluster[:, 0], value_to_cluster[:, 1], c=cluster.labels_, cmap='rainbow') @@ -515,9 +593,19 @@ if __name__ == '__main__': num_cluster_list.append(cluster.n_clusters_) i = i + 1 if i != len(raw_states_list): - print_info("Used " + str(i) + "/" + str(len(raw_states_list)) - + " Measurements for state clustering. " - "Others did not recognize number of states correctly.") + if i / len(raw_states_list) <= 0.5: + print_warning("Only used " + str(i) + "/" + str(len(raw_states_list)) + + " Measurements for refinement. " + "Others did not recognize number of states correctly." + "\nYou should verify the integrity of the measurements.") + else: + print_info("Used " + str(i) + "/" + str(len(raw_states_list)) + + " Measurements for refinement. " + "Others did not recognize number of states correctly.") + sys.exit() + else: + print_info("Used all available measurements.") + num_states = np.argmax(np.bincount(num_cluster_list)) resulting_sequence = [None] * num_raw_states i = 0 @@ -534,9 +622,6 @@ if __name__ == '__main__': i = i + 1 print(resulting_sequence) - # TODO: TESTING PURPOSES - sys.exit() - elif ".tar" in opt_filename: # open with dfatool raw_data_args = list() |