diff options
| author | jfalkenhagen <jfalkenhagen@uos.de> | 2020-07-08 17:29:56 +0200 | 
|---|---|---|
| committer | jfalkenhagen <jfalkenhagen@uos.de> | 2020-07-08 17:29:56 +0200 | 
| commit | 56c0cd63af5e34fc2e3da64018155f715825a343 (patch) | |
| tree | 69379087947a46870c0d1c32e1f0ea04339adc6c /bin | |
| parent | 7dc6363cd7f17cf5a09f678da612e15a0e6bfbac (diff) | |
bin/Proof_Of_Concept_PELT: Trennen von Penalty-Berechnung und PELT -> Pro Messreihe/Paramkonig. nur noch einmaliges bestimmen der Penalty. Bestimmen der Penalty über KNEEDLE Ab Kneepoint dann suche nach Plateau -> Wahl der Mitte des Plateaus. Automatisches Clustern funktioniert jetzt auch scheinbar für alle Messreihen aus TX.json. Mit anderen nicht getestet.
Diffstat (limited to 'bin')
| -rw-r--r-- | bin/Proof_Of_Concept_PELT.py | 193 | 
1 files changed, 139 insertions, 54 deletions
| diff --git a/bin/Proof_Of_Concept_PELT.py b/bin/Proof_Of_Concept_PELT.py index dbcc7c1..0e63b78 100644 --- a/bin/Proof_Of_Concept_PELT.py +++ b/bin/Proof_Of_Concept_PELT.py @@ -1,16 +1,16 @@ -import matplotlib.pyplot as plt  import json -from kneed import KneeLocator -import ruptures as rpt  import time -from multiprocessing import Pool, Manager -import numpy as np  import sys  import getopt  import re -from dfatool.dfatool import RawData - +from multiprocessing import Pool, Manager +from kneed import KneeLocator  from sklearn.cluster import AgglomerativeClustering +from scipy.signal import find_peaks +import matplotlib.pyplot as plt +import ruptures as rpt +import numpy as np +from dfatool.dfatool import RawData  # from scipy.cluster.hierarchy import dendrogram, linkage # for graphical display @@ -19,8 +19,8 @@ from sklearn.cluster import AgglomerativeClustering  def plot_data_from_json(filename, trace_num, x_axis, y_axis): -    with open(filename, 'r') as f: -        tx_data = json.load(f) +    with open(filename, 'r') as file: +        tx_data = json.load(file)      print(tx_data[trace_num]['parameter'])      plt.plot(tx_data[trace_num]['offline'][0]['uW'])      plt.xlabel(x_axis) @@ -64,12 +64,38 @@ def find_knee_point(data_x, data_y, S=1.0, curve='convex', direction='decreasing      return kneepoint -def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50, num_processes=8, -              refresh_delay=1, refresh_thresh=5, S=1.0, pen_override=None, pen_modifier=None, -              plotting=False): +def calc_pelt(signal, penalty, model="l1", jump=5, min_dist=2, plotting=False): +    # default params in Function +    if model is None: +        model = "l1" +    if jump is None: +        jump = 5 +    if min_dist is None: +        min_dist = 2 +    if plotting is None: +        plotting = False +    # change point detection. best fit seemingly with l1. rbf prods. RuntimeErr for pen > 30 +    # https://ctruong.perso.math.cnrs.fr/ruptures-docs/build/html/costs/index.html +    # model = "l1"   #"l1"  # "l2", "rbf" +    algo = rpt.Pelt(model=model, jump=jump, min_size=min_dist).fit(signal) + +    if penalty is not None: +        bkps = algo.predict(pen=penalty) +        if plotting: +            fig, ax = rpt.display(signal, bkps) +            plt.show() +        return bkps + +    print_error("No Penalty specified.") +    sys.exit() + + +def calculate_penalty_value(signal, model="l1", jump=5, min_dist=2, range_min=0, range_max=50, +                            num_processes=8, refresh_delay=1, refresh_thresh=5, S=1.0, +                            pen_modifier=None):      # default params in Function      if model is None: -        model = 'l1' +        model = "l1"      if jump is None:          jump = 5      if min_dist is None: @@ -86,8 +112,6 @@ def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50,          refresh_thresh = 5      if S is None:          S = 1.0 -    if plotting is None: -        plotting = False      if pen_modifier is None:          pen_modifier = 1      # change point detection. best fit seemingly with l1. rbf prods. RuntimeErr for pen > 30 @@ -96,7 +120,7 @@ def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50,      algo = rpt.Pelt(model=model, jump=jump, min_size=min_dist).fit(signal)      ### CALC BKPS WITH DIFF PENALTYS -    if pen_override is None and range_max != range_min: +    if range_max != range_min:          # building args array for parallelizing          args = []          # for displaying progression @@ -106,7 +130,7 @@ def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50,          for i in range(range_min, range_max + 1):              args.append((algo, i, q)) -        print_info('starting kneepoint calculation.') +        print_info("starting kneepoint calculation.")          # init Pool with num_proesses          with Pool(num_processes) as p:              # collect results from pool @@ -122,7 +146,7 @@ def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50,                  last_percentage = percentage                  percentage = round(size / (range_max - range_min) * 100, 2)                  if percentage >= last_percentage + 2 or i >= refresh_thresh: -                    print_info('Current progress: ' + str(percentage) + '%') +                    print_info("Current progress: " + str(percentage) + "%")                      i = 0                  else:                      i += 1 @@ -135,31 +159,68 @@ def calc_pelt(signal, model='l1', jump=5, min_dist=2, range_min=0, range_max=50,          fitted_bkps_val = [x[1] for x in res]          # # plot to look at res          knee = find_knee_point(pen_val, fitted_bkps_val, S=S) + +        # TODO: Find plateau on pen_val vs fitted_bkps_val +        # scipy.find_peaks() does not find plateaus if they extend through the end of the data. +        # to counter that, add one extremely large value to the right side of the data +        # after negating it is extremely small -> Almost certainly smaller than the +        # found plateau therefore the plateau does not extend through the border -> scipy.find_peaks +        # finds it. Choose value from within that plateau. +        # fitted_bkps_val.append(100000000) +        # TODO: Approaching over find_peaks might not work if the initial decrease step to the +        #   "correct" number of changepoints and additional decrease steps e.g. underfitting +        #   take place within the given penalty interval. find_peak only finds plateaus +        #   of peaks. If the number of chpts decreases after the wanted plateau the condition +        #   for local peaks is not satisfied anymore. Therefore this approach will only work +        #   if the plateau extends over the right border of the penalty interval. +        # peaks, peak_plateaus = find_peaks(- np.array(fitted_bkps_val), plateau_size=1) +        # Since the data is monotonously decreasing only one plateau can be found. + +        # assuming the plateau is constant +        start_index = -1 +        end_index = -1 +        longest_start = -1 +        longest_end = -1 +        prev_val = -1 +        for i, num_bkpts in enumerate(fitted_bkps_val[knee[0]:]): +            if num_bkpts != prev_val: +                end_index = i - 1 +                if end_index - start_index > longest_end - longest_start: +                    # currently found sequence is the longest found yet +                    longest_start = start_index +                    longest_end = end_index +                start_index = i +            if i == len(fitted_bkps_val[knee[0]:]) - 1: +                # end sequence with last value +                end_index = i +                if end_index - start_index > longest_end - longest_start: +                    # last found sequence is the longest found yet +                    longest_start = start_index +                    longest_end = end_index +                start_index = i +            prev_val = num_bkpts          # plt.xlabel('Penalty')          # plt.ylabel('Number of Changepoints')          # plt.plot(pen_val, fitted_bkps_val) -        # plt.vlines(knee[0], 0, max(fitted_bkps_val), linestyles='dashed') -        # print("knee: " + str(knee[0])) +        # plt.vlines(longest_start + knee[0], 0, max(fitted_bkps_val), linestyles='dashed') +        # plt.vlines(longest_end + knee[0], 0, max(fitted_bkps_val), linestyles='dashed')          # plt.show() +        # choosing pen from plateau +        mid_of_plat = longest_start + (longest_end - longest_start) // 2 +        knee = (mid_of_plat + knee[0], fitted_bkps_val[mid_of_plat + knee[0]]) +          # modify knee according to options. Defaults to 1 * knee          knee = (knee[0] * pen_modifier, knee[1]) +      else: -        # use forced pen value for plotting if specified. Else use only pen in range -        if pen_override is not None: -            knee = (pen_override, None) -        else: -            knee = (range_min, None) -    print_info("" + str(knee[0]) + " has been selected as kneepoint.") -    # plt.plot(pen_val, fittet_bkps_val) +        # range_min == range_max. has the same effect as pen_override +        knee = (range_min, None) +    print_info(str(knee[0]) + " has been selected as kneepoint.")      if knee[0] is not None: -        bkps = algo.predict(pen=knee[0]) -        if plotting: -            fig, ax = rpt.display(signal, bkps) -            plt.show() -        return bkps +        return knee -    print_error('With the current thresh-hold S=' + str(S) -                + ' it is not possible to select a penalty value.') +    print_error("With the current thresh-hold S=" + str(S) +                + " it is not possible to select a penalty value.")      sys.exit() @@ -265,6 +326,14 @@ def print_error(str_to_prt):          print("[ERROR]" + str_prt, file=sys.stderr) +def norm_signal(signal): +    # TODO: maybe refine normalisation of signal +    normed_signal = np.zeros(shape=len(signal)) +    for i, signal_i in enumerate(signal): +        normed_signal[i] = signal_i / 1000 +    return normed_signal + +  if __name__ == '__main__':      # OPTION RECOGNITION      opt = dict() @@ -414,19 +483,28 @@ if __name__ == '__main__':                      break              if not refine:                  print_info("No refinement necessary for state '" + measurements_by_state['name'] -                           + "'") +                           + "' with params: " + str(measurements_by_state['parameter']))              else: +                # assume that all measurements of the same param configuration are fundamentally +                # similar -> calculate penalty for first measurement, use it for all +                if opt_pen_override is None: +                    signal = np.array(measurements_by_state['offline'][0]['uW']) +                    normed_signal = norm_signal(signal) +                    penalty = calculate_penalty_value(normed_signal, model=opt_model, +                                                      range_min=opt_range_min, +                                                      range_max=opt_range_max, +                                                      num_processes=opt_num_processes, +                                                      jump=opt_jump, S=opt_S, +                                                      pen_modifier=opt_pen_modifier) +                    penalty = penalty[0] +                else: +                    penalty = opt_pen_override                  # calc and save all bkpts for the given state and param config                  raw_states_list = list()                  for measurement in measurements_by_state['offline']:                      signal = np.array(measurement['uW']) -                    normed_signal = np.zeros(shape=len(signal)) -                    for i in range(0, len(signal)): -                        normed_signal[i] = signal[i] / 1000 -                    bkpts = calc_pelt(normed_signal, model=opt_model, range_min=opt_range_min, -                                      range_max=opt_range_max, num_processes=opt_num_processes, -                                      jump=opt_jump, S=opt_S, pen_override=opt_pen_override, -                                      pen_modifier=opt_pen_modifier) +                    normed_signal = norm_signal(signal) +                    bkpts = calc_pelt(normed_signal, penalty, model=opt_model, jump=opt_jump)                      calced_states = list()                      start_time = 0                      end_time = 0 @@ -468,8 +546,8 @@ if __name__ == '__main__':                  # TODO: MAGIC NUMBER                  if num_states_dev > 1:                      print_warning("The number of states varies strongly across measurements." -                                  " Consider choosing a larger value for S or using the pen_modifier" -                                  " option.") +                                  " Consider choosing a larger value for S or using the " +                                  "pen_modifier option.")                      time.sleep(5)                  # TODO: Wie bekomme ich da jetzt raus, was die Wahrheit ist?                  # Einfach Durchschnitt nehmen? @@ -501,10 +579,10 @@ if __name__ == '__main__':                          #            show_leaf_counts=True)                          # plt.show()                          # TODO: Automatic detection of number of clusters. Aktuell noch MAGIC NUMBER -                        # cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True, affinity='euclidean', -                        #                                  linkage='ward', distance_threshold=opt_refinement_thresh) -                        cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean', -                                                          linkage='ward') +                        cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True, affinity='euclidean', +                                                         linkage='ward', distance_threshold=opt_refinement_thresh*100) +                        # cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean', +                        #                                   linkage='ward')                          cluster.fit_predict(value_to_cluster)                          print_info("Cluster labels:\n" + str(cluster.labels_))                          # plt.scatter(value_to_cluster[:, 0], value_to_cluster[:, 1], c=cluster.labels_, cmap='rainbow') @@ -515,9 +593,19 @@ if __name__ == '__main__':                          num_cluster_list.append(cluster.n_clusters_)                          i = i + 1                  if i != len(raw_states_list): -                    print_info("Used " + str(i) + "/" + str(len(raw_states_list)) -                               + " Measurements for state clustering. " -                                 "Others did not recognize number of states correctly.") +                    if i / len(raw_states_list) <= 0.5: +                        print_warning("Only used " + str(i) + "/" + str(len(raw_states_list)) +                                      + " Measurements for refinement. " +                                        "Others did not recognize number of states correctly." +                                        "\nYou should verify the integrity of the measurements.") +                    else: +                        print_info("Used " + str(i) + "/" + str(len(raw_states_list)) +                                   + " Measurements for refinement. " +                                     "Others did not recognize number of states correctly.") +                    sys.exit() +                else: +                    print_info("Used all available measurements.") +                  num_states = np.argmax(np.bincount(num_cluster_list))                  resulting_sequence = [None] * num_raw_states                  i = 0 @@ -534,9 +622,6 @@ if __name__ == '__main__':                      i = i + 1                  print(resulting_sequence) -                # TODO: TESTING PURPOSES -                sys.exit() -      elif ".tar" in opt_filename:          # open with dfatool          raw_data_args = list() | 
