| author | jfalkenhagen <jfalkenhagen@uos.de> | 2020-07-16 16:34:20 +0200 |
|---|---|---|
| committer | jfalkenhagen <jfalkenhagen@uos.de> | 2020-07-16 16:34:20 +0200 |
| commit | a00ffc0e32ddc72a8faceec4344432cdbf3b90c7 (patch) | |
| tree | bb31398b72c38f801ad2931b4181bc653b037903 | |
| parent | e1f0618fb04e42b7d3e49055af83f58a803b28b8 (diff) | |
bin/Proof_Of_Concept_PELT: minor cosmetic changes
-rw-r--r-- | bin/Proof_Of_Concept_PELT.py | 39 |
1 file changed, 19 insertions, 20 deletions
diff --git a/bin/Proof_Of_Concept_PELT.py b/bin/Proof_Of_Concept_PELT.py
index 0d5be54..7726f53 100644
--- a/bin/Proof_Of_Concept_PELT.py
+++ b/bin/Proof_Of_Concept_PELT.py
@@ -160,11 +160,11 @@ def calculate_penalty_value(signal, model="l1", jump=5, min_dist=2, range_min=0,
 
     knee = find_knee_point(pen_val, fitted_bkps_val, S=S)
     # TODO: Find plateau on pen_val vs fitted_bkps_val
-    # scipy.find_peaks() does not find plateaus if they extend through the end of the data.
-    # to counter that, add one extremely large value to the right side of the data
-    # after negating it is extremely small -> Almost certainly smaller than the
-    # found plateau therefore the plateau does not extend through the border -> scipy.find_peaks
-    # finds it. Choose value from within that plateau.
+    # scipy.find_peaks() does not find plateaus if they extend through the end of the data.
+    # to counter that, add one extremely large value to the right side of the data
+    # after negating it is extremely small -> Almost certainly smaller than the
+    # found plateau therefore the plateau does not extend through the border
+    # -> scipy.find_peaks finds it. Choose value from within that plateau.
     # fitted_bkps_val.append(100000000)
     # TODO: Approaching over find_peaks might not work if the initial decrease step to the
     # "correct" number of changepoints and additional decrease steps e.g. underfitting
@@ -331,7 +331,6 @@ def calc_raw_states(arg_list, num_processes=8):
 
 
 # Very short benchmark yielded approx. 3 times the speed of solution not using sort
-# TODO: Decide whether median is really the better baseline than mean
 def needs_refinement(signal, thresh):
     sorted_signal = sorted(signal)
     length_of_signal = len(signal)
@@ -509,29 +508,28 @@ if __name__ == '__main__':
             configurations = json.load(f)
         # loop through all traces check if refinement is necessary
         resulting_sequence_list = []
-        for num_config, measurements_by_configuration in enumerate(configurations):
+        for num_config, measurements_by_config in enumerate(configurations):
            # loop through all occurrences of the looked at state
-            print_info("Looking at state '" + measurements_by_configuration['name'] + "' with params: "
-                       + str(measurements_by_configuration['parameter']))
+            print_info("Looking at state '" + measurements_by_config['name'] + "' with params: "
+                       + str(measurements_by_config['parameter']))
             refine = False
             print_info("Checking if refinement is necessary...")
-            for measurement in measurements_by_configuration['offline']:
+            for measurement in measurements_by_config['offline']:
                 # loop through measurements of particular state
                 # and check if state needs refinement
                 signal = measurement['uW']
                 # mean = measurement['uW_mean']
-                # TODO: Decide if median is really the better baseline than mean
                 if needs_refinement(signal, opt_refinement_thresh) and not refine:
                     print_info("Refinement is necessary!")
                     refine = True
             if not refine:
-                print_info("No refinement necessary for state '" + measurements_by_configuration['name']
-                           + "' with params: " + str(measurements_by_configuration['parameter']))
+                print_info("No refinement necessary for state '" + measurements_by_config['name']
+                           + "' with params: " + str(measurements_by_config['parameter']))
             else:
                 # assume that all measurements of the same param configuration are fundamentally
                 # similar -> calculate penalty for first measurement, use it for all
                 if opt_pen_override is None:
-                    signal = np.array(measurements_by_configuration['offline'][0]['uW'])
+                    signal = np.array(measurements_by_config['offline'][0]['uW'])
                     normed_signal = norm_signal(signal)
                     penalty = calculate_penalty_value(normed_signal, model=opt_model,
                                                       range_min=opt_range_min,
@@ -545,11 +543,11 @@ if __name__ == '__main__':
                 # build arguments for parallel execution
                 print_info("Starting raw_states calculation.")
                 raw_states_calc_args = []
-                for num_measurement, measurement in enumerate(measurements_by_configuration['offline']):
+                for num_measurement, measurement in enumerate(measurements_by_config['offline']):
                     raw_states_calc_args.append((num_measurement, measurement, penalty,
                                                  opt_model, opt_jump))
 
-                raw_states_list = [None] * len(measurements_by_configuration['offline'])
+                raw_states_list = [None] * len(measurements_by_config['offline'])
                 raw_states_res = calc_raw_states(raw_states_calc_args, opt_num_processes)
                 # extracting result and putting it in correct order -> index of raw_states_list
                 # entry still corresponds with index of measurement in measurements_by_states
@@ -622,8 +620,6 @@ if __name__ == '__main__':
                 # print_info("Cluster labels:\n" + str(cluster.labels_))
                 # plt.scatter(value_to_cluster[:, 0], value_to_cluster[:, 1], c=cluster.labels_, cmap='rainbow')
                 # plt.show()
-                # TODO: Problem: The algorithm does not always number the states the same way... e.g.:
-                # sometimes the actual Transmit is labeled 1 and sometimes 3
                 cluster_labels_list.append((num_trace, cluster.labels_))
                 num_cluster_list.append((num_trace, cluster.n_clusters_))
                 i = i + 1
@@ -739,7 +735,7 @@ if __name__ == '__main__':
             print_info("Confidence of resulting sequence is " + str(confidence)
                        + " while using " + str(num_used_measurements) + "/"
                        + str(len(raw_states_list)) + " measurements.")
-            print(resulting_sequence)
+            #print(resulting_sequence)
             resulting_sequence_list.append((num_config, resulting_sequence))
             # TODO: What now? At this point I have one state sequence per configuration. Build an
             # automaton from that. But how? Or parameterize first? Actually, before that I need the
@@ -750,7 +746,10 @@ if __name__ == '__main__':
             # how do I recognize that two states are the same ones and not just ones with a similar
             # power consumption?! Maybe cluster the states in 2D? dim 1 = power consumption,
             # dim 2 = duration? At least within one param configuration the duration of a
-            # state should not change any more.
+            # state should not change any more. Can certainly still produce wrong clusterings...
+            for num_config, sequence in resulting_sequence_list:
+                print_info("NO. config:" + str(num_config))
+                print_info(sequence)
 
     elif ".tar" in opt_filename:
         # open with dfatool
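For context on the patched script: the vocabulary of the first hunk (model="l1", jump, a swept pen value) matches the ruptures changepoint library, which is the usual Python home of PELT. A minimal sketch of such a detection, assuming ruptures; the synthetic trace and the fixed penalty are stand-ins, not values from this repository:

```python
import numpy as np
import ruptures as rpt

# synthetic two-level trace standing in for one measurement's 'uW' signal
rng = np.random.default_rng(0)
signal = np.concatenate([rng.normal(10.0, 1.0, 200), rng.normal(50.0, 1.0, 200)])

# fit PELT and predict breakpoints for one penalty; calculate_penalty_value
# presumably sweeps pen over [range_min, range_max] and inspects how the
# number of breakpoints reacts
algo = rpt.Pelt(model="l1", jump=5, min_size=2).fit(signal)
breakpoints = algo.predict(pen=10)
print(breakpoints)  # indices where segments end, e.g. [200, 400]
```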
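The comment block rewrapped in the first hunk describes a concrete workaround: scipy.signal.find_peaks never reports a plateau that touches the border of the data, so one huge value is appended before the series is negated, turning a border plateau into an interior flat peak. A minimal sketch of that trick; choose_penalty_from_plateau, pen_values and fitted_bkps are hypothetical names, and picking the widest plateau is an assumption:

```python
import numpy as np
from scipy.signal import find_peaks

def choose_penalty_from_plateau(pen_values, fitted_bkps):
    # append one extremely large value: after negation it is extremely small,
    # so a plateau that would otherwise run through the right border becomes
    # an interior flat peak that find_peaks can report
    padded = -np.array(list(fitted_bkps) + [100000000])
    # plateau_size=1 makes find_peaks also return plateau edges per peak
    peaks, props = find_peaks(padded, plateau_size=1)
    if len(peaks) == 0:
        return None
    # choose a penalty from within the widest plateau of stable breakpoint counts
    widest = int(np.argmax(props["plateau_sizes"]))
    mid = (props["left_edges"][widest] + props["right_edges"][widest]) // 2
    return pen_values[mid]
```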
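The second hunk shows only the first lines of needs_refinement(): the signal is sorted once (the comment above it credits this with roughly 3x the speed of an unsorted variant) and, per the dropped TODO, the median rather than the mean serves as baseline. A speculative reconstruction of such a check; the percentile cutoffs and the comparison logic are guesses, not the commit's code:

```python
def needs_refinement(signal, thresh):
    # sort once, then read the median and near-extreme values off by index
    sorted_signal = sorted(signal)
    length_of_signal = len(signal)
    median = sorted_signal[length_of_signal // 2]
    # compare values near the 5th/95th percentile against the median baseline;
    # if either deviates by more than thresh, the state presumably hides substates
    lower = sorted_signal[int(length_of_signal * 0.05)]
    upper = sorted_signal[int(length_of_signal * 0.95)]
    return (median - lower > thresh) or (upper - median > thresh)

print(needs_refinement([10] * 90 + [50] * 10, thresh=5))  # True: tail far from median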
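The raw_states hunk tags every argument tuple with num_measurement and later puts the results "in correct order", so raw_states_list[i] keeps corresponding to measurement i even though workers finish in arbitrary order. A minimal sketch of that pattern with multiprocessing; analyze_measurement and its placeholder result are invented for illustration:

```python
from multiprocessing import Pool

def analyze_measurement(args):
    num_measurement, measurement = args
    # ... run changepoint detection on measurement['uW'] here ...
    return num_measurement, len(measurement['uW'])  # placeholder result

def calc_raw_states(arg_list, num_processes=8):
    raw_states_list = [None] * len(arg_list)
    with Pool(processes=num_processes) as pool:
        # imap_unordered yields results as workers finish; the index carried
        # in each result puts it back at the position of its measurement
        for num_measurement, result in pool.imap_unordered(analyze_measurement, arg_list):
            raw_states_list[num_measurement] = result
    return raw_states_list

if __name__ == '__main__':
    measurements = [{'uW': [1, 2, 3]}, {'uW': [4, 5]}]
    args = [(i, m) for i, m in enumerate(measurements)]
    print(calc_raw_states(args, num_processes=2))
```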
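The closing TODO asks how to recognize that two states are the same and suggests clustering states in 2D (power consumption vs. duration). Since the surrounding code already reads cluster.labels_ and cluster.n_clusters_, which matches scikit-learn's AgglomerativeClustering, a minimal sketch of that idea; all feature values and the distance threshold are made up:

```python
import numpy as np
from sklearn.cluster import AgglomerativeClustering

# one row per raw state: [mean power in uW, duration] - made-up values
states = np.array([
    [10.1, 500], [10.3, 505],   # presumably the same idle-like state
    [50.2, 120], [49.8, 118],   # presumably the same transmit-like state
])

# n_clusters=None plus a distance threshold lets the data decide how many
# distinct states exist instead of fixing the count up front
cluster = AgglomerativeClustering(n_clusters=None, distance_threshold=10.0).fit(states)
print(cluster.labels_, cluster.n_clusters_)  # e.g. [0 0 1 1] 2
```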