author    jfalkenhagen <jfalkenhagen@uos.de>    2020-08-09 15:11:42 +0200
committer jfalkenhagen <jfalkenhagen@uos.de>    2020-08-09 15:11:42 +0200
commit    61fb6094a33c4855c763f1925e61aec90294daa3 (patch)
tree      7aa5cef892dd8e3c786687d175cda27877310108 /bin
parent    bf49cf3ccee8c6d3c91c6a2ac81d7923a35b198e (diff)
Parametrization seems to work reasonably well. Probably done.
Diffstat (limited to 'bin')
-rw-r--r-- | bin/Proof_Of_Concept_PELT.py | 637 |
1 file changed, 418 insertions(+), 219 deletions(-)
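The refinement pipeline this patch extends segments each power trace with PELT changepoint detection before states are fitted and merged. calc_pelt itself is untouched by the commit; purely for orientation, here is a minimal sketch of that kind of segmentation, assuming the ruptures library. The helper name pelt_breakpoints and the toy signal are illustrative, not part of the patch:

```python
# Hedged sketch only: calc_pelt is not part of this diff. Assumes the
# ruptures library; names and values here are illustrative.
import numpy as np
import ruptures as rpt

def pelt_breakpoints(signal, penalty, model="l1", jump=5, min_size=2):
    # Fit PELT on a 1-D power trace and return changepoint indices.
    # ruptures always reports len(signal) as the final breakpoint.
    algo = rpt.Pelt(model=model, jump=jump, min_size=min_size).fit(signal)
    return algo.predict(pen=penalty)

# Two-level toy signal: one changepoint is expected near sample 200.
rng = np.random.default_rng(0)
signal = np.concatenate([rng.normal(10, 1, 200), rng.normal(50, 1, 200)])
print(pelt_breakpoints(signal, penalty=28))  # e.g. [200, 400]
```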
diff --git a/bin/Proof_Of_Concept_PELT.py b/bin/Proof_Of_Concept_PELT.py
index 75cdce6..40c405d 100644
--- a/bin/Proof_Of_Concept_PELT.py
+++ b/bin/Proof_Of_Concept_PELT.py
@@ -14,13 +14,14 @@ import numpy as np
 from dfatool.functions import analytic
 from dfatool.loader import RawData
 from dfatool import parameters
-from dfatool.model import ParallelParamFit
+from dfatool.model import ParallelParamFit, PTAModel
 from dfatool.utils import by_name_to_by_param
 
 # from scipy.cluster.hierarchy import dendrogram, linkage  # for graphical display
 
-# py bin\Proof_Of_Concept_PELT.py --filename="..\data\TX.json" --jump=1 --pen_override=10 --refinement_thresh=100
+# py bin\Proof_Of_Concept_PELT.py --filename="..\data\TX.json" --jump=1 --pen_override=28 --refinement_thresh=100
+# py bin\Proof_Of_Concept_PELT.py --filename="..\data\TX.json" --jump=1 --pen_override=28 --refinement_thresh=100 --cache_dicts --cache_loc="..\data\TX_cache"
 
 
 def plot_data_from_json(filename, trace_num, x_axis, y_axis):
@@ -294,7 +295,7 @@ def calculate_penalty_value(signal, model="l1", jump=5, min_dist=2, range_min=0,
 #             raw_states_calc_args.append((num_measurement, measurement, penalty, opt_model
 #                                          , opt_jump))
 
-def calc_raw_states_func(num_trace, measurement, penalty, model, jump):
+def calc_raw_states_func(num_measurement, measurement, penalty, model, jump):
     signal = np.array(measurement['uW'])
     normed_signal = norm_signal(signal)
     bkpts = calc_pelt(normed_signal, penalty, model=model, jump=jump)
@@ -325,7 +326,7 @@ def calc_raw_states_func(num_trace, measurement, penalty, model, jump):
     #     print_info("The average standard deviation for the newly found states is "
     #                + str(new_avg_std))
     #     print_info("That is a reduction of " + str(change_avg_std))
-    return num_trace, calced_states, new_avg_std, change_avg_std
+    return num_measurement, calced_states, new_avg_std, change_avg_std
 
 
 def calc_raw_states(arg_list, num_processes=8):
@@ -382,6 +383,27 @@ def norm_signal(signal):
     return normed_signal
 
 
+def norm_values_to_cluster(values_to_cluster):
+    new_vals = np.array(values_to_cluster)
+    num_samples = len(values_to_cluster)
+    num_params = len(values_to_cluster[0])
+    for i in range(num_params):
+        param_vals = []
+        for sample in new_vals:
+            param_vals.append(sample[i])
+        max_val = np.max(np.abs(param_vals))
+        for num_sample, sample in enumerate(new_vals):
+            values_to_cluster[num_sample][i] = sample[i] / max_val
+    return new_vals
+
+
+def get_state_num(state_name, distinct_states):
+    for state_num, states in enumerate(distinct_states):
+        if state_name in states:
+            return state_num
+    return -1
+
+
 if __name__ == '__main__':
     # OPTION RECOGNITION
     opt = dict()
@@ -536,6 +558,7 @@ if __name__ == '__main__':
     by_param_file = None
     by_name_file = None
     param_names_file = None
+    from_cache = False
     if opt_cache_loc is not None:
         flag = False
         by_name_loc = os.path.join(opt_cache_loc, "by_name.txt")
@@ -558,6 +581,12 @@ if __name__ == '__main__':
             flag = True
         if flag:
            print_info("The cache will be built.")
+        else:
+            print_warning("THE OPTION \"cache_dicts\" IS FOR DEBUGGING PURPOSES ONLY! "
+                          "\nDO NOT USE FOR REGULAR APPLICATIONS!"
+                          "\nThe script will not run to the end properly."
+                          "\nNo final parametrization will be done.")
+            from_cache = True
 
     if None in (by_param_file, by_name_file, param_names_file):
         state_durations_by_config = []
@@ -565,7 +594,8 @@ if __name__ == '__main__':
         for num_config, measurements_by_config in enumerate(configurations):
             # loop through all occurrences of the looked at state
             print_info("Looking at state '" + measurements_by_config['name'] + "' with params: "
-                       + str(measurements_by_config['parameter']) + "(" + str(num_config + 1) + "/"
+                       + str(measurements_by_config['parameter']) + "(" + str(
+                num_config + 1) + "/"
                        + str(len(configurations)) + ")")
             refine = False
             print_info("Checking if refinement is necessary...")
@@ -578,8 +608,9 @@ if __name__ == '__main__':
                 print_info("Refinement is necessary!")
                 refine = True
             if not refine:
-                print_info("No refinement necessary for state '" + measurements_by_config['name']
-                           + "' with params: " + str(measurements_by_config['parameter']))
+                print_info(
+                    "No refinement necessary for state '" + measurements_by_config['name']
+                    + "' with params: " + str(measurements_by_config['parameter']))
             else:
                 # assume that all measurements of the same param configuration are fundamentally
                 # similar -> calculate penalty for first measurement, use it for all
@@ -598,7 +629,8 @@ if __name__ == '__main__':
                 # build arguments for parallel execution
                 print_info("Starting raw_states calculation.")
                 raw_states_calc_args = []
-                for num_measurement, measurement in enumerate(measurements_by_config['offline']):
+                for num_measurement, measurement in enumerate(
+                        measurements_by_config['offline']):
                     raw_states_calc_args.append((num_measurement, measurement, penalty, opt_model
                                                  , opt_jump))
 
@@ -608,15 +640,16 @@ if __name__ == '__main__':
                 # entry still corresponds with index of measurement in measurements_by_states
                 # -> If measurements are discarded the correct ones are easily recognized
                 for ret_val in raw_states_res:
-                    num_trace = ret_val[0]
+                    num_measurement = ret_val[0]
                     raw_states = ret_val[1]
                     avg_std = ret_val[2]
                     change_avg_std = ret_val[3]
                     # TODO: Why does my IDE emit a warning here? The index should be an int,
                     #  shouldn't it? It also seems to work fine...
-                    raw_states_list[num_trace] = raw_states
+                    raw_states_list[num_measurement] = raw_states
                     print_info("The average standard deviation for the newly found states in "
-                               + "measurement No. " + str(num_trace) + " is " + str(avg_std))
+                               + "measurement No. " + str(num_measurement) + " is " + str(
+                        avg_std))
                     print_info("That is a reduction of " + str(change_avg_std))
                 print_info("Finished raw_states calculation.")
                 num_states_array = [int()] * len(raw_states_list)
@@ -643,37 +676,46 @@ if __name__ == '__main__':
                 print_info("Choose " + str(num_raw_states) + " as number of raw_states.")
                 # iterate through all found breakpoints and determine start and end points as well
                 # as power consumption
-                states_duration_list = [0] * num_raw_states
-                states_consumption_list = [0] * num_raw_states
+                num_measurements = len(raw_states_list)
+                states_duration_list = [list()] * num_raw_states
+                states_consumption_list = [list()] * num_raw_states
+                for num_elem, _ in enumerate(states_duration_list):
+                    states_duration_list[num_elem] = [0] * num_measurements
+                    states_consumption_list[num_elem] = [0] * num_measurements
                 num_used_measurements = 0
-                for num_trace, raw_states in enumerate(raw_states_list):
+                for num_measurement, raw_states in enumerate(raw_states_list):
                     if len(raw_states) == num_raw_states:
                         num_used_measurements = num_used_measurements + 1
-                        # calced_state = (start_time, end_time, mean_power, std_dev)
                         for num_state, s in enumerate(raw_states):
-                            state_duration = s[1] - s[0]
-                            state_consumption = s[2]
-                            states_duration_list[num_state] = \
-                                states_duration_list[num_state] + state_duration
-                            states_consumption_list[num_state] = \
-                                states_consumption_list[num_state] + state_consumption
+                            states_duration_list[num_state][num_measurement] = s[1] - s[0]
+                            states_consumption_list[num_state][num_measurement] = s[2]
+                        # calced_state = (start_time, end_time, mean_power, std_dev)
+                        # for num_state, s in enumerate(raw_states):
+                        #     state_duration = s[1] - s[0]
+                        #     state_consumption = s[2]
+                        #     states_duration_list[num_state] = \
+                        #         states_duration_list[num_state] + state_duration
+                        #     states_consumption_list[num_state] = \
+                        #         states_consumption_list[num_state] + state_consumption
                     else:
-                        print_info("Discarding measurement No. " + str(num_trace) + " because it "
-                                   + "did not recognize the number of raw_states correctly.")
-                for i, x in enumerate(states_duration_list):
-                    states_duration_list[i] = x / num_used_measurements
-                for i, x in enumerate(states_consumption_list):
-                    states_consumption_list[i] = x / num_used_measurements
+                        print_info("Discarding measurement No. " + str(num_measurement)
+                                   + " because it did not recognize the number of "
+                                     "raw_states correctly.")
+                # for i, x in enumerate(states_duration_list):
+                #     states_duration_list[i] = x / num_used_measurements
+                # for i, x in enumerate(states_consumption_list):
+                #     states_consumption_list[i] = x / num_used_measurements
                 if num_used_measurements != len(raw_states_list):
                     if num_used_measurements / len(raw_states_list) <= 0.5:
                         print_warning("Only used " + str(num_used_measurements) + "/"
-                                      + str(len(raw_states_list)) + " Measurements for refinement. "
+                                      + str(
+                            len(raw_states_list)) + " Measurements for refinement. "
                                       + "Others did not recognize number of states correctly."
                                       + "\nYou should verify the integrity of the measurements.")
                     else:
                         print_info("Used " + str(num_used_measurements) + "/"
-                                   + str(len(raw_states_list)) + " Measurements for refinement. "
-                                   + "Others did not recognize number of states correctly.")
+                                   + str(len(raw_states_list)) + " Measurements for refinement."
+                                   + " Others did not recognize number of states correctly.")
                     num_used_measurements = i
                     # TODO: debug stuff
                     sys.exit(0)
@@ -697,20 +739,19 @@ if __name__ == '__main__':
             num_raw_states = np.argmax(counts)
             usable_configs = len(state_consumptions_by_config)
             # param_list identical for each raw_state
-            # TODO: Can these really just be pulled out of the JSON? I did not find them there...
-            #  only per measurement. But there they are identical anyway.
             param_list = []
             param_names = configurations[0]['offline_aggregates']['paramkeys'][0]
             print_info("param_names: " + str(param_names))
             for num_config, states_consumption_list in state_consumptions_by_config:
                 if len(states_consumption_list) != num_raw_states:
-                    print_warning("Config No." + str(num_config) + " not usable yet due to different "
-                                  + "number of states. This hints a correlation between parameters and "
-                                  + "the structure of the resulting automaton. This will be possibly be"
-                                  + " supported in a future version of this tool.")
+                    print_warning(
+                        "Config No." + str(num_config) + " not usable yet due to different "
+                        + "number of states. This hints at a correlation between parameters and "
+                        + "the structure of the resulting automaton. This will possibly be"
+                        + " supported in a future version of this tool.")
                     usable_configs = usable_configs - 1
                 else:
-                    param_list.append(configurations[num_config]['offline_aggregates']['param'][0])
+                    param_list.extend(configurations[num_config]['offline_aggregates']['param'])
 
             print_info("param_list: " + str(param_list))
             if usable_configs == len(state_consumptions_by_config):
@@ -722,16 +763,16 @@ if __name__ == '__main__':
                 consumptions_for_state = []
                 durations_for_state = []
                 for j, (_, states_consumption_list) in enumerate(state_consumptions_by_config):
-                    consumptions_for_state.append(states_consumption_list[i])
-                    durations_for_state.append(state_durations_by_config[j][1][i])
-                name = "state_" + str(i)
+                    consumptions_for_state.extend(states_consumption_list[i])
+                    durations_for_state.extend(state_durations_by_config[j][1][i])
+                state_name = "state_" + str(i)
                 state_dict = {
                     "param": param_list,
                     "power": consumptions_for_state,
                     "duration": durations_for_state,
                     "attributes": ["power", "duration"]
                 }
-                by_name[name] = state_dict
+                by_name[state_name] = state_dict
             by_param = by_name_to_by_param(by_name)
             if opt_cache_loc is not None:
                 by_name_loc = os.path.join(opt_cache_loc, "by_name.txt")
@@ -779,8 +820,8 @@ if __name__ == '__main__':
                                + str(stats.depends_on_param(state_name, "duration", param_name))
                                )
             paramfit.fit()
-            fit_res_dur_list = []
-            fit_res_pow_list = []
+            fit_res_dur_dict = {}
+            fit_res_pow_dict = {}
             for state_name in by_name.keys():
                 fit_power = paramfit.get_result(state_name, "power")
                 fit_duration = paramfit.get_result(state_name, "duration")
@@ -792,182 +833,340 @@ if __name__ == '__main__':
                 combined_fit_duration.fit(by_param, state_name, "duration")
                 if not combined_fit_duration.fit_success:
                     print_warning("Fitting(duration) for state " + state_name + " was not successful!")
-                fit_res_pow_list.append(combined_fit_power)
-                fit_res_dur_list.append(combined_fit_duration)
-
-
-            # TODO: removed clustering (temporarily), since it caused too many difficulties
-            #  at the current state
-            # i = 0
-            # cluster_labels_list = []
-            # num_cluster_list = []
-            # for num_trace, raw_states in enumerate(raw_states_list):
-            #     # iterate through raw states from measurements
-            #     if len(raw_states) == num_raw_states:
-            #         # build array with power values to cluster these
-            #         value_to_cluster = np.zeros((num_raw_states, 2))
-            #         j = 0
-            #         for s in raw_states:
-            #             value_to_cluster[j][0] = s[2]
-            #             value_to_cluster[j][1] = 0
-            #             j = j + 1
-            #         # linked = linkage(value_to_cluster, 'single')
-            #         #
-            #         # labelList = range(1, 11)
-            #         #
-            #         # plt.figure(figsize=(10, 7))
-            #         # dendrogram(linked,
-            #         #            orientation='top',
-            #         #            distance_sort='descending',
-            #         #            show_leaf_counts=True)
-            #         # plt.show()
-            #         # TODO: Automatic detection of number of clusters. Currently still a magic
-            #         #  number in distance_threshold
-            #         cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True,
-            #                                           affinity='euclidean',
-            #                                           linkage='ward',
-            #                                           distance_threshold=opt_refinement_thresh * 100)
-            #         # cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean',
-            #         #                                   linkage='ward')
-            #         cluster.fit_predict(value_to_cluster)
-            #         # print_info("Cluster labels:\n" + str(cluster.labels_))
-            #         # plt.scatter(value_to_cluster[:, 0], value_to_cluster[:, 1], c=cluster.labels_, cmap='rainbow')
-            #         # plt.show()
-            #         cluster_labels_list.append((num_trace, cluster.labels_))
-            #         num_cluster_list.append((num_trace, cluster.n_clusters_))
-            #         i = i + 1
-            #     else:
-            #         print_info("Discarding measurement No. " + str(num_trace) + " because it "
-            #                    + "did not recognize the number of raw_states correctly.")
-            # num_used_measurements = len(raw_states_list)
-            # if i != len(raw_states_list):
-            #     if i / len(raw_states_list) <= 0.5:
-            #         print_warning("Only used " + str(i) + "/" + str(len(raw_states_list))
-            #                       + " Measurements for refinement. "
-            #                         "Others did not recognize number of states correctly."
-            #                         "\nYou should verify the integrity of the measurements.")
-            #     else:
-            #         print_info("Used " + str(i) + "/" + str(len(raw_states_list))
-            #                    + " Measurements for refinement. "
-            #                      "Others did not recognize number of states correctly.")
-            #     num_used_measurements = i
-            #     # TODO: debug stuff
-            #     sys.exit(0)
-            # else:
-            #     print_info("Used all available measurements.")
-            #
-            # num_states = np.argmax(np.bincount([elem[1] for elem in num_cluster_list]))
-            # avg_per_state_list = [None] * len(cluster_labels_list)
-            # used_clusters = 0
-            # for number, (num_trace, labels) in enumerate(cluster_labels_list):
-            #     if num_cluster_list[number][1] == num_states:
-            #         avg_per_state = [0] * num_states
-            #         count_per_state = [0] * num_states
-            #         raw_states = raw_states_list[num_trace]
-            #         for num_label, label in enumerate(labels):
-            #             count_per_state[label] = count_per_state[label] + 1
-            #             avg_per_state[label] = avg_per_state[label] + raw_states[num_label][2]
-            #         for i, _ in enumerate(avg_per_state):
-            #             avg_per_state[i] = avg_per_state[i] / count_per_state[i]
-            #         avg_per_state_list[number] = avg_per_state
-            #         used_clusters = used_clusters + 1
-            #     else:
-            #         # hopefully this does not happen regularly
-            #         print_info("Discarding measurement " + str(number)
-            #                    + " because the clustering yielded not matching results.")
-            #         num_used_measurements = num_used_measurements - 1
-            # if num_used_measurements == 0:
-            #     print_error("Something went terribly wrong. Discarded all measurements.")
-            #     # continue
-            #     sys.exit(-1)
-            # # flattened version for clustering:
-            # values_to_cluster = np.zeros((num_states * used_clusters, 2))
-            # index = 0
-            # for avg_per_state in avg_per_state_list:
-            #     if avg_per_state is not None:
-            #         for avg in avg_per_state:
-            #             values_to_cluster[index][0] = avg
-            #             values_to_cluster[index][1] = 0
-            #             index = index + 1
-            # # plt.scatter(values_to_cluster[:, 0], values_to_cluster[:, 1])
-            # # plt.show()
-            # cluster = AgglomerativeClustering(n_clusters=num_states)
-            # cluster.fit_predict(values_to_cluster)
-            # # At this point you have a flat array of labels here. Map it back onto the
-            # #  original labels and replace those with the labels found here. All identical
-            # #  states get identical labels. Then maybe report, for the resulting sequence,
-            # #  how strong the agreement in the state order is.
-            # new_labels_list = []
-            # new_labels = []
-            # i = 0
-            # for label in cluster.labels_:
-            #     new_labels.append(label)
-            #     i = i + 1
-            #     if i == num_states:
-            #         new_labels_list.append(new_labels)
-            #         new_labels = []
-            #         i = 0
-            # # only the selected measurements are present in new_labels.
-            # #  new_labels_index should not be incremented, if not selected_measurement is skipped
-            # new_labels_index = 0
-            # # cluster_labels_list contains all measurements -> if measurement is skipped
-            # #  still increment the index
-            # index = 0
-            # for elem in avg_per_state_list:
-            #     if elem is not None:
-            #         for number, label in enumerate(cluster_labels_list[index][1]):
-            #             cluster_labels_list[index][1][number] = \
-            #                 new_labels_list[new_labels_index][label]
-            #         new_labels_index = new_labels_index + 1
-            #     else:
-            #         # override not selected measurement labels to avoid choosing the wrong ones.
-            #         for number, label in enumerate(cluster_labels_list[index][1]):
-            #             cluster_labels_list[index][1][number] = -1
-            #     index = index + 1
-            # resulting_sequence = [None] * num_raw_states
-            # i = 0
-            # confidence = 0
-            # for x in resulting_sequence:
-            #     j = 0
-            #     test_list = []
-            #     for arr in [elem[1] for elem in cluster_labels_list]:
-            #         if num_cluster_list[j][1] != num_states:
-            #             j = j + 1
-            #         else:
-            #             if -1 in arr:
-            #                 print_error("Complain to Janis! Probably an error while renaming"
-            #                             " the states.")
-            #                 sys.exit(-1)
-            #             test_list.append(arr[i])
-            #             j = j + 1
-            #     bincount = np.bincount(test_list)
-            #     resulting_sequence[i] = np.argmax(bincount)
-            #     confidence = confidence + bincount[resulting_sequence[i]] / np.sum(bincount)
-            #     i = i + 1
-            # confidence = confidence / len(resulting_sequence)
-            # print_info("Confidence of resulting sequence is " + str(confidence)
-            #            + " while using " + str(num_used_measurements) + "/"
-            #            + str(len(raw_states_list)) + " measurements.")
-            # # print(resulting_sequence)
-            # resulting_sequence_list.append((num_config, resulting_sequence))
-            # # TODO: What now? At this point I have one state sequence per configuration. Build
-            # #  an automaton from that. But how? Or parametrize first? Actually I need the loops
-            # #  first. How do I detect them? Any number of loops can occur at arbitrary positions.
-            # # TODO: The state sequences will not translate readily into isomorphic automata
-            # #  (individual states dropping out or being added). It all rests on the problem:
-            # #  how do I recognize that two states are the same and do not merely have a similar
-            # #  power draw?! Maybe cluster states in 2D? Dim 1 = power draw, dim 2 = duration?
-            # #  At least within one param configuration the duration of a state should not
-            # #  change anymore. Can certainly still produce wrong clusterings...
-            # for num_config, sequence in resulting_sequence_list:
-            #     print_info("NO. config:" + str(num_config))
-            #     print_info(sequence)
-            #
-            #
-            #
-            #
-
+                fit_res_pow_dict[state_name] = combined_fit_power
+                fit_res_dur_dict[state_name] = combined_fit_duration
+            # only raw_states with the same number of function parameters can be similar
+            num_param_pow_dict = {}
+            num_param_dur_dict = {}
+            for state_name in by_name.keys():
+                model_function = str(fit_res_pow_dict[state_name].model_function)
+                model_args = fit_res_pow_dict[state_name].model_args
+                num_param_pow_dict[state_name] = len(model_args)
+                for num_arg, arg in enumerate(model_args):
+                    replace_string = "regression_arg(" + str(num_arg) + ")"
+                    model_function = model_function.replace(replace_string, str(arg))
+                print_info("Power-Function for state " + state_name + ": "
+                           + model_function)
+            for state_name in by_name.keys():
+                model_function = str(fit_res_dur_dict[state_name].model_function)
+                model_args = fit_res_dur_dict[state_name].model_args
+                num_param_dur_dict[state_name] = len(model_args)
+                for num_arg, arg in enumerate(model_args):
+                    replace_string = "regression_arg(" + str(num_arg) + ")"
+                    model_function = model_function.replace(replace_string, str(arg))
+                print_info("Duration-Function for state " + state_name + ": "
+                           + model_function)
+            similar_raw_state_buckets = {}
+            for state_name in by_name.keys():
+                pow_model_function = str(fit_res_pow_dict[state_name].model_function)
+                dur_model_function = str(fit_res_dur_dict[state_name].model_function)
+                key_tuple = (pow_model_function, dur_model_function)
+                if key_tuple not in similar_raw_state_buckets:
+                    similar_raw_state_buckets[key_tuple] = []
+                similar_raw_state_buckets[key_tuple].append(state_name)
+
+            # cluster for each key tuple using the function parameters
+            distinct_states = []
+            for key_tuple in similar_raw_state_buckets.keys():
+                print_info("Key-Tuple " + str(key_tuple) + ": "
+                           + str(similar_raw_state_buckets[key_tuple]))
+                similar_states = similar_raw_state_buckets[key_tuple]
+                if len(similar_states) > 1:
+                    # functions are identical -> num_params is identical
+                    num_params = num_param_dur_dict[similar_states[0]] + num_param_pow_dict[
+                        similar_states[0]]
+                    values_to_cluster = np.zeros((len(similar_states), num_params))
+                    for num_state, state_name in enumerate(similar_states):
+                        dur_params = fit_res_dur_dict[state_name].model_args
+                        pow_params = fit_res_pow_dict[state_name].model_args
+                        j = 0
+                        for param in pow_params:
+                            values_to_cluster[num_state][j] = param
+                            j = j + 1
+                        for param in dur_params:
+                            values_to_cluster[num_state][j] = param
+                            j = j + 1
+                    normed_vals_to_cluster = norm_values_to_cluster(values_to_cluster)
+                    cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True,
+                                                      affinity='euclidean',
+                                                      linkage='ward',
+                                                      # TODO: magic number; fine-tune during
                                                      #  evaluation
+                                                      distance_threshold=1)
+                    cluster.fit_predict(values_to_cluster)
+                    cluster_labels = cluster.labels_
+                    print_info("Cluster labels:\n" + str(cluster_labels))
+                    if cluster.n_clusters_ > 1:
+                        # more than one distinct state found
+                        distinct_state_dict = {}
+                        for num_state, label in enumerate(cluster_labels):
+                            if label not in distinct_state_dict.keys():
+                                distinct_state_dict[label] = []
+                            distinct_state_dict[label].append(similar_states[num_state])
+                        for distinct_state_key in distinct_state_dict.keys():
+                            distinct_states.append(distinct_state_dict[distinct_state_key])
+                    else:
+                        distinct_states.append(similar_states)
+                else:
+                    distinct_states.append(similar_states)
+            for num_state, distinct_state in enumerate(distinct_states):
+                print("State " + str(num_state) + ": " + str(distinct_state))
+            num_raw_states = len(by_name.keys())
+            resulting_sequence = [int] * num_raw_states
+            for i in range(num_raw_states):
+                state_name = "state_" + str(i)
+                state_num = get_state_num(state_name, distinct_states)
+                if state_num == -1:
+                    print_error("Critical Error when creating the resulting sequence. raw_state state_"
+                                + str(i) + " could not be mapped to a state.")
+                    sys.exit(-1)
+                resulting_sequence[i] = state_num
+            print("Resulting sequence is: " + str(resulting_sequence))
+            # if from_cache:
+            #     print_warning(
+            #         "YOU USED THE OPTION \"cache_dicts\". THIS IS FOR DEBUGGING PURPOSES ONLY!"
+            #         "\nTHE SCRIPT WILL NOW STOP PREMATURELY,"
+            #         "SINCE DATA FOR FURTHER COMPUTATION IS MISSING!")
+            #     sys.exit(0)
+
+            new_by_name = {}
+            for num_state, distinct_state in enumerate(distinct_states):
+                state_name = "State_" + str(num_state)
+                consumptions_for_state = []
+                durations_for_state = []
+                param_list = []
+                for raw_state in distinct_state:
+                    original_state_dict = by_name[raw_state]
+                    param_list.extend(original_state_dict["param"])
+                    consumptions_for_state.extend(original_state_dict["power"])
+                    durations_for_state.extend(original_state_dict["duration"])
+                new_state_dict = {
+                    "param": param_list,
+                    "power": consumptions_for_state,
+                    "duration": durations_for_state,
+                    "attributes": ["power", "duration"]
+                }
+                new_by_name[state_name] = new_state_dict
+            new_by_param = by_name_to_by_param(new_by_name)
+            new_stats = parameters.ParamStats(new_by_name, new_by_param, param_names, dict())
+            new_paramfit = ParallelParamFit(new_by_param)
+            for state_name in new_by_name.keys():
+                for num_param, param_name in enumerate(param_names):
+                    if new_stats.depends_on_param(state_name, "power", param_name):
+                        new_paramfit.enqueue(state_name, "power", num_param, param_name)
+                    if new_stats.depends_on_param(state_name, "duration", param_name):
+                        new_paramfit.enqueue(state_name, "duration", num_param, param_name)
+                    print_info("State " + state_name + "s power depends on param " + param_name + ":"
+                               + str(new_stats.depends_on_param(state_name, "power", param_name))
+                               )
+                    print_info("State " + state_name + "s duration depends on param " + param_name + ":"
+                               + str(new_stats.depends_on_param(state_name, "duration", param_name))
+                               )
+            new_paramfit.fit()
+            new_fit_res_dur_dict = {}
+            new_fit_res_pow_dict = {}
+            for state_name in new_by_name.keys():
+                fit_power = new_paramfit.get_result(state_name, "power")
+                fit_duration = new_paramfit.get_result(state_name, "duration")
+                combined_fit_power = analytic.function_powerset(fit_power, param_names, 0)
+                combined_fit_duration = analytic.function_powerset(fit_duration, param_names, 0)
+                combined_fit_power.fit(new_by_param, state_name, "power")
+                if not combined_fit_power.fit_success:
+                    print_warning("Fitting(power) for state " + state_name + " was not successful!")
+                combined_fit_duration.fit(new_by_param, state_name, "duration")
+                if not combined_fit_duration.fit_success:
+                    print_warning("Fitting(duration) for state " + state_name + " was not successful!")
+                new_fit_res_pow_dict[state_name] = combined_fit_power
+                new_fit_res_dur_dict[state_name] = combined_fit_duration
+            for state_name in new_by_name.keys():
+                model_function = str(new_fit_res_pow_dict[state_name].model_function)
+                model_args = new_fit_res_pow_dict[state_name].model_args
+                for num_arg, arg in enumerate(model_args):
+                    replace_string = "regression_arg(" + str(num_arg) + ")"
+                    model_function = model_function.replace(replace_string, str(arg))
+                print("Power-Function for state " + state_name + ": "
+                      + model_function)
+            for state_name in new_by_name.keys():
+                model_function = str(new_fit_res_dur_dict[state_name].model_function)
+                model_args = new_fit_res_dur_dict[state_name].model_args
+                for num_arg, arg in enumerate(model_args):
+                    replace_string = "regression_arg(" + str(num_arg) + ")"
+                    model_function = model_function.replace(replace_string, str(arg))
+                print("Duration-Function for state " + state_name + ": "
+                      + model_function)
+            model = PTAModel(by_name, param_names, dict())
+
+
+            # TODO: removed clustering (temporarily), since it caused too many difficulties
+            #  at the current state
+            # i = 0
+            # cluster_labels_list = []
+            # num_cluster_list = []
+            # for num_trace, raw_states in enumerate(raw_states_list):
+            #     # iterate through raw states from measurements
+            #     if len(raw_states) == num_raw_states:
+            #         # build array with power values to cluster these
+            #         value_to_cluster = np.zeros((num_raw_states, 2))
+            #         j = 0
+            #         for s in raw_states:
+            #             value_to_cluster[j][0] = s[2]
+            #             value_to_cluster[j][1] = 0
+            #             j = j + 1
+            #         # linked = linkage(value_to_cluster, 'single')
+            #         #
+            #         # labelList = range(1, 11)
+            #         #
+            #         # plt.figure(figsize=(10, 7))
+            #         # dendrogram(linked,
+            #         #            orientation='top',
+            #         #            distance_sort='descending',
+            #         #            show_leaf_counts=True)
+            #         # plt.show()
+            #         # TODO: Automatic detection of number of clusters. Currently still a magic
+            #         #  number in distance_threshold
+            #         cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True,
+            #                                           affinity='euclidean',
+            #                                           linkage='ward',
+            #                                           distance_threshold=opt_refinement_thresh * 100)
+            #         # cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean',
+            #         #                                   linkage='ward')
+            #         cluster.fit_predict(value_to_cluster)
+            #         # print_info("Cluster labels:\n" + str(cluster.labels_))
+            #         # plt.scatter(value_to_cluster[:, 0], value_to_cluster[:, 1], c=cluster.labels_, cmap='rainbow')
+            #         # plt.show()
+            #         cluster_labels_list.append((num_trace, cluster.labels_))
+            #         num_cluster_list.append((num_trace, cluster.n_clusters_))
+            #         i = i + 1
+            #     else:
+            #         print_info("Discarding measurement No. " + str(num_trace) + " because it "
+            #                    + "did not recognize the number of raw_states correctly.")
+            # num_used_measurements = len(raw_states_list)
+            # if i != len(raw_states_list):
+            #     if i / len(raw_states_list) <= 0.5:
+            #         print_warning("Only used " + str(i) + "/" + str(len(raw_states_list))
+            #                       + " Measurements for refinement. "
+            #                         "Others did not recognize number of states correctly."
+            #                         "\nYou should verify the integrity of the measurements.")
+            #     else:
+            #         print_info("Used " + str(i) + "/" + str(len(raw_states_list))
+            #                    + " Measurements for refinement. "
+            #                      "Others did not recognize number of states correctly.")
+            #     num_used_measurements = i
+            #     # TODO: debug stuff
+            #     sys.exit(0)
+            # else:
+            #     print_info("Used all available measurements.")
+            #
+            # num_states = np.argmax(np.bincount([elem[1] for elem in num_cluster_list]))
+            # avg_per_state_list = [None] * len(cluster_labels_list)
+            # used_clusters = 0
+            # for number, (num_trace, labels) in enumerate(cluster_labels_list):
+            #     if num_cluster_list[number][1] == num_states:
+            #         avg_per_state = [0] * num_states
+            #         count_per_state = [0] * num_states
+            #         raw_states = raw_states_list[num_trace]
+            #         for num_label, label in enumerate(labels):
+            #             count_per_state[label] = count_per_state[label] + 1
+            #             avg_per_state[label] = avg_per_state[label] + raw_states[num_label][2]
+            #         for i, _ in enumerate(avg_per_state):
+            #             avg_per_state[i] = avg_per_state[i] / count_per_state[i]
+            #         avg_per_state_list[number] = avg_per_state
+            #         used_clusters = used_clusters + 1
+            #     else:
+            #         # hopefully this does not happen regularly
+            #         print_info("Discarding measurement " + str(number)
+            #                    + " because the clustering yielded not matching results.")
+            #         num_used_measurements = num_used_measurements - 1
+            # if num_used_measurements == 0:
+            #     print_error("Something went terribly wrong. Discarded all measurements.")
+            #     # continue
+            #     sys.exit(-1)
+            # # flattened version for clustering:
+            # values_to_cluster = np.zeros((num_states * used_clusters, 2))
+            # index = 0
+            # for avg_per_state in avg_per_state_list:
+            #     if avg_per_state is not None:
+            #         for avg in avg_per_state:
+            #             values_to_cluster[index][0] = avg
+            #             values_to_cluster[index][1] = 0
+            #             index = index + 1
+            # # plt.scatter(values_to_cluster[:, 0], values_to_cluster[:, 1])
+            # # plt.show()
+            # cluster = AgglomerativeClustering(n_clusters=num_states)
+            # cluster.fit_predict(values_to_cluster)
+            # # At this point you have a flat array of labels here. Map it back onto the
+            # #  original labels and replace those with the labels found here. All identical
+            # #  states get identical labels. Then maybe report, for the resulting sequence,
+            # #  how strong the agreement in the state order is.
+            # new_labels_list = []
+            # new_labels = []
+            # i = 0
+            # for label in cluster.labels_:
+            #     new_labels.append(label)
+            #     i = i + 1
+            #     if i == num_states:
+            #         new_labels_list.append(new_labels)
+            #         new_labels = []
+            #         i = 0
+            # # only the selected measurements are present in new_labels.
+            # #  new_labels_index should not be incremented, if not selected_measurement is skipped
+            # new_labels_index = 0
+            # # cluster_labels_list contains all measurements -> if measurement is skipped
+            # #  still increment the index
+            # index = 0
+            # for elem in avg_per_state_list:
+            #     if elem is not None:
+            #         for number, label in enumerate(cluster_labels_list[index][1]):
+            #             cluster_labels_list[index][1][number] = \
+            #                 new_labels_list[new_labels_index][label]
+            #         new_labels_index = new_labels_index + 1
+            #     else:
+            #         # override not selected measurement labels to avoid choosing the wrong ones.
+            #         for number, label in enumerate(cluster_labels_list[index][1]):
+            #             cluster_labels_list[index][1][number] = -1
+            #     index = index + 1
+            # resulting_sequence = [None] * num_raw_states
+            # i = 0
+            # confidence = 0
+            # for x in resulting_sequence:
+            #     j = 0
+            #     test_list = []
+            #     for arr in [elem[1] for elem in cluster_labels_list]:
+            #         if num_cluster_list[j][1] != num_states:
+            #             j = j + 1
+            #         else:
+            #             if -1 in arr:
+            #                 print_error("Complain to Janis! Probably an error while renaming"
+            #                             " the states.")
+            #                 sys.exit(-1)
+            #             test_list.append(arr[i])
+            #             j = j + 1
+            #     bincount = np.bincount(test_list)
+            #     resulting_sequence[i] = np.argmax(bincount)
+            #     confidence = confidence + bincount[resulting_sequence[i]] / np.sum(bincount)
+            #     i = i + 1
+            # confidence = confidence / len(resulting_sequence)
+            # print_info("Confidence of resulting sequence is " + str(confidence)
+            #            + " while using " + str(num_used_measurements) + "/"
+            #            + str(len(raw_states_list)) + " measurements.")
+            # # print(resulting_sequence)
+            # resulting_sequence_list.append((num_config, resulting_sequence))
+            # # TODO: What now? At this point I have one state sequence per configuration. Build
+            # #  an automaton from that. But how? Or parametrize first? Actually I need the loops
+            # #  first. How do I detect them? Any number of loops can occur at arbitrary positions.
+            # # TODO: The state sequences will not translate readily into isomorphic automata
+            # #  (individual states dropping out or being added). It all rests on the problem:
+            # #  how do I recognize that two states are the same and do not merely have a similar
+            # #  power draw?! Maybe cluster states in 2D? Dim 1 = power draw, dim 2 = duration?
+            # #  At least within one param configuration the duration of a state should not
+            # #  change anymore. Can certainly still produce wrong clusterings...
+            # for num_config, sequence in resulting_sequence_list:
+            #     print_info("NO. config:" + str(num_config))
+            #     print_info(sequence)
+            #
+            #
+            #
+            #
 
     elif ".tar" in opt_filename:
         # open with dfatool
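The core idea the commit adds: two raw states can only be the same state if power and duration were fitted with the same model function, and within such a bucket the fitted parameters are normalized column-wise (norm_values_to_cluster) and grouped by hierarchical clustering with distance_threshold=1. Below is a self-contained sketch of that bucket-then-cluster step with invented fit results standing in for fit_res_pow_dict/fit_res_dur_dict; it uses one function string per state instead of the patch's (power, duration) key tuple, and leaves the metric at its euclidean default for compatibility with newer scikit-learn:

```python
# Sketch only: "fitted" maps each raw state to a fitted model-function string
# and its fit parameters. All values are invented for illustration.
import numpy as np
from sklearn.cluster import AgglomerativeClustering

fitted = {
    "state_0": ("reg(0) + reg(1) * p", [100.0, 2.0]),
    "state_1": ("reg(0) + reg(1) * p", [101.0, 2.1]),
    "state_2": ("reg(0) + reg(1) * p", [500.0, 9.0]),
    "state_3": ("reg(0)", [30.0]),
}

# Bucket states whose fitted functions are textually identical.
buckets = {}
for name, (func, _) in fitted.items():
    buckets.setdefault(func, []).append(name)

distinct_states = []
for func, names in buckets.items():
    if len(names) > 1:
        vals = np.array([fitted[n][1] for n in names], dtype=float)
        # Column-wise normalization, as in norm_values_to_cluster.
        vals /= np.max(np.abs(vals), axis=0)
        cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True,
                                          linkage="ward", distance_threshold=1.0)
        labels = cluster.fit_predict(vals)
        for label in np.unique(labels):
            distinct_states.append([n for n, lab in zip(names, labels) if lab == label])
    else:
        distinct_states.append(names)

print(distinct_states)  # e.g. [['state_0', 'state_1'], ['state_2'], ['state_3']]
```

One detail worth knowing when reading the patch: norm_values_to_cluster normalizes its argument in place and returns an unnormalized copy, so the subsequent cluster.fit_predict(values_to_cluster) does operate on normalized data even though normed_vals_to_cluster itself goes unused.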