author | jfalkenhagen <jfalkenhagen@uos.de> | 2020-07-20 23:48:21 +0200
committer | jfalkenhagen <jfalkenhagen@uos.de> | 2020-07-20 23:48:21 +0200
commit | bf49cf3ccee8c6d3c91c6a2ac81d7923a35b198e (patch)
tree | 3f07594b5b82ec9b3b678fcab9be3d4ea09f79f8 /bin/Proof_Of_Concept_PELT.py
parent | e15ac967c7e9b1b9f781ee9478f3b1e723d6177a (diff)
bin/Proof_Of_Concept_PELT: Parameterization of raw_states should now basically work. For TX at least, however, it does not.
Diffstat (limited to 'bin/Proof_Of_Concept_PELT.py')
-rw-r--r-- | bin/Proof_Of_Concept_PELT.py | 694
1 file changed, 456 insertions, 238 deletions
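Besides the refactoring, the diff below introduces a simple on-disk cache for the aggregated state dictionaries, controlled by the new `cache_dicts` and `cache_loc` options. The following is a minimal, self-contained sketch of that caching scheme; the file names (`by_name.txt`, `by_param.txt`, `param_names.txt`) are taken from the diff, while the two helper functions are illustrative and not part of the script itself:

```python
# Sketch of the dict cache added in this commit (assumed layout:
# <cache_loc>/by_name.txt, by_param.txt, param_names.txt, as in the diff).
import os

def load_cached_dicts(cache_loc):
    """Return (by_name, by_param, param_names) or None if the cache is incomplete."""
    paths = [os.path.join(cache_loc, name)
             for name in ("by_name.txt", "by_param.txt", "param_names.txt")]
    if not all(os.path.isfile(p) and os.path.getsize(p) > 0 for p in paths):
        return None  # caller rebuilds the cache from the measurements
    loaded = []
    for path in paths:
        with open(path, "r") as f:
            # the script persists the dicts via str() and restores them via eval()
            loaded.append(eval(f.read()))
    return tuple(loaded)

def store_cached_dicts(cache_loc, by_name, by_param, param_names):
    """Write the three dictionaries to the cache directory."""
    for name, obj in (("by_name.txt", by_name),
                      ("by_param.txt", by_param),
                      ("param_names.txt", param_names)):
        with open(os.path.join(cache_loc, name), "w") as f:
            f.write(str(obj))
```

As in the diff, persistence relies on `str()`/`eval()` round-tripping of plain Python dicts; a structured format such as JSON would be a natural alternative.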
diff --git a/bin/Proof_Of_Concept_PELT.py b/bin/Proof_Of_Concept_PELT.py index de47d4a..75cdce6 100644 --- a/bin/Proof_Of_Concept_PELT.py +++ b/bin/Proof_Of_Concept_PELT.py @@ -1,4 +1,5 @@ import json +import os import time import sys import getopt @@ -9,7 +10,12 @@ from sklearn.cluster import AgglomerativeClustering import matplotlib.pyplot as plt import ruptures as rpt import numpy as np + +from dfatool.functions import analytic from dfatool.loader import RawData +from dfatool import parameters +from dfatool.model import ParallelParamFit +from dfatool.utils import by_name_to_by_param # from scipy.cluster.hierarchy import dendrogram, linkage # for graphical display @@ -396,6 +402,8 @@ if __name__ == '__main__': "pen_modifier= " "plotting= " "refinement_thresh= " + "cache_dicts " + "cache_loc= " ) opt_filename = None opt_verbose = False @@ -412,6 +420,7 @@ if __name__ == '__main__': opt_pen_modifier = None opt_plotting = False opt_refinement_thresh = None + opt_cache_loc = None try: raw_opts, args = getopt.getopt(sys.argv[1:], "", optspec.split(" ")) @@ -495,6 +504,12 @@ if __name__ == '__main__': except ValueError as verr: print(verr, file=sys.stderr) sys.exit(-1) + if 'cache_dicts' in opt: + if 'cache_loc' in opt: + opt_cache_loc = opt['cache_loc'] + else: + print_error("If \"cache_dicts\" is set, \"cache_loc\" must be provided.") + sys.exit(-1) except getopt.GetoptError as err: print(err, file=sys.stderr) sys.exit(-1) @@ -506,250 +521,453 @@ if __name__ == '__main__': "Will only refine the state which is present in " + opt_filename + " if necessary.") with open(opt_filename, 'r') as f: configurations = json.load(f) + + # for i in range(0, 7): + # signal = np.array(configurations[i]['offline'][0]['uW']) + # plt.plot(signal) + # plt.xlabel('Time [us]') + # plt.ylabel('Power [mW]') + # plt.show() + # sys.exit() + # loop through all traces check if refinement is necessary - resulting_sequence_list = [] - for num_config, measurements_by_config in enumerate(configurations): - # loop through all occurrences of the looked at state - print_info("Looking at state '" + measurements_by_config['name'] + "' with params: " - + str(measurements_by_config['parameter'])) - refine = False - print_info("Checking if refinement is necessary...") - for measurement in measurements_by_config['offline']: - # loop through measurements of particular state - # an check if state needs refinement - signal = measurement['uW'] - # mean = measurement['uW_mean'] - if needs_refinement(signal, opt_refinement_thresh) and not refine: - print_info("Refinement is necessary!") - refine = True - if not refine: - print_info("No refinement necessary for state '" + measurements_by_config['name'] - + "' with params: " + str(measurements_by_config['parameter'])) + # resulting_sequence_list = [] + # search for param_names, by_param and by_name files + by_param_file = None + by_name_file = None + param_names_file = None + if opt_cache_loc is not None: + flag = False + by_name_loc = os.path.join(opt_cache_loc, "by_name.txt") + by_param_loc = os.path.join(opt_cache_loc, "by_param.txt") + param_names_loc = os.path.join(opt_cache_loc, "param_names.txt") + if os.path.isfile(by_name_loc) and os.path.getsize(by_name_loc) > 0: + by_name_file = open(by_name_loc, "r") + else: + print_error("In " + opt_cache_loc + " is no by_name.txt.") + flag = True + if os.path.isfile(by_param_loc) and os.path.getsize(by_param_loc) > 0: + by_param_file = open(by_param_loc, "r") + else: + print_error("In " + opt_cache_loc + " is no by_param.txt.") + flag = True 
+ if os.path.isfile(param_names_loc) and os.path.getsize(param_names_loc) > 0: + param_names_file = open(param_names_loc, "r") else: - # assume that all measurements of the same param configuration are fundamentally - # similar -> calculate penalty for first measurement, use it for all - if opt_pen_override is None: - signal = np.array(measurements_by_config['offline'][0]['uW']) - normed_signal = norm_signal(signal) - penalty = calculate_penalty_value(normed_signal, model=opt_model, - range_min=opt_range_min, - range_max=opt_range_max, - num_processes=opt_num_processes, - jump=opt_jump, S=opt_S, - pen_modifier=opt_pen_modifier) - penalty = penalty[0] + print_error("In " + opt_cache_loc + " is no param_names.txt.") + flag = True + if flag: + print_info("The cache will be build.") + + if None in (by_param_file, by_name_file, param_names_file): + state_durations_by_config = [] + state_consumptions_by_config = [] + for num_config, measurements_by_config in enumerate(configurations): + # loop through all occurrences of the looked at state + print_info("Looking at state '" + measurements_by_config['name'] + "' with params: " + + str(measurements_by_config['parameter']) + "(" + str(num_config + 1) + "/" + + str(len(configurations)) + ")") + refine = False + print_info("Checking if refinement is necessary...") + for measurement in measurements_by_config['offline']: + # loop through measurements of particular state + # an check if state needs refinement + signal = measurement['uW'] + # mean = measurement['uW_mean'] + if needs_refinement(signal, opt_refinement_thresh) and not refine: + print_info("Refinement is necessary!") + refine = True + if not refine: + print_info("No refinement necessary for state '" + measurements_by_config['name'] + + "' with params: " + str(measurements_by_config['parameter'])) else: - penalty = opt_pen_override - # build arguments for parallel excecution - print_info("Starting raw_states calculation.") - raw_states_calc_args = [] - for num_measurement, measurement in enumerate(measurements_by_config['offline']): - raw_states_calc_args.append((num_measurement, measurement, penalty, - opt_model, opt_jump)) - - raw_states_list = [None] * len(measurements_by_config['offline']) - raw_states_res = calc_raw_states(raw_states_calc_args, opt_num_processes) - # extracting result and putting it in correct order -> index of raw_states_list - # entry still corresponds with index of measurement in measurements_by_states - # -> If measurements are discarded the correct ones are easily recognized - for ret_val in raw_states_res: - num_trace = ret_val[0] - raw_states = ret_val[1] - avg_std = ret_val[2] - change_avg_std = ret_val[3] - # TODO: Wieso gibt mir meine IDE hier eine Warning aus? Der Index müsste doch - # int sein oder nicht? Es scheint auch vernünftig zu klappen... - raw_states_list[num_trace] = raw_states - print_info("The average standard deviation for the newly found states in " - + "measurement No. " + str(num_trace) + " is " + str(avg_std)) - print_info("That is a reduction of " + str(change_avg_std)) - print_info("Finished raw_states calculation.") - num_states_array = [int()] * len(raw_states_list) - i = 0 - for i, x in enumerate(raw_states_list): - num_states_array[i] = len(x) - avg_num_states = np.mean(num_states_array) - num_states_dev = np.std(num_states_array) - print_info("On average " + str(avg_num_states) - + " States have been found. 
The standard deviation" - + " is " + str(num_states_dev)) - # TODO: MAGIC NUMBER - if num_states_dev > 1: - print_warning("The number of states varies strongly across measurements." - " Consider choosing a larger value for S or using the " - "pen_modifier option.") - time.sleep(5) - # TODO: Wie bekomme ich da jetzt raus, was die Wahrheit ist? - # Einfach Durchschnitt nehmen? - # Preliminary decision: Further on only use the traces, which have the most frequent state count - counts = np.bincount(num_states_array) - num_raw_states = np.argmax(counts) - print_info("Choose " + str(num_raw_states) + " as number of raw_states.") - i = 0 - cluster_labels_list = [] - num_cluster_list = [] - for num_trace, raw_states in enumerate(raw_states_list): - # iterate through raw states from measurements - if len(raw_states) == num_raw_states: - # build array with power values to cluster these - value_to_cluster = np.zeros((num_raw_states, 2)) - j = 0 - for s in raw_states: - value_to_cluster[j][0] = s[2] - value_to_cluster[j][1] = 0 - j = j + 1 - # linked = linkage(value_to_cluster, 'single') - # - # labelList = range(1, 11) - # - # plt.figure(figsize=(10, 7)) - # dendrogram(linked, - # orientation='top', - # distance_sort='descending', - # show_leaf_counts=True) - # plt.show() - # TODO: Automatic detection of number of clusters. Aktuell noch MAGIC NUMBER - # im distance_threshold - cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True, - affinity='euclidean', - linkage='ward', - distance_threshold=opt_refinement_thresh * 100) - # cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean', - # linkage='ward') - cluster.fit_predict(value_to_cluster) - # print_info("Cluster labels:\n" + str(cluster.labels_)) - # plt.scatter(value_to_cluster[:, 0], value_to_cluster[:, 1], c=cluster.labels_, cmap='rainbow') - # plt.show() - cluster_labels_list.append((num_trace, cluster.labels_)) - num_cluster_list.append((num_trace, cluster.n_clusters_)) - i = i + 1 + # assume that all measurements of the same param configuration are fundamentally + # similar -> calculate penalty for first measurement, use it for all + if opt_pen_override is None: + signal = np.array(measurements_by_config['offline'][0]['uW']) + normed_signal = norm_signal(signal) + penalty = calculate_penalty_value(normed_signal, model=opt_model, + range_min=opt_range_min, + range_max=opt_range_max, + num_processes=opt_num_processes, + jump=opt_jump, S=opt_S, + pen_modifier=opt_pen_modifier) + penalty = penalty[0] else: - print_info("Discarding measurement No. " + str(num_trace) + " because it " - + "did not recognize the number of raw_states correctly.") - num_used_measurements = len(raw_states_list) - if i != len(raw_states_list): - if i / len(raw_states_list) <= 0.5: - print_warning("Only used " + str(i) + "/" + str(len(raw_states_list)) - + " Measurements for refinement. " - "Others did not recognize number of states correctly." 
- "\nYou should verify the integrity of the measurements.") + penalty = opt_pen_override + # build arguments for parallel excecution + print_info("Starting raw_states calculation.") + raw_states_calc_args = [] + for num_measurement, measurement in enumerate(measurements_by_config['offline']): + raw_states_calc_args.append((num_measurement, measurement, penalty, + opt_model, opt_jump)) + + raw_states_list = [None] * len(measurements_by_config['offline']) + raw_states_res = calc_raw_states(raw_states_calc_args, opt_num_processes) + # extracting result and putting it in correct order -> index of raw_states_list + # entry still corresponds with index of measurement in measurements_by_states + # -> If measurements are discarded the correct ones are easily recognized + for ret_val in raw_states_res: + num_trace = ret_val[0] + raw_states = ret_val[1] + avg_std = ret_val[2] + change_avg_std = ret_val[3] + # TODO: Wieso gibt mir meine IDE hier eine Warning aus? Der Index müsste doch + # int sein oder nicht? Es scheint auch vernünftig zu klappen... + raw_states_list[num_trace] = raw_states + print_info("The average standard deviation for the newly found states in " + + "measurement No. " + str(num_trace) + " is " + str(avg_std)) + print_info("That is a reduction of " + str(change_avg_std)) + print_info("Finished raw_states calculation.") + num_states_array = [int()] * len(raw_states_list) + i = 0 + for i, x in enumerate(raw_states_list): + num_states_array[i] = len(x) + avg_num_states = np.mean(num_states_array) + num_states_dev = np.std(num_states_array) + print_info("On average " + str(avg_num_states) + + " States have been found. The standard deviation" + + " is " + str(num_states_dev)) + # TODO: MAGIC NUMBER + if num_states_dev > 1: + print_warning("The number of states varies strongly across measurements." + " Consider choosing a larger value for S or using the " + "pen_modifier option.") + time.sleep(5) + # TODO: Wie bekomme ich da jetzt raus, was die Wahrheit ist? + # Einfach Durchschnitt nehmen? + # Preliminary decision: Further on only use the traces, which have the most + # frequent state count + counts = np.bincount(num_states_array) + num_raw_states = np.argmax(counts) + print_info("Choose " + str(num_raw_states) + " as number of raw_states.") + # iterate through all found breakpoints and determine start and end points as well + # as power consumption + states_duration_list = [0] * num_raw_states + states_consumption_list = [0] * num_raw_states + num_used_measurements = 0 + for num_trace, raw_states in enumerate(raw_states_list): + if len(raw_states) == num_raw_states: + num_used_measurements = num_used_measurements + 1 + # calced_state = (start_time, end_time, mean_power, std_dev) + for num_state, s in enumerate(raw_states): + state_duration = s[1] - s[0] + state_consumption = s[2] + states_duration_list[num_state] = \ + states_duration_list[num_state] + state_duration + states_consumption_list[num_state] = \ + states_consumption_list[num_state] + state_consumption + else: + print_info("Discarding measurement No. 
" + str(num_trace) + " because it " + + "did not recognize the number of raw_states correctly.") + for i, x in enumerate(states_duration_list): + states_duration_list[i] = x / num_used_measurements + for i, x in enumerate(states_consumption_list): + states_consumption_list[i] = x / num_used_measurements + if num_used_measurements != len(raw_states_list): + if num_used_measurements / len(raw_states_list) <= 0.5: + print_warning("Only used " + str(num_used_measurements) + "/" + + str(len(raw_states_list)) + " Measurements for refinement. " + + "Others did not recognize number of states correctly." + + "\nYou should verify the integrity of the measurements.") + else: + print_info("Used " + str(num_used_measurements) + "/" + + str(len(raw_states_list)) + " Measurements for refinement. " + + "Others did not recognize number of states correctly.") + num_used_measurements = i + # TODO: DEBUG Kram + sys.exit(0) else: - print_info("Used " + str(i) + "/" + str(len(raw_states_list)) - + " Measurements for refinement. " - "Others did not recognize number of states correctly.") - num_used_measurements = i - # TODO: DEBUG Kram - sys.exit(0) + print_info("Used all available measurements.") + + state_durations_by_config.append((num_config, states_duration_list)) + state_consumptions_by_config.append((num_config, states_consumption_list)) + # # TODO: + # if num_config == 6: + # print("BRECHE AUS") + # break + + # combine all state durations and consumptions to parametrized model + + # this is only necessary because at this state only linear automatons can be modeled. + num_states_array = [int()] * len(state_consumptions_by_config) + for i, (_, states_consumption_list) in enumerate(state_consumptions_by_config): + num_states_array[i] = len(states_consumption_list) + counts = np.bincount(num_states_array) + num_raw_states = np.argmax(counts) + usable_configs = len(state_consumptions_by_config) + # param_list identical for each raw_state + # TODO: Kann man die echt einfach rausziehen aus der json? Ich hab sie nicht gefunden... + # Nur für jede Messung. Aber da sind die ja ohnehin identisch. + param_list = [] + param_names = configurations[0]['offline_aggregates']['paramkeys'][0] + print_info("param_names: " + str(param_names)) + for num_config, states_consumption_list in state_consumptions_by_config: + if len(states_consumption_list) != num_raw_states: + print_warning("Config No." + str(num_config) + " not usable yet due to different " + + "number of states. This hints a correlation between parameters and " + + "the structure of the resulting automaton. 
This will be possibly be" + + " supported in a future version of this tool.") + usable_configs = usable_configs - 1 else: - print_info("Used all available measurements.") - - num_states = np.argmax(np.bincount([elem[1] for elem in num_cluster_list])) - avg_per_state_list = [None] * len(cluster_labels_list) - used_clusters = 0 - for number, (num_trace, labels) in enumerate(cluster_labels_list): - if num_cluster_list[number][1] == num_states: - avg_per_state = [0] * num_states - count_per_state = [0] * num_states - raw_states = raw_states_list[num_trace] - for num_label, label in enumerate(labels): - count_per_state[label] = count_per_state[label] + 1 - avg_per_state[label] = avg_per_state[label] + raw_states[num_label][2] - for i, _ in enumerate(avg_per_state): - avg_per_state[i] = avg_per_state[i] / count_per_state[i] - avg_per_state_list[number] = avg_per_state - used_clusters = used_clusters + 1 - else: - # hopefully this does not happen regularly - print_info("Discarding measurement " + str(number) - + " because the clustering yielded not matching results.") - num_used_measurements = num_used_measurements - 1 - if num_used_measurements == 0: - print_error("Something went terribly wrong. Discarded all measurements.") - # continue - sys.exit(-1) - # flattend version for clustering: - values_to_cluster = np.zeros((num_states * used_clusters, 2)) - index = 0 - for avg_per_state in avg_per_state_list: - if avg_per_state is not None: - for avg in avg_per_state: - values_to_cluster[index][0] = avg - values_to_cluster[index][1] = 0 - index = index + 1 - # plt.scatter(values_to_cluster[:, 0], values_to_cluster[:, 1]) - # plt.show() - cluster = AgglomerativeClustering(n_clusters=num_states) - cluster.fit_predict(values_to_cluster) - # Aktuell hast du hier ein plattes Array mit labels. Jetzt also das wieder auf die - # ursprünglichen Labels abbilden, die dann verändern mit den hier gefundenen Labels. - # Alle identischen Zustände haben identische Labels. Dann vllt bei resulting - # sequence ausgeben, wie groß die übereinstimmung bei der Stateabfolge ist. - new_labels_list = [] - new_labels = [] - i = 0 - for label in cluster.labels_: - new_labels.append(label) - i = i + 1 - if i == num_states: - new_labels_list.append(new_labels) - new_labels = [] - i = 0 - # only the selected measurements are present in new_labels. - # new_labels_index should not be incremented, if not selected_measurement is skipped - new_labels_index = 0 - # cluster_labels_list contains all measurements -> if measurement is skipped - # still increment the index - index = 0 - for elem in avg_per_state_list: - if elem is not None: - for number, label in enumerate(cluster_labels_list[index][1]): - cluster_labels_list[index][1][number] = \ - new_labels_list[new_labels_index][label] - new_labels_index = new_labels_index + 1 - else: - # override not selected measurement labels to avoid choosing the wrong ones. - for number, label in enumerate(cluster_labels_list[index][1]): - cluster_labels_list[index][1][number] = -1 - index = index + 1 - resulting_sequence = [None] * num_raw_states - i = 0 - confidence = 0 - for x in resulting_sequence: - j = 0 - test_list = [] - for arr in [elem[1] for elem in cluster_labels_list]: - if num_cluster_list[j][1] != num_states: - j = j + 1 - else: - if -1 in arr: - print_error("Bei Janis beschweren! 
Fehler beim Umbenennen der" - " Zustände wahrscheinlich.") - sys.exit(-1) - test_list.append(arr[i]) - j = j + 1 - bincount = np.bincount(test_list) - resulting_sequence[i] = np.argmax(bincount) - confidence = confidence + bincount[resulting_sequence[i]] / np.sum(bincount) - i = i + 1 - confidence = confidence / len(resulting_sequence) - print_info("Confidence of resulting sequence is " + str(confidence) - + " while using " + str(num_used_measurements) + "/" - + str(len(raw_states_list)) + " measurements.") - #print(resulting_sequence) - resulting_sequence_list.append((num_config, resulting_sequence)) - # TODO: Was jetzt? Hier habe ich jetzt pro Konfiguration eine Zustandsfolge. Daraus Automat - # erzeugen. Aber wie? Oder erst parametrisieren? Eigentlich brauche ich vorher die - # Loops. Wie erkenne ich die? Es können beliebig viele Loops an beliebigen Stellen - # auftreten. - # TODO: Die Zustandsfolgen werden sich nicht einfach in isomorphe(-einzelne wegfallende bzw. - # hinzukommende Zustände) Automaten übersetzten lassen. Basiert alles auf dem Problem: - # wie erkenne ich, dass zwei Zustände die selben sind und nicht nur einfach eine ähnliche - # Leistungsaufnahme haben?! Vllt Zustände 2D clustern? 1Dim = Leistungsaufnahme, - # 2Dim=Dauer? Zumindest innerhalb einer Paramkonfiguration sollte sich die Dauer eines - # Zustands ja nicht mehr ändern. Kann sicherlich immernoch Falschclustering erzeugen... - for num_config, sequence in resulting_sequence_list: - print_info("NO. config:" + str(num_config)) - print_info(sequence) + param_list.append(configurations[num_config]['offline_aggregates']['param'][0]) + print_info("param_list: " + str(param_list)) + + if usable_configs == len(state_consumptions_by_config): + print_info("All configs usable.") + else: + print_info("Using only " + str(usable_configs) + " Configs.") + by_name = {} + for i in range(num_raw_states): + consumptions_for_state = [] + durations_for_state = [] + for j, (_, states_consumption_list) in enumerate(state_consumptions_by_config): + consumptions_for_state.append(states_consumption_list[i]) + durations_for_state.append(state_durations_by_config[j][1][i]) + name = "state_" + str(i) + state_dict = { + "param": param_list, + "power": consumptions_for_state, + "duration": durations_for_state, + "attributes": ["power", "duration"] + } + by_name[name] = state_dict + by_param = by_name_to_by_param(by_name) + if opt_cache_loc is not None: + by_name_loc = os.path.join(opt_cache_loc, "by_name.txt") + by_param_loc = os.path.join(opt_cache_loc, "by_param.txt") + param_names_loc = os.path.join(opt_cache_loc, "param_names.txt") + f = open(by_name_loc, "w") + f.write(str(by_name)) + f.close() + f = open(by_param_loc, "w") + f.write(str(by_param)) + f.close() + f = open(param_names_loc, "w") + f.write(str(param_names)) + f.close() + else: + by_name_text = str(by_name_file.read()) + by_name = eval(by_name_text) + by_param_text = str(by_param_file.read()) + by_param = eval(by_param_text) + param_names_text = str(param_names_file.read()) + param_names = eval(param_names_text) + + # t = 0 + # last_pow = 0 + # for key in by_name.keys(): + # end_t = t + np.mean(by_name[key]["duration"]) + # power = np.mean(by_name[key]["power"]) + # plt.vlines(t, min(last_pow, power), max(last_pow, power)) + # plt.hlines(power, t, end_t) + # t = end_t + # last_pow = power + # plt.show() + stats = parameters.ParamStats(by_name, by_param, param_names, dict()) + paramfit = ParallelParamFit(by_param) + for state_name in by_name.keys(): + for num_param, param_name 
in enumerate(param_names): + if stats.depends_on_param(state_name, "power", param_name): + paramfit.enqueue(state_name, "power", num_param, param_name) + if stats.depends_on_param(state_name, "duration", param_name): + paramfit.enqueue(state_name, "duration", num_param, param_name) + print_info("State " + state_name + "s power depends on param " + param_name + ":" + + str(stats.depends_on_param(state_name, "power", param_name)) + ) + print_info("State " + state_name + "s duration depends on param " + param_name + ":" + + str(stats.depends_on_param(state_name, "duration", param_name)) + ) + paramfit.fit() + fit_res_dur_list = [] + fit_res_pow_list = [] + for state_name in by_name.keys(): + fit_power = paramfit.get_result(state_name, "power") + fit_duration = paramfit.get_result(state_name, "duration") + combined_fit_power = analytic.function_powerset(fit_power, param_names, 0) + combined_fit_duration = analytic.function_powerset(fit_duration, param_names, 0) + combined_fit_power.fit(by_param, state_name, "power") + if not combined_fit_power.fit_success: + print_warning("Fitting(power) for state " + state_name + " was not succesful!") + combined_fit_duration.fit(by_param, state_name, "duration") + if not combined_fit_duration.fit_success: + print_warning("Fitting(duration) for state " + state_name + " was not succesful!") + fit_res_pow_list.append(combined_fit_power) + fit_res_dur_list.append(combined_fit_duration) + + + # TODO: removed clustering (temporarily), since it provided too much dificultys + # at the current state + # i = 0 + # cluster_labels_list = [] + # num_cluster_list = [] + # for num_trace, raw_states in enumerate(raw_states_list): + # # iterate through raw states from measurements + # if len(raw_states) == num_raw_states: + # # build array with power values to cluster these + # value_to_cluster = np.zeros((num_raw_states, 2)) + # j = 0 + # for s in raw_states: + # value_to_cluster[j][0] = s[2] + # value_to_cluster[j][1] = 0 + # j = j + 1 + # # linked = linkage(value_to_cluster, 'single') + # # + # # labelList = range(1, 11) + # # + # # plt.figure(figsize=(10, 7)) + # # dendrogram(linked, + # # orientation='top', + # # distance_sort='descending', + # # show_leaf_counts=True) + # # plt.show() + # # TODO: Automatic detection of number of clusters. Aktuell noch MAGIC NUMBER + # # im distance_threshold + # cluster = AgglomerativeClustering(n_clusters=None, compute_full_tree=True, + # affinity='euclidean', + # linkage='ward', + # distance_threshold=opt_refinement_thresh * 100) + # # cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean', + # # linkage='ward') + # cluster.fit_predict(value_to_cluster) + # # print_info("Cluster labels:\n" + str(cluster.labels_)) + # # plt.scatter(value_to_cluster[:, 0], value_to_cluster[:, 1], c=cluster.labels_, cmap='rainbow') + # # plt.show() + # cluster_labels_list.append((num_trace, cluster.labels_)) + # num_cluster_list.append((num_trace, cluster.n_clusters_)) + # i = i + 1 + # else: + # print_info("Discarding measurement No. " + str(num_trace) + " because it " + # + "did not recognize the number of raw_states correctly.") + # num_used_measurements = len(raw_states_list) + # if i != len(raw_states_list): + # if i / len(raw_states_list) <= 0.5: + # print_warning("Only used " + str(i) + "/" + str(len(raw_states_list)) + # + " Measurements for refinement. " + # "Others did not recognize number of states correctly." 
+ # "\nYou should verify the integrity of the measurements.") + # else: + # print_info("Used " + str(i) + "/" + str(len(raw_states_list)) + # + " Measurements for refinement. " + # "Others did not recognize number of states correctly.") + # num_used_measurements = i + # # TODO: DEBUG Kram + # sys.exit(0) + # else: + # print_info("Used all available measurements.") + # + # num_states = np.argmax(np.bincount([elem[1] for elem in num_cluster_list])) + # avg_per_state_list = [None] * len(cluster_labels_list) + # used_clusters = 0 + # for number, (num_trace, labels) in enumerate(cluster_labels_list): + # if num_cluster_list[number][1] == num_states: + # avg_per_state = [0] * num_states + # count_per_state = [0] * num_states + # raw_states = raw_states_list[num_trace] + # for num_label, label in enumerate(labels): + # count_per_state[label] = count_per_state[label] + 1 + # avg_per_state[label] = avg_per_state[label] + raw_states[num_label][2] + # for i, _ in enumerate(avg_per_state): + # avg_per_state[i] = avg_per_state[i] / count_per_state[i] + # avg_per_state_list[number] = avg_per_state + # used_clusters = used_clusters + 1 + # else: + # # hopefully this does not happen regularly + # print_info("Discarding measurement " + str(number) + # + " because the clustering yielded not matching results.") + # num_used_measurements = num_used_measurements - 1 + # if num_used_measurements == 0: + # print_error("Something went terribly wrong. Discarded all measurements.") + # # continue + # sys.exit(-1) + # # flattend version for clustering: + # values_to_cluster = np.zeros((num_states * used_clusters, 2)) + # index = 0 + # for avg_per_state in avg_per_state_list: + # if avg_per_state is not None: + # for avg in avg_per_state: + # values_to_cluster[index][0] = avg + # values_to_cluster[index][1] = 0 + # index = index + 1 + # # plt.scatter(values_to_cluster[:, 0], values_to_cluster[:, 1]) + # # plt.show() + # cluster = AgglomerativeClustering(n_clusters=num_states) + # cluster.fit_predict(values_to_cluster) + # # Aktuell hast du hier ein plattes Array mit labels. Jetzt also das wieder auf die + # # ursprünglichen Labels abbilden, die dann verändern mit den hier gefundenen Labels. + # # Alle identischen Zustände haben identische Labels. Dann vllt bei resulting + # # sequence ausgeben, wie groß die übereinstimmung bei der Stateabfolge ist. + # new_labels_list = [] + # new_labels = [] + # i = 0 + # for label in cluster.labels_: + # new_labels.append(label) + # i = i + 1 + # if i == num_states: + # new_labels_list.append(new_labels) + # new_labels = [] + # i = 0 + # # only the selected measurements are present in new_labels. + # # new_labels_index should not be incremented, if not selected_measurement is skipped + # new_labels_index = 0 + # # cluster_labels_list contains all measurements -> if measurement is skipped + # # still increment the index + # index = 0 + # for elem in avg_per_state_list: + # if elem is not None: + # for number, label in enumerate(cluster_labels_list[index][1]): + # cluster_labels_list[index][1][number] = \ + # new_labels_list[new_labels_index][label] + # new_labels_index = new_labels_index + 1 + # else: + # # override not selected measurement labels to avoid choosing the wrong ones. 
+ # for number, label in enumerate(cluster_labels_list[index][1]): + # cluster_labels_list[index][1][number] = -1 + # index = index + 1 + # resulting_sequence = [None] * num_raw_states + # i = 0 + # confidence = 0 + # for x in resulting_sequence: + # j = 0 + # test_list = [] + # for arr in [elem[1] for elem in cluster_labels_list]: + # if num_cluster_list[j][1] != num_states: + # j = j + 1 + # else: + # if -1 in arr: + # print_error("Bei Janis beschweren! Fehler beim Umbenennen der" + # " Zustände wahrscheinlich.") + # sys.exit(-1) + # test_list.append(arr[i]) + # j = j + 1 + # bincount = np.bincount(test_list) + # resulting_sequence[i] = np.argmax(bincount) + # confidence = confidence + bincount[resulting_sequence[i]] / np.sum(bincount) + # i = i + 1 + # confidence = confidence / len(resulting_sequence) + # print_info("Confidence of resulting sequence is " + str(confidence) + # + " while using " + str(num_used_measurements) + "/" + # + str(len(raw_states_list)) + " measurements.") + # #print(resulting_sequence) + # resulting_sequence_list.append((num_config, resulting_sequence)) + # # TODO: Was jetzt? Hier habe ich jetzt pro Konfiguration eine Zustandsfolge. Daraus Automat + # # erzeugen. Aber wie? Oder erst parametrisieren? Eigentlich brauche ich vorher die + # # Loops. Wie erkenne ich die? Es können beliebig viele Loops an beliebigen Stellen + # # auftreten. + # # TODO: Die Zustandsfolgen werden sich nicht einfach in isomorphe(-einzelne wegfallende bzw. + # # hinzukommende Zustände) Automaten übersetzten lassen. Basiert alles auf dem Problem: + # # wie erkenne ich, dass zwei Zustände die selben sind und nicht nur einfach eine ähnliche + # # Leistungsaufnahme haben?! Vllt Zustände 2D clustern? 1Dim = Leistungsaufnahme, + # # 2Dim=Dauer? Zumindest innerhalb einer Paramkonfiguration sollte sich die Dauer eines + # # Zustands ja nicht mehr ändern. Kann sicherlich immernoch Falschclustering erzeugen... + # for num_config, sequence in resulting_sequence_list: + # print_info("NO. config:" + str(num_config)) + # print_info(sequence) + # + # + # + # + elif ".tar" in opt_filename: # open with dfatool |
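Beyond the caching and the (temporarily disabled) clustering, the core addition of this commit is the parameter fit over the averaged per-state power and duration values. The sketch below condenses that flow, using the dfatool calls exactly as they appear in the diff (`ParamStats`, `ParallelParamFit`, `analytic.function_powerset`, `by_name_to_by_param`); the wrapper function and its return value are illustrative, not part of the script:

```python
# Condensed sketch of the fitting pipeline this commit adds; by_name and
# param_names are assumed to be built (or loaded from the cache) as shown above.
from dfatool import parameters
from dfatool.functions import analytic
from dfatool.model import ParallelParamFit
from dfatool.utils import by_name_to_by_param

def fit_states(by_name, param_names):
    by_param = by_name_to_by_param(by_name)
    stats = parameters.ParamStats(by_name, by_param, param_names, dict())
    paramfit = ParallelParamFit(by_param)
    # enqueue one fit per (state, attribute, parameter) that shows a dependency
    for state_name in by_name:
        for num_param, param_name in enumerate(param_names):
            for attribute in ("power", "duration"):
                if stats.depends_on_param(state_name, attribute, param_name):
                    paramfit.enqueue(state_name, attribute, num_param, param_name)
    paramfit.fit()
    # combine the per-parameter fits into one analytic function per state/attribute
    results = {}
    for state_name in by_name:
        for attribute in ("power", "duration"):
            fit = paramfit.get_result(state_name, attribute)
            combined = analytic.function_powerset(fit, param_names, 0)
            combined.fit(by_param, state_name, attribute)
            if not combined.fit_success:
                print("Fitting(" + attribute + ") for state " + state_name
                      + " was not successful!")
            results[(state_name, attribute)] = combined
    return results
```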