summaryrefslogtreecommitdiff
path: root/lib/pelt.py
diff options
context:
space:
mode:
authorDaniel Friesel <daniel.friesel@uos.de>2021-02-12 14:00:59 +0100
committerDaniel Friesel <daniel.friesel@uos.de>2021-02-12 14:00:59 +0100
commit1385c61eca928ac213c0faa7198f3d7161500652 (patch)
tree8435002ac67bb6a3967913b9921769b0d4ecfdc2 /lib/pelt.py
parentac6d9225347af3756e61fae32d0c32d4ce73a479 (diff)
PELT: use variable penalty for benchmark repetitions
Previously, the first measurement of each by_param entry determined its penalty, and all subsequent measurements in it were analyzed with this penalty. This led to some of them coming up with a different number of sub-states and being ignored. Now, all by_param entries are passed to the kneedle / plateau algorithm, and each gets its own penalty. In most cases, this leads to all entries reporting the same number of sub-states, so far fewer need to be ignored.
Diffstat (limited to 'lib/pelt.py')
-rw-r--r--lib/pelt.py73
1 files changed, 42 insertions, 31 deletions
diff --git a/lib/pelt.py b/lib/pelt.py
index 500ae13..c7bd179 100644
--- a/lib/pelt.py
+++ b/lib/pelt.py
@@ -46,6 +46,9 @@ class PELT:
self.with_multiprocessing = True
self.__dict__.update(kwargs)
+ self.jump = int(self.jump)
+ self.min_dist = int(self.min_dist)
+
if os.getenv("DFATOOL_PELT_MODEL"):
# https://centre-borelli.github.io/ruptures-docs/user-guide/costs/costl1/
self.model = os.getenv("DFATOOL_PELT_MODEL")
@@ -89,10 +92,11 @@ class PELT:
signal,
)
- if self.num_samples is not None and len(signal) > self.num_samples:
- self.jump = len(signal) // int(self.num_samples)
- else:
- self.jump = 1
+ if self.num_samples is not None:
+ if len(signal) > self.num_samples:
+ self.jump = len(signal) // int(self.num_samples)
+ else:
+ self.jump = 1
if self.algo == "dynp":
# https://centre-borelli.github.io/ruptures-docs/user-guide/detection/dynp/
@@ -172,49 +176,56 @@ class PELT:
prev_val = num_changepoints
middle_of_plateau = longest_start + (longest_start - longest_start) // 2
changepoints = np.array(changepoints_by_penalty[middle_of_plateau])
- return middle_of_plateau, changepoints
+ return middle_of_plateau, changepoints_by_penalty
def get_changepoints(self, signal, **kwargs):
- _, changepoints = self.get_penalty_and_changepoints(signal, **kwargs)
- return changepoints
+ penalty, changepoints_by_penalty = self.get_penalty_and_changepoints(
+ signal, **kwargs
+ )
+ return changepoints_by_penalty[penalty]
def get_penalty(self, signal, **kwargs):
penalty, _ = self.get_penalty_and_changepoints(signal, **kwargs)
return penalty
- def calc_raw_states(self, timestamps, signals, penalty, opt_model=None):
+ def calc_raw_states(
+ self,
+ timestamps,
+ signals,
+ changepoints_by_signal,
+ num_changepoints,
+ opt_model=None,
+ ):
"""
Calculate substates for signals (assumed to be long to a single parameter configuration).
:returns: List of substates with duration and mean power: [(substate 1 duration, substate 1 power), ...]
"""
- # imported here as ruptures is only used for changepoint detection.
- # This way, dfatool can be used without having ruptures installed as
- # long as --pelt isn't active.
- import ruptures
-
substate_data = list()
+ substate_counts = list()
+ usable_measurements = list()
+ expected_substate_count = num_changepoints
+
+ for i, changepoints in enumerate(changepoints_by_signal):
+ substates = list()
+ start_index = 0
+ end_index = 0
+ # calc metrics for all states
+ for changepoint in changepoints:
+ # start_index of state is end_index of previous one
+ # (Transitions are instantaneous)
+ start_index = end_index
+ end_index = changepoint - 1
+ substate = (start_index, end_index)
+ substates.append(substate)
+
+ substate_counts.append(len(substates))
+ if len(substates) == expected_substate_count:
+ usable_measurements.append((i, substates))
- raw_states_calc_args = list()
- for num_measurement, measurement in enumerate(signals):
- normed_signal = self.norm_signal(measurement)
- algo = ruptures.Pelt(
- model=self.model, jump=self.jump, min_size=self.min_dist
- ).fit(normed_signal)
- raw_states_calc_args.append((num_measurement, algo, penalty))
-
- raw_states_list = [None] * len(signals)
- with Pool() as pool:
- raw_states_res = pool.starmap(PELT_get_raw_states, raw_states_calc_args)
-
- substate_counts = list(map(lambda x: len(x[1]), raw_states_res))
- expected_substate_count = np.argmax(np.bincount(substate_counts))
- usable_measurements = list(
- filter(lambda x: len(x[1]) == expected_substate_count, raw_states_res)
- )
logger.debug(
- f" There are {expected_substate_count} substates (std = {np.std(substate_counts)}, {len(usable_measurements)}/{len(raw_states_res)} results are usable)"
+ f"{len(usable_measurements)} of {len(changepoints_by_signal)} measurements have {expected_substate_count} sub-states"
)
for i in range(expected_substate_count):