summary refs log tree commit diff
diff options
context:
space:
mode:
author Daniel Friesel <daniel.friesel@uos.de> 2021-03-05 15:35:44 +0100
committer Daniel Friesel <daniel.friesel@uos.de> 2021-03-05 15:35:44 +0100
commit 813c1508c200c6fc64a671b521af2eec522e9487 (patch)
tree ae6940a2ce6f20f76e6d4faaad5583848a155423
parent ebbdac3a505b0f02d194a44780375699b987aaf6 (diff)
promote sub-state models to a first-class modeling citizen
get_fitted_sub is no longer present, sub-state models are part of get_fitted now
-rwxr-xr-x bin/analyze-archive.py 43
-rw-r--r-- lib/functions.py 58
-rw-r--r-- lib/model.py 87
3 files changed, 97 insertions, 91 deletions
diff --git a/bin/analyze-archive.py b/bin/analyze-archive.py
index d7e6a59..a9ee5cf 100755
--- a/bin/analyze-archive.py
+++ b/bin/analyze-archive.py
@@ -47,6 +47,7 @@ from dfatool.functions import (
gplearn_to_function,
SplitFunction,
AnalyticFunction,
+ SubstateFunction,
StaticFunction,
)
from dfatool.model import PTAModel
@@ -866,15 +867,6 @@ if __name__ == "__main__":
safe_functions_enabled=safe_functions_enabled
)
- if args.with_substates:
- sub_model, sub_info = model.get_fitted_sub(
- safe_functions_enabled=safe_functions_enabled,
- state_duration=raw_data.setup_by_fileno[0]["state_duration"] * 1e3,
- )
-
- # substate_model = model.get_substates()
- # print(model.assess(substate_model, ref=model.sc_by_name))
-
if "paramdetection" in show_models or "all" in show_models:
for state in model.states_and_transitions:
for attribute in model.attributes(state):
@@ -931,6 +923,8 @@ if __name__ == "__main__":
print_splitinfo(
model.parameters, info, f"{state:10s} {attribute:15s}"
)
+ elif type(info) is SubstateFunction:
+ print(f"{state:10s} {attribute:15s}: Substate (TODO)")
for trans in model.transitions:
for attribute in model.attributes(trans):
info = param_info(trans, attribute)
@@ -940,6 +934,8 @@ if __name__ == "__main__":
print_splitinfo(
model.parameters, info, f"{trans:10s} {attribute:15s}"
)
+ elif type(info) is SubstateFunction:
+ print(f"{state:10s} {attribute:15s}: Substate (TODO)")
if args.with_substates:
for submodel in model.submodel_by_name.values():
sub_param_model, sub_param_info = submodel.get_fitted()
@@ -949,14 +945,21 @@ if __name__ == "__main__":
if type(info) is AnalyticFunction:
print(
"{:10s} {:15s}: {}".format(
- substate, subattribute, info.function.model_function
- )
- )
- print(
- "{:10s} {:15s} {}".format(
- "", "", info.function.model_args
+ substate, subattribute, info.model_function
)
)
+ print("{:10s} {:15s} {}".format("", "", info.model_args))
+
+ if args.with_substates:
+ for state in model.states:
+ if (
+ type(model.attr_by_name[state]["power"].model_function)
+ is SubstateFunction
+ ):
+ # sub-state models need to know the duration of the state / transition. only needed for eval.
+ model.attr_by_name[state]["power"].model_function.static_duration = (
+ raw_data.setup_by_fileno[0]["state_duration"] * 1e3
+ )
if xv_method == "montecarlo":
analytic_quality = xv.montecarlo(lambda m: m.get_fitted()[0], xv_count)
@@ -965,9 +968,6 @@ if __name__ == "__main__":
else:
analytic_quality = model.assess(param_model)
- if args.with_substates:
- sub_quality = model.assess(sub_model)
-
if "tex" in show_models or "tex" in show_quality:
print_text_model_data(
model,
@@ -1013,13 +1013,6 @@ if __name__ == "__main__":
[None, sub_param_info, None],
)
- if ("table" in show_quality or "all" in show_quality) and args.with_substates:
- model_quality_table(
- ["parameterized", "sub-states", "LUT"],
- [analytic_quality, sub_quality, lut_quality],
- [param_info, sub_info, None],
- )
-
if "overall" in show_quality or "all" in show_quality:
print("overall state static/param/lut MAE assuming equal state distribution:")
print(
diff --git a/lib/functions.py b/lib/functions.py
index 52d110b..4e0e8f7 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -284,7 +284,6 @@ class SplitFunction(ModelFunction):
ret.update(
{
"type": "split",
- "value": self.value,
"paramIndex": self.param_index,
"child": dict([[k, v.to_json()] for k, v in self.child.items()]),
}
@@ -303,6 +302,62 @@ class SplitFunction(ModelFunction):
return f"SplitFunction<{self.value}, param_index={self.param_index}>"
+class SubstateFunction(ModelFunction):
+ def __init__(self, value, sequence_by_count, count_model, sub_model):
+ super().__init__(value)
+ self.sequence_by_count = sequence_by_count
+ self.count_model = count_model
+ self.sub_model = sub_model
+
+ # only used by analyze-archive model quality evaluation. Not serialized.
+ self.static_duration = None
+
+ def is_predictable(self, param_list):
+ substate_count = round(self.count_model.eval(param_list))
+ return substate_count in self.sequence_by_count
+
+ def eval(self, param_list, duration=None):
+ substate_count = round(self.count_model.eval(param_list))
+ cumulative_energy = 0
+ total_duration = 0
+ substate_model, _ = self.sub_model.get_fitted()
+ substate_sequence = self.sequence_by_count[substate_count]
+ for i, sub_name in enumerate(substate_sequence):
+ sub_duration = substate_model(sub_name, "duration", param=param_list)
+ sub_power = substate_model(sub_name, "power", param=param_list)
+
+ if i == substate_count - 1:
+ if duration is not None:
+ sub_duration = duration - total_duration
+ elif self.static_duration is not None:
+ sub_duration = self.static_duration - total_duration
+
+ cumulative_energy += sub_power * sub_duration
+ total_duration += sub_duration
+
+ return cumulative_energy / total_duration
+
+ def to_json(self):
+ ret = super().to_json()
+ ret.update(
+ {
+ "type": "substate",
+ "sequence": self.sequence_by_count,
+ "countModel": self.count_model.to_json(),
+ "subModel": self.sub_model.to_json(),
+ }
+ )
+ return ret
+
+ @classmethod
+ def from_json(cls, data):
+ assert data["type"] == "substate"
+ raise NotImplementedError
+
+ def __repr__(self):
+ return "SubstateFunction"
+
+
class AnalyticFunction(ModelFunction):
"""
A multi-dimensional model function, generated from a string, which can be optimized using regression.
@@ -500,7 +555,6 @@ class AnalyticFunction(ModelFunction):
ret.update(
{
"type": "analytic",
- "value": self.value,
"functionStr": self.model_function,
"argCount": self._num_args,
"parameterNames": self._parameter_names,
diff --git a/lib/model.py b/lib/model.py
index 4c4c226..527a19e 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -4,7 +4,7 @@ import logging
import numpy as np
import os
from .automata import PTA, ModelAttribute
-from .functions import StaticFunction
+from .functions import StaticFunction, SubstateFunction
from .parameters import ParallelParamStats
from .paramfit import ParallelParamFit
from .utils import soft_cast_int, by_name_to_by_param, regression_measures
@@ -439,6 +439,7 @@ class PTAModel(AnalyticModel):
from .pelt import PELT
self.pelt = PELT(**pelt)
+ # must run before _compute_stats so that _compute_stats produces a "substate_count" model
self.find_substates()
else:
self.pelt = None
@@ -448,6 +449,9 @@ class PTAModel(AnalyticModel):
self._compute_stats(by_name)
if self.pelt is not None:
+ # cluster_substates uses self.attr_by_name[*]["power"].param_values, which is set by _compute_stats
+ # cluster_substates relies on fitted "substate_count" models, which are generated by get_fitted.
+ self.get_fitted()
# cluster_substates alters submodel_by_name, so we cannot use its keys() iterator.
names_with_submodel = list(self.submodel_by_name.keys())
for name in names_with_submodel:
@@ -465,60 +469,6 @@ class PTAModel(AnalyticModel):
for key in elem["attributes"]:
elem[key] = np.array(elem[key])
- def get_fitted_sub(
- self, use_mean=False, safe_functions_enabled=False, state_duration=None
- ):
-
- param_model_getter, param_info_getter = self.get_fitted(
- use_mean=use_mean, safe_functions_enabled=safe_functions_enabled
- )
-
- def model_getter(name, key, **kwargs):
- if key != "power":
- return param_model_getter(name, key, **kwargs)
-
- try:
- substate_count = round(param_model_getter(name, "substate_count"))
- except KeyError:
- return param_model_getter(name, key, **kwargs)
- if substate_count == 1:
- return param_model_getter(name, key, **kwargs)
-
- cumulative_energy = 0
- total_duration = 0
- substate_model, _ = self.submodel_by_name[name].get_fitted()
- substate_sequence = self.substate_sequence_by_nc[(name, substate_count)]
- for i, sub_name in enumerate(substate_sequence):
- sub_duration = substate_model(sub_name, "duration", **kwargs)
- sub_power = substate_model(sub_name, "power", **kwargs)
-
- if i == substate_count - 1:
- if "duration" in kwargs:
- sub_duration = kwargs["duration"] - total_duration
- elif name in self.states and state_duration is not None:
- sub_duration = state_duration - total_duration
-
- cumulative_energy += sub_power * sub_duration
- total_duration += sub_duration
-
- return cumulative_energy / total_duration
-
- def info_getter(name, key, **kwargs):
- if key != "power":
- return None
-
- try:
- substate_count = round(param_model_getter(name, "substate_count"))
- except KeyError:
- return None
- if substate_count == 1:
- return None
-
- # TODO
- return True
-
- return model_getter, info_getter
-
# This heuristic is very similar to the "function is not much better than
# median" checks in get_fitted. So far, doing it here as well is mostly
# a performance and not an algorithm quality decision.
@@ -669,13 +619,10 @@ class PTAModel(AnalyticModel):
# Schwankungen, die beim separaten Fitting zu unterschiedlichen Funktionen führen würden.
p_attr = self.attr_by_name[p_name]["power"]
p_params = list(set(map(tuple, p_attr.param_values)))
- p_param_index = dict()
- for i, p_param in enumerate(p_params):
- p_param_index[p_param] = i
sub_attr_by_function = dict()
static = submodel.get_static()
lut = submodel.get_param_lut(fallback=True)
- values_to_cluster = np.zeros((len(submodel.names), len(p_param_index)))
+ values_to_cluster = np.zeros((len(submodel.names), len(p_params)))
for i, name in enumerate(submodel.names):
for j, param in enumerate(p_params):
values_to_cluster[i, j] = lut(name, "duration", param=param)
@@ -699,7 +646,7 @@ class PTAModel(AnalyticModel):
if len(cl_substates) == 1:
clusters.append(cl_substates)
continue
- values_to_cluster = np.zeros((len(cl_substates), len(p_param_index)))
+ values_to_cluster = np.zeros((len(cl_substates), len(p_params)))
for i, name in enumerate(cl_substates):
for j, param in enumerate(p_params):
values_to_cluster[i, j] = lut(name, "power", param=param)
@@ -742,10 +689,22 @@ class PTAModel(AnalyticModel):
"power": powers,
}
self.submodel_by_name[p_name] = PTAModel(by_name, self.parameters, dict())
- for k in self.substate_sequence_by_nc.keys():
- self.substate_sequence_by_nc[k] = list(
- map(lambda x: new_subname_by_old[x], self.substate_sequence_by_nc[k])
- )
+ sequence_by_count = dict()
+ for name, count in self.substate_sequence_by_nc.keys():
+ if name == p_name:
+ sequence_by_count[int(count)] = list(
+ map(
+ lambda x: new_subname_by_old[x],
+ self.substate_sequence_by_nc[(name, count)],
+ )
+ )
+
+ self.attr_by_name[p_name]["power"].model_function = SubstateFunction(
+ self.attr_by_name[p_name]["power"].get_static(),
+ sequence_by_count,
+ self.attr_by_name[p_name]["substate_count"].model_function,
+ self.submodel_by_name[p_name],
+ )
# data[0] = [first sub-state, second sub-state, ...]
# data[1] = [first sub-state, second sub-state, ...]