Diffstat (limited to 'lib')
-rw-r--r--  lib/behaviour.py     | 388
-rw-r--r--  lib/cli.py           |  25
-rw-r--r--  lib/functions.py     |  25
-rw-r--r--  lib/loader/plain.py  | 136
-rw-r--r--  lib/model.py         |  88
-rw-r--r--  lib/parameters.py    |  15
-rw-r--r--  lib/paramfit.py      |   8
-rw-r--r--  lib/utils.py         |  44
8 files changed, 699 insertions(+), 30 deletions(-)
diff --git a/lib/behaviour.py b/lib/behaviour.py
new file mode 100644
index 0000000..136a55e
--- /dev/null
+++ b/lib/behaviour.py
@@ -0,0 +1,388 @@
+#!/usr/bin/env python3
+
+import logging
+from . import utils
+from .model import AnalyticModel
+from . import functions as df
+
+logger = logging.getLogger(__name__)
+
+
+class SDKBehaviourModel:
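+    """
+    Behaviour model learned from annotated benchmark traces.
+
+    For each annotation name, this builds a transition system between observed
+    "name @ place" events (delta_by_name), records the parameter sets under
+    which each transition was taken (delta_param_by_name), and derives
+    parameter-dependent transition guards via a fitted AnalyticModel.
+    """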
+
+ def __init__(self, observations, annotations):
+
+ meta_observations = list()
+ delta_by_name = dict()
+ delta_param_by_name = dict()
+ is_loop = dict()
+
+ for annotation in annotations:
+            # annotation.start.param may be incomplete, for instance when
+            # DPUs are allocated before the input file is loaded (and thus
+            # before the problem size is known).
+            # annotation.end.param may also differ from annotation.start.param
+            # (it should not, but some benchmarks do).
+            # Hence, we use annotation.start.param if it has the same keys as
+            # annotation.end.param, and annotation.end.param otherwise.
+ if sorted(annotation.start.param.keys()) == sorted(
+ annotation.end.param.keys()
+ ):
+ am_tt_param_names = sorted(annotation.start.param.keys())
+ else:
+ am_tt_param_names = sorted(annotation.end.param.keys())
+ if annotation.name not in delta_by_name:
+ delta_by_name[annotation.name] = dict()
+ delta_param_by_name[annotation.name] = dict()
+ _, _, meta_obs, _is_loop = self.learn_pta(
+ observations,
+ annotation,
+ delta_by_name[annotation.name],
+ delta_param_by_name[annotation.name],
+ )
+ meta_observations += meta_obs
+ is_loop.update(_is_loop)
+
+ self.am_tt_param_names = am_tt_param_names
+ self.delta_by_name = delta_by_name
+ self.delta_param_by_name = delta_param_by_name
+ self.meta_observations = meta_observations
+ self.is_loop = is_loop
+
+ self.build_transition_guards()
+
+ def build_transition_guards(self):
+ self.transition_guard = dict()
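+        # transition_guard[t_from][t_to] is a list of (param_name, value)
+        # prefixes taken from a fitted decision tree; get_trace() follows the
+        # transition t_from -> t_to if all pairs of one prefix match the
+        # current parameter assignment.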
+ for name in sorted(self.delta_by_name.keys()):
+ for t_from, t_to_set in self.delta_by_name[name].items():
+ i_to_transition = dict()
+ delta_param_sets = list()
+ to_names = list()
+ transition_guard = dict()
+
+ if len(t_to_set) > 1:
+ am_tt_by_name = {
+ name: {
+ "attributes": [t_from],
+ "param": list(),
+ t_from: list(),
+ },
+ }
+ for i, t_to in enumerate(sorted(t_to_set)):
+ for param in self.delta_param_by_name[name][(t_from, t_to)]:
+ am_tt_by_name[name]["param"].append(
+ utils.param_dict_to_list(
+ utils.param_str_to_dict(param),
+ self.am_tt_param_names,
+ )
+ )
+ am_tt_by_name[name][t_from].append(i)
+ i_to_transition[i] = t_to
+ am = AnalyticModel(
+ am_tt_by_name, self.am_tt_param_names, force_tree=True
+ )
+ model, info = am.get_fitted()
+ if type(info(name, t_from)) is df.SplitFunction:
+ flat_model = info(name, t_from).flatten()
+ else:
+ flat_model = list()
+ logger.warning(
+ f"Model for {name} {t_from} is {info(name, t_from)}, expected SplitFunction"
+ )
+
+ for prefix, output in flat_model:
+ transition_name = i_to_transition[int(output)]
+ if transition_name not in transition_guard:
+ transition_guard[transition_name] = list()
+ transition_guard[transition_name].append(prefix)
+
+ self.transition_guard[t_from] = transition_guard
+
+ def get_trace(self, name, param_dict):
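+        """
+        Return the sequence of states visited for `name` under `param_dict`.
+
+        The trace starts at "__init__" and ends at "__end__"; when several
+        outbound transitions are possible, self.transition_guard decides which
+        one matches param_dict. Hypothetical example (state and parameter
+        names are illustrative only):
+            get_trace("copy", {"n": 1024})
+            # -> ["__init__", "alloc @ host", "run @ dpu", "__end__"]
+        """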
+ delta = self.delta_by_name[name]
+ current_state = "__init__"
+ trace = [current_state]
+ states_seen = set()
+ while current_state != "__end__":
+ next_states = delta[current_state]
+
+ states_seen.add(current_state)
+ next_states = list(filter(lambda q: q not in states_seen, next_states))
+
+ if len(next_states) == 0:
+ raise RuntimeError(
+ f"get_trace({name}, {param_dict}): found infinite loop at {trace}"
+ )
+
+ if len(next_states) > 1 and self.transition_guard[current_state]:
+ matching_next_states = list()
+ for candidate in next_states:
+ for condition in self.transition_guard[current_state][candidate]:
+ valid = True
+ for key, value in condition:
+ if param_dict[key] != value:
+ valid = False
+ break
+ if valid:
+ matching_next_states.append(candidate)
+ break
+ next_states = matching_next_states
+
+ if len(next_states) == 0:
+ raise RuntimeError(
+ f"get_trace({name}, {param_dict}): found no valid outbound transitions at {trace}, candidates {self.transition_guard[current_state]}"
+ )
+ if len(next_states) > 1:
+ raise RuntimeError(
+ f"get_trace({name}, {param_dict}): found non-deterministic outbound transitions {next_states} at {trace}"
+ )
+
+ (next_state,) = next_states
+
+ trace.append(next_state)
+ current_state = next_state
+
+ return trace
+
+ def learn_pta(self, observations, annotation, delta=dict(), delta_param=dict()):
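+        """
+        Learn delta / delta_param for a single run annotation.
+
+        Returns (delta, delta_param, meta_observations, is_loop):
+        * delta maps each state to the set of observed successor states,
+          e.g. (hypothetical names) {"__init__": {"alloc @ host"}, ...}
+        * delta_param maps each (from, to) transition to the set of parameter
+          strings under which it was observed
+        * meta_observations contains synthetic "__trace__", "__loop__", and
+          per-annotation latency observations for later model fitting
+        * is_loop marks states that occurred more than once within the run
+        """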
+ prev_i = annotation.start.offset
+ prev = "__init__"
+ prev_non_kernel = prev
+ meta_observations = list()
+ n_seen = dict()
+
+ total_latency_us = 0
+
+ if sorted(annotation.start.param.keys()) == sorted(annotation.end.param.keys()):
+ param_dict = annotation.start.param
+ else:
+ param_dict = annotation.end.param
+ param_str = utils.param_dict_to_str(param_dict)
+
+ if annotation.kernels:
+            # possibly as a dict of tuples, in case loops may iterate differently?
+ for i in range(prev_i, annotation.kernels[0].offset):
+ this = observations[i]["name"] + " @ " + observations[i]["place"]
+
+ if this in n_seen:
+ if n_seen[this] == 1:
+ logger.debug(
+ f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳"
+ )
+ n_seen[this] += 1
+ else:
+ n_seen[this] = 1
+
+ if not prev in delta:
+ delta[prev] = set()
+ delta[prev].add(this)
+
+ if not (prev, this) in delta_param:
+ delta_param[(prev, this)] = set()
+ delta_param[(prev, this)].add(param_str)
+
+ prev = this
+ prev_i = i + 1
+
+ total_latency_us += observations[i]["attribute"].get("latency_us", 0)
+
+ meta_observations.append(
+ {
+ "name": f"__trace__ {this}",
+ "param": param_dict,
+ "attribute": dict(
+ filter(
+ lambda kv: not kv[0].startswith("e_"),
+ observations[i]["param"].items(),
+ )
+ ),
+ }
+ )
+ prev_non_kernel = prev
+
+ for kernel in annotation.kernels:
+ prev = prev_non_kernel
+ for i in range(prev_i, kernel.offset):
+ this = observations[i]["name"] + " @ " + observations[i]["place"]
+
+ if not prev in delta:
+ delta[prev] = set()
+ delta[prev].add(this)
+
+ if not (prev, this) in delta_param:
+ delta_param[(prev, this)] = set()
+ delta_param[(prev, this)].add(param_str)
+
+ # The last iteration (next block) contains a single kernel,
+ # so we do not increase total_latency_us here.
+ # However, this means that we will only ever get one latency
+ # value for each set of kernels with a common problem size,
+ # despite potentially having far more data at our fingertips.
+ # We could provide one total_latency_us for each kernel
+ # (by combining start latency + kernel latency + teardown latency),
+ # but for that we first need to distinguish between kernel
+ # components and teardown components in the following block.
+
+ prev = this
+ prev_i = i + 1
+
+ meta_observations.append(
+ {
+ "name": f"__trace__ {this}",
+ "param": param_dict,
+ "attribute": dict(
+ filter(
+ lambda kv: not kv[0].startswith("e_"),
+ observations[i]["param"].items(),
+ )
+ ),
+ }
+ )
+
+ # There is no kernel end signal in the underlying data, so the last iteration also contains a kernel run.
+ prev = prev_non_kernel
+ for i in range(prev_i, annotation.end.offset):
+ this = observations[i]["name"] + " @ " + observations[i]["place"]
+
+ if this in n_seen:
+ if n_seen[this] == 1:
+ logger.debug(
+ f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳"
+ )
+ n_seen[this] += 1
+ else:
+ n_seen[this] = 1
+
+ if not prev in delta:
+ delta[prev] = set()
+ delta[prev].add(this)
+
+ if not (prev, this) in delta_param:
+ delta_param[(prev, this)] = set()
+ delta_param[(prev, this)].add(param_str)
+
+ total_latency_us += observations[i]["attribute"].get("latency_us", 0)
+
+ prev = this
+
+ meta_observations.append(
+ {
+ "name": f"__trace__ {this}",
+ "param": param_dict,
+ "attribute": dict(
+ filter(
+ lambda kv: not kv[0].startswith("e_"),
+ observations[i]["param"].items(),
+ )
+ ),
+ }
+ )
+
+ if not prev in delta:
+ delta[prev] = set()
+ delta[prev].add("__end__")
+ if not (prev, "__end__") in delta_param:
+ delta_param[(prev, "__end__")] = set()
+ delta_param[(prev, "__end__")].add(param_str)
+
+ for transition, count in n_seen.items():
+ meta_observations.append(
+ {
+ "name": f"__loop__ {transition}",
+ "param": param_dict,
+ "attribute": {"n_iterations": count},
+ }
+ )
+
+ if total_latency_us:
+ meta_observations.append(
+ {
+ "name": annotation.start.name,
+ "param": param_dict,
+ "attribute": {"latency_us": total_latency_us},
+ }
+ )
+
+ is_loop = dict(
+ map(lambda kv: (kv[0], True), filter(lambda kv: kv[1] > 1, n_seen.items()))
+ )
+
+ return delta, delta_param, meta_observations, is_loop
+
+
+class EventSequenceModel:
+ def __init__(self, models):
+ self.models = models
+
+ def _event_normalizer(self, event):
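+        """
+        Split an optional constant scale factor off an event string.
+
+        Hypothetical examples: "1e6/bench.run(n=10)" yields the event
+        "bench.run(n=10)" and a normalizer p -> 1e6 / p, while
+        "bench.run(n=10)/1e3" yields p -> p / 1e3.
+        """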
+ event_normalizer = lambda p: p
+ if "/" in event:
+ v1, v2 = event.split("/")
+ if utils.is_numeric(v1):
+ event = v2.strip()
+ event_normalizer = lambda p: utils.soft_cast_float(v1) / p
+ elif utils.is_numeric(v2):
+ event = v1.strip()
+ event_normalizer = lambda p: p / utils.soft_cast_float(v2)
+ else:
+ raise RuntimeError(f"Cannot parse '{event}'")
+ return event, event_normalizer
+
+ def eval_strs(self, events, aggregate="sum", aggregate_init=0, use_lut=False):
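+        """
+        Predict and aggregate a list of event strings.
+
+        Each event has the form "Name.action(key=value, ...)", optionally
+        scaled by a constant (see _event_normalizer). Hypothetical example:
+            eval_strs(["radio.tx(n_bytes=128)", "2/cpu.sleep()"])
+        looks up models providing radio.tx and cpu.sleep, predicts both, and
+        returns radio.tx(n_bytes=128) + 2 / cpu.sleep().
+        """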
+ for event in events:
+ event, event_normalizer = self._event_normalizer(event)
+ nn, param = event.split("(")
+ name, action = nn.split(".")
+ param_model = None
+ ref_model = None
+
+ for model in self.models:
+ if name in model.names and action in model.attributes(name):
+ ref_model = model
+ if use_lut:
+ param_model = model.get_param_lut(allow_none=True)
+ else:
+ param_model, param_info = model.get_fitted()
+ break
+
+ if param_model is None:
+ raise RuntimeError(f"Did not find a model for {name}.{action}")
+
+ param = param.removesuffix(")")
+ if param == "":
+ param = dict()
+ else:
+ param = utils.parse_conf_str(param)
+
+ param_list = utils.param_dict_to_list(param, ref_model.parameters)
+
+ if not use_lut and not param_info(name, action).is_predictable(param_list):
+ logger.warning(
+ f"Cannot predict {name}.{action}({param}), falling back to static model"
+ )
+
+ try:
+ event_output = event_normalizer(
+ param_model(
+ name,
+ action,
+ param=param_list,
+ )
+ )
+ except KeyError:
+ if use_lut:
+ logger.error(
+ f"Cannot predict {name}.{action}({param}) from LUT model"
+ )
+ else:
+ logger.error(f"Cannot predict {name}.{action}({param}) from model")
+ raise
+ except TypeError:
+ if not use_lut:
+ logger.error(f"Cannot predict {name}.{action}({param}) from model")
+ raise
+
+ if aggregate == "sum":
+ aggregate_init += event_output
+ else:
+ raise RuntimeError(f"Unknown aggregate type: {aggregate}")
+
+ return aggregate_init
diff --git a/lib/cli.py b/lib/cli.py
index 75f6890..b68f548 100644
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -331,6 +331,23 @@ def model_quality_table(
print(buf)
+def export_pseudo_dref(dref_file, dref, precision=None):
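+    # Keys are mangled for use as LaTeX macro names: a (hypothetical) key
+    # "model/static error" with value 3.14159 and precision=2 is emitted as
+    # \def\modelIstaticerror{3.14}.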
+ with open(dref_file, "w") as f:
+ for k, v in sorted(os.environ.items(), key=lambda kv: kv[0]):
+ if k.startswith("DFATOOL_"):
+ print(f"% {k}='{v}'", file=f)
+ for arg in sys.argv:
+ print(f"% {arg}", file=f)
+ for k, v in sorted(dref.items()):
+ k = k.replace("/", "I").replace("-", "").replace("_", "").replace(" ", "")
+ if type(v) is tuple:
+ v = v[0]
+ if type(v) in (float, np.float64) and precision is not None:
+ print("\\def\\" + k + "{" + f"{v:.{precision}f}" + "}", file=f)
+ else:
+ print("\\def\\" + k + "{" + str(v) + "}", file=f)
+
+
def export_dataref(dref_file, dref, precision=None):
with open(dref_file, "w") as f:
for k, v in sorted(os.environ.items(), key=lambda kv: kv[0]):
@@ -493,6 +510,12 @@ def add_standard_arguments(parser):
help="Export tree-based model to {PREFIX}{name}-{attribute}.dot",
)
parser.add_argument(
+ "--export-pseudo-dref",
+ metavar="FILE",
+ type=str,
+ help="Export model and model quality to LaTeX def file (sort of like dataref)",
+ )
+ parser.add_argument(
"--export-dref",
metavar="FILE",
type=str,
@@ -528,7 +551,7 @@ def add_standard_arguments(parser):
"--export-json",
metavar="FILENAME",
type=str,
- help="Export model in JSON format to FILENAME",
+ help="Export model and error metrics in JSON format to FILENAME",
)
parser.add_argument(
"--load-json",
diff --git a/lib/functions.py b/lib/functions.py
index 187e6ff..b76814b 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -27,6 +27,7 @@ dfatool_rmt_relevance_threshold = float(
os.getenv("DFATOOL_RMT_RELEVANCE_THRESHOLD", "0.5")
)
+dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear")
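+# Passed as the "loss" argument to scipy.optimize.least_squares; valid values
+# include "linear" (default), "soft_l1", "huber", "cauchy", and "arctan".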
dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf))
if dfatool_preproc_relevance_method == "mi":
@@ -466,6 +467,23 @@ class SplitFunction(ModelFunction):
)
return hyper
+ # SplitFunction only
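+    # Hypothetical example: a split on parameter "n" whose children are a
+    # further split on "m" and a constant leaf flattens to
+    #   [([("n", 1), ("m", 2)], 17.0), ([("n", 4)], 23.0)]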
+ def flatten(self):
+ paths = list()
+ for param_value, subtree in self.child.items():
+ if type(subtree) is SplitFunction:
+ for path, value in subtree.flatten():
+ path = [(self.param_name, param_value)] + path
+ paths.append((path, value))
+ elif type(subtree) is StaticFunction:
+ path = [(self.param_name, param_value)]
+ paths.append((path, subtree.value))
+ else:
+ raise RuntimeError(
+ "flatten is only implemented for RMTs with constant leaves"
+ )
+ return paths
+
@classmethod
def from_json(cls, data):
assert data["type"] == "split"
@@ -1675,7 +1693,11 @@ class FOLFunction(SKLearnRegressionFunction):
self.model_args = list(np.ones((num_vars)))
try:
res = optimize.least_squares(
- error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15
+ error_function,
+ self.model_args,
+ args=(fit_parameters, data),
+ xtol=2e-15,
+ loss=dfatool_uls_loss_fun,
)
except ValueError as err:
logger.warning(f"Fit failed: {err} (function: {self.model_function})")
@@ -1938,6 +1960,7 @@ class AnalyticFunction(ModelFunction):
self.model_args,
args=(X, Y),
xtol=2e-15,
+ loss=dfatool_uls_loss_fun,
bounds=(lower_bounds, upper_bounds),
)
except ValueError as err:
diff --git a/lib/loader/plain.py b/lib/loader/plain.py
index 50f3ca6..ef0b596 100644
--- a/lib/loader/plain.py
+++ b/lib/loader/plain.py
@@ -69,10 +69,48 @@ class CSVfile:
return observations
-class Logfile:
- def __init__(self):
- pass
+class TraceAnnotation:
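+    """
+    A single trace annotation ([>>], [--], or [<<] log line): its event name,
+    parameters, and the index (offset) of the corresponding entry in the
+    observation list.
+    """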
+ offset = None
+ name = None
+ param = dict()
+
+ def __init__(self, **kwargs):
+ self.__dict__.update(kwargs)
+
+ def apply_offset(self, offset):
+ self.offset += offset
+ return self
+
+ def __repr__(self):
+ param_desc = " ".join(map(lambda kv: f"{kv[0]}={kv[1]}", self.param.items()))
+ return f"{self.name}<{param_desc} @ {self.offset}>"
+
+class RunAnnotation:
+ name = None
+ start = None
+ kernels = list()
+ end = None
+
+ # start: offset points to first run entry
+ # kernel: offset points to first kernel run entry
+ # end: offset points to first non-run entry (i.e., for all run entries: offset < end.offset)
+
+ def __init__(self, **kwargs):
+ self.__dict__.update(kwargs)
+
+ def apply_offset(self, offset):
+ self.start.apply_offset(offset)
+ for kernel in self.kernels:
+ kernel.apply_offset(offset)
+ self.end.apply_offset(offset)
+ return self
+
+ def __repr__(self):
+ return f"RunAnnotation<{self.name}, start={self.start}, kernels={self.kernels}, end={self.end}>"
+
+
+class Logfile:
def kv_to_param(self, kv_str, cast):
try:
key, value = kv_str.split("=")
@@ -88,14 +126,24 @@ class Logfile:
def kv_to_param_i(self, kv_str):
return self.kv_to_param(kv_str, soft_cast_int_or_float)
- def load(self, f):
+ def load(self, f, is_trace=False):
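+        """
+        Parse a dfatool log file.
+
+        Observation lines have the form
+            [::] name @ place | key=value ... | attr=value ...
+        (the " @ place" part is only expected when is_trace is True).
+        With is_trace=True, run annotations are additionally parsed from
+            [>>] name | key=value ...   (run start)
+            [--] name | key=value ...   (kernel start)
+            [<<] name | key=value ...   (run end)
+        and the method returns (observations, annotations) instead of just
+        observations.
+        """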
observations = list()
+ if is_trace:
+ trace_status = None
+ trace_start = None
+ trace_kernels = list()
+ trace_end = None
+ annotations = list()
+
for lineno, line in enumerate(f):
- m = re.search(r"\[::\] *([^|]*?) *[|] *([^|]*?) *[|] *(.*)", line)
- if m:
+ if m := re.search(r"\[::\] *([^|]*?) *[|] *([^|]*?) *[|] *(.*)", line):
name_str = m.group(1)
param_str = m.group(2)
attr_str = m.group(3)
+ if is_trace:
+ name_str, name_annot = name_str.split("@")
+ name_str = name_str.strip()
+ name_annot = name_annot.strip()
try:
param = dict(map(self.kv_to_param_i, param_str.split()))
attr = dict(map(self.kv_to_param_f, attr_str.split()))
@@ -106,13 +154,89 @@ class Logfile:
"attribute": attr,
}
)
+ if is_trace:
+ observations[-1]["place"] = name_annot
+ except ValueError:
+ logger.warning(
+ f"Error parsing {f}: invalid key-value pair in line {lineno+1}"
+ )
+ logger.warning(f"Offending entry:\n{line}")
+ raise
+
+ if not is_trace:
+ continue
+
+ # only relevant for is_trace == True
+ if m := re.fullmatch(r"\[>>\] *([^|]*?) *[|] *([^|]*?) *", line):
+ trace_status = 1
+ trace_kernels = list()
+ name_str = m.group(1)
+ param_str = m.group(2)
+ try:
+ param = dict(map(self.kv_to_param_i, param_str.split()))
+ except ValueError:
+ logger.warning(
+ f"Error parsing {f}: invalid key-value pair in line {lineno+1}"
+ )
+ logger.warning(f"Offending entry:\n{line}")
+ raise
+ trace_start = TraceAnnotation(
+ offset=len(observations), name=name_str, param=param
+ )
+
+ if m := re.fullmatch(r"\[--\] *([^|]*?) *[|] *([^|]*?) *", line):
+ trace_status = 2
+ name_str = m.group(1)
+ param_str = m.group(2)
+ try:
+ param = dict(map(self.kv_to_param_i, param_str.split()))
+ except ValueError:
+ logger.warning(
+ f"Error parsing {f}: invalid key-value pair in line {lineno+1}"
+ )
+ logger.warning(f"Offending entry:\n{line}")
+ raise
+ trace_kernels.append(
+ TraceAnnotation(
+ offset=len(observations), name=name_str, param=param
+ )
+ )
+
+ if m := re.fullmatch(r"\[<<\] *([^|]*?) *[|] *([^|]*?) *", line):
+ trace_status = None
+ name_str = m.group(1)
+ param_str = m.group(2)
+ try:
+ param = dict(map(self.kv_to_param_i, param_str.split()))
except ValueError:
logger.warning(
f"Error parsing {f}: invalid key-value pair in line {lineno+1}"
)
logger.warning(f"Offending entry:\n{line}")
raise
+ trace_end = TraceAnnotation(
+ offset=len(observations), name=name_str, param=param
+ )
+ if trace_start is not None:
+ assert trace_start.name == trace_end.name
+ for kernel in trace_kernels:
+ assert trace_start.name == kernel.name
+ annotations.append(
+ RunAnnotation(
+ name=trace_start.name,
+ start=trace_start,
+ kernels=trace_kernels,
+ end=trace_end,
+ )
+ )
+
+ trace_status = None
+ trace_start = None
+ trace_kernels = list()
+ trace_end = None
+ if is_trace:
+ return observations, annotations
return observations
def dump(self, observations, f):
diff --git a/lib/model.py b/lib/model.py
index 58f05a4..4d1edd5 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -14,7 +14,14 @@ from .parameters import (
distinct_param_values,
)
from .paramfit import ParamFit
-from .utils import is_numeric, soft_cast_int, by_name_to_by_param, regression_measures
+from .utils import (
+ is_numeric,
+ soft_cast_int,
+ by_name_to_by_param,
+ by_param_to_by_name,
+ regression_measures,
+ param_eq_or_none,
+)
logger = logging.getLogger(__name__)
@@ -79,6 +86,7 @@ class AnalyticModel:
compute_stats=True,
force_tree=False,
max_std=None,
+ by_param=None,
from_json=None,
):
"""
@@ -96,7 +104,7 @@ class AnalyticModel:
- attributes: list of keys that should be analyzed,
e.g. ['power', 'duration']
- for each attribute mentioned in 'attributes': A list with measurements.
- All list except for 'attributes' must have the same length.
+ All lists except for 'attributes' must have the same length.
For example:
parameters = ['foo_count', 'irrelevant']
@@ -148,9 +156,18 @@ class AnalyticModel:
for name, name_data in from_json["name"].items():
self.attr_by_name[name] = dict()
for attr, attr_data in name_data.items():
- self.attr_by_name[name][attr] = ModelAttribute.from_json(
- name, attr, attr_data
- )
+ if by_param:
+ self.attr_by_name[name][attr] = ModelAttribute.from_json(
+ name,
+ attr,
+ attr_data,
+ data_values=by_name[name][attr],
+ param_values=by_name[name]["param"],
+ )
+ else:
+ self.attr_by_name[name][attr] = ModelAttribute.from_json(
+ name, attr, attr_data
+ )
self.fit_done = True
return
@@ -249,7 +266,7 @@ class AnalyticModel:
return static_model_getter
- def get_param_lut(self, use_mean=False, fallback=False):
+ def get_param_lut(self, use_mean=False, fallback=False, allow_none=False):
"""
Get parameter-look-up-table model function: name, attribute, parameter values -> model value.
@@ -279,7 +296,16 @@ class AnalyticModel:
try:
return lut_model[name][key][param]
except KeyError:
- if fallback:
+ if allow_none:
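+                    # Treat None entries in `param` as wildcards: average the
+                    # predictions of all LUT entries whose remaining
+                    # parameters match (see utils.param_eq_or_none).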
+ keys = filter(
+ lambda p: param_eq_or_none(param, p),
+ lut_model[name][key].keys(),
+ )
+ values = list(map(lambda p: lut_model[name][key][p], keys))
+ if not values:
+ raise
+ return np.mean(values)
+ elif fallback:
return static_model[name][key]
raise
params = kwargs["params"]
@@ -643,7 +669,14 @@ class AnalyticModel:
ret[f"xv/{name}/{attr_name}/{k}"] = np.mean(entry[k])
return ret
- def to_json(self, **kwargs) -> dict:
+ def to_json(
+ self,
+ with_by_param=False,
+ lut_error=None,
+ static_error=None,
+ model_error=None,
+ **kwargs,
+ ) -> dict:
"""
Return JSON encoding of this AnalyticModel.
"""
@@ -653,21 +686,48 @@ class AnalyticModel:
"paramValuesbyName": dict([[name, dict()] for name in self.names]),
}
+ if with_by_param:
+ by_param = self.get_by_param()
+ ret["byParam"] = list()
+ for k, v in by_param.items():
+ ret["byParam"].append((k, v))
+
for name in self.names:
for attr_name, attr in self.attr_by_name[name].items():
ret["name"][name][attr_name] = attr.to_json(**kwargs)
+ if lut_error:
+ ret["name"][name][attr_name]["lutError"] = lut_error[name][
+ attr_name
+ ]
+ if static_error:
+ ret["name"][name][attr_name]["staticError"] = static_error[name][
+ attr_name
+ ]
+ if model_error:
+ ret["name"][name][attr_name]["modelError"] = model_error[name][
+ attr_name
+ ]
attr_name = list(self.attributes(name))[0]
for param_name in self.parameters:
- ret["paramValuesbyName"][name][param_name] = self.attr_by_name[name][
- attr_name
- ].stats.distinct_values_by_param_name[param_name]
+ if self.attr_by_name[name][attr_name].stats is not None:
+ ret["paramValuesbyName"][name][param_name] = self.attr_by_name[
+ name
+ ][attr_name].stats.distinct_values_by_param_name[param_name]
return ret
@classmethod
- def from_json(cls, data, by_name, parameters):
- assert data["parameters"] == parameters
- return cls(by_name, parameters, from_json=data)
+ def from_json(cls, data, by_name=None, parameters=None):
+ if by_name is None and parameters is None:
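+            # Reconstruct measurement data from the embedded "byParam" list
+            # (produced by to_json(with_by_param=True)).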
+ assert data["byParam"] is not None
+ by_param = dict()
+ for (nk, pk), v in data["byParam"]:
+ by_param[(nk, tuple(pk))] = v
+ by_name = by_param_to_by_name(by_param)
+ return cls(by_name, data["parameters"], by_param=by_param, from_json=data)
+ else:
+ assert data["parameters"] == parameters
+ return cls(by_name, parameters, from_json=data)
def webconf_function_map(self) -> list:
ret = list()
diff --git a/lib/parameters.py b/lib/parameters.py
index 0653100..acb044c 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -621,13 +621,20 @@ class ModelAttribute:
mean = np.mean(self.data)
return f"ModelAttribute<{self.name}, {self.attr}, mean={mean}>"
- def to_json(self, **kwargs):
- return {
+ def to_json(self, with_lut=False, **kwargs):
+ ret = {
"paramNames": self.param_names,
"argCount": self.arg_count,
"modelFunction": self.model_function.to_json(**kwargs),
}
+ if with_lut:
+ ret["LUT"] = list()
+ for key, value in self.by_param.items():
+ ret["LUT"].append((key, value))
+
+ return ret
+
def to_dref(self, unit=None):
ret = {"mean": (self.mean, unit), "median": (self.median, unit)}
@@ -724,11 +731,11 @@ class ModelAttribute:
return self.mutual_information_cache
@classmethod
- def from_json(cls, name, attr, data):
+ def from_json(cls, name, attr, data, data_values=None, param_values=None):
param_names = data["paramNames"]
arg_count = data["argCount"]
- self = cls(name, attr, None, None, param_names, arg_count)
+ self = cls(name, attr, data_values, param_values, param_names, arg_count)
self.model_function = df.ModelFunction.from_json(data["modelFunction"])
self.mean = self.model_function.value
diff --git a/lib/paramfit.py b/lib/paramfit.py
index 000aa9c..84eba2b 100644
--- a/lib/paramfit.py
+++ b/lib/paramfit.py
@@ -16,9 +16,14 @@ from .utils import (
)
logger = logging.getLogger(__name__)
-best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr")
+dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear")
dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf))
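+# When a robust (non-linear) loss function is used, SSR is no longer the
+# quantity being minimized, so the best-fit metric defaults to MAE instead.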
+if dfatool_uls_loss_fun == "linear":
+ best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr")
+else:
+ best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "mae")
+
class ParamFit:
"""
@@ -222,6 +227,7 @@ def _try_fits(
ini,
args=(X, Y),
xtol=2e-15,
+ loss=dfatool_uls_loss_fun,
bounds=param_function.bounds,
)
except FloatingPointError as e:
diff --git a/lib/utils.py b/lib/utils.py
index 4850a53..fb76367 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -48,6 +48,8 @@ def running_mean(x: np.ndarray, N: int) -> np.ndarray:
def human_readable(value, unit):
+ if value is None:
+ return value
for prefix, factor in (
("p", 1e-12),
("n", 1e-9),
@@ -55,6 +57,8 @@ def human_readable(value, unit):
("m", 1e-3),
("", 1),
("k", 1e3),
+ ("M", 1e6),
+ ("G", 1e9),
):
if value < 1e3 * factor:
return "{:.2f} {}{}".format(value * (1 / factor), prefix, unit)
@@ -150,7 +154,7 @@ def parse_conf_str(conf_str):
"""
conf_dict = dict()
for option in conf_str.split(","):
- key, value = option.split("=")
+ key, value = option.strip().split("=")
conf_dict[key] = soft_cast_float(value)
return conf_dict
@@ -205,6 +209,18 @@ def param_slice_eq(a, b, index):
return False
+def param_eq_or_none(a, b):
+ """
+    Check whether by_param keys a and b are identical, treating None entries
+    in a as wildcards that match any value in b.
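+
+    For example, a=(1, None, 3) matches b=(1, 2, 3) but not b=(2, 2, 3).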
+ """
+ set_keys = tuple(filter(lambda i: a[i] is not None, range(len(a))))
+ a_not_none = tuple(map(lambda i: a[i], set_keys))
+ b_not_none = tuple(map(lambda i: b[i], set_keys))
+ if a_not_none == b_not_none:
+ return True
+ return False
+
+
def match_parameter_values(input_param: dict, match_param: dict):
"""
Check whether one of the paramaters in `input_param` has the same value in `match_param`.
@@ -302,6 +318,21 @@ def param_dict_to_list(param_dict, parameter_names, default=None):
return ret
+def param_dict_to_str(param_dict):
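+    """Convert a parameter dict to a sorted "key=value" string: {"b": 2, "a": 1} -> "a=1 b=2"."""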
+ ret = list()
+ for parameter_name in sorted(param_dict.keys()):
+ ret.append(f"{parameter_name}={param_dict[parameter_name]}")
+ return " ".join(ret)
+
+
+def param_str_to_dict(param_str):
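+    """Inverse of param_dict_to_str: "a=1 b=2" -> {"a": 1, "b": 2}."""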
+ ret = dict()
+ for param_pair in param_str.split():
+ key, value = param_pair.split("=")
+ ret[key] = soft_cast_int_or_float(value)
+ return ret
+
+
def observations_enum_to_bool(observations: list, kconfig=False):
"""
Convert enum / categorical observations to boolean-only ones.
@@ -697,11 +728,18 @@ def regression_measures(predicted: np.ndarray, ground_truth: np.ndarray):
rsq -- R^2 measure, see sklearn.metrics.r2_score
count -- Number of values
"""
- if type(predicted) != np.ndarray:
+
+ if type(predicted) is list:
+ predicted = np.array(predicted)
+
+ if type(ground_truth) is list:
+ ground_truth = np.array(ground_truth)
+
+ if type(predicted) is not np.ndarray:
raise ValueError(
"first arg ('predicted') must be ndarray, is {}".format(type(predicted))
)
- if type(ground_truth) != np.ndarray:
+ if type(ground_truth) is not np.ndarray:
raise ValueError(
"second arg ('ground_truth') must be ndarray, is {}".format(
type(ground_truth)