Diffstat (limited to 'lib')
-rw-r--r--   lib/behaviour.py     | 388
-rw-r--r--   lib/cli.py           |  25
-rw-r--r--   lib/functions.py     |  25
-rw-r--r--   lib/loader/plain.py  | 136
-rw-r--r--   lib/model.py         |  88
-rw-r--r--   lib/parameters.py    |  15
-rw-r--r--   lib/paramfit.py      |   8
-rw-r--r--   lib/utils.py         |  44
8 files changed, 699 insertions, 30 deletions
diff --git a/lib/behaviour.py b/lib/behaviour.py new file mode 100644 index 0000000..136a55e --- /dev/null +++ b/lib/behaviour.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python3 + +import logging +from . import utils +from .model import AnalyticModel +from . import functions as df + +logger = logging.getLogger(__name__) + + +class SDKBehaviourModel: + + def __init__(self, observations, annotations): + + meta_observations = list() + delta_by_name = dict() + delta_param_by_name = dict() + is_loop = dict() + + for annotation in annotations: + # annotation.start.param may be incomplete, for instance in cases + # where DPUs are allocated before the input file is loadeed (and + # thus before the problem size is known). + # However, annotation.end.param may also differ from annotation.start.param (it should not, but that's how some benchmarks roll). + # So, we use annotation.start.param if it has the same keys as annotation.end.param, and annotation.end.param otherwise + if sorted(annotation.start.param.keys()) == sorted( + annotation.end.param.keys() + ): + am_tt_param_names = sorted(annotation.start.param.keys()) + else: + am_tt_param_names = sorted(annotation.end.param.keys()) + if annotation.name not in delta_by_name: + delta_by_name[annotation.name] = dict() + delta_param_by_name[annotation.name] = dict() + _, _, meta_obs, _is_loop = self.learn_pta( + observations, + annotation, + delta_by_name[annotation.name], + delta_param_by_name[annotation.name], + ) + meta_observations += meta_obs + is_loop.update(_is_loop) + + self.am_tt_param_names = am_tt_param_names + self.delta_by_name = delta_by_name + self.delta_param_by_name = delta_param_by_name + self.meta_observations = meta_observations + self.is_loop = is_loop + + self.build_transition_guards() + + def build_transition_guards(self): + self.transition_guard = dict() + for name in sorted(self.delta_by_name.keys()): + for t_from, t_to_set in self.delta_by_name[name].items(): + i_to_transition = dict() + delta_param_sets = list() + to_names = list() + transition_guard = dict() + + if len(t_to_set) > 1: + am_tt_by_name = { + name: { + "attributes": [t_from], + "param": list(), + t_from: list(), + }, + } + for i, t_to in enumerate(sorted(t_to_set)): + for param in self.delta_param_by_name[name][(t_from, t_to)]: + am_tt_by_name[name]["param"].append( + utils.param_dict_to_list( + utils.param_str_to_dict(param), + self.am_tt_param_names, + ) + ) + am_tt_by_name[name][t_from].append(i) + i_to_transition[i] = t_to + am = AnalyticModel( + am_tt_by_name, self.am_tt_param_names, force_tree=True + ) + model, info = am.get_fitted() + if type(info(name, t_from)) is df.SplitFunction: + flat_model = info(name, t_from).flatten() + else: + flat_model = list() + logger.warning( + f"Model for {name} {t_from} is {info(name, t_from)}, expected SplitFunction" + ) + + for prefix, output in flat_model: + transition_name = i_to_transition[int(output)] + if transition_name not in transition_guard: + transition_guard[transition_name] = list() + transition_guard[transition_name].append(prefix) + + self.transition_guard[t_from] = transition_guard + + def get_trace(self, name, param_dict): + delta = self.delta_by_name[name] + current_state = "__init__" + trace = [current_state] + states_seen = set() + while current_state != "__end__": + next_states = delta[current_state] + + states_seen.add(current_state) + next_states = list(filter(lambda q: q not in states_seen, next_states)) + + if len(next_states) == 0: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found 
infinite loop at {trace}" + ) + + if len(next_states) > 1 and self.transition_guard[current_state]: + matching_next_states = list() + for candidate in next_states: + for condition in self.transition_guard[current_state][candidate]: + valid = True + for key, value in condition: + if param_dict[key] != value: + valid = False + break + if valid: + matching_next_states.append(candidate) + break + next_states = matching_next_states + + if len(next_states) == 0: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found no valid outbound transitions at {trace}, candidates {self.transition_guard[current_state]}" + ) + if len(next_states) > 1: + raise RuntimeError( + f"get_trace({name}, {param_dict}): found non-deterministic outbound transitions {next_states} at {trace}" + ) + + (next_state,) = next_states + + trace.append(next_state) + current_state = next_state + + return trace + + def learn_pta(self, observations, annotation, delta=dict(), delta_param=dict()): + prev_i = annotation.start.offset + prev = "__init__" + prev_non_kernel = prev + meta_observations = list() + n_seen = dict() + + total_latency_us = 0 + + if sorted(annotation.start.param.keys()) == sorted(annotation.end.param.keys()): + param_dict = annotation.start.param + else: + param_dict = annotation.end.param + param_str = utils.param_dict_to_str(param_dict) + + if annotation.kernels: + # ggf. als dict of tuples, für den Fall dass Schleifen verschieden iterieren können? + for i in range(prev_i, annotation.kernels[0].offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if this in n_seen: + if n_seen[this] == 1: + logger.debug( + f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳" + ) + n_seen[this] += 1 + else: + n_seen[this] = 1 + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + prev = this + prev_i = i + 1 + + total_latency_us += observations[i]["attribute"].get("latency_us", 0) + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + prev_non_kernel = prev + + for kernel in annotation.kernels: + prev = prev_non_kernel + for i in range(prev_i, kernel.offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + # The last iteration (next block) contains a single kernel, + # so we do not increase total_latency_us here. + # However, this means that we will only ever get one latency + # value for each set of kernels with a common problem size, + # despite potentially having far more data at our fingertips. + # We could provide one total_latency_us for each kernel + # (by combining start latency + kernel latency + teardown latency), + # but for that we first need to distinguish between kernel + # components and teardown components in the following block. 
+ + prev = this + prev_i = i + 1 + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + + # There is no kernel end signal in the underlying data, so the last iteration also contains a kernel run. + prev = prev_non_kernel + for i in range(prev_i, annotation.end.offset): + this = observations[i]["name"] + " @ " + observations[i]["place"] + + if this in n_seen: + if n_seen[this] == 1: + logger.debug( + f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳" + ) + n_seen[this] += 1 + else: + n_seen[this] = 1 + + if not prev in delta: + delta[prev] = set() + delta[prev].add(this) + + if not (prev, this) in delta_param: + delta_param[(prev, this)] = set() + delta_param[(prev, this)].add(param_str) + + total_latency_us += observations[i]["attribute"].get("latency_us", 0) + + prev = this + + meta_observations.append( + { + "name": f"__trace__ {this}", + "param": param_dict, + "attribute": dict( + filter( + lambda kv: not kv[0].startswith("e_"), + observations[i]["param"].items(), + ) + ), + } + ) + + if not prev in delta: + delta[prev] = set() + delta[prev].add("__end__") + if not (prev, "__end__") in delta_param: + delta_param[(prev, "__end__")] = set() + delta_param[(prev, "__end__")].add(param_str) + + for transition, count in n_seen.items(): + meta_observations.append( + { + "name": f"__loop__ {transition}", + "param": param_dict, + "attribute": {"n_iterations": count}, + } + ) + + if total_latency_us: + meta_observations.append( + { + "name": annotation.start.name, + "param": param_dict, + "attribute": {"latency_us": total_latency_us}, + } + ) + + is_loop = dict( + map(lambda kv: (kv[0], True), filter(lambda kv: kv[1] > 1, n_seen.items())) + ) + + return delta, delta_param, meta_observations, is_loop + + +class EventSequenceModel: + def __init__(self, models): + self.models = models + + def _event_normalizer(self, event): + event_normalizer = lambda p: p + if "/" in event: + v1, v2 = event.split("/") + if utils.is_numeric(v1): + event = v2.strip() + event_normalizer = lambda p: utils.soft_cast_float(v1) / p + elif utils.is_numeric(v2): + event = v1.strip() + event_normalizer = lambda p: p / utils.soft_cast_float(v2) + else: + raise RuntimeError(f"Cannot parse '{event}'") + return event, event_normalizer + + def eval_strs(self, events, aggregate="sum", aggregate_init=0, use_lut=False): + for event in events: + event, event_normalizer = self._event_normalizer(event) + nn, param = event.split("(") + name, action = nn.split(".") + param_model = None + ref_model = None + + for model in self.models: + if name in model.names and action in model.attributes(name): + ref_model = model + if use_lut: + param_model = model.get_param_lut(allow_none=True) + else: + param_model, param_info = model.get_fitted() + break + + if param_model is None: + raise RuntimeError(f"Did not find a model for {name}.{action}") + + param = param.removesuffix(")") + if param == "": + param = dict() + else: + param = utils.parse_conf_str(param) + + param_list = utils.param_dict_to_list(param, ref_model.parameters) + + if not use_lut and not param_info(name, action).is_predictable(param_list): + logger.warning( + f"Cannot predict {name}.{action}({param}), falling back to static model" + ) + + try: + event_output = event_normalizer( + param_model( + name, + action, + param=param_list, + ) + ) + except KeyError: + if use_lut: + logger.error( + f"Cannot predict 
{name}.{action}({param}) from LUT model" + ) + else: + logger.error(f"Cannot predict {name}.{action}({param}) from model") + raise + except TypeError: + if not use_lut: + logger.error(f"Cannot predict {name}.{action}({param}) from model") + raise + + if aggregate == "sum": + aggregate_init += event_output + else: + raise RuntimeError(f"Unknown aggregate type: {aggregate}") + + return aggregate_init @@ -331,6 +331,23 @@ def model_quality_table( print(buf) +def export_pseudo_dref(dref_file, dref, precision=None): + with open(dref_file, "w") as f: + for k, v in sorted(os.environ.items(), key=lambda kv: kv[0]): + if k.startswith("DFATOOL_"): + print(f"% {k}='{v}'", file=f) + for arg in sys.argv: + print(f"% {arg}", file=f) + for k, v in sorted(dref.items()): + k = k.replace("/", "I").replace("-", "").replace("_", "").replace(" ", "") + if type(v) is tuple: + v = v[0] + if type(v) in (float, np.float64) and precision is not None: + print("\\def\\" + k + "{" + f"{v:.{precision}f}" + "}", file=f) + else: + print("\\def\\" + k + "{" + str(v) + "}", file=f) + + def export_dataref(dref_file, dref, precision=None): with open(dref_file, "w") as f: for k, v in sorted(os.environ.items(), key=lambda kv: kv[0]): @@ -493,6 +510,12 @@ def add_standard_arguments(parser): help="Export tree-based model to {PREFIX}{name}-{attribute}.dot", ) parser.add_argument( + "--export-pseudo-dref", + metavar="FILE", + type=str, + help="Export model and model quality to LaTeX def file (sort of like dataref)", + ) + parser.add_argument( "--export-dref", metavar="FILE", type=str, @@ -528,7 +551,7 @@ def add_standard_arguments(parser): "--export-json", metavar="FILENAME", type=str, - help="Export model in JSON format to FILENAME", + help="Export model and error metrics in JSON format to FILENAME", ) parser.add_argument( "--load-json", diff --git a/lib/functions.py b/lib/functions.py index 187e6ff..b76814b 100644 --- a/lib/functions.py +++ b/lib/functions.py @@ -27,6 +27,7 @@ dfatool_rmt_relevance_threshold = float( os.getenv("DFATOOL_RMT_RELEVANCE_THRESHOLD", "0.5") ) +dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear") dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf)) if dfatool_preproc_relevance_method == "mi": @@ -466,6 +467,23 @@ class SplitFunction(ModelFunction): ) return hyper + # SplitFunction only + def flatten(self): + paths = list() + for param_value, subtree in self.child.items(): + if type(subtree) is SplitFunction: + for path, value in subtree.flatten(): + path = [(self.param_name, param_value)] + path + paths.append((path, value)) + elif type(subtree) is StaticFunction: + path = [(self.param_name, param_value)] + paths.append((path, subtree.value)) + else: + raise RuntimeError( + "flatten is only implemented for RMTs with constant leaves" + ) + return paths + @classmethod def from_json(cls, data): assert data["type"] == "split" @@ -1675,7 +1693,11 @@ class FOLFunction(SKLearnRegressionFunction): self.model_args = list(np.ones((num_vars))) try: res = optimize.least_squares( - error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15 + error_function, + self.model_args, + args=(fit_parameters, data), + xtol=2e-15, + loss=dfatool_uls_loss_fun, ) except ValueError as err: logger.warning(f"Fit failed: {err} (function: {self.model_function})") @@ -1938,6 +1960,7 @@ class AnalyticFunction(ModelFunction): self.model_args, args=(X, Y), xtol=2e-15, + loss=dfatool_uls_loss_fun, bounds=(lower_bounds, upper_bounds), ) except ValueError as err: diff --git 
a/lib/loader/plain.py b/lib/loader/plain.py index 50f3ca6..ef0b596 100644 --- a/lib/loader/plain.py +++ b/lib/loader/plain.py @@ -69,10 +69,48 @@ class CSVfile: return observations -class Logfile: - def __init__(self): - pass +class TraceAnnotation: + offset = None + name = None + param = dict() + + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + def apply_offset(self, offset): + self.offset += offset + return self + + def __repr__(self): + param_desc = " ".join(map(lambda kv: f"{kv[0]}={kv[1]}", self.param.items())) + return f"{self.name}<{param_desc} @ {self.offset}>" + +class RunAnnotation: + name = None + start = None + kernels = list() + end = None + + # start: offset points to first run entry + # kernel: offset points to first kernel run entry + # end: offset points to first non-run entry (i.e., for all run entries: offset < end.offset) + + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + def apply_offset(self, offset): + self.start.apply_offset(offset) + for kernel in self.kernels: + kernel.apply_offset(offset) + self.end.apply_offset(offset) + return self + + def __repr__(self): + return f"RunAnnotation<{self.name}, start={self.start}, kernels={self.kernels}, end={self.end}>" + + +class Logfile: def kv_to_param(self, kv_str, cast): try: key, value = kv_str.split("=") @@ -88,14 +126,24 @@ class Logfile: def kv_to_param_i(self, kv_str): return self.kv_to_param(kv_str, soft_cast_int_or_float) - def load(self, f): + def load(self, f, is_trace=False): observations = list() + if is_trace: + trace_status = None + trace_start = None + trace_kernels = list() + trace_end = None + annotations = list() + for lineno, line in enumerate(f): - m = re.search(r"\[::\] *([^|]*?) *[|] *([^|]*?) *[|] *(.*)", line) - if m: + if m := re.search(r"\[::\] *([^|]*?) *[|] *([^|]*?) *[|] *(.*)", line): name_str = m.group(1) param_str = m.group(2) attr_str = m.group(3) + if is_trace: + name_str, name_annot = name_str.split("@") + name_str = name_str.strip() + name_annot = name_annot.strip() try: param = dict(map(self.kv_to_param_i, param_str.split())) attr = dict(map(self.kv_to_param_f, attr_str.split())) @@ -106,13 +154,89 @@ class Logfile: "attribute": attr, } ) + if is_trace: + observations[-1]["place"] = name_annot + except ValueError: + logger.warning( + f"Error parsing {f}: invalid key-value pair in line {lineno+1}" + ) + logger.warning(f"Offending entry:\n{line}") + raise + + if not is_trace: + continue + + # only relevant for is_trace == True + if m := re.fullmatch(r"\[>>\] *([^|]*?) *[|] *([^|]*?) *", line): + trace_status = 1 + trace_kernels = list() + name_str = m.group(1) + param_str = m.group(2) + try: + param = dict(map(self.kv_to_param_i, param_str.split())) + except ValueError: + logger.warning( + f"Error parsing {f}: invalid key-value pair in line {lineno+1}" + ) + logger.warning(f"Offending entry:\n{line}") + raise + trace_start = TraceAnnotation( + offset=len(observations), name=name_str, param=param + ) + + if m := re.fullmatch(r"\[--\] *([^|]*?) *[|] *([^|]*?) *", line): + trace_status = 2 + name_str = m.group(1) + param_str = m.group(2) + try: + param = dict(map(self.kv_to_param_i, param_str.split())) + except ValueError: + logger.warning( + f"Error parsing {f}: invalid key-value pair in line {lineno+1}" + ) + logger.warning(f"Offending entry:\n{line}") + raise + trace_kernels.append( + TraceAnnotation( + offset=len(observations), name=name_str, param=param + ) + ) + + if m := re.fullmatch(r"\[<<\] *([^|]*?) *[|] *([^|]*?) 
*", line): + trace_status = None + name_str = m.group(1) + param_str = m.group(2) + try: + param = dict(map(self.kv_to_param_i, param_str.split())) except ValueError: logger.warning( f"Error parsing {f}: invalid key-value pair in line {lineno+1}" ) logger.warning(f"Offending entry:\n{line}") raise + trace_end = TraceAnnotation( + offset=len(observations), name=name_str, param=param + ) + if trace_start is not None: + assert trace_start.name == trace_end.name + for kernel in trace_kernels: + assert trace_start.name == kernel.name + annotations.append( + RunAnnotation( + name=trace_start.name, + start=trace_start, + kernels=trace_kernels, + end=trace_end, + ) + ) + + trace_status = None + trace_start = None + trace_kernels = list() + trace_end = None + if is_trace: + return observations, annotations return observations def dump(self, observations, f): diff --git a/lib/model.py b/lib/model.py index 58f05a4..4d1edd5 100644 --- a/lib/model.py +++ b/lib/model.py @@ -14,7 +14,14 @@ from .parameters import ( distinct_param_values, ) from .paramfit import ParamFit -from .utils import is_numeric, soft_cast_int, by_name_to_by_param, regression_measures +from .utils import ( + is_numeric, + soft_cast_int, + by_name_to_by_param, + by_param_to_by_name, + regression_measures, + param_eq_or_none, +) logger = logging.getLogger(__name__) @@ -79,6 +86,7 @@ class AnalyticModel: compute_stats=True, force_tree=False, max_std=None, + by_param=None, from_json=None, ): """ @@ -96,7 +104,7 @@ class AnalyticModel: - attributes: list of keys that should be analyzed, e.g. ['power', 'duration'] - for each attribute mentioned in 'attributes': A list with measurements. - All list except for 'attributes' must have the same length. + All lists except for 'attributes' must have the same length. For example: parameters = ['foo_count', 'irrelevant'] @@ -148,9 +156,18 @@ class AnalyticModel: for name, name_data in from_json["name"].items(): self.attr_by_name[name] = dict() for attr, attr_data in name_data.items(): - self.attr_by_name[name][attr] = ModelAttribute.from_json( - name, attr, attr_data - ) + if by_param: + self.attr_by_name[name][attr] = ModelAttribute.from_json( + name, + attr, + attr_data, + data_values=by_name[name][attr], + param_values=by_name[name]["param"], + ) + else: + self.attr_by_name[name][attr] = ModelAttribute.from_json( + name, attr, attr_data + ) self.fit_done = True return @@ -249,7 +266,7 @@ class AnalyticModel: return static_model_getter - def get_param_lut(self, use_mean=False, fallback=False): + def get_param_lut(self, use_mean=False, fallback=False, allow_none=False): """ Get parameter-look-up-table model function: name, attribute, parameter values -> model value. @@ -279,7 +296,16 @@ class AnalyticModel: try: return lut_model[name][key][param] except KeyError: - if fallback: + if allow_none: + keys = filter( + lambda p: param_eq_or_none(param, p), + lut_model[name][key].keys(), + ) + values = list(map(lambda p: lut_model[name][key][p], keys)) + if not values: + raise + return np.mean(values) + elif fallback: return static_model[name][key] raise params = kwargs["params"] @@ -643,7 +669,14 @@ class AnalyticModel: ret[f"xv/{name}/{attr_name}/{k}"] = np.mean(entry[k]) return ret - def to_json(self, **kwargs) -> dict: + def to_json( + self, + with_by_param=False, + lut_error=None, + static_error=None, + model_error=None, + **kwargs, + ) -> dict: """ Return JSON encoding of this AnalyticModel. 
""" @@ -653,21 +686,48 @@ class AnalyticModel: "paramValuesbyName": dict([[name, dict()] for name in self.names]), } + if with_by_param: + by_param = self.get_by_param() + ret["byParam"] = list() + for k, v in by_param.items(): + ret["byParam"].append((k, v)) + for name in self.names: for attr_name, attr in self.attr_by_name[name].items(): ret["name"][name][attr_name] = attr.to_json(**kwargs) + if lut_error: + ret["name"][name][attr_name]["lutError"] = lut_error[name][ + attr_name + ] + if static_error: + ret["name"][name][attr_name]["staticError"] = static_error[name][ + attr_name + ] + if model_error: + ret["name"][name][attr_name]["modelError"] = model_error[name][ + attr_name + ] attr_name = list(self.attributes(name))[0] for param_name in self.parameters: - ret["paramValuesbyName"][name][param_name] = self.attr_by_name[name][ - attr_name - ].stats.distinct_values_by_param_name[param_name] + if self.attr_by_name[name][attr_name].stats is not None: + ret["paramValuesbyName"][name][param_name] = self.attr_by_name[ + name + ][attr_name].stats.distinct_values_by_param_name[param_name] return ret @classmethod - def from_json(cls, data, by_name, parameters): - assert data["parameters"] == parameters - return cls(by_name, parameters, from_json=data) + def from_json(cls, data, by_name=None, parameters=None): + if by_name is None and parameters is None: + assert data["byParam"] is not None + by_param = dict() + for (nk, pk), v in data["byParam"]: + by_param[(nk, tuple(pk))] = v + by_name = by_param_to_by_name(by_param) + return cls(by_name, data["parameters"], by_param=by_param, from_json=data) + else: + assert data["parameters"] == parameters + return cls(by_name, parameters, from_json=data) def webconf_function_map(self) -> list: ret = list() diff --git a/lib/parameters.py b/lib/parameters.py index 0653100..acb044c 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -621,13 +621,20 @@ class ModelAttribute: mean = np.mean(self.data) return f"ModelAttribute<{self.name}, {self.attr}, mean={mean}>" - def to_json(self, **kwargs): - return { + def to_json(self, with_lut=False, **kwargs): + ret = { "paramNames": self.param_names, "argCount": self.arg_count, "modelFunction": self.model_function.to_json(**kwargs), } + if with_lut: + ret["LUT"] = list() + for key, value in self.by_param.items(): + ret["LUT"].append((key, value)) + + return ret + def to_dref(self, unit=None): ret = {"mean": (self.mean, unit), "median": (self.median, unit)} @@ -724,11 +731,11 @@ class ModelAttribute: return self.mutual_information_cache @classmethod - def from_json(cls, name, attr, data): + def from_json(cls, name, attr, data, data_values=None, param_values=None): param_names = data["paramNames"] arg_count = data["argCount"] - self = cls(name, attr, None, None, param_names, arg_count) + self = cls(name, attr, data_values, param_values, param_names, arg_count) self.model_function = df.ModelFunction.from_json(data["modelFunction"]) self.mean = self.model_function.value diff --git a/lib/paramfit.py b/lib/paramfit.py index 000aa9c..84eba2b 100644 --- a/lib/paramfit.py +++ b/lib/paramfit.py @@ -16,9 +16,14 @@ from .utils import ( ) logger = logging.getLogger(__name__) -best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr") +dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear") dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf)) +if dfatool_uls_loss_fun == "linear": + best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr") +else: + best_fit_metric = 
os.getenv("DFATOOL_ULS_ERROR_METRIC", "mae") + class ParamFit: """ @@ -222,6 +227,7 @@ def _try_fits( ini, args=(X, Y), xtol=2e-15, + loss=dfatool_uls_loss_fun, bounds=param_function.bounds, ) except FloatingPointError as e: diff --git a/lib/utils.py b/lib/utils.py index 4850a53..fb76367 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -48,6 +48,8 @@ def running_mean(x: np.ndarray, N: int) -> np.ndarray: def human_readable(value, unit): + if value is None: + return value for prefix, factor in ( ("p", 1e-12), ("n", 1e-9), @@ -55,6 +57,8 @@ def human_readable(value, unit): ("m", 1e-3), ("", 1), ("k", 1e3), + ("M", 1e6), + ("G", 1e9), ): if value < 1e3 * factor: return "{:.2f} {}{}".format(value * (1 / factor), prefix, unit) @@ -150,7 +154,7 @@ def parse_conf_str(conf_str): """ conf_dict = dict() for option in conf_str.split(","): - key, value = option.split("=") + key, value = option.strip().split("=") conf_dict[key] = soft_cast_float(value) return conf_dict @@ -205,6 +209,18 @@ def param_slice_eq(a, b, index): return False +def param_eq_or_none(a, b): + """ + Check if by_param keys a and b are identical, allowing a None in a to match any key in b. + """ + set_keys = tuple(filter(lambda i: a[i] is not None, range(len(a)))) + a_not_none = tuple(map(lambda i: a[i], set_keys)) + b_not_none = tuple(map(lambda i: b[i], set_keys)) + if a_not_none == b_not_none: + return True + return False + + def match_parameter_values(input_param: dict, match_param: dict): """ Check whether one of the paramaters in `input_param` has the same value in `match_param`. @@ -302,6 +318,21 @@ def param_dict_to_list(param_dict, parameter_names, default=None): return ret +def param_dict_to_str(param_dict): + ret = list() + for parameter_name in sorted(param_dict.keys()): + ret.append(f"{parameter_name}={param_dict[parameter_name]}") + return " ".join(ret) + + +def param_str_to_dict(param_str): + ret = dict() + for param_pair in param_str.split(): + key, value = param_pair.split("=") + ret[key] = soft_cast_int_or_float(value) + return ret + + def observations_enum_to_bool(observations: list, kconfig=False): """ Convert enum / categorical observations to boolean-only ones. @@ -697,11 +728,18 @@ def regression_measures(predicted: np.ndarray, ground_truth: np.ndarray): rsq -- R^2 measure, see sklearn.metrics.r2_score count -- Number of values """ - if type(predicted) != np.ndarray: + + if type(predicted) is list: + predicted = np.array(predicted) + + if type(ground_truth) is list: + ground_truth = np.array(ground_truth) + + if type(predicted) is not np.ndarray: raise ValueError( "first arg ('predicted') must be ndarray, is {}".format(type(predicted)) ) - if type(ground_truth) != np.ndarray: + if type(ground_truth) is not np.ndarray: raise ValueError( "second arg ('ground_truth') must be ndarray, is {}".format( type(ground_truth) |