summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md11
-rw-r--r--lib/behaviour.py50
-rw-r--r--lib/functions.py8
-rw-r--r--lib/paramfit.py8
4 files changed, 45 insertions, 32 deletions
diff --git a/README.md b/README.md
index cb88eed..73d5cbf 100644
--- a/README.md
+++ b/README.md
@@ -55,11 +55,11 @@ Least-Squares Regression is essentially a subset of RMT with just a single tree
LMT and RMT differ significantly, as LMT uses a learning algorithm that starts out with a DECART and uses bottom-up pruning to turn it into an LMT, whereas RMT builds a DECART that only considers parameters that are not suitable for least-squares regression and then uses least-squares regression to find and fit leaf functions.
By default, dfatool uses heuristics to determine whether it should generate a simple least-squares regression function or a fully-fledged RMT.
-Arguments such as `--force-tree` and environment variables (below) can be used to generate a different flavour of performance model; see [Modeling Method Selection](doc/modeling-method.md).
+Arguments such as `--force-tree` and environment variables (below) can be used to generate a different flavour of performance model; see [Modelling Method Selection](doc/modeling-method.md).
Again, most of the options and methods documented here work for all three scripts: analyze-archive, analyze-kconfig, and analyze-log.
* [Model Visualization and Export](doc/model-visual.md)
-* [Modeling Method Selection](doc/modeling-method.md)
+* [Modelling Method Selection](doc/modeling-method.md)
* [Assessing Model Quality](doc/model-assessment.md)
## Model Application
@@ -112,9 +112,9 @@ The following variables may be set to alter the behaviour of dfatool components.
| `DFATOOL_KCONF_WITH_CHOICE_NODES` | 0, **1** | Treat kconfig choices (e.g. "choice Model → MobileNet / ResNet / Inception") as enum parameters. If enabled, the corresponding boolean kconfig variables (e.g. "Model\_MobileNet") are not converted to parameters. If disabled, all (and only) boolean kconfig variables are treated as parameters. Mostly relevant for analyze-kconfig, eval-kconfig |
| `DFATOOL_COMPENSATE_DRIFT` | **0**, 1 | Perform drift compensation for loaders without sync input (e.g. EnergyTrace or Keysight) |
| `DFATOOL_DRIFT_COMPENSATION_PENALTY` | 0 .. 100 (default: majority vote over several penalties) | Specify penalty for ruptures.py PELT changepoint detection |
-| `DFATOOL_MODEL` | cart, decart, fol, lgbm, lmt, **rmt**, symreg, uls, xgb | Modeling method. See below for method-specific configuration options. |
+| `DFATOOL_MODEL` | cart, decart, fol, lgbm, lmt, **rmt**, symreg, uls, xgb | Modelling method. See below for method-specific configuration options. |
| `DFATOOL_RMT_MAX_DEPTH` | **0** .. *n* | Maximum depth for RMT. Default (0): unlimited. |
-| `DFATOOL_RMT_SUBMODEL` | cart, fol, static, symreg, **uls** | Modeling method for RMT leaf functions. |
+| `DFATOOL_RMT_SUBMODEL` | cart, fol, static, symreg, **uls** | Modelling method for RMT leaf functions. |
| `DFATOOL_PREPROCESSING_RELEVANCE_METHOD` | **none**, mi | Ignore parameters deemed irrelevant by the specified heuristic before passing them on to `DFATOOL_MODEL`. |
| `DFATOOL_PREPROCESSING_RELEVANCE_THRESHOLD` | .. **0.1** .. | Threshold for relevance heuristic. |
| `DFATOOL_CART_MAX_DEPTH` | **0** .. *n* | maximum depth for sklearn CART. Default (0): unlimited. |
@@ -133,8 +133,9 @@ The following variables may be set to alter the behaviour of dfatool components.
| `DFATOOL_LMT_MIN_SAMPLES_LEAF` | 0.0 .. **0.1** .. 1.0, 3 .. *n* | Minimum samples that each leaf of a split candidate must contain. A value below 1.0 specifies a ratio of the total number of training samples. A value above 1 specifies an absolute number of samples. |
| `DFATOOL_LMT_MAX_BINS` | 10 .. **120** | Number of bins used to determine optimal split. LMT default: 25. |
| `DFATOOL_LMT_CRITERION` | **mse**, rmse, mae, poisson | Error metric to use when selecting best split. |
-| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, mae, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. Least squares regression itself minimzes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. Supports all metrics accepted by `--error-metric`. |
+| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, **mae**, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. By default, least squares regression minimizes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. If `DFATOOL_ULS_LOSS_FUNCTION` is set to a value other than linear, the default is mean absolute error (mae). Supports all metrics accepted by `--error-metric`. |
| `DFATOOL_ULS_FUNCTIONS` | a,b,… | List of function templates to use in ULS. Default: all supported functions. |
+| `DFATOOL_ULS_LOSS_FUNCTION` | **linear**, soft\_l1, … | Loss function for least squares fitting, see `scipy.optimize.least_squares#loss` documentation. |
| `DFATOOL_ULS_MIN_DISTINCT_VALUES` | 2 .. **3** .. *n* | Minimum number of unique values a parameter must take to be eligible for ULS |
| `DFATOOL_ULS_SKIP_CODEPENDENT_CHECK` | **0**, 1 | Do not detect and remove co-dependent features in ULS. |
| `DFATOOL_ULS_MIN_BOUND` | **-∞** .. *n* | Lower bound for ULS regression variables. Setting it to 0 can often be beneficial. |
diff --git a/lib/behaviour.py b/lib/behaviour.py
index fed0f6d..136a55e 100644
--- a/lib/behaviour.py
+++ b/lib/behaviour.py
@@ -21,9 +21,14 @@ class SDKBehaviourModel:
# annotation.start.param may be incomplete, for instance in cases
# where DPUs are allocated before the input file is loaded (and
# thus before the problem size is known).
- # Hence, we must use annotation.end.param whenever we deal
- # with possibly problem size-dependent behaviour.
- am_tt_param_names = sorted(annotation.end.param.keys())
+ # However, annotation.end.param may also differ from annotation.start.param (it should not, but that's how some benchmarks roll).
+ # So, we use annotation.start.param if it has the same keys as annotation.end.param, and annotation.end.param otherwise
+ if sorted(annotation.start.param.keys()) == sorted(
+ annotation.end.param.keys()
+ ):
+ am_tt_param_names = sorted(annotation.start.param.keys())
+ else:
+ am_tt_param_names = sorted(annotation.end.param.keys())
if annotation.name not in delta_by_name:
delta_by_name[annotation.name] = dict()
delta_param_by_name[annotation.name] = dict()
@@ -146,6 +151,12 @@ class SDKBehaviourModel:
total_latency_us = 0
+ if sorted(annotation.start.param.keys()) == sorted(annotation.end.param.keys()):
+ param_dict = annotation.start.param
+ else:
+ param_dict = annotation.end.param
+ param_str = utils.param_dict_to_str(param_dict)
+
if annotation.kernels:
# ggf. als dict of tuples, für den Fall dass Schleifen verschieden iterieren können?
for i in range(prev_i, annotation.kernels[0].offset):
@@ -154,7 +165,7 @@ class SDKBehaviourModel:
if this in n_seen:
if n_seen[this] == 1:
logger.debug(
- f"Loop found in {annotation.start.name} {annotation.end.param}: {this} ⟳"
+ f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳"
)
n_seen[this] += 1
else:
@@ -164,16 +175,9 @@ class SDKBehaviourModel:
delta[prev] = set()
delta[prev].add(this)
- # annotation.start.param may be incomplete, for instance in cases
- # where DPUs are allocated before the input file is loadeed (and
- # thus before the problem size is known).
- # Hence, we must use annotation.end.param whenever we deal
- # with possibly problem size-dependent behaviour.
if not (prev, this) in delta_param:
delta_param[(prev, this)] = set()
- delta_param[(prev, this)].add(
- utils.param_dict_to_str(annotation.end.param)
- )
+ delta_param[(prev, this)].add(param_str)
prev = this
prev_i = i + 1
@@ -183,7 +187,7 @@ class SDKBehaviourModel:
meta_observations.append(
{
"name": f"__trace__ {this}",
- "param": annotation.end.param,
+ "param": param_dict,
"attribute": dict(
filter(
lambda kv: not kv[0].startswith("e_"),
@@ -205,9 +209,7 @@ class SDKBehaviourModel:
if not (prev, this) in delta_param:
delta_param[(prev, this)] = set()
- delta_param[(prev, this)].add(
- utils.param_dict_to_str(annotation.end.param)
- )
+ delta_param[(prev, this)].add(param_str)
# The last iteration (next block) contains a single kernel,
# so we do not increase total_latency_us here.
@@ -225,7 +227,7 @@ class SDKBehaviourModel:
meta_observations.append(
{
"name": f"__trace__ {this}",
- "param": annotation.end.param,
+ "param": param_dict,
"attribute": dict(
filter(
lambda kv: not kv[0].startswith("e_"),
@@ -243,7 +245,7 @@ class SDKBehaviourModel:
if this in n_seen:
if n_seen[this] == 1:
logger.debug(
- f"Loop found in {annotation.start.name} {annotation.end.param}: {this} ⟳"
+ f"Loop found in {annotation.start.name} {param_dict}: {this} ⟳"
)
n_seen[this] += 1
else:
@@ -255,7 +257,7 @@ class SDKBehaviourModel:
if not (prev, this) in delta_param:
delta_param[(prev, this)] = set()
- delta_param[(prev, this)].add(utils.param_dict_to_str(annotation.end.param))
+ delta_param[(prev, this)].add(param_str)
total_latency_us += observations[i]["attribute"].get("latency_us", 0)
@@ -264,7 +266,7 @@ class SDKBehaviourModel:
meta_observations.append(
{
"name": f"__trace__ {this}",
- "param": annotation.end.param,
+ "param": param_dict,
"attribute": dict(
filter(
lambda kv: not kv[0].startswith("e_"),
@@ -279,15 +281,13 @@ class SDKBehaviourModel:
delta[prev].add("__end__")
if not (prev, "__end__") in delta_param:
delta_param[(prev, "__end__")] = set()
- delta_param[(prev, "__end__")].add(
- utils.param_dict_to_str(annotation.end.param)
- )
+ delta_param[(prev, "__end__")].add(param_str)
for transition, count in n_seen.items():
meta_observations.append(
{
"name": f"__loop__ {transition}",
- "param": annotation.end.param,
+ "param": param_dict,
"attribute": {"n_iterations": count},
}
)
@@ -296,7 +296,7 @@ class SDKBehaviourModel:
meta_observations.append(
{
"name": annotation.start.name,
- "param": annotation.end.param,
+ "param": param_dict,
"attribute": {"latency_us": total_latency_us},
}
)
diff --git a/lib/functions.py b/lib/functions.py
index 35b04ef..b76814b 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -27,6 +27,7 @@ dfatool_rmt_relevance_threshold = float(
os.getenv("DFATOOL_RMT_RELEVANCE_THRESHOLD", "0.5")
)
+dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear")
dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf))
if dfatool_preproc_relevance_method == "mi":
@@ -1692,7 +1693,11 @@ class FOLFunction(SKLearnRegressionFunction):
self.model_args = list(np.ones((num_vars)))
try:
res = optimize.least_squares(
- error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15
+ error_function,
+ self.model_args,
+ args=(fit_parameters, data),
+ xtol=2e-15,
+ loss=dfatool_uls_loss_fun,
)
except ValueError as err:
logger.warning(f"Fit failed: {err} (function: {self.model_function})")
@@ -1955,6 +1960,7 @@ class AnalyticFunction(ModelFunction):
self.model_args,
args=(X, Y),
xtol=2e-15,
+ loss=dfatool_uls_loss_fun,
bounds=(lower_bounds, upper_bounds),
)
except ValueError as err:
diff --git a/lib/paramfit.py b/lib/paramfit.py
index 000aa9c..84eba2b 100644
--- a/lib/paramfit.py
+++ b/lib/paramfit.py
@@ -16,9 +16,14 @@ from .utils import (
)
logger = logging.getLogger(__name__)
-best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr")
+dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear")
dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf))
+if dfatool_uls_loss_fun == "linear":
+ best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr")
+else:
+ best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "mae")
+
class ParamFit:
"""
@@ -222,6 +227,7 @@ def _try_fits(
ini,
args=(X, Y),
xtol=2e-15,
+ loss=dfatool_uls_loss_fun,
bounds=param_function.bounds,
)
except FloatingPointError as e: