Add DFATOOL_ULS_LOSS_FUNCTION variable (main)

author: Birte Kristina Friesel <birte.friesel@uos.de> 2025-07-01 11:31:35 +0200
committer: Birte Kristina Friesel <birte.friesel@uos.de> 2025-07-01 11:31:35 +0200
commit: d95c23431cfac423a19f7827155ae836cbbe558a (patch)
tree: ecbde25a8c7f49406e1deab7717c9f4046a6c7b2
parent: c9b2559aca435d65ab33a7bfaaa0ee0f9620596e (diff)
3 files changed, 16 insertions, 3 deletions
diff --git a/README.md b/README.md
index cb88eed..1a6cbfc 100644
--- a/README.md
+++ b/README.md
@@ -133,8 +133,9 @@ The following variables may be set to alter the behaviour of dfatool components.
 | `DFATOOL_LMT_MIN_SAMPLES_LEAF` | 0.0 .. **0.1** .. 1.0, 3 .. *n* | Minimum samples that each leaf of a split candidate must contain. A value below 1.0 specifies a ratio of the total number of training samples. A value above 1 specifies an absolute number of samples. |
 | `DFATOOL_LMT_MAX_BINS` | 10 .. **120** | Number of bins used to determine optimal split. LMT default: 25. |
 | `DFATOOL_LMT_CRITERION` | **mse**, rmse, mae, poisson | Error metric to use when selecting best split. |
-| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, mae, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. Least squares regression itself minimzes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. Supports all metrics accepted by `--error-metric`. |
+| `DFATOOL_ULS_ERROR_METRIC` | **ssr**, rmsd, **mae**, … | Error metric to use when selecting best-fitting function during unsupervised least squares (ULS) regression. By default, least squares regression minimizes root mean square deviation (rmsd), hence the equivalent (but partitioning-compatible) sum of squared residuals (ssr) is the default. If `DFATOOL_ULS_LOSS_FUNCTION` is set to a value other than `linear`, the default is mean absolute error (mae). Supports all metrics accepted by `--error-metric`. |
 | `DFATOOL_ULS_FUNCTIONS` | a,b,… | List of function templates to use in ULS. Default: all supported functions. |
+| `DFATOOL_ULS_LOSS_FUNCTION` | **linear**, soft\_l1, … | Loss function for least squares fitting, see `scipy.optimize.least_squares#loss` documentation. |
 | `DFATOOL_ULS_MIN_DISTINCT_VALUES` | 2 .. **3** .. *n* | Minimum number of unique values a parameter must take to be eligible for ULS |
 | `DFATOOL_ULS_SKIP_CODEPENDENT_CHECK` | **0**, 1 | Do not detect and remove co-dependent features in ULS. |
 | `DFATOOL_ULS_MIN_BOUND` | **-∞** .. *n* | Lower bound for ULS regression variables. Setting it to 0 can often be beneficial. |
diff --git a/lib/functions.py b/lib/functions.py
index 35b04ef..b76814b 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -27,6 +27,7 @@ dfatool_rmt_relevance_threshold = float(
     os.getenv("DFATOOL_RMT_RELEVANCE_THRESHOLD", "0.5")
 )
 
+dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear")
 dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf))
 
 if dfatool_preproc_relevance_method == "mi":
@@ -1692,7 +1693,11 @@ class FOLFunction(SKLearnRegressionFunction):
         self.model_args = list(np.ones((num_vars)))
         try:
             res = optimize.least_squares(
-                error_function, self.model_args, args=(fit_parameters, data), xtol=2e-15
+                error_function,
+                self.model_args,
+                args=(fit_parameters, data),
+                xtol=2e-15,
+                loss=dfatool_uls_loss_fun,
             )
         except ValueError as err:
             logger.warning(f"Fit failed: {err} (function: {self.model_function})")
@@ -1955,6 +1960,7 @@ class AnalyticFunction(ModelFunction):
                     self.model_args,
                     args=(X, Y),
                     xtol=2e-15,
+                    loss=dfatool_uls_loss_fun,
                     bounds=(lower_bounds, upper_bounds),
                 )
             except ValueError as err:
diff --git a/lib/paramfit.py b/lib/paramfit.py
index 000aa9c..84eba2b 100644
--- a/lib/paramfit.py
+++ b/lib/paramfit.py
@@ -16,9 +16,14 @@ from .utils import (
 )
 
 logger = logging.getLogger(__name__)
-best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr")
+dfatool_uls_loss_fun = os.getenv("DFATOOL_ULS_LOSS_FUNCTION", "linear")
 dfatool_uls_min_bound = float(os.getenv("DFATOOL_ULS_MIN_BOUND", -np.inf))
 
+if dfatool_uls_loss_fun == "linear":
+    best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "ssr")
+else:
+    best_fit_metric = os.getenv("DFATOOL_ULS_ERROR_METRIC", "mae")
+
 
 class ParamFit:
     """
@@ -222,6 +227,7 @@ def _try_fits(
                         ini,
                         args=(X, Y),
                         xtol=2e-15,
+                        loss=dfatool_uls_loss_fun,
                         bounds=param_function.bounds,
                     )
                 except FloatingPointError as e:
author	Birte Kristina Friesel <birte.friesel@uos.de>	2025-07-01 11:31:35 +0200
committer	Birte Kristina Friesel <birte.friesel@uos.de>	2025-07-01 11:31:35 +0200
commit	d95c23431cfac423a19f7827155ae836cbbe558a (patch)
tree	ecbde25a8c7f49406e1deab7717c9f4046a6c7b2
parent	c9b2559aca435d65ab33a7bfaaa0ee0f9620596e (diff)