author    Birte Kristina Friesel <birte.friesel@uos.de>    2024-02-21 12:35:45 +0100
committer Birte Kristina Friesel <birte.friesel@uos.de>    2024-02-21 12:35:45 +0100
commit    d5950d6f31de5403ed61124d799cd0fafe491b06
tree      8c38e4001013cb13c960922fe68b33b2b389df96
parent    5d83f255f05c3b74df0ace1f70b260959b392eca
Replace DFATOOL_FIT_FOL with DFATOOL_MODEL=fol
-rw-r--r--  README.md              |  3
-rw-r--r--  doc/modeling-method.md |  5
-rw-r--r--  lib/model.py           |  5
-rw-r--r--  lib/parameters.py      | 75
4 files changed, 47 insertions, 41 deletions
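In short, this commit retires the standalone `DFATOOL_FIT_FOL` toggle and folds FOL into the `DFATOOL_MODEL` selector. A minimal sketch of the resulting dispatch, assuming a `ModelAttribute`-like object with the `build_*` methods from this commit; the wrapper function and the subset of branches shown are simplifications, not dfatool's actual control flow:

```python
import os

def dispatch_model(attr):
    """Select a build method via DFATOOL_MODEL (simplified sketch).

    "rmt" is the documented default; the rmt/decart branches and error
    handling are omitted here. attr stands in for a ModelAttribute.
    """
    model_type = os.getenv("DFATOOL_MODEL", "rmt")
    if model_type == "fol":
        attr.build_fol()  # new in this commit; replaces DFATOOL_FIT_FOL=1
    elif model_type == "cart":
        attr.build_cart()
    elif model_type == "lmt":
        attr.build_lmt()
    elif model_type == "xgb":
        attr.build_xgb()
```

With this in place, `DFATOOL_MODEL=fol` selects the first-order linear model that previously required setting `DFATOOL_FIT_FOL=1`.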
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -111,7 +111,7 @@ The following variables may be set to alter the behaviour of dfatool components.
 | `DFATOOL_KCONF_WITH_CHOICE_NODES` | 0, **1** | Treat kconfig choices (e.g. "choice Model → MobileNet / ResNet / Inception") as enum parameters. If enabled, the corresponding boolean kconfig variables (e.g. "Model\_MobileNet") are not converted to parameters. If disabled, all (and only) boolean kconfig variables are treated as parameters. Mostly relevant for analyze-kconfig, eval-kconfig |
 | `DFATOOL_COMPENSATE_DRIFT` | **0**, 1 | Perform drift compensation for loaders without sync input (e.g. EnergyTrace or Keysight) |
 | `DFATOOL_DRIFT_COMPENSATION_PENALTY` | 0 .. 100 (default: majority vote over several penalties) | Specify penalty for ruptures.py PELT changepoint detection |
-| `DFATOOL_MODEL` | cart, decart, lmt, **rmt**, xgb | Modeling method. See below for method-specific configuration options. |
+| `DFATOOL_MODEL` | cart, decart, fol, lmt, **rmt**, xgb | Modeling method. See below for method-specific configuration options. |
 | `DFATOOL_DTREE_ENABLED` | 0, **1** | Use decision trees in get\_fitted |
 | `DFATOOL_DTREE_FUNCTION_LEAVES` | 0, **1** | Use functions (fitted via linear regression) in decision tree leaves when modeling numeric parameters with at least three distinct values. If 0, integer parameters are treated as enums instead. |
 | `DFATOOL_CART_MAX_DEPTH` | **0** .. *n* | maximum depth for sklearn CART. Default (0): unlimited. |
@@ -140,7 +140,6 @@ The following variables may be set to alter the behaviour of dfatool components.
 | `DFATOOL_PARAM_RELEVANCE_THRESHOLD` | 0 .. **0.5** .. 1 | Threshold for relevant parameter detection: parameter *i* is relevant if mean standard deviation (data partitioned by all parameters) / mean standard deviation (data partitioned by all parameters but *i*) is less than threshold |
 | `DFATOOL_DTREE_LOSS_IGNORE_SCALAR` | **0**, 1 | Ignore scalar parameters when computing the loss for split node candidates. Instead of computing the loss of a single partition for each `x_i == j`, compute the loss of partitions for `x_i == j` in which non-scalar parameters vary and scalar parameters are constant. This way, scalar parameters do not affect the decision about which non-scalar parameter to use for splitting. |
 | `DFATOOL_PARAM_CATEGORICAL_TO_SCALAR` | **0**, 1 | Some models (e.g. FOL, sklearn CART, XGBoost) do not support categorical parameters. Ignore them (0) or convert them to scalar indexes (1). Conversion uses lexical order. |
-| `DFATOOL_FIT_FOL` | **0**, 1 | Build a first-order linear function (i.e., a * param1 + b * param2 + ...) instead of more complex functions or tree structures. Must not be combined with `--force-tree`. |
 | `DFATOOL_FOL_SECOND_ORDER` | **0**, 1 | Add second-order components (interaction of feature pairs) to first-order linear function. |
 
 ## Examples
diff --git a/doc/modeling-method.md b/doc/modeling-method.md
index 58fe03b..7a4c635 100644
--- a/doc/modeling-method.md
+++ b/doc/modeling-method.md
@@ -14,6 +14,10 @@ sklearn CART ("Decision Tree Regression") algorithm. Uses binary nodes and suppo
 sklearn CART ("Decision Tree Regression") algorithm. Ignores scalar parameters, thus emulating the DECART algorithm.
 
+## FOL (First-Order Linear function)
+
+Build a first-order linear function (i.e., a * param1 + b * param2 + ...).
+
 ## LMT (Linear Model Trees)
 
 [Linear Model Tree](https://github.com/cerlymarco/linear-tree) algorithm. Uses binary nodes and linear functions.
 
@@ -41,7 +45,6 @@ All of these are valid regression model trees.
 * `--force-tree` builds a tree structure even if dfatool's heuristic indicates that no non-integer parameter affects the modeled performance attribute.
 * `DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS=0` disables the relevant parameter detection heuristic when building the tree structure. By default, irrelevant parameters cannot end up as decision nodes.
 * `DFATOOL_FIT_LINEAR_ONLY=1` makes RMT behave more like LMT by only considering linear functions in leaf nodes.
-* `DFATOOL_FIT_FOL=1`
 * `DFATOOL_PARAM_CATEGORICAL_TO_SCALAR=1`
 * `DFATOOL_ULS_SKIP_CODEPENDENT_CHECK=1`
 * `DFATOOL_REGRESSION_SAFE_FUNCTIONS=1`
diff --git a/lib/model.py b/lib/model.py
index 972547d..6718090 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -304,6 +304,8 @@ class AnalyticModel:
                 self.attr_by_name[name][attr].build_cart()
             elif model_type == "decart":
                 self.attr_by_name[name][attr].build_decart()
+            elif model_type == "fol":
+                self.attr_by_name[name][attr].build_fol()
             elif model_type == "lmt":
                 self.attr_by_name[name][attr].build_lmt()
             elif model_type == "xgb":
@@ -332,7 +334,6 @@
         else:
             paramfit = ParamFit()
         tree_allowed = bool(int(os.getenv("DFATOOL_DTREE_ENABLED", "1")))
-        use_fol = bool(int(os.getenv("DFATOOL_FIT_FOL", "0")))
         use_symreg = bool(int(os.getenv("DFATOOL_FIT_SYMREG", "0")))
 
         tree_required = dict()
@@ -341,8 +342,6 @@
         for attr in self.attr_by_name[name].keys():
             if self.attr_by_name[name][attr].function_override is not None:
                 self.attr_by_name[name][attr].fit_override_function()
-            elif use_fol:
-                self.attr_by_name[name][attr].build_fol_model()
             elif use_symreg:
                 self.attr_by_name[name][attr].build_symreg_model()
             elif self.attr_by_name[name][
diff --git a/lib/parameters.py b/lib/parameters.py
index fa85b7a..d3d2659 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -830,35 +830,6 @@ class ModelAttribute:
                 return False
         return True
 
-    def build_fol_model(self):
-        ignore_irrelevant = bool(
-            int(os.getenv("DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS", "0"))
-        )
-        ignore_param_indexes = list()
-        if ignore_irrelevant:
-            for param_index, param in enumerate(self.param_names):
-                if not self.stats.depends_on_param(param):
-                    ignore_param_indexes.append(param_index)
-        if not self.stats:
-            logger.warning(
-                "build_fol_model called with ModelAttribute.stats unavailable -- overfitting likely"
-            )
-        else:
-            for param_index, _ in enumerate(self.param_names):
-                if len(self.stats.distinct_values_by_param_index[param_index]) < 2:
-                    ignore_param_indexes.append(param_index)
-        x = df.FOLFunction(
-            self.median,
-            self.param_names,
-            n_samples=self.data.shape[0],
-            num_args=self.arg_count,
-        )
-        x.fit(self.param_values, self.data, ignore_param_indexes=ignore_param_indexes)
-        if x.fit_success:
-            self.model_function = x
-        else:
-            logger.warning(f"Fit of first-order linear model function failed.")
-
     def build_symreg_model(self):
         ignore_irrelevant = bool(
             int(os.getenv("DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS", "0"))
@@ -963,8 +934,42 @@ class ModelAttribute:
             )
             return False
 
-    def build_xgb(self):
-        mf = df.XGBoostFunction(
+    def build_fol(self):
+        ignore_irrelevant = bool(
+            int(os.getenv("DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS", "0"))
+        )
+        ignore_param_indexes = list()
+        if ignore_irrelevant:
+            for param_index, param in enumerate(self.param_names):
+                if not self.stats.depends_on_param(param):
+                    ignore_param_indexes.append(param_index)
+        if not self.stats:
+            logger.warning(
+                "build_fol_model called with ModelAttribute.stats unavailable -- overfitting likely"
+            )
+        else:
+            for param_index, _ in enumerate(self.param_names):
+                if len(self.stats.distinct_values_by_param_index[param_index]) < 2:
+                    ignore_param_indexes.append(param_index)
+        x = df.FOLFunction(
+            self.median,
+            self.param_names,
+            n_samples=self.data.shape[0],
+            num_args=self.arg_count,
+        )
+        x.fit(self.param_values, self.data, ignore_param_indexes=ignore_param_indexes)
+        if x.fit_success:
+            self.model_function = x
+            return True
+        else:
+            logger.warning(f"Fit of first-order linear model function failed.")
+            self.model_function = df.StaticFunction(
+                np.mean(self.data), n_samples=len(self.data)
+            )
+            return False
+
+    def build_lmt(self):
+        mf = df.LMTFunction(
             np.mean(self.data),
             n_samples=len(self.data),
             param_names=self.param_names,
@@ -975,14 +980,14 @@
             self.model_function = mf
             return True
         else:
-            logger.warning(f"XGB generation for {self.name} {self.attr} failed")
+            logger.warning(f"LMT generation for {self.name} {self.attr} failed")
             self.model_function = df.StaticFunction(
                 np.mean(self.data), n_samples=len(self.data)
             )
             return False
 
-    def build_lmt(self):
-        mf = df.LMTFunction(
+    def build_xgb(self):
+        mf = df.XGBoostFunction(
             np.mean(self.data),
             n_samples=len(self.data),
             param_names=self.param_names,
@@ -993,7 +998,7 @@
             self.model_function = mf
             return True
         else:
-            logger.warning(f"LMT generation for {self.name} {self.attr} failed")
+            logger.warning(f"XGB generation for {self.name} {self.attr} failed")
             self.model_function = df.StaticFunction(
                 np.mean(self.data), n_samples=len(self.data)
             )
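For context, the model that `build_fol` fits is an ordinary first-order linear function, a * param1 + b * param2 + ... + intercept. The following is a minimal self-contained sketch of such a fit using numpy least squares; `fit_fol` is a hypothetical stand-in, not dfatool's `df.FOLFunction`, and it skips the categorical-parameter and `ignore_param_indexes` handling shown in the diff above:

```python
import numpy as np

def fit_fol(param_values, data):
    """Least-squares fit of data ≈ a*param1 + b*param2 + ... + c.

    param_values: (n_samples, n_params) array of numeric parameters.
    data: (n_samples,) array of observed performance values.
    Toy stand-in for df.FOLFunction -- all parameters must be numeric.
    """
    # Append a column of ones so the intercept c is fitted alongside a, b, ...
    X = np.column_stack([param_values, np.ones(len(data))])
    coeffs, *_ = np.linalg.lstsq(X, data, rcond=None)
    return coeffs  # [a, b, ..., c]

# Example: data generated from 2*x1 + 3*x2 + 1 recovers those coefficients.
params = np.array([[1.0, 2.0], [2.0, 1.0], [3.0, 3.0], [4.0, 0.0]])
observed = 2 * params[:, 0] + 3 * params[:, 1] + 1
print(fit_fol(params, observed))  # ≈ [2., 3., 1.]
```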