author    Birte Kristina Friesel <birte.friesel@uos.de>  2024-02-21 12:35:45 +0100
committer Birte Kristina Friesel <birte.friesel@uos.de>  2024-02-21 12:35:45 +0100
commit    d5950d6f31de5403ed61124d799cd0fafe491b06 (patch)
tree      8c38e4001013cb13c960922fe68b33b2b389df96
parent    5d83f255f05c3b74df0ace1f70b260959b392eca (diff)
Replace DFATOOL_FIT_FOL with DFATOOL_MODEL=fol
-rw-r--r--  README.md               |  3
-rw-r--r--  doc/modeling-method.md  |  5
-rw-r--r--  lib/model.py            |  5
-rw-r--r--  lib/parameters.py       | 75
4 files changed, 47 insertions, 41 deletions
diff --git a/README.md b/README.md
index 8690959..b99a54e 100644
--- a/README.md
+++ b/README.md
@@ -111,7 +111,7 @@ The following variables may be set to alter the behaviour of dfatool components.
| `DFATOOL_KCONF_WITH_CHOICE_NODES` | 0, **1** | Treat kconfig choices (e.g. "choice Model → MobileNet / ResNet / Inception") as enum parameters. If enabled, the corresponding boolean kconfig variables (e.g. "Model\_MobileNet") are not converted to parameters. If disabled, all (and only) boolean kconfig variables are treated as parameters. Mostly relevant for analyze-kconfig, eval-kconfig |
| `DFATOOL_COMPENSATE_DRIFT` | **0**, 1 | Perform drift compensation for loaders without sync input (e.g. EnergyTrace or Keysight) |
| `DFATOOL_DRIFT_COMPENSATION_PENALTY` | 0 .. 100 (default: majority vote over several penalties) | Specify penalty for ruptures.py PELT changepoint detection |
-| `DFATOOL_MODEL` | cart, decart, lmt, **rmt**, xgb | Modeling method. See below for method-specific configuration options. |
+| `DFATOOL_MODEL` | cart, decart, fol, lmt, **rmt**, xgb | Modeling method. See below for method-specific configuration options. |
| `DFATOOL_DTREE_ENABLED` | 0, **1** | Use decision trees in get\_fitted |
| `DFATOOL_DTREE_FUNCTION_LEAVES` | 0, **1** | Use functions (fitted via linear regression) in decision tree leaves when modeling numeric parameters with at least three distinct values. If 0, integer parameters are treated as enums instead. |
| `DFATOOL_CART_MAX_DEPTH` | **0** .. *n* | maximum depth for sklearn CART. Default (0): unlimited. |
@@ -140,7 +140,6 @@ The following variables may be set to alter the behaviour of dfatool components.
| `DFATOOL_PARAM_RELEVANCE_THRESHOLD` | 0 .. **0.5** .. 1 | Threshold for relevant parameter detection: parameter *i* is relevant if mean standard deviation (data partitioned by all parameters) / mean standard deviation (data partitioned by all parameters but *i*) is less than the threshold |
| `DFATOOL_DTREE_LOSS_IGNORE_SCALAR` | **0**, 1 | Ignore scalar parameters when computing the loss for split node candidates. Instead of computing the loss of a single partition for each `x_i == j`, compute the loss of partitions for `x_i == j` in which non-scalar parameters vary and scalar parameters are constant. This way, scalar parameters do not affect the decision about which non-scalar parameter to use for splitting. |
| `DFATOOL_PARAM_CATEGORICAL_TO_SCALAR` | **0**, 1 | Some models (e.g. FOL, sklearn CART, XGBoost) do not support categorical parameters. Ignore them (0) or convert them to scalar indexes (1). Conversion uses lexical order. |
-| `DFATOOL_FIT_FOL` | **0**, 1 | Build a first-order linear function (i.e., a * param1 + b * param2 + ...) instead of more complex functions or tree structures. Must not be combined with `--force-tree`. |
| `DFATOOL_FOL_SECOND_ORDER` | **0**, 1 | Add second-order components (interaction of feature pairs) to first-order linear function. |
## Examples
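With this commit, FOL is selected like any other modeling method via `DFATOOL_MODEL`. A minimal usage sketch: the `dfatool.model` import path matches this repository's `lib/model.py`, while the measurement inputs `by_name` and `parameter_names` are hypothetical placeholders, and `get_fitted` is the entry point named in the README table above.

    import os

    # Select the FOL modeling method; before this commit: DFATOOL_FIT_FOL=1.
    os.environ["DFATOOL_MODEL"] = "fol"
    # FOL does not support categorical parameters; convert them to scalar indexes.
    os.environ["DFATOOL_PARAM_CATEGORICAL_TO_SCALAR"] = "1"

    from dfatool.model import AnalyticModel

    # by_name / parameter_names: placeholder measurement data.
    model = AnalyticModel(by_name, parameter_names)
    param_model, param_info = model.get_fitted()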
diff --git a/doc/modeling-method.md b/doc/modeling-method.md
index 58fe03b..7a4c635 100644
--- a/doc/modeling-method.md
+++ b/doc/modeling-method.md
@@ -14,6 +14,10 @@ sklearn CART ("Decision Tree Regression") algorithm. Uses binary nodes and suppo
sklearn CART ("Decision Tree Regression") algorithm. Ignores scalar parameters, thus emulating the DECART algorithm.
+## FOL (First-Order Linear function)
+
+Build a first-order linear function (i.e., a * param1 + b * param2 + ...).
+
## LMT (Linear Model Trees)
[Linear Model Tree](https://github.com/cerlymarco/linear-tree) algorithm. Uses binary nodes and linear functions.
@@ -41,7 +45,6 @@ All of these are valid regression model trees.
* `--force-tree` builds a tree structure even if dfatool's heuristic indicates that no non-integer parameter affects the modeled performance attribute.
* `DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS=0` disables the relevant parameter detection heuristic when building the tree structure. By default, irrelevant parameters cannot end up as decision nodes.
* `DFATOOL_FIT_LINEAR_ONLY=1` makes RMT behave more like LMT by only considering linear functions in leaf nodes.
-* `DFATOOL_FIT_FOL=1`
* `DFATOOL_PARAM_CATEGORICAL_TO_SCALAR=1`
* `DFATOOL_ULS_SKIP_CODEPENDENT_CHECK=1`
* `DFATOOL_REGRESSION_SAFE_FUNCTIONS=1`
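The new FOL section is deliberately terse. As a concrete illustration, this is the function family a FOL model represents, sketched in plain Python (not dfatool code); `DFATOOL_FOL_SECOND_ORDER=1`, documented in the README, additionally fits one coefficient per pairwise interaction term.

    from itertools import combinations

    def fol_predict(params, coef, intercept, interaction_coef=None):
        """First-order linear model: intercept + a * param1 + b * param2 + ...

        If interaction_coef is given, second-order components
        (one coefficient per parameter pair) are added as well.
        """
        y = intercept + sum(c * p for c, p in zip(coef, params))
        if interaction_coef is not None:
            y += sum(
                c * p * q
                for c, (p, q) in zip(interaction_coef, combinations(params, 2))
            )
        return y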
diff --git a/lib/model.py b/lib/model.py
index 972547d..6718090 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -304,6 +304,8 @@ class AnalyticModel:
                    self.attr_by_name[name][attr].build_cart()
                elif model_type == "decart":
                    self.attr_by_name[name][attr].build_decart()
+                elif model_type == "fol":
+                    self.attr_by_name[name][attr].build_fol()
                elif model_type == "lmt":
                    self.attr_by_name[name][attr].build_lmt()
                elif model_type == "xgb":
@@ -332,7 +334,6 @@ class AnalyticModel:
        else:
            paramfit = ParamFit()
        tree_allowed = bool(int(os.getenv("DFATOOL_DTREE_ENABLED", "1")))
-        use_fol = bool(int(os.getenv("DFATOOL_FIT_FOL", "0")))
        use_symreg = bool(int(os.getenv("DFATOOL_FIT_SYMREG", "0")))

        tree_required = dict()
@@ -341,8 +342,6 @@ class AnalyticModel:
            for attr in self.attr_by_name[name].keys():
                if self.attr_by_name[name][attr].function_override is not None:
                    self.attr_by_name[name][attr].fit_override_function()
-                elif use_fol:
-                    self.attr_by_name[name][attr].build_fol_model()
                elif use_symreg:
                    self.attr_by_name[name][attr].build_symreg_model()
                elif self.attr_by_name[name][
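The net effect of the `lib/model.py` changes: instead of checking a separate `use_fol` flag inside the fitting loop, FOL is now one branch of the single `model_type` dispatch. A condensed sketch of the resulting control flow, with the `DFATOOL_MODEL` default of "rmt" taken from the README table and the surrounding loops and error handling elided:

    model_type = os.getenv("DFATOOL_MODEL", "rmt")
    attr = self.attr_by_name[name][attr_name]
    if model_type == "cart":
        attr.build_cart()
    elif model_type == "decart":
        attr.build_decart()
    elif model_type == "fol":
        attr.build_fol()
    elif model_type == "lmt":
        attr.build_lmt()
    elif model_type == "xgb":
        attr.build_xgb()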
diff --git a/lib/parameters.py b/lib/parameters.py
index fa85b7a..d3d2659 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -830,35 +830,6 @@ class ModelAttribute:
                return False
        return True

-    def build_fol_model(self):
-        ignore_irrelevant = bool(
-            int(os.getenv("DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS", "0"))
-        )
-        ignore_param_indexes = list()
-        if ignore_irrelevant:
-            for param_index, param in enumerate(self.param_names):
-                if not self.stats.depends_on_param(param):
-                    ignore_param_indexes.append(param_index)
-        if not self.stats:
-            logger.warning(
-                "build_fol_model called with ModelAttribute.stats unavailable -- overfitting likely"
-            )
-        else:
-            for param_index, _ in enumerate(self.param_names):
-                if len(self.stats.distinct_values_by_param_index[param_index]) < 2:
-                    ignore_param_indexes.append(param_index)
-        x = df.FOLFunction(
-            self.median,
-            self.param_names,
-            n_samples=self.data.shape[0],
-            num_args=self.arg_count,
-        )
-        x.fit(self.param_values, self.data, ignore_param_indexes=ignore_param_indexes)
-        if x.fit_success:
-            self.model_function = x
-        else:
-            logger.warning(f"Fit of first-order linear model function failed.")
-
    def build_symreg_model(self):
        ignore_irrelevant = bool(
            int(os.getenv("DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS", "0"))
@@ -963,8 +934,42 @@ class ModelAttribute:
            )
            return False

-    def build_xgb(self):
-        mf = df.XGBoostFunction(
+    def build_fol(self):
+        ignore_irrelevant = bool(
+            int(os.getenv("DFATOOL_DTREE_IGNORE_IRRELEVANT_PARAMS", "0"))
+        )
+        ignore_param_indexes = list()
+        if ignore_irrelevant:
+            for param_index, param in enumerate(self.param_names):
+                if not self.stats.depends_on_param(param):
+                    ignore_param_indexes.append(param_index)
+        if not self.stats:
+            logger.warning(
+                "build_fol called with ModelAttribute.stats unavailable -- overfitting likely"
+            )
+        else:
+            for param_index, _ in enumerate(self.param_names):
+                if len(self.stats.distinct_values_by_param_index[param_index]) < 2:
+                    ignore_param_indexes.append(param_index)
+        x = df.FOLFunction(
+            self.median,
+            self.param_names,
+            n_samples=self.data.shape[0],
+            num_args=self.arg_count,
+        )
+        x.fit(self.param_values, self.data, ignore_param_indexes=ignore_param_indexes)
+        if x.fit_success:
+            self.model_function = x
+            return True
+        else:
+            logger.warning(f"Fit of first-order linear model function failed.")
+            self.model_function = df.StaticFunction(
+                np.mean(self.data), n_samples=len(self.data)
+            )
+            return False
+
+    def build_lmt(self):
+        mf = df.LMTFunction(
            np.mean(self.data),
            n_samples=len(self.data),
            param_names=self.param_names,
@@ -975,14 +980,14 @@ class ModelAttribute:
            self.model_function = mf
            return True
        else:
-            logger.warning(f"XGB generation for {self.name} {self.attr} failed")
+            logger.warning(f"LMT generation for {self.name} {self.attr} failed")
            self.model_function = df.StaticFunction(
                np.mean(self.data), n_samples=len(self.data)
            )
            return False

-    def build_lmt(self):
-        mf = df.LMTFunction(
+    def build_xgb(self):
+        mf = df.XGBoostFunction(
            np.mean(self.data),
            n_samples=len(self.data),
            param_names=self.param_names,
@@ -993,7 +998,7 @@ class ModelAttribute:
            self.model_function = mf
            return True
        else:
-            logger.warning(f"LMT generation for {self.name} {self.attr} failed")
+            logger.warning(f"XGB generation for {self.name} {self.attr} failed")
            self.model_function = df.StaticFunction(
                np.mean(self.data), n_samples=len(self.data)
            )
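Beyond the rename, `build_fol` now follows the same contract as `build_lmt` and `build_xgb`: it returns True on a successful fit, and otherwise installs a static mean model and returns False, whereas the removed `build_fol_model` returned nothing and left `model_function` unset on failure. A standalone sketch of that fit-or-fallback pattern; `df` is the alias the hunks above use for dfatool's function classes, and the `attr` argument stands in for a ModelAttribute instance.

    import numpy as np

    def fit_fol_or_static(attr, ignore_param_indexes):
        """Try a first-order linear fit; fall back to a static mean model."""
        x = df.FOLFunction(
            attr.median,
            attr.param_names,
            n_samples=attr.data.shape[0],
            num_args=attr.arg_count,
        )
        x.fit(attr.param_values, attr.data, ignore_param_indexes=ignore_param_indexes)
        if x.fit_success:
            attr.model_function = x
            return True
        # Fallback: a StaticFunction always predicts the mean of the data.
        attr.model_function = df.StaticFunction(
            np.mean(attr.data), n_samples=len(attr.data)
        )
        return False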