summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBirte Kristina Friesel <birte.friesel@uos.de>2024-01-10 13:47:54 +0100
committerBirte Kristina Friesel <birte.friesel@uos.de>2024-01-10 13:47:54 +0100
commit34e1e445a1c5be892b436632245659f93a36f271 (patch)
treeaa8cbe8fe4b23363c94e3ef65eb33e55abbf1551
parente77adfcc3cc0a173108aa182ed7f71a5be7e5408 (diff)
dot export: add xgboost support and documentation
-rw-r--r--doc/model-visual.md11
-rw-r--r--lib/cli.py8
-rw-r--r--lib/parameters.py71
3 files changed, 49 insertions, 41 deletions
diff --git a/doc/model-visual.md b/doc/model-visual.md
index 4f629d1..7830421 100644
--- a/doc/model-visual.md
+++ b/doc/model-visual.md
@@ -5,3 +5,14 @@
In low- to medium-complexity applications, the simplest way of examining the
performance models generated by dfatool is textual output: `--show-model=param`.
This is implemented for most modeling methods.
+
+## Graphical Output via dot(1)
+
+`--export-dot PREFIX` exports the generated performance models for each pair of
+name (state/transition/…) and performance attribute to
+`PREFIX(name)-(attribute).dot`. The dot(1) program is capable of transforming
+those into images. For instance, if feh(1) is available, a visual model is
+accessible via `dot -Tpng filename.dot | feh -`.
+
+In the case of regression forests (XGBoost), dfatool exports each tree to its own
+file, `PREFIX(name)-(attribute).(index).dot`, with a zero-padded three-digit index.
diff --git a/lib/cli.py b/lib/cli.py
index 011155b..f296cff 100644
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -270,6 +270,14 @@ def export_dot(model, dot_prefix):
dot_model = model.attr_by_name[name][attribute].to_dot()
if dot_model is None:
logger.debug(f"{name} {attribute} does not have a dot model")
+ elif type(dot_model) is list:
+ # A Forest
+ for i, tree in enumerate(dot_model):
+ filename = f"{dot_prefix}{name}-{attribute}.{i:03d}.dot"
+ with open(filename, "w") as f:
+ print(tree, file=f)
+ filename = filename.replace(f".{len(dot_model)-1:03d}.", ".*.")
+ logger.info(f"Dot exports of model saved to {filename}")
else:
filename = f"{dot_prefix}{name}-{attribute}.dot"
with open(filename, "w") as f:
diff --git a/lib/parameters.py b/lib/parameters.py
index e6d5561..9a0171b 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -688,57 +688,46 @@ class ModelAttribute:
graph = pydot.Dot("Regression Model Tree", graph_type="graph")
self.model_function.to_dot(pydot, graph, self.param_names)
return graph
- if type(self.model_function) == df.CARTFunction:
- import sklearn.tree
- feature_names = list(
- map(
- lambda i: self.param_names[i],
- filter(
- lambda i: not self.model_function.ignore_index[i],
- range(len(self.param_names)),
- ),
- )
+ feature_names = list(
+ map(
+ lambda i: self.param_names[i],
+ filter(
+ lambda i: not self.model_function.ignore_index[i],
+ range(len(self.param_names)),
+ ),
)
- feature_names += list(
- map(
- lambda i: f"arg{i-len(self.param_names)}",
- filter(
- lambda i: not self.model_function.ignore_index[i],
- range(
- len(self.param_names),
- len(self.param_names) + self.arg_count,
- ),
+ )
+ feature_names += list(
+ map(
+ lambda i: f"arg{i-len(self.param_names)}",
+ filter(
+ lambda i: not self.model_function.ignore_index[i],
+ range(
+ len(self.param_names),
+ len(self.param_names) + self.arg_count,
),
- )
+ ),
)
+ )
+
+ if type(self.model_function) == df.CARTFunction:
+ import sklearn.tree
+
return sklearn.tree.export_graphviz(
self.model_function.regressor,
out_file=None,
feature_names=feature_names,
)
+ if type(self.model_function) == df.XGBoostFunction:
+ import xgboost
+
+ self.model_function.regressor.get_booster().feature_names = feature_names
+ return [
+ xgboost.to_graphviz(self.model_function.regressor, num_trees=i)
+ for i in range(self.model_function.regressor.n_estimators)
+ ]
if type(self.model_function) == df.LMTFunction:
- feature_names = list(
- map(
- lambda i: self.param_names[i],
- filter(
- lambda i: not self.model_function.ignore_index[i],
- range(len(self.param_names)),
- ),
- )
- )
- feature_names += list(
- map(
- lambda i: f"arg{i-len(self.param_names)}",
- filter(
- lambda i: not self.model_function.ignore_index[i],
- range(
- len(self.param_names),
- len(self.param_names) + self.arg_count,
- ),
- ),
- )
- )
return self.model_function.regressor.model_to_dot(
feature_names=feature_names
)