diff options
author | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-01-10 13:47:54 +0100 |
---|---|---|
committer | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-01-10 13:47:54 +0100 |
commit | 34e1e445a1c5be892b436632245659f93a36f271 (patch) | |
tree | aa8cbe8fe4b23363c94e3ef65eb33e55abbf1551 | |
parent | e77adfcc3cc0a173108aa182ed7f71a5be7e5408 (diff) |
dot export: add xgboost support and documentation
-rw-r--r-- | doc/model-visual.md | 11 |
-rw-r--r-- | lib/cli.py | 8 |
-rw-r--r-- | lib/parameters.py | 71 |
3 files changed, 49 insertions, 41 deletions
diff --git a/doc/model-visual.md b/doc/model-visual.md index 4f629d1..7830421 100644 --- a/doc/model-visual.md +++ b/doc/model-visual.md @@ -5,3 +5,14 @@ In low- to medium-complexity applications, the simplest way of examining the performance models generated by dfatool is textual output: `--show-model=param`. This is implemented for most modeling methods. + +## Graphical Output via dot(1) + +`--export-dot PREFIX` exports the generated performance models for each pair of +name (state/transition/…) and performance attribute to +`PREFIX(name)-(attribute).dot`. The dot(1) program is capable of transforming +those into images. For instance, if feh(1) is available, a visual model is +accessible via `dot -Tpng filename.dot | feh -`. + +In case of regression forests (XGBoost), dfatool exports the individual trees to +`PREFIX(name)-(attribute).(index).dot`. @@ -270,6 +270,14 @@ def export_dot(model, dot_prefix): dot_model = model.attr_by_name[name][attribute].to_dot() if dot_model is None: logger.debug(f"{name} {attribute} does not have a dot model") + elif type(dot_model) is list: + # A Forest + for i, tree in enumerate(dot_model): + filename = f"{dot_prefix}{name}-{attribute}.{i:03d}.dot" + with open(filename, "w") as f: + print(tree, file=f) + filename = filename.replace(f".{len(dot_model)-1:03d}.", ".*.") + logger.info(f"Dot exports of model saved to {filename}") else: filename = f"{dot_prefix}{name}-{attribute}.dot" with open(filename, "w") as f: diff --git a/lib/parameters.py b/lib/parameters.py index e6d5561..9a0171b 100644 --- a/lib/parameters.py +++ b/lib/parameters.py @@ -688,57 +688,46 @@ class ModelAttribute: graph = pydot.Dot("Regression Model Tree", graph_type="graph") self.model_function.to_dot(pydot, graph, self.param_names) return graph - if type(self.model_function) == df.CARTFunction: - import sklearn.tree - feature_names = list( - map( - lambda i: self.param_names[i], - filter( - lambda i: not self.model_function.ignore_index[i], - 
range(len(self.param_names)), - ), - ) + feature_names = list( + map( + lambda i: self.param_names[i], + filter( + lambda i: not self.model_function.ignore_index[i], + range(len(self.param_names)), + ), ) - feature_names += list( - map( - lambda i: f"arg{i-len(self.param_names)}", - filter( - lambda i: not self.model_function.ignore_index[i], - range( - len(self.param_names), - len(self.param_names) + self.arg_count, - ), + ) + feature_names += list( + map( + lambda i: f"arg{i-len(self.param_names)}", + filter( + lambda i: not self.model_function.ignore_index[i], + range( + len(self.param_names), + len(self.param_names) + self.arg_count, ), - ) + ), ) + ) + + if type(self.model_function) == df.CARTFunction: + import sklearn.tree + return sklearn.tree.export_graphviz( self.model_function.regressor, out_file=None, feature_names=feature_names, ) + if type(self.model_function) == df.XGBoostFunction: + import xgboost + + self.model_function.regressor.get_booster().feature_names = feature_names + return [ + xgboost.to_graphviz(self.model_function.regressor, num_trees=i) + for i in range(self.model_function.regressor.n_estimators) + ] if type(self.model_function) == df.LMTFunction: - feature_names = list( - map( - lambda i: self.param_names[i], - filter( - lambda i: not self.model_function.ignore_index[i], - range(len(self.param_names)), - ), - ) - ) - feature_names += list( - map( - lambda i: f"arg{i-len(self.param_names)}", - filter( - lambda i: not self.model_function.ignore_index[i], - range( - len(self.param_names), - len(self.param_names) + self.arg_count, - ), - ), - ) - ) return self.model_function.regressor.model_to_dot( feature_names=feature_names ) |