Diffstat (limited to 'lib/lineartree/lineartree.py')
-rw-r--r--  lib/lineartree/lineartree.py  1589
1 file changed, 1589 insertions(+), 0 deletions(-)
diff --git a/lib/lineartree/lineartree.py b/lib/lineartree/lineartree.py
new file mode 100644
index 0000000..0c8f2d9
--- /dev/null
+++ b/lib/lineartree/lineartree.py
@@ -0,0 +1,1589 @@
+#!/usr/bin/env python3
+# Copyright (c) 2021 Marco Cerliani, MIT License <https://github.com/cerlymarco/linear-tree>
+
+import numpy as np
+
+from sklearn.base import ClassifierMixin, RegressorMixin
+
+from sklearn.utils import check_array
+from sklearn.utils.validation import check_is_fitted, _check_sample_weight
+
+from ._classes import _predict_branch
+from ._classes import _LinearTree, _LinearBoosting, _LinearForest
+
+
+class LinearTreeRegressor(_LinearTree, RegressorMixin):
+ """A Linear Tree Regressor.
+
+    A Linear Tree Regressor is a meta-estimator that combines the learning
+    ability of Decision Trees and the predictive power of Linear Models.
+    As in tree-based algorithms, the received data are split according to
+    simple decision rules. The goodness of splits is evaluated in terms of
+    gain by fitting linear models in each node. This implies that the models
+    in the leaves are linear instead of constant approximations as in
+    classical Decision Trees.
+
+ Parameters
+ ----------
+ base_estimator : object
+ The base estimator to fit on dataset splits.
+ The base estimator must be a sklearn.linear_model.
+
+ criterion : {"mse", "rmse", "mae", "poisson"}, default="mse"
+ The function to measure the quality of a split. "poisson"
+ requires ``y >= 0``.
+
+ max_depth : int, default=5
+ The maximum depth of the tree considering only the splitting nodes.
+ A higher value implies a higher training time.
+
+ min_samples_split : int or float, default=6
+ The minimum number of samples required to split an internal node.
+ The minimum valid number of samples in each node is 6.
+ A lower value implies a higher training time.
+ - If int, then consider `min_samples_split` as the minimum number.
+ - If float, then `min_samples_split` is a fraction and
+ `ceil(min_samples_split * n_samples)` are the minimum
+ number of samples for each split.
+
+ min_samples_leaf : int or float, default=0.1
+ The minimum number of samples required to be at a leaf node.
+ A split point at any depth will only be considered if it leaves at
+ least `min_samples_leaf` training samples in each of the left and
+ right branches.
+ The minimum valid number of samples in each leaf is 3.
+ A lower value implies a higher training time.
+ - If int, then consider `min_samples_leaf` as the minimum number.
+ - If float, then `min_samples_leaf` is a fraction and
+ `ceil(min_samples_leaf * n_samples)` are the minimum
+ number of samples for each node.
+
+ max_bins : int, default=25
+ The maximum number of bins to use to search the optimal split in each
+ feature. Features with a small number of unique values may use less than
+ ``max_bins`` bins. Must be lower than 120 and larger than 10.
+ A higher value implies a higher training time.
+
+ categorical_features : int or array-like of int, default=None
+ Indicates the categorical features.
+ All categorical indices must be in `[0, n_features)`.
+ Categorical features are used for splits but are not used in
+ model fitting.
+ More categorical features imply a higher training time.
+ - None : no feature will be considered categorical.
+ - integer array-like : integer indices indicating categorical
+ features.
+ - integer : integer index indicating a categorical
+ feature.
+
+ split_features : int or array-like of int, default=None
+ Defines which features can be used to split on.
+ All split feature indices must be in `[0, n_features)`.
+ - None : All features will be used for splitting.
+ - integer array-like : integer indices indicating splitting features.
+ - integer : integer index indicating a single splitting feature.
+
+ linear_features : int or array-like of int, default=None
+ Defines which features are used for the linear model in the leaves.
+ All linear feature indices must be in `[0, n_features)`.
+ - None : All features except those in `categorical_features`
+ will be used in the leaf models.
+ - integer array-like : integer indices indicating features to
+ be used in the leaf models.
+ - integer : integer index indicating a single feature to be used
+ in the leaf models.
+
+ n_jobs : int, default=None
+ The number of jobs to run in parallel for model fitting.
+        ``None`` means 1, i.e. using one processor. ``-1`` means using all
+ processors.
+
+ Attributes
+ ----------
+ n_features_in_ : int
+ The number of features when :meth:`fit` is performed.
+
+ feature_importances_ : ndarray of shape (n_features, )
+ Normalized total reduction of criteria by splitting features.
+
+ n_targets_ : int
+ The number of targets when :meth:`fit` is performed.
+
+ Examples
+ --------
+ >>> from sklearn.linear_model import LinearRegression
+ >>> from lineartree import LinearTreeRegressor
+ >>> from sklearn.datasets import make_regression
+ >>> X, y = make_regression(n_samples=100, n_features=4,
+ ... n_informative=2, n_targets=1,
+ ... random_state=0, shuffle=False)
+ >>> regr = LinearTreeRegressor(base_estimator=LinearRegression())
+ >>> regr.fit(X, y)
+ >>> regr.predict([[0, 0, 0, 0]])
+ array([8.8817842e-16])
+ """
+
+ def __init__(
+ self,
+ base_estimator,
+ *,
+ criterion="mse",
+ max_depth=5,
+ min_samples_split=6,
+ min_samples_leaf=0.1,
+ max_bins=25,
+ categorical_features=None,
+ split_features=None,
+ linear_features=None,
+ n_jobs=None
+ ):
+
+ self.base_estimator = base_estimator
+ self.criterion = criterion
+ self.max_depth = max_depth
+ self.min_samples_split = min_samples_split
+ self.min_samples_leaf = min_samples_leaf
+ self.max_bins = max_bins
+ self.categorical_features = categorical_features
+ self.split_features = split_features
+ self.linear_features = linear_features
+ self.n_jobs = n_jobs
+
+ def fit(self, X, y, sample_weight=None):
+ """Build a Linear Tree of a linear estimator from the training
+ set (X, y).
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ The training input samples.
+
+ y : array-like of shape (n_samples, ) or (n_samples, n_targets)
+ Target values.
+
+ sample_weight : array-like of shape (n_samples, ), default=None
+ Sample weights. If None, then samples are equally weighted.
+ Note that if the base estimator does not support sample weighting,
+ the sample weights are still used to evaluate the splits.
+
+ Returns
+ -------
+ self : object
+ """
+ reg_criterions = ("mse", "rmse", "mae", "poisson")
+
+ if self.criterion not in reg_criterions:
+ raise ValueError(
+ "Regression tasks support only criterion in {}, "
+ "got '{}'.".format(reg_criterions, self.criterion)
+ )
+
+ # Convert data (X is required to be 2d and indexable)
+ X, y = self._validate_data(
+ X,
+ y,
+ accept_sparse=False,
+ dtype=None,
+ force_all_finite=False,
+ multi_output=True,
+ )
+ if sample_weight is not None:
+ sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
+
+ y_shape = np.shape(y)
+ self.n_targets_ = y_shape[1] if len(y_shape) > 1 else 1
+ if self.n_targets_ < 2:
+ y = y.ravel()
+ self._fit(X, y, sample_weight)
+
+ return self
+
+ def predict(self, X):
+ """Predict regression target for X.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+        pred : ndarray of shape (n_samples, ) or (n_samples, n_targets)
+            The predicted values (2d in the case of multitarget regression).
+ """
+ check_is_fitted(self, attributes="_nodes")
+
+ X = check_array(X, accept_sparse=False, dtype=None, force_all_finite=False)
+ self._check_n_features(X, reset=False)
+
+ if self.n_targets_ > 1:
+ pred = np.zeros((X.shape[0], self.n_targets_))
+ else:
+ pred = np.zeros(X.shape[0])
+
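+        # Each leaf stores its decision path in `threshold`; route the
+        # samples matching that path to the leaf's linear model.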
+ for L in self._leaves.values():
+
+ mask = _predict_branch(X, L.threshold)
+ if (~mask).all():
+ continue
+
+ pred[mask] = L.model.predict(X[np.ix_(mask, self._linear_features)])
+
+ return pred
+
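+
+# Editorial sketch (hypothetical, not part of the library API): the split
+# search described in the class docstring scores a candidate split by
+# fitting one linear model per child node and summing their weighted
+# losses. A minimal illustration, assuming a plain "mse" criterion and
+# non-empty children:
+def _example_split_loss(X, y, feature, threshold):
+    from sklearn.linear_model import LinearRegression
+    from sklearn.metrics import mean_squared_error
+
+    mask = X[:, feature] <= threshold
+    loss = 0.0
+    for part in (mask, ~mask):
+        model = LinearRegression().fit(X[part], y[part])
+        # Weight each child's MSE by its sample count.
+        loss += mean_squared_error(y[part], model.predict(X[part])) * part.sum()
+    return loss / len(y)
+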
+
+class LinearTreeClassifier(_LinearTree, ClassifierMixin):
+ """A Linear Tree Classifier.
+
+    A Linear Tree Classifier is a meta-estimator that combines the learning
+    ability of Decision Trees and the predictive power of Linear Models.
+    As in tree-based algorithms, the received data are split according to
+    simple decision rules. The goodness of splits is evaluated in terms of
+    gain by fitting linear models in each node. This implies that the models
+    in the leaves are linear instead of constant approximations as in
+    classical Decision Trees.
+
+ Parameters
+ ----------
+ base_estimator : object
+ The base estimator to fit on dataset splits.
+ The base estimator must be a sklearn.linear_model.
+ The selected base estimator is automatically substituted by a
+ `~sklearn.dummy.DummyClassifier` when a dataset split
+        contains only a single class label.
+
+ criterion : {"hamming", "crossentropy"}, default="hamming"
+ The function to measure the quality of a split. `"crossentropy"`
+ can be used only if `base_estimator` has `predict_proba` method.
+
+ max_depth : int, default=5
+ The maximum depth of the tree considering only the splitting nodes.
+ A higher value implies a higher training time.
+
+ min_samples_split : int or float, default=6
+ The minimum number of samples required to split an internal node.
+ The minimum valid number of samples in each node is 6.
+ A lower value implies a higher training time.
+ - If int, then consider `min_samples_split` as the minimum number.
+ - If float, then `min_samples_split` is a fraction and
+ `ceil(min_samples_split * n_samples)` are the minimum
+ number of samples for each split.
+
+ min_samples_leaf : int or float, default=0.1
+ The minimum number of samples required to be at a leaf node.
+ A split point at any depth will only be considered if it leaves at
+ least `min_samples_leaf` training samples in each of the left and
+ right branches.
+ The minimum valid number of samples in each leaf is 3.
+ A lower value implies a higher training time.
+ - If int, then consider `min_samples_leaf` as the minimum number.
+ - If float, then `min_samples_leaf` is a fraction and
+ `ceil(min_samples_leaf * n_samples)` are the minimum
+ number of samples for each node.
+
+ max_bins : int, default=25
+ The maximum number of bins to use to search the optimal split in each
+ feature. Features with a small number of unique values may use less than
+ ``max_bins`` bins. Must be lower than 120 and larger than 10.
+ A higher value implies a higher training time.
+
+ categorical_features : int or array-like of int, default=None
+ Indicates the categorical features.
+ All categorical indices must be in `[0, n_features)`.
+ Categorical features are used for splits but are not used in
+ model fitting.
+ More categorical features imply a higher training time.
+ - None : no feature will be considered categorical.
+ - integer array-like : integer indices indicating categorical
+ features.
+ - integer : integer index indicating a categorical
+ feature.
+
+ split_features : int or array-like of int, default=None
+ Defines which features can be used to split on.
+ All split feature indices must be in `[0, n_features)`.
+ - None : All features will be used for splitting.
+ - integer array-like : integer indices indicating splitting features.
+ - integer : integer index indicating a single splitting feature.
+
+ linear_features : int or array-like of int, default=None
+ Defines which features are used for the linear model in the leaves.
+ All linear feature indices must be in `[0, n_features)`.
+ - None : All features except those in `categorical_features`
+ will be used in the leaf models.
+ - integer array-like : integer indices indicating features to
+ be used in the leaf models.
+ - integer : integer index indicating a single feature to be used
+ in the leaf models.
+
+ n_jobs : int, default=None
+ The number of jobs to run in parallel for model fitting.
+        ``None`` means 1, i.e. using one processor. ``-1`` means using all
+ processors.
+
+ Attributes
+ ----------
+ n_features_in_ : int
+ The number of features when :meth:`fit` is performed.
+
+ feature_importances_ : ndarray of shape (n_features, )
+ Normalized total reduction of criteria by splitting features.
+
+ classes_ : ndarray of shape (n_classes, )
+ A list of class labels known to the classifier.
+
+ Examples
+ --------
+ >>> from sklearn.linear_model import RidgeClassifier
+ >>> from lineartree import LinearTreeClassifier
+ >>> from sklearn.datasets import make_classification
+ >>> X, y = make_classification(n_samples=100, n_features=4,
+ ... n_informative=2, n_redundant=0,
+ ... random_state=0, shuffle=False)
+ >>> clf = LinearTreeClassifier(base_estimator=RidgeClassifier())
+ >>> clf.fit(X, y)
+ >>> clf.predict([[0, 0, 0, 0]])
+ array([1])
+ """
+
+ def __init__(
+ self,
+ base_estimator,
+ *,
+ criterion="hamming",
+ max_depth=5,
+ min_samples_split=6,
+ min_samples_leaf=0.1,
+ max_bins=25,
+ categorical_features=None,
+ split_features=None,
+ linear_features=None,
+ n_jobs=None
+ ):
+
+ self.base_estimator = base_estimator
+ self.criterion = criterion
+ self.max_depth = max_depth
+ self.min_samples_split = min_samples_split
+ self.min_samples_leaf = min_samples_leaf
+ self.max_bins = max_bins
+ self.categorical_features = categorical_features
+ self.split_features = split_features
+ self.linear_features = linear_features
+ self.n_jobs = n_jobs
+
+ def fit(self, X, y, sample_weight=None):
+ """Build a Linear Tree of a linear estimator from the training
+ set (X, y).
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ The training input samples.
+
+ y : array-like of shape (n_samples, )
+ Target values.
+
+ sample_weight : array-like of shape (n_samples, ), default=None
+ Sample weights. If None, then samples are equally weighted.
+ Note that if the base estimator does not support sample weighting,
+ the sample weights are still used to evaluate the splits.
+
+ Returns
+ -------
+ self : object
+ """
+ clas_criterions = ("hamming", "crossentropy")
+
+ if self.criterion not in clas_criterions:
+ raise ValueError(
+ "Classification tasks support only criterion in {}, "
+ "got '{}'.".format(clas_criterions, self.criterion)
+ )
+
+ if (
+ not hasattr(self.base_estimator, "predict_proba")
+ and self.criterion == "crossentropy"
+ ):
+ raise ValueError(
+ "The 'crossentropy' criterion requires a base_estimator "
+ "with predict_proba method."
+ )
+
+ # Convert data (X is required to be 2d and indexable)
+ X, y = self._validate_data(
+ X,
+ y,
+ accept_sparse=False,
+ dtype=None,
+ force_all_finite=False,
+ multi_output=False,
+ )
+ if sample_weight is not None:
+ sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
+
+ self.classes_ = np.unique(y)
+ self._fit(X, y, sample_weight)
+
+ return self
+
+ def predict(self, X):
+ """Predict class for X.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+ pred : ndarray of shape (n_samples, )
+ The predicted classes.
+ """
+ check_is_fitted(self, attributes="_nodes")
+
+ X = check_array(X, accept_sparse=False, dtype=None, force_all_finite=False)
+ self._check_n_features(X, reset=False)
+
+ pred = np.empty(X.shape[0], dtype=self.classes_.dtype)
+
+ for L in self._leaves.values():
+
+ mask = _predict_branch(X, L.threshold)
+ if (~mask).all():
+ continue
+
+ pred[mask] = L.model.predict(X[np.ix_(mask, self._linear_features)])
+
+ return pred
+
+ def predict_proba(self, X):
+ """Predict class probabilities for X.
+
+        If the base estimator does not implement a ``predict_proba`` method,
+ then the one-hot encoding of the predicted class is returned.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+ pred : ndarray of shape (n_samples, n_classes)
+ The class probabilities of the input samples. The order of the
+ classes corresponds to that in the attribute :term:`classes_`.
+ """
+ check_is_fitted(self, attributes="_nodes")
+
+ X = check_array(X, accept_sparse=False, dtype=None, force_all_finite=False)
+ self._check_n_features(X, reset=False)
+
+ pred = np.zeros((X.shape[0], len(self.classes_)))
+
+ if hasattr(self.base_estimator, "predict_proba"):
+ for L in self._leaves.values():
+
+ mask = _predict_branch(X, L.threshold)
+ if (~mask).all():
+ continue
+
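+                # A leaf model may know only a subset of `classes_`; fill
+                # just the matching probability columns for the masked rows.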
+ pred[
+ np.ix_(mask, np.isin(self.classes_, L.classes))
+ ] = L.model.predict_proba(X[np.ix_(mask, self._linear_features)])
+
+ else:
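+            # Fall back to a one-hot encoding of the predicted class:
+            # map each class label to its column index in `classes_`.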
+ pred_class = self.predict(X)
+ class_to_int = dict(map(reversed, enumerate(self.classes_)))
+ pred_class = np.array([class_to_int[i] for i in pred_class])
+ pred[np.arange(X.shape[0]), pred_class] = 1
+
+ return pred
+
+ def predict_log_proba(self, X):
+ """Predict class log-probabilities for X.
+
+        If the base estimator does not implement a ``predict_log_proba`` method,
+ then the logarithm of the one-hot encoded predicted class is returned.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+ pred : ndarray of shape (n_samples, n_classes)
+ The class log-probabilities of the input samples. The order of the
+ classes corresponds to that in the attribute :term:`classes_`.
+ """
+ return np.log(self.predict_proba(X))
+
+
+class LinearBoostRegressor(_LinearBoosting, RegressorMixin):
+ """A Linear Boosting Regressor.
+
+    A Linear Boosting Regressor is an iterative meta-estimator that starts
+    with a linear regressor and models the residuals through decision trees.
+    At each iteration, the path leading to the highest error (i.e. the worst
+    leaf) is added as a new binary variable to the base model. This kind of
+    Linear Boosting can be considered an improvement over general linear
+    models, since it enables the incorporation of non-linear features by
+    modeling the residuals.
+
+ Parameters
+ ----------
+ base_estimator : object
+ The base estimator iteratively fitted.
+ The base estimator must be a sklearn.linear_model.
+
+ loss : {"linear", "square", "absolute", "exponential"}, default="linear"
+ The function used to calculate the residuals of each sample.
+
+ n_estimators : int, default=10
+ The number of boosting stages to perform. It corresponds to the number
+ of the new features generated.
+
+ max_depth : int, default=3
+ The maximum depth of the tree. If None, then nodes are expanded until
+ all leaves are pure or until all leaves contain less than
+ min_samples_split samples.
+
+ min_samples_split : int or float, default=2
+ The minimum number of samples required to split an internal node:
+
+ - If int, then consider `min_samples_split` as the minimum number.
+ - If float, then `min_samples_split` is a fraction and
+ `ceil(min_samples_split * n_samples)` are the minimum
+ number of samples for each split.
+
+ min_samples_leaf : int or float, default=1
+ The minimum number of samples required to be at a leaf node.
+ A split point at any depth will only be considered if it leaves at
+ least ``min_samples_leaf`` training samples in each of the left and
+ right branches. This may have the effect of smoothing the model,
+ especially in regression.
+
+ - If int, then consider `min_samples_leaf` as the minimum number.
+ - If float, then `min_samples_leaf` is a fraction and
+ `ceil(min_samples_leaf * n_samples)` are the minimum
+ number of samples for each node.
+
+ min_weight_fraction_leaf : float, default=0.0
+ The minimum weighted fraction of the sum total of weights (of all
+ the input samples) required to be at a leaf node. Samples have
+ equal weight when sample_weight is not provided.
+
+ max_features : int, float or {"auto", "sqrt", "log2"}, default=None
+ The number of features to consider when looking for the best split:
+
+ - If int, then consider `max_features` features at each split.
+ - If float, then `max_features` is a fraction and
+ `int(max_features * n_features)` features are considered at each
+ split.
+ - If "auto", then `max_features=n_features`.
+ - If "sqrt", then `max_features=sqrt(n_features)`.
+ - If "log2", then `max_features=log2(n_features)`.
+ - If None, then `max_features=n_features`.
+
+ Note: the search for a split does not stop until at least one
+ valid partition of the node samples is found, even if it requires to
+ effectively inspect more than ``max_features`` features.
+
+ random_state : int, RandomState instance or None, default=None
+ Controls the randomness of the estimator.
+
+ max_leaf_nodes : int, default=None
+ Grow a tree with ``max_leaf_nodes`` in best-first fashion.
+ Best nodes are defined as relative reduction in impurity.
+ If None then unlimited number of leaf nodes.
+
+ min_impurity_decrease : float, default=0.0
+ A node will be split if this split induces a decrease of the impurity
+ greater than or equal to this value.
+
+ ccp_alpha : non-negative float, default=0.0
+ Complexity parameter used for Minimal Cost-Complexity Pruning. The
+ subtree with the largest cost complexity that is smaller than
+ ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
+ :ref:`minimal_cost_complexity_pruning` for details.
+
+ Attributes
+ ----------
+ n_features_in_ : int
+ The number of features when :meth:`fit` is performed.
+
+ n_features_out_ : int
+ The total number of features used to fit the base estimator in the
+ last iteration. The number of output features is equal to the sum
+ of n_features_in_ and n_estimators.
+
+ coef_ : array of shape (n_features_out_, ) or (n_targets, n_features_out_)
+ Estimated coefficients for the linear regression problem.
+ If multiple targets are passed during the fit (y 2D), this is a
+ 2D array of shape (n_targets, n_features_out_), while if only one target
+ is passed, this is a 1D array of length n_features_out_.
+
+ intercept_ : float or array of shape (n_targets, )
+ Independent term in the linear model. Set to 0 if `fit_intercept = False`
+        in `base_estimator`.
+
+ Examples
+ --------
+ >>> from sklearn.linear_model import LinearRegression
+ >>> from lineartree import LinearBoostRegressor
+ >>> from sklearn.datasets import make_regression
+ >>> X, y = make_regression(n_samples=100, n_features=4,
+ ... n_informative=2, n_targets=1,
+ ... random_state=0, shuffle=False)
+ >>> regr = LinearBoostRegressor(base_estimator=LinearRegression())
+ >>> regr.fit(X, y)
+ >>> regr.predict([[0, 0, 0, 0]])
+ array([8.8817842e-16])
+
+ References
+ ----------
+ Explainable boosted linear regression for time series forecasting.
+ Authors: Igor Ilic, Berk Gorgulu, Mucahit Cevik, Mustafa Gokce Baydogan.
+ (https://arxiv.org/abs/2009.09110)
+ """
+
+ def __init__(
+ self,
+ base_estimator,
+ *,
+ loss="linear",
+ n_estimators=10,
+ max_depth=3,
+ min_samples_split=2,
+ min_samples_leaf=1,
+ min_weight_fraction_leaf=0.0,
+ max_features=None,
+ random_state=None,
+ max_leaf_nodes=None,
+ min_impurity_decrease=0.0,
+ ccp_alpha=0.0
+ ):
+
+ self.base_estimator = base_estimator
+ self.loss = loss
+ self.n_estimators = n_estimators
+ self.max_depth = max_depth
+ self.min_samples_split = min_samples_split
+ self.min_samples_leaf = min_samples_leaf
+ self.min_weight_fraction_leaf = min_weight_fraction_leaf
+ self.max_features = max_features
+ self.random_state = random_state
+ self.max_leaf_nodes = max_leaf_nodes
+ self.min_impurity_decrease = min_impurity_decrease
+ self.ccp_alpha = ccp_alpha
+
+ def fit(self, X, y, sample_weight=None):
+ """Build a Linear Boosting from the training set (X, y).
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ The training input samples.
+
+ y : array-like of shape (n_samples, ) or (n_samples, n_targets)
+ Target values.
+
+ sample_weight : array-like of shape (n_samples, ), default=None
+ Sample weights.
+
+ Returns
+ -------
+ self : object
+ """
+ reg_losses = ("linear", "square", "absolute", "exponential")
+
+ if self.loss not in reg_losses:
+ raise ValueError(
+ "Regression tasks support only loss in {}, "
+ "got '{}'.".format(reg_losses, self.loss)
+ )
+
+ # Convert data (X is required to be 2d and indexable)
+ X, y = self._validate_data(
+ X, y, accept_sparse=False, dtype=np.float32, multi_output=True
+ )
+ if sample_weight is not None:
+ sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
+
+ y_shape = np.shape(y)
+ n_targets = y_shape[1] if len(y_shape) > 1 else 1
+ if n_targets < 2:
+ y = y.ravel()
+ self._fit(X, y, sample_weight)
+
+ return self
+
+ def predict(self, X):
+ """Predict regression target for X.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+        pred : ndarray of shape (n_samples, ) or (n_samples, n_targets)
+            The predicted values (2d in the case of multitarget regression).
+ """
+ check_is_fitted(self, attributes="base_estimator_")
+
+ return self.base_estimator_.predict(self.transform(X))
+
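+
+# Editorial sketch (hypothetical, not part of the library API): one
+# boosting iteration as described in the class docstring, assuming the
+# "linear" loss. The linear model is fitted, a tree is grown on its
+# residuals, and the indicator of the worst leaf is appended to X as a
+# new binary feature for the next iteration.
+def _example_boost_iteration(X, y):
+    from sklearn.linear_model import LinearRegression
+    from sklearn.tree import DecisionTreeRegressor
+
+    residuals = y - LinearRegression().fit(X, y).predict(X)
+
+    tree = DecisionTreeRegressor(max_depth=3).fit(X, residuals)
+    leaves = tree.apply(X)
+
+    # The worst leaf is the one with the largest mean absolute residual.
+    worst = max(np.unique(leaves), key=lambda l: np.abs(residuals[leaves == l]).mean())
+
+    return np.hstack([X, (leaves == worst).astype(float).reshape(-1, 1)])
+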
+
+class LinearBoostClassifier(_LinearBoosting, ClassifierMixin):
+ """A Linear Boosting Classifier.
+
+    A Linear Boosting Classifier is an iterative meta-estimator that starts
+    with a linear classifier and models the residuals through decision trees.
+    At each iteration, the path leading to the highest error (i.e. the worst
+    leaf) is added as a new binary variable to the base model. This kind of
+    Linear Boosting can be considered an improvement over general linear
+    models, since it enables the incorporation of non-linear features by
+    modeling the residuals.
+
+ Parameters
+ ----------
+ base_estimator : object
+ The base estimator iteratively fitted.
+ The base estimator must be a sklearn.linear_model.
+
+ loss : {"hamming", "entropy"}, default="entropy"
+ The function used to calculate the residuals of each sample.
+ `"entropy"` can be used only if `base_estimator` has `predict_proba`
+ method.
+
+ n_estimators : int, default=10
+ The number of boosting stages to perform. It corresponds to the number
+ of the new features generated.
+
+ max_depth : int, default=3
+ The maximum depth of the tree. If None, then nodes are expanded until
+ all leaves are pure or until all leaves contain less than
+ min_samples_split samples.
+
+ min_samples_split : int or float, default=2
+ The minimum number of samples required to split an internal node:
+
+ - If int, then consider `min_samples_split` as the minimum number.
+ - If float, then `min_samples_split` is a fraction and
+ `ceil(min_samples_split * n_samples)` are the minimum
+ number of samples for each split.
+
+ min_samples_leaf : int or float, default=1
+ The minimum number of samples required to be at a leaf node.
+ A split point at any depth will only be considered if it leaves at
+ least ``min_samples_leaf`` training samples in each of the left and
+ right branches. This may have the effect of smoothing the model,
+ especially in regression.
+
+ - If int, then consider `min_samples_leaf` as the minimum number.
+ - If float, then `min_samples_leaf` is a fraction and
+ `ceil(min_samples_leaf * n_samples)` are the minimum
+ number of samples for each node.
+
+ min_weight_fraction_leaf : float, default=0.0
+ The minimum weighted fraction of the sum total of weights (of all
+ the input samples) required to be at a leaf node. Samples have
+ equal weight when sample_weight is not provided.
+
+ max_features : int, float or {"auto", "sqrt", "log2"}, default=None
+ The number of features to consider when looking for the best split:
+
+ - If int, then consider `max_features` features at each split.
+ - If float, then `max_features` is a fraction and
+ `int(max_features * n_features)` features are considered at each
+ split.
+ - If "auto", then `max_features=n_features`.
+ - If "sqrt", then `max_features=sqrt(n_features)`.
+ - If "log2", then `max_features=log2(n_features)`.
+ - If None, then `max_features=n_features`.
+
+ Note: the search for a split does not stop until at least one
+ valid partition of the node samples is found, even if it requires to
+ effectively inspect more than ``max_features`` features.
+
+ random_state : int, RandomState instance or None, default=None
+ Controls the randomness of the estimator.
+
+ max_leaf_nodes : int, default=None
+ Grow a tree with ``max_leaf_nodes`` in best-first fashion.
+ Best nodes are defined as relative reduction in impurity.
+ If None then unlimited number of leaf nodes.
+
+ min_impurity_decrease : float, default=0.0
+ A node will be split if this split induces a decrease of the impurity
+ greater than or equal to this value.
+
+ ccp_alpha : non-negative float, default=0.0
+ Complexity parameter used for Minimal Cost-Complexity Pruning. The
+ subtree with the largest cost complexity that is smaller than
+ ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
+ :ref:`minimal_cost_complexity_pruning` for details.
+
+ Attributes
+ ----------
+ n_features_in_ : int
+ The number of features when :meth:`fit` is performed.
+
+ n_features_out_ : int
+ The total number of features used to fit the base estimator in the
+ last iteration. The number of output features is equal to the sum
+ of n_features_in_ and n_estimators.
+
+ coef_ : ndarray of shape (1, n_features_out_) or (n_classes, n_features_out_)
+ Coefficient of the features in the decision function.
+
+ intercept_ : float or array of shape (n_classes, )
+ Independent term in the linear model. Set to 0 if `fit_intercept = False`
+        in `base_estimator`.
+
+ classes_ : ndarray of shape (n_classes, )
+ A list of class labels known to the classifier.
+
+ Examples
+ --------
+ >>> from sklearn.linear_model import RidgeClassifier
+ >>> from lineartree import LinearBoostClassifier
+ >>> from sklearn.datasets import make_classification
+ >>> X, y = make_classification(n_samples=100, n_features=4,
+ ... n_informative=2, n_redundant=0,
+ ... random_state=0, shuffle=False)
+ >>> clf = LinearBoostClassifier(base_estimator=RidgeClassifier())
+ >>> clf.fit(X, y)
+ >>> clf.predict([[0, 0, 0, 0]])
+ array([1])
+
+ References
+ ----------
+ Explainable boosted linear regression for time series forecasting.
+ Authors: Igor Ilic, Berk Gorgulu, Mucahit Cevik, Mustafa Gokce Baydogan.
+ (https://arxiv.org/abs/2009.09110)
+ """
+
+ def __init__(
+ self,
+ base_estimator,
+ *,
+ loss="hamming",
+ n_estimators=10,
+ max_depth=3,
+ min_samples_split=2,
+ min_samples_leaf=1,
+ min_weight_fraction_leaf=0.0,
+ max_features=None,
+ random_state=None,
+ max_leaf_nodes=None,
+ min_impurity_decrease=0.0,
+ ccp_alpha=0.0
+ ):
+
+ self.base_estimator = base_estimator
+ self.loss = loss
+ self.n_estimators = n_estimators
+ self.max_depth = max_depth
+ self.min_samples_split = min_samples_split
+ self.min_samples_leaf = min_samples_leaf
+ self.min_weight_fraction_leaf = min_weight_fraction_leaf
+ self.max_features = max_features
+ self.random_state = random_state
+ self.max_leaf_nodes = max_leaf_nodes
+ self.min_impurity_decrease = min_impurity_decrease
+ self.ccp_alpha = ccp_alpha
+
+ def fit(self, X, y, sample_weight=None):
+ """Build a Linear Boosting from the training set (X, y).
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ The training input samples.
+
+ y : array-like of shape (n_samples, )
+ Target values.
+
+ sample_weight : array-like of shape (n_samples, ), default=None
+ Sample weights.
+
+ Returns
+ -------
+ self : object
+ """
+ clas_losses = ("hamming", "entropy")
+
+ if self.loss not in clas_losses:
+ raise ValueError(
+ "Classification tasks support only loss in {}, "
+ "got '{}'.".format(clas_losses, self.loss)
+ )
+
+ if not hasattr(self.base_estimator, "predict_proba") and self.loss == "entropy":
+ raise ValueError(
+ "The 'entropy' loss requires a base_estimator "
+ "with predict_proba method."
+ )
+
+ # Convert data (X is required to be 2d and indexable)
+ X, y = self._validate_data(
+ X, y, accept_sparse=False, dtype=np.float32, multi_output=False
+ )
+ if sample_weight is not None:
+ sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
+
+ self.classes_ = np.unique(y)
+ self._fit(X, y, sample_weight)
+
+ return self
+
+ def predict(self, X):
+ """Predict class for X.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+ pred : ndarray of shape (n_samples, )
+ The predicted classes.
+ """
+ check_is_fitted(self, attributes="base_estimator_")
+
+ return self.base_estimator_.predict(self.transform(X))
+
+ def predict_proba(self, X):
+ """Predict class probabilities for X.
+
+        If the base estimator does not implement a ``predict_proba`` method,
+ then the one-hot encoding of the predicted class is returned.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+ pred : ndarray of shape (n_samples, n_classes)
+ The class probabilities of the input samples. The order of the
+ classes corresponds to that in the attribute :term:`classes_`.
+ """
+ if hasattr(self.base_estimator, "predict_proba"):
+ check_is_fitted(self, attributes="base_estimator_")
+ pred = self.base_estimator_.predict_proba(self.transform(X))
+
+ else:
+ pred_class = self.predict(X)
+ pred = np.zeros((pred_class.shape[0], len(self.classes_)))
+ class_to_int = dict(map(reversed, enumerate(self.classes_)))
+ pred_class = np.array([class_to_int[v] for v in pred_class])
+ pred[np.arange(pred_class.shape[0]), pred_class] = 1
+
+ return pred
+
+ def predict_log_proba(self, X):
+ """Predict class log-probabilities for X.
+
+        If the base estimator does not implement a ``predict_log_proba`` method,
+ then the logarithm of the one-hot encoded predicted class is returned.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+ pred : ndarray of shape (n_samples, n_classes)
+ The class log-probabilities of the input samples. The order of the
+ classes corresponds to that in the attribute :term:`classes_`.
+ """
+ return np.log(self.predict_proba(X))
+
+
+class LinearForestClassifier(_LinearForest, ClassifierMixin):
+ """ "A Linear Forest Classifier.
+
+    Linear forests generalize the well-known random forests by combining
+    linear models with random forests.
+    The key idea of linear forests is to use the strength of linear models
+    to improve the nonparametric learning ability of tree-based algorithms.
+    First, a linear model is fitted on the whole dataset; then a random
+    forest is trained on the same dataset, but using the residuals of the
+    previous step as target. The final predictions are the sum of the raw
+    linear predictions and the residuals modeled by the random forest.
+
+    For classification tasks, the same approach used in the regression
+    context is adopted. The binary targets are transformed into logits
+    using the inverse sigmoid function, and a linear regression is fitted
+    on them. A random forest regressor is then trained to approximate the
+    residuals between the logits and the linear predictions. Finally, the
+    sigmoid of the combined predictions is taken to obtain probabilities.
+    The multi-label scenario is handled using OneVsRestClassifier.
+
+ Parameters
+ ----------
+ base_estimator : object
+ The linear estimator fitted on the raw target.
+ The linear estimator must be a regressor from sklearn.linear_model.
+
+ n_estimators : int, default=100
+ The number of trees in the forest.
+
+ max_depth : int, default=None
+ The maximum depth of the tree. If None, then nodes are expanded until
+ all leaves are pure or until all leaves contain less than
+ min_samples_split samples.
+
+ min_samples_split : int or float, default=2
+ The minimum number of samples required to split an internal node:
+
+ - If int, then consider `min_samples_split` as the minimum number.
+ - If float, then `min_samples_split` is a fraction and
+ `ceil(min_samples_split * n_samples)` are the minimum
+ number of samples for each split.
+
+ min_samples_leaf : int or float, default=1
+ The minimum number of samples required to be at a leaf node.
+ A split point at any depth will only be considered if it leaves at
+ least ``min_samples_leaf`` training samples in each of the left and
+ right branches. This may have the effect of smoothing the model,
+ especially in regression.
+
+ - If int, then consider `min_samples_leaf` as the minimum number.
+ - If float, then `min_samples_leaf` is a fraction and
+ `ceil(min_samples_leaf * n_samples)` are the minimum
+ number of samples for each node.
+
+ min_weight_fraction_leaf : float, default=0.0
+ The minimum weighted fraction of the sum total of weights (of all
+ the input samples) required to be at a leaf node. Samples have
+ equal weight when sample_weight is not provided.
+
+ max_features : {"auto", "sqrt", "log2"}, int or float, default="auto"
+ The number of features to consider when looking for the best split:
+
+ - If int, then consider `max_features` features at each split.
+ - If float, then `max_features` is a fraction and
+ `round(max_features * n_features)` features are considered at each
+ split.
+ - If "auto", then `max_features=n_features`.
+ - If "sqrt", then `max_features=sqrt(n_features)`.
+ - If "log2", then `max_features=log2(n_features)`.
+ - If None, then `max_features=n_features`.
+
+ Note: the search for a split does not stop until at least one
+ valid partition of the node samples is found, even if it requires to
+ effectively inspect more than ``max_features`` features.
+
+ max_leaf_nodes : int, default=None
+ Grow trees with ``max_leaf_nodes`` in best-first fashion.
+ Best nodes are defined as relative reduction in impurity.
+ If None then unlimited number of leaf nodes.
+
+ min_impurity_decrease : float, default=0.0
+ A node will be split if this split induces a decrease of the impurity
+ greater than or equal to this value.
+
+ bootstrap : bool, default=True
+ Whether bootstrap samples are used when building trees. If False, the
+ whole dataset is used to build each tree.
+
+ oob_score : bool, default=False
+ Whether to use out-of-bag samples to estimate the generalization score.
+ Only available if bootstrap=True.
+
+ n_jobs : int, default=None
+ The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,
+ :meth:`decision_path` and :meth:`apply` are all parallelized over the
+ trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`
+ context. ``-1`` means using all processors.
+
+ random_state : int, RandomState instance or None, default=None
+ Controls both the randomness of the bootstrapping of the samples used
+ when building trees (if ``bootstrap=True``) and the sampling of the
+ features to consider when looking for the best split at each node
+ (if ``max_features < n_features``).
+
+ ccp_alpha : non-negative float, default=0.0
+ Complexity parameter used for Minimal Cost-Complexity Pruning. The
+ subtree with the largest cost complexity that is smaller than
+ ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
+ :ref:`minimal_cost_complexity_pruning` for details.
+
+ max_samples : int or float, default=None
+ If bootstrap is True, the number of samples to draw from X
+ to train each base estimator.
+
+ - If None (default), then draw `X.shape[0]` samples.
+ - If int, then draw `max_samples` samples.
+ - If float, then draw `max_samples * X.shape[0]` samples. Thus,
+ `max_samples` should be in the interval `(0, 1]`.
+
+ Attributes
+ ----------
+ n_features_in_ : int
+ The number of features when :meth:`fit` is performed.
+
+ feature_importances_ : ndarray of shape (n_features, )
+ The impurity-based feature importances.
+ The higher, the more important the feature.
+ The importance of a feature is computed as the (normalized)
+ total reduction of the criterion brought by that feature. It is also
+ known as the Gini importance.
+
+ coef_ : ndarray of shape (1, n_features_out_)
+ Coefficient of the features in the decision function.
+
+ intercept_ : float
+ Independent term in the linear model. Set to 0 if `fit_intercept = False`
+ in `base_estimator`.
+
+ classes_ : ndarray of shape (n_classes, )
+ A list of class labels known to the classifier.
+
+ base_estimator_ : object
+ A fitted linear model instance.
+
+ forest_estimator_ : object
+ A fitted random forest instance.
+
+ Examples
+ --------
+ >>> from sklearn.linear_model import LinearRegression
+ >>> from lineartree import LinearForestClassifier
+ >>> from sklearn.datasets import make_classification
+ >>> X, y = make_classification(n_samples=100, n_classes=2, n_features=4,
+ ... n_informative=2, n_redundant=0,
+ ... random_state=0, shuffle=False)
+ >>> clf = LinearForestClassifier(base_estimator=LinearRegression())
+ >>> clf.fit(X, y)
+ >>> clf.predict([[0, 0, 0, 0]])
+ array([1])
+
+ References
+ ----------
+ Regression-Enhanced Random Forests.
+ Authors: Haozhe Zhang, Dan Nettleton, Zhengyuan Zhu.
+ (https://arxiv.org/abs/1904.10416)
+ """
+
+ def __init__(
+ self,
+ base_estimator,
+ *,
+ n_estimators=100,
+ max_depth=None,
+ min_samples_split=2,
+ min_samples_leaf=1,
+ min_weight_fraction_leaf=0.0,
+ max_features="auto",
+ max_leaf_nodes=None,
+ min_impurity_decrease=0.0,
+ bootstrap=True,
+ oob_score=False,
+ n_jobs=None,
+ random_state=None,
+ ccp_alpha=0.0,
+ max_samples=None
+ ):
+
+ self.base_estimator = base_estimator
+ self.n_estimators = n_estimators
+ self.max_depth = max_depth
+ self.min_samples_split = min_samples_split
+ self.min_samples_leaf = min_samples_leaf
+ self.min_weight_fraction_leaf = min_weight_fraction_leaf
+ self.max_features = max_features
+ self.max_leaf_nodes = max_leaf_nodes
+ self.min_impurity_decrease = min_impurity_decrease
+ self.bootstrap = bootstrap
+ self.oob_score = oob_score
+ self.n_jobs = n_jobs
+ self.random_state = random_state
+ self.ccp_alpha = ccp_alpha
+ self.max_samples = max_samples
+
+ def fit(self, X, y, sample_weight=None):
+ """Build a Linear Forest from the training set (X, y).
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ The training input samples.
+
+ y : array-like of shape (n_samples, ) or (n_samples, n_targets)
+ Target values.
+
+ sample_weight : array-like of shape (n_samples, ), default=None
+ Sample weights.
+
+ Returns
+ -------
+ self : object
+ """
+ # Convert data (X is required to be 2d and indexable)
+ X, y = self._validate_data(
+ X, y, accept_sparse=True, dtype=None, multi_output=False
+ )
+ if sample_weight is not None:
+ sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
+
+ self.classes_ = np.unique(y)
+ if len(self.classes_) > 2:
+ raise ValueError(
+ "LinearForestClassifier supports only binary classification task. "
+ "To solve a multi-lable classification task use "
+ "LinearForestClassifier with OneVsRestClassifier from sklearn."
+ )
+
+ self._fit(X, y, sample_weight)
+
+ return self
+
+ def decision_function(self, X):
+ """Predict confidence scores for samples.
+
+ The confidence score for a sample is proportional to the signed
+ distance of that sample to the hyperplane.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+ pred : ndarray of shape (n_samples, )
+ Confidence scores.
+ Confidence score for self.classes_[1] where >0 means this
+ class would be predicted.
+ """
+ check_is_fitted(self, attributes="base_estimator_")
+ X = check_array(X, dtype=None, accept_sparse=False)
+ self._check_n_features(X, reset=False)
+
+ linear_pred = self.base_estimator_.predict(X)
+ forest_pred = self.forest_estimator_.predict(X)
+
+ return linear_pred + forest_pred
+
+ def predict(self, X):
+ """Predict class for X.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+ pred : ndarray of shape (n_samples, )
+ The predicted classes.
+ """
+ pred = self.decision_function(X)
+ pred_class = (self._sigmoid(pred) > 0.5).astype(int)
+ int_to_class = dict(enumerate(self.classes_))
+ pred_class = np.array([int_to_class[i] for i in pred_class])
+
+ return pred_class
+
+ def predict_proba(self, X):
+ """Predict class probabilities for X.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+ proba : ndarray of shape (n_samples, n_classes)
+ The class probabilities of the input samples. The order of the
+ classes corresponds to that in the attribute :term:`classes_`.
+ """
+ check_is_fitted(self, attributes="base_estimator_")
+ X = check_array(X, dtype=None, accept_sparse=False)
+ self._check_n_features(X, reset=False)
+
+ pred = self._sigmoid(self.base_estimator_.predict(X))
+ proba = np.zeros((X.shape[0], 2))
+ proba[:, 0] = 1 - pred
+ proba[:, 1] = pred
+
+ return proba
+
+ def predict_log_proba(self, X):
+ """Predict class log-probabilities for X.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+ pred : ndarray of shape (n_samples, n_classes)
+ The class log-probabilities of the input samples. The order of the
+ classes corresponds to that in the attribute :term:`classes_`.
+ """
+ return np.log(self.predict_proba(X))
+
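+
+# Editorial sketch (hypothetical, not part of the library API): the logit
+# trick described in the class docstring, assuming binary targets encoded
+# as {0, 1} and a small clip to keep the logits finite.
+def _example_linear_forest_fit(X, y, eps=1e-5):
+    from sklearn.ensemble import RandomForestRegressor
+    from sklearn.linear_model import LinearRegression
+
+    # Map the binary targets to logits via the inverse sigmoid.
+    y_clip = np.clip(y.astype(float), eps, 1 - eps)
+    logits = np.log(y_clip / (1 - y_clip))
+
+    # Fit the linear model on the logits, then a forest on its residuals.
+    linear = LinearRegression().fit(X, logits)
+    forest = RandomForestRegressor().fit(X, logits - linear.predict(X))
+
+    # Probabilities are the sigmoid of the combined predictions.
+    proba = 1.0 / (1.0 + np.exp(-(linear.predict(X) + forest.predict(X))))
+    return linear, forest, proba
+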
+
+class LinearForestRegressor(_LinearForest, RegressorMixin):
+ """ "A Linear Forest Regressor.
+
+    Linear forests generalize the well-known random forests by combining
+    linear models with random forests.
+    The key idea of linear forests is to use the strength of linear models
+    to improve the nonparametric learning ability of tree-based algorithms.
+    First, a linear model is fitted on the whole dataset; then a random
+    forest is trained on the same dataset, but using the residuals of the
+    previous step as target. The final predictions are the sum of the raw
+    linear predictions and the residuals modeled by the random forest.
+
+ Parameters
+ ----------
+ base_estimator : object
+ The linear estimator fitted on the raw target.
+ The linear estimator must be a regressor from sklearn.linear_model.
+
+ n_estimators : int, default=100
+ The number of trees in the forest.
+
+ max_depth : int, default=None
+ The maximum depth of the tree. If None, then nodes are expanded until
+ all leaves are pure or until all leaves contain less than
+ min_samples_split samples.
+
+ min_samples_split : int or float, default=2
+ The minimum number of samples required to split an internal node:
+
+ - If int, then consider `min_samples_split` as the minimum number.
+ - If float, then `min_samples_split` is a fraction and
+ `ceil(min_samples_split * n_samples)` are the minimum
+ number of samples for each split.
+
+ min_samples_leaf : int or float, default=1
+ The minimum number of samples required to be at a leaf node.
+ A split point at any depth will only be considered if it leaves at
+ least ``min_samples_leaf`` training samples in each of the left and
+ right branches. This may have the effect of smoothing the model,
+ especially in regression.
+
+ - If int, then consider `min_samples_leaf` as the minimum number.
+ - If float, then `min_samples_leaf` is a fraction and
+ `ceil(min_samples_leaf * n_samples)` are the minimum
+ number of samples for each node.
+
+ min_weight_fraction_leaf : float, default=0.0
+ The minimum weighted fraction of the sum total of weights (of all
+ the input samples) required to be at a leaf node. Samples have
+ equal weight when sample_weight is not provided.
+
+ max_features : {"auto", "sqrt", "log2"}, int or float, default="auto"
+ The number of features to consider when looking for the best split:
+
+ - If int, then consider `max_features` features at each split.
+ - If float, then `max_features` is a fraction and
+ `round(max_features * n_features)` features are considered at each
+ split.
+ - If "auto", then `max_features=n_features`.
+ - If "sqrt", then `max_features=sqrt(n_features)`.
+ - If "log2", then `max_features=log2(n_features)`.
+ - If None, then `max_features=n_features`.
+
+ Note: the search for a split does not stop until at least one
+ valid partition of the node samples is found, even if it requires to
+ effectively inspect more than ``max_features`` features.
+
+ max_leaf_nodes : int, default=None
+ Grow trees with ``max_leaf_nodes`` in best-first fashion.
+ Best nodes are defined as relative reduction in impurity.
+ If None then unlimited number of leaf nodes.
+
+ min_impurity_decrease : float, default=0.0
+ A node will be split if this split induces a decrease of the impurity
+ greater than or equal to this value.
+
+ bootstrap : bool, default=True
+ Whether bootstrap samples are used when building trees. If False, the
+ whole dataset is used to build each tree.
+
+ oob_score : bool, default=False
+ Whether to use out-of-bag samples to estimate the generalization score.
+ Only available if bootstrap=True.
+
+ n_jobs : int, default=None
+ The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,
+ :meth:`decision_path` and :meth:`apply` are all parallelized over the
+ trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`
+ context. ``-1`` means using all processors.
+
+ random_state : int, RandomState instance or None, default=None
+ Controls both the randomness of the bootstrapping of the samples used
+ when building trees (if ``bootstrap=True``) and the sampling of the
+ features to consider when looking for the best split at each node
+ (if ``max_features < n_features``).
+
+ ccp_alpha : non-negative float, default=0.0
+ Complexity parameter used for Minimal Cost-Complexity Pruning. The
+ subtree with the largest cost complexity that is smaller than
+ ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
+ :ref:`minimal_cost_complexity_pruning` for details.
+
+ max_samples : int or float, default=None
+ If bootstrap is True, the number of samples to draw from X
+ to train each base estimator.
+
+ - If None (default), then draw `X.shape[0]` samples.
+ - If int, then draw `max_samples` samples.
+ - If float, then draw `max_samples * X.shape[0]` samples. Thus,
+ `max_samples` should be in the interval `(0, 1]`.
+
+ Attributes
+ ----------
+ n_features_in_ : int
+ The number of features when :meth:`fit` is performed.
+
+ feature_importances_ : ndarray of shape (n_features, )
+ The impurity-based feature importances.
+ The higher, the more important the feature.
+ The importance of a feature is computed as the (normalized)
+ total reduction of the criterion brought by that feature. It is also
+ known as the Gini importance.
+
+ coef_ : array of shape (n_features, ) or (n_targets, n_features)
+ Estimated coefficients for the linear regression problem.
+ If multiple targets are passed during the fit (y 2D), this is a
+ 2D array of shape (n_targets, n_features), while if only one target
+ is passed, this is a 1D array of length n_features.
+
+ intercept_ : float or array of shape (n_targets,)
+ Independent term in the linear model. Set to 0 if `fit_intercept = False`
+ in `base_estimator`.
+
+ base_estimator_ : object
+ A fitted linear model instance.
+
+ forest_estimator_ : object
+ A fitted random forest instance.
+
+ Examples
+ --------
+ >>> from sklearn.linear_model import LinearRegression
+ >>> from lineartree import LinearForestRegressor
+ >>> from sklearn.datasets import make_regression
+ >>> X, y = make_regression(n_samples=100, n_features=4,
+ ... n_informative=2, n_targets=1,
+ ... random_state=0, shuffle=False)
+ >>> regr = LinearForestRegressor(base_estimator=LinearRegression())
+ >>> regr.fit(X, y)
+ >>> regr.predict([[0, 0, 0, 0]])
+ array([8.8817842e-16])
+
+ References
+ ----------
+ Regression-Enhanced Random Forests.
+ Authors: Haozhe Zhang, Dan Nettleton, Zhengyuan Zhu.
+ (https://arxiv.org/abs/1904.10416)
+ """
+
+ def __init__(
+ self,
+ base_estimator,
+ *,
+ n_estimators=100,
+ max_depth=None,
+ min_samples_split=2,
+ min_samples_leaf=1,
+ min_weight_fraction_leaf=0.0,
+ max_features="auto",
+ max_leaf_nodes=None,
+ min_impurity_decrease=0.0,
+ bootstrap=True,
+ oob_score=False,
+ n_jobs=None,
+ random_state=None,
+ ccp_alpha=0.0,
+ max_samples=None
+ ):
+
+ self.base_estimator = base_estimator
+ self.n_estimators = n_estimators
+ self.max_depth = max_depth
+ self.min_samples_split = min_samples_split
+ self.min_samples_leaf = min_samples_leaf
+ self.min_weight_fraction_leaf = min_weight_fraction_leaf
+ self.max_features = max_features
+ self.max_leaf_nodes = max_leaf_nodes
+ self.min_impurity_decrease = min_impurity_decrease
+ self.bootstrap = bootstrap
+ self.oob_score = oob_score
+ self.n_jobs = n_jobs
+ self.random_state = random_state
+ self.ccp_alpha = ccp_alpha
+ self.max_samples = max_samples
+
+ def fit(self, X, y, sample_weight=None):
+ """Build a Linear Forest from the training set (X, y).
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ The training input samples.
+
+ y : array-like of shape (n_samples, ) or (n_samples, n_targets)
+ Target values.
+
+ sample_weight : array-like of shape (n_samples, ), default=None
+ Sample weights.
+
+ Returns
+ -------
+ self : object
+ """
+ # Convert data (X is required to be 2d and indexable)
+ X, y = self._validate_data(
+ X, y, accept_sparse=True, dtype=None, multi_output=True
+ )
+ if sample_weight is not None:
+ sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
+
+ y_shape = np.shape(y)
+ n_targets = y_shape[1] if len(y_shape) > 1 else 1
+ if n_targets < 2:
+ y = y.ravel()
+ self._fit(X, y, sample_weight)
+
+ return self
+
+ def predict(self, X):
+ """Predict regression target for X.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples, n_features)
+ Samples.
+
+ Returns
+ -------
+        pred : ndarray of shape (n_samples, ) or (n_samples, n_targets)
+            The predicted values (2d in the case of multitarget regression).
+ """
+ check_is_fitted(self, attributes="base_estimator_")
+ X = check_array(X, dtype=None, accept_sparse=False)
+ self._check_n_features(X, reset=False)
+
+ linear_pred = self.base_estimator_.predict(X)
+ forest_pred = self.forest_estimator_.predict(X)
+
+ return linear_pred + forest_pred
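+
+
+# Editorial sketch (hypothetical, not part of the library API): the
+# regression-enhanced random forest recipe from the class docstring,
+# assuming a single target: fit a linear model, fit a forest on its
+# residuals, and predict with the sum of the two.
+def _example_linear_forest_regressor(X, y, X_new):
+    from sklearn.ensemble import RandomForestRegressor
+    from sklearn.linear_model import LinearRegression
+
+    linear = LinearRegression().fit(X, y)
+    forest = RandomForestRegressor().fit(X, y - linear.predict(X))
+    return linear.predict(X_new) + forest.predict(X_new)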