# Source code for skdownscale.pointwise_models.grouping
from __future__ import annotations
from typing import Any
import numpy as np
import pandas as pd
from numpy.typing import NDArray
from .utils import default_none_kwargs
# [docs]
class GroupedRegressor:
    """Grouped Regressor

    Wrapper supporting fitting separate estimators for distinct groups

    Parameters
    ----------
    estimator : object
        Estimator factory, such as a class derived from `BaseEstimator`. It is
        called as ``estimator(**estimator_kwargs)`` once per group and the
        resulting estimator is fit to that group.
    fit_grouper : object
        Grouper object, such as `pd.Grouper` or `PaddedDOYGrouper` used to split
        data into groups during fitting. It is called as
        ``fit_grouper(index, **fit_grouper_kwargs)`` and the result must expose
        a ``groups`` mapping of group key to positional indices.
    predict_grouper : object, func, str
        Grouper object, such as `pd.Grouper` used to split data into groups
        during prediction. It is passed to ``X.groupby``.
    estimator_kwargs : dict
        Keyword arguments to pass onto the `estimator`'s constructor.
    fit_grouper_kwargs : dict
        Keyword arguments to pass onto the `fit_grouper`'s constructor.
    predict_grouper_kwargs : dict
        Keyword arguments to pass onto ``X.groupby`` together with
        `predict_grouper`.
    """

    def __init__(
        self,
        estimator: Any,
        fit_grouper: Any,
        predict_grouper: Any,
        estimator_kwargs: dict[str, Any] | None = None,
        fit_grouper_kwargs: dict[str, Any] | None = None,
        predict_grouper_kwargs: dict[str, Any] | None = None,
    ) -> None:
        self.estimator = estimator
        self.estimator_kwargs = estimator_kwargs
        self.fit_grouper = fit_grouper
        self.fit_grouper_kwargs = fit_grouper_kwargs
        self.predict_grouper = predict_grouper
        self.predict_grouper_kwargs = predict_grouper_kwargs

    def fit(self, X, y, **fit_kwargs):
        """Fit the grouped regressor

        Parameters
        ----------
        X : pd.DataFrame, shape (n_samples, n_features)
            Training data
        y : pd.Series or pd.DataFrame, shape (n_samples, ) or (n_samples, n_targets)
            Target values
        **fit_kwargs
            Additional keyword arguments to pass onto the estimator's fit method

        Returns
        -------
        self : returns an instance of self.
        """
        fit_grouper_kwargs = default_none_kwargs(self.fit_grouper_kwargs)
        estimator_kwargs = default_none_kwargs(self.estimator_kwargs)

        # X and y are grouped independently; groups are matched by key below.
        x_groups = self.fit_grouper(X.index, **fit_grouper_kwargs).groups
        y_groups = self.fit_grouper(y.index, **fit_grouper_kwargs).groups

        # ``Series.keys()`` returns the row index, not the target names, so a
        # 1-D target has to be handled separately from a DataFrame target.
        self.y_ndim_ = y.ndim
        if self.y_ndim_ == 1:
            self.targets_ = [y.name]
        else:
            self.targets_ = list(y.keys())

        # One freshly constructed estimator per fit group.
        self.estimators_ = {key: self.estimator(**estimator_kwargs) for key in x_groups}
        for x_key, x_inds in x_groups.items():
            y_inds = y_groups[x_key]
            self.estimators_[x_key].fit(X.iloc[x_inds], y.iloc[y_inds], **fit_kwargs)

        return self

    def predict(self, X):
        """Predict estimator target for X

        Parameters
        ----------
        X : pd.DataFrame, shape (n_samples, n_features)
            Samples to predict for

        Returns
        -------
        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
            The predicted values. Rows that do not fall into any prediction
            group are NaN.
        """
        predict_grouper_kwargs = default_none_kwargs(self.predict_grouper_kwargs)
        grouper = X.groupby(self.predict_grouper, **predict_grouper_kwargs)

        # Mirror the dimensionality of the target seen in ``fit``. Instances
        # fitted before ``y_ndim_`` existed fall back to the 2-D layout.
        if getattr(self, "y_ndim_", 2) == 1:
            shape = (len(X),)
        else:
            shape = (len(X), len(self.targets_))

        # NaN-filled rather than uninitialised, so rows skipped by the groupby
        # are not returned as arbitrary memory contents.
        result = np.full(shape, np.nan)
        for key, inds in grouper.indices.items():
            result[inds, ...] = self.estimators_[key].predict(X.iloc[inds])

        return result
class PaddedDOYGrouper:
    """Grouper to group an Index by day-of-year +/- pad

    Each group is keyed by a day of year and holds the positions of every
    entry of `index` whose day of year lies within `window` days of that key.
    The window wraps around the end of the year, where the year length is
    taken as the largest day of year present in `index`.

    Parameters
    ----------
    index : pd.DatetimeIndex
        Pandas DatetimeIndex to be grouped.
    window : int
        Size of the padded offset for each day of year.
    """

    def __init__(self, index: pd.DatetimeIndex, window: int) -> None:
        self.index = index
        self.window = window

        idoy = np.asarray(index.dayofyear)
        if idoy.size == 0:
            # Nothing to group; avoid calling max() on an empty array.
            self._groups = {}
            return

        n = int(idoy.max())

        # day-of-year x day-of-year membership mask: row i marks the days of
        # year that fall inside the window centred on day i (0-based).
        # The builtin ``bool`` replaces ``np.bool``, which NumPy 1.24 removed.
        temp_groups = np.zeros((n, n), dtype=bool)
        offsets = np.arange(-self.window, self.window + 1)
        for i in range(n):
            # Modulo wraps around the year boundary for any window size.
            temp_groups[i, (i + offsets) % n] = True

        # One mask row per entry of the index, selected by its day of year.
        arr = temp_groups[idoy - 1]
        self._groups = {doy: np.nonzero(arr[:, doy - 1])[0] for doy in range(1, n + 1)}

    @property
    def groups(self) -> dict[int, NDArray[np.intp]]:
        """Dict {doy -> group indices}."""
        return self._groups