Source code for skdownscale.pointwise_models.bcsd

import collections

import numpy as np
import pandas as pd
from sklearn.utils.validation import check_is_fitted

from .base import TimeSynchronousDownscaler
from .groupers import DAY_GROUPER, MONTH_GROUPER, PaddedDOYGrouper
from .quantile import QuantileMapper
from .utils import default_none_kwargs, ensure_samples_features


class BcsdBase(TimeSynchronousDownscaler):
    """Base class for BCSD model.

    Provides the grouping, quantile-mapping and climatology helpers shared by
    the concrete BCSD estimators.

    Parameters
    ----------
    time_grouper : str or object accepted by ``DataFrame.groupby``, optional
        How to group time periods. The special string ``'daily_nasa-nex'``
        selects ``PaddedDOYGrouper`` and a ``'daily'`` timestep. Any other
        string is passed to ``pd.Grouper(freq=...)``. Non-string values are
        handed to ``DataFrame.groupby`` unchanged. Both of the latter cases
        use a ``'monthly'`` timestep.
    climate_trend_grouper : object accepted by ``DataFrame.groupby``, optional
        Grouper used in ``'daily'`` mode when ``climate_trend=True`` is
        requested from ``_create_groups``.
    climate_trend : object, optional
        Stored on the instance; not read by this base class.
    return_anoms : bool, optional
        Stored on the instance; not read by this base class.
    qm_kwargs : dict, optional
        Keyword arguments forwarded to ``QuantileMapper``.

    Attributes
    ----------
    time_grouper_ : object
        Grouper resolved from ``time_grouper`` by ``_pre_fit``.
    timestep : str
        Either ``'monthly'`` or ``'daily'``, set by ``_pre_fit``.
    quantile_mappers_ : dict
        Fitted ``QuantileMapper`` objects keyed by group, set by
        ``_qm_fit_by_group``.
    """

    # names of the attributes expected to exist after fitting
    _fit_attributes = ['y_climo_', 'quantile_mappers_']
    _timestep = 'M'

    def __init__(
        self,
        time_grouper=MONTH_GROUPER,
        climate_trend_grouper=DAY_GROUPER,
        climate_trend=MONTH_GROUPER,
        return_anoms=True,
        qm_kwargs=None,
    ):

        self.time_grouper = time_grouper
        self.climate_trend_grouper = climate_trend_grouper
        self.climate_trend = climate_trend
        self.return_anoms = return_anoms
        self.qm_kwargs = qm_kwargs

    def _pre_fit(self):
        """Resolve ``time_grouper`` into ``time_grouper_`` and ``timestep``.

        The constructor parameter ``time_grouper`` is never overwritten, so
        calling this method repeatedly (e.g. on re-fit) always resolves from
        the value the user originally supplied.
        """
        if isinstance(self.time_grouper, str):
            if self.time_grouper == 'daily_nasa-nex':
                self.time_grouper_ = PaddedDOYGrouper
                self.timestep = 'daily'
            else:
                self.time_grouper_ = pd.Grouper(freq=self.time_grouper)
                self.timestep = 'monthly'
        else:
            self.time_grouper_ = self.time_grouper
            self.timestep = 'monthly'

    def _create_groups(self, df, climate_trend=False):
        """Helper function to create groups by either day or month.

        Parameters
        ----------
        df : pd.DataFrame
            Data to group.
        climate_trend : bool, optional
            Only consulted in ``'daily'`` mode: when True the data are grouped
            with ``climate_trend_grouper`` instead of the padded grouper.

        Returns
        -------
        groups
            The result of ``df.groupby(...)`` or, in ``'daily'`` mode without
            ``climate_trend``, the object returned by calling the resolved
            time grouper on ``df``.

        Raises
        ------
        TypeError
            If ``timestep`` is neither ``'monthly'`` nor ``'daily'``.
        """
        # Prefer the grouper resolved by _pre_fit; fall back to the raw
        # parameter for instances that never had ``time_grouper_`` set.
        grouper = getattr(self, 'time_grouper_', self.time_grouper)

        if self.timestep == 'monthly':
            return df.groupby(grouper)
        if self.timestep == 'daily':
            if climate_trend:
                # group by day only rather than also +/- offset days
                return df.groupby(self.climate_trend_grouper)
            return grouper(df)
        raise TypeError(f'unexpected time grouper type {self.time_grouper}')

    def _qm_fit_by_group(self, groups):
        """Helper function to fit quantile mappers by group.

        One ``QuantileMapper`` is fitted per ``(key, group)`` pair yielded by
        ``groups`` and stored in ``quantile_mappers_`` under ``key`` for later
        use by ``_qm_transform_by_group``.
        """
        self.quantile_mappers_ = {}
        qm_kwargs = default_none_kwargs(self.qm_kwargs)
        for key, group in groups:
            self.quantile_mappers_[key] = QuantileMapper(**qm_kwargs).fit(group)

    def _qm_transform_by_group(self, groups):
        """Helper function to apply quantile mapping by group.

        Each group is transformed with the mapper stored under its key, and
        the pieces are recombined with ``pd.concat`` and sorted by index
        (there may be a better way to do this).

        Returns
        -------
        pd.DataFrame
            The transformed groups, concatenated and sorted by index.
        """

        dfs = []
        for key, group in groups:
            qmapped = self.quantile_mappers_[key].transform(group)
            dfs.append(pd.DataFrame(qmapped, index=group.index, columns=group.columns))
        return pd.concat(dfs).sort_index()

    def _remove_climatology(self, obj, climatology, climate_trend=False):
        """Helper function to remove climatologies.

        Parameters
        ----------
        obj : pd.DataFrame
            Data from which the climatology is subtracted.
        climatology : pd.DataFrame
            Per-group climatology, indexed by the group keys.
        climate_trend : bool, optional
            Forwarded to ``_create_groups``.

        Returns
        -------
        pd.DataFrame
            ``obj`` with the matching climatology row subtracted from every
            group, sorted by index.

        Raises
        ------
        ValueError
            If the recombined result does not have the same shape as ``obj``.
        """
        dfs = []
        for key, group in self._create_groups(obj, climate_trend):
            if self.timestep == 'monthly':
                dfs.append(group - climatology.loc[key].values)
            elif self.timestep == 'daily':
                dfs.append(group - climatology.loc[key])

        result = pd.concat(dfs).sort_index()
        # explicit check rather than ``assert`` so it survives ``python -O``
        if obj.shape != result.shape:
            raise ValueError('shape of climo is not equal to input array')
        return result


class BcsdPrecipitation(BcsdBase):
    """Classic BCSD model for Precipitation

    Parameters
    ----------
    time_grouper : str or pd.Grouper, optional
        Pandas time frequency str or Grouper object. Specifies how to group
        time periods. Defaults to ``MONTH_GROUPER``.
    qm_kwargs : dict
        Keyword arguments to pass to QuantileMapper.

    Attributes
    ----------
    y_climo_ : pd.DataFrame
        Mean of the target ``y`` within each time group, computed in ``fit``.
    quantile_mappers_ : dict
        QuantileMapper objects (one for each time group).
    """

    def fit(self, X, y):
        """Fit BcsdPrecipitation model

        Parameters
        ----------
        X : pd.Series or pd.DataFrame, shape (n_samples, 1)
            Training data
        y : pd.Series or pd.DataFrame, shape (n_samples, 1)
            Target values.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If more than one feature is supplied, or if ``return_anoms`` is
            set and the target climatology contains a value <= 0 (which
            would make the ratio anomalies undefined or sign-flipped).
        """
        self._pre_fit()
        X, y = self._validate_data(X, y, y_numeric=True)
        # TO-DO: set n_features_in_ attribute
        if self.n_features_in_ != 1:
            raise ValueError(f'BCSD only supports 1 feature, found {self.n_features_in_}')

        y_groups = self._create_groups(y)

        # calculate the climatologies
        self.y_climo_ = y_groups.mean()

        if self.return_anoms and self.y_climo_.values.min() <= 0:
            raise ValueError('Invalid value in target climatology')

        # fit the quantile mappers
        # TO-DO: do we need to detrend the data before fitting the quantile mappers??
        self._qm_fit_by_group(y_groups)

        return self

    def predict(self, X):
        """Predict using the BcsdPrecipitation model

        Parameters
        ----------
        X : pd.Series or pd.DataFrame, shape (n_samples, 1)
            Samples.

        Returns
        -------
        C : pd.DataFrame, shape (n_samples, 1)
            Returns predicted values: ratio anomalies relative to the training
            climatology when ``return_anoms`` is set, otherwise the
            quantile-mapped values.
        """
        check_is_fitted(self)
        X = self._validate_data(X)

        # Bias correction
        # apply quantile mapping by month or day
        Xqm = self._qm_transform_by_group(self._create_groups(X, climate_trend=True))

        # calculate the anomalies as a ratio of the training data
        if self.return_anoms:
            return self._calc_ratio_anoms(Xqm, self.y_climo_)
        return Xqm

    def _calc_ratio_anoms(self, obj, climatology, climate_trend=False):
        """Helper function for dividing day groups by climatology.

        Parameters
        ----------
        obj : pd.DataFrame
            Data to convert to ratio anomalies.
        climatology : pd.DataFrame
            Per-group climatology, indexed by the group keys.
        climate_trend : bool, optional
            Forwarded to ``_create_groups``.

        Returns
        -------
        pd.DataFrame
            Every group of ``obj`` divided by its climatology row, recombined
            and sorted by index.

        Raises
        ------
        ValueError
            If the recombined result does not have the same shape as ``obj``.
        """
        dfs = []
        for key, group in self._create_groups(obj, climate_trend):
            if self.timestep == 'monthly':
                dfs.append(group / climatology.loc[key].values)
            else:
                dfs.append(group / climatology.loc[key])

        result = pd.concat(dfs).sort_index()
        # explicit check rather than ``assert`` so it survives ``python -O``
        if obj.shape != result.shape:
            raise ValueError('shape of ratio anomalies is not equal to input array')
        return result

    def _more_tags(self):
        """Return estimator tags listing the scikit-learn checks marked as expected failures."""
        return {
            '_xfail_checks': {
                'check_estimators_dtypes': 'BCSD only suppers 1 feature',
                'check_dtype_object': 'BCSD only suppers 1 feature',
                'check_fit_score_takes_y': 'BCSD only suppers 1 feature',
                'check_estimators_fit_returns_self': 'BCSD only suppers 1 feature',
                'check_estimators_fit_returns_self(readonly_memmap=True)': 'BCSD only suppers 1 feature',
                'check_pipeline_consistency': 'BCSD only suppers 1 feature',
                'check_estimators_nan_inf': 'BCSD only suppers 1 feature',
                'check_estimators_overwrite_params': 'BCSD only suppers 1 feature',
                'check_estimators_pickle': 'BCSD only suppers 1 feature',
                'check_fit2d_predict1d': 'BCSD only suppers 1 feature',
                'check_methods_subset_invariance': 'BCSD only suppers 1 feature',
                'check_fit2d_1sample': 'BCSD only suppers 1 feature',
                'check_dict_unchanged': 'BCSD only suppers 1 feature',
                'check_dont_overwrite_parameters': 'BCSD only suppers 1 feature',
                'check_fit_idempotent': 'BCSD only suppers 1 feature',
                'check_n_features_in': 'BCSD only suppers 1 feature',
                'check_fit_check_is_fitted': 'BCSD only suppers 1 feature',
                'check_methods_sample_order_invariance': 'temporal order matters',
            },
        }
class BcsdTemperature(BcsdBase):
    """BCSD model for temperature.

    ``fit`` stores per-group means of ``X`` and ``y`` and fits one quantile
    mapper per group of ``y``. ``predict`` separates a rolling-mean shift from
    ``X``, quantile-maps the remainder, adds the shift back and, when
    ``return_anoms`` is set, subtracts the target climatology.

    Attributes
    ----------
    y_climo_ : pd.DataFrame
        Mean of the target ``y`` within each time group, computed in ``fit``.
    quantile_mappers_ : dict
        QuantileMapper objects (one for each time group).
    """

    def fit(self, X, y):
        """Fit BcsdTemperature model

        Parameters
        ----------
        X : pd.Series or pd.DataFrame, shape (n_samples, 1)
            Training data
        y : pd.Series or pd.DataFrame, shape (n_samples, 1)
            Target values.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If the training data does not have exactly one feature.
        """
        self._pre_fit()
        X, y = self._validate_data(X, y, y_numeric=True)
        # TO-DO: set n_features_in_ attribute
        if self.n_features_in_ != 1:
            # the check is for exactly one feature, so the message says so
            raise ValueError(f'BCSD only supports 1 feature, found {self.n_features_in_}')

        # make groups for day or month
        y_groups = self._create_groups(y)

        # calculate the climatologies
        self._x_climo = self._create_groups(X).mean()
        self.y_climo_ = y_groups.mean()

        # fit the quantile mappers
        self._qm_fit_by_group(y_groups)

        return self

    def predict(self, X):
        """Predict using the BcsdTemperature model

        Parameters
        ----------
        X : DataFrame, shape (n_samples, 1)
            Samples.

        Returns
        -------
        C : pd.DataFrame, shape (n_samples, 1)
            Returns predicted values: anomalies relative to the target
            climatology when ``return_anoms`` is set, otherwise the
            bias-corrected absolute values.
        """
        check_is_fitted(self)
        X = self._check_array(X)

        # Centered rolling mean over 9 samples within each ``climate_trend``
        # group (originally described as the 9-year running mean for each
        # month, which presumably assumes monthly input grouped by month).
        def rolling_func(x):
            return x.rolling(9, center=True, min_periods=1).mean()

        X_rolling_mean = X.groupby(self.climate_trend, group_keys=False).apply(rolling_func)

        # remove climatology from 9-year monthly mean climate trend
        X_shift = self._remove_climatology(X_rolling_mean, self._x_climo, climate_trend=True)

        # remove shift from model data
        X_no_shift = X - X_shift

        # Bias correction
        # apply quantile mapping by month or day
        Xqm = self._qm_transform_by_group(self._create_groups(X_no_shift, climate_trend=True))

        # restore the climate trend
        X_qm_with_shift = X_shift + Xqm

        # return bias corrected absolute values or calculate the anomalies
        if self.return_anoms:
            return self._remove_climatology(X_qm_with_shift, self.y_climo_)
        return X_qm_with_shift

    def _remove_climatology(self, obj, climatology, climate_trend=False):
        """Helper function to remove climatologies.

        Parameters
        ----------
        obj : pd.DataFrame
            Data from which the climatology is subtracted.
        climatology : pd.DataFrame
            Per-group climatology, indexed by the group keys.
        climate_trend : bool, optional
            Forwarded to ``_create_groups``.

        Returns
        -------
        pd.DataFrame
            ``obj`` with the matching climatology values subtracted from
            every group, recombined and sorted by index.

        Raises
        ------
        ValueError
            If the recombined result does not have the same shape as ``obj``.
        """
        dfs = []
        for key, group in self._create_groups(obj, climate_trend):
            # monthly and daily timesteps are handled identically here
            if self.timestep in ('monthly', 'daily'):
                dfs.append(group - climatology.loc[key].values)

        result = pd.concat(dfs).sort_index()
        if obj.shape != result.shape:
            raise ValueError('shape of climo is not equal to input array')
        return result

    def _more_tags(self):
        """Return estimator tags listing the scikit-learn checks marked as expected failures."""
        return {
            '_xfail_checks': {
                'check_estimators_dtypes': 'BCSD only suppers 1 feature',
                'check_fit_score_takes_y': 'BCSD only suppers 1 feature',
                'check_estimators_fit_returns_self': 'BCSD only suppers 1 feature',
                'check_estimators_fit_returns_self(readonly_memmap=True)': 'BCSD only suppers 1 feature',
                'check_dtype_object': 'BCSD only suppers 1 feature',
                'check_pipeline_consistency': 'BCSD only suppers 1 feature',
                'check_estimators_nan_inf': 'BCSD only suppers 1 feature',
                'check_estimators_overwrite_params': 'BCSD only suppers 1 feature',
                'check_estimators_pickle': 'BCSD only suppers 1 feature',
                'check_fit2d_predict1d': 'BCSD only suppers 1 feature',
                'check_methods_subset_invariance': 'BCSD only suppers 1 feature',
                'check_fit2d_1sample': 'BCSD only suppers 1 feature',
                'check_dict_unchanged': 'BCSD only suppers 1 feature',
                'check_dont_overwrite_parameters': 'BCSD only suppers 1 feature',
                'check_fit_idempotent': 'BCSD only suppers 1 feature',
                'check_n_features_in': 'BCSD only suppers 1 feature',
                'check_fit_check_is_fitted': 'BCSD only suppers 1 feature',
                'check_methods_sample_order_invariance': 'temporal order matters',
            },
        }