# Source code for pytesmo.validation_framework.adapters

# Copyright (c) 2016,Vienna University of Technology,
# Department of Geodesy and Geoinformation
# All rights reserved.
import warnings

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions and the following disclaimer in the documentation
#     and/or other materials provided with the distribution.
#   * Neither the name of the Vienna University of Technology, Department of
#     Geodesy and Geoinformation nor the names of its contributors may be used
#     to endorse or promote products derived from this software without specific
#     prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY, DEPARTMENT OF
# GEODESY AND GEOINFORMATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

'''
Module containing adapters that can be used together with the validation
framework.
'''

import operator
from pytesmo.time_series.anomaly import calc_anomaly
from pytesmo.time_series.anomaly import calc_climatology
from pandas import DataFrame


class BasicAdapter(object):
    """
    Base class for other adapters that works around data readers that don't
    return a DataFrame (e.g. ASCAT). Also removes unnecessary timezone
    information in data.

    Parameters
    ----------
    cls : object
        Reader instance whose read_ts and/or read method is called.
    data_property_name : string, optional
        Name of the attribute that holds the DataFrame on the object returned
        by the reader, used when the reader does not return a DataFrame
        itself.
        Default: 'data'
    """

    def __init__(self, cls, data_property_name='data'):
        self.cls = cls
        self.data_property_name = data_property_name

    def __get_dataframe(self, data):
        """
        Return ``data`` itself if it is a DataFrame. Otherwise return the
        DataFrame stored on it under ``self.data_property_name`` if there is
        one, and fall back to returning ``data`` unchanged.
        """
        if isinstance(data, DataFrame):
            return data
        candidate = getattr(data, self.data_property_name, None)
        if isinstance(candidate, DataFrame):
            return candidate
        return data

    def __drop_tz_info(self, data):
        """
        Convert a timezone-aware index to a naive one and warn about it.
        The index of ``data`` is replaced in place; ``data`` is returned.
        """
        # Objects without an index, or whose index has no ``tz`` attribute
        # (e.g. the non-datetime index of an empty DataFrame), carry no
        # timezone information and are passed through untouched instead of
        # failing with an AttributeError.
        tz = getattr(getattr(data, 'index', None), 'tz', None)
        if tz is not None:
            warnings.warn(
                'Dropping timezone information ({}) for data from reader {}'
                .format(tz, self.cls.__class__.__name__))
            data.index = data.index.tz_convert(None)
        return data

    def read_ts(self, *args, **kwargs):
        """
        Call ``read_ts`` of the wrapped reader with the given arguments and
        return the result as a DataFrame without timezone information.
        """
        data = self.cls.read_ts(*args, **kwargs)
        return self.__drop_tz_info(self.__get_dataframe(data))

    def read(self, *args, **kwargs):
        """
        Call ``read`` of the wrapped reader with the given arguments and
        return the result as a DataFrame without timezone information.
        """
        data = self.cls.read(*args, **kwargs)
        return self.__drop_tz_info(self.__get_dataframe(data))

class MaskingAdapter(BasicAdapter):
    """
    Transform the given class to return a boolean dataset given the operator
    and threshold. This class calls the read_ts and read methods
    of the given instance and applies boolean masking to the returned data
    using the given operator and threshold.

    Parameters
    ----------
    cls : object
        has to have a read_ts or read method
    op : string
        one of '<', '<=', '==', '>=', '>', '!='
    threshold :
        value to use as the threshold combined with the operator
    column_name : string, optional
        name of the column to cut the read masking dataset to

    Raises
    ------
    KeyError
        if op is not one of the supported operator strings
    """

    def __init__(self, cls, op, threshold, column_name=None):
        super(MaskingAdapter, self).__init__(cls)

        # maps the operator string to the comparison function that is
        # applied to the data
        self.op_lookup = {'<': operator.lt,
                          '<=': operator.le,
                          '==': operator.eq,
                          '!=': operator.ne,
                          '>=': operator.ge,
                          '>': operator.gt}
        self.operator = self.op_lookup[op]
        self.threshold = threshold

        self.column_name = column_name

    def __mask(self, data):
        """
        Compare data (restricted to ``self.column_name`` if that is set)
        against the threshold and return the result of the comparison.
        """
        if self.column_name is not None:
            # list indexer keeps the selection a single-column DataFrame
            data = data.loc[:, [self.column_name]]
        return self.operator(data, self.threshold)

    def read_ts(self, *args, **kwargs):
        """
        Read data through the ``read_ts`` method of the parent class and
        return the comparison result for it.
        """
        data = super(MaskingAdapter, self).read_ts(*args, **kwargs)
        return self.__mask(data)

    def read(self, *args, **kwargs):
        """
        Read data through the ``read`` method of the parent class and
        return the comparison result for it.
        """
        data = super(MaskingAdapter, self).read(*args, **kwargs)
        return self.__mask(data)

class SelfMaskingAdapter(BasicAdapter):
    """
    Transform the given (reader) class to return a dataset that is masked based
    on the given column, operator, and threshold. This class calls the read_ts
    or read method of the given reader instance, applies the operator/threshold
    to the specified column, and masks the whole dataframe with the result.

    Parameters
    ----------
    cls : object
        has to have a read_ts or read method
    op : string
        one of '<', '<=', '==', '>=', '>', '!='
    threshold :
        value to use as the threshold combined with the operator
    column_name : string
        name of the column to apply the threshold to

    Raises
    ------
    KeyError
        if op is not one of the supported operator strings
    """

    def __init__(self, cls, op, threshold, column_name):
        super(SelfMaskingAdapter, self).__init__(cls)

        # maps the operator string to the comparison function that is
        # applied to the selected column
        self.op_lookup = {'<': operator.lt,
                          '<=': operator.le,
                          '==': operator.eq,
                          '!=': operator.ne,
                          '>=': operator.ge,
                          '>': operator.gt}

        self.operator = self.op_lookup[op]
        self.threshold = threshold
        self.column_name = column_name

    def __mask(self, data):
        """
        Compare column ``self.column_name`` of data against the threshold
        and return data indexed with the result of that comparison.
        """
        mask = self.operator(data[self.column_name], self.threshold)
        return data[mask]

    def read_ts(self, *args, **kwargs):
        """
        Read data through the ``read_ts`` method of the parent class and
        return the masked data.
        """
        data = super(SelfMaskingAdapter, self).read_ts(*args, **kwargs)
        return self.__mask(data)

    def read(self, *args, **kwargs):
        """
        Read data through the ``read`` method of the parent class and
        return the masked data.
        """
        data = super(SelfMaskingAdapter, self).read(*args, **kwargs)
        return self.__mask(data)

class AnomalyAdapter(BasicAdapter):
    """
    Takes the pandas DataFrame that the read_ts or read method of the instance
    returns and calculates the anomaly of the time series based on a moving
    average.


    Parameters
    ----------
    cls : class instance
        Must have a read_ts or read method returning a pandas.DataFrame
    window_size : float, optional
        The window-size [days] of the moving-average window to calculate the
        anomaly reference.
        Default: 35 (days)
    columns : list, optional
        columns in the dataset for which to calculate anomalies.
        If not given, all columns of the dataset are used.
    """

    def __init__(self, cls, window_size=35, columns=None):
        super(AnomalyAdapter, self).__init__(cls)
        self.window_size = window_size
        self.columns = columns

    def calc_anom(self, data):
        """
        Replace the selected columns of data by the result of calc_anomaly
        called with ``window_size=self.window_size``.

        The columns are overwritten in the given data, which is also
        returned.
        """
        # iterating over a DataFrame yields its column labels
        columns = data if self.columns is None else self.columns
        for column in columns:
            data[column] = calc_anomaly(data[column],
                                        window_size=self.window_size)
        return data

    def read_ts(self, *args, **kwargs):
        """
        Read data through the ``read_ts`` method of the parent class and
        return it with anomalies calculated by ``calc_anom``.
        """
        data = super(AnomalyAdapter, self).read_ts(*args, **kwargs)
        return self.calc_anom(data)

    def read(self, *args, **kwargs):
        """
        Read data through the ``read`` method of the parent class and
        return it with anomalies calculated by ``calc_anom``.
        """
        data = super(AnomalyAdapter, self).read(*args, **kwargs)
        return self.calc_anom(data)


class AnomalyClimAdapter(BasicAdapter):
    """
    Takes the pandas DataFrame that the read_ts or read method of the instance
    returns and calculates the anomaly of the time series based on a
    climatology that is calculated from the same time series.


    Parameters
    ----------
    cls : class instance
        Must have a read_ts or read method returning a pandas.DataFrame
    columns : list, optional
        columns in the dataset for which to calculate anomalies.
        If not given, all columns of the dataset are used.
    kwargs :
        Any additional arguments will be given to the calc_climatology function.
    """

    def __init__(self, cls, columns=None, **kwargs):
        super(AnomalyClimAdapter, self).__init__(cls)
        self.kwargs = kwargs
        self.columns = columns

    def calc_anom(self, data):
        """
        Replace the selected columns of data by their anomalies. For each
        column calc_climatology is called with ``self.kwargs`` and its result
        is passed to calc_anomaly as ``climatology``.

        The columns are overwritten in the given data, which is also
        returned.
        """
        # iterating over a DataFrame yields its column labels
        columns = data if self.columns is None else self.columns
        for column in columns:
            clim = calc_climatology(data[column], **self.kwargs)
            data[column] = calc_anomaly(data[column], climatology=clim)
        return data

    def read_ts(self, *args, **kwargs):
        """
        Read data through the ``read_ts`` method of the parent class and
        return it with anomalies calculated by ``calc_anom``.
        """
        data = super(AnomalyClimAdapter, self).read_ts(*args, **kwargs)
        return self.calc_anom(data)

    def read(self, *args, **kwargs):
        """
        Read data through the ``read`` method of the parent class and
        return it with anomalies calculated by ``calc_anom``.
        """
        data = super(AnomalyClimAdapter, self).read(*args, **kwargs)
        return self.calc_anom(data)