# Source code for pytesmo.validation_framework.temporal_matchers

# Copyright (c) 2013,Vienna University of Technology, Department of Geodesy and Geoinformation
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#    * Neither the name of the Vienna University of Technology, Department of Geodesy and Geoinformation nor the
#      names of its contributors may be used to endorse or promote products
#      derived from this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

'''
Created on Sep 24, 2013

@author: Christoph.Paulik@geo.tuwien.ac.at
'''

import itertools

import pytesmo.temporal_matching as temp_match

import pandas as pd
from distutils.version import LooseVersion

class BasicTemporalMatching(object):
    """
    Temporal matching object

    Parameters
    ----------
    window : float
        window size to use for temporal matching. A match in other will only
        be found if it is +- window size days away from a point in reference
    """

    def __init__(self, window=0.5):
        self.window = window

    def match(self, reference, *args):
        """
        Takes the reference and the other DataFrames and returns a joined
        DataFrame. In this case the reference dataset for the grid is also
        the temporal reference dataset.

        Parameters
        ----------
        reference : pandas.DataFrame
            DataFrame that the other DataFrames are matched against.
        *args : pandas.DataFrame
            DataFrames that are passed on to ``temp_match.df_match`` together
            with the reference.

        Returns
        -------
        matched_data : pandas.DataFrame
            The reference joined with every matched DataFrame. Rows in which
            all values are NaN are dropped.
        """
        matched_datasets = temp_match.df_match(reference, *args,
                                               dropna=True,
                                               dropduplicates=True,
                                               window=self.window)

        # the result is handled as either a tuple of frames or one single
        # frame; normalise to an iterable so both cases share one code path
        if not isinstance(matched_datasets, tuple):
            matched_datasets = [matched_datasets]

        # The label used to drop the 'index' column depends only on the
        # installed pandas version, so it is determined once, not per frame.
        # NOTE(review): LooseVersion is imported from distutils, which is no
        # longer part of the standard library since Python 3.12 (PEP 632).
        if LooseVersion(pd.__version__) < LooseVersion('0.23'):
            index_label = ('index', '')
        else:
            index_label = 'index'

        matched_data = pd.DataFrame(reference)

        for match in matched_datasets:
            # 'index' and 'distance' are presumably bookkeeping columns added
            # by df_match (TODO confirm); they are removed before joining
            match = match.drop(index_label, axis=1)
            match = match.drop('distance', axis=1)
            matched_data = matched_data.join(match)

        return matched_data.dropna(how='all')

    def combinatory_matcher(self, df_dict, refkey, n=2):
        """
        Basic temporal matcher that matches always one Dataframe to
        the reference Dataframe resulting in matched DataFrame pairs.

        If the input dict has the keys 'data1' and 'data2' then the
        output dict will have the key ('data1', 'data2'). The new key
        is stored as a tuple to avoid any issues with string concatenation.

        During matching the column names of the dataframes will be
        transformed into MultiIndex to ensure unique names.

        Parameters
        ----------
        df_dict: dict of pandas.DataFrames
            dictionary containing the spatially colocated DataFrames.
        refkey: string
            key into the df_dict of the DataFrame that should be taken
            as a reference.
        n: int
            number of datasets to match at once

        Returns
        -------
        matched: dict of pandas.DataFrames
            Dictionary containing matched DataFrames. The key is put
            together from the keys of the input dict as a tuple of the
            keys of the datasets this dataframe contains.
            Combinations whose joined DataFrame has no rows are left out.

        Raises
        ------
        ValueError
            If refkey is not a key of df_dict.
        """
        matched = {}

        # every key except the reference is a candidate for matching
        keys = list(df_dict)
        keys.remove(refkey)

        ref_df = df_name_multiindex(df_dict[refkey], refkey)

        # the reference takes one slot, so n - 1 other datasets are combined
        for iterkeys in itertools.combinations(keys, n - 1):
            match_list = [df_name_multiindex(df_dict[key], key)
                          for key in iterkeys]
            matched_key = tuple([refkey] + sorted(iterkeys))

            joined = self.match(ref_df, *match_list)

            if len(joined) != 0:
                matched[matched_key] = joined

        return matched
def df_name_multiindex(df, name):
    """
    Rename columns of a DataFrame by using new column names that
    are tuples of (name, column_name) to ensure unique column names
    that can also be split again.

    The intent is to turn the columns into a MultiIndex.
    NOTE(review): whether pandas builds a real MultiIndex or a flat Index
    of tuples from the renamed labels may depend on the pandas version -
    verify if callers rely on MultiIndex behaviour.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame whose columns should be renamed. It is not modified.
    name : hashable
        Value used as first element of every new column label.

    Returns
    -------
    renamed : pandas.DataFrame
        Result of ``df.rename`` with each column ``c`` relabelled as
        ``(name, c)``.
    """
    return df.rename(columns={c: (name, c) for c in df.columns})