# Source code for pytesmo.validation_framework.temporal_matchers

# Copyright (c) 2013,Vienna University of Technology, Department of Geodesy and
# Geoinformation
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#    * Neither the name of the Vienna University of Technology, Department of
#      Geodesy and Geoinformation nor the names of its contributors may be used
#      to endorse or promote products derived from this software without
#      specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY, DEPARTMENT
# OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
Created on Sep 24, 2013

@author: Christoph.Paulik@geo.tuwien.ac.at
"""

import itertools
import pandas as pd

import pytesmo.temporal_matching as temporal_matching


class BasicTemporalMatching(object):
    """
    Temporal matcher that collocates other datasets onto a reference.

    Parameters
    ----------
    window : float
        Window size to use for temporal matching. A match in another
        dataset is only found if it is within +- ``window`` days of a
        point in the reference.
    """

    def __init__(self, window=0.5):
        self.window = window

    def match(self, reference, *args):
        """
        Join the reference with the other DataFrames and return the result.

        The reference dataset for the grid is also used as the temporal
        reference dataset.

        Parameters
        ----------
        reference : object accepted by ``pd.DataFrame``
            Reference data; it is wrapped in ``pd.DataFrame`` before
            matching.
        *args : DataFrames
            The other datasets; they are forwarded together as one tuple.

        Returns
        -------
        joined
            Whatever ``temporal_matching.combined_temporal_collocation``
            returns when called with ``self.window`` and the options
            ``dropna=True``, ``dropduplicates=True``, ``add_ref_data=True``
            and ``combined_dropna="all"``.
        """
        reference_frame = pd.DataFrame(reference)
        collocation_options = {
            "dropna": True,
            "dropduplicates": True,
            "add_ref_data": True,
            "combined_dropna": "all",
        }
        return temporal_matching.combined_temporal_collocation(
            reference_frame, args, self.window, **collocation_options
        )

    def combinatory_matcher(self, df_dict, refkey, n=2, **kwargs):
        """
        Match every combination of ``n - 1`` non-reference DataFrames to
        the reference DataFrame.

        If the input dict has the keys 'data1' and 'data2' (with 'data1' as
        reference and ``n=2``) then the output dict will have the key
        ``('data1', 'data2')``. The new key is stored as a tuple to avoid
        any issues with string concatenation. Before matching, the columns
        of every DataFrame are relabelled with ``df_name_multiindex`` so
        that each label also carries the key of its dataset.

        Parameters
        ----------
        df_dict : dict of pandas.DataFrames
            Dictionary containing the spatially colocated DataFrames.
        refkey : string
            Key into ``df_dict`` of the DataFrame that should be taken as
            the reference.
        n : int
            Number of datasets to match at once (reference included).
        **kwargs :
            Accepted but not used by this method (e.g. a dummy ``k``).

        Returns
        -------
        matched : dict of pandas.DataFrames
            Dictionary containing the matched DataFrames. Each key is a
            tuple starting with ``refkey`` followed by the sorted keys of
            the other datasets in that combination. Combinations whose
            joined result has length zero are left out.

        Raises
        ------
        ValueError
            If ``refkey`` is not a key of ``df_dict``.
        """
        other_keys = list(df_dict)
        other_keys.remove(refkey)

        reference_frame = df_name_multiindex(df_dict[refkey], refkey)

        matched = {}
        for combo in itertools.combinations(other_keys, n - 1):
            renamed_others = [
                df_name_multiindex(df_dict[key], key) for key in combo
            ]
            # Build the key before matching, as the original code does.
            result_key = (refkey,) + tuple(sorted(combo))

            joined = self.match(reference_frame, *renamed_others)
            if len(joined) == 0:
                continue
            matched[result_key] = joined

        return matched
def dfdict_combined_temporal_collocation(
    dfs, refname, k, window=None, n=None, **kwargs
):
    """
    Applies :py:func:`combined_temporal_collocation` on a dictionary of
    dataframes.

    Parameters
    ----------
    dfs : dict
        Dictionary of pd.DataFrames containing the dataframes to be
        collocated.
    refname : str
        Name of the reference frame in `dfs`.
    k : int
        Accepted for interface compatibility; the function body never
        reads it.
    window : pd.Timedelta or float, optional
        Window around reference timestamps in which to look for data.
        If it is not given (``None``), it defaults to
        ``pd.Timedelta(hours=1)``.
        NOTE(review): floats are presumably interpreted as number of days
        by the collocation routine -- confirm against its documentation.
    n : int, optional
        If given and ``len(dfs)`` differs from it, an empty dictionary is
        returned without doing any matching.
    **kwargs :
        Keyword arguments passed to
        :py:func:`combined_temporal_collocation`.

    Returns
    -------
    matched_dict : dict
        Dictionary with a single entry whose key is the tuple
        ``(refname, othernames...)`` and whose value is the collocated
        result, or an empty dictionary if the ``n`` check fails.
    """
    if n is not None and len(dfs) != n:
        return {}

    if window is None:
        window = pd.Timedelta(hours=1)

    # Relabel the columns so every one carries the name of its dataset.
    other_frames = [
        df_name_multiindex(dfs[name], name)
        for name in dfs
        if name != refname
    ]
    reference_frame = df_name_multiindex(dfs[refname], refname)

    collocated = temporal_matching.combined_temporal_collocation(
        reference_frame, other_frames, window, add_ref_data=True, **kwargs
    )

    # Pack the result into a one-entry dictionary keyed by all names,
    # reference first.
    remaining_names = list(dfs.keys())
    remaining_names.remove(refname)
    return {tuple([refname] + remaining_names): collocated}
def make_combined_temporal_matcher(window):
    """
    Build a matcher function that matches multiple dataframes together so
    that only common timestamps remain.

    The returned function forwards to
    ``dfdict_combined_temporal_collocation`` (defined in this module) with
    ``dropna=True``, ``combined_dropna="any"`` and ``dropduplicates=True``.

    Parameters
    ----------
    window : pd.Timedelta or float
        Window around reference timestamps in which to look for data. It
        is forwarded unchanged as the ``window`` keyword argument; passing
        ``None`` makes ``dfdict_combined_temporal_collocation`` fall back
        to its own default of 1 hour.

    Returns
    -------
    matcher : callable
        Function with signature ``matcher(dfs, refname, k=None, **kwargs)``.
    """
    fixed_options = {
        "window": window,
        "dropna": True,
        "combined_dropna": "any",
        "dropduplicates": True,
    }

    def matcher(dfs, refname, k=None, **kwargs):
        # `k` comes from Validation.temporal_match_datasets but is not
        # required here; it is only passed through.
        return dfdict_combined_temporal_collocation(
            dfs, refname, k, **fixed_options, **kwargs
        )

    return matcher
def df_name_multiindex(df, name):
    """
    Return a copy of ``df`` whose column labels are ``(name, column_name)``
    tuples.

    Prefixing every label with ``name`` keeps column names unique when
    several DataFrames are joined, and the tuples can be split again
    afterwards. The input DataFrame is not modified, since ``rename`` is
    called without ``inplace``.

    NOTE(review): the original documentation states that this turns the
    columns into a MultiIndex; whether pandas builds a MultiIndex or a
    flat index of tuples here should be confirmed.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame whose columns should be relabelled.
    name : hashable
        Name to put in front of every column label.

    Returns
    -------
    renamed : pandas.DataFrame
        DataFrame with the relabelled columns.
    """
    relabel = {column: (name, column) for column in df.columns}
    return df.rename(columns=relabel)