Source code for pytesmo.validation_framework.metric_calculators

# Copyright (c) 2013,Vienna University of Technology, Department of Geodesy and Geoinformation
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#    * Neither the name of the Vienna University of Technology, Department of Geodesy and Geoinformation nor the
#      names of its contributors may be used to endorse or promote products
#      derived from this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

'''
Created on Sep 24, 2013

Metric calculators useable in together with core

@author: Christoph.Paulik@geo.tuwien.ac.at
'''

import pytesmo.metrics as metrics

import copy
import numpy as np


class BasicMetrics(object):
    """
    Compute basic comparison statistics between a reference dataset and
    one other dataset: Pearson's R, Spearman's rho, optionally
    Kendall's tau, RMSD and BIAS.  The result also records gpi, lat,
    lon and the number of observations.

    Parameters
    ----------
    other_name : string, optional
        Name of the column of the non-reference / other dataset in the
        pandas DataFrame.  Default: 'k1'.
    calc_tau : boolean, optional
        If True then Kendall's tau is also calculated.  This is set to
        False by default since the calculation of Kendall's tau is
        rather slow and can significantly impact performance of e.g.
        global validation studies.
    min_obs : int, optional
        Minimum number of observations required before the metrics are
        computed; with fewer samples the NaN-filled template is
        returned.  Default: 10, which preserves the previous
        hard-coded behaviour.
    """

    def __init__(self, other_name='k1', calc_tau=False, min_obs=10):
        # One-element arrays so each result can be written directly into
        # record-style output; NaN / -1 / 0 mark "not computed yet".
        self.result_template = {'R': np.float32([np.nan]),
                                'p_R': np.float32([np.nan]),
                                'rho': np.float32([np.nan]),
                                'p_rho': np.float32([np.nan]),
                                'tau': np.float32([np.nan]),
                                'p_tau': np.float32([np.nan]),
                                'RMSD': np.float32([np.nan]),
                                'BIAS': np.float32([np.nan]),
                                'n_obs': np.int32([0]),
                                'gpi': np.int32([-1]),
                                'lon': np.float64([np.nan]),
                                'lat': np.float64([np.nan])}

        self.other_name = other_name
        self.calc_tau = calc_tau
        self.min_obs = min_obs

    def calc_metrics(self, data, gpi_info):
        """
        Calculate the desired statistics.

        Parameters
        ----------
        data : pandas.DataFrame
            DataFrame with 2 columns: the reference dataset named 'ref'
            and the dataset to compare against named by
            ``self.other_name``.
        gpi_info : tuple
            (gpi, lon, lat) of the location being processed.

        Returns
        -------
        dataset : dict
            Deep copy of the result template filled with the computed
            metrics (NaN-filled if fewer than ``self.min_obs``
            observations are available).

        Notes
        -----
        Kendall tau calculation is optional at the moment
        because the scipy implementation is very slow which is
        problematic for global comparisons.
        """
        dataset = copy.deepcopy(self.result_template)

        dataset['n_obs'][0] = len(data)
        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        # not enough samples for meaningful statistics -> NaN template
        if len(data) < self.min_obs:
            return dataset

        x, y = data['ref'].values, data[self.other_name].values
        R, p_R = metrics.pearsonr(x, y)
        rho, p_rho = metrics.spearmanr(x, y)
        RMSD = metrics.rmsd(x, y)
        BIAS = metrics.bias(x, y)

        dataset['R'][0], dataset['p_R'][0] = R, p_R
        dataset['rho'][0], dataset['p_rho'][0] = rho, p_rho
        dataset['RMSD'][0] = RMSD
        dataset['BIAS'][0] = BIAS

        if self.calc_tau:
            tau, p_tau = metrics.kendalltau(x, y)
            dataset['tau'][0], dataset['p_tau'][0] = tau, p_tau

        return dataset
class BasicMetricsPlusMSE(BasicMetrics):
    """
    Extension of BasicMetrics that additionally computes the mean
    squared error and its decomposition into correlation, bias and
    variance components.
    """

    def __init__(self, other_name='k1', calc_tau=False):
        super(BasicMetricsPlusMSE, self).__init__(other_name=other_name,
                                                  calc_tau=calc_tau)
        # extend the inherited template with the MSE decomposition fields
        for field in ('mse', 'mse_corr', 'mse_bias', 'mse_var'):
            self.result_template[field] = np.float32([np.nan])

    def calc_metrics(self, data, gpi_info):
        """
        Calculate the basic metrics plus the MSE decomposition.
        """
        dataset = super(BasicMetricsPlusMSE, self).calc_metrics(data,
                                                                gpi_info)
        if len(data) < 10:
            return dataset

        ref_vals = data['ref'].values
        other_vals = data[self.other_name].values
        mse_results = metrics.mse(ref_vals, other_vals)
        for field, value in zip(('mse', 'mse_corr',
                                 'mse_bias', 'mse_var'), mse_results):
            dataset[field][0] = value

        return dataset
class FTMetrics(object):
    """
    Compute Freeze/Thaw agreement counts between a surface state flag
    (SSF) dataset and a temperature dataset.

    Calculated metrics are the four confusion-matrix style counts:

    - SSF frozen / temperature unfrozen
    - SSF frozen / temperature frozen
    - SSF unfrozen / temperature frozen
    - SSF unfrozen / temperature unfrozen

    It also stores information about gpi, lat, lon and the total
    number of observations.

    Parameters
    ----------
    frozen_flag : int, optional
        Flag value in the SSF data that marks "frozen"; every other
        value is treated as "unfrozen".  Default: 2.
    other_name : string, optional
        Name of the temperature column in the pandas DataFrame.
        Default: 'k1'.
    """

    def __init__(self, frozen_flag=2, other_name='k1'):

        self.frozen_flag_value = frozen_flag
        self.result_template = {'ssf_fr_temp_un': np.float32([np.nan]),
                                'ssf_fr_temp_fr': np.float32([np.nan]),
                                'ssf_un_temp_fr': np.float32([np.nan]),
                                'ssf_un_temp_un': np.float32([np.nan]),
                                'n_obs': np.int32([0]),
                                'gpi': np.int32([-1]),
                                'lon': np.float64([np.nan]),
                                'lat': np.float64([np.nan])}

        self.other_name = other_name

    def calc_metrics(self, data, gpi_info):
        """
        Count correct and incorrect freeze/thaw classifications.

        Parameters
        ----------
        data : pandas.DataFrame
            DataFrame with 2 columns: the SSF reference dataset named
            'ref' and the temperature dataset named by
            ``self.other_name``.
        gpi_info : tuple
            (gpi, lon, lat) of the location being processed.

        Returns
        -------
        dataset : dict
            Deep copy of the result template with the four
            classification counts and the location metadata filled in.
        """
        dataset = copy.deepcopy(self.result_template)

        dataset['n_obs'][0] = len(data)
        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        ssf, temp = data['ref'].values, data[self.other_name].values
        # frozen: SSF equals the configured flag value (e.g. SSF >= 2),
        # everything else (e.g. SSF <= 1) counts as unfrozen
        ssf_frozen = np.where(ssf == self.frozen_flag_value)[0]
        ssf_unfrozen = np.where(ssf != self.frozen_flag_value)[0]

        temp_ssf_frozen = temp[ssf_frozen]
        temp_ssf_unfrozen = temp[ssf_unfrozen]

        # correct classifications: temperature below 0 agrees with
        # "frozen", at or above 0 agrees with "unfrozen"
        ssf_temp_frozen = np.where(temp_ssf_frozen < 0)[0]
        ssf_temp_unfrozen = np.where(temp_ssf_unfrozen >= 0)[0]

        # incorrect classifications
        ssf_fr_temp_unfrozen = np.where(temp_ssf_frozen >= 0)[0]
        ssf_un_temp_frozen = np.where(temp_ssf_unfrozen < 0)[0]

        dataset['ssf_fr_temp_un'][0] = len(ssf_fr_temp_unfrozen)
        dataset['ssf_fr_temp_fr'][0] = len(ssf_temp_frozen)
        dataset['ssf_un_temp_fr'][0] = len(ssf_un_temp_frozen)
        dataset['ssf_un_temp_un'][0] = len(ssf_temp_unfrozen)

        return dataset