# Copyright (c) 2013,Vienna University of Technology, Department of Geodesy and Geoinformation
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Vienna University of Technology, Department of Geodesy and Geoinformation nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
Created on Sep 24, 2013
Metric calculators usable together with core
@author: Christoph.Paulik@geo.tuwien.ac.at
'''
import pytesmo.metrics as metrics
import copy
import numpy as np
class BasicMetrics(object):
    """
    Compute a basic set of comparison metrics between two datasets.

    The calculated metrics are

    * Pearson's R (and its p-value)
    * Spearman's rho (and its p-value)
    * optionally Kendall's tau (and its p-value)
    * RMSD
    * BIAS

    The result also stores information about gpi, lon, lat
    and the number of observations.

    Parameters
    ----------
    other_name: string, optional
        Name of the column of the non-reference / other dataset in the
        pandas DataFrame
    calc_tau: boolean, optional
        if True then also tau is calculated. This is set to False by default
        since the calculation of Kendalls tau is rather slow and can
        significantly impact performance of e.g. global validation studies
    """

    def __init__(self, other_name='k1',
                 calc_tau=False):
        # Every entry is a one-element numpy array so that calc_metrics can
        # fill in values by index while keeping a fixed dtype per metric.
        self.result_template = {'R': np.float32([np.nan]),
                                'p_R': np.float32([np.nan]),
                                'rho': np.float32([np.nan]),
                                'p_rho': np.float32([np.nan]),
                                'tau': np.float32([np.nan]),
                                'p_tau': np.float32([np.nan]),
                                'RMSD': np.float32([np.nan]),
                                'BIAS': np.float32([np.nan]),
                                'n_obs': np.int32([0]),
                                'gpi': np.int32([-1]),
                                'lon': np.float64([np.nan]),
                                'lat': np.float64([np.nan])}

        self.other_name = other_name
        self.calc_tau = calc_tau

    def calc_metrics(self, data, gpi_info):
        """
        Calculate the desired statistics.

        Parameters
        ----------
        data : pandas.DataFrame
            with 2 columns, the first column is the reference dataset
            named 'ref'
            the second column is the dataset to compare against, named
            as given by ``other_name`` in the constructor
        gpi_info : tuple
            of (gpi, lon, lat)

        Returns
        -------
        dataset : dict
            copy of ``result_template`` filled with the results.
            'n_obs', 'gpi', 'lon' and 'lat' are always set. If ``data`` has
            fewer than 10 rows no metric is calculated and all metric
            entries keep their NaN fill value.

        Notes
        -----
        Kendall tau calculation is optional at the moment
        because the scipy implementation is very slow which is problematic
        for global comparisons
        """
        # deep copy so that the template arrays are never modified in place
        dataset = copy.deepcopy(self.result_template)

        dataset['n_obs'][0] = len(data)
        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        # fewer than 10 observations: return metadata only, metrics stay NaN
        if len(data) < 10:
            return dataset

        x, y = data['ref'].values, data[self.other_name].values
        R, p_R = metrics.pearsonr(x, y)
        rho, p_rho = metrics.spearmanr(x, y)
        RMSD = metrics.rmsd(x, y)
        BIAS = metrics.bias(x, y)

        dataset['R'][0], dataset['p_R'][0] = R, p_R
        dataset['rho'][0], dataset['p_rho'][0] = rho, p_rho
        dataset['RMSD'][0] = RMSD
        dataset['BIAS'][0] = BIAS

        if self.calc_tau:
            tau, p_tau = metrics.kendalltau(x, y)
            dataset['tau'][0], dataset['p_tau'][0] = tau, p_tau

        return dataset
class BasicMetricsPlusMSE(BasicMetrics):
    """
    Basic Metrics plus Mean squared Error and the decomposition of the MSE
    into correlation, bias and variance parts.

    Parameters
    ----------
    other_name: string, optional
        Name of the column of the non-reference / other dataset in the
        pandas DataFrame
    calc_tau: boolean, optional
        passed on unchanged to the BasicMetrics constructor
    """

    def __init__(self, other_name='k1',
                 calc_tau=False):
        super(BasicMetricsPlusMSE, self).__init__(other_name=other_name,
                                                  calc_tau=calc_tau)
        # extend the template of the parent class with the MSE entries
        self.result_template.update({'mse': np.float32([np.nan]),
                                     'mse_corr': np.float32([np.nan]),
                                     'mse_bias': np.float32([np.nan]),
                                     'mse_var': np.float32([np.nan])})

    def calc_metrics(self, data, gpi_info):
        """
        Calculate the basic metrics and additionally the MSE decomposition.

        Parameters
        ----------
        data : pandas.DataFrame
            with 2 columns, the first column is the reference dataset
            named 'ref'
            the second column is the dataset to compare against, named
            as given by ``other_name`` in the constructor
        gpi_info : tuple
            of (gpi, lon, lat)

        Returns
        -------
        dataset : dict
            result of the parent class ``calc_metrics`` with the entries
            'mse', 'mse_corr', 'mse_bias' and 'mse_var' filled in. If
            ``data`` has fewer than 10 rows the MSE entries keep their
            NaN fill value.
        """
        dataset = super(BasicMetricsPlusMSE, self).calc_metrics(data, gpi_info)

        # same minimum number of observations as used by the parent class
        if len(data) < 10:
            return dataset

        x, y = data['ref'].values, data[self.other_name].values
        mse, mse_corr, mse_bias, mse_var = metrics.mse(x, y)

        dataset['mse'][0] = mse
        dataset['mse_corr'][0] = mse_corr
        dataset['mse_bias'][0] = mse_bias
        dataset['mse_var'][0] = mse_var

        return dataset
class FTMetrics(object):
    """
    This class computes Freeze/Thaw Metrics

    Calculated metrics are the number of observations with

    * SSF frozen/temp unfrozen
    * SSF unfrozen/temp frozen
    * SSF unfrozen/temp unfrozen
    * SSF frozen/temp frozen

    it also stores information about gpi, lat, lon
    and number of total observations

    Parameters
    ----------
    frozen_flag: int, optional
        value of the SSF ('ref') column that is counted as frozen,
        every other value is counted as unfrozen
    other_name: string, optional
        Name of the column of the non-reference / other dataset in the
        pandas DataFrame, used here as the temperature
    """

    def __init__(self, frozen_flag=2,
                 other_name='k1'):
        self.frozen_flag_value = frozen_flag
        # Every entry is a one-element numpy array so that calc_metrics can
        # fill in values by index while keeping a fixed dtype per entry.
        self.result_template = {'ssf_fr_temp_un': np.float32([np.nan]),
                                'ssf_fr_temp_fr': np.float32([np.nan]),
                                'ssf_un_temp_fr': np.float32([np.nan]),
                                'ssf_un_temp_un': np.float32([np.nan]),
                                'n_obs': np.int32([0]),
                                'gpi': np.int32([-1]),
                                'lon': np.float64([np.nan]),
                                'lat': np.float64([np.nan])}

        self.other_name = other_name

    def calc_metrics(self, data, gpi_info):
        """
        Count the agreement between the SSF and the temperature.

        Parameters
        ----------
        data : pandas.DataFrame
            with 2 columns, the first column is the reference dataset
            named 'ref' and holds the SSF values
            the second column holds the temperature and is named
            as given by ``other_name`` in the constructor
        gpi_info : tuple
            of (gpi, lon, lat)

        Returns
        -------
        dataset : dict
            copy of ``result_template`` filled with the four counts and
            with 'n_obs', 'gpi', 'lon' and 'lat'.

        Notes
        -----
        A temperature below 0 counts as frozen, a temperature of 0 or
        above as unfrozen (presumably degrees Celsius, confirm against
        the caller). Temperatures that are NaN fall into neither class
        and are therefore not part of any of the four counts.
        """
        # deep copy so that the template arrays are never modified in place
        dataset = copy.deepcopy(self.result_template)

        dataset['n_obs'][0] = len(data)
        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        ssf, temp = data['ref'].values, data[self.other_name].values

        # SSF equal to the frozen flag value -> frozen
        # any other SSF value -> unfrozen
        is_ssf_frozen = ssf == self.frozen_flag_value
        is_ssf_unfrozen = ssf != self.frozen_flag_value

        temp_ssf_frozen = temp[is_ssf_frozen]
        temp_ssf_unfrozen = temp[is_ssf_unfrozen]

        # correct classifications
        n_fr_temp_fr = np.count_nonzero(temp_ssf_frozen < 0)
        n_un_temp_un = np.count_nonzero(temp_ssf_unfrozen >= 0)

        # incorrect classifications
        n_fr_temp_un = np.count_nonzero(temp_ssf_frozen >= 0)
        n_un_temp_fr = np.count_nonzero(temp_ssf_unfrozen < 0)

        dataset['ssf_fr_temp_un'][0] = n_fr_temp_un
        dataset['ssf_fr_temp_fr'][0] = n_fr_temp_fr
        dataset['ssf_un_temp_fr'][0] = n_un_temp_fr
        dataset['ssf_un_temp_un'][0] = n_un_temp_un

        return dataset