Source code for pytesmo.metrics.confidence_intervals

import numpy as np
from scipy import stats

from pytesmo.metrics import pairwise
from pytesmo.metrics.tcol import tcol_metrics


def has_analytical_ci(metric_func):
    """
    Checks whether an analytical CI implementation is available.

    Parameters
    ----------
    metric_func : callable
        Function that calculates the metric value. Must be from
        :py:mod:`pytesmo.metrics`.

    Returns
    -------
    has_ci : bool
        ``True`` if there is a function with name
        ``metric_func.__name__ + "_ci"`` in
        :py:mod:`pytesmo.metrics.pairwise`.
    """
    return hasattr(pairwise, metric_func.__name__ + "_ci")
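
# Illustrative usage sketch (not part of the original module): checking for an
# analytical CI before requesting one. ``pearson_r`` is assumed to be one of
# the metrics exported by :py:mod:`pytesmo.metrics.pairwise`.
#
# >>> from pytesmo.metrics import pairwise
# >>> has_analytical_ci(pairwise.pearson_r)  # True if ``pearson_r_ci`` exists
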
def with_analytical_ci(metric_func, x, y, alpha=0.05):
    """
    Evaluates a metric and its analytical confidence interval.

    This calculates the metric value and an analytical confidence interval.
    For this to work, the metric function must be from
    :py:mod:`pytesmo.metrics.pairwise`. Use
    :py:func:`pytesmo.metrics.has_analytical_ci` to check whether an
    analytical CI is available.

    Parameters
    ----------
    metric_func : callable or str
        Function that calculates the metric value. Must be from
        :py:mod:`pytesmo.metrics`, and have an analytical CI function
        implemented in :py:mod:`pytesmo.metrics.pairwise`.
        The metric function must have the following signature:
        ``(x : np.ndarray, y : np.ndarray) -> float``
        Alternatively, this can be the name of the function in
        `pytesmo.metrics`.
    x, y : np.ndarray
        Data to be compared.
    alpha : float, optional
        Significance level of the confidence interval, default is 0.05
        (i.e. a 95% CI).

    Returns
    -------
    m : float
        Metric value.
    lower : float
        Lower bound of the confidence interval.
    upper : float
        Upper bound of the confidence interval.

    Raises
    ------
    ValueError
        If no analytical CI function is available.
    """
    if isinstance(metric_func, str):
        metric_func = getattr(pairwise, metric_func)
    m = metric_func(x, y)
    name = metric_func.__name__
    if has_analytical_ci(metric_func):
        ci = getattr(pairwise, name + "_ci")(x, y, m, alpha)
    else:
        raise ValueError(f"No analytical CI implemented for {name}.")
    return m, ci[0], ci[1]
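
# Hedged usage sketch: Pearson's r with its analytical CI on synthetic data.
# ``pearson_r`` is assumed to be available in
# :py:mod:`pytesmo.metrics.pairwise` together with a ``pearson_r_ci`` function.
#
# >>> import numpy as np
# >>> from pytesmo.metrics import pairwise
# >>> rng = np.random.default_rng(42)
# >>> x = rng.normal(size=200)
# >>> y = x + 0.5 * rng.normal(size=200)
# >>> r, lower, upper = with_analytical_ci(pairwise.pearson_r, x, y, alpha=0.05)
# >>> lower < r < upper
# True
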
def with_bootstrapped_ci(
    metric_func,
    x,
    y,
    alpha=0.05,
    method="percentile",
    nsamples=1000,
    minimum_data_length=100,
):
    """
    Evaluates a metric and bootstraps its confidence interval.

    This calculates the metric value and uses bootstrapping to find a
    confidence interval for the metric. This works only for pairwise metrics;
    use :py:func:`pytesmo.metrics.tcol_metrics_with_bootstrapped_ci` for TCA
    metrics.

    Parameters
    ----------
    metric_func : callable
        Function that calculates the metric value. The metric function must
        have the following signature:
        ``(x : np.ndarray, y : np.ndarray) -> float``
    x, y : np.ndarray
        Data to be compared.
    alpha : float, optional
        Significance level of the confidence interval, default is 0.05
        (i.e. a 95% CI).
    method : str, optional
        The method to use to calculate confidence intervals. Available
        methods are:

        - "percentile" (default): Uses the percentiles of the bootstrapped
          metric distribution.
        - "basic": Uses the percentiles of the differences to the original
          metric value.
        - "BCa": Bias-corrected and accelerated bootstrap.

        For more info and a comparison of the methods, see [Gilleland2010]_,
        especially Table 6 on page 38.
    nsamples : int, optional
        Number of bootstrap samples, default is 1000.
    minimum_data_length : int, optional
        Minimum amount of data required to do bootstrapping. Default is 100.

    Returns
    -------
    m : float
        Metric value for pairwise metrics.
    lower : float or array of floats
        Lower bound of the confidence interval.
    upper : float or array of floats
        Upper bound of the confidence interval.

    References
    ----------
    .. [Gilleland2010] Gilleland, E. (2010). *Confidence Intervals for
       Forecast Verification* (No. NCAR/TN-479+STR). University Corporation
       for Atmospheric Research. doi:10.5065/D6WD3XJM
    """
    # Prototype; might be better to implement this in Cython if it's too slow.
    # Then it would probably be best to make this function only a lookup table
    # which calls a cpdef'd method, which itself calls the cdef'd metric
    # function with a cdef'd bootstrap implementation.
    n = len(x)
    if n < minimum_data_length:
        raise ValueError(
            "Not enough data for bootstrapping. Your data should contain at"
            f" least {minimum_data_length} samples.\n"
            f"You can pass 'minimum_data_length={n}' if you want to do"
            " bootstrapping nevertheless."
        )
    orig_metric = metric_func(x, y)
    bootstrapped_metric = np.empty(nsamples, dtype=float)
    if method == "BCa":
        orig_jk = _jackknife(metric_func, x, y)
    for i in range(nsamples):
        idx = np.random.choice(n, size=n)
        _x, _y = x[idx], y[idx]
        bootstrapped_metric[i] = metric_func(_x, _y)
    if method == "percentile":
        lower, upper = _percentile_bs_ci(
            bootstrapped_metric, orig_metric, alpha
        )
    elif method == "basic":
        lower, upper = _basic_bs_ci(bootstrapped_metric, orig_metric, alpha)
    elif method == "BCa":
        lower, upper = _BCa_bs_ci(
            bootstrapped_metric, orig_metric, alpha, orig_jk
        )
    return orig_metric, lower, upper
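
# Hedged usage sketch: bootstrapping a CI for an arbitrary pairwise metric.
# Any callable with signature ``f(x, y) -> float`` works; the ``ubrmsd``
# helper below is defined locally for illustration only.
#
# >>> import numpy as np
# >>> rng = np.random.default_rng(0)
# >>> x = rng.normal(size=500)
# >>> y = x + 0.3 * rng.normal(size=500)
# >>> def ubrmsd(a, b):
# ...     return np.sqrt(np.mean(((a - a.mean()) - (b - b.mean())) ** 2))
# >>> m, lower, upper = with_bootstrapped_ci(
# ...     ubrmsd, x, y, alpha=0.05, method="BCa", nsamples=1000
# ... )
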
def tcol_metrics_with_bootstrapped_ci(
    x,
    y,
    z,
    ref_ind=0,
    alpha=0.05,
    method="percentile",
    nsamples=1000,
    minimum_data_length=100,
):
    """
    Evaluates triple collocation metrics and bootstraps confidence intervals.

    This calculates the SNR, error standard deviation, and scaling parameter
    value using Triple Collocation Analysis and uses bootstrapping to find
    confidence intervals.

    Parameters
    ----------
    x, y, z : np.ndarray
        Data to be compared.
    ref_ind : int
        Index of the reference data set for estimating scaling coefficients.
        Default: 0 (x).
    alpha : float, optional
        Significance level of the confidence interval, default is 0.05
        (i.e. a 95% CI).
    method : str, optional
        The method to use to calculate confidence intervals. Available
        methods are:

        - "percentile" (default): Uses the percentiles of the bootstrapped
          metric distribution.
        - "basic": Uses the percentiles of the differences to the original
          metric value.
        - "BCa": Bias-corrected and accelerated bootstrap.

        For more info and a comparison of the methods, see [Gilleland2010]_,
        especially Table 6 on page 38.
    nsamples : int, optional
        Number of bootstrap samples, default is 1000.
    minimum_data_length : int, optional
        Minimum amount of data required to do bootstrapping. Default is 100.

    Returns
    -------
    snr_result : tuple
        ``(snr, lower, upper)``, where each entry is an array of length 3.
    err_std_result : tuple
        ``(err_std, lower, upper)``, where each entry is an array of length 3.
    beta_result : tuple
        ``(beta, lower, upper)``, where each entry is an array of length 3.
        Note that beta is always 1 for the reference dataset (see `ref_ind`),
        therefore the lower and upper values are set to 1 too.

    References
    ----------
    .. [Gilleland2010] Gilleland, E. (2010). *Confidence Intervals for
       Forecast Verification* (No. NCAR/TN-479+STR). University Corporation
       for Atmospheric Research. doi:10.5065/D6WD3XJM
    """
    # Prototype; might be better to implement this in Cython if it's too slow.
    # Then it would probably be best to make this function only a lookup table
    # which calls a cpdef'd method, which itself calls the cdef'd metric
    # function with a cdef'd bootstrap implementation.
    n = len(x)
    if n < minimum_data_length:
        raise ValueError(
            "Not enough data for bootstrapping. Your data should contain at"
            f" least {minimum_data_length} samples.\n"
            f"You can pass 'minimum_data_length={n}' if you want to do"
            " bootstrapping nevertheless."
        )
    orig_snr, orig_err_std, orig_beta = tcol_metrics(x, y, z, ref_ind=ref_ind)
    bootstrapped_snr = np.empty((nsamples, 3), dtype=float)
    bootstrapped_err_std = np.empty((nsamples, 3), dtype=float)
    bootstrapped_beta = np.empty((nsamples, 3), dtype=float)
    if method == "BCa":
        # leave-one-out jackknife estimates, needed for the acceleration term
        snr_jk = np.empty((n, 3))
        err_std_jk = np.empty((n, 3))
        beta_jk = np.empty((n, 3))
        mask = np.ones(len(x), dtype=bool)
        for i in range(len(x)):
            mask[i] = False
            snr_jk[i, :], err_std_jk[i, :], beta_jk[i, :] = tcol_metrics(
                x[mask], y[mask], z[mask], ref_ind=ref_ind
            )
            mask[i] = True
    for i in range(nsamples):
        idx = np.random.choice(n, size=n)
        _x, _y, _z = x[idx], y[idx], z[idx]
        (
            bootstrapped_snr[i, :],
            bootstrapped_err_std[i, :],
            bootstrapped_beta[i, :],
        ) = tcol_metrics(_x, _y, _z, ref_ind=ref_ind)
    lower_snr = np.empty(3)
    upper_snr = np.empty(3)
    lower_err_std = np.empty(3)
    upper_err_std = np.empty(3)
    lower_beta = np.empty(3)
    upper_beta = np.empty(3)
    for i in range(3):
        if method == "percentile":
            lower_snr[i], upper_snr[i] = _percentile_bs_ci(
                bootstrapped_snr[:, i], orig_snr[i], alpha
            )
            lower_err_std[i], upper_err_std[i] = _percentile_bs_ci(
                bootstrapped_err_std[:, i], orig_err_std[i], alpha
            )
            if i != ref_ind:
                lower_beta[i], upper_beta[i] = _percentile_bs_ci(
                    bootstrapped_beta[:, i], orig_beta[i], alpha
                )
            else:
                lower_beta[i], upper_beta[i] = 1, 1
        elif method == "basic":
            lower_snr[i], upper_snr[i] = _basic_bs_ci(
                bootstrapped_snr[:, i], orig_snr[i], alpha
            )
            lower_err_std[i], upper_err_std[i] = _basic_bs_ci(
                bootstrapped_err_std[:, i], orig_err_std[i], alpha
            )
            if i != ref_ind:
                lower_beta[i], upper_beta[i] = _basic_bs_ci(
                    bootstrapped_beta[:, i], orig_beta[i], alpha
                )
            else:
                lower_beta[i], upper_beta[i] = 1, 1
        elif method == "BCa":
            # pass only the jackknife column belonging to dataset i
            lower_snr[i], upper_snr[i] = _BCa_bs_ci(
                bootstrapped_snr[:, i], orig_snr[i], alpha, snr_jk[:, i]
            )
            lower_err_std[i], upper_err_std[i] = _BCa_bs_ci(
                bootstrapped_err_std[:, i], orig_err_std[i], alpha,
                err_std_jk[:, i]
            )
            if i != ref_ind:
                lower_beta[i], upper_beta[i] = _BCa_bs_ci(
                    bootstrapped_beta[:, i], orig_beta[i], alpha,
                    beta_jk[:, i]
                )
            else:
                lower_beta[i], upper_beta[i] = 1, 1
    return (
        (orig_snr, lower_snr, upper_snr),
        (orig_err_std, lower_err_std, upper_err_std),
        (orig_beta, lower_beta, upper_beta),
    )
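
# Hedged usage sketch: three collocated, independently noisy measurements of a
# common signal, with bootstrapped CIs for SNR, error standard deviation, and
# the scaling parameter beta (fixed at 1 for the reference, x here).
#
# >>> import numpy as np
# >>> rng = np.random.default_rng(1)
# >>> signal = rng.normal(size=500)
# >>> x = signal + 0.1 * rng.normal(size=500)
# >>> y = 1.2 * signal + 0.2 * rng.normal(size=500)
# >>> z = 0.8 * signal + 0.3 * rng.normal(size=500)
# >>> snr_res, err_std_res, beta_res = tcol_metrics_with_bootstrapped_ci(
# ...     x, y, z, ref_ind=0, method="percentile", nsamples=500
# ... )
# >>> snr, snr_lower, snr_upper = snr_res  # each is an array of length 3
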
def _jackknife(metric_func, x, y):
    """Leave-one-out jackknife estimates of the metric (used for BCa)."""
    jk = np.empty_like(x)
    mask = np.ones(len(x), dtype=bool)
    for i in range(len(x)):
        mask[i] = False
        jk[i] = metric_func(x[mask], y[mask])
        mask[i] = True
    return jk


def _percentile_bs_ci(bs_m, m, alpha):
    """Calculates the CI using percentiles."""
    lower = np.nanquantile(bs_m, alpha / 2)
    upper = np.nanquantile(bs_m, 1 - alpha / 2)
    return lower, upper


def _basic_bs_ci(bs_m, m, alpha):
    """Basic bootstrap."""
    lower = 2 * m - np.nanquantile(bs_m, 1 - alpha / 2)
    upper = 2 * m - np.nanquantile(bs_m, alpha / 2)
    return lower, upper


def _BCa_bs_ci(bs_m, m, alpha, jk):
    """BCa bootstrap."""
    # see also here regarding implementation:
    # https://www.erikdrysdale.com/bca_python/
    z_alpha = stats.norm.ppf(alpha)
    z_1_alpha = stats.norm.ppf(1 - alpha)

    # bias correction
    z0 = stats.norm.ppf(np.mean(bs_m <= m))

    # acceleration
    jk_mean = np.mean(jk)
    a = np.sum((jk_mean - jk) ** 3) / (
        6 * (np.sum((jk_mean - jk) ** 2)) ** 1.5
    )

    # calculate adjusted percentiles
    alpha_lower = stats.norm.cdf(
        z0 + (z0 + z_alpha) / (1 - a * (z0 + z_alpha))
    )
    alpha_upper = stats.norm.cdf(
        z0 + (z0 + z_1_alpha) / (1 - a * (z0 + z_1_alpha))
    )
    lower = np.nanquantile(bs_m, alpha_lower)
    upper = np.nanquantile(bs_m, alpha_upper)
    return lower, upper
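
# Note on the BCa adjustment above (a restatement of what ``_BCa_bs_ci`` does,
# following the bias-corrected and accelerated bootstrap): a nominal quantile
# level q is mapped to an adjusted level
#
#     q_adj = Phi( z0 + (z0 + z_q) / (1 - a * (z0 + z_q)) )
#
# where Phi is the standard normal CDF, z_q = Phi^{-1}(q), z0 is the bias
# correction estimated from the share of bootstrap values below the original
# metric, and a is the acceleration estimated from the jackknife values. With
# z0 = 0 and a = 0 this reduces to the plain percentile interval.
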