Source code for spectral_connectivity.statistics

"""Statistical procedures for connectivity analysis.

This module provides statistical functions for testing significance of
connectivity measures, including multiple comparison corrections and
transforms for coherence-based measures. Functions support both parametric
and non-parametric approaches for statistical inference in frequency domain
connectivity analysis.
"""

from collections.abc import Callable
from typing import Literal

import numpy as np
import scipy.special
import scipy.stats
from numpy.typing import NDArray

np.seterr(invalid="ignore")



[docs]
def Benjamini_Hochberg_procedure(
    p_values: NDArray[np.floating], alpha: float = 0.05
) -> NDArray[np.bool_]:
    """Control false discovery rate using Benjamini-Hochberg procedure.

    Corrects for multiple comparisons and returns significant p-values by
    controlling the false discovery rate at level `alpha` using the
    Benjamini-Hochberg procedure.

    Parameters
    ----------
    p_values : NDArray[floating], shape (...,)
        P-values from statistical tests to be corrected.
    alpha : float, default=0.05
        Expected proportion of false positive tests (false discovery rate).

    Returns
    -------
    is_significant : NDArray[bool], shape (...,)
        Boolean array same shape as `p_values` indicating whether the
        null hypothesis has been rejected (True) or failed to reject (False).

    Examples
    --------
    >>> import numpy as np
    >>> p_vals = np.array([0.001, 0.02, 0.04, 0.3, 0.8])
    >>> significant = Benjamini_Hochberg_procedure(p_vals, alpha=0.05)
    >>> significant
    array([ True,  True, False, False, False])
    """
    p_values = np.array(p_values)
    threshold_line = np.linspace(0, alpha, num=p_values.size + 1, endpoint=True)[1:]
    sorted_p_values = np.sort(p_values.flatten())
    try:
        threshold_ind = int(np.max(np.where(sorted_p_values <= threshold_line)[0]))
        threshold = sorted_p_values[threshold_ind]
    except ValueError:  # There are no values below threshold
        threshold = -1
    return p_values <= threshold




[docs]
def Bonferroni_correction(
    p_values: NDArray[np.floating], alpha: float = 0.05
) -> NDArray[np.bool_]:
    """Control family-wise error rate using Bonferroni correction.

    Corrects for multiple comparisons by dividing the significance level
    by the number of tests. This is a conservative method that controls
    the family-wise error rate.

    Parameters
    ----------
    p_values : NDArray[floating], shape (...,)
        P-values from statistical tests to be corrected.
    alpha : float, default=0.05
        Critical threshold for significance testing.

    Returns
    -------
    is_significant : NDArray[bool], shape (...,)
        Boolean array indicating significant tests after Bonferroni correction.

    Examples
    --------
    >>> import numpy as np
    >>> p_vals = np.array([0.001, 0.02, 0.04, 0.3, 0.8])
    >>> significant = Bonferroni_correction(p_vals, alpha=0.05)
    >>> significant
    array([ True, False, False, False, False])
    """
    p_values = np.asarray(p_values)
    return p_values <= alpha / p_values.size



MULTIPLE_COMPARISONS: dict[str, Callable] = {
    "Benjamini_Hochberg_procedure": Benjamini_Hochberg_procedure,
    "Bonferroni_correction": Bonferroni_correction,
}



[docs]
def adjust_for_multiple_comparisons(
    p_values: NDArray[np.floating],
    alpha: float = 0.05,
    method: Literal[
        "Benjamini_Hochberg_procedure", "Bonferroni_correction"
    ] = "Benjamini_Hochberg_procedure",
) -> NDArray[np.bool_]:
    """Apply multiple comparison correction to p-values.

    Wrapper function that applies the specified multiple comparison correction
    method to control either false discovery rate or family-wise error rate.

    Parameters
    ----------
    p_values : NDArray[floating], shape (...,)
        P-values from statistical tests to be corrected.
    alpha : float, default=0.05
        Significance threshold for the correction method.
    method : {"Benjamini_Hochberg_procedure", "Bonferroni_correction"},
        default="Benjamini_Hochberg_procedure"
        Multiple comparison correction method to apply.

    Returns
    -------
    is_significant : NDArray[bool], shape (...,)
        Boolean array indicating which tests remain significant after correction.

    Examples
    --------
    >>> import numpy as np
    >>> p_vals = np.array([0.001, 0.02, 0.04, 0.3, 0.8])
    >>> # Using Benjamini-Hochberg (default)
    >>> bh_sig = adjust_for_multiple_comparisons(p_vals)
    >>> # Using Bonferroni
    >>> bonf_sig = adjust_for_multiple_comparisons(
    ...     p_vals, method="Bonferroni_correction"
    ... )
    """
    # Note: This function treats all p-values as a single family of tests by
    # flattening the input array. This is the standard approach for multiple
    # comparison correction. An axis parameter could be added in the future if
    # there's a need to correct along specific dimensions independently, but
    # current use cases don't require this functionality.
    return MULTIPLE_COMPARISONS[method](p_values, alpha=alpha)




[docs]
def coherence_fisher_z_transform(
    coherency1: NDArray[np.complexfloating],
    n_obs1: int,
    coherency2: NDArray[np.complexfloating] | float = 0,
    n_obs2: int = 0,
) -> NDArray[np.floating]:
    """Transform coherence magnitude to approximately normal distribution.

    Applies Fisher's z-transformation to coherence magnitudes, which
    approximately normalizes the distribution for statistical testing.
    Can compute single-sample test against zero or two-sample test.

    Parameters
    ----------
    coherency1 : NDArray[complexfloating], shape (...,)
        Complex coherency values between signals.
    n_obs1 : int
        Number of observations for coherency1 (n_tapers * n_trials).
    coherency2 : NDArray[complexfloating] or float, default=0
        Second coherency for comparison. If 0, tests against null hypothesis.
    n_obs2 : int, default=0
        Number of observations for coherency2 (n_tapers * n_trials).

    Returns
    -------
    fisher_z_transform : NDArray[floating], shape (...,)
        Z-scores for statistical testing. If coherency2=0, tests coherency1
        against zero. Otherwise, tests difference coherency1 - coherency2.

    Examples
    --------
    >>> import numpy as np
    >>> # Test single coherence against zero
    >>> coherence = np.array([0.1 + 0.05j, 0.3 + 0.2j, 0.8 + 0.1j])
    >>> z_scores = coherence_fisher_z_transform(coherence, n_obs1=100)
    >>>
    >>> # Compare two coherences
    >>> coh1 = np.array([0.5 + 0.2j, 0.3 + 0.1j])
    >>> coh2 = np.array([0.3 + 0.15j, 0.4 + 0.05j])
    >>> diff_z = coherence_fisher_z_transform(coh1, 100, coh2, 120)

    Notes
    -----
    The transformation uses bias correction based on the number of observations
    to improve the normal approximation for small sample sizes.
    """
    coherence_magnitude1 = np.abs(coherency1)
    coherence_magnitude1[coherence_magnitude1 >= 1] = 1 - np.finfo(float).eps

    coherence_magnitude2 = np.array(np.abs(coherency2))
    coherence_magnitude2[coherence_magnitude2 >= 1] = 1 - np.finfo(float).eps

    bias1, bias2 = coherence_bias(n_obs1), coherence_bias(n_obs2)

    z1 = np.arctanh(coherence_magnitude1) - bias1
    z2 = np.arctanh(coherence_magnitude2) - bias2
    return (z1 - z2) / np.sqrt(bias1 + bias2)




[docs]
def get_normal_distribution_p_values(
    data: NDArray[np.floating],
    mean: float = 0,
    std_deviation: float = 1,
) -> NDArray[np.floating]:
    """Compute p-values for normal distribution test.

    Given data values, returns the probability that each value was generated
    from a normal distribution with specified mean and standard deviation.
    This computes one-tailed p-values (upper tail).

    Parameters
    ----------
    data : NDArray[floating], shape (...,)
        Data values to test.
    mean : float, default=0
        Mean of the null hypothesis normal distribution.
    std_deviation : float, default=1
        Standard deviation of the null hypothesis normal distribution.

    Returns
    -------
    p_values : NDArray[floating], shape (...,)
        One-tailed p-values (upper tail) for each data point.

    Examples
    --------
    >>> import numpy as np
    >>> z_scores = np.array([-1.96, 0, 1.96, 2.58])
    >>> p_vals = get_normal_distribution_p_values(z_scores)
    >>> p_vals
    array([0.975, 0.5, 0.025, 0.005])

    Notes
    -----
    This function handles both NumPy and CuPy arrays automatically,
    falling back to NumPy computation if CuPy fails.
    """
    try:
        return 1 - scipy.stats.norm.cdf(data, loc=mean, scale=std_deviation)
    except TypeError:
        return 1 - scipy.stats.norm.cdf(data.get(), loc=mean, scale=std_deviation)  # type: ignore[attr-defined]




[docs]
def coherence_bias(n_observations: int) -> float:
    """Estimate bias correction for coherence estimates.

    Coherence estimates are biased by finite sample size. This function
    computes the bias correction factor that can be subtracted from
    Fisher z-transformed coherence estimates.

    Parameters
    ----------
    n_observations : int
        Number of observations used in coherence estimation (n_tapers * n_trials).

    Returns
    -------
    bias : float
        Bias correction factor for Fisher z-transform of coherence.

    Examples
    --------
    >>> bias_100 = coherence_bias(100)
    >>> bias_1000 = coherence_bias(1000)
    >>> print(f"Bias with 100 obs: {bias_100:.6f}")
    >>> print(f"Bias with 1000 obs: {bias_1000:.6f}")
    Bias with 100 obs: 0.005051
    Bias with 1000 obs: 0.000501

    References
    ----------
    .. [1] Enochson, L.D., and Goodman, N.R. (1965). Gaussian approximations
           to the distribution
           of sample coherence (Measurement analysis corp Los Angeles CA).
    .. [2] Bokil, H., Purpura, K., Schoffelen, J.-M., Thomson, D., and Mitra, P.
           (2007). Comparing
           spectra and coherences for groups of unequal size.
           Journal of Neuroscience Methods 159,
           337–345. 10.1016/j.jneumeth.2006.07.011.
    """
    degrees_of_freedom = 2 * n_observations
    return 1.0 / (degrees_of_freedom - 2)




[docs]
def coherence_rate_adjustment(
    firing_rate_condition1: float,
    firing_rate_condition2: float,
    spike_power_spectrum: NDArray[np.floating],
    homogeneous_poisson_noise: float = 0,
    dt: float = 1,
) -> NDArray[np.floating]:
    """Adjust spike-field coherence for different firing rates between conditions.

    When comparing coherence between conditions with different firing rates,
    rate differences can cause coherence changes independent of coupling strength.
    This function computes adjustment factors to correct for firing rate differences.

    Parameters
    ----------
    firing_rate_condition1 : float
        Average firing rate in first condition (spikes/sec).
    firing_rate_condition2 : float
        Average firing rate in second condition (spikes/sec).
    spike_power_spectrum : NDArray[floating], shape (n_frequencies,)
        Power spectrum of spike train in condition 1.
    homogeneous_poisson_noise : float, default=0
        Homogeneous Poisson noise parameter (beta in reference).
    dt : float, default=1
        Time step size for discretization.

    Returns
    -------
    rate_adjustment_factor : NDArray[floating], shape (n_frequencies,)
        Multiplicative factors to adjust coherence from condition 1 to
        account for firing rate difference.

    Examples
    --------
    >>> import numpy as np
    >>> # Simulate power spectrum and firing rates
    >>> freqs = np.linspace(1, 100, 50)
    >>> power_spec = 1 / (1 + freqs**2)  # 1/f-like spectrum
    >>> rate1, rate2 = 10.0, 15.0  # Different firing rates
    >>>
    >>> adjustment = coherence_rate_adjustment(rate1, rate2, power_spec)
    >>> print(f"Adjustment range: {adjustment.min():.3f} to {adjustment.max():.3f}")

    Notes
    -----
    For spike-spike coherence comparisons, apply this adjustment twice,
    once for each spike train.

    References
    ----------
    .. [1] Aoi, M.C., Lepage, K.Q., Kramer, M.A., and Eden, U.T. (2015).
           Rate-adjusted spike-LFP coherence comparisons from spike-train
           statistics. Journal of Neuroscience Methods 240, 141-153.
    """
    # alpha in [1]
    firing_rate_ratio = firing_rate_condition2 / firing_rate_condition1
    adjusted_firing_rate = (
        (1 / firing_rate_ratio - 1) * firing_rate_condition1
        + homogeneous_poisson_noise / firing_rate_ratio**2
    ) * dt**2
    return 1 / np.sqrt(1 + (adjusted_firing_rate / spike_power_spectrum))




[docs]
def power_confidence_intervals(
    n_tapers: int,
    power: NDArray[np.floating] | float = 1,
    ci: float = 0.95,
) -> tuple[NDArray[np.floating], NDArray[np.floating]]:
    """Compute confidence intervals for multitaper power spectrum estimates.

    Uses chi-squared distribution to compute confidence bounds for power
    spectral density estimates from multitaper analysis.

    Parameters
    ----------
    n_tapers : int
        Number of tapers used in multitaper estimation.
    power : NDArray[floating] or float, default=1
        Power spectrum estimates. Can be array of values or scalar.
    ci : float, default=0.95
        Confidence level, must be in range [0.5, 1.0].

    Returns
    -------
    lower_bound : NDArray[floating]
        Lower confidence bounds for power estimates.
    upper_bound : NDArray[floating]
        Upper confidence bounds for power estimates.

    Examples
    --------
    >>> import numpy as np
    >>> # Single power estimate with 5 tapers
    >>> lower, upper = power_confidence_intervals(n_tapers=5, power=1.0, ci=0.95)
    >>> print(f"95% CI: [{lower:.3f}, {upper:.3f}]")
    >>>
    >>> # Multiple power estimates
    >>> power_vals = np.array([0.5, 1.0, 2.0, 5.0])
    >>> lower, upper = power_confidence_intervals(5, power_vals, 0.95)

    References
    ----------
    .. [1] Kramer, M.A., and Eden, U.T. (2016). Case studies in neural
           data analysis: a guide for the practicing neuroscientist (MIT Press).
    """
    upper_bound = 2 * n_tapers / scipy.stats.chi2.ppf(1 - ci, 2 * n_tapers) * power
    lower_bound = 2 * n_tapers / scipy.stats.chi2.ppf(ci, 2 * n_tapers) * power

    return lower_bound, upper_bound




[docs]
def power_bias(n_observations: int) -> float:
    """Bias of the power spectrum.

    Parameters
    ----------
    n_observations : int
        n_observations is n_tapers * n_trials

    Returns
    -------
    bias : float
    """
    degrees_of_freedom = 2 * n_observations
    return scipy.special.psi(degrees_of_freedom) - np.log(degrees_of_freedom)




[docs]
def power_variance(n_observations: int) -> float:
    """Compute variance of log-power spectrum estimates.

    Calculates the variance of log-transformed power spectrum estimates
    for use in statistical testing and confidence interval computation.

    Parameters
    ----------
    n_observations : int
        Number of observations used in power estimation (n_tapers * n_trials).

    Returns
    -------
    variance : float
        Variance of log-power estimates.

    Examples
    --------
    >>> var_100 = power_variance(100)
    >>> var_1000 = power_variance(1000)
    >>> print(f"Variance with 100 obs: {var_100:.6f}")
    >>> print(f"Variance with 1000 obs: {var_1000:.6f}")
    Variance with 100 obs: 0.005051
    Variance with 1000 obs: 0.000501
    """
    degrees_of_freedom = 2 * n_observations
    return scipy.special.polygamma(1, degrees_of_freedom)




[docs]
def power_fisher_z_transform(
    spectrum1: NDArray[np.floating],
    n_obs1: int,
    spectrum2: NDArray[np.floating] | float = 0,
    n_obs2: int = 0,
) -> NDArray[np.floating]:
    """Transform power spectrum estimates for statistical testing.

    Applies log-transformation with bias correction to power spectrum estimates,
    enabling approximately normal distributions for hypothesis testing.
    Can perform one-sample test against baseline or two-sample comparison.

    Parameters
    ----------
    spectrum1 : NDArray[floating], shape (...,)
        Power spectrum estimates from first condition.
    n_obs1 : int
        Number of observations for spectrum1 (n_tapers * n_trials).
    spectrum2 : NDArray[floating] or float, default=0
        Power spectrum estimates from second condition for comparison.
        If 0, performs one-sample test.
    n_obs2 : int, default=0
        Number of observations for spectrum2 (n_tapers * n_trials).

    Returns
    -------
    z_scores : NDArray[floating], shape (...,)
        Z-scores for statistical testing of power differences.

    Examples
    --------
    >>> import numpy as np
    >>> # One-sample test against baseline
    >>> power1 = np.array([0.5, 1.0, 2.0, 0.8])
    >>> z_one = power_fisher_z_transform(power1, n_obs1=100)
    >>>
    >>> # Two-sample comparison
    >>> power2 = np.array([0.3, 0.8, 1.5, 0.9])
    >>> z_two = power_fisher_z_transform(power1, 100, power2, 120)

    Notes
    -----
    Uses bias correction based on sample size to improve the normal
    approximation for statistical testing.
    """
    bias1, bias2 = power_bias(n_obs1), power_bias(n_obs2)
    variance1, variance2 = power_variance(n_obs1), power_variance(n_obs2)

    # Bias correction
    z1 = np.log(spectrum1) - bias1
    z2 = np.log(spectrum2) - bias2

    return (z1 - z2) / np.sqrt(variance1 + variance2)