Source code for stingray.stats

import warnings
from collections.abc import Iterable

import numpy as np
from scipy import stats
from stingray.utils import simon
from stingray.utils import vectorize, float64, float32, int32, int64


__all__ = [
    "p_multitrial_from_single_trial",
    "p_single_trial_from_p_multitrial",
    "fold_profile_probability",
    "fold_profile_logprobability",
    "fold_detection_level",
    "phase_dispersion_detection_level",
    "phase_dispersion_probability",
    "phase_dispersion_logprobability",
    "pds_probability",
    "pds_detection_level",
    "z2_n_detection_level",
    "z2_n_probability",
    "z2_n_logprobability",
    "classical_pvalue",
    "chi2_logp",
    "equivalent_gaussian_Nsigma",
    "equivalent_gaussian_Nsigma_from_logp",
    "power_confidence_limits",
    "power_upper_limit",
    "pf_from_ssig",
    "pf_from_a",
    "pf_upper_limit",
    "a_from_pf",
    "a_from_ssig",
    "ssig_from_a",
    "ssig_from_pf",
    "amplitude_upper_limit",
]


@vectorize([float64(float32), float64(float64)], nopython=True)
def _extended_equiv_gaussian_Nsigma(logp):
    """Equivalent gaussian sigma for small log-probability.

    Return the equivalent gaussian sigma corresponding to the natural log of
    the cumulative gaussian probability logp. In other words, return x, such
    that Q(x) = p, where Q(x) is the cumulative normal distribution. This
    version uses the rational approximation from Abramowitz and Stegun,
    eqn 26.2.23, that claims to be precise to ~1e-4. Using the log(P) as input
    gives a much extended range.

    The parameters here are the result of a best-fit, with no physical meaning.

    Translated from Scott Ransom's PRESTO
    """

    t = np.sqrt(-2.0 * logp)
    num = 2.515517 + t * (0.802853 + t * 0.010328)
    denom = 1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308))
    return t - num / denom
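
# Quick sanity check (illustrative only; relies on scipy, imported above):
# in the range where both are valid, the rational approximation agrees with
# the exact inverse survival function to ~1e-3, e.g. at the 3-sigma tail
# probability:
#
#     >>> logp = np.log(0.0013498980316301035)
#     >>> assert np.isclose(_extended_equiv_gaussian_Nsigma(logp),
#     ...                   stats.norm.isf(np.exp(logp)), atol=1e-3)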


@np.vectorize
def equivalent_gaussian_Nsigma_from_logp(logp):
    """Number of Gaussian sigmas corresponding to tail log-probability.

    This function inverts the survival function of a standard Gaussian
    distribution to find the number of standard deviations away from the
    Gaussian mean corresponding to the tail probability equivalent to the
    provided p-value. This allows the user to make a statement about the
    signal such as “I detected this pulsation at 4.1 sigma”.

    The example values below are obtained by brute-force integrating the
    Gaussian probability density function using the mpmath library
    between Nsigma and +inf.

    Examples
    --------
    >>> pvalues = [0.15865525393145707, 0.0013498980316301035,
    ...            9.865877e-10, 6.22096e-16,
    ...            3.0567e-138]
    >>> log_pvalues = np.log(np.array(pvalues))
    >>> sigmas = np.array([1, 3, 6, 8, 25])
    >>> # Single number
    >>> assert np.isclose(equivalent_gaussian_Nsigma_from_logp(log_pvalues[0]),
    ...                   sigmas[0], atol=0.01)
    >>> # Array
    >>> assert np.allclose(equivalent_gaussian_Nsigma_from_logp(log_pvalues),
    ...                    sigmas, atol=0.01)
    """
    if logp < -300:
        # print("Extended")
        return _extended_equiv_gaussian_Nsigma(logp)
    return stats.norm.isf(np.exp(logp))


def equivalent_gaussian_Nsigma(p):
    """Number of Gaussian sigmas corresponding to tail probability.

    This function inverts the survival function of a standard Gaussian
    distribution to find the number of standard deviations away from the
    Gaussian mean corresponding to the tail probability equivalent to the
    provided p-value. This allows the user to make a statement about the
    signal such as “I detected this pulsation at 4.1 sigma”.

    The example values below are obtained by brute-force integrating the
    Gaussian probability density function using the mpmath library
    between Nsigma and +inf.

    Examples
    --------
    >>> assert np.isclose(equivalent_gaussian_Nsigma(0.15865525393145707), 1,
    ...                   atol=0.01)
    >>> assert np.isclose(equivalent_gaussian_Nsigma(0.0013498980316301035), 3,
    ...                   atol=0.01)
    >>> assert np.isclose(equivalent_gaussian_Nsigma(9.865877e-10), 6,
    ...                   atol=0.01)
    >>> assert np.isclose(equivalent_gaussian_Nsigma(6.22096e-16), 8,
    ...                   atol=0.01)
    >>> assert np.isclose(equivalent_gaussian_Nsigma(3.0567e-138), 25, atol=0.1)
    """
    return equivalent_gaussian_Nsigma_from_logp(np.log(p))


@vectorize([float64(float32, float32), float64(float64, float64)], nopython=True)
def _log_asymptotic_incomplete_gamma(a, z):
    """Asymptotic natural log of incomplete gamma function.

    Return the natural log of the incomplete gamma function in
    its asymptotic limit as z->infty.  This is from Abramowitz
    and Stegun eqn 6.5.32.

    Translated from Scott Ransom's PRESTO
    """

    x = 1.0
    newxpart = 1.0
    term = 1.0
    ii = 1

    while np.abs(newxpart) > 1e-15:
        term *= a - ii
        newxpart = term / np.power(z, ii)
        x += newxpart
        ii += 1

    return (a - 1.0) * np.log(z) - z + np.log(x)
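
# Illustrative cross-check (assumes scipy.special, not imported by this
# module): for integer a the asymptotic series terminates, so the result
# matches the exact log of the (unregularized) upper incomplete gamma:
#
#     >>> from scipy.special import gammaincc, gammaln
#     >>> exact = np.log(gammaincc(2.0, 30.0)) + gammaln(2.0)
#     >>> assert np.isclose(_log_asymptotic_incomplete_gamma(2.0, 30.0), exact)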


@vectorize([float64(float32), float64(float64)], nopython=True)
def _log_asymptotic_gamma(z):
    """Natural log of the Gamma function in its asymptotic limit.

    Return the natural log of the gamma function in its asymptotic limit
    as z->infty.  This is from Abramowitz and Stegun eqn 6.1.41.

    Translated from Scott Ransom's PRESTO
    """
    half_log_twopi = 0.91893853320467267  # (1/2)*log(2*pi)
    one_twelfth = 8.3333333333333333333333e-2
    one_degree = 2.7777777777777777777778e-3  # 1 / 360
    one_over_1680 = 5.9523809523809529e-4
    one_over_1260 = 7.9365079365079365079365e-4
    x = (z - 0.5) * np.log(z) - z + half_log_twopi
    y = 1.0 / (z * z)
    x += (((-one_over_1680 * y + one_over_1260) * y - one_degree) * y + one_twelfth) / z
    return x
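
# Illustrative cross-check (assumes scipy.special): for large arguments the
# truncated Stirling series above agrees with scipy's gammaln to within
# floating point accuracy:
#
#     >>> from scipy.special import gammaln
#     >>> assert np.isclose(_log_asymptotic_gamma(50.0), gammaln(50.0))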


@np.vectorize
def chi2_logp(chi2, dof):
    """Log survival function of the chi-squared distribution.

    Examples
    --------
    >>> chi2 = 31
    >>> # Test check on dof
    >>> chi2_logp(chi2, 1) # doctest:+ELLIPSIS
    Traceback (most recent call last):
        ...
    ValueError: The number of degrees of freedom cannot be < 2
    >>> # Test that approximate function works as expected. chi2 / dof > 15,
    >>> # but small and safe number in order to compare to scipy.stats
    >>> assert np.isclose(chi2_logp(chi2, 2), stats.chi2.logsf(chi2, 2), atol=0.1)
    >>> chi2 = np.array([5, 32])
    >>> assert np.allclose(chi2_logp(chi2, 2), stats.chi2.logsf(chi2, 2), atol=0.1)
    """
    if dof < 2:
        raise ValueError("The number of degrees of freedom cannot be < 2")

    # If very large reduced chi squared, use approximation. This is an
    # eyeballed limit parameter space where the difference between the
    # approximation and the scipy version is tiny, but above which the scipy
    # version starts failing.
    if (chi2 / dof > 15.0) or ((dof > 150) and (chi2 / dof > 6.0)):
        return _log_asymptotic_incomplete_gamma(0.5 * dof, 0.5 * chi2) - _log_asymptotic_gamma(
            0.5 * dof
        )

    return stats.chi2.logsf(chi2, dof)


@vectorize(
    [
        float64(float32, int32),
        float64(float32, int64),
        float64(float64, int32),
        float64(float64, int64),
    ],
    nopython=True,
)
def _logp_multitrial_from_single_logp(logp1, n):
    """Calculate a multi-trial p-value from the log of a single-trial one.

    This allows to work around Numba's limitation on longdoubles, a way to
    vectorize the computation when we need longdouble precision.

    Parameters
    ----------
    logp1 : float
        The natural logarithm of the significance at which we reject the null
        hypothesis on each single trial.
    n : int
        The number of trials

    Returns
    -------
    logpn : float
        The log of the significance at which we reject the null hypothesis
        after multiple trials
    """
    # If the probability is very small (logp1 + log(n) < -7, i.e.
    # p1 * n < ~1e-3), use the Bonferroni approximation.
    logn = np.log(n)
    if logp1 + logn < -7:
        return logp1 + logn

    return np.log(1 - (1 - np.exp(logp1)) ** n)


def p_multitrial_from_single_trial(p1, n):
    r"""Calculate a multi-trial p-value from a single-trial one.

    Calling *p* the probability of a single success, the binomial
    distribution says that the probability of *at least* one success
    in n trials is

    .. math::

        P(k\geq 1) = \sum_{k\geq 1} \binom{n}{k} p^k (1-p)^{(n-k)}

    or more simply, using P(k ≥ 0) = 1

    .. math::

        P(k\geq 1) = 1 - \binom{n}{0} (1-p)^n = 1 - (1-p)^n


    Parameters
    ----------
    p1 : float
        The significance at which we reject the null hypothesis on
        each single trial.
    n : int
        The number of trials

    Returns
    -------
    pn : float
        The significance at which we reject the null hypothesis
        after multiple trials
    """
    logpn = _logp_multitrial_from_single_logp(np.log(p1).astype(np.double), n)

    return np.exp(np.longdouble(logpn))
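
# Usage sketch (values chosen only for illustration): for moderate
# probabilities the exact binomial formula applies, while for p1 * n << 1
# the result reduces to the Bonferroni product p1 * n:
#
#     >>> assert np.isclose(p_multitrial_from_single_trial(0.1, 10),
#     ...                   1 - 0.9**10)
#     >>> assert np.isclose(p_multitrial_from_single_trial(1e-8, 100), 1e-6)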


@vectorize(
    [
        float64(float32, int32),
        float64(float32, int64),
        float64(float64, int32),
        float64(float64, int64),
    ],
    nopython=True,
)
def _logp_single_trial_from_logp_multitrial(logpn, n):
    """Calculate a multi-trial p-value from the log of a single-trial one.

    This allows to work around Numba's limitation on longdoubles, a way to
    vectorize the computation when we need longdouble precision.

    Parameters
    ----------
    logpn : float
        The natural logarithm of the significance at which we want to reject
        the null hypothesis after multiple trials
    n : int
        The number of trials

    Returns
    -------
    logp1 : float
        The log of the significance at which we reject the null hypothesis on
        each single trial.
    """
    logn = np.log(n)
    # If the probability is very small, use the Bonferroni approximation.
    if logpn < -7:
        return logpn - logn

    # Numerical errors arise when pn is very close to 1 (logpn ~ 0).
    if 1 - np.exp(logpn) < np.finfo(np.double).resolution * 1000:
        return np.nan

    p1 = 1 - np.power(1 - np.exp(logpn), 1 / n)
    return np.log(p1)


def p_single_trial_from_p_multitrial(pn, n):
    r"""Calculate the single-trial p-value from a total p-value

    Let us say that we want to reject a null hypothesis at the
    ``pn`` level, after executing ``n`` different measurements.
    This might be the case because, e.g., we
    want to have a 1% probability of detecting a signal in an
    entire power spectrum, and we need to correct the detection
    level accordingly.

    The typical procedure is to divide the initial probability
    (often called _epsilon_) by the number of trials. This is
    called the Bonferroni correction, and it is often a good
    approximation when ``pn`` is low: ``p1 = pn / n``.

    However, if ``pn`` is close to 1, this approximation gives
    incorrect results.

    Here we calculate this probability by inverting the binomial
    problem. Given that (see ``p_multitrial_from_single_trial``)
    the probability of getting at least one hit in n trials,
    given the single-trial probability *p*, is

    .. math ::

        P (k \geq 1) =  1 - (1 - p)^n,

    we invert the formula to get the single-trial probability
    from the multi-trial one:

    .. math ::

        p = 1 - (1 - P)^{(1/n)}

    This is also known as Šidák correction.

    Parameters
    ----------
    pn : float
        The significance at which we want to reject the null
        hypothesis after multiple trials
    n : int
        The number of trials

    Returns
    -------
    p1 : float
        The significance at which we reject the null hypothesis on
        each single trial.
    """

    logp = _logp_single_trial_from_logp_multitrial(np.log(pn).astype(np.float64), n)

    if np.any(np.isnan(logp)):
        if np.any(1 - pn < np.finfo(np.double).resolution * 1000):
            warnings.warn("Multi-trial probability is very close to 1.")
            warnings.warn("The problem is ill-conditioned. Returning NaN")

    return np.exp(logp)
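
# Usage sketch (illustrative): this function and
# p_multitrial_from_single_trial are inverses of each other, up to floating
# point accuracy:
#
#     >>> p1 = p_single_trial_from_p_multitrial(0.01, 100)
#     >>> assert np.isclose(p_multitrial_from_single_trial(p1, 100), 0.01)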


def fold_profile_probability(stat, nbin, ntrial=1):
    """Calculate the probability of a certain folded profile, due to noise.

    Parameters
    ----------
    stat : float
        The epoch folding statistics
    nbin : int
        The number of bins in the profile

    Other Parameters
    ----------------
    ntrial : int
        The number of trials executed to find this profile

    Returns
    -------
    p : float
        The probability that the profile has been produced by noise
    """
    p1 = stats.chi2.sf(stat, (nbin - 1))
    return p_multitrial_from_single_trial(p1, ntrial)


def fold_profile_logprobability(stat, nbin, ntrial=1):
    """Calculate the log-probability of a certain folded profile, due to noise.

    Parameters
    ----------
    stat : float
        The epoch folding statistics
    nbin : int
        The number of bins in the profile

    Other Parameters
    ----------------
    ntrial : int
        The number of trials executed to find this profile

    Returns
    -------
    logp : float
        The log-probability that the profile has been produced by noise
    """
    p1 = chi2_logp(stat, (nbin - 1))
    return _logp_multitrial_from_single_logp(p1, ntrial)
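

# Usage sketch (illustrative): with a single trial, the probability is just
# the chi^2 survival function of the statistics, and the log version is
# consistent with it:
#
#     >>> assert np.isclose(fold_profile_probability(30.0, 16),
#     ...                   stats.chi2.sf(30.0, 15))
#     >>> assert np.isclose(fold_profile_logprobability(30.0, 16),
#     ...                   np.log(fold_profile_probability(30.0, 16)))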


def fold_detection_level(nbin, epsilon=0.01, ntrial=1):
    """Return the detection level for a folded profile.

    See Leahy et al. (1983).

    Parameters
    ----------
    nbin : int
        The number of bins in the profile
    epsilon : float, default 0.01
        The fractional probability that the signal has been produced by noise

    Other Parameters
    ----------------
    ntrial : int
        The number of trials executed to find this profile

    Returns
    -------
    detlev : float
        The epoch folding statistics corresponding to a probability
        epsilon * 100 % that the signal has been produced by noise
    """
    epsilon = p_single_trial_from_p_multitrial(epsilon, ntrial)
    return stats.chi2.isf(epsilon.astype(np.double), nbin - 1)
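

# Usage sketch (illustrative): by construction, the probability of the
# detection level equals the requested epsilon:
#
#     >>> detlev = fold_detection_level(16, epsilon=0.01)
#     >>> assert np.isclose(fold_profile_probability(detlev, 16), 0.01)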


def phase_dispersion_probability(stat, nsamples, nbin, ntrial=1):
    """Calculate the probability of a peak in a phase dispersion
    minimization periodogram, due to noise.

    Uses the beta-distribution from Schwarzenberg-Czerny (1997).

    Parameters
    ----------
    stat : float
        The value of the PDM inverse peak
    nsamples : int
        The number of samples in the time series
    nbin : int
        The number of bins in the profile

    Other Parameters
    ----------------
    ntrial : int
        The number of trials executed to find this profile

    Returns
    -------
    p : float
        The probability that the profile has been produced by noise
    """
    d2 = nsamples - nbin
    d1 = nbin - 1
    beta = stats.beta(d2 / 2.0, d1 / 2.0)
    p1 = beta.cdf(stat)
    return p_multitrial_from_single_trial(p1, ntrial)


def phase_dispersion_logprobability(stat, nsamples, nbin, ntrial=1):
    """Calculate the log-probability of a peak in a phase dispersion
    minimization periodogram, due to noise.

    Uses the beta-distribution from Schwarzenberg-Czerny (1997).

    Parameters
    ----------
    stat : float
        The value of the PDM inverse peak
    nsamples : int
        The number of samples in the time series
    nbin : int
        The number of bins in the profile

    Other Parameters
    ----------------
    ntrial : int
        The number of trials executed to find this profile

    Returns
    -------
    logp : float
        The log-probability that the profile has been produced by noise
    """
    d2 = nsamples - nbin
    d1 = nbin - 1
    beta = stats.beta(d2 / 2.0, d1 / 2.0)
    p1 = beta.logcdf(stat)
    return _logp_multitrial_from_single_logp(p1, ntrial)


def phase_dispersion_detection_level(nsamples, nbin, epsilon=0.01, ntrial=1):
    """Return the detection level for a phase dispersion minimization
    periodogram.

    Parameters
    ----------
    nsamples : int
        The number of time bins in the light curve
    nbin : int
        The number of bins in the profile
    epsilon : float, default 0.01
        The fractional probability that the signal has been produced by noise

    Other Parameters
    ----------------
    ntrial : int
        The number of trials executed to find this profile

    Returns
    -------
    detlev : float
        The phase dispersion statistics corresponding to a probability
        epsilon * 100 % that the signal has been produced by noise
    """
    epsilon = p_single_trial_from_p_multitrial(epsilon, ntrial)
    d2 = nsamples - nbin
    d1 = nbin - 1
    beta = stats.beta(d2 / 2.0, d1 / 2.0)
    return beta.ppf(epsilon.astype(np.double))
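

# Usage sketch (illustrative): the plain and log versions agree, and the
# detection level inverts the probability, as for epoch folding above:
#
#     >>> p = phase_dispersion_probability(0.9, 100, 10)
#     >>> assert np.isclose(phase_dispersion_logprobability(0.9, 100, 10),
#     ...                   np.log(p))
#     >>> detlev = phase_dispersion_detection_level(100, 10, epsilon=0.01)
#     >>> assert np.isclose(phase_dispersion_probability(detlev, 100, 10), 0.01)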


def z2_n_probability(z2, n=2, ntrial=1, n_summed_spectra=1):
    """Calculate the probability of a certain Z^2_n value, due to noise.

    Parameters
    ----------
    z2 : float
        A Z^2_n statistics value
    n : int, default 2
        The ``n`` in $Z^2_n$ (number of harmonics, including the fundamental)

    Other Parameters
    ----------------
    ntrial : int
        The number of trials executed to find this profile
    n_summed_spectra : int
        Number of Z_2^n periodograms that were averaged to obtain z2

    Returns
    -------
    p : float
        The probability that the Z^2_n value has been produced by noise
    """
    epsilon_1 = stats.chi2.sf(z2 * n_summed_spectra, 2 * n * n_summed_spectra)
    epsilon = p_multitrial_from_single_trial(epsilon_1, ntrial)
    return epsilon


def z2_n_logprobability(z2, n=2, ntrial=1, n_summed_spectra=1):
    """Calculate the log-probability of a certain Z^2_n value, due to noise.

    Parameters
    ----------
    z2 : float
        A Z^2_n statistics value
    n : int, default 2
        The ``n`` in $Z^2_n$ (number of harmonics, including the fundamental)

    Other Parameters
    ----------------
    ntrial : int
        The number of trials executed to find this profile
    n_summed_spectra : int
        Number of Z_2^n periodograms that were averaged to obtain z2

    Returns
    -------
    logp : float
        The log-probability that the Z^2_n value has been produced by noise
    """
    epsilon_1 = chi2_logp(np.double(z2 * n_summed_spectra), 2 * n * n_summed_spectra)
    epsilon = _logp_multitrial_from_single_logp(epsilon_1, ntrial)
    return epsilon


def z2_n_detection_level(n=2, epsilon=0.01, ntrial=1, n_summed_spectra=1):
    """Return the detection level for the Z^2_n statistics.

    See Buccheri et al. (1983), Bendat and Piersol (1971).

    Parameters
    ----------
    n : int, default 2
        The ``n`` in $Z^2_n$ (number of harmonics, including the fundamental)
    epsilon : float, default 0.01
        The fractional probability that the signal has been produced by noise

    Other Parameters
    ----------------
    ntrial : int
        The number of trials executed to find this profile
    n_summed_spectra : int
        Number of Z_2^n periodograms that are being averaged

    Returns
    -------
    detlev : float
        The Z^2_n statistics value corresponding to a probability
        epsilon * 100 % that the signal has been produced by noise
    """
    epsilon = p_single_trial_from_p_multitrial(epsilon, ntrial)
    retlev = stats.chi2.isf(epsilon.astype(np.double), 2 * n_summed_spectra * n) / (
        n_summed_spectra
    )
    return retlev
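

# Usage sketch (illustrative): detection level and probability are inverse
# operations here as well:
#
#     >>> detlev = z2_n_detection_level(2, epsilon=0.01)
#     >>> assert np.isclose(z2_n_probability(detlev, 2), 0.01)
#     >>> assert np.isclose(z2_n_logprobability(detlev, 2), np.log(0.01))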


def pds_probability(level, ntrial=1, n_summed_spectra=1, n_rebin=1):
    r"""Give the probability of a given power level in PDS.

    Return the probability of a certain power level in a Power Density
    Spectrum of nbins bins, normalized a la Leahy (1983), based on the 2-dof
    :math:`{\chi}^2` statistics, corrected for rebinning (n_rebin) and
    multiple PDS averaging (n_summed_spectra)

    Parameters
    ----------
    level : float or array of floats
        The power level for which we are calculating the probability

    Other Parameters
    ----------------
    ntrial : int
        The number of *independent* trials (the independent bins of the PDS)
    n_summed_spectra : int
        The number of power density spectra that have been averaged to obtain
        this power level
    n_rebin : int
        The number of power density bins that have been averaged to obtain
        this power level

    Returns
    -------
    epsilon : float
        The probability value(s)
    """
    epsilon_1 = stats.chi2.sf(level * n_summed_spectra * n_rebin, 2 * n_summed_spectra * n_rebin)
    epsilon = p_multitrial_from_single_trial(epsilon_1, ntrial)
    return epsilon
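

# Usage sketch (illustrative): with no averaging, no rebinning and a single
# trial, this is the survival function of a 2-dof chi-squared variable,
# i.e. exp(-level / 2):
#
#     >>> assert np.isclose(pds_probability(30.0), np.exp(-15))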


def pds_logprobability(level, ntrial=1, n_summed_spectra=1, n_rebin=1):
    r"""Give the log-probability of a given power level in PDS.

    Return the log-probability of a certain power level in a Power Density
    Spectrum of nbins bins, normalized a la Leahy (1983), based on the 2-dof
    :math:`{\chi}^2` statistics, corrected for rebinning (n_rebin) and
    multiple PDS averaging (n_summed_spectra)

    Parameters
    ----------
    level : float or array of floats
        The power level for which we are calculating the probability

    Other Parameters
    ----------------
    ntrial : int
        The number of *independent* trials (the independent bins of the PDS)
    n_summed_spectra : int
        The number of power density spectra that have been averaged to obtain
        this power level
    n_rebin : int
        The number of power density bins that have been averaged to obtain
        this power level

    Returns
    -------
    logp : float
        The log-probability value(s)

    Examples
    --------
    Let us test that it is always consistent with `pds_probability`. We use
    relatively small power values, because for large values `pds_probability`
    underflows.

    >>> powers = np.random.uniform(2, 40, 10)
    >>> nrebin = np.random.randint(1, 10, 10)
    >>> nsummed = np.random.randint(1, 100, 10)
    >>> ntrial = np.random.randint(1, 10000, 10)
    >>> logp = pds_logprobability(powers, ntrial, nsummed, nrebin)
    >>> p = pds_probability(powers, ntrial, nsummed, nrebin)
    >>> assert np.allclose(p, np.exp(logp))
    """
    epsilon_1 = chi2_logp(level * n_summed_spectra * n_rebin, 2 * n_summed_spectra * n_rebin)
    epsilon = _logp_multitrial_from_single_logp(epsilon_1, ntrial)
    return epsilon


def pds_detection_level(epsilon=0.01, ntrial=1, n_summed_spectra=1, n_rebin=1):
    r"""Detection level for a PDS.

    Return the detection level (with probability 1 - epsilon) for a Power
    Density Spectrum of nbins bins, normalized a la Leahy (1983), based on
    the 2-dof :math:`{\chi}^2` statistics, corrected for rebinning (n_rebin)
    and multiple PDS averaging (n_summed_spectra)

    Parameters
    ----------
    epsilon : float
        The single-trial probability value(s)

    Other Parameters
    ----------------
    ntrial : int
        The number of *independent* trials (the independent bins of the PDS)
    n_summed_spectra : int
        The number of power density spectra that have been averaged to obtain
        this power level
    n_rebin : int
        The number of power density bins that have been averaged to obtain
        this power level

    Examples
    --------
    >>> assert np.isclose(pds_detection_level(0.1), 4.6, atol=0.1)
    >>> assert np.allclose(pds_detection_level(0.1, n_rebin=[1]), [4.6], atol=0.1)
    """
    epsilon = p_single_trial_from_p_multitrial(epsilon, ntrial)
    epsilon = epsilon.astype(np.double)
    if isinstance(n_rebin, Iterable):
        retlev = [
            stats.chi2.isf(epsilon, 2 * n_summed_spectra * r) / (n_summed_spectra * r)
            for r in n_rebin
        ]
        retlev = np.array(retlev)
    else:
        r = n_rebin
        retlev = stats.chi2.isf(epsilon, 2 * n_summed_spectra * r) / (n_summed_spectra * r)
    return retlev


def classical_pvalue(power, nspec):
    """Compute the probability of detecting the current power under the
    assumption that there is no periodic oscillation in the data.

    Note: This is stingray's original implementation of the probability
    distribution for the power spectrum. It is superseded by the
    implementation in `pds_probability` for practical purposes, but remains
    here for backwards compatibility and for its educational value as a
    clear, explicit implementation of the correct probability distribution.

    This computes the single-trial p-value that the power was observed under
    the null hypothesis that there is no signal in the data.

    Important: the underlying assumptions that make this calculation valid
    are:

    1. the powers in the power spectrum follow a chi-square distribution
    2. the power spectrum is normalized according to [Leahy 1983]_, such
       that the powers have a mean of 2 and a variance of 4
    3. there is only white noise in the light curve. That is, there is no
       aperiodic variability that would change the overall shape of the
       power spectrum.

    Also note that the p-value is for a *single trial*, i.e. the power
    currently being tested. If more than one power or more than one power
    spectrum are being tested, the resulting p-value must be corrected for
    the number of trials (Bonferroni correction).

    Mathematical formulation in [Groth 1975]_.
    Original implementation in IDL by Anna L. Watts.

    Parameters
    ----------
    power : float
        The squared Fourier amplitude of a spectrum to be evaluated
    nspec : int
        The number of spectra or frequency bins averaged in ``power``.
        This matters because averaging spectra or frequency bins increases
        the signal-to-noise ratio, i.e. makes the statistical distributions
        of the noise narrower, such that a smaller power might be very
        significant in averaged spectra even though it would not be in a
        single power spectrum.

    Returns
    -------
    pval : float
        The classical p-value of the observed power being consistent with
        the null hypothesis of white noise

    References
    ----------
    * .. [Leahy 1983] https://ui.adsabs.harvard.edu/#abs/1983ApJ...266..160L/abstract
    * .. [Groth 1975] https://ui.adsabs.harvard.edu/#abs/1975ApJS...29..285G/abstract
    """
    warnings.warn("This function was substituted by pds_probability.", DeprecationWarning)

    if not np.isfinite(power):
        raise ValueError("power must be a finite floating point number!")

    if power < 0:
        raise ValueError("power must be a positive real number!")

    if not np.isfinite(nspec):
        raise ValueError("nspec must be a finite integer number")

    if nspec < 1:
        raise ValueError("nspec must be larger or equal to 1")

    if not np.isclose(nspec % 1, 0):
        raise ValueError("nspec must be an integer number!")

    # If the power is really big, it's safe to say it's significant,
    # and the p-value will be nearly zero
    if (power * nspec) > 30000:
        simon("Probability of no signal too minuscule to calculate.")
        return 0.0

    return _pavnosigfun(power, nspec)


def _pavnosigfun(power, nspec):
    """Helper function doing the actual calculation of the p-value.

    Parameters
    ----------
    power : float
        The measured candidate power
    nspec : int
        The number of power spectral bins that were averaged in `power`
        (note: can be either through averaging spectra or neighbouring bins)
    """
    sum = 0.0
    m = nspec - 1
    pn = power * nspec

    while m >= 0:
        s = 0.0
        for i in range(int(m) - 1):
            s += np.log(float(m - i))

        logterm = m * np.log(pn / 2) - pn / 2 - s
        term = np.exp(logterm)
        ratio = sum / term

        if ratio > 1.0e15:
            return sum

        sum += term
        m -= 1

    return sum


def power_confidence_limits(preal, n=1, c=0.95):
    """Confidence limits on power, given a (theoretical) signal power.

    This is to be used when we *expect* a given power (e.g. from the pulsed
    fraction measured in previous observations) and we want to know the
    range of values the measured power could take to a given confidence
    level. Adapted from Vaughan et al. 1994, noting that, after appropriate
    normalization of the spectral stats, the distribution of powers in the
    PDS and the Z^2_n searches is always described by a noncentral chi
    squared distribution.

    Parameters
    ----------
    preal: float
        The theoretical signal-generated value of power

    Other Parameters
    ----------------
    n: int
        The number of summed powers to obtain the result. It can be multiple
        harmonics of the PDS, adjacent bins in a PDS summed to collect all
        the power in a QPO, or the n in Z^2_n
    c: float
        The confidence level (e.g. 0.95=95%)

    Returns
    -------
    pmeas: [float, float]
        The lower and upper confidence limits (a, 1-a) on the measured power

    Examples
    --------
    >>> cl = power_confidence_limits(150, c=0.84)
    >>> assert np.allclose(cl, [127, 176], atol=1)
    """
    rv = stats.ncx2(2 * n, preal)
    return rv.ppf([1 - c, c])


def power_upper_limit(pmeas, n=1, c=0.95):
    """Upper limit on signal power, given a measured power in the PDS/Z search.

    Adapted from Vaughan et al. 1994, noting that, after appropriate
    normalization of the spectral stats, the distribution of powers in the
    PDS and the Z^2_n searches is always described by a noncentral chi
    squared distribution.

    Note that Vaughan+94 gives p(pmeas | preal), while we are interested in
    p(preal | pmeas), which is not described by the NCX2 stat. Rather than
    integrating the CDF of this probability distribution, we start from a
    reasonable approximation and fit to find the preal that gives pmeas as
    a (e.g. 95%) confidence limit.

    As Vaughan+94 shows, this power is always larger than the observed one.
    This is because we are looking for the maximum signal power that,
    combined with noise powers, would give the observed power. This involves
    the possibility that noise powers partially cancel out some signal power.

    Parameters
    ----------
    pmeas: float
        The measured value of power

    Other Parameters
    ----------------
    n: int
        The number of summed powers to obtain pmeas. It can be multiple
        harmonics of the PDS, adjacent bins in a PDS summed to collect all
        the power in a QPO, or the n in Z^2_n
    c: float
        The confidence value for the probability (e.g. 0.95 = 95%)

    Returns
    -------
    psig: float
        The signal power that could produce P>pmeas with 1 - c probability

    Examples
    --------
    >>> pup = power_upper_limit(40, 1, 0.99)
    >>> assert np.isclose(pup, 75, atol=2)
    """

    def ppf(x):
        rv = stats.ncx2(2 * n, x)
        return rv.ppf(1 - c)

    def isf(x):
        rv = stats.ncx2(2 * n, x)
        return rv.ppf(c)

    def func_to_minimize(x, xmeas):
        return np.abs(ppf(x) - xmeas)

    from scipy.optimize import minimize

    initial = isf(pmeas)

    res = minimize(func_to_minimize, [initial], pmeas, bounds=[(0, initial * 2)])

    return res.x[0]


def amplitude_upper_limit(pmeas, counts, n=1, c=0.95, fft_corr=False, nyq_ratio=0):
    r"""Upper limit on a sinusoidal modulation, given a measured power in the PDS/Z search.

    Eq. 10 in Vaughan+94 and `a_from_ssig`: they are equivalent, but
    Vaughan+94 corrects further for the response inside an FFT bin and at
    frequencies close to Nyquist. These two corrections are applied by
    setting ``fft_corr=True`` and by setting ``nyq_ratio`` to the
    :math:`f / f_{Nyq}` of the FFT peak.

    To understand the meaning of this amplitude: if the modulation is
    described by:

    .. math:: p = \overline{p} (1 + a * \sin(x))

    this function returns a.

    If it is a sum of sinusoidal harmonics instead

    .. math:: p = \overline{p} (1 + \sum_l a_l * \sin(lx))

    a is equivalent to :math:`\sqrt{\sum_l a_l^2}`.

    See `power_upper_limit`

    Parameters
    ----------
    pmeas: float
        The measured value of power
    counts: int
        The number of counts in the light curve used to calculate the spectrum

    Other Parameters
    ----------------
    n: int
        The number of summed powers to obtain pmeas. It can be multiple
        harmonics of the PDS, adjacent bins in a PDS summed to collect all
        the power in a QPO, or the n in Z^2_n
    c: float
        The confidence value for the probability (e.g. 0.95 = 95%)
    fft_corr: bool
        Apply a correction for the expected power concentrated in an FFT bin,
        which is about 0.773 on average (it's 1 at the center of the bin,
        2/pi at the bin edge).
    nyq_ratio: float
        Ratio of the frequency of this feature with respect to the Nyquist
        frequency. Important to know when dealing with FFTs, because the FFT
        response decays between 0 and f_Nyq similarly to the response inside
        a frequency bin: from 1 at 0 Hz to ~2/pi at f_Nyq

    Returns
    -------
    a: float
        The modulation amplitude that could produce P>pmeas with 1 - c
        probability

    Examples
    --------
    >>> aup = amplitude_upper_limit(40, 30000, 1, 0.99)
    >>> aup_nyq = amplitude_upper_limit(40, 30000, 1, 0.99, nyq_ratio=1)
    >>> assert np.isclose(aup_nyq, aup / (2 / np.pi))
    >>> aup_corr = amplitude_upper_limit(40, 30000, 1, 0.99, fft_corr=True)
    >>> assert np.isclose(aup_corr, aup / np.sqrt(0.773))
    """
    uplim = power_upper_limit(pmeas, n, c)
    a = a_from_ssig(uplim, counts)

    if fft_corr:
        factor = 1 / np.sqrt(0.773)
        a *= factor

    if nyq_ratio > 0:
        factor = np.pi / 2 * nyq_ratio
        sinc_factor = np.sin(factor) / factor
        a /= sinc_factor

    return a


def pf_upper_limit(*args, **kwargs):
    """Upper limit on pulsed fraction, given a measured power in the PDS/Z search.

    See `power_upper_limit` and `pf_from_ssig`. All arguments are the same as
    `amplitude_upper_limit`.

    Parameters
    ----------
    pmeas: float
        The measured value of power
    counts: int
        The number of counts in the light curve used to calculate the spectrum

    Other Parameters
    ----------------
    n: int
        The number of summed powers to obtain pmeas. It can be multiple
        harmonics of the PDS, adjacent bins in a PDS summed to collect all
        the power in a QPO, or the n in Z^2_n
    c: float
        The confidence value for the probability (e.g. 0.95 = 95%)
    fft_corr: bool
        Apply a correction for the expected power concentrated in an FFT bin,
        which is about 0.773 on average (it's 1 at the center of the bin,
        2/pi at the bin edge).
    nyq_ratio: float
        Ratio of the frequency of this feature with respect to the Nyquist
        frequency. Important to know when dealing with FFTs, because the FFT
        response decays between 0 and f_Nyq similarly to the response inside
        a frequency bin: from 1 at 0 Hz to ~2/pi at f_Nyq

    Returns
    -------
    pf: float
        The pulsed fraction that could produce P>pmeas with 1 - c probability

    Examples
    --------
    >>> pfup = pf_upper_limit(40, 30000, 1, 0.99)
    >>> assert np.isclose(pfup, 0.13, atol=0.01)
    """
    return pf_from_a(amplitude_upper_limit(*args, **kwargs))


def pf_from_a(a):
    """Pulsed fraction from fractional amplitude of modulation.

    If the pulsed profile is defined as
    p = mean * (1 + a * sin(phase)),

    we define "pulsed fraction" as 2a/b, where b = mean + a is the maximum
    and a is the amplitude of the modulation.

    Hence, pulsed fraction = 2a/(1+a)

    Examples
    --------
    >>> pf_from_a(1)
    1.0
    >>> pf_from_a(0)
    0.0
    """
    return 2 * a / (1 + a)


def a_from_pf(p):
    """Fractional amplitude of modulation from pulsed fraction.

    If the pulsed profile is defined as
    p = mean * (1 + a * sin(phase)),

    we define "pulsed fraction" as 2a/b, where b = mean + a is the maximum
    and a is the amplitude of the modulation.

    Hence, a = pf / (2 - pf)

    Examples
    --------
    >>> a_from_pf(1)
    1.0
    >>> a_from_pf(0)
    0.0
    """
    return p / (2 - p)


def ssig_from_a(a, ncounts):
    """Theoretical power in the Z or PDS search for a sinusoid of amplitude a.

    From Leahy et al. 1983, given a pulse profile
    p = lambda * (1 + a * sin(phase)),
    the theoretical value of Z^2_n is Ncounts / 2 * a^2

    Note that if there are multiple sinusoidal components, one can use
    a = sqrt(sum(a_l^2)) (Bachetti+2021b)

    Examples
    --------
    >>> round(ssig_from_a(0.1, 30000), 1)
    150.0
    """
    return ncounts / 2 * a**2


def a_from_ssig(ssig, ncounts):
    """Amplitude of a sinusoid corresponding to a given Z/PDS value.

    From Leahy et al. 1983, given a pulse profile
    p = lambda * (1 + a * sin(phase)),
    the theoretical value of Z^2_n is Ncounts / 2 * a^2

    Note that if there are multiple sinusoidal components, one can use
    a = sqrt(sum(a_l^2)) (Bachetti+2021b)

    Examples
    --------
    >>> assert np.isclose(a_from_ssig(150, 30000), 0.1)
    """
    return np.sqrt(2 * ssig / ncounts)


def ssig_from_pf(pf, ncounts):
    """Theoretical power in the Z or PDS for a sinusoid of pulsed fraction pf.

    See `ssig_from_a` and `a_from_pf` for more details.

    Examples
    --------
    >>> assert round(ssig_from_pf(pf_from_a(0.1), 30000), 1) == 150.0
    """
    a = a_from_pf(pf)
    return ncounts / 2 * a**2


def pf_from_ssig(ssig, ncounts):
    """Estimate pulsed fraction for a sinusoid from a given Z or PDS power.

    See `a_from_ssig` and `pf_from_a` for more details.

    Examples
    --------
    >>> assert np.isclose(round(a_from_pf(pf_from_ssig(150, 30000)), 1), 0.1)
    """
    a = a_from_ssig(ssig, ncounts)
    return pf_from_a(a)
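

# Consistency sketch (illustrative): the amplitude/pulsed fraction
# conversions above are inverses of each other:
#
#     >>> assert np.isclose(a_from_pf(pf_from_a(0.3)), 0.3)
#     >>> assert np.isclose(ssig_from_pf(pf_from_ssig(150, 30000), 30000), 150)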