# Source code for stingray.stats

```
import warnings
from collections.abc import Iterable
import numpy as np
from scipy import stats
from stingray.utils import simon
from stingray.utils import vectorize, float64, float32, int32, int64
__all__ = [
"p_multitrial_from_single_trial",
"p_single_trial_from_p_multitrial",
"fold_profile_probability",
"fold_profile_logprobability",
"fold_detection_level",
"phase_dispersion_detection_level",
"phase_dispersion_probability",
"phase_dispersion_logprobability",
"pds_probability",
"pds_detection_level",
"z2_n_detection_level",
"z2_n_probability",
"z2_n_logprobability",
"classical_pvalue",
"chi2_logp",
"equivalent_gaussian_Nsigma",
"equivalent_gaussian_Nsigma_from_logp",
"power_confidence_limits",
"power_upper_limit",
"pf_from_ssig",
"pf_from_a",
"pf_upper_limit",
"a_from_pf",
"a_from_ssig",
"ssig_from_a",
"ssig_from_pf",
"amplitude_upper_limit",
]
@vectorize([float64(float32), float64(float64)], nopython=True)
def _extended_equiv_gaussian_Nsigma(logp):
"""Equivalent gaussian sigma for small log-probability.
Return the equivalent gaussian sigma corresponding to the natural log of
the cumulative gaussian probability logp. In other words, return x, such
that Q(x) = p, where Q(x) is the cumulative normal distribution. This
version uses the rational approximation from Abramowitz and Stegun,
eqn 26.2.23, that claims to be precise to ~1e-4. Using the log(P) as input
gives a much extended range.
The parameters here are the result of a best-fit, with no physical meaning.
Translated from Scott Ransom's PRESTO
"""
t = np.sqrt(-2.0 * logp)
num = 2.515517 + t * (0.802853 + t * 0.010328)
denom = 1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308))
return t - num / denom
@np.vectorize
def equivalent_gaussian_Nsigma_from_logp(logp):
"""Number of Gaussian sigmas corresponding to tail log-probability.
This function computes the value of the characteristic function of a
standard Gaussian distribution for the tail probability equivalent to the
provided p-value, and turns this value into units of standard deviations
away from the Gaussian mean. This allows the user to make a statement
about the signal such as “I detected this pulsation at 4.1 sigma
The example values below are obtained by brute-force integrating the
Gaussian probability density function using the mpmath library
between Nsigma and +inf.
Examples
--------
>>> pvalues = [0.15865525393145707, 0.0013498980316301035,
... 9.865877e-10, 6.22096e-16,
... 3.0567e-138]
>>> log_pvalues = np.log(np.array(pvalues))
>>> sigmas = np.array([1, 3, 6, 8, 25])
>>> # Single number
>>> assert np.isclose(equivalent_gaussian_Nsigma_from_logp(log_pvalues[0]),
... sigmas[0], atol=0.01)
>>> # Array
>>> assert np.allclose(equivalent_gaussian_Nsigma_from_logp(log_pvalues),
... sigmas, atol=0.01)
"""
if logp < -300:
# print("Extended")
return _extended_equiv_gaussian_Nsigma(logp)
return stats.norm.isf(np.exp(logp))
def equivalent_gaussian_Nsigma(p):
"""Number of Gaussian sigmas corresponding to tail probability.
This function computes the value of the characteristic function of a
standard Gaussian distribution for the tail probability equivalent to the
provided p-value, and turns this value into units of standard deviations
away from the Gaussian mean. This allows the user to make a statement
about the signal such as “I detected this pulsation at 4.1 sigma
The example values below are obtained by brute-force integrating the
Gaussian probability density function using the mpmath library
between Nsigma and +inf.
Examples
--------
>>> assert np.isclose(equivalent_gaussian_Nsigma(0.15865525393145707), 1,
... atol=0.01)
>>> assert np.isclose(equivalent_gaussian_Nsigma(0.0013498980316301035), 3,
... atol=0.01)
>>> assert np.isclose(equivalent_gaussian_Nsigma(9.865877e-10), 6,
... atol=0.01)
>>> assert np.isclose(equivalent_gaussian_Nsigma(6.22096e-16), 8,
... atol=0.01)
>>> assert np.isclose(equivalent_gaussian_Nsigma(3.0567e-138), 25, atol=0.1)
"""
return equivalent_gaussian_Nsigma_from_logp(np.log(p))
@vectorize([float64(float32, float32), float64(float64, float64)], nopython=True)
def _log_asymptotic_incomplete_gamma(a, z):
"""Asymptotic natural log of incomplete gamma function.
Return the natural log of the incomplete gamma function in
its asymptotic limit as z->infty. This is from Abramowitz
and Stegun eqn 6.5.32.
Translated from Scott Ransom's PRESTO
"""
x = 1.0
newxpart = 1.0
term = 1.0
ii = 1
while np.abs(newxpart) > 1e-15:
term *= a - ii
newxpart = term / np.power(z, ii)
x += newxpart
ii += 1
return (a - 1.0) * np.log(z) - z + np.log(x)
@vectorize([float64(float32), float64(float64)], nopython=True)
def _log_asymptotic_gamma(z):
"""Natural log of the Gamma function in its asymptotic limit.
Return the natural log of the gamma function in its asymptotic limit
as z->infty. This is from Abramowitz and Stegun eqn 6.1.41.
Translated from Scott Ransom's PRESTO
"""
half_log_twopi = 0.91893853320467267 # (1/2)*log(2*pi)
one_twelfth = 8.3333333333333333333333e-2
one_degree = 2.7777777777777777777778e-3 # 1 / 360
one_over_1680 = 5.9523809523809529e-4
one_over_1260 = 7.9365079365079365079365e-4
x = (z - 0.5) * np.log(z) - z + half_log_twopi
y = 1.0 / (z * z)
x += (((-one_over_1680 * y + one_over_1260) * y - one_degree) * y + one_twelfth) / z
return x
@np.vectorize
def chi2_logp(chi2, dof):
"""Log survival function of the chi-squared distribution.
Examples
--------
>>> chi2 = 31
>>> # Test check on dof
>>> chi2_logp(chi2, 1) # doctest:+ELLIPSIS
Traceback (most recent call last):
...
ValueError: The number of degrees of freedom cannot be < 2
>>> # Test that approximate function works as expected. chi2 / dof > 15,
>>> # but small and safe number in order to compare to scipy.stats
>>> assert np.isclose(chi2_logp(chi2, 2), stats.chi2.logsf(chi2, 2), atol=0.1)
>>> chi2 = np.array([5, 32])
>>> assert np.allclose(chi2_logp(chi2, 2), stats.chi2.logsf(chi2, 2), atol=0.1)
"""
if dof < 2:
raise ValueError("The number of degrees of freedom cannot be < 2")
# If very large reduced chi squared, use approximation. This is an
# eyeballed limit parameter space where the difference between the
# approximation and the scipy version is tiny, but above which the scipy
# version starts failing.
if (chi2 / dof > 15.0) or ((dof > 150) and (chi2 / dof > 6.0)):
return _log_asymptotic_incomplete_gamma(0.5 * dof, 0.5 * chi2) - _log_asymptotic_gamma(
0.5 * dof
)
return stats.chi2.logsf(chi2, dof)
@vectorize(
[
float64(float32, int32),
float64(float32, int64),
float64(float64, int32),
float64(float64, int64),
],
nopython=True,
)
def _logp_multitrial_from_single_logp(logp1, n):
"""Calculate a multi-trial p-value from the log of a single-trial one.
This allows to work around Numba's limitation on longdoubles, a way to
vectorize the computation when we need longdouble precision.
Parameters
----------
logp1 : float
The natural logarithm of the significance at which we reject the null
hypothesis on each single trial.
n : int
The number of trials
Returns
-------
logpn : float
The log of the significance at which we reject the null hypothesis
after multiple trials
"""
# If the the probability is very small (p1 * n) < 1e-6, use Bonferroni
# approximation.
logn = np.log(n)
if logp1 + logn < -7:
return logp1 + logn
return np.log(1 - (1 - np.exp(logp1)) ** n)
def p_multitrial_from_single_trial(p1, n):
r"""Calculate a multi-trial p-value from a single-trial one.
Calling *p* the probability of a single success, the Binomial
distributions says that the probability *at least* one outcome
in n trials is
.. math::
P(k\geq 1) = \sum_{k\geq 1} \binom{n}{k} p^k (1-p)^{(n-k)}
or more simply, using P(k ≥ 0) = 1
.. math::
P(k\geq 1) = 1 - \binom{n}{0} (1-p)^n = 1 - (1-p)^n
Parameters
----------
p1 : float
The significance at which we reject the null hypothesis on
each single trial.
n : int
The number of trials
Returns
-------
pn : float
The significance at which we reject the null hypothesis
after multiple trials
"""
logpn = _logp_multitrial_from_single_logp(np.log(p1).astype(np.double), n)
return np.exp(np.longdouble(logpn))
@vectorize(
[
float64(float32, int32),
float64(float32, int64),
float64(float64, int32),
float64(float64, int64),
],
nopython=True,
)
def _logp_single_trial_from_logp_multitrial(logpn, n):
"""Calculate a multi-trial p-value from the log of a single-trial one.
This allows to work around Numba's limitation on longdoubles, a way to
vectorize the computation when we need longdouble precision.
Parameters
----------
logpn : float
The natural logarithm of the significance at which we want to reject
the null hypothesis after multiple trials
n : int
The number of trials
Returns
-------
logp1 : float
The log of the significance at which we reject the null hypothesis on
each single trial.
"""
logn = np.log(n)
# If the the probability is very small, use Bonferroni approximation.
if logpn < -7:
return logpn - logn
# Numerical errors arise when pn is very close to 1. (logpn ~ 0)
if 1 - np.exp(logpn) < np.finfo(np.double).resolution * 1000:
return np.nan
p1 = 1 - np.power(1 - np.exp(logpn), 1 / n)
return np.log(p1)
def p_single_trial_from_p_multitrial(pn, n):
r"""Calculate the single-trial p-value from a total p-value
Let us say that we want to reject a null hypothesis at the
``pn`` level, after executing ``n`` different measurements.
This might be the case because, e.g., we
want to have a 1% probability of detecting a signal in an
entire power spectrum, and we need to correct the detection
level accordingly.
The typical procedure is dividing the initial probability
(often called _epsilon_) by the number of trials. This is
called the Bonferroni correction and it is often a good
approximation, when ``pn`` is low: ``p1 = pn / n``.
However, if ``pn`` is close to 1, this approximation gives
incorrect results.
Here we calculate this probability by inverting the Binomial
problem. Given that (see ``p_multitrial_from_single_trial``)
the probability of getting more than one hit in n trials,
given the single-trial probability *p*, is
.. math ::
P (k \geq 1) = 1 - (1 - p)^n,
we get the single trial probability from the multi-trial one
from
.. math ::
p = 1 - (1 - P)^{(1/n)}
This is also known as Šidák correction.
Parameters
----------
pn : float
The significance at which we want to reject the null
hypothesis after multiple trials
n : int
The number of trials
Returns
-------
p1 : float
The significance at which we reject the null hypothesis on
each single trial.
"""
logp = _logp_single_trial_from_logp_multitrial(np.log(pn).astype(np.float64), n)
if np.any(np.isnan(logp)):
if np.any(1 - pn < np.finfo(np.double).resolution * 1000):
warnings.warn("Multi-trial probability is very close to 1.")
warnings.warn("The problem is ill-conditioned. Returning NaN")
return np.exp(logp)
[docs]
def fold_profile_probability(stat, nbin, ntrial=1):
"""Calculate the probability of a certain folded profile, due to noise.
Parameters
----------
stat : float
The epoch folding statistics
nbin : int
The number of bins in the profile
Other Parameters
----------------
ntrial : int
The number of trials executed to find this profile
Returns
-------
p : float
The probability that the profile has been produced by noise
"""
p1 = stats.chi2.sf(stat, (nbin - 1))
return p_multitrial_from_single_trial(p1, ntrial)
def fold_profile_logprobability(stat, nbin, ntrial=1):
"""Calculate the probability of a certain folded profile, due to noise.
Parameters
----------
stat : float
The epoch folding statistics
nbin : int
The number of bins in the profile
Other Parameters
----------------
ntrial : int
The number of trials executed to find this profile
Returns
-------
logp : float
The log-probability that the profile has been produced by noise
"""
p1 = chi2_logp(stat, (nbin - 1))
return _logp_multitrial_from_single_logp(p1, ntrial)
[docs]
def fold_detection_level(nbin, epsilon=0.01, ntrial=1):
"""Return the detection level for a folded profile.
See Leahy et al. (1983).
Parameters
----------
nbin : int
The number of bins in the profile
epsilon : float, default 0.01
The fractional probability that the signal has been produced
by noise
Other Parameters
----------------
ntrial : int
The number of trials executed to find this profile
Returns
-------
detlev : float
The epoch folding statistics corresponding to a probability
epsilon * 100 % that the signal has been produced by noise
"""
epsilon = p_single_trial_from_p_multitrial(epsilon, ntrial)
return stats.chi2.isf(epsilon.astype(np.double), nbin - 1)
def phase_dispersion_probability(stat, nsamples, nbin, ntrial=1):
"""Calculate the probability of a peak in a phase dispersion
minimization periodogram, due to noise.
Uses the beta-distribution from Czerny-Schwarzendorf (1997).
Parameters
----------
stat : float
The value of the PDM inverse peak
nsamples : int
The number of samples in the time series
nbin : int
The number of bins in the profile
Other Parameters
----------------
ntrial : int
The number of trials executed to find this profile
Returns
-------
p : float
The probability that the profile has been produced by noise
"""
d2 = nsamples - nbin
d1 = nbin - 1
beta = stats.beta(d2 / 2.0, d1 / 2.0)
p1 = beta.cdf(stat)
return p_multitrial_from_single_trial(p1, ntrial)
def phase_dispersion_logprobability(stat, nsamples, nbin, ntrial=1):
"""Calculate the log-probability of a peak in a phase dispersion
minimization periodogram, due to noise.
Uses the beta-distribution from Czerny-Schwarzendorf (1997).
Parameters
----------
stat : float
The value of the PDM inverse peak
nsamples : int
The number of samples in the time series
nbin : int
The number of bins in the profile
Other Parameters
----------------
ntrial : int
The number of trials executed to find this profile
Returns
-------
logp : float
The log-probability that the profile has been produced by noise
"""
d2 = nsamples - nbin
d1 = nbin - 1
beta = stats.beta(d2 / 2.0, d1 / 2.0)
p1 = beta.logcdf(stat)
return _logp_multitrial_from_single_logp(p1, ntrial)
def phase_dispersion_detection_level(nsamples, nbin, epsilon=0.01, ntrial=1):
"""Return the detection level for a phase dispersion minimization
periodogram..
Parameters
----------
nsamples : int
The number of time bins in the light curve
nbin : int
The number of bins in the profile
epsilon : float, default 0.01
The fractional probability that the signal has been produced
by noise
Other Parameters
----------------
ntrial : int
The number of trials executed to find this profile
Returns
-------
detlev : float
The epoch folding statistics corresponding to a probability
epsilon * 100 % that the signal has been produced by noise
"""
epsilon = p_single_trial_from_p_multitrial(epsilon, ntrial)
d2 = nsamples - nbin
d1 = nbin - 1
beta = stats.beta(d2 / 2.0, d1 / 2.0)
return beta.ppf(epsilon.astype(np.double))
[docs]
def z2_n_probability(z2, n, ntrial=1, n_summed_spectra=1):
"""Calculate the probability of a certain folded profile, due to noise.
Parameters
----------
z2 : float
A Z^2_n statistics value
n : int, default 2
The ``n`` in $Z^2_n$ (number of harmonics, including the fundamental)
Other Parameters
----------------
ntrial : int
The number of trials executed to find this profile
n_summed_spectra : int
Number of Z_2^n periodograms that were averaged to obtain z2
Returns
-------
p : float
The probability that the Z^2_n value has been produced by noise
"""
epsilon_1 = stats.chi2.sf(z2 * n_summed_spectra, 2 * n * n_summed_spectra)
epsilon = p_multitrial_from_single_trial(epsilon_1, ntrial)
return epsilon
def z2_n_logprobability(z2, n, ntrial=1, n_summed_spectra=1):
"""Calculate the probability of a certain folded profile, due to noise.
Parameters
----------
z2 : float
A Z^2_n statistics value
n : int, default 2
The ``n`` in $Z^2_n$ (number of harmonics, including the fundamental)
Other Parameters
----------------
ntrial : int
The number of trials executed to find this profile
n_summed_spectra : int
Number of Z_2^n periodograms that were averaged to obtain z2
Returns
-------
p : float
The probability that the Z^2_n value has been produced by noise
"""
epsilon_1 = chi2_logp(np.double(z2 * n_summed_spectra), 2 * n * n_summed_spectra)
epsilon = _logp_multitrial_from_single_logp(epsilon_1, ntrial)
return epsilon
[docs]
def z2_n_detection_level(n=2, epsilon=0.01, ntrial=1, n_summed_spectra=1):
"""Return the detection level for the Z^2_n statistics.
See Buccheri et al. (1983), Bendat and Piersol (1971).
Parameters
----------
n : int, default 2
The ``n`` in $Z^2_n$ (number of harmonics, including the fundamental)
epsilon : float, default 0.01
The fractional probability that the signal has been produced by noise
Other Parameters
----------------
ntrial : int
The number of trials executed to find this profile
n_summed_spectra : int
Number of Z_2^n periodograms that are being averaged
Returns
-------
detlev : float
The epoch folding statistics corresponding to a probability
epsilon * 100 % that the signal has been produced by noise
"""
epsilon = p_single_trial_from_p_multitrial(epsilon, ntrial)
retlev = stats.chi2.isf(epsilon.astype(np.double), 2 * n_summed_spectra * n) / (
n_summed_spectra
)
return retlev
[docs]
def pds_probability(level, ntrial=1, n_summed_spectra=1, n_rebin=1):
r"""Give the probability of a given power level in PDS.
Return the probability of a certain power level in a Power Density
Spectrum of nbins bins, normalized a la Leahy (1983), based on
the 2-dof :math:`{\chi}^2` statistics, corrected for rebinning (n_rebin)
and multiple PDS averaging (n_summed_spectra)
Parameters
----------
level : float or array of floats
The power level for which we are calculating the probability
Other Parameters
----------------
ntrial : int
The number of *independent* trials (the independent bins of the PDS)
n_summed_spectra : int
The number of power density spectra that have been averaged to obtain
this power level
n_rebin : int
The number of power density bins that have been averaged to obtain
this power level
Returns
-------
epsilon : float
The probability value(s)
"""
epsilon_1 = stats.chi2.sf(level * n_summed_spectra * n_rebin, 2 * n_summed_spectra * n_rebin)
epsilon = p_multitrial_from_single_trial(epsilon_1, ntrial)
return epsilon
def pds_logprobability(level, ntrial=1, n_summed_spectra=1, n_rebin=1):
r"""Give the probability of a given power level in PDS.
Return the probability of a certain power level in a Power Density
Spectrum of nbins bins, normalized a la Leahy (1983), based on
the 2-dof :math:`{\chi}^2` statistics, corrected for rebinning (n_rebin)
and multiple PDS averaging (n_summed_spectra)
Parameters
----------
level : float or array of floats
The power level for which we are calculating the probability
Other Parameters
----------------
ntrial : int
The number of *independent* trials (the independent bins of the PDS)
n_summed_spectra : int
The number of power density spectra that have been averaged to obtain
this power level
n_rebin : int
The number of power density bins that have been averaged to obtain
this power level
Returns
-------
epsilon : float
The probability value(s)
Examples
--------
Let us test that it is always consistent with `pds_probability`.
We use relatively small power values, because for large values
`pds_probability` underflows.
>>> powers = np.random.uniform(2, 40, 10)
>>> nrebin = np.random.randint(1, 10, 10)
>>> nsummed = np.random.randint(1, 100, 10)
>>> ntrial = np.random.randint(1, 10000, 10)
>>> logp = pds_logprobability(powers, ntrial, nsummed, nrebin)
>>> p = pds_probability(powers, ntrial, nsummed, nrebin)
>>> assert np.allclose(p, np.exp(logp))
"""
epsilon_1 = chi2_logp(level * n_summed_spectra * n_rebin, 2 * n_summed_spectra * n_rebin)
epsilon = _logp_multitrial_from_single_logp(epsilon_1, ntrial)
return epsilon
[docs]
def pds_detection_level(epsilon=0.01, ntrial=1, n_summed_spectra=1, n_rebin=1):
r"""Detection level for a PDS.
Return the detection level (with probability 1 - epsilon) for a Power
Density Spectrum of nbins bins, normalized a la Leahy (1983), based on
the 2-dof :math:`{\chi}^2` statistics, corrected for rebinning (n_rebin)
and multiple PDS averaging (n_summed_spectra)
Parameters
----------
epsilon : float
The single-trial probability value(s)
Other Parameters
----------------
ntrial : int
The number of *independent* trials (the independent bins of the PDS)
n_summed_spectra : int
The number of power density spectra that have been averaged to obtain
this power level
n_rebin : int
The number of power density bins that have been averaged to obtain
this power level
Examples
--------
>>> assert np.isclose(pds_detection_level(0.1), 4.6, atol=0.1)
>>> assert np.allclose(pds_detection_level(0.1, n_rebin=[1]), [4.6], atol=0.1)
"""
epsilon = p_single_trial_from_p_multitrial(epsilon, ntrial)
epsilon = epsilon.astype(np.double)
if isinstance(n_rebin, Iterable):
retlev = [
stats.chi2.isf(epsilon, 2 * n_summed_spectra * r) / (n_summed_spectra * r)
for r in n_rebin
]
retlev = np.array(retlev)
else:
r = n_rebin
retlev = stats.chi2.isf(epsilon, 2 * n_summed_spectra * r) / (n_summed_spectra * r)
return retlev
def classical_pvalue(power, nspec):
"""
Note:
This is stingray's original implementation of the probability
distribution for the power spectrum. It is superseded by the
implementation in pds_probability for practical purposes, but
remains here for backwards compatibility and for its educational
value as a clear, explicit implementation of the correct
probability distribution.
Compute the probability of detecting the current power under
the assumption that there is no periodic oscillation in the data.
This computes the single-trial p-value that the power was
observed under the null hypothesis that there is no signal in
the data.
Important: the underlying assumptions that make this calculation valid
are:
1. the powers in the power spectrum follow a chi-square distribution
2. the power spectrum is normalized according to [Leahy 1983]_, such
that the powers have a mean of 2 and a variance of 4
3. there is only white noise in the light curve. That is, there is no
aperiodic variability that would change the overall shape of the power
spectrum.
Also note that the p-value is for a *single trial*, i.e. the power
currently being tested. If more than one power or more than one power
spectrum are being tested, the resulting p-value must be corrected for the
number of trials (Bonferroni correction).
Mathematical formulation in [Groth 1975]_.
Original implementation in IDL by Anna L. Watts.
Parameters
----------
power : float
The squared Fourier amplitude of a spectrum to be evaluated
nspec : int
The number of spectra or frequency bins averaged in ``power``.
This matters because averaging spectra or frequency bins increases
the signal-to-noise ratio, i.e. makes the statistical distributions
of the noise narrower, such that a smaller power might be very
significant in averaged spectra even though it would not be in a single
power spectrum.
Returns
-------
pval : float
The classical p-value of the observed power being consistent with
the null hypothesis of white noise
References
----------
* .. [Leahy 1983] https://ui.adsabs.harvard.edu/#abs/1983ApJ...266..160L/abstract
* .. [Groth 1975] https://ui.adsabs.harvard.edu/#abs/1975ApJS...29..285G/abstract
"""
warnings.warn("This function was substituted by pds_probability.", DeprecationWarning)
if not np.isfinite(power):
raise ValueError("power must be a finite floating point number!")
if power < 0:
raise ValueError("power must be a positive real number!")
if not np.isfinite(nspec):
raise ValueError("nspec must be a finite integer number")
if nspec < 1:
raise ValueError("nspec must be larger or equal to 1")
if not np.isclose(nspec % 1, 0):
raise ValueError("nspec must be an integer number!")
# If the power is really big, it's safe to say it's significant,
# and the p-value will be nearly zero
if (power * nspec) > 30000:
simon("Probability of no signal too minuscule to calculate.")
return 0.0
else:
pval = _pavnosigfun(power, nspec)
return pval
def _pavnosigfun(power, nspec):
"""
Helper function doing the actual calculation of the p-value.
Parameters
----------
power : float
The measured candidate power
nspec : int
The number of power spectral bins that were averaged in `power`
(note: can be either through averaging spectra or neighbouring bins)
"""
sum = 0.0
m = nspec - 1
pn = power * nspec
while m >= 0:
s = 0.0
for i in range(int(m) - 1):
s += np.log(float(m - i))
logterm = m * np.log(pn / 2) - pn / 2 - s
term = np.exp(logterm)
ratio = sum / term
if ratio > 1.0e15:
return sum
sum += term
m -= 1
return sum
def power_confidence_limits(preal, n=1, c=0.95):
"""Confidence limits on power, given a (theoretical) signal power.
This is to be used when we *expect* a given power (e.g. from the pulsed
fraction measured in previous observations) and we want to know the
range of values the measured power could take to a given confidence level.
Adapted from Vaughan et al. 1994, noting that, after appropriate
normalization of the spectral stats, the distribution of powers in the PDS
and the Z^2_n searches is always described by a noncentral chi squared
distribution.
Parameters
----------
preal: float
The theoretical signal-generated value of power
Other Parameters
----------------
n: int
The number of summed powers to obtain the result. It can be multiple
harmonics of the PDS, adjacent bins in a PDS summed to collect all the
power in a QPO, or the n in Z^2_n
c: float
The confidence level (e.g. 0.95=95%)
Returns
-------
pmeas: [float, float]
The upper and lower confidence interval (a, 1-a) on the measured power
Examples
--------
>>> cl = power_confidence_limits(150, c=0.84)
>>> assert np.allclose(cl, [127, 176], atol=1)
"""
rv = stats.ncx2(2 * n, preal)
return rv.ppf([1 - c, c])
def power_upper_limit(pmeas, n=1, c=0.95):
"""Upper limit on signal power, given a measured power in the PDS/Z search.
Adapted from Vaughan et al. 1994, noting that, after appropriate
normalization of the spectral stats, the distribution of powers in the PDS
and the Z^2_n searches is always described by a noncentral chi squared
distribution.
Note that Vaughan+94 gives p(pmeas | preal), while we are interested in
p(real | pmeas), which is not described by the NCX2 stat. Rather than
integrating the CDF of this probability distribution, we start from a
reasonable approximation and fit to find the preal that gives pmeas as
a (e.g.95%) confidence limit.
As Vaughan+94 shows, this power is always larger than the observed one.
This is because we are looking for the maximum signal power that,
combined with noise powers, would give the observed power. This involves
the possibility that noise powers partially cancel out some signal power.
Parameters
----------
pmeas: float
The measured value of power
Other Parameters
----------------
n: int
The number of summed powers to obtain pmeas. It can be multiple
harmonics of the PDS, adjacent bins in a PDS summed to collect all the
power in a QPO, or the n in Z^2_n
c: float
The confidence value for the probability (e.g. 0.95 = 95%)
Returns
-------
psig: float
The signal power that could produce P>pmeas with 1 - c probability
Examples
--------
>>> pup = power_upper_limit(40, 1, 0.99)
>>> assert np.isclose(pup, 75, atol=2)
"""
def ppf(x):
rv = stats.ncx2(2 * n, x)
return rv.ppf(1 - c)
def isf(x):
rv = stats.ncx2(2 * n, x)
return rv.ppf(c)
def func_to_minimize(x, xmeas):
return np.abs(ppf(x) - xmeas)
from scipy.optimize import minimize
initial = isf(pmeas)
res = minimize(func_to_minimize, [initial], pmeas, bounds=[(0, initial * 2)])
return res.x[0]
def amplitude_upper_limit(pmeas, counts, n=1, c=0.95, fft_corr=False, nyq_ratio=0):
r"""Upper limit on a sinusoidal modulation, given a measured power in the PDS/Z search.
Eq. 10 in Vaughan+94 and `a_from_ssig`: they are equivalent but Vaughan+94
corrects further for the response inside an FFT bin and at frequencies close
to Nyquist. These two corrections are added by using fft_corr=True and
nyq_ratio to the correct :math:`f / f_{Nyq}` of the FFT peak
To understand the meaning of this amplitude: if the modulation is described by:
..math:: p = \overline{p} (1 + a * \sin(x))
this function returns a.
If it is a sum of sinusoidal harmonics instead
..math:: p = \overline{p} (1 + \sum_l a_l * \sin(lx))
a is equivalent to :math:`\sqrt(\sum_l a_l^2)`.
See `power_upper_limit`
Parameters
----------
pmeas: float
The measured value of power
counts: int
The number of counts in the light curve used to calculate the spectrum
Other Parameters
----------------
n: int
The number of summed powers to obtain pmeas. It can be multiple
harmonics of the PDS, adjacent bins in a PDS summed to collect all the
power in a QPO, or the n in Z^2_n
c: float
The confidence value for the probability (e.g. 0.95 = 95%)
fft_corr: bool
Apply a correction for the expected power concentrated in an FFT bin,
which is about 0.773 on average (it's 1 at the center of the bin, 2/pi
at the bin edge.
nyq_ratio: float
Ratio of the frequency of this feature with respect to the Nyquist
frequency. Important to know when dealing with FFTs, because the FFT
response decays between 0 and f_Nyq similarly to the response inside
a frequency bin: from 1 at 0 Hz to ~2/pi at f_Nyq
Returns
-------
a: float
The modulation amplitude that could produce P>pmeas with 1 - c probability
Examples
--------
>>> aup = amplitude_upper_limit(40, 30000, 1, 0.99)
>>> aup_nyq = amplitude_upper_limit(40, 30000, 1, 0.99, nyq_ratio=1)
>>> assert np.isclose(aup_nyq, aup / (2 / np.pi))
>>> aup_corr = amplitude_upper_limit(40, 30000, 1, 0.99, fft_corr=True)
>>> assert np.isclose(aup_corr, aup / np.sqrt(0.773))
"""
uplim = power_upper_limit(pmeas, n, c)
a = a_from_ssig(uplim, counts)
if fft_corr:
factor = 1 / np.sqrt(0.773)
a *= factor
if nyq_ratio > 0:
factor = np.pi / 2 * nyq_ratio
sinc_factor = np.sin(factor) / factor
a /= sinc_factor
return a
def pf_upper_limit(*args, **kwargs):
"""Upper limit on pulsed fraction, given a measured power in the PDS/Z search.
See `power_upper_limit` and `pf_from_ssig`.
All arguments are the same as `amplitude_upper_limit`
Parameters
----------
pmeas: float
The measured value of power
counts: int
The number of counts in the light curve used to calculate the spectrum
Other Parameters
----------------
n: int
The number of summed powers to obtain pmeas. It can be multiple
harmonics of the PDS, adjacent bins in a PDS summed to collect all the
power in a QPO, or the n in Z^2_n
c: float
The confidence value for the probability (e.g. 0.95 = 95%)
fft_corr: bool
Apply a correction for the expected power concentrated in an FFT bin,
which is about 0.773 on average (it's 1 at the center of the bin, 2/pi
at the bin edge.
nyq_ratio: float
Ratio of the frequency of this feature with respect to the Nyquist
frequency. Important to know when dealing with FFTs, because the FFT
response decays between 0 and f_Nyq similarly to the response inside
a frequency bin: from 1 at 0 Hz to ~2/pi at f_Nyq
Returns
-------
pf: float
The pulsed fraction that could produce P>pmeas with 1 - c probability
Examples
--------
>>> pfup = pf_upper_limit(40, 30000, 1, 0.99)
>>> assert np.isclose(pfup, 0.13, atol=0.01)
"""
return pf_from_a(amplitude_upper_limit(*args, **kwargs))
def pf_from_a(a):
"""Pulsed fraction from fractional amplitude of modulation.
If the pulsed profile is defined as
p = mean * (1 + a * sin(phase)),
we define "pulsed fraction" as 2a/b, where b = mean + a is the maximum and
a is the amplitude of the modulation.
Hence, pulsed fraction = 2a/(1+a)
Examples
--------
>>> pf_from_a(1)
1.0
>>> pf_from_a(0)
0.0
"""
return 2 * a / (1 + a)
def a_from_pf(p):
"""Fractional amplitude of modulation from pulsed fraction
If the pulsed profile is defined as
p = mean * (1 + a * sin(phase)),
we define "pulsed fraction" as 2a/b, where b = mean + a is the maximum and
a is the amplitude of the modulation.
Hence, a = pf / (2 - pf)
Examples
--------
>>> a_from_pf(1)
1.0
>>> a_from_pf(0)
0.0
"""
return p / (2 - p)
def ssig_from_a(a, ncounts):
"""Theoretical power in the Z or PDS search for a sinusoid of amplitude a.
From Leahy et al. 1983, given a pulse profile
p = lambda * (1 + a * sin(phase)),
The theoretical value of Z^2_n is Ncounts / 2 * a^2
Note that if there are multiple sinusoidal components, one can use
a = sqrt(sum(a_l))
(Bachetti+2021b)
Examples
--------
>>> round(ssig_from_a(0.1, 30000), 1)
150.0
"""
return ncounts / 2 * a**2
def a_from_ssig(ssig, ncounts):
"""Amplitude of a sinusoid corresponding to a given Z/PDS value
From Leahy et al. 1983, given a pulse profile
p = lambda * (1 + a * sin(phase)),
The theoretical value of Z^2_n is Ncounts / 2 * a^2
Note that if there are multiple sinusoidal components, one can use
a = sqrt(sum(a_l))
(Bachetti+2021b)
Examples
--------
>>> assert np.isclose(a_from_ssig(150, 30000), 0.1)
"""
return np.sqrt(2 * ssig / ncounts)
def ssig_from_pf(pf, ncounts):
"""Theoretical power in the Z or PDS for a sinusoid of pulsed fraction pf.
See `ssig_from_a` and `a_from_pf` for more details
Examples
--------
>>> assert round(ssig_from_pf(pf_from_a(0.1), 30000), 1) == 150.0
"""
a = a_from_pf(pf)
return ncounts / 2 * a**2
def pf_from_ssig(ssig, ncounts):
"""Estimate pulsed fraction for a sinusoid from a given Z or PDS power.
See `a_from_ssig` and `pf_from_a` for more details
Examples
--------
>>> assert np.isclose(round(a_from_pf(pf_from_ssig(150, 30000)), 1), 0.1)
"""
a = a_from_ssig(ssig, ncounts)
return pf_from_a(a)
```