#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Statistics and Plotting for quantile-quantile analysis for model discovery
"""
import numpy
import matplotlib.pyplot as plt
import os
from xspec import Plot
[docs]
def KSstat(data, model):
"""
Kolmogorov-Smirnov statistic: maximum deviance between data and model
"""
modelc = model.cumsum() / model.sum()
datac = data.cumsum() / data.sum()
ks = numpy.abs(modelc - datac).max()
return ks
[docs]
def CvMstat(data, model):
"""
Cramér–von Mises statistic: Takes all deviances into account
"""
modelc = model.cumsum()
datac = data.cumsum()
maxmodelc = modelc.max()
cvm = ((modelc / maxmodelc - datac / datac.max())**2 * model / maxmodelc).mean()
return cvm
[docs]
def ADstat(data, model):
"""
Anderson-Darling statistic: Takes all deviances into account
more weight on tails than CvM.
"""
modelc = model.cumsum()
datac = data.cumsum()
maxmodelc = modelc.max()
valid = numpy.logical_and(modelc > 0, maxmodelc - modelc > 0)
modelc = modelc[valid] / maxmodelc
datac = datac[valid] / datac.max()
model = model[valid] / maxmodelc
assert (modelc > 0).all(), ['ADstat has zero cumulative denominator', modelc]
assert (maxmodelc - modelc > 0).all(), ['ADstat has zero=1-1 cumulative denominator', maxmodelc - modelc]
ad = ((modelc - datac)**2 / (modelc * (maxmodelc - modelc)) * model).sum()
return ad
[docs]
def qq_plot(bins, data, model, markers = [0.2, 1, 2, 5, 10], unit = '', annotate = True):
"""
Create a quantile-quantile plot for model discovery (deviations in data from model).
* bins: energies/channel
* data: amount observed
* model: amount predicted
* markers: list of energies/channels (whichever the current plotting xaxis unit)
* unit: unit to append to marker
* annotate: add information to the plot
"""
assert numpy.isfinite(data).all(), data
assert numpy.isfinite(model).all(), model
datac = data.cumsum()
modelc = model.cumsum()
plt.plot(modelc, datac, color='red', drawstyle='steps', linewidth=3)
for m in markers:
mask = bins >= m
# first true value
i = mask.argmax()
if mask[i]:
plt.plot(modelc[i], datac[i], marker='+', color='k')
if datac[i] < modelc[i]:
textkwargs = dict(va='top', ha='left')
else:
textkwargs = dict(va='bottom', ha='right')
plt.text(modelc[i], datac[i], '%s%s' % (m, unit), **textkwargs)
u = max(datac[-1], modelc[-1])
plt.plot([0, u], [0, u], ls='--', color='grey')
plt.xlim(0, u)
plt.ylim(0, u)
plt.title('QQ-plot')
plt.xlabel('integrated model')
plt.ylabel('cumulative data counts')
if annotate:
plt.text(u/2, u/2, 'data excess', va='bottom', ha='center', rotation=45, color='grey', size=8)
plt.text(u/2, u/2, 'model excess', va='center', ha='left', rotation=45, color='grey', size=8)
stats = dict(
ks = KSstat(data, model),
cvm = CvMstat(data, model),
ad = ADstat(data, model),
)
text = """K-S = %(ks).3f
C-vM = %(cvm).5f
A-D = %(ad)e""" % stats
plt.text(u*0.98, u*0.02, text, va='bottom', ha='right', color='grey')
[docs]
def xspecfilenamenormalise(path):
"""
Xspec gets a bit confused if there are "." in the filename
So we replace . with _ in the filename.
But if we replace it as part of the parent directory, we cannot write
there, so this only alters the filename, not the entire path.
"""
if '/' in path:
parts = path.split('/')
parts = (parts[:-1] + [xspecfilenamenormalise(parts[-1])])
return '/'.join(parts)
return path.replace('.', '_')
[docs]
def qq(prefix, markers=5, annotate=True):
"""
Create a quantile-quantile plot for model discovery (deviations in data from model).
The current data and model is used, so call *set_best_fit(analyzer, transformations)*
before, to get the qq plot at the best fit.
* markers: list of energies/channels (whichever the current plotting xaxis unit)
or number of equally spaced markers between minimum+maximum.
* annotate: add information to the plot
"""
olddevice = Plot.device
Plot.device = '/null'
tmpfilename = '%swdatatmp.qdp' % xspecfilenamenormalise(prefix)
if os.path.exists(tmpfilename):
os.remove(tmpfilename)
while len(Plot.commands) > 0:
Plot.delCommand(1)
Plot.addCommand('wdata "%s"' % tmpfilename.replace('.qdp', ''))
Plot.background = True
Plot("counts")
content = numpy.genfromtxt(tmpfilename, skip_header=3)
os.remove(tmpfilename)
while len(Plot.commands) > 0:
Plot.delCommand(1)
if Plot.background:
bins, width, data, dataerror, background, backgrounderror, model = content[:,0:7].transpose()
else:
bins, width, data, dataerror, model = content[:,0:5].transpose()
if not hasattr(markers, '__len__'):
nmarkers = int(markers)
decimals = int(-numpy.log10(bins[-1] - bins[0] + 1e-4))
markers = numpy.linspace(bins[0], bins[-1], nmarkers+2)[1:-1]
markers = set(numpy.round(markers, decimals=decimals))
# make qq plot, with 1:1 line
qq_plot(bins=bins, data=data * width * 2, model=model * width * 2,
markers = markers, annotate = annotate, unit=Plot.xAxis)
Plot.device = olddevice