Source code for bxa.xspec.sinning

#!/usr/bin/env python
# -*- coding: utf-8 -*- 

"""
Binning routines for plotting
"""
import numpy
import scipy.special, scipy.stats
from . import gof
import tqdm



[docs]
def group_adapt(xdata, ydata, xlo, xhi, nmin = 20):
	"""
	Adaptive grouping into nmin count bins.
	
	Args:
		:param xdata: bin center
		:param ydata: count values
		:param xlo: bin lower edge
		:param xhi: bin upper edge
		:param nmin: desired minimum number of counts in each bin
	
	Returns:
		sequence of (lower edge, upper edge, number of counts) tuples
	"""
	i = 0
	while i < len(xlo):
		for j in range(i, len(xlo)):
			# [i:j] (with j)
			xmask = numpy.logical_and(xdata >= xlo[i], xdata < xhi[j])
			
			if ydata[xmask].sum() >= nmin or j + 1 >= len(xlo):
				yield (xlo[i], xhi[j], ydata[xmask].sum())
				#print '  groups', i,j
				break
		i = j + 1




[docs]
def binning(outputfiles_basename, bins, widths, data, models, nmin=20):
	"""Bins the data for plotting and checks the model.

	Using the gof module, computes a Poisson goodness-of-fit range,
	i.e. ranges where the model must lie. This is done for multiple
	binning sizes simultaneously.

	:param outputfiles_basename: not used.
	:param bins: bin location from Plot.x()
	:param widths: bin width from Plot.xErr()
	:param data: counts per bin width from Plot.y() with Plot.background = True and Plot('counts')
	:param model: counts per bin width predicted by the model, from Plot.model()
	:param nmin: number of counts per bin to use for rebinning.
	
	Returns
	--------
	
	* marked_binned: data points binned to contain `nmin` counts
	  a sequence ready to be passed to matplotlib.pyplot.errorbar
	* modelrange: range allowed by the data
	  ready to be passed to matplotlib.pyplot.fill_between
	* and statistics (GoF measure)
	
	"""
	
	xdata = bins
	xlo = bins - widths
	xhi = bins + widths
	# convert from densities to counts
	ydata = numpy.rint(data * widths * 2)
	models = models * widths * 2
	
	best_gof = None
	best_gof_stats = None
	data = None
	
	grouped_data = list(group_adapt(xdata, ydata, xlo, xhi, nmin=nmin))
	data = numpy.array([ydata[numpy.logical_and(xdata >= i, xdata < j)].sum() for i, j in zip(xlo, xhi)])
	
	component = models[:,0,:]
	for i, counts_predicted in enumerate(tqdm.tqdm(component)):
		modelrange_low, modelrange_high = gof.calc_models_range(data)
	
		stats = gof.calc_multigof(data, counts_predicted)
		curgof = -numpy.log10(
			numpy.min([stats[stats[:,0] == n][:,2].min() * (stats[:,0] == n).sum() 
				for n in sorted(set(stats[:,0]))]) + 1e-300)
		
		if best_gof is None or curgof < best_gof:
			best_gof = curgof
			best_gof_stats = stats
		if i > 100:
			break

	# check if we can reproduce the data
	curgof = best_gof
	stats = best_gof_stats
	
	data_gofp = [numpy.nan] * len(grouped_data)
	for n in numpy.unique(stats[:,0].astype(int)):
		# find the worst case for this level and each datapoint
		#exp(numpy.log(stats[stats[:,0] == n][:,2]).sum()) * (stats[:,0] == n).sum() 
		#		for n in sorted(set(stats[:,0]))]))
		nstats = stats[stats[:,0] == n]
		pxlo = xlo[(nstats[:,1] * n).astype(int)]
		pxhi = numpy.asarray(pxlo[1:].tolist() + [xdata.max()])
	
		# so far so good.
		# mark data points that have not been achieved
		#print zip(pxlo, chi2min)
		for i, (xloi, xhii, ydatai) in enumerate(grouped_data):
			# select p values that intersect
			mask = numpy.logical_and(pxlo < xhii, xloi < pxhi)
			if mask.any():
				data_gofp[i] = numpy.nanmin([data_gofp[i], (nstats[mask][:,2]).min() * len(nstats)])
	
	gof_avg = curgof
	gof_total = gof_avg * len(data)
	
	# return data, marked
	marked_binned = []
	# plot data
	ymin = 1e300
	ymax = 0
	for (xloi, xhii, ydatai), gofpi in zip(grouped_data, data_gofp):
		best_gof = -numpy.log10(gofpi + 1e-300)
		# 1e3 and 1e6 correspond roughly to 3 sigma and 5 sigma
		c = 'green' if best_gof < 2 else 'orange' if best_gof < 6. else 'red'
		f = 1. / (xhii - xloi)
		y = ydatai * f
		modelrange_low  = scipy.special.gammaincinv(ydatai + 1, 0.1) * f
		modelrange_high = scipy.special.gammaincinv(ydatai + 1, 0.9) * f
		marked_binned.append(dict(
			x=(xloi + xhii)/2., xerr=(xhii - xloi) / 2.,
			y = y,
			yerr = [[max(0, modelrange_high - y)], [max(0, y - modelrange_low)]],
			color=c)
		)
		ymin = min(ymin, modelrange_low)
		ymax = max(ymax, modelrange_high)
	
	return dict(marked_binned = marked_binned, 
		gof_avg=gof_avg, gof_total=gof_total, stats=stats,
		xlim = (xlo[0], xhi[-1]),
		ylim = (ymin, ymax),
	)
Source code for bxa.xspec.sinning

BXA (Bayesian X-ray Analysis)

Navigation

Related Topics