Source code for nested_sampling.samplers.svm

import scipy, scipy.stats
import scipy.spatial, scipy.cluster.hierarchy, scipy.interpolate
import numpy
from numpy import exp, log
import sklearn.svm
from nested_sampling.clustering import clusterdetect
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter("ignore", DeprecationWarning)
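
# Module overview: svm_classify() fits a probabilistic RBF classifier
# (sklearn.svm.NuSVC) to labelled points; SVMConstrainer uses it, together
# with per-cluster bounding boxes, to filter candidate points when drawing
# under a likelihood constraint. A 2-d demonstration runs under __main__.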

def svm_classify(points, classes, plot=False):
	# plotting is only possible for 2-d problems
	plot = plot and points.shape[1] == 2
	if plot:
		print('svm_classify plotting --')
		for c in sorted(set(classes)):
			x = points[:,0][classes == c]
			y = points[:,1][classes == c]
			plt.plot(x, y, 'x', ms=2)
		plt.savefig('svm_classifier.pdf', bbox_inches='tight')
		print('svm_classify plotting done.')
	u = points.mean(axis=0)
	s = points.std(axis=0)
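	# standardize features (zero mean, unit variance) before applying the RBF kernel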
	def transform(p):
		p = numpy.asarray(p)
		return (p - u) / s
	points = transform(points)
	clf = sklearn.svm.NuSVC(nu=0.05, probability=True, kernel='rbf')
	clf.fit(points, classes)
	#print clf.get_params()

	if plot:
		x = numpy.linspace(0, 1, 100)
		y = numpy.linspace(0, 1, 100)
		# probability of the first class, evaluated on a regular grid
		dists = numpy.array([[clf.predict_proba(transform([[xi, yi]]))[0][0] for xi in x] for yi in y])
		print('levels:', dists.max(), dists.min())
		plt.contour(x, y, dists, [0.99, 0.9, 0.1], colors=['red', 'red', 'red'], linestyles=['-', '--', ':'])
		plt.savefig('svm_classifier_borders.pdf', bbox_inches='tight')
		plt.close()
	
	return clf, transform
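
# Usage sketch for svm_classify (illustrative only; not used by the sampler).
# It fits the classifier to two labelled 2-d blobs and scores a new point;
# the blob centres, widths and the helper name _svm_classify_demo are
# arbitrary choices for this example.
def _svm_classify_demo():
	rng = numpy.random.RandomState(1)
	pts = numpy.vstack([rng.normal(0.3, 0.05, size=(50, 2)),
		rng.normal(0.7, 0.05, size=(50, 2))])
	labels = numpy.repeat([0, 1], 50)
	clf, transform = svm_classify(pts, labels)
	# probability that the midpoint belongs to the first class
	return clf.predict_proba(transform([[0.5, 0.5]]))[0][0]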

class SVMConstrainer(object):
	"""
	This constrainer uses a probabilistic Support Vector Machine classifier
	with radial basis functions (sklearn.svm.NuSVC) to find a border
	separating live points from already discarded points.
	Candidate points are then filtered using this classifier:
	only points classified as lying in the live region are evaluated.
	"""
	def __init__(self):
		self.sampler = None
		self.iter = 0
		self.rects = []

	def draw_constrained(self, Lmin, priortransform, loglikelihood, previous, ndim, **kwargs):
		# previous is [[u, x, L], ...]
		previousL = numpy.array([L for _, _, L in previous])
		previousu = numpy.array([u for u, _, _ in previous])
		assert previousu.shape[1] == ndim, previousu.shape
		self.iter += 1
		rebuild = self.iter % 50 == 1
		if rebuild:
			high = previousL > Lmin
			u = previousu[high]
			L = previousL[high]
			# detect clusters using hierarchical clustering
			assert len(u.shape) == 2, u.shape
			distances = scipy.spatial.distance.cdist(u, u)
			# linkage expects a condensed distance matrix
			cluster = scipy.cluster.hierarchy.single(scipy.spatial.distance.squareform(distances))
			clusterdists = cluster[:,2]
			threshold = scipy.stats.mstats.mquantiles(clusterdists, 0.1)*20 + clusterdists.max()/2
			assigned = clusterdetect.cut_cluster(cluster, distances, threshold)
			# now we have clusters with some members;
			# find rough boundaries that enclose all cluster members
			clusterids = sorted(set(assigned))
			rects = {}
			for i in clusterids:
				inside = assigned == i
				ulow = u[inside].min(axis=0)
				uhigh = u[inside].max(axis=0)
				width = uhigh - ulow
				# expand, to avoid over-shrinkage
				ulow -= width*0.2
				uhigh += width*0.2
				j = L[inside].argmax()
				ustart = u[inside][j]
				Lstart = L[inside][j]
				rects[i] = (ulow, uhigh, log(uhigh - ulow).sum())
			# now that we are a little more familiar with our clusters,
			# we want to sample from them. For this, we create a boundary
			# between the high region and the rest with a multi-class SVM,
			# one class per cluster (discarded points get class -1)
			rectid = numpy.zeros(len(previous), dtype=int) - 1
			rectid[high] = assigned
			clf, svmtransform = None, None
			# not worth applying the SVM before a substantial fraction is discarded
			if high.mean() < 0.9 and self.iter % 200 == 1:
				clf, svmtransform = svm_classify(previousu, rectid)
			self.clf = clf
			self.svmtransform = svmtransform
			self.rects = rects
		if len(self.rects) == 1:
			onlyid = list(self.rects.keys())[0]
			def get_rect_id(dice):
				return onlyid
		else:
			# draw rectangles with probability proportional to their volume,
			# by inverting the cumulative weights with a zeroth-order interpolant
			x = sorted(self.rects.keys())
			y = numpy.array([self.rects[i][2] for i in x])
			minlogsize = y.min()
			y = exp(y - minlogsize)
			y /= y.sum()
			y = [0] + y.cumsum().tolist() + [1]
			x = [x[0]] + list(x) + [x[-1]]
			get_rect_id = scipy.interpolate.interp1d(y, x, kind='zero')
		ntoaccept = 0
		while True:
			# sample from the rectangles, then throw against the SVM
			dice = numpy.random.random()
			i = int(get_rect_id(dice))
			ulow, uhigh, logsize = self.rects[i]
			u = numpy.random.uniform(ulow, uhigh, size=ndim)
			if len(self.rects) != 1:
				# count the (volume-weighted) rectangles the point falls into
				nrect = sum(exp(lsize - minlogsize)
					for ulo, uhi, lsize in self.rects.values()
					if (u >= ulo).all() and (u <= uhi).all())
				# reject proportionally, ~ 1 / nrect
				coin = numpy.random.uniform(0, 1)
				if not coin < exp(logsize - minlogsize) / nrect:
					continue
			# if the point survives (classified to be in the high region),
			# then evaluate it
			if self.clf is not None:
				prob = self.clf.predict_proba(self.svmtransform([u]))[0][0]
				# reject points the SVM assigns to the discarded class, but
				# let an occasional one through, so that a misclassifying
				# SVM cannot lock us out
				if prob > 1 - 1. / len(previous) and ntoaccept % 100 != 95:
					continue
			x = priortransform(u)
			L = loglikelihood(x)
			ntoaccept += 1
			if L >= Lmin:
				# yay, we win
				if ntoaccept > 5:
					print('%d samples before accept' % ntoaccept, u, x, L)
				return u, x, L, ntoaccept
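
# Illustrative sketch (not called by the sampler): how draw_constrained picks
# a rectangle with probability proportional to its volume, by inverting the
# cumulative weights with a zeroth-order interpolant. Unlike above, the
# duplicate right endpoint is dropped so the abscissae stay strictly
# increasing; the numbers and the name _rect_pick_demo are example choices.
def _rect_pick_demo():
	logsizes = numpy.array([-2.0, -1.0, -3.0])  # log-volumes of 3 rectangles
	w = exp(logsizes - logsizes.min())
	w /= w.sum()
	cdf = [0.0] + w.cumsum().tolist()  # [0, w0, w0+w1, 1]
	ids = [0, 1, 2, 2]                 # hold the last id at the right edge
	pick = scipy.interpolate.interp1d(cdf, ids, kind='zero')
	draws = [int(pick(d)) for d in numpy.random.random(10000)]
	# the relative frequency of rectangle id i approximates w[i]
	return numpy.bincount(draws) / 10000.0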

if __name__ == '__main__':
	#numpy.random.seed(0)
	points = numpy.random.uniform([0, 0], [1, 1], size=(1000, 2))
	def priortransform(u):
		return u
	def loglikelihood(x):
		vals = 10*exp(-0.5 * (((x - numpy.array([0.4, 0.7]))/0.2)**2).sum())
		vals *= exp(-0.5 * ((((x[0] - 0.5)*2 - (x[1]*2)**3 + 0.01))/0.1)**2)
		vals += exp(-0.5 * (((x - numpy.array([0.2, 0.7]))/0.05)**2).sum())
		return log(vals)
	if True:
		# label the points: vectorized version of loglikelihood above
		vals = exp(-0.5 * (((points - numpy.array([0.4, 0.7]))/0.2)**2).sum(axis=1))
		vals *= exp(-0.5 * ((((points[:,0] - 0.5)*2 - (points[:,1]*2)**3 + 0.01))/0.1)**2)
		vals += exp(-0.5 * (((points - numpy.array([0.2, 0.7]))/0.05)**2).sum(axis=1))
		high = vals > 0.01
		assert high.any()
		assert (~high).any()
		a = numpy.logical_and(points[:,0] < 0.4, high)
		b = numpy.logical_and(points[:,0] >= 0.4, high)
		plt.figure(figsize=(5,5))
		plt.plot(points[:,0][a], points[:,1][a], 'x', color='b')
		plt.plot(points[:,0][b], points[:,1][b], 'x', color='m')
		plt.plot(points[:,0][~high], points[:,1][~high], '+', color='g')
		plt.savefig('svmtest.pdf', bbox_inches='tight')
		clf, transform = svm_classify(points, high + a)
		x = numpy.linspace(0, 1, 100)
		y = numpy.linspace(0, 1, 100)
		dists = numpy.array([[clf.predict_proba(transform([[xi, yi]]))[0][0] for xi in x] for yi in y])
		plt.contourf(x, y, dists)
		plt.contour(x, y, dists, [0.99, 0.9, 0.1], colors=['red', 'red', 'red'], linestyles=['-', '--', ':'])
		plt.savefig('svmtest.pdf', bbox_inches='tight')
		# classify a fresh set of points and plot the predictions
		points = numpy.random.uniform([0, 0], [1, 1], size=(1000, 2))
		results = clf.predict(transform(points))
		for (xi, yi), r in zip(points, results):
			plt.plot(xi, yi, 'o', color='b' if r == 1 else 'g')
		plt.savefig('svmtest.pdf', bbox_inches='tight')
		plt.close()

	from simplenested import NestedSampler, nested_integrator
	constrainer = SVMConstrainer()
	print('preparing sampler')
	sampler = NestedSampler(nlive_points=200, ndim=2, priortransform=priortransform,
		loglikelihood=loglikelihood, draw_constrained=constrainer.draw_constrained)
	constrainer.sampler = sampler
	print('running sampler')
	result = nested_integrator(tolerance=0.01, sampler=sampler)
	try:
		x = numpy.array([xi for _, xi, _ in sampler.samples])
		y = numpy.exp([l for _, _, l in sampler.samples])
		print(x)
		print(y)
		plt.figure()
		plt.hexbin(x[:,0], x[:,1], C=y, gridsize=40)
		plt.savefig('svmtest_nested_samples.pdf', bbox_inches='tight')
		plt.close()
	except Exception as e:
		print(e)
	try:
		weights = numpy.array(result['weights'])  # columns: log L, log width
		plt.figure()
		plt.plot(exp(weights[:,1]), exp(weights[:,0]), 'x-', color='blue', ms=1)
		plt.xlabel('prior mass')
		plt.ylabel('likelihood')
		plt.savefig('svmtest_nested_integral.pdf', bbox_inches='tight')
		plt.close()
	except Exception as e:
		print(e)
	print('nested sampling (%d samples) logZ =' % len(result['samples']), result['logZ'], result['logZerr'])