Source code for nested_sampling.samplers.svm

import scipy, scipy.stats
import scipy.spatial, scipy.cluster.hierarchy, scipy.interpolate
import numpy
from numpy import exp, log
import sklearn.svm
from nested_sampling.clustering import clusterdetect
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter("ignore", DeprecationWarning)
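
# Module overview: svm_classify() fits a probabilistic RBF classifier
# (sklearn.svm.NuSVC) to labelled points; SVMConstrainer uses it, together
# with per-cluster bounding boxes, to filter candidate points when drawing
# under a likelihood constraint. A 2-d demonstration runs under __main__.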

def svm_classify(points, classes, plot=False):
	# plotting is only possible for 2-d problems
	plot = plot and points.shape[1] == 2
	if plot:
		print('svm_classify plotting --')
		for c in sorted(set(classes)):
			x = points[:,0][classes == c]
			y = points[:,1][classes == c]
			plt.plot(x, y, 'x', ms=2)
		plt.savefig('svm_classifier.pdf', bbox_inches='tight')
		print('svm_classify plotting done.')
	u = points.mean(axis=0)
	s = points.std(axis=0)
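	# standardize features (zero mean, unit variance) before applying the RBF kernel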
	def transform(p):
		p = numpy.asarray(p)
		return (p - u) / s
	points = transform(points)
	clf = sklearn.svm.NuSVC(nu=0.05, probability=True, kernel='rbf')
	clf.fit(points, classes)
	#print clf.get_params()

	if plot:
		x = numpy.linspace(0, 1, 100)
		y = numpy.linspace(0, 1, 100)
		# probability of the first class, evaluated on a regular grid
		dists = numpy.array([[clf.predict_proba(transform([[xi, yi]]))[0][0] for xi in x] for yi in y])
		print('levels:', dists.max(), dists.min())
		plt.contour(x, y, dists, [0.99, 0.9, 0.1], colors=['red', 'red', 'red'], linestyles=['-', '--', ':'])
		plt.savefig('svm_classifier_borders.pdf', bbox_inches='tight')
		plt.close()
	
	return clf, transform
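
# Usage sketch for svm_classify (illustrative only; not used by the sampler).
# It fits the classifier to two labelled 2-d blobs and scores a new point;
# the blob centres, widths and the helper name _svm_classify_demo are
# arbitrary choices for this example.
def _svm_classify_demo():
	rng = numpy.random.RandomState(1)
	pts = numpy.vstack([rng.normal(0.3, 0.05, size=(50, 2)),
		rng.normal(0.7, 0.05, size=(50, 2))])
	labels = numpy.repeat([0, 1], 50)
	clf, transform = svm_classify(pts, labels)
	# probability that the midpoint belongs to the first class
	return clf.predict_proba(transform([[0.5, 0.5]]))[0][0]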

class SVMConstrainer(object):
	"""
	This constrainer uses a probabilistic Support Vector Machine classifier
	with radial basis functions (sklearn.svm.NuSVC) to find a border
	separating live points from already discarded points.
	Candidate points are then filtered using this classifier:
	only points classified as lying in the live region are evaluated.
	"""
	def __init__(self):
		self.sampler = None
		self.iter = 0
		self.rects = []

	def draw_constrained(self, Lmin, priortransform, loglikelihood, previous, ndim, **kwargs):
		# previous is [[u, x, L], ...]
		previousL = numpy.array([L for _, _, L in previous])
		previousu = numpy.array([u for u, _, _ in previous])
		assert previousu.shape[1] == ndim, previousu.shape
		self.iter += 1
		rebuild = self.iter % 50 == 1
		if rebuild:
			high = previousL > Lmin
			u = previousu[high]
			L = previousL[high]
			# detect clusters using hierarchical clustering
			assert len(u.shape) == 2, u.shape
			distances = scipy.spatial.distance.cdist(u, u)
			# linkage expects a condensed distance matrix
			cluster = scipy.cluster.hierarchy.single(scipy.spatial.distance.squareform(distances))
			clusterdists = cluster[:,2]
			threshold = scipy.stats.mstats.mquantiles(clusterdists, 0.1)*20 + clusterdists.max()/2
			assigned = clusterdetect.cut_cluster(cluster, distances, threshold)
			# now we have clusters with some members;
			# find rough boundaries that enclose all cluster members
			clusterids = sorted(set(assigned))
			rects = {}
			for i in clusterids:
				inside = assigned == i
				ulow = u[inside].min(axis=0)
				uhigh = u[inside].max(axis=0)
				width = uhigh - ulow
				# expand, to avoid over-shrinkage
				ulow -= width*0.2
				uhigh += width*0.2
				j = L[inside].argmax()
				ustart = u[inside][j]
				Lstart = L[inside][j]
				rects[i] = (ulow, uhigh, log(uhigh - ulow).sum())
			# now that we are a little more familiar with our clusters,
			# we want to sample from them. For this, we create a boundary
			# between the high region and the rest with a multi-class SVM,
			# one class per cluster (discarded points get class -1)
			rectid = numpy.zeros(len(previous), dtype=int) - 1
			rectid[high] = assigned
			clf, svmtransform = None, None
			# not worth applying the SVM before a substantial fraction is discarded
			if high.mean() < 0.9 and self.iter % 200 == 1:
				clf, svmtransform = svm_classify(previousu, rectid)
			self.clf = clf
			self.svmtransform = svmtransform
			self.rects = rects
		if len(self.rects) == 1:
			onlyid = list(self.rects.keys())[0]
			def get_rect_id(dice):
				return onlyid
		else:
			# draw rectangles with probability proportional to their volume,
			# by inverting the cumulative weights with a zeroth-order interpolant
			x = sorted(self.rects.keys())
			y = numpy.array([self.rects[i][2] for i in x])
			minlogsize = y.min()
			y = exp(y - minlogsize)
			y /= y.sum()
			y = [0] + y.cumsum().tolist() + [1]
			x = [x[0]] + list(x) + [x[-1]]
			get_rect_id = scipy.interpolate.interp1d(y, x, kind='zero')
		ntoaccept = 0
		while True:
			# sample from the rectangles, then throw against the SVM
			dice = numpy.random.random()
			i = int(get_rect_id(dice))
			ulow, uhigh, logsize = self.rects[i]
			u = numpy.random.uniform(ulow, uhigh, size=ndim)
			if len(self.rects) != 1:
				# count the (volume-weighted) rectangles the point falls into
				nrect = sum(exp(lsize - minlogsize)
					for ulo, uhi, lsize in self.rects.values()
					if (u >= ulo).all() and (u <= uhi).all())
				# reject proportionally, ~ 1 / nrect
				coin = numpy.random.uniform(0, 1)
				if not coin < exp(logsize - minlogsize) / nrect:
					continue
			# if the point survives (classified to be in the high region),
			# then evaluate it
			if self.clf is not None:
				prob = self.clf.predict_proba(self.svmtransform([u]))[0][0]
				# reject points the SVM assigns to the discarded class, but
				# let an occasional one through, so that a misclassifying
				# SVM cannot lock us out
				if prob > 1 - 1. / len(previous) and ntoaccept % 100 != 95:
					continue
			x = priortransform(u)
			L = loglikelihood(x)
			ntoaccept += 1
			if L >= Lmin:
				# yay, we win
				if ntoaccept > 5:
					print('%d samples before accept' % ntoaccept, u, x, L)
				return u, x, L, ntoaccept
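
# Illustrative sketch (not called by the sampler): how draw_constrained picks
# a rectangle with probability proportional to its volume, by inverting the
# cumulative weights with a zeroth-order interpolant. Unlike above, the
# duplicate right endpoint is dropped so the abscissae stay strictly
# increasing; the numbers and the name _rect_pick_demo are example choices.
def _rect_pick_demo():
	logsizes = numpy.array([-2.0, -1.0, -3.0])  # log-volumes of 3 rectangles
	w = exp(logsizes - logsizes.min())
	w /= w.sum()
	cdf = [0.0] + w.cumsum().tolist()  # [0, w0, w0+w1, 1]
	ids = [0, 1, 2, 2]                 # hold the last id at the right edge
	pick = scipy.interpolate.interp1d(cdf, ids, kind='zero')
	draws = [int(pick(d)) for d in numpy.random.random(10000)]
	# the relative frequency of rectangle id i approximates w[i]
	return numpy.bincount(draws) / 10000.0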

if __name__ == '__main__':
	#numpy.random.seed(0)
	points = numpy.random.uniform([0, 0], [1, 1], size=(1000, 2))
	def priortransform(u):
		return u
	def loglikelihood(x):
		vals = 10*exp(-0.5 * (((x - numpy.array([0.4, 0.7]))/0.2)**2).sum())
		vals *= exp(-0.5 * ((((x[0] - 0.5)*2 - (x[1]*2)**3 + 0.01))/0.1)**2)
		vals += exp(-0.5 * (((x - numpy.array([0.2, 0.7]))/0.05)**2).sum())
		return log(vals)
	if True:
		# label the points: vectorized version of loglikelihood above
		vals = exp(-0.5 * (((points - numpy.array([0.4, 0.7]))/0.2)**2).sum(axis=1))
		vals *= exp(-0.5 * ((((points[:,0] - 0.5)*2 - (points[:,1]*2)**3 + 0.01))/0.1)**2)
		vals += exp(-0.5 * (((points - numpy.array([0.2, 0.7]))/0.05)**2).sum(axis=1))
		high = vals > 0.01
		assert high.any()
		assert (~high).any()
		a = numpy.logical_and(points[:,0] < 0.4, high)
		b = numpy.logical_and(points[:,0] >= 0.4, high)
		plt.figure(figsize=(5,5))
		plt.plot(points[:,0][a], points[:,1][a], 'x', color='b')
		plt.plot(points[:,0][b], points[:,1][b], 'x', color='m')
		plt.plot(points[:,0][~high], points[:,1][~high], '+', color='g')
		plt.savefig('svmtest.pdf', bbox_inches='tight')
		clf, transform = svm_classify(points, high + a)
		x = numpy.linspace(0, 1, 100)
		y = numpy.linspace(0, 1, 100)
		dists = numpy.array([[clf.predict_proba(transform([[xi, yi]]))[0][0] for xi in x] for yi in y])
		plt.contourf(x, y, dists)
		plt.contour(x, y, dists, [0.99, 0.9, 0.1], colors=['red', 'red', 'red'], linestyles=['-', '--', ':'])
		plt.savefig('svmtest.pdf', bbox_inches='tight')
		# classify a fresh set of points and plot the predictions
		points = numpy.random.uniform([0, 0], [1, 1], size=(1000, 2))
		results = clf.predict(transform(points))
		for (xi, yi), r in zip(points, results):
			plt.plot(xi, yi, 'o', color='b' if r == 1 else 'g')
		plt.savefig('svmtest.pdf', bbox_inches='tight')
		plt.close()

	from simplenested import NestedSampler, nested_integrator
	constrainer = SVMConstrainer()
	print('preparing sampler')
	sampler = NestedSampler(nlive_points=200, ndim=2, priortransform=priortransform,
		loglikelihood=loglikelihood, draw_constrained=constrainer.draw_constrained)
	constrainer.sampler = sampler
	print('running sampler')
	result = nested_integrator(tolerance=0.01, sampler=sampler)
	try:
		x = numpy.array([xi for _, xi, _ in sampler.samples])
		y = numpy.exp([l for _, _, l in sampler.samples])
		print(x)
		print(y)
		plt.figure()
		plt.hexbin(x[:,0], x[:,1], C=y, gridsize=40)
		plt.savefig('svmtest_nested_samples.pdf', bbox_inches='tight')
		plt.close()
	except Exception as e:
		print(e)
	try:
		weights = numpy.array(result['weights'])  # columns: log L, log width
		plt.figure()
		plt.plot(exp(weights[:,1]), exp(weights[:,0]), 'x-', color='blue', ms=1)
		plt.xlabel('prior mass')
		plt.ylabel('likelihood')
		plt.savefig('svmtest_nested_integral.pdf', bbox_inches='tight')
		plt.close()
	except Exception as e:
		print(e)
	print('nested sampling (%d samples) logZ =' % len(result['samples']), result['logZ'], result['logZerr'])