Source code for nested_sampling.samplers.friends

import numpy
import scipy.spatial, scipy.cluster
import matplotlib.pyplot as plt
from nested_sampling.clustering import clusterdetect
from nested_sampling.clustering.neighbors import find_maxdistance, find_rdistance, initial_rdistance_guess, nearest_rdistance_guess

[docs]class FriendsConstrainer(object):
	"""
	Rejection sampling pre-filtering method based on neighborhood to live points.

	"Distant" means in this implementation that the distance to a cluster member
	is large. 
	The maximum distance to a cluster is computed by considering each
	cluster member and its k nearest neighbors in turn, and 
	computing the maximum distance.
	
	:param rebuild_every: After how many iterations should the clustering
		distance be re-computed?
	    
	:param radial:	
		if radial = True, then the normal euclidean distance is used.
		otherwise, the absolute coordinate difference in each dimension is used.
	
	:param metric:
		metric to use. Use 'chebyshev' for SupFriends, in which case then 
		the supremum norm is used. Use 'euclidean' for RadFriends, via 
		the euclidean norm.
	
	:param jackknife:
		if True, instead of leaving out a group of live points in
		the distance estimate, only one is left out in turn (jackknife resampling
		instead of bootstrap resampling).
	
	:param force_shrink:
		if True, the distance can only decrease between sampling steps.
	
	"""
	def __init__(self, rebuild_every = 50, radial = True, metric = 'euclidean', jackknife = False,
			force_shrink = False,
			hinter = None, verbose = False):
		self.maxima = []
		self.iter = 0
		self.region = None
		self.rebuild_every = rebuild_every
		self.radial = radial
		self.metric = metric
		self.file = None
		self.jackknife = jackknife
		self.force_shrink = False
		self.hinter = hinter
		self.verbose = verbose
	
[docs]	def cluster(self, u, ndim, keepRadius=False):
		"""
		
		"""
		if self.verbose: print 'building region ...'
		if len(u) > 10:
			if keepRadius and self.region is not None and 'maxdistance' in self.region:
				maxdistance = self.region['maxdistance']
			else:
				if self.radial:
					if self.jackknife:
						#maxdistance = initial_rdistance_guess(u, k=1, metric=self.metric)
						maxdistance = nearest_rdistance_guess(u, metric=self.metric)
					else:
						maxdistance = find_rdistance(u, nbootstraps=50, metric=self.metric, verbose=self.verbose)
				else:
					maxdistance = find_maxdistance(u)
			if self.force_shrink and self.region is not None and 'maxdistance' in self.region:
				maxdistance = min(maxdistance, self.region['maxdistance'])
			ulow  = numpy.max([u.min(axis=0) - maxdistance, numpy.zeros(ndim)], axis=0)
			uhigh = numpy.min([u.max(axis=0) + maxdistance, numpy.ones(ndim)], axis=0)
		else:
			maxdistance = None
			ulow = numpy.zeros(ndim)
			uhigh = numpy.ones(ndim)
		if self.verbose: print 'setting sampling region:', (ulow, uhigh), maxdistance
		self.region = dict(members=u, maxdistance=maxdistance, ulow=ulow, uhigh=uhigh)
		self.generator = None
		
[docs]	def is_inside(self, u):
		"""
		Check if this new point is near or inside one of our clusters
		"""
		ndim = len(u)
		ulow = self.region['ulow']
		uhigh = self.region['uhigh']
		if not ((ulow <= u).all() and (uhigh >= u).all()):
			# does not even lie in our primitive rectangle
			# do not even need to compute the distances
			return False
		
		members = self.region['members']
		maxdistance = self.region['maxdistance']
		
		# if not initialized: no prefiltering
		if maxdistance is None:
			return True
		
		# compute distance to each member in each dimension
		if self.radial:
			dists = scipy.spatial.distance.cdist(members, [u], metric=self.metric)
			assert dists.shape == (len(members), 1)
			dist_criterion = dists < maxdistance
		else:
			dists = numpy.abs(u - members)
			assert dists.shape == (len(members), ndim), (dists.shape, ndim, len(members))
			# nearer than maxdistance in all dimensions
			dist_criterion = numpy.all(dists < maxdistance, axis=1)
			assert dist_criterion.shape == (len(members),), (dist_criterion.shape, len(members))
		# is it true for at least one?
		closeby = dist_criterion.any()
		if closeby:
			return True
		return False
	
[docs]	def are_inside_rect(self, u):
		"""
		Check if the new points are near or inside one of our clusters
		"""
		ulow = self.region['ulow']
		uhigh = self.region['uhigh']
		mask = numpy.logical_and(((ulow <= u).all(axis=1), (uhigh >= u).all(axis=1)))
	def are_inside_cluster(self, u, ndim):
		members = self.region['members']
		maxdistance = self.region['maxdistance']
		
		# if not initialized: no prefiltering
		if maxdistance is None:
			return numpy.ones(len(u), dtype=bool)
		
		# compute distance to each member in each dimension
		if self.radial:
			dists = scipy.spatial.distance.cdist(members, u, metric=self.metric)
			assert dists.shape == (len(members), len(u))
			dist_criterion = dists < maxdistance
		else:
			raise NotImplementedError()
		# is it true for at least one?
		closeby = dist_criterion.any(axis=0)
		return closeby
	def generate(self, ndim):
		it = True
		verbose = False and self.verbose
		ntotal = 0
		# largest maxdistance where generating from full space makes sense
		full_maxdistance = 0.5 * (0.01)**(1./ndim)
		while True:
			maxdistance = self.region['maxdistance']
			if maxdistance is None:
				# do a prefiltering rejection sampling first
				u = numpy.random.uniform(self.region['ulow'], self.region['uhigh'], size=ndim)
				yield u, ntotal
				ntotal = 0
				continue
			members = self.region['members']
			it = numpy.random.uniform() < 0.01
			# depending on the region size compared to 
			# the total space, one of the two methods will
			# be more efficient
			if it or not self.radial or maxdistance > full_maxdistance:
				it = True
				# for large regions
				# do a prefiltering rejection sampling first
				us = numpy.random.uniform(self.region['ulow'], self.region['uhigh'], size=(100, ndim))
				ntotal += 100
				mask = self.are_inside_cluster(us, ndim)
				if not mask.any():
					continue
				us = us[mask]
				#indices = numpy.arange(len(mask))[mask]
				#for i in indices:
				#	u = us[indices[i],:]
				for u in us:
					yield u, ntotal
					ntotal = 0
			else:
				# for small regions
				# draw from points
				us = members[numpy.random.randint(0, len(members), 100),:]
				ntotal += 100
				if verbose: print 'chosen point', us
				if self.metric == 'euclidean':
					# draw direction around it
					direction = numpy.random.normal(0, 1, size=(100, ndim))
					direction = direction / ((direction**2).sum(axis=1)**0.5).reshape((-1,1))
					if verbose: print 'chosen direction', direction
					# choose radius: volume gets larger towards the outside
					# so give the correct weight with dimensionality
					radius = maxdistance * numpy.random.uniform(0, 1, size=(100,1))**(1./ndim)
					us = us + direction * radius
				else:
					assert self.metric == 'chebyshev'
					us = us + numpy.random.uniform(-maxdistance, maxdistance, size=(100, ndim))
				if verbose: print 'using point', u
				inside = numpy.logical_and((us >= 0).all(axis=1), (us <= 1).all(axis=1))
				if not inside.any():
					if verbose: print 'outside boundaries', us, direction, maxdistance
					continue
				us = us[inside]
				# count the number of points this is close to
				dists = scipy.spatial.distance.cdist(members, us, metric=self.metric)
				assert dists.shape == (len(members), len(us))
				nnear = (dists < maxdistance).sum(axis=0)
				if verbose: print 'near', nnear
				#ntotal += 1
				# accept with probability 1./nnear
				coin = numpy.random.uniform(size=len(us))
				
				accept = coin < 1. / nnear
				if not accept.any():
					if verbose: print 'probabilistic rejection due to overlaps'
					continue
				us = us[accept]
				for u in us:
					yield u, ntotal
					ntotal = 0
			
	def debug_plot(Lmin, priortransform, loglikelihood, previous, ndim):
		# plot sampling region
		# and evaluate likelihood on the borders, verifying
		# that it is below Lmin
		# generate points on the surface of all points
		# rank them based on ??
		pass
	
	def rebuild(self, previous, ndim, Lmin, keepRadius=False):
		previousL = numpy.array([L for _, _, L in previous])
		previousu = numpy.array([u for u, _, _ in previous])
		assert previousu.shape[1] == ndim, previousu.shape
		high = previousL > Lmin
		u = previousu[high]
		L = previousL[high]
		self.cluster(u=u, ndim=ndim, keepRadius=keepRadius)
		# reset generator
		self.generator = self.generate(ndim=ndim)
	def debug(self, ndim):
		if self.file is None:
			#self.file = open("friends_debug.txt", "a")
			import tempfile
			filename = tempfile.mktemp(dir='',
				prefix='friends%s-%s_' % (
				'1' if self.jackknife else '',
				self.metric))
			self.file = open(filename, 'w')
			self.file.write("{} {} {}\n".format(self.iter, self.region['maxdistance'], len(self.region['members'])))
		self.file.write("{} {} {} {}\n".format(self.iter, self.region['maxdistance'], len(self.region['members']), ndim))
	def debugplot(self, u = None):
		print 'creating plot...'
		n = len(self.region['members'][0]) / 2
		plt.figure(figsize=(6, n/2*4+1))
		m = self.region['members']
		d = self.region['maxdistance']
		for i in range(n):
			plt.subplot(numpy.ceil(n / 2.), 2, 1+i)
			j = i * 2
			k = i * 2 + 1
			plt.plot(m[:,j], m[:,k], 'x', color='b', ms=1)
			plt.gca().add_artist(plt.Circle((m[0,j], m[0,k]), d, color='g', alpha=0.3))
			if u is not None:
				plt.plot(u[j], u[k], 's', color='r')
				plt.gca().add_artist(plt.Circle((u[j], u[k]), d, color='r', alpha=0.3))
		prefix='friends%s-%s_' % ('1' if self.jackknife else '', self.metric)
		plt.savefig(prefix + 'cluster.pdf')
		plt.close()
		print 'creating plot... done'
	
	def draw_constrained(self, Lmin, priortransform, loglikelihood, previous, ndim, **kwargs):
		# previous is [[u, x, L], ...]
		self.iter += 1
		rebuild = self.iter % self.rebuild_every == 1
		if rebuild or self.region is None:
			self.rebuild(previous, ndim, Lmin, keepRadius=False)
		if self.generator is None:
			self.generator = self.generate(ndim=ndim)
			#self.debug_plot(Lmin, priortransform, loglikelihood, previous, ndim)
		#self.debug(ndim)
		ntoaccept = 0
		ntotalsum = 0
		while True:
			for u, ntotal in self.generator:
				assert (u >= 0).all() and (u <= 1).all(), u
				ntotalsum += ntotal
				
				if self.hinter is not None:
					hints = self.hinter(u)
					if len(hints) == 0:
						# no way
						continue
					if len(hints) > 1:
						# choose a random solution, by size
						raise NotImplementedError("multiple solutions not implemented")
						hints = hints[numpy.random.randInt(len(hints))]
					else:
						hints = hints[0]
				
					for i, lo, hi in hints:
						u[i] = numpy.random.uniform(lo, hi)
					if not is_inside(u):
						# not sure if this is a good idea
						# it means we dont completely trust
						# the hinting function
						continue
				
				x = priortransform(u)
				L = loglikelihood(x)
				ntoaccept += 1
				
				if L > Lmin:
					# yay, we win
					if ntotalsum > 10000: 
						if self.verbose: 
							print 'sampled %d points, evaluated %d ' % (ntotalsum, ntoaccept)
							#self.debugplot(u)
					return u, x, L, ntoaccept
				
				# if running very inefficient, optimize clustering 
				#     if we haven't done so at the start
				if not rebuild and ntoaccept > 1000:
					#self.debugplot(u)
					break
			rebuild = True
			self.rebuild(previous, ndim, Lmin, keepRadius=False)

if __name__ == '__main__':
	friends = FriendsConstrainer(radial = True)
	
	u = numpy.random.uniform(0.45, 0.55, size=1000).reshape((-1, 2))
	ndim = 2
	friends.cluster(u, ndim=ndim)
	Lmin = -1
	rv = scipy.stats.norm(0.515, 0.03)
	def priortransform(x): return x
	def loglikelihood(x): return rv.logpdf(x).sum()
	previous = []
	colors = ['r', 'g', 'orange']
	plt.figure("dists", figsize=(7,4))
	plt.figure("plane", figsize=(5,5))
	plt.plot(u[:,0], u[:,1], 'x')
	Lmins = [-5, 2, 2.5] #, 2.58]
	for j, (Lmin, color) in enumerate(zip(numpy.array(Lmins)*ndim, colors)):
		values = []
		for i in range(200):
			friends.iter = 4 # avoid rebuild
			u, x, L, ntoaccept = friends.draw_constrained(Lmin, priortransform, loglikelihood, previous, ndim)
			plt.figure("plane")
			plt.plot(u[0], u[1], '+', color=color)
			values.append(u)
		values = numpy.array(values)
		plt.figure("dists")
		for k in range(ndim):
			plt.subplot(1, ndim, k + 1)
			plt.title('Lmin={}, dim={}'.format(Lmin, k))
			plt.hist(values[:,k], cumulative=True, normed=True, 
				color=color, bins=1000, histtype='step')
	plt.figure("plane")
	plt.savefig('friends_sampling_test.pdf', bbox_inches='tight')
	plt.close()
	plt.figure("dists")
	plt.savefig('friends_sampling_test_dists.pdf', bbox_inches='tight')
	plt.close()
	
	# another test: given a group of samples, assert that only neighbors are evaluated
	
	r = numpy.random.uniform(0.2, 0.25, size=400)
	phi = numpy.random.uniform(0, 1, size=400)**10 * 2*numpy.pi
	u = numpy.transpose([0.5 + r*numpy.cos(phi), 0.5 + r*numpy.sin(phi)])
	friends.cluster(u, ndim=2)
	plt.figure(figsize=(10,5))
	plt.subplot(1, 2, 1)
	plt.plot(u[:,0], u[:,1], 'x')
	suggested = []
	def loglikelihood(x):
		r = ((x[0] - 0.5)**2 + (x[1] - 0.5)**2)**0.5
		#assert r < 0.5
		#assert r > 0.1
		suggested.append(r)
		if r > 0.2 and r < 0.25:
			plt.plot(x[0], x[1], 'o', color='green')
			return 100
		plt.plot(x[0], x[1], 'o', color='red')
		return -100
	
	ndim = 2
	taken = []
	for i in range(100):
		friends.iter = 4 # avoid rebuild
		u, x, L, ntoaccept = friends.draw_constrained(Lmin, priortransform, loglikelihood, previous, ndim)
		r = ((x[0] - 0.5)**2 + (x[1] - 0.5)**2)**0.5
		taken.append(r)
		print 'suggested:', u
	plt.subplot(1, 2, 2)
	plt.hist(taken, cumulative=True, normed=True, 
			color='g', bins=1000, histtype='step')
	plt.hist(suggested, cumulative=True, normed=True, 
			color='r', bins=1000, histtype='step')
	#x = numpy.linspace(0, 1, 400)
	#y = x**ndim - (x - min(suggested) / max(suggested))**ndim
	#y /= max(y)
	#plt.plot(x * (max(suggested) - min(suggested)) + min(suggested), y, '--', color='grey')
	
	plt.savefig('friends_sampling_test_sampling.pdf', bbox_inches='tight')
	plt.close()
Navigation

Source code for nested_sampling.samplers.friends

Quick search

Navigation