Source code for nested_sampling.samplers.friends

import numpy
import scipy.spatial, scipy.cluster
import matplotlib.pyplot as plt
from nested_sampling.clustering import clusterdetect
from nested_sampling.clustering.neighbors import find_maxdistance, find_rdistance, initial_rdistance_guess, nearest_rdistance_guess

[docs]class FriendsConstrainer(object): """ Rejection sampling pre-filtering method based on neighborhood to live points. "Distant" means in this implementation that the distance to a cluster member is large. The maximum distance to a cluster is computed by considering each cluster member and its k nearest neighbors in turn, and computing the maximum distance. :param rebuild_every: After how many iterations should the clustering distance be re-computed? :param radial: if radial = True, then the normal euclidean distance is used. otherwise, the absolute coordinate difference in each dimension is used. :param metric: metric to use. Use 'chebyshev' for SupFriends, in which case then the supremum norm is used. Use 'euclidean' for RadFriends, via the euclidean norm. :param jackknife: if True, instead of leaving out a group of live points in the distance estimate, only one is left out in turn (jackknife resampling instead of bootstrap resampling). :param force_shrink: if True, the distance can only decrease between sampling steps. """ def __init__(self, rebuild_every = 50, radial = True, metric = 'euclidean', jackknife = False, force_shrink = False, hinter = None, verbose = False): self.maxima = [] self.iter = 0 self.region = None self.rebuild_every = rebuild_every self.radial = radial self.metric = metric self.file = None self.jackknife = jackknife self.force_shrink = False self.hinter = hinter self.verbose = verbose
[docs] def cluster(self, u, ndim, keepRadius=False): """ """ if self.verbose: print 'building region ...' if len(u) > 10: if keepRadius and self.region is not None and 'maxdistance' in self.region: maxdistance = self.region['maxdistance'] else: if self.radial: if self.jackknife: #maxdistance = initial_rdistance_guess(u, k=1, metric=self.metric) maxdistance = nearest_rdistance_guess(u, metric=self.metric) else: maxdistance = find_rdistance(u, nbootstraps=50, metric=self.metric, verbose=self.verbose) else: maxdistance = find_maxdistance(u) if self.force_shrink and self.region is not None and 'maxdistance' in self.region: maxdistance = min(maxdistance, self.region['maxdistance']) ulow = numpy.max([u.min(axis=0) - maxdistance, numpy.zeros(ndim)], axis=0) uhigh = numpy.min([u.max(axis=0) + maxdistance, numpy.ones(ndim)], axis=0) else: maxdistance = None ulow = numpy.zeros(ndim) uhigh = numpy.ones(ndim) if self.verbose: print 'setting sampling region:', (ulow, uhigh), maxdistance self.region = dict(members=u, maxdistance=maxdistance, ulow=ulow, uhigh=uhigh) self.generator = None
[docs] def is_inside(self, u): """ Check if this new point is near or inside one of our clusters """ ndim = len(u) ulow = self.region['ulow'] uhigh = self.region['uhigh'] if not ((ulow <= u).all() and (uhigh >= u).all()): # does not even lie in our primitive rectangle # do not even need to compute the distances return False members = self.region['members'] maxdistance = self.region['maxdistance'] # if not initialized: no prefiltering if maxdistance is None: return True # compute distance to each member in each dimension if self.radial: dists = scipy.spatial.distance.cdist(members, [u], metric=self.metric) assert dists.shape == (len(members), 1) dist_criterion = dists < maxdistance else: dists = numpy.abs(u - members) assert dists.shape == (len(members), ndim), (dists.shape, ndim, len(members)) # nearer than maxdistance in all dimensions dist_criterion = numpy.all(dists < maxdistance, axis=1) assert dist_criterion.shape == (len(members),), (dist_criterion.shape, len(members)) # is it true for at least one? closeby = dist_criterion.any() if closeby: return True return False
[docs] def are_inside_rect(self, u): """ Check if the new points are near or inside one of our clusters """ ulow = self.region['ulow'] uhigh = self.region['uhigh'] mask = numpy.logical_and(((ulow <= u).all(axis=1), (uhigh >= u).all(axis=1)))
def are_inside_cluster(self, u, ndim): members = self.region['members'] maxdistance = self.region['maxdistance'] # if not initialized: no prefiltering if maxdistance is None: return numpy.ones(len(u), dtype=bool) # compute distance to each member in each dimension if self.radial: dists = scipy.spatial.distance.cdist(members, u, metric=self.metric) assert dists.shape == (len(members), len(u)) dist_criterion = dists < maxdistance else: raise NotImplementedError() # is it true for at least one? closeby = dist_criterion.any(axis=0) return closeby def generate(self, ndim): it = True verbose = False and self.verbose ntotal = 0 # largest maxdistance where generating from full space makes sense full_maxdistance = 0.5 * (0.01)**(1./ndim) while True: maxdistance = self.region['maxdistance'] if maxdistance is None: # do a prefiltering rejection sampling first u = numpy.random.uniform(self.region['ulow'], self.region['uhigh'], size=ndim) yield u, ntotal ntotal = 0 continue members = self.region['members'] it = numpy.random.uniform() < 0.01 # depending on the region size compared to # the total space, one of the two methods will # be more efficient if it or not self.radial or maxdistance > full_maxdistance: it = True # for large regions # do a prefiltering rejection sampling first us = numpy.random.uniform(self.region['ulow'], self.region['uhigh'], size=(100, ndim)) ntotal += 100 mask = self.are_inside_cluster(us, ndim) if not mask.any(): continue us = us[mask] #indices = numpy.arange(len(mask))[mask] #for i in indices: # u = us[indices[i],:] for u in us: yield u, ntotal ntotal = 0 else: # for small regions # draw from points us = members[numpy.random.randint(0, len(members), 100),:] ntotal += 100 if verbose: print 'chosen point', us if self.metric == 'euclidean': # draw direction around it direction = numpy.random.normal(0, 1, size=(100, ndim)) direction = direction / ((direction**2).sum(axis=1)**0.5).reshape((-1,1)) if verbose: print 'chosen direction', direction # choose radius: volume gets larger towards the outside # so give the correct weight with dimensionality radius = maxdistance * numpy.random.uniform(0, 1, size=(100,1))**(1./ndim) us = us + direction * radius else: assert self.metric == 'chebyshev' us = us + numpy.random.uniform(-maxdistance, maxdistance, size=(100, ndim)) if verbose: print 'using point', u inside = numpy.logical_and((us >= 0).all(axis=1), (us <= 1).all(axis=1)) if not inside.any(): if verbose: print 'outside boundaries', us, direction, maxdistance continue us = us[inside] # count the number of points this is close to dists = scipy.spatial.distance.cdist(members, us, metric=self.metric) assert dists.shape == (len(members), len(us)) nnear = (dists < maxdistance).sum(axis=0) if verbose: print 'near', nnear #ntotal += 1 # accept with probability 1./nnear coin = numpy.random.uniform(size=len(us)) accept = coin < 1. / nnear if not accept.any(): if verbose: print 'probabilistic rejection due to overlaps' continue us = us[accept] for u in us: yield u, ntotal ntotal = 0 def debug_plot(Lmin, priortransform, loglikelihood, previous, ndim): # plot sampling region # and evaluate likelihood on the borders, verifying # that it is below Lmin # generate points on the surface of all points # rank them based on ?? pass def rebuild(self, previous, ndim, Lmin, keepRadius=False): previousL = numpy.array([L for _, _, L in previous]) previousu = numpy.array([u for u, _, _ in previous]) assert previousu.shape[1] == ndim, previousu.shape high = previousL > Lmin u = previousu[high] L = previousL[high] self.cluster(u=u, ndim=ndim, keepRadius=keepRadius) # reset generator self.generator = self.generate(ndim=ndim) def debug(self, ndim): if self.file is None: #self.file = open("friends_debug.txt", "a") import tempfile filename = tempfile.mktemp(dir='', prefix='friends%s-%s_' % ( '1' if self.jackknife else '', self.metric)) self.file = open(filename, 'w') self.file.write("{} {} {}\n".format(self.iter, self.region['maxdistance'], len(self.region['members']))) self.file.write("{} {} {} {}\n".format(self.iter, self.region['maxdistance'], len(self.region['members']), ndim)) def debugplot(self, u = None): print 'creating plot...' n = len(self.region['members'][0]) / 2 plt.figure(figsize=(6, n/2*4+1)) m = self.region['members'] d = self.region['maxdistance'] for i in range(n): plt.subplot(numpy.ceil(n / 2.), 2, 1+i) j = i * 2 k = i * 2 + 1 plt.plot(m[:,j], m[:,k], 'x', color='b', ms=1) plt.gca().add_artist(plt.Circle((m[0,j], m[0,k]), d, color='g', alpha=0.3)) if u is not None: plt.plot(u[j], u[k], 's', color='r') plt.gca().add_artist(plt.Circle((u[j], u[k]), d, color='r', alpha=0.3)) prefix='friends%s-%s_' % ('1' if self.jackknife else '', self.metric) plt.savefig(prefix + 'cluster.pdf') plt.close() print 'creating plot... done' def draw_constrained(self, Lmin, priortransform, loglikelihood, previous, ndim, **kwargs): # previous is [[u, x, L], ...] self.iter += 1 rebuild = self.iter % self.rebuild_every == 1 if rebuild or self.region is None: self.rebuild(previous, ndim, Lmin, keepRadius=False) if self.generator is None: self.generator = self.generate(ndim=ndim) #self.debug_plot(Lmin, priortransform, loglikelihood, previous, ndim) #self.debug(ndim) ntoaccept = 0 ntotalsum = 0 while True: for u, ntotal in self.generator: assert (u >= 0).all() and (u <= 1).all(), u ntotalsum += ntotal if self.hinter is not None: hints = self.hinter(u) if len(hints) == 0: # no way continue if len(hints) > 1: # choose a random solution, by size raise NotImplementedError("multiple solutions not implemented") hints = hints[numpy.random.randInt(len(hints))] else: hints = hints[0] for i, lo, hi in hints: u[i] = numpy.random.uniform(lo, hi) if not is_inside(u): # not sure if this is a good idea # it means we dont completely trust # the hinting function continue x = priortransform(u) L = loglikelihood(x) ntoaccept += 1 if L > Lmin: # yay, we win if ntotalsum > 10000: if self.verbose: print 'sampled %d points, evaluated %d ' % (ntotalsum, ntoaccept) #self.debugplot(u) return u, x, L, ntoaccept # if running very inefficient, optimize clustering # if we haven't done so at the start if not rebuild and ntoaccept > 1000: #self.debugplot(u) break rebuild = True self.rebuild(previous, ndim, Lmin, keepRadius=False)
if __name__ == '__main__': friends = FriendsConstrainer(radial = True) u = numpy.random.uniform(0.45, 0.55, size=1000).reshape((-1, 2)) ndim = 2 friends.cluster(u, ndim=ndim) Lmin = -1 rv = scipy.stats.norm(0.515, 0.03) def priortransform(x): return x def loglikelihood(x): return rv.logpdf(x).sum() previous = [] colors = ['r', 'g', 'orange'] plt.figure("dists", figsize=(7,4)) plt.figure("plane", figsize=(5,5)) plt.plot(u[:,0], u[:,1], 'x') Lmins = [-5, 2, 2.5] #, 2.58] for j, (Lmin, color) in enumerate(zip(numpy.array(Lmins)*ndim, colors)): values = [] for i in range(200): friends.iter = 4 # avoid rebuild u, x, L, ntoaccept = friends.draw_constrained(Lmin, priortransform, loglikelihood, previous, ndim) plt.figure("plane") plt.plot(u[0], u[1], '+', color=color) values.append(u) values = numpy.array(values) plt.figure("dists") for k in range(ndim): plt.subplot(1, ndim, k + 1) plt.title('Lmin={}, dim={}'.format(Lmin, k)) plt.hist(values[:,k], cumulative=True, normed=True, color=color, bins=1000, histtype='step') plt.figure("plane") plt.savefig('friends_sampling_test.pdf', bbox_inches='tight') plt.close() plt.figure("dists") plt.savefig('friends_sampling_test_dists.pdf', bbox_inches='tight') plt.close() # another test: given a group of samples, assert that only neighbors are evaluated r = numpy.random.uniform(0.2, 0.25, size=400) phi = numpy.random.uniform(0, 1, size=400)**10 * 2*numpy.pi u = numpy.transpose([0.5 + r*numpy.cos(phi), 0.5 + r*numpy.sin(phi)]) friends.cluster(u, ndim=2) plt.figure(figsize=(10,5)) plt.subplot(1, 2, 1) plt.plot(u[:,0], u[:,1], 'x') suggested = [] def loglikelihood(x): r = ((x[0] - 0.5)**2 + (x[1] - 0.5)**2)**0.5 #assert r < 0.5 #assert r > 0.1 suggested.append(r) if r > 0.2 and r < 0.25: plt.plot(x[0], x[1], 'o', color='green') return 100 plt.plot(x[0], x[1], 'o', color='red') return -100 ndim = 2 taken = [] for i in range(100): friends.iter = 4 # avoid rebuild u, x, L, ntoaccept = friends.draw_constrained(Lmin, priortransform, loglikelihood, previous, ndim) r = ((x[0] - 0.5)**2 + (x[1] - 0.5)**2)**0.5 taken.append(r) print 'suggested:', u plt.subplot(1, 2, 2) plt.hist(taken, cumulative=True, normed=True, color='g', bins=1000, histtype='step') plt.hist(suggested, cumulative=True, normed=True, color='r', bins=1000, histtype='step') #x = numpy.linspace(0, 1, 400) #y = x**ndim - (x - min(suggested) / max(suggested))**ndim #y /= max(y) #plt.plot(x * (max(suggested) - min(suggested)) + min(suggested), y, '--', color='grey') plt.savefig('friends_sampling_test_sampling.pdf', bbox_inches='tight') plt.close()