Source code for nested_sampling.samplers.galilean

import scipy, scipy.stats
import numpy
from numpy import exp, log, log10, pi, cos, sin, dot, vdot
import numpy
from numpy.linalg import norm
from matplotlib import pyplot as plt
from nested_sampling.clustering.neighbors import find_maxdistance, find_rdistance, initial_rdistance_guess, nearest_rdistance_guess
import operator
getfirst = operator.itemgetter(0)

def random_unit_vector(ndim):
	v = numpy.random.normal(0, 1, size=ndim)
	v /= scipy.linalg.norm(v)
	return v

[docs]class HybridRadFriendsConstrainer(object): """ Base class to use RadFriends (with enforced shrinking, JackKnife resampling) in combination with local step sampling methods. """ def __init__(self, nsteps = 20, plot = False): self.sampler = None self.phase = 0 self.ndirect = 0 self.ndirect_accepts = 0 self.nfriends = 0 self.nfriends_accepts = 0 self.nsteps = nsteps self.region = None self.plot = plot
[docs] def is_inside(self, u): """ Check if this new point is near or inside one of our clusters """ ndim = len(u) ulow = self.region['ulow'] uhigh = self.region['uhigh'] if not ((ulow <= u).all() and (uhigh >= u).all()): # does not even lie in our primitive rectangle # do not even need to compute the distances return False members = self.region['members'] maxdistance = self.region['maxdistance'] # if not initialized: no prefiltering if maxdistance is None: return True # compute distance to each member in each dimension dists = scipy.spatial.distance.cdist(members, [u], metric='euclidean') dist_criterion = dists < maxdistance # is it true for at least one? closeby = dist_criterion.any() return closeby
[docs] def are_inside_rect(self, u): """ Check if the new points are near or inside one of our clusters """ ulow = self.region['ulow'] uhigh = self.region['uhigh'] mask = numpy.logical_and(((ulow <= u).all(axis=1), (uhigh >= u).all(axis=1)))
def are_inside_cluster(self, us, ndim): members = self.region['members'] maxdistance = self.region['maxdistance'] dists = scipy.spatial.distance.cdist(members, us, metric='euclidean') dist_criterion = dists < maxdistance # is it true for at least one? closeby = dist_criterion.any(axis=0) return closeby def adapt(self): pass def local_step_sample(self, i, u1, x1, L1, Lmin, priortransform, loglikelihood, u): assert L1 >= Lmin maxdistance = self.region['maxdistance'] k = 0 # draw new random velocity if hasattr(self, 'velocities'): self.velocities[i] = random_unit_vector(ndim) if self.plot > 0: plt.figure(i) plt.gca().add_artist(plt.Circle((u1[0], u1[1]), maxdistance, color='grey')) plt.plot(u[:,0], u[:,1], 'o', color='grey') x = [u1[0]] y = [u1[1]] for j in range(self.nsteps): u2, x2, L2, k2 = self.step(i, u1, x1, L1, Lmin, priortransform, loglikelihood) k += k2 u1, x1, L1 = u2, x2, L2 if self.plot > 0: x.append(u1[0]) y.append(u1[1]) if self.plot > 0: plt.plot(x, y, 'x-', color='g') plt.gca().set_aspect(aspect='equal', adjustable='datalim') if i < 15: plt.savefig('galilean_%03d.pdf' % i) plt.close() return u1, x1, L1, k def generate_direct(self, ndim): # draw directly from prior ntotal = 0 maxdistance = self.region['maxdistance'] assert maxdistance is not None members = self.region['members'] while True: us = numpy.random.uniform(self.region['ulow'], self.region['uhigh'], size=(100, ndim)) ntotal += 100 self.ndirect += 100 mask = self.are_inside_cluster(us, ndim) self.ndirect_accepts += mask.sum() if not mask.any(): continue us = us[mask] for u in us: yield u, ntotal ntotal = 0 def generate_from_friends(self, ndim): # for small regions draw from points ntotal = 0 maxdistance = self.region['maxdistance'] assert maxdistance is not None members = self.region['members'] N = 400 while True: # choose random friend us = members[numpy.random.randint(0, len(members), N),:] ntotal += 100 # draw direction around it direction = numpy.random.normal(0, 1, size=(N, ndim)) direction = direction / ((direction**2).sum(axis=1)**0.5).reshape((-1,1)) # choose radius: volume gets larger towards the outside # so give the correct weight with dimensionality radius = maxdistance * numpy.random.uniform(0, 1, size=(N,1))**(1./ndim) us = us + direction * radius inside = numpy.logical_and((us >= 0).all(axis=1), (us <= 1).all(axis=1)) if not inside.any(): continue us = us[inside] # count the number of points this is close to dists = scipy.spatial.distance.cdist(members, us, metric='euclidean') nnear = (dists < maxdistance).sum(axis=0) # accept with probability 1./nnear coin = numpy.random.uniform(size=len(us)) accept = coin < 1. / nnear us = us[accept] for u in us: yield u, ntotal ntotal = 0 def draw_constrained(self, Lmin, priortransform, loglikelihood, ndim, previous, startu, startx, startL, starti, **kwargs): # compute RadFriends spheres u = numpy.array([u for u, _, L in previous if L >= Lmin]) rebuilt = False if self.region is None or len(previous) % 50 == 0: # jackknife, is fast maxdistance = nearest_rdistance_guess(u, metric='euclidean') #maxdistance = find_rdistance(u, nbootstraps=50, metric='euclidean', verbose=False) # make sure we only shrink if self.region is not None and 'maxdistance' in self.region: maxdistance = min(maxdistance, self.region['maxdistance']) #print 'new distance:', maxdistance # compute enclosing rectangle for quick checks ulow = numpy.max([u.min(axis=0) - maxdistance, numpy.zeros(ndim)], axis=0) uhigh = numpy.min([u.max(axis=0) + maxdistance, numpy.ones(ndim)], axis=0) self.region = dict(members=u, maxdistance=maxdistance, ulow=ulow, uhigh=uhigh) self.direct_generator = self.generate_direct(ndim=ndim) self.friends_generator = self.generate_from_friends(ndim=ndim) rebuilt = True ntoaccept = 0 if self.phase == 0: # draw from rectangle until sphere rejection drops below 1% for u, ntotal in self.direct_generator: x = priortransform(u) L = loglikelihood(x) self.nfriends += 1 ntoaccept += 1 if L >= Lmin: if ntotal >= 200: print 'Drawing from prior is becoming inefficient: %d draws before accept' % ntotal print if ntoaccept >= 200: print 'RadFriends is becoming inefficient: %d draws until accept' % ntoaccept print self.nfriends_accepts += 1 return u, x, L, ntoaccept if ntotal >= 1000: # drawing directly from prior # becomes inefficient as we go to # small region # switch to drawing from Friends print 'switching to Friends sampling phase' print self.phase = 1 break if ntoaccept >= 2000: print 'switching to local steps sampling phase' print self.phase = 2 # drawing using RadFriends can become # inefficient in high dimensionality # switch to local step sampling break if self.phase == 1: # draw from spheres until acceptance rate drops below 0.05% for u, ntotal in self.friends_generator: x = priortransform(u) L = loglikelihood(x) self.nfriends += 1 ntoaccept += 1 if L >= Lmin: if ntoaccept >= 200: print 'RadFriends is becoming inefficient: %d draws until accept' % ntoaccept print self.nfriends_accepts += 1 return u, x, L, ntoaccept if ntoaccept >= 2000: print 'switching to local steps sampling phase' print self.phase = 2 # drawing using RadFriends can become # inefficient in high dimensionality # switch to local step sampling break #print 'falling through...' # then do local step sampling i = starti # particle ID u1, x1, L1, k = self.local_step_sample(i, startu, startx, startL, Lmin, priortransform, loglikelihood, u) #self.stats() self.adapt() return u1, x1, L1, k + ntoaccept def stats(self): pass
[docs]class GalileanRadFriendsConstrainer(HybridRadFriendsConstrainer): """ Galilean Nested Sampling is a special case of Nested Sampling using Hamiltonian Monte Carlo. Here we use RadFriends to determine the reflection surfaces. """ def __init__(self, nlive_points, ndim, velocity_scale = 0.5, nsteps = 20, plot = False): HybridRadFriendsConstrainer.__init__(self, nsteps = nsteps, plot = plot) self.velocity_scale = velocity_scale self.velocities = [] # random direction initialisation for i in range(nlive_points): self.velocities.append(random_unit_vector(ndim)) self.velocities = numpy.array(self.velocities) self.nproceed = 0 self.nreflect = 0 self.nreverse = 0 self.nproceed_total = 0 self.nreflect_total = 0 self.nreverse_total = 0
[docs] def reflect(self, u, v): """ Find reflection surface for line u + t*v, t>0 """ # Find set of RadFriend spheres that lie on the line # compute line-sphere intersection with every point members = self.region['members'] r = self.region['maxdistance'] intervals = [] for c in members: uc = u - c v2 = dot(v, v) uc2 = dot(uc, uc) r2 = dot(r, r) vuc = dot(v, uc) root = vuc**2 - v2 * (uc2 - r2) if root < 0: continue if self.plot > 3: plt.gca().add_artist(plt.Circle((c[0], c[1]), r, color='r', alpha=0.1)) t1 = (-vuc - root**0.5) / v2 t2 = (-vuc + root**0.5) / v2 #if True: # start = v * t1 + u # end = v * t2 + u # print 'enclosing:', start, end # plt.plot([start[0], end[0]], [start[1], end[1]], '-', color='orange', lw=2) if t1 <= t2: intervals.append((t1, t2, c)) else: intervals.append((t2, t1, c)) # with the list of intervals, compute the overlapping interval # from the start point to find the last sphere # sort intervals by starting value intervals.sort(key=getfirst) # start and end of interval A, B, C = (0, 0, c) for t1, t2, c in intervals: if t2 < A: continue # wrong direction if B < t1: break # end of continuous interval. if B < t2: # extend interval B = t2 C = c assert C is not None #print 'line interval:', A, B # now we found the final sphere, C # return the normalisation vector, which is between # the intersection point and the center of the sphere p = B * v + u if self.plot > 2: plt.gca().add_artist(plt.Circle((C[0], C[1]), r, color='b', alpha=0.2)) plt.plot(p[0], p[1], '+', color='b') #if not numpy.allclose(C, p): n = C - p # normalise n = n / norm(n) v2 = v - 2 * n * dot(n, v) #else: # # we are starting at the exact center of the sphere, # # and reflecting off the sphere # # so we just go backwards, as the sphere is perpendicular # # everywhere. # print 'center of sphere reflection' # v2 = -v if self.plot > 2: plt.plot([p[0], (v2+p)[0]], [p[1], (p+v2)[1]], '-', color='b', lw=3, alpha=0.2) return u + v, v2 ## if the reflected point is further outside than the ## original point u + v, use u + v. # not allowed by detailled balance! #if B > 1: # return u + v, v2 #else: # return p, v2
def step(self, i, u1, x1, L1, Lmin, priortransform, loglikelihood): # guess for stepsize dice = numpy.random.uniform(0, 1) timestep = 0.1 if dice < 0.3 else 1 scale = self.region['maxdistance'] * self.velocity_scale * timestep #print 'stepsize:', stepsize, self.velocity_scale, maxdistance v = self.velocities[i] * scale # Start with (ui, v) where L(x1) is OK # go along u2 = u1 + v k = 0 # check if inside inside_superset = self.is_inside(u2) inside = False if inside_superset: x2 = priortransform(u2) L2 = loglikelihood(x2) k = k + 1 inside = L2 >= Lmin if inside: # if L2 is ok, proceed with u2, v self.nproceed += 1 #print u1, v, ' -- proceed -->', u2, v return u2, x2, L2, k elif self.plot > 1: plt.plot(u2[0], u2[1], 's', color='r') # not inside. # we need to reflect. # find reflection surface ureflect, vreflect = self.reflect(u1, v) # go there from the outside point # here we use a point a bit closer in, the boundary point u3 = ureflect + vreflect inside_superset = self.is_inside(u3) inside = False if inside_superset: x3 = priortransform(u3) L3 = loglikelihood(x3) k = k + 1 inside = L3 >= Lmin if inside: # update velocity, with mild randomisation #small_angle = pi / 180 / 10 #r = numpy.random.normal(size=len(vreflect)) #r /= norm(r) #vreflect = vreflect * cos(small_angle) + r * scale * sin(small_angle) self.velocities[i] = vreflect / scale self.nreflect += 1 #print u1, v, ' -- reflect -->', u3, vreflect return u3, x3, L3, k elif self.plot > 1: plt.plot(u3[0], u3[1], '^', color='r') # reflected point is also not inside -- very bad. # try reversing from starting point small_angle = pi / 180 r = numpy.random.normal(size=len(vreflect)) r /= norm(r) v = v * cos(small_angle) + r * scale * sin(small_angle) self.velocities[i] = -v / scale #print u1, v, ' -- reflect -->', u3, '-- reverse -->', u1, -v self.nreverse += 1 return u1, x1, L1, k def adapt(self): n = self.nproceed + self.nreflect + self.nreverse pr = self.nproceed * 1. / n N = len(self.velocities) rr = self.nreverse * 1. * N / n if pr < 0.6: # too many proceeds self.velocity_scale /= 1.1 print 'velocity' + ' '*50 + 'v' elif pr > 0.85: self.velocity_scale *= 1.1 print 'velocity' + ' '*50 + '^' else: if rr > 0.10: # too many reverse, decrease scale self.velocity_scale /= 1.01 elif rr < 0.03: # few reverses, can increase scale self.velocity_scale *= 1.01 # else: # just right self.nproceed_total += self.nproceed self.nreflect_total += self.nreflect self.nreverse_total += self.nreverse # reset self.nproceed = 0 self.nreflect = 0 self.nreverse = 0 def stats(self): n = self.nproceed_total + self.nreflect_total + self.nreverse_total if n == 0: return velocity_scale = self.velocity_scale print 'GalileanConstrainer stats: nsteps: %d, %.3f%% proceeds, %.3f%% reflects, %.5f%% reverses ' % ( n, self.nproceed_total * 100. / n, self.nreflect_total * 100. / n, self.nreverse_total * 100. / n) if self.nproceed_total * 1. / n < 0.5: print 'velocity_scale = %s (too large!, too few proceeds)' % velocity_scale elif self.nproceed_total * 1. / n > 0.75: print 'velocity_scale = %s (too small!, too many proceeds)' % velocity_scale else: print 'velocity_scale = %s (good by proceeds)' % velocity_scale N = len(self.velocities) if self.nreverse_total * 1. / n * N < 0.25: print 'velocity_scale = %s (too large!, too few reverse)' % velocity_scale elif self.nreverse_total * 1. / n * N > 5: print 'velocity_scale = %s (too small!, too many reverse)' % velocity_scale else: print 'velocity_scale = %s (good by reverses)' % velocity_scale
[docs]class MCMCRadFriendsConstrainer(HybridRadFriendsConstrainer): """ Markov Chain Monte Carlo sampling Here we use RadFriends to restrict the proposal distribution. """ def __init__(self, proposal_scale = 3, nsteps = 20, plot = False): HybridRadFriendsConstrainer.__init__(self, nsteps = nsteps, plot = plot) self.proposal_scale = proposal_scale self.naccepts = 0 self.nrejects = 0 self.naccepts_total = 0 self.nrejects_total = 0 self.nskip = 0 def adapt(self): n = self.naccepts + self.nrejects proposal_scale = self.proposal_scale ar = self.naccepts * 100. / n boost = self.nskip * 100. / n if ar < 85: print 'proposal scale %.2f acceptance rate: %.2f%% %50s' % (self.proposal_scale, ar, 'v') self.proposal_scale /= 1.01 elif ar > 95: print 'proposal scale %.2f acceptance rate: %.2f%% %50s' % (self.proposal_scale, ar, '^') self.proposal_scale *= 1.01 self.naccepts_total += self.naccepts self.nrejects_total += self.nrejects self.naccepts = 0 self.nrejects = 0 self.nskip = 0 def step(self, i, u1, x1, L1, Lmin, priortransform, loglikelihood): scale = self.region['maxdistance'] * self.proposal_scale # restricted proposal while True: step = numpy.random.normal(0, scale, size=len(u1)) u2 = u1 + step inside_superset = self.is_inside(u2) if inside_superset: break else: self.nskip += 1 # check if inside x2 = priortransform(u2) L2 = loglikelihood(x2) k = 1 inside = L2 >= Lmin if inside: # if L2 is ok, accept the point self.naccepts += 1 return u2, x2, L2, k elif self.plot > 1: plt.plot(u2[0], u2[1], 's', color='r') # not inside. self.nrejects += 1 return u1, x1, L1, k def stats(self): n = self.naccepts_total + self.nrejects_total if n == 0: return proposal_scale = self.proposal_scale ar = self.naccepts_total * 100. / n boost = self.nskip * 100. / n print 'MCMCConstrainer stats: nsteps: %d, %.3f%% accepts, %.3f%% skipped (%d)' % ( n, ar, boost, self.nskip) if ar < 85: print 'proposal_scale = %s (too large!, too few accepts)' % proposal_scale elif ar > 95: print 'proposal_scale = %s (too small!, too many accepts)' % proposal_scale else: print 'proposal_scale = %s (good by accepts)' % proposal_scale
if __name__ == '__main__': # gauss likelihood, sample at 1sigma ndim = 10 Lmin = -1 numpy.random.seed(1) def priortransform(u): return u def loglikelihood(x): return -0.5 * (((x - 0.5)/0.3)**2).sum() startpoint = numpy.random.uniform(0, 1, size=ndim) live_points = [] while len(live_points) < 100: p = numpy.random.uniform(0, 1, size=ndim) if loglikelihood(p) >= Lmin: live_points.append((p, p, Lmin)) print 'have live points' #constrainer = GalileanRadFriendsConstrainer(100, ndim, # velocity_scale = 1, nsteps = 200, plot=3) constrainer = MCMCRadFriendsConstrainer(proposal_scale = 1, nsteps = 40, plot=3) #constrainer.phase = 2 for starti, (startu, startx, startL) in enumerate(live_points): u, x, L, k = constrainer.draw_constrained(Lmin=Lmin, priortransform=priortransform, loglikelihood=loglikelihood, ndim=ndim, startu=startu, startx=startx, startL=startL, starti=starti, previous=live_points) #print u, L #break constrainer.stats()