# noqa: D400 D205
"""
Storage for nested sampling points
-----------------------------------
The information stored is a table with
- the likelihood threshold drawn from
- the likelihood, prior volume coordinates and physical coordinates of the point
"""
from __future__ import division, print_function
import contextlib
import os
import warnings
import numpy as np
[docs]
class NullPointStore:
    """Point store that keeps no points and only maintains counters.

    It provides ``reset``, ``close``, ``flush``, ``add`` and ``pop`` so it
    can be used where a storing point store is expected, but nothing is
    ever written or returned.
    """

    def __init__(self, ncols):
        """Initialise the counters; no file is opened.

        Parameters
        ----------
        ncols: int
            number of columns per row (converted with ``int``)
        """
        self.ncols = int(ncols)
        # rows "added" so far and the most recently reported call count
        self.nrows = self.ncalls = 0
        # there is never anything to pop from this store
        self.stack_empty = True

    def reset(self):
        """Do nothing; there is no stack to reset."""

    def close(self):
        """Do nothing; there is no file to close."""

    def flush(self):
        """Do nothing; there is nothing to write out."""

    def add(self, row, ncalls):
        """Count a point without storing it.

        Parameters
        ----------
        row: array
            point values; ignored
        ncalls: int
            call count to remember in ``self.ncalls``

        Returns
        -------
        index: int
            zero-based running number of this point
        """
        index = self.nrows
        self.nrows = index + 1
        self.ncalls = ncalls
        return index

    def pop(self, Lmin):
        """Return no point, i.e. always ``(None, None)``.

        Parameters
        ----------
        Lmin: float
            loglikelihood threshold; ignored
        """
        return (None, None)
[docs]
class FilePointStore:
    """Shared behaviour of point stores that are backed by a file.

    The methods read ``self.stack`` (a list of ``(index, row)`` pairs),
    ``self.stack_empty`` and ``self.fileobj``. None of these is created
    here; they have to be set up elsewhere (e.g. by a subclass).
    """

    def reset(self):
        """Recompute whether the stack still holds any points.

        The stack contents themselves are left untouched.
        """
        self.stack_empty = not self.stack

    def close(self):
        """Close the underlying file object."""
        self.fileobj.close()

    def flush(self):
        """Flush the underlying file object to disk."""
        self.fileobj.flush()

    def pop(self, Lmin):
        """Remove and return a stored point usable at threshold *Lmin*.

        A row qualifies when its first entry (the threshold it was drawn
        from) is <= Lmin and its second entry (its loglikelihood) is
        > Lmin. The first qualifying row in stack order is taken.

        Parameters
        ----------
        Lmin: float
            loglikelihood threshold

        Returns
        -------
        index: int
            index of the point, None if no point exists
        point: array
            point values, None if no point exists
        """
        if self.stack_empty:
            return None, None

        # Scan the whole stack instead of only its head: skipping rows
        # that match exactly would shift the loglikelihoods.
        for position, (_, candidate) in enumerate(self.stack):
            threshold, loglike = candidate[0], candidate[1]
            if not (threshold <= Lmin and loglike > Lmin):
                continue
            index, row = self.stack.pop(position)
            break
        else:
            index = row = None

        self.stack_empty = not self.stack
        return index, row
[docs]
class TextPointStore(FilePointStore):
    """Storage in a text file.

    Stores previously drawn points above some likelihood contour,
    so that they can be reused in another run.

    The format is a tab separated text file.
    Through the fmt and delimiter attributes the output can be altered.
    """

    def __init__(self, filepath, ncols):
        """Load and append to storage at *filepath*.

        The file should contain *ncols* columns (Lmin, L, and others).

        Parameters
        ----------
        filepath: str
            path of the text file; rows already in it are loaded, new rows
            are appended
        ncols: int
            number of columns each row must have
        """
        self.ncols = int(ncols)
        self.nrows = 0
        self.stack_empty = True
        self._load(filepath)
        # Binary append mode: np.savetxt writes the rows in add().
        self.fileobj = open(filepath, 'ab')  # noqa: SIM115
        self.fmt = '%.18e'
        self.delimiter = '\t'

    def _load(self, filepath):
        """Load from data file *filepath*.

        Lines that cannot be parsed as floats, or that do not have
        ``self.ncols`` columns, are skipped with a warning. A missing file
        or an IOError while reading is tolerated; whatever was read up to
        that point is kept.

        Sets ``self.stack``, ``self.nrows``, ``self.ncalls`` and
        ``self.stack_empty``.
        """
        rows = []
        if os.path.exists(filepath):
            with contextlib.suppress(IOError), open(filepath) as f:
                for line in f:
                    # only the float conversion is expected to fail
                    try:
                        parts = [float(p) for p in line.split()]
                    except ValueError:
                        warnings.warn("skipping unparsable line in '%s'" % (filepath), stacklevel=3)
                        continue
                    if len(parts) != self.ncols:
                        warnings.warn("skipping lines in '%s' with different number of columns" % (filepath), stacklevel=3)
                        continue
                    rows.append(parts)

        self.stack = list(enumerate(rows))
        # Continue the numbering after the loaded rows, so that indices
        # returned by add() do not collide with the indices pop() returns
        # for loaded rows.
        self.nrows = len(rows)
        self.ncalls = len(self.stack)
        self.reset()

    def add(self, row, ncalls):
        r"""Add data point *row* = [Lmin, L, \*otherinfo] to storage.

        Parameters
        ----------
        row: array
            values to store; must have ``self.ncols`` entries
        ncalls: int
            call count to remember in ``self.ncalls``

        Returns
        -------
        index: int
            index of the stored point

        Raises
        ------
        ValueError
            if *row* does not have ``self.ncols`` entries
        """
        if len(row) != self.ncols:
            raise ValueError("expected %d values, got %d in %s" % (self.ncols, len(row), row))
        np.savetxt(self.fileobj, [row], fmt=self.fmt, delimiter=self.delimiter)
        index = self.nrows
        self.nrows = index + 1
        self.ncalls = ncalls
        return index
[docs]
class HDF5PointStore(FilePointStore):
    """Storage in a HDF5 file.

    Stores previously drawn points above some likelihood contour,
    so that they can be reused in another run.

    The format is a HDF5 file, which grows as needed.
    """

    # (filepath, file object) pairs of the files opened through this class
    # in the current process. Deliberately shared by all instances.
    FILES_OPENED = []

    def __init__(self, filepath, ncols, **h5_file_args):
        """Load and append to storage at filepath.

        File contains *ncols* columns in 'points' dataset (Lmin, L, and others).
        h5_file_args are passed on to h5py.File.

        Parameters
        ----------
        filepath: str
            path of the HDF5 file
        ncols: int
            number of columns each row must have
        h5_file_args: dict
            keyword arguments for h5py.File; 'mode' defaults to 'a'
        """
        import h5py

        self.ncols = int(ncols)
        self.stack_empty = True
        h5_file_args.setdefault('mode', 'a')

        # An annoying part of jupyter notebooks is that they keep all the variables
        # This means a old pointstore can survive, as we don't usually close them
        # Opening a new one with the same path will then fail with
        # Unable to create file (unable to truncate a file which is already open)
        # even when overwriting/truncating (mode='w')
        # To avoid this problem, we keep track of all the files opened in this process
        # and when another HDF5PointStore instance is created with the same path,
        # we close the old one. Further operations on it will then likely fail.
        #
        # The registry is rebuilt instead of popped from while iterating:
        # removing an element during enumerate() skips the entry after it.
        still_open = []
        for filepath2, fileobj2 in HDF5PointStore.FILES_OPENED:
            if filepath == filepath2:
                fileobj2.close()
            else:
                still_open.append((filepath2, fileobj2))
        # slice assignment keeps the identity of the shared list
        HDF5PointStore.FILES_OPENED[:] = still_open

        self.fileobj = h5py.File(filepath, **h5_file_args)
        HDF5PointStore.FILES_OPENED.append((filepath, self.fileobj))
        self._load()

    def close(self):
        """Close file and drop it from ``FILES_OPENED``."""
        self.fileobj.close()
        # do not keep a reference to the closed file in the registry
        HDF5PointStore.FILES_OPENED[:] = [
            entry for entry in HDF5PointStore.FILES_OPENED
            if entry[1] is not self.fileobj]

    def _load(self):
        """Load from data file.

        Creates an empty, row-extendable 'points' dataset if the file has
        none. Sets ``self.nrows``, ``self.stack``, ``self.ncalls`` and
        ``self.stack_empty``.

        Raises
        ------
        IOError
            if the 'points' dataset does not have ``self.ncols`` columns
        """
        if 'points' not in self.fileobj:
            self.fileobj.create_dataset(
                'points', dtype=float,
                shape=(0, self.ncols), maxshape=(None, self.ncols))

        self.nrows, ncols = self.fileobj['points'].shape
        if ncols != self.ncols:
            raise IOError("Tried to resume from file '%s', which has a different number of columns!" % (self.fileobj.filename))
        points = self.fileobj['points'][:]

        self.stack = list(enumerate(points))
        # fall back to the number of stored rows if no 'ncalls' attribute was saved
        self.ncalls = self.fileobj.attrs.get('ncalls', len(self.stack))
        self.reset()

    def add(self, row, ncalls):
        r"""Add data point *row* = [Lmin, L, \*otherinfo] to storage.

        Parameters
        ----------
        row: array
            values to store; must have ``self.ncols`` entries
        ncalls: int
            call count; written to the file's 'ncalls' attribute when it
            differs from ``self.ncalls``

        Returns
        -------
        index: int
            row index of the stored point in the 'points' dataset

        Raises
        ------
        ValueError
            if *row* does not have ``self.ncols`` entries
        """
        if len(row) != self.ncols:
            raise ValueError("expected %d values, got %d in %s" % (self.ncols, len(row), row))

        points = self.fileobj['points']
        # make space:
        points.resize(self.nrows + 1, axis=0)
        # insert:
        points[self.nrows, :] = row

        if self.ncalls != ncalls:
            self.ncalls = self.fileobj.attrs['ncalls'] = ncalls

        self.nrows += 1
        return self.nrows - 1