# Source code for ultranest.store

# noqa: D400 D205
"""
Storage for nested sampling points
-----------------------------------

The information stored is a table with

- the likelihood threshold drawn from
- the likelihood, prior volume coordinates and physical coordinates of the point

"""

from __future__ import division, print_function

import contextlib
import os
import warnings

import numpy as np



# [docs]
class NullPointStore:
    """No storage.

    Offers the same methods as the file-backed point stores, but keeps
    nothing: it only counts how many rows were passed to :py:meth:`add`
    and remembers the most recent *ncalls* value.

    Attributes
    ----------
    ncols: int
        number of columns given at construction
    nrows: int
        number of rows passed to :py:meth:`add` so far
    stack_empty: bool
        always True; there are never stored points to hand out
    ncalls: int
        *ncalls* value of the most recent :py:meth:`add` call (0 initially)

    """

    def __init__(self, ncols):
        """Mock initialisation.

        Parameters
        ----------
        ncols: int
            number of columns per row (converted with ``int``)

        """
        self.ncols = int(ncols)
        self.nrows = 0
        self.stack_empty = True
        self.ncalls = 0

    def reset(self):
        """Do nothing."""

    def close(self):
        """Do nothing."""

    def flush(self):
        """Do nothing."""

    def add(self, row, ncalls):
        """Increases the number of "stored" points.

        Parameters
        ----------
        row: list
            point data; ignored, nothing is stored
        ncalls: int
            call counter to remember in :py:attr:`ncalls`

        Returns
        -------
        index: int
            zero-based position the row would have had (0 for the first
            call, 1 for the second, ...)

        """
        index = self.nrows
        self.nrows = index + 1
        self.ncalls = ncalls
        return index

    def pop(self, Lmin):
        """Return no point (None, None).

        Parameters
        ----------
        Lmin: float
            loglikelihood threshold; ignored

        """
        return None, None





# [docs]
class FilePointStore:
    """Base class for storing points in a file.

    This class defines no ``__init__``. Its methods rely on the following
    attributes being set on the instance before they are called:

    - ``stack``: list of ``(index, row)`` pairs, where ``row[0]`` is the
      likelihood threshold the point was drawn from (Lmin) and ``row[1]``
      is the point's likelihood (L),
    - ``stack_empty``: bool, whether ``stack`` has no entries,
    - ``fileobj``: object providing ``close()`` and ``flush()``.
    """

    def reset(self):
        """Reset stack to loaded data.

        Useful when Lmin is not reset to a lower value.

        Only the ``stack_empty`` flag is recomputed from the current
        contents of ``stack``; the stack itself is left untouched.
        """
        self.stack_empty = not self.stack

    def close(self):
        """Close file."""
        self.fileobj.close()

    def flush(self):
        """Flush file to disk."""
        self.fileobj.flush()

    def pop(self, Lmin):
        """Request from the storage a point sampled from <= Lmin with L > Lmin.

        The stack is scanned front to back and the first matching entry
        is removed and returned.

        Parameters
        ----------
        Lmin: float
            loglikelihood threshold

        Returns
        -------
        index: int
            index of the point, None if no point exists
        point: array
            point values, None if no point exists

        """
        if self.stack_empty:
            return None, None

        # look forward to see if there is an exact match
        # if we do not use the exact matches
        #   this causes a shift in the loglikelihoods
        for position, (_, candidate) in enumerate(self.stack):
            row_Lmin = candidate[0]
            L = candidate[1]
            if row_Lmin <= Lmin and L > Lmin:
                # returning right after the removal keeps the iteration safe
                idx, row = self.stack.pop(position)
                self.stack_empty = not self.stack
                return idx, row

        self.stack_empty = not self.stack
        return None, None





# [docs]
class TextPointStore(FilePointStore):
    """Storage in a text file.

    Stores previously drawn points above some likelihood contour,
    so that they can be reused in another run.

    The format is a tab separated text file.
    Through the fmt and delimiter attributes the output can be altered.
    """

    def __init__(self, filepath, ncols):
        """Load and append to storage at *filepath*.

        The file should contain *ncols* columns (Lmin, L, and others).

        Parameters
        ----------
        filepath: str
            path of the text file; existing rows are loaded first, then
            the file is opened in binary append mode for new rows
        ncols: int
            number of columns per row (converted with ``int``)

        """
        self.ncols = int(ncols)
        self.nrows = 0
        self.stack_empty = True
        self._load(filepath)
        self.fileobj = open(filepath, 'ab')  # noqa: SIM115
        self.fmt = '%.18e'
        self.delimiter = '\t'

    def _load(self, filepath):
        """Load from data file *filepath*.

        Lines that cannot be parsed as floats, or that do not have exactly
        ``ncols`` values, are skipped with a warning. A missing file, or
        an IOError while reading, results in fewer (possibly zero) rows
        rather than an exception.

        Sets ``stack`` to the loaded rows paired with their running index,
        and sets both ``nrows`` and ``ncalls`` to the number of loaded rows.
        """
        stack = []
        if os.path.exists(filepath):
            with contextlib.suppress(IOError), open(filepath) as f:
                for line in f:
                    try:
                        parts = [float(p) for p in line.split()]
                    except ValueError:
                        warnings.warn("skipping unparsable line in '%s'" % (filepath), stacklevel=3)
                        continue
                    if len(parts) != self.ncols:
                        warnings.warn("skipping lines in '%s' with different number of columns" % (filepath), stacklevel=3)
                        continue
                    stack.append(parts)

        self.stack = list(enumerate(stack))
        # Continue numbering after the loaded rows, so that indices returned
        # by add() do not collide with the indices of the loaded points.
        self.nrows = len(self.stack)
        self.ncalls = len(self.stack)
        self.reset()

    def add(self, row, ncalls):
        r"""Add data point *row* = [Lmin, L, \*otherinfo] to storage.

        The row is appended to the file as one line, formatted with the
        ``fmt`` and ``delimiter`` attributes. It is not added to ``stack``.

        Parameters
        ----------
        row: list
            values to store; must have exactly ``ncols`` entries
        ncalls: int
            call counter to remember in ``ncalls``

        Returns
        -------
        index: int
            index assigned to the new row

        Raises
        ------
        ValueError
            if *row* does not have ``ncols`` entries

        """
        if len(row) != self.ncols:
            raise ValueError("expected %d values, got %d in %s" % (self.ncols, len(row), row))
        np.savetxt(self.fileobj, [row], fmt=self.fmt, delimiter=self.delimiter)
        index = self.nrows
        self.nrows = index + 1
        self.ncalls = ncalls
        return index





# [docs]
class HDF5PointStore(FilePointStore):
    """Storage in a HDF5 file.

    Stores previously drawn points above some likelihood contour,
    so that they can be reused in another run.

    The format is a HDF5 file, which grows as needed.
    """

    # Process-wide registry of (filepath, file object) pairs opened by
    # instances of this class; shared by all instances on purpose.
    FILES_OPENED = []

    def __init__(self, filepath, ncols, **h5_file_args):
        """Load and append to storage at filepath.

        File contains *ncols* columns in 'points' dataset (Lmin, L, and others).
        h5_file_args are passed on to h5py.File.

        Parameters
        ----------
        filepath: str
            path of the HDF5 file
        ncols: int
            number of columns per row (converted with ``int``)
        h5_file_args: dict
            keyword arguments for ``h5py.File``; ``mode`` defaults to 'a'

        """
        import h5py
        self.ncols = int(ncols)
        self.stack_empty = True
        h5_file_args.setdefault('mode', 'a')

        # An annoying part of jupyter notebooks is that they keep all the variables
        # This means a old pointstore can survive, as we don't usually close them
        # Opening a new one with the same path will then fail with
        #    Unable to create file (unable to truncate a file which is already open)
        # even when overwriting/truncating (mode='w')
        # To avoid this problem, we keep track of all the files opened in this process
        # and when another HDF5PointStore instance is created with the same path,
        # we close the old one. Further operations on it will then likely fail.
        still_registered = []
        for other_path, other_file in HDF5PointStore.FILES_OPENED:
            if other_path == filepath:
                other_file.close()
            else:
                still_registered.append((other_path, other_file))
        # Build the surviving entries separately instead of removing items
        # while iterating, then update the shared list object in place.
        HDF5PointStore.FILES_OPENED[:] = still_registered

        self.fileobj = h5py.File(filepath, **h5_file_args)
        HDF5PointStore.FILES_OPENED.append((filepath, self.fileobj))
        self._load()

    def _load(self):
        """Load from data file.

        Creates an empty, row-wise growable 'points' dataset if the file
        has none. Sets ``nrows`` to the number of stored rows, ``stack`` to
        the stored rows paired with their row index, and ``ncalls`` to the
        file attribute 'ncalls' (falling back to the number of rows).

        Raises
        ------
        IOError
            if the stored dataset does not have ``ncols`` columns

        """
        if 'points' not in self.fileobj:
            self.fileobj.create_dataset(
                'points', dtype=float,
                shape=(0, self.ncols), maxshape=(None, self.ncols))

        self.nrows, ncols = self.fileobj['points'].shape
        if ncols != self.ncols:
            # report the file name, not the repr of the file object
            raise IOError("Tried to resume from file '%s', which has a different number of columns!" % (self.fileobj.filename))
        points = self.fileobj['points'][:]
        self.stack = list(enumerate(points))
        self.ncalls = self.fileobj.attrs.get('ncalls', len(self.stack))
        self.reset()

    def add(self, row, ncalls):
        r"""Add data point *row* = [Lmin, L, \*otherinfo] to storage.

        The 'points' dataset is grown by one row and *row* is written into
        it. The row is not added to ``stack``.

        Parameters
        ----------
        row: list
            values to store; must have exactly ``ncols`` entries
        ncalls: int
            call counter; written to the file attribute 'ncalls' when it
            differs from the current ``ncalls``

        Returns
        -------
        index: int
            row index of the new point in the 'points' dataset

        Raises
        ------
        ValueError
            if *row* does not have ``ncols`` entries

        """
        if len(row) != self.ncols:
            raise ValueError("expected %d values, got %d in %s" % (self.ncols, len(row), row))

        index = self.nrows
        # make space:
        self.fileobj['points'].resize(index + 1, axis=0)
        # insert:
        self.fileobj['points'][index, :] = row
        if self.ncalls != ncalls:
            self.ncalls = self.fileobj.attrs['ncalls'] = ncalls
        self.nrows = index + 1
        return index