Source code for quinn.utils.xutils

#!/usr/bin/env python
"""Collection of various useful utilities."""

import sys
import os
import itertools
import numpy as np
try:
    import dill as pk
except ModuleNotFoundError:
    import pickle as pk
import matplotlib as mpl

from scipy import stats
from scipy.stats.mstats import mquantiles
from scipy.interpolate import interp1d


[docs]
def idt(x):
    """Identity function.

    Args:
        x (any type): input

    Returns:
        any type: output
    """
    return x


####################################################################
####################################################################


[docs]
def savepk(sobj, nameprefix='savestate'):
    """Pickle a python object.

    Args:
        sobj (any type): Object to be pickled.
        nameprefix (str, optional): Name prefix.
    """
    with open(nameprefix + '.pk', 'wb') as f:
        pk.dump(sobj, f, protocol=pk.HIGHEST_PROTOCOL)




[docs]
def loadpk(nameprefix='savestate'):
    """Unpickle a python object from a pickle file.

    Args:
        nameprefix (str, optional): Filename prefix

    Returns:
        any type: Unpickled object
    """
    with open(nameprefix + '.pk', 'rb') as f:
        return pk.load(f)


####################################################################
####################################################################


[docs]
def cartes_list(somelists):
    """Generate a list of all combination of elements in given lists.

    Args:
        somelists (list): List of lists
    Returns:
        list[tuple]: List of all combinations of elements in lists that make up somelists
    Example:
        >>> cartes_list([['a', 'b'], [3, 4, 2]])
        [('a', 3), ('a', 4), ('a', 2), ('b', 3), ('b', 4), ('b', 2)]

    """

    # final_list = []
    # for element in itertools.product(*somelists):
    #     final_list.append(element)

    final_list = list(itertools.product(*somelists))

    return final_list


####################################################################
####################################################################


[docs]
def read_textlist(filename, nsize, names_prefix=''):
    """Read a textfile into a list containing the rows.

    Args:
        filename (str): File name
        nsize (int): Number of rows in the file
        names_prefix (str, optional): Prefix of a dummy list entry names if the file is not present.

    Returns:
        list[str]: List of elements that are rows of the file
    """
    if os.path.exists(filename):
        with open(filename) as f:
            names = f.read().splitlines()
            assert(len(names) == nsize)
    else:
        names = [names_prefix + '_' + str(i) for i in range(1, nsize + 1)]

    return names


####################################################################
####################################################################


[docs]
def sample_sphere(center=None, rad=1.0, nsam=100):
    """Sample on a hypersphere of a given radius.

    Args:
        center (np.ndarray, optional): Center of the sphere. Defaults to origin.
        rad (float, optional): Radius of the sphere. Defaults to 1.0.
        nsam (int, optional): Number of samples requested. Defaults to 100.

    Returns:
        np.ndarray: Array of size `(N,d)`
    """
    if center is None:
        center = np.zeros((3,))
    dim = center.shape[0]

    samples = np.random.randn(nsam, dim)
    samples /= np.linalg.norm(samples, axis=1).reshape(-1,1)
    samples *= rad
    samples += center

    return samples


####################################################################
####################################################################

####################################################################
####################################################################


[docs]
def get_opt_bw(xsam, bwf=1.0):
    """Get the rule-of-thumb optimal bandwidth for kernel density estimation.

    Args:
        xsam (np.ndarray): Data array, `(N,d)`
        bwf (float): Factor behind the scaling optimal rule
    Returns:
        np.ndarray: Array of length `d`, the optimal per-dimension bandwidth
    """
    nsam, ndim = xsam.shape
    xstd = np.std(xsam, axis=0)
    bw=xstd
    bw *= np.power(4./(ndim+2),1./(ndim+4.))
    bw *= np.power(nsam,-1./(ndim+4.))

    bw *= bwf

    #xmin, xmax = np.min(xsam, axis=0), np.max(xsam, axis=0)

    # in case standard deviation is 0
    bw[bw<1.e-16] = 0.5
    return bw


####################################################################
####################################################################


[docs]
def get_pdf(data, target):
    """Compute PDF given data at target points.

    Args:
        data (np.ndarray): an `(N,d)` array of `N` samples in `d` dimensions
        target np.ndarray): an `(M,d)` array of target points

    Returns:
        np.ndarray: PDF values at target
    """
    assert(np.prod(np.var(data, axis=0))>0.0)

    # Python Scipy built-in method of KDE
    kde_py=stats.gaussian_kde(data.T)
    dens=kde_py(target.T)

    # Return the target points and the probability density
    return dens


####################################################################
####################################################################


[docs]
def strarr(array):
    """Turn an array into a neatly formatted one for annotating figures.

    Args:
        array (np.ndarray): 1d array

    Returns:
        list: list of floats with two decimal digits
    """
    return [float("{:0.2f}".format(i)) for i in array]



####################################################################
####################################################################


[docs]
def project(a, b):
    """Project a vector onto another vector in high-d space.

    Args:
        a (np.ndarray): The 1d array to be projected.
        b (np.ndarray): The array to project onto.

    Returns:
        tuple(np.ndarray, np.ndarray): tuple (projection, residual) where projection+residual=a, and projection is orthogonal to residual, and colinear with b.
    """
    assert(a.shape[0]==b.shape[0])
    proj = (np.dot(a, b)/ np.dot(b, b))*b
    resid = a - proj
    return proj, resid


####################################################################
####################################################################


[docs]
def pick_basis(x1, x2, x3, x0=None, random_direction_in_plane=None):
    """Given three points in a high-d space, picks a basis in a plane that goes through these points.

    Args:
        x1 (np.ndarray): 1d array, the first point
        x2 (np.ndarray): 1d array, the second point
        x3 (np.ndarray): 1d array, the third point
        x0 (np.ndarray, optional): 1d array, the central point of basis. Defaults to None, in which case the center-of-mass is selected.
        random_direction_in_plane (np.ndarray, optional): Direction aligned with the first basis. Has to be in the plane already. Defaults to None, in which case a random direction is selected.

    Returns:
        tuple(np.ndarray, np.ndarray, np.ndarray): tuple(origin, e1, e2) of the origin and two basis directions.
    """
    assert(x1.shape==x2.shape and x1.shape==x3.shape)
    if x0 is None:
        x0 = (x1+x2+x3)/3.

    assert(x0.shape==x1.shape)


    # random direction in that plane
    x1230 = np.vstack((x1-x0, x2-x0, x3-x0))
    assert(np.linalg.matrix_rank(x1230)==2)
    if random_direction_in_plane is None:
        random_direction_in_plane = np.dot(np.random.rand(1, 3), x1230)[0]
    random_direction_in_plane /= np.linalg.norm(random_direction_in_plane)
    # TODO: this assertion occasionally fails, e.g. when running all examples in bulk
    #assert(np.linalg.matrix_rank(np.vstack((x1230, random_direction_in_plane)))==2)

    proj_norms = np.empty(3,)
    resid_norms = np.empty(3,)
    for i in range(3):
        proj, resid = project(x1230[i], random_direction_in_plane)
        proj_norms[i] = np.linalg.norm(proj)
        resid_norms[i] = np.linalg.norm(resid)

    pm = np.argmax(proj_norms)
    rm = np.argmax(resid_norms)

    origin = x0
    e1, _ = project(x1230[pm], random_direction_in_plane)
    _, e2 = project(x1230[rm], random_direction_in_plane)

    return origin, e1, e2


####################################################################
####################################################################


[docs]
def safe_cholesky(cov):
    r"""Cholesky decomposition with some error handlers, and using SVD+QR trick in case the covariance is degenerate.

    Args:
        cov (np.ndarray): Positive-definite or zero-determinant symmetric matrix `C`.

    Returns:
        np.ndarray: Lower-triangular factor `L` such that `C=L L^T`.
    """

    dim, dim_ = cov.shape
    assert(dim_==dim)
    assert(np.linalg.norm(cov-cov.T)<1.e-14)

    if np.min(np.linalg.eigvals(cov))<0:
        print("The matrix is not a covariance matrix (negative eigenvalues). Exiting.")
        sys.exit()
    elif np.min(np.linalg.eigvals(cov))<1e-14:
        print("Small/near-zero eigenvalue: replacing Cholesky with SVD+QR.")
        u, s, vd = np.linalg.svd(cov, hermitian=True)
        lower = np.linalg.qr(np.dot(np.diag(np.sqrt(s)),vd))[1].T
        signs = np.sign(np.diag(lower))
        lower = np.dot(lower, np.diag(signs))
    else:
        lower = np.linalg.cholesky(cov)

    assert(np.linalg.norm(cov - np.dot(lower, lower.T)) < 1.e-12)

    return lower