Source code for exatomic.algorithms.pcf

# -*- coding: utf-8 -*-
# Copyright (c) 2015-2022, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
Pair Correlation Functions
############################
"""
import numpy as np
import pandas as pd
from IPython.display import display
from ipywidgets import FloatProgress
from exatomic.exa.util.units import Length
from exatomic.core.universe import Universe


[docs]def radial_pair_correlation(universe, a, b, dr=0.05, start=1.0, stop=13.0,
                            length="Angstrom", window=1):
    """
    Compute the angularly independent pair correlation function.

    This function is sometimes called the pair radial distribution function. The
    quality of the result depends strongly on the amount of two body distances
    computed (see :func:`~exatomic.atom_two.compute_two_body`) in the case of a
    periodic unvierse. Furthermore, the result can be skewed if only a single
    atom a (or b) exists in each frame. In these situations one can use the
    **window** and **dr** parameter to adjust the result accordingly. Reasonable
    values for **dr** range from 0.1 to 0.01 and reasonable values for **window**
    range from 1 to 5 (default is 1 - no smoothing).

    .. code-block:: Python

        pcf = radial_pair_correlation(universe, "O", "O")
        pcf.plot(secondary_y="Pair Count")

    .. math::

        g_{AB}\left(r\\right) = \\frac{V}{4\pi r^{2}\Delta r MN_{A}N_{B}}
        \sum_{m=1}^{M}\sum_{a=1}^{N_{A}}\sum_{b=1}^{N_{B}}Q_{m}
        \left(r_{a}, r_{b}; r, \Delta r\\right)

        Q_{m}\\left(r_{a}, r_{b}; r, \\Delta r\\right) = \\begin{cases} \\
            &1\\ \\ if\\ r - \\frac{\Delta r}{2} \le \left|r_{a} - r_{b}\\right|\lt r + \\frac{\Delta r}{2} \\\\
            &0\\ \\ otherwise \\\\
        \\end{cases}

    Args:
        universe (:class:`~exatomic.Universe`): The universe (with two body data)
        a (str, list, array): First atom type (see Note)
        b (str, list, array): Second atom type (see Note)
        dr (float): Radial step size
        start (float): Starting radial point
        stop (float): Stopping radial point
        length (str): Output unit of length
        window (int): Smoothen data (useful when only a single a or b exist, default no smoothing)

    Returns:
        pcf (:class:`~pandas.DataFrame`): Pair correlation distribution and count

    Note:
        If a, b are strings pairs are determined using atomic symbols. If integers
        or lists/tuples are passed pairs are determined by atomic labels (see
        :func:`~exatomic.core.atom.Atom.get_atom_labels`). Arrays are assumed to
        be index values directly.

    Tip:
        Depending on the type of two body computation (or data) used, the volume
        may not be the cell volume; the normalization factor (the prefactor) is
        the volume sampled during computation of two body properties divided by
        the number of properties used in the histogram (the triple summation
        above, divided by the normalization for the radial distance outward).

    Warning:
        Using a start and stop length different from 0 and simple cubic cell dimension
        will cause the y axis magnitudes to be inaccurate. This can be remedied by
        rescaling values appropriately.
    """
    bins = np.arange(start, stop, dr)                     # Discrete values of r for histogram
    if isinstance(a, str):
        a_idx = universe.atom[universe.atom['symbol'] == a].index.values
    elif isinstance(a, (int, list, tuple, np.int64, np.int32)):
        a = [a] if not isinstance(a, (list, tuple)) else a
        a_idx = universe.atom[universe.atom['label'].isin(a)].index.values
    else:
        a_idx = a
    if isinstance(b, str):
        b_idx = universe.atom[universe.atom['symbol'] == b].index.values
    elif isinstance(a, (int, list, tuple, np.int64, np.int32)):
        b = [b] if not isinstance(b, (list, tuple)) else b
        b_idx = universe.atom[universe.atom['label'].isin(b)].index.values
    else:
        b_idx = b
    if "distance" in universe.atom_two.columns:
        c = "distance"
    else:
        c = "dr"
    distances = universe.atom_two.loc[(universe.atom_two['atom0'].isin(a_idx) &
                                       universe.atom_two['atom1'].isin(b_idx)) |
                                      (universe.atom_two['atom0'].isin(b_idx) &
                                       universe.atom_two['atom1'].isin(a_idx)), c]
    hist, bins = np.histogram(distances, bins)            # Compute histogram
    nn = hist.sum()                                       # Number of observations
    bmax = bins.max()                                     # Note that bins is unchanged by np.hist..
    rx, ry, rz = universe.frame[["rx", "ry", "rz"]].mean().values
    ratio = (((bmax/rx + bmax/ry + bmax/rz)/3)**3).mean() # Variable actual vol and bin vol
    v_shell = bins[1:]**3 - bins[:-1]**3                  # Volume of each bin shell
    if 'cell_volume' in universe.frame.columns:
        v_cell = universe.frame["cell_volume"].mean()         # Actual volume
    elif 'Volume' in universe.frame.columns:
        v_cell = universe.frame["Volume"].mean()         # Actual volume
        c = 'Volume'
    elif 'volume' in universe.frame.columns:
        v_cell = universe.frame["volume"].mean()         # Actual volume
    else:
        v_cell = universe.frame["rx"].max()**3
    g = hist*v_cell*ratio/(v_shell*nn)                    # Compute pair correlation
    numa = len(a_idx)/len(universe)
    numb = len(b_idx)/len(universe)
    n = hist.cumsum()/nn*numa*numb*4/3*np.pi*bmax**3/v_cell
    r = (bins[1:] + bins[:-1])/2*Length["au", length]
    unit = "au"
    if length in ["A", "angstrom", "ang", "Angstrom"]:
        unit = r"\AA"
    rlabel = r"$r\ \mathrm{(" + unit + ")}$"
    glabel = r"$g(r)$"
    nlabel = r"$n(r)$"
    df = pd.DataFrame.from_dict({rlabel: r, glabel: g, nlabel: n})
    if window > 1:
        df = df.rolling(window=window).mean()
        df = df.iloc[window:]
    df.set_index(rlabel, inplace=True)
    return df


[docs]def radial_pcf_out_of_core(hdftwo, hdfout, u, pairs, **kwargs):
    """
    Out of core radial pair correlation calculation.

    Atomic two body data is expected to have been computed (see
    :func:`~exatomic.core.two.compute_atom_two_out_of_core`)
    An example is given below. Note the importance of the definition
    of pairs and the presence of additional arguments.

    .. code:: Python

        radial_pcf_out_of_core("in.hdf", "out.hdf", uni, {"O_H": ([0], "H")},
                               length="Angstrom", dr=0.01)

    Args:
        hdftwo (str): HDF filepath containing atomic two body data
        hdfout (str): HDF filepath to which radial PCF data will be written (see Note)
        u (:class:`~exatomic.core.universe.Universe`): Universe
        pairs (dict): Dictionary of string name keys, values of ``a``, ``b`` arguments (see Note)
        kwargs: Additional keyword arguments to be passed (see Note)

    Note:
        Results will be stored in the hdfout HDF file. Keys are of the form
        ``radial_pcf_key``. The keys of ``pairs`` are used to store the output
        while the values are used to perform the pair correlation itself.
    """
    f = u.atom['frame'].unique()
    n = len(f)
    fp = FloatProgress(description="Computing:")
    display(fp)
    fdx = f[0]
    twokey = "frame_" + str(fdx) + "/atom_two"
    atom = u.atom[u.atom['frame'] == fdx].copy()
    uu = Universe(atom=atom, frame=u.frame.loc[[fdx]],
    atom_two = pd.read_hdf(hdftwo, twokey))
    pcfs = {}
    for key, ab in pairs.items():
        pcfs[key] = radial_pair_correlation(uu, ab[0], ab[1], **kwargs).reset_index()
    fp.value = 1/n*100
    for i, fdx in enumerate(f[1:]):
        twokey = "frame_" + str(fdx) + "/atom_two"
        atom = u.atom[u.atom['frame'] == fdx].copy()
        uu = Universe(atom=atom, frame=u.frame.loc[[fdx]],
        atom_two = pd.read_hdf(hdftwo, twokey))
        for key, ab in pairs.items():
            pcfs[key] += radial_pair_correlation(uu, ab[0], ab[1], **kwargs).reset_index()
        fp.value = (i+1)/n*100
    store = pd.HDFStore(hdfout)
    for key in pairs.keys():
        pcfs[key] /= n
        store.put("radial_pcf_"+key, pcfs[key])
    store.close()
    fp.close()