Source code for exatomic.core.basis

# -*- coding: utf-8 -*-
# Copyright (c) 2015-2022, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
Basis Set Representations
##############################
This module provides classes that support representations of various basis sets.
There are a handful of basis sets in computational chemistry, the most common of
which are Gaussian type functions, Slater type functions, and plane waves. The
classes provided by this module support not only storage of basis set data, but
also analytical and discrete manipulations of the basis set.

See Also:
    For symbolic and discrete manipulations see :mod:`~exatomic.algorithms.basis`.
"""
import os, six
import numpy as np
import pandas as pd
from io import StringIO

from exatomic.exa import DataFrame
from exatomic.algorithms.basis import cart_lml_count, spher_lml_count
from exatomic.algorithms.numerical import _tri_indices, _square, Shell


[docs]class BasisSet(DataFrame):
    """
    Stores information about a basis set. Common basis set types in use for
    electronic structure calculations usually consist of Gaussians or Slater
    Type Orbitals (STOs). Both types usually employ atom-centered basis functions,
    where each basis function resides on a given atom with coordinates
    :math:`\\left(A_{x}, A_{y}, A_{z}\\right)`. For Gaussian basis sets, the
    functional form of :math:`f\\left(x, y, z\\right)` is:

    .. math::

        r^{2} = \\left(x - A_{x}\\right)^{2} + \\left(x - A_{y}\\right)^{2} + \\left(z - A_{z}\\right)^{2} \\\\
        f\\left(x, y, z\\right) = \\left(x - A_{x}\\right)^{l}\\left(x - A_{y}\\right)^{m}\\left(z - A_{z}\\right)^{n}e^{-\\alpha r^{2}}

    where :math:`l`, :math:`m`, and :math:`n` are not quantum numbers but positive
    integers (including zero) whose sum defines the orbital angular momentum of
    each function and :math:`alpha` governs the exponential decay of the given
    function. Gaussian basis functions are usually constructed from multiple
    primitive Gaussians, with fixed contraction coefficients. Therefore, a basis
    function consists of the sum of one or more primitive functions:

    .. math::

        g_{i}\\left(x, y, z\\right) = \\sum_{j=1}^{N_{i}}c_{ij}f_{ij}\\left(x, y, z\\right)

    Alternatively, STOs are usually not constructed from linear combinations
    of multiple primitives, and differ from Gaussian type functions in that they
    do not contain an exponent in the :math:`r` term of the exponential decay.
    These functions have 2 main benefits; an adequate description of the cusp
    in the density at the nucleus, and the appropriate long-range decay behavior.

    +-------------------+----------+-------------------------------------------+
    | Column            | Type     | Description                               |
    +===================+==========+===========================================+
    | alpha             | float    | exponent                                  |
    +-------------------+----------+-------------------------------------------+
    | shell             | int      | group of primitives                       |
    +-------------------+----------+-------------------------------------------+
    | set               | int/cat  | unique basis set identifier               |
    +-------------------+----------+-------------------------------------------+
    | d                 | float    | contraction coefficient                   |
    +-------------------+----------+-------------------------------------------+
    | L                 | int      | orbital angular momentum                  |
    +-------------------+----------+-------------------------------------------+
    """
    _columns = ['alpha', 'd', 'shell', 'L', 'set']
    _cardinal = ('frame', np.int64)
    _index = 'function'
    _categories = {'L': np.int64, 'set': np.int64, 'frame': np.int64, 'norm': str}
    _indexes = ['set', 'L']

    @property
    def lmax(self):
        return self['L'].cat.as_ordered().max()

[docs]    def shells(self, program='', spherical=True, gaussian=True):
        """
        Generate a multi-index series of :class:`~exatomic.algorithms.numerical.Shell`
        in the basis set, indexed by set and L.

        Args:
            program (str): which code the basis set comes from
            spherical (bool): expand in ml or cartesian powers
            gaussian (bool): exponential dependence of basis functions

        Returns:
            srs (pd.Series): multi-indexed by set and L
        """
        self.spherical_by_shell(program, spherical)
        grps = self.groupby(self._indexes)
        shells = []
        for (seht, L), grp in grps:
            alphas = grp['alpha'].unique()
            piv = grp.pivot_table(index='alpha', columns='shell', values='d').loc[alphas].fillna(0.)
            args = piv.values.flatten(), alphas, *piv.shape, L, grp['norm'].values[0], gaussian
            shell = Shell(*args, None, None) if gaussian else Shell(*args, grp['r'].values, grp['n'].values)
            shells.append((seht, L, shell))
        return pd.DataFrame(shells, columns=self._indexes + [0])

[docs]    def spherical_by_shell(self, program, spherical=True):
        """Allows for some flexibility in treating shells either as
        cartesian functions or spherical functions (different normalizations).

        Args:
            program (str): which code the basis set comes from
        """
        self['L'] = self['L'].astype(np.int64)
        if program in ['molcas', 'nwchem']:
            self['norm'] = self['L'].apply(lambda L: L > 1)
        else:
            self['norm'] = spherical
        self['L'] = self['L'].astype('category')

[docs]    def functions_by_shell(self):
        """Return a series of n functions per (set, L).
        This does not include degenerate functions."""
        obj = self._revert_categories(inplace=False)
        obj = obj.groupby(self._indexes)['shell'].nunique()
        obj.index.set_names(self._indexes, inplace=True)
        return obj

[docs]    def primitives_by_shell(self):
        """Return a series of n primitives per (set, L).
        This does not include degenerate primitives."""
        obj = self._revert_categories(inplace=False)
        obj = obj.groupby(self._indexes)['alpha'].nunique()
        obj.index.set_names(self._indexes, inplace=True)
        return obj

[docs]    def functions(self, spherical):
        """Return a series of n functions per (set, L).
        This does include degenerate functions."""
        self._revert_categories()
        if spherical:
            mapper = lambda x: spher_lml_count[x]
        else:
            mapper = lambda x: cart_lml_count[x]
        n = self.functions_by_shell()
        ret = n * n.index.get_level_values('L').map(mapper)
        self._set_categories()
        return ret.astype(int)

[docs]    def primitives(self, spherical):
        """Return a series of n primitives per (set, L).
        This does include degenerate primitives."""
        self._revert_categories()
        if spherical:
            mapper = lambda x: spher_lml_count[x]
        else:
            mapper = lambda x: cart_lml_count[x]
        n = self.primitives_by_shell()
        ret = n * n.index.get_level_values('L').map(mapper)
        self._set_categories()
        return ret.astype(int)


[docs]def deduplicate_basis_sets(sets, sp=False):
    """Deduplicate identical basis sets on different centers.

    Args:
        sets (pd.DataFrame): non-unique basis sets
        sp (bool): Whether or not to call _expand_sp (gaussian program only)

    Returns:
        tup (tuple): deduplicated basis sets and basis set map for atom table
    """
    unique, setmap, cnt = [], {}, 0
    sets = sets.groupby('center')
    chk = ['alpha', 'd']
    for center, seht in sets:
        for i, other in enumerate(unique):
            if other.shape != seht.shape: continue
            if np.allclose(other[chk], seht[chk]):
                setmap[center] = i
                break
        else:
            unique.append(seht)
            setmap[center] = cnt
            cnt += 1
    if sp: unique = _expand_sp(unique)
    sets = pd.concat(unique, sort=False).reset_index(drop=True)    # sort=False silences warning
    try: sets.drop([2, 3], axis=1, inplace=True)
    except (KeyError, ValueError): pass
    sets.rename(columns={'center': 'set'}, inplace=True)
    sets['set'] = sets['set'].map(setmap)
    sets['frame'] = 0
    return sets, setmap

def _expand_sp(unique):
    """Currently only used when 'program' == 'gaussian'."""
    expand = []
    for seht in unique:
        if np.isnan(seht[2]).sum() == seht.shape[0]:
            expand.append(seht)
            continue
        sps = seht[2][~np.isnan(seht[2])].index
        shls = len(seht.loc[sps]['shell'].unique())
        dupl = seht.loc[sps[0]:sps[-1]].copy()
        dupl[1] = dupl[2]
        dupl['L'] = 1
        dupl['shell'] += shls
        last = seht.loc[sps[-1] + 1:].copy()
        last['shell'] += shls
        expand.append(pd.concat([seht.loc[:sps[0] - 1],
                                 seht.loc[sps[0]:sps[-1]],
                                 dupl, last], sort=False))   # Silences warning
    return expand


[docs]class BasisSetOrder(DataFrame):
    """
    BasisSetOrder uniquely determines the basis function ordering scheme for
    a given :class:`~exatomic.core.universe.Universe`. This table is provided to
    make transparent the characteristic ordering scheme of various quantum
    codes. Either (L, ml) or (l, m, n) must be provided to have access to
    orbital visualization functionality.

    +-------------------+----------+-------------------------------------------+
    | Column            | Type     | Description                               |
    +===================+==========+===========================================+
    | center            | int      | atomic center                             |
    +-------------------+----------+-------------------------------------------+
    | L                 | int      | orbital angular momentum                  |
    +-------------------+----------+-------------------------------------------+
    | shell             | int      | group of primitives                       |
    +-------------------+----------+-------------------------------------------+
    | ml                | int      | magnetic quantum number                   |
    +-------------------+----------+-------------------------------------------+
    | l                 | int      | power in x                                |
    +-------------------+----------+-------------------------------------------+
    | m                 | int      | power in y                                |
    +-------------------+----------+-------------------------------------------+
    | n                 | int      | power in z                                |
    +-------------------+----------+-------------------------------------------+
    | r                 | int      | power in r (optional - for STOs)          |
    +-------------------+----------+-------------------------------------------+
    | prefac            | float    | prefactor (optional - for STOs)           |
    +-------------------+----------+-------------------------------------------+
    """
    _columns = ['center', 'L']
    _index = 'chi'
    _cardinal = ('frame', np.int64)
    _categories = {'L': np.int64}


[docs]class Overlap(DataFrame):
    """
    Overlap enumerates the overlap matrix elements between basis functions in
    a contracted basis set. Currently nothing disambiguates between the
    primitive overlap matrix and the contracted overlap matrix. As it is
    square symmetric, only n_basis_functions * (n_basis_functions + 1) / 2
    rows are stored.

    See Gramian matrix for more on the general properties of the overlap matrix.

    +-------------------+----------+-------------------------------------------+
    | Column            | Type     | Description                               |
    +===================+==========+===========================================+
    | frame             | int/cat  | non-unique integer                        |
    +-------------------+----------+-------------------------------------------+
    | chi0              | int      | first basis function                      |
    +-------------------+----------+-------------------------------------------+
    | chi1              | int      | second basis function                     |
    +-------------------+----------+-------------------------------------------+
    | coef              | float    | overlap matrix element                    |
    +-------------------+----------+-------------------------------------------+
    """
    _columns = ['chi0', 'chi1', 'coef', 'frame']
    _index = 'index'


[docs]    def square(self, frame=0, column='coef', mocoefs=None, irrep=None):
        """Return a 'square' matrix DataFrame of the Overlap.

        Args:
            column (str): column of coefficients to reshape
            mocoefs (str): alias for `column`
            frame (int): default 0
            irrep (int): irreducible representation if symmetrized
        """
        if mocoefs is not None: column = mocoefs
        if 'irrep' in self.columns:
            if irrep is None:
                irreps, i, j = self.groupby('irrep'), 0, 0
                norb = (irreps.chi0.max() + 1).sum()
                nchi = (irreps.chi1.max() + 1).sum()
                cmat = np.zeros((nchi, norb))
                for irrep, grp in irreps:
                    piv = grp.pivot('chi0', 'chi1', column)
                    ii, jj = piv.shape
                    cmat[i : i + ii, j : j + jj] = piv.values
                    i += ii
                    j += jj
                idx = pd.Index(range(nchi), name='chi0')
                orb = pd.Index(range(norb), name='chi1')
                return pd.DataFrame(cmat, index=idx, columns=orb)
            return self.groupby('irrep').get_group(irrep
                        ).pivot('chi', 'orbital', column)
        sq = _square(self[column].values)
        idx = pd.Index(range(sq.shape[0]), name='chi0')
        orb = pd.Index(range(sq.shape[1]), name='chi1')
        return pd.DataFrame(sq, index=idx, columns=orb)

[docs]    @classmethod
    def from_column(cls, source):
        """Create an Overlap from a file with just the array of coefficients or
        an array of the values directly."""
        # Assuming source is a file of triangular elements of the overlap matrix
        if isinstance(source, np.ndarray):
            vals = source
        elif isinstance(source, six.string_types):
            if os.sep not in source: source = StringIO(source)
            vals = pd.read_csv(source, header=None).values.flatten()
        else:
            # Without a catchall, _tri_indices may through UnboundLocalError
            raise TypeError("Invalid type for source: {}".format(type(source)))
        chi0, chi1 = _tri_indices(vals)
        return cls(pd.DataFrame.from_dict({'chi0': chi0,
                                           'chi1': chi1,
                                           'coef': vals,
                                           'frame': 0}))

[docs]    @classmethod
    def from_square(cls, df):
        ndim = df.shape[0]
        try: arr = df.values
        except AttributeError: arr = df
        arlen = ndim * (ndim + 1) // 2
        ret = np.empty((arlen,), dtype=[('chi0', 'i8'),
                                        ('chi1', 'i8'),
                                        ('coef', 'f8'),
                                        ('frame', 'i8')])
        cnt = 0
        for i in range(ndim):
            for j in range(i + 1):
                ret[cnt] = (i, j, arr[i, j], 0)
                cnt += 1
        return cls(ret)