Source code for exatomic.nbo.inputs

# -*- coding: utf-8 -*-
# Copyright (c) 2015-2022, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
exnbo Input Generator and Parser
###################################
"""
import six
import numpy as np
import pandas as pd
from exatomic.exa import TypedMeta
from exatomic import __version__
from exatomic.base import z2sym
from .editor import Editor
from exatomic.core.atom import Atom
from exatomic.core.frame import Frame
from exatomic.core.basis import BasisSetOrder, Overlap
from exatomic.core.orbital import DensityMatrix, MOMatrix
from exatomic.algorithms.basis import lorder, cart_lml_count, spher_lml_count
from exatomic.algorithms.orbital_util import _check_column
from exatomic.base import sym2z
from itertools import combinations_with_replacement as cwr


from exatomic.algorithms.numerical import _flat_square_to_triangle, _tri_indices



_exaver = 'exatomic.v' + __version__


_header = """\
$GENNBO NATOMS={nat}    NBAS={nbas}  UPPER  BODM BOHR $END
$NBO BNDIDX NLMO AONBO=W AONLMO=W $END
$COORD
{exaver} -- {name} -- tr[P*S] = {check}
{atom}
$END
$BASIS
 CENTER = {center}
  LABEL = {label}
$END
$CONTRACT
 NSHELL = {nshell:>7}
   NEXP = {nexpnt:>7}
  NCOMP = {ncomps}
  NPRIM = {nprims}
   NPTR = {npntrs}
    EXP = {expnts}
{coeffs}
$END"""


_matrices = """
$OVERLAP
{overlap}
$END
$DENSITY
{density}
$END"""


def _nbo_labels():
    """Generate dataframes of NBO label, L, and ml or l, m, n."""
    sph = pd.DataFrame([(L, m) for L in range(7) for m in range(-L, L + 1)],
                       columns=('L', 'ml'))

    # See the NBO 6.0 manual for more details
    # This is the basis function labeling scheme
    # In order of increasing ml from most negative
    # to most positive in the same order as the
    # results from the solid_harmonics code.
    sph['label'] = [1,   101, 102, 103, 251, 253, 255,
                    252, 254, 357, 355, 353, 351, 352,
                    354, 356, 459, 457, 455, 453, 451,
                    452, 454, 456, 458, 561, 559, 557,
                    555, 553, 551, 552, 554, 556, 558,
                    560, 663, 661, 659, 657, 655, 653,
                    651, 652, 654, 656, 658, 660, 662]
    Ls, ls, ms, ns, label = [], [], [], [], []
    # Even NBO 6.0 doesn't support cartesian basis
    # functions with an l value greater than g functions
    for i in range(5):
        start = i * 100 + 1
        label += list(range(start, start + cart_lml_count[i]))
        car = [''.join(i) for i in list(cwr('xyz', i))]
        Ls += [i for k in car]
        ls += [i.count('x') for i in car]
        ms += [i.count('y') for i in car]
        ns += [i.count('z') for i in car]
    car = pd.DataFrame({'L': Ls, 'l': ls, 'm': ms,
                        'n': ns, 'label': label})
    return sph, car

spher, cart = _nbo_labels()

def _get_labels(Ls, mls=None, ls=None, ms=None, ns=None):
    """Get the NBO labels corresponding to L, (ml | l, m, n)."""
    if mls is not None:
        return [spher[(spher['L'] == l) &
                      (spher['ml'] == ml)]['label'].iloc[0]
                      for l, ml in zip(Ls, mls)]
    if ls is not None:
        return [cart[(cart['L'] == L) &
                     (cart['l'] == l) &
                     (cart['m'] == m) &
                     (cart['n'] == n)]['label'].iloc[0]
                     for L, l, m, n in zip(Ls, ls, ms, ns)]

def _clean_coeffs(arr, width=16, decimals=6):
    """Call _clean_to_string for each shell."""
    # Format C(shell) for coeffs
    ls = ['     {} = '.format('C' + l.upper()) for l in lorder]
    # Clean to string by shell
    dat = [''.join([l, _clean_to_string(ar, decimals=decimals, width=width), '\n'])
           for l, ar in zip(ls, arr)]
    # Return the whole minus the last line break
    return ''.join(dat)[:-1]

def _clean_to_string(arr, ncol=4, width=16, decimals='', just=True):
    """Convert a numerical array into nicely formatted text block."""
    # Justify the data arrays with the tags in the template
    pad = ' ' * 10 if just else ''
    # Some flexibility in how this function handles int/floats
    dec = '.' + str(decimals) + 'E' if decimals else decimals
    # A format string for the numbers in the array
    fmt = ''.join(['{:>', str(width), dec, '}'])
    # The formmatted array with tabs and new line breaks
    dat = [''.join(['\n', pad, fmt.format(a)]) if not i % ncol and i > 0
           else fmt.format(a) for i, a in enumerate(arr)]
    return ''.join(dat)

def _obtain_arrays(uni):
    """Get numerical arrays of information from a universe."""
    # Get number of functions by shell
    shells = uni.basis_set.functions_by_shell()
    # This is how many times each L value shows up
    shlcnt = shells.index.get_level_values(0)
    # Add subshells for each time L shows up
    shells = shells.groupby(shlcnt).apply(lambda x: x.sum())

    # Group our basis sets, will be used later
    bases = uni.basis_set[np.abs(uni.basis_set['d']) > 0].groupby('set')
    # Exponents per basis set
    expnts = bases.apply(lambda x: x.shape[0])
    # mapped onto the atoms with each basis set
    if uni.basis_set_order.irrep.max():
        raise Exception("Need to figure out basis desymmetrization.")
    else:
        center = uni.basis_set_order['center'].values.copy()
    kwargs = {'center': center,
              'nshell': uni.atom['set'].map(shells).sum(),
              'nexpnt': uni.atom['set'].map(expnts).sum(),
                   'L': uni.basis_set_order['L'].values}
    kwargs['center'] += 1
    nshell = kwargs['nshell']
    nexpnt = kwargs['nexpnt']

    if uni.meta['spherical']:
        # Spherical basis set
        kwargs.update({'ml': uni.basis_set_order['ml'].values})
        lml_count = spher_lml_count
    else:
        # Cartesian basis set
        kwargs.update({'l': uni.basis_set_order['l'].values,
                       'm': uni.basis_set_order['m'].values,
                       'n': uni.basis_set_order['n'].values})
        lml_count = cart_lml_count

    # For the NBO specicific arrays
    lmax = uni.basis_set['L'].cat.as_ordered().max()
    # ---- There are 3 that are length nshell
    # The number of components per basis function (l degeneracy)
    ncomps = np.empty(nshell, dtype=np.int64)
    # The number of primitive functions per basis function
    nprims = np.empty(nshell, dtype=np.int64)
    # The pointers in the arrays above for each basis funciton
    npntrs = np.empty(nshell, dtype=np.int64)
    # ---- And 2 that are length nexpnt
    # The total number of exponents in the basis set
    expnts = np.empty(nexpnt, dtype=np.float64)
    # The contraction coefficients within the basis set
    ds = np.empty((lmax + 1, nexpnt), dtype=np.float64)
    # The following algorithm must be generalized
    # and simplified by either some bound methods
    # on basis_set attributes
    cnt, ptr, xpc = 0, 1, 0
    for seht in uni.atom['set']:
        b = bases.get_group(seht)
        #for sh, grp in b.groupby('shell'):
        for _, grp in b.groupby('shell'):
            if len(grp) == 0: continue
            ncomps[cnt] = lml_count[grp['L'].values[0]]
            nprims[cnt] = grp.shape[0]
            npntrs[cnt] = ptr
            ptr += nprims[cnt]
            cnt += 1
        for l, d, exp in zip(b['L'], b['d'], b['alpha']):
            expnts[xpc] = exp
            #   i, ang
            for i, _ in enumerate(ds):
                ds[i][xpc] = d if i == l else 0
            xpc += 1
    kwargs.update({'nprims': nprims, 'ncomps': ncomps,
                   'npntrs': npntrs, 'expnts': expnts,
                   'coeffs': ds})
    return kwargs




[docs]class InpMeta(TypedMeta):
    atom = Atom
    basis_set_order = BasisSetOrder
    momatrix = MOMatrix
    frame = Frame
    overlap = Overlap

[docs]class Input(six.with_metaclass(InpMeta, Editor)):

[docs]    def parse_atom(self):
        start = 0
        while True:
            try:
                ln = self[start].split()
                _, _ = int(ln[0]), float(ln[2])
                break
            except:
                start += 1
        stop = self.find_next("$END", start=start, keys_only=True)
        cols = ('Z', 'Zeff', 'x', 'y', 'z')
        atom = self.pandas_dataframe(start, stop, cols)
        atom['frame'] = 0
        atom['symbol'] = atom['Z'].map(z2sym)
        self.atom = atom

[docs]    def parse_basis_set_order(self):
        start = self.find_next("$BASIS", keys_only=True) + 1
        stop = self.find_next("$END", start=start, keys_only=True)
        arr = ' '.join([' '.join(ln.strip().split()) for ln in self[start:stop]])
        arr = np.fromstring(arr.replace('CENTER=', '').replace('LABEL=', ''),
                            sep=' ', dtype=np.int64)
        nbas = arr.shape[0] // 2
        bso = pd.DataFrame.from_dict({'center': arr[:nbas] - 1,
                                      'label': arr[nbas:],
                                      'frame': 0, 'shell': 0})
        tmp = spher.set_index('label')
        bso['L'] = bso['label'].map(tmp['L'].to_dict())
        bso['ml'] = bso['label'].map(tmp['ml'].to_dict())
        self.basis_set_order = bso

[docs]    def parse_overlap(self):
        self._init()
        start = self.find_next("$OVERLAP", keys_only=True) + 1
        stop = self.find_next("$END", start=start, keys_only=True)
        ovl = self.pandas_dataframe(start, stop,
                                    range(len(self[start].split()))).stack().values
        if ovl.shape[0] != self._nbas * (self._nbas + 1) // 2:
            ovl = _flat_square_to_triangle(ovl.stack().values)
        chi0, chi1 = _tri_indices(ovl)
        self.overlap = Overlap.from_dict({'coef': ovl, 'chi0': chi0,
                                          'chi1': chi1, 'frame': 0})

[docs]    def parse_momatrix(self):
        arrs = {}
        kws = ['$FOCK', '$KINETIC', '$DIPOLE', '$DENSITY', '$LCAOMO']
        found = self.find(*kws, keys_only=True)
        nbas = None
        for find, start in found.items():
            start = start[0] + 1
            stop = self.find_next('$END', start=start, keys_only=True)
            arr = self.pandas_dataframe(start, stop, range(len(self[start].split())))
            arrs[find] = arr.stack().values
            if nbas is None: nbas = np.int64(np.sqrt(arrs[find].shape[0]))
        nbas = np.int64(np.sqrt(min((arr.shape[0] for arr in arrs.values()))))
        if nbas != self._nbas:
            print('Warning: matrices may be triangular or square.')
        nbas2 = nbas ** 2
        for key in kws:
            arr = arrs[key]
            nkey = key.strip('$').lower()
            if arr.shape[0] == nbas2:
                arrs[nkey] = arr
            else:
                rpt = arr.shape[0] // (nbas2)
                for i in range(rpt):
                    arrs[nkey + str(i)] = arr[i * nbas2 : i * nbas2 + nbas2]
            del arrs[key]
        arrs['chi'] = np.tile(range(nbas), nbas)
        arrs['orbital'] = np.repeat(range(nbas), nbas)
        arrs['frame'] = 0
        self.momatrix = MOMatrix.from_dict(arrs)

    def _init(self):
        find = self.find_next('NBAS', keys_only=True)
        self._nbas = int(self[find].split('NBAS=')[1].split()[0])


[docs]    @classmethod
    def from_universe(cls, uni, mocoefs=None, orbocc=None, name=''):
        """
        Generate an NBO input from a properly populated universe.
        uni must have atom, basis_set, basis_set_order, overlap,
        momatrix and orbital information.

        Args
            uni (:class:`~exatomic.container.Universe`): containing the above attributes
            mocoefs (str): column name of MO coefficients to use in uni.momatrix
            orbocc (str): column name of occupations in uni.orbital
            name (str): prefix of file name to write

        Returns
            editor (:class:`~exatomic.nbo.Input`)
        """
        for attr in ['overlap', 'momatrix', 'orbital']:
            if not hasattr(uni, attr):
                raise Exception('uni must have "{}" attribute.'.format(attr))
        mocoefs = _check_column(uni, 'momatrix', mocoefs)
        orbocc = mocoefs if orbocc is None and mocoefs != 'coef' else orbocc
        orbocc = _check_column(uni, 'orbital', orbocc)
        p1 = 'Assuming "{}" vector corresponds to "{}" matrix'
        print(p1.format(orbocc, mocoefs))

        kwargs = _obtain_arrays(uni)
        kwargs.update({'exaver': _exaver, 'name': name, 'check': '',
                       'nat': kwargs['center'].max(),
                       'nbas': len(kwargs['center'])})
        columns = ('Z', 'Z', 'x', 'y', 'z')
        if 'Zeff' in uni.atom.columns:
            columns = ('Z', 'Zeff', 'x', 'y', 'z')
        if 'Z' not in uni.atom.columns:
            uni.atom['Z'] = uni.atom.symbol.map(sym2z)
        kwargs['atom'] = uni.atom.to_xyz(columns=columns)
        # Assign appropriate NBO basis function labels
        if 'ml' in kwargs:
            labargs = {'mls': kwargs['ml']}
        else:
            labargs = {'ls': kwargs['l'],
                       'ms': kwargs['m'],
                       'ns': kwargs['n']}
        kwargs['label'] = _get_labels(kwargs['L'], **labargs)
        # Clean the arrays to strings for a text input file
        kwargs['label'] = _clean_to_string(kwargs['label'], ncol=10, width=5)
        kwargs['center'] = _clean_to_string(kwargs['center'], ncol=10, width=5)
        kwargs['ncomps'] = _clean_to_string(kwargs['ncomps'], ncol=10, width=5)
        kwargs['nprims'] = _clean_to_string(kwargs['nprims'], ncol=10, width=5)
        kwargs['npntrs'] = _clean_to_string(kwargs['npntrs'], ncol=10, width=5)
        kwargs['expnts'] = _clean_to_string(kwargs['expnts'], decimals=10, width=18)
        kwargs['coeffs'] = _clean_coeffs(kwargs['coeffs'])

        matargs = {'overlap': '', 'density': ''}
        margs = {'decimals': 15, 'width': 23, 'just': False}
        if 'irrep' in uni.overlap.columns:
            matargs['overlap'] = _clean_to_string(uni.overlap.square().values.flatten(),
                                                  **margs)
        else:
            matargs['overlap'] = _clean_to_string(uni.overlap['coef'].values,
                                                  **margs)
        d = DensityMatrix.from_momatrix(uni.momatrix,
                                        uni.orbital[orbocc].values,
                                        mocoefs=mocoefs)
        #if 'irrep' in uni.overlap.columns:
        matargs['density'] = _clean_to_string(d['coef'].values, **margs)
        kwargs['check'] = np.trace(np.dot(d.square(), uni.overlap.square()))
        print('If {:.8f} is not the correct number of electrons,\n'
              '"{}" vector in uni.orbital may not correspond to "{}" matrix\n'
              'in uni.momatrix'.format(kwargs['check'], orbocc, mocoefs))
        return cls(_header.format(**kwargs) + _matrices.format(**matargs))

    def __init__(self, *args, **kwargs):
        super(Input, self).__init__(*args, **kwargs)