Source code for exatomic.adf.tape21

# -*- coding: utf-8 -*-
# Copyright (c) 2015-2022, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
ADF TAPE21 ASCII converted output
######################################
This module provides the primary (user-facing) parser for an
ASCII-converted TAPE21 file from ADF.
"""

from exatomic.exa.core.container import TypedMeta
from exatomic.exa.core.editor import Editor
from exatomic.core.atom import Atom, Frequency
from exatomic.core.gradient import Gradient
from exatomic.core.tensor import JCoupling, NMRShielding
from exatomic.base import z2sym, sym2isomass
import numpy as np
import pandas as pd
import six

class MissingSection(Exception):
    '''Raised by the parsers when a section they need is not in the file.'''

def _get_isomass(symbol):
    '''
    Build the per-coordinate mass vector for a list of atomic symbols.

    Args:
        symbol (:obj:`list`): List-like object that has the atomic symbols.

    Returns:
        mass (:class:`numpy.ndarray`): Float array in which the mass looked up
            for every symbol is repeated three consecutive times (one entry
            per cartesian component).
    '''
    lookup = sym2isomass(symbol)
    per_atom = [lookup.get(sym) for sym in symbol]
    return np.repeat(per_atom, 3).astype(float)

class Tape21Meta(TypedMeta):
    '''
    Metaclass used by :class:`Tape21`.

    Each class attribute pairs the name of a parsed attribute with the
    data object class it is declared as.
    '''
    # filled by Tape21.parse_atom
    atom = Atom
    # filled by Tape21.parse_frequency
    frequency = Frequency
    # filled by Tape21.parse_gradient
    gradient = Gradient
    # filled by Tape21.parse_j_coupling
    j_coupling = JCoupling
    # filled by Tape21.parse_nmr_shielding
    nmr_shielding = NMRShielding

[docs]class Tape21(six.with_metaclass(Tape21Meta, Editor)):
    '''
    Parser for ADF TAPE21 files that have been converted to an ASCII file
    with ADF's dmpkf utility.

    **All properties are parsed based on the input order.**

    Note:
        This is not yet tested for ADF versions newer than 2017.
    '''

[docs]    @staticmethod
    def rmass_mwc(data, symbol):
        '''
        Calculate the reduced masses from the mass-weighted normal modes. With
        the equation,

        .. math::
            \\mu_i = \\left(\\sum_k^{3N} \\left(\\frac{l_{MWCk,i}}
                                                {\\sqrt{m_k}}\\right)^2\\right)^{-1}

        Note:
            This assumes that the normal modes have already been placed in the
            :code:`['dx', 'dy', 'dz']` columns.

        Args:
            data (:class:`pandas.DataFrame`): Data frame the has the mass-weighted
                                              normal modes.
            symbol (:obj:`list`): List-like object that has the atomic symbols.

        Returns:
            r_mass (:class:`numpy.ndarray`): Array containing the calculated reduced
                                             masses.
        '''
        cols = ['dx', 'dy', 'dz']
        mass = _get_isomass(symbol)
        mass = mass.reshape(data[cols].shape)
        disps = data[cols].values
        r_mass = np.sum(np.square(disps)/mass)
        r_mass = 1/r_mass
        return r_mass

[docs]    @staticmethod
    def rmass_cart(data, symbol):
        '''
        Calculate the reduced masses from the normalized non-mass-weighted cartesian
        normal modes. With the equation,

        .. math::
            \\mu_i = \\left(\\sum_k^{3N} l_{CARTk,i}^2\\right)^{-1}

        Note:
            This assumes that the normal modes have already been placed in the
            :code:`['dx', 'dy', 'dz']` columns.

        Args:
            data (:class:`pandas.DataFrame`): Data frame the has the non-mass-weighted
                                              cartesian normal modes.
            symbol (:obj:`list`): List-like object that has the atomic symbols.

        Returns:
            r_mass (:class:`numpy.ndarray`): Array containing the calculated reduced
                                             masses.
        '''
        cols = ['dx', 'dy', 'dz']
        # get the isotopic masses of the unique atoms
        mass = _get_isomass(symbol)
        mass = mass.reshape(data[cols].shape)
        disps = data[cols].values
        norms = np.linalg.norm(disps*np.sqrt(mass))
        norms = 1/norms
        disps *= norms
        r_mass = np.sum(np.square(disps))
        r_mass = 1/r_mass
        return r_mass

    def _intme(self, fitem, idx=0):
        return int(self[fitem[idx]+1].split()[0])

    def _dfme(self, fitem, dim, idx=0):
        '''
        Read the data block that starts two lines below the matched line
        ``fitem[idx]`` and return its entries as a flat array.

        Args:
            fitem (:obj:`list`): Line numbers of the matched key lines.
            dim (:obj:`int`): Number of values the block is expected to hold.
            idx (:obj:`int`, optional): Which match to use. Defaults to 0.

        Returns:
            :class:`numpy.ndarray`: The stacked values of the data frame built
            from the block.
        '''
        first = fitem[idx] + 2
        # the first data line fixes the column count, capped at ``dim`` for
        # blocks that fit on a single line
        ncol = min(len(self[first].split()), dim)
        last = np.ceil(first + dim / ncol).astype(np.int64)
        # NOTE(review): pandas_dataframe is defined outside this file; it is
        # presumably given the first line, the end line and the column count
        table = self.pandas_dataframe(first, last, ncol)
        return table.stack().values

[docs]    def parse_frequency(self, cart=True):
        '''
        ADF frequency parser.

        Note:
            This will toss a warning if it cannot find the mass-weighted normal modes
            which must be used to generate the displaced structures for vibrational
            averaging. Also, it will be unable to calculate the reduced masses as it will
            have normalized cartesian coordinates where it expects normalized
            mass-weighted cartesian normal modes.

        Args:
            cart (:obj:`bool`, optional): Parse the normalized cartesian coordinates or
                                          the mass-weighted normal modes. Defaults to
                                          :code:`True`.
        '''
        # search flags
        _renorm = "NormalModes_RAW"
        _recartnorm = "Normalmodes"
        _refreq = "Frequencies"
        _refreqexc = r"\bFrequencies\b"
        _rekey = r"\bFreq\b"
        found = self.find(_refreq, _renorm, _recartnorm, keys_only=True)
        key = self.regex(_rekey, _refreqexc, keys_only=True)
        # need to do this to ensure that we only match the data in the Freq block
        found_freq = []
        for k in key[_rekey]:
            for f in found[_refreq]:
                if f-1 == k:
                    found_freq.append(f)
                    break
        if not found_freq:
            return
        found[_refreq] = found_freq
        if not found[_refreq]:
            return
        if not hasattr(self, 'atom'):
            self.parse_atom()
        # get the number of atoms
        nat = self.atom.last_frame.shape[0]
        # get the frequencies
        freq = self._dfme(found[_refreq], nat*3)
        # find where the frequencies are zero
        # these should be the ones that ADF determines to be translations and rotations
        # TODO: need a test case with one imaginary frequency
        low = freq != 0
        # get only the ones that are non-zero
        freq = freq[low]
        nmodes = freq.shape[0]
        freq = np.repeat(freq, nat)
        if found[_renorm] and not cart:
            # get the mass-weighted normal modes
            ndisps = int(self[found[_renorm][0]+1].split()[0])
            normalmodes = self._dfme(np.array(found[_renorm]), ndisps, idx=0)
        elif found[_recartnorm] and cart:
            # get the non-mass-weighted normal modes and toss warning
            ndisps = int(self[found[_recartnorm][0]+1].split()[0])
            normalmodes = self._dfme(np.array(found[_recartnorm]), ndisps, idx=0)
        else:
            raise MissingSection("There was an issue reading the file. Could " \
                                 +"not find the secions 'NormalModes_RAW', " \
                                 +"or 'Normalmodes'. Contents of what was " \
                                 +"found in the file {}".format(found))
        # get the vibrational modes in the three cartesian directions
        # the loop is neede in case there are any negative modes
        # because then the normal mode displacements for the negative mode
        # are listed first and we need those
        dx = []
        dy = []
        dz = []
        for idx in np.where(low)[0]:
            dx.append(normalmodes[idx*nat*3+0:(idx+1)*nat*3+0:3])
            dy.append(normalmodes[idx*nat*3+1:(idx+1)*nat*3+1:3])
            dz.append(normalmodes[idx*nat*3+2:(idx+1)*nat*3+2:3])
        # flatten arrays to vectors
        dx = np.array(dx).flatten()
        dy = np.array(dy).flatten()
        dz = np.array(dz).flatten()
        freqdx = np.repeat(range(nmodes), nat)
        label = np.tile(self.atom['label'], nmodes)
        symbol = np.tile(self.atom['symbol'], nmodes)
        # put the data together
        df = pd.DataFrame({'dx': dx, 'dy': dy, 'dz': dz, 'frequency': freq,
                           'freqdx': freqdx})
        # calculate the reduced masses
        if not cart:
            r_mass = df.groupby(['freqdx']).apply(self.rmass_mwc,
                                                  self.atom['symbol']).values
        else:
            r_mass = df.groupby(['freqdx']).apply(self.rmass_cart,
                                                  self.atom['symbol']).values
        df['r_mass'] = np.repeat(r_mass, nat)
        df['symbol'] = symbol
        df['label'] = label
        # TODO: find out if this is stored in the file anywhere
        df['ir_int'] = 0
        df['frame'] = 0
        self.frequency = df

[docs]    def parse_atom(self, input_order=False):
        '''
        Parse the atom table.

        Args:
            input_order (:obj:`bool`, optional): Parse the atom table
                in the input order format. Defaults to :code:`False`.
        '''
        # search flags
        _reinpatom = "xyz InputOrder"
        _reordatom = "xyz"
        #_regeom = "Geometry"
        _reqtch = "qtch"
        _rentyp = "ntyp"
        _renqptr = "nqptr"
        _reinporder = "atom order index"
        _remass = "mass"
        found = self.find(_reinpatom, _reordatom, _reqtch, _rentyp,
                          _renqptr, _reinporder, _remass, keys_only=True)
        if input_order:
            _reatom = found[_reinpatom]
        else:
            idx = 0
            tmp = found[_reordatom]
            while self[tmp[idx]-1].strip() != 'Geometry': idx += 1
            _reatom = [tmp[idx]]
        ncoords = self._intme(_reatom)
        coords = self._dfme(_reatom, ncoords)
        x = coords[::3]
        y = coords[1::3]
        z = coords[2::3]
        # get the number of atom types
        ntyp = int(self[found[_rentyp][1]+2].split()[0])
        # get the charges for each atom type
        qtch = self._dfme(found[_reqtch], ntyp)
        # get the span of each atom type
        nqptr = self._dfme(found[_renqptr], ntyp+1) - 1
        nat = nqptr.max()
        # get the znum vector from the ordered atom table
        zordered = np.zeros(nat)
        for n in range(ntyp):
            for idx in range(nqptr[n], nqptr[n+1]):
                zordered[idx] = qtch[n]
        if input_order:
            # convert to the input structure
            zinput = np.zeros(nat)
            input_order = self._dfme(found[_reinporder],
                                     nat*2).reshape(2, nat).astype(int) - 1
            # iterate over the input order array as this gives the
            # location of each atom type after
            # the re-ordering done in adf
            for od, inp in zip(input_order[0], range(nat)):
                zinput[inp] = zordered[od]
            Z = zinput.astype(int)
        else:
            Z = zordered
        set = np.array(list(range(nat)))
        symbol = pd.Series(Z).map(z2sym)
        # put it all together
        df = pd.DataFrame.from_dict({'symbol': symbol, 'set': set,
                                     'label': set, 'x': x, 'y': y,
                                     'z': z, 'Z': Z, 'frame': 0})
        self.atom = df

[docs]    def parse_gradient(self, input_order=False):
        ''' Parse the gradients in the input order. '''
        # search flags
        _reinpgrad = "Gradients_InputOrder"
        _refrggrad = "Gradients_CART"
        _reinporder = "atom order index"
        found = self.find(_reinpgrad, _refrggrad, keys_only=True)
        if not found[_refrggrad]:
            return
        if input_order:
            if found[_reinpgrad]:
                _regrad = _reinpgrad
            else:
                msg = "Could not find the 'Gradients_InputOrder'" \
                      +"section."
                raise MissingSection(msg)
        else:
            _regrad = _refrggrad
        # get the atom frame with the selected input_order flag
        # will overwrite what was previously parsed
        self.parse_atom(input_order=input_order)
        symbol = self.atom.last_frame['symbol'].values
        Z = self.atom.last_frame['Z'].values.astype(int)
        # get the gradients
        ngrad = self._intme(np.array(found[_regrad]))
        grad = self._dfme(np.array(found[_regrad]), ngrad)
        x = grad[::3]
        y = grad[1::3]
        z = grad[2::3]
        atom = list(range(len(x)))
        df = pd.DataFrame.from_dict({'Z': Z, 'atom': atom, 'fx': x,
                                     'fy': y, 'fz': z, 'symbol': symbol,
                                     'frame': 0})
        df = df[['atom', 'Z', 'fx', 'fy', 'fz', 'symbol', 'frame']]
        self.gradient = df

[docs]    def parse_nmr_shielding(self):
        ''' Parse the NMR shielding tensors in the input order. '''
        _reiso = "NMR Shieldings InputOrder"
        _retensor = "NMR Shielding Tensor InputOrder"
        found = self.find(_reiso, _retensor, keys_only=True)
        if not found[_reiso]:
            return
        if not hasattr(self, 'atom'):
            self.parse_atom(input_order=True)
        nshield = self._intme(found[_reiso])
        shielding = self._dfme(found[_reiso], nshield)
        ntens = self._intme(found[_retensor])
        tensor = self._dfme(found[_retensor], ntens)
        tensor = tensor.reshape(nshield, 9)
        zeros = list(map(lambda x: all(x != 0), tensor))
        requested = np.where(zeros)[0]
        tensor = tensor[requested]
        shielding = shielding[requested]
        #requested = np.where(shielding != 0)[0]
        cols = ['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz']
        df = pd.DataFrame(tensor, columns=cols)
        df['isotropic'] = shielding
        df['atom'] = requested
        df['symbol'] = self.atom.last_frame.iloc[requested]['symbol'].values
        df['label'] = 'nmr_shielding'
        df['frame'] = 0
        self.nmr_shielding = df

[docs]    def parse_j_coupling(self):
        ''' Parse the J Coupling in the Cartesian representation. '''
        _reiso = "NMR Coupling J const InputOrder"
        _retensor = "NMR Coupling J tens InputOrder"
        found = self.find(_reiso, _retensor, keys_only=True)
        if not found[_reiso]:
            return
        if not hasattr(self, 'atom'):
            self.parse_atom(input_order=True)
        ncoupl = self._intme(found[_reiso])
        natom = np.sqrt(ncoupl)
        coupling = self._dfme(found[_reiso], ncoupl)
        ntens = self._intme(found[_retensor])
        tensor = self._dfme(found[_retensor], ntens)
        requested = np.where(coupling != 0)[0]
        tensor = tensor.reshape(ncoupl, 9)[requested]
        cols = ['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz']
        df = pd.DataFrame(tensor, columns=cols)
        atoms = np.transpose(list(map(lambda x: divmod(x, natom), requested)))
        df['isotropic'] = coupling[coupling != 0]
        df['atom'] = atoms[0].astype(int)
        symbols = self.atom.last_frame['symbol'].values
        if len(symbols) > natom:
            raise NotImplementedError("Cannot deal with more than one atom frame.")
        df['symbol'] = list(map(lambda x: symbols[x], df['atom'].values))
        df['pt_atom'] = atoms[1].astype(int)
        df['pt_symbol'] = list(map(lambda x: symbols[x], df['pt_atom'].values))
        df['label'] = 'j coupling'
        df['frame'] = 0
        self.j_coupling = df