# -*- coding: utf-8 -*-
# Copyright (c) 2015-2022, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
Molcas Output Parser
#####################
Multiple frames are not currently supported
"""
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
import re
import six
import pandas as pd
import numpy as np
from six import StringIO
from exatomic.exa import TypedMeta
import exatomic
from .editor import Editor
from exatomic.core import Atom, Gradient, Frequency
from exatomic.algorithms.numerical import _flat_square_to_triangle, _square_indices
from exatomic.algorithms.basis import lmap, spher_lml_count
from exatomic.core.basis import (Overlap, BasisSet, BasisSetOrder,
deduplicate_basis_sets)
from exatomic.core.orbital import DensityMatrix, MOMatrix, Orbital
from exatomic.base import sym2z, z2sym
[docs]class Orb(six.with_metaclass(OrbMeta, Editor)):
"""
Parser for molcas coefficient matrix dumps (e.g. RasOrb).
Note:
This parser assumes the file contains data from a single
calculation (i.e. a single frame).
"""
[docs] def to_universe(self):
raise NotImplementedError("This editor has no parse_atom method.")
def _read_one(self, found, keys, start, stop, osh, old, ret, key):
if not old:
# In order for this not to break if someone decides
# to change the print width format, compute it here
ln = self[start]
width = ln[3:].index(' ') + 3
keys['widths'] = [width] * len(ln.split())
keys['names'] = range(len(keys['widths']))
df = pd.read_fwf(StringIO('\n'.join(self[start:stop])), **keys)
if not osh:
ret.update({key: df.stack().dropna().values})
else: # Clean for open shell calcs
rm = found[1] - found[0] - 1
df.drop([rm - 1, rm], inplace=True)
df[0] = df[0].astype(np.float64)
df = df.stack().dropna().astype(np.float64).values
ret.update({key: df[:len(df)//2],
key+'1': df[len(df)//2:]})
[docs] def parse_momatrix(self):
_re_orb = 'ORBITAL'
_re_occ = 'OCCUPATION NUMBERS'
_re_ens = 'ONE ELECTRON ENERGIES'
_re_osh = '#UORB'
_re_idx = '#INDEX'
_re_hmn = 'human'
found = self.find(_re_orb, _re_occ, _re_ens,
_re_osh, _re_idx, keys_only=True)
found[_re_hmn] = [i for i in found[_re_occ]
if _re_hmn in self[i].lower()]
# Dimensions per irrep
dims = list(map(int, self[5].split()))
ndim = sum(dims)
osh = len(found[_re_osh]) > 0
start = found[_re_orb][0] + 1
stops = found[_re_occ] + found[_re_ens]
stop = min(stops) - 1
# Old file format
old = len(self[start].split()) == 1
# MOMatrix table
widths = [18] * 4 if old else [22] * 5
kws = {'widths': widths, 'names': range(len(widths))}
df = pd.read_fwf(StringIO('\n'.join(self[start:stop])),
widths=widths, names=range(len(widths)))
df.drop(df[df[0].str.contains(_re_orb)].index, inplace=True)
# Orbital occupations
mo, orb = {}, {}
start = found[_re_occ][0] + 1
stops = found[_re_hmn] + found[_re_ens] + found[_re_idx] + [len(self) + 1]
stop = min(stops) - 1
self._read_one(found[_re_occ], kws, start, stop,
osh, old, orb, 'occupation')
# Orbital energies
if found[_re_ens]:
start = found[_re_ens][0] + 1
stop = found[_re_idx][0]
self._read_one(found[_re_ens], kws, start, stop,
osh, old, orb, 'energy')
else:
orb.update({'energy': 0})
# Get all the groupby indices
if len(dims) > 1: # Symmetrized calc.
mo.update({
'irrep': [i for i, d in enumerate(dims) for _ in range(d * d)],
'orbital': [i for d in dims for i in range(d) for _ in range(d)],
'chi': [i for d in dims for _ in range(d) for i in range(d)]})
orb.update({'vector': [j for d in dims for j in range(d)],
'irrep': [i for i, d in enumerate(dims) for _ in range(d)]})
else:
ordx, chidx = _square_indices(ndim)
mo.update({'orbital': ordx, 'chi': chidx})
orb.update({'vector': range(ndim)})
# Unused groupby indices
orb.update({'group': 0, 'spin': 0, 'frame': 0})
mo.update({'frame': 0})
if not osh:
df[0] = df[0].astype(np.float64)
mo.update({'coef': df.stack().dropna().values})
else: # Open shell calc.
off = found[_re_orb][0] + 1
df.drop(found[_re_osh][0] - off, inplace=True)
df[0] = df[0].astype(np.float64)
coef = df.stack().dropna().values
mo.update({'coef': coef[:len(coef)//2],
'coef1': coef[len(coef)//2:]})
self.momatrix = pd.DataFrame.from_dict(mo)
self.orbital = pd.DataFrame.from_dict(orb)
def __init__(self, *args, **kwargs):
super(Orb, self).__init__(*args, **kwargs)
[docs]class Output(six.with_metaclass(OutMeta, Editor)):
_resta = "STATE"
def _property_parsing(self, props, data_length):
''' Helper method for parsing the spin-free properties sections. '''
all_dfs = []
for idx, prop in enumerate(props):
# find where the data blocks are printed
starts = np.array(self.find(self._resta, start=prop, keys_only=True)) + prop + 2
# data_length should always be the same
stops = starts + data_length
# use np.ceil as if we have a 7x7 matrix we get one data set
# with 4 columns and one with 3 columns
n = int(np.ceil(data_length/4))
dfs = []
# grab all of the data
# we use all hits up to n as there are many places with the
# 'STATE' keyword so we want to just grab the data starting
# at the property keyword
for ndx, (start, stop) in enumerate(zip(starts[:n], stops[:n])):
# should be four in all but the last data set
ncols = len(self[start-2].split())
df = self.pandas_dataframe(start, stop, ncol=ncols)
# the only column we do not need will always be the
# column with name 0
df.drop(0, axis=1, inplace=True)
# set the columns as they should be
df.columns = list(range(ndx*4, ndx*4+ncols-1))
dfs.append(df)
# put the component together
all_dfs.append(pd.concat(dfs, axis=1))
all_dfs[-1]['component'] = idx
df = pd.concat(all_dfs, ignore_index=True)
return df
def _oscillator_parsing(self, start_idx):
''' Helper method to parse the oscillators. '''
ldx = start_idx + 6
oscillators = []
while '-----' not in self[ldx]:
oscillators.append(self[ldx].split())
ldx += 1
df = pd.DataFrame(oscillators)
df.columns = ['nrow', 'ncol', 'oscil', 'a_x', 'a_y', 'a_z', 'a_tot']
df[['nrow', 'ncol']] = df[['nrow', 'ncol']].astype(int)
df[['nrow', 'ncol']] -= [1, 1]
df[['nrow', 'ncol']] = df[['nrow', 'ncol']].astype('category')
cols = ['oscil', 'a_x', 'a_y', 'a_z', 'a_tot']
df[cols] = df[cols].astype(np.float64)
return df
[docs] def add_orb(self, path, mocoefs='coef', orbocc='occupation'):
"""
Add a MOMatrix and Orbital table to a molcas.Output. If path is
an Editor containing momatrix and orbital tables then adds them
directly, otherwise assumes it is a molcas.Orb file.
Args:
path (str, :class:`exatomic.core.editor.Editor`): path to file or Editor object
mocoefs (str): rename coefficients
orbocc (str): rename occupations
"""
if isinstance(path, exatomic.Editor): orb = path
else: orb = Orb(path)
if mocoefs != 'coef' and orbocc == 'occupation':
orbocc = mocoefs
# MOMatrix
curmo = getattr(self, 'momatrix', None)
if curmo is None:
self.momatrix = orb.momatrix
if mocoefs != 'coef':
self.momatrix.rename(columns={'coef': mocoefs}, inplace=True)
else:
if mocoefs in self.momatrix.columns:
raise ValueError('This action would overwrite '
'coefficients. Specify mocoefs parameter.')
for i, default in enumerate(['coef', 'coef1']):
final = mocoefs + '1' if i else mocoefs
if default in orb.momatrix:
self.momatrix[final] = orb.momatrix[default]
# Orbital
curorb = getattr(self, 'orbital', None)
if curorb is None:
self.orbital = orb.orbital
if orbocc != 'occupation':
self.orbital.rename(columns={'occupation': orbocc}, inplace=True)
else:
if orbocc in self.orbital.columns:
raise ValueError('This action would overwrite '
'occupations. Specify orbocc parameter.')
for i, default in enumerate(['occupation', 'occupation1']):
final = orbocc + '1' if i else orbocc
if default in orb.orbital.columns:
self.orbital[final] = orb.orbital[default]
[docs] def add_overlap(self, path):
try: # If it's an ASCII text file
self.overlap = Overlap.from_column(path)
except Exception: # If it's an HDF5 file
hdf = HDF(path)
if 'DESYM_CENTER_CHARGES' not in hdf._hdf:
self.overlap = hdf.overlap
return
if 'irrep' not in self.momatrix:
raise Exception("Trying to set symmetrized overlap with "
"desymmetrized MOMatrix data.")
ovl = pd.DataFrame(np.array(hdf._hdf['AO_OVERLAP_MATRIX']),
columns=('coef',))
ovl['irrep'] = self.momatrix['irrep']
ovl['chi0'] = self.momatrix['chi']
ovl['chi1'] = self.momatrix['orbital']
ovl['frame'] = 0
self.overlap = ovl
def _check_atom_sym(self):
"""Parses a less accurate atom list to check for symmetry."""
_re_sym = 'Cartesian Coordinates / Bohr, Angstrom'
start = self.find(_re_sym, keys_only=True)[0] + 4
cols = ['center', 'tag', 'x', 'y', 'z', 'd1', 'd2', 'd3']
stop = start
while len(self[stop].split()) == len(cols): stop += 1
atom = self.pandas_dataframe(start, stop, cols)
atom.drop(['d1', 'd2', 'd3'], axis=1, inplace=True)
atom['symbol'] = atom['tag'].str.extract(
'([A-z]{1,})([0-9]*)', expand=False)[0].str.lower().str.title()
atom['frame'] = 0
atom['center'] -= 1
return atom
[docs] def parse_atom(self, seward=True):
"""Parses the atom list generated in SEWARD."""
if seward:
_re_atom0 = 'Label Cartesian Coordinates'
_re_atom1 = 'Center Label'
found = self.find(_re_atom0, _re_atom1, keys_only=True)
if found[_re_atom0]:
accurate = True
starts = [i + 2 for i in found[_re_atom0]]
else:
accurate = False
starts = [i + 1 for i in found[_re_atom1]]
stops = starts[:] # Copy the list
for i in range(len(stops)):
while len(self[stops[i]].strip().split()) > 3:
stops[i] += 1
if not self[stops[i]].strip(): break
stops[i] -= 1
if accurate:
lns = StringIO('\n'.join([self._lines[i] for j in (range(i, j + 1)
for i, j in zip(starts, stops)) for i in j]))
cols = ['tag', 'x', 'y', 'z']
else:
lns = StringIO('\n'.join(self[starts[0]:stops[0] + 1]))
cols = ['center', 'tag', 'x', 'y', 'z', 'xa', 'ya', 'za']
atom = pd.read_csv(lns, delim_whitespace=True,
names=cols)
if len(cols) == 8:
atom.drop(['xa', 'ya', 'za'], axis=1, inplace=True)
atom['symbol'] = atom['tag'].str.extract(
'([A-z]{1,})([0-9]*)', expand=False)[0].str.lower().str.title()
atom['Z'] = atom['symbol'].map(sym2z).astype(np.int64)
atom['center'] = range(atom.shape[0])
atom['frame'] = 0
self.atom = atom
# Work-around for symmetrized calcs?
allatom = self._check_atom_sym()
self.meta['symmetrized'] = False
if allatom.shape[0] > self.atom.shape[0]:
self.atom = allatom
self.meta['symmetrized'] = True
try:
self.atom['utag'] = _add_unique_tags(self.atom)
except ValueError:
pass
else:
_reatom = "Nuclear coordinates for the next iteration / Bohr"
_reatom02 = "Nuclear coordinates of the final structure / Bohr"
found = self.find(_reatom, _reatom02, keys_only=True)
if len(found[_reatom02]) > 1:
raise ValueError("Found more than one final atom table.")
elif len(found[_reatom02]) == 0:
raise ValueError("Could not find a final atom table in the file.")
starts = np.array(found[_reatom])
starts = np.array(list(starts)+found[_reatom02])+3
stop = starts[0]
while self[stop].strip(): stop += 1
stops = starts + (stop - starts[0])
cols = ['symbol', 'x', 'y', 'z']
dfs = []
for idx, (start, stop) in enumerate(zip(starts, stops)):
df = self.pandas_dataframe(start, stop, ncol=4)
df.columns = cols
label = np.concatenate(list(map(lambda x: re.findall(r'\D+', x),
df['symbol'])))
if label.shape[0] > df.shape[0]:
text = "There was an issue with interpreting the labels " \
+"of the atom table. Expected {} labels but " \
+"interpreted {}"
raise ValueError(text.format(df.shape[0], label.shape[0]))
df['symbol'] = label
df['Z'] = df['symbol'].map(sym2z)
df['set'] = range(df.shape[0])
df['frame'] = idx
dfs.append(df)
atom = pd.concat(dfs, ignore_index=True)
self.atom = atom
[docs] def parse_gradient(self):
_regrad = "Molecular gradients"
_reirr = "Irreducible representation:"
found = self.find(_regrad, _reirr, keys_only=True)
if not found[_regrad]:
return
if len(found[_regrad]) != len(found[_reirr]):
text = "Do not have support for multiple irreducible " \
+"representations yet."
raise NotImplementedError(text)
starts = np.array(found[_regrad]) + 8
stop = starts[0]
while self[stop].strip()[:2] != '--': stop += 1
stops = starts + (stop - starts[0])
dfs = []
for idx, (start, stop) in enumerate(zip(starts, stops)):
df = self.pandas_dataframe(start, stop, ncol=4)
cols = ['symbol', 'fx', 'fy', 'fz']
df.columns = cols
label = np.concatenate(list(map(lambda x: re.findall(r'\D+', x),
df['symbol'])))
df['symbol'] = label
df['atom'] = range(df.shape[0])
df['frame'] = idx
df['Z'] = df['symbol'].map(sym2z)
dfs.append(df)
grad = pd.concat(dfs, ignore_index=True)
self.gradient = grad
[docs] def parse_frequency(self, linear=False, normalize=True):
_refreq = "Frequency:"
_reint = "Intensity:"
_remass = "Red. mass:"
_rerot = "Note that rotational and translational degrees " \
+"have been automatically removed,"
found = self.find(_refreq, _reint, _rerot, _remass)
if not found[_refreq]:
return
start = found[_remass][-1][0] + 2
stop = found[_remass][-1][0] + 2
while self[stop].strip(): stop += 1
lines = stop - start
dfs = []
count = 0
arr = zip(found[_refreq], found[_reint], found[_remass])
for idx, ((_, freq), (_, inten), (ldx, mass)) in enumerate(arr):
# parse the normal modes
start = ldx + 2
stop = start + lines
arr = self.pandas_dataframe(start, stop, ncol=8)
arr.dropna(how='all', axis=1, inplace=True)
# extract the atomic symbols
symbols = arr[0].drop_duplicates()
symbols = list(map(lambda x: re.findall(r'\D+', x)[0], symbols))
nat = len(symbols)
if linear and idx == 0 and found[_rerot]:
arr.drop([2, 3, 4, 5, 6], axis=1, inplace=True)
elif not found[_rerot] and idx == 0 and not linear:
print("Found rotational modes in the calculation")
continue
# set the columns where the normal modes are on the table
cols = range(2, arr.columns.max()+1)
arr.drop([0], axis=1, inplace=True)
# re-organize the normal mode data
tmp = arr.groupby(1).apply(lambda x: x[cols].values.T.flatten())
df = pd.DataFrame(tmp.to_dict())
df.columns = ['dx', 'dy', 'dz']
errortext = "Something went wrong when trying to turn the {} " \
+"into floating point numbers."
# get the frequencies
# skip the first element as it is the table label
freqs = freq.split()[1:]
# replace all imaginary signs with '-'
freqs = list(map(lambda x: x.replace('i', '-'), freqs))
# turn them into floats
try:
freqs = list(map(float, freqs))
except ValueError:
print(freqs)
raise ValueError(errortext.format('frequencies'))
# get the reduced masses
# skip the first element as it is the table label
rmass = mass.split()[2:]
# turn them into floats
try:
rmass = list(map(float, rmass))
except ValueError:
print(rmass)
raise ValueError(errortext.format('reduced masses'))
# get the IR intensities
# skip the first element as it is the table label
irint = inten.split()[1:]
# turn them into floats
try:
irint = list(map(float, irint))
except ValueError:
print(irint)
raise ValueError(errortext.format('IR intensities'))
# put everything together
df['symbol'] = np.tile(symbols, len(cols))
df['label'] = np.tile(range(nat), len(cols))
df['Z'] = df['symbol'].map(sym2z)
df['frequency'] = np.repeat(freqs, nat)
df['r_mass'] = np.repeat(rmass, nat)
df['ir_int'] = np.repeat(irint, nat)
df.loc[df['ir_int'].abs() < 1e-9, 'ir_int'] = 0
df['freqdx'] = np.repeat(range(count*6, (count+1)*len(cols)), nat)
count += 1
df['frame'] = 0
if normalize and found[_rerot]:
df[['dx', 'dy', 'dz']] *= np.sqrt(df['r_mass'].values.reshape(-1,1))
elif normalize and not found[_rerot]:
pass
dfs.append(df)
df = pd.concat(dfs, ignore_index=True)
cols = ['Z', 'label', 'dx', 'dy', 'dz', 'frequency', 'freqdx',
'ir_int', 'r_mass', 'symbol', 'frame']
self.frequency = df[cols]
[docs] def parse_basis_set_order(self):
"""
Parses the shell ordering scheme if BSSHOW specified in SEWARD.
"""
_re_bas_order = 'Basis Label Type Center'
starts = [i + 1 for i in self.find(_re_bas_order, keys_only=True)]
lines, irreps, vecs, vec, nsym = [], [], [], 0, 0
for i, start in enumerate(starts):
stop = start
while self[stop].strip():
lines.append(stop)
irreps.append(i)
vecs.append(vec)
nsym = max(nsym, len(self[stop].split()))
stop += 1
vec += 1
lines.append(stop)
vec = 0
symcols = [('ocen{}'.format(i), 'sign{}'.format(i))
for i in range((nsym - 5) // 2)]
if symcols: self.meta['symmetrized'] = True
cols = ['idx', 'tag', 'type', 'center', 'phase'] + \
[c for t in symcols for c in t]
df = pd.read_csv(StringIO('\n'.join(self[i] for i in lines)),
delim_whitespace=True, names=cols)
# Symmetrized basis functions
for col in df.columns:
if col.startswith('ocen'):
df[col] = df[col].fillna(0.).astype(np.int64) - 1
# Extra info for symmetrized calcs
df['irrep'] = irreps
df['vector'] = vecs
df.drop(['idx'], inplace=True, axis=1)
df['frame'] = 0
# 0-based indexing
df['center'] -= 1
# Clean up (L, ml) values to ints
df['L'] = df['type'].str[1].map(lmap)
df['ml'] = df['type'].str[2:]
df['ml'].update(df['ml'].map({'': 0, 'x': 1, 'y': -1, 'z': 0}))
try:
df['ml'].update(df['ml'].str[::-1])
except Exception:
pass
df['ml'] = df['ml'].astype(np.int64)
# Seward and gateway may each print basis info.
# See if this happened and if so, keep only the first half.
try: # Apparently BSSHOW doesn't always print the basis set.
if 'set' not in self.atom.columns:
self.parse_basis_set()
except ValueError:
self.basis_set_order = df
return
bs = self.basis_set
sp = self.meta['spherical']
nbf = self.atom.set.map(bs.functions(sp).groupby('set').sum()).sum()
if df.shape[0] > nbf:
df = df.loc[:nbf - 1]
irreps = irreps[:nbf]
vecs = vecs[:nbf]
self.basis_set_order = df
shls = []
grps = df.groupby(['irrep', 'center', 'L', 'ml'])
for (_, _, L, ml), grp in grps: # (? cen L, ml)
shl = 0
for _ in grp.index:
shls.append(shl)
shl += 1
self.basis_set_order['shell'] = shls
[docs] def parse_basis_set(self):
"""Parses the primitive exponents, coefficients and
shell if BSSHOW specified in SEWARD."""
_re_bas_0 = 'Shell nPrim nBasis Cartesian Spherical Contaminant'
_re_bas_1 = 'Label Cartesian Coordinates / Bohr'
_re_bas_2 = 'No. Exponent Contraction Coefficients'
found = self.find(_re_bas_0, _re_bas_1, _re_bas_2, keys_only=True)
bmaps = [i + 1 for i in found[_re_bas_0]]
atoms = [i + 2 for i in found[_re_bas_1]]
alphs = [i + 1 for i in found[_re_bas_2]]
widths = [11, 7, 8, 11, 10, 12]
names = _re_bas_0.split()
setmap, basmap = {}, []
for seht, (start, atst) in enumerate(zip(bmaps, atoms)):
stop = start
while self[stop].strip(): stop += 1
while self[atst].strip():
setmap[self[atst].split()[0]] = seht
atst += 1
basmap.append(pd.read_fwf(StringIO('\n'.join(self[start:stop])),
widths=widths, header=None, names=names))
basmap[-1]['set'] = seht
self.atom['set'] = self.atom['tag'].map(setmap)
basmap = pd.concat(basmap).reset_index(drop=True)
basmap['Shell'] = basmap['Shell'].map(lmap)
prims, pset, shell = [], 0, 0
for start, seht, L, nprim, nbas in zip(alphs, basmap['set'], basmap['Shell'],
basmap['nPrim'], basmap['nBasis']):
if pset != seht: shell = 0
# In case contraction coefficients overflow to next line
nmatch = len(self[start].split())
neat = nmatch == len(self[start + 1].split())
if neat:
block = self.pandas_dataframe(start, start + nprim, nbas + 2)
else:
# Extra obfuscation to handle exotic cases
ext = 1
while nmatch != len(self[start + ext].split()):
ext += 1
stop = start + ext * nprim
collated = [''.join(self[start + i * ext : start + i * ext + ext])
for i in range(nprim)]
ncols = len(collated[0].split())
block = pd.read_csv(StringIO('\n'.join(collated)),
delim_whitespace=True, names=range(ncols))
alps = (pd.concat([block[1]] * nbas).reset_index(drop=True)
.str.replace('D', 'E').astype(np.float64))
ds = block[list(range(2, nbas + 2))].unstack().reset_index(drop=True)
pdf = pd.concat([alps, ds], axis=1)
pdf.columns = ['alpha', 'd']
pdf['L'] = L
pdf['shell'] = np.repeat(range(shell, shell + nbas), nprim)
pdf['set'] = seht
prims.append(pdf)
shell += nbas
pset = seht
prims = pd.concat(prims).reset_index(drop=True)
prims['frame'] = 0
self.basis_set = prims
self.meta['spherical'] = True
if self.basis_set.lmax < 2:
self.meta['spherical'] = False
[docs] def parse_sf_dipole_moment(self):
''' Get the Spin-Free electric dipole moment from RASSI '''
# define the search string
_retdm = "PROPERTY: MLTPL 1"
component_map = {0: 'x', 1: 'y', 2: 'z'}
found = self.find(_retdm, keys_only=True)
if not found:
return
if len(found) > 6:
# in new version of molcas they introduced
# extra matrices that are printed so this
# was added due to that change
props = np.array(found)[:6:2]
else:
props = np.array(found)[:3]
# find the number of lines that the data spans
stop = props[0] + 5
while self[stop].strip(): stop += 1
data_length = stop - props[0] - 5
# get the data
stdm = self._property_parsing(props, data_length)
stdm['component'] = stdm['component'].map(component_map)
self.sf_dipole_moment = stdm
[docs] def parse_sf_quadrupole_moment(self):
''' Get the Spin-Free electric quadrupole moment from RASSI '''
_requad = "PROPERTY: MLTPL 2"
component_map = {0: 'xx', 1: 'xy', 2: 'xz', 3: 'yy', 4: 'yz', 5: 'zz'}
found = self.find(_requad, keys_only=True)
if not found:
return
# molcas prints the upper triangular tensor
# elements of the full quadrupole tensor
props = np.array(found)[:6]
# find the number of lines that the data spans
stop = props[0] + 5
while self[stop].strip(): stop += 1
data_length = stop - props[0] - 5
# get the data
sqdm = self._property_parsing(props, data_length)
sqdm['component'] = sqdm['component'].map(component_map)
self.sf_quadrupole_moment = sqdm
[docs] def parse_sf_angmom(self):
''' Get the Spin-Free angular momentum from RASSI '''
_reangm = "PROPERTY: ANGMOM"
component_map = {0: 'x', 1: 'y', 2: 'z'}
found = self.find(_reangm, keys_only=True)
if not found:
return
props = np.array(found)[:3]
# find the number of lines that the data spans
stop = props[0] + 5
while self[stop].strip(): stop += 1
data_length = stop - props[0] - 5
# get the data
sangm = self._property_parsing(props, data_length)
sangm['component'] = sangm['component'].map(component_map)
self.sf_angmom = sangm
[docs] def parse_sf_energy(self):
''' Get the Spin-Free energies from RASSI or RASSCF '''
_reenerg = " RASSI State "
_reenerg_rasscf = " RASSCF root number"
found = self.find(_reenerg, _reenerg_rasscf)
key = ''
if found[_reenerg]:
if found[_reenerg_rasscf]:
msg = "Found RASSCF and RASSI Spin-Free energies.\n" \
+"Will only parse the RASSI Spin-Free energies."
print(msg)
key = _reenerg
elif found[_reenerg_rasscf]:
key = _reenerg_rasscf
else:
return
# should not be necessary but you never know
if key == '':
msg = "There was an issue in determining the key to " \
+"be used in the parsing of the SF energies. " \
+"Please open an issue ticket as this should " \
+"never show."
raise ValueError(msg)
energies = []
for _, line in found[key]:
energy = float(line.split()[-1])
energies.append(energy)
# should always be the case that the first energy is the
# ground state minimum energy
rel_energy = list(map(lambda x: x - energies[0], energies))
df = pd.DataFrame.from_dict({'energy': energies, 'rel_energy': rel_energy})
self.sf_energy = df
[docs] def parse_so_energy(self):
''' Get the Spin-Orbit energies from RASSI '''
_reenerg = " SO-RASSI State "
found = self.find(_reenerg)
if not found:
return
energies = []
for _, line in found:
energy = float(line.split()[-1])
energies.append(energy)
rel_energy = list(map(lambda x: x - energies[0], energies))
df = pd.DataFrame.from_dict({'energy': energies, 'rel_energy': rel_energy})
self.so_energy = df
[docs] def parse_sf_oscillator(self):
''' Get the printed Spin-Free oscillators from RASSI '''
# TODO: check how this has to be adjusted for different
# molcas print levels
_reosc = "++ Dipole transition strengths (spin-free states):"
found = self.find(_reosc, keys_only=True)
if not found:
return
if len(found) > 1:
raise NotImplementedError("We have found more than one key for the spin-free " \
+"oscillators.")
df = self._oscillator_parsing(found[0])
self.sf_oscillator = df
[docs] def parse_so_oscillator(self):
''' Get the printed Spin-Orbit oscillators from RASSI '''
# TODO: check how this has to be adjusted for different
# molcas print levels
_reosc = "++ Dipole transition strengths (SO states):"
found = self.find(_reosc, keys_only=True)
if not found:
return
if len(found) > 1:
raise NotImplementedError("We have found more than one key for the spin-orbit " \
+"oscillators.")
df = self._oscillator_parsing(found[0])
self.so_oscillator = df
[docs] def parse_natural_occ(self):
# define search string for natural occupations
_renoo = "Natural orbitals and occupation numbers for root"
# define search string for energies per root
_reeng = "energy="
found = self.find(_renoo, _reeng)
if not found[_renoo]:
return
# get the occupations, roots, and energies of each root
occs = {}
roots = []
energies = []
for (nooldx, nooline), (_, engline) in zip(found[_renoo], found[_reeng]):
energy = float(engline.split('=')[-1].strip())
energies.append(energy)
root = list(map(int, re.findall(r'\d+', nooline)))
# just an error check should never actually happen
if len(root) != 1:
raise ValueError("Found more than one group of numbers where one " \
+"number should have been listed for the root. " \
+"Found line {}".format(nooline))
roots.append(root[0])
# get the natural occupations
dldx = nooldx+1
vals = {}
while self[dldx].strip() != '':
d = self[dldx].split()
if d[0] == 'sym':
arr = list(map(float, d[2:]))
symm = int(d[1].replace(':', ''))
if root[0] == 1:
occs[symm] = []
vals[symm] = []
else:
arr = list(map(float, d))
vals[symm].append(arr)
dldx += 1
for key, item in vals.items():
occs[key].append(list(np.concatenate(item)))
dfs = []
for symm, occ in occs.items():
df = pd.DataFrame(occ)
df['symmetry'] = symm
df['root'] = roots
df['energy'] = energies
df['rel_energy'] = df['energy'] - df['energy'].values.min()
dfs.append(df)
df = pd.concat(dfs, ignore_index=True)
self.natural_occ = df.sort_values(by=['symmetry', 'root']).reset_index(drop=True)
[docs] def parse_caspt2_energy(self):
# parsing strings
_resspt2 = " CASPT2 Root"
_remspt2 = " MS-CASPT2 Root"
_reref = "Reference energy:"
_reweight = "Reference weight:"
_reroot = "Compute H0 matrices for state"
_respin = "Spin quantum number"
found = self.find(_resspt2, _reref, _reweight, _reroot, _respin,
_remspt2)
if not found[_resspt2]:
return
# TODO: this needs to be verified as it assumes that there is
# only one spin/multiplicity in the system
# do not know if this is always the case
# get the spin quantum number of the system
spin = float(found[_respin][0][1].split()[-1])
# get the multiplicity
mult = np.repeat(int(2*spin + 1), len(found[_remspt2]))
sspt2 = [] # SSPT2 energies
mspt2 = [] # MSPT2 energies
reference = [] # reference energies from the wavefunction file
weights = [] # reference weight (not sure what it actually is)
roots = [] # root index
arr = zip(found[_resspt2], found[_reref], found[_reweight],
found[_reroot], found[_remspt2])
# grab all of the values we are interested in
for (_, ss), (_, ref), (_, weight), (_, root), (_, ms) in arr:
sspt2.append(float(ss.split()[-1]))
reference.append(float(ref.split()[-1]))
weights.append(float(weight.split()[-1]))
roots.append(int(re.findall(r'\d+', root)[-1]))
mspt2.append(float(ms.split()[-1]))
df = pd.DataFrame.from_dict({'sspt2_au': sspt2, 'mspt2_au': mspt2,
'ras_au': reference, 'weight': weights,
'root': roots, 'mult': mult})
self.caspt2_energy = df
def __init__(self, *args, **kwargs):
super(Output, self).__init__(*args, **kwargs)
[docs]class HDF(six.with_metaclass(HDFMeta, object)):
_getter_prefix = 'parse'
_to_universe = Editor._to_universe
[docs] def to_universe(self):
return self._to_universe()
[docs] def parse_atom(self):
ztag = 'CENTER_CHARGES'
xtag = 'CENTER_COORDINATES'
ltag = 'CENTER_LABELS'
self.meta['symmetrized'] = False
if 'DESYM_CENTER_CHARGES' in self._hdf:
self.meta['symmetrized'] = True
ztag = 'DESYM_' + ztag
xtag = 'DESYM_' + xtag
ltag = 'DESYM_' + ltag
Z = pd.Series(self._hdf[ztag]).astype(np.int64)
xyzs = np.array(self._hdf[xtag])
labs = pd.Series(self._hdf[ltag]).apply(
lambda s: s.decode('utf-8').strip())
self.atom = pd.DataFrame.from_dict({'Z': Z,
'x': xyzs[:, 0],
'y': xyzs[:, 1],
'z': xyzs[:, 2],
'center': range(len(Z)),
'symbol': Z.map(z2sym),
'label': labs,
'frame': 0})
if self.meta['symmetrized']:
symops = {'E': np.array([ 1., 1., 1.]),
'x': np.array([-1., 0., 0.]),
'y': np.array([ 0., -1., 0.]),
'z': np.array([ 0., 0., -1.]),
'xy': np.array([-1., -1., 0.]),
'xz': np.array([-1., 0., -1.]),
'yz': np.array([ 0., -1., -1.]),
'xyz': np.array([-1., -1., -1.])}
self.meta['symops'] = symops
self.atom[['tag', 'symop']] = self.atom['label'].str.extract('(.*):(.*)',
expand=True)
self.atom['tag'] = self.atom['tag'].str.strip()
self.atom['symop'] = self.atom['symop'].str.strip()
try:
self.atom['utag'] = _add_unique_tags(self.atom)
except ValueError:
pass
[docs] def parse_basis_set_order(self):
bso = np.array(self._hdf['BASIS_FUNCTION_IDS'])
df = {'center': bso[:, 0] - 1,
'shell': bso[:, 1] - 1,
'L': bso[:, 2],
'frame': 0}
if bso.shape[1] == 4:
df['ml'] = bso[:, 3]
else:
df['l'] = bso[:, 3]
df['m'] = bso[:, 4]
df['n'] = bso[:, 5]
self.basis_set_order = pd.DataFrame.from_dict(df)
[docs] def parse_orbital(self):
ens = np.array(self._hdf['MO_ENERGIES'])
self.orbital = pd.DataFrame.from_dict({
'energy': ens, 'vector': range(len(ens)),
'occupation': np.array(self._hdf['MO_OCCUPATIONS']),
'label': pd.Series(self._hdf['MO_TYPEINDICES']).apply(
lambda s: s.decode('utf-8')),
'frame': 0, 'group': 0, 'spin': 0})
[docs] def parse_overlap(self):
if 'symmetrized' not in self.meta: self.parse_atom()
key = 'AO_OVERLAP_MATRIX'
if not self.meta['symmetrized']:
self.overlap = Overlap.from_column(
_flat_square_to_triangle(np.array(self._hdf[key])))
else:
print('Symmetrized overlap indices not set correctly.')
self.overlap = Overlap.from_dict({
'coef': np.array(self._hdf[key]),
'chi0': 0, 'chi1': 0, 'frame': 0
})
[docs] def parse_basis_set(self):
if 'symmetrized' not in self.meta: self.parse_atom()
bset = np.array(self._hdf['PRIMITIVES'])
idxs = np.array(self._hdf['PRIMITIVE_IDS'])
bset = pd.DataFrame.from_dict({
'alpha': bset[:, 0], 'd': bset[:, 1],
'center': idxs[:, 0] - 1, 'L': idxs[:, 1],
'shell': idxs[:, 2] - 1
})
self.no_dup = bset
if self.meta['symmetrized']:
self.basis_set, atommap = deduplicate_basis_sets(bset)
self.atom['set'] = self.atom['center'].map(atommap)
else:
self.basis_set = bset.rename(columns={'center': 'set'})
[docs] def parse_momatrix(self):
if 'MO_VECTORS' not in self._hdf: return
coefs = np.array(self._hdf['MO_VECTORS'])
try:
symm = np.array(self._hdf['DESYM_MATRIX'])
print('Symmetry not supported on HDF yet.')
return
except KeyError:
dim = np.int64(np.sqrt(coefs.shape[0]))
self.momatrix = pd.DataFrame.from_dict({
'orbital': np.repeat(range(dim), dim), 'frame': 0,
'chi': np.tile(range(dim), dim), 'coef': coefs})
def __getitem__(self, key):
if isinstance(key, str):
return getattr(self, key)
raise KeyError()
def __init__(self, *args, **kwargs):
try:
import h5py
except ImportError:
print("You must install h5py for access to HDF5 utilities.")
return
if not os.path.isfile(args[0]):
print('Argument is likely incorrect file path.')
self._hdf = h5py.File(args[0], 'r')
self.meta = {'gaussian': True, 'program': 'molcas'}
def _add_unique_tags(atom):
"""De-duplicates atom identifier in symmetrized calcs."""
utags = []
for tag in atom['tag']:
utag = tag
while utag in utags:
utag = ''.join(filter(str.isalpha, utag)) + \
str(int(''.join(filter(str.isdigit, utag))) + 1)
utags.append(utag)
return utags
[docs]def parse_molcas(fp, momatrix=None, overlap=None, occvec=None, **kwargs):
"""
Will parse a Molcas output file. Optionally it will attempt
to parse additional information obtained from the same directory
from specified Orb files or the AO overlap matrix and density matrix.
If density keyword is specified, the momatrix keyword is ignored.
Args:
fp (str): Path to output file
momatrix (str): file name of the C matrix of interest
overlap (str): file name of the overlap matrix
occvec (str): an occupation vector
Returns:
parsed (Editor): contains many attributes similar to the
exatomic universe
"""
uni = Output(fp, **kwargs)
adir = os.sep.join(fp.split(os.sep)[:-1])
if momatrix is not None:
fp = os.sep.join([adir, momatrix])
if os.path.isfile(fp):
orb = Orb(fp)
uni.momatrix = orb.momatrix
uni.occupation_vector = orb.occupation_vector
occvec = occvec if occvec is not None else orb.occupation_vector
d = DensityMatrix.from_momatrix(orb.momatrix, occvec)
uni.density = d
else:
print('Is {} in the same directory as {}?'.format(momatrix, fp))
if overlap is not None:
fp = os.sep.join([adir, overlap])
if os.path.isfile(fp): uni.overlap = Overlap.from_file(fp)
else: print('Is {} in the same directory as {}?'.format(overlap, fp))
return uni