# -*- coding: utf-8 -*-
# Copyright (c) 2015-2022, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
Basis Set Representations
##############################
This module provides classes that support representations of various basis sets.
There are a handful of basis sets in computational chemistry, the most common of
which are Gaussian type functions, Slater type functions, and plane waves. The
classes provided by this module support not only storage of basis set data, but
also analytical and discrete manipulations of the basis set.
See Also:
For symbolic and discrete manipulations see :mod:`~exatomic.algorithms.basis`.
"""
import os, six
import numpy as np
import pandas as pd
from io import StringIO
from exatomic.exa import DataFrame
from exatomic.algorithms.basis import cart_lml_count, spher_lml_count
from exatomic.algorithms.numerical import _tri_indices, _square, Shell
[docs]class BasisSet(DataFrame):
"""
Stores information about a basis set. Common basis set types in use for
electronic structure calculations usually consist of Gaussians or Slater
Type Orbitals (STOs). Both types usually employ atom-centered basis functions,
where each basis function resides on a given atom with coordinates
:math:`\\left(A_{x}, A_{y}, A_{z}\\right)`. For Gaussian basis sets, the
functional form of :math:`f\\left(x, y, z\\right)` is:
.. math::
r^{2} = \\left(x - A_{x}\\right)^{2} + \\left(x - A_{y}\\right)^{2} + \\left(z - A_{z}\\right)^{2} \\\\
f\\left(x, y, z\\right) = \\left(x - A_{x}\\right)^{l}\\left(x - A_{y}\\right)^{m}\\left(z - A_{z}\\right)^{n}e^{-\\alpha r^{2}}
where :math:`l`, :math:`m`, and :math:`n` are not quantum numbers but positive
integers (including zero) whose sum defines the orbital angular momentum of
each function and :math:`alpha` governs the exponential decay of the given
function. Gaussian basis functions are usually constructed from multiple
primitive Gaussians, with fixed contraction coefficients. Therefore, a basis
function consists of the sum of one or more primitive functions:
.. math::
g_{i}\\left(x, y, z\\right) = \\sum_{j=1}^{N_{i}}c_{ij}f_{ij}\\left(x, y, z\\right)
Alternatively, STOs are usually not constructed from linear combinations
of multiple primitives, and differ from Gaussian type functions in that they
do not contain an exponent in the :math:`r` term of the exponential decay.
These functions have 2 main benefits; an adequate description of the cusp
in the density at the nucleus, and the appropriate long-range decay behavior.
+-------------------+----------+-------------------------------------------+
| Column | Type | Description |
+===================+==========+===========================================+
| alpha | float | exponent |
+-------------------+----------+-------------------------------------------+
| shell | int | group of primitives |
+-------------------+----------+-------------------------------------------+
| set | int/cat | unique basis set identifier |
+-------------------+----------+-------------------------------------------+
| d | float | contraction coefficient |
+-------------------+----------+-------------------------------------------+
| L | int | orbital angular momentum |
+-------------------+----------+-------------------------------------------+
"""
_columns = ['alpha', 'd', 'shell', 'L', 'set']
_cardinal = ('frame', np.int64)
_index = 'function'
_categories = {'L': np.int64, 'set': np.int64, 'frame': np.int64, 'norm': str}
_indexes = ['set', 'L']
@property
def lmax(self):
return self['L'].cat.as_ordered().max()
[docs] def shells(self, program='', spherical=True, gaussian=True):
"""
Generate a multi-index series of :class:`~exatomic.algorithms.numerical.Shell`
in the basis set, indexed by set and L.
Args:
program (str): which code the basis set comes from
spherical (bool): expand in ml or cartesian powers
gaussian (bool): exponential dependence of basis functions
Returns:
srs (pd.Series): multi-indexed by set and L
"""
self.spherical_by_shell(program, spherical)
grps = self.groupby(self._indexes)
shells = []
for (seht, L), grp in grps:
alphas = grp['alpha'].unique()
piv = grp.pivot_table(index='alpha', columns='shell', values='d').loc[alphas].fillna(0.)
args = piv.values.flatten(), alphas, *piv.shape, L, grp['norm'].values[0], gaussian
shell = Shell(*args, None, None) if gaussian else Shell(*args, grp['r'].values, grp['n'].values)
shells.append((seht, L, shell))
return pd.DataFrame(shells, columns=self._indexes + [0])
[docs] def spherical_by_shell(self, program, spherical=True):
"""Allows for some flexibility in treating shells either as
cartesian functions or spherical functions (different normalizations).
Args:
program (str): which code the basis set comes from
"""
self['L'] = self['L'].astype(np.int64)
if program in ['molcas', 'nwchem']:
self['norm'] = self['L'].apply(lambda L: L > 1)
else:
self['norm'] = spherical
self['L'] = self['L'].astype('category')
[docs] def functions_by_shell(self):
"""Return a series of n functions per (set, L).
This does not include degenerate functions."""
obj = self._revert_categories(inplace=False)
obj = obj.groupby(self._indexes)['shell'].nunique()
obj.index.set_names(self._indexes, inplace=True)
return obj
[docs] def primitives_by_shell(self):
"""Return a series of n primitives per (set, L).
This does not include degenerate primitives."""
obj = self._revert_categories(inplace=False)
obj = obj.groupby(self._indexes)['alpha'].nunique()
obj.index.set_names(self._indexes, inplace=True)
return obj
[docs] def functions(self, spherical):
"""Return a series of n functions per (set, L).
This does include degenerate functions."""
self._revert_categories()
if spherical:
mapper = lambda x: spher_lml_count[x]
else:
mapper = lambda x: cart_lml_count[x]
n = self.functions_by_shell()
ret = n * n.index.get_level_values('L').map(mapper)
self._set_categories()
return ret.astype(int)
[docs] def primitives(self, spherical):
"""Return a series of n primitives per (set, L).
This does include degenerate primitives."""
self._revert_categories()
if spherical:
mapper = lambda x: spher_lml_count[x]
else:
mapper = lambda x: cart_lml_count[x]
n = self.primitives_by_shell()
ret = n * n.index.get_level_values('L').map(mapper)
self._set_categories()
return ret.astype(int)
[docs]def deduplicate_basis_sets(sets, sp=False):
"""Deduplicate identical basis sets on different centers.
Args:
sets (pd.DataFrame): non-unique basis sets
sp (bool): Whether or not to call _expand_sp (gaussian program only)
Returns:
tup (tuple): deduplicated basis sets and basis set map for atom table
"""
unique, setmap, cnt = [], {}, 0
sets = sets.groupby('center')
chk = ['alpha', 'd']
for center, seht in sets:
for i, other in enumerate(unique):
if other.shape != seht.shape: continue
if np.allclose(other[chk], seht[chk]):
setmap[center] = i
break
else:
unique.append(seht)
setmap[center] = cnt
cnt += 1
if sp: unique = _expand_sp(unique)
sets = pd.concat(unique, sort=False).reset_index(drop=True) # sort=False silences warning
try: sets.drop([2, 3], axis=1, inplace=True)
except (KeyError, ValueError): pass
sets.rename(columns={'center': 'set'}, inplace=True)
sets['set'] = sets['set'].map(setmap)
sets['frame'] = 0
return sets, setmap
def _expand_sp(unique):
"""Currently only used when 'program' == 'gaussian'."""
expand = []
for seht in unique:
if np.isnan(seht[2]).sum() == seht.shape[0]:
expand.append(seht)
continue
sps = seht[2][~np.isnan(seht[2])].index
shls = len(seht.loc[sps]['shell'].unique())
dupl = seht.loc[sps[0]:sps[-1]].copy()
dupl[1] = dupl[2]
dupl['L'] = 1
dupl['shell'] += shls
last = seht.loc[sps[-1] + 1:].copy()
last['shell'] += shls
expand.append(pd.concat([seht.loc[:sps[0] - 1],
seht.loc[sps[0]:sps[-1]],
dupl, last], sort=False)) # Silences warning
return expand
[docs]class BasisSetOrder(DataFrame):
"""
BasisSetOrder uniquely determines the basis function ordering scheme for
a given :class:`~exatomic.core.universe.Universe`. This table is provided to
make transparent the characteristic ordering scheme of various quantum
codes. Either (L, ml) or (l, m, n) must be provided to have access to
orbital visualization functionality.
+-------------------+----------+-------------------------------------------+
| Column | Type | Description |
+===================+==========+===========================================+
| center | int | atomic center |
+-------------------+----------+-------------------------------------------+
| L | int | orbital angular momentum |
+-------------------+----------+-------------------------------------------+
| shell | int | group of primitives |
+-------------------+----------+-------------------------------------------+
| ml | int | magnetic quantum number |
+-------------------+----------+-------------------------------------------+
| l | int | power in x |
+-------------------+----------+-------------------------------------------+
| m | int | power in y |
+-------------------+----------+-------------------------------------------+
| n | int | power in z |
+-------------------+----------+-------------------------------------------+
| r | int | power in r (optional - for STOs) |
+-------------------+----------+-------------------------------------------+
| prefac | float | prefactor (optional - for STOs) |
+-------------------+----------+-------------------------------------------+
"""
_columns = ['center', 'L']
_index = 'chi'
_cardinal = ('frame', np.int64)
_categories = {'L': np.int64}
[docs]class Overlap(DataFrame):
"""
Overlap enumerates the overlap matrix elements between basis functions in
a contracted basis set. Currently nothing disambiguates between the
primitive overlap matrix and the contracted overlap matrix. As it is
square symmetric, only n_basis_functions * (n_basis_functions + 1) / 2
rows are stored.
See Gramian matrix for more on the general properties of the overlap matrix.
+-------------------+----------+-------------------------------------------+
| Column | Type | Description |
+===================+==========+===========================================+
| frame | int/cat | non-unique integer |
+-------------------+----------+-------------------------------------------+
| chi0 | int | first basis function |
+-------------------+----------+-------------------------------------------+
| chi1 | int | second basis function |
+-------------------+----------+-------------------------------------------+
| coef | float | overlap matrix element |
+-------------------+----------+-------------------------------------------+
"""
_columns = ['chi0', 'chi1', 'coef', 'frame']
_index = 'index'
[docs] def square(self, frame=0, column='coef', mocoefs=None, irrep=None):
"""Return a 'square' matrix DataFrame of the Overlap.
Args:
column (str): column of coefficients to reshape
mocoefs (str): alias for `column`
frame (int): default 0
irrep (int): irreducible representation if symmetrized
"""
if mocoefs is not None: column = mocoefs
if 'irrep' in self.columns:
if irrep is None:
irreps, i, j = self.groupby('irrep'), 0, 0
norb = (irreps.chi0.max() + 1).sum()
nchi = (irreps.chi1.max() + 1).sum()
cmat = np.zeros((nchi, norb))
for irrep, grp in irreps:
piv = grp.pivot('chi0', 'chi1', column)
ii, jj = piv.shape
cmat[i : i + ii, j : j + jj] = piv.values
i += ii
j += jj
idx = pd.Index(range(nchi), name='chi0')
orb = pd.Index(range(norb), name='chi1')
return pd.DataFrame(cmat, index=idx, columns=orb)
return self.groupby('irrep').get_group(irrep
).pivot('chi', 'orbital', column)
sq = _square(self[column].values)
idx = pd.Index(range(sq.shape[0]), name='chi0')
orb = pd.Index(range(sq.shape[1]), name='chi1')
return pd.DataFrame(sq, index=idx, columns=orb)
[docs] @classmethod
def from_column(cls, source):
"""Create an Overlap from a file with just the array of coefficients or
an array of the values directly."""
# Assuming source is a file of triangular elements of the overlap matrix
if isinstance(source, np.ndarray):
vals = source
elif isinstance(source, six.string_types):
if os.sep not in source: source = StringIO(source)
vals = pd.read_csv(source, header=None).values.flatten()
else:
# Without a catchall, _tri_indices may through UnboundLocalError
raise TypeError("Invalid type for source: {}".format(type(source)))
chi0, chi1 = _tri_indices(vals)
return cls(pd.DataFrame.from_dict({'chi0': chi0,
'chi1': chi1,
'coef': vals,
'frame': 0}))
[docs] @classmethod
def from_square(cls, df):
ndim = df.shape[0]
try: arr = df.values
except AttributeError: arr = df
arlen = ndim * (ndim + 1) // 2
ret = np.empty((arlen,), dtype=[('chi0', 'i8'),
('chi1', 'i8'),
('coef', 'f8'),
('frame', 'i8')])
cnt = 0
for i in range(ndim):
for j in range(i + 1):
ret[cnt] = (i, j, arr[i, j], 0)
cnt += 1
return cls(ret)