# Source code for exatomic.qe.cp.dynamics

# -*- coding: utf-8 -*-
# Copyright (c) 2015-2022, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
QE cp.x Molecular Dynamics
###############################
Functionality related to parsing inputs and outputs generated when running QE's
cp.x module. Data files that come from dynamics include evp, for, pos, etc.
Since these files are CSV-like files, and typically large, CSV reading functions,
rather than :class:`~exatomic.exa.core.editor.Editor` objects are used to parse in the
data.
"""
import re, bz2
from six import StringIO
import pandas as pd
import numpy as np
import numba as nb
from exatomic.exa import Editor
from exatomic.exa.util.units import Length
from exatomic.base import nbpll


@nb.jit(nopython=True, nogil=True, parallel=nbpll)
def construct_fdx(fdxs, size):
    """
    Build a flat frame index in which every entry of ``fdxs`` is repeated
    ``size`` times in a row.

    Args:
        fdxs: One-dimensional sequence of frame numbers
        size (int): Number of consecutive repetitions of each frame number

    Returns:
        frame (array): 64-bit integer array of length ``len(fdxs)*size``
    """
    nframes = len(fdxs)
    frame = np.empty((nframes*size, ), dtype=np.int64)
    for i in range(nframes):
        value = fdxs[i]
        # Each frame number fills its own contiguous run of ``size`` slots
        offset = i*size
        for j in range(size):
            frame[offset + j] = value
    return frame
def parse_symbols_from_input(path):
    """
    The only way to get the symbols, in the correct order, is by
    parsing them from an input file.

    Args:
        path (str): Path of the input file handed to ``Editor``

    Returns:
        xyz (DataFrame): Columns ``symbol``, ``x``, ``y``, ``z``; the
            coordinates are multiplied by ``Length[<unit>, 'au']`` where the
            unit is "au" if the atomic_positions line mentions "bohr" and
            "Angstrom" otherwise

    Note:
        Only the first line matching "nat" and the first line matching
        "atomic_positions" are used. Assumes ``Editor.regex`` yields
        ``(line number, line text)`` pairs per pattern - confirm against
        the Editor implementation.
    """
    inp = Editor(path)
    found = inp.regex("atomic_positions", "nat", flags=re.I)
    natline = found['nat'][0][1]
    # Namelist lines may carry a trailing comma ("nat = 3,") or several
    # assignments ("nat=2, ntyp=1"); pick the integer that follows "nat ="
    match = re.search(r"\bnat\s*=\s*(\d+)", natline, flags=re.I)
    if match is not None:
        nat = int(match.group(1))
    else:
        nat = int(natline.split("=")[-1])
    start = found['atomic_positions'][0][0]
    length = "au" if "bohr" in inp[start].lower() else "Angstrom"
    # The nat lines following the atomic_positions card hold the coordinates
    block = "\n".join(inp[start+1:start+1+nat])
    # sep=r"\s+" is the non-deprecated spelling of delim_whitespace=True
    xyz = pd.read_csv(StringIO(block), names=("symbol", "x", "y", "z"),
                      sep=r"\s+")
    for q in ("x", "y", "z"):
        xyz[q] = Length[length, 'au']*xyz[q].astype(float)
    return xyz
def parse_evp(path, symbols, columns=None, **kwargs):
    """
    Parse in the EVP file using pandas.

    If the ``columns`` argument is None, this function attempts to determine
    if the column names are present otherwise uses defaults.

    Args:
        path (str): Path to the evp file; a name ending in "bz2" is opened
            with :mod:`bz2` when looking for the header line
        symbols: Not used by this function; retained so existing callers
            keep working
        columns (list): Column names. If None, they are taken from a first
            line starting with "#" (the "#" token itself is dropped),
            otherwise the integers 0 through 12 are used
        kwargs: Passed to ``pandas.read_csv``. ``delim_whitespace``
            (default True) selects whitespace separated fields;
            ``skiprows`` is only honored when ``columns`` is given

    Returns:
        df (DataFrame): Parsed table with duplicates of the first column
            removed (last occurrence kept) and all-empty columns dropped
    """
    dw = kwargs.pop("delim_whitespace", True)
    skiprows = kwargs.pop("skiprows", [])
    if dw:
        # sep=r"\s+" is the non-deprecated spelling of delim_whitespace=True
        kwargs.setdefault("sep", r"\s+")

    if columns is None:
        opener = bz2.open if str(path).endswith("bz2") else open
        # Text mode yields str for both plain and bz2 files, so the first
        # line is read exactly once and never needs decoding
        with opener(path, "rt", encoding="utf-8") as f:
            first = f.readline()
        if first.strip().startswith("#"):
            columns = first.split()[1:]
            skiprows = [0]
        else:
            columns = list(range(13))
            skiprows = []

    df = pd.read_csv(path, names=columns, skiprows=skiprows, **kwargs)
    # Repeated entries of the first column are collapsed, keeping the last
    df.drop_duplicates(columns[0], keep="last", inplace=True)
    df.dropna(how="all", axis=1, inplace=True)
    return df
def parse_xyz(path, symbols, columns=("x", "y", "z"), **kwargs):
    """
    Parse XYZ-like files, pos, vel, for, using pandas.

    Args:
        path (str): Path to the file
        symbols (list): Atomic symbols in file order, one per atom of a frame
        columns (tuple): Names given to the data columns
        kwargs: Passed to ``pandas.read_csv``. ``delim_whitespace``
            (default True) selects whitespace separated fields

    Returns:
        df (DataFrame): The ``columns`` data plus categorical ``frame`` and
            ``symbol`` columns; rows duplicated for the same frame and atom
            position are reduced to the last occurrence

    Raises:
        ValueError: If no frame header line is found in the file

    Warning:
        In certain cases using ``pandas.read_fwf`` may work better.
    """
    dw = kwargs.pop("delim_whitespace", True)
    if dw:
        # sep=r"\s+" is the non-deprecated spelling of delim_whitespace=True
        kwargs.setdefault("sep", r"\s+")
    # A plain list makes ``symbols*n`` below a repetition for any sequence
    symbols = list(symbols)
    # Parse in the data using pandas
    df = pd.read_csv(path, names=columns, **kwargs)
    # Header lines have fewer fields, so their last column is empty; their
    # first column holds the frame number
    fdxs = df.loc[df[columns[-1]].isnull(), columns[0]]
    nframes = len(fdxs)
    if nframes == 0:
        raise ValueError("No frame header lines found in {}".format(path))
    if nframes > 1:
        nat = int(fdxs.index[1] - fdxs.index[0] - 1)
    else:
        # Single frame: every row except the header is an atom
        nat = len(df) - 1
    # And remove those lines from the xyz-like data
    df.dropna(how="any", inplace=True)
    # Construct the frame index
    df['frame'] = construct_fdx(fdxs.values.astype(int), nat)
    df['frame'] = df['frame'].astype("category")
    # and label so that we can deduplicate the data
    df['label'] = list(range(len(symbols)))*nframes
    df['label'] = df['label'].astype("category")
    # Drop duplicated data (typically due to simulation errors)
    df.drop_duplicates(["frame", "label"], keep="last", inplace=True)
    # Cleanup and add symbols
    del df['label']
    df['symbol'] = symbols*len(df['frame'].unique())
    df['symbol'] = df['symbol'].astype("category")
    df.reset_index(drop=True, inplace=True)
    return df