# -*- coding: utf-8 -*-
# Copyright (c) 2015-2022, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
QE cp.x Molecular Dynamics
###############################
Functionality related to parsing inputs and outputs generated when running QE's
cp.x module. Data files that come from dynamics include evp, for, pos, etc.
Because these files are CSV-like and typically large, CSV reading functions,
rather than :class:`~exatomic.exa.core.editor.Editor` objects, are used to
parse the data.
"""
import re, bz2
from six import StringIO
import pandas as pd
import numpy as np
import numba as nb
from exatomic.exa import Editor
from exatomic.exa.util.units import Length
from exatomic.base import nbpll
@nb.jit(nopython=True, nogil=True, parallel=nbpll)
def construct_fdx(fdxs, size):
    """
    Build a flat frame index by repeating every entry of ``fdxs`` ``size`` times.

    Args:
        fdxs: Iterable of integer frame numbers (one entry per frame)
        size (int): Number of consecutive rows that belong to each frame

    Returns:
        frame (:class:`~numpy.ndarray`): 1-D ``int64`` array of length
        ``len(fdxs)*size`` laid out as ``[f0]*size + [f1]*size + ...``
    """
    n = len(fdxs)*size
    frame = np.empty((n, ), dtype=np.int64)
    # k is the running write position in the flat output array
    k = 0
    for fdx in fdxs:
        for _ in range(size):
            frame[k] = fdx
            k += 1
    return frame
def parse_evp(path, symbols, columns=None, **kwargs):
    """
    Parse in the EVP file using pandas.

    If the ``columns`` argument is None, the first line of the file is
    inspected: when it starts with ``#`` its whitespace separated tokens are
    used as column names (and the line is skipped), otherwise the default
    names ``0..12`` are used. Rows that repeat a value of the first column are
    deduplicated (last occurrence wins) and columns that are entirely empty
    are removed.

    Args:
        path (str): Path to the evp file; names ending in ``bz2`` are read as bzip2
        symbols: Not used by this function (presumably kept for signature parity with ``parse_xyz``)
        columns (list): Column names, or None to detect them as described above
        kwargs: Passed to ``pandas.read_csv``; ``delim_whitespace`` (default True) and ``skiprows`` are handled here

    Returns:
        df (:class:`~pandas.DataFrame`): Parsed table

    Note:
        When ``columns`` is None any user supplied ``skiprows`` is replaced by
        the value implied by the header detection.
    """
    dw = kwargs.pop("delim_whitespace", True)
    skiprows = kwargs.pop("skiprows", [])
    # ``delim_whitespace`` is deprecated in recent pandas; a whitespace regex
    # separator is the documented equivalent. An explicit separator wins.
    if dw and "sep" not in kwargs and "delimiter" not in kwargs:
        kwargs["sep"] = r"\s+"
    if columns is None:
        opener = bz2.open if str(path).endswith("bz2") else open
        # Open in text mode for both openers so that exactly one line is
        # consumed and no bytes/str guessing is required.
        with opener(path, "rt", encoding="utf-8") as f:
            first = f.readline().strip()
        if first.startswith("#"):
            # Accept both "# a b c" and "#a b c" style headers
            columns = first.lstrip("#").split()
            skiprows = [0]
        else:
            columns = list(range(13))
            skiprows = []
    df = pd.read_csv(path, names=columns, skiprows=skiprows, **kwargs)
    # Restarted simulations may write the same step more than once
    df.drop_duplicates(columns[0], keep="last", inplace=True)
    # Default names may outnumber the columns actually present in the file
    df.dropna(how="all", axis=1, inplace=True)
    return df
def parse_xyz(path, symbols, columns=("x", "y", "z"), **kwargs):
    """
    Parse XYZ-like files, pos, vel, for, using pandas.

    Lines whose last column is empty are treated as frame headers; the value
    in their first column is used as the frame number for the rows that follow.

    Args:
        path (str): Path to the file
        symbols (list): Symbols for a single frame, in the order the rows appear
        columns (tuple): Names of the data columns
        kwargs: Passed to ``pandas.read_csv``; ``delim_whitespace`` (default True) is handled here

    Returns:
        df (:class:`~pandas.DataFrame`): The data columns plus categorical ``frame`` and ``symbol`` columns

    Raises:
        ValueError: If no frame header lines are found

    Warning:
        In certain cases using ``pandas.read_fwf`` may work better.
    """
    dw = kwargs.pop("delim_whitespace", True)
    # ``delim_whitespace`` is deprecated in recent pandas; a whitespace regex
    # separator is the documented equivalent. An explicit separator wins.
    if dw and "sep" not in kwargs and "delimiter" not in kwargs:
        kwargs["sep"] = r"\s+"
    # list*int replication is relied on below, so normalise tuples/arrays
    symbols = list(symbols)
    columns = list(columns)
    # Parse in the data using pandas
    df = pd.read_csv(path, names=columns, **kwargs)
    # The first line of each frame contains the frame number, isolate it
    fdxs = df.loc[df[columns[-1]].isnull(), columns[0]]
    if len(fdxs) == 0:
        raise ValueError("No frame header lines found in {}".format(path))
    # And remove those lines from the xyz-like data
    df.dropna(how="any", inplace=True)
    if len(fdxs) > 1:
        # Rows between two consecutive headers belong to one frame
        nat = int(fdxs.index[1] - fdxs.index[0] - 1)
    else:
        # Single frame: every remaining row belongs to it
        nat = len(df)
    # Construct the frame index
    df['frame'] = np.repeat(fdxs.values.astype(np.int64), nat)
    df['frame'] = df['frame'].astype("category")
    # and label so that we can deduplicate the data
    df['label'] = list(range(len(symbols)))*len(fdxs)
    df['label'] = df['label'].astype("category")
    # Drop duplicated data (typically due to simulation errors)
    df.drop_duplicates(["frame", "label"], keep="last", inplace=True)
    # Cleanup and add symbols
    del df['label']
    df['symbol'] = symbols*len(df['frame'].unique())
    df['symbol'] = df['symbol'].astype("category")
    df.reset_index(drop=True, inplace=True)
    return df