{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extraction of cluster geometries from a trajectory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from exa.util import isotopes\n",
    "import exatomic\n",
    "from exatomic.core.two import compute_atom_two_out_of_core    # If we need to compute atom_two out of core (low RAM)\n",
    "from exatomic.algorithms.neighbors import periodic_nearest_neighbors_by_atom    # Only valid for simple cubic periodic cells\n",
    "from exatomic.base import resource # Helper function to find the path to an example file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "xyzpath = resource(\"rh2.traj.xyz\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "xyz = exatomic.XYZ(xyzpath)    # Editor that can parse the XYZ file and build a universe!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "u = xyz.to_universe()    # Parse the XYZ data to a universe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>size</th>\n",
       "      <th>type</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>object</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>METADATA</th>\n",
       "      <td>48</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WIDGET</th>\n",
       "      <td>0</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>atom</th>\n",
       "      <td>985706</td>\n",
       "      <td>exatomic.core.atom.Atom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>frame</th>\n",
       "      <td>1616</td>\n",
       "      <td>exatomic.core.frame.Frame</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            size                       type\n",
       "object                                     \n",
       "METADATA      48                          -\n",
       "WIDGET         0                          -\n",
       "atom      985706    exatomic.core.atom.Atom\n",
       "frame       1616  exatomic.core.frame.Frame"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "u.info()    # Current tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add the unit cell dimensions to the frame table of the universe\n",
    "a = 37.08946302\n",
    "for i, q in enumerate((\"x\", \"y\", \"z\")):\n",
    "    for j, r in enumerate((\"i\", \"j\", \"k\")):\n",
    "        if i == j:\n",
    "            u.frame[q+r] = a\n",
    "        else:\n",
    "            u.frame[q+r] = 0.0\n",
    "    u.frame[\"o\"+q] = 0.0\n",
    "u.frame['periodic'] = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "101"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(u)    # Number of steps in this trajectory (small for example purposes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.3621574999999995"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "isotopes.Rh.radius    # Default Rh (covalent) radius"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### To extract clusters from the trajectory, we don't need to pre-compute atom_two"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>symbol</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>z</th>\n",
       "      <th>frame</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>atom</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Rh</td>\n",
       "      <td>19.3251</td>\n",
       "      <td>20.99490</td>\n",
       "      <td>12.9253</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Rh</td>\n",
       "      <td>20.8605</td>\n",
       "      <td>19.40470</td>\n",
       "      <td>17.0475</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Cl</td>\n",
       "      <td>32.7903</td>\n",
       "      <td>9.48909</td>\n",
       "      <td>27.4356</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Cl</td>\n",
       "      <td>27.2017</td>\n",
       "      <td>10.59920</td>\n",
       "      <td>26.4768</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Cl</td>\n",
       "      <td>39.1224</td>\n",
       "      <td>25.75620</td>\n",
       "      <td>13.6518</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "DataFrame(5, 5)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "u.atom.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "FloatProgress(value=0.0, description='Slicing:')"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2min 20s, sys: 2min 35s, total: 4min 56s\n",
      "Wall time: 5min 20s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "dct = periodic_nearest_neighbors_by_atom(u,       # Universe with all frames from which we want to extract clusters\n",
    "                                         \"Rh\",    # Source atom from which we will search for neighbors\n",
    "                                         a,       # Cubic cell dimension\n",
    "                                         # Cluster sizes we want\n",
    "                                         [0, 1, 2, 3, 4, 8, 12, 16],\n",
    "                                         # Additional arguments to be passed to the atomic two body calculation\n",
    "                                         # Note that it is critical to increase dmax (in compute_atom_two)!!!\n",
    "                                         Rh=2.6, dmax=a/2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['nearest', 0, 1, 2, 3, 4, 8, 12, 16])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dct.keys()    # 'dct' is a dictionary containing our clustered universes as well as the sorted neighbor list (per frame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>frame</th>\n",
       "      <th>idx</th>\n",
       "      <th>two</th>\n",
       "      <th>atom</th>\n",
       "      <th>molecule</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>18</td>\n",
       "      <td>421055</td>\n",
       "      <td>5186</td>\n",
       "      <td>890</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>26</td>\n",
       "      <td>420722</td>\n",
       "      <td>4930</td>\n",
       "      <td>879</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>30</td>\n",
       "      <td>420997</td>\n",
       "      <td>5083</td>\n",
       "      <td>859</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>34</td>\n",
       "      <td>420801</td>\n",
       "      <td>5090</td>\n",
       "      <td>857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>420834</td>\n",
       "      <td>5155</td>\n",
       "      <td>881</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   frame  idx     two  atom  molecule\n",
       "0      0   18  421055  5186       890\n",
       "1      0   26  420722  4930       879\n",
       "2      0   30  420997  5083       859\n",
       "3      0   34  420801  5090       857\n",
       "4      0   35  420834  5155       881"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dct['nearest'].head()    # Table of sorted indexes of nearest molecules to \"Rh\" in each frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "exatomic.UniverseWidget(dct[0])    # Just the analyte"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "exatomic.UniverseWidget(dct[16])   # View the universe with 16 nearest molecules"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### If we had wanted to view the entire universe..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3.17 s, sys: 281 ms, total: 3.45 s\n",
      "Wall time: 3.51 s\n"
     ]
    }
   ],
   "source": [
    "%time u.compute_atom_two(Rh=2.6)    # Compute atom two body data with slightly larger Rh radius\n",
    "# OR\n",
    "#%time compute_atom_two_out_of_core(\"atom_two.hdf\", u, a, Rh=2.6)      # Writes the two body data to \"atom_two.hdf\" with keys \"frame_{index}/atom_two\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10.53761100769043"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "u.memory_usage().sum()/1024**2    # RAM usage (MB)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "exatomic.UniverseWidget(u)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}