diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index e31586a..4c1394e 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -6,6 +6,7 @@ Tutorials ./tutorials/dimer_lattice_tutorial ./tutorials/analysis_tutorial/analysis_tutorial + ./tutorials/numba_tutorial/numba_tutorial ./tutorials/ethanol-water-tutorial/ethanol_water_mixture .. grid:: 2 @@ -14,18 +15,26 @@ Tutorials :link: ./tutorials/dimer_lattice_tutorial :link-type: doc - Use Snapshot to create and modify LAMMPS data files, - and convert them to HOOMD-blue's GSD format. + Create a LAMMPS data file and also convert it to HOOMD-blue's GSD + format. .. grid-item-card:: Analyzing a Lennard-Jones Fluid :link: ./tutorials/analysis_tutorial/analysis_tutorial :link-type: doc - Read and analyze a LAMMPS dump file to study the Lennard-Jones fluid - using the freud Python package. + Read a LAMMPS dump file for a Lennard-Jones fluid and analyze it using + ``freud``. .. grid-item-card:: Initializing an Ethanol-Water Mixture :link: ./tutorials/ethanol-water-tutorial/ethanol_water_mixture :link-type: doc - Use Snapshot to create a LAMMPS data file for an ethanol-water mixture. + Create a LAMMPS data file for an atomistic simulation of ethanol and + water using PACKMOL and LAMMPS molecule templates. + + .. grid-item-card:: Speeding Up Analysis + :link: ./tutorials/numba_tutorial/numba_tutorial + :link-type: doc + + Accelerate the analysis of a LAMMPS dump file for a linear polymer using + NumPy and Numba. diff --git a/doc/source/tutorials/numba_tutorial/init.data b/doc/source/tutorials/numba_tutorial/init.data new file mode 100644 index 0000000..88047a6 --- /dev/null +++ b/doc/source/tutorials/numba_tutorial/init.data @@ -0,0 +1,218 @@ +LAMMPS init.data + +100 atoms +99 bonds +1 atom types +1 bond types +0.0 200.0 xlo xhi +0.0 200.0 ylo yhi +0.0 200.0 zlo zhi + +Masses + + 1 1.0 + +Atoms # molecular + +1 0 1 41.07031216 98.97420857 107.59355066 +2 0 1 42.07031216 98.97420857 107.59355066 +3 0 1 43.07031216 98.97420857 107.59355066 +4 0 1 44.07031216 98.97420857 107.59355066 +5 0 1 45.07031216 98.97420857 107.59355066 +6 0 1 46.07031216 98.97420857 107.59355066 +7 0 1 47.07031216 98.97420857 107.59355066 +8 0 1 48.07031216 98.97420857 107.59355066 +9 0 1 49.07031216 98.97420857 107.59355066 +10 0 1 50.07031216 98.97420857 107.59355066 +11 0 1 51.07031216 98.97420857 107.59355066 +12 0 1 52.07031216 98.97420857 107.59355066 +13 0 1 53.07031216 98.97420857 107.59355066 +14 0 1 54.07031216 98.97420857 107.59355066 +15 0 1 55.07031216 98.97420857 107.59355066 +16 0 1 56.07031216 98.97420857 107.59355066 +17 0 1 57.07031216 98.97420857 107.59355066 +18 0 1 58.07031216 98.97420857 107.59355066 +19 0 1 59.07031216 98.97420857 107.59355066 +20 0 1 60.07031216 98.97420857 107.59355066 +21 0 1 61.07031216 98.97420857 107.59355066 +22 0 1 62.07031216 98.97420857 107.59355066 +23 0 1 63.07031216 98.97420857 107.59355066 +24 0 1 64.07031216 98.97420857 107.59355066 +25 0 1 65.07031216 98.97420857 107.59355066 +26 0 1 66.07031216 98.97420857 107.59355066 +27 0 1 67.07031216 98.97420857 107.59355066 +28 0 1 68.07031216 98.97420857 107.59355066 +29 0 1 69.07031216 98.97420857 107.59355066 +30 0 1 70.07031216 98.97420857 107.59355066 +31 0 1 71.07031216 98.97420857 107.59355066 +32 0 1 72.07031216 98.97420857 107.59355066 +33 0 1 73.07031216 98.97420857 107.59355066 +34 0 1 74.07031216 98.97420857 107.59355066 +35 0 1 75.07031216 98.97420857 107.59355066 +36 0 1 76.07031216 98.97420857 107.59355066 +37 0 1 77.07031216 98.97420857 107.59355066 +38 0 1 78.07031216 98.97420857 107.59355066 +39 0 1 79.07031216 98.97420857 107.59355066 +40 0 1 80.07031216 98.97420857 107.59355066 +41 0 1 81.07031216 98.97420857 107.59355066 +42 0 1 82.07031216 98.97420857 107.59355066 +43 0 1 83.07031216 98.97420857 107.59355066 +44 0 1 84.07031216 98.97420857 107.59355066 +45 0 1 85.07031216 98.97420857 107.59355066 +46 0 1 86.07031216 98.97420857 107.59355066 +47 0 1 87.07031216 98.97420857 107.59355066 +48 0 1 88.07031216 98.97420857 107.59355066 +49 0 1 89.07031216 98.97420857 107.59355066 +50 0 1 90.07031216 98.97420857 107.59355066 +51 0 1 91.07031216 98.97420857 107.59355066 +52 0 1 92.07031216 98.97420857 107.59355066 +53 0 1 93.07031216 98.97420857 107.59355066 +54 0 1 94.07031216 98.97420857 107.59355066 +55 0 1 95.07031216 98.97420857 107.59355066 +56 0 1 96.07031216 98.97420857 107.59355066 +57 0 1 97.07031216 98.97420857 107.59355066 +58 0 1 98.07031216 98.97420857 107.59355066 +59 0 1 99.07031216 98.97420857 107.59355066 +60 0 1 100.07031216 98.97420857 107.59355066 +61 0 1 101.07031216 98.97420857 107.59355066 +62 0 1 102.07031216 98.97420857 107.59355066 +63 0 1 103.07031216 98.97420857 107.59355066 +64 0 1 104.07031216 98.97420857 107.59355066 +65 0 1 105.07031216 98.97420857 107.59355066 +66 0 1 106.07031216 98.97420857 107.59355066 +67 0 1 107.07031216 98.97420857 107.59355066 +68 0 1 108.07031216 98.97420857 107.59355066 +69 0 1 109.07031216 98.97420857 107.59355066 +70 0 1 110.07031216 98.97420857 107.59355066 +71 0 1 111.07031216 98.97420857 107.59355066 +72 0 1 112.07031216 98.97420857 107.59355066 +73 0 1 113.07031216 98.97420857 107.59355066 +74 0 1 114.07031216 98.97420857 107.59355066 +75 0 1 115.07031216 98.97420857 107.59355066 +76 0 1 116.07031216 98.97420857 107.59355066 +77 0 1 117.07031216 98.97420857 107.59355066 +78 0 1 118.07031216 98.97420857 107.59355066 +79 0 1 119.07031216 98.97420857 107.59355066 +80 0 1 120.07031216 98.97420857 107.59355066 +81 0 1 121.07031216 98.97420857 107.59355066 +82 0 1 122.07031216 98.97420857 107.59355066 +83 0 1 123.07031216 98.97420857 107.59355066 +84 0 1 124.07031216 98.97420857 107.59355066 +85 0 1 125.07031216 98.97420857 107.59355066 +86 0 1 126.07031216 98.97420857 107.59355066 +87 0 1 127.07031216 98.97420857 107.59355066 +88 0 1 128.07031216 98.97420857 107.59355066 +89 0 1 129.07031216 98.97420857 107.59355066 +90 0 1 130.07031216 98.97420857 107.59355066 +91 0 1 131.07031216 98.97420857 107.59355066 +92 0 1 132.07031216 98.97420857 107.59355066 +93 0 1 133.07031216 98.97420857 107.59355066 +94 0 1 134.07031216 98.97420857 107.59355066 +95 0 1 135.07031216 98.97420857 107.59355066 +96 0 1 136.07031216 98.97420857 107.59355066 +97 0 1 137.07031216 98.97420857 107.59355066 +98 0 1 138.07031216 98.97420857 107.59355066 +99 0 1 139.07031216 98.97420857 107.59355066 +100 0 1 140.07031216 98.97420857 107.59355066 + +Bonds + +1 1 1 2 +2 1 2 3 +3 1 3 4 +4 1 4 5 +5 1 5 6 +6 1 6 7 +7 1 7 8 +8 1 8 9 +9 1 9 10 +10 1 10 11 +11 1 11 12 +12 1 12 13 +13 1 13 14 +14 1 14 15 +15 1 15 16 +16 1 16 17 +17 1 17 18 +18 1 18 19 +19 1 19 20 +20 1 20 21 +21 1 21 22 +22 1 22 23 +23 1 23 24 +24 1 24 25 +25 1 25 26 +26 1 26 27 +27 1 27 28 +28 1 28 29 +29 1 29 30 +30 1 30 31 +31 1 31 32 +32 1 32 33 +33 1 33 34 +34 1 34 35 +35 1 35 36 +36 1 36 37 +37 1 37 38 +38 1 38 39 +39 1 39 40 +40 1 40 41 +41 1 41 42 +42 1 42 43 +43 1 43 44 +44 1 44 45 +45 1 45 46 +46 1 46 47 +47 1 47 48 +48 1 48 49 +49 1 49 50 +50 1 50 51 +51 1 51 52 +52 1 52 53 +53 1 53 54 +54 1 54 55 +55 1 55 56 +56 1 56 57 +57 1 57 58 +58 1 58 59 +59 1 59 60 +60 1 60 61 +61 1 61 62 +62 1 62 63 +63 1 63 64 +64 1 64 65 +65 1 65 66 +66 1 66 67 +67 1 67 68 +68 1 68 69 +69 1 69 70 +70 1 70 71 +71 1 71 72 +72 1 72 73 +73 1 73 74 +74 1 74 75 +75 1 75 76 +76 1 76 77 +77 1 77 78 +78 1 78 79 +79 1 79 80 +80 1 80 81 +81 1 81 82 +82 1 82 83 +83 1 83 84 +84 1 84 85 +85 1 85 86 +86 1 86 87 +87 1 87 88 +88 1 88 89 +89 1 89 90 +90 1 90 91 +91 1 91 92 +92 1 92 93 +93 1 93 94 +94 1 94 95 +95 1 95 96 +96 1 96 97 +97 1 97 98 +98 1 98 99 +99 1 99 100 diff --git a/doc/source/tutorials/numba_tutorial/lammps_input.in b/doc/source/tutorials/numba_tutorial/lammps_input.in new file mode 100644 index 0000000..7e721cf --- /dev/null +++ b/doc/source/tutorials/numba_tutorial/lammps_input.in @@ -0,0 +1,29 @@ +# Initialize simulation +units lj +atom_style molecular +boundary p p p + +# Read data file +read_data init.data + +# Neighbor list settings +neighbor 0.4 bin + +# Pair potential (LJ with cutoff at 2^(1/6)) +pair_style lj/cut 1.122462048309373 +pair_coeff 1 1 1.0 1.0 1.122462048309373 + +# Bond potential (FENE) +bond_style fene +bond_coeff 1 30.0 1.5 1.0 1.0 +special_bonds fene + +# Integrator settings +timestep 0.005 +fix 1 all nvt temp 1.0 1.0 1.0 +fix 2 all langevin 1.0 1.0 1.0 5 + +# Run simulation +run 10000 +dump 2 all custom 1000 traj.lammpstrj id type x y z ix iy iz vx vy vz +run 100000 diff --git a/doc/source/tutorials/numba_tutorial/numba_tutorial.ipynb b/doc/source/tutorials/numba_tutorial/numba_tutorial.ipynb new file mode 100644 index 0000000..f6a68a9 --- /dev/null +++ b/doc/source/tutorials/numba_tutorial/numba_tutorial.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "334b4e2a", + "metadata": {}, + "source": [ + "# Speeding Up Analysis\n", + "\n", + "In this tutorial, we'll explore how to speed up analysis of LAMMPS trajectory data with `lammpsio`. We will analyze a dump file from a simulation of a single 100-bead linear polymer represented using the Kremer–Grest model. All particles interact pairwise through the Weeks-Chandler-Anderson potential:\n", + "\n", + "$$\n", + "u_{\\rm WCA}(r) = 4 \\varepsilon \\left[ \\left(\\frac{\\sigma}{r} \\right)^{12} - \\left(\\frac{\\sigma}{r}\\right)^6 + \\frac{1}{4} \\right], \\quad r \\le 2^{1/6} \\sigma.\n", + "$$\n", + "\n", + "Additionally, bonded particles interact via the FENE bond potential:\n", + "\n", + "$$\n", + "u_{\\rm FENE}(r) = \n", + "-\\dfrac{1}{2} k R_0^2 \\ln\\left[1 - \\left(\\dfrac{r}{R_0}\\right)^2\\right], \\quad r < R_0.\n", + "$$\n", + "\n", + "We've chosen standard model parameters of $\\sigma=1$, $\\varepsilon=1$, $R_0=1.5$, and $k=30$ in LAMMPS lj units.\n", + "\n", + "We've included the [LAMMPS input](lammps_input.in) and [initial configuration](init.data) files if you want to run the simulation yourself and follow along. Note that to keep compute time short, we have reduced the length of the trajectory. Averages computed from this trajectory are probably not reliable, but you can extend the simulation if you'd like to improve them!\n", + "\n", + "First, we import `lammpsio` and load the corresponding dump file." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "0bebf3ef", + "metadata": {}, + "outputs": [], + "source": [ + "import lammpsio\n", + "import numpy\n", + "import numba\n", + "\n", + "\n", + "traj = lammpsio.DumpFile(\"traj.lammpstrj\")" + ] + }, + { + "cell_type": "markdown", + "id": "cd9d0f34", + "metadata": {}, + "source": [ + "## Calculating the radius of gyration\n", + "\n", + "We will calculate the radius of gyration $R_g$, a key measure of polymer size:\n", + "\n", + "$$\n", + "R_g^2 = \n", + "\\frac{1}{N}\\sum_{i=1}^{N}|\\mathbf{R}_i-\\mathbf{R}_{\\rm cm}|^2\n", + "$$\n", + "\n", + "where $\\mathbf{R}_i$ is the position vector of particle *i*, *N* is the number of beads ($N=100$ for our polymer), and $\\mathbf{R}_{\\rm cm}$ is the position vector of the center of mass of the polymer:\n", + "\n", + "$$\n", + "\\mathbf{R}_{cm} = \\frac{1}{N} \\sum_{i=1}^{N} \\mathbf{R}_i,\n", + "$$\n", + "\n", + "Note that the position vectors of the particles should all be unwrapped to account for the periodic boundary conditions. `lammpsio` makes it easy to extract this data from LAMMPS dump files!\n", + "\n", + "## Literal implementation\n", + "\n", + "We'll start by calculating $R_g^2$ with a literal Python implementation that uses loops." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "2077d4ab", + "metadata": {}, + "outputs": [], + "source": [ + "def compute_rg(pos):\n", + " N = pos.shape[0]\n", + "\n", + " # Compute center of mass\n", + " rcm = numpy.zeros(3)\n", + " for i in range(N):\n", + " rcm += pos[i]\n", + " rcm /= N\n", + " \n", + " # Compute radius of gyration squared\n", + " rg_sqr = 0\n", + " for i in range(N):\n", + " dr = pos[i] - rcm\n", + " rg_sqr += dr[0]**2 + dr[1]**2 + dr[2]**2\n", + " rg_sqr /= N\n", + "\n", + " return rg_sqr" + ] + }, + { + "cell_type": "markdown", + "id": "51c199a4", + "metadata": {}, + "source": [ + "We will time how long it takes to evaluate $R_g^2$ over all configurations in the trajectory." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "9b2011c7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "40.2 ms ± 920 μs per loop (mean ± std. dev. of 3 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n 100 -r 3\n", + "rg_sqr = []\n", + "for snapshot in traj:\n", + " pos = snapshot.position + 2 * snapshot.box.high[0] * snapshot.image\n", + " rg_sqr.append(compute_rg(pos))" + ] + }, + { + "cell_type": "markdown", + "id": "f2e6f1f3", + "metadata": {}, + "source": [ + "It is known that loops are usually quite slow in Python. Next, we'll look at ways to speed things up using either NumPy or Numba." + ] + }, + { + "cell_type": "markdown", + "id": "98677061", + "metadata": {}, + "source": [ + "## NumPy\n", + "\n", + "One approach we can take is to use NumPy's vectorized array operations. Let's replace the first loop with `numpy.mean` and the second loop with a combination of `numpy.sum` to get the squared distance from the center of mass for each particle and `numpy.mean` to average this quantity over all particles." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "29dc3b86", + "metadata": {}, + "outputs": [], + "source": [ + "def compute_rg_numpy(pos):\n", + " rcm = numpy.mean(pos, axis=0)\n", + " rg_sqr = numpy.mean(numpy.sum((pos - rcm)**2, axis=1))\n", + " return rg_sqr" + ] + }, + { + "cell_type": "markdown", + "id": "bc53efbc", + "metadata": {}, + "source": [ + "Now we repeat the same timing." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "3aa845c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "30.3 ms ± 300 μs per loop (mean ± std. dev. of 3 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n 100 -r 3\n", + "rg_sqr = []\n", + "for snapshot in traj: \n", + " pos = snapshot.position + 2 * snapshot.box.high[0] * snapshot.image\n", + " rg_sqr.append(compute_rg_numpy(pos))" + ] + }, + { + "cell_type": "markdown", + "id": "11cfd8c3", + "metadata": {}, + "source": [ + "The vectorized NumPy approach is significantly faster than the literal implementation!\n", + "\n", + "## Numba\n", + "\n", + "Alternatively, we can use just-in-time (JIT) compilation with [numba](https://numba.readthedocs.io/en/stable/index.html) to speed up our calculations. We'll take the `compute_rg` function from our first implementation, then use `numba.njit` to enable JIT compilation. If you were writing the function from scratch, you could put the `@numba.njit` decorator on your function instead." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "eb3d264b", + "metadata": {}, + "outputs": [], + "source": [ + "compute_rg_numba = numba.njit(compute_rg)" + ] + }, + { + "cell_type": "markdown", + "id": "5cc37451", + "metadata": {}, + "source": [ + "Now we'll go ahead with timing! Note that the first time `compute_rg_numba` gets called will be slower than subsequent calls. We don't worry about separating that out here because we repeat the timing many times." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "959fd50b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "29.8 ms ± 448 μs per loop (mean ± std. dev. of 3 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n 100 -r 3\n", + "rg_sqr = []\n", + "for snapshot in traj:\n", + " pos = snapshot.position + 2 * snapshot.box.high[0] * snapshot.image\n", + " rg_sqr.append(compute_rg_numba(pos))" + ] + }, + { + "cell_type": "markdown", + "id": "d56a8c5e", + "metadata": {}, + "source": [ + "This approach gives a similar speed up as using NumPy, but we didn't need to rewrite anything!\n", + "\n", + "## Summary\n", + "\n", + "`lammpsio` makes it simple to load and analyze LAMMPS dump files in Python. As shown above, taking advantage of the Python ecosystem can dramatically speed up your analysis! NumPy and Numba are just two examples that provide significant performance gains. There are many other tools that `lammpsio` can interface with to optimize your specific workflows!\n", + "\n", + "Disclaimer: the timings above are for our workstation. Your mileage may vary based on your system specifications." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "lammpsio", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/lammpsio/dump.py b/src/lammpsio/dump.py index eda6826..ab04189 100644 --- a/src/lammpsio/dump.py +++ b/src/lammpsio/dump.py @@ -119,6 +119,21 @@ class DumpFile: for snapshot in traj: print(snapshot.step) + You can also use multiprocessing to read snapshots in parallel: + + .. code-block:: python + + import multiprocessing + + def process_snapshot(snapshot): + return snapshot.N + + if __name__ == '__main__': + num_workers = max(2, multiprocessing.cpu_count()) + with multiprocessing.Pool(num_workers) as p: + num_particles = p.map(process_snapshot, traj) + print(num_particles) + You can also get the number of snapshots in the `DumpFile`, but this does require reading the entire file: use with caution!