Skip to content

Commit bd42e6a

Browse files
Refactor the reading and writing of abacus/stru format (#793)
Refactor the code that reads and writes ABACUS/STRU, and move the functions into a single file, abacus/stru.py. Now, using dpdata.system to read an ABACUS STRU will also return the following information in the data dict: ``` { "masses": list of atomic masses, "pp_files": list of pseudo potential files, "orb_files": list of orbital files, "dpks_descriptor": the deepks descriptor file, } ``` This information can also be written to a new STRU file automatically. Later, I will build on this commit to fix the bug in dpgen deepmodeling/dpgen#1711 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Introduced a dedicated module for structure file handling, enhancing the parsing and conversion of lattice, species, and coordinate data. - **Refactor** - Streamlined data extraction processes for simulation and relaxation workflows, reducing redundant operations and improving error clarity. - Updated plugin methods to leverage the enhanced structure processing functions for improved efficiency. - **Tests** - Improved test setups and cleanups, ensuring consistent handling of structure files and robust validation of the new parsing logic. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: root <pxlxingliang> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 0af5e66 commit bd42e6a

File tree

8 files changed

+881
-768
lines changed

8 files changed

+881
-768
lines changed

dpdata/abacus/md.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,11 @@
99

1010
from .scf import (
1111
bohr2ang,
12-
get_cell,
13-
get_coords,
1412
get_geometry_in,
1513
get_mag_force,
1614
kbar2evperang3,
1715
)
16+
from .stru import get_frame_from_stru
1817

1918
# Read in geometries from an ABACUS MD trajectory.
2019
# The atomic coordinates are read in from generated files in OUT.XXXX.
@@ -164,12 +163,12 @@ def get_frame(fname):
164163
geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU
165164
path_out = get_path_out(fname, inlines)
166165

167-
with open_file(geometry_path_in) as fp:
168-
geometry_inlines = fp.read().split("\n")
169-
celldm, cell = get_cell(geometry_inlines)
170-
atom_names, natoms, types, coords, move, magmom = get_coords(
171-
celldm, cell, geometry_inlines, inlines
172-
)
166+
data = get_frame_from_stru(geometry_path_in)
167+
natoms = data["atom_numbs"]
168+
# should remove spins from STRU file
169+
if "spins" in data:
170+
data.pop("spins")
171+
173172
# This coords is not to be used.
174173
dump_freq = get_coord_dump_freq(inlines=inlines)
175174
# ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0])
@@ -203,10 +202,6 @@ def get_frame(fname):
203202

204203
magmom, magforce = get_mag_force(outlines)
205204

206-
data = {}
207-
data["atom_names"] = atom_names
208-
data["atom_numbs"] = natoms
209-
data["atom_types"] = types
210205
data["cells"] = cells
211206
# for idx in range(ndump):
212207
# data['cells'][:, :, :] = cell
@@ -221,7 +216,9 @@ def get_frame(fname):
221216
data["spins"] = magmom
222217
if len(magforce) > 0:
223218
data["force_mags"] = magforce
224-
if len(move) > 0:
225-
data["move"] = move
219+
220+
# need to expand the move.
221+
if "move" in data:
222+
data["move"] = [data["move"][0] for i in range(ndump)]
226223

227224
return data

dpdata/abacus/relax.py

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,11 @@
1010
bohr2ang,
1111
collect_force,
1212
collect_stress,
13-
get_cell,
14-
get_coords,
1513
get_geometry_in,
1614
get_mag_force,
1715
kbar2evperang3,
1816
)
17+
from .stru import get_frame_from_stru
1918

2019
# Read in geometries from an ABACUS RELAX(CELL-RELAX) trajectory in OUT.XXXX/running_relax/cell-relax.log.
2120

@@ -47,7 +46,7 @@ def get_coords_from_log(loglines, natoms):
4746
natoms_log += int(line.split()[-1])
4847

4948
assert natoms_log > 0 and natoms_log == natoms, (
50-
"ERROR: detected atom number in log file is %d" % natoms # noqa: UP031
49+
f"ERROR: detected atom number in log file is {natoms_log}, while the atom number in STRU file is {natoms}"
5150
)
5251

5352
energy = []
@@ -180,31 +179,22 @@ def get_frame(fname):
180179
with open_file(path_in) as fp:
181180
inlines = fp.read().split("\n")
182181
geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU
183-
with open_file(geometry_path_in) as fp:
184-
geometry_inlines = fp.read().split("\n")
185-
celldm, cell = get_cell(geometry_inlines)
186-
atom_names, natoms, types, coord_tmp, move, magmom = get_coords(
187-
celldm, cell, geometry_inlines, inlines
188-
)
182+
183+
data = get_frame_from_stru(geometry_path_in)
184+
natoms = sum(data["atom_numbs"])
185+
# should remove spins from STRU file
186+
if "spins" in data:
187+
data.pop("spins")
189188

190189
logf = get_log_file(fname, inlines)
191190
assert os.path.isfile(logf), f"Error: can not find {logf}"
192191
with open_file(logf) as f1:
193192
lines = f1.readlines()
194193

195-
atomnumber = 0
196-
for i in natoms:
197-
atomnumber += i
198-
energy, cells, coords, force, stress, virial = get_coords_from_log(
199-
lines, atomnumber
200-
)
194+
energy, cells, coords, force, stress, virial = get_coords_from_log(lines, natoms)
201195

202196
magmom, magforce = get_mag_force(lines)
203197

204-
data = {}
205-
data["atom_names"] = atom_names
206-
data["atom_numbs"] = natoms
207-
data["atom_types"] = types
208198
data["cells"] = cells
209199
data["coords"] = coords
210200
data["energies"] = energy
@@ -218,7 +208,7 @@ def get_frame(fname):
218208
data["spins"] = magmom
219209
if len(magforce) > 0:
220210
data["force_mags"] = magforce
221-
if len(move) > 0:
222-
data["move"] = move
211+
if "move" in data:
212+
data["move"] = [data["move"][0] for i in range(len(data["energies"]))]
223213

224214
return data

0 commit comments

Comments
 (0)