From c226a07d1631896853b418fac0cdf97a6b2ccc3f Mon Sep 17 00:00:00 2001 From: root Date: Tue, 11 Feb 2025 17:34:20 +0800 Subject: [PATCH 1/4] fix bug in reading abacus stru --- dpgen/auto_test/lib/abacus.py | 2 +- dpgen/generator/lib/abacus_scf.py | 195 +++--------------------------- dpgen/generator/run.py | 1 - tests/auto_test/test_abacus.py | 8 +- 4 files changed, 24 insertions(+), 182 deletions(-) diff --git a/dpgen/auto_test/lib/abacus.py b/dpgen/auto_test/lib/abacus.py index 34b53af62..a9e0a613b 100644 --- a/dpgen/auto_test/lib/abacus.py +++ b/dpgen/auto_test/lib/abacus.py @@ -4,7 +4,7 @@ import dpdata import numpy as np -from dpdata.abacus.scf import make_unlabeled_stru +from dpdata.abacus.stru import make_unlabeled_stru from dpdata.utils import uniq_atom_names from dpdata.vasp import poscar as dpdata_poscar from pymatgen.core.structure import Structure diff --git a/dpgen/generator/lib/abacus_scf.py b/dpgen/generator/lib/abacus_scf.py index 744147e88..ef3129ebc 100644 --- a/dpgen/generator/lib/abacus_scf.py +++ b/dpgen/generator/lib/abacus_scf.py @@ -3,7 +3,8 @@ import re import numpy as np -from dpdata.abacus.scf import get_cell, get_coords, get_nele_from_stru + +from dpdata.abacus.stru import get_frame_from_stru, make_unlabeled_stru from dpgen.auto_test.lib import vasp @@ -209,90 +210,19 @@ def make_abacus_scf_stru( fp_pp_files, fp_orb_files=None, fp_dpks_descriptor=None, - fp_params=None, type_map=None, pporb="", # pull all pp orb dpks files to pporb folder ): - atom_names = sys_data["atom_names"] - atom_numbs = sys_data["atom_numbs"] - if type_map is None: - type_map = atom_names - - assert len(atom_names) == len(atom_numbs), "Please check the name of atoms. " - cell = sys_data["cells"].reshape([3, 3]) - coord = sys_data["coords"].reshape([sum(atom_numbs), 3]) - # volume = np.linalg.det(cell) - # lattice_const = np.power(volume, 1/3) - - ret = "ATOMIC_SPECIES\n" - for iatom in range(len(atom_names)): - assert atom_names[iatom] in type_map, ( - "element %s is not defined in type_map" % atom_names[iatom] - ) - idx = type_map.index(atom_names[iatom]) - if "atom_masses" not in sys_data: - ret += ( - atom_names[iatom] - + " 1.00 " - + os.path.join(pporb, fp_pp_files[idx]) - + "\n" - ) - else: - ret += ( - atom_names[iatom] - + " %.3f " % sys_data["atom_masses"][iatom] - + os.path.join(pporb, fp_pp_files[idx]) - + "\n" - ) - - if fp_params is not None and "lattice_constant" in fp_params: - ret += "\nLATTICE_CONSTANT\n" - ret += ( - str(fp_params["lattice_constant"]) + "\n\n" - ) # in Bohr, in this way coord and cell are in Angstrom - else: - ret += "\nLATTICE_CONSTANT\n" - ret += str(1 / bohr2ang) + "\n\n" - - ret += "LATTICE_VECTORS\n" - for ix in range(3): - for iy in range(3): - ret += str(cell[ix][iy]) + " " - ret += "\n" - ret += "\n" - - ret += "ATOMIC_POSITIONS\n" - ret += "Cartesian # Cartesian(Unit is LATTICE_CONSTANT)\n" - # ret += "\n" - natom_tot = 0 - for iele in range(len(atom_names)): - ret += atom_names[iele] + "\n" - ret += "0.0\n" - ret += str(atom_numbs[iele]) + "\n" - for iatom in range(atom_numbs[iele]): - ret += "%.12f %.12f %.12f %d %d %d\n" % ( - coord[natom_tot, 0], - coord[natom_tot, 1], - coord[natom_tot, 2], - 1, - 1, - 1, - ) - natom_tot += 1 - assert natom_tot == sum(atom_numbs) - + # re-construct the path of files by pporb + file name + fp_pp_files = [os.path.join(pporb, i) for i in fp_pp_files] if fp_orb_files is not None: - ret += "\nNUMERICAL_ORBITAL\n" - assert len(fp_orb_files) == len(type_map) - for iatom in range(len(atom_names)): - idx = type_map.index(atom_names[iatom]) - ret += os.path.join(pporb, fp_orb_files[idx]) + "\n" - + fp_orb_files = [os.path.join(pporb, i) for i in fp_orb_files] if fp_dpks_descriptor is not None: - ret += "\nNUMERICAL_DESCRIPTOR\n" - ret += os.path.join(pporb, fp_dpks_descriptor) + "\n" - - return ret + fp_dpks_descriptor = os.path.join(pporb, fp_dpks_descriptor) + + c = make_unlabeled_stru(sys_data, 0, pp_files=fp_pp_files, numerical_orbital=fp_orb_files, numerical_descriptor=fp_dpks_descriptor) + + return c def get_abacus_input_parameters(INPUT): @@ -307,106 +237,17 @@ def get_abacus_input_parameters(INPUT): return input_parameters -def get_mass_from_STRU(geometry_inlines, atom_names): - nele = get_nele_from_stru(geometry_inlines) - assert nele > 0 - mass_list = [0 for i in atom_names] - pp_file_list = [i for i in atom_names] - for iline, line in enumerate(geometry_inlines): - if line.split() == []: - continue - if "ATOMIC_SPECIES" == line.split()[0]: - for iele1 in range(1, 1 + nele): - for iele2 in range(nele): - if geometry_inlines[iline + iele1].split()[0] == atom_names[iele2]: - mass_list[iele2] = float( - geometry_inlines[iline + iele1].split()[1] - ) - pp_file_list[iele2] = geometry_inlines[iline + iele1].split()[2] - for iele in range(len(mass_list)): - assert mass_list[iele] > 0 - return mass_list, pp_file_list - - -def get_natoms_from_stru(geometry_inlines): - key_words_list = [ - "ATOMIC_SPECIES", - "NUMERICAL_ORBITAL", - "LATTICE_CONSTANT", - "LATTICE_VECTORS", - "ATOMIC_POSITIONS", - "NUMERICAL_DESCRIPTOR", - ] - keyword_sequence = [] - keyword_line_index = [] - atom_names = [] - atom_numbs = [] - tmp_line = [] - for i in geometry_inlines: - if i.strip() != "": - tmp_line.append(i) - for iline, line in enumerate(tmp_line): - if line.split() == []: - continue - have_key_word = False - for keyword in key_words_list: - if keyword in line and keyword == line.split()[0]: - keyword_sequence.append(keyword) - keyword_line_index.append(iline) - assert len(keyword_line_index) == len(keyword_sequence) - assert len(keyword_sequence) > 0 - keyword_line_index.append(len(tmp_line)) - for idx, keyword in enumerate(keyword_sequence): - if keyword == "ATOMIC_POSITIONS": - iline = keyword_line_index[idx] + 2 - while iline < keyword_line_index[idx + 1] - 1: - atom_names.append(tmp_line[iline].split()[0]) - atom_numbs.append(int(tmp_line[iline + 2].split()[0])) - iline += 3 + atom_numbs[-1] - return atom_names, atom_numbs - - -def get_additional_from_STRU(geometry_inlines, nele): - dpks_descriptor_kw = "NUMERICAL_DESCRIPTOR" - orb_file_kw = "NUMERICAL_ORBITAL" - dpks_descriptor = None - orb_file = None - for iline in range(len(geometry_inlines)): - if len(geometry_inlines[iline]) > 0: - if orb_file_kw == geometry_inlines[iline].split()[0]: - orb_file = [] - for iele in range(nele): - orb_file.append(geometry_inlines[iline + iele + 1].strip()) - if dpks_descriptor_kw == geometry_inlines[iline].split()[0]: - dpks_descriptor = geometry_inlines[iline + 1].strip() - return orb_file, dpks_descriptor - - def get_abacus_STRU(STRU, INPUT=None, n_ele=None): # read in geometry from STRU file. n_ele is the number of elements. # Either n_ele or INPUT should be provided. - with open(STRU) as fp: - geometry_inlines = fp.read().split("\n") - for iline, line in enumerate(geometry_inlines): - if line.split() == [] or len(line) == 0: - del geometry_inlines[iline] - geometry_inlines.append("") - celldm, cell = get_cell(geometry_inlines) - atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines) - masses, pp_files = get_mass_from_STRU(geometry_inlines, atom_names) - orb_files, dpks_descriptor = get_additional_from_STRU(geometry_inlines, len(masses)) - data = {} - data["atom_names"] = atom_names - data["atom_numbs"] = natoms - data["atom_types"] = types - data["cells"] = cell - data["coords"] = coords - data["atom_masses"] = ( - masses # Notice that this key is not defined in dpdata system. - ) - data["pp_files"] = pp_files - data["orb_files"] = orb_files - data["dpks_descriptor"] = dpks_descriptor + data = get_frame_from_stru(STRU) + data["atom_masses"] = data.pop("masses") + data["cells"] = data.pop("cells")[0] + data["coords"] = data.pop("coords")[0] + if "orb_files" not in data: + data["orb_files"] = None + if "dpks_descriptor" not in data: + data["dpks_descriptor"] = None return data diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 7e8bfdce6..6973d0539 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -3482,7 +3482,6 @@ def make_fp_abacus_scf(iter_index, jdata): fp_pp_files, fp_orb_files, fp_dpks_descriptor, - fp_params, type_map=jdata["type_map"], pporb=pporb_path, ) diff --git a/tests/auto_test/test_abacus.py b/tests/auto_test/test_abacus.py index 28099e821..f42614802 100644 --- a/tests/auto_test/test_abacus.py +++ b/tests/auto_test/test_abacus.py @@ -216,7 +216,7 @@ def test_make_input_file_kspacing_three_value(self): kpt = f1.read().strip().split("\n")[-1].split() self.assertEqual(kpt, ["9", "5", "3", "0", "0", "0"]) - def test_compuate(self): + def test_compute(self): ret = self.ABACUS.compute(os.path.join(self.equi_path)) self.assertIsNone(ret) shutil.copy( @@ -246,8 +246,10 @@ def compare_dict(dict1, dict2): ) else: self.assertTrue(dict1[key] == dict2[key]) - - compare_dict(ret, ret_ref.as_dict()) + compare_keys = ["atom_numbs", "atom_names", "atom_types", "cells", "coords", + 'energies', 'forces', 'virials', 'stress'] + compare_dict({k:ret["data"][k] for k in compare_keys}, + {k: ret_ref.data[k] for k in compare_keys}) class TestABACUSDeepKS(unittest.TestCase): From 6badb9b2578868af225b1618588024738060c29c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Feb 2025 09:42:10 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpgen/generator/lib/abacus_scf.py | 13 +++++++++---- tests/auto_test/test_abacus.py | 20 ++++++++++++++++---- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/dpgen/generator/lib/abacus_scf.py b/dpgen/generator/lib/abacus_scf.py index 65dcdced8..41e05561b 100644 --- a/dpgen/generator/lib/abacus_scf.py +++ b/dpgen/generator/lib/abacus_scf.py @@ -3,7 +3,6 @@ import re import numpy as np - from dpdata.abacus.stru import get_frame_from_stru, make_unlabeled_stru from dpgen.auto_test.lib import vasp @@ -218,9 +217,15 @@ def make_abacus_scf_stru( fp_orb_files = [os.path.join(pporb, i) for i in fp_orb_files] if fp_dpks_descriptor is not None: fp_dpks_descriptor = os.path.join(pporb, fp_dpks_descriptor) - - c = make_unlabeled_stru(sys_data, 0, pp_files=fp_pp_files, numerical_orbital=fp_orb_files, numerical_descriptor=fp_dpks_descriptor) - + + c = make_unlabeled_stru( + sys_data, + 0, + pp_files=fp_pp_files, + numerical_orbital=fp_orb_files, + numerical_descriptor=fp_dpks_descriptor, + ) + return c diff --git a/tests/auto_test/test_abacus.py b/tests/auto_test/test_abacus.py index f42614802..d4944497e 100644 --- a/tests/auto_test/test_abacus.py +++ b/tests/auto_test/test_abacus.py @@ -246,10 +246,22 @@ def compare_dict(dict1, dict2): ) else: self.assertTrue(dict1[key] == dict2[key]) - compare_keys = ["atom_numbs", "atom_names", "atom_types", "cells", "coords", - 'energies', 'forces', 'virials', 'stress'] - compare_dict({k:ret["data"][k] for k in compare_keys}, - {k: ret_ref.data[k] for k in compare_keys}) + + compare_keys = [ + "atom_numbs", + "atom_names", + "atom_types", + "cells", + "coords", + "energies", + "forces", + "virials", + "stress", + ] + compare_dict( + {k: ret["data"][k] for k in compare_keys}, + {k: ret_ref.data[k] for k in compare_keys}, + ) class TestABACUSDeepKS(unittest.TestCase): From 3fc4957e4b0fb2f21e03280094d1e2f222d604c6 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 15 Feb 2025 22:00:27 +0800 Subject: [PATCH 3/4] fix ut --- dpgen/data/gen.py | 1 + dpgen/data/tools/create_random_disturb.py | 1 + dpgen/generator/lib/abacus_scf.py | 48 ++++++++++++++++++++--- tests/data/test_gen_bulk_abacus.py | 1 + 4 files changed, 46 insertions(+), 5 deletions(-) diff --git a/dpgen/data/gen.py b/dpgen/data/gen.py index 484749ed7..f0a88bcb4 100644 --- a/dpgen/data/gen.py +++ b/dpgen/data/gen.py @@ -145,6 +145,7 @@ def stru_ele(supercell_stru, stru_out, eles, natoms, jdata, path_work): dpks_descriptor_name = os.path.basename(jdata["dpks_descriptor"]) supercell_stru["atom_masses"] = jdata["atom_masses"] supercell_stru["atom_names"] = eles + supercell_stru["atom_types"] = np.array(supercell_stru["types"]) stru_text = make_abacus_scf_stru( supercell_stru, pp_file_names, diff --git a/dpgen/data/tools/create_random_disturb.py b/dpgen/data/tools/create_random_disturb.py index 957e20872..fd6e0faec 100755 --- a/dpgen/data/tools/create_random_disturb.py +++ b/dpgen/data/tools/create_random_disturb.py @@ -192,6 +192,7 @@ def create_disturbs_abacus_dev( stru = get_abacus_STRU(fin) natoms = sum(stru["atom_numbs"]) cell0 = stru["cells"] + stru["masses"] = stru["atom_masses"] # in dpdata, it is masses that should be used. # creat nfile ofmt files. for fid in range(1, nfile + 1): diff --git a/dpgen/generator/lib/abacus_scf.py b/dpgen/generator/lib/abacus_scf.py index 65dcdced8..edefeb431 100644 --- a/dpgen/generator/lib/abacus_scf.py +++ b/dpgen/generator/lib/abacus_scf.py @@ -212,6 +212,7 @@ def make_abacus_scf_stru( type_map=None, pporb="", # pull all pp orb dpks files to pporb folder ): + sys_data_copy = copy.deepcopy(sys_data) # re-construct the path of files by pporb + file name fp_pp_files = [os.path.join(pporb, i) for i in fp_pp_files] if fp_orb_files is not None: @@ -219,7 +220,16 @@ def make_abacus_scf_stru( if fp_dpks_descriptor is not None: fp_dpks_descriptor = os.path.join(pporb, fp_dpks_descriptor) - c = make_unlabeled_stru(sys_data, 0, pp_files=fp_pp_files, numerical_orbital=fp_orb_files, numerical_descriptor=fp_dpks_descriptor) + # we need to make sure that the shape of cells and coords are the same + # and if they are 2D, we need to convert them to 3D + cells = np.array(sys_data["cells"]) + coords = np.array(sys_data["coords"]) + assert len(cells.shape) == len(coords.shape), "cells and coords should have the same shape." + + if len(cells.shape) == 2: + sys_data_copy["cells"] = np.array([cells]) + sys_data_copy["coords"] = np.array([coords]) + c = make_unlabeled_stru(sys_data_copy, 0, pp_file=fp_pp_files, numerical_orbital=fp_orb_files, numerical_descriptor=fp_dpks_descriptor) return c @@ -236,13 +246,32 @@ def get_abacus_input_parameters(INPUT): return input_parameters -def get_abacus_STRU(STRU, INPUT=None, n_ele=None): - # read in geometry from STRU file. n_ele is the number of elements. - # Either n_ele or INPUT should be provided. +def get_abacus_STRU(STRU): + """Read STRU file and return a dictionary containing the structure information. + + Args: + STRU (str): The path of STRU file. + + Returns: + dict: A dictionary containing the structure information. + { + "atom_names": list of str, + "atom_numbs": list of int, + "atom_masses": list of float, + "coords": np.ndarray, + "cells": np.ndarray, + "pp_files": list of str, + "orb_files": list of str, + "dpks_descriptor": str, + } + """ data = get_frame_from_stru(STRU) data["atom_masses"] = data.pop("masses") data["cells"] = data.pop("cells")[0] data["coords"] = data.pop("coords")[0] + assert "pp_files" in data, "pp_files should be provided in STRU file." + if None in data["pp_files"]: + data["pp_files"] = None if "orb_files" not in data: data["orb_files"] = None if "dpks_descriptor" not in data: @@ -260,7 +289,16 @@ def make_supercell_abacus(from_struct, super_cell): # ) for idx_atm in from_struct["atom_types"]: new_types += [idx_atm] * super_cell[0] * super_cell[1] * super_cell[2] - to_struct["atom_types"] = new_types + to_struct["atom_types"] = np.array(new_types) + + # expand move, spins + for ikey in ["move", "spins"]: + if ikey in from_struct: + new_list = [] + for ia in range(sum(from_struct["atom_numbs"])): + new_list += [from_struct[ikey][0][ia]] * super_cell[0] * super_cell[1] * super_cell[2] + to_struct[ikey] = np.array([new_list]) + to_atom_num = ( sum(from_struct["atom_numbs"]) * super_cell[0] * super_cell[1] * super_cell[2] ) diff --git a/tests/data/test_gen_bulk_abacus.py b/tests/data/test_gen_bulk_abacus.py index 520589829..728a6964e 100644 --- a/tests/data/test_gen_bulk_abacus.py +++ b/tests/data/test_gen_bulk_abacus.py @@ -76,6 +76,7 @@ def test(self): def testSTRU(self): jdata = self.jdata jdata["from_poscar_path"] = "./Cu.STRU" + jdata["potcars"] = ["abacus.in/Cu_ONCV_PBE-1.0.upf"] make_super_cell_STRU(jdata) make_abacus_relax(jdata, {"fp_resources": {}}) make_scale_ABACUS(jdata) From ad6622cf31020dbc5c9d06984390eca830f50b12 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Feb 2025 14:01:35 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpgen/generator/lib/abacus_scf.py | 52 ++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/dpgen/generator/lib/abacus_scf.py b/dpgen/generator/lib/abacus_scf.py index 812d8751e..f04dbdd96 100644 --- a/dpgen/generator/lib/abacus_scf.py +++ b/dpgen/generator/lib/abacus_scf.py @@ -218,18 +218,26 @@ def make_abacus_scf_stru( fp_orb_files = [os.path.join(pporb, i) for i in fp_orb_files] if fp_dpks_descriptor is not None: fp_dpks_descriptor = os.path.join(pporb, fp_dpks_descriptor) - + # we need to make sure that the shape of cells and coords are the same # and if they are 2D, we need to convert them to 3D cells = np.array(sys_data["cells"]) coords = np.array(sys_data["coords"]) - assert len(cells.shape) == len(coords.shape), "cells and coords should have the same shape." - + assert len(cells.shape) == len(coords.shape), ( + "cells and coords should have the same shape." + ) + if len(cells.shape) == 2: sys_data_copy["cells"] = np.array([cells]) sys_data_copy["coords"] = np.array([coords]) - c = make_unlabeled_stru(sys_data_copy, 0, pp_file=fp_pp_files, numerical_orbital=fp_orb_files, numerical_descriptor=fp_dpks_descriptor) - + c = make_unlabeled_stru( + sys_data_copy, + 0, + pp_file=fp_pp_files, + numerical_orbital=fp_orb_files, + numerical_descriptor=fp_dpks_descriptor, + ) + return c @@ -251,18 +259,19 @@ def get_abacus_STRU(STRU): Args: STRU (str): The path of STRU file. - Returns: - dict: A dictionary containing the structure information. - { - "atom_names": list of str, - "atom_numbs": list of int, - "atom_masses": list of float, - "coords": np.ndarray, - "cells": np.ndarray, - "pp_files": list of str, - "orb_files": list of str, - "dpks_descriptor": str, - } + Returns + ------- + dict: A dictionary containing the structure information. + { + "atom_names": list of str, + "atom_numbs": list of int, + "atom_masses": list of float, + "coords": np.ndarray, + "cells": np.ndarray, + "pp_files": list of str, + "orb_files": list of str, + "dpks_descriptor": str, + } """ data = get_frame_from_stru(STRU) data["atom_masses"] = data.pop("masses") @@ -289,13 +298,18 @@ def make_supercell_abacus(from_struct, super_cell): for idx_atm in from_struct["atom_types"]: new_types += [idx_atm] * super_cell[0] * super_cell[1] * super_cell[2] to_struct["atom_types"] = np.array(new_types) - + # expand move, spins for ikey in ["move", "spins"]: if ikey in from_struct: new_list = [] for ia in range(sum(from_struct["atom_numbs"])): - new_list += [from_struct[ikey][0][ia]] * super_cell[0] * super_cell[1] * super_cell[2] + new_list += ( + [from_struct[ikey][0][ia]] + * super_cell[0] + * super_cell[1] + * super_cell[2] + ) to_struct[ikey] = np.array([new_list]) to_atom_num = (