diff --git a/dpgen/auto_test/lib/abacus.py b/dpgen/auto_test/lib/abacus.py index 6d097dbaa..05ebd5cc1 100644 --- a/dpgen/auto_test/lib/abacus.py +++ b/dpgen/auto_test/lib/abacus.py @@ -4,7 +4,7 @@ import dpdata import numpy as np -from dpdata.abacus.scf import make_unlabeled_stru +from dpdata.abacus.stru import make_unlabeled_stru from dpdata.utils import uniq_atom_names from dpdata.vasp import poscar as dpdata_poscar diff --git a/dpgen/data/gen.py b/dpgen/data/gen.py index 484749ed7..f0a88bcb4 100644 --- a/dpgen/data/gen.py +++ b/dpgen/data/gen.py @@ -145,6 +145,7 @@ def stru_ele(supercell_stru, stru_out, eles, natoms, jdata, path_work): dpks_descriptor_name = os.path.basename(jdata["dpks_descriptor"]) supercell_stru["atom_masses"] = jdata["atom_masses"] supercell_stru["atom_names"] = eles + supercell_stru["atom_types"] = np.array(supercell_stru["types"]) stru_text = make_abacus_scf_stru( supercell_stru, pp_file_names, diff --git a/dpgen/data/tools/create_random_disturb.py b/dpgen/data/tools/create_random_disturb.py index 957e20872..fd6e0faec 100755 --- a/dpgen/data/tools/create_random_disturb.py +++ b/dpgen/data/tools/create_random_disturb.py @@ -192,6 +192,7 @@ def create_disturbs_abacus_dev( stru = get_abacus_STRU(fin) natoms = sum(stru["atom_numbs"]) cell0 = stru["cells"] + stru["masses"] = stru["atom_masses"] # in dpdata, it is masses that should be used. # creat nfile ofmt files. for fid in range(1, nfile + 1): diff --git a/dpgen/generator/lib/abacus_scf.py b/dpgen/generator/lib/abacus_scf.py index 96262620e..f04dbdd96 100644 --- a/dpgen/generator/lib/abacus_scf.py +++ b/dpgen/generator/lib/abacus_scf.py @@ -3,7 +3,7 @@ import re import numpy as np -from dpdata.abacus.scf import get_cell, get_coords, get_nele_from_stru +from dpdata.abacus.stru import get_frame_from_stru, make_unlabeled_stru from dpgen.auto_test.lib import vasp @@ -208,90 +208,37 @@ def make_abacus_scf_stru( fp_pp_files, fp_orb_files=None, fp_dpks_descriptor=None, - fp_params=None, type_map=None, pporb="", # pull all pp orb dpks files to pporb folder ): - atom_names = sys_data["atom_names"] - atom_numbs = sys_data["atom_numbs"] - if type_map is None: - type_map = atom_names - - assert len(atom_names) == len(atom_numbs), "Please check the name of atoms. " - cell = sys_data["cells"].reshape([3, 3]) - coord = sys_data["coords"].reshape([sum(atom_numbs), 3]) - # volume = np.linalg.det(cell) - # lattice_const = np.power(volume, 1/3) - - ret = "ATOMIC_SPECIES\n" - for iatom in range(len(atom_names)): - assert atom_names[iatom] in type_map, ( - f"element {atom_names[iatom]} is not defined in type_map" - ) - idx = type_map.index(atom_names[iatom]) - if "atom_masses" not in sys_data: - ret += ( - atom_names[iatom] - + " 1.00 " - + os.path.join(pporb, fp_pp_files[idx]) - + "\n" - ) - else: - ret += ( - atom_names[iatom] - + " {:.3f} ".format(sys_data["atom_masses"][iatom]) - + os.path.join(pporb, fp_pp_files[idx]) - + "\n" - ) - - if fp_params is not None and "lattice_constant" in fp_params: - ret += "\nLATTICE_CONSTANT\n" - ret += ( - str(fp_params["lattice_constant"]) + "\n\n" - ) # in Bohr, in this way coord and cell are in Angstrom - else: - ret += "\nLATTICE_CONSTANT\n" - ret += str(1 / bohr2ang) + "\n\n" - - ret += "LATTICE_VECTORS\n" - for ix in range(3): - for iy in range(3): - ret += str(cell[ix][iy]) + " " - ret += "\n" - ret += "\n" - - ret += "ATOMIC_POSITIONS\n" - ret += "Cartesian # Cartesian(Unit is LATTICE_CONSTANT)\n" - # ret += "\n" - natom_tot = 0 - for iele in range(len(atom_names)): - ret += atom_names[iele] + "\n" - ret += "0.0\n" - ret += str(atom_numbs[iele]) + "\n" - for iatom in range(atom_numbs[iele]): - ret += "%.12f %.12f %.12f %d %d %d\n" % ( # noqa: UP031 - coord[natom_tot, 0], - coord[natom_tot, 1], - coord[natom_tot, 2], - 1, - 1, - 1, - ) - natom_tot += 1 - assert natom_tot == sum(atom_numbs) - + sys_data_copy = copy.deepcopy(sys_data) + # re-construct the path of files by pporb + file name + fp_pp_files = [os.path.join(pporb, i) for i in fp_pp_files] if fp_orb_files is not None: - ret += "\nNUMERICAL_ORBITAL\n" - assert len(fp_orb_files) == len(type_map) - for iatom in range(len(atom_names)): - idx = type_map.index(atom_names[iatom]) - ret += os.path.join(pporb, fp_orb_files[idx]) + "\n" - + fp_orb_files = [os.path.join(pporb, i) for i in fp_orb_files] if fp_dpks_descriptor is not None: - ret += "\nNUMERICAL_DESCRIPTOR\n" - ret += os.path.join(pporb, fp_dpks_descriptor) + "\n" + fp_dpks_descriptor = os.path.join(pporb, fp_dpks_descriptor) + + # we need to make sure that the shape of cells and coords are the same + # and if they are 2D, we need to convert them to 3D + cells = np.array(sys_data["cells"]) + coords = np.array(sys_data["coords"]) + assert len(cells.shape) == len(coords.shape), ( + "cells and coords should have the same shape." + ) - return ret + if len(cells.shape) == 2: + sys_data_copy["cells"] = np.array([cells]) + sys_data_copy["coords"] = np.array([coords]) + c = make_unlabeled_stru( + sys_data_copy, + 0, + pp_file=fp_pp_files, + numerical_orbital=fp_orb_files, + numerical_descriptor=fp_dpks_descriptor, + ) + + return c def get_abacus_input_parameters(INPUT): @@ -306,106 +253,37 @@ def get_abacus_input_parameters(INPUT): return input_parameters -def get_mass_from_STRU(geometry_inlines, atom_names): - nele = get_nele_from_stru(geometry_inlines) - assert nele > 0 - mass_list = [0 for i in atom_names] - pp_file_list = [i for i in atom_names] - for iline, line in enumerate(geometry_inlines): - if line.split() == []: - continue - if "ATOMIC_SPECIES" == line.split()[0]: - for iele1 in range(1, 1 + nele): - for iele2 in range(nele): - if geometry_inlines[iline + iele1].split()[0] == atom_names[iele2]: - mass_list[iele2] = float( - geometry_inlines[iline + iele1].split()[1] - ) - pp_file_list[iele2] = geometry_inlines[iline + iele1].split()[2] - for iele in range(len(mass_list)): - assert mass_list[iele] > 0 - return mass_list, pp_file_list - - -def get_natoms_from_stru(geometry_inlines): - key_words_list = [ - "ATOMIC_SPECIES", - "NUMERICAL_ORBITAL", - "LATTICE_CONSTANT", - "LATTICE_VECTORS", - "ATOMIC_POSITIONS", - "NUMERICAL_DESCRIPTOR", - ] - keyword_sequence = [] - keyword_line_index = [] - atom_names = [] - atom_numbs = [] - tmp_line = [] - for i in geometry_inlines: - if i.strip() != "": - tmp_line.append(i) - for iline, line in enumerate(tmp_line): - if line.split() == []: - continue - have_key_word = False - for keyword in key_words_list: - if keyword in line and keyword == line.split()[0]: - keyword_sequence.append(keyword) - keyword_line_index.append(iline) - assert len(keyword_line_index) == len(keyword_sequence) - assert len(keyword_sequence) > 0 - keyword_line_index.append(len(tmp_line)) - for idx, keyword in enumerate(keyword_sequence): - if keyword == "ATOMIC_POSITIONS": - iline = keyword_line_index[idx] + 2 - while iline < keyword_line_index[idx + 1] - 1: - atom_names.append(tmp_line[iline].split()[0]) - atom_numbs.append(int(tmp_line[iline + 2].split()[0])) - iline += 3 + atom_numbs[-1] - return atom_names, atom_numbs - - -def get_additional_from_STRU(geometry_inlines, nele): - dpks_descriptor_kw = "NUMERICAL_DESCRIPTOR" - orb_file_kw = "NUMERICAL_ORBITAL" - dpks_descriptor = None - orb_file = None - for iline in range(len(geometry_inlines)): - if len(geometry_inlines[iline]) > 0: - if orb_file_kw == geometry_inlines[iline].split()[0]: - orb_file = [] - for iele in range(nele): - orb_file.append(geometry_inlines[iline + iele + 1].strip()) - if dpks_descriptor_kw == geometry_inlines[iline].split()[0]: - dpks_descriptor = geometry_inlines[iline + 1].strip() - return orb_file, dpks_descriptor - - -def get_abacus_STRU(STRU, INPUT=None, n_ele=None): - # read in geometry from STRU file. n_ele is the number of elements. - # Either n_ele or INPUT should be provided. - with open(STRU) as fp: - geometry_inlines = fp.read().split("\n") - for iline, line in enumerate(geometry_inlines): - if line.split() == [] or len(line) == 0: - del geometry_inlines[iline] - geometry_inlines.append("") - celldm, cell = get_cell(geometry_inlines) - atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines) - masses, pp_files = get_mass_from_STRU(geometry_inlines, atom_names) - orb_files, dpks_descriptor = get_additional_from_STRU(geometry_inlines, len(masses)) - data = {} - data["atom_names"] = atom_names - data["atom_numbs"] = natoms - data["atom_types"] = types - data["cells"] = cell - data["coords"] = coords - data["atom_masses"] = ( - masses # Notice that this key is not defined in dpdata system. - ) - data["pp_files"] = pp_files - data["orb_files"] = orb_files - data["dpks_descriptor"] = dpks_descriptor +def get_abacus_STRU(STRU): + """Read STRU file and return a dictionary containing the structure information. + + Args: + STRU (str): The path of STRU file. + + Returns + ------- + dict: A dictionary containing the structure information. + { + "atom_names": list of str, + "atom_numbs": list of int, + "atom_masses": list of float, + "coords": np.ndarray, + "cells": np.ndarray, + "pp_files": list of str, + "orb_files": list of str, + "dpks_descriptor": str, + } + """ + data = get_frame_from_stru(STRU) + data["atom_masses"] = data.pop("masses") + data["cells"] = data.pop("cells")[0] + data["coords"] = data.pop("coords")[0] + assert "pp_files" in data, "pp_files should be provided in STRU file." + if None in data["pp_files"]: + data["pp_files"] = None + if "orb_files" not in data: + data["orb_files"] = None + if "dpks_descriptor" not in data: + data["dpks_descriptor"] = None return data @@ -419,7 +297,21 @@ def make_supercell_abacus(from_struct, super_cell): # ) for idx_atm in from_struct["atom_types"]: new_types += [idx_atm] * super_cell[0] * super_cell[1] * super_cell[2] - to_struct["atom_types"] = new_types + to_struct["atom_types"] = np.array(new_types) + + # expand move, spins + for ikey in ["move", "spins"]: + if ikey in from_struct: + new_list = [] + for ia in range(sum(from_struct["atom_numbs"])): + new_list += ( + [from_struct[ikey][0][ia]] + * super_cell[0] + * super_cell[1] + * super_cell[2] + ) + to_struct[ikey] = np.array([new_list]) + to_atom_num = ( sum(from_struct["atom_numbs"]) * super_cell[0] * super_cell[1] * super_cell[2] ) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 8cacb0e87..0f6953721 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -3569,7 +3569,6 @@ def make_fp_abacus_scf(iter_index, jdata): fp_pp_files, fp_orb_files, fp_dpks_descriptor, - fp_params, type_map=jdata["type_map"], pporb=pporb_path, ) diff --git a/tests/auto_test/test_abacus.py b/tests/auto_test/test_abacus.py index 28099e821..d4944497e 100644 --- a/tests/auto_test/test_abacus.py +++ b/tests/auto_test/test_abacus.py @@ -216,7 +216,7 @@ def test_make_input_file_kspacing_three_value(self): kpt = f1.read().strip().split("\n")[-1].split() self.assertEqual(kpt, ["9", "5", "3", "0", "0", "0"]) - def test_compuate(self): + def test_compute(self): ret = self.ABACUS.compute(os.path.join(self.equi_path)) self.assertIsNone(ret) shutil.copy( @@ -247,7 +247,21 @@ def compare_dict(dict1, dict2): else: self.assertTrue(dict1[key] == dict2[key]) - compare_dict(ret, ret_ref.as_dict()) + compare_keys = [ + "atom_numbs", + "atom_names", + "atom_types", + "cells", + "coords", + "energies", + "forces", + "virials", + "stress", + ] + compare_dict( + {k: ret["data"][k] for k in compare_keys}, + {k: ret_ref.data[k] for k in compare_keys}, + ) class TestABACUSDeepKS(unittest.TestCase): diff --git a/tests/data/test_gen_bulk_abacus.py b/tests/data/test_gen_bulk_abacus.py index 520589829..728a6964e 100644 --- a/tests/data/test_gen_bulk_abacus.py +++ b/tests/data/test_gen_bulk_abacus.py @@ -76,6 +76,7 @@ def test(self): def testSTRU(self): jdata = self.jdata jdata["from_poscar_path"] = "./Cu.STRU" + jdata["potcars"] = ["abacus.in/Cu_ONCV_PBE-1.0.upf"] make_super_cell_STRU(jdata) make_abacus_relax(jdata, {"fp_resources": {}}) make_scale_ABACUS(jdata)