Skip to content

Commit 6041506

Browse files
committed
Update closest_site_info to be robust for very very weird structure inputs (e.g. mp-674158, mp-1208561)
1 parent 1a28d56 commit 6041506

12 files changed

+111
-76
lines changed

doped/core.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2175,7 +2175,7 @@ def __eq__(self, other) -> bool:
21752175
loose ``stol`` used in ``pymatgen-analysis-defects``) and
21762176
much more efficient.
21772177
"""
2178-
if not isinstance(other, (self, core.Defect)):
2178+
if not isinstance(other, (type(self), core.Defect)):
21792179
raise TypeError("Can only compare `Defect`s with `Defect`s!")
21802180

21812181
if self.defect_type != other.defect_type:

doped/generation.py

+86-57
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
calculations.
44
"""
55

6+
import contextlib
67
import copy
78
import logging
89
import operator
@@ -28,7 +29,7 @@
2829
from pymatgen.core import IStructure, Structure
2930
from pymatgen.core.composition import Composition, Element
3031
from pymatgen.core.periodic_table import DummySpecies
31-
from pymatgen.core.structure import PeriodicSite
32+
from pymatgen.core.structure import PeriodicNeighbor, PeriodicSite
3233
from pymatgen.entries.computed_entries import ComputedStructureEntry
3334
from pymatgen.transformations.advanced_transformations import CubicSupercellTransformation
3435
from pymatgen.util.typing import PathLike
@@ -199,31 +200,25 @@ def closest_site_info(
199200
be at least 0.02 Å further away than the n-1th site.
200201
"""
201202
if isinstance(defect_entry_or_defect, (DefectEntry, thermo.DefectEntry)):
202-
defect = defect_entry_or_defect.defect
203-
# use defect_supercell_site if attribute exists, otherwise use sc_defect_frac_coords:
204-
defect_supercell_site = parsing._get_defect_supercell_site(defect_entry_or_defect)
205-
defect_supercell = parsing._get_defect_supercell(defect_entry_or_defect)
203+
site = None
204+
with contextlib.suppress(Exception):
205+
defect = defect_entry_or_defect.defect
206+
site = defect.site
207+
structure = defect.structure
208+
if site is None:
209+
# use defect_supercell_site if attribute exists, otherwise use sc_defect_frac_coords:
210+
site = parsing._get_defect_supercell_site(defect_entry_or_defect)
211+
structure = parsing._get_bulk_supercell(defect_entry_or_defect)
206212

207213
elif isinstance(defect_entry_or_defect, (Defect, core.Defect)):
208214
if isinstance(defect_entry_or_defect, core.Defect):
209215
defect = doped_defect_from_pmg_defect(defect_entry_or_defect) # convert to doped Defect
210216
else:
211217
defect = defect_entry_or_defect
212218

213-
req_sc_mat = np.eye(3) * np.ceil((5 * np.sqrt(n)) / min(defect.defect_structure.lattice.abc))
214-
if np.all(req_sc_mat == np.eye(3)): # just defect is fine
215-
defect_supercell = defect.defect_structure
216-
defect_supercell_site = defect.site
219+
site = defect.site
220+
structure = defect.structure
217221

218-
else:
219-
(
220-
defect_supercell,
221-
defect_supercell_site,
222-
_equivalent_supercell_sites,
223-
) = defect.get_supercell_structure(
224-
sc_mat=req_sc_mat,
225-
return_sites=True,
226-
)
227222
else:
228223
raise TypeError(
229224
f"defect_entry_or_defect must be a DefectEntry or Defect object, not "
@@ -233,43 +228,68 @@ def closest_site_info(
233228
if element_list is None:
234229
element_list = _get_element_list(defect)
235230

236-
distance_matrix = defect_supercell.lattice.get_all_distances(
237-
defect_supercell.frac_coords,
238-
defect_supercell_site.frac_coords,
239-
)
240-
if distance_matrix.shape[1] == 1: # Check if it is (X, 1)
241-
distance_matrix = distance_matrix.ravel()
242-
243-
# ensure the defect site itself is excluded, and ignore sites further than 5*sqrt(n) Å away
244-
possible_close_site_indices = np.where((distance_matrix > 0.05) & (distance_matrix < 5 * np.sqrt(n)))[
245-
0
246-
]
247-
248-
site_distances = sorted( # Could make this faster using caching if it was becoming a bottleneck
249-
[
250-
(
251-
distance_matrix[i],
252-
defect_supercell.sites[i].specie.symbol,
253-
)
254-
for i in possible_close_site_indices
255-
],
256-
key=lambda x: (symmetry._custom_round(x[0], 2), _list_index_or_val(element_list, x[1]), x[1]),
257-
)
258-
259-
# prune site_distances to remove any tuples with distances within 0.02 Å of the previous entry:
260-
site_distances = [
261-
site_distances[i]
262-
for i in range(len(site_distances))
263-
if i == 0
264-
or abs(site_distances[i][0] - site_distances[i - 1][0]) > 0.02
265-
or site_distances[i][1] != site_distances[i - 1][1]
266-
]
231+
def _get_site_distances_and_symbols(
232+
site: PeriodicSite,
233+
structure: Structure,
234+
n: int,
235+
element_list: list[str],
236+
dist_tol_prefactor: float = 3.0,
237+
):
238+
"""
239+
Get a list of sorted tuples of (distance, element) for the closest
240+
sites to the input site in the input structure, and the last used
241+
``dist_tol_prefactor``.
242+
243+
Dynamically increases ``dist_tol_prefactor`` until at least one other
244+
site is found within the distance tolerance. Function defined and used
245+
here to allow dynamic upscaling of the distance tolerance for weird
246+
structures (e.g. mp-674158, mp-1208561).
247+
"""
248+
neighbours: list[PeriodicNeighbor] = []
249+
while not neighbours: # for efficiency, ignore sites further than dist_tol*sqrt(n) Å away:
250+
neighbours_w_site_itself = structure.get_sites_in_sphere(
251+
site.coords, dist_tol_prefactor * np.sqrt(n)
252+
) # exclude defect site itself:
253+
neighbours = sorted(neighbours_w_site_itself, key=lambda x: x.nn_distance)[1:]
254+
dist_tol_prefactor += 0.5 # increase the distance tolerance if no other sites are found
255+
if dist_tol_prefactor > 40:
256+
warnings.warn(
257+
"No other sites found within 40*sqrt(n) Å of the defect site, indicating a very "
258+
"weird structure..."
259+
)
260+
break
261+
if not neighbours:
262+
return [], dist_tol_prefactor
267263

268-
if site_distances:
269-
min_distance, closest_site = site_distances[n - 1]
270-
return f"{closest_site}{symmetry._custom_round(min_distance, 2):.2f}"
264+
site_distances = sorted( # Could make this faster using caching if it was becoming a bottleneck
265+
[
266+
(
267+
neigh.nn_distance,
268+
neigh.specie.symbol,
269+
)
270+
for neigh in neighbours
271+
],
272+
key=lambda x: (symmetry._custom_round(x[0], 2), _list_index_or_val(element_list, x[1]), x[1]),
273+
)
274+
return [ # prune site_distances to remove any with distances within 0.02 Å of the previous n:
275+
site_distances[i]
276+
for i in range(len(site_distances))
277+
if i == 0
278+
or abs(site_distances[i][0] - site_distances[i - 1][0]) > 0.02
279+
or site_distances[i][1] != site_distances[i - 1][1]
280+
], dist_tol_prefactor
281+
282+
site_distances, dist_tol_prefactor = _get_site_distances_and_symbols(site, structure, n, element_list)
283+
while len(site_distances) < n:
284+
if dist_tol_prefactor > 40:
285+
return "" # already warned
286+
287+
site_distances, dist_tol_prefactor = _get_site_distances_and_symbols(
288+
site, structure, n, element_list, dist_tol_prefactor + 2
289+
)
271290

272-
return "" # hypothetical case of very weird structure with no sites within 5*sqrt(n) Å...
291+
min_distance, closest_site = site_distances[n - 1]
292+
return f"{closest_site}{symmetry._custom_round(min_distance, 2):.2f}"
273293

274294

275295
def get_defect_name_from_defect(
@@ -2497,12 +2517,21 @@ def get_Voronoi_interstitial_sites(
24972517
f"only the following keys are supported: {supported_interstitial_gen_kwargs}"
24982518
)
24992519
top = DopedTopographyAnalyzer(host_structure)
2520+
if not top.vnodes:
2521+
warnings.warn("No interstitial sites found in host structure!")
2522+
return []
2523+
25002524
sites_list = [v.frac_coords for v in top.vnodes]
2501-
sites_list = remove_collisions(
2502-
sites_list, structure=host_structure, min_dist=interstitial_gen_kwargs.get("min_dist", 0.9)
2503-
)
2525+
min_dist = interstitial_gen_kwargs.get("min_dist", 0.9)
2526+
sites_array = remove_collisions(sites_list, structure=host_structure, min_dist=min_dist)
2527+
if sites_array.size == 0:
2528+
warnings.warn(
2529+
f"No interstitial sites found after removing those within {min_dist} Å of host atoms!"
2530+
)
2531+
return []
2532+
25042533
site_frac_coords_array: np.array = _doped_cluster_frac_coords(
2505-
sites_list,
2534+
sites_array,
25062535
host_structure,
25072536
tol=interstitial_gen_kwargs.get("clustering_tol", 0.55),
25082537
symmetry_preference=interstitial_gen_kwargs.get("symmetry_preference", 0.1),

doped/utils/efficiency.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,12 @@ def _doped_cluster_frac_coords(
457457
Returns:
458458
np.typing.NDArray: Clustered fractional coordinates
459459
"""
460-
if len(fcoords) <= 1:
460+
if len(fcoords) == 0:
461461
return None
462+
if len(fcoords) == 1:
463+
return symmetry._vectorized_custom_round(
464+
np.mod(symmetry._vectorized_custom_round(fcoords, 5), 1), 4
465+
) # to unit cell
462466

463467
lattice = structure.lattice
464468
sga = symmetry.get_sga(structure, symprec=0.1) # for getting symmetries of different sites

tests/data/CdTe_defect_gen.json

+1-1
Large diffs are not rendered by default.

tests/data/N_diamond_defect_gen.json

+1-1
Large diffs are not rendered by default.

tests/data/agcu_defect_gen.json

+1-1
Large diffs are not rendered by default.

tests/data/cd_i_supercell_defect_gen.json

+1-1
Large diffs are not rendered by default.

tests/data/cu_defect_gen.json

+1-1
Large diffs are not rendered by default.

tests/data/lmno_defect_gen.json

+1-1
Large diffs are not rendered by default.

tests/data/ytos_defect_gen.json

+1-1
Large diffs are not rendered by default.

tests/data/ytos_defect_gen_supercell.json

+1-1
Large diffs are not rendered by default.

tests/test_generation.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -736,23 +736,23 @@ def setUp(self):
736736
737737
Interstitials Guessed Charges Conv. Cell Coords Wyckoff
738738
--------------- --------------------------- ------------------- ---------
739+
Si_i_C1_Sb2.48 [+4,+3,+2,+1,0] [0.347,0.348,0.457] 18f
739740
Si_i_C1_Si2.21 [+4,+3,+2,+1,0] [0.158,0.359,0.167] 18f
740-
Si_i_C1_Si2.48 [+4,+3,+2,+1,0] [0.347,0.348,0.457] 18f
741741
Si_i_C1_Te2.44 [+4,+3,+2,+1,0] [0.001,0.336,0.289] 18f
742-
Si_i_C3_Sb2.41 [+4,+3,+2,+1,0] [0.000,0.000,0.050] 6c
743742
Si_i_C3_Si2.64 [+4,+3,+2,+1,0] [0.000,0.000,0.318] 6c
743+
Si_i_C3_Te2.41 [+4,+3,+2,+1,0] [0.000,0.000,0.050] 6c
744744
Si_i_C3i_Te2.81 [+4,+3,+2,+1,0] [0.000,0.000,0.000] 3a
745+
Sb_i_C1_Sb2.48 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.347,0.348,0.457] 18f
745746
Sb_i_C1_Si2.21 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.158,0.359,0.167] 18f
746-
Sb_i_C1_Si2.48 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.347,0.348,0.457] 18f
747747
Sb_i_C1_Te2.44 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.001,0.336,0.289] 18f
748-
Sb_i_C3_Sb2.41 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.000,0.000,0.050] 6c
749748
Sb_i_C3_Si2.64 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.000,0.000,0.318] 6c
749+
Sb_i_C3_Te2.41 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.000,0.000,0.050] 6c
750750
Sb_i_C3i_Te2.81 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.000,0.000,0.000] 3a
751+
Te_i_C1_Sb2.48 [+4,+3,+2,+1,0,-1,-2] [0.347,0.348,0.457] 18f
751752
Te_i_C1_Si2.21 [+4,+3,+2,+1,0,-1,-2] [0.158,0.359,0.167] 18f
752-
Te_i_C1_Si2.48 [+4,+3,+2,+1,0,-1,-2] [0.347,0.348,0.457] 18f
753753
Te_i_C1_Te2.44 [+4,+3,+2,+1,0,-1,-2] [0.001,0.336,0.289] 18f
754-
Te_i_C3_Sb2.41 [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.050] 6c
755754
Te_i_C3_Si2.64 [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.318] 6c
755+
Te_i_C3_Te2.41 [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.050] 6c
756756
Te_i_C3i_Te2.81 [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.000] 3a
757757
\n"""
758758
"The number in the Wyckoff label is the site multiplicity/degeneracy of that defect "
@@ -854,7 +854,9 @@ def _general_defect_gen_check(self, defect_gen, charge_states_removed=False):
854854
set(Poscar(structure).site_symbols)
855855
) # no duplicates
856856

857-
assert np.isclose(defect_gen.min_image_distance, get_min_image_distance(defect_gen.bulk_supercell))
857+
assert np.isclose(
858+
defect_gen.min_image_distance, get_min_image_distance(defect_gen.bulk_supercell), atol=1e-2
859+
)
858860

859861
print("Checking Defect/DefectEntry types")
860862
assert all(defect.defect_type == DefectType.Vacancy for defect in defect_gen.defects["vacancies"])
@@ -1854,7 +1856,7 @@ def test_supercell_gen_kwargs(self):
18541856
) # gives 4x conventional cell
18551857
assert self.CdTe_defect_gen_info in output
18561858
self._general_defect_gen_check(CdTe_defect_gen)
1857-
assert CdTe_defect_gen.min_image_distance == 26.1626
1859+
assert np.isclose(CdTe_defect_gen.min_image_distance, 26.1626, atol=1e-2)
18581860
assert len(CdTe_defect_gen.bulk_supercell) == 512
18591861
assert CdTe_defect_gen.supercell_gen_kwargs["min_image_distance"] == 20
18601862
assert CdTe_defect_gen.supercell_gen_kwargs["force_cubic"] is True
@@ -3372,7 +3374,7 @@ def Sn5O6_defect_gen_check(self, defect_gen, manual_oxi=False):
33723374
"v_O_C1_Sn2.09 [+2,+1,0,-1] [0.642,0.323,0.461] 4e",
33733375
"Sn_Sn_C1_O2.08O2.11O2.14b [+2,+1,0,-1] [0.101,0.502,0.319] 4e",
33743376
"Sn_Sn_C1_O2.08Sn3.28O3.69a [+1,0,-1] [0.500,0.500,0.500] 2b",
3375-
"Sn_i_C1_Sn2.33O2.33O2.39d [+4,+3,+2,+1,0] [0.273,0.460,0.248] 4e",
3377+
"Sn_i_C1_Sn2.33O2.39O2.60d [+4,+3,+2,+1,0] [0.273,0.460,0.248] 4e",
33763378
"O_i_C1_O1.83Sn1.99Sn2.09a [0,-1,-2] [0.567,0.320,0.205] 4e",
33773379
]
33783380
assert set(defect_gen._bulk_oxi_states.composition.elements) == {

0 commit comments

Comments
 (0)