Skip to content

Commit add2f35

Browse files
committed
Skip terminate tests on Windows (os.killpg/getpgid are POSIX-only)
- Code now falls back to process.terminate() on Windows - Tests use create=True to mock POSIX-only functions on Windows - Only integration test with real subprocess skipped on Windows
1 parent f1c8fa5 commit add2f35

2 files changed

Lines changed: 37 additions & 33 deletions

File tree

src/custodian/vasp/jobs.py

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -712,31 +712,32 @@ def constrained_opt_run(
712712
def terminate(self, directory="./") -> None:
713713
"""Kill all VASP processes associated with the current job.
714714
715-
Sends SIGTERM to the entire process group to ensure all MPI workers
716-
are terminated, not just the parent process (srun/mpirun). Falls back
717-
to SIGKILL if processes don't terminate gracefully within timeout.
715+
On POSIX, sends SIGTERM/SIGKILL to entire process group to ensure all
716+
MPI workers are terminated. On Windows, falls back to process.terminate().
718717
"""
719718
pid = self._vasp_process.pid
720719

721-
# Check if process already finished
722720
if self._vasp_process.poll() is not None:
723721
logger.warning(f"Process {pid} already terminated")
724722
return
725723

726-
# Get process group ID
727-
try:
728-
pgid = os.getpgid(pid)
729-
except ProcessLookupError:
730-
logger.warning(f"Process {pid} not found (already dead)")
731-
return
732-
733-
# Send SIGTERM to process group for graceful shutdown
734-
logger.info(f"Sending SIGTERM to process group {pgid}")
735-
try:
736-
os.killpg(pgid, signal.SIGTERM)
737-
except ProcessLookupError:
738-
logger.warning(f"Process group {pgid} not found (already dead)")
739-
return
724+
# POSIX: kill entire process group; Windows: just terminate parent
725+
use_pgid = hasattr(os, "killpg")
726+
if use_pgid:
727+
try:
728+
pgid = os.getpgid(pid)
729+
except ProcessLookupError:
730+
logger.warning(f"Process {pid} not found (already dead)")
731+
return
732+
logger.info(f"Sending SIGTERM to process group {pgid}")
733+
try:
734+
os.killpg(pgid, signal.SIGTERM)
735+
except ProcessLookupError:
736+
logger.warning(f"Process group {pgid} not found (already dead)")
737+
return
738+
else:
739+
logger.info(f"Terminating process {pid}")
740+
self._vasp_process.terminate()
740741

741742
# Wait for graceful termination
742743
try:
@@ -746,12 +747,13 @@ def terminate(self, directory="./") -> None:
746747
except subprocess.TimeoutExpired:
747748
pass
748749

749-
# Force kill with SIGKILL
750-
logger.warning(f"SIGTERM timeout ({self.terminate_timeout}s), sending SIGKILL to process group {pgid}")
751-
try:
752-
os.killpg(pgid, signal.SIGKILL)
753-
except ProcessLookupError:
754-
pass
750+
# Force kill
751+
logger.warning(f"Timeout ({self.terminate_timeout}s), force killing {pid}")
752+
if use_pgid:
753+
try:
754+
os.killpg(pgid, signal.SIGKILL) # type: ignore[possibly-undefined]
755+
except ProcessLookupError:
756+
pass
755757
self._vasp_process.kill()
756758
self._vasp_process.wait()
757759
logger.info(f"Process {pid} force-killed")

tests/vasp/test_jobs.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import shutil
44
import signal
55
import subprocess
6+
import sys
67
from glob import glob
78
from types import SimpleNamespace
89
from typing import TYPE_CHECKING
@@ -227,7 +228,7 @@ def test_gamma_checks(self) -> None:
227228

228229

229230
class TestVaspJobTerminate:
230-
"""Tests for VaspJob.terminate() process group termination."""
231+
"""Tests for VaspJob.terminate() - cross-platform with POSIX process group support."""
231232

232233
@pytest.fixture
233234
def mocks(self) -> "Generator[SimpleNamespace, None, None]":
@@ -238,8 +239,8 @@ def mocks(self) -> "Generator[SimpleNamespace, None, None]":
238239

239240
with (
240241
patch("custodian.vasp.jobs.logger") as logger,
241-
patch("os.killpg") as killpg,
242-
patch("os.getpgid", return_value=67890),
242+
patch("os.killpg", create=True) as killpg,
243+
patch("os.getpgid", return_value=67890, create=True),
243244
):
244245
yield SimpleNamespace(job=job, process=process, logger=logger, killpg=killpg)
245246

@@ -259,7 +260,7 @@ def test_graceful_sigterm(self, mocks: SimpleNamespace) -> None:
259260
mocks.logger.info.assert_any_call("Sending SIGTERM to process group 67890")
260261
mocks.logger.info.assert_any_call("Process 12345 terminated gracefully")
261262
mocks.killpg.assert_called_once_with(67890, signal.SIGTERM)
262-
mocks.process.wait.assert_called_once_with(timeout=10.0) # default timeout
263+
mocks.process.wait.assert_called_once_with(timeout=10.0)
263264
mocks.process.kill.assert_not_called()
264265

265266
def test_force_kill_after_timeout(self, mocks: SimpleNamespace) -> None:
@@ -268,7 +269,7 @@ def test_force_kill_after_timeout(self, mocks: SimpleNamespace) -> None:
268269
mocks.process.wait.side_effect = [subprocess.TimeoutExpired("vasp", 10), None]
269270
mocks.job.terminate()
270271

271-
mocks.logger.warning.assert_called_with("SIGTERM timeout (10.0s), sending SIGKILL to process group 67890")
272+
mocks.logger.warning.assert_called_with("Timeout (10.0s), force killing 12345")
272273
mocks.logger.info.assert_any_call("Process 12345 force-killed")
273274
assert mocks.killpg.call_count == 2
274275
mocks.process.kill.assert_called_once()
@@ -281,12 +282,12 @@ def test_custom_timeout(self, mocks: SimpleNamespace) -> None:
281282
mocks.job.terminate()
282283

283284
mocks.process.wait.assert_any_call(timeout=60.0)
284-
mocks.logger.warning.assert_called_with("SIGTERM timeout (60.0s), sending SIGKILL to process group 67890")
285+
mocks.logger.warning.assert_called_with("Timeout (60.0s), force killing 12345")
285286

286287
def test_process_not_found_on_getpgid(self, mocks: SimpleNamespace) -> None:
287288
"""ProcessLookupError when getting PGID."""
288289
mocks.process.poll.return_value = None
289-
with patch("os.getpgid", side_effect=ProcessLookupError):
290+
with patch("os.getpgid", side_effect=ProcessLookupError, create=True):
290291
mocks.job.terminate()
291292

292293
mocks.logger.warning.assert_called_with("Process 12345 not found (already dead)")
@@ -300,10 +301,11 @@ def test_process_group_not_found_on_sigterm(self, mocks: SimpleNamespace) -> Non
300301

301302
mocks.logger.warning.assert_called_with("Process group 67890 not found (already dead)")
302303

304+
@pytest.mark.skipif(sys.platform == "win32", reason="sleep command and PGID not available")
303305
def test_integration_with_real_process(self) -> None:
304-
"""Integration test with real subprocess."""
306+
"""Integration test with real subprocess (POSIX only)."""
305307
vasp_job = VaspJob.__new__(VaspJob)
306-
vasp_job.terminate_timeout = 10.0 # Set since __new__ bypasses __init__
308+
vasp_job.terminate_timeout = 10.0
307309
real_process = subprocess.Popen(["sleep", "30"], start_new_session=True)
308310
vasp_job._vasp_process = real_process
309311
original_pgid = os.getpgid(real_process.pid)

0 commit comments

Comments
 (0)