|
12 | 12 | import time
|
13 | 13 | import warnings
|
14 | 14 |
|
| 15 | +from enum import Enum |
| 16 | + |
15 | 17 | import zmq
|
16 | 18 |
|
17 | 19 | from ipython_genutils.importstring import import_item
|
|
29 | 31 | KernelManagerABC
|
30 | 32 | )
|
31 | 33 |
|
| 34 | +class _ShutdownStatus(Enum): |
| 35 | + """ |
| 36 | +
|
| 37 | + This is so far used only for testing in order to track the internal state of |
| 38 | + the shutdown logic, and verifying which path is taken for which |
| 39 | + missbehavior. |
| 40 | +
|
| 41 | + """ |
| 42 | + Unset = None |
| 43 | + ShutdownRequest = "ShutdownRequest" |
| 44 | + SigtermRequest = "SigtermRequest" |
| 45 | + SigkillRequest = "SigkillRequest" |
| 46 | + |
32 | 47 |
|
33 | 48 | class KernelManager(ConnectionFileMixin):
|
34 | 49 | """Manages a single kernel in a subprocess on this host.
|
35 | 50 |
|
36 | 51 | This version starts kernels with Popen.
|
37 | 52 | """
|
38 | 53 |
|
| 54 | + def __init__(self, *args, **kwargs): |
| 55 | + super().__init__(*args, **kwargs) |
| 56 | + self._shutdown_status = _ShutdownStatus.Unset |
| 57 | + |
39 | 58 | _created_context = Bool(False)
|
40 | 59 |
|
41 | 60 | # The PyZMQ Context to use for communication with the kernel.
|
@@ -71,7 +90,13 @@ def _kernel_spec_manager_changed(self, change):
|
71 | 90 | shutdown_wait_time = Float(
|
72 | 91 | 5.0, config=True,
|
73 | 92 | help="Time to wait for a kernel to terminate before killing it, "
|
74 |
| - "in seconds.") |
| 93 | + "in seconds. When a shutdown request is initiated, the kernel " |
| 94 | + "will be immediately send and interrupt (SIGINT), followed" |
| 95 | + "by a shutdown_request message, after 1/2 of `shutdown_wait_time`" |
| 96 | + "it will be sent a terminate (SIGTERM) request, and finally at " |
| 97 | + "the end of `shutdown_wait_time` will be killed (SIGKILL). terminate " |
| 98 | + "and kill may be equivalent on windows.", |
| 99 | + ) |
75 | 100 |
|
76 | 101 | kernel_name = Unicode(kernelspec.NATIVE_KERNEL_NAME)
|
77 | 102 |
|
@@ -333,20 +358,45 @@ def finish_shutdown(self, waittime=None, pollinterval=0.1):
|
333 | 358 | """
|
334 | 359 | if waittime is None:
|
335 | 360 | waittime = max(self.shutdown_wait_time, 0)
|
336 |
| - for i in range(int(waittime/pollinterval)): |
| 361 | + self._shutdown_status = _ShutdownStatus.ShutdownRequest |
| 362 | + |
| 363 | + def poll_or_sleep_to_kernel_gone(): |
| 364 | + """ |
| 365 | + Poll until the kernel is not responding, |
| 366 | + then wait (the subprocess), until process gone. |
| 367 | +
|
| 368 | + After this function the kernel is either: |
| 369 | + - still responding; or |
| 370 | + - subprocess has been culled. |
| 371 | + """ |
337 | 372 | if self.is_alive():
|
338 | 373 | time.sleep(pollinterval)
|
339 | 374 | else:
|
340 | 375 | # If there's still a proc, wait and clear
|
341 | 376 | if self.has_kernel:
|
342 | 377 | self.kernel.wait()
|
343 | 378 | self.kernel = None
|
| 379 | + return True |
| 380 | + |
| 381 | + # wait 50% of the shutdown timeout... |
| 382 | + for i in range(int(waittime / 2 / pollinterval)): |
| 383 | + if poll_or_sleep_to_kernel_gone(): |
344 | 384 | break
|
345 | 385 | else:
|
346 |
| - # OK, we've waited long enough. |
347 |
| - if self.has_kernel: |
348 |
| - self.log.debug("Kernel is taking too long to finish, killing") |
349 |
| - self._kill_kernel() |
| 386 | + # if we've exited the loop normally (no break) |
| 387 | + # send sigterm and wait the other 50%. |
| 388 | + self.log.debug("Kernel is taking too long to finish, terminating") |
| 389 | + self._shutdown_status = _ShutdownStatus.SigtermRequest |
| 390 | + self._send_kernel_sigterm() |
| 391 | + for i in range(int(waittime / 2 / pollinterval)): |
| 392 | + if poll_or_sleep_to_kernel_gone(): |
| 393 | + break |
| 394 | + else: |
| 395 | + # OK, we've waited long enough. |
| 396 | + if self.has_kernel: |
| 397 | + self.log.debug("Kernel is taking too long to finish, killing") |
| 398 | + self._shutdown_status = _ShutdownStatus.SigkillRequest |
| 399 | + self._kill_kernel() |
350 | 400 |
|
351 | 401 | def cleanup_resources(self, restart=False):
|
352 | 402 | """Clean up resources when the kernel is shut down"""
|
@@ -388,6 +438,8 @@ def shutdown_kernel(self, now=False, restart=False):
|
388 | 438 | # Stop monitoring for restarting while we shutdown.
|
389 | 439 | self.stop_restarter()
|
390 | 440 |
|
| 441 | + self.interrupt_kernel() |
| 442 | + |
391 | 443 | if now:
|
392 | 444 | self._kill_kernel()
|
393 | 445 | else:
|
@@ -462,6 +514,36 @@ def has_kernel(self):
|
462 | 514 | """Has a kernel been started that we are managing."""
|
463 | 515 | return self.kernel is not None
|
464 | 516 |
|
| 517 | + def _send_kernel_sigterm(self): |
| 518 | + """similar to _kill_kernel, but with sigterm (not sigkill), but do not block""" |
| 519 | + if self.has_kernel: |
| 520 | + # Signal the kernel to terminate (sends SIGTERM on Unix and |
| 521 | + # if the kernel is a subprocess and we are on windows; this is |
| 522 | + # equivalent to kill |
| 523 | + try: |
| 524 | + if hasattr(self.kernel, "terminate"): |
| 525 | + self.kernel.terminate() |
| 526 | + elif hasattr(signal, "SIGTERM"): |
| 527 | + self.signal_kernel(signal.SIGTERM) |
| 528 | + else: |
| 529 | + self.log.debug( |
| 530 | + "Cannot set term signal to kernel, no" |
| 531 | + " `.terminate()` method and no values for SIGTERM" |
| 532 | + ) |
| 533 | + except OSError as e: |
| 534 | + # In Windows, we will get an Access Denied error if the process |
| 535 | + # has already terminated. Ignore it. |
| 536 | + if sys.platform == "win32": |
| 537 | + if e.winerror != 5: |
| 538 | + raise |
| 539 | + # On Unix, we may get an ESRCH error if the process has already |
| 540 | + # terminated. Ignore it. |
| 541 | + else: |
| 542 | + from errno import ESRCH |
| 543 | + |
| 544 | + if e.errno != ESRCH: |
| 545 | + raise |
| 546 | + |
465 | 547 | def _kill_kernel(self):
|
466 | 548 | """Kill the running kernel.
|
467 | 549 |
|
@@ -587,10 +669,23 @@ async def finish_shutdown(self, waittime=None, pollinterval=0.1):
|
587 | 669 | """
|
588 | 670 | if waittime is None:
|
589 | 671 | waittime = max(self.shutdown_wait_time, 0)
|
| 672 | + self._shutdown_status = _ShutdownStatus.ShutdownRequest |
590 | 673 | try:
|
591 |
| - await asyncio.wait_for(self._async_wait(pollinterval=pollinterval), timeout=waittime) |
| 674 | + await asyncio.wait_for( |
| 675 | + self._async_wait(pollinterval=pollinterval), timeout=waittime / 2 |
| 676 | + ) |
| 677 | + except asyncio.TimeoutError: |
| 678 | + self.log.debug("Kernel is taking too long to finish, terminating") |
| 679 | + self._shutdown_status = _ShutdownStatus.SigtermRequest |
| 680 | + await self._send_kernel_sigterm() |
| 681 | + |
| 682 | + try: |
| 683 | + await asyncio.wait_for( |
| 684 | + self._async_wait(pollinterval=pollinterval), timeout=waittime / 2 |
| 685 | + ) |
592 | 686 | except asyncio.TimeoutError:
|
593 | 687 | self.log.debug("Kernel is taking too long to finish, killing")
|
| 688 | + self._shutdown_status = _ShutdownStatus.SigkillRequest |
594 | 689 | await self._kill_kernel()
|
595 | 690 | else:
|
596 | 691 | # Process is no longer alive, wait and clear
|
@@ -620,6 +715,8 @@ async def shutdown_kernel(self, now=False, restart=False):
|
620 | 715 | # Stop monitoring for restarting while we shutdown.
|
621 | 716 | self.stop_restarter()
|
622 | 717 |
|
| 718 | + await self.interrupt_kernel() |
| 719 | + |
623 | 720 | if now:
|
624 | 721 | await self._kill_kernel()
|
625 | 722 | else:
|
@@ -678,6 +775,36 @@ async def restart_kernel(self, now=False, newports=False, **kw):
|
678 | 775 | await self.start_kernel(**self._launch_args)
|
679 | 776 | return None
|
680 | 777 |
|
| 778 | + async def _send_kernel_sigterm(self): |
| 779 | + """similar to _kill_kernel, but with sigterm (not sigkill), but do not block""" |
| 780 | + if self.has_kernel: |
| 781 | + # Signal the kernel to terminate (sends SIGTERM on Unix and |
| 782 | + # if the kernel is a subprocess and we are on windows; this is |
| 783 | + # equivalent to kill |
| 784 | + try: |
| 785 | + if hasattr(self.kernel, "terminate"): |
| 786 | + self.kernel.terminate() |
| 787 | + elif hasattr(signal, "SIGTERM"): |
| 788 | + await self.signal_kernel(signal.SIGTERM) |
| 789 | + else: |
| 790 | + self.log.debug( |
| 791 | + "Cannot set term signal to kernel, no" |
| 792 | + " `.terminate()` method and no values for SIGTERM" |
| 793 | + ) |
| 794 | + except OSError as e: |
| 795 | + # In Windows, we will get an Access Denied error if the process |
| 796 | + # has already terminated. Ignore it. |
| 797 | + if sys.platform == "win32": |
| 798 | + if e.winerror != 5: |
| 799 | + raise |
| 800 | + # On Unix, we may get an ESRCH error if the process has already |
| 801 | + # terminated. Ignore it. |
| 802 | + else: |
| 803 | + from errno import ESRCH |
| 804 | + |
| 805 | + if e.errno != ESRCH: |
| 806 | + raise |
| 807 | + |
681 | 808 | async def _kill_kernel(self):
|
682 | 809 | """Kill the running kernel.
|
683 | 810 |
|
|
0 commit comments