Skip to content

Commit

Permalink
Use heuristic to identify "stable" startup of kernel
Browse files Browse the repository at this point in the history
This is mainly due to a weakness of the previous method: `is_alive` only tests that the kernel process is alive; it does not indicate that the kernel has successfully completed startup. To solve this correctly, we would need to wait for a kernel info reply, but it is not necessarily appropriate to start a kernel client + channels in the restarter. Therefore, we use "has been alive continuously for X time" as a heuristic for a stable startup.
  • Loading branch information
vidartf committed Nov 1, 2021
1 parent 6889cea commit 2fabf92
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
12 changes: 8 additions & 4 deletions jupyter_client/ioloop/restarter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import inspect
import time
import warnings

from traitlets import Instance
Expand Down Expand Up @@ -51,17 +52,20 @@ def stop(self):


class AsyncIOLoopKernelRestarter(IOLoopKernelRestarter):

async def poll(self):
if self.debug:
self.log.debug("Polling kernel...")
is_alive = await self.kernel_manager.is_alive()
now = time.time()
if not is_alive:
self._last_dead = now
if self._restarting:
self._restart_count += 1
else:
self._restart_count = 1

if self._restart_count >= self.restart_limit:
if self._restart_count > self.restart_limit:
self.log.warning("AsyncIOLoopKernelRestarter: restart failed")
self._fire_callbacks("dead")
self._restarting = False
Expand All @@ -79,8 +83,8 @@ async def poll(self):
await self.kernel_manager.restart_kernel(now=True, newports=newports)
self._restarting = True
else:
if self._initial_startup:
if self._initial_startup and self._last_dead - now >= self.stable_start_time:
self._initial_startup = False
if self._restarting:
if self._restarting and self._last_dead - now >= self.stable_start_time:
self.log.debug("AsyncIOLoopKernelRestarter: restart apparently succeeded")
self._restarting = False
self._restarting = False
22 changes: 18 additions & 4 deletions jupyter_client/restarter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
"""
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import time

from traitlets import default
from traitlets import Bool # type: ignore
from traitlets import Dict
from traitlets import Float
Expand All @@ -31,6 +34,9 @@ class KernelRestarter(LoggingConfigurable):

time_to_dead = Float(3.0, config=True, help="""Kernel heartbeat interval in seconds.""")

stable_start_time = Float(10.0, config=True,
help="""The time in seconds to consider the kernel to have completed a stable start up.""")

restart_limit = Integer(
5,
config=True,
Expand All @@ -45,6 +51,12 @@ class KernelRestarter(LoggingConfigurable):
_restarting = Bool(False)
_restart_count = Integer(0)
_initial_startup = Bool(True)
_last_dead = Float()

# Dynamic default for the `_last_dead` Float trait: the current time as
# reported by time.time(). poll() overwrites `_last_dead` with `now` each time
# the kernel is found dead and compares it against `now` and
# `stable_start_time` when the kernel is alive.
# NOTE(review): presumably this default is evaluated when the trait is first
# read rather than at class definition -- confirm against traitlets docs.
@default("_last_dead")
def _default_last_dead(self):
return time.time()


callbacks = Dict()

Expand Down Expand Up @@ -103,13 +115,15 @@ def poll(self):
if self.kernel_manager.shutting_down:
self.log.debug("Kernel shutdown in progress...")
return
now = time.time()
if not self.kernel_manager.is_alive():
self._last_dead = now
if self._restarting:
self._restart_count += 1
else:
self._restart_count = 1

if self._restart_count >= self.restart_limit:
if self._restart_count > self.restart_limit:
self.log.warning("KernelRestarter: restart failed")
self._fire_callbacks("dead")
self._restarting = False
Expand All @@ -127,8 +141,8 @@ def poll(self):
self.kernel_manager.restart_kernel(now=True, newports=newports)
self._restarting = True
else:
if self._initial_startup:
if self._initial_startup and self._last_dead - now >= self.stable_start_time:
self._initial_startup = False
if self._restarting:
if self._restarting and self._last_dead - now >= self.stable_start_time:
self.log.debug("KernelRestarter: restart apparently succeeded")
self._restarting = False
self._restarting = False

0 comments on commit 2fabf92

Please sign in to comment.