Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ThreadSelectorEventLoop on Windows with ProactorEventLoop #820

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/versionhistory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ This library adheres to `Semantic Versioning 2.0 <http://semver.org/>`_.

- Fixed a misleading ``ValueError`` in the context of DNS failures
(`#815 <https://github.com/agronholm/anyio/issues/815>`_; PR by @graingert)
- Ported ``ThreadSelectorEventLoop`` from Tornado to allow
``anyio.wait_socket_readable(sock)`` to work on Windows with a ``ProactorEventLoop``.

**4.6.2**

Expand Down
16 changes: 14 additions & 2 deletions src/anyio/_backends/_asyncio.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
from ..abc._eventloop import StrOrBytesPath
from ..lowlevel import RunVar
from ..streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
from ._selector_thread import _get_selector_windows

if sys.version_info >= (3, 10):
from typing import ParamSpec
Expand Down Expand Up @@ -2683,13 +2684,24 @@ async def wait_socket_readable(cls, sock: socket.socket) -> None:
raise BusyResourceError("reading from") from None

loop = get_running_loop()
if (
sys.platform == "win32"
Copy link
Collaborator

@graingert graingert Nov 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to try to use add_reader, and catch the NotImplementedError - raising if we're not on sys.platform == "win32"?

Or maybe we don't care if sys.platform == "win32" or not at this point? if add_reader is not implemented on the loop we can still work around it with the selector in a thread

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think if you catch the error you can register the selector independent of the platform in case someone else decides to make another incomplete EventLoop implementation like Proactor. The selector thread should work everywhere.

and asyncio.get_event_loop_policy().__class__.__name__
== "WindowsProactorEventLoopPolicy"
):
selector = _get_selector_windows(loop)
add_reader = selector.add_reader
remove_reader = selector.remove_reader
else:
add_reader = loop.add_reader
remove_reader = loop.remove_reader
event = read_events[sock] = asyncio.Event()
loop.add_reader(sock, event.set)
add_reader(sock, event.set)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps something like:

Suggested change
add_reader(sock, event.set)
try:
add_reader(sock, event.set)
except NotImplementedError:
# Proactor on Windows does not yet implement add/remove reader
selector = _get_selector_windows(loop)
selector.add_reader(sock, event.set)
remove_reader = selector.remove_reader
else:
remove_reader = loop.remove_reader

try:
await event.wait()
finally:
if read_events.pop(sock, None) is not None:
loop.remove_reader(sock)
remove_reader(sock)
readable = True
else:
readable = False
Expand Down
315 changes: 315 additions & 0 deletions src/anyio/_backends/_selector_thread.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
"""Ensure asyncio selector methods (add_reader, etc.) are available.
Running select in a thread and defining these methods on the running event loop.
Originally in tornado.platform.asyncio.
Redistributed under license Apache-2.0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we'll need to include the copyright statement and notice

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, if we end up going with their implementation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know how this plays with AnyIO's MIT license? Would it be an issue to vendor this code?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the Apache 2 license supports re-distribution under a different license

"""

from __future__ import annotations

import asyncio
import atexit
import errno
import functools
import select
import socket
import threading
import typing
from typing import (
Any,
Callable,
Union,
)
from weakref import WeakKeyDictionary

if typing.TYPE_CHECKING:
from typing_extensions import Protocol

class _HasFileno(Protocol):
def fileno(self) -> int:
pass

_FileDescriptorLike = Union[int, _HasFileno]


# registry of asyncio loop : selector thread
_selectors: WeakKeyDictionary = WeakKeyDictionary()

# Collection of selector thread event loops to shut down on exit.
_selector_loops: set[SelectorThread] = set()


def _atexit_callback() -> None:
for loop in _selector_loops:
with loop._select_cond:
loop._closing_selector = True
loop._select_cond.notify()
try:
loop._waker_w.send(b"a")
except BlockingIOError:
pass
# If we don't join our (daemon) thread here, we may get a deadlock
# during interpreter shutdown. I don't really understand why. This
# deadlock happens every time in CI (both travis and appveyor) but
# I've never been able to reproduce locally.
assert loop._thread is not None
loop._thread.join()
_selector_loops.clear()


atexit.register(_atexit_callback)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This causes an issue for dask/distributed where they can't use tornado on the proactor because they send a shutdown message on a socket then close the event loop in atexit

the socket add_reader/add_writer then stops working while it's trying to send the shutdown message

anyio already has a thread pool implementation that shuts down when run_forever completes I think that should be used here

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be precise, it ties itself into the life cycle of the root task.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also the thread here is closed explicitly using the async generator hook and patching EventLoop.close so I think it's ok to leak the demon thread atexit if nobody explicitly closes the loop



# SelectorThread from tornado 6.4.0


class SelectorThread:
"""Define ``add_reader`` methods to be called in a background select thread.

Instances of this class start a second thread to run a selector.
This thread is completely hidden from the user;
all callbacks are run on the wrapped event loop's thread.

Typically used via ``AddThreadSelectorEventLoop``,
but can be attached to a running asyncio loop.
"""

_closed = False

def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None:
self._real_loop = real_loop

self._select_cond = threading.Condition()
self._select_args: (
tuple[list[_FileDescriptorLike], list[_FileDescriptorLike]] | None
) = None
self._closing_selector = False
self._thread: threading.Thread | None = None
self._thread_manager_handle = self._thread_manager()

async def thread_manager_anext() -> None:
# the anext builtin wasn't added until 3.10. We just need to iterate
# this generator one step.
await self._thread_manager_handle.__anext__()

# When the loop starts, start the thread. Not too soon because we can't
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need this, because we know the loop has started when we create the Selector thread. We should probably also stash the task somewhere

# clean up if we get to this point but the event loop is closed without
# starting.
self._real_loop.call_soon(
lambda: self._real_loop.create_task(thread_manager_anext())
)

self._readers: dict[_FileDescriptorLike, Callable] = {}
self._writers: dict[_FileDescriptorLike, Callable] = {}

# Writing to _waker_w will wake up the selector thread, which
# watches for _waker_r to be readable.
self._waker_r, self._waker_w = socket.socketpair()
self._waker_r.setblocking(False)
self._waker_w.setblocking(False)
_selector_loops.add(self)
self.add_reader(self._waker_r, self._consume_waker)

def close(self) -> None:
if self._closed:
return
with self._select_cond:
self._closing_selector = True
self._select_cond.notify()
self._wake_selector()
if self._thread is not None:
self._thread.join()
_selector_loops.discard(self)
self.remove_reader(self._waker_r)
self._waker_r.close()
self._waker_w.close()
self._closed = True

async def _thread_manager(self) -> typing.AsyncGenerator[None, None]:
# Create a thread to run the select system call. We manage this thread
# manually so we can trigger a clean shutdown from an atexit hook. Note
# that due to the order of operations at shutdown, only daemon threads
# can be shut down in this way (non-daemon threads would require the
# introduction of a new hook: https://bugs.python.org/issue41962)
self._thread = threading.Thread(
name="Tornado selector",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably will need a different name

daemon=True,
target=self._run_select,
)
self._thread.start()
self._start_select()
try:
# The presense of this yield statement means that this coroutine
# is actually an asynchronous generator, which has a special
# shutdown protocol. We wait at this yield point until the
# event loop's shutdown_asyncgens method is called, at which point
# we will get a GeneratorExit exception and can shut down the
# selector thread.
yield
except GeneratorExit:
self.close()
raise

def _wake_selector(self) -> None:
if self._closed:
return
try:
self._waker_w.send(b"a")
except BlockingIOError:
pass

def _consume_waker(self) -> None:
try:
self._waker_r.recv(1024)
except BlockingIOError:
pass

def _start_select(self) -> None:
# Capture reader and writer sets here in the event loop
# thread to avoid any problems with concurrent
# modification while the select loop uses them.
with self._select_cond:
assert self._select_args is None
self._select_args = (list(self._readers.keys()), list(self._writers.keys()))
self._select_cond.notify()

def _run_select(self) -> None:
while True:
with self._select_cond:
while self._select_args is None and not self._closing_selector:
self._select_cond.wait()
if self._closing_selector:
return
assert self._select_args is not None
to_read, to_write = self._select_args
self._select_args = None

# We use the simpler interface of the select module instead of
# the more stateful interface in the selectors module because
# this class is only intended for use on windows, where
# select.select is the only option. The selector interface
# does not have well-documented thread-safety semantics that
# we can rely on so ensuring proper synchronization would be
# tricky.
try:
# On windows, selecting on a socket for write will not
# return the socket when there is an error (but selecting
# for reads works). Also select for errors when selecting
# for writes, and merge the results.
#
# This pattern is also used in
# https://github.com/python/cpython/blob/v3.8.0/Lib/selectors.py#L312-L317
rs, ws, xs = select.select(to_read, to_write, to_write)
ws = ws + xs
except OSError as e:
# After remove_reader or remove_writer is called, the file
# descriptor may subsequently be closed on the event loop
# thread. It's possible that this select thread hasn't
# gotten into the select system call by the time that
# happens in which case (at least on macOS), select may
# raise a "bad file descriptor" error. If we get that
# error, check and see if we're also being woken up by
# polling the waker alone. If we are, just return to the
# event loop and we'll get the updated set of file
# descriptors on the next iteration. Otherwise, raise the
# original error.
if e.errno == getattr(errno, "WSAENOTSOCK", errno.EBADF):
rs, _, _ = select.select([self._waker_r.fileno()], [], [], 0)
if rs:
ws = []
else:
raise
else:
raise

try:
self._real_loop.call_soon_threadsafe(self._handle_select, rs, ws)
except RuntimeError:
# "Event loop is closed". Swallow the exception for
# consistency with PollIOLoop (and logical consistency
# with the fact that we can't guarantee that an
# add_callback that completes without error will
# eventually execute).
pass
except AttributeError:
# ProactorEventLoop may raise this instead of RuntimeError
# if call_soon_threadsafe races with a call to close().
# Swallow it too for consistency.
pass

def _handle_select(
self, rs: list[_FileDescriptorLike], ws: list[_FileDescriptorLike]
) -> None:
for r in rs:
self._handle_event(r, self._readers)
for w in ws:
self._handle_event(w, self._writers)
self._start_select()

def _handle_event(
self,
fd: _FileDescriptorLike,
cb_map: dict[_FileDescriptorLike, Callable],
) -> None:
try:
callback = cb_map[fd]
except KeyError:
return
callback()

def add_reader(
self, fd: _FileDescriptorLike, callback: Callable[..., None], *args: Any
) -> None:
self._readers[fd] = functools.partial(callback, *args)
self._wake_selector()

def add_writer(
self, fd: _FileDescriptorLike, callback: Callable[..., None], *args: Any
) -> None:
self._writers[fd] = functools.partial(callback, *args)
self._wake_selector()

def remove_reader(self, fd: _FileDescriptorLike) -> bool:
try:
del self._readers[fd]
except KeyError:
return False
self._wake_selector()
return True

def remove_writer(self, fd: _FileDescriptorLike) -> bool:
try:
del self._writers[fd]
except KeyError:
return False
self._wake_selector()
return True


def _get_selector_windows(
asyncio_loop: asyncio.AbstractEventLoop,
) -> SelectorThread:
"""Get selector-compatible loop.

Sets ``add_reader`` family of methods on the asyncio loop.

Workaround Windows proactor removal of *reader methods.
"""

if asyncio_loop in _selectors:
return _selectors[asyncio_loop]

selector_thread = _selectors[asyncio_loop] = SelectorThread(asyncio_loop)

# patch loop.close to also close the selector thread
loop_close = asyncio_loop.close

def _close_selector_and_loop() -> None:
# restore original before calling selector.close,
# which in turn calls eventloop.close!
asyncio_loop.close = loop_close # type: ignore[method-assign]
_selectors.pop(asyncio_loop, None)
selector_thread.close()

asyncio_loop.close = _close_selector_and_loop # type: ignore[method-assign]

return selector_thread
3 changes: 0 additions & 3 deletions src/anyio/_core/_sockets.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,9 +595,6 @@ def wait_socket_readable(sock: socket.socket) -> Awaitable[None]:
"""
Wait until the given socket has data to be read.

This does **NOT** work on Windows when using the asyncio backend with a proactor
event loop (default on py3.8+).

.. warning:: Only use this on raw sockets that have not been wrapped by any higher
level constructs like socket streams!

Expand Down
Loading
Loading