From f0533527b1ea57f328b5ee4b0e50651bb171754b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 10:49:49 -0600 Subject: [PATCH 01/16] Fix race in FileResponse when the file is changed between the stat and open calls There was a race in ``FileResponse`` where the stat would be incorrect if the file was changed out between the `stat` and `open` syscalls. This would lead to various unexpected behaviors such as trying to read beyond the length of the file or sending a partial file. This problem is likely to occour when files are being renamed/linked into place. An example of how this can happen with a system that provides weather data every 60s: An external process writes `.weather.txt` at the top of each minute, and than renames it to `weather.txt`. In this case `aiohttp` may stat the old `weather.txt`, and than open the new `weather.txt`, and use the `stat` result from the original file. To fix this we now `fstat` the open file on operating systems where `fstat` is available fixes #8013 --- aiohttp/web_fileresponse.py | 340 +++++++++++++++++++----------------- 1 file changed, 184 insertions(+), 156 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index daadda9a207..62a2ed45b94 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -1,4 +1,5 @@ import asyncio +import io import os import pathlib from contextlib import suppress @@ -13,6 +14,7 @@ Callable, Final, Optional, + Set, Tuple, cast, ) @@ -69,6 +71,24 @@ CONTENT_TYPES.add_type(content_type, extension) +_CLOSE_FUTURES: Set[asyncio.Future[None]] = set() + + +def _stat_open_file( + file_path: pathlib.Path, fobj: io.BufferedReader, st: Optional[os.stat_result] +) -> os.stat_result: + """Return the stat result of the file or the fallback stat result. + + Ideally we can use fstat() to get the stat result of the file object, + to ensure we are returning the correct length of the open file, + but it is not possible to get the file descriptor from the file object + on some operating systems, so we have to use the stat result of the file path. + """ + with suppress(OSError): # May not work on Windows + st = os.stat(fobj.fileno()) + return st or file_path.stat() + + class FileResponse(StreamResponse): """A response object can be used to send files.""" @@ -157,10 +177,10 @@ async def _precondition_failed( self.content_length = 0 return await super().prepare(request) - def _get_file_path_stat_encoding( + def _open_file_path_stat_encoding( self, accept_encoding: str - ) -> Tuple[pathlib.Path, os.stat_result, Optional[str]]: - """Return the file path, stat result, and encoding. + ) -> Tuple[io.BufferedReader, os.stat_result, Optional[str]]: + """Return the io object, stat result, and encoding. If an uncompressed file is returned, the encoding is set to :py:data:`None`. @@ -178,10 +198,14 @@ def _get_file_path_stat_encoding( # Do not follow symlinks and ignore any non-regular files. st = compressed_path.lstat() if S_ISREG(st.st_mode): - return compressed_path, st, file_encoding + fobj = compressed_path.open("rb") + st = _stat_open_file(compressed_path, fobj, st) + return fobj, st, file_encoding # Fallback to the uncompressed file - return file_path, file_path.stat(), None + fobj = file_path.open("rb") + st = _stat_open_file(file_path, fobj, None) + return fobj, st, None async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter]: loop = asyncio.get_running_loop() @@ -189,172 +213,176 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter # https://www.rfc-editor.org/rfc/rfc9110#section-8.4.1 accept_encoding = request.headers.get(hdrs.ACCEPT_ENCODING, "").lower() try: - file_path, st, file_encoding = await loop.run_in_executor( - None, self._get_file_path_stat_encoding, accept_encoding + fobj, st, file_encoding = await loop.run_in_executor( + None, self._open_file_path_stat_encoding, accept_encoding ) + except PermissionError: + self.set_status(HTTPForbidden.status_code) + return await super().prepare(request) except OSError: # Most likely to be FileNotFoundError or OSError for circular # symlinks in python >= 3.13, so respond with 404. self.set_status(HTTPNotFound.status_code) return await super().prepare(request) - # Forbid special files like sockets, pipes, devices, etc. - if not S_ISREG(st.st_mode): - self.set_status(HTTPForbidden.status_code) - return await super().prepare(request) - - etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" - last_modified = st.st_mtime - - # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2 - ifmatch = request.if_match - if ifmatch is not None and not self._etag_match( - etag_value, ifmatch, weak=False - ): - return await self._precondition_failed(request) - - unmodsince = request.if_unmodified_since - if ( - unmodsince is not None - and ifmatch is None - and st.st_mtime > unmodsince.timestamp() - ): - return await self._precondition_failed(request) - - # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.2-2 - ifnonematch = request.if_none_match - if ifnonematch is not None and self._etag_match( - etag_value, ifnonematch, weak=True - ): - return await self._not_modified(request, etag_value, last_modified) - - modsince = request.if_modified_since - if ( - modsince is not None - and ifnonematch is None - and st.st_mtime <= modsince.timestamp() - ): - return await self._not_modified(request, etag_value, last_modified) - - status = self._status - file_size = st.st_size - count = file_size - - start = None - - ifrange = request.if_range - if ifrange is None or st.st_mtime <= ifrange.timestamp(): - # If-Range header check: - # condition = cached date >= last modification date - # return 206 if True else 200. - # if False: - # Range header would not be processed, return 200 - # if True but Range header missing - # return 200 - try: - rng = request.http_range - start = rng.start - end = rng.stop - except ValueError: - # https://tools.ietf.org/html/rfc7233: - # A server generating a 416 (Range Not Satisfiable) response to - # a byte-range request SHOULD send a Content-Range header field - # with an unsatisfied-range value. - # The complete-length in a 416 response indicates the current - # length of the selected representation. - # - # Will do the same below. Many servers ignore this and do not - # send a Content-Range header with HTTP 416 - self.headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" - self.set_status(HTTPRequestRangeNotSatisfiable.status_code) + try: + # Forbid special files like sockets, pipes, devices, etc. + if not S_ISREG(st.st_mode): + self.set_status(HTTPForbidden.status_code) return await super().prepare(request) - # If a range request has been made, convert start, end slice - # notation into file pointer offset and count - if start is not None or end is not None: - if start < 0 and end is None: # return tail of file - start += file_size - if start < 0: - # if Range:bytes=-1000 in request header but file size - # is only 200, there would be trouble without this - start = 0 - count = file_size - start - else: - # rfc7233:If the last-byte-pos value is - # absent, or if the value is greater than or equal to - # the current length of the representation data, - # the byte range is interpreted as the remainder - # of the representation (i.e., the server replaces the - # value of last-byte-pos with a value that is one less than - # the current length of the selected representation). - count = ( - min(end if end is not None else file_size, file_size) - start - ) - - if start >= file_size: - # HTTP 416 should be returned in this case. + etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" + last_modified = st.st_mtime + + # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2 + ifmatch = request.if_match + if ifmatch is not None and not self._etag_match( + etag_value, ifmatch, weak=False + ): + return await self._precondition_failed(request) + + unmodsince = request.if_unmodified_since + if ( + unmodsince is not None + and ifmatch is None + and st.st_mtime > unmodsince.timestamp() + ): + return await self._precondition_failed(request) + + # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.2-2 + ifnonematch = request.if_none_match + if ifnonematch is not None and self._etag_match( + etag_value, ifnonematch, weak=True + ): + return await self._not_modified(request, etag_value, last_modified) + + modsince = request.if_modified_since + if ( + modsince is not None + and ifnonematch is None + and st.st_mtime <= modsince.timestamp() + ): + return await self._not_modified(request, etag_value, last_modified) + + status = self._status + file_size = st.st_size + count = file_size + + start = None + + ifrange = request.if_range + if ifrange is None or st.st_mtime <= ifrange.timestamp(): + # If-Range header check: + # condition = cached date >= last modification date + # return 206 if True else 200. + # if False: + # Range header would not be processed, return 200 + # if True but Range header missing + # return 200 + try: + rng = request.http_range + start = rng.start + end = rng.stop + except ValueError: + # https://tools.ietf.org/html/rfc7233: + # A server generating a 416 (Range Not Satisfiable) response to + # a byte-range request SHOULD send a Content-Range header field + # with an unsatisfied-range value. + # The complete-length in a 416 response indicates the current + # length of the selected representation. # - # According to https://tools.ietf.org/html/rfc7233: - # If a valid byte-range-set includes at least one - # byte-range-spec with a first-byte-pos that is less than - # the current length of the representation, or at least one - # suffix-byte-range-spec with a non-zero suffix-length, - # then the byte-range-set is satisfiable. Otherwise, the - # byte-range-set is unsatisfiable. + # Will do the same below. Many servers ignore this and do not + # send a Content-Range header with HTTP 416 self.headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" self.set_status(HTTPRequestRangeNotSatisfiable.status_code) return await super().prepare(request) - status = HTTPPartialContent.status_code - # Even though you are sending the whole file, you should still - # return a HTTP 206 for a Range request. - self.set_status(status) - - # If the Content-Type header is not already set, guess it based on the - # extension of the request path. The encoding returned by guess_type - # can be ignored since the map was cleared above. - if hdrs.CONTENT_TYPE not in self.headers: - self.content_type = ( - CONTENT_TYPES.guess_type(self._path)[0] or FALLBACK_CONTENT_TYPE - ) - - if file_encoding: - self.headers[hdrs.CONTENT_ENCODING] = file_encoding - self.headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING - # Disable compression if we are already sending - # a compressed file since we don't want to double - # compress. - self._compression = False - - self.etag = etag_value # type: ignore[assignment] - self.last_modified = st.st_mtime # type: ignore[assignment] - self.content_length = count - - self.headers[hdrs.ACCEPT_RANGES] = "bytes" - - real_start = cast(int, start) - - if status == HTTPPartialContent.status_code: - self.headers[hdrs.CONTENT_RANGE] = "bytes {}-{}/{}".format( - real_start, real_start + count - 1, file_size - ) - - # If we are sending 0 bytes calling sendfile() will throw a ValueError - if count == 0 or must_be_empty_body(request.method, self.status): - return await super().prepare(request) - - try: - fobj = await loop.run_in_executor(None, file_path.open, "rb") - except PermissionError: - self.set_status(HTTPForbidden.status_code) - return await super().prepare(request) + # If a range request has been made, convert start, end slice + # notation into file pointer offset and count + if start is not None or end is not None: + if start < 0 and end is None: # return tail of file + start += file_size + if start < 0: + # if Range:bytes=-1000 in request header but file size + # is only 200, there would be trouble without this + start = 0 + count = file_size - start + else: + # rfc7233:If the last-byte-pos value is + # absent, or if the value is greater than or equal to + # the current length of the representation data, + # the byte range is interpreted as the remainder + # of the representation (i.e., the server replaces the + # value of last-byte-pos with a value that is one less than + # the current length of the selected representation). + count = ( + min(end if end is not None else file_size, file_size) + - start + ) + + if start >= file_size: + # HTTP 416 should be returned in this case. + # + # According to https://tools.ietf.org/html/rfc7233: + # If a valid byte-range-set includes at least one + # byte-range-spec with a first-byte-pos that is less than + # the current length of the representation, or at least one + # suffix-byte-range-spec with a non-zero suffix-length, + # then the byte-range-set is satisfiable. Otherwise, the + # byte-range-set is unsatisfiable. + self.headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" + self.set_status(HTTPRequestRangeNotSatisfiable.status_code) + return await super().prepare(request) + + status = HTTPPartialContent.status_code + # Even though you are sending the whole file, you should still + # return a HTTP 206 for a Range request. + self.set_status(status) + + # If the Content-Type header is not already set, guess it based on the + # extension of the request path. The encoding returned by guess_type + # can be ignored since the map was cleared above. + if hdrs.CONTENT_TYPE not in self.headers: + self.content_type = ( + CONTENT_TYPES.guess_type(self._path)[0] or FALLBACK_CONTENT_TYPE + ) + + if file_encoding: + self.headers[hdrs.CONTENT_ENCODING] = file_encoding + self.headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING + # Disable compression if we are already sending + # a compressed file since we don't want to double + # compress. + self._compression = False + + self.etag = etag_value # type: ignore[assignment] + self.last_modified = st.st_mtime # type: ignore[assignment] + self.content_length = count + + self.headers[hdrs.ACCEPT_RANGES] = "bytes" + + real_start = cast(int, start) + + if status == HTTPPartialContent.status_code: + self.headers[hdrs.CONTENT_RANGE] = "bytes {}-{}/{}".format( + real_start, real_start + count - 1, file_size + ) + + # If we are sending 0 bytes calling sendfile() will throw a ValueError + if count == 0 or must_be_empty_body(request.method, self.status): + return await super().prepare(request) - if start: # be aware that start could be None or int=0 here. - offset = start - else: - offset = 0 + if start: # be aware that start could be None or int=0 here. + offset = start + else: + offset = 0 - try: return await self._sendfile(request, fobj, offset, count) finally: - await asyncio.shield(loop.run_in_executor(None, fobj.close)) + # We do not await here because we do not want to wait + # for the executor to finish before returning the response. + close_future = loop.run_in_executor(None, fobj.close) + # Hold a strong reference to the future to prevent it from being + # garbage collected before it completes. + _CLOSE_FUTURES.add(close_future) + close_future.add_done_callback(_CLOSE_FUTURES.remove) From e79d9c651b5470450a91ae8c2f7bb82ccaf419ff Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 10:56:16 -0600 Subject: [PATCH 02/16] lint --- aiohttp/web_fileresponse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index 62a2ed45b94..c1bf035501d 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -281,8 +281,6 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter # return 200 try: rng = request.http_range - start = rng.start - end = rng.stop except ValueError: # https://tools.ietf.org/html/rfc7233: # A server generating a 416 (Range Not Satisfiable) response to @@ -297,6 +295,8 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter self.set_status(HTTPRequestRangeNotSatisfiable.status_code) return await super().prepare(request) + start = rng.start + end = rng.stop # If a range request has been made, convert start, end slice # notation into file pointer offset and count if start is not None or end is not None: From 4b0cd140f23769e9906fd2c3243b7b282d0ea523 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 11:26:36 -0600 Subject: [PATCH 03/16] back compat --- aiohttp/web_fileresponse.py | 12 ++++++++++-- tests/test_web_urldispatcher.py | 7 ++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index c1bf035501d..6cacece4ad4 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -85,7 +85,7 @@ def _stat_open_file( on some operating systems, so we have to use the stat result of the file path. """ with suppress(OSError): # May not work on Windows - st = os.stat(fobj.fileno()) + return os.stat(fobj.fileno()) return st or file_path.stat() @@ -203,7 +203,15 @@ def _open_file_path_stat_encoding( return fobj, st, file_encoding # Fallback to the uncompressed file - fobj = file_path.open("rb") + try: + fobj = file_path.open("rb") + except OSError as e: + if e.errno == 102: # Operation not supported on socket + # This will also be checked in prepare() but we want + # to preserve backwards compatibility and give + # as 403 Forbidden. + raise PermissionError("File is a socket") + raise st = _stat_open_file(file_path, fobj, None) return fobj, st, None diff --git a/tests/test_web_urldispatcher.py b/tests/test_web_urldispatcher.py index 5cd4aebdc55..d21ecaa101a 100644 --- a/tests/test_web_urldispatcher.py +++ b/tests/test_web_urldispatcher.py @@ -579,16 +579,17 @@ async def test_access_mock_special_resource( my_special.touch() real_result = my_special.stat() - real_stat = pathlib.Path.stat + real_stat = os.stat - def mock_stat(self: pathlib.Path, **kwargs: Any) -> os.stat_result: - s = real_stat(self, **kwargs) + def mock_stat(path: Any, **kwargs: Any) -> os.stat_result: + s = real_stat(path, **kwargs) if os.path.samestat(s, real_result): mock_mode = S_IFIFO | S_IMODE(s.st_mode) s = os.stat_result([mock_mode] + list(s)[1:]) return s monkeypatch.setattr("pathlib.Path.stat", mock_stat) + monkeypatch.setattr("os.stat", mock_stat) app = web.Application() app.router.add_static("/", str(tmp_path)) From 74b93907f272862fea092e68dd4675cda3107906 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 11:29:13 -0600 Subject: [PATCH 04/16] lint --- aiohttp/web_fileresponse.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index 6cacece4ad4..5dce2037b28 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -303,6 +303,8 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter self.set_status(HTTPRequestRangeNotSatisfiable.status_code) return await super().prepare(request) + if TYPE_CHECKING: + assert rng is not None start = rng.start end = rng.stop # If a range request has been made, convert start, end slice From 6259681e9535172c7dd8b7435103be5ae0ad4e7a Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 11:30:03 -0600 Subject: [PATCH 05/16] comments --- aiohttp/web_fileresponse.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index 5dce2037b28..f68e78b6a63 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -390,7 +390,9 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter return await self._sendfile(request, fobj, offset, count) finally: # We do not await here because we do not want to wait - # for the executor to finish before returning the response. + # for the executor to finish before returning the response + # so the connection can begin servicing another request + # as soon as possible. close_future = loop.run_in_executor(None, fobj.close) # Hold a strong reference to the future to prevent it from being # garbage collected before it completes. From 2a41636f42e6bee94621dfe491d4e473d61719a2 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 11:30:46 -0600 Subject: [PATCH 06/16] comments --- aiohttp/web_fileresponse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index f68e78b6a63..9045cb09051 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -208,7 +208,7 @@ def _open_file_path_stat_encoding( except OSError as e: if e.errno == 102: # Operation not supported on socket # This will also be checked in prepare() but we want - # to preserve backwards compatibility and give + # to preserve backwards compatibility and handle # as 403 Forbidden. raise PermissionError("File is a socket") raise From 053984a173166812a229913e5f5f43f496be62e5 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 11:40:59 -0600 Subject: [PATCH 07/16] no way to avoid double stat --- aiohttp/web_fileresponse.py | 323 ++++++++++++++++++------------------ 1 file changed, 163 insertions(+), 160 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index 9045cb09051..2d97d5a8c50 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -179,7 +179,7 @@ async def _precondition_failed( def _open_file_path_stat_encoding( self, accept_encoding: str - ) -> Tuple[io.BufferedReader, os.stat_result, Optional[str]]: + ) -> Tuple[Optional[io.BufferedReader], os.stat_result, Optional[str]]: """Return the io object, stat result, and encoding. If an uncompressed file is returned, the encoding is set to @@ -203,15 +203,10 @@ def _open_file_path_stat_encoding( return fobj, st, file_encoding # Fallback to the uncompressed file - try: - fobj = file_path.open("rb") - except OSError as e: - if e.errno == 102: # Operation not supported on socket - # This will also be checked in prepare() but we want - # to preserve backwards compatibility and handle - # as 403 Forbidden. - raise PermissionError("File is a socket") - raise + st = file_path.stat() + if not S_ISREG(st.st_mode): + return None, st, None + fobj = file_path.open("rb") st = _stat_open_file(file_path, fobj, None) return fobj, st, None @@ -235,159 +230,11 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter try: # Forbid special files like sockets, pipes, devices, etc. - if not S_ISREG(st.st_mode): + if not fobj or not S_ISREG(st.st_mode): self.set_status(HTTPForbidden.status_code) return await super().prepare(request) - etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" - last_modified = st.st_mtime - - # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2 - ifmatch = request.if_match - if ifmatch is not None and not self._etag_match( - etag_value, ifmatch, weak=False - ): - return await self._precondition_failed(request) - - unmodsince = request.if_unmodified_since - if ( - unmodsince is not None - and ifmatch is None - and st.st_mtime > unmodsince.timestamp() - ): - return await self._precondition_failed(request) - - # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.2-2 - ifnonematch = request.if_none_match - if ifnonematch is not None and self._etag_match( - etag_value, ifnonematch, weak=True - ): - return await self._not_modified(request, etag_value, last_modified) - - modsince = request.if_modified_since - if ( - modsince is not None - and ifnonematch is None - and st.st_mtime <= modsince.timestamp() - ): - return await self._not_modified(request, etag_value, last_modified) - - status = self._status - file_size = st.st_size - count = file_size - - start = None - - ifrange = request.if_range - if ifrange is None or st.st_mtime <= ifrange.timestamp(): - # If-Range header check: - # condition = cached date >= last modification date - # return 206 if True else 200. - # if False: - # Range header would not be processed, return 200 - # if True but Range header missing - # return 200 - try: - rng = request.http_range - except ValueError: - # https://tools.ietf.org/html/rfc7233: - # A server generating a 416 (Range Not Satisfiable) response to - # a byte-range request SHOULD send a Content-Range header field - # with an unsatisfied-range value. - # The complete-length in a 416 response indicates the current - # length of the selected representation. - # - # Will do the same below. Many servers ignore this and do not - # send a Content-Range header with HTTP 416 - self.headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" - self.set_status(HTTPRequestRangeNotSatisfiable.status_code) - return await super().prepare(request) - - if TYPE_CHECKING: - assert rng is not None - start = rng.start - end = rng.stop - # If a range request has been made, convert start, end slice - # notation into file pointer offset and count - if start is not None or end is not None: - if start < 0 and end is None: # return tail of file - start += file_size - if start < 0: - # if Range:bytes=-1000 in request header but file size - # is only 200, there would be trouble without this - start = 0 - count = file_size - start - else: - # rfc7233:If the last-byte-pos value is - # absent, or if the value is greater than or equal to - # the current length of the representation data, - # the byte range is interpreted as the remainder - # of the representation (i.e., the server replaces the - # value of last-byte-pos with a value that is one less than - # the current length of the selected representation). - count = ( - min(end if end is not None else file_size, file_size) - - start - ) - - if start >= file_size: - # HTTP 416 should be returned in this case. - # - # According to https://tools.ietf.org/html/rfc7233: - # If a valid byte-range-set includes at least one - # byte-range-spec with a first-byte-pos that is less than - # the current length of the representation, or at least one - # suffix-byte-range-spec with a non-zero suffix-length, - # then the byte-range-set is satisfiable. Otherwise, the - # byte-range-set is unsatisfiable. - self.headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" - self.set_status(HTTPRequestRangeNotSatisfiable.status_code) - return await super().prepare(request) - - status = HTTPPartialContent.status_code - # Even though you are sending the whole file, you should still - # return a HTTP 206 for a Range request. - self.set_status(status) - - # If the Content-Type header is not already set, guess it based on the - # extension of the request path. The encoding returned by guess_type - # can be ignored since the map was cleared above. - if hdrs.CONTENT_TYPE not in self.headers: - self.content_type = ( - CONTENT_TYPES.guess_type(self._path)[0] or FALLBACK_CONTENT_TYPE - ) - - if file_encoding: - self.headers[hdrs.CONTENT_ENCODING] = file_encoding - self.headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING - # Disable compression if we are already sending - # a compressed file since we don't want to double - # compress. - self._compression = False - - self.etag = etag_value # type: ignore[assignment] - self.last_modified = st.st_mtime # type: ignore[assignment] - self.content_length = count - - self.headers[hdrs.ACCEPT_RANGES] = "bytes" - - real_start = cast(int, start) - - if status == HTTPPartialContent.status_code: - self.headers[hdrs.CONTENT_RANGE] = "bytes {}-{}/{}".format( - real_start, real_start + count - 1, file_size - ) - - # If we are sending 0 bytes calling sendfile() will throw a ValueError - if count == 0 or must_be_empty_body(request.method, self.status): - return await super().prepare(request) - - if start: # be aware that start could be None or int=0 here. - offset = start - else: - offset = 0 - - return await self._sendfile(request, fobj, offset, count) + await self._prepare_open_file(request, fobj, st, file_encoding) finally: # We do not await here because we do not want to wait # for the executor to finish before returning the response @@ -398,3 +245,159 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter # garbage collected before it completes. _CLOSE_FUTURES.add(close_future) close_future.add_done_callback(_CLOSE_FUTURES.remove) + + async def _prepare_open_file( + self, + request: "BaseRequest", + fobj: io.BufferedReader, + st: os.stat_result, + file_encoding: Optional[str], + ) -> Optional[AbstractStreamWriter]: + etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" + last_modified = st.st_mtime + + # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2 + ifmatch = request.if_match + if ifmatch is not None and not self._etag_match( + etag_value, ifmatch, weak=False + ): + return await self._precondition_failed(request) + + unmodsince = request.if_unmodified_since + if ( + unmodsince is not None + and ifmatch is None + and st.st_mtime > unmodsince.timestamp() + ): + return await self._precondition_failed(request) + + # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.2-2 + ifnonematch = request.if_none_match + if ifnonematch is not None and self._etag_match( + etag_value, ifnonematch, weak=True + ): + return await self._not_modified(request, etag_value, last_modified) + + modsince = request.if_modified_since + if ( + modsince is not None + and ifnonematch is None + and st.st_mtime <= modsince.timestamp() + ): + return await self._not_modified(request, etag_value, last_modified) + + status = self._status + file_size = st.st_size + count = file_size + + start = None + + ifrange = request.if_range + if ifrange is None or st.st_mtime <= ifrange.timestamp(): + # If-Range header check: + # condition = cached date >= last modification date + # return 206 if True else 200. + # if False: + # Range header would not be processed, return 200 + # if True but Range header missing + # return 200 + try: + rng = request.http_range + except ValueError: + # https://tools.ietf.org/html/rfc7233: + # A server generating a 416 (Range Not Satisfiable) response to + # a byte-range request SHOULD send a Content-Range header field + # with an unsatisfied-range value. + # The complete-length in a 416 response indicates the current + # length of the selected representation. + # + # Will do the same below. Many servers ignore this and do not + # send a Content-Range header with HTTP 416 + self.headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" + self.set_status(HTTPRequestRangeNotSatisfiable.status_code) + return await super().prepare(request) + + if TYPE_CHECKING: + assert rng is not None + start = rng.start + end = rng.stop + # If a range request has been made, convert start, end slice + # notation into file pointer offset and count + if start is not None or end is not None: + if start < 0 and end is None: # return tail of file + start += file_size + if start < 0: + # if Range:bytes=-1000 in request header but file size + # is only 200, there would be trouble without this + start = 0 + count = file_size - start + else: + # rfc7233:If the last-byte-pos value is + # absent, or if the value is greater than or equal to + # the current length of the representation data, + # the byte range is interpreted as the remainder + # of the representation (i.e., the server replaces the + # value of last-byte-pos with a value that is one less than + # the current length of the selected representation). + count = ( + min(end if end is not None else file_size, file_size) - start + ) + + if start >= file_size: + # HTTP 416 should be returned in this case. + # + # According to https://tools.ietf.org/html/rfc7233: + # If a valid byte-range-set includes at least one + # byte-range-spec with a first-byte-pos that is less than + # the current length of the representation, or at least one + # suffix-byte-range-spec with a non-zero suffix-length, + # then the byte-range-set is satisfiable. Otherwise, the + # byte-range-set is unsatisfiable. + self.headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" + self.set_status(HTTPRequestRangeNotSatisfiable.status_code) + return await super().prepare(request) + + status = HTTPPartialContent.status_code + # Even though you are sending the whole file, you should still + # return a HTTP 206 for a Range request. + self.set_status(status) + + # If the Content-Type header is not already set, guess it based on the + # extension of the request path. The encoding returned by guess_type + # can be ignored since the map was cleared above. + if hdrs.CONTENT_TYPE not in self.headers: + self.content_type = ( + CONTENT_TYPES.guess_type(self._path)[0] or FALLBACK_CONTENT_TYPE + ) + + if file_encoding: + self.headers[hdrs.CONTENT_ENCODING] = file_encoding + self.headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING + # Disable compression if we are already sending + # a compressed file since we don't want to double + # compress. + self._compression = False + + self.etag = etag_value # type: ignore[assignment] + self.last_modified = st.st_mtime # type: ignore[assignment] + self.content_length = count + + self.headers[hdrs.ACCEPT_RANGES] = "bytes" + + real_start = cast(int, start) + + if status == HTTPPartialContent.status_code: + self.headers[hdrs.CONTENT_RANGE] = "bytes {}-{}/{}".format( + real_start, real_start + count - 1, file_size + ) + + # If we are sending 0 bytes calling sendfile() will throw a ValueError + if count == 0 or must_be_empty_body(request.method, self.status): + return await super().prepare(request) + + if start: # be aware that start could be None or int=0 here. + offset = start + else: + offset = 0 + + return await self._sendfile(request, fobj, offset, count) From bdef0a9b87d943321ca775ff538d38a6bf2f540f Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 11:41:31 -0600 Subject: [PATCH 08/16] reverts --- aiohttp/web_fileresponse.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index 2d97d5a8c50..a3fc0a5caa7 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -303,6 +303,8 @@ async def _prepare_open_file( # return 200 try: rng = request.http_range + start = rng.start + end = rng.stop except ValueError: # https://tools.ietf.org/html/rfc7233: # A server generating a 416 (Range Not Satisfiable) response to @@ -317,10 +319,6 @@ async def _prepare_open_file( self.set_status(HTTPRequestRangeNotSatisfiable.status_code) return await super().prepare(request) - if TYPE_CHECKING: - assert rng is not None - start = rng.start - end = rng.stop # If a range request has been made, convert start, end slice # notation into file pointer offset and count if start is not None or end is not None: From 9664b5e7fc7fe97a20a7fc523c177b412a1d2d1b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 11:48:40 -0600 Subject: [PATCH 09/16] lint --- aiohttp/web_fileresponse.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index a3fc0a5caa7..2a3b0370bff 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -234,17 +234,18 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter self.set_status(HTTPForbidden.status_code) return await super().prepare(request) - await self._prepare_open_file(request, fobj, st, file_encoding) + return await self._prepare_open_file(request, fobj, st, file_encoding) finally: - # We do not await here because we do not want to wait - # for the executor to finish before returning the response - # so the connection can begin servicing another request - # as soon as possible. - close_future = loop.run_in_executor(None, fobj.close) - # Hold a strong reference to the future to prevent it from being - # garbage collected before it completes. - _CLOSE_FUTURES.add(close_future) - close_future.add_done_callback(_CLOSE_FUTURES.remove) + if fobj: + # We do not await here because we do not want to wait + # for the executor to finish before returning the response + # so the connection can begin servicing another request + # as soon as possible. + close_future = loop.run_in_executor(None, fobj.close) + # Hold a strong reference to the future to prevent it from being + # garbage collected before it completes. + _CLOSE_FUTURES.add(close_future) + close_future.add_done_callback(_CLOSE_FUTURES.remove) async def _prepare_open_file( self, From b376555aff165e198a6d43ed64bb1fffca92132c Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 11:50:30 -0600 Subject: [PATCH 10/16] remove unreachable code --- aiohttp/web_fileresponse.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index 2a3b0370bff..ae87dfec5c7 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -74,21 +74,6 @@ _CLOSE_FUTURES: Set[asyncio.Future[None]] = set() -def _stat_open_file( - file_path: pathlib.Path, fobj: io.BufferedReader, st: Optional[os.stat_result] -) -> os.stat_result: - """Return the stat result of the file or the fallback stat result. - - Ideally we can use fstat() to get the stat result of the file object, - to ensure we are returning the correct length of the open file, - but it is not possible to get the file descriptor from the file object - on some operating systems, so we have to use the stat result of the file path. - """ - with suppress(OSError): # May not work on Windows - return os.stat(fobj.fileno()) - return st or file_path.stat() - - class FileResponse(StreamResponse): """A response object can be used to send files.""" @@ -199,7 +184,11 @@ def _open_file_path_stat_encoding( st = compressed_path.lstat() if S_ISREG(st.st_mode): fobj = compressed_path.open("rb") - st = _stat_open_file(compressed_path, fobj, st) + with suppress(OSError): + # Once we open the file, we want the fstat() to ensure + # the file has not changed between the first stat() + # and the open(). + st = os.stat(fobj.fileno()) return fobj, st, file_encoding # Fallback to the uncompressed file @@ -207,7 +196,11 @@ def _open_file_path_stat_encoding( if not S_ISREG(st.st_mode): return None, st, None fobj = file_path.open("rb") - st = _stat_open_file(file_path, fobj, None) + with suppress(OSError): + # Once we open the file, we want the fstat() to ensure + # the file has not changed between the first stat() + # and the open(). + st = os.stat(fobj.fileno()) return fobj, st, None async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter]: From caeac0f2e02e1a790b9db07cf05a3eedf4753be2 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 11:50:50 -0600 Subject: [PATCH 11/16] comments --- aiohttp/web_fileresponse.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index ae87dfec5c7..a9eec287b64 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -184,7 +184,9 @@ def _open_file_path_stat_encoding( st = compressed_path.lstat() if S_ISREG(st.st_mode): fobj = compressed_path.open("rb") - with suppress(OSError): + with suppress( + OSError + ): # fstat() may not be available on all platforms # Once we open the file, we want the fstat() to ensure # the file has not changed between the first stat() # and the open(). @@ -196,7 +198,7 @@ def _open_file_path_stat_encoding( if not S_ISREG(st.st_mode): return None, st, None fobj = file_path.open("rb") - with suppress(OSError): + with suppress(OSError): # fstat() may not be available on all platforms # Once we open the file, we want the fstat() to ensure # the file has not changed between the first stat() # and the open(). From 2efe67f02e9a367a99343bca993a2a98659df7c0 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 11:57:48 -0600 Subject: [PATCH 12/16] preen --- aiohttp/web_fileresponse.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index a9eec287b64..7a29c767ba5 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -184,9 +184,8 @@ def _open_file_path_stat_encoding( st = compressed_path.lstat() if S_ISREG(st.st_mode): fobj = compressed_path.open("rb") - with suppress( - OSError - ): # fstat() may not be available on all platforms + with suppress(OSError): + # fstat() may not be available on all platforms # Once we open the file, we want the fstat() to ensure # the file has not changed between the first stat() # and the open(). @@ -198,7 +197,8 @@ def _open_file_path_stat_encoding( if not S_ISREG(st.st_mode): return None, st, None fobj = file_path.open("rb") - with suppress(OSError): # fstat() may not be available on all platforms + with suppress(OSError): + # fstat() may not be available on all platforms # Once we open the file, we want the fstat() to ensure # the file has not changed between the first stat() # and the open(). From 0f7362bf4ed948815bb0a54d741b0cbd82ffe4ad Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 12:46:28 -0600 Subject: [PATCH 13/16] changelog --- CHANGES/10101.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/10101.bugfix.rst diff --git a/CHANGES/10101.bugfix.rst b/CHANGES/10101.bugfix.rst new file mode 100644 index 00000000000..a99da561f76 --- /dev/null +++ b/CHANGES/10101.bugfix.rst @@ -0,0 +1 @@ +Fixed race condition in :class:`aiohttp.web.FileResponse` that could result in an incorrect response if the file was replaced on the filesystem during ``prepare`` -- by :user:`bdraco`. From 367dda54afdcb7417b89e61150f17ed9028d272e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 12:49:27 -0600 Subject: [PATCH 14/16] Update CHANGES/10101.bugfix.rst --- CHANGES/10101.bugfix.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES/10101.bugfix.rst b/CHANGES/10101.bugfix.rst index a99da561f76..ee68238834f 100644 --- a/CHANGES/10101.bugfix.rst +++ b/CHANGES/10101.bugfix.rst @@ -1 +1 @@ -Fixed race condition in :class:`aiohttp.web.FileResponse` that could result in an incorrect response if the file was replaced on the filesystem during ``prepare`` -- by :user:`bdraco`. +Fixed race condition in :class:`aiohttp.web.FileResponse` that could have resulted in an incorrect response if the file was replaced on the filesystem during :meth:`aiohttp.web.FileResponse.prepare` -- by :user:`bdraco`. From 52d77194bf6c75e7b4494ee1fdfb233b85990ce8 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 12:53:29 -0600 Subject: [PATCH 15/16] Update CHANGES/10101.bugfix.rst --- CHANGES/10101.bugfix.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES/10101.bugfix.rst b/CHANGES/10101.bugfix.rst index ee68238834f..e922723b602 100644 --- a/CHANGES/10101.bugfix.rst +++ b/CHANGES/10101.bugfix.rst @@ -1 +1 @@ -Fixed race condition in :class:`aiohttp.web.FileResponse` that could have resulted in an incorrect response if the file was replaced on the filesystem during :meth:`aiohttp.web.FileResponse.prepare` -- by :user:`bdraco`. +Fixed race condition in :class:`aiohttp.web.FileResponse` that could have resulted in an incorrect response if the file was replaced on the filesystem during ``prepare`` -- by :user:`bdraco`. From db087f2a29a37965227777be4dc6c6c01b711a9e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 13:03:03 -0600 Subject: [PATCH 16/16] Update CHANGES/10101.bugfix.rst --- CHANGES/10101.bugfix.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES/10101.bugfix.rst b/CHANGES/10101.bugfix.rst index e922723b602..e06195ac028 100644 --- a/CHANGES/10101.bugfix.rst +++ b/CHANGES/10101.bugfix.rst @@ -1 +1 @@ -Fixed race condition in :class:`aiohttp.web.FileResponse` that could have resulted in an incorrect response if the file was replaced on the filesystem during ``prepare`` -- by :user:`bdraco`. +Fixed race condition in :class:`aiohttp.web.FileResponse` that could have resulted in an incorrect response if the file was replaced on the file system during ``prepare`` -- by :user:`bdraco`.