Skip to content

Commit

Permalink
Rewrite urllib3 retry warnings and raise a diagnostic error on
Browse files Browse the repository at this point in the history
connection errors
  • Loading branch information
ichard26 committed Jul 2, 2024
1 parent 47ffcdd commit 3c4c844
Show file tree
Hide file tree
Showing 4 changed files with 251 additions and 9 deletions.
5 changes: 1 addition & 4 deletions src/pip/_internal/cli/index_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def _build_session(
trusted_hosts=options.trusted_hosts,
index_urls=self._get_index_urls(options),
ssl_context=ssl_context,
timeout=(timeout if timeout is not None else options.timeout),
)

# Handle custom ca-bundles from the user
Expand All @@ -113,10 +114,6 @@ def _build_session(
if options.client_cert:
session.cert = options.client_cert

# Handle timeouts
if options.timeout or timeout:
session.timeout = timeout if timeout is not None else options.timeout

# Handle configured proxies
if options.proxy:
session.proxies = {
Expand Down
101 changes: 101 additions & 0 deletions src/pip/_internal/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pathlib
import re
import sys
from http.client import RemoteDisconnected
from itertools import chain, groupby, repeat
from typing import TYPE_CHECKING, Dict, Iterator, List, Literal, Optional, Union

Expand All @@ -22,6 +23,7 @@
if TYPE_CHECKING:
from hashlib import _Hash

from pip._vendor import urllib3
from pip._vendor.requests.models import Request, Response

from pip._internal.metadata import BaseDistribution
Expand Down Expand Up @@ -312,6 +314,105 @@ def __str__(self) -> str:
return str(self.error_msg)


class ConnectionFailedError(DiagnosticPipError):
reference = "connection-failed"

def __init__(self, url: str, host: str, error: Exception) -> None:
from pip._vendor.urllib3.exceptions import NewConnectionError, ProtocolError

details = str(error)
if isinstance(error, NewConnectionError):
_, details = str(error).split("Failed to establish a new connection: ")
elif isinstance(error, ProtocolError):
try:
reason = error.args[1]
except IndexError:
pass
else:
if isinstance(reason, RemoteDisconnected):
details = "the server closed the connection without replying."

super().__init__(
message=f"Failed to connect to [magenta]{host}[/] while fetching {url}",
context=Text(f"Details: {details}"),
hint_stmt=(
"This is likely a system or network issue. Are you connected to the "
"Internet? Otherwise, check whether your system can connect to "
f"[magenta]{host}[/] before trying again (check your firewall/proxy "
"settings)."
),
)


class ConnectionTimeoutError(DiagnosticPipError):
reference = "connection-timeout"

def __init__(
self, url: str, host: str, *, kind: Literal["connect", "read"], timeout: float
) -> None:
context = Text()
context.append(host, style="magenta")
context.append(f" didn't respond within {timeout} seconds")
if kind == "connect":
context.append(" (while establishing a connection)")
super().__init__(
message=f"Unable to fetch {url}",
context=context,
hint_stmt=(
"This is likely a network problem, or a transient issue with the "
"remote server."
),
)


class SSLVerificationError(DiagnosticPipError):
reference = "ssl-verification-failed"

def __init__(
self,
url: str,
host: str,
error: "urllib3.exceptions.SSLError",
*,
is_tls_available: bool,
) -> None:
message = (
"Failed to establish a secure connection to "
f"[magenta]{host}[/] while fetching {url}"
)
if not is_tls_available:
context = Text("The built-in ssl module is not available.")
hint = (
"Your Python installation is missing SSL/TLS support, which is "
"required to access HTTPS URLs."
)
else:
context = Text(f"Details: {error!s}")
hint = (
"This was likely caused by the system or pip's network configuration."
)
super().__init__(message=message, context=context, hint_stmt=hint)


class ProxyConnectionError(DiagnosticPipError):
reference = "proxy-connection-failed"

def __init__(
self, url: str, host: str, error: "urllib3.exceptions.ProxyError"
) -> None:
try:
reason = error.args[1]
except IndexError:
reason = error
super().__init__(
message=(
f"Failed to connect to proxy [magenta]{host}[/] while fetching {url}"
),
context=Text(f"Details: {reason!s}"),
hint_stmt="This is likely a proxy configuration issue.",
)


class InvalidWheelFilename(InstallationError):
"""Invalid wheel filename."""

Expand Down
13 changes: 10 additions & 3 deletions src/pip/_internal/network/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from pip._internal.models.link import Link
from pip._internal.network.auth import MultiDomainBasicAuth
from pip._internal.network.cache import SafeFileCache
from pip._internal.network.utils import Urllib3RetryFilter, raise_connection_error

# Import ssl from compat so the initial import occurs in only one place.
from pip._internal.utils.compat import has_tls
Expand Down Expand Up @@ -318,11 +319,10 @@ def cert_verify(


class PipSession(requests.Session):
timeout: Optional[int] = None

def __init__(
self,
*args: Any,
timeout: float = 60,
retries: int = 0,
cache: Optional[str] = None,
trusted_hosts: Sequence[str] = (),
Expand All @@ -336,6 +336,10 @@ def __init__(
"""
super().__init__(*args, **kwargs)

retry_filter = Urllib3RetryFilter(timeout=timeout)
logging.getLogger("pip._vendor.urllib3.connectionpool").addFilter(retry_filter)

self.timeout = timeout
# Namespace the attribute with "pip_" just in case to prevent
# possible conflicts with the base class.
self.pip_trusted_origins: List[Tuple[str, Optional[int]]] = []
Expand Down Expand Up @@ -519,4 +523,7 @@ def request(self, method: str, url: str, *args: Any, **kwargs: Any) -> Response:
kwargs.setdefault("proxies", self.proxies)

# Dispatch the actual request
return super().request(method, url, *args, **kwargs)
try:
return super().request(method, url, *args, **kwargs)
except requests.ConnectionError as e:
raise_connection_error(e, timeout=self.timeout)
141 changes: 139 additions & 2 deletions src/pip/_internal/network/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
from typing import Dict, Generator
import logging
import urllib.parse
from http.client import RemoteDisconnected
from typing import Dict, Generator, Literal, Optional

from pip._vendor import requests, urllib3
from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response

from pip._internal.exceptions import NetworkConnectionError
from pip._internal.exceptions import (
ConnectionFailedError,
ConnectionTimeoutError,
NetworkConnectionError,
ProxyConnectionError,
SSLVerificationError,
)
from pip._internal.utils.compat import has_tls
from pip._internal.utils.logging import VERBOSE

# The following comments and HTTP headers were originally added by
# Donald Stufft in git commit 22c562429a61bb77172039e480873fb239dd8c03.
Expand Down Expand Up @@ -94,3 +106,128 @@ def response_chunks(
if not chunk:
break
yield chunk


def raise_connection_error(error: requests.ConnectionError, *, timeout: float) -> None:
"""Raise a specific error for a given ConnectionError, if possible.
Note: requests.ConnectionError is the parent class of
requests.ProxyError, requests.SSLError, and requests.ConnectTimeout
so these errors are also handled here. In addition, a ReadTimeout
wrapped in a requests.MayRetryError is converted into a
ConnectionError by requests internally.
"""
# NOTE: this function was written defensively to degrade gracefully when
# a specific error cannot be raised due to issues inspecting the exception
# state. This logic isn't pretty.

url = error.request.url
# requests.ConnectionError.args[0] should be an instance of
# urllib3.exceptions.MaxRetryError.
reason = error.args[0]
# Attempt to query the host from the urllib3 error, otherwise fall back to
# parsing the request URL.
if isinstance(reason, urllib3.exceptions.MaxRetryError):
host = reason.pool.host
# Narrow the reason further to the specific error from the last retry.
reason = reason.reason
else:
host = urllib.parse.urlsplit(error.request.url).netloc

if isinstance(reason, urllib3.exceptions.SSLError):
raise SSLVerificationError(url, host, reason, is_tls_available=has_tls())
# NewConnectionError is a subclass of TimeoutError for some reason ...
if isinstance(reason, urllib3.exceptions.TimeoutError) and not isinstance(
reason, urllib3.exceptions.NewConnectionError
):
kind: Literal["connect", "read"] = (
"connect"
if isinstance(reason, urllib3.exceptions.ConnectTimeoutError)
else "read"
)
raise ConnectionTimeoutError(url, host, kind=kind, timeout=timeout)
if isinstance(reason, urllib3.exceptions.ProxyError):
raise ProxyConnectionError(url, host, reason)

# Unknown error, give up and raise a generic error.
raise ConnectionFailedError(url, host, reason)


class Urllib3RetryFilter:
"""A logging filter which attempts to rewrite urllib3's retrying
warnings to be more readable and less technical.
"""

def __init__(self, *, timeout: Optional[float]) -> None:
self.timeout = timeout
self.verbose = logging.getLogger().isEnabledFor(VERBOSE) # HACK

def filter(self, record: logging.LogRecord) -> bool:
# Attempt to "sniff out" the retrying warning.
if not isinstance(record.args, tuple):
return True

retry = next(
(a for a in record.args if isinstance(a, urllib3.util.Retry)), None
)
if record.levelno != logging.WARNING or retry is None:
# Not the right warning, leave it alone.
return True

error = next((a for a in record.args if isinstance(a, Exception)), None)
if error is None:
# No error information available, leave it alone.
return True

rewritten = False
if isinstance(error, urllib3.exceptions.NewConnectionError):
rewritten = True
connection = error.pool
record.msg = f"failed to connect to {connection.host}"
if isinstance(connection, urllib3.connection.HTTPSConnection):
record.msg += " via HTTPS"
elif isinstance(connection, urllib3.connection.HTTPConnection):
record.msg += " via HTTP"
elif isinstance(error, urllib3.exceptions.SSLError):
rewritten = True
# Yes, this is the most information we can provide as urllib3
# doesn't give us much.
record.msg = "SSL verification failed"
elif isinstance(error, urllib3.exceptions.TimeoutError):
rewritten = True
record.msg = f"server didn't respond within {self.timeout} seconds"
elif isinstance(error, urllib3.exceptions.ProtocolError):
try:
if isinstance(error.args[1], RemoteDisconnected):
rewritten = True
record.msg = "Server closed connection unexpectedly"
except IndexError:
pass
elif isinstance(error, urllib3.exceptions.ProxyError):
rewritten = True
record.msg = "failed to connect to proxy"
try:
reason = error.args[1]
proxy = reason.pool.host
except Exception:
pass
else:
record.msg += f" {proxy}"

if rewritten:
# The total remaining retries is already decremented when this
# warning is raised.
retries = retry.total + 1
assert isinstance(retry, urllib3.util.Retry)
if retries > 1:
record.msg += f", retrying {retries} more times"
elif retries == 1:
record.msg += ", retrying 1 last time"

if self.verbose:
# As it's hard to provide enough detail, show the original
# error under verbose mode.
record.msg += f": {error!s}"
record.args = ()

return True

0 comments on commit 3c4c844

Please sign in to comment.