Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add tuning options for federation client backoff #5556

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/5556.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Treat more outgoing federation connection failures (such as refused connections, dead domains, and no route to host) as fatal, so that they are not retried immediately.
1 change: 1 addition & 0 deletions synapse/federation/transport/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ def send_transaction(self, transaction, json_data_callback=None):
long_retries=True,
backoff_on_404=True, # If we get a 404 the other side has gone
try_trailing_slash_on_400=True,
retry_on_dns_fail=False, # If we get DNS errors, the other side has gone
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For what it's worth, this was to fix a bug we had where our local DNS server would SERVFAIL fairly frequently.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@erikjohnston: which change was that? The addition of this line?

)

defer.returnValue(response)
Expand Down
41 changes: 39 additions & 2 deletions synapse/http/matrixfederationclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,17 @@
from signedjson.sign import sign_json
from zope.interface import implementer

from OpenSSL import SSL
from twisted.internet import defer, protocol
from twisted.internet.error import DNSLookupError
from twisted.internet.error import ConnectError, ConnectionRefusedError, DNSLookupError
from twisted.internet.interfaces import IReactorPluggableNameResolver
from twisted.internet.task import _EPSILON, Cooperator
from twisted.web._newclient import ResponseDone
from twisted.names.error import DNSServerError
from twisted.web._newclient import (
RequestTransmissionFailed,
ResponseDone,
ResponseNeverReceived,
)
from twisted.web.http_headers import Headers

import synapse.metrics
Expand Down Expand Up @@ -407,6 +413,35 @@ def _send_request(
response = yield request_deferred
except DNSLookupError as e:
raise_from(RequestSendFailed(e, can_retry=retry_on_dns_fail), e)
except DNSServerError as e:
# Their domain's nameserver is busted and can't give us a result
raise_from(RequestSendFailed(e, can_retry=retry_on_dns_fail), e)
except (ConnectError, ConnectionRefusedError) as e:
if e.osError == 113:
# No route to host -- they're gone
raise_from(RequestSendFailed(e, can_retry=False), e)
elif e.osError == 111:
# Refused connection -- they're gone
raise_from(RequestSendFailed(e, can_retry=False), e)
elif e.osError == 99:
# Cannot assign address -- don't try?
raise_from(RequestSendFailed(e, can_retry=False), e)

# Some other socket error, try retrying
logger.info("Failed to send request: %s", e)
raise_from(RequestSendFailed(e, can_retry=True), e)

except (RequestTransmissionFailed, ResponseNeverReceived) as e:
for i in e.reasons:
# If it's an OpenSSL error, they probably don't have
# a valid certificate or something else very bad went on.
if i.trap(SSL.Error):
raise_from(RequestSendFailed(e, can_retry=False), e)

# If it's not that, raise it normally.
logger.info("Failed to send request: %s", e)
raise_from(RequestSendFailed(e, can_retry=True), e)

except Exception as e:
logger.info("Failed to send request: %s", e)
raise_from(RequestSendFailed(e, can_retry=True), e)
Expand Down Expand Up @@ -557,6 +592,7 @@ def put_json(
ignore_backoff=False,
backoff_on_404=False,
try_trailing_slash_on_400=False,
retry_on_dns_fail=True,
):
""" Sends the specifed json data using PUT

Expand Down Expand Up @@ -618,6 +654,7 @@ def put_json(
ignore_backoff=ignore_backoff,
long_retries=long_retries,
timeout=timeout,
retry_on_dns_fail=retry_on_dns_fail,
)

body = yield _handle_json_response(
Expand Down