This repository has been archived by the owner on Apr 26, 2024. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Add metrics to track how the rate limiter is affecting requests (sleep/reject) #13534
Merged
MadLittleMods
merged 6 commits into
develop
from
madlittlemods/track-metrics-from-rate-limiter
Aug 17, 2022
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
325cadc
Add metrics to track how the rate limiter is affecting requests
MadLittleMods 3267318
Add changelog
MadLittleMods 5679bb2
Fix lints
MadLittleMods 149ac1d
Remove unbounded host from labels
MadLittleMods 75ca101
Merge branch 'develop' into madlittlemods/track-metrics-from-rate-lim…
MadLittleMods 2e0e5cc
Merge branch 'develop' into madlittlemods/track-metrics-from-rate-lim…
MadLittleMods File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Add metrics to track how the rate limiter is affecting requests (sleep/reject). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,8 @@ | |
import typing | ||
from typing import Any, DefaultDict, Iterator, List, Set | ||
|
||
from prometheus_client.core import Counter | ||
|
||
from twisted.internet import defer | ||
|
||
from synapse.api.errors import LimitExceededError | ||
|
@@ -37,6 +39,9 @@ | |
logger = logging.getLogger(__name__) | ||
|
||
|
||
# Track how much the ratelimiter is affecting requests | ||
rate_limit_sleep_counter = Counter("synapse_rate_limit_sleep", "") | ||
rate_limit_reject_counter = Counter("synapse_rate_limit_reject", "") | ||
queue_wait_timer = Histogram( | ||
"synapse_rate_limit_queue_wait_time_seconds", | ||
"sec", | ||
|
@@ -84,7 +89,7 @@ def ratelimit(self, host: str) -> "_GeneratorContextManager[defer.Deferred[None] | |
Returns: | ||
context manager which returns a deferred. | ||
""" | ||
return self.ratelimiters[host].ratelimit() | ||
return self.ratelimiters[host].ratelimit(host) | ||
|
||
|
||
class _PerHostRatelimiter: | ||
|
@@ -119,12 +124,14 @@ def __init__(self, clock: Clock, config: FederationRatelimitSettings): | |
self.request_times: List[int] = [] | ||
|
||
@contextlib.contextmanager | ||
def ratelimit(self) -> "Iterator[defer.Deferred[None]]": | ||
def ratelimit(self, host: str) -> "Iterator[defer.Deferred[None]]": | ||
# `contextlib.contextmanager` takes a generator and turns it into a | ||
# context manager. The generator should only yield once with a value | ||
# to be returned by manager. | ||
# Exceptions will be reraised at the yield. | ||
|
||
self.host = host | ||
|
||
request_id = object() | ||
ret = self._on_enter(request_id) | ||
try: | ||
|
@@ -144,6 +151,8 @@ def _on_enter(self, request_id: object) -> "defer.Deferred[None]": | |
# sleeping or in the ready queue). | ||
queue_size = len(self.ready_request_queue) + len(self.sleeping_requests) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be good to track |
||
if queue_size > self.reject_limit: | ||
logger.debug("Ratelimiter(%s): rejecting request", self.host) | ||
rate_limit_reject_counter.inc() | ||
raise LimitExceededError( | ||
retry_after_ms=int(self.window_size / self.sleep_limit) | ||
) | ||
|
@@ -155,7 +164,8 @@ def queue_request() -> "defer.Deferred[None]": | |
queue_defer: defer.Deferred[None] = defer.Deferred() | ||
self.ready_request_queue[request_id] = queue_defer | ||
logger.info( | ||
"Ratelimiter: queueing request (queue now %i items)", | ||
"Ratelimiter(%s): queueing request (queue now %i items)", | ||
self.host, | ||
len(self.ready_request_queue), | ||
) | ||
|
||
|
@@ -164,19 +174,28 @@ def queue_request() -> "defer.Deferred[None]": | |
return defer.succeed(None) | ||
|
||
logger.debug( | ||
"Ratelimit [%s]: len(self.request_times)=%d", | ||
"Ratelimit(%s) [%s]: len(self.request_times)=%d", | ||
self.host, | ||
id(request_id), | ||
len(self.request_times), | ||
) | ||
|
||
if len(self.request_times) > self.sleep_limit: | ||
logger.debug("Ratelimiter: sleeping request for %f sec", self.sleep_sec) | ||
logger.debug( | ||
"Ratelimiter(%s) [%s]: sleeping request for %f sec", | ||
self.host, | ||
id(request_id), | ||
self.sleep_sec, | ||
) | ||
rate_limit_sleep_counter.inc() | ||
ret_defer = run_in_background(self.clock.sleep, self.sleep_sec) | ||
|
||
self.sleeping_requests.add(request_id) | ||
|
||
def on_wait_finished(_: Any) -> "defer.Deferred[None]": | ||
logger.debug("Ratelimit [%s]: Finished sleeping", id(request_id)) | ||
logger.debug( | ||
"Ratelimit(%s) [%s]: Finished sleeping", self.host, id(request_id) | ||
) | ||
self.sleeping_requests.discard(request_id) | ||
queue_defer = queue_request() | ||
return queue_defer | ||
|
@@ -186,7 +205,9 @@ def on_wait_finished(_: Any) -> "defer.Deferred[None]": | |
ret_defer = queue_request() | ||
|
||
def on_start(r: object) -> object: | ||
logger.debug("Ratelimit [%s]: Processing req", id(request_id)) | ||
logger.debug( | ||
"Ratelimit(%s) [%s]: Processing req", self.host, id(request_id) | ||
) | ||
self.current_processing.add(request_id) | ||
return r | ||
|
||
|
@@ -217,7 +238,7 @@ def on_both(r: object) -> object: | |
return make_deferred_yieldable(ret_defer) | ||
|
||
def _on_exit(self, request_id: object) -> None: | ||
logger.debug("Ratelimit [%s]: Processed req", id(request_id)) | ||
logger.debug("Ratelimit(%s) [%s]: Processed req", self.host, id(request_id)) | ||
self.current_processing.discard(request_id) | ||
try: | ||
# start processing the next item on the queue. | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better way to pass the `host` string to the `_PerHostRatelimiter`? There doesn't seem to be a good way to make `DefaultDict` pass the string key `host` on: https://stackoverflow.com/questions/2912231/is-there-a-clever-way-to-pass-the-key-to-defaultdicts-default-factory