Skip to content

Commit

Permalink
feat(translator): RateLimiter
Browse files Browse the repository at this point in the history
  • Loading branch information
awwaawwa committed Dec 23, 2024
1 parent 797192a commit c9c1f42
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 1 deletion.
43 changes: 43 additions & 0 deletions pdf2zh/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import re
import time
import unicodedata
from copy import copy
import deepl
Expand All @@ -24,6 +25,47 @@ def remove_control_characters(s):
return "".join(ch for ch in s if unicodedata.category(ch)[0] != "C")


class RateLimiter:
    """Sliding-window limiter that admits at most ``max_qps`` callers per second.

    ``last_requests`` holds the admission timestamps that still fall inside
    the trailing one-second window, oldest first. A caller is admitted at
    once while the window holds fewer than ``max_qps`` entries; otherwise it
    sleeps until the oldest entry is one second old.
    """

    def __init__(self, max_qps: int):
        """Create a limiter.

        Args:
            max_qps: Maximum number of admissions per second; must be > 0.

        Raises:
            ValueError: If ``max_qps`` is not greater than zero.
        """
        self._check_max_qps(max_qps)
        self.max_qps = max_qps
        # min_interval and window_size are not read by this class; they are
        # kept as attributes for any external code that inspects them.
        self.min_interval = 1.0 / max_qps
        self.last_requests = []  # time.monotonic() stamps, oldest first
        self.window_size = max_qps
        # NOTE(review): the lock is created eagerly; on Python versions before
        # 3.10 asyncio.Lock binds to the current event loop at construction.
        # Confirm the limiter is only awaited from that loop.
        self.lock = asyncio.Lock()

    @staticmethod
    def _check_max_qps(max_qps):
        """Reject limits that would break the window arithmetic."""
        if max_qps <= 0:
            raise ValueError(f"max_qps must be greater than 0, got {max_qps!r}")

    async def wait_async(self):
        """Suspend the caller until it may issue its next request.

        The lock is held for the whole call, including the sleep, so other
        callers wait on the lock until this one has been admitted.
        """
        async with self.lock:
            # A monotonic clock keeps the window correct even if the system
            # wall clock is adjusted while requests are in flight.
            now = time.monotonic()

            # Drop admissions that have left the one-second window.
            while self.last_requests and now - self.last_requests[0] > 1.0:
                self.last_requests.pop(0)

            # Room left in the window: admit immediately.
            if len(self.last_requests) < self.max_qps:
                self.last_requests.append(now)
                return

            # Window is full. Because max_qps > 0, the list is non-empty here.
            # Wait until the oldest admission is one second old, then let the
            # caller take over its slot.
            next_time = self.last_requests[0] + 1.0
            if next_time > now:
                await asyncio.sleep(next_time - now)
            self.last_requests.pop(0)
            self.last_requests.append(next_time)

    def set_max_qps(self, max_qps):
        """Change the admission limit.

        The new value is validated before any attribute is touched, so a
        rejected value leaves the limiter in its previous, working state.

        Args:
            max_qps: New maximum number of admissions per second; must be > 0.

        Raises:
            ValueError: If ``max_qps`` is not greater than zero.
        """
        self._check_max_qps(max_qps)
        self.max_qps = max_qps
        self.min_interval = 1.0 / max_qps
        self.window_size = max_qps


# Module-level limiter awaited before translation requests; starts at 5 QPS
# and can be retuned through set_translate_rate_limiter().
_translate_rate_limiter = RateLimiter(5)


def set_translate_rate_limiter(max_qps):
    """Apply a new queries-per-second cap to the module-level translation limiter."""
    limiter = _translate_rate_limiter
    limiter.set_max_qps(max_qps)


class BaseTranslator:
name = "base"
envs = {}
Expand Down Expand Up @@ -77,6 +119,7 @@ async def translate_async(self, text, ignore_cache=False):
cache = self.cache.get(text)
if cache is not None:
return cache
await _translate_rate_limiter.wait_async()
try:
translation = await self.do_translate_async(text)
except NotImplementedError:
Expand Down
38 changes: 37 additions & 1 deletion test/test_translator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import unittest
from pdf2zh.translator import BaseTranslator
from pdf2zh import cache

from pdf2zh.translator import RateLimiter
import asyncio
import time

class AutoIncreaseTranslator(BaseTranslator):
name = "auto_increase"
Expand Down Expand Up @@ -90,5 +92,39 @@ async def test_call_sync_from_async(self):
self.assertEqual(await sync_translator.translate_async("Hello World"), "1")


class TestRateLimiter(unittest.IsolatedAsyncioTestCase):
    """Timing checks for ``RateLimiter.wait_async``."""

    async def test_concurrent_rate_limit(self):
        """Twenty concurrent waiters on a 10 QPS limiter take at least one second."""
        limiter = RateLimiter(10)  # 10 QPS
        started_at = time.time()

        async def acquire_and_stamp():
            # Record the wall-clock moment at which the limiter lets us through.
            await limiter.wait_async()
            return time.time()

        # Launch 20 waiters at once; gather returns stamps in submission order.
        finished = await asyncio.gather(*(acquire_and_stamp() for _ in range(20)))

        # 20 requests at 10 QPS cannot all be admitted within the first second.
        elapsed = finished[-1] - started_at
        self.assertGreaterEqual(elapsed, 1.0)

        # Mean gap between consecutive stamps should sit near 0.1s (1/10 QPS).
        gaps = [later - earlier for earlier, later in zip(finished, finished[1:])]
        mean_gap = sum(gaps) / len(gaps)
        self.assertAlmostEqual(mean_gap, 0.1, delta=0.05)

    async def test_burst_handling(self):
        """A burst smaller than the limit is admitted without waiting."""
        limiter = RateLimiter(10)  # 10 QPS

        # Five requests fit inside the 10-per-second window, so none should sleep.
        began = time.time()
        await asyncio.gather(*(limiter.wait_async() for _ in range(5)))
        elapsed = time.time() - began

        self.assertLess(elapsed, 0.1)  # Should complete quickly


if __name__ == "__main__":
unittest.main()

0 comments on commit c9c1f42

Please sign in to comment.