Skip to content

Commit

Permalink
feat(translator): RateLimiter
Browse files Browse the repository at this point in the history
  • Loading branch information
awwaawwa committed Dec 23, 2024
1 parent 97e9a90 commit 11bbd55
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 2 deletions.
46 changes: 44 additions & 2 deletions pdf2zh/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import re
import time
import unicodedata
from copy import copy
import deepl
Expand All @@ -15,14 +16,55 @@
from tencentcloud.tmt.v20180321.tmt_client import TmtClient
from tencentcloud.tmt.v20180321.models import TextTranslateRequest
from tencentcloud.tmt.v20180321.models import TextTranslateResponse

import threading
import json


def remove_control_characters(s):
    """Strip Unicode control characters (categories "Cc", "Cf", ...) from *s*."""
    kept = [ch for ch in s if not unicodedata.category(ch).startswith("C")]
    return "".join(kept)


class RateLimiter:
    """Thread-safe sliding-window rate limiter capping requests at max_qps per second.

    Callers invoke :meth:`wait` before each request; it returns immediately while
    fewer than ``max_qps`` requests happened in the last second, otherwise it
    sleeps until the oldest request ages out of the window.
    """

    def __init__(self, max_qps: int):
        if max_qps <= 0:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise ValueError("max_qps must be a positive number")
        self.max_qps = max_qps
        self.min_interval = 1.0 / max_qps
        self.last_requests = []  # Timestamps of recent requests, oldest first
        self.window_size = max_qps  # Track requests in a sliding window
        self.lock = threading.Lock()

    def wait(self):
        """Block until a request is permitted under the current QPS limit."""
        with self.lock:
            now = time.time()

            # Drop timestamps that have fallen outside the 1-second window.
            while self.last_requests and now - self.last_requests[0] > 1.0:
                self.last_requests.pop(0)

            # Under the limit: record this request and proceed immediately.
            if len(self.last_requests) < self.max_qps:
                self.last_requests.append(now)
                return

            # At the limit: sleep until the oldest request leaves the window,
            # then take its slot. Sleeping while holding the lock is intentional:
            # it serializes waiters so they are released one per slot, in order.
            next_time = self.last_requests[0] + 1.0
            if next_time > now:
                time.sleep(next_time - now)
            self.last_requests.pop(0)
            self.last_requests.append(next_time)

    def set_max_qps(self, max_qps):
        """Update the QPS limit; safe to call while other threads use wait()."""
        if max_qps <= 0:
            raise ValueError("max_qps must be a positive number")
        # Fix: mutate shared state under the lock — the original wrote these
        # fields unsynchronized, racing with concurrent wait() calls.
        with self.lock:
            self.max_qps = max_qps
            self.min_interval = 1.0 / max_qps
            self.window_size = max_qps


# Module-level limiter shared by every translator instance; default cap is 5 QPS.
_translate_rate_limiter = RateLimiter(5)


def set_translate_rate_limiter(max_qps):
    """Set the maximum queries-per-second of the shared translation rate limiter."""
    _translate_rate_limiter.set_max_qps(max_qps)


class BaseTranslator:
name = "base"
envs = {}
Expand Down Expand Up @@ -76,7 +118,7 @@ def translate(self, text, ignore_cache=False):
cache = self.cache.get(text)
if cache is not None:
return cache

_translate_rate_limiter.wait()
translation = self.do_translate(text)
if not (self.ignore_cache or ignore_cache):
self.cache.set(text, translation)
Expand Down
48 changes: 48 additions & 0 deletions test/test_translator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import unittest
from pdf2zh.translator import BaseTranslator
from pdf2zh import cache
from pdf2zh.translator import RateLimiter
import threading
import time


class AutoIncreaseTranslator(BaseTranslator):
Expand Down Expand Up @@ -73,5 +76,50 @@ def test_base_translator_throw(self):
translator.translate("Hello World")


class TestRateLimiter(unittest.TestCase):
    def test_concurrent_rate_limit(self):
        """20 concurrent requests through a 10-QPS limiter must span >= 1 second."""
        limiter = RateLimiter(10)  # 10 QPS
        start_time = time.time()
        timestamps = [None for _ in range(20)]

        def task(i):
            limiter.wait()
            timestamps[i] = time.time()

        # Run 20 concurrent tasks. Fix: do not shadow the `task` function with
        # the loop variable, as the original did.
        threads = [threading.Thread(target=task, args=(i,)) for i in range(20)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        # Fix: threads may finish out of order, so `timestamps` is not sorted;
        # the original's `timestamps[-1]` and pairwise intervals were flaky
        # (intervals could even be negative). Sort before measuring.
        ordered = sorted(timestamps)

        total_time = ordered[-1] - start_time
        self.assertGreaterEqual(total_time, 1.0)  # 20 requests at 10 QPS need >= 1s

        # Check even distribution of release times.
        intervals = [ordered[i + 1] - ordered[i] for i in range(len(ordered) - 1)]
        avg_interval = sum(intervals) / len(intervals)
        self.assertAlmostEqual(avg_interval, 0.1, delta=0.05)  # ~1/10 QPS

    def test_burst_handling(self):
        """A burst below the QPS cap must pass through without blocking."""
        limiter = RateLimiter(10)  # 10 QPS

        # First burst of 5 requests should be immediate.
        start = time.time()
        threads = [threading.Thread(target=limiter.wait) for _ in range(5)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        burst_time = time.time() - start

        self.assertLess(burst_time, 0.1)  # Should complete quickly


# Allow running this test module directly: `python test_translator.py`.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 11bbd55

Please sign in to comment.