Skip to content

Commit

Permalink
Add possibility to cache dns lookups
Browse files Browse the repository at this point in the history
Add an optional argument, dns_resolver_cache, to validate_email. dns_resolver_cache can be an instance of either dns.resolver.Cache or dns.resolver.LRUCache. If it is passed, lookups will be cached according to the specified class.

dns.resolver.Cache can take a cleaning_interval argument and dns.resolver.LRUCache can take a max_size argument.

This can reduce the number of DNS lookups needed, since the domains of large email providers will likely already be in the cache.
  • Loading branch information
HeyHugo committed Nov 14, 2020
1 parent fd76e66 commit 140f2ff
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 2 deletions.
14 changes: 12 additions & 2 deletions email_validator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ def validate_email(
allow_empty_local=False,
check_deliverability=True,
timeout=DEFAULT_TIMEOUT,
dns_resolver_cache=None
):
"""
Validates an email address, raising an EmailNotValidError if the address is not valid or returning a dict of
Expand Down Expand Up @@ -273,7 +274,9 @@ def validate_email(
if check_deliverability:
# Validate the email address's deliverability and update the
# return dict with metadata.
deliverability_info = validate_email_deliverability(ret["domain"], ret["domain_i18n"], timeout)
deliverability_info = validate_email_deliverability(
ret["domain"], ret["domain_i18n"], timeout, dns_resolver_cache
)
if "mx" in deliverability_info:
ret.mx = deliverability_info["mx"]
ret.mx_fallback_type = deliverability_info["mx-fallback"]
Expand Down Expand Up @@ -443,7 +446,12 @@ def validate_email_domain_part(domain):
}


def validate_email_deliverability(domain, domain_i18n, timeout=DEFAULT_TIMEOUT):
def validate_email_deliverability(
domain,
domain_i18n,
timeout=DEFAULT_TIMEOUT,
dns_resolver_cache=None
):
# Check that the domain resolves to an MX record. If there is no MX record,
# try an A or AAAA record which is a deprecated fallback for deliverability.

Expand All @@ -470,6 +478,8 @@ def dns_resolver_resolve_shim(resolver, domain, record):
raise dns.exception.Timeout()

resolver = dns.resolver.get_default_resolver()
if resolver.cache is None and dns_resolver_cache is not None:
resolver.cache = dns_resolver_cache

if timeout:
resolver.lifetime = timeout
Expand Down
16 changes: 16 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from unittest import mock
import dns.resolver
import pytest
from email_validator import EmailSyntaxError, EmailUndeliverableError, \
validate_email, validate_email_deliverability, \
Expand Down Expand Up @@ -344,3 +346,17 @@ def test_main_output_shim(monkeypatch, capsys):
# The \n is part of the print statement, not part of the string, which is what the b'...' is
# Since we're mocking py 2.7 here instead of actually using 2.7, this was the closest I could get
assert stdout == "b'An email address cannot have a period immediately after the @-sign.'\n"


@mock.patch("dns.resolver.LRUCache.put")
def test_validate_email__with_dns_resolver_cache(mocked_put):
    """Check that a cache passed via ``dns_resolver_cache`` is installed and used.

    The cache is attached to dnspython's process-wide default resolver, so the
    test saves the resolver's previous cache, starts from a known "no cache"
    state, and restores the previous cache afterwards so that other tests are
    not affected.

    NOTE(review): this test performs deliverability checks for gmail.com and
    therefore presumably needs working DNS access -- confirm for CI.
    """
    resolver = dns.resolver.get_default_resolver()
    previous_cache = resolver.cache
    # validate_email_deliverability only installs the passed cache when the
    # default resolver has none, so clear it to make the test deterministic.
    resolver.cache = None
    cache = dns.resolver.LRUCache(max_size=10000)
    try:
        validate_email("test@gmail.com", dns_resolver_cache=cache)
        assert mocked_put.called

        # Check that the global resolver has the supplied cache set.
        assert isinstance(resolver.cache, dns.resolver.LRUCache)
        assert resolver.cache is cache

        # A second validation must consult the already-installed cache.
        with mock.patch("dns.resolver.LRUCache.get") as mocked_get:
            validate_email("test@gmail.com", dns_resolver_cache=cache)
            assert mocked_get.called
    finally:
        # Do not leak the cache into the shared default resolver.
        resolver.cache = previous_cache

0 comments on commit 140f2ff

Please sign in to comment.