Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split Parser and reorganise package #192

Merged
merged 1 commit into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ class EagerWriter(Writer):
__all__ = ["MATCHERS"]

from typing import Tuple, List
from .core import UserAgentMatcher, OSMatcher, DeviceMatcher
from .matchers import UserAgentMatcher, OSMatcher, DeviceMatcher

MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([
"""
Expand Down
98 changes: 70 additions & 28 deletions src/ua_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,25 @@
This way importing anything but the top-level package should not be
necessary unless you want to *implement* a parser.
"""
from __future__ import annotations

__all__ = [
"BasicParser",
"CachingParser",
"BasicResolver",
"CachingResolver",
"Clearing",
"DefaultedParseResult",
"Device",
"DeviceMatcher",
"Domain",
"LRU",
"Locking",
"Matchers",
"OS",
"OSMatcher",
"ParseResult",
"Parser",
"Resolver",
"PartialParseResult",
"UserAgent",
"UserAgentMatcher",
"load_builtins",
"load_lazy_builtins",
"load_data",
"load_yaml",
"parse",
"parse_device",
"parse_os",
Expand All @@ -48,43 +44,89 @@
import contextlib
from typing import Callable, Optional

from .basic import Parser as BasicParser
from .caching import CachingParser, Clearing, Locking, LRU
from .basic import Resolver as BasicResolver
from .caching import CachingResolver, Clearing, Locking, LRU
from .core import (
DefaultedParseResult,
Device,
DeviceMatcher,
Domain,
Matchers,
OS,
OSMatcher,
Parser,
ParseResult,
PartialParseResult,
Resolver,
UserAgent,
UserAgentMatcher,
)
from .loaders import load_builtins, load_data, load_lazy_builtins, load_yaml
from .loaders import load_builtins, load_lazy_builtins

Re2Parser: Optional[Callable[[Matchers], Parser]] = None
Re2Resolver: Optional[Callable[[Matchers], Resolver]] = None
with contextlib.suppress(ImportError):
from .re2 import Parser as Re2Parser
from .re2 import Resolver as Re2Resolver


VERSION = (1, 0, 0)


class Parser:
    """Front-end object pairing a resolver callable with convenience
    methods for the common parsing use cases.

    The resolver does the actual work; this class only forwards to it
    and post-processes the partial result it returns.
    """

    def __init__(self, resolver: Resolver) -> None:
        # The resolver is invoked as ``resolver(ua, domains)``.
        self.resolver = resolver

    @classmethod
    def from_matchers(cls, m: Matchers, /) -> Parser:
        """Alternate constructor building a parser straight from a set
        of matchers.

        When the module-level ``Re2Resolver`` is available (not
        ``None``) it is used on its own; otherwise a ``BasicResolver``
        is wrapped in a ``CachingResolver`` backed by a locking LRU
        cache of 200 entries.
        """
        if Re2Resolver is None:
            fallback = CachingResolver(
                BasicResolver(m),
                Locking(LRU(200)),
            )
            return cls(fallback)
        return cls(Re2Resolver(m))

    def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
        """Parses the ``ua`` string, returning a parse result with *at
        least* the requested :class:`domains <Domain>` resolved (whether
        to success or failure).

        A parser may resolve more :class:`domains <Domain>` than
        requested, but it *must not* resolve fewer.
        """
        resolve = self.resolver
        return resolve(ua, domains)

    def parse(self, ua: str) -> ParseResult:
        """Convenience method: resolves every domain, then calls
        ``complete()`` on the partial result to obtain a full
        :class:`ParseResult`.
        """
        partial = self(ua, Domain.ALL)
        return partial.complete()

    def parse_user_agent(self, ua: str) -> Optional[UserAgent]:
        """Convenience method: resolves only the :class:`UserAgent`
        domain and returns the ``user_agent`` attribute of the partial
        result as-is.

        NOTE(review): no defaulting is applied here; the return is
        annotated ``Optional``, presumably ``None`` on failure -- confirm
        against ``PartialParseResult``.
        """
        partial = self(ua, Domain.USER_AGENT)
        return partial.user_agent

    def parse_os(self, ua: str) -> Optional[OS]:
        """Convenience method: resolves only the :class:`OS` domain and
        returns the ``os`` attribute of the partial result as-is.

        NOTE(review): no defaulting is applied here; the return is
        annotated ``Optional``, presumably ``None`` on failure -- confirm
        against ``PartialParseResult``.
        """
        partial = self(ua, Domain.OS)
        return partial.os

    def parse_device(self, ua: str) -> Optional[Device]:
        """Convenience method: resolves only the :class:`Device` domain
        and returns the ``device`` attribute of the partial result as-is.

        NOTE(review): no defaulting is applied here; the return is
        annotated ``Optional``, presumably ``None`` on failure -- confirm
        against ``PartialParseResult``.
        """
        partial = self(ua, Domain.DEVICE)
        return partial.device


parser: Parser


def __getattr__(name: str) -> Parser:
global parser
if name == "parser":
if Re2Parser is not None:
parser = Re2Parser(load_lazy_builtins())
else:
parser = CachingParser(
BasicParser(load_builtins()),
Locking(LRU(200)),
)
parser = Parser.from_matchers(
load_builtins() if Re2Resolver is None else load_lazy_builtins()
)
return parser
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

Expand All @@ -105,7 +147,7 @@ def parse(ua: str) -> ParseResult:
# parser, a `global` access fails to and we get a NameError
from . import parser

return parser.parse(ua)
return parser(ua, Domain.ALL).complete()


def parse_user_agent(ua: str) -> Optional[UserAgent]:
Expand All @@ -114,7 +156,7 @@ def parse_user_agent(ua: str) -> Optional[UserAgent]:
"""
from . import parser

return parser.parse_user_agent(ua)
return parser(ua, Domain.USER_AGENT).user_agent


def parse_os(ua: str) -> Optional[OS]:
Expand All @@ -123,7 +165,7 @@ def parse_os(ua: str) -> Optional[OS]:
"""
from . import parser

return parser.parse_os(ua)
return parser(ua, Domain.OS).os


def parse_device(ua: str) -> Optional[Device]:
Expand All @@ -132,4 +174,4 @@ def parse_device(ua: str) -> Optional[Device]:
"""
from . import parser

return parser.parse_device(ua)
return parser(ua, Domain.DEVICE).device
2 changes: 1 addition & 1 deletion src/ua_parser/_matchers.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ __all__ = ["MATCHERS"]

from typing import List, Tuple

from .core import DeviceMatcher, OSMatcher, UserAgentMatcher
from .matchers import DeviceMatcher, OSMatcher, UserAgentMatcher

MATCHERS: Tuple[
List[UserAgentMatcher],
Expand Down
9 changes: 4 additions & 5 deletions src/ua_parser/basic.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
__all__ = ["Resolver"]

from operator import methodcaller
from typing import List

Expand All @@ -7,13 +9,12 @@
Matcher,
Matchers,
OS,
Parser as AbstractParser,
PartialParseResult,
UserAgent,
)


class Parser(AbstractParser):
class Resolver:
"""A simple pure-python parser based around trying a number of regular
expressions in sequence for each domain, and returning a result
when one matches.
Expand All @@ -27,9 +28,7 @@ def __init__(
self,
matchers: Matchers,
) -> None:
self.user_agent_matchers = matchers[0]
self.os_matchers = matchers[1]
self.device_matchers = matchers[2]
self.user_agent_matchers, self.os_matchers, self.device_matchers = matchers

def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
parse = methodcaller("__call__", ua)
Expand Down
20 changes: 10 additions & 10 deletions src/ua_parser/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@
from typing import Any, Callable, Iterable, List, Optional

from . import (
BasicParser,
CachingParser,
BasicResolver,
CachingResolver,
Clearing,
Locking,
LRU,
Matchers,
Parser,
load_builtins,
load_yaml,
Resolver,
)
from .caching import Cache
from .re2 import Parser as Re2Parser
from .loaders import load_builtins, load_yaml
from .re2 import Resolver as Re2Resolver
from .user_agent_parser import Parse

CACHEABLE = {
Expand Down Expand Up @@ -222,19 +222,19 @@ def run_csv(args: argparse.Namespace) -> None:
def get_parser(
parser: str, cache: str, cachesize: int, rules: Matchers
) -> Callable[[str], Any]:
p: Parser
r: Resolver
if parser == "legacy":
return Parse
elif parser == "basic":
p = BasicParser(rules)
r = BasicResolver(rules)
elif parser == "re2":
p = Re2Parser(rules)
r = Re2Resolver(rules)
else:
sys.exit(f"unknown parser {parser!r}")

c: Callable[[int], Cache]
if cache == "none":
return p.parse
return Parser(r).parse
elif cache == "clearing":
c = Clearing
elif cache == "lru":
Expand All @@ -244,7 +244,7 @@ def get_parser(
else:
sys.exit(f"unknown cache algorithm {cache!r}")

return CachingParser(p, c(cachesize)).parse
return Parser(CachingResolver(r, c(cachesize))).parse


def run(
Expand Down
20 changes: 10 additions & 10 deletions src/ua_parser/caching.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import abc
import threading
from collections import OrderedDict
from typing import Dict, Optional
from typing import Dict, Optional, Protocol

from .core import Domain, Parser, PartialParseResult
from .core import Domain, PartialParseResult, Resolver

__all__ = [
"CachingParser",
"CachingResolver",
"Cache",
"Clearing",
"Locking",
"LRU",
]


class Cache(abc.ABC):
class Cache(Protocol):
"""Cache abstract protocol. The :class:`CachingResolver` will look
values up, merge what was returned (possibly nothing) with what it
got from its actual parser, and *re-set the result*.
Expand All @@ -33,7 +33,7 @@ def __getitem__(self, key: str) -> Optional[PartialParseResult]:
...


class Clearing(Cache):
class Clearing:
"""A clearing cache, if the cache is full, just remove all the entries
and re-fill from scratch.

Expand Down Expand Up @@ -62,7 +62,7 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
self.cache[key] = value


class LRU(Cache):
class LRU:
"""Cache following a least-recently used replacement policy: when
there is no more room in the cache, whichever entry was last seen
the least recently is removed.
Expand Down Expand Up @@ -103,7 +103,7 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
self.cache.popitem(last=False)


class Locking(Cache):
class Locking:
"""Locking cache decorator. Takes a non-thread-safe cache and
ensures retrieving and setting entries is protected by a mutex.

Expand All @@ -122,7 +122,7 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
self.cache[key] = value


class CachingParser(Parser):
class CachingResolver:
"""A wrapping parser which takes an underlying concrete :class:`Cache`
for the actual caching and cache strategy.

Expand All @@ -134,8 +134,8 @@ class CachingParser(Parser):
really, they're immutable).
"""

def __init__(self, parser: Parser, cache: Cache):
self.parser: Parser = parser
def __init__(self, parser: Resolver, cache: Cache):
self.parser: Resolver = parser
self.cache: Cache = cache

def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
Expand Down
Loading
Loading