Skip to content

Commit

Permalink
🎨 Enable strict type check and improve the project typing
Browse files (browse the repository at this point in the history)
Following #182
  • Loading branch information
Ousret committed Aug 14, 2022
1 parent 6155b6b commit 370d9ee
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
python setup.py install
- name: Type checking (Mypy)
run: |
mypy charset_normalizer
mypy --strict charset_normalizer
- name: Import sorting check (isort)
run: |
isort --check charset_normalizer
Expand Down
22 changes: 11 additions & 11 deletions charset_normalizer/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from os import PathLike
from os.path import basename, splitext
from typing import BinaryIO, List, Optional, Set
from typing import BinaryIO, List, Optional, Set, Any

from .cd import (
coherence_ratio,
Expand Down Expand Up @@ -36,8 +36,8 @@ def from_bytes(
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.2,
cp_isolation: List[str] = None,
cp_exclusion: List[str] = None,
cp_isolation: Optional[List[str]] = None,
cp_exclusion: Optional[List[str]] = None,
preemptive_behaviour: bool = True,
explain: bool = False,
) -> CharsetMatches:
Expand Down Expand Up @@ -486,8 +486,8 @@ def from_fp(
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
cp_isolation: List[str] = None,
cp_exclusion: List[str] = None,
cp_isolation: Optional[List[str]] = None,
cp_exclusion: Optional[List[str]] = None,
preemptive_behaviour: bool = True,
explain: bool = False,
) -> CharsetMatches:
Expand All @@ -508,12 +508,12 @@ def from_fp(


def from_path(
path: PathLike,
path: 'PathLike[Any]',
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
cp_isolation: List[str] = None,
cp_exclusion: List[str] = None,
cp_isolation: Optional[List[str]] = None,
cp_exclusion: Optional[List[str]] = None,
preemptive_behaviour: bool = True,
explain: bool = False,
) -> CharsetMatches:
Expand All @@ -535,12 +535,12 @@ def from_path(


def normalize(
path: PathLike,
path: 'PathLike[Any]',
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
cp_isolation: List[str] = None,
cp_exclusion: List[str] = None,
cp_isolation: Optional[List[str]] = None,
cp_exclusion: Optional[List[str]] = None,
preemptive_behaviour: bool = True,
) -> CharsetMatch:
"""
Expand Down
4 changes: 2 additions & 2 deletions charset_normalizer/cd.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def encoding_unicode_range(iana_name: str) -> List[str]:
if is_multi_byte_encoding(iana_name):
raise IOError("Function not supported on multi-byte code page")

decoder = importlib.import_module("encodings.{}".format(iana_name)).IncrementalDecoder # type: ignore
decoder = importlib.import_module("encodings.{}".format(iana_name)).IncrementalDecoder

p: IncrementalDecoder = decoder(errors="ignore")
seen_ranges: Dict[str, int] = {}
Expand Down Expand Up @@ -307,7 +307,7 @@ def coherence_ratio(
lg_inclusion_list.remove("Latin Based")

for layer in alpha_unicode_split(decoded_sequence):
sequence_frequencies: Counter = Counter(layer)
sequence_frequencies: Counter[str] = Counter(layer)
most_common = sequence_frequencies.most_common()

character_count: int = sum(o for c, o in most_common)
Expand Down
4 changes: 2 additions & 2 deletions charset_normalizer/cli/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from json import dumps
from os.path import abspath
from platform import python_version
from typing import List
from typing import List, Optional

try:
from unicodedata2 import unidata_version
Expand Down Expand Up @@ -48,7 +48,7 @@ def query_yes_no(question: str, default: str = "yes") -> bool:
sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")


def cli_detect(argv: List[str] = None) -> int:
def cli_detect(argv: Optional[List[str]] = None) -> int:
"""
CLI assistant using ARGV and ArgumentParser
:param argv:
Expand Down
4 changes: 2 additions & 2 deletions charset_normalizer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def coherence_non_latin(self) -> float:
return 0.0

@property
def w_counter(self) -> Counter:
def w_counter(self) -> Counter[str]:
"""
Word counter instance on decoded text.
Notice: Will be removed in 3.0
Expand Down Expand Up @@ -280,7 +280,7 @@ class CharsetMatches:
Act like a list(iterable) but does not implements all related methods.
"""

def __init__(self, results: List[CharsetMatch] = None):
def __init__(self, results: Optional[List[CharsetMatch]] = None):
self._results: List[CharsetMatch] = sorted(results) if results else []

def __iter__(self) -> Iterator[CharsetMatch]:
Expand Down
14 changes: 10 additions & 4 deletions charset_normalizer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from re import findall
from typing import Generator, List, Optional, Set, Tuple, Union

from _multibytecodec import MultibyteIncrementalDecoder # type: ignore
from _multibytecodec import MultibyteIncrementalDecoder

from .constant import (
ENCODING_MARKS,
Expand Down Expand Up @@ -231,6 +231,9 @@ def any_specified_encoding(sequence: bytes, search_zone: int = 4096) -> Optional
for specified_encoding in results:
specified_encoding = specified_encoding.lower().replace("-", "_")

encoding_alias: str
encoding_iana: str

for encoding_alias, encoding_iana in aliases.items():
if encoding_alias == specified_encoding:
return encoding_iana
Expand All @@ -256,7 +259,7 @@ def is_multi_byte_encoding(name: str) -> bool:
"utf_32_be",
"utf_7",
} or issubclass(
importlib.import_module("encodings.{}".format(name)).IncrementalDecoder, # type: ignore
importlib.import_module("encodings.{}".format(name)).IncrementalDecoder,
MultibyteIncrementalDecoder,
)

Expand Down Expand Up @@ -286,6 +289,9 @@ def should_strip_sig_or_bom(iana_encoding: str) -> bool:
def iana_name(cp_name: str, strict: bool = True) -> str:
cp_name = cp_name.lower().replace("-", "_")

encoding_alias: str
encoding_iana: str

for encoding_alias, encoding_iana in aliases.items():
if cp_name in [encoding_alias, encoding_iana]:
return encoding_iana
Expand Down Expand Up @@ -315,8 +321,8 @@ def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
return 0.0

decoder_a = importlib.import_module("encodings.{}".format(iana_name_a)).IncrementalDecoder # type: ignore
decoder_b = importlib.import_module("encodings.{}".format(iana_name_b)).IncrementalDecoder # type: ignore
decoder_a = importlib.import_module("encodings.{}".format(iana_name_a)).IncrementalDecoder
decoder_b = importlib.import_module("encodings.{}".format(iana_name_b)).IncrementalDecoder

id_a: IncrementalDecoder = decoder_a(errors="ignore")
id_b: IncrementalDecoder = decoder_b(errors="ignore")
Expand Down

0 comments on commit 370d9ee

Please sign in to comment.