Skip to content

Commit

Permalink
Move sanitize_value to be common
Browse files (browse the repository at this point in the history)
Fix Indian Passport Number regex
  • Loading branch information
SharonHart committed Jan 26, 2025
1 parent 6f840ea commit b3c79f7
Show file tree
Hide file tree
Showing 22 changed files with 207 additions and 249 deletions.
2 changes: 0 additions & 2 deletions presidio-analyzer/presidio_analyzer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from presidio_analyzer.analyzer_engine import AnalyzerEngine
from presidio_analyzer.batch_analyzer_engine import BatchAnalyzerEngine
from presidio_analyzer.analyzer_request import AnalyzerRequest
from presidio_analyzer.analyzer_utils import PresidioAnalyzerUtils
from presidio_analyzer.context_aware_enhancers import ContextAwareEnhancer
from presidio_analyzer.context_aware_enhancers import LemmaContextAwareEnhancer
from presidio_analyzer.analyzer_engine_provider import AnalyzerEngineProvider
Expand Down Expand Up @@ -51,6 +50,5 @@
"ContextAwareEnhancer",
"LemmaContextAwareEnhancer",
"BatchAnalyzerEngine",
"PresidioAnalyzerUtils",
"AnalyzerEngineProvider",
]
75 changes: 0 additions & 75 deletions presidio-analyzer/presidio_analyzer/analyzer_utils.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List, Optional, Tuple

from validation.validation_utils import ValidationUtils

from presidio_analyzer import Pattern, PatternRecognizer


Expand Down Expand Up @@ -59,7 +61,9 @@ def __init__(
)

def validate_result(self, pattern_text: str) -> bool: # noqa D102
sanitized_value = self.__sanitize_value(pattern_text, self.replacement_pairs)
sanitized_value = ValidationUtils.sanitize_value(
pattern_text, self.replacement_pairs
)
return self.__checksum(sanitized_value)

@staticmethod
Expand All @@ -68,9 +72,3 @@ def __checksum(sanitized_value: str) -> bool:
for idx, m in enumerate([3, 7, 1, 3, 7, 1, 3, 7, 1]):
s += int(sanitized_value[idx]) * m
return s % 10 == 0

@staticmethod
def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
for search_string, replacement_string in replacement_pairs:
text = text.replace(search_string, replacement_string)
return text
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List, Optional, Tuple

from validation.validation_utils import ValidationUtils

from presidio_analyzer import Pattern, PatternRecognizer


Expand Down Expand Up @@ -72,7 +74,7 @@ def validate_result(self, pattern_text: str) -> bool:
:return: A bool indicating whether the validation was successful.
"""
# Pre-processing before validation checks
text = self.__sanitize_value(pattern_text, self.replacement_pairs)
text = ValidationUtils.sanitize_value(pattern_text, self.replacement_pairs)
abn_list = [int(digit) for digit in text if not digit.isspace()]

# Set weights based on digit position
Expand All @@ -85,9 +87,3 @@ def validate_result(self, pattern_text: str) -> bool:
sum_product += abn_list[i] * weight[i]
remainder = sum_product % 89
return remainder == 0

@staticmethod
def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
for search_string, replacement_string in replacement_pairs:
text = text.replace(search_string, replacement_string)
return text
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List, Optional, Tuple

from validation.validation_utils import ValidationUtils

from presidio_analyzer import Pattern, PatternRecognizer


Expand Down Expand Up @@ -69,7 +71,7 @@ def validate_result(self, pattern_text: str) -> bool:
:return: A bool indicating whether the validation was successful.
"""
# Pre-processing before validation checks
text = self.__sanitize_value(pattern_text, self.replacement_pairs)
text = ValidationUtils.sanitize_value(pattern_text, self.replacement_pairs)
acn_list = [int(digit) for digit in text if not digit.isspace()]

# Set weights based on digit position
Expand All @@ -82,9 +84,3 @@ def validate_result(self, pattern_text: str) -> bool:
remainder = sum_product % 10
complement = 10 - remainder
return complement == acn_list[-1]

@staticmethod
def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
for search_string, replacement_string in replacement_pairs:
text = text.replace(search_string, replacement_string)
return text
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List, Optional, Tuple

from validation.validation_utils import ValidationUtils

from presidio_analyzer import Pattern, PatternRecognizer


Expand Down Expand Up @@ -69,7 +71,7 @@ def validate_result(self, pattern_text: str) -> bool:
:return: A bool indicating whether the validation was successful.
"""
# Pre-processing before validation checks
text = self.__sanitize_value(pattern_text, self.replacement_pairs)
text = ValidationUtils.sanitize_value(pattern_text, self.replacement_pairs)
medicare_list = [int(digit) for digit in text if not digit.isspace()]

# Set weights based on digit position
Expand All @@ -81,9 +83,3 @@ def validate_result(self, pattern_text: str) -> bool:
sum_product += medicare_list[i] * weight[i]
remainder = sum_product % 10
return remainder == medicare_list[8]

@staticmethod
def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
for search_string, replacement_string in replacement_pairs:
text = text.replace(search_string, replacement_string)
return text
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List, Optional, Tuple

from validation.validation_utils import ValidationUtils

from presidio_analyzer import Pattern, PatternRecognizer


Expand Down Expand Up @@ -75,7 +77,7 @@ def validate_result(self, pattern_text: str) -> bool:
:return: A bool indicating whether the validation was successful.
"""
# Pre-processing before validation checks
text = self.__sanitize_value(pattern_text, self.replacement_pairs)
text = ValidationUtils.sanitize_value(pattern_text, self.replacement_pairs)
tfn_list = [int(digit) for digit in text if not digit.isspace()]

# Set weights based on digit position
Expand All @@ -87,9 +89,3 @@ def validate_result(self, pattern_text: str) -> bool:
sum_product += tfn_list[i] * weight[i]
remainder = sum_product % 11
return remainder == 0

@staticmethod
def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
for search_string, replacement_string in replacement_pairs:
text = text.replace(search_string, replacement_string)
return text
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List, Optional, Tuple

from validation.validation_utils import ValidationUtils

from presidio_analyzer import Pattern, PatternRecognizer


Expand Down Expand Up @@ -59,7 +61,9 @@ def __init__(
)

def validate_result(self, pattern_text: str) -> bool: # noqa D102
sanitized_value = self.__sanitize_value(pattern_text, self.replacement_pairs)
sanitized_value = ValidationUtils.sanitize_value(
pattern_text, self.replacement_pairs
)
checksum = self.__luhn_checksum(sanitized_value)

return checksum
Expand All @@ -76,9 +80,3 @@ def digits_of(n: str) -> List[int]:
for d in even_digits:
checksum += sum(digits_of(str(d * 2)))
return checksum % 10 == 0

@staticmethod
def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
for search_string, replacement_string in replacement_pairs:
text = text.replace(search_string, replacement_string)
return text
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List, Optional, Tuple

from validation.validation_utils import ValidationUtils

from presidio_analyzer import Pattern, PatternRecognizer


Expand Down Expand Up @@ -39,6 +41,9 @@ def __init__(
supported_entity: str = "ES_NIE",
replacement_pairs: Optional[List[Tuple[str, str]]] = None,
):
self.replacement_pairs = (
replacement_pairs if replacement_pairs else [("-", ""), (" ", "")]
)
patterns = patterns if patterns else self.PATTERNS
context = context if context else self.CONTEXT
super().__init__(
Expand All @@ -51,7 +56,9 @@ def __init__(
def validate_result(self, pattern_text: str) -> bool:
"""Validate the pattern by using the control character."""

pattern_text = EsNieRecognizer.__sanitize_value(pattern_text)
pattern_text = ValidationUtils.sanitize_value(
pattern_text, self.replacement_pairs
)

letters = "TRWAGMYFPDXBNJZSQVHLCKE"
letter = pattern_text[-1]
Expand All @@ -66,7 +73,3 @@ def validate_result(self, pattern_text: str) -> bool:
# replace XYZ with 012, and check the mod 23
number = int(str("XYZ".index(pattern_text[0])) + pattern_text[1:-1])
return letter == letters[number % 23]

@staticmethod
def __sanitize_value(text: str) -> str:
return text.replace("-", "").replace(" ", "")
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List, Optional, Tuple

from validation.validation_utils import ValidationUtils

from presidio_analyzer import Pattern, PatternRecognizer


Expand Down Expand Up @@ -47,12 +49,10 @@ def __init__(
)

def validate_result(self, pattern_text: str) -> bool: # noqa D102
pattern_text = EsNifRecognizer.__sanitize_value(pattern_text)
pattern_text = ValidationUtils.sanitize_value(
pattern_text, self.replacement_pairs
)
letter = pattern_text[-1]
number = int("".join(filter(str.isdigit, pattern_text)))
letters = "TRWAGMYFPDXBNJZSQVHLCKE"
return letter == letters[number % 23]

@staticmethod
def __sanitize_value(text: str) -> str:
return text.replace("-", "").replace(" ", "")
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Dict, List, Optional, Tuple

import regex as re
from validation.validation_utils import ValidationUtils

from presidio_analyzer import (
EntityRecognizer,
Expand Down Expand Up @@ -79,7 +80,9 @@ def __init__(

def validate_result(self, pattern_text: str): # noqa D102
try:
pattern_text = self.__sanitize_value(pattern_text, self.replacement_pairs)
pattern_text = ValidationUtils.sanitize_value(
pattern_text, self.replacement_pairs
)
is_valid_checksum = (
self.__generate_iban_check_digits(pattern_text, self.LETTERS)
== pattern_text[2:4]
Expand Down Expand Up @@ -204,9 +207,3 @@ def __is_valid_format(
return country_regex and re.match(country_regex, iban, flags=flags)

return False

@staticmethod
def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
for search_string, replacement_string in replacement_pairs:
text = text.replace(search_string, replacement_string)
return text
Loading

0 comments on commit b3c79f7

Please sign in to comment.