From 228b12b4d2d89dd3e5b53a248aaf2a4ae2c7d57a Mon Sep 17 00:00:00 2001 From: Fabian Neugart Date: Fri, 28 Jan 2022 01:52:12 +0100 Subject: [PATCH] Fixing an issue with non recognized characters --- bbva2pandas/extractor.py | 6 +++--- requirements.txt | 6 +++--- tests/data/pdf-content.txt | 2 ++ tests/test_extractor.py | 8 ++++++-- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/bbva2pandas/extractor.py b/bbva2pandas/extractor.py index d29e819..cdd36ba 100644 --- a/bbva2pandas/extractor.py +++ b/bbva2pandas/extractor.py @@ -8,15 +8,15 @@ \s (\d\d/\d\d) #value date \s* - ([A-ZÑÁÉÍÓÚÜ\'\,\.\:\s]+) #concept - \s* + ([\wÀ-ÿ .,:*%\'\/()\-\\]+?) #concept + \s+ (-?\d*.?\d*,\d*) #amount of the movement \s* (\d*.?\d*,\d*) #balance after movement \s* (\d*) # credit card number \s* - ([\d\wÑÁÉÍÓÚÜ \.\,\:\*\'\-\/\(\)]*) # subconcept + ([\wÀ-ÿ .,:*%\'\/()\-\\]*) # subconcept $''', re.MULTILINE | re.IGNORECASE | re.VERBOSE ) diff --git a/requirements.txt b/requirements.txt index bf7cbe8..cdb1dba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -pandas==1.2.1 -numpy==1.20.2 -pdftotext==2.1.5 \ No newline at end of file +pandas==1.4.0 +numpy==1.22.1 +pdftotext==2.2.2 \ No newline at end of file diff --git a/tests/data/pdf-content.txt b/tests/data/pdf-content.txt index 78a4c6e..7c7d7ac 100644 --- a/tests/data/pdf-content.txt +++ b/tests/data/pdf-content.txt @@ -6,6 +6,8 @@ F.Oper. F.Valor Concepto SALDO ANTERIOR - - - - - - - - - - - - - - - - - - - - - 0,00 05/08 05/08 TRANSFERENCIAS 42,00 42,00 X +12/08 10/08 COMPRA EN COMERCIO EXTRANJERO-COMISIÓN 3 % INCLUÍDA -8,79 13,55 + LIDL BCN-CAN BATLL\ BARCELONA Todos los importes de este extracto se expresan en: SALDO A NUESTRO FAVOR SALDO A SU FAVOR EURO 42,00 F12345 diff --git a/tests/test_extractor.py b/tests/test_extractor.py index 6e865ca..900e999 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -14,6 +14,10 @@ def test_movements_extraction(self): with open('tests/data/pdf-content.txt') as f: input = f.read() movements = extractor.find_movements(input) - expected = [('05/08', '05/08', 'TRANSFERENCIAS ', '42,00', '42,00', '', 'X')] - self.assertEqual(1, len(movements)) + expected = [('05/08', '05/08', + 'TRANSFERENCIAS', '42,00', + '42,00', '', 'X'), + ('12/08', '10/08', 'COMPRA EN COMERCIO EXTRANJERO-COMISIÓN 3 % INCLUÍDA', '-8,79', '13,55', '', + 'LIDL BCN-CAN BATLL\ BARCELONA')] + self.assertEqual(2, len(movements)) self.assertEqual(expected, movements)