Skip to content

Commit

Permalink
Merge pull request #2 from blalop/master
Browse files Browse the repository at this point in the history
Release 1.0.0: Refactoring of the API
  • Loading branch information
blalop authored Sep 12, 2021
2 parents f239e1b + 5324ef6 commit ebc215c
Show file tree
Hide file tree
Showing 18 changed files with 177 additions and 126 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ jobs:
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
password: ${{ secrets.PYPI_API_TOKEN }}
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
pip install flake8 mypy
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: flake8 .
run: flake8 --per-file-ignores="__init__.py:F401" bbva2pandas
- name: Lint with mypy
run: mypy bbva2pandas
- name: Test with unittest
run: python3 -m unittest discover tests
run: python3 -m unittest discover tests
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ This project adheres to [Semantic Versioning](http://semver.org/) and [Keep a ch

## [Unreleased](https://github.com/blalop/bbva2pandas/tree/develop)

## [1.0.0](https://github.com/blalop/bbva2pandas/tree/1.0.0)
### Changed
- API refactoring

## [0.1.0](https://github.com/blalop/bbva2pandas/tree/0.1.0)
### Added
- Initial release
11 changes: 3 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,13 @@ In [bbva.es](https://bbva.es), login and go to Posición global > Cuentas y Tarj

## Using the library

You can either provide the file to be read or the raw string:
Just provide the filepath:

```python
from bbva2pandas.file_handler import read_report
with open(filename, 'rb') as f:
dataframe = read_report(f)
import bbva2pandas
dataframe = bbva2pandas.Report('myfile').to_df()
```

```python
from bbva2pandas.report_parser import parse_report_content
dataframe = report_parser.parse_report_content('filecontent')
```

## Running the script

Expand Down
1 change: 1 addition & 0 deletions bbva2pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from bbva2pandas.report import Report
36 changes: 36 additions & 0 deletions bbva2pandas/dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import re

import pandas as pd

# Column labels of the movements dataframe, in the same order as the
# fields of each movement row passed to build_dataframe.
DF_COLUMNS = [
    'date',
    'value_date',
    'concept',
    'amount',
    'balance',
    'card',
    'subconcept',
]


def _trim_string(col: pd.Series) -> pd.Series:
"""Remove unnecesary whitespaces"""
return col.apply(lambda x: re.sub(r'\s+', ' ', x).strip())


def _transform_decimal_separator(col: pd.Series) -> pd.Series:
"""Parses the decimal separator from ',' to '.'"""
col = col.apply(lambda x: x.replace('.', '').replace(',', '.'))
return pd.to_numeric(col)


def _format_date(col: pd.Series, year: str) -> pd.Series:
"""Formats the date in Pandas format"""
return pd.to_datetime(col + '/' + year, dayfirst=True)


def build_dataframe(movements: list, year: str) -> pd.DataFrame:
    """Build the movements dataframe.

    ``movements`` supplies the rows, labelled with DF_COLUMNS. The text
    columns are whitespace-normalised, the date columns are completed
    with ``year`` and parsed, and the money columns are converted to
    numbers. The ``card`` column is left untouched.
    """
    frame = pd.DataFrame(movements, columns=DF_COLUMNS)

    for name in ('concept', 'subconcept'):
        frame[name] = _trim_string(frame[name])
    for name in ('date', 'value_date'):
        frame[name] = _format_date(frame[name], year)
    for name in ('amount', 'balance'):
        frame[name] = _transform_decimal_separator(frame[name])
    return frame
32 changes: 32 additions & 0 deletions bbva2pandas/extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import re

# Captures the four-digit year that follows "EXTRACTO DE <word> ".
YEAR_FIND_REGEX = re.compile(r'EXTRACTO DE \w* (\d{4})', re.MULTILINE)

# Matches one movement per line and captures seven groups, in this order:
# date, value date, concept, amount, balance, card number, subconcept.
# The optional thousands separator in amount/balance is a literal dot
# (escaped), so an arbitrary character is not accepted in its place.
MOVEMENTS_PARSE_REGEX = re.compile(
    r'''^
        (\d\d/\d\d)                          # date
        \s
        (\d\d/\d\d)                          # value date
        \s*
        ([A-ZÑÁÉÍÓÚÜ\'\,\.\:\s]+)            # concept
        \s*
        (-?\d*\.?\d*,\d*)                    # amount of the movement
        \s*
        (\d*\.?\d*,\d*)                      # balance after movement
        \s*
        (\d*)                                # credit card number
        \s*
        ([\d\wÑÁÉÍÓÚÜ \.\,\:\*\'\-\/\(\)]*)  # subconcept
    $''',
    re.MULTILINE | re.IGNORECASE | re.VERBOSE
)


def find_movements(content: str) -> list:
    """Return all movements matched by MOVEMENTS_PARSE_REGEX in ``content``.

    Each item holds the captured groups of one match; the list is empty
    when nothing matches.
    """
    matches = MOVEMENTS_PARSE_REGEX.findall(content)
    return matches


def find_year(content: str) -> str:
    """Return the year captured by the first YEAR_FIND_REGEX match.

    Raises:
        IndexError: if ``content`` has no matching header. The exception
            type is the one the previous ``findall(...)[0]`` lookup
            raised, now with a message that names the actual problem.
    """
    first_match = YEAR_FIND_REGEX.search(content)
    if first_match is None:
        raise IndexError(
            'year not found: no "EXTRACTO DE ... <year>" header in content'
        )
    return first_match.group(1)
11 changes: 0 additions & 11 deletions bbva2pandas/file_handler.py

This file was deleted.

12 changes: 12 additions & 0 deletions bbva2pandas/pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pdftotext

from bbva2pandas.typing import FilePathOrBuffer


def read_pdf(filepath: FilePathOrBuffer) -> str:
    """Return the PDF text, with the items from pdftotext joined by newlines.

    A ``str`` argument is treated as a path and opened in binary mode;
    anything else is handed to ``pdftotext.PDF`` as is.
    """
    if not isinstance(filepath, str):
        return '\n'.join(pdftotext.PDF(filepath))

    with open(filepath, 'rb') as handle:
        pages = pdftotext.PDF(handle)
        return '\n'.join(pages)
17 changes: 17 additions & 0 deletions bbva2pandas/report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pandas as pd

from bbva2pandas.extractor import find_movements, find_year
from bbva2pandas.pdf import read_pdf
from bbva2pandas.dataframe import build_dataframe
from bbva2pandas.typing import FilePathOrBuffer


class Report:
    """A report PDF read into text, with its year and movements extracted.

    The constructor does all the reading and extraction; ``to_df`` only
    assembles the dataframe from the stored results.
    """

    def __init__(self, filepath: FilePathOrBuffer) -> None:
        """Read ``filepath`` and store its content, year and movements."""
        text = read_pdf(filepath)
        self.content = text
        self.year = find_year(text)
        self.movements = find_movements(text)

    def to_df(self) -> pd.DataFrame:
        """Return the stored movements as a dataframe."""
        frame = build_dataframe(self.movements, self.year)
        return frame
61 changes: 0 additions & 61 deletions bbva2pandas/report_parser.py

This file was deleted.

4 changes: 4 additions & 0 deletions bbva2pandas/typing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import io
import typing

# Input accepted by read_pdf: a path string or an already-open file object.
# Callers pass handles opened with 'rb', which are buffered binary streams,
# so io.BufferedIOBase is part of the union. io.TextIOWrapper is retained
# so annotations written against the previous alias keep type-checking.
FilePathOrBuffer = typing.Union[str, io.BufferedIOBase, io.TextIOWrapper]
7 changes: 3 additions & 4 deletions bin/bbva2pandas
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@ import sys
import os

try:
from bbva2pandas.file_handler import read_report
from bbva2pandas import Report
except ImportError:
sys.path.append(os.path.abspath('./'))
from bbva2pandas.file_handler import read_report
from bbva2pandas import Report


def read_report_file(filename: str) -> pd.DataFrame:
with open(filename, 'rb') as f:
return read_report(f)
return Report(filename).to_df()


def extract_directory(dirname: str) -> pd.DataFrame:
Expand Down
Binary file added tests/data/abcdef.pdf
Binary file not shown.
27 changes: 27 additions & 0 deletions tests/test_dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import unittest

import pandas as pd
from pandas._libs.tslibs.timestamps import Timestamp

from bbva2pandas import dataframe


class TestDataframe(unittest.TestCase):
    # Exercises the private column helpers of bbva2pandas.dataframe.

    def test_trim_string(self):
        # Trailing whitespace must be removed from each value.
        series = pd.Series(['a', 'a b ', 'a b c'])
        result = dataframe._trim_string(series).to_list()
        self.assertEqual(['a', 'a b', 'a b c'], result)

    def test_transform_decimal_separator(self):
        # '.' is dropped and ',' becomes the decimal point.
        series = pd.Series(['23,45', '1.500,00'])
        result = dataframe._transform_decimal_separator(series).to_list()
        self.assertEqual([23.45, 1500], result)

    def test_format_date(self):
        # Day/month values are completed with the given year.
        series = pd.Series(['12/10', '1/1'])
        result = dataframe._format_date(series, '2020').to_list()
        wanted = [
            Timestamp('2020-10-12 00:00:00'),
            Timestamp('2020-01-01 00:00:00'),
        ]
        self.assertEqual(wanted, result)
19 changes: 19 additions & 0 deletions tests/test_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import unittest

from bbva2pandas import extractor


class TestExtractor(unittest.TestCase):
    # Runs the extractor functions against the text fixture in tests/data.

    def test_year_extraction(self):
        with open('tests/data/pdf-content.txt') as fixture:
            text = fixture.read()
        self.assertEqual('2050', extractor.find_year(text))

    def test_movements_extraction(self):
        with open('tests/data/pdf-content.txt') as fixture:
            text = fixture.read()
        found = extractor.find_movements(text)
        wanted = [
            ('05/08', '05/08', 'TRANSFERENCIAS ', '42,00', '42,00', '', 'X'),
        ]
        self.assertEqual(1, len(found))
        self.assertEqual(wanted, found)
16 changes: 16 additions & 0 deletions tests/test_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import unittest

from bbva2pandas import pdf


class TestExtractor(unittest.TestCase):
    # Checks pdf.read_pdf with both an open binary handle and a path.
    # NOTE(review): the class name matches the extractor test class rather
    # than the module under test - confirm whether that is intended.
    FILEPATH = 'tests/data/abcdef.pdf'

    def test_with_file_open(self):
        with open(self.FILEPATH, 'rb') as handle:
            text = pdf.read_pdf(handle)
        self.assertEqual('abcdef', text)

    def test_with_file_path(self):
        text = pdf.read_pdf(self.FILEPATH)
        self.assertEqual('abcdef', text)
39 changes: 0 additions & 39 deletions tests/test_report_parser.py

This file was deleted.

0 comments on commit ebc215c

Please sign in to comment.