forked from CCI-MOC/invoicing
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implemented
Processor
class and refactored some preliminary processing
A `Processor` class has been added, subclassing from the `Invoice` class. This is the first step in refactoring invoicing in order to separate the processing and filtering/exporting functionalities of our current Invoice subclasses. Subclasses of `Processor` should only process invoices and manipulate their internal data, while subclasses of `Invoice` should only perform filtering and exporting, never changing any data themselves. In addition to implementing `Processor`, two of its subclasses, `ValidatePIAliasProcessor` and `AddInstitutionProcessor`, have been added to perform some preliminary processing steps.
- Loading branch information
Showing
7 changed files
with
146 additions
and
90 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
from dataclasses import dataclass | ||
import json | ||
|
||
import pandas | ||
|
||
from process_report.invoices import invoice | ||
from process_report.processors import processor | ||
|
||
|
||
@dataclass
class AddInstitutionProcessor(processor.Processor):
    """Processor that fills in the institution column from each PI's username."""

    @staticmethod
    def _load_institute_map() -> dict:
        """Load the institution lookup table from `institute_map.json`.

        The path is relative to the current working directory.
        """
        # JSON is UTF-8 by specification; without an explicit encoding the
        # locale default is used, which can garble non-ASCII institution names.
        with open("process_report/institute_map.json", "r", encoding="utf-8") as f:
            institute_map = json.load(f)

        return institute_map

    @staticmethod
    def _get_institution_from_pi(institute_map, pi_uname):
        """Return the institution name mapped to `pi_uname`, or "" if none matches.

        Everything after the last "@" (or the whole username when there is no
        "@") is looked up in `institute_map`. On a miss, the leftmost
        dot-separated label is dropped and the lookup is retried, so
        "cs.bu.edu" falls back to "bu.edu" and then "edu". A warning is
        printed when no candidate matches.
        """
        institution_domain = pi_uname.split("@")[-1]
        # One attempt per dot-separated suffix of the domain, longest first.
        for _ in range(institution_domain.count(".") + 1):
            if institution_name := institute_map.get(institution_domain, ""):
                break
            institution_domain = institution_domain[institution_domain.find(".") + 1 :]

        if institution_name == "":
            print(f"Warning: PI name {pi_uname} does not match any institution!")

        return institution_name

    def _add_institution(self, dataframe: pandas.DataFrame):
        """Determine every PI's institution name, logging any PI whose institution cannot be determined.

        This is performed by `_get_institution_from_pi()`, which tries to match the PI's username to
        a list of known institution email domains (e.g. bu.edu), or to several edge cases (e.g. rudolph) if
        the username is not an email address.

        Exact matches are then mapped to the corresponding institution name.
        E.g. "foo@bu.edu" would match with "bu.edu", which maps to the institution name "Boston University".

        The list of mappings is defined in `institute_map.json`.

        Rows without a PI are reported and their institution value is left as is.
        Returns the dataframe produced by the `astype` cast, with the institution
        column filled in.
        """
        institute_map = self._load_institute_map()
        # Cast the institution column to str before assigning names into it.
        dataframe = dataframe.astype({invoice.INSTITUTION_FIELD: "str"})
        for i, row in dataframe.iterrows():
            pi_name = row[invoice.PI_FIELD]
            if pandas.isna(pi_name):
                print(f"Project {row[invoice.PROJECT_FIELD]} has no PI")
            else:
                dataframe.at[
                    i, invoice.INSTITUTION_FIELD
                ] = self._get_institution_from_pi(institute_map, pi_name)

        return dataframe

    def _process(self):
        """Replace `self.data` with the institution-annotated dataframe."""
        self.data = self._add_institution(self.data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from dataclasses import dataclass | ||
|
||
from process_report.invoices import invoice | ||
|
||
|
||
@dataclass
class Processor(invoice.Invoice):
    """Base class for invoice processing steps.

    Subclasses should only process invoices and manipulate their internal
    data; filtering and exporting belong in `Invoice` subclasses.
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from dataclasses import dataclass | ||
|
||
import pandas | ||
|
||
from process_report.invoices import invoice | ||
from process_report.processors import processor | ||
|
||
|
||
@dataclass
class ValidatePIAliasProcessor(processor.Processor):
    """Processor that rewrites PI aliases in the PI column to the PI they belong to."""

    # Maps each PI name to a collection of aliases for that PI.
    alias_map: dict

    @staticmethod
    def _validate_pi_aliases(dataframe: pandas.DataFrame, alias_dict: dict):
        """Replace every alias found in the PI column with its mapped PI.

        Entries of `alias_dict` are applied one after another, in dictionary
        order. The dataframe is modified in place and also returned.
        """
        pi_column = invoice.PI_FIELD
        for canonical_pi, known_aliases in alias_dict.items():
            # Rows whose current PI value is one of this PI's aliases.
            is_alias = dataframe[pi_column].isin(known_aliases)
            dataframe.loc[is_alias, pi_column] = canonical_pi

        return dataframe

    def _process(self):
        """Apply `alias_map` to `self.data`."""
        self.data = self._validate_pi_aliases(self.data, self.alias_map)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters