diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py
index f007670..2d1f33b 100644
--- a/process_report/invoices/billable_invoice.py
+++ b/process_report/invoices/billable_invoice.py
@@ -20,9 +20,6 @@ class BillableInvoice(discount_invoice.DiscountInvoice):
     EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
     PI_S3_FILEPATH = "PIs/PI.csv"
 
-    nonbillable_pis: list[str]
-    nonbillable_projects: list[str]
-
     export_columns_list = [
         invoice.INVOICE_DATE_FIELD,
         invoice.PROJECT_FIELD,
@@ -62,26 +59,6 @@ def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:
 
         return old_pi_df
 
-    @staticmethod
-    def _remove_nonbillables(
-        data: pandas.DataFrame,
-        nonbillable_pis: list[str],
-        nonbillable_projects: list[str],
-    ):
-        return data[
-            ~data[invoice.PI_FIELD].isin(nonbillable_pis)
-            & ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects)
-        ]
-
-    @staticmethod
-    def _validate_pi_names(data: pandas.DataFrame):
-        invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])]
-        for i, row in invalid_pi_projects.iterrows():
-            logger.warn(
-                f"Billable project {row[invoice.PROJECT_FIELD]} has empty PI field"
-            )
-        return data[~pandas.isna(data[invoice.PI_FIELD])]
-
     @staticmethod
     def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
         """Returns time difference between current invoice month and PI's first invoice month
@@ -102,10 +79,6 @@ def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
         return month_diff
 
     def _prepare(self):
-        self.data = self._remove_nonbillables(
-            self.data, self.nonbillable_pis, self.nonbillable_projects
-        )
-        self.data = self._validate_pi_names(self.data)
         self.data[invoice.CREDIT_FIELD] = None
         self.data[invoice.CREDIT_CODE_FIELD] = None
         self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]
diff --git a/process_report/process_report.py b/process_report/process_report.py
index be378eb..002d8fa 100644
--- a/process_report/process_report.py
+++ b/process_report/process_report.py
@@ -18,6 +18,8 @@
 from process_report.processors import (
     validate_pi_alias_processor,
     add_institution_processor,
+    remove_nonbillables_processor,
+    validate_billable_pi_processor,
 )
 
 ### PI file field names
@@ -232,6 +234,24 @@ def main():
         nonbillable_projects=projects,
     )
 
+    ### Remove nonbillables
+
+    remove_nonbillables_proc = remove_nonbillables_processor.RemoveNonbillables(
+        "", invoice_month, add_institute_proc.data, pi, projects
+    )
+    remove_nonbillables_proc.process()
+
+    validate_billable_pi_proc = (
+        validate_billable_pi_processor.ValidateBillablePIsProcessor(
+            "", invoice_month, remove_nonbillables_proc.data
+        )
+    )
+    validate_billable_pi_proc.process()
+
+    processed_data = validate_billable_pi_proc.data
+
+    ### Initialize invoices
+
     if args.upload_to_s3:
         backup_to_s3_old_pi_file(old_pi_file)
 
@@ -239,9 +259,7 @@ def main():
     billable_inv = billable_invoice.BillableInvoice(
         name=args.output_file,
         invoice_month=invoice_month,
-        data=preliminary_processed_data.copy(),
-        nonbillable_pis=pi,
-        nonbillable_projects=projects,
+        data=processed_data.copy(),
         old_pi_filepath=old_pi_file,
         limit_new_pi_credit_to_partners=rates_info.get_value_at(
             "Limit New PI Credit to MGHPCC Partners", invoice_month
diff --git a/process_report/processors/remove_nonbillables_processor.py b/process_report/processors/remove_nonbillables_processor.py
new file mode 100644
index 0000000..326c98b
--- /dev/null
+++ b/process_report/processors/remove_nonbillables_processor.py
@@ -0,0 +1,32 @@
+from dataclasses import dataclass
+
+import pandas
+
+from process_report.invoices import invoice
+from process_report.processors import processor
+
+
+@dataclass
+class RemoveNonbillables(processor.Processor):
+    """Drop rows whose PI or project is listed as nonbillable."""
+
+    nonbillable_pis: list[str]
+    nonbillable_projects: list[str]
+
+    @staticmethod
+    def _remove_nonbillables(
+        data: pandas.DataFrame,
+        nonbillable_pis: list[str],
+        nonbillable_projects: list[str],
+    ):
+        """Return the rows whose PI and project are both absent from the lists."""
+        return data[
+            ~data[invoice.PI_FIELD].isin(nonbillable_pis)
+            & ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects)
+        ]
+
+    def _process(self):
+        """Replace `self.data` with its billable rows."""
+        self.data = self._remove_nonbillables(
+            self.data, self.nonbillable_pis, self.nonbillable_projects
+        )
diff --git a/process_report/processors/validate_billable_pi_processor.py b/process_report/processors/validate_billable_pi_processor.py
new file mode 100644
index 0000000..72ab60a
--- /dev/null
+++ b/process_report/processors/validate_billable_pi_processor.py
@@ -0,0 +1,29 @@
+from dataclasses import dataclass
+import logging
+
+import pandas
+
+from process_report.invoices import invoice
+from process_report.processors import processor
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+@dataclass
+class ValidateBillablePIsProcessor(processor.Processor):
+    """Drop rows whose PI field is empty, logging a warning for each one."""
+
+    @staticmethod
+    def _validate_pi_names(data: pandas.DataFrame):
+        """Warn about each row with a missing PI and return the remaining rows."""
+        invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])]
+        for i, row in invalid_pi_projects.iterrows():
+            logger.warning(
+                f"Billable project {row[invoice.PROJECT_FIELD]} has empty PI field"
+            )
+        return data[~pandas.isna(data[invoice.PI_FIELD])]
+
+    def _process(self):
+        """Replace `self.data` with the rows that have a PI."""
+        self.data = self._validate_pi_names(self.data)
diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py
index 858600c..12e8892 100644
--- a/process_report/tests/unit_tests.py
+++ b/process_report/tests/unit_tests.py
@@ -104,26 +104,6 @@ def test_remove_billables(self):
         self.assertNotIn("ProjectE", result_df["Project - Allocation"].tolist())
 
 
-class TestBillableInvoice(TestCase):
-    def test_remove_nonbillables(self):
-        pis = [uuid.uuid4().hex for x in range(10)]
-        projects = [uuid.uuid4().hex for x in range(10)]
-        nonbillable_pis = pis[:3]
-        nonbillable_projects = projects[7:]
-        billable_pis = pis[3:7]
-        data = pandas.DataFrame({"Manager (PI)": pis, "Project - Allocation": projects})
-
-        test_invoice = test_utils.new_billable_invoice()
-        data = test_invoice._remove_nonbillables(
-            data, nonbillable_pis, nonbillable_projects
-        )
-        self.assertTrue(data[data["Manager (PI)"].isin(nonbillable_pis)].empty)
-        self.assertTrue(
-            data[data["Project - Allocation"].isin(nonbillable_projects)].empty
-        )
-        self.assertTrue(data.equals(data[data["Manager (PI)"].isin(billable_pis)]))
-
-
 class TestMergeCSV(TestCase):
     def setUp(self):
         self.header = ["ID", "Name", "Age"]
@@ -281,6 +261,46 @@ def test_validate_alias(self):
         self.assertTrue(answer_data.equals(validate_pi_alias_proc.data))
 
 
+class TestRemoveNonbillablesProcessor(TestCase):
+    def test_remove_nonbillables(self):
+        pis = [uuid.uuid4().hex for x in range(10)]
+        projects = [uuid.uuid4().hex for x in range(10)]
+        nonbillable_pis = pis[:3]
+        nonbillable_projects = projects[7:]
+        billable_pis = pis[3:7]
+        data = pandas.DataFrame({"Manager (PI)": pis, "Project - Allocation": projects})
+
+        remove_nonbillables_proc = test_utils.new_remove_nonbillables_processor()
+        data = remove_nonbillables_proc._remove_nonbillables(
+            data, nonbillable_pis, nonbillable_projects
+        )
+        self.assertTrue(data[data["Manager (PI)"].isin(nonbillable_pis)].empty)
+        self.assertTrue(
+            data[data["Project - Allocation"].isin(nonbillable_projects)].empty
+        )
+        self.assertTrue(data.equals(data[data["Manager (PI)"].isin(billable_pis)]))
+
+
+class TestValidateBillablePIProcessor(TestCase):
+    def test_validate_billables(self):
+        test_data = pandas.DataFrame(
+            {
+                "Manager (PI)": ["PI1", math.nan, "PI1", "PI2", "PI2"],
+                "Project - Allocation": [
+                    "ProjectA",
+                    "ProjectB",
+                    "ProjectC",
+                    "ProjectD",
+                    "ProjectE",
+                ],
+            }
+        )
+        self.assertEqual(1, len(test_data[pandas.isna(test_data["Manager (PI)"])]))
+        validate_billable_pi_proc = test_utils.new_validate_billable_pi_processor()
+        output_data = validate_billable_pi_proc._validate_pi_names(test_data)
+        self.assertEqual(0, len(output_data[pandas.isna(output_data["Manager (PI)"])]))
+
+
 class TestMonthUtils(TestCase):
     def test_get_month_diff(self):
         testcases = [
@@ -710,31 +730,6 @@ def test_apply_BU_subsidy(self):
         self.assertEqual(50, output_df.loc[3, "Balance"])
 
 
-class TestValidateBillables(TestCase):
-    def setUp(self):
-        data = {
-            "Manager (PI)": ["PI1", math.nan, "PI1", "PI2", "PI2"],
-            "Project - Allocation": [
-                "ProjectA",
-                "ProjectB",
-                "ProjectC",
-                "ProjectD",
-                "ProjectE",
-            ],
-        }
-        self.dataframe = pandas.DataFrame(data)
-
-    def test_validate_billables(self):
-        self.assertEqual(
-            1, len(self.dataframe[pandas.isna(self.dataframe["Manager (PI)"])])
-        )
-        test_invoice = test_utils.new_billable_invoice()
-        validated_df = test_invoice._validate_pi_names(self.dataframe)
-        self.assertEqual(
-            0, len(validated_df[pandas.isna(validated_df["Manager (PI)"])])
-        )
-
-
 class TestExportLenovo(TestCase):
     def setUp(self):
         data = {
diff --git a/process_report/tests/util.py b/process_report/tests/util.py
index 81a27e1..9be47b5 100644
--- a/process_report/tests/util.py
+++ b/process_report/tests/util.py
@@ -10,6 +10,8 @@
 from process_report.processors import (
     add_institution_processor,
     validate_pi_alias_processor,
+    remove_nonbillables_processor,
+    validate_billable_pi_processor,
 )
 
 
@@ -25,8 +27,6 @@ def new_billable_invoice(
     name="",
     invoice_month="0000-00",
     data=pandas.DataFrame(),
-    nonbillable_pis=[],
-    nonbillable_projects=[],
     old_pi_filepath="",
     limit_new_pi_credit_to_partners=False,
 ):
@@ -34,8 +34,6 @@ def new_billable_invoice(
         name,
         invoice_month,
         data,
-        nonbillable_pis,
-        nonbillable_projects,
         old_pi_filepath,
         limit_new_pi_credit_to_partners,
     )
@@ -75,3 +73,23 @@ def new_validate_pi_alias_processor(
     return validate_pi_alias_processor.ValidatePIAliasProcessor(
         name, invoice_month, data, alias_map
     )
+
+
+def new_remove_nonbillables_processor(
+    name="",
+    invoice_month="0000-00",
+    data=pandas.DataFrame(),
+    nonbillable_pis=[],
+    nonbillable_projects=[],
+):
+    return remove_nonbillables_processor.RemoveNonbillables(
+        name, invoice_month, data, nonbillable_pis, nonbillable_projects
+    )
+
+
+def new_validate_billable_pi_processor(
+    name="", invoice_month="0000-00", data=pandas.DataFrame()
+):
+    return validate_billable_pi_processor.ValidateBillablePIsProcessor(
+        name, invoice_month, data
+    )