diff --git a/process_report/invoices/bu_internal_invoice.py b/process_report/invoices/bu_internal_invoice.py index e1f7732..5bd820c 100644 --- a/process_report/invoices/bu_internal_invoice.py +++ b/process_report/invoices/bu_internal_invoice.py @@ -1,12 +1,10 @@ from dataclasses import dataclass -from decimal import Decimal import process_report.invoices.invoice as invoice -import process_report.invoices.discount_invoice as discount_invoice @dataclass -class BUInternalInvoice(discount_invoice.DiscountInvoice): +class BUInternalInvoice(invoice.Invoice): """ This invoice operates on data processed by these Processors: - ValidateBillablePIsProcessor @@ -20,31 +18,19 @@ class BUInternalInvoice(discount_invoice.DiscountInvoice): invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.SUBSIDY_FIELD, - invoice.BALANCE_FIELD, + invoice.PI_BALANCE_FIELD, ] - subsidy_amount: int - - def _prepare(self): - def get_project(row): - project_alloc = row[invoice.PROJECT_FIELD] - if project_alloc.rfind("-") == -1: - return project_alloc - else: - return project_alloc[: project_alloc.rfind("-")] + exported_columns_map = {invoice.PI_BALANCE_FIELD: "Balance"} + def _prepare_export(self): self.data = self.data[ self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD] ] self.data = self.data[ self.data[invoice.INSTITUTION_FIELD] == "Boston University" - ].copy() - self.data["Project"] = self.data.apply(get_project, axis=1) - self.data[invoice.SUBSIDY_FIELD] = Decimal(0) - - def _process(self): - data_summed_projects = self._sum_project_allocations(self.data) - self.data = self._apply_subsidy(data_summed_projects, self.subsidy_amount) + ] + self.data = self._sum_project_allocations(self.data) def _sum_project_allocations(self, dataframe): """A project may have multiple allocations, and therefore multiple rows @@ -52,7 +38,12 @@ def _sum_project_allocations(self, dataframe): each unique project, summing up its allocations' costs""" project_list = dataframe["Project"].unique() data_no_dup = dataframe.drop_duplicates("Project", inplace=False) - sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD] + sum_fields = [ + invoice.COST_FIELD, + invoice.CREDIT_FIELD, + invoice.SUBSIDY_FIELD, + invoice.PI_BALANCE_FIELD, + ] for project in project_list: project_mask = dataframe["Project"] == project no_dup_project_mask = data_no_dup["Project"] == project @@ -61,18 +52,3 @@ def _sum_project_allocations(self, dataframe): data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums return data_no_dup - - def _apply_subsidy(self, dataframe, subsidy_amount): - pi_list = dataframe[invoice.PI_FIELD].unique() - - for pi in pi_list: - pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi] - self.apply_flat_discount( - dataframe, - pi_projects, - subsidy_amount, - invoice.SUBSIDY_FIELD, - invoice.BALANCE_FIELD, - ) - - return dataframe diff --git a/process_report/invoices/discount_invoice.py b/process_report/invoices/discount_invoice.py deleted file mode 100644 index 333b998..0000000 --- a/process_report/invoices/discount_invoice.py +++ /dev/null @@ -1,79 +0,0 @@ -from dataclasses import dataclass - -import pandas - -import process_report.invoices.invoice as invoice - - -@dataclass -class DiscountInvoice(invoice.Invoice): - """ - Invoice class containing functions useful for applying discounts - on dataframes - """ - - @staticmethod - def apply_flat_discount( - invoice: pandas.DataFrame, - pi_projects: pandas.DataFrame, - discount_amount: int, - discount_field: str, - balance_field: str, - code_field: str = None, - discount_code: str = None, - ): - """ - Takes in an invoice and a list of PI projects that are a subset of it, - and applies a flat discount to those PI projects. Note that this function - will change the provided `invoice` Dataframe directly. Therefore, it does - not return the changed invoice. - - This function assumes that the balance field shows the remaining cost of the project, - or what the PI would pay before the flat discount is applied. - - If the optional parameters `code_field` and `discount_code` are passed in, - `discount_code` will be comma-APPENDED to the `code_field` of projects where - the discount is applied - - Returns the amount of discount used. - - :param invoice: Dataframe containing all projects - :param pi_projects: A subset of `invoice`, containing all projects for a PI you want to apply the discount - :param discount_amount: The discount given to the PI - :param discount_field: Name of the field to put the discount amount applied to each project - :param balance_field: Name of the balance field - :param code_field: Name of the discount code field - :param discount_code: Code of the discount - """ - - def apply_discount_on_project(remaining_discount_amount, project_i, project): - remaining_project_balance = project[balance_field] - applied_discount = min(remaining_project_balance, remaining_discount_amount) - invoice.at[project_i, discount_field] = applied_discount - invoice.at[project_i, balance_field] = ( - project[balance_field] - applied_discount - ) - remaining_discount_amount -= applied_discount - return remaining_discount_amount - - def apply_credit_code_on_project(project_i): - if code_field and discount_code: - if pandas.isna(invoice.at[project_i, code_field]): - invoice.at[project_i, code_field] = discount_code - else: - invoice.at[project_i, code_field] = ( - invoice.at[project_i, code_field] + "," + discount_code - ) - - remaining_discount_amount = discount_amount - for i, row in pi_projects.iterrows(): - if remaining_discount_amount == 0: - break - else: - remaining_discount_amount = apply_discount_on_project( - remaining_discount_amount, i, row - ) - apply_credit_code_on_project(i) - - discount_used = discount_amount - remaining_discount_amount - return discount_used diff --git a/process_report/invoices/invoice.py b/process_report/invoices/invoice.py index 35c8ffb..7ffb6d8 100644 --- a/process_report/invoices/invoice.py +++ b/process_report/invoices/invoice.py @@ -37,6 +37,7 @@ IS_BILLABLE_FIELD = "Is Billable" MISSING_PI_FIELD = "Missing PI" PI_BALANCE_FIELD = "PI Balance" +PROJECT_NAME_FIELD = "Project" ### diff --git a/process_report/process_report.py b/process_report/process_report.py index cd65430..49b7644 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -21,6 +21,7 @@ lenovo_processor, validate_billable_pi_processor, new_pi_credit_processor, + bu_subsidy_processor, ) ### PI file field names @@ -242,7 +243,12 @@ def main(): ) new_pi_credit_proc.process() - processed_data = new_pi_credit_proc.data + bu_subsidy_proc = bu_subsidy_processor.BUSubsidyProcessor( + "", invoice_month, new_pi_credit_proc.data.copy(), args.BU_subsidy_amount + ) + bu_subsidy_proc.process() + + processed_data = bu_subsidy_proc.data ### Initialize invoices @@ -280,7 +286,6 @@ def main(): name=args.BU_invoice_file, invoice_month=invoice_month, data=processed_data.copy(), - subsidy_amount=args.BU_subsidy_amount, ) pi_inv = pi_specific_invoice.PIInvoice( diff --git a/process_report/processors/bu_subsidy_processor.py b/process_report/processors/bu_subsidy_processor.py new file mode 100644 index 0000000..8dc35f7 --- /dev/null +++ b/process_report/processors/bu_subsidy_processor.py @@ -0,0 +1,56 @@ +from dataclasses import dataclass +from decimal import Decimal + +from process_report.invoices import invoice +from process_report.processors import discount_processor + + +@dataclass +class BUSubsidyProcessor(discount_processor.DiscountProcessor): + IS_DISCOUNT_BY_NERC = False + + subsidy_amount: int + + def _prepare(self): + def get_project(row): + project_alloc = row[invoice.PROJECT_FIELD] + if project_alloc.rfind("-") == -1: + return project_alloc + else: + return project_alloc[: project_alloc.rfind("-")] + + self.data[invoice.PROJECT_NAME_FIELD] = self.data.apply(get_project, axis=1) + self.data[invoice.SUBSIDY_FIELD] = Decimal(0) + + def _process(self): + self.data = self._apply_subsidy(self.data, self.subsidy_amount) + + @staticmethod + def _get_subsidy_eligible_projects(data): + filtered_data = data[ + data[invoice.IS_BILLABLE_FIELD] & ~data[invoice.MISSING_PI_FIELD] + ] + filtered_data = filtered_data[ + filtered_data[invoice.INSTITUTION_FIELD] == "Boston University" + ].copy() + + return filtered_data + + def _apply_subsidy(self, dataframe, subsidy_amount): + subsidy_eligible_projects = self._get_subsidy_eligible_projects(dataframe) + pi_list = subsidy_eligible_projects[invoice.PI_FIELD].unique() + + for pi in pi_list: + pi_projects = subsidy_eligible_projects[ + subsidy_eligible_projects[invoice.PI_FIELD] == pi + ] + self.apply_flat_discount( + dataframe, + pi_projects, + invoice.PI_BALANCE_FIELD, + subsidy_amount, + invoice.SUBSIDY_FIELD, + invoice.BALANCE_FIELD, + ) + + return dataframe diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index f09a7de..315ecaf 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -757,119 +757,179 @@ def test_apply_credit_error(self): test_invoice._get_pi_age(old_pi_df, "PI1", invoice_month) -class TestBUSubsidy(TestCase): - def setUp(self): - data = { - "Invoice Month": [ - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - ], - "Manager (PI)": ["PI1", "PI1", "PI2", "PI2", "PI3", "PI3", "PI4", "PI4"], - "Institution": [ - "Boston University", - "Boston University", +class TestBUSubsidyProcessor(TestCase): + def _assert_result_invoice( + self, + subsidy_amount, + test_invoice, + answer_invoice, + invoice_month="0000-00", + ): + new_bu_subsidy_proc = test_utils.new_bu_subsidy_processor( + invoice_month=invoice_month, + data=test_invoice, + subsidy_amount=subsidy_amount, + ) + new_bu_subsidy_proc.process() + output_invoice = new_bu_subsidy_proc.data + answer_invoice = answer_invoice.astype(output_invoice.dtypes) + print(output_invoice) + print(answer_invoice) + + self.assertTrue(output_invoice.equals(answer_invoice)) + + def _get_test_invoice( + self, + pi, + pi_balances, + balances=None, + project_names=None, + institution=None, + is_billable=None, + missing_pi=None, + ): + if not balances: + balances = pi_balances + + if not project_names: + project_names = ["Project" for _ in range(len(pi))] + + if not institution: + institution = ["Boston University" for _ in range(len(pi))] + + if not is_billable: + is_billable = [True for _ in range(len(pi))] + + if not missing_pi: + missing_pi = [False for _ in range(len(pi))] + + return pandas.DataFrame( + { + "Manager (PI)": pi, + "Project - Allocation": project_names, + "PI Balance": pi_balances, + "Balance": balances, + "Institution": institution, + "Is Billable": is_billable, + "Missing PI": missing_pi, + } + ) + + def test_exclude_non_BU_pi(self): + """Are only BU PIs given the subsidy?""" + + subsidy_amount = 100 + test_invoice = self._get_test_invoice( + [str(i) for i in range(5)], + pi_balances=[subsidy_amount for _ in range(5)], + institution=[ "Boston University", "Boston University", - "Harvard University", # Test case for non-BU PIs + "boston university", "Harvard University", - "Boston University", - "Boston University", + "BU", ], - "Project - Allocation": [ - "ProjectA-e6413", - "ProjectA-t575e6", # Test case for project with >1 allocation - "ProjectB-fddgfygg", - "ProjectB-5t143t", - "ProjectC-t14334", - "ProjectD", # Test case for correctly extracting project name - "ProjectE-test-r25135", # Test case for BU PI with >1 project - "ProjectF", - ], - "Cost": [1050, 500, 100, 925, 10000, 1000, 1050, 100], - "Credit": [ - 1000, - 0, - 100, - 900, - 0, - 0, - 1000, - 0, - ], # Test cases where PI does/dones't have credits alreadys - "Balance": [ - 50, - 500, - 0, - 25, - 10000, - 1000, - 50, - 100, - ], # Test case where subsidy does/doesn't cover fully balance - "Is Billable": [True, True, True, True, True, True, True, True], - "Missing PI": [False, False, False, False, False, False, False, False], - } - self.dataframe = pandas.DataFrame(data) - self.subsidy = 100 - - def test_apply_BU_subsidy(self): - test_invoice = test_utils.new_bu_internal_invoice( - data=self.dataframe, subsidy_amount=self.subsidy ) - test_invoice.process() - output_df = test_invoice.data.reset_index() - self.assertTrue( - set( - [ - process_report.INVOICE_DATE_FIELD, - "Project", - process_report.PI_FIELD, - process_report.COST_FIELD, - process_report.CREDIT_FIELD, - process_report.SUBSIDY_FIELD, - process_report.BALANCE_FIELD, - ] - ).issubset(output_df) + answer_invoice = test_invoice.copy() + answer_invoice["Project"] = answer_invoice["Project - Allocation"] + answer_invoice["Subsidy"] = [subsidy_amount, subsidy_amount, 0, 0, 0] + answer_invoice["PI Balance"] = [ + 0, + 0, + subsidy_amount, + subsidy_amount, + subsidy_amount, + ] + + self._assert_result_invoice(subsidy_amount, test_invoice, answer_invoice) + + def test_exclude_nonbillables(self): + """Are nonbillables excluded from the subsidy?""" + subsidy_amount = 100 + test_invoice = self._get_test_invoice( + [str(i) for i in range(6)], + pi_balances=[subsidy_amount for _ in range(6)], + is_billable=[True, True, False, False, True, True], + missing_pi=[True, True, False, False, False, False], ) - self.assertTrue( - set(["PI1", "PI2", "PI4"]).issubset(output_df["Manager (PI)"].unique()) + answer_invoice = test_invoice.copy() + answer_invoice["Project"] = answer_invoice["Project - Allocation"] + answer_invoice["Subsidy"] = [0, 0, 0, 0, subsidy_amount, subsidy_amount] + answer_invoice["PI Balance"] = [ + subsidy_amount, + subsidy_amount, + subsidy_amount, + subsidy_amount, + 0, + 0, + ] + + self._assert_result_invoice(subsidy_amount, test_invoice, answer_invoice) + + def test_one_pi_many_allocations(self): + """Is subsidy applied properly to BU PI with many allocations?""" + + # Two projects, one allocation each + subsidy_amount = 100 + test_invoice = self._get_test_invoice( + ["PI" for i in range(2)], + pi_balances=[60, 60], + project_names=["P1", "P2"], ) - self.assertFalse("PI3" in output_df["Project"].unique()) - self.assertTrue( - set(["ProjectA", "ProjectB", "ProjectE-test", "ProjectF"]).issubset( - output_df["Project"].unique() - ) + answer_invoice = test_invoice.copy() + answer_invoice["Project"] = answer_invoice["Project - Allocation"] + answer_invoice["Subsidy"] = [60, 40] + answer_invoice["PI Balance"] = [0, 20] + + self._assert_result_invoice(subsidy_amount, test_invoice, answer_invoice) + + # Two projects, two allocations each + test_invoice = self._get_test_invoice( + ["PI" for i in range(4)], + pi_balances=[40, 40, 40, 40], + project_names=["P1-A1", "P1-A1-test", "P2", "P2-"], ) - self.assertFalse( - set(["ProjectC-t14334", "ProjectC", "ProjectD"]).intersection( - output_df["Project"].unique() - ) + + answer_invoice = test_invoice.copy() + answer_invoice["Project"] = ["P1", "P1-A1", "P2", "P2"] + answer_invoice["Subsidy"] = [40, 40, 20, 0] + answer_invoice["PI Balance"] = [0, 0, 20, 40] + + self._assert_result_invoice(subsidy_amount, test_invoice, answer_invoice) + + # Two allocations, one where PI balance != NERC balance + test_invoice = self._get_test_invoice( + ["PI" for i in range(2)], + pi_balances=[80, 80], + project_names=["P1", "P2"], + balances=[100, 80], ) - self.assertEqual(4, len(output_df.index)) - self.assertEqual(1550, output_df.loc[0, "Cost"]) - self.assertEqual(1025, output_df.loc[1, "Cost"]) - self.assertEqual(1050, output_df.loc[2, "Cost"]) - self.assertEqual(100, output_df.loc[3, "Cost"]) + answer_invoice = test_invoice.copy() + answer_invoice["Project"] = answer_invoice["Project - Allocation"] + answer_invoice["Subsidy"] = [80, 20] + answer_invoice["PI Balance"] = [0, 60] + + self._assert_result_invoice(subsidy_amount, test_invoice, answer_invoice) - self.assertEqual(100, output_df.loc[0, "Subsidy"]) - self.assertEqual(25, output_df.loc[1, "Subsidy"]) - self.assertEqual(50, output_df.loc[2, "Subsidy"]) - self.assertEqual(50, output_df.loc[3, "Subsidy"]) + def test_two_pi(self): + """Is subsidy applied to more than one PI?""" + # Each PI has two allocations + subsidy_amount = 100 + test_invoice = self._get_test_invoice( + ["PI1", "PI1", "PI2", "PI2"], + pi_balances=[80, 80, 40, 40], + ) + + answer_invoice = test_invoice.copy() + answer_invoice["Project"] = answer_invoice["Project - Allocation"] + answer_invoice["Subsidy"] = [80, 20, 40, 40] + answer_invoice["PI Balance"] = [0, 60, 0, 0] - self.assertEqual(450, output_df.loc[0, "Balance"]) - self.assertEqual(0, output_df.loc[1, "Balance"]) - self.assertEqual(0, output_df.loc[2, "Balance"]) - self.assertEqual(50, output_df.loc[3, "Balance"]) + self._assert_result_invoice(subsidy_amount, test_invoice, answer_invoice) class TestLenovoProcessor(TestCase): diff --git a/process_report/tests/util.py b/process_report/tests/util.py index 4faf672..cfe0bbb 100644 --- a/process_report/tests/util.py +++ b/process_report/tests/util.py @@ -3,7 +3,6 @@ from process_report.invoices import ( invoice, billable_invoice, - bu_internal_invoice, pi_specific_invoice, ) @@ -13,6 +12,7 @@ lenovo_processor, validate_billable_pi_processor, new_pi_credit_processor, + bu_subsidy_processor, ) @@ -50,16 +50,6 @@ def new_billable_invoice( ) -def new_bu_internal_invoice( - name="", invoice_month="0000-00", data=None, subsidy_amount=0 -): - if data is None: - data = pandas.DataFrame() - return bu_internal_invoice.BUInternalInvoice( - name, invoice_month, data, subsidy_amount - ) - - def new_pi_specific_invoice( name="", invoice_month="0000-00", @@ -137,3 +127,16 @@ def new_new_pi_credit_processor( return new_pi_credit_processor.NewPICreditProcessor( name, invoice_month, data, old_pi_filepath, limit_new_pi_credit_to_partners ) + + +def new_bu_subsidy_processor( + name="", + invoice_month="0000-00", + data=None, + subsidy_amount=0, +): + if data is None: + data = pandas.DataFrame() + return bu_subsidy_processor.BUSubsidyProcessor( + name, invoice_month, data, subsidy_amount + )