### PI file field names
PI_PI_FIELD = "PI"
PI_FIRST_MONTH = "First Invoice Month"
PI_INITIAL_CREDITS = "Initial Credits"
PI_1ST_USED = "1st Month Used"
PI_2ND_USED = "2nd Month Used"
###


def load_old_pis(old_pi_file) -> pandas.DataFrame:
    """Read the old-PI CSV file into a dataframe.

    The three credit columns (initial credits, 1st month used, 2nd month
    used) are parsed as ``Decimal`` so credit arithmetic stays exact.

    Exits the program with an error message if the file does not exist.
    """
    try:
        old_pi_df = pandas.read_csv(
            old_pi_file,
            converters={
                PI_INITIAL_CREDITS: Decimal,
                PI_1ST_USED: Decimal,
                PI_2ND_USED: Decimal,
            },
        )
    except FileNotFoundError:
        sys.exit("Applying credit 0002 failed. Old PI file does not exist")

    return old_pi_df


def dump_old_pis(old_pi_file, old_pi_df: pandas.DataFrame):
    """Write the old-PI dataframe to ``old_pi_file`` as CSV, without the index column."""
    old_pi_df.to_csv(old_pi_file, index=False)


def get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
    """Return the number of months between the PI's first invoice month and
    the current invoice month, i.e. 0 for new PIs.

    A PI with no row in ``old_pi_df`` is treated as new and also yields 0.
    If several rows match the PI, the first one is used.

    Exits the program if the PI's age is negative, which suggests a faulty
    invoice or a program bug.
    """
    matching_months = old_pi_df.loc[old_pi_df[PI_PI_FIELD] == pi, PI_FIRST_MONTH]
    if matching_months.empty:
        return 0

    # Pull out the scalar month once: it is needed both for the comparison and
    # for the error message (interpolating the Series itself would print its
    # index and dtype instead of the month).
    first_invoice_month = matching_months.iat[0]
    month_diff = get_month_diff(invoice_month, first_invoice_month)
    if month_diff < 0:
        sys.exit(
            f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!"
        )
    return month_diff


def get_month_diff(month_1, month_2) -> int:
    """Return the signed number of months from month_2 to month_1.

    Both arguments are "YYYY-MM" strings. The result is positive if month_1
    is ahead in time of month_2, zero if they are the same month, and
    negative otherwise.

    Raises ValueError if either string is not a valid "%Y-%m" date.
    """
    dt1 = datetime.datetime.strptime(month_1, "%Y-%m")
    dt2 = datetime.datetime.strptime(month_2, "%Y-%m")
    return (dt1.year - dt2.year) * 12 + (dt1.month - dt2.month)
len(pi_old_pi_entry) == 0: + pi_entry = [pi, invoice_month, new_pi_credit_amount, 0, 0] + old_pi_df = pandas.concat( + [ + pandas.DataFrame([pi_entry], columns=old_pi_df.columns), + old_pi_df, + ], + ignore_index=True, + ) + pi_old_pi_entry = old_pi_df.loc[ + old_pi_df[PI_PI_FIELD] == pi + ].squeeze() + + remaining_credit = new_pi_credit_amount + credit_used_field = PI_1ST_USED + elif pi_age == 1: + remaining_credit = ( + pi_old_pi_entry[PI_INITIAL_CREDITS] - pi_old_pi_entry[PI_1ST_USED] + ) + credit_used_field = PI_2ND_USED + + initial_credit = remaining_credit for i, row in pi_projects.iterrows(): if remaining_credit == 0 or row[SU_TYPE_FIELD] in EXCLUDE_SU_TYPES: dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD] @@ -418,7 +463,18 @@ def apply_credits_new_pi(dataframe, old_pi_file): dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD] - applied_credit remaining_credit -= applied_credit - dump_old_pis(old_pi_file, old_pi_dict) + credits_used = initial_credit - remaining_credit + if (pi_old_pi_entry[credit_used_field] != 0) and ( + credits_used != pi_old_pi_entry[credit_used_field] + ): + print( + f"Warning: PI file overwritten. 
class TestMonthUtils(TestCase):
    """Unit tests for the invoice-month helpers in process_report."""

    def test_get_month_diff(self):
        """get_month_diff returns the signed month count and rejects invalid months."""
        testcases = [
            (("2024-12", "2024-03"), 9),
            (("2024-12", "2023-03"), 21),
            (("2024-11", "2024-12"), -1),
            (("2024-12", "2025-03"), -3),
        ]
        for arglist, answer in testcases:
            # subTest lets the remaining cases run after a failure and labels
            # the failing input in the test report.
            with self.subTest(months=arglist):
                self.assertEqual(process_report.get_month_diff(*arglist), answer)

        # Month 16 does not exist, so parsing the first argument must fail.
        with self.assertRaises(ValueError):
            process_report.get_month_diff("2024-16", "2025-03")
"2024-03", + "2024-03", + "2024-03", + "2024-03", + "2024-03", + "2024-03", + "2024-03", + ], + "Manager (PI)": [ + "PI1", + "PI2", + "PI3", + "PI4", + "PI4", + "PI5", + "PI7", + "NewPI1", + "NewPI1", + "NewPI2", + "NewPI2", ], - "Manager (PI)": ["PI1", "PI1", "PI1", "PI2", "PI2"], "SU Type": [ - "GPU", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", + "CPU", + "CPU", + "CPU", + "CPU", + "CPU", + "CPU", + "CPU", + "CPU", + "CPU", + "CPU", + "CPU", ], - "Cost": [500, 100, 100, 500, 500], + "Project - Allocation": [ + "ProjectA", + "ProjectB", + "ProjectC", + "ProjectD", + "ProjectE", + "ProjectF", + "ProjectG", + "ProjectH", + "ProjectI", + "ProjectJ", + "ProjectK", + ], + "Cost": [10, 100, 10000, 500, 100, 400, 200, 250, 250, 700, 700], + "Credit": [None, None, None, 100, None, 400, 200, 250, 250, 500, None], + "Credit Code": [ + None, + None, + None, + "0002", + None, + "0002", + "0002", + "0002", + "0002", + "0002", + None, + ], + "Balance": [10, 100, 10000, 400, 100, 0, 0, 0, 0, 200, 700], } - self.dataframe_no_gpu = pandas.DataFrame(data_no_gpu) - + self.dataframe = pandas.DataFrame(data) + self.answer_dataframe = pandas.DataFrame(answer_df_dict) old_pi = [ - "PI2,2023-09", - "PI3,2024-02", - "PI4,2024-03", - ] # Case with old and new pi in pi file - old_pi_file = tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".csv") + "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used", + "PI1,2023-09,500,200,0", + "PI2,2024-01,2000,0,0", + "PI3,2024-01,2000,1000,500", + "PI4,2024-02,1000,900,0", + "PI5,2024-02,1000,300,500", + "PI6,2024-02,1000,700,0", + "PI7,2024-03,500,300,0", # This as current month we're testing, new PIs should get $500 + "PI8,2024-04,1000,500,0", + ] + self.old_pi_answer = [ + "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used", + "PI1,2023-09,500,200,0", + "PI2,2024-01,2000,0,0", + "PI3,2024-01,2000,1000,500", + "PI4,2024-02,1000,900,100", + 
"PI5,2024-02,1000,300,400", + "PI6,2024-02,1000,700,0", + "PI7,2024-03,500,200,0", + "NewPI1,2024-03,500,500,0", + "NewPI2,2024-03,500,500,0", + "PI8,2024-04,1000,500,0", + ] + self.old_pi_answer.sort() + # Contains cases with new, one month old, two month old, older PI, and future PI that hasn't appeared in invoices yet + # For each invoice month, test case where pi has 1 project, >1, and has spare credit + old_pi_file = tempfile.NamedTemporaryFile( + delete=False, mode="w+", suffix=".csv" + ) for pi in old_pi: old_pi_file.write(pi + "\n") self.old_pi_file = old_pi_file.name + self.dataframe_no_gpu = pandas.DataFrame( + { + "Invoice Month": [ + "2024-03", + "2024-03", + "2024-03", + "2024-03", + "2024-03", + ], + "Manager (PI)": ["PI1", "PI1", "PI1", "PI2", "PI2"], + "SU Type": [ + "GPU", + "OpenShift GPUA100SXM4", + "OpenStack GPUA100SXM4", + "OpenShift GPUA100SXM4", + "OpenStack GPUA100SXM4", + ], + "Cost": [500, 100, 100, 500, 500], + } + ) old_pi_no_gpu = [ - "OldPI,2024-03", + "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used", + "OldPI,2024-03,500,200,0", ] old_pi_no_gpu_file = tempfile.NamedTemporaryFile( delete=False, mode="w", suffix=".csv" @@ -375,39 +518,13 @@ def test_apply_credit_0002(self): dataframe = process_report.apply_credits_new_pi( self.dataframe, self.old_pi_file ) + dataframe = dataframe.astype({"Credit": "float64", "Balance": "int64"}) + self.assertTrue(self.answer_dataframe.equals(dataframe)) - self.assertTrue("Credit" in dataframe) - self.assertTrue("Credit Code" in dataframe) - self.assertTrue("Balance" in dataframe) - - non_credited_project = dataframe[pandas.isna(dataframe["Credit Code"])] - credited_projects = dataframe[dataframe["Credit Code"] == "0002"] - - self.assertEqual(2, len(non_credited_project)) - self.assertEqual( - non_credited_project.loc[2, "Cost"], non_credited_project.loc[2, "Balance"] - ) - self.assertEqual( - non_credited_project.loc[3, "Cost"], non_credited_project.loc[3, "Balance"] - ) - - 
self.assertEqual(4, len(credited_projects.index)) - self.assertTrue("PI2" not in credited_projects["Manager (PI)"].unique()) - self.assertTrue("PI3" not in credited_projects["Manager (PI)"].unique()) - - self.assertEqual(10, credited_projects.loc[0, "Credit"]) - self.assertEqual(100, credited_projects.loc[1, "Credit"]) - self.assertEqual(800, credited_projects.loc[4, "Credit"]) - self.assertEqual(200, credited_projects.loc[5, "Credit"]) - - self.assertEqual(0, credited_projects.loc[0, "Balance"]) - self.assertEqual(0, credited_projects.loc[1, "Balance"]) - self.assertEqual(0, credited_projects.loc[4, "Balance"]) - self.assertEqual(800, credited_projects.loc[5, "Balance"]) - - updated_old_pi_answer = "PI2,2023-09\nPI3,2024-02\nPI4,2024-03\nPI1,2024-03\n" with open(self.old_pi_file, "r") as f: - self.assertEqual(updated_old_pi_answer, f.read()) + pi_file_list = f.read().splitlines() + pi_file_list.sort() + self.assertEqual(pi_file_list, self.old_pi_answer) def test_no_gpu(self): dataframe = process_report.apply_credits_new_pi( @@ -417,10 +534,12 @@ def test_no_gpu(self): self.assertTrue(self.no_gpu_df_answer.equals(dataframe)) def test_apply_credit_error(self): - old_pi_dict = {"PI1": "2024-12"} + old_pi_df = pandas.DataFrame( + {"PI": ["PI1"], "First Invoice Month": ["2024-04"]} + ) invoice_month = "2024-03" with self.assertRaises(SystemExit): - process_report.is_old_pi(old_pi_dict, "PI1", invoice_month) + process_report.get_pi_age(old_pi_df, "PI1", invoice_month) class TestBUSubsidy(TestCase):