Skip to content

Commit

Permalink
Implemented processor for applying the New-PI Credit
Browse files Browse the repository at this point in the history
The `DiscountProcessor` class has been created, to be subclassed by
processors which apply discounts, such as the New-PI Credit.
This processor class introduces an important class constant,
`IS_DISCOUNT_BY_NERC`, which determines whether the final balance,
as opposed to the PI balance (more info below), reflects the discount
being applied.

The New-PI credit is now implemented by `NewPICreditProcessor`.

During discussions about billing, it was noted that some discounts
are not provided by the MGHPCC, but instead from other sources, such as
the BU subsidy, which is provided to BU PIs by BU. This provided
motivation for a `PI Balance` field, which would reflect how much money the
PI should be billed, as opposed to the `Balance` field, which currently
reflects how much money the MGHPCC should receive. These two fields would
not equal each other if the PI received discounts not provided by the
MGHPCC.

Implementation of `NewPICreditProcessor` and the new billing
feature required a range of changes:
- `apply_discount_on_project()` in `DiscountProcessor` has been slightly
modified, where the PI balance and MGHPCC balance are now calculated separately.
- As `BillableInvoice` no longer performs any processing itself, the
dataframe from `NewPICreditProcessor` is now passed to all invoice objects.
- The test cases for the New-PI credit have been refactored. Test cases
for the new billing feature are not written yet. I plan to write
them when the processor for the BU Subsidy is implemented.
- With the new processor, certain Processor and Invoice classes depend on
fields created by other Processors, such as the case with
`NewPICreditProcessor` and `ValidateBillablePIsProcessor`. As such, docstrings
have been added to indicate dependencies
  • Loading branch information
QuanMPhm committed Nov 7, 2024
1 parent dcd5fc2 commit 9a6fac4
Show file tree
Hide file tree
Showing 10 changed files with 568 additions and 473 deletions.
7 changes: 7 additions & 0 deletions process_report/invoices/NERC_total_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@

@dataclass
class NERCTotalInvoice(invoice.Invoice):
"""
Dependencies:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

INCLUDED_INSTITUTIONS = [
"Harvard University",
"Boston University",
Expand Down Expand Up @@ -45,6 +51,7 @@ def output_s3_archive_key(self):
return f"Invoices/{self.invoice_month}/Archive/NERC-{self.invoice_month}-Total-Invoice {util.get_iso8601_time()}.csv"

def _prepare_export(self):
    """Narrow `self.data` to billable rows with a PI from the included institutions.

    Rows are kept only when `IS_BILLABLE_FIELD` is true, `MISSING_PI_FIELD`
    is false, and `INSTITUTION_FIELD` is one of `INCLUDED_INSTITUTIONS`.
    """
    # Index the frame with the boolean mask. Assigning the mask expression
    # itself would replace the dataframe with a True/False Series, and the
    # institution column lookup below would then fail.
    billable_with_pi = (
        self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
    )
    self.data = self.data[billable_with_pi]
    self.data = self.data[
        self.data[invoice.INSTITUTION_FIELD].isin(self.INCLUDED_INSTITUTIONS)
    ].copy()
181 changes: 12 additions & 169 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
from dataclasses import dataclass
import logging
import sys

import pandas
import pyarrow

from process_report.invoices import invoice, discount_invoice
from process_report import util

from process_report.invoices import invoice

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


@dataclass
class BillableInvoice(discount_invoice.DiscountInvoice):
NEW_PI_CREDIT_CODE = "0002"
INITIAL_CREDIT_AMOUNT = 1000
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
class BillableInvoice(invoice.Invoice):
"""
Dependencies:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

PI_S3_FILEPATH = "PIs/PI.csv"

old_pi_filepath: str
updated_old_pi_df: pandas.DataFrame

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PROJECT_FIELD,
Expand All @@ -38,61 +41,8 @@ class BillableInvoice(discount_invoice.DiscountInvoice):
invoice.BALANCE_FIELD,
]

old_pi_filepath: str
limit_new_pi_credit_to_partners: bool = False

@staticmethod
def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:
    """Read the old-PI CSV file into a dataframe.

    The initial-credit, first-month-used and second-month-used columns are
    parsed as Arrow `decimal128(21, 2)` values. The program exits with an
    error message when the file does not exist.
    """
    credit_dtype = pandas.ArrowDtype(pyarrow.decimal128(21, 2))
    credit_columns = (
        invoice.PI_INITIAL_CREDITS,
        invoice.PI_1ST_USED,
        invoice.PI_2ND_USED,
    )
    try:
        return pandas.read_csv(
            old_pi_filepath,
            dtype={column: credit_dtype for column in credit_columns},
        )
    except FileNotFoundError:
        sys.exit("Applying credit 0002 failed. Old PI file does not exist")

@staticmethod
def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
    """Return the month difference between `invoice_month` and the PI's first invoice month.

    Args:
        old_pi_df: Old-PI table; the PI is looked up in `invoice.PI_PI_FIELD`.
        pi: The PI to look up.
        invoice_month: Month of the invoice being processed.

    Returns:
        0 when the PI has no row in `old_pi_df` (i.e. a new PI); otherwise
        the result of `util.get_month_diff(invoice_month, first_month)`.

    Raises:
        SystemExit: If the PI's age is negative, which suggests a faulty
            invoice or a program bug.
    """
    first_month_matches = old_pi_df.loc[
        old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH
    ]
    if first_month_matches.empty:
        return 0

    # Work with the scalar month so the error message shows the month itself
    # rather than the repr of the whole matching Series.
    first_invoice_month = first_month_matches.iat[0]
    month_diff = util.get_month_diff(invoice_month, first_invoice_month)
    if month_diff < 0:
        sys.exit(
            f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!"
        )
    return month_diff

def _prepare(self):
    """Filter to billable rows with a PI, reset the credit columns, and load the old-PI file.

    After this call `CREDIT_FIELD` and `CREDIT_CODE_FIELD` are None for every
    row, `BALANCE_FIELD` equals `COST_FIELD`, and `self.old_pi_df` holds the
    table read from `self.old_pi_filepath`.
    """
    billable_with_pi = (
        self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
    )
    # Copy the filtered rows so the column assignments below write to an
    # independent frame rather than to a slice of the incoming data.
    self.data = self.data[billable_with_pi].copy()
    self.data[invoice.CREDIT_FIELD] = None
    self.data[invoice.CREDIT_CODE_FIELD] = None
    self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]
    self.old_pi_df = self._load_old_pis(self.old_pi_filepath)

def _process(self):
    """Apply the New-PI credit and store both the credited data and the updated old-PI table."""
    credited_data, refreshed_pi_df = self._apply_credits_new_pi(
        self.data, self.old_pi_df
    )
    self.data = credited_data
    self.updated_old_pi_df = refreshed_pi_df

def _prepare_export(self):
# Keep only billable rows that have a PI. The boolean expression must be used
# as a row mask; assigning it directly would replace the dataframe with a
# True/False Series.
self.data = self.data[
    self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
]
self.updated_old_pi_df = self.updated_old_pi_df.astype(
{
invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
Expand All @@ -110,110 +60,3 @@ def export(self):
# Run the parent class's S3 export, then upload the file at
# `self.old_pi_filepath` to the bucket under the key `PI_S3_FILEPATH`.
def export_s3(self, s3_bucket):
super().export_s3(s3_bucket)
s3_bucket.upload_file(self.old_pi_filepath, self.PI_S3_FILEPATH)

def _filter_partners(self, data):
    """Return the rows of `data` whose institution is an active MGHPCC partner.

    An institute counts as active when it has a
    `mghpcc_partnership_start_date` and that date is not after the invoice
    month (month difference >= 0).
    """
    partner_names = [
        entry["display_name"]
        for entry in util.load_institute_list()
        if (start_date := entry.get("mghpcc_partnership_start_date"))
        and util.get_month_diff(self.invoice_month, start_date) >= 0
    ]
    is_partner_row = data[invoice.INSTITUTION_FIELD].isin(partner_names)
    return data[is_partner_row]

def _filter_excluded_su_types(self, data):
    """Return the rows of `data` whose SU type is not listed in `EXCLUDE_SU_TYPES`."""
    is_excluded = data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES)
    return data[~is_excluded]

def _get_credit_eligible_projects(self, data: pandas.DataFrame):
    """Return the rows of `data` that may receive the New-PI credit.

    Excluded SU types are always dropped; rows from non-partner institutions
    are dropped as well when `limit_new_pi_credit_to_partners` is set.
    """
    eligible = self._filter_excluded_su_types(data)
    if not self.limit_new_pi_credit_to_partners:
        return eligible
    return self._filter_partners(eligible)

# Apply the New-PI credit to the credit-eligible projects of every PI in
# `data` and record the credit used in `old_pi_df`.
# Returns the tuple (data, old_pi_df); `old_pi_df` may be a new frame when
# rows for new PIs were added.
def _apply_credits_new_pi(
self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame
):
# Credit amount for this invoice month: the PI_INITIAL_CREDITS value of the
# first old-PI row whose PI_FIRST_MONTH equals `invoice_month`, or the
# default when there is no such row or that value is NA.
def get_initial_credit_amount(
old_pi_df, invoice_month, default_initial_credit_amount
):
first_month_processed_pis = old_pi_df[
old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month
]
# The walrus is only evaluated when the selection is non-empty, so
# `.iat[0]` never runs on an empty Series.
if first_month_processed_pis[
invoice.PI_INITIAL_CREDITS
].empty or pandas.isna(
new_pi_credit_amount := first_month_processed_pis[
invoice.PI_INITIAL_CREDITS
].iat[0]
):
new_pi_credit_amount = default_initial_credit_amount

return new_pi_credit_amount

new_pi_credit_amount = get_initial_credit_amount(
old_pi_df, self.invoice_month, self.INITIAL_CREDIT_AMOUNT
)
print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}")

# Only projects that pass the eligibility filters are considered; PIs are
# processed one at a time.
credit_eligible_projects = self._get_credit_eligible_projects(data)
current_pi_set = set(credit_eligible_projects[invoice.PI_FIELD])
for pi in current_pi_set:
pi_projects = credit_eligible_projects[
credit_eligible_projects[invoice.PI_FIELD] == pi
]
pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month)
# squeeze() yields an empty result when the PI has no old-PI row, which is
# what the `len(...) == 0` check below relies on.
pi_old_pi_entry = old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi
].squeeze()

# PIs more than one month past their first invoice month receive no credit:
# the balance of each of their eligible projects is set to its cost.
if pi_age > 1:
for i, row in pi_projects.iterrows():
data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
else:
if pi_age == 0:
# PI has no old-PI row yet: prepend one for this invoice month.
# NOTE(review): assumes old_pi_df has exactly five columns in the order
# PI, first month, initial credits, 1st-month used, 2nd-month used — confirm.
if len(pi_old_pi_entry) == 0:
pi_entry = [pi, self.invoice_month, new_pi_credit_amount, 0, 0]
old_pi_df = pandas.concat(
[
pandas.DataFrame([pi_entry], columns=old_pi_df.columns),
old_pi_df,
],
ignore_index=True,
)
pi_old_pi_entry = old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi
].squeeze()

# First invoice month: the full credit amount is available and usage is
# tracked in PI_1ST_USED.
remaining_credit = new_pi_credit_amount
credit_used_field = invoice.PI_1ST_USED
# Second invoice month: what is left is the initial credit minus the amount
# used in the first month; usage is tracked in PI_2ND_USED.
elif pi_age == 1:
remaining_credit = (
pi_old_pi_entry[invoice.PI_INITIAL_CREDITS]
- pi_old_pi_entry[invoice.PI_1ST_USED]
)
credit_used_field = invoice.PI_2ND_USED

# NOTE(review): apply_flat_discount is defined outside this view; presumably
# it writes the credit, credit code and balance into `data` and returns the
# amount of credit consumed — confirm.
credits_used = self.apply_flat_discount(
data,
pi_projects,
remaining_credit,
invoice.CREDIT_FIELD,
invoice.BALANCE_FIELD,
invoice.CREDIT_CODE_FIELD,
self.NEW_PI_CREDIT_CODE,
)

# Warn when a previously recorded, non-zero usage differs from the newly
# computed one, then overwrite the recorded value.
if (pi_old_pi_entry[credit_used_field] != 0) and (
credits_used != pi_old_pi_entry[credit_used_field]
):
print(
f"Warning: PI file overwritten. PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}"
)
old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field
] = credits_used

return (data, old_pi_df)
7 changes: 7 additions & 0 deletions process_report/invoices/bu_internal_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@

@dataclass
class BUInternalInvoice(discount_invoice.DiscountInvoice):
"""
Dependencies:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PI_FIELD,
Expand All @@ -27,6 +33,7 @@ def get_project(row):
else:
return project_alloc[: project_alloc.rfind("-")]

# Keep only billable rows that have a PI. The boolean expression must be used
# as a row mask; assigning it directly would replace the dataframe with a
# True/False Series and break the institution filter that follows.
self.data = self.data[
    self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
]
self.data = self.data[
self.data[invoice.INSTITUTION_FIELD] == "Boston University"
].copy()
Expand Down
1 change: 1 addition & 0 deletions process_report/invoices/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
### Internally used field names
IS_BILLABLE_FIELD = "Is Billable"
MISSING_PI_FIELD = "Missing PI"
PI_BALANCE_FIELD = "PI Balance"
###


Expand Down
7 changes: 7 additions & 0 deletions process_report/invoices/pi_specific_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@

@dataclass
class PIInvoice(invoice.Invoice):
"""
Dependencies:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PROJECT_FIELD,
Expand All @@ -28,6 +34,7 @@ class PIInvoice(invoice.Invoice):
]

def _prepare(self):
    """Filter to billable rows with a PI and collect the distinct PIs into `self.pi_list`."""
    # Use the boolean expression as a row mask. Assigning it directly would
    # turn `self.data` into a True/False Series, and the PI column lookup
    # below would then fail.
    billable_with_pi = (
        self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
    )
    self.data = self.data[billable_with_pi]
    self.pi_list = self.data[invoice.PI_FIELD].unique()

def export(self):
Expand Down
40 changes: 27 additions & 13 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
add_institution_processor,
lenovo_processor,
validate_billable_pi_processor,
new_pi_credit_processor,
)

### PI file field names
Expand Down Expand Up @@ -229,7 +230,19 @@ def main():
)
validate_billable_pi_proc.process()

processed_data = validate_billable_pi_proc.data
rates_info = load_from_url()
new_pi_credit_proc = new_pi_credit_processor.NewPICreditProcessor(
"",
invoice_month,
data=validate_billable_pi_proc.data,
old_pi_filepath=old_pi_file,
limit_new_pi_credit_to_partners=rates_info.get_value_at(
"Limit New PI Credit to MGHPCC Partners", invoice_month
),
)
new_pi_credit_proc.process()

processed_data = new_pi_credit_proc.data

### Initialize invoices

Expand All @@ -249,40 +262,41 @@ def main():
if args.upload_to_s3:
backup_to_s3_old_pi_file(old_pi_file)

rates_info = load_from_url()
billable_inv = billable_invoice.BillableInvoice(
name=args.output_file,
invoice_month=invoice_month,
data=processed_data.copy(),
old_pi_filepath=old_pi_file,
limit_new_pi_credit_to_partners=rates_info.get_value_at(
"Limit New PI Credit to MGHPCC Partners", invoice_month
),
)

util.process_and_export_invoices(
[lenovo_inv, nonbillable_inv, billable_inv], args.upload_to_s3
updated_old_pi_df=new_pi_credit_proc.updated_old_pi_df,
)

nerc_total_inv = NERC_total_invoice.NERCTotalInvoice(
name=args.NERC_total_invoice_file,
invoice_month=invoice_month,
data=billable_inv.data.copy(),
data=processed_data.copy(),
)

bu_internal_inv = bu_internal_invoice.BUInternalInvoice(
name=args.BU_invoice_file,
invoice_month=invoice_month,
data=billable_inv.data.copy(),
data=processed_data.copy(),
subsidy_amount=args.BU_subsidy_amount,
)

pi_inv = pi_specific_invoice.PIInvoice(
name=args.output_folder, invoice_month=invoice_month, data=billable_inv.data
name=args.output_folder, invoice_month=invoice_month, data=processed_data.copy()
)

util.process_and_export_invoices(
[nerc_total_inv, bu_internal_inv, pi_inv], args.upload_to_s3
[
lenovo_inv,
nonbillable_inv,
billable_inv,
nerc_total_inv,
bu_internal_inv,
pi_inv,
],
args.upload_to_s3,
)


Expand Down
Loading

0 comments on commit 9a6fac4

Please sign in to comment.