Skip to content

Commit

Permalink
Implemented processors for removing nonbillables and validating billable PIs
Browse files Browse the repository at this point in the history
  • Loading branch information
QuanMPhm committed Oct 22, 2024
1 parent 6276e84 commit 0a88aba
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 79 deletions.
27 changes: 0 additions & 27 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ class BillableInvoice(discount_invoice.DiscountInvoice):
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
PI_S3_FILEPATH = "PIs/PI.csv"

nonbillable_pis: list[str]
nonbillable_projects: list[str]

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PROJECT_FIELD,
Expand Down Expand Up @@ -62,26 +59,6 @@ def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:

return old_pi_df

@staticmethod
def _remove_nonbillables(
    data: pandas.DataFrame,
    nonbillable_pis: list[str],
    nonbillable_projects: list[str],
):
    """Return only the rows of ``data`` that are billable.

    A row is kept when its PI value is not listed in ``nonbillable_pis``
    and its project value is not listed in ``nonbillable_projects``.
    """
    pi_is_billable = ~data[invoice.PI_FIELD].isin(nonbillable_pis)
    project_is_billable = ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects)
    return data[pi_is_billable & project_is_billable]

@staticmethod
def _validate_pi_names(data: pandas.DataFrame):
    """Drop rows whose PI field is empty, warning once per dropped row.

    Args:
        data: Frame containing the PI and project columns.

    Returns:
        The rows of ``data`` whose PI field is not NA.
    """
    # Compute the NA mask once and reuse it for both logging and filtering.
    missing_pi = pandas.isna(data[invoice.PI_FIELD])
    for project in data.loc[missing_pi, invoice.PROJECT_FIELD]:
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the supported API.
        logger.warning(f"Billable project {project} has empty PI field")
    return data[~missing_pi]

@staticmethod
def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
"""Returns time difference between current invoice month and PI's first invoice month
Expand All @@ -102,10 +79,6 @@ def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
return month_diff

def _prepare(self):
self.data = self._remove_nonbillables(
self.data, self.nonbillable_pis, self.nonbillable_projects
)
self.data = self._validate_pi_names(self.data)
self.data[invoice.CREDIT_FIELD] = None
self.data[invoice.CREDIT_CODE_FIELD] = None
self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]
Expand Down
24 changes: 21 additions & 3 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from process_report.processors import (
validate_pi_alias_processor,
add_institution_processor,
remove_nonbillables_processor,
validate_billable_pi_processor,
)

### PI file field names
Expand Down Expand Up @@ -232,16 +234,32 @@ def main():
nonbillable_projects=projects,
)

### Remove nonbillables

remove_nonbillables_proc = remove_nonbillables_processor.RemoveNonbillables(
"", invoice_month, add_institute_proc.data, pi, projects
)
remove_nonbillables_proc.process()

validate_billable_pi_proc = (
validate_billable_pi_processor.ValidateBillablePIsProcessor(
"", invoice_month, remove_nonbillables_proc.data
)
)
validate_billable_pi_proc.process()

processed_data = validate_billable_pi_proc.data

### Initialize invoices

if args.upload_to_s3:
backup_to_s3_old_pi_file(old_pi_file)

rates_info = load_from_url()
billable_inv = billable_invoice.BillableInvoice(
name=args.output_file,
invoice_month=invoice_month,
data=preliminary_processed_data.copy(),
nonbillable_pis=pi,
nonbillable_projects=projects,
data=processed_data.copy(),
old_pi_filepath=old_pi_file,
limit_new_pi_credit_to_partners=rates_info.get_value_at(
"Limit New PI Credit to MGHPCC Partners", invoice_month
Expand Down
28 changes: 28 additions & 0 deletions process_report/processors/remove_nonbillables_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from dataclasses import dataclass

import pandas

from process_report.invoices import invoice
from process_report.processors import processor


@dataclass
class RemoveNonbillables(processor.Processor):
    """Processor that filters nonbillable PIs and projects out of its data."""

    # Values matched against the PI column; matching rows are dropped.
    nonbillable_pis: list[str]
    # Values matched against the project column; matching rows are dropped.
    nonbillable_projects: list[str]

    @staticmethod
    def _remove_nonbillables(
        data: pandas.DataFrame,
        nonbillable_pis: list[str],
        nonbillable_projects: list[str],
    ):
        """Return the rows of ``data`` whose PI and project are both billable."""
        pi_is_billable = ~data[invoice.PI_FIELD].isin(nonbillable_pis)
        project_is_billable = ~data[invoice.PROJECT_FIELD].isin(
            nonbillable_projects
        )
        return data[pi_is_billable & project_is_billable]

    def _process(self):
        """Replace ``self.data`` with its billable rows."""
        billable_rows = self._remove_nonbillables(
            self.data, self.nonbillable_pis, self.nonbillable_projects
        )
        self.data = billable_rows
25 changes: 25 additions & 0 deletions process_report/processors/validate_billable_pi_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from dataclasses import dataclass
import logging

import pandas

from process_report.invoices import invoice
from process_report.processors import processor

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


@dataclass
class ValidateBillablePIsProcessor(processor.Processor):
    """Processor that drops rows with an empty PI field, warning about each."""

    @staticmethod
    def _validate_pi_names(data: pandas.DataFrame):
        """Drop rows whose PI field is empty, warning once per dropped row.

        Args:
            data: Frame containing the PI and project columns.

        Returns:
            The rows of ``data`` whose PI field is not NA.
        """
        # Compute the NA mask once and reuse it for both logging and filtering.
        missing_pi = pandas.isna(data[invoice.PI_FIELD])
        for project in data.loc[missing_pi, invoice.PROJECT_FIELD]:
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the
            # supported API.
            logger.warning(f"Billable project {project} has empty PI field")
        return data[~missing_pi]

    def _process(self):
        """Replace ``self.data`` with the rows that have a PI."""
        self.data = self._validate_pi_names(self.data)
85 changes: 40 additions & 45 deletions process_report/tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,26 +104,6 @@ def test_remove_billables(self):
self.assertNotIn("ProjectE", result_df["Project - Allocation"].tolist())


class TestBillableInvoice(TestCase):
    def test_remove_nonbillables(self):
        """Rows with a nonbillable PI or a nonbillable project are removed."""
        pis = [uuid.uuid4().hex for _ in range(10)]
        projects = [uuid.uuid4().hex for _ in range(10)]
        # First three PIs and last three projects are nonbillable, which
        # leaves rows 3..6 as the only billable ones.
        nonbillable_pis, billable_pis = pis[:3], pis[3:7]
        nonbillable_projects = projects[7:]
        frame = pandas.DataFrame(
            {"Manager (PI)": pis, "Project - Allocation": projects}
        )

        invoice_under_test = test_utils.new_billable_invoice()
        filtered = invoice_under_test._remove_nonbillables(
            frame, nonbillable_pis, nonbillable_projects
        )

        leftover_pi_rows = filtered[filtered["Manager (PI)"].isin(nonbillable_pis)]
        self.assertTrue(leftover_pi_rows.empty)

        leftover_project_rows = filtered[
            filtered["Project - Allocation"].isin(nonbillable_projects)
        ]
        self.assertTrue(leftover_project_rows.empty)

        only_billable = filtered[filtered["Manager (PI)"].isin(billable_pis)]
        self.assertTrue(filtered.equals(only_billable))


class TestMergeCSV(TestCase):
def setUp(self):
self.header = ["ID", "Name", "Age"]
Expand Down Expand Up @@ -281,6 +261,46 @@ def test_validate_alias(self):
self.assertTrue(answer_data.equals(validate_pi_alias_proc.data))


class TestRemoveNonbillablesProcessor(TestCase):
    def test_remove_nonbillables(self):
        """Rows with a nonbillable PI or a nonbillable project are removed."""
        pis = [uuid.uuid4().hex for _ in range(10)]
        projects = [uuid.uuid4().hex for _ in range(10)]
        # First three PIs and last three projects are nonbillable, which
        # leaves rows 3..6 as the only billable ones.
        nonbillable_pis, billable_pis = pis[:3], pis[3:7]
        nonbillable_projects = projects[7:]
        frame = pandas.DataFrame(
            {"Manager (PI)": pis, "Project - Allocation": projects}
        )

        proc = test_utils.new_remove_nonbillables_processor()
        filtered = proc._remove_nonbillables(
            frame, nonbillable_pis, nonbillable_projects
        )

        leftover_pi_rows = filtered[filtered["Manager (PI)"].isin(nonbillable_pis)]
        self.assertTrue(leftover_pi_rows.empty)

        leftover_project_rows = filtered[
            filtered["Project - Allocation"].isin(nonbillable_projects)
        ]
        self.assertTrue(leftover_project_rows.empty)

        only_billable = filtered[filtered["Manager (PI)"].isin(billable_pis)]
        self.assertTrue(filtered.equals(only_billable))


class TestValidateBillablePIProcessor(TestCase):
    def test_validate_billables(self):
        """Rows with an empty PI are dropped by ``_validate_pi_names``."""
        pi_column = ["PI1", math.nan, "PI1", "PI2", "PI2"]
        project_column = [
            "ProjectA",
            "ProjectB",
            "ProjectC",
            "ProjectD",
            "ProjectE",
        ]
        test_data = pandas.DataFrame(
            {"Manager (PI)": pi_column, "Project - Allocation": project_column}
        )

        # Sanity check: the fixture contains exactly one row without a PI.
        rows_missing_pi = test_data[pandas.isna(test_data["Manager (PI)"])]
        self.assertEqual(1, len(rows_missing_pi))

        proc = test_utils.new_validate_billable_pi_processor()
        output_data = proc._validate_pi_names(test_data)

        still_missing_pi = output_data[pandas.isna(output_data["Manager (PI)"])]
        self.assertEqual(0, len(still_missing_pi))


class TestMonthUtils(TestCase):
def test_get_month_diff(self):
testcases = [
Expand Down Expand Up @@ -710,31 +730,6 @@ def test_apply_BU_subsidy(self):
self.assertEqual(50, output_df.loc[3, "Balance"])


class TestValidateBillables(TestCase):
    def setUp(self):
        """Build a fixture frame in which exactly one row has no PI."""
        pi_column = ["PI1", math.nan, "PI1", "PI2", "PI2"]
        project_column = [
            "ProjectA",
            "ProjectB",
            "ProjectC",
            "ProjectD",
            "ProjectE",
        ]
        self.dataframe = pandas.DataFrame(
            {"Manager (PI)": pi_column, "Project - Allocation": project_column}
        )

    def test_validate_billables(self):
        """Rows with an empty PI are dropped by ``_validate_pi_names``."""
        rows_missing_pi = self.dataframe[
            pandas.isna(self.dataframe["Manager (PI)"])
        ]
        self.assertEqual(1, len(rows_missing_pi))

        invoice_under_test = test_utils.new_billable_invoice()
        validated_df = invoice_under_test._validate_pi_names(self.dataframe)

        still_missing_pi = validated_df[pandas.isna(validated_df["Manager (PI)"])]
        self.assertEqual(0, len(still_missing_pi))


class TestExportLenovo(TestCase):
def setUp(self):
data = {
Expand Down
26 changes: 22 additions & 4 deletions process_report/tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from process_report.processors import (
add_institution_processor,
validate_pi_alias_processor,
remove_nonbillables_processor,
validate_billable_pi_processor,
)


Expand All @@ -25,17 +27,13 @@ def new_billable_invoice(
name="",
invoice_month="0000-00",
data=pandas.DataFrame(),
nonbillable_pis=[],
nonbillable_projects=[],
old_pi_filepath="",
limit_new_pi_credit_to_partners=False,
):
return billable_invoice.BillableInvoice(
name,
invoice_month,
data,
nonbillable_pis,
nonbillable_projects,
old_pi_filepath,
limit_new_pi_credit_to_partners,
)
Expand Down Expand Up @@ -75,3 +73,23 @@ def new_validate_pi_alias_processor(
return validate_pi_alias_processor.ValidatePIAliasProcessor(
name, invoice_month, data, alias_map
)


def new_remove_nonbillables_processor(
    name="",
    invoice_month="0000-00",
    data=None,
    nonbillable_pis=None,
    nonbillable_projects=None,
):
    """Build a ``RemoveNonbillables`` processor with test-friendly defaults.

    Args:
        name: Processor name.
        invoice_month: Invoice month string.
        data: Input frame; a fresh empty ``pandas.DataFrame`` when omitted.
        nonbillable_pis: Nonbillable PIs; a fresh empty list when omitted.
        nonbillable_projects: Nonbillable projects; a fresh empty list when
            omitted.

    Returns:
        The constructed ``RemoveNonbillables`` instance.
    """
    # Defaults are created per call: the processor stores these objects, so
    # shared default instances would leak state between tests.
    if data is None:
        data = pandas.DataFrame()
    if nonbillable_pis is None:
        nonbillable_pis = []
    if nonbillable_projects is None:
        nonbillable_projects = []
    return remove_nonbillables_processor.RemoveNonbillables(
        name, invoice_month, data, nonbillable_pis, nonbillable_projects
    )


def new_validate_billable_pi_processor(
    name="", invoice_month="0000-00", data=None
):
    """Build a ``ValidateBillablePIsProcessor`` with test-friendly defaults.

    Args:
        name: Processor name.
        invoice_month: Invoice month string.
        data: Input frame; a fresh empty ``pandas.DataFrame`` when omitted.

    Returns:
        The constructed ``ValidateBillablePIsProcessor`` instance.
    """
    # Create the default frame per call so instances never share one object.
    if data is None:
        data = pandas.DataFrame()
    return validate_billable_pi_processor.ValidateBillablePIsProcessor(
        name, invoice_month, data
    )

0 comments on commit 0a88aba

Please sign in to comment.