diff --git a/Dockerfile b/Dockerfile
index 8c4af28..712fb36 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@ FROM python:3.11-slim
WORKDIR /app
-RUN apt-get update && apt-get install -y git
+RUN apt-get update && apt-get install -y git chromium && rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
RUN pip install -r requirements.txt
diff --git a/process_report/invoices/pi_specific_invoice.py b/process_report/invoices/pi_specific_invoice.py
index 15baee0..7958994 100644
--- a/process_report/invoices/pi_specific_invoice.py
+++ b/process_report/invoices/pi_specific_invoice.py
@@ -1,12 +1,24 @@
import os
+import sys
from dataclasses import dataclass
+import subprocess
+import tempfile
+import logging
import pandas
+from jinja2 import Environment, FileSystemLoader
import process_report.invoices.invoice as invoice
import process_report.util as util
+# Resolve the templates folder relative to this module (invoices/ -> ../templates)
+# so that template lookup does not depend on the current working directory.
+TEMPLATE_DIR_PATH = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "templates"
+)
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
@dataclass
class PIInvoice(invoice.Invoice):
"""
@@ -15,6 +27,21 @@ class PIInvoice(invoice.Invoice):
- NewPICreditProcessor
"""
+ # Columns that are summed into the trailing "Total" row of a PI's invoice table
+ TOTAL_COLUMN_LIST = [
+ invoice.COST_FIELD,
+ invoice.CREDIT_FIELD,
+ invoice.BALANCE_FIELD,
+ ]
+
+ # Columns whose non-missing values are rendered with a leading "$"
+ DOLLAR_COLUMN_LIST = [
+ invoice.RATE_FIELD,
+ invoice.GROUP_BALANCE_FIELD,
+ invoice.COST_FIELD,
+ invoice.GROUP_BALANCE_USED_FIELD,
+ invoice.CREDIT_FIELD,
+ invoice.BALANCE_FIELD,
+ ]
+
export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PROJECT_FIELD,
@@ -43,31 +70,109 @@ def _prepare(self):
]
self.pi_list = self.export_data[invoice.PI_FIELD].unique()
+    def _get_pi_dataframe(self, data, pi):
+        """Build the table shown on a single PI's invoice.
+
+        Takes the rows of `data` whose PI column equals `pi`, drops the prepay
+        group columns when none of those rows has a group name, appends a
+        "Total" row holding the sums of the TOTAL_COLUMN_LIST columns, prefixes
+        the DOLLAR_COLUMN_LIST values with "$" and replaces missing values with
+        empty strings.
+        """
+
+        def add_dollar_sign(value):
+            return value if pandas.isna(value) else "$" + str(value)
+
+        pi_rows = data[data[invoice.PI_FIELD] == pi].copy().reset_index(drop=True)
+
+        # A PI with no prepay group at all gets no prepay columns on the invoice
+        if pi_rows[invoice.GROUP_NAME_FIELD].isna().all():
+            prepay_columns = [
+                invoice.GROUP_NAME_FIELD,
+                invoice.GROUP_INSTITUTION_FIELD,
+                invoice.GROUP_BALANCE_FIELD,
+                invoice.GROUP_BALANCE_USED_FIELD,
+            ]
+            pi_rows = pi_rows.drop(columns=prepay_columns)
+
+        # The sums have to be taken before the (still empty) total row exists
+        totalled_columns = [
+            name for name in self.TOTAL_COLUMN_LIST if name in pi_rows.columns
+        ]
+        totals = [pi_rows[name].sum() for name in totalled_columns]
+
+        # The index was reset above, so len() is the label of the new last row
+        total_row = len(pi_rows)
+        pi_rows.loc[total_row] = None
+        pi_rows.loc[total_row, invoice.INVOICE_DATE_FIELD] = "Total"
+        pi_rows.loc[total_row, totalled_columns] = totals
+
+        for name in self.DOLLAR_COLUMN_LIST:
+            if name not in pi_rows.columns:
+                continue
+            pi_rows[name] = pi_rows[name].apply(add_dollar_sign)
+
+        pi_rows.fillna("", inplace=True)
+
+        return pi_rows
+
+
def export(self):
- def _export_pi_invoice(pi):
- if pandas.isna(pi):
- return
- pi_projects = self.export_data[self.export_data[invoice.PI_FIELD] == pi]
- pi_instituition = pi_projects[invoice.INSTITUTION_FIELD].iat[0]
- pi_projects.to_csv(
- f"{self.name}/{pi_instituition}_{pi} {self.invoice_month}.csv"
+        """Write one PDF invoice per PI into the `self.name` folder.
+
+        For every non-missing PI the per-PI table is rendered to a temporary
+        HTML file from the `pi_invoice.html` template and printed to
+        `<institution>_<pi>_<invoice month>.pdf` by headless Chromium.
+
+        The Chromium binary is taken from the `CHROME_BIN_PATH` environment
+        variable and defaults to `/usr/bin/chromium`. The process exits via
+        `sys.exit` when the binary does not exist. A non-zero Chromium exit
+        status is logged as an error and the remaining invoices are still
+        attempted.
+        """
+
+        def _create_html_invoice(temp_fd, template, pi_dataframe):
+            temp_fd.write(template.render(data=pi_dataframe))
+            # Chromium opens the file by name, so the content must be on disk
+            temp_fd.flush()
+
+        def _create_pdf_invoice(temp_fd_name, invoice_pdf_path):
+            # The default must be absolute: a relative path would be resolved
+            # against the current working directory
+            chrome_binary_location = os.environ.get(
+                "CHROME_BIN_PATH", "/usr/bin/chromium"
+            )
+            if not os.path.exists(chrome_binary_location):
+                sys.exit(
+                    f"Chrome binary does not exist at {chrome_binary_location}. Make sure the env var CHROME_BIN_PATH is set correctly or that Google Chrome is installed"
+                )
+
+            result = subprocess.run(
+                [
+                    chrome_binary_location,
+                    "--headless",
+                    "--no-sandbox",
+                    f"--print-to-pdf={invoice_pdf_path}",
+                    "--no-pdf-header-footer",
+                    "file://" + temp_fd_name,
+                ],
+                capture_output=True,
)
+            # The output is captured, so a failure would otherwise be invisible
+            if result.returncode != 0:
+                logger.error(
+                    "Chromium exited with code %s while creating %s: %s",
+                    result.returncode,
+                    invoice_pdf_path,
+                    result.stderr.decode(errors="replace"),
+                )
+
self._filter_columns()
+
if not os.path.exists(
self.name
): # self.name is name of folder storing invoices
os.mkdir(self.name)
+
+        # Load the template once for all PIs. Autoescaping keeps characters
+        # such as "<" or "&" in the invoice data from being parsed as markup.
+        environment = Environment(
+            loader=FileSystemLoader(TEMPLATE_DIR_PATH), autoescape=True
+        )
+        template = environment.get_template("pi_invoice.html")
+
for pi in self.pi_list:
- _export_pi_invoice(pi)
+            if pandas.isna(pi):
+                continue
+
+            pi_dataframe = self._get_pi_dataframe(self.export_data, pi)
+            pi_institution = pi_dataframe[invoice.INSTITUTION_FIELD].iat[0]
+            invoice_pdf_path = (
+                f"{self.name}/{pi_institution}_{pi}_{self.invoice_month}.pdf"
+            )
+
+            with tempfile.NamedTemporaryFile(
+                mode="w", suffix=".html", encoding="utf-8"
+            ) as temp_fd:
+                _create_html_invoice(temp_fd, template, pi_dataframe)
+                _create_pdf_invoice(temp_fd.name, invoice_pdf_path)
def export_s3(self, s3_bucket):
def _export_s3_pi_invoice(pi_invoice):
pi_invoice_path = os.path.join(self.name, pi_invoice)
striped_invoice_path = os.path.splitext(pi_invoice_path)[0]
- output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.csv"
- output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.csv"
+ output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.pdf"
+ output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.pdf"
s3_bucket.upload_file(pi_invoice_path, output_s3_path)
s3_bucket.upload_file(pi_invoice_path, output_s3_archive_path)
diff --git a/process_report/templates/pi_invoice.html b/process_report/templates/pi_invoice.html
new file mode 100644
index 0000000..7a75a5f
--- /dev/null
+++ b/process_report/templates/pi_invoice.html
@@ -0,0 +1,73 @@
+
+
+
+
+
+
+
+
+
+
+
+ {% for col in data.columns %}
+ {{col}} |
+ {% endfor %}
+
+
+ {% for i, row in data.iterrows() %}
+
+ {% for field in row %}
+ {% if i == data.index[-1] %}
+ {% if field %}
+ {{field}} |
+ {% else %}
+ |
+ {% endif %}
+ {% else %}
+ {{field}} |
+ {% endif %}
+ {% endfor %}
+
+ {% endfor %}
+
+
+
+
+
diff --git a/process_report/tests/unit/invoices/test_pi_specific_invoice.py b/process_report/tests/unit/invoices/test_pi_specific_invoice.py
index c96b5b7..3884ecc 100644
--- a/process_report/tests/unit/invoices/test_pi_specific_invoice.py
+++ b/process_report/tests/unit/invoices/test_pi_specific_invoice.py
@@ -1,68 +1,147 @@
from unittest import TestCase, mock
import tempfile
import pandas
-import os
from process_report.tests import util as test_utils
-class TestExportPICSV(TestCase):
- def setUp(self):
- data = {
- "Invoice Month": ["2023-01", "2023-01", "2023-01", "2023-01", "2023-01"],
- "Manager (PI)": ["PI1", "PI1", "PI1", "PI2", "PI2"],
- "Institution": ["BU", "BU", "BU", "HU", "HU"],
- "Project - Allocation": [
- "ProjectA",
- "ProjectB",
- "ProjectC",
- "ProjectD",
- "ProjectE",
- ],
- "Untouch Data Column": ["DataA", "DataB", "DataC", "DataD", "DataE"],
- "Is Billable": [True, True, True, True, True],
- "Missing PI": [False, False, False, False, False],
- }
- self.dataframe = pandas.DataFrame(data)
- self.invoice_month = data["Invoice Month"][0]
+class TestPISpecificInvoice(TestCase):
+    def _get_test_invoice(
+        self,
+        pi,
+        institution,
+        balance,
+        is_billable=None,
+        missing_pi=None,
+        group_name=None,
+    ):
+        """Build a small invoice dataframe with one row per entry of `pi`.
+
+        Optional columns that are not given default to all-billable,
+        no-missing-PI and no prepay group. The remaining prepay columns are
+        always filled with empty/zero values.
+        """
+        row_count = len(pi)
+        is_billable = is_billable or [True] * row_count
- @mock.patch("process_report.invoices.invoice.Invoice._filter_columns")
- def test_export_pi(self, mock_filter_cols):
- mock_filter_cols.return_value = self.dataframe
+        missing_pi = missing_pi or [False] * row_count
- output_dir = tempfile.TemporaryDirectory()
- pi_inv = test_utils.new_pi_specific_invoice(
- output_dir.name, invoice_month=self.invoice_month, data=self.dataframe
+        group_name = group_name or [None] * row_count
+
+        return pandas.DataFrame(
+            {
+                "Manager (PI)": pi,
+                "Institution": institution,
+                "Is Billable": is_billable,
+                "Missing PI": missing_pi,
+                "Prepaid Group Name": group_name,
+                "Prepaid Group Institution": [""] * row_count,
+                "Prepaid Group Balance": [0] * row_count,
+                "Prepaid Group Used": [0] * row_count,
+                "Balance": balance,
+            }
)
- pi_inv.process()
- pi_inv.export()
- pi_csv_1 = f'{self.dataframe["Institution"][0]}_{self.dataframe["Manager (PI)"][0]} {self.dataframe["Invoice Month"][0]}.csv'
- pi_csv_2 = f'{self.dataframe["Institution"][3]}_{self.dataframe["Manager (PI)"][3]} {self.dataframe["Invoice Month"][3]}.csv'
- self.assertIn(pi_csv_1, os.listdir(output_dir.name))
- self.assertIn(pi_csv_2, os.listdir(output_dir.name))
- self.assertEqual(
- len(os.listdir(output_dir.name)),
- len(self.dataframe["Manager (PI)"].unique()),
+
+ def test_get_pi_dataframe(self):
+ """Check the per-PI table for a PI with a prepay group (PI1) and one without (PI2)."""
+
+ # Local copy of the dollar formatting used to build the expected frames
+ def add_dollar_sign(data):
+ if pandas.isna(data):
+ return data
+ else:
+ return "$" + str(data)
+
+ test_invoice = self._get_test_invoice(
+ ["PI1", "PI1", "PI2", "PI2"],
+ [
+ "BU",
+ "BU",
+ "HU",
+ "HU",
+ ],
+ [100, 200, 300, 400],
+ group_name=[None, "G1", None, None],
+ )
+ # Expected PI1 table: one of its rows has group "G1", so the prepay
+ # columns are kept and dollar-formatted
+ answer_invoice_pi1 = (
+ test_invoice[test_invoice["Manager (PI)"] == "PI1"]
+ .copy()
+ .reset_index(drop=True)
+ )
+ answer_invoice_pi1.loc[len(answer_invoice_pi1)] = None
+ answer_invoice_pi1.loc[
+ answer_invoice_pi1.index[-1], ["Invoice Month", "Balance"]
+ ] = ["Total", 300]
+ for column_name in [
+ "Prepaid Group Balance",
+ "Prepaid Group Used",
+ "Balance",
+ ]:
+ answer_invoice_pi1[column_name] = answer_invoice_pi1[column_name].apply(
+ add_dollar_sign
+ )
+ answer_invoice_pi1.fillna("", inplace=True)
+
+ # Expected PI2 table: no row has a group name, so all four prepay
+ # columns are dropped
+ answer_invoice_pi2 = (
+ test_invoice[test_invoice["Manager (PI)"] == "PI2"]
+ .copy()
+ .reset_index(drop=True)
+ )
+ answer_invoice_pi2.loc[len(answer_invoice_pi2)] = None
+ answer_invoice_pi2.loc[
+ answer_invoice_pi2.index[-1], ["Invoice Month", "Balance"]
+ ] = ["Total", 700]
+ answer_invoice_pi2 = answer_invoice_pi2.drop(
+ [
+ "Prepaid Group Name",
+ "Prepaid Group Institution",
+ "Prepaid Group Balance",
+ "Prepaid Group Used",
+ ],
+ axis=1,
)
+ answer_invoice_pi2["Balance"] = answer_invoice_pi2["Balance"].apply(
+ add_dollar_sign
+ )
+ answer_invoice_pi2.fillna("", inplace=True)
+
+ # DataFrame.equals also compares dtypes and column order, not just values
+ pi_inv = test_utils.new_pi_specific_invoice(data=test_invoice)
+ output_invoice = pi_inv._get_pi_dataframe(test_invoice, "PI1")
+ self.assertTrue(answer_invoice_pi1.equals(output_invoice))
+
+ output_invoice = pi_inv._get_pi_dataframe(test_invoice, "PI2")
+ self.assertTrue(answer_invoice_pi2.equals(output_invoice))
- pi_df = pandas.read_csv(output_dir.name + "/" + pi_csv_1)
- self.assertEqual(len(pi_df["Manager (PI)"].unique()), 1)
- self.assertEqual(
- pi_df["Manager (PI)"].unique()[0], self.dataframe["Manager (PI)"][0]
+    @mock.patch("process_report.invoices.invoice.Invoice._filter_columns")
+    @mock.patch("os.path.exists")
+    @mock.patch("subprocess.run")
+    def test_export_pi(self, mock_subprocess_run, mock_path_exists, mock_filter_cols):
+        """Check that export() invokes Chromium once per PI with the expected arguments.
+
+        `subprocess.run` and `os.path.exists` are mocked, so no browser is
+        started and no PDF is written. Only the command line is verified.
+        """
+        invoice_month = "2024-10"
+        chrome_bin_path = "/usr/bin/chromium"
+        test_invoice = self._get_test_invoice(
+            ["PI1", "PI1", "PI2", "PI2"],
+            [
+                "BU",
+                "BU",
+                "HU",
+                "HU",
+            ],
+            [100, 200, 300, 400],
+            group_name=[None, "G1", None, None],
)
- self.assertIn("ProjectA", pi_df["Project - Allocation"].tolist())
- self.assertIn("ProjectB", pi_df["Project - Allocation"].tolist())
- self.assertIn("ProjectC", pi_df["Project - Allocation"].tolist())
+        mock_filter_cols.return_value = test_invoice
+        mock_path_exists.return_value = True
+        output_dir = tempfile.TemporaryDirectory()
+        # Remove the temporary folder even when an assertion below fails
+        self.addCleanup(output_dir.cleanup)
- pi_df = pandas.read_csv(output_dir.name + "/" + pi_csv_2)
- self.assertEqual(len(pi_df["Manager (PI)"].unique()), 1)
- self.assertEqual(
- pi_df["Manager (PI)"].unique()[0], self.dataframe["Manager (PI)"][3]
+        pi_inv = test_utils.new_pi_specific_invoice(
+            output_dir.name, invoice_month, data=test_invoice
)
+        pi_inv.process()
+        # Pin the binary location so the outcome does not depend on the
+        # CHROME_BIN_PATH of whoever runs the test
+        with mock.patch.dict("os.environ", {"CHROME_BIN_PATH": chrome_bin_path}):
+            pi_inv.export()
+        pi_pdf_1 = f"{output_dir.name}/BU_PI1_{invoice_month}.pdf"
+        pi_pdf_2 = f"{output_dir.name}/HU_PI2_{invoice_month}.pdf"
- self.assertIn("ProjectD", pi_df["Project - Allocation"].tolist())
- self.assertIn("ProjectE", pi_df["Project - Allocation"].tolist())
- self.assertNotIn("ProjectA", pi_df["Project - Allocation"].tolist())
- self.assertNotIn("ProjectB", pi_df["Project - Allocation"].tolist())
- self.assertNotIn("ProjectC", pi_df["Project - Allocation"].tolist())
+        # One Chromium invocation per PI
+        self.assertEqual(mock_subprocess_run.call_count, 2)
+        for i, pi_pdf_path in enumerate([pi_pdf_1, pi_pdf_2]):
+            chrome_arglist, _ = mock_subprocess_run.call_args_list[i]
+            answer_arglist = [
+                chrome_bin_path,
+                "--headless",
+                "--no-sandbox",
+                f"--print-to-pdf={pi_pdf_path}",
+                "--no-pdf-header-footer",
+            ]
+            for answer_arg in answer_arglist:
+                self.assertIn(answer_arg, chrome_arglist[0])
diff --git a/requirements.txt b/requirements.txt
index 748b45a..4eadb6c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ git+https://github.com/CCI-MOC/nerc-rates@74eb4a7#egg=nerc_rates
pandas
pyarrow
boto3
+Jinja2