From 8e9798c714945222363f776efe9d3e5a5f01fdda Mon Sep 17 00:00:00 2001 From: Quan Pham Date: Thu, 19 Dec 2024 15:47:02 -0500 Subject: [PATCH] Exported PI invoices as PDFs The PI-specific dataframes will first be converted to HTML tables using Jinja templates, and then converted to PDFs using Chromium. Now, users of the script must provide a path to the Chromium/Chrome binary through the env var `CHROME_BIN_PATH`. An HTML template folder has been added, and the test cases for the PI-specific invoice will now check both whether the dataframe is formatted correctly and whether the PDFs are correctly generated. The Dockerfile has been updated to install Chromium. --- Dockerfile | 2 +- .../invoices/pi_specific_invoice.py | 125 +++++++++++- process_report/templates/pi_invoice.html | 73 +++++++ .../unit/invoices/test_pi_specific_invoice.py | 181 +++++++++++++----- requirements.txt | 1 + 5 files changed, 320 insertions(+), 62 deletions(-) create mode 100644 process_report/templates/pi_invoice.html diff --git a/Dockerfile b/Dockerfile index 8c4af28..712fb36 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM python:3.11-slim WORKDIR /app -RUN apt-get update && apt-get install -y git +RUN apt-get update && apt-get install -y git chromium COPY requirements.txt . 
RUN pip install -r requirements.txt diff --git a/process_report/invoices/pi_specific_invoice.py b/process_report/invoices/pi_specific_invoice.py index 15baee0..7958994 100644 --- a/process_report/invoices/pi_specific_invoice.py +++ b/process_report/invoices/pi_specific_invoice.py @@ -1,12 +1,24 @@ import os +import sys from dataclasses import dataclass +import subprocess +import tempfile +import logging import pandas +from jinja2 import Environment, FileSystemLoader import process_report.invoices.invoice as invoice import process_report.util as util +TEMPLATE_DIR_PATH = "process_report/templates" + + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + @dataclass class PIInvoice(invoice.Invoice): """ @@ -15,6 +27,21 @@ class PIInvoice(invoice.Invoice): - NewPICreditProcessor """ + TOTAL_COLUMN_LIST = [ + invoice.COST_FIELD, + invoice.CREDIT_FIELD, + invoice.BALANCE_FIELD, + ] + + DOLLAR_COLUMN_LIST = [ + invoice.RATE_FIELD, + invoice.GROUP_BALANCE_FIELD, + invoice.COST_FIELD, + invoice.GROUP_BALANCE_USED_FIELD, + invoice.CREDIT_FIELD, + invoice.BALANCE_FIELD, + ] + export_columns_list = [ invoice.INVOICE_DATE_FIELD, invoice.PROJECT_FIELD, @@ -43,31 +70,109 @@ def _prepare(self): ] self.pi_list = self.export_data[invoice.PI_FIELD].unique() + def _get_pi_dataframe(self, data, pi): + def add_dollar_sign(data): + if pandas.isna(data): + return data + else: + return "$" + str(data) + + pi_projects = data[data[invoice.PI_FIELD] == pi].copy().reset_index(drop=True) + + # Remove prepay group data if it's empty + if pandas.isna(pi_projects[invoice.GROUP_NAME_FIELD]).all(): + pi_projects = pi_projects.drop( + [ + invoice.GROUP_NAME_FIELD, + invoice.GROUP_INSTITUTION_FIELD, + invoice.GROUP_BALANCE_FIELD, + invoice.GROUP_BALANCE_USED_FIELD, + ], + axis=1, + ) + + # Add a row containing sums for certain columns + column_sums = list() + sum_columns_list = list() + for column_name in self.TOTAL_COLUMN_LIST: + if column_name in pi_projects.columns: + 
column_sums.append(pi_projects[column_name].sum()) + sum_columns_list.append(column_name) + pi_projects.loc[ + len(pi_projects) + ] = None # Adds a new row to end of dataframe initialized with None + pi_projects.loc[pi_projects.index[-1], invoice.INVOICE_DATE_FIELD] = "Total" + pi_projects.loc[pi_projects.index[-1], sum_columns_list] = column_sums + + # Add dollar sign to certain columns + for column_name in self.DOLLAR_COLUMN_LIST: + if column_name in pi_projects.columns: + pi_projects[column_name] = pi_projects[column_name].apply( + add_dollar_sign + ) + + pi_projects.fillna("", inplace=True) + + return pi_projects + def export(self): - def _export_pi_invoice(pi): - if pandas.isna(pi): - return - pi_projects = self.export_data[self.export_data[invoice.PI_FIELD] == pi] - pi_instituition = pi_projects[invoice.INSTITUTION_FIELD].iat[0] - pi_projects.to_csv( - f"{self.name}/{pi_instituition}_{pi} {self.invoice_month}.csv" + def _create_html_invoice(temp_fd): + environment = Environment(loader=FileSystemLoader(TEMPLATE_DIR_PATH)) + template = environment.get_template("pi_invoice.html") + content = template.render( + data=pi_dataframe, + ) + temp_fd.write(content) + temp_fd.flush() + + def _create_pdf_invoice(temp_fd_name): + chrome_binary_location = os.environ.get( + "CHROME_BIN_PATH", "usr/bin/chromium" + ) + if not os.path.exists(chrome_binary_location): + sys.exit( + f"Chrome binary does not exist at {chrome_binary_location}. 
Make sure the env var CHROME_BIN_PATH is set correctly or that Google Chrome is installed" + ) + + invoice_pdf_path = ( + f"{self.name}/{pi_instituition}_{pi}_{self.invoice_month}.pdf" + ) + subprocess.run( + [ + chrome_binary_location, + "--headless", + "--no-sandbox", + f"--print-to-pdf={invoice_pdf_path}", + "--no-pdf-header-footer", + "file://" + temp_fd_name, + ], + capture_output=True, ) self._filter_columns() + if not os.path.exists( self.name ): # self.name is name of folder storing invoices os.mkdir(self.name) for pi in self.pi_list: - _export_pi_invoice(pi) + if pandas.isna(pi): + continue + + pi_dataframe = self._get_pi_dataframe(self.export_data, pi) + pi_instituition = pi_dataframe[invoice.INSTITUTION_FIELD].iat[0] + + with tempfile.NamedTemporaryFile(mode="w", suffix=".html") as temp_fd: + _create_html_invoice(temp_fd) + _create_pdf_invoice(temp_fd.name) def export_s3(self, s3_bucket): def _export_s3_pi_invoice(pi_invoice): pi_invoice_path = os.path.join(self.name, pi_invoice) striped_invoice_path = os.path.splitext(pi_invoice_path)[0] - output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.csv" - output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.csv" + output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.pdf" + output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.pdf" s3_bucket.upload_file(pi_invoice_path, output_s3_path) s3_bucket.upload_file(pi_invoice_path, output_s3_archive_path) diff --git a/process_report/templates/pi_invoice.html b/process_report/templates/pi_invoice.html new file mode 100644 index 0000000..7a75a5f --- /dev/null +++ b/process_report/templates/pi_invoice.html @@ -0,0 +1,73 @@ + + + + + + + + + + + + {% for col in data.columns %} + + {% endfor %} + + + {% for i, row in data.iterrows() %} + + {% for field in row %} + {% if i == data.index[-1] %} + {% if field %} + + {% 
else %} + + {% endif %} + {% else %} + + {% endif %} + {% endfor %} + + {% endfor %} +
{{col}}
{{field}}{{field}}
+ + + + diff --git a/process_report/tests/unit/invoices/test_pi_specific_invoice.py b/process_report/tests/unit/invoices/test_pi_specific_invoice.py index c96b5b7..3884ecc 100644 --- a/process_report/tests/unit/invoices/test_pi_specific_invoice.py +++ b/process_report/tests/unit/invoices/test_pi_specific_invoice.py @@ -1,68 +1,147 @@ from unittest import TestCase, mock import tempfile import pandas -import os from process_report.tests import util as test_utils -class TestExportPICSV(TestCase): - def setUp(self): - data = { - "Invoice Month": ["2023-01", "2023-01", "2023-01", "2023-01", "2023-01"], - "Manager (PI)": ["PI1", "PI1", "PI1", "PI2", "PI2"], - "Institution": ["BU", "BU", "BU", "HU", "HU"], - "Project - Allocation": [ - "ProjectA", - "ProjectB", - "ProjectC", - "ProjectD", - "ProjectE", - ], - "Untouch Data Column": ["DataA", "DataB", "DataC", "DataD", "DataE"], - "Is Billable": [True, True, True, True, True], - "Missing PI": [False, False, False, False, False], - } - self.dataframe = pandas.DataFrame(data) - self.invoice_month = data["Invoice Month"][0] +class TestPISpecificInvoice(TestCase): + def _get_test_invoice( + self, + pi, + institution, + balance, + is_billable=None, + missing_pi=None, + group_name=None, + ): + if not is_billable: + is_billable = [True for _ in range(len(pi))] - @mock.patch("process_report.invoices.invoice.Invoice._filter_columns") - def test_export_pi(self, mock_filter_cols): - mock_filter_cols.return_value = self.dataframe + if not missing_pi: + missing_pi = [False for _ in range(len(pi))] - output_dir = tempfile.TemporaryDirectory() - pi_inv = test_utils.new_pi_specific_invoice( - output_dir.name, invoice_month=self.invoice_month, data=self.dataframe + if not group_name: + group_name = [None for _ in range(len(pi))] + + return pandas.DataFrame( + { + "Manager (PI)": pi, + "Institution": institution, + "Is Billable": is_billable, + "Missing PI": missing_pi, + "Prepaid Group Name": group_name, + "Prepaid Group Institution": ["" 
for _ in range(len(pi))], + "Prepaid Group Balance": [0 for _ in range(len(pi))], + "Prepaid Group Used": [0 for _ in range(len(pi))], + "Balance": balance, + } ) - pi_inv.process() - pi_inv.export() - pi_csv_1 = f'{self.dataframe["Institution"][0]}_{self.dataframe["Manager (PI)"][0]} {self.dataframe["Invoice Month"][0]}.csv' - pi_csv_2 = f'{self.dataframe["Institution"][3]}_{self.dataframe["Manager (PI)"][3]} {self.dataframe["Invoice Month"][3]}.csv' - self.assertIn(pi_csv_1, os.listdir(output_dir.name)) - self.assertIn(pi_csv_2, os.listdir(output_dir.name)) - self.assertEqual( - len(os.listdir(output_dir.name)), - len(self.dataframe["Manager (PI)"].unique()), + + def test_get_pi_dataframe(self): + def add_dollar_sign(data): + if pandas.isna(data): + return data + else: + return "$" + str(data) + + test_invoice = self._get_test_invoice( + ["PI1", "PI1", "PI2", "PI2"], + [ + "BU", + "BU", + "HU", + "HU", + ], + [100, 200, 300, 400], + group_name=[None, "G1", None, None], + ) + answer_invoice_pi1 = ( + test_invoice[test_invoice["Manager (PI)"] == "PI1"] + .copy() + .reset_index(drop=True) + ) + answer_invoice_pi1.loc[len(answer_invoice_pi1)] = None + answer_invoice_pi1.loc[ + answer_invoice_pi1.index[-1], ["Invoice Month", "Balance"] + ] = ["Total", 300] + for column_name in [ + "Prepaid Group Balance", + "Prepaid Group Used", + "Balance", + ]: + answer_invoice_pi1[column_name] = answer_invoice_pi1[column_name].apply( + add_dollar_sign + ) + answer_invoice_pi1.fillna("", inplace=True) + + answer_invoice_pi2 = ( + test_invoice[test_invoice["Manager (PI)"] == "PI2"] + .copy() + .reset_index(drop=True) + ) + answer_invoice_pi2.loc[len(answer_invoice_pi2)] = None + answer_invoice_pi2.loc[ + answer_invoice_pi2.index[-1], ["Invoice Month", "Balance"] + ] = ["Total", 700] + answer_invoice_pi2 = answer_invoice_pi2.drop( + [ + "Prepaid Group Name", + "Prepaid Group Institution", + "Prepaid Group Balance", + "Prepaid Group Used", + ], + axis=1, ) + 
answer_invoice_pi2["Balance"] = answer_invoice_pi2["Balance"].apply( + add_dollar_sign + ) + answer_invoice_pi2.fillna("", inplace=True) + + pi_inv = test_utils.new_pi_specific_invoice(data=test_invoice) + output_invoice = pi_inv._get_pi_dataframe(test_invoice, "PI1") + self.assertTrue(answer_invoice_pi1.equals(output_invoice)) + + output_invoice = pi_inv._get_pi_dataframe(test_invoice, "PI2") + self.assertTrue(answer_invoice_pi2.equals(output_invoice)) - pi_df = pandas.read_csv(output_dir.name + "/" + pi_csv_1) - self.assertEqual(len(pi_df["Manager (PI)"].unique()), 1) - self.assertEqual( - pi_df["Manager (PI)"].unique()[0], self.dataframe["Manager (PI)"][0] + @mock.patch("process_report.invoices.invoice.Invoice._filter_columns") + @mock.patch("os.path.exists") + @mock.patch("subprocess.run") + def test_export_pi(self, mock_subprocess_run, mock_path_exists, mock_filter_cols): + invoice_month = "2024-10" + test_invoice = self._get_test_invoice( + ["PI1", "PI1", "PI2", "PI2"], + [ + "BU", + "BU", + "HU", + "HU", + ], + [100, 200, 300, 400], + group_name=[None, "G1", None, None], ) - self.assertIn("ProjectA", pi_df["Project - Allocation"].tolist()) - self.assertIn("ProjectB", pi_df["Project - Allocation"].tolist()) - self.assertIn("ProjectC", pi_df["Project - Allocation"].tolist()) + mock_filter_cols.return_value = test_invoice + mock_path_exists.return_value = True + output_dir = tempfile.TemporaryDirectory() - pi_df = pandas.read_csv(output_dir.name + "/" + pi_csv_2) - self.assertEqual(len(pi_df["Manager (PI)"].unique()), 1) - self.assertEqual( - pi_df["Manager (PI)"].unique()[0], self.dataframe["Manager (PI)"][3] + pi_inv = test_utils.new_pi_specific_invoice( + output_dir.name, invoice_month, data=test_invoice ) + pi_inv.process() + pi_inv.export() + pi_pdf_1 = f"{output_dir.name}/BU_PI1_{invoice_month}.pdf" + pi_pdf_2 = f"{output_dir.name}/HU_PI2_{invoice_month}.pdf" - self.assertIn("ProjectD", pi_df["Project - Allocation"].tolist()) - self.assertIn("ProjectE", 
pi_df["Project - Allocation"].tolist()) - self.assertNotIn("ProjectA", pi_df["Project - Allocation"].tolist()) - self.assertNotIn("ProjectB", pi_df["Project - Allocation"].tolist()) - self.assertNotIn("ProjectC", pi_df["Project - Allocation"].tolist()) + for i, pi_pdf_path in enumerate([pi_pdf_1, pi_pdf_2]): + chrome_arglist, _ = mock_subprocess_run.call_args_list[i] + answer_arglist = [ + "usr/bin/chromium", + "--headless", + "--no-sandbox", + f"--print-to-pdf={pi_pdf_path}", + "--no-pdf-header-footer", + ] + for answer_arg in answer_arglist: + self.assertTrue(answer_arg in chrome_arglist[0]) diff --git a/requirements.txt b/requirements.txt index 748b45a..4eadb6c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ git+https://github.com/CCI-MOC/nerc-rates@74eb4a7#egg=nerc_rates pandas pyarrow boto3 +Jinja2