Skip to content

Commit

Permalink
Exported PI invoices as PDFs
Browse files Browse the repository at this point in the history
The PI-specific dataframes will first be converted to HTML
tables using Jinja templates, and then converted to PDFs using
Chromium. Now, users of the script must provide a path to the
Chromium/Chrome binary through the env var `CHROME_BIN_PATH`.

An HTML template folder has been added, and the test cases
for the PI-specific invoice will now both check whether the
dataframe is formatted correctly and if the PDFs are
correctly generated. The Dockerfile has been updated to install Chromium.
  • Loading branch information
QuanMPhm committed Jan 18, 2025
1 parent e95349a commit 8e9798c
Show file tree
Hide file tree
Showing 5 changed files with 320 additions and 62 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM python:3.11-slim

WORKDIR /app

RUN apt-get update && apt-get install -y git
RUN apt-get update && apt-get install -y git chromium

COPY requirements.txt .
RUN pip install -r requirements.txt
Expand Down
125 changes: 115 additions & 10 deletions process_report/invoices/pi_specific_invoice.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
import os
import sys
from dataclasses import dataclass
import subprocess
import tempfile
import logging

import pandas
from jinja2 import Environment, FileSystemLoader

import process_report.invoices.invoice as invoice
import process_report.util as util


# Folder holding the Jinja templates used to render invoices. The path is
# relative, so it is resolved against the process's current working directory.
TEMPLATE_DIR_PATH = "process_report/templates"


# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): this configures the root logger as a side effect of importing
# this module — confirm that is intended rather than leaving logging setup to
# the program's entry point.
logging.basicConfig(level=logging.INFO)


@dataclass
class PIInvoice(invoice.Invoice):
"""
Expand All @@ -15,6 +27,21 @@ class PIInvoice(invoice.Invoice):
- NewPICreditProcessor
"""

TOTAL_COLUMN_LIST = [
invoice.COST_FIELD,
invoice.CREDIT_FIELD,
invoice.BALANCE_FIELD,
]

DOLLAR_COLUMN_LIST = [
invoice.RATE_FIELD,
invoice.GROUP_BALANCE_FIELD,
invoice.COST_FIELD,
invoice.GROUP_BALANCE_USED_FIELD,
invoice.CREDIT_FIELD,
invoice.BALANCE_FIELD,
]

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PROJECT_FIELD,
Expand Down Expand Up @@ -43,31 +70,109 @@ def _prepare(self):
]
self.pi_list = self.export_data[invoice.PI_FIELD].unique()

def _get_pi_dataframe(self, data, pi):
    """Build the display-ready invoice table for a single PI.

    Selects the rows of `data` belonging to `pi`, drops the prepay group
    columns when the PI has no group information at all, appends a "Total"
    row holding the sums of the columns in `TOTAL_COLUMN_LIST`, prefixes the
    values of the columns in `DOLLAR_COLUMN_LIST` with "$", and finally
    replaces every missing value with an empty string.

    Returns a new dataframe; `data` itself is not modified.
    """

    def add_dollar_sign(value):
        # Missing values are passed through untouched so that they are
        # blanked by the final fillna instead of being stringified.
        return value if pandas.isna(value) else "$" + str(value)

    pi_rows = data[data[invoice.PI_FIELD] == pi].copy().reset_index(drop=True)

    # Prepay group columns carry no information when every group name is
    # missing, so leave them out of the invoice entirely.
    if pandas.isna(pi_rows[invoice.GROUP_NAME_FIELD]).all():
        pi_rows = pi_rows.drop(
            columns=[
                invoice.GROUP_NAME_FIELD,
                invoice.GROUP_INSTITUTION_FIELD,
                invoice.GROUP_BALANCE_FIELD,
                invoice.GROUP_BALANCE_USED_FIELD,
            ]
        )

    # Compute the totals before the extra row exists, so that the sums only
    # cover the PI's real project rows.
    totalled_columns = [
        name for name in self.TOTAL_COLUMN_LIST if name in pi_rows.columns
    ]
    totals = [pi_rows[name].sum() for name in totalled_columns]

    # Append an all-empty row, then fill in its label and the totals.
    pi_rows.loc[len(pi_rows)] = None
    total_row_label = pi_rows.index[-1]
    pi_rows.loc[total_row_label, invoice.INVOICE_DATE_FIELD] = "Total"
    pi_rows.loc[total_row_label, totalled_columns] = totals

    # Format monetary columns (including the totals just added).
    for name in self.DOLLAR_COLUMN_LIST:
        if name in pi_rows.columns:
            pi_rows[name] = pi_rows[name].apply(add_dollar_sign)

    pi_rows.fillna("", inplace=True)

    return pi_rows

def export(self):
def _export_pi_invoice(pi):
if pandas.isna(pi):
return
pi_projects = self.export_data[self.export_data[invoice.PI_FIELD] == pi]
pi_instituition = pi_projects[invoice.INSTITUTION_FIELD].iat[0]
pi_projects.to_csv(
f"{self.name}/{pi_instituition}_{pi} {self.invoice_month}.csv"
def _create_html_invoice(temp_fd):
    """Render the current PI's dataframe to HTML and write it to `temp_fd`.

    The `pi_invoice.html` template is loaded from `TEMPLATE_DIR_PATH` and
    rendered with the enclosing scope's `pi_dataframe` passed as `data`.

    `temp_fd` must be a writable text file object.
    """
    # Autoescape so that cell values containing HTML metacharacters (for
    # example "&" or "<" in a project or institution name) are rendered as
    # text instead of being interpreted as markup.
    environment = Environment(
        loader=FileSystemLoader(TEMPLATE_DIR_PATH),
        autoescape=True,
    )
    template = environment.get_template("pi_invoice.html")
    content = template.render(data=pi_dataframe)
    temp_fd.write(content)
    # Flush so that a reader opening the file by name sees the whole document.
    temp_fd.flush()

def _create_pdf_invoice(temp_fd_name):
    """Print the HTML file at `temp_fd_name` to a PDF with headless Chromium.

    The browser binary is read from the env var `CHROME_BIN_PATH`, falling
    back to `/usr/bin/chromium`. The PDF is written into the invoice folder
    (`self.name`) and named after the PI's institution, the PI and the
    invoice month, all taken from the enclosing scope.

    Exits the program if the browser binary does not exist. A failed browser
    run is logged as an error; it does not abort the remaining invoices.
    """
    # The fallback must be an absolute path: a relative "usr/bin/chromium"
    # would be resolved against the current working directory.
    chrome_binary_location = os.environ.get(
        "CHROME_BIN_PATH", "/usr/bin/chromium"
    )
    if not os.path.exists(chrome_binary_location):
        sys.exit(
            f"Chrome binary does not exist at {chrome_binary_location}. Make sure the env var CHROME_BIN_PATH is set correctly or that Google Chrome is installed"
        )

    invoice_pdf_path = (
        f"{self.name}/{pi_instituition}_{pi}_{self.invoice_month}.pdf"
    )
    result = subprocess.run(
        [
            chrome_binary_location,
            "--headless",
            "--no-sandbox",
            f"--print-to-pdf={invoice_pdf_path}",
            "--no-pdf-header-footer",
            "file://" + temp_fd_name,
        ],
        capture_output=True,
    )
    # Output is captured, so surface failures explicitly instead of
    # silently ending up without a PDF.
    if result.returncode != 0:
        logger.error(
            "Chromium exited with code %s while generating %s: %s",
            result.returncode,
            invoice_pdf_path,
            result.stderr.decode(errors="replace"),
        )

self._filter_columns()

if not os.path.exists(
self.name
): # self.name is name of folder storing invoices
os.mkdir(self.name)

for pi in self.pi_list:
_export_pi_invoice(pi)
if pandas.isna(pi):
continue

pi_dataframe = self._get_pi_dataframe(self.export_data, pi)
pi_instituition = pi_dataframe[invoice.INSTITUTION_FIELD].iat[0]

with tempfile.NamedTemporaryFile(mode="w", suffix=".html") as temp_fd:
_create_html_invoice(temp_fd)
_create_pdf_invoice(temp_fd.name)

def export_s3(self, s3_bucket):
def _export_s3_pi_invoice(pi_invoice):
pi_invoice_path = os.path.join(self.name, pi_invoice)
striped_invoice_path = os.path.splitext(pi_invoice_path)[0]
output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.csv"
output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.csv"
output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.pdf"
output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.pdf"
s3_bucket.upload_file(pi_invoice_path, output_s3_path)
s3_bucket.upload_file(pi_invoice_path, output_s3_archive_path)

Expand Down
73 changes: 73 additions & 0 deletions process_report/templates/pi_invoice.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  {# Styles live inside <head> so that the document is structurally valid HTML. #}
  <style>
    table {
      font-family: arial, sans-serif;
      border-collapse: collapse;
      width: 100%;
    }
    td, th {
      border: 1px solid #8d8d8d;
      text-align: left;
      padding: 8px;
    }
    th {
      text-align: center;
    }
    tr {
      page-break-inside: avoid;
    }
    tr:nth-child(even) {
      background-color: #dddddd;
    }
    tr:last-child {
      background-color: #dddddd;
      font-weight: bold;
    }
  </style>
</head>

<body>
  {# `data` is the dataframe passed to render(); one header cell per column. #}
  <table>
    <tr>
      {% for col in data.columns %}
      <th>{{col}}</th>
      {% endfor %}
    </tr>

    {# The last row is rendered differently: non-empty fields become header
       cells and empty fields become borderless blank cells. #}
    {% for i, row in data.iterrows() %}
    <tr>
      {% for field in row %}
        {% if i == data.index[-1] %}
          {% if field %}
          <th>{{field}}</th>
          {% else %}
          <td style="border-width: 0;"></td>
          {% endif %}
        {% else %}
        <td>{{field}}</td>
        {% endif %}
      {% endfor %}
    </tr>
    {% endfor %}
  </table>

  {# The script must stay after the table: it measures the table at parse time. #}
  <script>
    // To ensure the HTML invoice table always fits the page when
    // printed to PDF, the width of the page is assigned to be
    // the width of the table

    var table_width = document.getElementsByTagName('table')[0].clientWidth;
    const style = document.createElement('style');
    style.innerHTML = `
    @page {
        size: ${table_width}px 1200px;
    }
    `;
    document.head.appendChild(style);
  </script>
</body>
</html>
Loading

0 comments on commit 8e9798c

Please sign in to comment.