def get_invoice_bucket():
    """Return the boto3 S3 ``Bucket`` used for invoice storage.

    Configuration is read from the environment:

    * ``S3_KEY_ID`` and ``S3_APP_KEY`` -- credentials (required).
    * ``S3_ENDPOINT`` -- endpoint URL, defaults to
      ``https://s3.us-east-005.backblazeb2.com``.
    * ``S3_BUCKET_NAME`` -- bucket name, defaults to ``nerc-invoicing``.

    Raises:
        SystemExit: if either credential variable is unset. The previous
            code printed a message and then fell through to the ``return``,
            crashing with ``UnboundLocalError`` on ``s3_resource``.
    """
    # Only the credential lookups can raise KeyError, so keep them alone in
    # the try body and build the client afterwards.
    try:
        key_id = os.environ["S3_KEY_ID"]
        app_key = os.environ["S3_APP_KEY"]
    except KeyError:
        sys.exit(
            "Error: Please set the environment variables S3_KEY_ID and S3_APP_KEY"
        )

    s3_resource = boto3.resource(
        service_name="s3",
        endpoint_url=os.environ.get(
            "S3_ENDPOINT", "https://s3.us-east-005.backblazeb2.com"
        ),
        aws_access_key_id=key_id,
        aws_secret_access_key=app_key,
    )
    return s3_resource.Bucket(os.environ.get("S3_BUCKET_NAME", "nerc-invoicing"))


def get_iso8601_time():
    """Return the current UTC time as a compact ISO 8601 string.

    The format is ``YYYYMMDDTHHMMSSZ``. The trailing ``Z`` designates UTC, so
    the clock is read in UTC rather than in the machine's local timezone.
    """
    return datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
Requires environment variables for S3 authentication to be set", + ) + parser.add_argument( + "--upload-to-s3", + action="store_true", + help="If set, uploads all processed invoices to S3", + ) + parser.add_argument( + "--invoice-month", + required=True, + help="Invoice month to process", + ) parser.add_argument( "--pi-file", required=True, @@ -87,6 +123,13 @@ def main(): required=True, help="File containing list of projects that are non-billable within a specified duration", ) + + parser.add_argument( + "--nonbillable-file", + required=False, + default="nonbillable.csv", + help="Name of nonbillable file", + ) parser.add_argument( "--output-file", required=False, @@ -103,13 +146,19 @@ def main(): "--HU-invoice-file", required=False, default="HU_only.csv", - help="Name of output csv for HU invoices", + help="Name of output csv for HU invoice", ) parser.add_argument( "--HU-BU-invoice-file", required=False, default="HU_BU.csv", - help="Name of output csv for HU and BU invoices", + help="Name of output csv for HU and BU invoice", + ) + parser.add_argument( + "--Lenovo-file", + required=False, + default="Lenovo.csv", + help="Name of output csv for Lenovo SU Types invoice", ) parser.add_argument( "--old-pi-file", @@ -117,7 +166,15 @@ def main(): help="Name of csv file listing previously billed PIs", ) args = parser.parse_args() - merged_dataframe = merge_csv(args.csv_files) + + invoice_month = args.invoice_month + + if args.fetch_from_s3: + csv_files = fetch_S3_invoices(invoice_month) + else: + csv_files = args.csv_files + + merged_dataframe = merge_csv(csv_files) pi = [] projects = [] @@ -126,26 +183,54 @@ def main(): with open(args.projects_file) as file: projects = [line.rstrip() for line in file] - invoice_date = get_invoice_date(merged_dataframe) - print("Invoice date: " + str(invoice_date)) + print("Invoice date: " + str(invoice_month)) - timed_projects_list = timed_projects(args.timed_projects_file, invoice_date) + timed_projects_list = 
def fetch_S3_invoices(invoice_month):
    """Download the service invoices for ``invoice_month`` from S3.

    Every object under ``Invoices/{invoice_month}/Service Invoices/`` in the
    bucket returned by ``get_invoice_bucket()`` is downloaded into the current
    working directory under the last path component of its key.

    Args:
        invoice_month: Month identifier used verbatim in the S3 prefix.

    Returns:
        List of local file names that were downloaded, in listing order.
    """
    invoice_bucket = get_invoice_bucket()
    prefix = f"Invoices/{invoice_month}/Service Invoices/"

    s3_invoice_list = []
    for obj in invoice_bucket.objects.filter(Prefix=prefix):
        local_name = obj.key.split("/")[-1]
        if not local_name:
            # A key ending in "/" (a "folder" placeholder object) has no file
            # name; downloading it to "" would fail.
            continue
        invoice_bucket.download_file(obj.key, local_name)
        # Record the file only once it has actually been downloaded.
        s3_invoice_list.append(local_name)

    return s3_invoice_list
def export_billables(dataframe, output_file):
    """Write ``dataframe`` to ``output_file`` as CSV without the index column."""
    dataframe.to_csv(output_file, index=False)


def export_pi_billables(dataframe: pandas.DataFrame, output_folder, invoice_month):
    """Write one CSV per PI into ``output_folder``.

    Each file is named ``{institution}_{pi}_{invoice_month}.csv``, where the
    institution is taken from the first row belonging to that PI. Rows whose
    PI value is missing (NaN) are not exported. The index column is written,
    as before.

    Args:
        dataframe: Invoice rows containing the PI and institution columns.
        output_folder: Directory to write into; created if it does not exist.
        invoice_month: Month identifier used in the file names.
    """
    # makedirs(exist_ok=True) also handles nested paths and avoids the
    # check-then-create race of os.path.exists() + os.mkdir().
    os.makedirs(output_folder, exist_ok=True)

    for pi in dataframe[PI_FIELD].unique():
        if pandas.isna(pi):
            continue
        pi_projects = dataframe[dataframe[PI_FIELD] == pi]
        pi_institution = pi_projects[INSTITUTION_FIELD].iat[0]
        pi_projects.to_csv(
            os.path.join(output_folder, f"{pi_institution}_{pi}_{invoice_month}.csv")
        )
def upload_to_s3(invoice_list: list, invoice_month):
    """Upload each local invoice file to S3, plus a timestamped archive copy.

    For a local file ``name.ext`` two objects are written to the bucket
    returned by ``get_invoice_bucket()``:

    * ``Invoices/{invoice_month}/{name} {invoice_month}.csv``
    * ``Invoices/{invoice_month}/Archive/{name} {invoice_month} {timestamp}.csv``

    Only the last extension is stripped (``a.b.csv`` -> ``a.b``); any
    directory part of the local path is kept in the key.

    Args:
        invoice_list: Local file paths to upload.
        invoice_month: Month identifier used in the S3 keys.
    """
    invoice_bucket = get_invoice_bucket()
    # Take the timestamp once so every archive copy from this run carries the
    # same stamp, even if the uploads cross a second boundary.
    archive_stamp = get_iso8601_time()

    for invoice_filename in invoice_list:
        stripped_filename = os.path.splitext(invoice_filename)[0]
        # S3 keys use "/" as delimiter; this is a no-op where os.sep is "/".
        stripped_filename = stripped_filename.replace(os.sep, "/")

        invoice_s3_path = (
            f"Invoices/{invoice_month}/{stripped_filename} {invoice_month}.csv"
        )
        invoice_s3_path_archive = (
            f"Invoices/{invoice_month}/Archive/"
            f"{stripped_filename} {invoice_month} {archive_stamp}.csv"
        )
        invoice_bucket.upload_file(invoice_filename, invoice_s3_path)
        invoice_bucket.upload_file(invoice_filename, invoice_s3_path_archive)
class TestUploadToS3(TestCase):
    """Checks the S3 keys that ``upload_to_s3`` derives from local file names."""

    @mock.patch("process_report.process_report.get_invoice_bucket")
    @mock.patch("process_report.process_report.get_iso8601_time")
    def test_remove_prefix(self, mock_get_time, mock_get_bucket):
        """Only the last extension is stripped before building the S3 keys."""
        mock_bucket = mock.MagicMock()
        mock_get_bucket.return_value = mock_bucket
        mock_get_time.return_value = "0"

        invoice_month = "2024-03"
        filenames = ["test.csv", "test2.test.csv", "test3"]
        answers = [
            ("test.csv", f"Invoices/{invoice_month}/test {invoice_month}.csv"),
            (
                "test.csv",
                f"Invoices/{invoice_month}/Archive/test {invoice_month} 0.csv",
            ),
            (
                "test2.test.csv",
                f"Invoices/{invoice_month}/test2.test {invoice_month}.csv",
            ),
            (
                "test2.test.csv",
                f"Invoices/{invoice_month}/Archive/test2.test {invoice_month} 0.csv",
            ),
            ("test3", f"Invoices/{invoice_month}/test3 {invoice_month}.csv"),
            ("test3", f"Invoices/{invoice_month}/Archive/test3 {invoice_month} 0.csv"),
        ]

        process_report.upload_to_s3(filenames, invoice_month)

        # Compare the complete call list: iterating over call_args_list alone
        # would pass vacuously if upload_file were never (or too rarely) called.
        expected_calls = [mock.call(local, remote) for local, remote in answers]
        self.assertEqual(mock_bucket.upload_file.call_args_list, expected_calls)
# Seed known_hosts with GitHub's public host keys so the clone below does not
# stop at an interactive host-key prompt. known_hosts is a regular file, so
# test with -e; the previous -d (directory) test was always true and appended
# the keys again on every run.
if [ ! -e ~/.ssh/known_hosts ]; then
    # A quoted heredoc writes the entries verbatim, without the leading
    # whitespace and blank line a multi-line echo string would add.
    cat >> ~/.ssh/known_hosts <<'EOF'
github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl
github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=
github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=
EOF
fi

if [ ! -d ./non-billable-projects ]; then
    git clone git@github-nonbillable:CCI-MOC/non-billable-projects.git ./non-billable-projects
fi

# Process the previous calendar month (requires GNU date for --date).
INVOICE_MONTH=$(date --date="$(date +%Y-%m-01) -1 month" +%Y-%m)

# NOTE(review): no CSV files and no --fetch-from-s3 are passed here, so the
# report is run with an empty input list -- confirm this is intended.
python process_report/process_report.py \
    --invoice-month "$INVOICE_MONTH" \
    --pi-file ./non-billable-projects/pi.txt \
    --projects-file ./non-billable-projects/projects.txt \
    --timed-projects-file ./non-billable-projects/timed_projects.txt \
    --old-pi-file old_pi.csv