Skip to content

Commit

Permalink
Add command-line interface for PDF upload script
Browse files Browse the repository at this point in the history
  • Loading branch information
quang-ng committed Jan 22, 2025
1 parent 529d665 commit 5e185a6
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions dsst_etl/upload_pdfs_title_is_pmid.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import argparse
import hashlib

import boto3
Expand Down Expand Up @@ -111,3 +112,39 @@ def _create_document_entries(self, key, file_content, file_hash, provenance):
self.db_session.flush()
except Exception as e:
logger.error(f"Error running Oddpub analysis: {str(e)}")


def main(argv=None) -> int:
    """Run the PDF upload command-line interface.

    Parses ``--db-url`` and ``--pdf-path``, opens a SQLAlchemy session bound
    to the given database URL, and passes the path to
    ``UploadPDFsTitleIsPMID.process_s3_inventory``. The session is always
    closed before returning.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, which is what the
            script entry point relies on.

    Returns:
        ``0`` if processing completed without raising, ``1`` if an exception
        was caught and logged.

    Raises:
        SystemExit: Raised by argparse when required arguments are missing
            or invalid, or when ``--help`` is requested.
    """
    parser = argparse.ArgumentParser(
        description="Upload PDFs to S3 where the title is the PMID."
    )

    parser.add_argument(
        "--db-url",
        required=True,
        help="The database connection URL. This should be a valid SQLAlchemy database URL.",
    )
    # NOTE(review): this value is handed to process_s3_inventory() below;
    # confirm the help text matches what that method actually expects.
    parser.add_argument(
        "--pdf-path",
        required=True,
        help="The path to the PDF file to upload. This should be a valid file path on your system.",
    )

    args = parser.parse_args(argv)

    # Imported at function scope so that both `sqlalchemy` and its `orm`
    # submodule are guaranteed to be loaded here; accessing a submodule as an
    # attribute is only safe once that submodule has been imported.
    import sqlalchemy.orm

    # Set up the database session
    engine = sqlalchemy.create_engine(args.db_url)
    Session = sqlalchemy.orm.sessionmaker(bind=engine)
    db_session = Session()

    try:
        uploader = UploadPDFsTitleIsPMID(db_session)
        uploader.process_s3_inventory(args.pdf_path)
    except Exception as e:
        logger.error(f"An error occurred: {str(e)}")
        # Report the failure to the caller instead of silently succeeding,
        # so shells and schedulers see a non-zero exit status.
        return 1
    finally:
        db_session.close()

    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())

0 comments on commit 5e185a6

Please sign in to comment.