Skip to content

Commit

Permalink
Add logs and remove print
Browse files (browse the repository at this point in the history)
  • Loading branch information
quang-ng committed Nov 24, 2024
1 parent a084c57 commit c7525b7
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 36 deletions.
16 changes: 5 additions & 11 deletions dsst_etl/db.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from sqlalchemy.engine.url import make_url
import logging

from sqlalchemy.orm import sessionmaker
from sqlalchemy_utils import create_database, database_exists

from dsst_etl import get_db_engine

from .models import Base

logger = logging.getLogger(__name__)


def get_db_session(is_test=False):
engine = get_db_engine(is_test)
Expand All @@ -15,17 +18,8 @@ def get_db_session(is_test=False):

def init_db(is_test=False):
engine = get_db_engine(is_test)
url = make_url(engine.url)
database = url.database
dialect_name = url.get_dialect().name

print("url: ", url)
print("database: ", database)
print("dialect_name: ", dialect_name)

print("engine.urlengine.urlengine.url: ", engine.url)
print("database_exists(engine.url): ", database_exists(engine.url))
if not database_exists(engine.url):
print("Creating database")
logger.info("Creating database.....")
create_database(engine.url)
Base.metadata.create_all(engine)
50 changes: 25 additions & 25 deletions tests/test_pdf_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,54 +11,43 @@

class TestPDFUploader(unittest.TestCase):

@patch('dsst_etl.upload_pdfs.boto3.client')
@patch("dsst_etl.upload_pdfs.boto3.client")
def setUp(self, mock_boto_client):
# Mock S3 client
print("Setting up test")
self.mock_s3_client = MagicMock()
mock_boto_client.return_value = self.mock_s3_client


init_db(is_test=True)

# Create a new session for each test
self.session = get_db_session()
# Start a transaction that we can roll back after each test
self.transaction = self.session.begin()

inspector = inspect(self.session.bind)
print("Tbl: ", inspector.get_table_names())


# Initialize PDFUploader with the session
self.uploader = PDFUploader(self.session)



def tearDown(self):
print("Tearing down test")
# Rollback the transaction
self.session.rollback()

# Check if the Works table exists before attempting to update or delete
inspector = inspect(self.session.bind)
if 'works' in inspector.get_table_names():
if "works" in inspector.get_table_names():
# Ensure all data is removed
self.session.execute(update(Works).values(provenance_id=None))
self.session.execute(update(Works).values(initial_document_id=None))
self.session.execute(update(Works).values(primary_document_id=None))
self.session.commit()


# Check if the Documents table exists before attempting to update or delete
if 'documents' in inspector.get_table_names():
if "documents" in inspector.get_table_names():
self.session.execute(update(Documents).values(provenance_id=None))
self.session.execute(update(Documents).values(work_id=None))
self.session.commit()


# Check if the Provenance table exists before attempting to delete
if 'provenance' in inspector.get_table_names():
if "provenance" in inspector.get_table_names():
self.session.query(Provenance).delete()

if "documents" in inspector.get_table_names():
Expand All @@ -70,14 +59,15 @@ def tearDown(self):
self.session.commit()
self.session.close()



def test_upload_pdfs_success(self):
# Mock successful upload
self.mock_s3_client.upload_file.return_value = None

base_dir = Path(__file__).resolve().parent
pdf_paths = [base_dir / 'pdf-test' / 'test1.pdf', base_dir / 'pdf-test' / 'test2.pdf']
pdf_paths = [
base_dir / "pdf-test" / "test1.pdf",
base_dir / "pdf-test" / "test2.pdf",
]
successful_uploads, failed_uploads = self.uploader.upload_pdfs(pdf_paths)

self.assertEqual(successful_uploads, pdf_paths)
Expand All @@ -89,15 +79,18 @@ def test_upload_pdfs_failure(self):
self.mock_s3_client.upload_file.side_effect = Exception("Upload failed")

base_dir = Path(__file__).resolve().parent
pdf_paths = [base_dir / 'pdf-test' / 'test1.pdf', base_dir / 'pdf-test' / 'test2.pdf']
pdf_paths = [
base_dir / "pdf-test" / "test1.pdf",
base_dir / "pdf-test" / "test2.pdf",
]
successful_uploads, failed_uploads = self.uploader.upload_pdfs(pdf_paths)

self.assertEqual(successful_uploads, [])
self.assertEqual(failed_uploads, pdf_paths)

def test_create_document_records(self):
base_dir = Path(__file__).resolve().parent
successful_uploads = [base_dir / 'pdf-test' / 'test1.pdf']
successful_uploads = [base_dir / "pdf-test" / "test1.pdf"]
documents = self.uploader.create_document_records(successful_uploads)

self.assertEqual(len(documents), 1)
Expand All @@ -107,7 +100,7 @@ def test_create_provenance_record(self):
documents = [self.session.query(Documents).first()]
if documents[0] is None:
base_dir = Path(__file__).resolve().parent
successful_uploads = [base_dir / 'pdf-test' / 'test1.pdf']
successful_uploads = [base_dir / "pdf-test" / "test1.pdf"]
documents = self.uploader.create_document_records(successful_uploads)

self.session.add_all(documents)
Expand All @@ -122,11 +115,17 @@ def test_initial_work_for_document(self):
document = self.session.query(Documents).first()
if document is None:
base_dir = Path(__file__).resolve().parent
successful_uploads = [base_dir / 'pdf-test' / 'test1.pdf']
successful_uploads = [base_dir / "pdf-test" / "test1.pdf"]
documents = self.uploader.create_document_records(successful_uploads)
document = documents[0]

provenance = Provenance(pipeline_name="test", version="0.0.1", compute="test", personnel="test", comment="test")
provenance = Provenance(
pipeline_name="test",
version="0.0.1",
compute="test",
personnel="test",
comment="test",
)
self.session.add_all([document, provenance])
self.session.commit()

Expand All @@ -153,5 +152,6 @@ def test_link_documents_to_work(self):
for doc in documents:
self.assertEqual(doc.work_id, work_id)

if __name__ == '__main__':
unittest.main()

if __name__ == "__main__":
unittest.main()

0 comments on commit c7525b7

Please sign in to comment.