diff --git a/scripts/print_umls_stats.py b/scripts/print_umls_stats.py index f4de20e..f92abb5 100644 --- a/scripts/print_umls_stats.py +++ b/scripts/print_umls_stats.py @@ -5,6 +5,7 @@ console = Console() + async def get_database_statistics(): # Connect to MongoDB client = AsyncIOMotorClient("mongodb://root:password@localhost:27017") @@ -15,9 +16,13 @@ async def get_database_statistics(): total_docs = await collection.count_documents({}) # Count documents with missing fields - missing_definitions = await collection.count_documents({"definitions": {"$size": 0}}) + missing_definitions = await collection.count_documents( + {"definitions": {"$size": 0}} + ) missing_preferred_term = await collection.count_documents({"preferred_term": ""}) - missing_semantic_types = await collection.count_documents({"semantic_types": {"$size": 0}}) + missing_semantic_types = await collection.count_documents( + {"semantic_types": {"$size": 0}} + ) missing_synonyms = await collection.count_documents({"synonyms": {"$size": 0}}) # Calculate percentages @@ -31,15 +36,29 @@ def percentage(count): table.add_column("Percentage", style="green") table.add_row("Total Documents", str(total_docs), "100%") - table.add_row("Missing Definitions", str(missing_definitions), percentage(missing_definitions)) - table.add_row("Missing Preferred Term", str(missing_preferred_term), percentage(missing_preferred_term)) - table.add_row("Missing Semantic Types", str(missing_semantic_types), percentage(missing_semantic_types)) - table.add_row("Missing Synonyms", str(missing_synonyms), percentage(missing_synonyms)) + table.add_row( + "Missing Definitions", str(missing_definitions), percentage(missing_definitions) + ) + table.add_row( + "Missing Preferred Term", + str(missing_preferred_term), + percentage(missing_preferred_term), + ) + table.add_row( + "Missing Semantic Types", + str(missing_semantic_types), + percentage(missing_semantic_types), + ) + table.add_row( + "Missing Synonyms", str(missing_synonyms), percentage(missing_synonyms) + ) console.print(table) + async def main(): await get_database_statistics() + if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main())