From b676542c3e291c0a4cf172ea0aab7cdbd25ba792 Mon Sep 17 00:00:00 2001 From: Thanh Nguyen Date: Wed, 28 Aug 2024 23:09:29 -0500 Subject: [PATCH] add original data and explanation --- .../original_test_data/DataImportOrder.txt | 1 + .../original_test_data/annotation_file.json | 174 ++++++++++++++++++ .../test_dataframe_injection.py | 39 ++++ 3 files changed, 214 insertions(+) create mode 100644 tests/dataframe_tests/test_data/midrc/original_test_data/annotation_file.json diff --git a/tests/dataframe_tests/test_data/midrc/original_test_data/DataImportOrder.txt b/tests/dataframe_tests/test_data/midrc/original_test_data/DataImportOrder.txt index c8797b69..cde4f93a 100644 --- a/tests/dataframe_tests/test_data/midrc/original_test_data/DataImportOrder.txt +++ b/tests/dataframe_tests/test_data/midrc/original_test_data/DataImportOrder.txt @@ -8,3 +8,4 @@ ct_series_file dx_series_file mr_series_file rf_series_file +annotation_file diff --git a/tests/dataframe_tests/test_data/midrc/original_test_data/annotation_file.json b/tests/dataframe_tests/test_data/midrc/original_test_data/annotation_file.json new file mode 100644 index 00000000..e59fd1f2 --- /dev/null +++ b/tests/dataframe_tests/test_data/midrc/original_test_data/annotation_file.json @@ -0,0 +1,174 @@ +[ + { + "cases": [ + { + "submitter_id": "case_protesting_solely" + } + ], + "core_metadata_collections": [], + "cr_series_files": [], + "ct_series_files": [ + { + "submitter_id": "ct_series_file_exiling_fineness's" + } + ], + "datasets": [], + "dx_series_files": [], + "imaging_studies": [ + { + "submitter_id": "imaging_study_traffic's_Kaohsiung" + } + ], + "mg_series_files": [], + "mr_series_files": [], + "nm_series_files": [], + "pt_series_files": [], + "rf_series_files": [], + "us_series_files": [], + "xa_series_files": [], + "type": "annotation_file", + "submitter_id": "A_Kp7X9r_1.2.827.1.3", + "annotation_name": "midrc_mdai_chestxr", + "data_category": "annotation_file", + "data_format": "JSON", + "data_type": "MIDRC Annotation", + "file_name": "annotation_file_773863174424.json", + "file_size": 702, + "md5sum": "5d33e550c40249120481045352342345", + "annotation_method": "Clinically_derived", + "case_ids": [ + "Rimbaud's_sweltering", + "boobs_Yaroslavl" + ], + "object_id": "dg.MD1R/0fc28986-0bb2-b12c-ad4c-a32a5d0201f9" + }, + { + "cases": [ + { + "submitter_id": "case_protesting_solely" + } + ], + "core_metadata_collections": [], + "cr_series_files": [], + "ct_series_files": [ + { + "submitter_id": "ct_series_file_exiling_fineness's" + } + ], + "datasets": [], + "dx_series_files": [], + "imaging_studies": [ + { + "submitter_id": "imaging_study_traffic's_Kaohsiung" + } + ], + "mg_series_files": [], + "mr_series_files": [], + "nm_series_files": [], + "pt_series_files": [], + "rf_series_files": [], + "us_series_files": [], + "xa_series_files": [], + "type": "annotation_file", + "submitter_id": "A_Kp7X9r_1.2.827.1.2", + "annotation_name": "midrc_bpr_regions", + "data_category": "annotation_file", + "data_format": "JSON", + "data_type": "Other Annotation", + "file_name": "annotation_file_773863174425.json", + "file_size": 702, + "md5sum": "5d33e550c40249120481045352342543", + "annotation_method": "Retrospective_auto", + "case_ids": [ + "Rimbaud's_sweltering", + "boobs_Yaroslavl" + ], + "object_id": "dg.MD1R/0fc28986-0bb2-c12e-ad4c-a32a5d0201f9" + }, + { + "cases": [ + { + "submitter_id": "case_blessedly_heuristics" + } + ], + "core_metadata_collections": [], + "cr_series_files": [], + "ct_series_files": [ + { + "submitter_id": "ct_series_file_oiliest_distributions" + } + ], + "datasets": [], + "dx_series_files": [], + "imaging_studies": [ + { + "submitter_id": "imaging_study_sriracha_unsolicited" + } + ], + "mg_series_files": [], + "mr_series_files": [], + "nm_series_files": [], + "pt_series_files": [], + "rf_series_files": [], + "us_series_files": [], + "xa_series_files": [], + "type": "annotation_file", + "submitter_id": "A_Kp7X9r_1.2.827.4.3", + "annotation_name": "midrc_lung_segs", + "data_category": "annotation_file", + "data_format": "JSON", + "data_type": "MIDRC Annotation", + "file_name": "annotation_file_773863174424.json", + "file_size": 702, + "md5sum": "5d33e550c40249120481045352342456", + "annotation_method": "Retrospective_expert", + "case_ids": [ + "accusatives_Deon's", + "Purim's_Gary" + ], + "object_id": "dg.MD1R/0fc28986-0bb2-d12f-ad4c-a32a5d0201f9" + }, + { + "cases": [ + { + "submitter_id": "case_blessedly_heuristics" + } + ], + "core_metadata_collections": [], + "cr_series_files": [], + "ct_series_files": [ + { + "submitter_id": "ct_series_file_oiliest_distributions" + } + ], + "datasets": [], + "dx_series_files": [], + "imaging_studies": [ + { + "submitter_id": "imaging_study_sriracha_unsolicited" + } + ], + "mg_series_files": [], + "mr_series_files": [], + "nm_series_files": [], + "pt_series_files": [], + "rf_series_files": [], + "us_series_files": [], + "xa_series_files": [], + "type": "annotation_file", + "submitter_id": "A_Kp7X9r_1.2.827.6.7", + "annotation_name": "mRALE_Mastermind_Challenge", + "data_category": "annotation_file", + "data_format": "JSON", + "data_type": "Other Annotation", + "file_name": "annotation_file_773863174425.json", + "file_size": 702, + "md5sum": "5d33e550c40249120481045352342765", + "annotation_method": "Retrospective_general", + "case_ids": [ + "accusatives_Deon's", + "Purim's_Gary" + ], + "object_id": "dg.MD1R/0fc28986-0bb2-a12b-ad4c-a32a5d0201f9" + } +] \ No newline at end of file diff --git a/tests/dataframe_tests/test_dataframe_injection.py b/tests/dataframe_tests/test_dataframe_injection.py index 9abf855e..5086b3fd 100644 --- a/tests/dataframe_tests/test_dataframe_injection.py +++ b/tests/dataframe_tests/test_dataframe_injection.py @@ -175,6 +175,45 @@ def test_flatten_nested_list(translator): "edge_imagingstudyrelatedtocase", "edge_projectmemberofprogram", ])], indirect=True) def test_nested_props_injection(translator): + """ + Tset to ensure that the nested dataframe has structure like below + data = [ + { + "_data_file_id":"cfd912e8-f9e1-40cc-abd3-6dcb3a16e0b6", + "file_annotations":[ + { + "annotation_method":"Retrospective_auto", + "annotation_name":"midrc_bpr_regions", + "_annotation_file_id":"0c7e701e-cb27-4cbf-a996-eaa9142b633f" + }, + { + "annotation_method":"Clinically_derived", + "annotation_name":"midrc_mdai_chestxr", + "_annotation_file_id": + "f7648cd4-19cc-4e49-8100-f99b15e28e56" + } + ] + }, + { + "_data_file_id":"effe5932-2c26-4919-8544-0d4cb62d20f9", + "file_annotations":[ + { + "annotation_method":"Retrospective_expert", + "annotation_name":"midrc_lung_segs", + "_annotation_file_id":"82bcca4b-cebc-4062-a614-7ef0072d152f" + }, + { + "annotation_method":"Retrospective_general", + "annotation_name":"mRALE_Mastermind_Challenge", + "_annotation_file_id":"8c8558f4-aa4f-487f-b00a-ee4f14b871f6" + } + ] + } + ] + + :param translator: + :return: + """ [collected_leaf_df, final_df] = get_dataframes_from_names( get_spark_session(translator.sc), "midrc",