-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
4502-implement-pydicomreader #4550
Conversation
Signed-off-by: Yiheng Wang <vennw@nvidia.com>
Hi @wyli @Nic-Ma I just submitted this draft PR, and it has been tested with the data in |
/black |
Signed-off-by: monai-bot <monai.miccai2019@gmail.com>
Signed-off-by: Yiheng Wang <vennw@nvidia.com>
Signed-off-by: Yiheng Wang <vennw@nvidia.com>
Signed-off-by: Yiheng Wang <vennw@nvidia.com>
I tried out the reader in this PR using the attached script, which is based on @AHarouni's notebook:
script
import glob
import json
import os
import shutil
import itk
import requests
import monai
from monai.apps import download_and_extract
# Collection / modality pairs that were tried with this script, with the outcome
# observed for each one. Only the pair assigned below is active; copy another
# pair into the two assignments to switch collection.
#
#   "Pancreatic-CT-CBCT-SEG", "RTSTRUCT"      works correctly
#   "Pediatric-CT-SEG", "RTSTRUCT"
#   "LCTSC", "RTSTRUCT"                       works correctly
#   "4D-Lung", "RTSTRUCT"                     works correctly
#   "Breast-MRI-NACT-Pilot", "SEG"            access denied when downloading the dicom images
#   "C4KC-KiTS", "SEG"                        works correctly
#   DRO-Toolkit
#   "HCC-TACE-Seg", "SEG"                     works with workaround, some images are blocked though
#   "ISPY1", "SEG"                            works with workaround
#   "LIDC-IDRI", "SEG"                        works fine (active)
#   Lung Phantom                              skipped
#   "Lung-Fused-CT-Pathology", "SEG"          strange SEG obj: each slice is a separate dicom, not handled for now
#   "NSCLC Radiogenomics", "SEG"              works fine
#   "NSCLC-Radiomics", "SEG"                  works fine
#   "NSCLC-Radiomics-Interobserver1", "SEG"   works fine
#   "PROSTATEx", "SEG"                        works fine, resolution difference will cause AI errors
#   "QIBA CT-1C", "SEG"                       doesn't work: no reference information at all, not even instance ID
#   "QIN LUNG CT", "SEG"                      works fine, SEG is from an AI algorithm
#   "QIN-PROSTATE-Repeatability", "SEG"
#   "RIDER Lung CT", "SEG"
pickedCollection = "LIDC-IDRI"
pickedModality = "SEG"

# Change this base URL to access "restricted" collections that require logging in with an account.
# TODO: add optional code to handle authentication using info from https://wiki.cancerimagingarchive.net/x/X4ATBg (base URL, token creation)
# TODO: add code to demonstrate the Data Retriever parameters and credential file from https://wiki.cancerimagingarchive.net/x/2QKPBQ
baseurl = "https://services.cancerimagingarchive.net/nbia-api/services/v1/"

# Local root folder (with trailing slash) under which everything for the collection is stored.
ROOT_FLD_DCM = f"{pickedCollection}/"
def restCall(url, itemName):
    """Call the REST API at ``baseurl`` and collect one field from every returned record.

    Args:
        url: endpoint name plus query string; appended verbatim to the
            module-level ``baseurl``.
        itemName: key to extract from each record of the JSON response.

    Returns:
        A list holding the ``itemName`` value of every record that contains
        that key, in response order. Empty when the response body is empty.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
    """
    # Without a timeout a stalled server would block this call forever.
    response = requests.get(baseurl + url, timeout=60)
    if not response.text:  # some calls return an empty response
        return []
    # Some calls return records without the requested key; those are skipped.
    return [record[itemName] for record in response.json() if itemName in record]
def download_series_uid(series_uid):
    """Download and unpack one series into ``<pickedCollection>/<series_uid>``.

    The download is skipped when the target directory already exists.

    Args:
        series_uid: SeriesInstanceUID of the series to fetch.

    Returns:
        The series directory path, with a trailing ``/``.
    """
    data_dir = os.path.join(pickedCollection, f"{series_uid}")
    # Build the URL from ``baseurl`` so that pointing ``baseurl`` at another
    # endpoint (e.g. for restricted collections) also applies to downloads.
    url = baseurl + "getImage?SeriesInstanceUID=" + series_uid
    if not os.path.exists(data_dir):
        download_and_extract(url=url, filepath=data_dir + ".zip", output_dir=data_dir)
    return data_dir + "/"
def openDCM(dcmPath):
    """Read a fixed set of header tags from a DICOM file, skipping pixel data.

    Args:
        dcmPath: path handed to ``monai.data.PydicomReader.read``.

    Returns:
        A ``(ds, modality)`` tuple: the object returned by the reader and the
        value of its (0008,0060) element.
    """
    modality_tag = (0x0008, 0x0060)  # used to find the type; (0x0002, 0x0002) or (0x0002, 0x0012) could also be used
    wanted_tags = [
        modality_tag,
        (0x0008, 0x1115),  # SEG reference series; worked for CT images
        (0x0008, 0x1140),  # ReferencedImageSequence, needed when the ref series is not populated
        (0x3006, 0x0010),  # RTSTRUCT reference series
        (0x0020, 0x000D),  # StudyInstanceUID
        (0x0010, 0x0010),  # patient tags
        (0x0010, 0x0020),
        (0x0020, 0x0011),  # series number tags
        (0x0020, 0x0012),
    ]
    reader = monai.data.PydicomReader(stop_before_pixels=True, specific_tags=wanted_tags)
    ds = reader.read(dcmPath)
    return ds, ds[modality_tag].value
def dcmSafeKeyAccess(ds, findfirstRefInst=False):
    """Search a dataset, including nested sequences, for a referenced UID.

    Elements of ``ds`` are visited in iteration order. An element whose tag is
    one of the looked-up keys is returned immediately. Otherwise the first
    non-empty value found inside any nested sequence (``VR == "SQ"``) is
    returned once the whole level has been scanned.

    Earlier versions overwrote the nested result on every sequence item, so a
    match found in one sequence was lost when a later sequence had none; the
    first non-empty nested match is now kept.

    Args:
        ds: iterable of data elements exposing ``tag``, ``VR`` and ``value``;
            sequence elements must iterate over their items.
        findfirstRefInst: when True, also accept (0008,1155)
            Referenced SOP Instance UID as a match.

    Returns:
        The matched element value, or ``""`` when nothing matches.
    """
    look4keys = [(0x0020, 0x000E)]  # nested ref SeriesInstanceUID, worked for CT images
    if findfirstRefInst:
        look4keys.append((0x0008, 0x1155))  # Referenced SOP Instance UID

    nested_match = ""
    for elem in ds:
        # Only descend while nothing has been found yet, so the first nested
        # match wins and cannot be clobbered by a later, empty sequence.
        if elem.VR == "SQ" and nested_match == "":
            for item in elem:
                found = dcmSafeKeyAccess(item, findfirstRefInst)
                if found != "":
                    nested_match = found
                    break
        # A match at this level still takes precedence over nested results.
        if elem.tag in look4keys:
            return elem.value
    return nested_match
def match_refInstance_w_each_series_in_Study(StudyInstanceUID, refInstID):
    """Find the series of a study that contains a given SOP instance.

    Workaround for SEG / RTSTRUCT objects whose referenced series UID is not
    populated: every series of the study is listed, and the instance UIDs of
    each one are compared with one referenced instance UID.

    Args:
        StudyInstanceUID: study whose series are searched.
        refInstID: SOP instance UID to look for.

    Returns:
        The SeriesInstanceUID of the first series containing ``refInstID``,
        or ``""`` when no series matches.
    """
    study_series = restCall("getSeries?StudyInstanceUID=" + StudyInstanceUID, "SeriesInstanceUID")
    for candidate in study_series:
        sop_uids = restCall("getSOPInstanceUIDs?SeriesInstanceUID=" + candidate, "SOPInstanceUID")
        if any(uid == refInstID for uid in sop_uids):
            return candidate
    return ""  # failed
def getRefUUIDs(input_dir):
    """Collect the referenced series UID of every SEG / RTSTRUCT file under a folder.

    Walks ``input_dir`` recursively and opens each ``.dcm`` file (extension
    compared case-insensitively). Files of any other modality are ignored.

    Args:
        input_dir: root folder to scan.

    Returns:
        One entry per SEG / RTSTRUCT file, in walk order. An entry is ``""``
        when the referenced series could not be determined.
    """
    collected = []
    for folder, _subdirs, names in os.walk(input_dir):
        for fileName in names:
            full_path = os.path.join(folder, fileName)
            if os.path.splitext(full_path)[1].lower() != ".dcm":
                continue
            ds, modality = openDCM(dcmPath=full_path)
            if modality not in ["SEG", "RTSTRUCT"]:
                continue
            refUUID = dcmSafeKeyAccess(ds=ds)
            if refUUID == "":
                # The standard referenced-series tag is empty, so fall back to
                # locating the series through one referenced instance UID.
                print("---DICOM seg is not implemented correctlying standard ref Series tag is empty")
                refInstID = dcmSafeKeyAccess(ds=ds, findfirstRefInst=True)
                refUUID = match_refInstance_w_each_series_in_Study(ds.StudyInstanceUID, refInstID)
            parentDirName = os.path.basename(os.path.normpath(os.path.dirname(full_path)))
            print(f"file name is {fileName} parent is {parentDirName} , modality = {modality} refUUID is {refUUID}")
            collected.append(refUUID)
    return collected
# Driver: list the SEG / RTSTRUCT series of the chosen collection, download a few
# of them together with the image series each one references, and arrange them as
#   <collection>/<patient>/<series number>/segRT/   and   .../dcm/
seriesLst = restCall("getSeries?Collection=" + pickedCollection + "&Modality=" + pickedModality, "SeriesInstanceUID")
# print the total number of segs in the manifest
print(len(seriesLst))
# reduce to the first 4 segs for demo purposes
seriesLst2Download = seriesLst[:4]
print(seriesLst2Download)
for s in seriesLst2Download:
    dcmPath = download_series_uid(s)
    print(f"dcmPath = {dcmPath}")
    dcm_files = [f for f in os.listdir(dcmPath) if f.endswith(".dcm")]
    if not dcm_files:  # nothing to open; avoids an IndexError below
        print(f" no .dcm file found in {dcmPath} ,skipping for now")
        continue
    ds, modality = openDCM(dcmPath + dcm_files[0])
    # Read the tags with a default so that a missing tag reaches the
    # "skipping" branches below instead of raising AttributeError.
    patientID = getattr(ds, "PatientID", None)
    patientName = getattr(ds, "PatientName", None)
    print(f" modality = {modality} ds id ={patientID} , ds name= {patientName}")
    patID = patientID or patientName
    if not patID:  # still no name skip this patient
        print(f" unable to find patient name --> TODO need to find another unique patient name ,skipping for now")
        continue
    patID = str(patID)  # the patient name may not be a plain str
    serNum = getattr(ds, "SeriesNumber", None) or getattr(ds, "AcquisitionNumber", None)
    if not serNum:
        print(f" unable to find series Number --> TODO need to find another unique series Number ,skipping for now")
        continue
    serNum = str(serNum)
    segDir = ROOT_FLD_DCM + patID + "/" + serNum + "/segRT/"
    # download ref uuid
    # Scan only the series just downloaded: scanning the whole collection folder
    # would also pick up SEG/RT files moved there by earlier iterations, so the
    # first entry could belong to a different series.
    refUUIDList = getRefUUIDs(dcmPath)
    if not refUUIDList or not refUUIDList[0]:
        print(f" unable to find the referenced series of {s} ,skipping for now")
        continue
    refUUID = refUUIDList[0]
    refdcmPath = download_series_uid(refUUID)
    print(f"move seg/RT dir {dcmPath} \n to {segDir}")
    if not os.path.exists(segDir):
        shutil.move(dcmPath, segDir)
    imgdcmDir = ROOT_FLD_DCM + patID + "/" + serNum + "/dcm/"
    print(f"move dcm image dir {refdcmPath} \n to {imgdcmDir}")
    if not os.path.exists(imgdcmDir):
        shutil.move(refdcmPath, imgdcmDir)
# # clean up the zip files
# for f in glob.glob(ROOT_FLD_DCM + "*.zip"):
#     os.remove(f)
# Load one of the arranged image folders with the new reader and display it.
# NOTE(review): this path is hard-coded for the LIDC-IDRI demo; adjust it when
# another collection is picked.
dcm = monai.transforms.LoadImage(reader="PydicomReader")("LIDC-IDRI/LIDC-IDRI-0314/1000/dcm")
monai.visualize.matshow3d(dcm[0], show=True, every_n=20)
|
Signed-off-by: Wenqi Li <wenqil@nvidia.com>
Signed-off-by: Wenqi Li <wenqil@nvidia.com>
/build |
/build |
Signed-off-by: Yiheng Wang vennw@nvidia.com
Fixes #4502 .
Description
This PR implements a Pydicom-based reader that supports reading DICOM data.
Status
Ready
Types of changes
./runtests.sh -f -u --net --coverage
./runtests.sh --quick --unittests --disttests
`make html` command in the `docs/` folder.