I followed most of the suggestions above (useSeriesDetails, dcm2niix, Steve Pieper's code, etc.), but none of them worked for me. The issue is that if there are systemic problems with the way the DICOMs were acquired/saved, then it is very challenging to obtain the sub-components of each series. This has to do with legacy reasons (older scanners, a different FOV saved as a "new" acquisition, etc.).
Anyway, I wrote my own little script to read all the files for a given study, separate them into individual series and sub-series (based on acquisition number), and then dump them all to disk. It uses SimpleITK and pydicom, so adapt it to your own needs. Trust, but verify it yourself.
import os
import json

import SimpleITK as sitk
import pydicom


def extract_series_and_dump(
    dir_study_DICOM,
    dir_save_NII
):
    print('--' * 25)
    print('dir_study_DICOM')
    print(dir_study_DICOM)
    print('dir_save_NII')
    print(dir_save_NII)
    ## check that the study folder exists
    if not os.path.isdir(dir_study_DICOM):
        return False
    print('Processing study:')
    print(dir_study_DICOM)
    """
    First use GDCM to get a rough division of filenames for each series UID
    """
    ## get all series IDs in this study
    l_series_IDs = sitk.ImageSeriesReader.GetGDCMSeriesIDs(dir_study_DICOM, useSeriesDetails=True)
    if not l_series_IDs:
        return False
    num_series_found = len(l_series_IDs)
    if num_series_found > 1:
        print('num_series_found:', num_series_found)
    """
    iterate + identify any sub-acquisitions in each series
    """
    for idx, series_UID in enumerate(l_series_IDs):
        print('--' * 25)
        print('idx:', idx)
        print('seriesID:', series_UID)
        ## get all filenames for this series
        series_file_names = sitk.ImageSeriesReader.GetGDCMSeriesFileNames(dir_study_DICOM, series_UID, useSeriesDetails=True)
        """
        identify any sub-acquisitions in this series
        ----- use ACQUISITION NUMBER DICOM TAG -----
        """
        d_series_by_acquisition_number = {}
        for file_name in series_file_names:
            file_reader = sitk.ImageFileReader()
            ## set filename
            file_reader.SetFileName(file_name)
            ## set flags
            file_reader.LoadPrivateTagsOn()
            ## read file info (header only, no pixel data)
            file_reader.ReadImageInformation()
            ## get the list of metadata keys (DICOM tags, lowercase "gggg|eeee" form)
            meta_data_keys = file_reader.GetMetaDataKeys()
            ## get acquisition number (0020|0012); skip files without it
            if "0020|0012" in meta_data_keys:
                acquisition_number = file_reader.GetMetaData('0020|0012')
            else:
                continue
            ## get acquisition time (0008|0032)
            if "0008|0032" in meta_data_keys:
                acquisition_time = file_reader.GetMetaData('0008|0032')
            else:
                acquisition_time = ''
            ## get series time (0008|0031)
            if "0008|0031" in meta_data_keys:
                series_time = file_reader.GetMetaData('0008|0031')
            else:
                series_time = ''
            ## get location of slice
            # instance_number = file_reader.GetMetaData("0020|0032").split("\\")[-1]
            pydf = pydicom.dcmread(file_name, stop_before_pixels=True)
            ## get pydicom slice location
            if hasattr(pydf, 'SliceLocation'):
                # print('SliceLocation:', pydf.SliceLocation)
                instance_number = pydf.SliceLocation
            else:
                instance_number = -1
            ## protocol name (0018|1030)
            if "0018|1030" in meta_data_keys:
                protocol_name = file_reader.GetMetaData("0018|1030")
            else:
                protocol_name = ''
            ## series description (0008|103e)
            if "0008|103e" in meta_data_keys:
                series_description = file_reader.GetMetaData("0008|103e")
            else:
                series_description = ''
            # print('acquisition_number:', acquisition_number, 'acquisition_time:', acquisition_time)
            # print('series_time:', series_time)
            # print('instance_number:', instance_number)
            # print('protocol_name:', protocol_name)
            # print('series_description:', series_description)
            ## group files by acquisition number
            key_ = acquisition_number
            if key_ not in d_series_by_acquisition_number:
                d_series_by_acquisition_number[key_] = {'files': [], 'instances': []}
            ## store
            d_series_by_acquisition_number[key_]['files'].append(file_name)
            d_series_by_acquisition_number[key_]['instances'].append(instance_number)
        ## how many?
        print('num_sub_series:', len(d_series_by_acquisition_number))
        """
        Sort files in each sub-component by the position in volume
        """
        d_sorted = {}
        for key_ in d_series_by_acquisition_number:
            ## get
            tfiles_ = d_series_by_acquisition_number[key_]['files']
            tinstances_ = d_series_by_acquisition_number[key_]['instances']
            ## sort by slice position (filename breaks ties / missing positions)
            sorted_instances, sorted_files = zip(*sorted(zip(tinstances_, tfiles_)))
            ## flip direction of files (keep positions in sync)
            sorted_files = sorted_files[::-1]
            sorted_instances = sorted_instances[::-1]
            d_sorted[key_] = {}
            ## store
            d_sorted[key_]['files'] = sorted_files
            d_sorted[key_]['instances'] = sorted_instances
        ## how many?
        print('num_sub_series:', len(d_sorted))
        num_sub_series = len(d_sorted)
        """
        Dump each sub-component + JSON to disk
        """
        for tidx, key_ in enumerate(d_sorted):
            l_sorted_files = d_sorted[key_]['files']
            ## initialize
            series_reader = sitk.ImageSeriesReader()
            ## set
            series_reader.SetFileNames(l_sorted_files)
            ## set flags
            series_reader.MetaDataDictionaryArrayUpdateOn()
            series_reader.LoadPrivateTagsOn()
            ## read the sub-series into a single volume
            image_dicom = series_reader.Execute()
            ## create a file reader for the first slice to harvest its metadata
            file_reader = sitk.ImageFileReader()
            temp_file_2_read = l_sorted_files[0]
            ## set filename
            file_reader.SetFileName(temp_file_2_read)
            ## set flags
            file_reader.LoadPrivateTagsOn()
            ## read file info
            file_reader.ReadImageInformation()
            ## collect metadata as {DICOM keyword: value}
            metadata = {}
            for key in file_reader.GetMetaDataKeys():
                ## split "gggg|eeee" into group + element
                str_tag_group, str_tag_element = key.split('|')
                ## get keyword from tag using pydicom
                tag = pydicom.tag.Tag(int(str_tag_group, 16), int(str_tag_element, 16))
                keyword = pydicom.datadict.keyword_for_tag(tag)
                ## private / unknown tags have no keyword; keep them under the raw "gggg|eeee" key
                if not keyword:
                    keyword = key
                ## store
                metadata[keyword] = file_reader.GetMetaData(key)
            ## get StudyID (0020|0010)
            temp_study_ID = file_reader.GetMetaData('0020|0010')
            ## get SeriesNumber (0020|0011)
            temp_series_ID = file_reader.GetMetaData('0020|0011')
            ## build the output filename
            if num_sub_series == 1:
                ## only one acquisition, save as is
                fn_save = str(temp_study_ID).replace(" ", "") + '_' + str(temp_series_ID).replace(" ", "")
            else:
                ## more than one acquisition, append the ID of this one
                fn_save = str(temp_study_ID).replace(" ", "") + '_' + str(temp_series_ID).replace(" ", "") + '_acq' + str(tidx + 1)
            ## save
            ffpn_save_image = os.path.join(dir_save_NII, fn_save + ".nii.gz")
            print('study_series:')
            print(fn_save)
            ## write the NIfTI volume
            sitk.WriteImage(image_dicom, ffpn_save_image)
            ## save metadata
            ffpn_save_metadata_json = os.path.join(dir_save_NII, fn_save + ".json")
            ## write metadata to disk
            with open(ffpn_save_metadata_json, 'w') as f:
                json.dump(metadata, f, indent=4)  # indent for pretty printing
    return True
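For reference, here is a minimal sketch of how the function above might be driven from a top-level script, assuming extract_series_and_dump (and its imports) are defined earlier in the same file. The dir_root_DICOM / dir_root_NII paths and the one-sub-folder-per-study layout are hypothetical placeholders for illustration, not part of my setup.

import os

## hypothetical layout: one DICOM sub-folder per study, one output folder for the NIfTI/JSON dumps
dir_root_DICOM = '/data/DICOM_studies'   # placeholder path
dir_root_NII = '/data/NII_out'           # placeholder path

for study_name in sorted(os.listdir(dir_root_DICOM)):
    dir_study_DICOM = os.path.join(dir_root_DICOM, study_name)
    ## keep each study's output in its own sub-folder
    dir_save_NII = os.path.join(dir_root_NII, study_name)
    os.makedirs(dir_save_NII, exist_ok=True)
    ## returns False if the folder is missing or no series were found
    if not extract_series_and_dump(dir_study_DICOM, dir_save_NII):
        print('skipped:', dir_study_DICOM)

Note that grouping on AcquisitionNumber (0020,0012) is what splits a GDCM series into sub-acquisitions here; if your data encodes repeats differently (e.g. by echo number or temporal position), that is the key to swap out.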