I followed most of the suggestions above (useSeriesDetails, dcm2niix, Steve Pieper’s code, etc.), but none of them worked for me. The issue is that if there are systemic problems with the way the DICOMs were acquired or saved, it is very challenging to recover the sub-components of each series. This usually happens for legacy reasons (older scanners, a different FOV saved as a “new” acquisition, etc.).
Anyway, I wrote my own little script to read all the files for a given study, separate them into individual series and sub-series (based on the acquisition number), and then write each one to disk as a NIfTI image plus a JSON file of its metadata. It uses SimpleITK and pydicom, so adapt it to your own needs. Trust, but verify the output yourself.
import json
import os

import pydicom
import SimpleITK as sitk
def extract_series_and_dump(
    dir_study_DICOM,
    dir_save_NII
):
    """Split a DICOM study into (sub-)series and write each one to disk.

    GDCM is first used to split the study directory into series. Each
    series is then split again by the AcquisitionNumber tag (0020|0012),
    and every resulting sub-series is written as ``<name>.nii.gz`` together
    with a ``<name>.json`` file holding the metadata of its first file.
    ``<name>`` is ``<StudyID>_<SeriesNumber>``, with ``_acq<k>`` appended
    when a series contains more than one acquisition.

    Progress is reported with ``print``.

    Args:
        dir_study_DICOM: Directory containing the DICOM files of one study.
        dir_save_NII: Directory the outputs are written to. It is created
            if it does not exist.

    Returns:
        False if ``dir_study_DICOM`` is not a directory or GDCM finds no
        series in it, True otherwise.
    """
    print('--' * 25)
    print('dir_study_DICOM')
    print(dir_study_DICOM)
    print('dir_save_NII')
    print(dir_save_NII)

    ## check if folder exists
    if not os.path.isdir(dir_study_DICOM):
        return False

    print('Processing study:')
    print(dir_study_DICOM)

    ## first use GDCM to get a rough division of filenames for each series UID
    l_series_IDs = sitk.ImageSeriesReader.GetGDCMSeriesIDs(dir_study_DICOM, useSeriesDetails=True)
    if not l_series_IDs:
        return False

    num_series_found = len(l_series_IDs)
    if num_series_found > 1:
        print('num_series_found:', num_series_found)

    ## the writers below do not create missing directories themselves
    os.makedirs(dir_save_NII, exist_ok=True)

    for idx, series_UID in enumerate(l_series_IDs):
        print('--' * 25)
        print('idx:', idx)
        print('seriesID:', series_UID)

        ## get all filenames for this series
        series_file_names = sitk.ImageSeriesReader.GetGDCMSeriesFileNames(dir_study_DICOM, series_UID, useSeriesDetails=True)

        ## identify any sub-acquisitions in this series
        d_series_by_acquisition_number = _group_files_by_acquisition_number(series_file_names)
        num_sub_series = len(d_series_by_acquisition_number)
        print('num_sub_series:', num_sub_series)

        ## dump each sub-component + JSON to disk
        for tidx, group in enumerate(d_series_by_acquisition_number.values()):
            l_sorted_files = _sort_files_by_position(group['files'], group['instances'])
            ## only tag the acquisition when there is more than one of them
            suffix = '' if num_sub_series == 1 else '_acq' + str(tidx + 1)
            _dump_sub_series(l_sorted_files, dir_save_NII, suffix)

    return True


def _read_header(file_name):
    """Return a SimpleITK file reader with only the header of *file_name* loaded."""
    file_reader = sitk.ImageFileReader()
    file_reader.SetFileName(file_name)
    file_reader.LoadPrivateTagsOn()
    file_reader.ReadImageInformation()
    return file_reader


def _slice_position(file_name):
    """Return the SliceLocation of *file_name* as a float, or -1.0 if unusable."""
    ## only the header is needed, so skip the (potentially large) pixel data
    dataset = pydicom.dcmread(file_name, stop_before_pixels=True)
    try:
        return float(dataset.SliceLocation)
    except (AttributeError, TypeError, ValueError):
        ## tag missing, empty, or not numeric
        return -1.0


def _group_files_by_acquisition_number(series_file_names):
    """Group file names (and their slice positions) by AcquisitionNumber.

    Returns a dict mapping each acquisition number string, in order of
    first appearance, to ``{'files': [...], 'instances': [...]}``. Files
    without an AcquisitionNumber tag are skipped and their count printed.
    """
    groups = {}
    num_skipped = 0
    for file_name in series_file_names:
        file_reader = _read_header(file_name)
        ## 0020|0012 is the AcquisitionNumber tag
        if '0020|0012' not in file_reader.GetMetaDataKeys():
            num_skipped += 1
            continue
        acquisition_number = file_reader.GetMetaData('0020|0012')
        group = groups.setdefault(acquisition_number, {'files': [], 'instances': []})
        group['files'].append(file_name)
        group['instances'].append(_slice_position(file_name))
    if num_skipped:
        ## make the data loss visible instead of dropping files silently
        print('num_files_without_acquisition_number:', num_skipped)
    return groups


def _sort_files_by_position(files, positions):
    """Return *files* ordered by descending slice position.

    The sort is stable, so files sharing a position (for example when no
    file has a usable SliceLocation) keep their incoming order.
    """
    ## NOTE(review): descending order mirrors the original script's explicit
    ## "flip direction of files" step -- confirm it matches the slice order
    ## you expect in the written volume.
    ordered = sorted(zip(positions, files), key=lambda pair: pair[0], reverse=True)
    return [file_name for _, file_name in ordered]


def _collect_metadata(file_reader):
    """Return the reader's metadata as a dict keyed by DICOM keyword.

    Keys that pydicom has no keyword for (private or unknown tags), or that
    are not of the form ``group|element``, are stored under the raw key so
    they do not overwrite one another.
    """
    metadata = {}
    for key in file_reader.GetMetaDataKeys():
        keyword = ''
        str_tag_group, separator, str_tag_element = key.partition('|')
        if separator:
            try:
                tag = pydicom.tag.Tag(int(str_tag_group, 16), int(str_tag_element, 16))
            except ValueError:
                ## not a hexadecimal group/element pair
                tag = None
            if tag is not None:
                keyword = pydicom.datadict.keyword_for_tag(tag)
        metadata[keyword or key] = file_reader.GetMetaData(key)
    return metadata


def _dump_sub_series(l_sorted_files, dir_save_NII, suffix):
    """Write one sub-series as ``.nii.gz`` plus a ``.json`` metadata file.

    The base file name is ``<StudyID>_<SeriesNumber><suffix>`` with spaces
    removed from both tag values, which are read from the first file.
    """
    ## read the image volume
    series_reader = sitk.ImageSeriesReader()
    series_reader.SetFileNames(list(l_sorted_files))
    series_reader.MetaDataDictionaryArrayUpdateOn()
    series_reader.LoadPrivateTagsOn()
    image_dicom = series_reader.Execute()

    ## the header of the first file supplies the metadata and the IDs
    file_reader = _read_header(l_sorted_files[0])
    metadata = _collect_metadata(file_reader)
    ## 0020|0010 is StudyID, 0020|0011 is SeriesNumber
    temp_study_ID = file_reader.GetMetaData('0020|0010')
    temp_series_ID = file_reader.GetMetaData('0020|0011')

    fn_save = str(temp_study_ID).replace(" ", "") + '_' + str(temp_series_ID).replace(" ", "") + suffix
    print('study_series:')
    print(fn_save)

    ## write image
    ffpn_save_image = os.path.join(dir_save_NII, fn_save + ".nii.gz")
    sitk.WriteImage(image_dicom, ffpn_save_image)

    ## write metadata
    ffpn_save_metadata_json = os.path.join(dir_save_NII, fn_save + ".json")
    with open(ffpn_save_metadata_json, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=4)  # indent for pretty printing