-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconvert.py
More file actions
442 lines (396 loc) · 20.4 KB
/
convert.py
File metadata and controls
442 lines (396 loc) · 20.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
"""
DICOM converting module.
This module reads DICOM files for a subject,
looks for the correct series (sometimes there are more than one series for one slice),
stack the slices into a volume and save as a nifti image.
pydicom is used for reading DICOM images. However, I have found that very rarely it could
fail in reading certain DICOM images, perhaps due to the DICOM format, which has no standard
and vary between manufacturers and machines.
IMPORTANT!!
The structure of the project SHOULD BE
dataset folder
--> patientA
--> cvi42wsx file
--> folderA with series
--> folderB with series
--> etc.
--> patientB
--> etc.
"""
import os, regex, sys, glob, pydicom, zipfile
import pickle, json, shutil, argparse
import numpy as np
import pandas as pd
import nibabel as nib
import SimpleITK as sitk
from utils.contour import parseContours, getContour
from utils.post_processing import postProcess
class DICOM_Dataset(object):
""" Class for managing DICOM datasets """
def __init__(self, input_dir, output_dir='', post_process=False):
"""
Initialise data
Group series by study type or field of view.
"""
# Set output dir
self.output_dir = output_dir
# If input folder is a zip file, unzip it
_, ext = os.path.splitext(input_dir)
if ext == '.zip':
zip_ref = zipfile.ZipFile(input_dir, 'r')
zip_ref.extractall(os.path.dirname(input_dir))
zip_ref.close()
input_dir = os.path.dirname(input_dir)
# Find patients and look for subdirs for each of them
self.patient_dict = {}
patients = os.listdir(input_dir)
for p in sorted(patients):
if '_transformed' in p:
continue
folder = os.path.join(input_dir, p)
if not os.path.isdir(folder):
continue
print('Patient found', p)
self.patient_dict[p] = []
new_dir = os.path.join(input_dir, p + '_transformed')
if os.path.exists(new_dir):
os.system('rm -rf "{}"'.format(new_dir))
os.mkdir(new_dir)
# if self.output_dir == '':
self.output_dir = new_dir
# Process and extract contours
self.cvi42_lb = parseContours(folder, new_dir)
if self.cvi42_lb:
contour_dir = os.path.join(new_dir, 'contours')
avail_cont = [f.rstrip('.pickle') for f in os.listdir(contour_dir)]
# Go into each subdir, list DICOM series, and move them to new folders
filedf = pd.DataFrame(columns=['sop','siuid','src','dst','at','copy'])
for root, _dir, files in os.walk(folder):
for _file in files:
if _file == 'DICOMDIR':
continue
try:
ds = pydicom.dcmread(os.path.join(root, _file))
except pydicom.errors.InvalidDicomError:
# Handle invalid dicom files. These are found to be the empty files (0 bytes).
print('Found invalid dicom file {}. Skipping!'.format(os.path.join(root, _file)))
continue
# serDesc = ds.SeriesDescription
if not ds.__contains__('SeriesDescription') and not ds.__contains__('SequenceName'):
serDesc = ds.StudyDescription if ds.__contains__('StudyDescription') else 'unknown'
else:
serDesc = ds.SeriesDescription if ds.__contains__('SeriesDescription') else ds.SequenceName
serDesc = regex.sub( r' |-|\\|\/', '_', serDesc)
serDesc = regex.sub(r'\(|\)', '', serDesc)
desc = ds.SeriesInstanceUID
if 'Workspace' in serDesc:
# Ignore cvi42 workspace dicom
# Maybe this file can be used to extract contours as well
continue
try:
pos = np.array([float(x) for x in ds.ImagePositionPatient])
pos[:2] = -pos[:2]
except Exception:
print('ERROR: ImagePositionPatient attribute does not exist.')
continue
instNumber = int(ds.InstanceNumber)
if ds.__contains__('SliceLocation'):
slcLoc = ds.SliceLocation
else: # Choose patient position coordinate
slcLoc = pos[2]
try:
ffile = os.path.join(new_dir, desc, 'img{0}-{1:.4f}.dcm'.format(str(instNumber).zfill(4), slcLoc))
except AttributeError: # SliceLocation does not exist
print('ERROR: SliceLocation does not exist.')
continue
# Save new file
# Copy file if it does not exist yet or it's not set for copy
copy = False
if ffile not in filedf.loc[filedf['copy'] == True, 'dst'].values:
copy = True
new_row = {
'sop': ds.SOPInstanceUID,
'siuid': desc,
'src': os.path.join(root, _file),
'dst': ffile,
'at': ds.AcquisitionTime if ds.__contains__('AcquisitionTime') else '',
'copy': copy
}
filedf = filedf.append(new_row, ignore_index=True)
if not copy:
# This particular slice was already present in filedict
# This means we found a corrected version of some series
# (probably due to some artifact or bad acquisition)
print('Repeated series', desc, os.path.basename(ffile))
at = ds.AcquisitionTime
if not self.cvi42_lb:
alternative_file = filedf.loc[(filedf['dst'] == ffile) & (filedf['at'] != ffile), 'src'].values[0]
ds2 = pydicom.dcmread(alternative_file)
# Skip file if it is an old version of an existing one
if float(ds.AcquisitionTime) < float(ds2.AcquisitionTime):
continue
else:
# When contours exist, keep images according to those files.
# If this particular image is contoured, we keep it.
# Although we must keep all temporal phases associated to this one
# that are linked by the AcquisitionTime.
if ds.SOPInstanceUID not in avail_cont: continue
ats = filedf.loc[filedf['dst'] == ffile, 'at'].values
other_at = list(set(ats) - set([at]))
if len(other_at) == 0: continue
other_at = other_at[0]
filedf.loc[filedf['at'] == other_at, 'copy'] = False
# Set copy to true for all slices for this acquisition time
filedf.loc[filedf['at'] == at, ['copy']] = True
# Copy definitive files around
for _, row in filedf.iterrows():
if not row['copy']: continue
os.makedirs(os.path.dirname(row['dst']), exist_ok=True)
shutil.copy(row['src'], row['dst'])
self.transformDicoms(new_dir, p)
# File postprocessing
if post_process:
try:
postProcess(new_dir)
except Exception as e:
print('Exception during postprocessing: ', e)
continue
# Delete origin folder
# os.system('rm -rf {}'.format(os.path.join(input_dir, p)))
# Remove DICOM files from converted folders
# for f in glob.iglob(os.path.join(input_dir, '*_transformed', '**', '*.dcm'), recursive=True):
# os.remove(f)
def transformDicoms(self, dicom_dir, patient):
avail_cont = []
self.cvi42_lb_used = False
if self.cvi42_lb:
contour_dir = os.path.join(dicom_dir, 'contours')
avail_cont = [f.rstrip('.pickle') for f in os.listdir(contour_dir)]
# Go through each folder and transform it to nifti files
processed = []
for s in sorted(os.listdir(dicom_dir)):
SKIP = False
print('Series', s)
if s == 'contours':
continue
# Look for series that have the same name, but for the last three digits
# or the last code
spl = s.split('.')
prefix = ''
if len(spl[-1]) > 3:
prefix = spl[-1]
elif len(spl[-2]) > 3:
prefix = s
else:
prefix = '.'.join(spl[:-3])
# Process each series once
if prefix in processed:
print('Series already considered! {}. Skipping!'.format(prefix))
continue
processed.append(prefix)
matchSeries = list(glob.iglob(os.path.join(dicom_dir, prefix + '.*')))
# Set destination folder to the series
folder = os.path.join(dicom_dir, s)
files = []
if len(matchSeries) < 2:
files = list(glob.iglob(os.path.join(folder, '*.dcm')))
else:
for ms in sorted(matchSeries):
files.extend(list(glob.iglob(os.path.join(ms, '*.dcm'))))
# Compute "normalised" file name. Avoid instance numbers that are not sequential
files_normalised = []
for zpos in np.unique([os.path.basename(f)[8:].rstrip('.dcm') for f in files]):
aux_files = sorted([f for f in files if os.path.basename(f)[8:].rstrip('.dcm') == zpos])
aux_norm = [regex.sub(r'img\d{4}\-', 'img{}-'.format(str(idx+1).zfill(4)), os.path.basename(f)) for idx, f in enumerate(aux_files)]
files_normalised.extend(list(zip(aux_files, aux_norm)))
# List of files with Z value in position 2
file_list = [[f, fnorm, float(os.path.basename(f)[8:].rstrip('.dcm'))] for f, fnorm in files_normalised]
# Vertical slices (Z) and time steps (T)
Z = len(set([float(os.path.basename(i)[8:].rstrip('.dcm')) for i in files]))
T = 1 + (len(files)-Z) // Z
print('T', T, ' Z', Z)
# print('-> files', files)
if T*Z < len(files):
print('DICOM files seem to come from different cine images or have different time steps.')
print('Series will be grouped separately.')
print('Work in progress...')
# TODO
continue
files_time = []
# Order file list so that the slices go from earlier to later times
# and for each time step we have increasing Z component
file_list = sorted(file_list, key=lambda x: x[1])
if T >= 2 and Z >= 2:
for t in range(T):
file_list[t*Z:(t+1)*Z] = sorted(file_list[t*Z:(t+1)*Z], key=lambda x: x[2])
elif T == 1: # Order by Z in case there is no temporal axis (just in case). It fails on DE sax images
file_list = sorted(file_list, key=lambda x: x[2])
# Create default volume and fill it with corresponding images
ds = pydicom.dcmread(file_list[0][0])
if not ds.__contains__('SeriesDescription') and not ds.__contains__('SequenceName'):
serDesc = ds.StudyDescription if ds.__contains__('StudyDescription') else 'unknown'
else:
serDesc = ds.SeriesDescription if ds.__contains__('SeriesDescription') else ds.SequenceName
if serDesc == 'MESURES': continue
serDesc = regex.sub( r' |-|\\|\/', '_', serDesc)
serDesc = regex.sub(r'\(|\)', '', serDesc)
Y, X = ds.Rows, ds.Columns
volume = np.zeros((X,Y,Z,T), dtype='float32')
label = np.zeros((X,Y,Z,T), dtype='uint8')
label_up = np.zeros((X*4,Y*4,Z,T), dtype='uint8')
for idx, fi in enumerate(file_list):
f = fi[0]
ds = pydicom.dcmread(f)
reader = sitk.ImageFileReader()
reader.SetFileName(f)
try:
im = sitk.GetArrayFromImage(reader.Execute()).squeeze()
except Exception:
print('ERROR: Unable to read file with SimpleITK')
SKIP = True # Skip series
break
try:
volume[..., idx%Z, idx//Z] = im.transpose()
except ValueError:
print('Debug: Problem with transposing image. Saving as it comes.')
volume[..., idx%Z, idx//Z] = im
if ds.SOPInstanceUID in avail_cont:
print('-> SOP', ds.SOPInstanceUID)
self.cvi42_lb_used = True # Contouring found
lab_down, lab_up = getContour(os.path.join(contour_dir, '{}.pickle'.format(ds.SOPInstanceUID)), X, Y)
label[..., idx%Z, idx//Z] = lab_down
label_up[..., idx%Z, idx//Z] = lab_up
t = 0
if ds.__contains__('TriggerTime'):
t = ds.TriggerTime
files_time.append(t)
# If SimpleITK failed to read some dicom file, we skip this series
if SKIP: continue
dx = float(ds.PixelSpacing[1])
dy = float(ds.PixelSpacing[0])
# Temporal spacing
dt = (files_time[Z-1] - files_time[0]) * 1e-3
# DICOM coordinate (LPS)
# x: left
# y: posterior
# z: superior
# Nifti coordinate (RAS)
# x: right
# y: anterior
# z: superior
# Therefore, to transform between DICOM and Nifti, the x and y coordinates need to be negated.
# Refer to
# http://nifti.nimh.nih.gov/pub/dist/src/niftilib/nifti1.h
# http://nifti.nimh.nih.gov/nifti-1/documentation/nifti1fields/nifti1fields_pages/figqformusage
# The coordinate of the upper-left voxel of the first and second slices
ds = pydicom.dcmread(file_list[0][0])
pos_ul = np.array([float(x) for x in ds.ImagePositionPatient])
pos_ul[:2] = -pos_ul[:2]
# Image orientation
axis_x = np.array([float(x) for x in ds.ImageOrientationPatient[:3]])
axis_y = np.array([float(x) for x in ds.ImageOrientationPatient[3:]])
axis_x[:2] = -axis_x[:2]
axis_y[:2] = -axis_y[:2]
if Z >= 2:
# Read a dicom file at the second slice
d2 = pydicom.dcmread(file_list[1][0])
pos_ul2 = np.array([float(x) for x in d2.ImagePositionPatient])
pos_ul2[:2] = -pos_ul2[:2]
axis_z = pos_ul2 - pos_ul
axis_z = axis_z / np.linalg.norm(axis_z)
else:
axis_z = np.cross(axis_x, axis_y)
# Determine the z spacing
if hasattr(ds, 'SpacingBetweenSlices'):
dz = float(ds.SpacingBetweenSlices)
elif Z >= 2:
print('Debug: can not find attribute SpacingBetweenSlices. '
'Calculate from two successive slices.')
dz = float(np.linalg.norm(pos_ul2 - pos_ul))
else:
print('Debug: can not find attribute SpacingBetweenSlices. '
'Use attribute SliceThickness instead.')
dz = float(ds.SliceThickness)
# Affine matrix which converts the voxel coordinate to world coordinate
affine = np.eye(4)
affine[:3,0] = axis_x * dx
affine[:3,1] = axis_y * dy
affine[:3,2] = axis_z * dz
affine[:3,3] = pos_ul
nii = nib.Nifti1Image(volume, affine)
nii.header['pixdim'][4] = dt
nii.header['sform_code'] = 1
nib.save(nii, os.path.join(self.output_dir, s, '{}.nii.gz'.format(serDesc)))
aux_file = {}
aux_file['file_path'] = os.path.join(self.output_dir, s, '{}.nii.gz'.format(serDesc))
# TODO: Save only label files for SERIES where at least one slice had a contour
if self.cvi42_lb:
nii_lb = nib.Nifti1Image(label, affine)
nii_lb.header['pixdim'][4] = dt
nii_lb.header['sform_code'] = 1
nib.save(nii_lb, os.path.join(self.output_dir, s, '{}_label.nii.gz'.format(serDesc)))
nii_lb_up = nib.Nifti1Image(label_up, affine)
nii_lb_up.header['pixdim'][4] = dt
nii_lb_up.header['sform_code'] = 1
nib.save(nii_lb_up, os.path.join(self.output_dir, s, '{}_label_upsample.nii.gz'.format(serDesc)))
aux_file['mask_path'] = os.path.join(self.output_dir, s, '{}_label.nii.gz'.format(serDesc))
aux_file['upsample_mask_path'] = os.path.join(self.output_dir, s, '{}_label_upsample.nii.gz'.format(serDesc))
# Save information of patient
studyDate = ds.StudyDate if ds.__contains__('StudyDate') else ''
patientBD = ds.PatientBirthDate[:4] if ds.__contains__('PatientBirthDate') else ''
age = ''
if ds.__contains__('PatientAge'):
age = ds.PatientAge
elif studyDate != '' and patientBD != '':
# Compute age at the time of scan
age = int(studyDate[:4]) - int(patientBD)
# Image view and use of contrast
patt = ['cine_short_axis', 'cine_short_axis_6MM', 'CINE_EC.*_apex', 'EC_.*_FIL', 'EC_.*_10slices',
'CINE_EC_barrido', 'CINE_EC', 'cine_.*_EC', 'SHORT_AXIS', 'CINE_EJE_CORTO',
'FUNCION_VI', r'.*\_\#SA', r'.*SAX.*', 'viabilidad', 'sa_cine']
sa = np.any([regex.search(p, serDesc) for p in patt])
patt = ['viabilidad', '.*RTG']
ge = np.any([regex.search(p, serDesc) for p in patt])
patt = ['.*2C.*', '.*2_C.*', '2c_*']
c2 = np.any([regex.search(p, serDesc) for p in patt])
patt = ['.*3C.*', '.*3_C.*', '3c_*']
c3 = np.any([regex.search(p, serDesc) for p in patt])
patt = ['.*4C.*', '.*4_C.*', '4c_*']
c4 = np.any([regex.search(p, serDesc) for p in patt])
view = ''
contrast = False
if c2:
view = '2chambers'
elif c3:
view = '3chambers'
elif c4:
view = '4chambers'
elif sa:
view = 'short_axis'
if ge:
contrast = True
info = {
'age': age,
'patientID': ds.PatientID if ds.__contains__('PatientID') else '',
'sex': ds.PatientSex if ds.__contains__('PatientSex') else '',
'weight': ds.PatientWeight if ds.__contains__('PatientWeight') else '',
'size': ds.PatientSize if ds.__contains__('PatientSize') else '',
'vendor': ds.Manufacturer if ds.__contains__('Manufacturer') else '',
'institution': ds.InstitutionName if ds.__contains__('InstitutionName') else '',
'modality': ds.Modality if ds.__contains__('Modality') else '',
'bodyPartExamined': ds.BodyPartExamined if ds.__contains__('BodyPartExamined') else '',
'view': view, 'contrast': contrast,
'studyDate': studyDate
}
# with open(os.path.join(folder, '..', 'info.json'), 'w') as f:
# json.dump(info, f)
aux_file.update(info)
self.patient_dict[patient].append(aux_file)
def getNiftis(self):
return self.patient_dict
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Convert DICOM files to NIFTI format.')
parser.add_argument('path', type=str, help='Directory path to dataset that must be converted.')
parser.add_argument('--pp', action='store_true', help='Whether or not to apply post-processing to the final images.')
args = parser.parse_args()
DICOM_Dataset(args.path, post_process=args.pp)