[DICOM to NRRD Converter] A Python 3-based script for converting DICOM series to NRRD
Currently, there are no really good methods or extensions for loading large DICOM datasets efficiently. Spending some time up front to convert the data into a single-file NRRD format is one feasible approach. However, this only speeds up loading; rotation and cropping operations during the actual segmentation process are still very laggy.
Test process information
-
Data size
- Source volume: 2GB
- Converted nrrd file: 900MB
-
Loading time
- DICOM: 4 min
- nrrd: 8 seconds
-
Loading result
- DICOM: Error, half of the volume did not load
- nrrd: No problem
dicom2nrrd.py Source code (Python 3)
# -*- coding: utf-8 -*-
# DICOM_to_NRRD.py
"""
Batch transform DICOM images to NRRD images
Dependencies:
- Python libraries: pydicom, pynrrd, numpy, tqdm (optional, for better progress bars)
- Third-party software: GDCM (the gdcmconv tool must be accessible via the command line)
Usage:
python DICOM_to_NRRD.py -i /path/to/input/folder -o /path/to/output/folder
"""
import glob
import subprocess
import sys
import os
import getopt
import numpy
import tempfile
import time # For timing
from datetime import datetime # For timestamp in summary
# Optional dependency: tqdm only provides nicer progress bars. The rest of the
# script checks TQDM_AVAILABLE before touching `tqdm`.
try:
    from tqdm import tqdm
    TQDM_AVAILABLE = True
except ImportError:
    TQDM_AVAILABLE = False
    print("Info: 'tqdm' library not found. Install it with 'pip install tqdm' for better progress bars.")

# Required dependency: pydicom (imported under the short alias `dicom`).
try:
    import pydicom as dicom
except ImportError:
    print("Error: pydicom library is required. Install it using 'pip install pydicom'", file=sys.stderr)
    sys.exit(1)

# Required dependency: pynrrd (its import name is `nrrd`).
try:
    import nrrd
except ImportError:
    print("Error: pynrrd library is required. Install it using 'pip install pynrrd'", file=sys.stderr)
    sys.exit(1)
class DICOM_to_NRRD:
    """Batch converter from folders of DICOM slices to single-file NRRD volumes.

    Every slice is first rewritten by the external ``gdcmconv`` tool (invoked
    with ``-w``) into a temporary file, read back with pydicom, and stacked
    along the third axis. The stacked volume is then written with pynrrd.
    """

    def __init__(self):
        # Voxels at or below BACKGROUND are rewritten to AIR by
        # filter_background_to_air().
        self.BACKGROUND = -2048
        self.AIR = -1024

    def batch_dicom_to_nrrd(self, dicom_root, nrrd_root):
        """Convert every sub-directory of ``dicom_root`` to NRRD.

        Each direct sub-directory is treated as one subject and converted
        with dicom_to_nrrd() into ``nrrd_root/<subject folder name>``.
        Plain files located directly in ``dicom_root`` are ignored.
        A summary of successful and failed folders is printed at the end.
        """
        print("--- Batch DICOM to NRRD Conversion Started ---")
        start_time = time.time()
        processed_folders = []
        failed_folders = []

        # Keep directories only, so that the progress total matches the work
        # actually done; sort them for a reproducible processing order.
        subject_dirs = sorted(
            path for path in glob.glob(os.path.join(dicom_root, '*'))
            if os.path.isdir(path)
        )
        total_folders = len(subject_dirs)
        print(f"Found {total_folders} subject folder(s) to process.")

        if TQDM_AVAILABLE:
            folder_iterator = tqdm(subject_dirs, desc="Processing Folders", unit="folder")
        else:
            folder_iterator = subject_dirs

        for processed_count, dicom_subject_path in enumerate(folder_iterator, start=1):
            subject_folder_name = os.path.basename(dicom_subject_path)
            if not subject_folder_name:
                print(f"Warning: Could not determine folder name for {dicom_subject_path}. Skipping.")
                failed_folders.append(dicom_subject_path)
            else:
                nrrd_subject = os.path.join(nrrd_root, subject_folder_name)
                try:
                    success = self.dicom_to_nrrd(dicom_subject_path, nrrd_subject)
                except Exception as e:
                    # Best effort: one broken subject must not stop the batch.
                    print(f"\nError during conversion of {dicom_subject_path}: {e}")
                    success = False
                if success:
                    processed_folders.append(nrrd_subject)
                else:
                    failed_folders.append(dicom_subject_path)

            # tqdm draws its own progress; otherwise print a simple counter.
            if not TQDM_AVAILABLE:
                print(f" Progress: {processed_count}/{total_folders} folders processed.")

        duration = time.time() - start_time
        self._print_summary(processed_folders, failed_folders, duration, nrrd_root)
        print("--- Batch DICOM to NRRD Conversion Finished ---")

    def _print_summary(self, processed_folders, failed_folders, duration, output_root):
        """Print a formatted summary of the conversion process.

        ``duration`` is the elapsed time in seconds. The start time shown is
        derived from it (current time minus ``duration``), so this method is
        expected to be called right after the work has finished.
        """
        finished_at = time.time()
        started_at = finished_at - duration
        stamp = '%Y-%m-%d %H:%M:%S'

        print("\n" + "="*50)
        print(" CONVERSION SUMMARY")
        print("="*50)
        print(f"Start Time: {datetime.fromtimestamp(started_at).strftime(stamp)}")
        print(f"End Time: {datetime.fromtimestamp(finished_at).strftime(stamp)}")
        print(f"Duration: {duration:.2f} seconds")
        print(f"Output Root: {output_root}")
        print("-" * 50)
        print(f"Successful: {len(processed_folders)} folder(s)")
        for path in processed_folders:
            print(f" - {path}")
        print("-" * 50)
        print(f"Failed: {len(failed_folders)} folder(s)")
        for path in failed_folders:
            print(f" - {path}")
        print("="*50 + "\n")

    def batch_preprocess(self, input_folder, output_folder, padding=20):
        """Rewrite every ``*.nrrd`` file of ``input_folder`` into ``output_folder``.

        Each volume is passed through filter_background_to_air() and written
        under the same file name. ``padding`` is accepted for compatibility
        but currently unused, because padding is disabled to preserve the CT
        value range. Errors are reported per file and do not stop the batch.
        """
        if output_folder:
            # Make sure the destination exists before the first write.
            os.makedirs(output_folder, exist_ok=True)

        for input_path in sorted(glob.glob(os.path.join(input_folder, '*.nrrd'))):
            output_path = os.path.join(output_folder, os.path.basename(input_path))
            try:
                data, options = nrrd.read(input_path)
                data, options = self.filter_background_to_air(data, options)
                # Padding is disabled by default here to preserve CT range.
                # data, options = self.pad_upper(data, options, padding)
                print(f'Writing {output_path}')
                nrrd.write(output_path, data, options)
            except Exception as e:
                print(f"Error processing {input_path}: {e}")

    def dicom_to_nrrd(self, dicom_root_dir, nrrd_files_dir):
        """Convert one subject folder into one or more NRRD files.

        If ``dicom_root_dir`` contains sub-directories, each of them is
        converted to ``<parent>_<subfolder>_<NN>.nrrd``; otherwise the folder
        itself is converted to ``<parent>_01.nrrd``. Files are written into
        ``nrrd_files_dir``, which is created when missing.

        Returns:
            True only if every slice of every series was converted and every
            NRRD file was written without error; False otherwise.
        """
        # Reserve a temporary path and close the handle immediately, so that
        # gdcmconv can overwrite the file for every slice.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.dcm') as tmp_file:
            temp_path = tmp_file.name

        try:
            parent_dir_name = os.path.basename(os.path.normpath(dicom_root_dir))
            # Sorted so that the numeric suffix of the output names is stable.
            subfolders = sorted(
                item for item in glob.glob(os.path.join(dicom_root_dir, '*'))
                if os.path.isdir(item)
            )
            series_folders = subfolders or [dicom_root_dir]

            success_flag = True  # Becomes False as soon as anything fails.
            for i, subject_folder in enumerate(series_folders):
                if subfolders:
                    subfolder_name = os.path.basename(subject_folder)
                    file_name = f"{parent_dir_name}_{subfolder_name}_{i+1:02d}.nrrd"
                else:
                    file_name = f"{parent_dir_name}_{i+1:02d}.nrrd"
                nrrd_file = os.path.join(nrrd_files_dir, file_name)

                print(f'\nProcessing {nrrd_file}')
                os.makedirs(nrrd_files_dir, exist_ok=True)

                dicom_files = self._select_dicom_files(subject_folder)
                if not dicom_files:
                    print(f"Warning: No files found in {subject_folder}")
                    success_flag = False
                    continue

                try:
                    data_3d, slices_ok = self._load_volume(dicom_files, temp_path)
                except FileNotFoundError:
                    # Without gdcmconv nothing can be converted, so report it
                    # once instead of once per slice.
                    print("\nError: gdcmconv command not found. Please ensure GDCM is installed and accessible.")
                    return False
                if not slices_ok:
                    success_flag = False

                if data_3d is None:
                    print(f"\nWarning: No valid DICOM data loaded for {subject_folder}. Skipping NRRD creation.")
                    success_flag = False
                    continue

                # The temporary file still holds the last slice that gdcmconv
                # converted successfully; its metadata fills the header.
                try:
                    options = self.load_dicom_options(temp_path, len(dicom_files))
                except Exception as e:
                    print(f"\nError loading DICOM options from {temp_path}: {e}")
                    success_flag = False
                    continue

                # Swap the first two axes and reverse the slice order.
                data_3d = numpy.swapaxes(data_3d, 0, 1)
                data_3d = data_3d[:, :, ::-1]
                # CT value modification and padding are disabled by default here.
                # data_3d, options = self.filter_background_to_air(data_3d, options)
                # data_3d, options = self.pad_upper(data_3d, options, padding_value)

                try:
                    nrrd.write(nrrd_file, data_3d, options)
                    print(f" -> Saved {nrrd_file}")
                except Exception as e:
                    print(f"\nError writing NRRD file {nrrd_file}: {e}")
                    success_flag = False

            return success_flag
        finally:
            if os.path.exists(temp_path):
                os.remove(temp_path)

    def _select_dicom_files(self, folder):
        """Return the sorted list of candidate DICOM files inside ``folder``.

        Files ending in ``.dcm`` or without any extension are preferred. If
        there are none, every file except ``.nrrd`` files is used instead.
        Directories are never returned.
        """
        candidates = [
            path for path in glob.glob(os.path.join(folder, '*'))
            if os.path.isfile(path)
        ]
        selected = [
            path for path in candidates
            if path.lower().endswith('.dcm') or not os.path.splitext(path)[1]
        ]
        if not selected:
            selected = [path for path in candidates if not path.lower().endswith('.nrrd')]
        return sorted(selected)

    def _load_volume(self, dicom_files, temp_path):
        """Convert and stack all slices of one series.

        Each file is passed through ``gdcmconv -w`` into ``temp_path`` and
        read back with pydicom. Slices that fail are reported and skipped.

        Returns:
            (data_3d, all_ok): the stacked volume (None if no slice could be
            read) and a flag that is False if any slice failed.

        Raises:
            FileNotFoundError: if running gdcmconv fails because the
                executable cannot be found.
        """
        total_files = len(dicom_files)
        data_3d = None
        all_ok = True

        file_pbar = None
        if TQDM_AVAILABLE:
            file_pbar = tqdm(total=total_files, desc=" Slices", unit="slice", leave=False)
        else:
            print(f" Slices: 0/{total_files}", end='', flush=True)

        try:
            for j, dicom_file in enumerate(dicom_files):
                if file_pbar is not None:
                    file_pbar.update(1)
                else:
                    print(f"\r Slices: {j+1}/{total_files}", end='', flush=True)

                try:
                    # An argument list (not a split string) keeps paths that
                    # contain spaces intact.
                    subprocess.run(
                        ['gdcmconv', '-w', dicom_file, temp_path],
                        check=True,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        text=True,
                    )
                except subprocess.CalledProcessError as e:
                    print(f"\nError: gdcmconv failed for {dicom_file}.")
                    print(f"Command: {e.cmd}")
                    all_ok = False
                    continue
                except FileNotFoundError:
                    # Handled once by the caller.
                    raise
                except Exception as e:
                    print(f"\nUnexpected error running gdcmconv for {dicom_file}: {e}")
                    all_ok = False
                    continue

                try:
                    ds = dicom.dcmread(temp_path)
                    data_3d = self.concatenate_layers(data_3d, ds.pixel_array)
                except Exception as e:
                    print(f"\nError reading DICOM file {temp_path} (from {dicom_file}): {e}")
                    all_ok = False
        finally:
            if file_pbar is not None:
                file_pbar.close()

        if file_pbar is None:
            # Finish the text progress line at 100%.
            print(f"\r Slices: {total_files}/{total_files} (100%)")
        return data_3d, all_ok

    @staticmethod
    def _float_vector(raw, length, default, tag_name):
        """Convert ``raw`` to a list of ``length`` floats, else return ``default``.

        A warning is printed when ``raw`` is not iterable, holds non-numeric
        entries, or has the wrong number of entries.
        """
        try:
            values = [float(item) for item in raw]
        except (ValueError, TypeError):
            values = None
        if values is None or len(values) != length:
            print(f"Warning: Invalid {tag_name} {raw}. Using defaults {default}")
            return list(default)
        return values

    def load_dicom_options(self, file_name, number_of_dicoms):
        """Build the NRRD header dictionary from one DICOM file.

        Spacing comes from PixelSpacing and SliceThickness, the origin from
        ImagePositionPatient; missing or invalid values fall back to 1.0
        spacing and a zero origin. ``number_of_dicoms`` is used as the size
        of the third axis when Rows and Columns are present.
        """
        ds = dicom.dcmread(file_name)
        options = dict()
        options['type'] = 'short'
        options['dimension'] = 3
        options['space'] = 'left-posterior-superior'

        pixel_spacing = self._float_vector(
            getattr(ds, 'PixelSpacing', [1.0, 1.0]), 2, [1.0, 1.0], 'PixelSpacing'
        )
        raw_thickness = getattr(ds, 'SliceThickness', 1.0)
        try:
            slice_thickness = float(raw_thickness)
        except (ValueError, TypeError):
            # An empty or malformed tag must not discard the whole volume.
            print(f"Warning: Invalid SliceThickness {raw_thickness}. Using default 1.0")
            slice_thickness = 1.0

        # NOTE(review): DICOM defines PixelSpacing as (row spacing, column
        # spacing), and the caller swaps the first two data axes before
        # writing - confirm the in-plane order for non-square pixels.
        options['space directions'] = [
            [pixel_spacing[0], 0, 0],
            [0, pixel_spacing[1], 0],
            [0, 0, slice_thickness]
        ]
        options['kinds'] = ['domain', 'domain', 'domain']
        # options['encoding'] = 'gzip'

        options['space origin'] = self._float_vector(
            getattr(ds, 'ImagePositionPatient', [0.0, 0.0, 0.0]),
            3, [0.0, 0.0, 0.0], 'ImagePositionPatient'
        )

        if hasattr(ds, 'Rows') and hasattr(ds, 'Columns'):
            options['sizes'] = [ds.Columns, ds.Rows, number_of_dicoms]
        else:
            print("Warning: Could not determine DICOM dimensions from metadata.")
        return options

    def concatenate_layers(self, data_3d, data):
        """Append one slice to the volume along the third axis.

        With ``data_3d`` set to None the slice starts a new volume (a first
        slice that is not 2D is returned unchanged, with a warning). A later
        slice with more than two dimensions is reduced with ``data[..., 0]``.
        A slice that cannot be stacked is reported and ``data_3d`` is
        returned unchanged.

        Note: every call builds a new array with numpy.concatenate.
        """
        try:
            if data_3d is None:
                if data.ndim == 2:
                    return data[:, :, numpy.newaxis]
                print(f"Warning: First slice is not 2D (ndim={data.ndim}). Attempting to use as-is.")
                return data

            if data.ndim != 2:
                print(f"Warning: Slice is not 2D (ndim={data.ndim}). Attempting to stack.")
                if data.ndim == 1:
                    print("Error: Cannot stack 1D slice. Skipping.")
                    return data_3d
                try:
                    data = data[..., 0]
                    if data.ndim != 2:
                        raise ValueError("Extracted slice is still not 2D")
                except Exception:
                    print("Error: Could not extract 2D data from >2D slice. Skipping.")
                    return data_3d

            return numpy.concatenate((data_3d, data[:, :, numpy.newaxis]), axis=2)
        except Exception as e:
            print(f"\nWarning during concatenation: {e}. Skipping slice.")
            return data_3d

    def filter_background_to_air(self, data, options):
        """Set every value at or below BACKGROUND (-2048) to AIR (-1024).

        ``data`` is modified in place; ``(data, options)`` is returned.
        """
        data[data <= self.BACKGROUND] = self.AIR
        return (data, options)

    def pad_upper(self, data, options, padding):
        """Disabled padding hook: print a warning and return the input unchanged."""
        print(f"Warning: pad_upper called with padding={padding}, but is disabled by default.")
        print(" To enable, modify the pad_upper method implementation.")
        return (data, options)
        # --- OLD IMPLEMENTATION (FOR REFERENCE IF RE-ENABLING) ---
        # if padding <= 0:
        #     return (data, options)
        # if data.ndim != 3:
        #     print(f"Warning: pad_upper expects 3D data, got {data.ndim}D. Skipping padding.")
        #     return (data, options)
        # rows, columns, depths = data.shape
        # padding_layer = numpy.full((rows, columns), self.AIR, dtype=data.dtype)
        # padding_volume = numpy.repeat(padding_layer[:, :, numpy.newaxis], padding, axis=2)
        # data = numpy.concatenate((data, padding_volume), axis=2)
        # if 'sizes' in options and len(options['sizes']) == 3:
        #     options['sizes'][2] += padding
        # return (data, options)
        # --- END OLD IMPLEMENTATION ---
def main(argv):
    """Command-line entry point.

    Prints a short banner, parses ``-i``/``--ifile`` (input folder) and
    ``-o``/``--ofile`` (output folder) from ``argv``, validates them and runs
    the batch conversion. Exits with status 2 on usage errors and with
    status 1 when the input folder is not a directory; ``-h`` prints the
    usage line and exits.
    """
    usage = 'DICOM_to_NRRD.py -i <input_folder> -o <output_folder>'

    # Banner with basic runtime information.
    rule = "=" * 60
    banner = (
        rule,
        "DICOM to NRRD Converter",
        rule,
        f"Python Version: {sys.version}",
        f"Script Path: {os.path.abspath(__file__)}",
        f"Start Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "-" * 60,
    )
    for line in banner:
        print(line)

    try:
        parsed_options, _ = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    folders = {'input': '', 'output': ''}
    for flag, value in parsed_options:
        if flag == '-h':
            print(usage)
            sys.exit()
        if flag in ("-i", "--ifile"):
            folders['input'] = value
        elif flag in ("-o", "--ofile"):
            folders['output'] = value

    dicom_root = folders['input']
    nrrd_root = folders['output']

    if not (dicom_root and nrrd_root):
        print('Both input (-i) and output (-o) folders are required.')
        print(usage)
        sys.exit(2)

    if not os.path.isdir(dicom_root):
        print(f"Error: Input folder '{dicom_root}' does not exist or is not a directory.")
        sys.exit(1)

    DICOM_to_NRRD().batch_dicom_to_nrrd(dicom_root, nrrd_root)


if __name__ == "__main__":
    main(sys.argv[1:])
Prepare the environment
Install Python 3
pip install pydicom pynrrd numpy tqdm
Install GDCM
Put dicom2nrrd.py into the GDCM-x.x.xx-Windows-x86_64\bin folder
Running snapshot (gif animation)
Note
There is still room for further development in areas such as NRRD compression, batch processing of patients, and format validation. This is just a concept provided as a reference for those encountering the same issues.