I was trying to use Dask to parallelize a Python function that uses ITK across multiple images, and ran into a problem when ITK's lazy loading (`itkConfig.LazyLoading`) is enabled.
Here is a minimal script that reproduces the error:
import os
import skimage.data
import skimage.io
import dask
import dask.distributed
import itk
# Instantiate a client to the dask cluster
scheduler = 'localhost:8786'
c = dask.distributed.Client(scheduler)
# Call form of print: valid on both Python 2 and Python 3
# (the bare `print c` statement is a SyntaxError on Python 3).
print(c)
# get and save sample image for testing
skimage.io.imsave('camera_man.jpg', skimage.data.camera())
# Absolute path, so the tasks do not depend on the working directory of
# whichever process ends up reading the file.
fpath = os.path.abspath('camera_man.jpg')
# try parallelizing a function that uses itk.imread using Dask
result = []
def process(i):
    """Read the sample image with ITK, pause briefly, and return ``i``.

    Stand-in for a real per-image task: the image at the module-level
    ``fpath`` is loaded with ``itk.imread`` and the argument is handed
    back unchanged.

    Args:
        i: Value identifying the task; returned as-is.

    Returns:
        The same ``i`` that was passed in.
    """
    # Imported locally because `time` is not imported anywhere in the
    # visible script, so the time.sleep call below raised NameError.
    import time

    im_itk = itk.imread(fpath)  # loaded only to exercise ITK; not used further
    time.sleep(0.2)  # simulate some processing time
    return i
# Queue one lazy process(idx) call per index; nothing executes yet.
lazy_process = dask.delayed(process)
result.extend(lazy_process(idx) for idx in range(1000))
# Wrap the list of lazy calls in a single delayed object and run them all.
result = dask.delayed(result).compute()
This resulted in the following error:
distributed.protocol.pickle - INFO - Failed to serialize <function process at 0x7f4e4f98ab90>. Exception: 'LazyITKModule' object has no attribute '__package__'
I managed to work around this by disabling lazy loading before importing itk, as shown below:
import os
import skimage.data
import skimage.io
import dask
import dask.distributed
# disable lazy loading first and then import itk
import itkConfig
itkConfig.LazyLoading = False
import itk
# Instantiate a client to the dask cluster
scheduler = 'localhost:8786'
c = dask.distributed.Client(scheduler)
# Call form of print: valid on both Python 2 and Python 3
# (the bare `print c` statement is a SyntaxError on Python 3).
print(c)
# get and save sample image for testing
skimage.io.imsave('camera_man.jpg', skimage.data.camera())
# Absolute path, so the tasks do not depend on the working directory of
# whichever process ends up reading the file.
fpath = os.path.abspath('camera_man.jpg')
# try parallelizing a function that uses itk.imread using Dask
result = []
def process(i):
    """Read the sample image with ITK, pause briefly, and return ``i``.

    Stand-in for a real per-image task: the image at the module-level
    ``fpath`` is loaded with ``itk.imread`` and the argument is handed
    back unchanged.

    Args:
        i: Value identifying the task; returned as-is.

    Returns:
        The same ``i`` that was passed in.
    """
    # Imported locally because `time` is not imported anywhere in the
    # visible script, so the time.sleep call below raised NameError.
    import time

    im_itk = itk.imread(fpath)  # loaded only to exercise ITK; not used further
    time.sleep(0.2)  # simulate some processing time
    return i
# Queue one lazy process(idx) call per index; nothing executes yet.
lazy_process = dask.delayed(process)
result.extend(lazy_process(idx) for idx in range(1000))
# Wrap the list of lazy calls in a single delayed object and run them all.
result = dask.delayed(result).compute()