Thanks a lot @dzenanz for looking at it! You’re right, I’ll check by taking import itk out of the loop. I’ll test and retry.
But I don’t think that will explain the full picture: I also tried a different way of invoking several runs, with a much larger 2-step registration, and the factor remains the same (you can even see a small warm-up effect here, as well as the penalty for the first affine registration, probably linked to the import). If the import were the limiting factor, I would expect the difference to shrink with a longer registration, but I do not observe that:
╔═══════════════════╦═════════════╦═════════════╦═════════════╗
║ ║ Affine ║ BSpline ║ Total ║
╠═══════════════════╬═════════════╬═════════════╬═════════════╣
║ Appose run 1 ║ 36970 ms ║ 211337 ms ║ 248307 ms ║
║ Appose run 2 ║ 6495 ms ║ 203000 ms ║ 209495 ms ║
║ Appose run 3 ║ 5759 ms ║ 201558 ms ║ 207317 ms ║
╠═══════════════════╬═════════════╬═════════════╬═════════════╣
║ CLI run 1 ║ 6714 ms ║ 39521 ms ║ 46235 ms ║
║ CLI run 2 ║ 6798 ms ║ 39982 ms ║ 46780 ms ║
║ CLI run 3 ║ 6691 ms ║ 40067 ms ║ 46758 ms ║
╚═══════════════════╩═════════════╩═════════════╩═════════════╝
But I will test with the bench I sent you and report back.
(Note: Appose runs itk-elastix)
EDIT:
I moved the import, image reading, and parameter setup upfront, but I still get this:
Run CLI (s) itk-API (s) ratio (itk/cli)
-------------------------------------------------------
*1 2.051 22.421 10.93x
2 2.173 22.279 10.25x
3 2.139 22.068 10.32x
The slightly modified bench.py is here:
"""
Minimal benchmark: elastix CLI vs itk-elastix Python API.
Context
-------
In a Java/Appose-based setup we observed a ~5x slowdown when running
itk-elastix inside a persistent Python subprocess (via Appose) compared
to calling the elastix CLI executable directly. This script reproduces
the two execution paths in pure Python to isolate where the time goes.
Two methods are compared:
CLI -- subprocess.run(["elastix", "-f", ..., "-m", ..., "-p", ..., "-out", ...])
Each call spawns a fresh elastix process, exactly like DefaultElastixTask.
itk-API -- itk.ElastixRegistrationMethod[...].UpdateLargestPossibleRegion()
Registration runs inside the current Python process, exactly like the
script that Appose dispatches to its persistent worker process.
Usage
-----
python bench.py \\
--elastix /path/to/elastix \\
--fixed ../src/test/resources/blobs-rot15deg.tif \\
--moving ../src/test/resources/blobs.tif
Optional flags:
--threads N number of ITK/elastix threads (0 = auto-detect physical cores)
--runs N total number of timed repetitions (default 3; run 1 is warm-up)
--no-cli skip the CLI measurements
--no-itk skip the itk-elastix measurements
"""
import argparse
import os
import shutil
import subprocess
import sys
import tempfile
import time
import itk
# Elastix parameter file used by both backends. It is resolved relative to
# this script's own directory, so the benchmark does not depend on the
# current working directory.
PARAM_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "params_bspline.txt")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def physical_cores():
    """Return the core count to use when the thread count is auto-detected.

    The physical core count reported by ``psutil`` (an optional dependency)
    is preferred. When ``psutil`` is not installed, or it cannot determine
    the physical count, ``os.cpu_count()`` is used instead.

    Returns:
        int: A core count that is always at least 1. ``os.cpu_count()`` can
        return ``None`` when the count is undeterminable; 1 is returned in
        that case so callers never receive ``None`` as a thread count.
    """
    try:
        import psutil
    except ImportError:
        physical = None
    else:
        # May be None (or 0) when psutil cannot work out the physical count.
        physical = psutil.cpu_count(logical=False)
    return physical or os.cpu_count() or 1
def resolve_threads(n):
    """Translate the requested thread count into the count actually used.

    A request of ``0`` means "auto-detect" and is resolved through
    ``physical_cores()``; any other value is returned unchanged.
    """
    if n == 0:
        return physical_cores()
    return n
# ---------------------------------------------------------------------------
# CLI backend
# ---------------------------------------------------------------------------
def run_cli(elastix_exe, fixed, moving, param_file, n_threads):
    """Time one registration performed by the elastix command-line executable.

    A fresh temporary output directory is created for the run and removed
    again before returning, whether the run succeeded or not. The measured
    interval covers the whole ``subprocess.run`` call, i.e. process start-up
    plus everything the executable does.

    Args:
        elastix_exe: Path to the elastix executable.
        fixed: Path of the fixed image, passed as ``-f``.
        moving: Path of the moving image, passed as ``-m``.
        param_file: Path of the parameter file, passed as ``-p``.
        n_threads: Thread count, passed as ``-threads``.

    Returns:
        float: Elapsed wall-clock seconds of the subprocess call.

    Raises:
        RuntimeError: If the executable exits with a non-zero return code.
    """
    work_dir = tempfile.mkdtemp(prefix="elastix_cli_")
    try:
        argv = [elastix_exe]
        for flag, value in (
            ("-f", fixed),
            ("-m", moving),
            ("-p", param_file),
            ("-out", work_dir),
            ("-threads", str(n_threads)),
        ):
            argv += [flag, value]

        started = time.perf_counter()
        proc = subprocess.run(argv, capture_output=True, text=True)
        duration = time.perf_counter() - started

        if proc.returncode != 0:
            # Only the tail of stderr is shown to keep the report short.
            stderr_tail = proc.stderr[-600:]
            print(" [CLI] STDERR (last 600 chars):", stderr_tail, file=sys.stderr)
            raise RuntimeError(f"elastix CLI failed (rc={proc.returncode})")

        # Sanity check only: report whether the result file was written.
        ok = os.path.exists(os.path.join(work_dir, "TransformParameters.0.txt"))
        print(f" [CLI] {duration:.2f}s transform exists: {ok}")
        return duration
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)
# ---------------------------------------------------------------------------
# itk-elastix backend (in-process)
# ---------------------------------------------------------------------------
def run_itk(fixed_img, moving_img, param_obj, n_threads):
    """Time one in-process registration through the itk-elastix Python API.

    The filter is created and configured before the clock starts; only the
    ``UpdateLargestPossibleRegion()`` call is timed. A temporary output
    directory is created for the run and removed again before returning.

    Args:
        fixed_img: Fixed image, already loaded by the caller. Its Python
            type selects the ``ElastixRegistrationMethod`` instantiation.
        moving_img: Moving image, already loaded by the caller. The filter
            is instantiated with the fixed image's type for both inputs.
        param_obj: Parameter object handed to ``SetParameterObject``.
        n_threads: Thread count to request from the filter. ``None`` or a
            non-positive value leaves the filter's own default in place.

    Returns:
        float: Elapsed wall-clock seconds of the registration call.
    """
    out_dir = tempfile.mkdtemp(prefix="elastix_itk_")
    try:
        ImageType = type(fixed_img)
        erm = itk.ElastixRegistrationMethod[ImageType, ImageType].New()
        erm.SetFixedImage(fixed_img)
        erm.SetMovingImage(moving_img)
        erm.SetParameterObject(param_obj)
        erm.SetOutputDirectory(out_dir)
        erm.SetLogToConsole(False)
        erm.SetLogToFile(True)
        # Fix: n_threads used to be accepted but never applied, so this
        # backend was not explicitly given the thread count that the CLI
        # backend receives through "-threads".
        # NOTE(review): SetNumberOfThreads is expected to be the
        # ElastixRegistrationMethod counterpart of "-threads" -- confirm
        # against the installed itk-elastix version.
        if n_threads is not None and n_threads > 0:
            erm.SetNumberOfThreads(int(n_threads))

        t0 = time.perf_counter()
        erm.UpdateLargestPossibleRegion()
        elapsed = time.perf_counter() - t0

        # Sanity check only: report whether the result file was written.
        transform = os.path.join(out_dir, "TransformParameters.0.txt")
        ok = os.path.exists(transform)
        print(f" [itk-API] {elapsed:.2f}s transform exists: {ok}")
        return elapsed
    finally:
        shutil.rmtree(out_dir, ignore_errors=True)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def _print_summary(cli_times, itk_times, runs):
    """Print the per-run timing table and, for several runs, steady-state averages."""
    col = 14
    header = f"{'Run':<6} {'CLI (s)':>{col}} {'itk-API (s)':>{col}} {'ratio (itk/cli)':>{col}}"
    print(header)
    print("-" * len(header))
    for i in range(runs):
        warm = "*" if i == 0 else " "  # the first run is flagged as warm-up
        c = f"{cli_times[i]:.3f}" if cli_times else "n/a"
        t = f"{itk_times[i]:.3f}" if itk_times else "n/a"
        if cli_times and itk_times:
            ratio = f"{itk_times[i] / cli_times[i]:.2f}x"
        else:
            ratio = "n/a"
        print(f"{warm}{i + 1:<5} {c:>{col}} {t:>{col}} {ratio:>{col}}")

    if runs <= 1:
        return

    # Steady-state figures leave out run 1, which is treated as warm-up.
    print()
    avg_c = avg_t = None
    if len(cli_times) > 1:
        avg_c = sum(cli_times[1:]) / (len(cli_times) - 1)
        print(f"Steady-state avg CLI (runs 2+): {avg_c:.3f}s")
    if len(itk_times) > 1:
        avg_t = sum(itk_times[1:]) / (len(itk_times) - 1)
        print(f"Steady-state avg itk-API (runs 2+): {avg_t:.3f}s")
    if avg_c is not None and avg_t is not None:
        print(f"Steady-state ratio (itk/cli): {avg_t / avg_c:.2f}x")


def main():
    """Parse the command line, run the selected benchmarks and print a summary.

    For each of ``--runs`` repetitions the CLI backend (unless ``--no-cli``)
    and the itk-elastix backend (unless ``--no-itk``) are each timed once.
    The images and the parameter object for the itk-elastix backend are
    loaded once, before the first run, and only when that backend is
    enabled. Invalid argument combinations end the program through
    ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description="Benchmark elastix CLI vs itk-elastix Python API"
    )
    parser.add_argument("--elastix", default=None,
                        help="Path to elastix CLI executable (required unless --no-cli)")
    parser.add_argument("--fixed", required=True, help="Fixed image (TIFF/MHD/...)")
    parser.add_argument("--moving", required=True, help="Moving image")
    parser.add_argument("--threads", type=int, default=0,
                        help="Number of threads (0 = physical cores, default)")
    parser.add_argument("--runs", type=int, default=3,
                        help="Number of timed runs (first is warm-up)")
    parser.add_argument("--no-cli", action="store_true", help="Skip CLI measurements")
    parser.add_argument("--no-itk", action="store_true", help="Skip itk-elastix measurements")
    args = parser.parse_args()

    if not args.no_cli and args.elastix is None:
        parser.error("--elastix is required unless --no-cli is set")
    if args.runs < 1:
        # Zero or negative runs would yield an empty, meaningless report.
        parser.error("--runs must be at least 1")

    n_threads = resolve_threads(args.threads)

    print(f"Python {sys.version}")
    print(f"Threads: {n_threads} (requested: {args.threads})")
    try:
        import itk_elastix
        print(f"itk-elastix version: {itk_elastix.__version__}")
    except Exception:
        # Best-effort version report; it must never abort the benchmark.
        pass
    print(f"Param file: {PARAM_FILE}")
    print(f"Fixed: {args.fixed}")
    print(f"Moving: {args.moving}")
    print()

    cli_times = []
    itk_times = []

    # Load the in-process inputs once, outside the timed section, and only
    # when the itk-elastix backend is going to run.
    fixed_img = moving_img = param_obj = None
    if not args.no_itk:
        fixed_img = itk.imread(args.fixed, itk.F)
        moving_img = itk.imread(args.moving, itk.F)
        param_obj = itk.ParameterObject.New()
        param_obj.ReadParameterFile(PARAM_FILE)
        # NOTE(review): confirm that elastix actually honours a
        # "NumberOfThreads" entry in the parameter map.
        pm = param_obj.GetParameterMap(0)
        pm["NumberOfThreads"] = [str(n_threads)]
        param_obj.SetParameterMap(0, pm)

    for run in range(args.runs):
        label = f"Run {run + 1}/{args.runs}" + (" (warm-up)" if run == 0 else "")
        print(f"--- {label} ---")
        if not args.no_cli:
            t = run_cli(args.elastix, args.fixed, args.moving, PARAM_FILE, n_threads)
            cli_times.append(t)
        if not args.no_itk:
            t = run_itk(fixed_img, moving_img, param_obj, n_threads)
            itk_times.append(t)
        print()

    # Summary table
    _print_summary(cli_times, itk_times, args.runs)
# Run the benchmark only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
I may be doing something stupid; I just don’t know what. Maybe some parameters are ignored by the CLI but not by itk-elastix, or vice versa.