devitocodes · FabioLuporini · Feb 13, 2026 · Feb 13, 2026 · Mar 3, 2026 · Mar 12, 2026
diff --git a/devito/core/cpu.py b/devito/core/cpu.py
@@ -140,18 +140,15 @@ class Cpu64NoopOperator(Cpu64OperatorMixin, CoreOperator):
     @timed_pass(name='specializing.IET')
     def _specialize_iet(cls, graph, **kwargs):
         options = kwargs['options']
-        platform = kwargs['platform']
-        compiler = kwargs['compiler']
-        sregistry = kwargs['sregistry']
 
         # Distributed-memory parallelism
         mpiize(graph, **kwargs)
 
         # Shared-memory parallelism
         if options['openmp']:
-            parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
+            parizer = cls._Target.Parizer(**kwargs)
             parizer.make_parallel(graph)
-            parizer.initialize(graph, options=options)
+            parizer.initialize(graph)
 
         # Symbol definitions
         cls._Target.DataManager(**kwargs).process(graph)
@@ -205,11 +202,6 @@ def _specialize_clusters(cls, clusters, **kwargs):
     @classmethod
     @timed_pass(name='specializing.IET')
     def _specialize_iet(cls, graph, **kwargs):
-        options = kwargs['options']
-        platform = kwargs['platform']
-        compiler = kwargs['compiler']
-        sregistry = kwargs['sregistry']
-
         # Flush denormal numbers
         avoid_denormals(graph, **kwargs)
 
@@ -220,10 +212,10 @@ def _specialize_iet(cls, graph, **kwargs):
         relax_incr_dimensions(graph, **kwargs)
 
         # Parallelism
-        parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
+        parizer = cls._Target.Parizer(**kwargs)
         parizer.make_simd(graph)
         parizer.make_parallel(graph)
-        parizer.initialize(graph, options=options)
+        parizer.initialize(graph)
 
         # Misc optimizations
         hoist_prodders(graph)
@@ -300,12 +292,7 @@ def callback(f, *args):
 
     @classmethod
     def _make_iet_passes_mapper(cls, **kwargs):
-        options = kwargs['options']
-        platform = kwargs['platform']
-        compiler = kwargs['compiler']
-        sregistry = kwargs['sregistry']
-
-        parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
+        parizer = cls._Target.Parizer(**kwargs)
 
         return {
             'denormals': partial(avoid_denormals, **kwargs),
@@ -316,7 +303,7 @@ def _make_iet_passes_mapper(cls, **kwargs):
             'linearize': partial(linearize, **kwargs),
             'simd': partial(parizer.make_simd),
             'prodders': hoist_prodders,
-            'init': partial(parizer.initialize, options=options)
+            'init': partial(parizer.initialize)
         }
 
     _known_passes = (

diff --git a/devito/core/gpu.py b/devito/core/gpu.py
@@ -180,18 +180,13 @@ class DeviceNoopOperator(DeviceOperatorMixin, CoreOperator):
     @classmethod
     @timed_pass(name='specializing.IET')
     def _specialize_iet(cls, graph, **kwargs):
-        options = kwargs['options']
-        platform = kwargs['platform']
-        compiler = kwargs['compiler']
-        sregistry = kwargs['sregistry']
-
         # Distributed-memory parallelism
         mpiize(graph, **kwargs)
 
         # GPU parallelism
-        parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
+        parizer = cls._Target.Parizer(**kwargs)
         parizer.make_parallel(graph)
-        parizer.initialize(graph, options=options)
+        parizer.initialize(graph)
 
         # Symbol definitions
         cls._Target.DataManager(**kwargs).process(graph)
@@ -248,21 +243,16 @@ def _specialize_clusters(cls, clusters, **kwargs):
     @classmethod
     @timed_pass(name='specializing.IET')
     def _specialize_iet(cls, graph, **kwargs):
-        options = kwargs['options']
-        platform = kwargs['platform']
-        compiler = kwargs['compiler']
-        sregistry = kwargs['sregistry']
-
         # Distributed-memory parallelism
         mpiize(graph, **kwargs)
 
         # Lower BlockDimensions so that blocks of arbitrary shape may be used
         relax_incr_dimensions(graph, **kwargs)
 
         # GPU parallelism
-        parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
+        parizer = cls._Target.Parizer(**kwargs)
         parizer.make_parallel(graph)
-        parizer.initialize(graph, options=options)
+        parizer.initialize(graph)
 
         # Misc optimizations
         hoist_prodders(graph)
@@ -325,22 +315,17 @@ def _make_clusters_passes_mapper(cls, **kwargs):
 
     @classmethod
     def _make_iet_passes_mapper(cls, **kwargs):
-        options = kwargs['options']
-        platform = kwargs['platform']
-        compiler = kwargs['compiler']
-        sregistry = kwargs['sregistry']
-
-        parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
+        parizer = cls._Target.Parizer(**kwargs)
         orchestrator = cls._Target.Orchestrator(**kwargs)
 
         return {
             'parallel': parizer.make_parallel,
             'orchestrate': partial(orchestrator.process),
-            'pthreadify': partial(pthreadify, sregistry=sregistry),
+            'pthreadify': partial(pthreadify, **kwargs),
             'mpi': partial(mpiize, **kwargs),
             'linearize': partial(linearize, **kwargs),
             'prodders': partial(hoist_prodders),
-            'init': partial(parizer.initialize, options=options)
+            'init': partial(parizer.initialize)
         }
 
     _known_passes = (

diff --git a/devito/operator/operator.py b/devito/operator/operator.py
@@ -474,7 +474,7 @@ def _lower_uiet(cls, stree, profiler=None, **kwargs):
 
     @classmethod
     @timed_pass(name='lowering.IET')
-    def _lower_iet(cls, uiet, profiler=None, **kwargs):
+    def _lower_iet(cls, uiet, **kwargs):
         """
         Iteration/Expression tree lowering:
 
@@ -496,7 +496,7 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs):
         # Instrument the IET for C-level profiling
         # Note: this is postponed until after _specialize_iet because during
         # specialization further Sections may be introduced
-        cls._Target.instrument(graph, profiler=profiler, **kwargs)
+        cls._Target.instrument(graph, **kwargs)
 
         # Extract the necessary macros from the symbolic objects
         generate_macros(graph, **kwargs)

diff --git a/devito/operator/profiling.py b/devito/operator/profiling.py
@@ -180,6 +180,10 @@ def record_ops_variation(self, initial, final):
     def all_sections(self):
         return list(self._sections) + flatten(self._subsections.values())
 
+    @property
+    def high_verbosity(self):
+        return self._verbosity >= 2
+
     def summary(self, args, dtype, reduce_over=None):
         """
         Return a PerformanceSummary of the profiled sections.

diff --git a/devito/passes/clusters/aliases.py b/devito/passes/clusters/aliases.py
@@ -292,14 +292,15 @@ def process(self, clusters):
     def callback(self, clusters, prefix, xtracted=None):
         if not prefix:
             return clusters
-        d = prefix[-1].dim
+        p = prefix[-1]
+        d = p.dim
 
         # Rule out extractions that would break data dependencies
         exclude = set().union(*[c.scope.writes for c in clusters])
 
         # Rule out extractions that depend on the Dimension currently investigated,
         # as they clearly wouldn't be invariants
-        exclude.add(d)
+        exclude.update({d, *p.sub_iterators})
 
         key = lambda c: self._lookup_key(c, d)
         processed = list(clusters)

diff --git a/devito/passes/iet/instrument.py b/devito/passes/iet/instrument.py
@@ -14,7 +14,7 @@
 
 
 def instrument(graph, **kwargs):
-    profiler = kwargs['profiler']
+    profiler = kwargs.get('profiler')
     if profiler is None:
         return
 

diff --git a/devito/passes/iet/langbase.py b/devito/passes/iet/langbase.py
@@ -160,27 +160,36 @@ class LangTransformer:
     The constructs of the target language. To be specialized by a subclass.
     """
 
-    def __init__(self, key, sregistry, platform, compiler):
+    def __init__(self, key=None, options=None, sregistry=None, platform=None,
+                 compiler=None, profiler=None, **kwargs):
         """
         Parameters
         ----------
         key : callable, optional
             Return True if an Iteration can and should be parallelized,
             False otherwise.
+        options : dict, optional
+            The optimization options; if None, MPI is treated as disabled.
         sregistry : SymbolRegistry
             The symbol registry, to access the symbols appearing in an IET.
         platform : Platform
             The underlying platform.
         compiler : Compiler
             The underlying JIT compiler.
+        profiler : Profiler, optional
+            The underlying Profiler, used to instrument the IET.
         """
         if key is not None:
             self.key = key
         else:
             self.key = lambda i: False
+        # `options` defaults to None, which cannot be subscripted
+        self.uses_mpi = options['mpi'] if options is not None else False
+
         self.sregistry = sregistry
         self.platform = platform
         self.compiler = compiler
+        self.profiler = profiler
 
     @iet_pass
     def make_parallel(self, iet):
@@ -228,11 +237,11 @@ class ShmTransformer(LangTransformer):
     shared-memory-parallel IETs for CPUs.
     """
 
-    def __init__(self, key, sregistry, options, platform, compiler):
+    def __init__(self, key, options=None, **kwargs):
         """
         Parameters
         ----------
-        key : callable, optional
+        key : callable
             Return True if an Iteration can and should be parallelized,
             False otherwise.
         sregistry : SymbolRegistry
@@ -251,12 +260,13 @@ def __init__(self, key, sregistry, options, platform, compiler):
                iteration exceeds this threshold. Otherwise, use static scheduling.
              * 'par-nested': nested parallelism if the number of hyperthreads
                per core is greater than this threshold.
+             * 'mpi': tells whether MPI is enabled.
         platform : Platform
             The underlying platform.
         compiler : Compiler
             The underlying JIT compiler.
         """
-        super().__init__(key, sregistry, platform, compiler)
+        super().__init__(key, options=options, **kwargs)
 
         self.collapse_ncores = options['par-collapse-ncores']
         self.collapse_work = options['par-collapse-work']
@@ -391,7 +401,7 @@ def deviceid(self):
         return self.sregistry.deviceid
 
     @iet_pass
-    def initialize(self, iet, options=None):
+    def initialize(self, iet):
         """
         An `iet_pass` which transforms an IET such that the target language
         runtime is initialized.
@@ -416,7 +426,7 @@ def _extract_objcomm(iet):
             # Fallback -- might end up here because the Operator has no
             # halo exchanges, but we now need it nonetheless to perform
             # the rank-GPU assignment
-            if options['mpi']:
+            if self.uses_mpi:
                 for i in iet.parameters:
                     try:
                         return i.grid.distributor._obj_comm

diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
@@ -225,9 +225,9 @@ class PragmaShmTransformer(ShmTransformer, PragmaSimdTransformer):
     IETs for CPUs.
     """
 
-    def __init__(self, sregistry, options, platform, compiler):
+    def __init__(self, **kwargs):
         key = lambda i: i.is_ParallelRelaxed and not i.is_Vectorized
-        super().__init__(key, sregistry, options, platform, compiler)
+        super().__init__(key, **kwargs)
 
     def _make_reductions(self, partree):
         if not any(i.is_ParallelAtomic for i in partree.collapsed):
@@ -491,8 +491,8 @@ class PragmaDeviceAwareTransformer(DeviceAwareMixin, PragmaShmTransformer):
     shared-memory-parallel, and device-parallel IETs.
     """
 
-    def __init__(self, sregistry, options, platform, compiler):
-        super().__init__(sregistry, options, platform, compiler)
+    def __init__(self, options=None, **kwargs):
+        super().__init__(options=options, **kwargs)
 
         self.gpu_fit = options['gpu-fit']
         # Need to reset the tile in case was already used and iter over by blocking

diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py
@@ -99,7 +99,10 @@ def __getnewargs_ex__(self):
         return tuple(self), sdict
 
     def get(self, key, val=None):
-        return self.getters.get(key, val)
+        try:
+            return self[key]
+        except KeyError:
+            return val
 
     @property
     def items(self) -> tuple:

diff --git a/tests/test_linearize.py b/tests/test_linearize.py
@@ -688,4 +688,4 @@ def test_cire_n_strides():
 
     # NOTE: not exact equality because `op2` slightly changes the order of
     # arithmetic operations, which in turn causes some rounding differences
-    assert np.allclose(u.data, u1.data, rtol=1e-5)
+    assert np.allclose(u.data, u1.data, rtol=1e-4)