Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 6 additions & 19 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,18 +140,15 @@ class Cpu64NoopOperator(Cpu64OperatorMixin, CoreOperator):
@timed_pass(name='specializing.IET')
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Distributed-memory parallelism
mpiize(graph, **kwargs)

# Shared-memory parallelism
if options['openmp']:
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)
parizer.make_parallel(graph)
parizer.initialize(graph, options=options)
parizer.initialize(graph)

# Symbol definitions
cls._Target.DataManager(**kwargs).process(graph)
Expand Down Expand Up @@ -205,11 +202,6 @@ def _specialize_clusters(cls, clusters, **kwargs):
@classmethod
@timed_pass(name='specializing.IET')
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Flush denormal numbers
avoid_denormals(graph, **kwargs)

Expand All @@ -220,10 +212,10 @@ def _specialize_iet(cls, graph, **kwargs):
relax_incr_dimensions(graph, **kwargs)

# Parallelism
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)
parizer.make_simd(graph)
parizer.make_parallel(graph)
parizer.initialize(graph, options=options)
parizer.initialize(graph)

# Misc optimizations
hoist_prodders(graph)
Expand Down Expand Up @@ -300,12 +292,7 @@ def callback(f, *args):

@classmethod
def _make_iet_passes_mapper(cls, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)

return {
'denormals': partial(avoid_denormals, **kwargs),
Expand All @@ -316,7 +303,7 @@ def _make_iet_passes_mapper(cls, **kwargs):
'linearize': partial(linearize, **kwargs),
'simd': partial(parizer.make_simd),
'prodders': hoist_prodders,
'init': partial(parizer.initialize, options=options)
'init': partial(parizer.initialize)
}

_known_passes = (
Expand Down
29 changes: 7 additions & 22 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,18 +180,13 @@ class DeviceNoopOperator(DeviceOperatorMixin, CoreOperator):
@classmethod
@timed_pass(name='specializing.IET')
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Distributed-memory parallelism
mpiize(graph, **kwargs)

# GPU parallelism
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)
parizer.make_parallel(graph)
parizer.initialize(graph, options=options)
parizer.initialize(graph)

# Symbol definitions
cls._Target.DataManager(**kwargs).process(graph)
Expand Down Expand Up @@ -248,21 +243,16 @@ def _specialize_clusters(cls, clusters, **kwargs):
@classmethod
@timed_pass(name='specializing.IET')
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Distributed-memory parallelism
mpiize(graph, **kwargs)

# Lower BlockDimensions so that blocks of arbitrary shape may be used
relax_incr_dimensions(graph, **kwargs)

# GPU parallelism
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)
parizer.make_parallel(graph)
parizer.initialize(graph, options=options)
parizer.initialize(graph)

# Misc optimizations
hoist_prodders(graph)
Expand Down Expand Up @@ -325,22 +315,17 @@ def _make_clusters_passes_mapper(cls, **kwargs):

@classmethod
def _make_iet_passes_mapper(cls, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)
orchestrator = cls._Target.Orchestrator(**kwargs)

return {
'parallel': parizer.make_parallel,
'orchestrate': partial(orchestrator.process),
'pthreadify': partial(pthreadify, sregistry=sregistry),
'pthreadify': partial(pthreadify, **kwargs),
'mpi': partial(mpiize, **kwargs),
'linearize': partial(linearize, **kwargs),
'prodders': partial(hoist_prodders),
'init': partial(parizer.initialize, options=options)
'init': partial(parizer.initialize)
}

_known_passes = (
Expand Down
4 changes: 2 additions & 2 deletions devito/operator/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ def _lower_uiet(cls, stree, profiler=None, **kwargs):

@classmethod
@timed_pass(name='lowering.IET')
def _lower_iet(cls, uiet, profiler=None, **kwargs):
def _lower_iet(cls, uiet, **kwargs):
"""
Iteration/Expression tree lowering:

Expand All @@ -496,7 +496,7 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs):
# Instrument the IET for C-level profiling
# Note: this is postponed until after _specialize_iet because during
# specialization further Sections may be introduced
cls._Target.instrument(graph, profiler=profiler, **kwargs)
cls._Target.instrument(graph, **kwargs)

# Extract the necessary macros from the symbolic objects
generate_macros(graph, **kwargs)
Expand Down
4 changes: 4 additions & 0 deletions devito/operator/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ def record_ops_variation(self, initial, final):
def all_sections(self):
return list(self._sections) + flatten(self._subsections.values())

@property
def high_verbosity(self):
    """Return True if the verbosity level (``self._verbosity``) is 2 or higher."""
    return self._verbosity >= 2

def summary(self, args, dtype, reduce_over=None):
"""
Return a PerformanceSummary of the profiled sections.
Expand Down
5 changes: 3 additions & 2 deletions devito/passes/clusters/aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,14 +292,15 @@ def process(self, clusters):
def callback(self, clusters, prefix, xtracted=None):
if not prefix:
return clusters
d = prefix[-1].dim
p = prefix[-1]
d = p.dim

# Rule out extractions that would break data dependencies
exclude = set().union(*[c.scope.writes for c in clusters])

# Rule out extractions that depend on the Dimension currently investigated,
# as they clearly wouldn't be invariants
exclude.add(d)
exclude.update({d, *p.sub_iterators})

key = lambda c: self._lookup_key(c, d)
processed = list(clusters)
Expand Down
2 changes: 1 addition & 1 deletion devito/passes/iet/instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


def instrument(graph, **kwargs):
profiler = kwargs['profiler']
profiler = kwargs.get('profiler')
if profiler is None:
return

Expand Down
22 changes: 16 additions & 6 deletions devito/passes/iet/langbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,27 +160,36 @@ class LangTransformer:
The constructs of the target language. To be specialized by a subclass.
"""

def __init__(self, key, sregistry, platform, compiler):
def __init__(self, key=None, options=None, sregistry=None, platform=None,
compiler=None, profiler=None, **kwargs):
"""
Parameters
----------
key : callable, optional
Return True if an Iteration can and should be parallelized,
False otherwise.
options : dict, optional
The optimization options.
sregistry : SymbolRegistry
The symbol registry, to access the symbols appearing in an IET.
platform : Platform
The underlying platform.
compiler : Compiler
The underlying JIT compiler.
profiler : Profiler
The underlying Profiler, used to instrument the IET.
"""
if key is not None:
self.key = key
else:
self.key = lambda i: False

self.uses_mpi = options['mpi']

self.sregistry = sregistry
self.platform = platform
self.compiler = compiler
self.profiler = profiler

@iet_pass
def make_parallel(self, iet):
Expand Down Expand Up @@ -228,11 +237,11 @@ class ShmTransformer(LangTransformer):
shared-memory-parallel IETs for CPUs.
"""

def __init__(self, key, sregistry, options, platform, compiler):
def __init__(self, key, options=None, **kwargs):
"""
Parameters
----------
key : callable, optional
key : callable
Return True if an Iteration can and should be parallelized,
False otherwise.
sregistry : SymbolRegistry
Expand All @@ -251,12 +260,13 @@ def __init__(self, key, sregistry, options, platform, compiler):
iteration exceeds this threshold. Otherwise, use static scheduling.
* 'par-nested': nested parallelism if the number of hyperthreads
per core is greater than this threshold.
* 'mpi': tells whether MPI is enabled.
platform : Platform
The underlying platform.
compiler : Compiler
The underlying JIT compiler.
"""
super().__init__(key, sregistry, platform, compiler)
super().__init__(key, options=options, **kwargs)

self.collapse_ncores = options['par-collapse-ncores']
self.collapse_work = options['par-collapse-work']
Expand Down Expand Up @@ -391,7 +401,7 @@ def deviceid(self):
return self.sregistry.deviceid

@iet_pass
def initialize(self, iet, options=None):
def initialize(self, iet):
"""
An `iet_pass` which transforms an IET such that the target language
runtime is initialized.
Expand All @@ -416,7 +426,7 @@ def _extract_objcomm(iet):
# Fallback -- might end up here because the Operator has no
# halo exchanges, but we now need it nonetheless to perform
# the rank-GPU assignment
if options['mpi']:
if self.uses_mpi:
for i in iet.parameters:
try:
return i.grid.distributor._obj_comm
Expand Down
8 changes: 4 additions & 4 deletions devito/passes/iet/parpragma.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,9 @@ class PragmaShmTransformer(ShmTransformer, PragmaSimdTransformer):
IETs for CPUs.
"""

def __init__(self, sregistry, options, platform, compiler):
def __init__(self, **kwargs):
key = lambda i: i.is_ParallelRelaxed and not i.is_Vectorized
super().__init__(key, sregistry, options, platform, compiler)
super().__init__(key, **kwargs)

def _make_reductions(self, partree):
if not any(i.is_ParallelAtomic for i in partree.collapsed):
Expand Down Expand Up @@ -491,8 +491,8 @@ class PragmaDeviceAwareTransformer(DeviceAwareMixin, PragmaShmTransformer):
shared-memory-parallel, and device-parallel IETs.
"""

def __init__(self, sregistry, options, platform, compiler):
super().__init__(sregistry, options, platform, compiler)
def __init__(self, options=None, **kwargs):
super().__init__(options=options, **kwargs)

self.gpu_fit = options['gpu-fit']
# Need to reset the tile in case was already used and iter over by blocking
Expand Down
5 changes: 4 additions & 1 deletion devito/tools/data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,10 @@ def __getnewargs_ex__(self):
return tuple(self), sdict

def get(self, key, val=None):
return self.getters.get(key, val)
try:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This way the lookup goes through `__getitem__` and, in particular, through `__getitem_hook__`, which we exploit in `DimensionTuple`.

return self[key]
except KeyError:
return val

@property
def items(self) -> tuple:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_linearize.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,4 +688,4 @@ def test_cire_n_strides():

# NOTE: not exact equality because `op2` slightly changes the order of
# arithmetic operations, which in turn causes some rounding differences
assert np.allclose(u.data, u1.data, rtol=1e-5)
assert np.allclose(u.data, u1.data, rtol=1e-4)
Loading