diff --git a/docs/superbench-config.mdx b/docs/superbench-config.mdx index feebef6d6..2d3454ee6 100644 --- a/docs/superbench-config.mdx +++ b/docs/superbench-config.mdx @@ -370,6 +370,7 @@ proc_num: int node_num: int env: dict mca: dict +bind_to: str prefix: str parallel: bool ``` @@ -403,6 +404,7 @@ Some attributes may only be suitable for specific mode. | `prefix` | ✓ | ✘ | ✘ | | `env` | ✓ | ✓ | ✓ | | `mca` | ✘ | ✘ | ✓ | +| `bind_to` | ✘ | ✘ | ✓ | | `parallel` | ✓ | ✘ | ✘ | | `pattern` | ✘ | ✘ | ✓ | @@ -452,6 +454,16 @@ MCA (Modular Component Architecture) frameworks, components, or modules to use i in a flatten key-value dictionary. Only available for `mpi` mode. +### `bind_to` + +Process binding policy passed to `mpirun -bind-to`. +Only available for `mpi` mode. + +Use this option when a benchmark needs to override the runner's default MPI binding behavior, +for example when the benchmark implements its own topology-aware CPU/NUMA affinity logic. + +* default value: `numa` + ### `parallel` Whether run benchmarks in parallel (all ranks at the same time) or in sequence (one rank at a time). 
diff --git a/superbench/runner/runner.py b/superbench/runner/runner.py index a5ac13cbb..777f1b5b3 100644 --- a/superbench/runner/runner.py +++ b/superbench/runner/runner.py @@ -91,6 +91,8 @@ def __validate_sb_config(self): # noqa: C901 'btl_tcp_if_exclude': 'lo,docker0', 'coll_hcoll_enable': 0, } + if 'bind_to' not in mode: + self._sb_benchmarks[name].modes[idx].bind_to = 'numa' for key in ['PATH', 'LD_LIBRARY_PATH', 'SB_MICRO_PATH', 'SB_WORKSPACE']: self._sb_benchmarks[name].modes[idx].env.setdefault(key, None) if 'pattern' in mode: @@ -182,13 +184,14 @@ def __get_mode_command(self, benchmark_name, mode, timeout=None): '-tag-output ' # tag mpi output with [jobid,rank] prefix '-allow-run-as-root ' # allow mpirun to run when executed by root user '{host_list} ' # use prepared hostfile or specify nodes and launch {proc_num} processes on each node - '-bind-to numa ' # bind processes to numa + '-bind-to {bind_to} ' # bind processes according to mode config '{mca_list} {env_list} {command}' ).format( trace=trace_command, host_list=f'-host localhost:{mode.proc_num}' if 'node_num' in mode and mode.node_num == 1 else f'-hostfile hostfile -map-by ppr:{mode.proc_num}:node' if 'host_list' not in mode else '-host ' + ','.join(f'{host}:{mode.proc_num}' for host in mode.host_list), + bind_to=mode.bind_to, mca_list=' '.join(f'-mca {k} {v}' for k, v in mode.mca.items()), env_list=' '.join( f'-x {k}={str(v).format(proc_rank=mode.proc_rank, proc_num=mode.proc_num)}'