From a22f2061c7b7841172ac5724c21fd8e65563cc7d Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 25 Mar 2026 10:47:26 -0400
Subject: [PATCH 1/5] Add --deps-only flag to separate dependency fetching from
 source builds

This allows CI to fetch and build dependencies (FFTW, HDF5, etc.) on
login nodes with internet access, then build MFC source code on compute
nodes that may have no network connectivity.

Key changes:
- New --deps-only CLI flag for ./mfc.sh build
- Already-configured dependencies are skipped entirely during regular
  builds, guaranteeing no network access in the source build step
- Frontier and Frontier AMD now follow the pattern: deps on login node,
  source build + test on compute node
---
 .github/workflows/bench.yml         |  8 ++++----
 .github/workflows/common/bench.sh   | 12 ++++++------
 .github/workflows/common/test.sh    | 30 ++++++++++++++---------------
 .github/workflows/frontier/build.sh |  7 +------
 .github/workflows/test.yml          |  4 ++--
 toolchain/mfc/build.py              | 26 +++++++++++++++++++++++++
 toolchain/mfc/cli/commands.py       |  7 +++++++
 7 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 7ce02c1e3f..d39831730d 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -68,7 +68,7 @@ jobs:
             flag: f
             device: gpu
             interface: acc
-            build_script: "bash .github/workflows/frontier/build.sh gpu acc bench"
+            build_script: "bash .github/workflows/frontier/build.sh gpu acc"
           - cluster: frontier
             name: Oak Ridge | Frontier (CCE)
             group: phoenix
@@ -76,7 +76,7 @@ jobs:
             flag: f
             device: gpu
             interface: omp
-            build_script: "bash .github/workflows/frontier/build.sh gpu omp bench"
+            build_script: "bash .github/workflows/frontier/build.sh gpu omp"
           - cluster: frontier_amd
             name: Oak Ridge | Frontier (AMD)
             group: phoenix
@@ -84,7 +84,7 @@ jobs:
             flag: famd
             device: gpu
             interface: omp
-            build_script: "bash .github/workflows/frontier_amd/build.sh gpu omp bench"
+            build_script: "bash .github/workflows/frontier_amd/build.sh gpu omp"
     continue-on-error: ${{ matrix.cluster == 'frontier' || matrix.cluster == 'frontier_amd' }}
     runs-on:
       group: ${{ matrix.group }}
@@ -103,7 +103,7 @@ jobs:
           ref: master
           path: master
 
-      - name: Setup & Build
+      - name: Fetch Dependencies
         if: matrix.build_script != ''
         timeout-minutes: 150
         run: |
diff --git a/.github/workflows/common/bench.sh b/.github/workflows/common/bench.sh
index 66d77cfd99..be83e57b87 100644
--- a/.github/workflows/common/bench.sh
+++ b/.github/workflows/common/bench.sh
@@ -21,18 +21,18 @@ if [ "$job_cluster" = "phoenix" ]; then
     trap 'rm -rf "$currentdir" || true' EXIT
 fi
 
-# --- Build (if not pre-built on login node) ---
-# Phoenix builds inside SLURM; Frontier pre-builds via build.sh on the login node.
+# --- Build ---
+# Phoenix builds everything inside SLURM (no login-node build step).
+# Frontier/Frontier AMD: deps already fetched on login node via --deps-only;
+# source code is built here on the compute node.
 # Phoenix: always nuke stale builds (heterogeneous compute nodes → ISA mismatch risk).
 if [ "$job_cluster" = "phoenix" ]; then
     source .github/scripts/clean-build.sh
     clean_build
 fi
 
-if [ ! -d "build" ]; then
-    source .github/scripts/retry-build.sh
-    retry_build ./mfc.sh build -j $n_jobs $build_opts || exit 1
-fi
+source .github/scripts/retry-build.sh
+retry_build ./mfc.sh build -j $n_jobs $build_opts || exit 1
 
 # --- Bench cluster flag ---
 if [ "$job_cluster" = "phoenix" ]; then
diff --git a/.github/workflows/common/test.sh b/.github/workflows/common/test.sh
index e155fd48f8..9eb116e183 100644
--- a/.github/workflows/common/test.sh
+++ b/.github/workflows/common/test.sh
@@ -21,29 +21,29 @@ if [ "$job_cluster" = "phoenix" ]; then
     trap 'rm -rf "$currentdir" || true' EXIT
 fi
 
-# --- Build (if not pre-built on login node) ---
-# Phoenix builds inside SLURM; Frontier pre-builds via build.sh on the login node.
-# Phoenix builds inside SLURM on heterogeneous compute nodes — always start fresh
-# to avoid SIGILL from stale binaries compiled on a different microarchitecture.
+# --- Build ---
+# Phoenix builds everything inside SLURM (no login-node build step).
+# Frontier/Frontier AMD: deps already fetched on login node via --deps-only;
+# source code is built here on the compute node.
+# Phoenix: always start fresh to avoid SIGILL from stale binaries compiled
+# on a different microarchitecture.
 if [ "$job_cluster" = "phoenix" ]; then
     source .github/scripts/clean-build.sh
     clean_build
 fi
 
-if [ ! -d "build" ]; then
-    source .github/scripts/retry-build.sh
+source .github/scripts/retry-build.sh
 
-    # Phoenix: smoke-test the syscheck binary to catch architecture mismatches
-    # (SIGILL from binaries compiled on a different compute node).
-    validate_cmd=""
-    if [ "$job_cluster" = "phoenix" ]; then
-        validate_cmd='syscheck_bin=$(find build/install -name syscheck -type f 2>/dev/null | head -1); [ -z "$syscheck_bin" ] || "$syscheck_bin" > /dev/null 2>&1'
-    fi
-
-    RETRY_VALIDATE_CMD="$validate_cmd" \
-        retry_build ./mfc.sh test -v --dry-run -j 8 $build_opts || exit 1
+# Phoenix: smoke-test the syscheck binary to catch architecture mismatches
+# (SIGILL from binaries compiled on a different compute node).
+validate_cmd=""
+if [ "$job_cluster" = "phoenix" ]; then
+    validate_cmd='syscheck_bin=$(find build/install -name syscheck -type f 2>/dev/null | head -1); [ -z "$syscheck_bin" ] || "$syscheck_bin" > /dev/null 2>&1'
 fi
 
+RETRY_VALIDATE_CMD="$validate_cmd" \
+    retry_build ./mfc.sh test -v --dry-run -j 8 $build_opts || exit 1
+
 # --- GPU detection and thread count ---
 device_opts=""
 rdma_opts=""
diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh
index 5bd40999d7..cd289ef074 100644
--- a/.github/workflows/frontier/build.sh
+++ b/.github/workflows/frontier/build.sh
@@ -14,7 +14,6 @@ esac
 
 job_device=$1
 job_interface=$2
-run_bench=$3
 source .github/scripts/gpu-opts.sh
 build_opts="$gpu_opts"
 
@@ -24,8 +23,4 @@ source .github/scripts/clean-build.sh
 clean_build
 
 source .github/scripts/retry-build.sh
-if [ "$run_bench" == "bench" ]; then
-    retry_build ./mfc.sh build -j 8 $build_opts || exit 1
-else
-    retry_build ./mfc.sh test -v -a --dry-run $([ "$cluster_name" = "frontier" ] && echo "--rdma-mpi") -j 8 $build_opts || exit 1
-fi
+retry_build ./mfc.sh build --deps-only -j 8 $build_opts || exit 1
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 90ad965c52..c13e5c22e3 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -400,12 +400,12 @@ jobs:
             echo "Coverage cache: none available — full test suite will run"
           fi
 
-      - name: Build (login node)
+      - name: Fetch Dependencies
         if:   matrix.cluster != 'phoenix'
         timeout-minutes: 60
         run:  bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
 
-      - name: Test
+      - name: Build & Test
         run:  bash .github/scripts/submit-slurm-job.sh .github/workflows/common/test.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }} ${{ matrix.shard }}
 
       - name: Cancel SLURM Jobs
diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py
index d6daf97bb6..6173fbfb0e 100644
--- a/toolchain/mfc/build.py
+++ b/toolchain/mfc/build.py
@@ -552,6 +552,12 @@ def __build_target(target: typing.Union[MFCTarget, str], case: input.MFCInputFil
 
     history.add(target.name)
 
+    # Dependencies are pinned to fixed versions. If one is already configured,
+    # skip it entirely to avoid re-entering the superbuild (which may access the
+    # network). NOTE(review): assumes is_configured() implies built & installed.
+    if target.isDependency and target.is_configured(case):
+        return
+
     for dep in target.requires.compute():
         # If we have already built and installed this target,
         # do not do so again. This can be inferred by whether
@@ -594,6 +600,26 @@ def build(targets=None, case: input.MFCInputFile = None, history: typing.Set[str
     case = case or input.load(ARG("input"), ARG("--"), {})
     case.validate_params()
 
+    if ARG("deps_only", False):
+        all_deps = set()
+        for t in targets:
+            resolved = get_target(t)
+            for dep in resolved.requires.compute():
+                all_deps.add(dep)
+
+        if len(history) == 0:
+            cons.print(f"[bold]Fetch Dependencies | {format_list_to_string([d.name for d in all_deps], 'magenta', 'None')}[/bold]")
+            cons.print(no_indent=True)
+
+        if not all_deps:
+            cons.print("[yellow]No dependencies to build for the requested targets.[/yellow]")
+            return
+
+        for dep in all_deps:
+            __build_target(dep, case, history)
+
+        return
+
     if len(history) == 0:
         cons.print(__generate_header(case, targets))
         cons.print(no_indent=True)
diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 85aab95031..e98003aa74 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -134,6 +134,13 @@
             default=False,
             dest="case_optimization",
         ),
+        Argument(
+            name="deps-only",
+            help="Only fetch and build dependencies, do not build MFC targets.",
+            action=ArgAction.STORE_TRUE,
+            default=False,
+            dest="deps_only",
+        ),
     ],
     examples=[
         Example("./mfc.sh build", "Build all default targets (CPU)"),

From 8d39c7b982187b00305bd1996455d2abc83b45f0 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 25 Mar 2026 13:50:45 -0400
Subject: [PATCH 2/5] Fix --deps-only recursive build: only gate top-level call

When __build_target recursively called build() to resolve sub-deps
(e.g. SILO depends on HDF5), the --deps-only guard intercepted
the recursive call and only built deps-of-deps (none for HDF5),
never building HDF5 itself. This caused SILO's configure to fail
with 'HDF5 was not found'.

Fix: only enter the --deps-only path on the top-level call
(history is empty), letting recursive calls proceed normally.
---
 toolchain/mfc/build.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py
index 6173fbfb0e..9fed43c271 100644
--- a/toolchain/mfc/build.py
+++ b/toolchain/mfc/build.py
@@ -600,16 +600,15 @@ def build(targets=None, case: input.MFCInputFile = None, history: typing.Set[str
     case = case or input.load(ARG("input"), ARG("--"), {})
     case.validate_params()
 
-    if ARG("deps_only", False):
+    if ARG("deps_only", False) and len(history) == 0:
         all_deps = set()
         for t in targets:
             resolved = get_target(t)
             for dep in resolved.requires.compute():
                 all_deps.add(dep)
 
-        if len(history) == 0:
-            cons.print(f"[bold]Fetch Dependencies | {format_list_to_string([d.name for d in all_deps], 'magenta', 'None')}[/bold]")
-            cons.print(no_indent=True)
+        cons.print(f"[bold]Fetch Dependencies | {format_list_to_string([d.name for d in all_deps], 'magenta', 'None')}[/bold]")
+        cons.print(no_indent=True)
 
         if not all_deps:
             cons.print("[yellow]No dependencies to build for the requested targets.[/yellow]")

From 7317ad54a65048c24f86f54c0082940b6e2e202f Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 25 Mar 2026 15:10:19 -0400
Subject: [PATCH 3/5] ci: move case-optimization source builds to compute nodes
 for Frontier

Frontier/Frontier AMD now follow the same deps-on-login, source-on-compute
pattern for case optimization tests. Previously, prebuild-case-optimization.sh
built deps + source on the login node. Now:
- Login node: build.sh fetches deps via --deps-only
- Compute node: run_case_optimization.sh builds case-optimized binaries
  then runs them

Phoenix is unchanged (prebuild + run both in SLURM).
---
 .github/scripts/run_case_optimization.sh | 19 ++++++++++++-------
 .github/workflows/test.yml               |  9 +++++++--
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/.github/scripts/run_case_optimization.sh b/.github/scripts/run_case_optimization.sh
index 21b6ff0b6f..edc01dd6c9 100755
--- a/.github/scripts/run_case_optimization.sh
+++ b/.github/scripts/run_case_optimization.sh
@@ -13,13 +13,6 @@ if [ "$job_device" = "gpu" ] && [ "$ngpus" -eq 0 ]; then
     ngpus=1
 fi
 
-# Verify the venv Python interpreter exists (created by ./mfc.sh build)
-if [ ! -x build/venv/bin/python3 ]; then
-    echo "ERROR: build/venv/bin/python3 not found."
-    echo "The MFC build venv may not have been created. Was the pre-build step successful?"
-    exit 1
-fi
-
 benchmarks=(
     benchmarks/5eq_rk3_weno3_hllc/case.py
     benchmarks/viscous_weno5_sgb_acoustic/case.py
@@ -28,6 +21,18 @@ benchmarks=(
     benchmarks/igr/case.py
 )
 
+# For Frontier/Frontier AMD: deps were fetched on the login node via --deps-only;
+# build case-optimized binaries here on the compute node before running.
+# For Phoenix: prebuild-case-optimization.sh already built everything in a prior SLURM job.
+if [ "$job_cluster" != "phoenix" ]; then
+    echo "=== Building case-optimized binaries on compute node ==="
+    for case in "${benchmarks[@]}"; do
+        echo "--- Building: $case ---"
+        ./mfc.sh build -i "$case" --case-optimization $gpu_opts -j 8
+    done
+    echo "=== All case-optimized binaries built ==="
+fi
+
 passed=0
 failed=0
 failed_cases=""
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c13e5c22e3..d40a44f04f 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -486,15 +486,20 @@ jobs:
       - name: Clean stale output files
         run:  rm -f *.out
 
+      - name: Fetch Dependencies
+        if:   matrix.cluster != 'phoenix'
+        run:  bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
+
       - name: Pre-Build (SLURM)
         if:   matrix.cluster == 'phoenix'
         run:  bash .github/scripts/submit-slurm-job.sh .github/scripts/prebuild-case-optimization.sh cpu ${{ matrix.interface }} ${{ matrix.cluster }}
 
-      - name: Pre-Build (login node)
+      - name: Build & Run Case-Optimization Tests
         if:   matrix.cluster != 'phoenix'
-        run:  bash .github/scripts/prebuild-case-optimization.sh ${{ matrix.cluster }} ${{ matrix.device }} ${{ matrix.interface }}
+        run:  bash .github/scripts/submit-slurm-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }}
 
       - name: Run Case-Optimization Tests
+        if:   matrix.cluster == 'phoenix'
         run:  bash .github/scripts/submit-slurm-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }}
 
       - name: Cancel SLURM Jobs

From 750b1fd8d81dbebf212bdb7acb7ebeb18e0c37a5 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 25 Mar 2026 18:52:55 -0400
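Subject: [PATCH 4/5] ci: use -j 1 for Frontier Cray builds to work around CCE
 19.0.0 IPA SIGSEGV

Compute-node source builds on the frontier cluster (test.sh, bench.sh,
run_case_optimization.sh) now use a single build job. Other clusters keep
their existing job counts (8 in test.sh and run_case_optimization.sh,
$n_jobs in bench.sh).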
---
 .github/scripts/run_case_optimization.sh | 8 +++++++-
 .github/workflows/common/bench.sh        | 5 +++++
 .github/workflows/common/test.sh         | 8 +++++++-
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/.github/scripts/run_case_optimization.sh b/.github/scripts/run_case_optimization.sh
index edc01dd6c9..bd54aedc05 100755
--- a/.github/scripts/run_case_optimization.sh
+++ b/.github/scripts/run_case_optimization.sh
@@ -24,11 +24,17 @@ benchmarks=(
 # For Frontier/Frontier AMD: deps were fetched on the login node via --deps-only;
 # build case-optimized binaries here on the compute node before running.
 # For Phoenix: prebuild-case-optimization.sh already built everything in a prior SLURM job.
+# Frontier Cray: -j 1 to work around CCE 19.0.0 IPA SIGSEGV
+build_jobs=8
+if [ "$job_cluster" = "frontier" ]; then
+    build_jobs=1
+fi
+
 if [ "$job_cluster" != "phoenix" ]; then
     echo "=== Building case-optimized binaries on compute node ==="
     for case in "${benchmarks[@]}"; do
         echo "--- Building: $case ---"
-        ./mfc.sh build -i "$case" --case-optimization $gpu_opts -j 8
+        ./mfc.sh build -i "$case" --case-optimization $gpu_opts -j $build_jobs || exit 1  # stop at the first failed build
     done
     echo "=== All case-optimized binaries built ==="
 fi
diff --git a/.github/workflows/common/bench.sh b/.github/workflows/common/bench.sh
index be83e57b87..9522e3a043 100644
--- a/.github/workflows/common/bench.sh
+++ b/.github/workflows/common/bench.sh
@@ -31,6 +31,11 @@ if [ "$job_cluster" = "phoenix" ]; then
     clean_build
 fi
 
+# Frontier Cray: -j 1 to work around CCE 19.0.0 IPA SIGSEGV
+if [ "$job_cluster" = "frontier" ]; then
+    n_jobs=1
+fi
+
 source .github/scripts/retry-build.sh
 retry_build ./mfc.sh build -j $n_jobs $build_opts || exit 1
 
diff --git a/.github/workflows/common/test.sh b/.github/workflows/common/test.sh
index 9eb116e183..2733235549 100644
--- a/.github/workflows/common/test.sh
+++ b/.github/workflows/common/test.sh
@@ -41,8 +41,14 @@ if [ "$job_cluster" = "phoenix" ]; then
     validate_cmd='syscheck_bin=$(find build/install -name syscheck -type f 2>/dev/null | head -1); [ -z "$syscheck_bin" ] || "$syscheck_bin" > /dev/null 2>&1'
 fi
 
+# Frontier Cray: -j 1 to work around CCE 19.0.0 IPA SIGSEGV
+build_jobs=8
+if [ "$job_cluster" = "frontier" ]; then
+    build_jobs=1
+fi
+
 RETRY_VALIDATE_CMD="$validate_cmd" \
-    retry_build ./mfc.sh test -v --dry-run -j 8 $build_opts || exit 1
+    retry_build ./mfc.sh test -v --dry-run -j $build_jobs $build_opts || exit 1
 
 # --- GPU detection and thread count ---
 device_opts=""

From 3adf7dc89f11d6b99242eb6fbb305d48811280ca Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 26 Mar 2026 10:30:46 -0400
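Subject: [PATCH 5/5] ci: suppress Cray warnings 990 and 7208

Adds message IDs 990 and 7208 to the -M list passed by
add_compile_options() in the Cray compiler branch of CMakeLists.txt, so
the suppression applies to all builds that take that branch, not only CI.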
---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fb77271a37..04c51eafc3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -176,7 +176,7 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
     endif()
 elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
     add_compile_options(
-        "SHELL:-M 296,878,1391,1069,5025"
+        "SHELL:-M 296,878,1391,1069,990,5025,7208"
         "SHELL:-h static" "SHELL:-h keepfiles"
         "SHELL:-h acc_model=auto_async_none"
         "SHELL: -h acc_model=no_fast_addr"