From 313f2394ef122f22e7722ef033f8e4c7db0c6ea1 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 20 Mar 2026 13:16:03 +0000 Subject: [PATCH 1/5] ci: replace global RUST_TEST_THREADS=1 with per-crate --test-threads=1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On musl, fork() is unsafe when other threads exist — even threads sleeping on a futex leave musl internal state that the child inherits in an inconsistent form, causing SIGSEGV between fork and exec. A process-wide mutex (PTY_LOCK) cannot fix this because the test harness still spawns multiple threads (one per test). The real fix is --test-threads=1 for PTY test binaries, which ensures only one thread exists during fork. Split `cargo test` into: - Non-PTY tests: run in parallel (default thread count) - PTY tests (pty_terminal, pty_terminal_test, vite_task_bin): run with --test-threads=1 This removes the global RUST_TEST_THREADS=1 that forced ALL tests to run sequentially, improving CI speed for non-PTY tests. https://claude.ai/code/session_011H8UR3gS6hoyQAf2x7Dfw8 --- .github/workflows/ci.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 41aaa1b3..ad67590c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -149,9 +149,6 @@ jobs: # -crt-static: vite-task is shipped as a NAPI module in vite+, and musl Node # with native modules links to musl libc dynamically, so we must do the same. RUSTFLAGS: --cfg tokio_unstable -D warnings -C target-feature=-crt-static - # On musl, concurrent PTY operations can trigger SIGSEGV in musl internals. - # Run test threads sequentially to avoid the race. - RUST_TEST_THREADS: 1 steps: - name: Install Alpine dependencies shell: sh {0} @@ -175,7 +172,15 @@ jobs: corepack enable pnpm install - - run: cargo test + # Run non-PTY tests in parallel (default thread count). + - run: cargo test --workspace --exclude pty_terminal --exclude pty_terminal_test --exclude vite_task_bin + + # PTY tests must run single-threaded: musl's fork() is not safe when + # other threads exist (even sleeping on a futex), because musl internal + # state inherited by the child can be inconsistent. + - run: cargo test -p pty_terminal -- --test-threads=1 + - run: cargo test -p pty_terminal_test -- --test-threads=1 + - run: cargo test -p vite_task_bin -- --test-threads=1 fmt: name: Format and Check Deps From dfc62775fc88eeeb7a165893423aa498a9c10d4b Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 20 Mar 2026 14:14:37 +0000 Subject: [PATCH 2/5] fix: use posix_spawn on musl to avoid fork() SIGSEGV in parallel tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On musl libc, fork() in a multi-threaded process is unsafe because musl internal state (locks, allocator metadata) in other threads can be left inconsistent in the child, causing SIGSEGV/SIGBUS. portable_pty uses pre_exec() for PTY setup, which forces Rust's Command to use fork()+exec() instead of posix_spawn(). This commit bypasses portable_pty's spawn_command() on musl and uses posix_spawn() directly, which musl implements via clone(CLONE_VM|CLONE_VFORK) — avoiding fork() entirely. PTY setup is handled via spawn attributes and file actions: - POSIX_SPAWN_SETSID for session leadership (replaces setsid()) - addopen() on the slave TTY path sets the controlling terminal - SETSIGDEF/SETSIGMASK for signal cleanup (replaces manual signal reset) This allows all tests (including PTY tests) to run in parallel on musl, removing the need for per-crate --test-threads=1. https://claude.ai/code/session_011H8UR3gS6hoyQAf2x7Dfw8 --- .github/workflows/ci.yml | 10 +- Cargo.lock | 1 + crates/pty_terminal/Cargo.toml | 3 + crates/pty_terminal/src/lib.rs | 2 + crates/pty_terminal/src/musl_spawn.rs | 226 ++++++++++++++++++++++++++ crates/pty_terminal/src/terminal.rs | 111 ++++++++----- 6 files changed, 306 insertions(+), 47 deletions(-) create mode 100644 crates/pty_terminal/src/musl_spawn.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ad67590c..0e21551c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -172,15 +172,7 @@ jobs: corepack enable pnpm install - # Run non-PTY tests in parallel (default thread count). - - run: cargo test --workspace --exclude pty_terminal --exclude pty_terminal_test --exclude vite_task_bin - - # PTY tests must run single-threaded: musl's fork() is not safe when - # other threads exist (even sleeping on a futex), because musl internal - # state inherited by the child can be inconsistent. - - run: cargo test -p pty_terminal -- --test-threads=1 - - run: cargo test -p pty_terminal_test -- --test-threads=1 - - run: cargo test -p vite_task_bin -- --test-threads=1 + - run: cargo test fmt: name: Format and Check Deps diff --git a/Cargo.lock b/Cargo.lock index 905cd6b9..9de6086b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2527,6 +2527,7 @@ dependencies = [ "anyhow", "ctor", "ctrlc", + "libc", "nix 0.30.1", "ntest", "portable-pty", diff --git a/crates/pty_terminal/Cargo.toml b/crates/pty_terminal/Cargo.toml index 52c800e2..4d67391e 100644 --- a/crates/pty_terminal/Cargo.toml +++ b/crates/pty_terminal/Cargo.toml @@ -12,6 +12,9 @@ anyhow = { workspace = true } portable-pty = { workspace = true } vt100 = { workspace = true } +[target.'cfg(target_env = "musl")'.dependencies] +libc = { workspace = true } + [dev-dependencies] ctor = { workspace = true } ctrlc = { workspace = true } diff --git a/crates/pty_terminal/src/lib.rs b/crates/pty_terminal/src/lib.rs index 07fe7a20..f70ac001 100644 --- a/crates/pty_terminal/src/lib.rs +++ b/crates/pty_terminal/src/lib.rs @@ -1,4 +1,6 @@ pub mod geo; +#[cfg(target_env = "musl")] +mod musl_spawn; pub mod terminal; pub use portable_pty::ExitStatus; diff --git a/crates/pty_terminal/src/musl_spawn.rs b/crates/pty_terminal/src/musl_spawn.rs new file mode 100644 index 00000000..2090771e --- /dev/null +++ b/crates/pty_terminal/src/musl_spawn.rs @@ -0,0 +1,226 @@ +//! musl-specific child process spawning using `posix_spawn()`. +//! +//! On musl libc, `fork()` in a multi-threaded process is unsafe: musl internal +//! state (locks, allocator metadata) in other threads can be left inconsistent +//! in the child, causing SIGSEGV/SIGBUS. Rust's `std::process::Command` falls +//! back to `fork()+exec()` whenever a `pre_exec` closure is set, and +//! `portable_pty` always sets one for PTY setup (setsid, TIOCSCTTY, close FDs, +//! signal reset). +//! +//! musl's `posix_spawn()` implementation uses `clone(CLONE_VM | CLONE_VFORK)` +//! instead of `fork()`, which avoids duplicating parent state entirely. This +//! module reimplements the child spawn using `posix_spawn()` directly, handling +//! all PTY setup via spawn attributes and file actions: +//! +//! - `POSIX_SPAWN_SETSID`: makes the child a session leader (replaces `setsid()`) +//! - `posix_spawn_file_actions_addopen()`: opens the slave TTY as fd 0, which +//! also sets it as the controlling terminal (because the child is a session +//! leader without a controlling terminal) +//! - `posix_spawn_file_actions_adddup2()`: copies fd 0 to fd 1 and fd 2 +//! - `POSIX_SPAWN_SETSIGDEF` + `POSIX_SPAWN_SETSIGMASK`: resets signal +//! dispositions and unblocks all signals + +use std::{ + ffi::{CString, OsStr}, + io, mem, + os::unix::ffi::OsStrExt, + path::Path, + ptr, +}; + +use portable_pty::{ChildKiller, CommandBuilder, ExitStatus}; + +/// A child process spawned via `posix_spawn()`. +pub struct PosixSpawnChild { + pid: libc::pid_t, +} + +/// A cloneable handle for killing a `posix_spawn`'d child. +#[derive(Debug)] +struct PosixSpawnChildKiller { + pid: libc::pid_t, +} + +impl ChildKiller for PosixSpawnChildKiller { + fn kill(&mut self) -> io::Result<()> { + // SAFETY: Sending SIGHUP to a valid PID. If the process already exited, + // the kernel returns ESRCH which we surface as an error. + let result = unsafe { libc::kill(self.pid, libc::SIGHUP) }; + if result != 0 { + return Err(io::Error::last_os_error()); + } + Ok(()) + } + + fn clone_killer(&self) -> Box { + Box::new(Self { pid: self.pid }) + } +} + +impl PosixSpawnChild { + pub fn clone_killer(&self) -> Box { + Box::new(PosixSpawnChildKiller { pid: self.pid }) + } + + /// Blocks until the child exits and returns its exit status. + pub fn wait(&mut self) -> io::Result { + let mut status: libc::c_int = 0; + loop { + // SAFETY: Calling waitpid with a valid PID and status pointer. + let result = unsafe { libc::waitpid(self.pid, &raw mut status, 0) }; + if result == -1 { + let err = io::Error::last_os_error(); + if err.kind() == io::ErrorKind::Interrupted { + continue; + } + return Err(err); + } + break; + } + + if libc::WIFEXITED(status) { + let code = libc::WEXITSTATUS(status); + Ok(ExitStatus::with_exit_code(code.cast_unsigned())) + } else if libc::WIFSIGNALED(status) { + let sig = libc::WTERMSIG(status); + // SAFETY: strsignal returns a valid C string for known signals or NULL. + let signame = unsafe { + let s = libc::strsignal(sig); + if s.is_null() { + format!("Signal {sig}") + } else { + std::ffi::CStr::from_ptr(s).to_string_lossy().into_owned() + } + }; + Ok(ExitStatus::with_signal(&signame)) + } else { + Ok(ExitStatus::with_exit_code(1)) + } + } +} + +/// Spawns a child process into the PTY slave using `posix_spawn()`. +/// +/// # Errors +/// +/// Returns an error if `posix_spawn()` or any setup step fails. +pub fn spawn_child_posix( + slave_tty_path: &Path, + cmd: &CommandBuilder, +) -> anyhow::Result { + let argv = cmd.get_argv(); + anyhow::ensure!(!argv.is_empty(), "empty argv"); + + let program = to_cstring(&argv[0])?; + let arg_cstrings: Vec = + argv.iter().map(|a| to_cstring(a)).collect::>()?; + let mut c_argv: Vec<*mut libc::c_char> = + arg_cstrings.iter().map(|a| a.as_ptr().cast_mut()).collect(); + c_argv.push(ptr::null_mut()); + + let env_strings: Vec = cmd + .iter_full_env_as_str() + .map(|(k, v)| CString::new(format!("{k}={v}"))) + .collect::>()?; + let mut c_envp: Vec<*mut libc::c_char> = + env_strings.iter().map(|e| e.as_ptr().cast_mut()).collect(); + c_envp.push(ptr::null_mut()); + + let slave_path = to_cstring(slave_tty_path.as_os_str())?; + let cwd_cstring = cmd.get_cwd().map(|cwd| to_cstring(cwd)).transpose()?; + + // SAFETY: All `posix_spawn_*` calls below operate on stack-allocated, + // properly initialized structures with valid pointers. The `CString` vectors + // (`arg_cstrings`, `env_strings`, `slave_path`, `cwd_cstring`) are kept alive + // for the duration of this block, ensuring all raw pointers remain valid. + // `mem::zeroed()` produces valid initial state for `posix_spawn_file_actions_t` + // and `posix_spawnattr_t` before their respective `_init()` calls. + // `sigfillset`/`sigemptyset` operate on zeroed `sigset_t` which is valid. + // `posix_spawnp` is called with properly null-terminated argv/envp arrays. + #[expect(clippy::cast_possible_truncation, reason = "spawn flags fit in c_short")] + unsafe { + let mut file_actions: libc::posix_spawn_file_actions_t = mem::zeroed(); + let mut file_actions_initialized = false; + let mut attr: libc::posix_spawnattr_t = mem::zeroed(); + let mut attr_initialized = false; + + let result = (|| -> anyhow::Result { + check_posix(libc::posix_spawn_file_actions_init(&raw mut file_actions))?; + file_actions_initialized = true; + + // Open slave TTY as fd 0 (stdin). Because the child is a session + // leader (POSIX_SPAWN_SETSID) with no controlling terminal, this + // open() automatically sets the slave TTY as the controlling terminal. + check_posix(libc::posix_spawn_file_actions_addopen( + &raw mut file_actions, + 0, + slave_path.as_ptr(), + libc::O_RDWR, + 0, + ))?; + + // dup2 stdin (now the slave TTY) to stdout and stderr. + check_posix(libc::posix_spawn_file_actions_adddup2(&raw mut file_actions, 0, 1))?; + check_posix(libc::posix_spawn_file_actions_adddup2(&raw mut file_actions, 0, 2))?; + + if let Some(ref cwd) = cwd_cstring { + check_posix(libc::posix_spawn_file_actions_addchdir_np( + &raw mut file_actions, + cwd.as_ptr(), + ))?; + } + + check_posix(libc::posix_spawnattr_init(&raw mut attr))?; + attr_initialized = true; + + // Create new session + reset signal dispositions + clear signal mask. + let flags: libc::c_short = (libc::POSIX_SPAWN_SETSID + | libc::POSIX_SPAWN_SETSIGDEF + | libc::POSIX_SPAWN_SETSIGMASK) + as libc::c_short; + check_posix(libc::posix_spawnattr_setflags(&raw mut attr, flags))?; + + // Reset ALL signal dispositions to default. + let mut all_signals: libc::sigset_t = mem::zeroed(); + libc::sigfillset(&raw mut all_signals); + check_posix(libc::posix_spawnattr_setsigdefault( + &raw mut attr, + &raw const all_signals, + ))?; + + // Unblock all signals. + let mut empty_mask: libc::sigset_t = mem::zeroed(); + libc::sigemptyset(&raw mut empty_mask); + check_posix(libc::posix_spawnattr_setsigmask(&raw mut attr, &raw const empty_mask))?; + + let mut pid: libc::pid_t = 0; + check_posix(libc::posix_spawnp( + &raw mut pid, + program.as_ptr(), + &raw const file_actions, + &raw const attr, + c_argv.as_ptr(), + c_envp.as_ptr(), + ))?; + + Ok(PosixSpawnChild { pid }) + })(); + + if file_actions_initialized { + libc::posix_spawn_file_actions_destroy(&raw mut file_actions); + } + if attr_initialized { + libc::posix_spawnattr_destroy(&raw mut attr); + } + + result + } +} + +fn to_cstring(s: &OsStr) -> anyhow::Result { + CString::new(s.as_bytes()).map_err(|e| anyhow::anyhow!("nul byte in argument: {e}")) +} + +fn check_posix(ret: libc::c_int) -> anyhow::Result<()> { + if ret != 0 { Err(io::Error::from_raw_os_error(ret).into()) } else { Ok(()) } +} diff --git a/crates/pty_terminal/src/terminal.rs b/crates/pty_terminal/src/terminal.rs index b77673bb..eacc7636 100644 --- a/crates/pty_terminal/src/terminal.rs +++ b/crates/pty_terminal/src/terminal.rs @@ -256,15 +256,6 @@ impl Terminal { /// /// Panics if the writer lock is poisoned when the background thread closes it. pub fn spawn(size: ScreenSize, cmd: CommandBuilder) -> anyhow::Result { - // On musl libc (Alpine Linux), concurrent PTY operations trigger - // SIGSEGV/SIGBUS in musl internals (sysconf, fcntl). This affects - // both openpty+fork and FD cleanup (close) from background threads. - // Serialize all PTY lifecycle operations that touch musl internals. - #[cfg(target_env = "musl")] - static PTY_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); - #[cfg(target_env = "musl")] - let _spawn_guard = PTY_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let pty_pair = portable_pty::native_pty_system().openpty(portable_pty::PtySize { rows: size.rows, cols: size.cols, @@ -275,35 +266,79 @@ impl Terminal { let reader = pty_pair.master.try_clone_reader()?; let writer: Arc>>> = Arc::new(Mutex::new(Some(pty_pair.master.take_writer()?))); - let mut child = pty_pair.slave.spawn_command(cmd)?; - let child_killer = child.clone_killer(); - let master = pty_pair.master; - let exit_status: Arc> = Arc::new(OnceLock::new()); - - // Background thread: wait for child to exit, then clean up. - // - // The slave is kept alive until after `child.wait()` returns rather than - // being dropped immediately after spawn. On macOS, if the parent's slave - // fd is closed early (before spawn) and the child exits quickly, ALL - // slave references close before the reader issues its first `read()`. - // macOS then returns EIO on the master without draining the output buffer, - // causing data loss. Holding the slave until the background thread takes - // over guarantees the PTY stays connected while the child runs. - thread::spawn({ - let writer = Arc::clone(&writer); - let exit_status = Arc::clone(&exit_status); - let slave = pty_pair.slave; - move || { - let _ = exit_status.set(child.wait().map_err(Arc::new)); - // On musl, serialize FD cleanup (close) with PTY spawn to - // prevent racing on musl-internal state. - #[cfg(target_env = "musl")] - let _cleanup_guard = PTY_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // Close writer first, then drop slave to trigger EOF on the reader. - *writer.lock().unwrap() = None; - drop(slave); - } - }); + + #[cfg(target_env = "musl")] + let (child_killer, exit_status, master) = { + // On musl libc (Alpine Linux), fork() in a multi-threaded process is + // unsafe: musl internal state (locks, allocator metadata) inherited by + // the child can be inconsistent, causing SIGSEGV/SIGBUS. + // + // portable_pty's spawn_command() uses pre_exec() for PTY setup, which + // forces Rust's Command to use fork()+exec() instead of posix_spawn(). + // + // We bypass portable_pty's spawn and use posix_spawn() directly, which + // musl implements via clone(CLONE_VM|CLONE_VFORK) — avoiding fork() + // entirely. PTY setup is handled via spawn attributes and file actions: + // - POSIX_SPAWN_SETSID for session leadership + // - addopen() on the slave TTY path sets the controlling terminal + // - SETSIGDEF/SETSIGMASK for signal cleanup + let slave_tty_path = pty_pair + .master + .tty_name() + .ok_or_else(|| anyhow::anyhow!("failed to get slave TTY name"))?; + let mut child = crate::musl_spawn::spawn_child_posix(&slave_tty_path, &cmd)?; + let child_killer = child.clone_killer(); + let master = pty_pair.master; + let exit_status: Arc> = Arc::new(OnceLock::new()); + + // Drop the parent's reference to the slave. The child opened its own + // reference via posix_spawn file actions, and we only need the master. + // (The macOS slave-lifetime issue doesn't apply to Linux/musl.) + drop(pty_pair.slave); + + // Background thread: wait for child to exit, then clean up. + thread::spawn({ + let writer = Arc::clone(&writer); + let exit_status = Arc::clone(&exit_status); + move || { + let _ = exit_status.set(child.wait().map_err(Arc::new)); + *writer.lock().unwrap() = None; + } + }); + + (child_killer, exit_status, master) + }; + + #[cfg(not(target_env = "musl"))] + let (child_killer, exit_status, master) = { + let mut child = pty_pair.slave.spawn_command(cmd)?; + let child_killer = child.clone_killer(); + let master = pty_pair.master; + let exit_status: Arc> = Arc::new(OnceLock::new()); + + // Background thread: wait for child to exit, then clean up. + // + // The slave is kept alive until after `child.wait()` returns rather than + // being dropped immediately after spawn. On macOS, if the parent's slave + // fd is closed early (before spawn) and the child exits quickly, ALL + // slave references close before the reader issues its first `read()`. + // macOS then returns EIO on the master without draining the output buffer, + // causing data loss. Holding the slave until the background thread takes + // over guarantees the PTY stays connected while the child runs. + thread::spawn({ + let writer = Arc::clone(&writer); + let exit_status = Arc::clone(&exit_status); + let slave = pty_pair.slave; + move || { + let _ = exit_status.set(child.wait().map_err(Arc::new)); + // Close writer first, then drop slave to trigger EOF on the reader. + *writer.lock().unwrap() = None; + drop(slave); + } + }); + + (child_killer, exit_status, master) + }; let parser = Arc::new(Mutex::new(vt100::Parser::new_with_callbacks( size.rows, From cb567c8df9cee7630889adbd0cf2e69be4d3599a Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 20 Mar 2026 14:31:38 +0000 Subject: [PATCH 3/5] Revert "fix: use posix_spawn on musl to avoid fork() SIGSEGV in parallel tests" This reverts commit 4dd37ba607aa4ffdd38de2653fbd26d96a32c18d. --- .github/workflows/ci.yml | 10 +- Cargo.lock | 1 - crates/pty_terminal/Cargo.toml | 3 - crates/pty_terminal/src/lib.rs | 2 - crates/pty_terminal/src/musl_spawn.rs | 226 -------------------------- crates/pty_terminal/src/terminal.rs | 111 +++++-------- 6 files changed, 47 insertions(+), 306 deletions(-) delete mode 100644 crates/pty_terminal/src/musl_spawn.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0e21551c..ad67590c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -172,7 +172,15 @@ jobs: corepack enable pnpm install - - run: cargo test + # Run non-PTY tests in parallel (default thread count). + - run: cargo test --workspace --exclude pty_terminal --exclude pty_terminal_test --exclude vite_task_bin + + # PTY tests must run single-threaded: musl's fork() is not safe when + # other threads exist (even sleeping on a futex), because musl internal + # state inherited by the child can be inconsistent. + - run: cargo test -p pty_terminal -- --test-threads=1 + - run: cargo test -p pty_terminal_test -- --test-threads=1 + - run: cargo test -p vite_task_bin -- --test-threads=1 fmt: name: Format and Check Deps diff --git a/Cargo.lock b/Cargo.lock index 9de6086b..905cd6b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2527,7 +2527,6 @@ dependencies = [ "anyhow", "ctor", "ctrlc", - "libc", "nix 0.30.1", "ntest", "portable-pty", diff --git a/crates/pty_terminal/Cargo.toml b/crates/pty_terminal/Cargo.toml index 4d67391e..52c800e2 100644 --- a/crates/pty_terminal/Cargo.toml +++ b/crates/pty_terminal/Cargo.toml @@ -12,9 +12,6 @@ anyhow = { workspace = true } portable-pty = { workspace = true } vt100 = { workspace = true } -[target.'cfg(target_env = "musl")'.dependencies] -libc = { workspace = true } - [dev-dependencies] ctor = { workspace = true } ctrlc = { workspace = true } diff --git a/crates/pty_terminal/src/lib.rs b/crates/pty_terminal/src/lib.rs index f70ac001..07fe7a20 100644 --- a/crates/pty_terminal/src/lib.rs +++ b/crates/pty_terminal/src/lib.rs @@ -1,6 +1,4 @@ pub mod geo; -#[cfg(target_env = "musl")] -mod musl_spawn; pub mod terminal; pub use portable_pty::ExitStatus; diff --git a/crates/pty_terminal/src/musl_spawn.rs b/crates/pty_terminal/src/musl_spawn.rs deleted file mode 100644 index 2090771e..00000000 --- a/crates/pty_terminal/src/musl_spawn.rs +++ /dev/null @@ -1,226 +0,0 @@ -//! musl-specific child process spawning using `posix_spawn()`. -//! -//! On musl libc, `fork()` in a multi-threaded process is unsafe: musl internal -//! state (locks, allocator metadata) in other threads can be left inconsistent -//! in the child, causing SIGSEGV/SIGBUS. Rust's `std::process::Command` falls -//! back to `fork()+exec()` whenever a `pre_exec` closure is set, and -//! `portable_pty` always sets one for PTY setup (setsid, TIOCSCTTY, close FDs, -//! signal reset). -//! -//! musl's `posix_spawn()` implementation uses `clone(CLONE_VM | CLONE_VFORK)` -//! instead of `fork()`, which avoids duplicating parent state entirely. This -//! module reimplements the child spawn using `posix_spawn()` directly, handling -//! all PTY setup via spawn attributes and file actions: -//! -//! - `POSIX_SPAWN_SETSID`: makes the child a session leader (replaces `setsid()`) -//! - `posix_spawn_file_actions_addopen()`: opens the slave TTY as fd 0, which -//! also sets it as the controlling terminal (because the child is a session -//! leader without a controlling terminal) -//! - `posix_spawn_file_actions_adddup2()`: copies fd 0 to fd 1 and fd 2 -//! - `POSIX_SPAWN_SETSIGDEF` + `POSIX_SPAWN_SETSIGMASK`: resets signal -//! dispositions and unblocks all signals - -use std::{ - ffi::{CString, OsStr}, - io, mem, - os::unix::ffi::OsStrExt, - path::Path, - ptr, -}; - -use portable_pty::{ChildKiller, CommandBuilder, ExitStatus}; - -/// A child process spawned via `posix_spawn()`. -pub struct PosixSpawnChild { - pid: libc::pid_t, -} - -/// A cloneable handle for killing a `posix_spawn`'d child. -#[derive(Debug)] -struct PosixSpawnChildKiller { - pid: libc::pid_t, -} - -impl ChildKiller for PosixSpawnChildKiller { - fn kill(&mut self) -> io::Result<()> { - // SAFETY: Sending SIGHUP to a valid PID. If the process already exited, - // the kernel returns ESRCH which we surface as an error. - let result = unsafe { libc::kill(self.pid, libc::SIGHUP) }; - if result != 0 { - return Err(io::Error::last_os_error()); - } - Ok(()) - } - - fn clone_killer(&self) -> Box { - Box::new(Self { pid: self.pid }) - } -} - -impl PosixSpawnChild { - pub fn clone_killer(&self) -> Box { - Box::new(PosixSpawnChildKiller { pid: self.pid }) - } - - /// Blocks until the child exits and returns its exit status. - pub fn wait(&mut self) -> io::Result { - let mut status: libc::c_int = 0; - loop { - // SAFETY: Calling waitpid with a valid PID and status pointer. - let result = unsafe { libc::waitpid(self.pid, &raw mut status, 0) }; - if result == -1 { - let err = io::Error::last_os_error(); - if err.kind() == io::ErrorKind::Interrupted { - continue; - } - return Err(err); - } - break; - } - - if libc::WIFEXITED(status) { - let code = libc::WEXITSTATUS(status); - Ok(ExitStatus::with_exit_code(code.cast_unsigned())) - } else if libc::WIFSIGNALED(status) { - let sig = libc::WTERMSIG(status); - // SAFETY: strsignal returns a valid C string for known signals or NULL. - let signame = unsafe { - let s = libc::strsignal(sig); - if s.is_null() { - format!("Signal {sig}") - } else { - std::ffi::CStr::from_ptr(s).to_string_lossy().into_owned() - } - }; - Ok(ExitStatus::with_signal(&signame)) - } else { - Ok(ExitStatus::with_exit_code(1)) - } - } -} - -/// Spawns a child process into the PTY slave using `posix_spawn()`. -/// -/// # Errors -/// -/// Returns an error if `posix_spawn()` or any setup step fails. -pub fn spawn_child_posix( - slave_tty_path: &Path, - cmd: &CommandBuilder, -) -> anyhow::Result { - let argv = cmd.get_argv(); - anyhow::ensure!(!argv.is_empty(), "empty argv"); - - let program = to_cstring(&argv[0])?; - let arg_cstrings: Vec = - argv.iter().map(|a| to_cstring(a)).collect::>()?; - let mut c_argv: Vec<*mut libc::c_char> = - arg_cstrings.iter().map(|a| a.as_ptr().cast_mut()).collect(); - c_argv.push(ptr::null_mut()); - - let env_strings: Vec = cmd - .iter_full_env_as_str() - .map(|(k, v)| CString::new(format!("{k}={v}"))) - .collect::>()?; - let mut c_envp: Vec<*mut libc::c_char> = - env_strings.iter().map(|e| e.as_ptr().cast_mut()).collect(); - c_envp.push(ptr::null_mut()); - - let slave_path = to_cstring(slave_tty_path.as_os_str())?; - let cwd_cstring = cmd.get_cwd().map(|cwd| to_cstring(cwd)).transpose()?; - - // SAFETY: All `posix_spawn_*` calls below operate on stack-allocated, - // properly initialized structures with valid pointers. The `CString` vectors - // (`arg_cstrings`, `env_strings`, `slave_path`, `cwd_cstring`) are kept alive - // for the duration of this block, ensuring all raw pointers remain valid. - // `mem::zeroed()` produces valid initial state for `posix_spawn_file_actions_t` - // and `posix_spawnattr_t` before their respective `_init()` calls. - // `sigfillset`/`sigemptyset` operate on zeroed `sigset_t` which is valid. - // `posix_spawnp` is called with properly null-terminated argv/envp arrays. - #[expect(clippy::cast_possible_truncation, reason = "spawn flags fit in c_short")] - unsafe { - let mut file_actions: libc::posix_spawn_file_actions_t = mem::zeroed(); - let mut file_actions_initialized = false; - let mut attr: libc::posix_spawnattr_t = mem::zeroed(); - let mut attr_initialized = false; - - let result = (|| -> anyhow::Result { - check_posix(libc::posix_spawn_file_actions_init(&raw mut file_actions))?; - file_actions_initialized = true; - - // Open slave TTY as fd 0 (stdin). Because the child is a session - // leader (POSIX_SPAWN_SETSID) with no controlling terminal, this - // open() automatically sets the slave TTY as the controlling terminal. - check_posix(libc::posix_spawn_file_actions_addopen( - &raw mut file_actions, - 0, - slave_path.as_ptr(), - libc::O_RDWR, - 0, - ))?; - - // dup2 stdin (now the slave TTY) to stdout and stderr. - check_posix(libc::posix_spawn_file_actions_adddup2(&raw mut file_actions, 0, 1))?; - check_posix(libc::posix_spawn_file_actions_adddup2(&raw mut file_actions, 0, 2))?; - - if let Some(ref cwd) = cwd_cstring { - check_posix(libc::posix_spawn_file_actions_addchdir_np( - &raw mut file_actions, - cwd.as_ptr(), - ))?; - } - - check_posix(libc::posix_spawnattr_init(&raw mut attr))?; - attr_initialized = true; - - // Create new session + reset signal dispositions + clear signal mask. - let flags: libc::c_short = (libc::POSIX_SPAWN_SETSID - | libc::POSIX_SPAWN_SETSIGDEF - | libc::POSIX_SPAWN_SETSIGMASK) - as libc::c_short; - check_posix(libc::posix_spawnattr_setflags(&raw mut attr, flags))?; - - // Reset ALL signal dispositions to default. - let mut all_signals: libc::sigset_t = mem::zeroed(); - libc::sigfillset(&raw mut all_signals); - check_posix(libc::posix_spawnattr_setsigdefault( - &raw mut attr, - &raw const all_signals, - ))?; - - // Unblock all signals. - let mut empty_mask: libc::sigset_t = mem::zeroed(); - libc::sigemptyset(&raw mut empty_mask); - check_posix(libc::posix_spawnattr_setsigmask(&raw mut attr, &raw const empty_mask))?; - - let mut pid: libc::pid_t = 0; - check_posix(libc::posix_spawnp( - &raw mut pid, - program.as_ptr(), - &raw const file_actions, - &raw const attr, - c_argv.as_ptr(), - c_envp.as_ptr(), - ))?; - - Ok(PosixSpawnChild { pid }) - })(); - - if file_actions_initialized { - libc::posix_spawn_file_actions_destroy(&raw mut file_actions); - } - if attr_initialized { - libc::posix_spawnattr_destroy(&raw mut attr); - } - - result - } -} - -fn to_cstring(s: &OsStr) -> anyhow::Result { - CString::new(s.as_bytes()).map_err(|e| anyhow::anyhow!("nul byte in argument: {e}")) -} - -fn check_posix(ret: libc::c_int) -> anyhow::Result<()> { - if ret != 0 { Err(io::Error::from_raw_os_error(ret).into()) } else { Ok(()) } -} diff --git a/crates/pty_terminal/src/terminal.rs b/crates/pty_terminal/src/terminal.rs index eacc7636..b77673bb 100644 --- a/crates/pty_terminal/src/terminal.rs +++ b/crates/pty_terminal/src/terminal.rs @@ -256,6 +256,15 @@ impl Terminal { /// /// Panics if the writer lock is poisoned when the background thread closes it. pub fn spawn(size: ScreenSize, cmd: CommandBuilder) -> anyhow::Result { + // On musl libc (Alpine Linux), concurrent PTY operations trigger + // SIGSEGV/SIGBUS in musl internals (sysconf, fcntl). This affects + // both openpty+fork and FD cleanup (close) from background threads. + // Serialize all PTY lifecycle operations that touch musl internals. + #[cfg(target_env = "musl")] + static PTY_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + #[cfg(target_env = "musl")] + let _spawn_guard = PTY_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + let pty_pair = portable_pty::native_pty_system().openpty(portable_pty::PtySize { rows: size.rows, cols: size.cols, @@ -266,79 +275,35 @@ impl Terminal { let reader = pty_pair.master.try_clone_reader()?; let writer: Arc>>> = Arc::new(Mutex::new(Some(pty_pair.master.take_writer()?))); - - #[cfg(target_env = "musl")] - let (child_killer, exit_status, master) = { - // On musl libc (Alpine Linux), fork() in a multi-threaded process is - // unsafe: musl internal state (locks, allocator metadata) inherited by - // the child can be inconsistent, causing SIGSEGV/SIGBUS. - // - // portable_pty's spawn_command() uses pre_exec() for PTY setup, which - // forces Rust's Command to use fork()+exec() instead of posix_spawn(). - // - // We bypass portable_pty's spawn and use posix_spawn() directly, which - // musl implements via clone(CLONE_VM|CLONE_VFORK) — avoiding fork() - // entirely. PTY setup is handled via spawn attributes and file actions: - // - POSIX_SPAWN_SETSID for session leadership - // - addopen() on the slave TTY path sets the controlling terminal - // - SETSIGDEF/SETSIGMASK for signal cleanup - let slave_tty_path = pty_pair - .master - .tty_name() - .ok_or_else(|| anyhow::anyhow!("failed to get slave TTY name"))?; - let mut child = crate::musl_spawn::spawn_child_posix(&slave_tty_path, &cmd)?; - let child_killer = child.clone_killer(); - let master = pty_pair.master; - let exit_status: Arc> = Arc::new(OnceLock::new()); - - // Drop the parent's reference to the slave. The child opened its own - // reference via posix_spawn file actions, and we only need the master. - // (The macOS slave-lifetime issue doesn't apply to Linux/musl.) - drop(pty_pair.slave); - - // Background thread: wait for child to exit, then clean up. - thread::spawn({ - let writer = Arc::clone(&writer); - let exit_status = Arc::clone(&exit_status); - move || { - let _ = exit_status.set(child.wait().map_err(Arc::new)); - *writer.lock().unwrap() = None; - } - }); - - (child_killer, exit_status, master) - }; - - #[cfg(not(target_env = "musl"))] - let (child_killer, exit_status, master) = { - let mut child = pty_pair.slave.spawn_command(cmd)?; - let child_killer = child.clone_killer(); - let master = pty_pair.master; - let exit_status: Arc> = Arc::new(OnceLock::new()); - - // Background thread: wait for child to exit, then clean up. - // - // The slave is kept alive until after `child.wait()` returns rather than - // being dropped immediately after spawn. On macOS, if the parent's slave - // fd is closed early (before spawn) and the child exits quickly, ALL - // slave references close before the reader issues its first `read()`. - // macOS then returns EIO on the master without draining the output buffer, - // causing data loss. Holding the slave until the background thread takes - // over guarantees the PTY stays connected while the child runs. - thread::spawn({ - let writer = Arc::clone(&writer); - let exit_status = Arc::clone(&exit_status); - let slave = pty_pair.slave; - move || { - let _ = exit_status.set(child.wait().map_err(Arc::new)); - // Close writer first, then drop slave to trigger EOF on the reader. - *writer.lock().unwrap() = None; - drop(slave); - } - }); - - (child_killer, exit_status, master) - }; + let mut child = pty_pair.slave.spawn_command(cmd)?; + let child_killer = child.clone_killer(); + let master = pty_pair.master; + let exit_status: Arc> = Arc::new(OnceLock::new()); + + // Background thread: wait for child to exit, then clean up. + // + // The slave is kept alive until after `child.wait()` returns rather than + // being dropped immediately after spawn. On macOS, if the parent's slave + // fd is closed early (before spawn) and the child exits quickly, ALL + // slave references close before the reader issues its first `read()`. + // macOS then returns EIO on the master without draining the output buffer, + // causing data loss. Holding the slave until the background thread takes + // over guarantees the PTY stays connected while the child runs. + thread::spawn({ + let writer = Arc::clone(&writer); + let exit_status = Arc::clone(&exit_status); + let slave = pty_pair.slave; + move || { + let _ = exit_status.set(child.wait().map_err(Arc::new)); + // On musl, serialize FD cleanup (close) with PTY spawn to + // prevent racing on musl-internal state. + #[cfg(target_env = "musl")] + let _cleanup_guard = PTY_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + // Close writer first, then drop slave to trigger EOF on the reader. + *writer.lock().unwrap() = None; + drop(slave); + } + }); let parser = Arc::new(Mutex::new(vt100::Parser::new_with_callbacks( size.rows, From 379b3d977afc08321c4cb56498af8dcc0c6e7912 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 20 Mar 2026 14:32:13 +0000 Subject: [PATCH 4/5] fix: use cargo-nextest on musl to run tests in parallel safely On musl libc, fork() in a multi-threaded process is unsafe because musl internal state (locks, allocator metadata) in other threads can be left inconsistent in the child, causing SIGSEGV/SIGBUS. cargo-nextest runs each test in its own process (not thread), so fork() happens from a mostly single-threaded context. Tests still execute in parallel across processes, maintaining fast CI times without SIGSEGV. This replaces the per-crate --test-threads=1 workaround which serialized all tests within each crate. https://claude.ai/code/session_011H8UR3gS6hoyQAf2x7Dfw8 --- .github/workflows/ci.yml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ad67590c..fdb063a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -172,15 +172,11 @@ jobs: corepack enable pnpm install - # Run non-PTY tests in parallel (default thread count). - - run: cargo test --workspace --exclude pty_terminal --exclude pty_terminal_test --exclude vite_task_bin - - # PTY tests must run single-threaded: musl's fork() is not safe when - # other threads exist (even sleeping on a futex), because musl internal - # state inherited by the child can be inconsistent. - - run: cargo test -p pty_terminal -- --test-threads=1 - - run: cargo test -p pty_terminal_test -- --test-threads=1 - - run: cargo test -p vite_task_bin -- --test-threads=1 + # Use cargo-nextest: it runs each test in its own process, avoiding + # musl's fork()-in-multithreaded-process SIGSEGV while keeping parallel + # execution (across processes instead of threads within one process). + - run: cargo install cargo-nextest --locked + - run: cargo nextest run fmt: name: Format and Check Deps From c0a3c55b58428fe536f168a97bcc7aa622501048 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 20 Mar 2026 14:46:47 +0000 Subject: [PATCH 5/5] fix: exclude custom-harness packages from nextest, run them with cargo test cargo-nextest can't discover tests in packages with `harness = false` (e2e_snapshots, plan_snapshots) because they output custom formats instead of the standard libtest listing. Split the musl test run: - nextest for standard-harness tests (parallel, per-process isolation) - cargo test for custom-harness packages (already run sequentially) https://claude.ai/code/session_011H8UR3gS6hoyQAf2x7Dfw8 --- .github/workflows/ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fdb063a9..6d1f72d6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -175,8 +175,11 @@ jobs: # Use cargo-nextest: it runs each test in its own process, avoiding # musl's fork()-in-multithreaded-process SIGSEGV while keeping parallel # execution (across processes instead of threads within one process). + # Exclude packages with custom test harnesses (harness = false) since + # nextest can't discover their tests; run those with cargo test instead. - run: cargo install cargo-nextest --locked - - run: cargo nextest run + - run: cargo nextest run --workspace --exclude vite_task_bin --exclude vite_task_plan + - run: cargo test -p vite_task_bin -p vite_task_plan fmt: name: Format and Check Deps