From 33927c3a45b8cce6eeded2926b6bc725bb175692 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Thu, 26 Mar 2026 15:38:57 +0100 Subject: [PATCH 01/13] test: Add diagnostic logging for electrs process failures Enable electrs stderr output in CI and log connection details at startup. Log errors that were previously silently discarded: the first block_headers_subscribe failure, generate_to_address failures, and ping errors across all polling helpers. This will help diagnose intermittent CI failures where electrs appears to crash or become unreachable mid-test. AI tools were used in preparing this commit. --- tests/common/mod.rs | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 69f9cc8d5..4a62fba49 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -232,7 +232,12 @@ pub(crate) fn setup_bitcoind_and_electrsd() -> (BitcoinD, ElectrsD) { let mut electrsd_conf = electrsd::Conf::default(); electrsd_conf.http_enabled = true; electrsd_conf.network = "regtest"; + electrsd_conf.view_stderr = true; let electrsd = ElectrsD::with_conf(electrs_exe, &bitcoind, &electrsd_conf).unwrap(); + println!( + "Electrs started with electrum_url={}, esplora_url={:?}", + electrsd.electrum_url, electrsd.esplora_url + ); (bitcoind, electrsd) } @@ -510,6 +515,9 @@ pub(crate) async fn generate_blocks_and_wait( let address = bitcoind.new_address().expect("failed to get new address"); // TODO: expect this Result once the WouldBlock issue is resolved upstream. 
let _block_hashes_res = bitcoind.generate_to_address(num, &address); + if let Err(ref e) = _block_hashes_res { + eprintln!("generate_to_address({}) failed: {:?}", num, e); + } wait_for_block(electrs, cur_height as usize + num).await; print!(" Done!"); println!("\n"); @@ -533,10 +541,14 @@ pub(crate) fn invalidate_blocks(bitcoind: &BitcoindClient, num_blocks: usize) { pub(crate) async fn wait_for_block<E: ElectrumApi>(electrs: &E, min_height: usize) { let mut header = match electrs.block_headers_subscribe() { Ok(header) => header, - Err(_) => { + Err(e) => { // While subscribing should succeed the first time around, we ran into some cases where // it didn't. Since we can't proceed without subscribing, we try again after a delay // and panic if it still fails. + eprintln!("block_headers_subscribe failed (will retry in 3s): {:?}", e); + if let Err(ping_err) = electrs.ping() { + eprintln!("electrs ping also failed: {:?}", ping_err); + } tokio::time::sleep(Duration::from_secs(3)).await; electrs.block_headers_subscribe().expect("failed to subscribe to block headers") }, @@ -546,8 +558,10 @@ pub(crate) async fn wait_for_block<E: ElectrumApi>(electrs: &E, min_height: usiz break; } header = exponential_backoff_poll(|| { - electrs.ping().expect("failed to ping electrs"); - electrs.block_headers_pop().expect("failed to pop block header") + electrs.ping().unwrap_or_else(|e| panic!("failed to ping electrs: {:?}", e)); + electrs + .block_headers_pop() + .unwrap_or_else(|e| panic!("failed to pop block header: {:?}", e)) }) .await; } @@ -559,7 +573,7 @@ pub(crate) async fn wait_for_tx<E: ElectrumApi>(electrs: &E, txid: Txid) { } exponential_backoff_poll(|| { - electrs.ping().unwrap(); + electrs.ping().unwrap_or_else(|e| panic!("failed to ping electrs: {:?}", e)); electrs.transaction_get(&txid).ok() }) .await; @@ -575,7 +589,7 @@ pub(crate) async fn wait_for_outpoint_spend<E: ElectrumApi>(electrs: &E, outpoin } exponential_backoff_poll(|| { - electrs.ping().unwrap(); + electrs.ping().unwrap_or_else(|e| panic!("failed to ping electrs: {:?}", 
e)); let is_spent = !electrs.script_get_history(&txout_script).unwrap().is_empty(); is_spent.then_some(()) From 163569b0e5f6b1172aeab68ec78724052278a21c Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Thu, 26 Mar 2026 16:13:46 +0100 Subject: [PATCH 02/13] ci: Add stress-test job to reproduce intermittent electrs failures Run integration tests 10 times in a loop with --nocapture to maximize the chance of hitting the intermittent electrs crash and to capture the new diagnostic logging output. AI tools were used in preparing this commit. --- .github/workflows/rust.yml | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 1ccade444..56500cfa1 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -86,6 +86,46 @@ jobs: run: | RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --features uniffi + stress-test: + runs-on: ubuntu-latest + steps: + - name: Checkout source code + uses: actions/checkout@v3 + - name: Install Rust stable toolchain + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile=minimal --default-toolchain stable + - name: Enable caching for bitcoind + id: cache-bitcoind + uses: actions/cache@v4 + with: + path: bin/bitcoind-${{ runner.os }}-${{ runner.arch }} + key: bitcoind-${{ runner.os }}-${{ runner.arch }} + - name: Enable caching for electrs + id: cache-electrs + uses: actions/cache@v4 + with: + path: bin/electrs-${{ runner.os }}-${{ runner.arch }} + key: electrs-${{ runner.os }}-${{ runner.arch }} + - name: Download bitcoind/electrs + if: "steps.cache-bitcoind.outputs.cache-hit != 'true' || steps.cache-electrs.outputs.cache-hit != 'true'" + run: | + source ./scripts/download_bitcoind_electrs.sh + mkdir bin + mv "$BITCOIND_EXE" bin/bitcoind-${{ runner.os }}-${{ runner.arch }} + mv "$ELECTRS_EXE" bin/electrs-${{ runner.os }}-${{ runner.arch }} + - name: Set bitcoind/electrs environment 
variables + run: | + echo "BITCOIND_EXE=$( pwd )/bin/bitcoind-${{ runner.os }}-${{ runner.arch }}" >> "$GITHUB_ENV" + echo "ELECTRS_EXE=$( pwd )/bin/electrs-${{ runner.os }}-${{ runner.arch }}" >> "$GITHUB_ENV" + - name: Build integration tests + run: RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust --no-run + - name: Stress-test integration tests (10 iterations) + run: | + for i in $(seq 1 10); do + echo "=== Iteration $i ===" + RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture 2>&1 || { echo "FAILED on iteration $i"; exit 1; } + done + doc: name: Documentation runs-on: ubuntu-latest From 6de931261ab3c809f3fa65b924374a12a7b8aa08 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Thu, 26 Mar 2026 18:24:17 +0100 Subject: [PATCH 03/13] ci: Run 3 concurrent test processes across 3 stress-test shards Increase resource pressure to reproduce intermittent electrs failures. Each of the 3 shards runs 5 iterations with 3 concurrent cargo test processes, for 45 total test runs with up to 9 simultaneous processes. AI tools were used in preparing this commit. 
--- .github/workflows/rust.yml | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 56500cfa1..009d9167e 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -87,6 +87,10 @@ jobs: RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --features uniffi stress-test: + strategy: + fail-fast: false + matrix: + shard: [1, 2, 3] runs-on: ubuntu-latest steps: - name: Checkout source code @@ -119,11 +123,21 @@ jobs: echo "ELECTRS_EXE=$( pwd )/bin/electrs-${{ runner.os }}-${{ runner.arch }}" >> "$GITHUB_ENV" - name: Build integration tests run: RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust --no-run - - name: Stress-test integration tests (10 iterations) + - name: Stress-test integration tests (5 iterations, 3 concurrent processes) run: | - for i in $(seq 1 10); do - echo "=== Iteration $i ===" - RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture 2>&1 || { echo "FAILED on iteration $i"; exit 1; } + for i in $(seq 1 5); do + echo "=== Iteration $i (shard ${{ matrix.shard }}) ===" + for j in 1 2 3; do + RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture > /tmp/stress-${j}.log 2>&1 & + pids[${j}]=$! + done + for j in 1 2 3; do + if ! wait ${pids[${j}]}; then + echo "FAILED: process $j on iteration $i (shard ${{ matrix.shard }})" + cat /tmp/stress-${j}.log + exit 1 + fi + done done doc: From baaa6ffe6c2e090be36c32b50709703b6601586f Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Thu, 26 Mar 2026 19:20:52 +0100 Subject: [PATCH 04/13] test: Support configurable base port for concurrent test processes Read LDK_NODE_TEST_BASE_PORT env var to offset the listening port range, avoiding collisions when multiple test processes run simultaneously. 
Assign base ports 20000, 21000, 22000 to the three concurrent processes in the stress-test CI job. AI tools were used in preparing this commit. --- .github/workflows/rust.yml | 2 +- tests/common/mod.rs | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 009d9167e..353fbb9bd 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -128,7 +128,7 @@ jobs: for i in $(seq 1 5); do echo "=== Iteration $i (shard ${{ matrix.shard }}) ===" for j in 1 2 3; do - RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture > /tmp/stress-${j}.log 2>&1 & + LDK_NODE_TEST_BASE_PORT=$((20000 + (j - 1) * 1000)) RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture > /tmp/stress-${j}.log 2>&1 & pids[${j}]=$! done for j in 1 2 3; do diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 4a62fba49..a68f9bfd9 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -15,7 +15,7 @@ use std::env; use std::future::Future; use std::path::PathBuf; use std::sync::atomic::{AtomicU16, Ordering}; -use std::sync::{Arc, RwLock}; +use std::sync::{Arc, LazyLock, RwLock}; use std::time::Duration; use bitcoin::hashes::hex::FromHex; @@ -274,7 +274,10 @@ pub(crate) fn random_storage_path() -> PathBuf { temp_path } -static NEXT_PORT: AtomicU16 = AtomicU16::new(20000); +static BASE_PORT: LazyLock = LazyLock::new(|| { + env::var("LDK_NODE_TEST_BASE_PORT").ok().and_then(|v| v.parse().ok()).unwrap_or(20000) +}); +static NEXT_PORT: LazyLock = LazyLock::new(|| AtomicU16::new(*BASE_PORT)); pub(crate) fn generate_listening_addresses() -> Vec { let port = NEXT_PORT.fetch_add(2, Ordering::Relaxed); From fc11c64c92a3daa8d449b254271017cb416f19ee Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Thu, 26 Mar 2026 23:37:30 +0100 Subject: [PATCH 05/13] ci: Use unique base ports per iteration to avoid TIME_WAIT 
collisions Ports from previous iterations may still be in TIME_WAIT when the next iteration starts. Offset the base port by both iteration and process index to ensure no overlap. AI tools were used in preparing this commit. --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 353fbb9bd..73b9004d1 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -128,7 +128,7 @@ jobs: for i in $(seq 1 5); do echo "=== Iteration $i (shard ${{ matrix.shard }}) ===" for j in 1 2 3; do - LDK_NODE_TEST_BASE_PORT=$((20000 + (j - 1) * 1000)) RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture > /tmp/stress-${j}.log 2>&1 & + LDK_NODE_TEST_BASE_PORT=$((20000 + (i - 1) * 3000 + (j - 1) * 1000)) RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture > /tmp/stress-${j}.log 2>&1 & pids[${j}]=$! done for j in 1 2 3; do From 1bd16723d00f173984154e21c6fbc1a185fd3bce Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Fri, 27 Mar 2026 07:40:26 +0100 Subject: [PATCH 06/13] ci: Dump dmesg OOM/kill messages on test failure Check whether the kernel OOM killer is responsible for electrs silently disappearing during tests. Dump relevant dmesg output after any test failure on ubuntu runners. AI tools were used in preparing this commit. 
--- .github/workflows/rust.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 73b9004d1..70b0b06ee 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -81,6 +81,13 @@ jobs: if: "matrix.platform != 'windows-latest'" run: | RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test + - name: Dump kernel OOM messages on failure + if: "failure() && matrix.platform == 'ubuntu-latest'" + run: | + echo "=== dmesg OOM/kill messages ===" + sudo dmesg | grep -iE 'oom|kill|out of memory|invoked oom' || echo "No OOM messages found" + echo "=== dmesg last 50 lines ===" + sudo dmesg | tail -50 - name: Test with UniFFI support on Rust ${{ matrix.toolchain }} if: "matrix.platform != 'windows-latest' && matrix.build-uniffi" run: | @@ -131,13 +138,21 @@ jobs: LDK_NODE_TEST_BASE_PORT=$((20000 + (i - 1) * 3000 + (j - 1) * 1000)) RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture > /tmp/stress-${j}.log 2>&1 & pids[${j}]=$! done + failed=0 for j in 1 2 3; do if ! wait ${pids[${j}]}; then echo "FAILED: process $j on iteration $i (shard ${{ matrix.shard }})" cat /tmp/stress-${j}.log - exit 1 + failed=1 fi done + if [ $failed -eq 1 ]; then + echo "=== dmesg OOM/kill messages ===" + sudo dmesg | grep -iE 'oom|kill|out of memory|invoked oom' || echo "No OOM messages found" + echo "=== dmesg last 50 lines ===" + sudo dmesg | tail -50 + exit 1 + fi done doc: From 4736ce23ad2e1e52fa185088d6aa84a08eee7f39 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Fri, 27 Mar 2026 07:49:47 +0100 Subject: [PATCH 07/13] test: Use random ports with retry loop instead of deterministic allocation Revert the deterministic port allocation approach from PR #847 and instead use random ports with a retry loop around node.start(). 
This avoids collisions with ports allocated by electrsd/corepc_node via get_available_port(), which use the OS ephemeral port allocator and can land in any range. On InvalidSocketAddress, new random ports are selected and the node is rebuilt, up to 5 attempts. AI tools were used in preparing this commit. --- .github/workflows/rust.yml | 2 +- tests/common/mod.rs | 171 +++++++++++++++++--------------- tests/integration_tests_rust.rs | 10 +- 3 files changed, 98 insertions(+), 85 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 70b0b06ee..e1dadede8 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -135,7 +135,7 @@ jobs: for i in $(seq 1 5); do echo "=== Iteration $i (shard ${{ matrix.shard }}) ===" for j in 1 2 3; do - LDK_NODE_TEST_BASE_PORT=$((20000 + (i - 1) * 3000 + (j - 1) * 1000)) RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture > /tmp/stress-${j}.log 2>&1 & + RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture > /tmp/stress-${j}.log 2>&1 & pids[${j}]=$! 
done failed=0 diff --git a/tests/common/mod.rs b/tests/common/mod.rs index a68f9bfd9..b6fe9dc5c 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -14,8 +14,7 @@ use std::collections::{HashMap, HashSet}; use std::env; use std::future::Future; use std::path::PathBuf; -use std::sync::atomic::{AtomicU16, Ordering}; -use std::sync::{Arc, LazyLock, RwLock}; +use std::sync::{Arc, RwLock}; use std::time::Duration; use bitcoin::hashes::hex::FromHex; @@ -274,13 +273,9 @@ pub(crate) fn random_storage_path() -> PathBuf { temp_path } -static BASE_PORT: LazyLock = LazyLock::new(|| { - env::var("LDK_NODE_TEST_BASE_PORT").ok().and_then(|v| v.parse().ok()).unwrap_or(20000) -}); -static NEXT_PORT: LazyLock = LazyLock::new(|| AtomicU16::new(*BASE_PORT)); - -pub(crate) fn generate_listening_addresses() -> Vec { - let port = NEXT_PORT.fetch_add(2, Ordering::Relaxed); +pub(crate) fn random_listening_addresses() -> Vec { + let mut rng = rng(); + let port = rng.random_range(10000..65000u16); vec![ SocketAddress::TcpIpV4 { addr: [127, 0, 0, 1], port }, SocketAddress::TcpIpV4 { addr: [127, 0, 0, 1], port: port + 1 }, @@ -310,8 +305,8 @@ pub(crate) fn random_config(anchor_channels: bool) -> TestConfig { println!("Setting random LDK storage dir: {}", rand_dir.display()); node_config.storage_dir_path = rand_dir.to_str().unwrap().to_owned(); - let listening_addresses = generate_listening_addresses(); - println!("Setting LDK listening addresses: {:?}", listening_addresses); + let listening_addresses = random_listening_addresses(); + println!("Setting random LDK listening addresses: {:?}", listening_addresses); node_config.listening_addresses = Some(listening_addresses); let alias = random_node_alias(); @@ -430,81 +425,99 @@ pub(crate) fn setup_two_nodes_with_store( } pub(crate) fn setup_node(chain_source: &TestChainSource, config: TestConfig) -> TestNode { - setup_builder!(builder, config.node_config); - match chain_source { - TestChainSource::Esplora(electrsd) => { - let esplora_url 
= format!("http://{}", electrsd.esplora_url.as_ref().unwrap()); - let mut sync_config = EsploraSyncConfig::default(); - sync_config.background_sync_config = None; - builder.set_chain_source_esplora(esplora_url.clone(), Some(sync_config)); - }, - TestChainSource::Electrum(electrsd) => { - let electrum_url = format!("tcp://{}", electrsd.electrum_url); - let mut sync_config = ElectrumSyncConfig::default(); - sync_config.background_sync_config = None; - builder.set_chain_source_electrum(electrum_url.clone(), Some(sync_config)); - }, - TestChainSource::BitcoindRpcSync(bitcoind) => { - let rpc_host = bitcoind.params.rpc_socket.ip().to_string(); - let rpc_port = bitcoind.params.rpc_socket.port(); - let values = bitcoind.params.get_cookie_values().unwrap().unwrap(); - let rpc_user = values.user; - let rpc_password = values.password; - builder.set_chain_source_bitcoind_rpc(rpc_host, rpc_port, rpc_user, rpc_password); - }, - TestChainSource::BitcoindRestSync(bitcoind) => { - let rpc_host = bitcoind.params.rpc_socket.ip().to_string(); - let rpc_port = bitcoind.params.rpc_socket.port(); - let values = bitcoind.params.get_cookie_values().unwrap().unwrap(); - let rpc_user = values.user; - let rpc_password = values.password; - let rest_host = bitcoind.params.rpc_socket.ip().to_string(); - let rest_port = bitcoind.params.rpc_socket.port(); - builder.set_chain_source_bitcoind_rest( - rest_host, - rest_port, - rpc_host, - rpc_port, - rpc_user, - rpc_password, + for attempt in 0..5 { + let mut node_config = config.node_config.clone(); + if attempt > 0 { + let new_addrs = random_listening_addresses(); + println!( + "Retrying with new listening addresses (attempt {}): {:?}", + attempt + 1, + new_addrs ); - }, - } + node_config.listening_addresses = Some(new_addrs); + } - match &config.log_writer { - TestLogWriter::FileWriter => { - builder.set_filesystem_logger(None, None); - }, - TestLogWriter::LogFacade => { - builder.set_log_facade_logger(); - }, - 
TestLogWriter::Custom(custom_log_writer) => { - builder.set_custom_logger(Arc::clone(custom_log_writer)); - }, - } + setup_builder!(builder, node_config); + match chain_source { + TestChainSource::Esplora(electrsd) => { + let esplora_url = format!("http://{}", electrsd.esplora_url.as_ref().unwrap()); + let mut sync_config = EsploraSyncConfig::default(); + sync_config.background_sync_config = None; + builder.set_chain_source_esplora(esplora_url.clone(), Some(sync_config)); + }, + TestChainSource::Electrum(electrsd) => { + let electrum_url = format!("tcp://{}", electrsd.electrum_url); + let mut sync_config = ElectrumSyncConfig::default(); + sync_config.background_sync_config = None; + builder.set_chain_source_electrum(electrum_url.clone(), Some(sync_config)); + }, + TestChainSource::BitcoindRpcSync(bitcoind) => { + let rpc_host = bitcoind.params.rpc_socket.ip().to_string(); + let rpc_port = bitcoind.params.rpc_socket.port(); + let values = bitcoind.params.get_cookie_values().unwrap().unwrap(); + let rpc_user = values.user; + let rpc_password = values.password; + builder.set_chain_source_bitcoind_rpc(rpc_host, rpc_port, rpc_user, rpc_password); + }, + TestChainSource::BitcoindRestSync(bitcoind) => { + let rpc_host = bitcoind.params.rpc_socket.ip().to_string(); + let rpc_port = bitcoind.params.rpc_socket.port(); + let values = bitcoind.params.get_cookie_values().unwrap().unwrap(); + let rpc_user = values.user; + let rpc_password = values.password; + let rest_host = bitcoind.params.rpc_socket.ip().to_string(); + let rest_port = bitcoind.params.rpc_socket.port(); + builder.set_chain_source_bitcoind_rest( + rest_host, + rest_port, + rpc_host, + rpc_port, + rpc_user, + rpc_password, + ); + }, + } - builder.set_async_payments_role(config.async_payments_role).unwrap(); + match &config.log_writer { + TestLogWriter::FileWriter => { + builder.set_filesystem_logger(None, None); + }, + TestLogWriter::LogFacade => { + builder.set_log_facade_logger(); + }, + 
TestLogWriter::Custom(custom_log_writer) => { + builder.set_custom_logger(Arc::clone(custom_log_writer)); + }, + } - if config.recovery_mode { - builder.set_wallet_recovery_mode(); - } + builder.set_async_payments_role(config.async_payments_role).unwrap(); - let node = match config.store_type { - TestStoreType::TestSyncStore => { - let kv_store = TestSyncStore::new(config.node_config.storage_dir_path.into()); - builder.build_with_store(config.node_entropy.into(), kv_store).unwrap() - }, - TestStoreType::Sqlite => builder.build(config.node_entropy.into()).unwrap(), - }; + if config.recovery_mode { + builder.set_wallet_recovery_mode(); + } - if config.recovery_mode { - builder.set_wallet_recovery_mode(); - } + let node = match config.store_type { + TestStoreType::TestSyncStore => { + let kv_store = TestSyncStore::new(node_config.storage_dir_path.into()); + builder.build_with_store(config.node_entropy.clone().into(), kv_store).unwrap() + }, + TestStoreType::Sqlite => builder.build(config.node_entropy.clone().into()).unwrap(), + }; - node.start().unwrap(); - assert!(node.status().is_running); - assert!(node.status().latest_fee_rate_cache_update_timestamp.is_some()); - node + match node.start() { + Ok(()) => { + assert!(node.status().is_running); + assert!(node.status().latest_fee_rate_cache_update_timestamp.is_some()); + return node; + }, + Err(NodeError::InvalidSocketAddress) => { + eprintln!("node.start() failed with InvalidSocketAddress, retrying..."); + continue; + }, + Err(e) => panic!("node.start() failed: {:?}", e), + } + } + panic!("Failed to start node after 5 attempts due to port collisions") } pub(crate) async fn generate_blocks_and_wait( diff --git a/tests/integration_tests_rust.rs b/tests/integration_tests_rust.rs index 413b2d44a..4eb9c434d 100644 --- a/tests/integration_tests_rust.rs +++ b/tests/integration_tests_rust.rs @@ -21,8 +21,8 @@ use common::{ expect_channel_pending_event, expect_channel_ready_event, expect_channel_ready_events, expect_event, 
expect_payment_claimable_event, expect_payment_received_event, expect_payment_successful_event, expect_splice_pending_event, generate_blocks_and_wait, - generate_listening_addresses, open_channel, open_channel_push_amt, open_channel_with_all, - premine_and_distribute_funds, premine_blocks, prepare_rbf, random_chain_source, random_config, + open_channel, open_channel_push_amt, open_channel_with_all, premine_and_distribute_funds, + premine_blocks, prepare_rbf, random_chain_source, random_config, random_listening_addresses, setup_bitcoind_and_electrsd, setup_builder, setup_node, setup_two_nodes, splice_in_with_all, wait_for_tx, TestChainSource, TestStoreType, TestSyncStore, }; @@ -1431,9 +1431,9 @@ async fn test_node_announcement_propagation() { node_a_alias_bytes[..node_a_alias_string.as_bytes().len()] .copy_from_slice(node_a_alias_string.as_bytes()); let node_a_node_alias = Some(NodeAlias(node_a_alias_bytes)); - let node_a_announcement_addresses = generate_listening_addresses(); + let node_a_announcement_addresses = random_listening_addresses(); config_a.node_config.node_alias = node_a_node_alias.clone(); - config_a.node_config.listening_addresses = Some(generate_listening_addresses()); + config_a.node_config.listening_addresses = Some(random_listening_addresses()); config_a.node_config.announcement_addresses = Some(node_a_announcement_addresses.clone()); // Node B will only use listening addresses @@ -1443,7 +1443,7 @@ async fn test_node_announcement_propagation() { node_b_alias_bytes[..node_b_alias_string.as_bytes().len()] .copy_from_slice(node_b_alias_string.as_bytes()); let node_b_node_alias = Some(NodeAlias(node_b_alias_bytes)); - let node_b_listening_addresses = generate_listening_addresses(); + let node_b_listening_addresses = random_listening_addresses(); config_b.node_config.node_alias = node_b_node_alias.clone(); config_b.node_config.listening_addresses = Some(node_b_listening_addresses.clone()); config_b.node_config.announcement_addresses = None; From 
0c83072d255e5515ad70d20304dfa81832d29669 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Fri, 27 Mar 2026 08:13:21 +0100 Subject: [PATCH 08/13] test: Use fresh storage directory on port collision retry When node.start() fails with InvalidSocketAddress and we retry with new random ports, also generate a fresh storage directory. Reusing the same directory causes the second build to fail with ReadFailed/Namespace not found since the first build already wrote data there. AI tools were used in preparing this commit. --- tests/common/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index b6fe9dc5c..252bac112 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -429,12 +429,15 @@ pub(crate) fn setup_node(chain_source: &TestChainSource, config: TestConfig) -> let mut node_config = config.node_config.clone(); if attempt > 0 { let new_addrs = random_listening_addresses(); + let new_dir = random_storage_path(); println!( - "Retrying with new listening addresses (attempt {}): {:?}", + "Retrying with new listening addresses and storage dir (attempt {}): {:?}, {}", attempt + 1, - new_addrs + new_addrs, + new_dir.display() ); node_config.listening_addresses = Some(new_addrs); + node_config.storage_dir_path = new_dir.to_str().unwrap().to_owned(); } setup_builder!(builder, node_config); From d9a1f97cb8422575b51138dae0742fbf7aad5c04 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Fri, 27 Mar 2026 08:14:47 +0100 Subject: [PATCH 09/13] test: Log which process holds a port on InvalidSocketAddress Run lsof to identify what is using the port when node.start() fails with a binding error. This helps distinguish between collisions with electrsd/bitcoind, other test processes, or TIME_WAIT leftovers. AI tools were used in preparing this commit. 
--- tests/common/mod.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 252bac112..97e25a0be 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -514,6 +514,29 @@ pub(crate) fn setup_node(chain_source: &TestChainSource, config: TestConfig) -> return node; }, Err(NodeError::InvalidSocketAddress) => { + if let Some(ref addrs) = node_config.listening_addresses { + for addr in addrs { + if let SocketAddress::TcpIpV4 { port, .. } + | SocketAddress::TcpIpV6 { port, .. } = addr + { + let output = std::process::Command::new("lsof") + .args(["-i", &format!(":{}", port), "-P", "-n"]) + .output(); + match output { + Ok(o) if !o.stdout.is_empty() => { + eprintln!( + "Port {} is in use:\n{}", + port, + String::from_utf8_lossy(&o.stdout) + ); + }, + _ => { + eprintln!("Port {} appears unavailable (no lsof info)", port); + }, + } + } + } + } eprintln!("node.start() failed with InvalidSocketAddress, retrying..."); continue; }, From cdc77a22b5ee45345ce1b57c51252e548e56b3eb Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Fri, 27 Mar 2026 08:43:14 +0100 Subject: [PATCH 10/13] test: Fix announcement address assertion and simplify stress test Read node_b's listening addresses from the node after setup instead of using the pre-retry variable, which may differ if setup_node retried with new ports. Simplify the stress test to run 1 process per shard with 10 iterations instead of 3 concurrent processes. The concurrent processes caused port collisions in code paths outside setup_node that don't have retry logic, which is noise unrelated to the electrs crash we're investigating. AI tools were used in preparing this commit. 
--- .github/workflows/rust.yml | 21 +++++---------------- tests/integration_tests_rust.rs | 3 +-- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index e1dadede8..62a6d5d16 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -130,29 +130,18 @@ jobs: echo "ELECTRS_EXE=$( pwd )/bin/electrs-${{ runner.os }}-${{ runner.arch }}" >> "$GITHUB_ENV" - name: Build integration tests run: RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust --no-run - - name: Stress-test integration tests (5 iterations, 3 concurrent processes) + - name: Stress-test integration tests (10 iterations) run: | - for i in $(seq 1 5); do + for i in $(seq 1 10); do echo "=== Iteration $i (shard ${{ matrix.shard }}) ===" - for j in 1 2 3; do - RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture > /tmp/stress-${j}.log 2>&1 & - pids[${j}]=$! - done - failed=0 - for j in 1 2 3; do - if ! 
wait ${pids[${j}]}; then - echo "FAILED: process $j on iteration $i (shard ${{ matrix.shard }})" - cat /tmp/stress-${j}.log - failed=1 - fi - done - if [ $failed -eq 1 ]; then + RUSTFLAGS="--cfg no_download --cfg cycle_tests" cargo test --test integration_tests_rust -- --nocapture 2>&1 || { + echo "FAILED on iteration $i (shard ${{ matrix.shard }})" echo "=== dmesg OOM/kill messages ===" sudo dmesg | grep -iE 'oom|kill|out of memory|invoked oom' || echo "No OOM messages found" echo "=== dmesg last 50 lines ===" sudo dmesg | tail -50 exit 1 - fi + } done doc: diff --git a/tests/integration_tests_rust.rs b/tests/integration_tests_rust.rs index 4eb9c434d..bd8099ce4 100644 --- a/tests/integration_tests_rust.rs +++ b/tests/integration_tests_rust.rs @@ -1443,13 +1443,12 @@ async fn test_node_announcement_propagation() { node_b_alias_bytes[..node_b_alias_string.as_bytes().len()] .copy_from_slice(node_b_alias_string.as_bytes()); let node_b_node_alias = Some(NodeAlias(node_b_alias_bytes)); - let node_b_listening_addresses = random_listening_addresses(); config_b.node_config.node_alias = node_b_node_alias.clone(); - config_b.node_config.listening_addresses = Some(node_b_listening_addresses.clone()); config_b.node_config.announcement_addresses = None; let node_a = setup_node(&chain_source, config_a); let node_b = setup_node(&chain_source, config_b); + let node_b_listening_addresses = node_b.listening_addresses().unwrap(); let address_a = node_a.onchain_payment().new_address().unwrap(); let premine_amount_sat = 5_000_000; From 72f1764cc90e99ea4cb76e98961190aef840fa44 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Fri, 27 Mar 2026 09:04:21 +0100 Subject: [PATCH 11/13] test: Use atomic counter with random base for port allocation Avoid intra-process port collisions between parallel tests by using an atomic counter that increments by 2 for each allocation. The base port is randomized once per process to reduce inter-process collisions. 
This eliminates the birthday-paradox collisions that occurred when every call independently picked a random port from the range. AI tools were used in preparing this commit. --- tests/common/mod.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 97e25a0be..a3e306c63 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -14,6 +14,7 @@ use std::collections::{HashMap, HashSet}; use std::env; use std::future::Future; use std::path::PathBuf; +use std::sync::atomic::{AtomicU16, Ordering}; use std::sync::{Arc, RwLock}; use std::time::Duration; @@ -273,9 +274,21 @@ pub(crate) fn random_storage_path() -> PathBuf { temp_path } +static NEXT_PORT: AtomicU16 = AtomicU16::new(0); + pub(crate) fn random_listening_addresses() -> Vec { - let mut rng = rng(); - let port = rng.random_range(10000..65000u16); + // Use an atomic counter to avoid intra-process collisions between parallel tests. + // The base port is randomized once per process to avoid inter-process collisions. + let port = NEXT_PORT + .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |p| { + if p == 0 { + let base = rng().random_range(10000..50000u16); + Some(base + 2) + } else { + Some(p + 2) + } + }) + .unwrap_or_else(|p| p); vec![ SocketAddress::TcpIpV4 { addr: [127, 0, 0, 1], port }, SocketAddress::TcpIpV4 { addr: [127, 0, 0, 1], port: port + 1 }, From 615355172bc8fedb20ec74abcf4241b05cbe9aaf Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Fri, 27 Mar 2026 09:15:44 +0100 Subject: [PATCH 12/13] test: Fix port 0 bug in atomic counter initialization fetch_update returns the previous value, so the first caller got port 0 instead of the random base. Use compare_exchange for one-time init followed by fetch_add, which correctly returns the base port to the first caller. AI tools were used in preparing this commit. 
--- tests/common/mod.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index a3e306c63..ee7c017ff 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -276,19 +276,17 @@ pub(crate) fn random_storage_path() -> PathBuf { static NEXT_PORT: AtomicU16 = AtomicU16::new(0); +fn init_base_port() { + // Initialize once with a random base port. compare_exchange ensures only one thread wins. + let base = rng().random_range(10000..50000u16); + let _ = NEXT_PORT.compare_exchange(0, base, Ordering::Relaxed, Ordering::Relaxed); +} + pub(crate) fn random_listening_addresses() -> Vec { // Use an atomic counter to avoid intra-process collisions between parallel tests. // The base port is randomized once per process to avoid inter-process collisions. - let port = NEXT_PORT - .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |p| { - if p == 0 { - let base = rng().random_range(10000..50000u16); - Some(base + 2) - } else { - Some(p + 2) - } - }) - .unwrap_or_else(|p| p); + init_base_port(); + let port = NEXT_PORT.fetch_add(2, Ordering::Relaxed); vec![ SocketAddress::TcpIpV4 { addr: [127, 0, 0, 1], port }, SocketAddress::TcpIpV4 { addr: [127, 0, 0, 1], port: port + 1 }, From f4726a29493ac189363fa3208c3e338138f9034f Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Fri, 27 Mar 2026 09:35:09 +0100 Subject: [PATCH 13/13] test: Use port range below Linux ephemeral range to avoid collisions Restrict the random base port to 10000-30000, which is below the Linux ephemeral port range (32768-60999). This prevents collisions with OS-assigned ports used by electrsd and bitcoind. AI tools were used in preparing this commit. 
--- tests/common/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index ee7c017ff..596e982ec 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -278,7 +278,9 @@ static NEXT_PORT: AtomicU16 = AtomicU16::new(0); fn init_base_port() { // Initialize once with a random base port. compare_exchange ensures only one thread wins. - let base = rng().random_range(10000..50000u16); + // Use a range below the Linux ephemeral port range (32768-60999) to avoid + // collisions with OS-assigned ports used by electrsd/bitcoind. + let base = rng().random_range(10000..30000u16); let _ = NEXT_PORT.compare_exchange(0, base, Ordering::Relaxed, Ordering::Relaxed); }