From c1b7dfed01264a7d47e79db73b6e554792fd769d Mon Sep 17 00:00:00 2001 From: amackillop Date: Thu, 19 Mar 2026 14:43:51 -0700 Subject: [PATCH 1/2] Re-send webhook on peer disconnect with orphaned HTLCs When a serverless SDK peer reconnects from a webhook but disconnects again before channel_reestablish completes, the intercepted HTLC gets stuck: process_pending_htlcs only iterates connected peers, so a disconnected peer's HTLCs just sit there until the 45s expiry timer kills them. Fix this by firing another SendWebhook from peer_disconnected when orphaned HTLCs remain in the store. The peer wakes up, reconnects, and peer_connected / process_pending_htlcs get another shot at forwarding. If it disconnects again, the cycle repeats until either the forward succeeds or handle_expired_htlcs cleans up at 45s. --- lightning-liquidity/src/lsps4/service.rs | 29 +++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/lightning-liquidity/src/lsps4/service.rs b/lightning-liquidity/src/lsps4/service.rs index 5d9b7ee673e..8cf208ccb08 100644 --- a/lightning-liquidity/src/lsps4/service.rs +++ b/lightning-liquidity/src/lsps4/service.rs @@ -9,7 +9,7 @@ //! Contains the main LSPS4 server-side object, [`LSPS4ServiceHandler`]. -use crate::events::{LiquidityEvent, EventQueue}; +use crate::events::{EventQueue, LiquidityEvent}; use crate::lsps0::ser::{ LSPSMessage, LSPSProtocolMessageHandler, LSPSRequestId, LSPSResponseError, JSONRPC_INTERNAL_ERROR_ERROR_CODE, JSONRPC_INTERNAL_ERROR_ERROR_MESSAGE, @@ -183,7 +183,7 @@ where // peer_connected() will find this HTLC in the store and process it. // The process_pending_htlcs timer also serves as a fallback. let mut event_queue_notifier = self.pending_events.notifier(); - event_queue_notifier.enqueue(crate::events::LiquidityEvent::LSPS4Service(LSPS4ServiceEvent::SendWebhook { + event_queue_notifier.enqueue(LiquidityEvent::LSPS4Service(LSPS4ServiceEvent::SendWebhook { counterparty_node_id: counterparty_node_id.clone(), payment_hash, })); @@ -497,6 +497,29 @@ where htlc.expected_outbound_amount_msat() ); } + + // Re-send webhook so the peer wakes up and reconnects. Use the first + // HTLC's payment_hash — the peer will reconnect and peer_connected / + // process_pending_htlcs will forward all queued HTLCs. If the peer + // disconnects again before channels become usable, this fires again, + // creating a retry loop bounded by the HTLC expiry threshold (45s). + let payment_hash = pending_htlcs[0].payment_hash(); + log_info!( + self.logger, + "[LSPS4] peer_disconnected: re-sending webhook for {} orphaned HTLCs, peer: {}, payment_hash: {}", + pending_htlcs.len(), + counterparty_node_id, + payment_hash + ); + let mut event_queue_notifier = self.pending_events.notifier(); + event_queue_notifier.enqueue( + LiquidityEvent::LSPS4Service( + LSPS4ServiceEvent::SendWebhook { + counterparty_node_id: counterparty_node_id.clone(), + payment_hash, + }, + ), + ); } } @@ -828,7 +851,7 @@ where ); let mut event_queue_notifier = self.pending_events.notifier(); - event_queue_notifier.enqueue(crate::events::LiquidityEvent::LSPS4Service(LSPS4ServiceEvent::OpenChannel { + event_queue_notifier.enqueue(LiquidityEvent::LSPS4Service(LSPS4ServiceEvent::OpenChannel { their_network_key: their_node_id, amt_to_forward_msat: channel_size_msat, channel_count: actions.channel_count, From c5481c712c7c83ffe304d857cde46234ea18fb8f Mon Sep 17 00:00:00 2001 From: amackillop Date: Thu, 19 Mar 2026 15:42:16 -0700 Subject: [PATCH 2/2] Track pending channel opens to fix timer skip The TOCTOU fix in eecf53e made process_pending_htlcs skip whenever calculate_htlc_actions_for_peer returned new_channel_needed_msat, assuming htlc_intercepted or peer_connected had already emitted OpenChannel. That assumption is wrong for the disconnected-peer path: htlc_intercepted only stores the HTLC and sends a webhook, peer_connected defers because channels aren't usable yet, and nobody ever emits OpenChannel. The timer was the only place that could, but the skip prevented it. The HTLC sat there until expiry. Replace the unconditional skip with a pending_channel_opens set that tracks which peers actually have an OpenChannel in flight. execute_htlc_actions inserts, channel_ready removes. The timer only skips if the peer is in the set. Considered making the OpenChannel handler in ldk-node idempotent instead, but create_channel has real side effects (funding tx) and deduplicating at that layer would be more invasive for the same result. --- lightning-liquidity/src/lsps4/service.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/lightning-liquidity/src/lsps4/service.rs b/lightning-liquidity/src/lsps4/service.rs index 8cf208ccb08..c33ed110622 100644 --- a/lightning-liquidity/src/lsps4/service.rs +++ b/lightning-liquidity/src/lsps4/service.rs @@ -101,6 +101,9 @@ where scid_store: ScidStore, htlc_store: HTLCStore, connected_peers: RwLock>, + /// Peers for which an OpenChannel event has been emitted but channel_ready has not + /// yet fired. Used to prevent duplicate OpenChannel emissions from the timer. + pending_channel_opens: RwLock>, config: LSPS4ServiceConfig, } @@ -128,6 +131,7 @@ where config, logger, connected_peers: RwLock::new(HashSet::new()), + pending_channel_opens: RwLock::new(HashSet::new()), }) } @@ -275,6 +279,8 @@ where pub fn channel_ready( &self, counterparty_node_id: &PublicKey, ) -> Result<(), APIError> { + self.pending_channel_opens.write().unwrap().remove(counterparty_node_id); + let is_connected = self.is_peer_connected(counterparty_node_id); log_info!( @@ -547,14 +553,16 @@ where node_id ); let actions = self.calculate_htlc_actions_for_peer(node_id, htlcs); - if actions.new_channel_needed_msat.is_some() { - // A channel open is already in flight from htlc_intercepted or - // peer_connected. Skip — channel_ready will handle forwarding - // once the new channel is established. + if actions.new_channel_needed_msat.is_some() + && self.pending_channel_opens.read().unwrap().contains(&node_id) + { + // A channel open was already emitted (by htlc_intercepted or + // peer_connected) and channel_ready hasn't fired yet. Skip to + // avoid opening a duplicate channel. log_info!( self.logger, - "[LSPS4] process_pending_htlcs: peer {} needs a new channel, \ - skipping (channel open already in flight)", + "[LSPS4] process_pending_htlcs: peer {} needs a new channel \ + but one is already being opened, skipping", node_id ); continue; @@ -850,6 +858,7 @@ where channel_size_msat ); + self.pending_channel_opens.write().unwrap().insert(their_node_id); let mut event_queue_notifier = self.pending_events.notifier(); event_queue_notifier.enqueue(LiquidityEvent::LSPS4Service(LSPS4ServiceEvent::OpenChannel { their_network_key: their_node_id,