From 039f800f339c6bf08828831a21e93675f29c28ab Mon Sep 17 00:00:00 2001
From: Chai Zhenhua <chaizhenhua@gmail.com>
Date: Sat, 21 Mar 2026 18:47:01 +0800
Subject: [PATCH 1/2] fix: add missing index field to streaming tool_call
 chunks

The OpenAI Chat Completions streaming API requires each tool_call
delta to include an `index` field that identifies which tool call
in the array the chunk belongs to. Without it, clients like the
genai Rust crate silently discard tool_call chunks, causing agents
to terminate early without executing any tools.

Changes:
- Add optional `index` field to `ToolCall` model
- Pass `index` through `build_openai_tool_call` helper
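- Set `index=state.index` on the initial and argument-delta `ToolCall`
  chunks in the openai_to_openai streaming formatter
- Pass the accumulated tool call's `index` (falling back to the block
  index) in the anthropic_to_openai streaming formatter
- Set `index` on tool calls built by the openai_to_openai responses
  converter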
- Set `index` in claude_sdk parser tool call conversion
---
 ccproxy/llms/formatters/anthropic_to_openai/_helpers.py | 2 ++
 ccproxy/llms/formatters/anthropic_to_openai/streams.py  | 1 +
 ccproxy/llms/formatters/openai_to_openai/responses.py   | 4 +++-
 ccproxy/llms/formatters/openai_to_openai/streams.py     | 2 ++
 ccproxy/llms/models/openai.py                           | 1 +
 ccproxy/plugins/claude_sdk/parser.py                    | 7 +++++--
 6 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/ccproxy/llms/formatters/anthropic_to_openai/_helpers.py b/ccproxy/llms/formatters/anthropic_to_openai/_helpers.py
index 331b1275..7d136089 100644
--- a/ccproxy/llms/formatters/anthropic_to_openai/_helpers.py
+++ b/ccproxy/llms/formatters/anthropic_to_openai/_helpers.py
@@ -24,6 +24,7 @@ def build_openai_tool_call(
     tool_input: Any,
     arguments: Any = None,
     fallback_index: int = 0,
+    index: int | None = None,
 ) -> openai_models.ToolCall:
     args_str = (
         arguments
@@ -41,4 +42,5 @@ def build_openai_tool_call(
             name=str(name),
             arguments=str(args_str),
         ),
+        index=index if index is not None else fallback_index,
     )
diff --git a/ccproxy/llms/formatters/anthropic_to_openai/streams.py b/ccproxy/llms/formatters/anthropic_to_openai/streams.py
index 3aff75f0..ebf8f1b8 100644
--- a/ccproxy/llms/formatters/anthropic_to_openai/streams.py
+++ b/ccproxy/llms/formatters/anthropic_to_openai/streams.py
@@ -108,6 +108,7 @@ def _build_openai_tool_call(
             tool_input=tool_call.get("input", {}),
             arguments=arguments,
             fallback_index=block_index,
+            index=tool_call.get("index", block_index),
         )
 
     return None
diff --git a/ccproxy/llms/formatters/openai_to_openai/responses.py b/ccproxy/llms/formatters/openai_to_openai/responses.py
index d8ccd254..03e5e7be 100644
--- a/ccproxy/llms/formatters/openai_to_openai/responses.py
+++ b/ccproxy/llms/formatters/openai_to_openai/responses.py
@@ -396,16 +396,18 @@ def convert__openai_responses_to_openai_chat__response(
             else:
                 arguments_str = json.dumps(arguments_value or {})
 
+            tool_call_index = len(tool_calls)
             tool_calls.append(
                 openai_models.ToolCall(
                     id=_get_attr(item, "id")
                     or _get_attr(item, "call_id")
-                    or f"call_{len(tool_calls)}",
+                    or f"call_{tool_call_index}",
                     type="function",
                     function=openai_models.FunctionCall(
                         name=name,
                         arguments=arguments_str,
                     ),
+                    index=tool_call_index,
                 )
             )
 
diff --git a/ccproxy/llms/formatters/openai_to_openai/streams.py b/ccproxy/llms/formatters/openai_to_openai/streams.py
index 8a50ee2d..8a9fb1ed 100644
--- a/ccproxy/llms/formatters/openai_to_openai/streams.py
+++ b/ccproxy/llms/formatters/openai_to_openai/streams.py
@@ -396,6 +396,7 @@ def create_text_chunk(
                                 name=state.name or "",
                                 arguments=arguments or "",
                             ),
+                            index=state.index,
                         )
                         state.emitted = True
                         state.initial_emitted = True
@@ -449,6 +450,7 @@ def create_text_chunk(
                                 name=state.name or "",
                                 arguments=delta_segment,
                             ),
+                            index=state.index,
                         )
 
                         state.emitted = True
diff --git a/ccproxy/llms/models/openai.py b/ccproxy/llms/models/openai.py
index 34b3f9ec..012a6977 100644
--- a/ccproxy/llms/models/openai.py
+++ b/ccproxy/llms/models/openai.py
@@ -188,6 +188,7 @@ class ToolCall(LlmBaseModel):
     id: str
     type: Literal["function"] = Field(default="function")
     function: FunctionCall
+    index: int | None = None
 
 
 class ChatMessage(LlmBaseModel):
diff --git a/ccproxy/plugins/claude_sdk/parser.py b/ccproxy/plugins/claude_sdk/parser.py
index ed5ba1e6..09f3fd6a 100644
--- a/ccproxy/plugins/claude_sdk/parser.py
+++ b/ccproxy/plugins/claude_sdk/parser.py
@@ -17,7 +17,9 @@
 from ccproxy.llms.models import openai as openai_models
 
 
-def format_openai_tool_call(tool_use: dict[str, Any]) -> openai_models.ToolCall:
+def format_openai_tool_call(
+    tool_use: dict[str, Any], index: int = 0
+) -> openai_models.ToolCall:
     """Convert Anthropic tool use to OpenAI tool call format."""
     tool_input = tool_use.get("input", {})
     if isinstance(tool_input, dict):
@@ -32,6 +34,7 @@ def format_openai_tool_call(tool_use: dict[str, Any]) -> openai_models.ToolCall:
             name=tool_use.get("name", ""),
             arguments=arguments_str,
         ),
+        index=index,
     )
 
 
@@ -86,7 +89,7 @@ def replace_tool_use(match: re.Match[str]) -> str:
                     "name": tool_data.get("name", ""),
                     "input": tool_data.get("input", {}),
                 }
-                tool_calls.append(format_openai_tool_call(tool_call_block))
+                tool_calls.append(format_openai_tool_call(tool_call_block, index=len(tool_calls)))
                 return ""  # Remove the XML tag from text
             else:
                 # For streaming: format as readable text

From 5e7a64e01f93cc90d914f3f0a5dcb6ebcec91d2b Mon Sep 17 00:00:00 2001
From: Caddy Glow <caddyglow@pm.me>
Date: Sun, 22 Mar 2026 21:31:32 +0100
Subject: [PATCH 2/2] refactor: split ToolCall into ToolCall and ToolCallChunk
 per OpenAI spec

The OpenAI spec defines two distinct types for tool calls:
- ChatCompletionMessageToolCall (non-streaming): id, type, function
- ChoiceDeltaToolCall (streaming): index (required), id, function, type

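Split the single ToolCall model into ToolCall (non-streaming, no index)
and ToolCallChunk (streaming, required index). This keeps non-streaming
responses spec-compliant while enforcing index on streaming chunks.

As part of the split, the `index` arguments that the previous patch
added to the non-streaming paths (build_openai_tool_call helper,
responses converter, claude_sdk parser) are removed again, and three
further streaming tool-call instantiations in
openai_to_openai/streams.py gain `index=state.index`.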
---
 .../anthropic_to_openai/_helpers.py           | 32 +++++++++++++++++--
 .../formatters/anthropic_to_openai/streams.py | 12 +++----
 .../formatters/openai_to_openai/responses.py  |  4 +--
 .../formatters/openai_to_openai/streams.py    | 17 ++++++----
 ccproxy/llms/models/openai.py                 | 14 ++++++--
 ccproxy/plugins/claude_sdk/parser.py          |  7 ++--
 6 files changed, 61 insertions(+), 25 deletions(-)

diff --git a/ccproxy/llms/formatters/anthropic_to_openai/_helpers.py b/ccproxy/llms/formatters/anthropic_to_openai/_helpers.py
index 7d136089..ec5ed695 100644
--- a/ccproxy/llms/formatters/anthropic_to_openai/_helpers.py
+++ b/ccproxy/llms/formatters/anthropic_to_openai/_helpers.py
@@ -24,7 +24,6 @@ def build_openai_tool_call(
     tool_input: Any,
     arguments: Any = None,
     fallback_index: int = 0,
-    index: int | None = None,
 ) -> openai_models.ToolCall:
     args_str = (
         arguments
@@ -42,5 +41,34 @@ def build_openai_tool_call(
             name=str(name),
             arguments=str(args_str),
         ),
-        index=index if index is not None else fallback_index,
+    )
+
+
+def build_openai_tool_call_chunk(
+    *,
+    index: int,
+    tool_id: str | None,
+    tool_name: str | None,
+    tool_input: Any,
+    arguments: Any = None,
+    fallback_index: int = 0,
+) -> openai_models.ToolCallChunk:
+    args_str = (
+        arguments
+        if isinstance(arguments, str) and arguments
+        else serialize_tool_arguments(tool_input)
+    )
+    call_id = (
+        tool_id if isinstance(tool_id, str) and tool_id else f"call_{fallback_index}"
+    )
+    name = tool_name if isinstance(tool_name, str) and tool_name else "function"
+
+    return openai_models.ToolCallChunk(
+        index=index,
+        id=str(call_id),
+        type="function",
+        function=openai_models.FunctionCall(
+            name=str(name),
+            arguments=str(args_str),
+        ),
     )
diff --git a/ccproxy/llms/formatters/anthropic_to_openai/streams.py b/ccproxy/llms/formatters/anthropic_to_openai/streams.py
index ebf8f1b8..9710dc14 100644
--- a/ccproxy/llms/formatters/anthropic_to_openai/streams.py
+++ b/ccproxy/llms/formatters/anthropic_to_openai/streams.py
@@ -27,7 +27,7 @@
 from ccproxy.llms.models import openai as openai_models
 from ccproxy.llms.streaming.accumulators import ClaudeAccumulator
 
-from ._helpers import build_openai_tool_call
+from ._helpers import build_openai_tool_call_chunk
 from .requests import _build_responses_payload_from_anthropic_request
 from .responses import convert__anthropic_usage_to_openai_responses__usage
 
@@ -88,10 +88,10 @@ def _anthropic_delta_to_text(
     return None
 
 
-def _build_openai_tool_call(
+def _build_openai_tool_call_chunk(
     accumulator: ClaudeAccumulator,
     block_index: int,
-) -> openai_models.ToolCall | None:
+) -> openai_models.ToolCallChunk | None:
     for tool_call in accumulator.get_complete_tool_calls():
         if tool_call.get("index") != block_index:
             continue
@@ -102,13 +102,13 @@ def _build_openai_tool_call(
         tool_name = function_payload.get("name") or tool_call.get("name")
         arguments = function_payload.get("arguments")
 
-        return build_openai_tool_call(
+        return build_openai_tool_call_chunk(
+            index=tool_call.get("index", block_index),
             tool_id=tool_call.get("id"),
             tool_name=tool_name,
             tool_input=tool_call.get("input", {}),
             arguments=arguments,
             fallback_index=block_index,
-            index=tool_call.get("index", block_index),
         )
 
     return None
@@ -1414,7 +1414,7 @@ async def generator() -> AsyncGenerator[
                         continue
                     if block_index in emitted_tool_indices:
                         continue
-                    tool_call = _build_openai_tool_call(accumulator, block_index)
+                    tool_call = _build_openai_tool_call_chunk(accumulator, block_index)
                     if tool_call is None:
                         continue
                     emitted_tool_indices.add(block_index)
diff --git a/ccproxy/llms/formatters/openai_to_openai/responses.py b/ccproxy/llms/formatters/openai_to_openai/responses.py
index 03e5e7be..d8ccd254 100644
--- a/ccproxy/llms/formatters/openai_to_openai/responses.py
+++ b/ccproxy/llms/formatters/openai_to_openai/responses.py
@@ -396,18 +396,16 @@ def convert__openai_responses_to_openai_chat__response(
             else:
                 arguments_str = json.dumps(arguments_value or {})
 
-            tool_call_index = len(tool_calls)
             tool_calls.append(
                 openai_models.ToolCall(
                     id=_get_attr(item, "id")
                     or _get_attr(item, "call_id")
-                    or f"call_{tool_call_index}",
+                    or f"call_{len(tool_calls)}",
                     type="function",
                     function=openai_models.FunctionCall(
                         name=name,
                         arguments=arguments_str,
                     ),
-                    index=tool_call_index,
                 )
             )
 
diff --git a/ccproxy/llms/formatters/openai_to_openai/streams.py b/ccproxy/llms/formatters/openai_to_openai/streams.py
index 8a9fb1ed..239439b2 100644
--- a/ccproxy/llms/formatters/openai_to_openai/streams.py
+++ b/ccproxy/llms/formatters/openai_to_openai/streams.py
@@ -389,14 +389,14 @@ def create_text_chunk(
 
                     # Emit initial tool call chunk to surface id/name information
                     if not state.initial_emitted:
-                        tool_call = openai_models.ToolCall(
+                        tool_call = openai_models.ToolCallChunk(
+                            index=state.index,
                             id=state.id,
                             type="function",
                             function=openai_models.FunctionCall(
                                 name=state.name or "",
                                 arguments=arguments or "",
                             ),
-                            index=state.index,
                         )
                         state.emitted = True
                         state.initial_emitted = True
@@ -443,14 +443,14 @@ def create_text_chunk(
                             state.name = guessed
 
                     if state.initial_emitted:
-                        tool_call = openai_models.ToolCall(
+                        tool_call = openai_models.ToolCallChunk(
+                            index=state.index,
                             id=state.id,
                             type="function",
                             function=openai_models.FunctionCall(
                                 name=state.name or "",
                                 arguments=delta_segment,
                             ),
-                            index=state.index,
                         )
 
                         state.emitted = True
@@ -496,7 +496,8 @@ def create_text_chunk(
                             if guessed:
                                 state.name = guessed
 
-                        tool_call = openai_models.ToolCall(
+                        tool_call = openai_models.ToolCallChunk(
+                            index=state.index,
                             id=state.id,
                             type="function",
                             function=openai_models.FunctionCall(
@@ -588,7 +589,8 @@ def create_text_chunk(
                             if guessed:
                                 state.name = guessed
                         if not state.arguments_emitted:
-                            tool_call = openai_models.ToolCall(
+                            tool_call = openai_models.ToolCallChunk(
+                                index=state.index,
                                 id=state.id,
                                 type="function",
                                 function=openai_models.FunctionCall(
@@ -618,7 +620,8 @@ def create_text_chunk(
 
                     # Emit a patch chunk if the name was never surfaced earlier
                     if state.name and not state.name_emitted:
-                        tool_call = openai_models.ToolCall(
+                        tool_call = openai_models.ToolCallChunk(
+                            index=state.index,
                             id=state.id,
                             type="function",
                             function=openai_models.FunctionCall(
diff --git a/ccproxy/llms/models/openai.py b/ccproxy/llms/models/openai.py
index 012a6977..6eec0fa9 100644
--- a/ccproxy/llms/models/openai.py
+++ b/ccproxy/llms/models/openai.py
@@ -185,10 +185,20 @@ class FunctionCall(LlmBaseModel):
 
 
 class ToolCall(LlmBaseModel):
+    """Non-streaming tool call (ChatCompletionMessageToolCall)."""
+
     id: str
     type: Literal["function"] = Field(default="function")
     function: FunctionCall
-    index: int | None = None
+
+
+class ToolCallChunk(LlmBaseModel):
+    """Streaming tool call delta (ChoiceDeltaToolCall)."""
+
+    index: int
+    id: str | None = None
+    type: Literal["function"] | None = None
+    function: FunctionCall | None = None
 
 
 class ChatMessage(LlmBaseModel):
@@ -310,7 +320,7 @@ class ChatCompletionResponse(LlmBaseModel):
 class DeltaMessage(LlmBaseModel):
     role: Literal["assistant"] | None = None
     content: str | list[Any] | None = None
-    tool_calls: list[ToolCall] | None = None
+    tool_calls: list[ToolCallChunk] | None = None
     audio: dict[str, Any] | None = None
     reasoning: ResponseMessageReasoning | None = None
 
diff --git a/ccproxy/plugins/claude_sdk/parser.py b/ccproxy/plugins/claude_sdk/parser.py
index 09f3fd6a..ed5ba1e6 100644
--- a/ccproxy/plugins/claude_sdk/parser.py
+++ b/ccproxy/plugins/claude_sdk/parser.py
@@ -17,9 +17,7 @@
 from ccproxy.llms.models import openai as openai_models
 
 
-def format_openai_tool_call(
-    tool_use: dict[str, Any], index: int = 0
-) -> openai_models.ToolCall:
+def format_openai_tool_call(tool_use: dict[str, Any]) -> openai_models.ToolCall:
     """Convert Anthropic tool use to OpenAI tool call format."""
     tool_input = tool_use.get("input", {})
     if isinstance(tool_input, dict):
@@ -34,7 +32,6 @@ def format_openai_tool_call(
             name=tool_use.get("name", ""),
             arguments=arguments_str,
         ),
-        index=index,
     )
 
 
@@ -89,7 +86,7 @@ def replace_tool_use(match: re.Match[str]) -> str:
                     "name": tool_data.get("name", ""),
                     "input": tool_data.get("input", {}),
                 }
-                tool_calls.append(format_openai_tool_call(tool_call_block, index=len(tool_calls)))
+                tool_calls.append(format_openai_tool_call(tool_call_block))
                 return ""  # Remove the XML tag from text
             else:
                 # For streaming: format as readable text