Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .ccproxy.codex.msaf.toml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Example ccproxy config for Microsoft Agent Framework clients over Codex.

enable_plugins = true
enabled_plugins = ["oauth_codex", "codex"]

[server]
bypass_mode = false

[llm]
# Keep OpenAI-compatible responses free from <thinking>...</thinking> blocks.
openai_thinking_xml = false

[plugins.codex]
enabled = true
name = "codex"
base_url = "https://chatgpt.com/backend-api/codex"
requires_auth = true
auth_type = "oauth"
supports_streaming = true
preferred_upstream_mode = "streaming"
buffer_non_streaming = true
enable_format_registry = true

# Microsoft Agent Framework sends its own instructions/reasoning payloads.
# Do not prepend captured Codex CLI templates to generic OpenAI-compatible calls.
inject_detection_payload = false

supported_input_formats = [
"openai.responses",
"openai.chat_completions",
"anthropic.messages",
]

detection_home_mode = "temp"

[[plugins.codex.models_endpoint]]
id = "gpt-5.4"
object = "model"
created = 1735689600
owned_by = "openai"
root = "gpt-5.4"
permission = []

[plugins.codex.oauth]
base_url = "https://auth.openai.com"
client_id = "app_EMoamEEZ73f0CkXaXp7hrann"
scopes = ["openid", "profile", "email", "offline_access"]

[plugins.oauth_codex]
enabled = true
base_url = "https://auth.openai.com"
authorize_url = "https://auth.openai.com/oauth/authorize"
token_url = "https://auth.openai.com/oauth/token"
profile_url = "https://api.openai.com/oauth/profile"
client_id = "app_EMoamEEZ73f0CkXaXp7hrann"
redirect_uri = "http://localhost:1455/auth/callback"
callback_port = 1455
scopes = ["openid", "profile", "email", "offline_access"]
audience = "https://api.openai.com/v1"
user_agent = "Codex-Code/1.0.43"
headers = { User-Agent = "Codex-Code/1.0.43" }
request_timeout = 30
callback_timeout = 300
use_pkce = true
19 changes: 18 additions & 1 deletion ccproxy/core/plugins/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ccproxy.models.provider import ProviderConfig
from ccproxy.services.adapters.base import BaseAdapter
from ccproxy.services.adapters.http_adapter import BaseHTTPAdapter
from ccproxy.services.adapters.mock_adapter import MockAdapter
from ccproxy.services.interfaces import (
IMetricsCollector,
IRequestTracer,
Expand Down Expand Up @@ -215,6 +216,23 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
Returns:
Adapter instance
"""
settings = context.get("settings")
service_container = context.get("service_container")
if settings and getattr(settings.server, "bypass_mode", False):
if not service_container:
raise RuntimeError(
f"Cannot initialize plugin '{self.plugin_name}' in bypass mode: "
"service container is required to create mock adapter. "
"This is likely a configuration issue."
)
logger.warning(
"plugin_bypass_mode_enabled",
plugin=self.plugin_name,
adapter=self.adapter_class.__name__,
category="lifecycle",
)
return MockAdapter(service_container.get_mock_handler())

# Extract services from context (one-time extraction)
http_pool_manager: HTTPPoolManager | None = cast(
"HTTPPoolManager | None", context.get("http_pool_manager")
Expand All @@ -232,7 +250,6 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
config = context.get("config")

# Get all adapter dependencies from service container
service_container = context.get("service_container")
if not service_container:
raise RuntimeError("Service container is required for adapter services")

Expand Down
24 changes: 24 additions & 0 deletions ccproxy/llms/formatters/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
"formatter_instructions", default=None
)
_TOOLS_VAR: ContextVar[list[Any] | None] = ContextVar("formatter_tools", default=None)
# Per-request preference for serializing reasoning/"thinking" text with XML
# wrappers in OpenAI-compatible output. ``None`` means no preference was
# registered and downstream conversion logic should use its own default.
# Stored in a ContextVar so concurrent async requests do not leak into each
# other.
_OPENAI_THINKING_XML_VAR: ContextVar[bool | None] = ContextVar(
    "formatter_openai_thinking_xml", default=None
)


def register_request(request: Any | None, instructions: str | None = None) -> None:
Expand Down Expand Up @@ -114,3 +117,24 @@ def get_last_request_tools() -> list[Any] | None:

cached = _TOOLS_VAR.get()
return list(cached) if cached else None


def register_openai_thinking_xml(enabled: bool | None) -> None:
    """Record the OpenAI thinking-XML preference for the current context.

    Args:
        enabled: ``True``/``False`` to force or suppress XML-wrapped thinking
            blocks in converted output; ``None`` signals "no preference", so
            downstream conversion logic falls back to its own default.

    Note:
        The preference lives in a ``ContextVar``, which keeps concurrent
        async requests isolated from one another — registering a value here
        never leaks into a different request's conversion.
    """
    _OPENAI_THINKING_XML_VAR.set(enabled)


def get_openai_thinking_xml() -> bool | None:
    """Return the registered thinking-XML preference, or ``None`` if unset."""
    preference = _OPENAI_THINKING_XML_VAR.get()
    return preference
7 changes: 6 additions & 1 deletion ccproxy/llms/formatters/openai_to_openai/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
convert_openai_responses_usage_to_completion_usage,
merge_thinking_segments,
)
from ccproxy.llms.formatters.context import get_openai_thinking_xml
from ccproxy.llms.models import openai as openai_models

from ._helpers import (
Expand Down Expand Up @@ -333,6 +334,10 @@ def convert__openai_responses_to_openai_chat__response(
response: openai_models.ResponseObject,
) -> openai_models.ChatCompletionResponse:
"""Convert an OpenAI ResponseObject to a ChatCompletionResponse."""
include_thinking = get_openai_thinking_xml()
if include_thinking is None:
include_thinking = True

text_segments: list[str] = []
added_reasoning: set[tuple[str, str]] = set()
tool_calls: list[openai_models.ToolCall] = []
Expand All @@ -353,7 +358,7 @@ def convert__openai_responses_to_openai_chat__response(
if thinking_text and len(thinking_text) > 30
else thinking_text,
)
if thinking_text:
if include_thinking and thinking_text:
key = (signature or "", thinking_text)
if key not in added_reasoning:
text_segments.append(_wrap_thinking(signature, thinking_text))
Expand Down
12 changes: 7 additions & 5 deletions ccproxy/llms/formatters/openai_to_openai/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,14 @@
get_last_instructions,
get_last_request,
get_last_request_tools,
get_openai_thinking_xml,
register_request,
register_request_tools,
)
from ccproxy.llms.models import openai as openai_models
from ccproxy.llms.streaming.accumulators import OpenAIAccumulator

from ._helpers import (
_convert_tools_chat_to_responses,
_get_attr,
)
from ._helpers import _convert_tools_chat_to_responses, _get_attr
from .requests import _build_responses_payload_from_chat_request
from .responses import (
_collect_reasoning_segments,
Expand All @@ -61,6 +59,10 @@ def run(
async def generator() -> AsyncGenerator[
openai_models.ChatCompletionChunk, None
]:
include_thinking = get_openai_thinking_xml()
if include_thinking is None:
include_thinking = True

model_id = ""
role_sent = False

Expand Down Expand Up @@ -537,7 +539,7 @@ def create_text_chunk(
for entry in summary_list:
text = _get_attr(entry, "text")
signature = _get_attr(entry, "signature")
if isinstance(text, str) and text:
if include_thinking and isinstance(text, str) and text:
chunk_text = _wrap_thinking(signature, text)
sequence_counter += 1
yield openai_models.ChatCompletionChunk(
Expand Down
36 changes: 27 additions & 9 deletions ccproxy/plugins/codex/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,26 +262,41 @@ async def prepare_provider_request(

# Parse body (format conversion is now handled by format chain)
body_data = json.loads(body.decode()) if body else {}
body_data = self._apply_request_template(body_data)
if self._should_apply_detection_payload():
body_data = self._apply_request_template(body_data)
else:
body_data = self._normalize_input_messages(body_data)

# Fetch detected instructions from detection service
instructions = self._get_instructions()
detected_instructions = (
self._get_instructions() if self._should_apply_detection_payload() else ""
)

existing_instructions = body_data.get("instructions")
if isinstance(existing_instructions, str) and existing_instructions:
if instructions:
instructions = instructions + "\n" + existing_instructions
else:
instructions = existing_instructions
instructions = (
detected_instructions + "\n" + existing_instructions
if detected_instructions
else existing_instructions
)
else:
instructions = detected_instructions

body_data["instructions"] = instructions
if instructions:
body_data["instructions"] = instructions
else:
body_data.pop("instructions", None)

# Codex backend requires stream=true, always override
body_data["stream"] = True
body_data["store"] = False

# Remove unsupported keys for Codex
for key in ("max_output_tokens", "max_completion_tokens", "temperature"):
for key in (
"max_output_tokens",
"max_completion_tokens",
"max_tokens",
"temperature",
):
body_data.pop(key, None)

list_input = body_data.get("input", [])
Expand Down Expand Up @@ -640,6 +655,9 @@ def _request_body_is_encoded(self, headers: dict[str, str]) -> bool:
encoding = headers.get("content-encoding", "").strip().lower()
return bool(encoding and encoding != "identity")

def _should_apply_detection_payload(self) -> bool:
return bool(getattr(self.config, "inject_detection_payload", True))

def _detect_streaming_intent(self, body: bytes, headers: dict[str, str]) -> bool:
if self._request_body_is_encoded(headers):
accept = headers.get("accept", "").lower()
Expand Down
7 changes: 7 additions & 0 deletions ccproxy/plugins/codex/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ class CodexSettings(ProviderConfig):
enable_format_registry: bool = Field(
default=True, description="Whether to enable format adapter registry"
)
inject_detection_payload: bool = Field(
default=True,
description=(
"Whether to inject the captured Codex CLI instructions/template into "
"provider requests. Disable this for generic OpenAI-compatible API usage."
),
)

# Detection configuration
detection_home_mode: Literal["temp", "home"] = Field(
Expand Down
8 changes: 8 additions & 0 deletions ccproxy/services/adapters/format_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from collections.abc import AsyncIterator, Awaitable, Callable
from typing import Any, Protocol, runtime_checkable

from ccproxy.llms.formatters.context import register_openai_thinking_xml


FormatDict = dict[str, Any]

Expand Down Expand Up @@ -63,6 +65,10 @@ def __init__(
self._error = error
self._stream = stream
self.name = name or self.__class__.__name__
self._openai_thinking_xml: bool | None = None

def configure_streaming(self, *, openai_thinking_xml: bool | None = None) -> None:
self._openai_thinking_xml = openai_thinking_xml

async def convert_request(self, data: FormatDict) -> FormatDict:
return await self._run_stage(self._request, data, stage="request")
Expand Down Expand Up @@ -92,6 +98,7 @@ async def _create_stream_iterator(
f"{self.name} does not implement stream conversion"
)

register_openai_thinking_xml(self._openai_thinking_xml)
handler = self._stream(stream)
handler = await _maybe_await(handler)

Expand Down Expand Up @@ -121,6 +128,7 @@ async def _run_stage(
f"{self.name} does not implement {stage} conversion"
)

register_openai_thinking_xml(self._openai_thinking_xml)
result = await _maybe_await(func(data))
if not isinstance(result, dict):
raise TypeError(
Expand Down
Loading
Loading