Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .ccproxy.codex.msaf.toml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Example ccproxy config for Microsoft Agent Framework clients over Codex.

# Load only the Codex provider plugin and its OAuth companion.
enable_plugins = true
enabled_plugins = ["oauth_codex", "codex"]

[server]
# Keep bypass mode off so requests reach the real Codex backend;
# when true, provider plugins are served by a mock adapter instead.
bypass_mode = false

[llm]
# Keep OpenAI-compatible responses free from <thinking>...</thinking> blocks.
openai_thinking_xml = false

[plugins.codex]
enabled = true
name = "codex"
# Upstream ChatGPT Codex backend endpoint.
base_url = "https://chatgpt.com/backend-api/codex"
requires_auth = true
auth_type = "oauth"
supports_streaming = true
# Prefer streaming upstream, buffering when the client asked for a
# non-streaming response.
preferred_upstream_mode = "streaming"
buffer_non_streaming = true
enable_format_registry = true

# Microsoft Agent Framework sends its own instructions/reasoning payloads.
# Do not prepend captured Codex CLI templates to generic OpenAI-compatible calls.
inject_detection_payload = false

# Request formats accepted by the plugin and converted for the Codex backend.
supported_input_formats = [
"openai.responses",
"openai.chat_completions",
"anthropic.messages",
]

# Use a temporary directory (rather than the real home) for detection
# artifacts; the only other supported value is "home".
detection_home_mode = "temp"

# Static model entry advertised to clients from the models endpoint.
[[plugins.codex.models_endpoint]]
id = "gpt-5.4"
object = "model"
created = 1735689600 # 2025-01-01T00:00:00Z
owned_by = "openai"
root = "gpt-5.4"
permission = []

# OAuth parameters used by the codex plugin itself; client_id and scopes
# must stay in sync with [plugins.oauth_codex] below.
[plugins.codex.oauth]
base_url = "https://auth.openai.com"
client_id = "app_EMoamEEZ73f0CkXaXp7hrann"
scopes = ["openid", "profile", "email", "offline_access"]

# Standalone OAuth plugin handling the interactive login/callback flow.
[plugins.oauth_codex]
enabled = true
base_url = "https://auth.openai.com"
authorize_url = "https://auth.openai.com/oauth/authorize"
token_url = "https://auth.openai.com/oauth/token"
profile_url = "https://api.openai.com/oauth/profile"
client_id = "app_EMoamEEZ73f0CkXaXp7hrann"
# Local callback receiver; redirect_uri port must match callback_port.
redirect_uri = "http://localhost:1455/auth/callback"
callback_port = 1455
scopes = ["openid", "profile", "email", "offline_access"]
audience = "https://api.openai.com/v1"
# NOTE(review): user_agent and the headers table carry the same UA string —
# presumably both are required by different request paths; confirm upstream.
user_agent = "Codex-Code/1.0.43"
headers = { User-Agent = "Codex-Code/1.0.43" }
# Timeouts in seconds: per-request HTTP timeout and how long to wait for
# the browser to hit the local callback.
request_timeout = 30
callback_timeout = 300
use_pkce = true
26 changes: 25 additions & 1 deletion ccproxy/core/plugins/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ccproxy.models.provider import ProviderConfig
from ccproxy.services.adapters.base import BaseAdapter
from ccproxy.services.adapters.http_adapter import BaseHTTPAdapter
from ccproxy.services.adapters.mock_adapter import MockAdapter
from ccproxy.services.interfaces import (
IMetricsCollector,
IRequestTracer,
Expand Down Expand Up @@ -104,6 +105,7 @@ class BaseProviderPluginFactory(ProviderPluginFactory):
cli_commands: list[CliCommandSpec] = []
cli_arguments: list[CliArgumentSpec] = []
tool_accumulator_class: type | None = None
use_mock_adapter_in_bypass_mode: bool = True

def __init__(self) -> None:
"""Initialize factory with manifest built from class attributes."""
Expand Down Expand Up @@ -215,6 +217,24 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
Returns:
Adapter instance
"""
settings = context.get("settings")
service_container = context.get("service_container")
if settings and getattr(settings.server, "bypass_mode", False):
if not service_container:
raise RuntimeError(
f"Cannot initialize plugin '{self.plugin_name}' in bypass mode: "
"service container is required to create mock adapter. "
"This is likely a configuration issue."
)
logger.warning(
"plugin_bypass_mode_enabled",
plugin=self.plugin_name,
adapter=self.adapter_class.__name__,
category="lifecycle",
)
if self.use_mock_adapter_in_bypass_mode:
return MockAdapter(service_container.get_mock_handler())

# Extract services from context (one-time extraction)
http_pool_manager: HTTPPoolManager | None = cast(
"HTTPPoolManager | None", context.get("http_pool_manager")
Expand All @@ -232,7 +252,6 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
config = context.get("config")

# Get all adapter dependencies from service container
service_container = context.get("service_container")
if not service_container:
raise RuntimeError("Service container is required for adapter services")

Expand Down Expand Up @@ -268,6 +287,8 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
if hasattr(context, "get")
else None,
}
if settings and getattr(settings.server, "bypass_mode", False):
adapter_kwargs["mock_handler"] = service_container.get_mock_handler()
if self.tool_accumulator_class:
adapter_kwargs["tool_accumulator_class"] = self.tool_accumulator_class

Expand Down Expand Up @@ -303,6 +324,9 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
"model_mapper": context.get("model_mapper")
if hasattr(context, "get")
else None,
"mock_handler": service_container.get_mock_handler()
if settings and getattr(settings.server, "bypass_mode", False)
else None,
}
if self.tool_accumulator_class:
non_http_adapter_kwargs["tool_accumulator_class"] = (
Expand Down
24 changes: 24 additions & 0 deletions ccproxy/llms/formatters/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
"formatter_instructions", default=None
)
_TOOLS_VAR: ContextVar[list[Any] | None] = ContextVar("formatter_tools", default=None)
_OPENAI_THINKING_XML_VAR: ContextVar[bool | None] = ContextVar(
"formatter_openai_thinking_xml", default=None
)


def register_request(request: Any | None, instructions: str | None = None) -> None:
Expand Down Expand Up @@ -114,3 +117,24 @@ def get_last_request_tools() -> list[Any] | None:

cached = _TOOLS_VAR.get()
return list(cached) if cached else None


def register_openai_thinking_xml(enabled: bool | None) -> None:
    """Record the OpenAI thinking-XML serialization preference.

    Args:
        enabled: ``True``/``False`` to force whether thinking blocks get XML
            wrappers during conversion; ``None`` tells downstream conversion
            logic to fall back to its own default.

    Note:
        The preference lives in a ``ContextVar``, so concurrent async
        requests each see their own value with no cross-request leakage.
    """
    _OPENAI_THINKING_XML_VAR.set(enabled)


def get_openai_thinking_xml() -> bool | None:
    """Fetch the cached OpenAI thinking serialization preference.

    Returns ``None`` when no preference was registered for this context.
    """
    return _OPENAI_THINKING_XML_VAR.get()
7 changes: 6 additions & 1 deletion ccproxy/llms/formatters/openai_to_openai/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
convert_openai_responses_usage_to_completion_usage,
merge_thinking_segments,
)
from ccproxy.llms.formatters.context import get_openai_thinking_xml
from ccproxy.llms.models import openai as openai_models

from ._helpers import (
Expand Down Expand Up @@ -333,6 +334,10 @@ def convert__openai_responses_to_openai_chat__response(
response: openai_models.ResponseObject,
) -> openai_models.ChatCompletionResponse:
"""Convert an OpenAI ResponseObject to a ChatCompletionResponse."""
include_thinking = get_openai_thinking_xml()
if include_thinking is None:
include_thinking = True

text_segments: list[str] = []
added_reasoning: set[tuple[str, str]] = set()
tool_calls: list[openai_models.ToolCall] = []
Expand All @@ -353,7 +358,7 @@ def convert__openai_responses_to_openai_chat__response(
if thinking_text and len(thinking_text) > 30
else thinking_text,
)
if thinking_text:
if include_thinking and thinking_text:
key = (signature or "", thinking_text)
if key not in added_reasoning:
text_segments.append(_wrap_thinking(signature, thinking_text))
Expand Down
12 changes: 7 additions & 5 deletions ccproxy/llms/formatters/openai_to_openai/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,14 @@
get_last_instructions,
get_last_request,
get_last_request_tools,
get_openai_thinking_xml,
register_request,
register_request_tools,
)
from ccproxy.llms.models import openai as openai_models
from ccproxy.llms.streaming.accumulators import OpenAIAccumulator

from ._helpers import (
_convert_tools_chat_to_responses,
_get_attr,
)
from ._helpers import _convert_tools_chat_to_responses, _get_attr
from .requests import _build_responses_payload_from_chat_request
from .responses import (
_collect_reasoning_segments,
Expand All @@ -61,6 +59,10 @@ def run(
async def generator() -> AsyncGenerator[
openai_models.ChatCompletionChunk, None
]:
include_thinking = get_openai_thinking_xml()
if include_thinking is None:
include_thinking = True

model_id = ""
role_sent = False

Expand Down Expand Up @@ -537,7 +539,7 @@ def create_text_chunk(
for entry in summary_list:
text = _get_attr(entry, "text")
signature = _get_attr(entry, "signature")
if isinstance(text, str) and text:
if include_thinking and isinstance(text, str) and text:
chunk_text = _wrap_thinking(signature, text)
sequence_counter += 1
yield openai_models.ChatCompletionChunk(
Expand Down
54 changes: 43 additions & 11 deletions ccproxy/plugins/codex/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)
from ccproxy.services.adapters.chain_composer import compose_from_chain
from ccproxy.services.adapters.http_adapter import BaseHTTPAdapter
from ccproxy.services.adapters.mock_adapter import MockAdapter
from ccproxy.services.handler_config import HandlerConfig
from ccproxy.streaming import DeferredStreaming, StreamingBufferService
from ccproxy.utils.headers import (
Expand Down Expand Up @@ -58,6 +59,9 @@ async def handle_request(
# Context + request info
ctx = request.state.context
self._ensure_tool_accumulator(ctx)
if self.mock_handler:
return await MockAdapter(self.mock_handler).handle_request(request)

endpoint = ctx.metadata.get("endpoint", "")
body = await request.body()
body = await self._map_request_model(ctx, body)
Expand Down Expand Up @@ -262,26 +266,48 @@ async def prepare_provider_request(

# Parse body (format conversion is now handled by format chain)
body_data = json.loads(body.decode()) if body else {}
body_data = self._apply_request_template(body_data)
if self._should_apply_detection_payload():
body_data = self._apply_request_template(body_data)
else:
body_data = self._normalize_input_messages(body_data)

# Fetch detected instructions from detection service
instructions = self._get_instructions()
detected_instructions = (
self._get_instructions() if self._should_apply_detection_payload() else ""
)

existing_instructions = body_data.get("instructions")
if isinstance(existing_instructions, str) and existing_instructions:
if instructions:
instructions = instructions + "\n" + existing_instructions
else:
instructions = existing_instructions
instructions = (
detected_instructions + "\n" + existing_instructions
if detected_instructions
else existing_instructions
)
else:
instructions = detected_instructions

if instructions:
body_data["instructions"] = instructions
else:
body_data.pop("instructions", None)

body_data = self._sanitize_provider_body(body_data)

body_data["instructions"] = instructions
return json.dumps(body_data).encode(), filtered_headers

def _sanitize_provider_body(self, body_data: dict[str, Any]) -> dict[str, Any]:
"""Apply Codex-specific payload sanitization shared by all request paths."""

# Codex backend requires stream=true, always override
body_data["stream"] = True
body_data["store"] = False

# Remove unsupported keys for Codex
for key in ("max_output_tokens", "max_completion_tokens", "temperature"):
for key in (
"max_output_tokens",
"max_completion_tokens",
"max_tokens",
"temperature",
):
body_data.pop(key, None)

list_input = body_data.get("input", [])
Expand All @@ -290,11 +316,10 @@ async def prepare_provider_request(
input for input in list_input if input.get("type") != "item_reference"
]

#
# Remove any prefixed metadata fields that shouldn't be sent to the API
body_data = self._remove_metadata_fields(body_data)

return json.dumps(body_data).encode(), filtered_headers
return body_data

async def prepare_provider_headers(self, headers: dict[str, str]) -> dict[str, str]:
token_value = await self._resolve_access_token()
Expand Down Expand Up @@ -460,6 +485,10 @@ async def handle_streaming(
if not self.streaming_handler:
# Fallback to base behavior
return await super().handle_streaming(request, endpoint, **kwargs)
if self.mock_handler:
return await MockAdapter(self.mock_handler).handle_streaming(
request, endpoint, **kwargs
)

# Get context
ctx = request.state.context
Expand Down Expand Up @@ -652,6 +681,9 @@ def _detect_streaming_intent(self, body: bytes, headers: dict[str, str]) -> bool
accept = headers.get("accept", "").lower()
return "text/event-stream" in accept

def _should_apply_detection_payload(self) -> bool:
return bool(getattr(self.config, "inject_detection_payload", True))

def _get_instructions(self) -> str:
if not self.detection_service:
return ""
Expand Down
7 changes: 7 additions & 0 deletions ccproxy/plugins/codex/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ class CodexSettings(ProviderConfig):
enable_format_registry: bool = Field(
default=True, description="Whether to enable format adapter registry"
)
inject_detection_payload: bool = Field(
default=True,
description=(
"Whether to inject the captured Codex CLI instructions/template into "
"provider requests. Disable this for generic OpenAI-compatible API usage."
),
)

# Detection configuration
detection_home_mode: Literal["temp", "home"] = Field(
Expand Down
Loading
Loading