Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .ccproxy.codex.msaf.toml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Example ccproxy config for Microsoft Agent Framework clients over Codex.

enable_plugins = true
enabled_plugins = ["oauth_codex", "codex"]

[server]
bypass_mode = false

[llm]
# Keep OpenAI-compatible responses free from <thinking>...</thinking> blocks.
openai_thinking_xml = false

[plugins.codex]
enabled = true
name = "codex"
base_url = "https://chatgpt.com/backend-api/codex"
requires_auth = true
auth_type = "oauth"
supports_streaming = true
preferred_upstream_mode = "streaming"
buffer_non_streaming = true
enable_format_registry = true

# Microsoft Agent Framework sends its own instructions/reasoning payloads.
# Do not prepend captured Codex CLI templates to generic OpenAI-compatible calls.
inject_detection_payload = false

supported_input_formats = [
"openai.responses",
"openai.chat_completions",
"anthropic.messages",
]

detection_home_mode = "temp"

[[plugins.codex.models_endpoint]]
id = "gpt-5.4"
object = "model"
created = 1735689600
owned_by = "openai"
root = "gpt-5.4"
permission = []

[plugins.codex.oauth]
base_url = "https://auth.openai.com"
client_id = "app_EMoamEEZ73f0CkXaXp7hrann"
scopes = ["openid", "profile", "email", "offline_access"]

[plugins.oauth_codex]
enabled = true
base_url = "https://auth.openai.com"
authorize_url = "https://auth.openai.com/oauth/authorize"
token_url = "https://auth.openai.com/oauth/token"
profile_url = "https://api.openai.com/oauth/profile"
client_id = "app_EMoamEEZ73f0CkXaXp7hrann"
redirect_uri = "http://localhost:1455/auth/callback"
callback_port = 1455
scopes = ["openid", "profile", "email", "offline_access"]
audience = "https://api.openai.com/v1"
user_agent = "Codex-Code/1.0.43"
headers = { User-Agent = "Codex-Code/1.0.43" }
request_timeout = 30
callback_timeout = 300
use_pkce = true
19 changes: 18 additions & 1 deletion ccproxy/core/plugins/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ccproxy.models.provider import ProviderConfig
from ccproxy.services.adapters.base import BaseAdapter
from ccproxy.services.adapters.http_adapter import BaseHTTPAdapter
from ccproxy.services.adapters.mock_adapter import MockAdapter
from ccproxy.services.interfaces import (
IMetricsCollector,
IRequestTracer,
Expand Down Expand Up @@ -215,6 +216,23 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
Returns:
Adapter instance
"""
settings = context.get("settings")
service_container = context.get("service_container")
if settings and getattr(settings.server, "bypass_mode", False):
if not service_container:
raise RuntimeError(
f"Cannot initialize plugin '{self.plugin_name}' in bypass mode: "
"service container is required to create mock adapter. "
"This is likely a configuration issue."
)
logger.warning(
"plugin_bypass_mode_enabled",
plugin=self.plugin_name,
adapter=self.adapter_class.__name__,
category="lifecycle",
)
return MockAdapter(service_container.get_mock_handler())

# Extract services from context (one-time extraction)
http_pool_manager: HTTPPoolManager | None = cast(
"HTTPPoolManager | None", context.get("http_pool_manager")
Expand All @@ -232,7 +250,6 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
config = context.get("config")

# Get all adapter dependencies from service container
service_container = context.get("service_container")
if not service_container:
raise RuntimeError("Service container is required for adapter services")

Expand Down
24 changes: 24 additions & 0 deletions ccproxy/llms/formatters/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
"formatter_instructions", default=None
)
_TOOLS_VAR: ContextVar[list[Any] | None] = ContextVar("formatter_tools", default=None)
# Per-request preference for serializing reasoning/"thinking" text with XML
# wrappers in OpenAI-compatible output. ``None`` means no preference was
# registered and downstream conversion logic should use its own default.
# Stored in a ContextVar so concurrent async requests do not leak into each
# other.
_OPENAI_THINKING_XML_VAR: ContextVar[bool | None] = ContextVar(
    "formatter_openai_thinking_xml", default=None
)


def register_request(request: Any | None, instructions: str | None = None) -> None:
Expand Down Expand Up @@ -114,3 +117,24 @@ def get_last_request_tools() -> list[Any] | None:

cached = _TOOLS_VAR.get()
return list(cached) if cached else None


def register_openai_thinking_xml(enabled: bool | None) -> None:
    """Record the OpenAI thinking-XML preference for the current context.

    Args:
        enabled: ``True``/``False`` to force or suppress XML-wrapped thinking
            blocks in converted output; ``None`` signals "no preference", so
            downstream conversion logic falls back to its own default.

    Note:
        The preference lives in a ``ContextVar``, which keeps concurrent
        async requests isolated from one another — registering a value here
        never leaks into a different request's conversion.
    """
    _OPENAI_THINKING_XML_VAR.set(enabled)


def get_openai_thinking_xml() -> bool | None:
    """Return the registered thinking-XML preference, or ``None`` if unset."""
    preference = _OPENAI_THINKING_XML_VAR.get()
    return preference
7 changes: 6 additions & 1 deletion ccproxy/llms/formatters/openai_to_openai/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
convert_openai_responses_usage_to_completion_usage,
merge_thinking_segments,
)
from ccproxy.llms.formatters.context import get_openai_thinking_xml
from ccproxy.llms.models import openai as openai_models

from ._helpers import (
Expand Down Expand Up @@ -333,6 +334,10 @@ def convert__openai_responses_to_openai_chat__response(
response: openai_models.ResponseObject,
) -> openai_models.ChatCompletionResponse:
"""Convert an OpenAI ResponseObject to a ChatCompletionResponse."""
include_thinking = get_openai_thinking_xml()
if include_thinking is None:
include_thinking = True

text_segments: list[str] = []
added_reasoning: set[tuple[str, str]] = set()
tool_calls: list[openai_models.ToolCall] = []
Expand All @@ -353,7 +358,7 @@ def convert__openai_responses_to_openai_chat__response(
if thinking_text and len(thinking_text) > 30
else thinking_text,
)
if thinking_text:
if include_thinking and thinking_text:
key = (signature or "", thinking_text)
if key not in added_reasoning:
text_segments.append(_wrap_thinking(signature, thinking_text))
Expand Down
12 changes: 7 additions & 5 deletions ccproxy/llms/formatters/openai_to_openai/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,14 @@
get_last_instructions,
get_last_request,
get_last_request_tools,
get_openai_thinking_xml,
register_request,
register_request_tools,
)
from ccproxy.llms.models import openai as openai_models
from ccproxy.llms.streaming.accumulators import OpenAIAccumulator

from ._helpers import (
_convert_tools_chat_to_responses,
_get_attr,
)
from ._helpers import _convert_tools_chat_to_responses, _get_attr
from .requests import _build_responses_payload_from_chat_request
from .responses import (
_collect_reasoning_segments,
Expand All @@ -61,6 +59,10 @@ def run(
async def generator() -> AsyncGenerator[
openai_models.ChatCompletionChunk, None
]:
include_thinking = get_openai_thinking_xml()
if include_thinking is None:
include_thinking = True

model_id = ""
role_sent = False

Expand Down Expand Up @@ -537,7 +539,7 @@ def create_text_chunk(
for entry in summary_list:
text = _get_attr(entry, "text")
signature = _get_attr(entry, "signature")
if isinstance(text, str) and text:
if include_thinking and isinstance(text, str) and text:
chunk_text = _wrap_thinking(signature, text)
sequence_counter += 1
yield openai_models.ChatCompletionChunk(
Expand Down
36 changes: 27 additions & 9 deletions ccproxy/plugins/codex/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,26 +262,41 @@ async def prepare_provider_request(

# Parse body (format conversion is now handled by format chain)
body_data = json.loads(body.decode()) if body else {}
body_data = self._apply_request_template(body_data)
if self._should_apply_detection_payload():
body_data = self._apply_request_template(body_data)
else:
body_data = self._normalize_input_messages(body_data)

# Fetch detected instructions from detection service
instructions = self._get_instructions()
detected_instructions = (
self._get_instructions() if self._should_apply_detection_payload() else ""
)

existing_instructions = body_data.get("instructions")
if isinstance(existing_instructions, str) and existing_instructions:
if instructions:
instructions = instructions + "\n" + existing_instructions
else:
instructions = existing_instructions
instructions = (
detected_instructions + "\n" + existing_instructions
if detected_instructions
else existing_instructions
)
else:
instructions = detected_instructions

body_data["instructions"] = instructions
if instructions:
body_data["instructions"] = instructions
else:
body_data.pop("instructions", None)

# Codex backend requires stream=true, always override
body_data["stream"] = True
body_data["store"] = False

# Remove unsupported keys for Codex
for key in ("max_output_tokens", "max_completion_tokens", "temperature"):
for key in (
"max_output_tokens",
"max_completion_tokens",
"max_tokens",
"temperature",
):
body_data.pop(key, None)

list_input = body_data.get("input", [])
Expand Down Expand Up @@ -640,6 +655,9 @@ def _request_body_is_encoded(self, headers: dict[str, str]) -> bool:
encoding = headers.get("content-encoding", "").strip().lower()
return bool(encoding and encoding != "identity")

def _should_apply_detection_payload(self) -> bool:
return bool(getattr(self.config, "inject_detection_payload", True))

def _detect_streaming_intent(self, body: bytes, headers: dict[str, str]) -> bool:
if self._request_body_is_encoded(headers):
accept = headers.get("accept", "").lower()
Expand Down
7 changes: 7 additions & 0 deletions ccproxy/plugins/codex/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ class CodexSettings(ProviderConfig):
enable_format_registry: bool = Field(
default=True, description="Whether to enable format adapter registry"
)
inject_detection_payload: bool = Field(
default=True,
description=(
"Whether to inject the captured Codex CLI instructions/template into "
"provider requests. Disable this for generic OpenAI-compatible API usage."
),
)

# Detection configuration
detection_home_mode: Literal["temp", "home"] = Field(
Expand Down
8 changes: 8 additions & 0 deletions ccproxy/services/adapters/format_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from collections.abc import AsyncIterator, Awaitable, Callable
from typing import Any, Protocol, runtime_checkable

from ccproxy.llms.formatters.context import register_openai_thinking_xml


FormatDict = dict[str, Any]

Expand Down Expand Up @@ -63,6 +65,10 @@ def __init__(
self._error = error
self._stream = stream
self.name = name or self.__class__.__name__
self._openai_thinking_xml: bool | None = None

def configure_streaming(self, *, openai_thinking_xml: bool | None = None) -> None:
self._openai_thinking_xml = openai_thinking_xml

async def convert_request(self, data: FormatDict) -> FormatDict:
return await self._run_stage(self._request, data, stage="request")
Expand Down Expand Up @@ -92,6 +98,7 @@ async def _create_stream_iterator(
f"{self.name} does not implement stream conversion"
)

register_openai_thinking_xml(self._openai_thinking_xml)
handler = self._stream(stream)
handler = await _maybe_await(handler)

Expand Down Expand Up @@ -121,6 +128,7 @@ async def _run_stage(
f"{self.name} does not implement {stage} conversion"
)

register_openai_thinking_xml(self._openai_thinking_xml)
result = await _maybe_await(func(data))
if not isinstance(result, dict):
raise TypeError(
Expand Down
Loading
Loading