Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .ccproxy.codex.msaf.toml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Example ccproxy config for Microsoft Agent Framework clients over Codex.

# Load only the Codex provider plugin and its OAuth companion.
enable_plugins = true
enabled_plugins = ["oauth_codex", "codex"]

[server]
# Keep bypass mode off so requests reach the real Codex backend;
# when true, provider plugins are served by a mock adapter instead.
bypass_mode = false

[llm]
# Keep OpenAI-compatible responses free from <thinking>...</thinking> blocks.
openai_thinking_xml = false

[plugins.codex]
enabled = true
name = "codex"
# Upstream ChatGPT Codex backend endpoint.
base_url = "https://chatgpt.com/backend-api/codex"
requires_auth = true
auth_type = "oauth"
supports_streaming = true
# Prefer streaming upstream, buffering when the client asked for a
# non-streaming response.
preferred_upstream_mode = "streaming"
buffer_non_streaming = true
enable_format_registry = true

# Microsoft Agent Framework sends its own instructions/reasoning payloads.
# Do not prepend captured Codex CLI templates to generic OpenAI-compatible calls.
inject_detection_payload = false

# Request formats accepted by the plugin and converted for the Codex backend.
supported_input_formats = [
"openai.responses",
"openai.chat_completions",
"anthropic.messages",
]

# Use a temporary directory (rather than the real home) for detection
# artifacts; the only other supported value is "home".
detection_home_mode = "temp"

# Static model entry advertised to clients from the models endpoint.
[[plugins.codex.models_endpoint]]
id = "gpt-5.4"
object = "model"
created = 1735689600 # 2025-01-01T00:00:00Z
owned_by = "openai"
root = "gpt-5.4"
permission = []

# OAuth parameters used by the codex plugin itself; client_id and scopes
# must stay in sync with [plugins.oauth_codex] below.
[plugins.codex.oauth]
base_url = "https://auth.openai.com"
client_id = "app_EMoamEEZ73f0CkXaXp7hrann"
scopes = ["openid", "profile", "email", "offline_access"]

# Standalone OAuth plugin handling the interactive login/callback flow.
[plugins.oauth_codex]
enabled = true
base_url = "https://auth.openai.com"
authorize_url = "https://auth.openai.com/oauth/authorize"
token_url = "https://auth.openai.com/oauth/token"
profile_url = "https://api.openai.com/oauth/profile"
client_id = "app_EMoamEEZ73f0CkXaXp7hrann"
# Local callback receiver; redirect_uri port must match callback_port.
redirect_uri = "http://localhost:1455/auth/callback"
callback_port = 1455
scopes = ["openid", "profile", "email", "offline_access"]
audience = "https://api.openai.com/v1"
# NOTE(review): user_agent and the headers table carry the same UA string —
# presumably both are required by different request paths; confirm upstream.
user_agent = "Codex-Code/1.0.43"
headers = { User-Agent = "Codex-Code/1.0.43" }
# Timeouts in seconds: per-request HTTP timeout and how long to wait for
# the browser to hit the local callback.
request_timeout = 30
callback_timeout = 300
use_pkce = true
26 changes: 25 additions & 1 deletion ccproxy/core/plugins/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ccproxy.models.provider import ProviderConfig
from ccproxy.services.adapters.base import BaseAdapter
from ccproxy.services.adapters.http_adapter import BaseHTTPAdapter
from ccproxy.services.adapters.mock_adapter import MockAdapter
from ccproxy.services.interfaces import (
IMetricsCollector,
IRequestTracer,
Expand Down Expand Up @@ -104,6 +105,7 @@ class BaseProviderPluginFactory(ProviderPluginFactory):
cli_commands: list[CliCommandSpec] = []
cli_arguments: list[CliArgumentSpec] = []
tool_accumulator_class: type | None = None
use_mock_adapter_in_bypass_mode: bool = True

def __init__(self) -> None:
"""Initialize factory with manifest built from class attributes."""
Expand Down Expand Up @@ -215,6 +217,24 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
Returns:
Adapter instance
"""
settings = context.get("settings")
service_container = context.get("service_container")
if settings and getattr(settings.server, "bypass_mode", False):
if not service_container:
raise RuntimeError(
f"Cannot initialize plugin '{self.plugin_name}' in bypass mode: "
"service container is required to create mock adapter. "
"This is likely a configuration issue."
)
logger.warning(
"plugin_bypass_mode_enabled",
plugin=self.plugin_name,
adapter=self.adapter_class.__name__,
category="lifecycle",
)
if self.use_mock_adapter_in_bypass_mode:
return MockAdapter(service_container.get_mock_handler())

# Extract services from context (one-time extraction)
http_pool_manager: HTTPPoolManager | None = cast(
"HTTPPoolManager | None", context.get("http_pool_manager")
Expand All @@ -232,7 +252,6 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
config = context.get("config")

# Get all adapter dependencies from service container
service_container = context.get("service_container")
if not service_container:
raise RuntimeError("Service container is required for adapter services")

Expand Down Expand Up @@ -268,6 +287,8 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
if hasattr(context, "get")
else None,
}
if settings and getattr(settings.server, "bypass_mode", False):
adapter_kwargs["mock_handler"] = service_container.get_mock_handler()
if self.tool_accumulator_class:
adapter_kwargs["tool_accumulator_class"] = self.tool_accumulator_class

Expand Down Expand Up @@ -303,6 +324,9 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
"model_mapper": context.get("model_mapper")
if hasattr(context, "get")
else None,
"mock_handler": service_container.get_mock_handler()
if settings and getattr(settings.server, "bypass_mode", False)
else None,
}
if self.tool_accumulator_class:
non_http_adapter_kwargs["tool_accumulator_class"] = (
Expand Down
24 changes: 24 additions & 0 deletions ccproxy/llms/formatters/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
"formatter_instructions", default=None
)
_TOOLS_VAR: ContextVar[list[Any] | None] = ContextVar("formatter_tools", default=None)
_OPENAI_THINKING_XML_VAR: ContextVar[bool | None] = ContextVar(
"formatter_openai_thinking_xml", default=None
)


def register_request(request: Any | None, instructions: str | None = None) -> None:
Expand Down Expand Up @@ -114,3 +117,24 @@ def get_last_request_tools() -> list[Any] | None:

cached = _TOOLS_VAR.get()
return list(cached) if cached else None


def register_openai_thinking_xml(enabled: bool | None) -> None:
    """Record the OpenAI thinking-XML serialization preference.

    Args:
        enabled: ``True``/``False`` to force whether thinking blocks get XML
            wrappers during conversion; ``None`` tells downstream conversion
            logic to fall back to its own default.

    Note:
        The preference lives in a ``ContextVar``, so concurrent async
        requests each see their own value with no cross-request leakage.
    """
    _OPENAI_THINKING_XML_VAR.set(enabled)


def get_openai_thinking_xml() -> bool | None:
    """Fetch the cached OpenAI thinking serialization preference.

    Returns ``None`` when no preference was registered for this context.
    """
    return _OPENAI_THINKING_XML_VAR.get()
7 changes: 6 additions & 1 deletion ccproxy/llms/formatters/openai_to_openai/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
convert_openai_responses_usage_to_completion_usage,
merge_thinking_segments,
)
from ccproxy.llms.formatters.context import get_openai_thinking_xml
from ccproxy.llms.models import openai as openai_models

from ._helpers import (
Expand Down Expand Up @@ -333,6 +334,10 @@ def convert__openai_responses_to_openai_chat__response(
response: openai_models.ResponseObject,
) -> openai_models.ChatCompletionResponse:
"""Convert an OpenAI ResponseObject to a ChatCompletionResponse."""
include_thinking = get_openai_thinking_xml()
if include_thinking is None:
include_thinking = True

text_segments: list[str] = []
added_reasoning: set[tuple[str, str]] = set()
tool_calls: list[openai_models.ToolCall] = []
Expand All @@ -353,7 +358,7 @@ def convert__openai_responses_to_openai_chat__response(
if thinking_text and len(thinking_text) > 30
else thinking_text,
)
if thinking_text:
if include_thinking and thinking_text:
key = (signature or "", thinking_text)
if key not in added_reasoning:
text_segments.append(_wrap_thinking(signature, thinking_text))
Expand Down
12 changes: 7 additions & 5 deletions ccproxy/llms/formatters/openai_to_openai/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,14 @@
get_last_instructions,
get_last_request,
get_last_request_tools,
get_openai_thinking_xml,
register_request,
register_request_tools,
)
from ccproxy.llms.models import openai as openai_models
from ccproxy.llms.streaming.accumulators import OpenAIAccumulator

from ._helpers import (
_convert_tools_chat_to_responses,
_get_attr,
)
from ._helpers import _convert_tools_chat_to_responses, _get_attr
from .requests import _build_responses_payload_from_chat_request
from .responses import (
_collect_reasoning_segments,
Expand All @@ -61,6 +59,10 @@ def run(
async def generator() -> AsyncGenerator[
openai_models.ChatCompletionChunk, None
]:
include_thinking = get_openai_thinking_xml()
if include_thinking is None:
include_thinking = True

model_id = ""
role_sent = False

Expand Down Expand Up @@ -537,7 +539,7 @@ def create_text_chunk(
for entry in summary_list:
text = _get_attr(entry, "text")
signature = _get_attr(entry, "signature")
if isinstance(text, str) and text:
if include_thinking and isinstance(text, str) and text:
chunk_text = _wrap_thinking(signature, text)
sequence_counter += 1
yield openai_models.ChatCompletionChunk(
Expand Down
54 changes: 43 additions & 11 deletions ccproxy/plugins/codex/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)
from ccproxy.services.adapters.chain_composer import compose_from_chain
from ccproxy.services.adapters.http_adapter import BaseHTTPAdapter
from ccproxy.services.adapters.mock_adapter import MockAdapter
from ccproxy.services.handler_config import HandlerConfig
from ccproxy.streaming import DeferredStreaming, StreamingBufferService
from ccproxy.utils.headers import (
Expand Down Expand Up @@ -58,6 +59,9 @@ async def handle_request(
# Context + request info
ctx = request.state.context
self._ensure_tool_accumulator(ctx)
if self.mock_handler:
return await MockAdapter(self.mock_handler).handle_request(request)

endpoint = ctx.metadata.get("endpoint", "")
body = await request.body()
body = await self._map_request_model(ctx, body)
Expand Down Expand Up @@ -262,26 +266,48 @@ async def prepare_provider_request(

# Parse body (format conversion is now handled by format chain)
body_data = json.loads(body.decode()) if body else {}
body_data = self._apply_request_template(body_data)
if self._should_apply_detection_payload():
body_data = self._apply_request_template(body_data)
else:
body_data = self._normalize_input_messages(body_data)

# Fetch detected instructions from detection service
instructions = self._get_instructions()
detected_instructions = (
self._get_instructions() if self._should_apply_detection_payload() else ""
)

existing_instructions = body_data.get("instructions")
if isinstance(existing_instructions, str) and existing_instructions:
if instructions:
instructions = instructions + "\n" + existing_instructions
else:
instructions = existing_instructions
instructions = (
detected_instructions + "\n" + existing_instructions
if detected_instructions
else existing_instructions
)
else:
instructions = detected_instructions

if instructions:
body_data["instructions"] = instructions
else:
body_data.pop("instructions", None)

body_data = self._sanitize_provider_body(body_data)

body_data["instructions"] = instructions
return json.dumps(body_data).encode(), filtered_headers

def _sanitize_provider_body(self, body_data: dict[str, Any]) -> dict[str, Any]:
"""Apply Codex-specific payload sanitization shared by all request paths."""

# Codex backend requires stream=true, always override
body_data["stream"] = True
body_data["store"] = False

# Remove unsupported keys for Codex
for key in ("max_output_tokens", "max_completion_tokens", "temperature"):
for key in (
"max_output_tokens",
"max_completion_tokens",
"max_tokens",
"temperature",
):
body_data.pop(key, None)

list_input = body_data.get("input", [])
Expand All @@ -290,11 +316,10 @@ async def prepare_provider_request(
input for input in list_input if input.get("type") != "item_reference"
]

#
# Remove any prefixed metadata fields that shouldn't be sent to the API
body_data = self._remove_metadata_fields(body_data)

return json.dumps(body_data).encode(), filtered_headers
return body_data

async def prepare_provider_headers(self, headers: dict[str, str]) -> dict[str, str]:
token_value = await self._resolve_access_token()
Expand Down Expand Up @@ -460,6 +485,10 @@ async def handle_streaming(
if not self.streaming_handler:
# Fallback to base behavior
return await super().handle_streaming(request, endpoint, **kwargs)
if self.mock_handler:
return await MockAdapter(self.mock_handler).handle_streaming(
request, endpoint, **kwargs
)

# Get context
ctx = request.state.context
Expand Down Expand Up @@ -652,6 +681,9 @@ def _detect_streaming_intent(self, body: bytes, headers: dict[str, str]) -> bool
accept = headers.get("accept", "").lower()
return "text/event-stream" in accept

def _should_apply_detection_payload(self) -> bool:
return bool(getattr(self.config, "inject_detection_payload", True))

def _get_instructions(self) -> str:
if not self.detection_service:
return ""
Expand Down
7 changes: 7 additions & 0 deletions ccproxy/plugins/codex/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ class CodexSettings(ProviderConfig):
enable_format_registry: bool = Field(
default=True, description="Whether to enable format adapter registry"
)
inject_detection_payload: bool = Field(
default=True,
description=(
"Whether to inject the captured Codex CLI instructions/template into "
"provider requests. Disable this for generic OpenAI-compatible API usage."
),
)

# Detection configuration
detection_home_mode: Literal["temp", "home"] = Field(
Expand Down
Loading
Loading