Abernaughty · Abernaughty · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/dev-suite/src/agents/qa.py b/dev-suite/src/agents/qa.py
@@ -1,19 +1,41 @@
 """QA agent - runs tests, audits security, writes failure reports.
 
 Produces structured failure reports for retry context.
-Can escalate architectural failures back to Architect.
-
-Implementation in Step 4.
+Can escalate architectural failures back to Architect via
+failure_type classification.
 """
 
-from pydantic import BaseModel
+import logging
+from enum import Enum
+
+from pydantic import BaseModel, field_validator, model_validator
+
+logger = logging.getLogger(__name__)
+
+
+class FailureType(str, Enum):
+    """Classification of QA failure for routing decisions.
+
+    The orchestrator uses this to decide where to route after QA:
+    - CODE: Bug, syntax error, test failure in the implementation.
+      Action: retry with Lead Dev using the same Blueprint.
+    - ARCHITECTURAL: Wrong target file, missing dependency, design flaw.
+      Action: escalate to Architect for a new Blueprint.
+    """
+
+    CODE = "code"
+    ARCHITECTURAL = "architectural"
 
 
 class FailureReport(BaseModel):
     """Structured report passed back on QA failure.
 
-    Included in retry context so Lead Dev knows exactly
-    what broke and why.
+    Included in retry context so the orchestrator can route to the
+    correct agent: Lead Dev for code fixes, Architect for re-planning.
+
+    The failure_type field is the primary classifier. The is_architectural
+    bool is kept for backward compatibility and stays in sync via the
+    model validator.
     """
 
     task_id: str
@@ -24,10 +46,54 @@ class FailureReport(BaseModel):
     failed_files: list[str]
     is_architectural: bool  # If True, escalate to Architect
     recommendation: str
+    failure_type: FailureType | None = None
+
+    @field_validator("failure_type", mode="before")
+    @classmethod
+    def normalize_failure_type(cls, v: object) -> object:
+        """Accept case-insensitive failure_type values from LLM output.
+
+        LLMs may return "ARCHITECTURAL", " Code ", or unrecognized labels
+        like "design_flaw". Strip and lowercase known values; coerce
+        unknown or empty strings to None so the model_validator can fall
+        back to is_architectural/status instead of crashing the workflow.
+        """
+        if isinstance(v, str):
+            normalized = v.strip().lower()
+            if normalized in ("code", "architectural"):
+                return normalized
+            # Unknown value -- log and fall back to None
+            if normalized:
+                logger.warning(
+                    "Unknown failure_type '%s' from LLM output, "
+                    "falling back to is_architectural/status",
+                    v,
+                )
+            return None
+        return v
 
+    @model_validator(mode="after")
+    def sync_failure_type(self) -> "FailureReport":
+        """Keep failure_type and is_architectural in sync.
 
-# TODO Step 4:
-# - Define QA prompt template
-# - Implement test execution via E2B sandbox
-# - Parse structured output from sandbox wrapper
-# - Determine pass/fail/escalate decision
+        If failure_type is provided, it takes precedence and overwrites
+        is_architectural. Otherwise is_architectural=True (older LLM
+        output) or status="escalate" yields ARCHITECTURAL, and
+        status="fail" yields CODE. Any other status (e.g. "pass")
+        leaves failure_type as None.
+        """
+        if self.failure_type is not None:
+            # failure_type takes precedence
+            self.is_architectural = (
+                self.failure_type == FailureType.ARCHITECTURAL
+            )
+        elif self.is_architectural:
+            self.failure_type = FailureType.ARCHITECTURAL
+        elif self.status == "escalate":
+            # status=escalate implies architectural failure
+            self.failure_type = FailureType.ARCHITECTURAL
+            self.is_architectural = True
+        elif self.status == "fail":
+            self.failure_type = FailureType.CODE
+        # status == "pass" leaves failure_type as None (no failure)
+        return self
diff --git a/dev-suite/src/api/events.py b/dev-suite/src/api/events.py
@@ -1,6 +1,6 @@
 """Async event bus for real-time SSE streaming to the dashboard.
 
-Issue #35: SSE Event System — Real-Time Task Streaming
+Issue #35: SSE Event System -- Real-Time Task Streaming
 
 The EventBus is a singleton that LangGraph nodes publish events to.
 Connected SSE clients each get their own asyncio.Queue for fan-out.
@@ -14,7 +14,7 @@
         data={"agent": "dev", "status": "coding", "task_id": "auth-rls"},
     ))
 
-Usage (subscribing — handled by the /stream endpoint):
+Usage (subscribing -- handled by the /stream endpoint):
     queue = event_bus.subscribe()
     try:
         while True:
@@ -48,6 +48,7 @@ class EventType(str, Enum):
     TASK_COMPLETE = "task_complete"
     MEMORY_ADDED = "memory_added"
     LOG_LINE = "log_line"
+    QA_ESCALATION = "qa_escalation"
 
 
 class SSEEvent(BaseModel):
@@ -133,7 +134,7 @@ async def publish(self, event: SSEEvent) -> int:
                 delivered += 1
             except asyncio.QueueFull:
                 logger.warning(
-                    "SSE client queue full — dropping event %s (counter=%d)",
+                    "SSE client queue full -- dropping event %s (counter=%d)",
                     event.type.value,
                     self._event_counter,
                 )
@@ -156,6 +157,6 @@ async def clear(self) -> None:
         logger.info("EventBus cleared all subscribers")
 
 
-# ── Singleton ──
+# -- Singleton --
 
 event_bus = EventBus()
diff --git a/dev-suite/src/orchestrator.py b/dev-suite/src/orchestrator.py
@@ -20,7 +20,7 @@
 from pydantic import BaseModel
 
 from .agents.architect import Blueprint
-from .agents.qa import FailureReport
+from .agents.qa import FailureReport, FailureType
 from .memory.factory import create_memory_store
 from .memory.protocol import MemoryQueryResult, MemoryStore
 from .memory.summarizer import summarize_writes_sync
@@ -271,7 +271,13 @@ def architect_node(state: GraphState) -> dict:
     if failure_report and failure_report.is_architectural:
         user_msg += "\n\nPREVIOUS ATTEMPT FAILED (architectural issue):\n"
         user_msg += f"Errors: {', '.join(failure_report.errors)}\n"
-        user_msg += f"Recommendation: {failure_report.recommendation}"
+        if failure_report.failed_files:
+            user_msg += f"Failed files: {', '.join(failure_report.failed_files)}\n"
+        user_msg += f"Recommendation: {failure_report.recommendation}\n"
+        user_msg += (
+            "\nGenerate a COMPLETELY NEW Blueprint. Do not patch the old one. "
+            "The previous target_files or approach was wrong."
+        )
 
     llm = _get_architect_llm()
     response = llm.invoke([
@@ -411,12 +417,24 @@ def qa_node(state: GraphState) -> dict:
         '  "tests_failed": number,\n'
         '  "errors": ["list of specific error descriptions"],\n'
         '  "failed_files": ["list of files with issues"],\n'
-        '  "is_architectural": true/false '
-        "(set true if the failure is a design/planning issue),\n"
+        '  "is_architectural": true/false,\n'
+        '  "failure_type": "code" or "architectural" or null (if pass),\n'
         '  "recommendation": "what to fix or why it should escalate"\n'
         "}\n\n"
-        '"escalate" means the Blueprint itself is wrong, not just the '
-        "implementation.\n"
+        "FAILURE CLASSIFICATION (critical for correct routing):\n\n"
+        'Set failure_type to "code" (status: "fail") when:\n'
+        "- Implementation has bugs, syntax errors, or type errors\n"
+        "- Tests fail due to logic errors in the code\n"
+        "- Code does not follow the Blueprint's constraints\n"
+        "- Missing error handling or edge cases\n"
+        "Action: Lead Dev will retry with the same Blueprint.\n\n"
+        'Set failure_type to "architectural" (status: "escalate") when:\n'
+        "- Blueprint targets the WRONG files (code is in the wrong place)\n"
+        "- A required dependency or import is missing from the Blueprint\n"
+        "- The design approach is fundamentally flawed\n"
+        "- Acceptance criteria are impossible to meet with current targets\n"
+        "- The task requires files not listed in target_files\n"
+        "Action: Architect will generate a completely NEW Blueprint.\n\n"
         "Be strict but fair. Only pass code that meets ALL acceptance "
         "criteria.\n"
         "Do not include any text before or after the JSON."