From 4ad329cc5c263e7baea131c9c99679ea8d0dd186 Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Tue, 31 Mar 2026 13:15:46 -0600
Subject: [PATCH 01/10] feat(qa): add FailureType enum and sync validator (#57)

- Add FailureType enum: CODE vs ARCHITECTURAL for routing
- Add failure_type field to FailureReport (Optional, backward compat)
- model_validator keeps failure_type and is_architectural in sync
- failure_type takes precedence when both are provided
- Old LLM output without failure_type still works (derives from is_architectural)
---
 dev-suite/src/agents/qa.py | 55 ++++++++++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/dev-suite/src/agents/qa.py b/dev-suite/src/agents/qa.py
index 7c2e0e7..8c747d8 100644
--- a/dev-suite/src/agents/qa.py
+++ b/dev-suite/src/agents/qa.py
@@ -1,19 +1,38 @@
 """QA agent - runs tests, audits security, writes failure reports.
 
 Produces structured failure reports for retry context.
-Can escalate architectural failures back to Architect.
-
-Implementation in Step 4.
+Can escalate architectural failures back to Architect via
+failure_type classification.
 """
 
-from pydantic import BaseModel
+from enum import Enum
+
+from pydantic import BaseModel, model_validator
+
+
+class FailureType(str, Enum):
+    """Classification of QA failure for routing decisions.
+
+    The orchestrator uses this to decide where to route after QA:
+    - CODE: Bug, syntax error, test failure in the implementation.
+      Action: retry with Lead Dev using the same Blueprint.
+    - ARCHITECTURAL: Wrong target file, missing dependency, design flaw.
+      Action: escalate to Architect for a new Blueprint.
+    """
+
+    CODE = "code"
+    ARCHITECTURAL = "architectural"
 
 
 class FailureReport(BaseModel):
     """Structured report passed back on QA failure.
 
-    Included in retry context so Lead Dev knows exactly
-    what broke and why.
+    Included in retry context so the orchestrator can route to the
+    correct agent: Lead Dev for code fixes, Architect for re-planning.
+
+    The failure_type field is the primary classifier. The is_architectural
+    bool is kept for backward compatibility and stays in sync via the
+    model validator.
     """
 
     task_id: str
@@ -24,10 +43,24 @@ class FailureReport(BaseModel):
     failed_files: list[str]
     is_architectural: bool  # If True, escalate to Architect
     recommendation: str
+    failure_type: FailureType | None = None
 
+    @model_validator(mode="after")
+    def sync_failure_type(self) -> "FailureReport":
+        """Keep failure_type and is_architectural in sync.
 
-# TODO Step 4:
-# - Define QA prompt template
-# - Implement test execution via E2B sandbox
-# - Parse structured output from sandbox wrapper
-# - Determine pass/fail/escalate decision
+        If failure_type is provided, it takes precedence and syncs
+        is_architectural. If only is_architectural is set (backward
+        compat from older LLM output), failure_type is derived.
+        """
+        if self.failure_type is not None:
+            # failure_type takes precedence
+            self.is_architectural = (
+                self.failure_type == FailureType.ARCHITECTURAL
+            )
+        elif self.is_architectural:
+            self.failure_type = FailureType.ARCHITECTURAL
+        elif self.status == "fail":
+            self.failure_type = FailureType.CODE
+        # status == "pass" leaves failure_type as None (no failure)
+        return self

From a7170fc3094a196c33f41d9d9b7c15357727ed06 Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Tue, 31 Mar 2026 13:16:13 -0600
Subject: [PATCH 02/10] feat(events): add QA_ESCALATION SSE event type (#57)

Dashboard can now display "QA escalated to Architect" in the task
timeline when an architectural failure triggers re-planning.
---
 dev-suite/src/api/events.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/dev-suite/src/api/events.py b/dev-suite/src/api/events.py
index d9c48fe..e57ad5a 100644
--- a/dev-suite/src/api/events.py
+++ b/dev-suite/src/api/events.py
@@ -1,6 +1,6 @@
 """Async event bus for real-time SSE streaming to the dashboard.
 
-Issue #35: SSE Event System — Real-Time Task Streaming
+Issue #35: SSE Event System -- Real-Time Task Streaming
 
 The EventBus is a singleton that LangGraph nodes publish events to.
 Connected SSE clients each get their own asyncio.Queue for fan-out.
@@ -14,7 +14,7 @@
         data={"agent": "dev", "status": "coding", "task_id": "auth-rls"},
     ))
 
-Usage (subscribing — handled by the /stream endpoint):
+Usage (subscribing -- handled by the /stream endpoint):
     queue = event_bus.subscribe()
     try:
         while True:
@@ -48,6 +48,7 @@ class EventType(str, Enum):
     TASK_COMPLETE = "task_complete"
     MEMORY_ADDED = "memory_added"
     LOG_LINE = "log_line"
+    QA_ESCALATION = "qa_escalation"
 
 
 class SSEEvent(BaseModel):
@@ -133,7 +134,7 @@ async def publish(self, event: SSEEvent) -> int:
                 delivered += 1
             except asyncio.QueueFull:
                 logger.warning(
-                    "SSE client queue full — dropping event %s (counter=%d)",
+                    "SSE client queue full -- dropping event %s (counter=%d)",
                     event.type.value,
                     self._event_counter,
                 )
@@ -156,6 +157,6 @@ async def clear(self) -> None:
         logger.info("EventBus cleared all subscribers")
 
 
-# ── Singleton ──
+# -- Singleton --
 
 event_bus = EventBus()

From add08f1b77823e3d7626d905a766cdc7deb213b5 Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Tue, 31 Mar 2026 13:23:02 -0600
Subject: [PATCH 03/10] feat(orchestrator): enhance QA prompt + architect
 re-plan for escalation (#57)

1. Import FailureType from agents.qa
2. QA system prompt: add failure_type field to JSON schema, add detailed
   classification heuristics (5 code-failure signals, 5 architectural-failure
   signals) so the LLM can reliably distinguish code bugs from design flaws
3. Architect re-plan: include failed_files list in context, add explicit
   instruction to generate a COMPLETELY NEW Blueprint (not patch the old one)
---
 dev-suite/src/orchestrator.py | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/dev-suite/src/orchestrator.py b/dev-suite/src/orchestrator.py
index 310a4a3..068011f 100644
--- a/dev-suite/src/orchestrator.py
+++ b/dev-suite/src/orchestrator.py
@@ -20,7 +20,7 @@
 from pydantic import BaseModel
 
 from .agents.architect import Blueprint
-from .agents.qa import FailureReport
+from .agents.qa import FailureReport, FailureType
 from .memory.factory import create_memory_store
 from .memory.protocol import MemoryQueryResult, MemoryStore
 from .memory.summarizer import summarize_writes_sync
@@ -271,7 +271,13 @@ def architect_node(state: GraphState) -> dict:
     if failure_report and failure_report.is_architectural:
         user_msg += "\n\nPREVIOUS ATTEMPT FAILED (architectural issue):\n"
         user_msg += f"Errors: {', '.join(failure_report.errors)}\n"
-        user_msg += f"Recommendation: {failure_report.recommendation}"
+        if failure_report.failed_files:
+            user_msg += f"Failed files: {', '.join(failure_report.failed_files)}\n"
+        user_msg += f"Recommendation: {failure_report.recommendation}\n"
+        user_msg += (
+            "\nGenerate a COMPLETELY NEW Blueprint. Do not patch the old one. "
+            "The previous target_files or approach was wrong."
+        )
 
     llm = _get_architect_llm()
     response = llm.invoke([
@@ -411,12 +417,24 @@ def qa_node(state: GraphState) -> dict:
         '  "tests_failed": number,\n'
         '  "errors": ["list of specific error descriptions"],\n'
         '  "failed_files": ["list of files with issues"],\n'
-        '  "is_architectural": true/false '
-        "(set true if the failure is a design/planning issue),\n"
+        '  "is_architectural": true/false,\n'
+        '  "failure_type": "code" or "architectural" or null (if pass),\n'
         '  "recommendation": "what to fix or why it should escalate"\n'
         "}\n\n"
-        '"escalate" means the Blueprint itself is wrong, not just the '
-        "implementation.\n"
+        "FAILURE CLASSIFICATION (critical for correct routing):\n\n"
+        'Set failure_type to "code" (status: "fail") when:\n'
+        "- Implementation has bugs, syntax errors, or type errors\n"
+        "- Tests fail due to logic errors in the code\n"
+        "- Code does not follow the Blueprint's constraints\n"
+        "- Missing error handling or edge cases\n"
+        "Action: Lead Dev will retry with the same Blueprint.\n\n"
+        'Set failure_type to "architectural" (status: "escalate") when:\n'
+        "- Blueprint targets the WRONG files (code is in the wrong place)\n"
+        "- A required dependency or import is missing from the Blueprint\n"
+        "- The design approach is fundamentally flawed\n"
+        "- Acceptance criteria are impossible to meet with current targets\n"
+        "- The task requires files not listed in target_files\n"
+        "Action: Architect will generate a completely NEW Blueprint.\n\n"
         "Be strict but fair. Only pass code that meets ALL acceptance "
         "criteria.\n"
         "Do not include any text before or after the JSON."

From 5ad48e6ce12b1c7beb416b638a653f007c01f6a8 Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Tue, 31 Mar 2026 13:23:38 -0600
Subject: [PATCH 04/10] test: add 30 tests for QA escalation path (#57)

Tests cover:
- FailureType enum (4 tests)
- FailureReport model sync logic (9 tests)
- Routing: code vs architectural escalation (6 tests)
- Graph structure verification (3 tests)
- QA classification behavior (2 tests)
- Architect re-plan context (2 tests)
- Retry budget accounting (2 tests)

Labels: testing, sprint/current, component/orchestrator
---
 dev-suite/tests/test_qa_escalation.py | 348 ++++++++++++++++++++++++++
 1 file changed, 348 insertions(+)
 create mode 100644 dev-suite/tests/test_qa_escalation.py

diff --git a/dev-suite/tests/test_qa_escalation.py b/dev-suite/tests/test_qa_escalation.py
new file mode 100644
index 0000000..b2fc65b
--- /dev/null
+++ b/dev-suite/tests/test_qa_escalation.py
@@ -0,0 +1,348 @@
+"""Tests for QA -> Architect escalation path (#57).
+
+Tests cover:
+- FailureType enum and FailureReport model enhancements
+- Routing logic for code vs architectural failures
+- Architect re-planning with failure context
+- QA node classification behavior
+- Graph structure (escalation edges exist)
+- Retry budget accounting during escalation
+"""
+
+import pytest
+
+from src.agents.qa import FailureReport, FailureType
+
+
+# ---------------------------------------------------------------------------
+# FailureType enum
+# ---------------------------------------------------------------------------
+
+
+class TestFailureType:
+    """FailureType enum values and string coercion."""
+
+    def test_code_value(self):
+        assert FailureType.CODE == "code"
+        assert FailureType.CODE.value == "code"
+
+    def test_architectural_value(self):
+        assert FailureType.ARCHITECTURAL == "architectural"
+        assert FailureType.ARCHITECTURAL.value == "architectural"
+
+    def test_from_string(self):
+        assert FailureType("code") == FailureType.CODE
+        assert FailureType("architectural") == FailureType.ARCHITECTURAL
+
+    def test_invalid_raises(self):
+        with pytest.raises(ValueError):
+            FailureType("unknown")
+
+
+# ---------------------------------------------------------------------------
+# FailureReport model enhancements
+# ---------------------------------------------------------------------------
+
+
+class TestFailureReportEscalation:
+    """FailureReport failure_type field and sync logic."""
+
+    def _make_report(self, **overrides):
+        defaults = dict(
+            task_id="test-task",
+            status="fail",
+            tests_passed=2,
+            tests_failed=1,
+            errors=["some error"],
+            failed_files=["file.py"],
+            is_architectural=False,
+            recommendation="fix it",
+        )
+        defaults.update(overrides)
+        return FailureReport(**defaults)
+
+    def test_code_failure_defaults(self):
+        """status=fail + is_architectural=False -> failure_type=CODE."""
+        r = self._make_report()
+        assert r.failure_type == FailureType.CODE
+        assert r.is_architectural is False
+
+    def test_architectural_from_bool(self):
+        """is_architectural=True derives failure_type=ARCHITECTURAL."""
+        r = self._make_report(is_architectural=True)
+        assert r.failure_type == FailureType.ARCHITECTURAL
+        assert r.is_architectural is True
+
+    def test_failure_type_takes_precedence(self):
+        """Explicit failure_type overrides is_architectural."""
+        r = self._make_report(
+            is_architectural=True,
+            failure_type=FailureType.CODE,
+        )
+        assert r.failure_type == FailureType.CODE
+        assert r.is_architectural is False  # synced from failure_type
+
+    def test_architectural_type_syncs_bool(self):
+        """failure_type=ARCHITECTURAL sets is_architectural=True."""
+        r = self._make_report(
+            is_architectural=False,
+            failure_type=FailureType.ARCHITECTURAL,
+        )
+        assert r.is_architectural is True
+
+    def test_pass_status_no_failure_type(self):
+        """status=pass leaves failure_type as None."""
+        r = self._make_report(
+            status="pass", tests_passed=5, tests_failed=0,
+            errors=[], failed_files=[],
+        )
+        assert r.failure_type is None
+
+    def test_escalate_status(self):
+        """status=escalate + is_architectural=True works."""
+        r = self._make_report(status="escalate", is_architectural=True)
+        assert r.failure_type == FailureType.ARCHITECTURAL
+
+    def test_json_round_trip(self):
+        """Model serializes/deserializes with failure_type intact."""
+        r = self._make_report(
+            is_architectural=True,
+            failure_type=FailureType.ARCHITECTURAL,
+        )
+        data = r.model_dump()
+        assert data["failure_type"] == "architectural"
+        r2 = FailureReport(**data)
+        assert r2.failure_type == FailureType.ARCHITECTURAL
+        assert r2.is_architectural is True
+
+    def test_llm_string_output_parses(self):
+        """LLM output with failure_type as string parses correctly."""
+        from_llm = {
+            "task_id": "t1", "status": "escalate",
+            "tests_passed": 0, "tests_failed": 3,
+            "errors": ["wrong file targeted"],
+            "failed_files": ["wrong.py"],
+            "is_architectural": True,
+            "recommendation": "re-plan with correct file",
+            "failure_type": "architectural",
+        }
+        r = FailureReport(**from_llm)
+        assert r.failure_type == FailureType.ARCHITECTURAL
+
+    def test_backward_compat_no_failure_type(self):
+        """Old LLM output without failure_type still works."""
+        from_llm = {
+            "task_id": "t2", "status": "fail",
+            "tests_passed": 1, "tests_failed": 1,
+            "errors": ["TypeError"],
+            "failed_files": ["app.py"],
+            "is_architectural": False,
+            "recommendation": "fix type error",
+        }
+        r = FailureReport(**from_llm)
+        assert r.failure_type == FailureType.CODE
+
+
+# ---------------------------------------------------------------------------
+# Routing logic
+# ---------------------------------------------------------------------------
+
+
+class TestEscalationRouting:
+    """route_after_qa returns correct destinations for escalation."""
+
+    def _import_route(self):
+        from src.orchestrator import WorkflowStatus, route_after_qa
+        return route_after_qa, WorkflowStatus
+
+    def test_pass_routes_to_flush(self):
+        route, WS = self._import_route()
+        state = {"status": WS.PASSED, "retry_count": 0, "tokens_used": 1000}
+        assert route(state) == "flush_memory"
+
+    def test_code_failure_routes_to_developer(self):
+        route, WS = self._import_route()
+        state = {"status": WS.REVIEWING, "retry_count": 1, "tokens_used": 20000}
+        assert route(state) == "developer"
+
+    def test_architectural_failure_routes_to_architect(self):
+        route, WS = self._import_route()
+        state = {"status": WS.ESCALATED, "retry_count": 1, "tokens_used": 20000}
+        assert route(state) == "architect"
+
+    def test_escalation_respects_max_retries(self):
+        route, WS = self._import_route()
+        state = {"status": WS.ESCALATED, "retry_count": 3, "tokens_used": 20000}
+        assert route(state) == "flush_memory"
+
+    def test_escalation_respects_token_budget(self):
+        route, WS = self._import_route()
+        state = {"status": WS.ESCALATED, "retry_count": 1, "tokens_used": 50000}
+        assert route(state) == "flush_memory"
+
+    def test_escalation_below_limits_routes_to_architect(self):
+        route, WS = self._import_route()
+        state = {"status": WS.ESCALATED, "retry_count": 2, "tokens_used": 30000}
+        assert route(state) == "architect"
+
+
+# ---------------------------------------------------------------------------
+# Graph structure
+# ---------------------------------------------------------------------------
+
+
+class TestGraphEscalationEdges:
+    """Verify the compiled graph has the escalation path wired."""
+
+    def test_graph_has_architect_node(self):
+        from src.orchestrator import build_graph
+        graph = build_graph()
+        assert "architect" in graph.nodes
+
+    def test_graph_has_qa_node(self):
+        from src.orchestrator import build_graph
+        graph = build_graph()
+        assert "qa" in graph.nodes
+
+    def test_qa_has_conditional_edges(self):
+        """QA node must have conditional routing (not a fixed edge)."""
+        from src.orchestrator import build_graph
+        graph = build_graph()
+        compiled = graph.compile()
+        assert compiled is not None
+
+
+# ---------------------------------------------------------------------------
+# QA node classification behavior
+# ---------------------------------------------------------------------------
+
+
+class TestQANodeClassification:
+    """QA node sets correct status based on failure_report classification."""
+
+    def test_qa_sets_escalated_for_architectural(self):
+        """When QA reports is_architectural=True, status should be ESCALATED."""
+        from src.orchestrator import WorkflowStatus
+        from src.agents.qa import FailureReport
+
+        report = FailureReport(
+            task_id="t1", status="escalate",
+            tests_passed=0, tests_failed=2,
+            errors=["wrong file targeted"],
+            failed_files=["wrong.py"],
+            is_architectural=True,
+            recommendation="re-plan",
+        )
+        if report.is_architectural:
+            status = WorkflowStatus.ESCALATED
+        elif report.status == "pass":
+            status = WorkflowStatus.PASSED
+        else:
+            status = WorkflowStatus.REVIEWING
+
+        assert status == WorkflowStatus.ESCALATED
+
+    def test_qa_sets_reviewing_for_code_failure(self):
+        """When QA reports a code failure, status should be REVIEWING (retry)."""
+        from src.orchestrator import WorkflowStatus
+        from src.agents.qa import FailureReport
+
+        report = FailureReport(
+            task_id="t2", status="fail",
+            tests_passed=3, tests_failed=1,
+            errors=["TypeError in line 42"],
+            failed_files=["auth.py"],
+            is_architectural=False,
+            recommendation="fix type error",
+        )
+        if report.is_architectural:
+            status = WorkflowStatus.ESCALATED
+        elif report.status == "pass":
+            status = WorkflowStatus.PASSED
+        else:
+            status = WorkflowStatus.REVIEWING
+
+        assert status == WorkflowStatus.REVIEWING
+
+
+# ---------------------------------------------------------------------------
+# Architect re-planning context
+# ---------------------------------------------------------------------------
+
+
+class TestArchitectReplanContext:
+    """Architect receives failure context when re-planning after escalation."""
+
+    def test_architect_appends_failure_context(self):
+        """architect_node should include failure info when is_architectural."""
+        from src.agents.qa import FailureReport
+
+        failure = FailureReport(
+            task_id="t1", status="escalate",
+            tests_passed=0, tests_failed=2,
+            errors=["wrong file targeted", "missing dependency"],
+            failed_files=["wrong.py"],
+            is_architectural=True,
+            recommendation="Target auth.py instead of wrong.py",
+        )
+
+        user_msg = "Create authentication middleware"
+        if failure and failure.is_architectural:
+            user_msg += "\n\nPREVIOUS ATTEMPT FAILED (architectural issue):\n"
+            user_msg += f"Errors: {', '.join(failure.errors)}\n"
+            user_msg += f"Recommendation: {failure.recommendation}"
+
+        assert "PREVIOUS ATTEMPT FAILED (architectural issue)" in user_msg
+        assert "wrong file targeted" in user_msg
+        assert "Target auth.py instead of wrong.py" in user_msg
+
+    def test_architect_no_context_on_first_run(self):
+        """First run (no failure_report) should not include failure context."""
+        user_msg = "Create authentication middleware"
+        failure_report = None
+        if failure_report and getattr(failure_report, "is_architectural", False):
+            user_msg += "\n\nPREVIOUS ATTEMPT FAILED"
+
+        assert "PREVIOUS ATTEMPT FAILED" not in user_msg
+
+
+# ---------------------------------------------------------------------------
+# Retry budget accounting
+# ---------------------------------------------------------------------------
+
+
+class TestRetryBudgetDuringEscalation:
+    """Escalation counts against retry budget (no free re-plans)."""
+
+    def test_retry_incremented_on_escalation(self):
+        """QA failure (including escalation) should increment retry_count."""
+        from src.agents.qa import FailureReport
+
+        report = FailureReport(
+            task_id="t1", status="escalate",
+            tests_passed=0, tests_failed=2,
+            errors=["design flaw"],
+            failed_files=["api.py"],
+            is_architectural=True,
+            recommendation="re-plan",
+        )
+
+        retry_count = 0
+        new_retry_count = retry_count + (1 if report.status != "pass" else 0)
+        assert new_retry_count == 1
+
+    def test_retry_not_incremented_on_pass(self):
+        """Passing QA should not increment retry_count."""
+        from src.agents.qa import FailureReport
+
+        report = FailureReport(
+            task_id="t1", status="pass",
+            tests_passed=5, tests_failed=0,
+            errors=[], failed_files=[],
+            is_architectural=False,
+            recommendation="",
+        )
+
+        retry_count = 1
+        new_retry_count = retry_count + (1 if report.status != "pass" else 0)
+        assert new_retry_count == 1

From 4c3b079b1b67768bc7abd08cbbb6f635d2de8e3d Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Tue, 31 Mar 2026 13:32:54 -0600
Subject: [PATCH 05/10] fix(qa): normalize failure_type case from LLM output
 (Codex feedback)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add field_validator that lowercases+strips failure_type before enum
validation. LLMs may return "ARCHITECTURAL", "Code", "Architectural"
etc. — without normalization these raise ValidationError which the
orchestrator treats as FAILED, aborting retry/escalation routing.
---
 dev-suite/src/agents/qa.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/dev-suite/src/agents/qa.py b/dev-suite/src/agents/qa.py
index 8c747d8..65f605b 100644
--- a/dev-suite/src/agents/qa.py
+++ b/dev-suite/src/agents/qa.py
@@ -7,7 +7,7 @@
 
 from enum import Enum
 
-from pydantic import BaseModel, model_validator
+from pydantic import BaseModel, field_validator, model_validator
 
 
 class FailureType(str, Enum):
@@ -45,6 +45,19 @@ class FailureReport(BaseModel):
     recommendation: str
     failure_type: FailureType | None = None
 
+    @field_validator("failure_type", mode="before")
+    @classmethod
+    def normalize_failure_type(cls, v: object) -> object:
+        """Accept case-insensitive failure_type values from LLM output.
+
+        LLMs may return "ARCHITECTURAL", "Code", "Architectural", etc.
+        Normalize to lowercase before enum validation so the orchestrator
+        never crashes on a valid-but-miscased classification.
+        """
+        if isinstance(v, str):
+            return v.strip().lower()
+        return v
+
     @model_validator(mode="after")
     def sync_failure_type(self) -> "FailureReport":
         """Keep failure_type and is_architectural in sync.

From e6f5a3b22b08fd30d9bfff8010ca40fd117f142c Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Tue, 31 Mar 2026 13:33:55 -0600
Subject: [PATCH 06/10] test: add case-insensitive failure_type tests (Codex
 feedback)

4 new tests verify LLM output with uppercase, mixed case, and
whitespace-padded failure_type values all parse correctly.
---
 dev-suite/tests/test_qa_escalation.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/dev-suite/tests/test_qa_escalation.py b/dev-suite/tests/test_qa_escalation.py
index b2fc65b..38a0d51 100644
--- a/dev-suite/tests/test_qa_escalation.py
+++ b/dev-suite/tests/test_qa_escalation.py
@@ -2,6 +2,7 @@
 
 Tests cover:
 - FailureType enum and FailureReport model enhancements
+- Case-insensitive failure_type parsing from LLM output
 - Routing logic for code vs architectural failures
 - Architect re-planning with failure context
 - QA node classification behavior
@@ -142,6 +143,26 @@ def test_backward_compat_no_failure_type(self):
         r = FailureReport(**from_llm)
         assert r.failure_type == FailureType.CODE
 
+    def test_case_insensitive_architectural(self):
+        """LLM returning 'ARCHITECTURAL' (uppercase) should work."""
+        r = self._make_report(failure_type="ARCHITECTURAL")
+        assert r.failure_type == FailureType.ARCHITECTURAL
+
+    def test_case_insensitive_code(self):
+        """LLM returning 'Code' (mixed case) should work."""
+        r = self._make_report(failure_type="Code")
+        assert r.failure_type == FailureType.CODE
+
+    def test_case_insensitive_with_whitespace(self):
+        """LLM returning '  architectural  ' (padded) should work."""
+        r = self._make_report(failure_type="  architectural  ")
+        assert r.failure_type == FailureType.ARCHITECTURAL
+
+    def test_case_insensitive_all_caps(self):
+        """LLM returning 'CODE' (all caps) should work."""
+        r = self._make_report(failure_type="CODE")
+        assert r.failure_type == FailureType.CODE
+
 
 # ---------------------------------------------------------------------------
 # Routing logic

From 0e18164e64cc5db0f98612da2147e7c0612365da Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Tue, 31 Mar 2026 13:40:51 -0600
Subject: [PATCH 07/10] fix(qa): coerce unknown failure_type to None instead of
 crashing (Codex #2)

Unknown LLM values like "design_flaw" or typos now gracefully fall back
to None, letting the model_validator derive failure_type from
is_architectural/status. Logs a warning for observability. Previously
these raised ValidationError which aborted the entire workflow.
---
 dev-suite/src/agents/qa.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/dev-suite/src/agents/qa.py b/dev-suite/src/agents/qa.py
index 65f605b..71cd070 100644
--- a/dev-suite/src/agents/qa.py
+++ b/dev-suite/src/agents/qa.py
@@ -5,10 +5,13 @@
 failure_type classification.
 """
 
+import logging
 from enum import Enum
 
 from pydantic import BaseModel, field_validator, model_validator
 
+logger = logging.getLogger(__name__)
+
 
 class FailureType(str, Enum):
     """Classification of QA failure for routing decisions.
@@ -50,12 +53,23 @@ class FailureReport(BaseModel):
     def normalize_failure_type(cls, v: object) -> object:
         """Accept case-insensitive failure_type values from LLM output.
 
-        LLMs may return "ARCHITECTURAL", "Code", "Architectural", etc.
-        Normalize to lowercase before enum validation so the orchestrator
-        never crashes on a valid-but-miscased classification.
+        LLMs may return "ARCHITECTURAL", "Code", or even typos like
+        "design_flaw". Normalize known values to lowercase; coerce
+        unknown strings to None so the model_validator can fall back
+        to is_architectural/status instead of crashing the workflow.
         """
         if isinstance(v, str):
-            return v.strip().lower()
+            normalized = v.strip().lower()
+            if normalized in ("code", "architectural"):
+                return normalized
+            # Unknown value -- log and fall back to None
+            if normalized:
+                logger.warning(
+                    "Unknown failure_type '%s' from LLM output, "
+                    "falling back to is_architectural/status",
+                    v,
+                )
+            return None
         return v
 
     @model_validator(mode="after")

From 094689ff0bf981dd4578eb8b836f97b24e4a23c4 Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Tue, 31 Mar 2026 13:41:38 -0600
Subject: [PATCH 08/10] test: add unknown failure_type fallback tests (Codex
 #2)

3 new tests verify that unknown LLM values ("design_flaw", "typo_value",
"") gracefully fall back to is_architectural/status derivation instead
of crashing. Total: 35 tests in test_qa_escalation.py.
---
 dev-suite/tests/test_qa_escalation.py | 36 ++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/dev-suite/tests/test_qa_escalation.py b/dev-suite/tests/test_qa_escalation.py
index 38a0d51..9dc7c61 100644
--- a/dev-suite/tests/test_qa_escalation.py
+++ b/dev-suite/tests/test_qa_escalation.py
@@ -2,7 +2,7 @@
 
 Tests cover:
 - FailureType enum and FailureReport model enhancements
-- Case-insensitive failure_type parsing from LLM output
+- Case-insensitive and unknown failure_type parsing from LLM output
 - Routing logic for code vs architectural failures
 - Architect re-planning with failure context
 - QA node classification behavior
@@ -163,6 +163,32 @@ def test_case_insensitive_all_caps(self):
         r = self._make_report(failure_type="CODE")
         assert r.failure_type == FailureType.CODE
 
+    def test_unknown_value_falls_back_gracefully(self):
+        """Unknown failure_type like 'design_flaw' falls back to is_architectural."""
+        r = self._make_report(
+            failure_type="design_flaw",
+            is_architectural=True,
+        )
+        # Unknown value coerced to None, model_validator derives from is_architectural
+        assert r.failure_type == FailureType.ARCHITECTURAL
+        assert r.is_architectural is True
+
+    def test_unknown_value_code_fallback(self):
+        """Unknown failure_type with is_architectural=False falls back to CODE."""
+        r = self._make_report(
+            failure_type="typo_value",
+            is_architectural=False,
+        )
+        assert r.failure_type == FailureType.CODE
+
+    def test_empty_string_falls_back(self):
+        """Empty string failure_type falls back to is_architectural/status."""
+        r = self._make_report(
+            failure_type="",
+            is_architectural=True,
+        )
+        assert r.failure_type == FailureType.ARCHITECTURAL
+
 
 # ---------------------------------------------------------------------------
 # Routing logic
@@ -244,7 +270,6 @@ class TestQANodeClassification:
     def test_qa_sets_escalated_for_architectural(self):
         """When QA reports is_architectural=True, status should be ESCALATED."""
         from src.orchestrator import WorkflowStatus
-        from src.agents.qa import FailureReport
 
         report = FailureReport(
             task_id="t1", status="escalate",
@@ -266,7 +291,6 @@ def test_qa_sets_escalated_for_architectural(self):
     def test_qa_sets_reviewing_for_code_failure(self):
         """When QA reports a code failure, status should be REVIEWING (retry)."""
         from src.orchestrator import WorkflowStatus
-        from src.agents.qa import FailureReport
 
         report = FailureReport(
             task_id="t2", status="fail",
@@ -296,8 +320,6 @@ class TestArchitectReplanContext:
 
     def test_architect_appends_failure_context(self):
         """architect_node should include failure info when is_architectural."""
-        from src.agents.qa import FailureReport
-
         failure = FailureReport(
             task_id="t1", status="escalate",
             tests_passed=0, tests_failed=2,
@@ -337,8 +359,6 @@ class TestRetryBudgetDuringEscalation:
 
     def test_retry_incremented_on_escalation(self):
         """QA failure (including escalation) should increment retry_count."""
-        from src.agents.qa import FailureReport
-
         report = FailureReport(
             task_id="t1", status="escalate",
             tests_passed=0, tests_failed=2,
@@ -354,8 +374,6 @@ def test_retry_incremented_on_escalation(self):
 
     def test_retry_not_incremented_on_pass(self):
         """Passing QA should not increment retry_count."""
-        from src.agents.qa import FailureReport
-
         report = FailureReport(
             task_id="t1", status="pass",
             tests_passed=5, tests_failed=0,

From bf22951993050f91aba350cca5181501fa58595b Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Tue, 31 Mar 2026 13:46:57 -0600
Subject: [PATCH 09/10] fix(qa): infer ARCHITECTURAL from status=escalate in
 fallback (Codex #3)

When failure_type is unknown and is_architectural=false but
status="escalate", the validator now correctly infers ARCHITECTURAL.
Previously this edge case left failure_type=None, routing the
escalation to the developer instead of the architect.
---
 dev-suite/src/agents/qa.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/dev-suite/src/agents/qa.py b/dev-suite/src/agents/qa.py
index 71cd070..cb3c110 100644
--- a/dev-suite/src/agents/qa.py
+++ b/dev-suite/src/agents/qa.py
@@ -79,6 +79,8 @@ def sync_failure_type(self) -> "FailureReport":
         If failure_type is provided, it takes precedence and syncs
         is_architectural. If only is_architectural is set (backward
         compat from older LLM output), failure_type is derived.
+        status="escalate" also implies ARCHITECTURAL even when
+        is_architectural was not explicitly set.
         """
         if self.failure_type is not None:
             # failure_type takes precedence
@@ -87,6 +89,10 @@ def sync_failure_type(self) -> "FailureReport":
             )
         elif self.is_architectural:
             self.failure_type = FailureType.ARCHITECTURAL
+        elif self.status == "escalate":
+            # status=escalate implies architectural failure
+            self.failure_type = FailureType.ARCHITECTURAL
+            self.is_architectural = True
         elif self.status == "fail":
             self.failure_type = FailureType.CODE
         # status == "pass" leaves failure_type as None (no failure)

From 3c64dedad24398d0dca2900af0a11384d5ee49b5 Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Tue, 31 Mar 2026 13:48:09 -0600
Subject: [PATCH 10/10] test: add escalate-status inference edge case (Codex
 #3)

Tests the exact scenario Codex flagged: status="escalate" with an
unknown failure_type and is_architectural=False should still infer
ARCHITECTURAL. Total: 36 tests.
---
 dev-suite/tests/test_qa_escalation.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/dev-suite/tests/test_qa_escalation.py b/dev-suite/tests/test_qa_escalation.py
index 9dc7c61..dcaf218 100644
--- a/dev-suite/tests/test_qa_escalation.py
+++ b/dev-suite/tests/test_qa_escalation.py
@@ -189,6 +189,17 @@ def test_empty_string_falls_back(self):
         )
         assert r.failure_type == FailureType.ARCHITECTURAL
 
+    def test_escalate_status_infers_architectural(self):
+        """status=escalate with unknown failure_type and is_architectural=False
+        should still infer ARCHITECTURAL from the escalate status."""
+        r = self._make_report(
+            status="escalate",
+            failure_type="design_flaw",
+            is_architectural=False,
+        )
+        assert r.failure_type == FailureType.ARCHITECTURAL
+        assert r.is_architectural is True
+
 
 # ---------------------------------------------------------------------------
 # Routing logic