From eca3171825057a89f1b7742d8ea7aac601d14940 Mon Sep 17 00:00:00 2001 From: Ashish-dwi99 Date: Thu, 16 Apr 2026 12:10:50 +0530 Subject: [PATCH] =?UTF-8?q?v3.3.0:=20Native=20Claude=20Code=20hooks=20?= =?UTF-8?q?=E2=80=94=20self-evolving=20sessions=20without=20markdown?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dhee/hooks/claude_code/ — 6 lifecycle hooks that make every Claude Code session learn from its own execution. No CLAUDE.md bloat, no SKILL.md files, no static routing tables. Vector memory with decay + token-budgeted XML context injection (~630 tokens for rich context, constant regardless of memory volume). Hooks: SessionStart (context injection), UserPromptSubmit (per-turn memories), PostToolUse (outcome capture with privacy filter), PreCompact (state survival), Stop/SessionEnd (checkpoint with learnings). CLI: `dhee task "..."` starts Claude Code with hooks pre-configured. `dhee install` / `dhee uninstall-hooks` for manual hook management. 42 tests covering renderer, privacy filter, installer, dispatch handlers. 878 existing tests unaffected. Co-Authored-By: Claude Opus 4.6 --- README.md | 42 +++- dhee/__init__.py | 2 +- dhee/cli.py | 68 +++++ dhee/hooks/__init__.py | 0 dhee/hooks/claude_code/__init__.py | 2 + dhee/hooks/claude_code/__main__.py | 256 +++++++++++++++++++ dhee/hooks/claude_code/install.py | 173 +++++++++++++ dhee/hooks/claude_code/privacy.py | 34 +++ dhee/hooks/claude_code/renderer.py | 310 +++++++++++++++++++++++ pyproject.toml | 2 +- tests/test_claude_code_hooks.py | 388 +++++++++++++++++++++++++++++ 11 files changed, 1274 insertions(+), 3 deletions(-) create mode 100644 dhee/hooks/__init__.py create mode 100644 dhee/hooks/claude_code/__init__.py create mode 100644 dhee/hooks/claude_code/__main__.py create mode 100644 dhee/hooks/claude_code/install.py create mode 100644 dhee/hooks/claude_code/privacy.py create mode 100644 dhee/hooks/claude_code/renderer.py create mode 100644 tests/test_claude_code_hooks.py diff --git a/README.md b/README.md index 9d26cac..76184a1 100644 --- a/README.md +++ b/README.md @@ -78,12 +78,42 @@ d.context("fixing auth bug") d.checkpoint("Fixed it", what_worked="git blame first") ``` +### Claude Code — Native Hooks (v3.3.0) + +One command. Every Claude Code session becomes self-evolving. + +```bash +dhee install +``` + +That's it. Dhee hooks into Claude Code's lifecycle — no CLAUDE.md bloat, no SKILL.md files, no markdown accumulation. Structured XML context injection, budgeted to ~630 tokens regardless of how much memory you have. + +**What happens automatically:** + +| Hook | When | What Dhee does | +|:-----|:-----|:---------------| +| `SessionStart` | Session opens | Injects last session, insights, performance trends, relevant memories | +| `UserPromptSubmit` | Every prompt | Surfaces memories relevant to what you just asked | +| `PostToolUse` | After Edit/Write/Bash | Captures what Claude did (secrets auto-stripped) | +| `PreCompact` | Before context compaction | Checkpoints state so nothing is lost | +| `Stop` | Session ends | Records outcomes — what worked, what failed, learnings | + +Or start Claude Code directly with context: + +```bash +dhee task "fix the flaky auth test" +``` + +**Why not CLAUDE.md?** Markdown files are static. After 6 months of accumulated knowledge, they rot — stale patterns sit at equal weight to current ones, no retrieval ranking, no forgetting. Dhee uses vector memory with strength-based decay. Relevant memories surface. Irrelevant ones fade. The context budget stays constant at ~630 tokens whether you have 50 memories or 50,000. + ### CLI ```bash dhee remember "User prefers Python" dhee recall "programming language" dhee checkpoint "Fixed auth bug" --what-worked "checked logs" +dhee install # install Claude Code hooks +dhee uninstall-hooks # remove them ``` ### Docker @@ -219,8 +249,18 @@ These are surfaced through `context()` and `checkpoint()` automatically when ena ## Architecture ``` -Agent (Claude, GPT, Cursor, custom) +Claude Code (or any agent) │ + ├── SessionStart hook ──→ dhee.context() ──→ XML renderer ──→ system prompt injection + ├── UserPromptSubmit ───→ dhee.recall() ──→ ranked memories ──→ per-turn context + ├── PostToolUse ────────→ dhee.remember() ─→ privacy filter ──→ stored (0 LLM) + ├── PreCompact ─────────→ dhee.checkpoint() + re-inject context + └── Stop ───────────────→ dhee.checkpoint(what_worked, what_failed, outcome_score) +``` + +The 4-operation API under the hooks: + +``` ├── remember(content) → Engram: embed + store (0 LLM) ├── recall(query) → Engram: embed + vector search (0 LLM) ├── context(task) → Buddhi: performance + insights + intentions + memories diff --git a/dhee/__init__.py b/dhee/__init__.py index 4f6a72d..a972b2c 100644 --- a/dhee/__init__.py +++ b/dhee/__init__.py @@ -32,7 +32,7 @@ # Default: CoreMemory (lightest, zero-config) Memory = CoreMemory -__version__ = "3.2.0" +__version__ = "3.3.0" __all__ = [ # Memory classes "Engram", diff --git a/dhee/cli.py b/dhee/cli.py index 271dd01..c92713e 100644 --- a/dhee/cli.py +++ b/dhee/cli.py @@ -318,6 +318,58 @@ def cmd_uninstall(args: argparse.Namespace) -> None: print("Cancelled.") +def cmd_task(args: argparse.Namespace) -> None: + """Start Claude Code with Dhee cognition hooks.""" + from dhee.hooks.claude_code.install import ensure_installed + + result = ensure_installed() + if result.already_installed: + pass # hooks already in place + elif result.created or result.updated: + print(f" Dhee hooks installed → {result.settings_path}") + + # Find claude executable + claude_bin = shutil.which("claude") + if not claude_bin: + print("Error: 'claude' not found in PATH. Install Claude Code first.", file=sys.stderr) + sys.exit(1) + + # Build command + cmd = [claude_bin] + if args.print_mode: + cmd.append("--print") + if args.description: + cmd.append(args.description) + + # Replace current process with claude + os.execvp(claude_bin, cmd) + + +def cmd_install_hooks(args: argparse.Namespace) -> None: + """Install Dhee hooks into Claude Code.""" + from dhee.hooks.claude_code.install import install_hooks + + result = install_hooks(force=args.force) + if result.already_installed and not args.force: + print(" Dhee hooks already installed.") + else: + action = "Created" if result.created else "Updated" + print(f" {action} {result.settings_path}") + print(f" Hooks: {', '.join(result.events)}") + if result.backed_up: + print(f" Backup: {result.backed_up}") + + +def cmd_uninstall_hooks(args: argparse.Namespace) -> None: + """Remove Dhee hooks from Claude Code.""" + from dhee.hooks.claude_code.install import uninstall_hooks + + if uninstall_hooks(): + print(" Dhee hooks removed.") + else: + print(" No Dhee hooks found.") + + def cmd_benchmark(args: argparse.Namespace) -> None: """Run performance benchmarks.""" import time @@ -458,6 +510,19 @@ def build_parser() -> argparse.ArgumentParser: p_status = sub.add_parser("status", help="Show version, config, and agents") p_status.add_argument("--json", action="store_true", help="JSON output") + # task + p_task = sub.add_parser("task", help="Start Claude Code with Dhee cognition") + p_task.add_argument("description", nargs="?", default="", help="Task description") + p_task.add_argument("--user-id", default="default", help="User ID") + p_task.add_argument("--print", dest="print_mode", action="store_true", help="One-shot mode") + + # install (hooks) + p_install = sub.add_parser("install", help="Install Dhee hooks into Claude Code") + p_install.add_argument("--force", action="store_true", help="Overwrite existing hooks") + + # uninstall-hooks + sub.add_parser("uninstall-hooks", help="Remove Dhee hooks from Claude Code") + # benchmark sub.add_parser("benchmark", help="Run performance benchmarks") @@ -481,6 +546,9 @@ def build_parser() -> argparse.ArgumentParser: "export": cmd_export, "import": cmd_import, "status": cmd_status, + "task": cmd_task, + "install": cmd_install_hooks, + "uninstall-hooks": cmd_uninstall_hooks, "benchmark": cmd_benchmark, "uninstall": cmd_uninstall, } diff --git a/dhee/hooks/__init__.py b/dhee/hooks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dhee/hooks/claude_code/__init__.py b/dhee/hooks/claude_code/__init__.py new file mode 100644 index 0000000..70bbd30 --- /dev/null +++ b/dhee/hooks/claude_code/__init__.py @@ -0,0 +1,2 @@ +from dhee.hooks.claude_code.install import ensure_installed, install_hooks, uninstall_hooks +from dhee.hooks.claude_code.renderer import estimate_tokens, render_context diff --git a/dhee/hooks/claude_code/__main__.py b/dhee/hooks/claude_code/__main__.py new file mode 100644 index 0000000..313ffa4 --- /dev/null +++ b/dhee/hooks/claude_code/__main__.py @@ -0,0 +1,256 @@ +"""Dhee Claude Code hook dispatch. + +Usage:: + + python -m dhee.hooks.claude_code + +Reads the Claude Code hook payload from stdin (JSON). +Writes JSON response to stdout. +On any error, outputs ``{}`` — never fails the host agent. + +Events handled: + SessionStart — inject full Dhee context (session + memories + insights) + UserPromptSubmit — inject relevant memories for the current prompt + PostToolUse — capture tool outcomes into Dhee memory + PreCompact — checkpoint state, re-inject context to survive compaction + Stop / SessionEnd — checkpoint session with outcomes +""" + +from __future__ import annotations + +import json +import os +import sys +from typing import Any + +_MAX_REMEMBER_CHARS = 2000 +_MAX_QUERY_CHARS = 200 + + +def _get_dhee(): + from dhee import Dhee + + return Dhee( + user_id=os.environ.get("DHEE_USER_ID", "default"), + auto_context=False, + auto_checkpoint=False, + ) + + +def _render(ctx: dict[str, Any], **kwargs: Any) -> str: + from dhee.hooks.claude_code.renderer import render_context + + return render_context(ctx, **kwargs) + + +# --------------------------------------------------------------------------- +# Handlers — each returns a dict for stdout JSON +# --------------------------------------------------------------------------- + + +def handle_session_start(payload: dict[str, Any]) -> dict[str, Any]: + dhee = _get_dhee() + + task_desc = ( + payload.get("task_description") + or payload.get("initial_prompt") + or payload.get("prompt") + or "" + ) + + ctx = dhee.context( + task_description=task_desc or None, + user_id=os.environ.get("DHEE_USER_ID", "default"), + ) + + if not ctx: + return {} + + has_content = ( + ctx.get("memories") + or ctx.get("last_session") + or ctx.get("insights") + or ctx.get("intentions") + or ctx.get("performance") + ) + if not has_content: + return {} + + xml = _render(ctx, task_description=task_desc or None) + return {"systemMessage": xml} + + +def handle_user_prompt(payload: dict[str, Any]) -> dict[str, Any]: + if isinstance(payload, dict): + prompt = str(payload.get("prompt", payload.get("content", ""))) + elif isinstance(payload, str): + prompt = payload + else: + prompt = str(payload) + + if not prompt.strip(): + return {} + + dhee = _get_dhee() + results = dhee.recall(query=prompt[:_MAX_QUERY_CHARS], limit=5) + if not results: + return {} + + xml = _render({"memories": results}, max_tokens=500) + return {"systemMessage": xml} + + +def handle_post_tool(payload: dict[str, Any]) -> dict[str, Any]: + if not isinstance(payload, dict): + return {} + + tool_name = payload.get("tool_name", "") + tool_input = payload.get("tool_input", {}) + tool_result = payload.get("tool_result", "") + success = payload.get("success", True) + + if not tool_name: + return {} + + write_tools = {"Edit", "Write", "MultiEdit", "NotebookEdit"} + shell_tools = {"Bash", "BashOutput"} + if tool_name not in write_tools and tool_name not in shell_tools: + return {} + + if tool_name in write_tools: + path = "" + if isinstance(tool_input, dict): + path = tool_input.get("file_path", tool_input.get("path", "")) + content = f"edited {path}" if path else f"used {tool_name}" + if not success: + content = f"failed to edit {path}: {str(tool_result)[:100]}" + else: + cmd = "" + if isinstance(tool_input, dict): + cmd = tool_input.get("command", "")[:150] + content = f"ran: {cmd}" if cmd else f"used {tool_name}" + if not success: + stderr = str(tool_result)[:200] if tool_result else "unknown error" + content = f"command failed: {cmd[:80]} — {stderr}" + + from dhee.hooks.claude_code.privacy import filter_secrets + + content = filter_secrets(content) + if len(content) < 10: + return {} + + try: + dhee = _get_dhee() + dhee.remember( + content=content[:_MAX_REMEMBER_CHARS], + metadata={"source": "claude_code_hook", "tool": tool_name, "success": success}, + ) + except Exception: + pass + + return {} + + +def handle_pre_compact(payload: dict[str, Any]) -> dict[str, Any]: + dhee = _get_dhee() + + summary = "session compacted" + if isinstance(payload, dict): + summary = payload.get("summary", summary) + + try: + dhee.checkpoint(summary=summary, status="compacted") + except Exception: + pass + + ctx = dhee.context(user_id=os.environ.get("DHEE_USER_ID", "default")) + if not ctx: + return {} + xml = _render(ctx) + return {"systemMessage": xml} + + +def handle_stop(payload: dict[str, Any]) -> dict[str, Any]: + dhee = _get_dhee() + + summary = "session ended" + task_type = None + outcome_score = None + what_worked = None + what_failed = None + + if isinstance(payload, dict): + summary = payload.get("summary", payload.get("task_description", summary)) + task_type = payload.get("task_type") + if payload.get("outcome_score") is not None: + try: + outcome_score = float(payload["outcome_score"]) + except (TypeError, ValueError): + pass + what_worked = payload.get("what_worked") + what_failed = payload.get("what_failed") + + try: + dhee.checkpoint( + summary=summary, + task_type=task_type, + outcome_score=outcome_score, + what_worked=what_worked, + what_failed=what_failed, + status="completed", + repo=os.getcwd(), + ) + except Exception: + pass + + return {} + + +# --------------------------------------------------------------------------- +# Dispatch +# --------------------------------------------------------------------------- + +_HANDLERS = { + "SessionStart": handle_session_start, + "UserPromptSubmit": handle_user_prompt, + "PostToolUse": handle_post_tool, + "PreCompact": handle_pre_compact, + "Stop": handle_stop, + "SessionEnd": handle_stop, +} + + +def main() -> int: + if len(sys.argv) < 2: + sys.stderr.write("usage: python -m dhee.hooks.claude_code \n") + sys.stdout.write("{}\n") + return 1 + + event = sys.argv[1] + handler = _HANDLERS.get(event) + if not handler: + sys.stdout.write("{}\n") + return 0 + + try: + raw = sys.stdin.read() or "{}" + except Exception: + raw = "{}" + + try: + payload = json.loads(raw) + except json.JSONDecodeError: + payload = {"prompt": raw, "raw": raw} + + try: + result = handler(payload) + sys.stdout.write(json.dumps(result or {}) + "\n") + except Exception as exc: + sys.stderr.write(f"dhee hook {event}: {exc}\n") + sys.stdout.write("{}\n") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/dhee/hooks/claude_code/install.py b/dhee/hooks/claude_code/install.py new file mode 100644 index 0000000..4a8e187 --- /dev/null +++ b/dhee/hooks/claude_code/install.py @@ -0,0 +1,173 @@ +"""Install Dhee hooks into Claude Code settings. + +Writes hook entries into ``~/.claude/settings.json`` so that every Claude Code +session automatically gets Dhee cognition: memory injection on start, learning +on tool use, checkpoint on exit. No markdown files, no SKILL.md, no plugins +directory — just Python hooks in the agent's native lifecycle. +""" + +from __future__ import annotations + +import json +import shutil +import sys +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +HOOK_EVENTS: tuple[str, ...] = ( + "SessionStart", + "UserPromptSubmit", + "PostToolUse", + "PreCompact", + "Stop", + "SessionEnd", +) + +TOOL_MATCHERS: dict[str, str] = { + "PostToolUse": "Edit|Write|MultiEdit|Bash", +} + + +@dataclass +class InstallResult: + settings_path: Path + events: tuple[str, ...] + created: bool = False + updated: bool = False + already_installed: bool = False + backed_up: Path | None = None + + +def _settings_path() -> Path: + return Path.home() / ".claude" / "settings.json" + + +def _python_cmd() -> str: + return sys.executable or "python3" + + +def _build_entry(event: str) -> dict[str, Any]: + cmd = f"{_python_cmd()} -m dhee.hooks.claude_code {event}" + entry: dict[str, Any] = { + "hooks": [{"type": "command", "command": cmd, "timeout": 10}], + } + if event in TOOL_MATCHERS: + entry["matcher"] = TOOL_MATCHERS[event] + return entry + + +def _has_dhee_hook(entries: list[dict[str, Any]]) -> bool: + for entry in entries: + for hook in entry.get("hooks", []): + if "dhee.hooks.claude_code" in hook.get("command", ""): + return True + return False + + +def _all_installed(hooks: dict[str, Any], events: tuple[str, ...]) -> bool: + for event in events: + entries = hooks.get(event, []) + if not isinstance(entries, list) or not _has_dhee_hook(entries): + return False + return True + + +def install_hooks( + *, + force: bool = False, + events: tuple[str, ...] = HOOK_EVENTS, +) -> InstallResult: + """Install Dhee hooks into ``~/.claude/settings.json``.""" + path = _settings_path() + path.parent.mkdir(parents=True, exist_ok=True) + + settings: dict[str, Any] = {} + if path.exists(): + try: + settings = json.loads(path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, Exception): + settings = {} + + existing_hooks = settings.get("hooks", {}) + + if not force and _all_installed(existing_hooks, events): + return InstallResult( + settings_path=path, + events=events, + already_installed=True, + ) + + backed_up = None + if path.exists(): + backup = path.with_suffix(".json.dhee-backup") + shutil.copy2(path, backup) + backed_up = backup + + hooks = dict(existing_hooks) + for event in events: + our_entry = _build_entry(event) + if event in hooks and not force: + existing = hooks[event] + if isinstance(existing, list) and _has_dhee_hook(existing): + continue + if isinstance(existing, list): + existing.append(our_entry) + else: + hooks[event] = [our_entry] + else: + hooks[event] = [our_entry] + + settings["hooks"] = hooks + + created = not path.exists() + path.write_text(json.dumps(settings, indent=2) + "\n", encoding="utf-8") + + return InstallResult( + settings_path=path, + events=events, + created=created, + updated=not created, + backed_up=backed_up, + ) + + +def ensure_installed() -> InstallResult: + """Install hooks if not already present.""" + return install_hooks() + + +def uninstall_hooks() -> bool: + """Remove Dhee hooks from settings.json.""" + path = _settings_path() + if not path.exists(): + return False + + try: + settings = json.loads(path.read_text(encoding="utf-8")) + except Exception: + return False + + hooks = settings.get("hooks", {}) + changed = False + + for event in list(hooks.keys()): + entries = hooks[event] + if not isinstance(entries, list): + continue + filtered = [ + e for e in entries + if not any("dhee.hooks.claude_code" in h.get("command", "") for h in e.get("hooks", [])) + ] + if len(filtered) != len(entries): + changed = True + if filtered: + hooks[event] = filtered + else: + del hooks[event] + + if changed: + settings["hooks"] = hooks + path.write_text(json.dumps(settings, indent=2) + "\n", encoding="utf-8") + + return changed diff --git a/dhee/hooks/claude_code/privacy.py b/dhee/hooks/claude_code/privacy.py new file mode 100644 index 0000000..46a78e4 --- /dev/null +++ b/dhee/hooks/claude_code/privacy.py @@ -0,0 +1,34 @@ +"""Strip secrets from tool outputs before storing in Dhee memory. + +Applied automatically by PostToolUse hook before ``dhee.remember()``. +Conservative: redacts aggressively. Better to lose a token than leak a key. +""" + +from __future__ import annotations + +import re + +_SECRET_PATTERNS = [ + re.compile(r'(?:api[_-]?key|apikey|api_secret)\s*[:=]\s*["\']?[\w\-\.]{20,}["\']?', re.IGNORECASE), + re.compile(r'Bearer\s+[\w\-\.]{20,}', re.IGNORECASE), + re.compile(r'AKIA[A-Z0-9]{16}'), + re.compile(r'(?:aws_secret_access_key|aws_access_key_id)\s*[:=]\s*["\']?[\w/\+]{20,}["\']?', re.IGNORECASE), + re.compile(r'(?:password|passwd|secret|token|credential)\s*[:=]\s*["\']?[^\s"\']{8,}["\']?', re.IGNORECASE), + re.compile(r'-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA )?PRIVATE KEY-----'), + re.compile(r'gh[pousr]_[A-Za-z0-9_]{36,}'), + re.compile(r'sk-ant-[a-zA-Z0-9\-]{20,}'), + re.compile(r'sk-[a-zA-Z0-9\-]{20,}'), + re.compile(r'(?:key|secret|token|auth)\s*[:=]\s*["\']?[a-f0-9]{32,}["\']?', re.IGNORECASE), +] + +_REDACTED = "[REDACTED]" + + +def filter_secrets(text: str) -> str: + """Remove likely secrets from text before memory storage.""" + if not text: + return text + result = text + for pattern in _SECRET_PATTERNS: + result = pattern.sub(_REDACTED, result) + return result diff --git a/dhee/hooks/claude_code/renderer.py b/dhee/hooks/claude_code/renderer.py new file mode 100644 index 0000000..7b897c7 --- /dev/null +++ b/dhee/hooks/claude_code/renderer.py @@ -0,0 +1,310 @@ +"""Render Dhee HyperContext as token-budgeted XML for Claude Code. + +Takes the raw dict from ``Dhee.context()`` and produces a compact XML block +that fits inside a token budget. No Pydantic models, no sankhya-os dependency. + +Priority order (highest first): + session > performance > insights > intentions > memories > + beliefs > policies > episodes > warnings + +Sections that would overflow the budget are dropped silently, lowest priority +first. Empty sections never emit tags. +""" + +from __future__ import annotations + +from typing import Any +from xml.sax.saxutils import escape as _xml_escape + +DEFAULT_TOKEN_BUDGET = 1500 +CHARS_PER_TOKEN = 3.5 + +HEADER = ( + "Dhee cognition active. The context block below contains your memory and " + "learned patterns from prior sessions. Treat as ground truth for this turn — " + "do not re-derive what is already here. Honor warnings literally." +) + + +def render_context( + ctx: dict[str, Any], + *, + task_description: str | None = None, + max_tokens: int = DEFAULT_TOKEN_BUDGET, + max_memories: int = 8, + max_insights: int = 5, + max_intentions: int = 3, +) -> str: + """Render a Dhee context dict as XML for Claude Code system-prompt injection.""" + sections: list[tuple[int, list[str]]] = [ + (100, _session_block(ctx.get("last_session"))), + (90, _performance_block(ctx.get("performance", []))), + (80, _insights_block(ctx.get("insights", []), max_insights)), + (75, _intentions_block(ctx.get("intentions", []), max_intentions)), + (70, _memories_block(ctx.get("memories", []), max_memories)), + (60, _beliefs_block(ctx.get("beliefs", []))), + (50, _policies_block(ctx.get("policies", []))), + (40, _episodes_block(ctx.get("episodes", []))), + (30, _warnings_block(ctx.get("warnings", []))), + ] + + budget_chars = int(max_tokens * CHARS_PER_TOKEN) + + attrs = "" + if task_description: + attrs = f' task="{_esc_attr(task_description[:120])}"' + + open_tag = f"" + close_tag = "" + body: list[str] = [HEADER, open_tag] + used = len(HEADER) + len(open_tag) + len(close_tag) + 2 # newlines + + for _priority, lines in sections: + if not lines: + continue + block = "\n".join(f" {line}" for line in lines) + cost = len(block) + 1 + if used + cost > budget_chars: + continue + body.append(block) + used += cost + + body.append(close_tag) + return "\n".join(body) + "\n" + + +def estimate_tokens(text: str) -> int: + """Conservative token estimate.""" + return int(len(text) / CHARS_PER_TOKEN) + + +# --------------------------------------------------------------------------- +# Section builders — each returns list[str] of XML lines, [] if nothing to emit +# --------------------------------------------------------------------------- + + +def _session_block(session: dict[str, Any] | None) -> list[str]: + if not session or not isinstance(session, dict): + return [] + decisions = session.get("decisions") or [] + files = session.get("files_touched") or session.get("files") or [] + todos = session.get("todos") or session.get("todos_remaining") or [] + summary = session.get("summary") or session.get("task_summary") or "" + status = session.get("status", "") + if not (decisions or files or todos or summary): + return [] + attrs = _attrs(status=status) if status else "" + inner: list[str] = [] + if summary: + inner.append(f"{_esc(summary[:200])}") + if decisions: + inner.append(_flat_list("decisions", "d", decisions)) + if files: + inner.append(_flat_list("files", "f", files)) + if todos: + inner.append(_flat_list("todos", "t", todos)) + return _container("session", attrs, inner) + + +def _performance_block(perf: list[Any]) -> list[str]: + if not perf: + return [] + out: list[str] = [""] + for row in perf[:5]: + if not isinstance(row, dict): + continue + attrs = _attrs( + type=str(row.get("task_type", "")), + attempts=str(row.get("total_attempts", 0)), + best=_fmt(row.get("best_score")), + avg=_fmt(row.get("avg_score")), + trend=_fmt(row.get("trend")), + ) + if attrs: + out.append(f" ") + if len(out) == 1: + return [] + out.append("") + return out + + +def _insights_block(insights: list[Any], limit: int) -> list[str]: + if not insights: + return [] + items: list[str] = [] + for row in insights[:limit]: + if isinstance(row, dict): + content = str(row.get("content", "")) + tag = str(row.get("task_type", row.get("tag", ""))) + if not content: + continue + a = _attrs(tag=tag) if tag else "" + items.append(_tag("i", a, content)) + elif isinstance(row, str) and row: + items.append(_tag("i", "", row)) + if not items: + return [] + return [""] + [f" {i}" for i in items] + [""] + + +def _intentions_block(intentions: list[Any], limit: int) -> list[str]: + if not intentions: + return [] + items: list[str] = [] + for row in intentions[:limit]: + if isinstance(row, dict): + content = str( + row.get("content") + or row.get("remember_to") + or row.get("description") + or "" + ) + triggers = row.get("trigger_keywords") or row.get("triggers") or [] + if not content: + continue + trig = ",".join(str(t) for t in triggers[:5]) if isinstance(triggers, list) else str(triggers) + a = _attrs(triggers=trig) if trig else "" + items.append(_tag("i", a, content)) + elif isinstance(row, str) and row: + items.append(_tag("i", "", row)) + if not items: + return [] + return [""] + [f" {i}" for i in items] + [""] + + +def _memories_block(memories: list[Any], limit: int) -> list[str]: + if not memories: + return [] + + def _score(m: Any) -> float: + if isinstance(m, dict): + return float(m.get("score", m.get("composite_score", m.get("strength", 0)))) + return 0.0 + + ranked = sorted(memories, key=_score, reverse=True)[:limit] + items: list[str] = [] + for m in ranked: + if isinstance(m, dict): + text = str(m.get("memory", m.get("content", m.get("details", "")))) + score = _score(m) + if not text: + continue + items.append(f'{_esc(text)}') + elif isinstance(m, str) and m: + items.append(f"{_esc(m)}") + if not items: + return [] + return [""] + [f" {i}" for i in items] + [""] + + +def _beliefs_block(beliefs: list[Any]) -> list[str]: + if not beliefs: + return [] + items: list[str] = [] + for b in beliefs[:5]: + if not isinstance(b, dict): + continue + claim = str(b.get("claim", b.get("content", ""))) + btype = str(b.get("belief_type", b.get("type", ""))) + conf = _fmt(b.get("confidence")) + if not claim: + continue + items.append(_tag("b", _attrs(type=btype, conf=conf), claim)) + if not items: + return [] + return [""] + [f" {i}" for i in items] + [""] + + +def _policies_block(policies: list[Any]) -> list[str]: + if not policies: + return [] + items: list[str] = [] + for p in policies[:3]: + if not isinstance(p, dict): + continue + name = str(p.get("name", "")) + desc = str(p.get("description", "")) + conf = _fmt(p.get("confidence")) + if not (name or desc): + continue + items.append(_tag("p", _attrs(name=name, conf=conf), desc)) + if not items: + return [] + return [""] + [f" {i}" for i in items] + [""] + + +def _episodes_block(episodes: list[Any]) -> list[str]: + if not episodes: + return [] + items: list[str] = [] + for e in episodes[:3]: + if not isinstance(e, dict): + continue + summary = str(e.get("summary", "")) + etype = str(e.get("episode_type", e.get("type", ""))) + if not summary: + continue + items.append(_tag("e", _attrs(type=etype), summary)) + if not items: + return [] + return [""] + [f" {i}" for i in items] + [""] + + +def _warnings_block(warnings: list[str]) -> list[str]: + if not warnings: + return [] + inner = [f"{_esc(str(w))}" for w in warnings if w] + if not inner: + return [] + return _container("warnings", "", inner) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _esc(text: str) -> str: + return _xml_escape(str(text)) + + +def _esc_attr(text: str) -> str: + return _xml_escape(str(text), {'"': """}) + + +def _attrs(**kwargs: str) -> str: + parts: list[str] = [] + for key, value in kwargs.items(): + if value is None or value == "": + continue + parts.append(f'{key}="{_esc_attr(value)}"') + return " ".join(parts) + + +def _fmt(value: Any) -> str: + try: + return f"{float(value):.2f}" + except (TypeError, ValueError): + return "" + + +def _tag(name: str, attrs: str, text: str) -> str: + safe = _esc(text) + if attrs: + return f"<{name} {attrs}>{safe}" + return f"<{name}>{safe}" + + +def _flat_list(wrapper: str, item: str, items: list) -> str: + if not items: + return "" + inner = " ".join(f"<{item}>{_esc(str(x))}" for x in items if x) + return f"<{wrapper}>{inner}" + + +def _container(tag: str, attrs: str, inner: list[str]) -> list[str]: + inner = [line for line in inner if line] + if not inner: + return [] + open_tag = f"<{tag} {attrs}>" if attrs else f"<{tag}>" + return [open_tag, *[f" {line}" for line in inner], f""] diff --git a/pyproject.toml b/pyproject.toml index 23b163c..57ee451 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "dhee" -version = "3.2.0" +version = "3.3.0" description = "Cognition layer for AI agents — persistent memory, performance tracking, and insight synthesis" readme = "README.md" requires-python = ">=3.9" diff --git a/tests/test_claude_code_hooks.py b/tests/test_claude_code_hooks.py new file mode 100644 index 0000000..e1e4747 --- /dev/null +++ b/tests/test_claude_code_hooks.py @@ -0,0 +1,388 @@ +"""Tests for the Dhee Claude Code hook system. + +Covers: XML renderer, privacy filter, installer, dispatch handlers. +""" + +from __future__ import annotations + +import json +import re +import tempfile +from pathlib import Path +from unittest.mock import patch +from xml.etree import ElementTree as ET + +import pytest + +from dhee.hooks.claude_code.install import ( + InstallResult, + _all_installed, + ensure_installed, + install_hooks, + uninstall_hooks, +) +from dhee.hooks.claude_code.privacy import filter_secrets +from dhee.hooks.claude_code.renderer import ( + CHARS_PER_TOKEN, + DEFAULT_TOKEN_BUDGET, + HEADER, + estimate_tokens, + render_context, +) + + +# --------------------------------------------------------------------------- +# Renderer +# --------------------------------------------------------------------------- + + +def _extract_xml(rendered: str) -> ET.Element: + """Extract and parse the XML block from rendered output.""" + match = re.search(r"]", rendered) + if not match: + raise ValueError("No block found") + return ET.fromstring(rendered[match.start() :]) + + +def _rich_ctx() -> dict: + return { + "last_session": { + "summary": "Fixed flaky auth test", + "status": "completed", + "decisions": ["mock datetime.utcnow"], + "files_touched": ["tests/test_auth.py"], + "todos": ["verify with CI"], + }, + "performance": [ + { + "task_type": "bug_fix", + "total_attempts": 3, + "best_score": 0.9, + "avg_score": 0.82, + "trend": 0.05, + } + ], + "insights": [ + {"content": "freezegun works", "task_type": "bug_fix"}, + ], + "intentions": [ + {"content": "prefer freezegun", "trigger_keywords": ["flaky", "time"]}, + ], + "memories": [ + {"memory": "JWT uses 60-min expiry", "score": 0.57}, + {"memory": "flaky test fixed by mocking clock", "score": 0.83}, + ], + "beliefs": [ + {"claim": "pytest-asyncio used", "belief_type": "fact", "confidence": 0.9}, + ], + "policies": [ + {"name": "verify_first", "description": "Run tests first", "confidence": 0.87}, + ], + "warnings": ["Repeated failure detected"], + } + + +class TestRenderer: + def test_empty_context_produces_minimal_xml(self): + xml = render_context({}) + assert xml.startswith(HEADER) + assert "" in xml + assert "" in xml + + def test_empty_context_has_no_sections(self): + xml = render_context({}) + assert "alert("xss") & co', "score": 0.5}], + "warnings": ['contains "quoted" '], + } + xml = render_context(ctx) + assert "' in mem.text + + def test_budget_drops_low_priority_sections(self): + ctx = _rich_ctx() + ctx["memories"] = [{"memory": f"mem {i} " * 20, "score": 0.5} for i in range(20)] + tight = render_context(ctx, max_tokens=200) + full = render_context(ctx, max_tokens=3000) + assert estimate_tokens(tight) < estimate_tokens(full) + assert " Path: + return tmpdir / "settings.json" + + def test_fresh_install_creates_file(self, tmp_path): + fake = self._fake_settings(tmp_path) + with patch("dhee.hooks.claude_code.install._settings_path", return_value=fake): + result = install_hooks() + assert result.created + assert fake.exists() + settings = json.loads(fake.read_text()) + assert "SessionStart" in settings["hooks"] + assert "Stop" in settings["hooks"] + + def test_all_events_registered(self, tmp_path): + fake = self._fake_settings(tmp_path) + with patch("dhee.hooks.claude_code.install._settings_path", return_value=fake): + result = install_hooks() + settings = json.loads(fake.read_text()) + for event in result.events: + assert event in settings["hooks"] + cmd = settings["hooks"][event][0]["hooks"][0]["command"] + assert "dhee.hooks.claude_code" in cmd + assert event in cmd + + def test_idempotent_reinstall(self, tmp_path): + fake = self._fake_settings(tmp_path) + with patch("dhee.hooks.claude_code.install._settings_path", return_value=fake): + install_hooks() + result2 = install_hooks() + assert result2.already_installed + + def test_preserves_existing_hooks(self, tmp_path): + fake = self._fake_settings(tmp_path) + fake.write_text(json.dumps({ + "hooks": {"UserPromptSubmit": [{"hooks": [{"type": "command", "command": "other-tool"}]}]}, + "permissions": {"allow": ["Read"]}, + })) + with patch("dhee.hooks.claude_code.install._settings_path", return_value=fake): + install_hooks() + settings = json.loads(fake.read_text()) + assert "permissions" in settings + usp = settings["hooks"]["UserPromptSubmit"] + assert len(usp) == 2 + cmds = [e["hooks"][0]["command"] for e in usp] + assert "other-tool" in cmds + + def test_post_tool_use_has_matcher(self, tmp_path): + fake = self._fake_settings(tmp_path) + with patch("dhee.hooks.claude_code.install._settings_path", return_value=fake): + install_hooks() + settings = json.loads(fake.read_text()) + assert settings["hooks"]["PostToolUse"][0]["matcher"] == "Edit|Write|MultiEdit|Bash" + + def test_uninstall_removes_dhee_hooks(self, tmp_path): + fake = self._fake_settings(tmp_path) + with patch("dhee.hooks.claude_code.install._settings_path", return_value=fake): + install_hooks() + assert uninstall_hooks() + settings = json.loads(fake.read_text()) + assert settings.get("hooks", {}) == {} + + def test_uninstall_preserves_other_hooks(self, tmp_path): + fake = self._fake_settings(tmp_path) + fake.write_text(json.dumps({ + "hooks": {"UserPromptSubmit": [{"hooks": [{"type": "command", "command": "other-tool"}]}]}, + })) + with patch("dhee.hooks.claude_code.install._settings_path", return_value=fake): + install_hooks() + uninstall_hooks() + settings = json.loads(fake.read_text()) + usp = settings["hooks"]["UserPromptSubmit"] + assert len(usp) == 1 + assert usp[0]["hooks"][0]["command"] == "other-tool" + + def test_force_overwrites(self, tmp_path): + fake = self._fake_settings(tmp_path) + with patch("dhee.hooks.claude_code.install._settings_path", return_value=fake): + install_hooks() + result = install_hooks(force=True) + assert result.updated + assert result.backed_up is not None + + def test_backup_created(self, tmp_path): + fake = self._fake_settings(tmp_path) + fake.write_text("{}") + with patch("dhee.hooks.claude_code.install._settings_path", return_value=fake): + result = install_hooks() + assert result.backed_up is not None + assert result.backed_up.exists() + + def test_ensure_installed_returns_result_on_first_call(self, tmp_path): + fake = self._fake_settings(tmp_path) + with patch("dhee.hooks.claude_code.install._settings_path", return_value=fake): + result = ensure_installed() + assert result.created or result.updated or result.already_installed + + +# --------------------------------------------------------------------------- +# Dispatch handlers (unit-level, no real Dhee) +# --------------------------------------------------------------------------- + + +class TestDispatchHandlers: + def test_post_tool_edit_builds_content(self): + from dhee.hooks.claude_code.__main__ import handle_post_tool + + with patch("dhee.hooks.claude_code.__main__._get_dhee") as mock: + mock.return_value.remember.return_value = {"stored": True} + result = handle_post_tool({ + "tool_name": "Edit", + "tool_input": {"file_path": "/src/auth.py"}, + "success": True, + }) + assert result == {} + mock.return_value.remember.assert_called_once() + call_args = mock.return_value.remember.call_args + assert "/src/auth.py" in call_args.kwargs.get("content", call_args[1].get("content", "")) + + def test_post_tool_ignores_read_tools(self): + from dhee.hooks.claude_code.__main__ import handle_post_tool + + result = handle_post_tool({"tool_name": "Read", "tool_input": {}, "success": True}) + assert result == {} + + def test_post_tool_filters_secrets(self): + from dhee.hooks.claude_code.__main__ import handle_post_tool + + with patch("dhee.hooks.claude_code.__main__._get_dhee") as mock: + mock.return_value.remember.return_value = {"stored": True} + handle_post_tool({ + "tool_name": "Bash", + "tool_input": {"command": "echo sk-ant-api03-secret12345678901234567890"}, + "success": True, + }) + if mock.return_value.remember.called: + content = mock.return_value.remember.call_args.kwargs.get( + "content", mock.return_value.remember.call_args[1].get("content", "") + ) + assert "sk-ant-api03" not in content + + def test_user_prompt_empty_returns_empty(self): + from dhee.hooks.claude_code.__main__ import handle_user_prompt + + result = handle_user_prompt({"prompt": ""}) + assert result == {} + + def test_stop_handler_calls_checkpoint(self): + from dhee.hooks.claude_code.__main__ import handle_stop + + with patch("dhee.hooks.claude_code.__main__._get_dhee") as mock: + mock.return_value.checkpoint.return_value = {} + handle_stop({"summary": "done", "task_type": "bug_fix", "outcome_score": 0.9}) + mock.return_value.checkpoint.assert_called_once() + kwargs = mock.return_value.checkpoint.call_args.kwargs + assert kwargs["summary"] == "done" + assert kwargs["task_type"] == "bug_fix" + assert kwargs["outcome_score"] == 0.9