diff --git a/AGENTS.md b/AGENTS.md index 5d060b1..2feb5df 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,87 +1,68 @@ -# PROJECT KNOWLEDGE BASE +# Code Context Engine Knowledge Base -**Generated:** 2026-02-02 01:45 -**Commit:** bd3baf8 -**Branch:** refactor/cli-commands-architecture +## Overview -## OVERVIEW -git-ai CLI + MCP server. TypeScript implementation for AI-powered Git operations with semantic search and graph-based code analysis. Indices stored in `.git-ai/`. +Code Context Engine is a TypeScript local runtime for agent-oriented code retrieval and context construction. The runtime is the product core. CLI and MCP are thin adapters retained only where they help local debugging or agent integration. -## STRUCTURE -``` -git-ai-cli-v2/ -├── src/ -│ ├── cli/ # CLI command architecture (NEW: registry + handlers + schemas) -│ │ ├── types.ts # Core types, executeHandler -│ │ ├── registry.ts # Handler registry (20 commands) -│ │ ├── helpers.ts # Shared utilities -│ │ ├── schemas/ # Zod validation schemas -│ │ ├── handlers/ # Business logic handlers -│ │ └── commands/ # Commander.js wrappers -│ ├── commands/ # Command aggregator (ai.ts only) -│ ├── core/ # Indexing, graph, storage, parsers -│ └── mcp/ # MCP server implementation -├── test/ # Node test runner tests -├── dist/ # Build output -└── .git-ai/ # Indices (LanceDB) +## Structure + +```text +src/ + index.ts public runtime entry + domain/ stable agent-facing contracts + retrieval/ + runtime.ts createCodeContextEngine() + lexical/ lexical-first retrieval + symbol/ higher-level navigation capabilities + tasks/ + review/ review context builders + impact/ impact analysis + tests/ test discovery and mapping + implementation/ implementation context + extensions/ extension point discovery + diff/ diff parsing and analysis + core/ parsers, indexers, LanceDB, CozoDB, repo-map + cli/ thin local adapter + commands/ ai command aggregation + mcp/ thin MCP adapter +bin/ + code-context-engine.ts package CLI entry ``` -## WHERE 
TO LOOK -| Task | Location | +## Where To Look + +| Need | Location | |------|----------| -| CLI commands | `src/cli/commands/*.ts` (new architecture) | -| CLI handlers | `src/cli/handlers/*.ts` (business logic) | -| CLI schemas | `src/cli/schemas/*.ts` (Zod validation) | -| Handler registry | `src/cli/registry.ts` (all 20 commands) | -| Command aggregator | `src/commands/ai.ts` (entry point) | -| Indexing logic | `src/core/indexer.ts`, `src/core/indexerIncremental.ts` | -| Graph queries | `src/core/cozo.ts`, `src/core/astGraph.ts` | -| Semantic search | `src/core/semantic.ts`, `src/core/sq8.ts` | -| Repo map | `src/core/repoMap.ts` | -| MCP tools | `src/mcp/`, `src/core/graph.ts` | -| Language parsers | `src/core/parser/*.ts` | +| Runtime API | `src/index.ts`, `src/retrieval/runtime.ts` | +| Stable contracts | `src/domain/*.ts` | +| Lexical retrieval | `src/retrieval/lexical/*.ts` | +| Symbol navigation | `src/retrieval/symbol/*.ts` | +| Task builders | `src/tasks/**/*` | +| MCP thin adapter | `src/mcp/server.ts`, `src/mcp/tools/taskTools.ts`, `src/mcp/handlers/taskHandlers.ts` | +| CLI thin adapter | `src/commands/ai.ts`, `src/cli/commands/*` | +| Core indexing and graph | `src/core/*` | -## CODE MAP -| Symbol | Type | Location | Role | -|--------|------|----------|------| -| `indexer` | fn | `core/indexer.ts` | Full repository indexing | -| `incrementalIndexer` | fn | `core/indexerIncremental.ts` | Incremental updates | -| `GitAiService` | class | `mcp/index.ts` | MCP entry point | -| `cozoQuery` | fn | `core/cozo.ts` | Graph DB queries | -| `semanticSearch` | fn | `core/semantic.ts` | Vector similarity | -| `repoMap` | fn | `core/repoMap.ts` | PageRank-based repo overview | -| `resolveGitRoot` | fn | `core/git.ts` | Repo boundary detection | +## Primary Entry Points -## CONVENTIONS -- **strict: true** TypeScript - no implicit any -- **Imports**: Node built-ins → external deps → internal modules -- **Formatting**: 2 spaces, single quotes, trailing commas -- 
**Errors**: Structured JSON logging via `createLogger` -- **CLI output**: JSON on stdout, logs on stderr -- **External inputs**: Use `unknown`, narrow early +- `createCodeContextEngine()` is the main product entry point. +- `code-context-engine ai serve` starts the thin MCP adapter. +- `code-context-engine ai index --overwrite` rebuilds local index state. -## ANTI-PATTERNS (THIS PROJECT) -- Never suppress type errors (`as any`, `@ts-ignore`) -- Never throw raw strings - throw `Error` objects -- Never commit without explicit request -- No empty catch blocks +## Thin MCP Surface -## UNIQUE STYLES -- `.git-ai/` directory for all index data (not config files) -- MCP tools require explicit `path` argument -- Multi-language parser architecture (TS, Go, Rust, Python, C, Markdown, YAML) -- PageRank-based repo-map for code importance scoring +- `check_index` +- `rebuild_index` +- `read_file` +- `repo_map` +- `lexical_search` +- `implementation_context` +- `find_tests` +- `find_impact` +- `find_extension_points` +- `review_context_for_diff` -## COMMANDS -```bash -npm i # Install dependencies -npm run build # Build to dist/ -npm run start # Dev run (e.g., --help) -npm test # Build + node --test -node dist/bin/git-ai.js --help # Validate packaged output -``` +## Retrieval Policy -## NOTES -- Indices auto-update on git operations -- `checkIndex` gates symbol/semantic/graph queries -- MCP server exposes git-ai tools for external IDEs +- lexical / symbol first +- graph expand second +- semantic rerank last diff --git a/README.md b/README.md index 2154118..bfc2238 100644 --- a/README.md +++ b/README.md @@ -1,510 +1,116 @@ -

- git-ai logo -

- -# git-ai +# Code Context Engine [![ci](https://github.com/mars167/git-ai-cli/actions/workflows/ci.yml/badge.svg)](https://github.com/mars167/git-ai-cli/actions/workflows/ci.yml) [![release](https://github.com/mars167/git-ai-cli/actions/workflows/release.yml/badge.svg)](https://github.com/mars167/git-ai-cli/actions/workflows/release.yml) [![license](https://img.shields.io/github/license/mars167/git-ai-cli)](./LICENSE) -[![npm version](https://img.shields.io/npm/v/@mars167/git-ai)](https://www.npmjs.com/package/@mars167/git-ai) -[![npm downloads](https://img.shields.io/npm/dm/@mars167/git-ai)](https://www.npmjs.com/package/@mars167/git-ai) -[![Agent Skill](https://img.shields.io/badge/Agent_Skill-git--ai--code--search-blue)](https://skills.sh) - -[🇨🇳 简体中文](./README.zh-CN.md) | **English** - ---- - -
- -### 🚀 Quick Install - -**For AI Agents (Claude Code, Cursor, Windsurf, etc.)** - -```bash -npx skills add mars167/git-ai-cli/skills/git-ai-code-search -``` - -**For CLI Usage** - -```bash -npm install -g @mars167/git-ai -``` - -
- ---- - -## Adding a Semantic Layer to Your Codebase, Enabling AI to Evolve from "Reading Code" to "Understanding Code" - -**Code semantics should be versioned and traceable, just like code itself** - -git-ai is a local code understanding tool that builds a semantic layer for your codebase using advanced RAG techniques, enabling AI Agents and developers to deeply understand code structure and relationships. - -### ✨ Why git-ai? - -- **🔗 Advanced RAG**: Combines vector retrieval + graph retrieval for multi-dimensional semantic understanding -- **📊 Fast & Accurate**: Optimized repo-map with PageRank-based importance scoring -- **🔄 Always Available**: Indices travel with code, available immediately after checkout, no rebuild needed -- **🤖 AI-Native**: MCP Server enables Claude, Trae and other Agents to deeply understand your codebase -- **🔒 Fully Local**: Code never leaves your machine, secure and private -- **⚡ Full Lifecycle Support**: From development to Review to refactoring, indices span the entire lifecycle -- **📊 Blazing Fast**: 10k files indexed in < 30s, search response < 100ms - ---- - -## ✨ Core Capabilities - -### 1️⃣ Semantic Search - -Find code using natural language, no need to remember file names or function names: - -```bash -git-ai ai semantic "user authentication logic" -git-ai ai semantic "database connection pool configuration" -git-ai ai semantic "error handling middleware" -``` - -### 2️⃣ Symbol Relationship Analysis - -Understand relationships between code: - -```bash -# Find function callers -git-ai ai graph callers authenticateUser - -# Find functions called by this function -git-ai ai graph callees authenticateUser - -# Trace complete call chain -git-ai ai graph chain authenticateUser --max-depth 3 -``` - -### 3️⃣ Multi-Language Support - -Supports multiple mainstream programming languages: - -| Language | File Extensions | -|----------|-----------------| -| JavaScript | `.js`, `.jsx` | -| TypeScript | `.ts`, `.tsx` | -| Java | `.java` | -| 
Python | `.py` | -| Go | `.go` | -| Rust | `.rs` | -| C | `.c`, `.h` | -| Markdown | `.md`, `.mdx` | -| YAML | `.yml`, `.yaml` | - ---- - -## 💡 Design Philosophy - -git-ai is built for deep code understanding through multiple retrieval strategies: - -### Advanced RAG - -Combines multiple retrieval methods for deeper understanding: -- **Vector Retrieval**: Semantic similarity matching using SQ8 quantized embeddings -- **Graph Retrieval**: Call relationship and dependency analysis via AST graphs -- **Intelligent Fusion**: Weighted combination of retrieval strategies for optimal results - -### Decentralized Semantics - -Indices travel with code, no central server required. checkout, branch, tag—all can use consistent semantic indices immediately. - -### Server Mode - -MCP Server enables any AI Agent to invoke indices, achieving true AI-assisted development. - ---- - -## 🎯 Use Cases - -### Scenario 1: Newcomers Quickly Understanding Large Projects - -> "Just joined the team, facing 100k lines of code, where do I start?" - -```bash -# 1. Get project global view -git-ai ai repo-map --max-files 20 - -# 2. Search core business logic -git-ai ai semantic "order processing flow" - -# 3. Trace key function call chains -git-ai ai graph chain processOrder --max-depth 5 -``` -*From design to development, semantic indices remain consistent* - -### Scenario 2: Pre-Refactoring Impact Analysis - -> "About to refactor this function, what will it affect?" 
- -```bash -# Find all callers -git-ai ai graph callers deprecatedFunction - -# Analyze complete call chain -git-ai ai graph chain deprecatedFunction --direction upstream -``` -*Graph analysis reveals complete impact scope* - -### Scenario 3: Bug Localization and Root Cause Analysis - -> "User reported an error, but don't know where the problem is" - -```bash -# Search related error handling code -git-ai ai semantic "user login failure handling" - -# View error propagation path -git-ai ai graph chain handleLoginError --direction upstream -``` -*Full lifecycle indices, quickly locate problem roots* - -### Scenario 4: AI Agent-Assisted Development - -> "Let Claude Desktop help me understand this project" - -After configuring git-ai MCP Server in Claude Desktop, you can converse directly: - -> "Help me analyze this project's architecture, find all payment-related code, and explain their relationships" - -Claude will automatically invoke git-ai tools to provide deep analysis. *Enabling AI to evolve from "reading code" to "understanding code"* - ---- - -## 🏗️ System Architecture - -```mermaid -graph TB - A[Git Repository] -->|Index| B[Code Parser\nMulti-Language AST] - B --> C[LanceDB\nVector Database] - B --> D[CozoDB\nGraph Database] - C --> E[MCP Server] - D --> E - E -->|Tool Call| F[AI Agent\nClaude Desktop / Cursor] - E -->|CLI| G[Developer] - B -->|Repo Map| H[PageRank Analysis\nImportance Scoring] - H --> E - style B fill:#e1f5ff,stroke:#333 - style C fill:#fff4e1,stroke:#333 - style D fill:#fff4e1,stroke:#333 - style E fill:#e8f5e9,stroke:#333 - style F fill:#f3e5f5,stroke:#333 - style H fill:#fce4ec,stroke:#333 -``` - -**Core Components**: - -- **Code Parser**: Multi-language AST extraction (TypeScript, Java, Python, Go, Rust, C, Markdown, YAML) -- **LanceDB + SQ8**: High-performance vector database with quantized embeddings for semantic search -- **CozoDB**: Graph database for AST-level relationship queries (callers, callees, chains) -- **Repo Map**: 
PageRank-based code importance analysis for project overview -- **MCP Server**: Standard protocol interface for AI Agent invocation - ---- - -## 📊 Comparison with Other Tools - -| Feature | git-ai | GitHub Code Search | Sourcegraph | -|---------|--------|-------------------|-------------| -| Local Execution | ✅ | ❌ | ❌ | -| AST-Level Analysis | ✅ | ❌ | ✅ | -| AI Agent Integration | ✅ | ❌ | ❌ | -| Free & Open Source | ✅ | ❌ | ❌ | -| Semantic Search | ✅ | ✅ | ✅ | -| Call Chain Analysis | ✅ | ❌ | ✅ | -| Multi-Language Support | ✅ | ✅ | ✅ | -| Repo Map with PageRank | ✅ | ❌ | ❌ | - ---- +[![npm version](https://img.shields.io/npm/v/code-context-engine)](https://www.npmjs.com/package/code-context-engine) +[![npm downloads](https://img.shields.io/npm/dm/code-context-engine)](https://www.npmjs.com/package/code-context-engine) -## 🚀 Quick Start +[简体中文](./README.zh-CN.md) | **English** -### 1. Install +Code Context Engine is a local runtime for review agents and coding agents. It turns a repository into structured evidence and context bundles instead of exposing search rows as the primary product. -```bash -npm install -g @mars167/git-ai -``` +## Product Direction -### 2. Initialize Repository +The product centers on: +- local TypeScript runtime +- task-oriented context builders +- agent bundle / skill templates -```bash -cd your-project -git-ai ai index --overwrite -``` +Default retrieval strategy: +- lexical / symbol first +- graph expand second +- semantic rerank last -### 3. 
Start Using Immediately +## Install ```bash -# Search code using natural language -git-ai ai semantic "user authentication logic" - -# View function call relationships -git-ai ai graph callers authenticateUser +npm install -g code-context-engine ``` -**Actual Output Example**: -```json -[ - { - "file": "src/auth/service.ts", - "line": 45, - "symbol": "authenticateUser", - "context": "async function authenticateUser(email: string, password: string)" - }, - { - "file": "src/controllers/auth.ts", - "line": 23, - "symbol": "loginHandler", - "context": "const user = await authenticateUser(req.body.email, req.body.password)" - } -] -``` - -That's it! 3 steps to get started, immediately begin deep understanding of your codebase. - -*From now on, indices are not "one-time artifacts" but "semantic assets" that evolve with your code.* - ---- - -## ⚙️ Configuration - -### File Filtering - -git-ai respects your project's ignore files to control which files are indexed: - -#### `.gitignore` - Standard Git Ignore - -Files matching patterns in `.gitignore` are excluded from indexing by default. - -#### `.aiignore` - AI-Specific Exclusions (Highest Priority) - -Create a `.aiignore` file in your repository root to exclude specific files from indexing that should be ignored by git-ai but not necessarily by Git: - -```bash -# Example .aiignore -test-fixtures/** -*.generated.ts -docs/api-reference/** -``` - -#### `.git-ai/include.txt` - Force Include (Overrides `.gitignore`) - -Sometimes you need to index generated code or files that are in `.gitignore` but important for code understanding. Create `.git-ai/include.txt` to force-index specific patterns: +## Runtime API -```bash -# Example .git-ai/include.txt -# Include generated API clients -generated/api/** - -# Include specific build artifacts that contain important types -dist/types/** - -# Include code from specific ignored directories -vendor/important-lib/** -``` - -**Priority Order (Highest to Lowest):** -1. 
`.aiignore` - Explicit exclusions always win -2. `.git-ai/include.txt` - Force-include patterns override `.gitignore` -3. `.gitignore` - Standard Git ignore patterns +```ts +import { createCodeContextEngine } from 'code-context-engine'; -**Supported Pattern Syntax:** -- `**` - Match any number of directories -- `*` - Match any characters within a directory -- `directory/` - Match entire directory (automatically converts to `directory/**`) -- `file.ts` - Match specific file -- Lines starting with `#` are comments +const engine = createCodeContextEngine({ repoRoot: '/path/to/repo' }); -**Example Configuration:** +const lexical = await engine.search.lexical({ + query: 'authenticateUser', + mode: 'exact', + pathPattern: 'src/auth/**', + lang: 'ts', +}); -```bash -# .gitignore -dist/ -generated/ -*.log +const implementation = await engine.tasks.implementationContext({ + task: 'implementation_context', + query: 'UserRepository', + symbolHints: ['findById', 'save'], +}); -# .git-ai/include.txt -generated/api/** -generated/types/** +const impact = await engine.tasks.findImpact({ + task: 'find_impact', + query: 'UserService', +}); -# .aiignore (overrides everything) -generated/test-data/** +const review = await engine.tasks.reviewContextForDiff({ + task: 'review_pr', + diffText: rawDiff, +}); ``` -With this configuration: -- ✅ `generated/api/client.ts` - Indexed (included via include.txt) -- ✅ `generated/types/models.ts` - Indexed (included via include.txt) -- ❌ `generated/test-data/mock.ts` - Not indexed (.aiignore takes priority) -- ❌ `dist/bundle.js` - Not indexed (.gitignore, not in include.txt) +## Symbol Navigation ---- +The runtime now exposes higher-level navigation capabilities: +- `findDefinition` +- `findReferences` +- `findImplementations` +- `findImporters` +- `findExports` +- `findContainingScope` -## 🛠️ Troubleshooting +## Thin MCP Adapter -### Windows Installation Issues - -git-ai uses [CozoDB](https://github.com/cozodb/cozo) for AST graph queries. 
On Windows, if you encounter installation errors related to `cozo-node`, try these solutions: - -**Option 1: Use Gitee Mirror (Recommended for users in China)** +The MCP adapter keeps only tools that directly help agent workflows: +- `check_index` +- `rebuild_index` +- `read_file` +- `repo_map` +- `lexical_search` +- `implementation_context` +- `find_tests` +- `find_impact` +- `find_extension_points` +- `review_context_for_diff` ```bash -npm install -g @mars167/git-ai --cozo_node_prebuilt_binary_host_mirror=https://gitee.com/cozodb/cozo-lib-nodejs/releases/download/ +code-context-engine ai serve +code-context-engine ai serve --http --port 3000 ``` -**Option 2: Configure npm proxy** +## Thin CLI Adapter -If you're behind a corporate firewall or proxy: +The CLI is retained only for local debugging and verification: ```bash -npm config set proxy http://your-proxy:port -npm config set https-proxy http://your-proxy:port -npm install -g @mars167/git-ai +code-context-engine ai index --overwrite +code-context-engine ai check-index +code-context-engine ai status --json +code-context-engine ai repo-map --max-files 20 +code-context-engine ai agent install +code-context-engine ai serve ``` -**Option 3: Manual binary download** - -1. Download the Windows binary from [cozo-lib-nodejs releases](https://github.com/cozodb/cozo-lib-nodejs/releases) -2. Look for `6-win32-x64.tar.gz` (for 64-bit Windows) -3. Extract to `node_modules/cozo-node/native/6/` - -**Verify installation:** +## Agent Bundle ```bash -git-ai ai status --path . -``` - -If you see graph-related features working, installation was successful. - -### Other Native Dependencies - -git-ai also uses these native packages that may require similar troubleshooting: -- `onnxruntime-node` - For semantic embeddings -- `tree-sitter` - For code parsing -- `@lancedb/lancedb` - For vector database - -Most issues are resolved by ensuring a stable network connection or using a mirror. 
- ---- - -## 🤖 AI Agent Integration - -git-ai provides a standard MCP Server that seamlessly integrates with: - -- **Claude Desktop**: The most popular local AI programming assistant -- **Cursor**: AI-powered code editor -- **Trae**: Powerful AI-driven IDE -- **Continue.dev**: VS Code AI plugin - -### Single Agent (stdio mode - default) - -Add to `~/.claude/claude_desktop_config.json`: - -```json -{ - "mcpServers": { - "git-ai": { - "command": "git-ai", - "args": ["ai", "serve"] - } - } -} -``` - -### Multiple Agents (HTTP mode) - -When you need multiple AI agents to connect simultaneously (e.g., Claude Code + Cursor): - -```bash -# Start HTTP server (supports multiple clients) -git-ai ai serve --http --port 3000 -``` - -Then configure each agent to connect to `http://localhost:3000/mcp`. - -**HTTP mode features:** -- Multiple concurrent sessions -- Health check endpoint: `http://localhost:3000/health` -- Session management with automatic cleanup -- Session persistence via `mcp-session-id` response header -- Comprehensive error handling with proper HTTP status codes -- Graceful shutdown with SIGTERM/SIGINT signal handlers -- Optional stateless mode for load-balanced setups: `--stateless` - -Then restart Claude Desktop and start conversing: - -> "Help me analyze this project's architecture, find all payment-related code" - -Claude will automatically invoke git-ai tools to provide deep analysis. - -### Agent Skills & Rules - -We provide carefully designed Agent templates to help AI use git-ai better: - -- [Skill Template](./templates/agents/common/skills/git-ai-code-search/SKILL.md): Guides Agents on how to use tools -- [Rule Template](./templates/agents/common/rules/git-ai-code-search/RULE.md): Constrains Agent behavior - -Skills/Rules docs (Markdown/YAML) are indexed as part of semantic search, so agents can retrieve MCP guidance via `semantic_search`. 
- -One-click install to your project: - -```bash -git-ai ai agent install -``` - ---- - -## 📚 Documentation - -- [Quick Start](./docs/README.md) -- [MCP Server Guide](./docs/mcp.md) -- [Architecture Explained](./docs/architecture_explained.md) -- [Design Document](./docs/design.md) -- [Development Guide](./DEVELOPMENT.md) - ---- - -## 🔧 Advanced Features - -### Git Hooks Automation - -Automatically rebuild indices before commit, verify pack before push: - -```bash -git-ai ai hooks install +npx skills add mars167/git-ai-cli/skills/git-ai-code-search +code-context-engine ai agent install ``` -- `pre-commit`: Auto incremental index + pack -- `pre-push`: Verify pack -- `post-checkout`: Auto unpack - -### Git LFS Integration - -Recommended for managing index archives: +## Verification ```bash -git lfs track ".git-ai/lancedb.tar.gz" -git-ai ai pack --lfs +npm run build +npm test ``` - ---- - -## 🤝 Contributing - -Welcome contributions, issue reports, and suggestions! - -- [Contribution Guide](./CONTRIBUTING.md) -- [Issue Tracker](https://github.com/mars167/git-ai-cli/issues) - ---- - -## 📄 License - -[MIT](./LICENSE) - ---- - -**Enabling AI to Evolve from "Reading Code" to "Understanding Code"** ⭐ Star us on GitHub! 
diff --git a/README.zh-CN.md b/README.zh-CN.md index d034238..7c9a4bf 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -1,523 +1,116 @@ -# git-ai +# Code Context Engine [![ci](https://github.com/mars167/git-ai-cli/actions/workflows/ci.yml/badge.svg)](https://github.com/mars167/git-ai-cli/actions/workflows/ci.yml) [![release](https://github.com/mars167/git-ai-cli/actions/workflows/release.yml/badge.svg)](https://github.com/mars167/git-ai-cli/actions/workflows/release.yml) [![license](https://img.shields.io/github/license/mars167/git-ai-cli)](./LICENSE) -[![npm version](https://img.shields.io/npm/v/@mars167/git-ai)](https://www.npmjs.com/package/@mars167/git-ai) -[![npm downloads](https://img.shields.io/npm/dm/@mars167/git-ai)](https://www.npmjs.com/package/@mars167/git-ai) -[![Agent Skill](https://img.shields.io/badge/Agent_Skill-git--ai--mcp-blue)](https://skills.sh) +[![npm version](https://img.shields.io/npm/v/code-context-engine)](https://www.npmjs.com/package/code-context-engine) +[![npm downloads](https://img.shields.io/npm/dm/code-context-engine)](https://www.npmjs.com/package/code-context-engine) **简体中文** | [English](./README.md) ---- +Code Context Engine 是一个面向 review agent 和 coding agent 的本地运行时。它把仓库转成结构化证据包和上下文包,而不是把零散搜索结果当作产品核心。 -
+## 产品定位 -### 🚀 快速安装 +产品核心是: +- 本地 TypeScript runtime +- task-oriented context builders +- agent bundle / skill 模板 -**AI Agent 技能安装 (Claude Code, Cursor, Windsurf 等)** +默认检索策略: +- lexical / symbol first +- graph expand second +- semantic rerank last -```bash -npx skills add mars167/git-ai-cli/skills/git-ai-code-search -``` - -**CLI 工具安装** - -```bash -npm install -g @mars167/git-ai -``` - -
- ---- - -## 为代码库添加语义层,让 AI 从"读代码"进化到"懂代码" - -**代码的语义,应该像代码本身一样版本化、可追溯** - -git-ai 是一个本地代码理解工具,通过 DSR(确定性语义记录)和 Hyper RAG,为代码库构建可追溯的语义层,让 AI Agent 和开发者真正理解代码的演进与关系。 - -### ✨ 为什么选择 git-ai? - -- **🔗 Hyper RAG**:融合向量检索 + 图检索 + DSR,多维度语义理解 -- **📜 版本化语义**:每个提交都有语义快照,历史变更清晰可溯 -- **🔄 随时可用**:索引随代码走,checkout 即可用,无需重建 -- **🤖 AI 原生**:MCP Server 让 Claude、Trae 等 Agent 都能深度理解你的代码 -- **🔒 完全本地**:代码永不离开你的机器,安全无忧 -- **⚡ 全流程支持**:从开发到 Review 到重构,索引贯穿整个生命周期 -- **📊 极速性能**:10k 文件索引 < 30 秒,搜索响应 < 100ms - ---- - -## ✨ 核心能力 - -### 1️⃣ 语义搜索 - -用自然语言找到代码,无需记忆文件名或函数名: +## 安装 ```bash -git-ai ai semantic "用户认证逻辑" -git-ai ai semantic "数据库连接池配置" -git-ai ai semantic "错误处理中间件" +npm install -g code-context-engine ``` -### 2️⃣ 符号关系分析 - -理解代码之间的调用关系: +## Runtime API -```bash -# 查找函数调用者 -git-ai ai graph callers authenticateUser +```ts +import { createCodeContextEngine } from 'code-context-engine'; -# 查找函数调用的其他函数 -git-ai ai graph callees authenticateUser - -# 追踪完整调用链 -git-ai ai graph chain authenticateUser --max-depth 3 -``` +const engine = createCodeContextEngine({ repoRoot: '/path/to/repo' }); -### 3️⃣ 变更历史追溯 +const lexical = await engine.search.lexical({ + query: 'authenticateUser', + mode: 'exact', + pathPattern: 'src/auth/**', + lang: 'ts', +}); -通过 DSR 追踪符号的历史演变: +const implementation = await engine.tasks.implementationContext({ + task: 'implementation_context', + query: 'UserRepository', + symbolHints: ['findById', 'save'], +}); -```bash -# 查看函数的历史变更 -git-ai ai dsr query symbol-evolution authenticateUser --limit 50 +const impact = await engine.tasks.findImpact({ + task: 'find_impact', + query: 'UserService', +}); -# 查看某个提交的完整语义快照 -git-ai ai dsr context +const review = await engine.tasks.reviewContextForDiff({ + task: 'review_pr', + diffText: rawDiff, +}); ``` -### 4️⃣ 跨语言支持 - -支持多种主流编程语言: - -| 语言 | 文件后缀 | -|------|----------| -| JavaScript | `.js`, `.jsx` | -| TypeScript | `.ts`, `.tsx` | -| Java | `.java` | -| Python | `.py` | -| Go | `.go` | -| Rust | `.rs` | -| C | `.c`, `.h` | -| Markdown | 
`.md`, `.mdx` | -| YAML | `.yml`, `.yaml` | - ---- - -## 💡 设计理念 - -git-ai 不只是一个搜索工具,而是代码库的"语义时间线": - -### DSR(Deterministic Semantic Record) - -每个提交对应一份不可变的语义快照,记录当时的代码结构、符号关系、设计意图。代码的语义应该像代码本身一样版本化——可追溯、可比对、可演进。 - -### Hyper RAG - -融合多种检索方式,让理解更深入: -- **向量检索**:语义相似度匹配 -- **图检索**:调用关系、继承关系分析 -- **DSR 检索**:历史演变追溯 - -### 去中心化语义 +## Symbol Navigation -索引随代码走,不依赖中央服务器。checkout、branch、tag 都能立即使用一致的语义索引。 +Runtime 暴露的高阶导航能力: +- `findDefinition` +- `findReferences` +- `findImplementations` +- `findImporters` +- `findExports` +- `findContainingScope` -### Server 模式 +## 薄 MCP 适配层 -MCP Server 让任何 AI Agent 都能调用索引,实现真正的 AI 辅助开发。 - ---- - -## 🎯 使用场景 - -### 场景 1:新人快速理解大型项目 - -> "刚加入团队,面对 10 万行代码,从哪里开始?" +MCP 只保留直接服务 agent 工作流的工具: +- `check_index` +- `rebuild_index` +- `read_file` +- `repo_map` +- `lexical_search` +- `implementation_context` +- `find_tests` +- `find_impact` +- `find_extension_points` +- `review_context_for_diff` ```bash -# 1. 获取项目全局视图 -git-ai ai repo-map --max-files 20 - -# 2. 搜索核心业务逻辑 -git-ai ai semantic "订单处理流程" - -# 3. 追踪关键函数调用链 -git-ai ai graph chain processOrder --max-depth 5 +code-context-engine ai serve +code-context-engine ai serve --http --port 3000 ``` -*从设计到开发,语义索引始终如一* -### 场景 2:重构前的代码影响分析 +## 薄 CLI 适配层 -> "要重构这个函数,会影响哪些地方?" 
+CLI 仅保留本地调试和验证用途: ```bash -# 查找所有调用者 -git-ai ai graph callers deprecatedFunction - -# 追踪历史变更,了解设计意图 -git-ai ai dsr query symbol-evolution deprecatedFunction --all +code-context-engine ai index --overwrite +code-context-engine ai check-index +code-context-engine ai status --json +code-context-engine ai repo-map --max-files 20 +code-context-engine ai agent install +code-context-engine ai serve ``` -*DSR 追溯历史变更,理解设计意图* - -### 场景 3:Bug 定位和根因分析 -> "用户报告了一个错误,但不知道问题出在哪里" +## Agent Bundle ```bash -# 搜索相关错误处理代码 -git-ai ai semantic "用户登录失败处理" - -# 查看错误传播路径 -git-ai ai graph chain handleLoginError --direction upstream -``` -*全流程索引,快速定位问题根源* - -### 场景 4:AI Agent 辅助开发 - -> "让 Claude Desktop 帮我理解这个项目" - -在 Claude Desktop 中配置 git-ai MCP Server 后,你可以直接对话: - -> "帮我分析这个项目的架构,找出所有与支付相关的代码,并解释它们之间的关系" - -Claude 会自动调用 git-ai 的工具,为你提供深入的分析。*让 AI 从"读代码"进化到"懂代码"* - ---- - -## 🏗️ 系统架构 - -```mermaid -graph TB - A[Git 仓库] -->|每次提交| B["DSR (Deterministic Semantic Record)"] - B --> C[.git-ai/dsr/.json
语义快照] - C -->|索引重建| D[LanceDB 向量库] - C -->|索引重建| E[CozoDB 图数据库] - D --> F[MCP Server] - E --> F - F -->|工具调用| G["AI Agent
Claude Desktop / Trae"] - F -->|命令行| H[开发者] - C -->|跨版本| I{"语义时间线
可追溯、可比对、可演进"} - - style B fill:#e1f5ff - style C fill:#e8f5e9 - style D fill:#fff4e1 - style E fill:#fff4e1 - style F fill:#e8f5e9 - style G fill:#f3e5f5 - style I fill:#fce4ec -``` - -**核心组件**: - -- **DSR (Deterministic Semantic Record)**:按提交存储的不可变语义快照,版本化语义 -- **LanceDB + SQ8**:高性能向量数据库,支持语义搜索 -- **CozoDB**:图数据库,支持 AST 级关系查询 -- **MCP Server**:标准协议接口,供 AI Agent 调用 - ---- - -## 📊 与其他工具对比 - -| 特性 | git-ai | GitHub Code Search | Sourcegraph | -|------|--------|-------------------|-------------| -| 本地运行 | ✅ | ❌ | ❌ | -| AST 级分析 | ✅ | ❌ | ✅ | -| 版本化语义 | ✅ | ❌ | ❌ | -| 变更历史追溯 | ✅ | ❌ | ❌ | -| AI Agent 集成 | ✅ | ❌ | ❌ | -| 免费开源 | ✅ | ❌ | ❌ | -| 语义搜索 | ✅ | ✅ | ✅ | -| 调用链分析 | ✅ | ❌ | ✅ | - ---- - -## 🚀 快速开始 - -### 1. 安装 - -```bash -npm install -g @mars167/git-ai -``` - -### 2. 初始化仓库 - -```bash -cd your-project -git-ai ai index --overwrite -``` - -### 3. 立即体验 - -```bash -# 用自然语言搜索代码 -git-ai ai semantic "用户认证逻辑" - -# 查看函数调用关系 -git-ai ai graph callers authenticateUser -``` - -**实际输出示例**: -```json -[ - { - "file": "src/auth/service.ts", - "line": 45, - "symbol": "authenticateUser", - "context": "async function authenticateUser(email: string, password: string)" - }, - { - "file": "src/controllers/auth.ts", - "line": 23, - "symbol": "loginHandler", - "context": "const user = await authenticateUser(req.body.email, req.body.password)" - } -] -``` - -就这么简单!3 步上手,立即开始深度理解你的代码库。 - -*从此,索引不再是"一次性产物",而是随代码演进的"语义资产"。* - ---- - -## ⚙️ 配置 - -### 文件过滤 - -git-ai 遵循项目的忽略文件配置来控制哪些文件会被索引: - -#### `.gitignore` - 标准 Git 忽略规则 - -默认情况下,匹配 `.gitignore` 中模式的文件会被排除在索引之外。 - -#### `.aiignore` - AI 专属排除规则(最高优先级) - -在仓库根目录创建 `.aiignore` 文件,用于排除特定文件的索引,这些文件应该被 git-ai 忽略但不一定要被 Git 忽略: - -```bash -# .aiignore 示例 -test-fixtures/** -*.generated.ts -docs/api-reference/** -``` - -#### `.git-ai/include.txt` - 强制包含(覆盖 `.gitignore`) - -有时您需要索引生成的代码或在 `.gitignore` 中但对代码理解很重要的文件。创建 `.git-ai/include.txt` 来强制索引特定模式: - -```bash -# .git-ai/include.txt 示例 -# 包含生成的 API 客户端 -generated/api/** - -# 包含特定的构建产物,其中包含重要的类型定义 
-dist/types/** - -# 包含特定被忽略目录中的代码 -vendor/important-lib/** -``` - -**优先级顺序(从高到低):** -1. `.aiignore` - 显式排除规则始终生效 -2. `.git-ai/include.txt` - 强制包含模式覆盖 `.gitignore` -3. `.gitignore` - 标准 Git 忽略模式 - -**支持的模式语法:** -- `**` - 匹配任意数量的目录 -- `*` - 匹配目录内的任意字符 -- `directory/` - 匹配整个目录(自动转换为 `directory/**`) -- `file.ts` - 匹配特定文件 -- 以 `#` 开头的行为注释 - -**配置示例:** - -```bash -# .gitignore -dist/ -generated/ -*.log - -# .git-ai/include.txt -generated/api/** -generated/types/** - -# .aiignore (覆盖所有规则) -generated/test-data/** -``` - -此配置下: -- ✅ `generated/api/client.ts` - 被索引(通过 include.txt 包含) -- ✅ `generated/types/models.ts` - 被索引(通过 include.txt 包含) -- ❌ `generated/test-data/mock.ts` - 不被索引(.aiignore 优先级最高) -- ❌ `dist/bundle.js` - 不被索引(在 .gitignore 中,不在 include.txt 中) - ---- - -## 🛠️ 故障排除 - -### Windows 安装问题 - -git-ai 使用 [CozoDB](https://github.com/cozodb/cozo) 来实现 AST 图查询功能。在 Windows 上,如果遇到 `cozo-node` 相关的安装错误,可以尝试以下解决方案: - -**方案一:使用 Gitee 镜像(推荐国内用户使用)** - -```bash -npm install -g @mars167/git-ai --cozo_node_prebuilt_binary_host_mirror=https://gitee.com/cozodb/cozo-lib-nodejs/releases/download/ -``` - -**方案二:配置 npm 代理** - -如果你在公司防火墙或代理后面: - -```bash -npm config set proxy http://your-proxy:port -npm config set https-proxy http://your-proxy:port -npm install -g @mars167/git-ai -``` - -**方案三:手动下载二进制文件** - -1. 从 [cozo-lib-nodejs releases](https://github.com/cozodb/cozo-lib-nodejs/releases) 下载 Windows 二进制文件 -2. 找到 `6-win32-x64.tar.gz`(64 位 Windows) -3. 解压到 `node_modules/cozo-node/native/6/` 目录 - -**验证安装:** - -```bash -git-ai ai status --path . 
-``` - -如果看到 graph 相关功能正常工作,说明安装成功。 - -### 其他原生依赖 - -git-ai 还使用了以下原生包,可能需要类似的故障排除: -- `onnxruntime-node` - 用于语义向量生成 -- `tree-sitter` - 用于代码解析 -- `@lancedb/lancedb` - 用于向量数据库 - -大多数问题可以通过确保稳定的网络连接或使用镜像来解决。 - ---- - -## 🤖 AI Agent 集成 - -git-ai 提供标准的 MCP Server,可与以下 AI Agent 无缝集成: - -- **Claude Desktop**:最流行的本地 AI 编程助手 -- **Cursor**:AI 驱动的代码编辑器 -- **Trae**:强大的 AI 驱动 IDE -- **Continue.dev**:VS Code AI 插件 - -### 单客户端模式(stdio,默认) - -在 `~/.claude/claude_desktop_config.json` 中添加: - -```json -{ - "mcpServers": { - "git-ai": { - "command": "git-ai", - "args": ["ai", "serve"] - } - } -} -``` - -### 多客户端模式(HTTP) - -当你需要多个 AI Agent 同时连接时(如同时使用 Claude Code 和 Cursor): - -```bash -# 启动 HTTP 服务(支持多客户端) -git-ai ai serve --http --port 3000 -``` - -然后配置每个 Agent 连接到 `http://localhost:3000/mcp`。 - -**HTTP 模式特性:** -- 支持多个并发会话 -- 健康检查端点:`http://localhost:3000/health` -- 自动管理会话生命周期 -- 通过 `mcp-session-id` 响应头实现 Session 持久化 -- 完善的错误处理机制,返回正确的 HTTP 状态码 -- 支持 SIGTERM/SIGINT 信号,实现优雅关闭 -- 可选无状态模式,用于负载均衡场景:`--stateless` - -然后重启 Claude Desktop,即可开始对话: - -> "帮我分析这个项目的架构,找出所有与支付相关的代码" - -Claude 会自动调用 git-ai 的工具,为你提供深入的分析。 - -### Agent Skills & Rules - -我们提供了精心设计的 Agent 模版,帮助 AI 更好地使用 git-ai: - -- [Skill 模版](./templates/agents/common/skills/git-ai-code-search/SKILL.md):指导 Agent 如何使用工具 -- [Rule 模版](./templates/agents/common/rules/git-ai-code-search/RULE.md):约束 Agent 的行为 - -Skills/Rules 文档(Markdown/YAML)会被纳入语义索引,便于通过 `semantic_search` 检索 MCP 指南。 - -一键安装到你的项目: - -```bash -git-ai ai agent install -``` - ---- - -## 📚 文档 - -- [快速入门](./docs/zh-CN/README.md) -- [MCP Server 使用指南](./docs/zh-CN/mcp.md) -- [技术架构详解](./docs/zh-CN/architecture_explained.md) -- [设计文档](./docs/zh-CN/design.md) -- [开发指南](./DEVELOPMENT.zh-CN.md) - ---- - -## 🔧 高级功能 - -### Git Hooks 自动化 - -自动在提交前重建索引,push 前打包校验: - -```bash -git-ai ai hooks install +npx skills add mars167/git-ai-cli/skills/git-ai-code-search +code-context-engine ai agent install ``` -- `pre-commit`:自动增量索引 + 打包 -- `pre-push`:校验索引归档 -- `post-checkout`:自动解包索引 - -### Git LFS 
集成 - -推荐使用 Git LFS 管理索引归档: +## 验证 ```bash -git lfs track ".git-ai/lancedb.tar.gz" -git-ai ai pack --lfs +npm run build +npm test ``` - ---- - -## 🤝 贡献 - -欢迎贡献代码、报告问题或提出建议! - -- [贡献指南](./CONTRIBUTING.md) -- [问题追踪](https://github.com/mars167/git-ai-cli/issues) - ---- - -## 📄 License - -[MIT](./LICENSE) - ---- - -**让 AI 从"读代码"进化到"懂代码"** ⭐ Star us on GitHub! diff --git a/bin/git-ai.ts b/bin/code-context-engine.ts similarity index 91% rename from bin/git-ai.ts rename to bin/code-context-engine.ts index 7316beb..6b74eda 100644 --- a/bin/git-ai.ts +++ b/bin/code-context-engine.ts @@ -1,6 +1,6 @@ #!/usr/bin/env node -import { Command } from 'commander'; import { spawnSync } from 'child_process'; +import { Command } from 'commander'; import fs from 'fs'; import path from 'path'; import { aiCommand } from '../src/commands/ai'; @@ -50,8 +50,8 @@ function main() { const program = new Command(); const version = readVersionFromPackageJson(); program - .name('git-ai') - .description('git-ai: git-compatible CLI with AI indexing tools') + .name('code-context-engine') + .description('Code Context Engine: local runtime and thin adapters for agent-oriented code retrieval') .version(version); program.addCommand(aiCommand); diff --git a/docs/mcp.md b/docs/mcp.md index 4eb0ed4..2d899e0 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -1,280 +1,36 @@ -# MCP Server Integration +# Code Context Engine MCP Adapter -`git-ai` provides an MCP (Model Context Protocol) Server that can be integrated with AI Agents (such as Claude Desktop, Cursor, Trae, etc.), empowering them with "code understanding" capabilities. +Code Context Engine keeps MCP as a thin adapter over the local runtime. It is not the product core and it no longer exposes a large primitive-first tool surface. -## Starting the Server - -### Stdio Mode (Default, Single Client) - -Run from any directory: - -```bash -git-ai ai serve -``` - -This starts the server in stdio mode (waiting for client connection). 
Configure it in MCP-compatible clients. Suitable for single Agent connection. - -### HTTP Mode (Multiple Clients) - -If you need multiple Agents to connect simultaneously (e.g., using Claude Code and Cursor together), use HTTP mode: +## Start ```bash -git-ai ai serve --http --port 3000 +code-context-engine ai serve +code-context-engine ai serve --http --port 3000 ``` -HTTP mode features: -- **Multi-client support**: Each connection gets an independent session -- **Health check endpoint**: `http://localhost:3000/health` returns server status -- **MCP endpoint**: `http://localhost:3000/mcp` for MCP protocol communication -- **Session management**: Automatic client session lifecycle management -- **Session persistence**: Session IDs returned in `mcp-session-id` response header for client reuse -- **Error handling**: Comprehensive error handling with proper HTTP status codes -- **Graceful shutdown**: SIGTERM/SIGINT signal handlers for clean shutdown - -#### Options - -| Option | Description | Default | -|--------|-------------|---------| -| `--http` | Enable HTTP transport (supports multiple clients) | No (uses stdio) | -| `--port ` | HTTP server port | 3000 | -| `--stateless` | Stateless mode (no session tracking, for load-balanced setups) | No | -| `--disable-mcp-log` | Disable MCP access logging | No | - -#### HTTP Mode Examples - -```bash -# Default port 3000 -git-ai ai serve --http - -# Custom port -git-ai ai serve --http --port 8080 - -# Stateless mode (for load-balanced scenarios) -git-ai ai serve --http --port 3000 --stateless - -# Disable access logging -git-ai ai serve --http --disable-mcp-log -``` - -#### Health Check - -```bash -curl http://localhost:3000/health -# {"status":"ok","sessions":2} -``` - -#### Session Management - -When connecting to the MCP endpoint, the server: -1. Creates a new session if no `mcp-session-id` header is provided -2. Returns the session ID in the `mcp-session-id` response header -3. 
Clients should include this header in subsequent requests to reuse the session -4. Sessions are automatically cleaned up when the connection closes - -Example session workflow: -```bash -# First request - new session created -curl -X POST http://localhost:3000/mcp \ - -H "Content-Type: application/json" \ - -H "Accept: application/json, text/event-stream" \ - -i # -i shows headers including mcp-session-id - -# Subsequent requests - reuse session -curl -X POST http://localhost:3000/mcp \ - -H "Content-Type: application/json" \ - -H "Accept: application/json, text/event-stream" \ - -H "mcp-session-id: " \ - ... -``` - -## Tool List - -### Repository Management -- `get_repo({ path })`: Returns repository root and scan root for specified `path` (debugging) - -### Index Management -- `check_index({ path })`: Check if index structure matches current version (rebuild if inconsistent) -- `rebuild_index({ path, dim?, overwrite? })`: Rebuild full index (writes to `.git-ai/`; Risk: high) -- `pack_index({ path, lfs? })`: Pack index to `.git-ai/lancedb.tar.gz` (optional git-lfs track) -- `unpack_index({ path })`: Unpack index archive - -### Retrieval -- `search_symbols({ path, query, mode?, case_insensitive?, max_candidates?, limit?, lang?, with_repo_map?, repo_map_max_files?, repo_map_max_symbols?, wiki_dir? })`: Symbol retrieval (lang: auto/all/java/ts/python/go/rust/c/markdown/yaml; optional repo_map) -- `semantic_search({ path, query, topk?, lang?, with_repo_map?, repo_map_max_files?, repo_map_max_symbols?, wiki_dir? })`: Semantic search based on LanceDB + SQ8 (lang: auto/all/java/ts/python/go/rust/c/markdown/yaml; optional repo_map) -- `repo_map({ path, max_files?, max_symbols?, wiki_dir? })`: Generate repo map (important files/symbols ranking, Wiki navigation) -- `ast_graph_find({ path, prefix, limit?, lang? })`: Find symbol definitions by name prefix (case-insensitive; lang: auto/all/java/ts) -- `ast_graph_children({ path, id, as_file? 
})`: List direct child nodes in containment hierarchy (file→top-level symbols, class→methods, etc.) -- `ast_graph_refs({ path, name, limit?, lang? })`: Find reference locations by name (call/new/type; lang: auto/all/java/ts) -- `ast_graph_callers({ path, name, limit?, lang? })`: Find callers by name (callee name; lang: auto/all/java/ts) -- `ast_graph_callees({ path, name, limit?, lang? })`: Find callees by name (caller name; lang: auto/all/java/ts) -- `ast_graph_chain({ path, name, direction?, max_depth?, limit?, lang? })`: Find call chain by name (upstream/downstream, max depth; lang: auto/all/java/ts) -- `ast_graph_query({ path, query, params? })`: Execute CozoScript query on AST graph database (advanced) - -### File Reading -- `list_files({ path, pattern?, limit? })`: List files by glob pattern (defaults ignore node_modules, .git, etc.) -- `read_file({ path, file, start_line?, end_line? })`: Read file segment by line numbers - -### DSR (Deterministic Semantic Record) -- `dsr_context({ path })`: Get repository Git context and DSR directory status - - Returns: commit_hash, repo_root, branch, detached, dsr_directory_state -- `dsr_generate({ path, commit })`: Generate DSR for specified commit - - Returns: commit_hash, file_path, existed, counts, semantic_change_type, risk_level -- `dsr_rebuild_index({ path })`: Rebuild index from DSR files for faster queries - - Returns: enabled, engine, dbPath, counts -- `dsr_symbol_evolution({ path, symbol, start?, all?, limit?, contains? 
})`: Trace symbol change history - - Returns: ok, hits (including commit_hash, semantic_change_type, risk_level, operations) - -## DSR Usage Examples - -Get repository Git status and DSR info: - -```js -dsr_context({ path: "/ABS/PATH/TO/REPO" }) -``` - -Generate DSR for recent commits: - -```js -dsr_generate({ path: "/ABS/PATH/TO/REPO", commit: "HEAD" }) -dsr_generate({ path: "/ABS/PATH/TO/REPO", commit: "HEAD~1" }) -``` - -Query function change history: - -```js -dsr_symbol_evolution({ path: "/ABS/PATH/TO/REPO", symbol: "handleRequest", limit: 50 }) -``` - -Fuzzy match symbol names: - -```js -dsr_symbol_evolution({ path: "/ABS/PATH/TO/REPO", symbol: "Request", contains: true, limit: 100 }) -``` - -## AST Graph Query Examples - -List top-level symbols in a file (recommended: no manual file_id calculation needed): - -```js -ast_graph_children({ path: "/ABS/PATH/TO/REPO", id: "src/mcp/server.ts", as_file: true }) -``` - -Query method/function callers (recommended: use callers/callees/chain, not manual CozoScript): - -```js -ast_graph_callers({ path: "/ABS/PATH/TO/REPO", name: "greet", limit: 50 }) -ast_graph_chain({ path: "/ABS/PATH/TO/REPO", name: "greet", direction: "upstream", max_depth: 3 }) -``` - -List top-level symbols in a file (advanced: direct CozoScript, requires file_id): - -```cozo -?[file_id] <- [[$file_id]] -?[child_id, name, kind, start_line, end_line] := - *ast_contains{parent_id: file_id, child_id}, - *ast_symbol{ref_id: child_id, file, name, kind, signature, start_line, end_line} -``` - -## Recommended Usage (Let Agent Pass Correct Path) -- MCP tools require `path` parameter: Every tool call must explicitly pass `path: "/ABS/PATH/TO/REPO"` (ensures call atomicity) - -## RepoMap Usage Tips - -Repo map provides Agents with a "global bird's-eye view + navigation entry points" (important files/symbols + Wiki associations). 
Recommended as a pre-analysis step: - -```js -repo_map({ path: "/ABS/PATH/TO/REPO", max_files: 20, max_symbols: 5 }) -``` - -If you want to include repo map in retrieval results (disabled by default to avoid output bloat): - -```js -search_symbols({ path: "/ABS/PATH/TO/REPO", query: "Foo", limit: 20, with_repo_map: true, repo_map_max_files: 20, repo_map_max_symbols: 5 }) -semantic_search({ path: "/ABS/PATH/TO/REPO", query: "where is auth handled", topk: 5, with_repo_map: true }) -``` - -## Agent Skills / Rules - -This repository provides Agent-ready Skill/Rule templates designed to help Agents follow best practices when using these tools. - -These template documents (Markdown/YAML) are indexed, making it easy for Agents to retrieve MCP guidelines and skill descriptions via `semantic_search`. - -### YAML Format Templates - -- **Skill**: [`templates/agents/common/skills/git-ai/skill.yaml`](../templates/agents/common/skills/git-ai/skill.yaml) - Guides Agents on using git-ai's Git-native semantic system (including DSR constraints) and MCP tools - - Includes: trigger conditions, workflow steps, tool definitions, output requirements, common pitfalls - -- **Rule**: [`templates/agents/common/rules/git-ai.yaml`](../templates/agents/common/rules/git-ai.yaml) - Constrains Agent behavior when using git-ai MCP - - Includes: must-follow rules, recommended strategies, prohibited actions, Git Hooks rules, Manifest Workspace rules - -### Markdown Templates (for easy reading/copying) - -- **Skill**: [`templates/agents/common/skills/git-ai-code-search/SKILL.md`](../templates/agents/common/skills/git-ai-code-search/SKILL.md) -- **Rule**: [`templates/agents/common/rules/git-ai-code-search/RULE.md`](../templates/agents/common/rules/git-ai-code-search/RULE.md) - -### Install to Trae - -Install Skills and Rules from this repository to your project's `.agents` directory (default): - -```bash -cd /path/to/your-repo -git-ai ai agent install -git-ai ai agent install --overwrite -git-ai ai 
agent install --to /custom/location/.agents -``` - -If you want to install to Trae's `.trae` directory: - -```bash -git-ai ai agent install --agent trae -``` - -### Skill Workflow Overview - -According to `skill.yaml`, recommended workflow: - -1. **First time in repository** - Use `repo_map` for global view -2. **Check index status** - Use `check_index`, `rebuild_index` if necessary -3. **Locate target code** - Use `search_symbols` or `semantic_search` -4. **Understand code relationships** - Use `ast_graph_callers/callees/chain` -5. **Trace change history** - Use `dsr_symbol_evolution` -6. **Read code carefully** - Use `read_file` to read key segments -7. **Provide suggestions** - Give modification suggestions based on complete understanding - -### Rule Constraints Overview - -According to `rule.yaml`, Agents must follow: - -- **explicit_path**: Every call must explicitly pass `path` parameter -- **check_index_first**: Must check index before symbol search -- **understand_before_modify**: Must understand implementation before modification -- **use_dsr_for_history**: Must use DSR tools for tracing history -- **respect_dsr_risk**: Take seriously DSR-reported high-risk changes - -Documentation/rule retrieval suggestions: -- When issues involve MCP/Skill/Rule, first use `semantic_search` to locate relevant documentation segments, then conclude. 
- -Prohibited actions include: -- Assuming symbol locations without searching -- Directly modifying unread files -- Manually parsing git history -- Performing symbol search when index is missing -- Omitting `path` parameter - -## DSR and MCP Relationship +## Retained Tool Surface -- **MCP tools** primarily cover "index (.git-ai) construction and retrieval", helping Agents efficiently locate evidence -- **DSR** is "per-commit semantic artifacts (.git-ai/dsr)", used for semantic history/evolution queries -- DSR is per-commit, immutable, deterministic, enabling precise symbol change tracing -- All historical traversal must start from Git DAG (DSR only enriches nodes, doesn't define edges) +### Index and repo access +- `check_index({ path })` +- `rebuild_index({ path, dim?, overwrite? })` +- `read_file({ path, file, start_line?, end_line? })` +- `repo_map({ path, max_files?, max_symbols?, depth?, max_nodes?, wiki_dir? })` -See DSR documentation for DSR-related commands: [DSR Documentation](./dsr.md) +### Runtime-oriented retrieval +- `lexical_search({ path, query, mode?, lang?, path_pattern?, limit? })` -## Output Requirements +### Task-oriented context +- `implementation_context({ path, query?, path_hints?, symbol_hints? })` +- `find_tests({ path, query?, path_hints?, symbol_hints? })` +- `find_impact({ path, query?, path_hints?, symbol_hints? })` +- `find_extension_points({ path, query?, path_hints?, symbol_hints? })` +- `review_context_for_diff({ path, diff_text, query?, path_hints?, symbol_hints? })` -Agents should follow these guidelines when using git-ai MCP tools: +## Recommended Agent Workflow -1. **Conclusion first, then evidence** - Summarize findings first, then provide detailed locations -2. **Use IDE-clickable links** - Format: `file:///path/to/file#L10-L20` -3. **Minimal change principle** - Avoid introducing new dependencies when suggesting modifications -4. **Evidence must be based on read_file** - Don't rely on assumptions or guesses +1. 
`check_index` to verify index availability +2. `repo_map` for top-level orientation when entering a repo +3. `lexical_search` for precise initial recall +4. One of the task tools as the main working surface +5. `read_file` to inspect exact code before making changes diff --git a/package.json b/package.json index f5a1893..d5e6d6f 100644 --- a/package.json +++ b/package.json @@ -1,18 +1,18 @@ { - "name": "@mars167/git-ai", + "name": "code-context-engine", "version": "2.4.3", - "main": "dist/index.js", + "main": "dist/src/index.js", "bin": { - "git-ai": "dist/bin/git-ai.js" + "code-context-engine": "dist/bin/code-context-engine.js" }, "directories": { "doc": "docs" }, "scripts": { "build": "tsc", - "start": "ts-node bin/git-ai.ts", - "test": "npm run build && node dist/bin/git-ai.js ai index --overwrite && node --test test/*.test.mjs test/*.test.ts test/*.test.js", - "test:cli": "bash test-cli.sh", + "start": "ts-node bin/code-context-engine.ts", + "test": "npm run build && node --test test/runtime.search.test.ts test/runtime.tasks.test.ts test/runtime.navigation.test.ts test/diffReviewContext.test.ts test/agentBundle.test.ts test/mcp.smoke.test.js", + "test:cli": "npm run build && node --test test/cliCommands.test.js", "test:parser": "ts-node test/verify_parsing.ts" }, "files": [ @@ -27,6 +27,8 @@ "keywords": [ "git", "ai", + "code-context-engine", + "agent-runtime", "semantic-search", "mcp", "code-search", @@ -39,7 +41,7 @@ ], "author": "mars167", "license": "MIT", - "description": "A git-compatible CLI with AI indexing/search and an MCP server.", + "description": "Code Context Engine: a local runtime and thin adapter surface for agent-oriented code retrieval and context construction.", "repository": { "type": "git", "url": "git+https://github.com/mars167/git-ai-cli.git" diff --git a/src/cli/handlers/serveHandlers.ts b/src/cli/handlers/serveHandlers.ts index f0c0b9b..49fc02c 100644 --- a/src/cli/handlers/serveHandlers.ts +++ b/src/cli/handlers/serveHandlers.ts @@ 
-1,7 +1,7 @@ import path from 'path'; import fs from 'fs-extra'; import { resolveGitRoot } from '../../core/git'; -import { GitAIV2MCPServer } from '../../mcp/server'; +import { CodeContextEngineMCPServer } from '../../mcp/server'; import { createLogger } from '../../core/log'; import type { CLIResult, CLIError } from '../types'; import { success, error } from '../types'; @@ -20,7 +20,7 @@ export async function handleServe(input: { stateless: input.http ? input.stateless : undefined, }); - const server = new GitAIV2MCPServer(process.cwd(), { + const server = new CodeContextEngineMCPServer(process.cwd(), { disableAccessLog: !!input.disableMcpLog, transport: input.http ? 'http' : 'stdio', port: input.port, diff --git a/src/commands/ai.ts b/src/commands/ai.ts index fb49b57..7628f8d 100644 --- a/src/commands/ai.ts +++ b/src/commands/ai.ts @@ -1,27 +1,14 @@ import { Command } from 'commander'; import { indexCommand } from '../cli/commands/indexCommand.js'; -import { queryCommand } from '../cli/commands/queryCommand.js'; -import { queryFilesCommand } from '../cli/commands/queryFilesCommand.js'; -import { semanticCommand } from '../cli/commands/semanticCommand.js'; import { serveCommand, agentCommand } from '../cli/commands/serveCommands.js'; -import { packCommand, unpackCommand } from '../cli/commands/archiveCommands.js'; -import { hooksCommand } from '../cli/commands/hooksCommands.js'; -import { graphCommand } from '../cli/commands/graphCommands.js'; import { checkIndexCommand, statusCommand } from '../cli/commands/statusCommands.js'; import { repoMapCommand } from '../cli/commands/repoMapCommand.js'; export const aiCommand = new Command('ai') - .description('AI features (indexing, search, hooks, MCP)') + .description('Code Context Engine adapters (index, repo overview, agent install, MCP)') .addCommand(indexCommand) .addCommand(checkIndexCommand) .addCommand(statusCommand) .addCommand(repoMapCommand) - .addCommand(queryCommand) - .addCommand(queryFilesCommand) - 
.addCommand(semanticCommand) - .addCommand(graphCommand) - .addCommand(packCommand) - .addCommand(unpackCommand) .addCommand(agentCommand) - .addCommand(hooksCommand) .addCommand(serveCommand); diff --git a/src/core/astGraphQuery.ts b/src/core/astGraphQuery.ts index 38763b4..30a3f2f 100644 --- a/src/core/astGraphQuery.ts +++ b/src/core/astGraphQuery.ts @@ -21,6 +21,15 @@ export function buildFindSymbolsQuery(lang?: string): string { `; } +export function buildDefinitionsByNameQuery(lang?: string): string { + return ` +?[ref_id, file, lang, name, kind, signature, start_line, end_line] := + *ast_symbol{ref_id, file, lang, name, kind, signature, start_line, end_line}, + lowercase(name) == lowercase($name)${lang ? `, + lowercase(lang) == lowercase($lang)` : ''} +`; +} + export function buildChildrenQuery(): string { return ` ?[child_id, file, lang, name, kind, signature, start_line, end_line] := @@ -29,6 +38,17 @@ export function buildChildrenQuery(): string { `; } +export function buildContainingScopeQuery(lang?: string): string { + return ` +?[file, lang, name, kind, signature, start_line, end_line] := + *ast_symbol{file, lang, name, kind, signature, start_line, end_line}, + file == $file, + start_line <= $line, + end_line >= $line${lang ? 
`, + lowercase(lang) == lowercase($lang)` : ''} +`; +} + export function buildFindReferencesQuery(lang?: string): string { return ` ?[file, line, col, ref_kind, from_id, from_kind, from_name, from_lang] := diff --git a/src/domain/context.ts b/src/domain/context.ts new file mode 100644 index 0000000..c487d7c --- /dev/null +++ b/src/domain/context.ts @@ -0,0 +1,22 @@ +import type { SearchMatch } from './search'; + +export interface EvidenceBundle { + task: string; + summary: string; + evidence: SearchMatch[]; + related_paths?: string[]; + diagnostics?: string[]; +} + +export interface ContextSection { + title: string; + summary: string; + evidence: SearchMatch[]; +} + +export interface ContextBundle { + task: string; + summary: string; + sections: ContextSection[]; + diagnostics?: string[]; +} diff --git a/src/domain/diff.ts b/src/domain/diff.ts new file mode 100644 index 0000000..e26d319 --- /dev/null +++ b/src/domain/diff.ts @@ -0,0 +1,14 @@ +export interface DiffFileChange { + path: string; + change_type: 'added' | 'modified' | 'deleted' | 'renamed'; +} + +export interface DiffInsight { + touched_files: DiffFileChange[]; + touched_symbols: string[]; + signature_changes: string[]; + import_changes: string[]; + config_changes: string[]; + added_literals: string[]; + removed_literals: string[]; +} diff --git a/src/domain/search.ts b/src/domain/search.ts new file mode 100644 index 0000000..dfd9bd2 --- /dev/null +++ b/src/domain/search.ts @@ -0,0 +1,41 @@ +export type MatchType = 'exact_token' | 'substring' | 'regex' | 'literal' | 'path'; + +export type EvidenceType = + | 'content_match' + | 'path_match' + | 'symbol_match' + | 'graph_match' + | 'semantic_match' + | 'diff_change'; + +export type MatchConfidence = 'high' | 'medium' | 'low'; + +export interface MatchPosition { + line: number; + column: number; +} + +export interface MatchRange { + start: MatchPosition; + end: MatchPosition; +} + +export interface SearchMatch { + why_matched: string; + match_type: 
MatchType; + evidence_type: EvidenceType; + score: number; + path: string; + range: MatchRange; + symbol?: string; + preview: string; + confidence: MatchConfidence; + lang?: string; +} + +export interface SearchResultSet { + repoRoot: string; + query: string; + mode: string; + matches: SearchMatch[]; +} diff --git a/src/domain/tasks.ts b/src/domain/tasks.ts new file mode 100644 index 0000000..fa50c9a --- /dev/null +++ b/src/domain/tasks.ts @@ -0,0 +1,19 @@ +import type { ContextBundle, EvidenceBundle } from './context'; +import type { DiffInsight } from './diff'; + +export interface TaskRequest { + task: string; + query?: string; + pathHints?: string[]; + symbolHints?: string[]; +} + +export interface DiffTaskRequest extends TaskRequest { + diffText: string; +} + +export interface TaskResult { + bundle?: ContextBundle | EvidenceBundle; + diff?: DiffInsight; + diagnostics?: string[]; +} diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..cc71b78 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,41 @@ +export { createCodeContextEngine } from './retrieval/runtime'; +export type { CodeContextEngine, CodeContextEngineOptions } from './retrieval/runtime'; + +export type { + MatchConfidence, + MatchPosition, + MatchRange, + MatchType, + EvidenceType, + SearchMatch, + SearchResultSet, +} from './domain/search'; + +export type { + EvidenceBundle, + ContextBundle, + ContextSection, +} from './domain/context'; + +export type { + DiffFileChange, + DiffInsight, +} from './domain/diff'; + +export type { + TaskRequest, + DiffTaskRequest, + TaskResult, +} from './domain/tasks'; + +export type { + RuntimeLanguage, + LexicalSearchMode, + LexicalSearchRequest, + LexicalSearchResponse, +} from './retrieval/lexical/types'; + +export type { + SymbolNavigationRequest, + ContainingScopeRequest, +} from './retrieval/symbol/navigation'; diff --git a/src/mcp/handlers/index.ts b/src/mcp/handlers/index.ts index 052b7c4..a874ec3 100644 --- a/src/mcp/handlers/index.ts +++ 
b/src/mcp/handlers/index.ts @@ -3,3 +3,4 @@ export * from './repoHandlers'; export * from './fileHandlers'; export * from './searchHandlers'; export * from './astGraphHandlers'; +export * from './taskHandlers'; diff --git a/src/mcp/handlers/taskHandlers.ts b/src/mcp/handlers/taskHandlers.ts new file mode 100644 index 0000000..ab5d398 --- /dev/null +++ b/src/mcp/handlers/taskHandlers.ts @@ -0,0 +1,110 @@ +import path from 'path'; +import { createCodeContextEngine } from '../../retrieval/runtime'; +import type { ToolHandler } from '../types'; +import { errorResponse, successResponse } from '../types'; +import { resolveGitRoot } from '../../core/git'; +import type { + FindExtensionPointsArgs, + FindImpactArgs, + FindTestsArgs, + ImplementationContextArgs, + LexicalSearchArgs, + ReviewContextForDiffArgs, +} from '../schemas/taskSchemas'; + +async function resolveEngine(startDir: string) { + const repoRoot = await resolveGitRoot(path.resolve(startDir)); + return createCodeContextEngine({ repoRoot }); +} + +export const handleLexicalSearch: ToolHandler<LexicalSearchArgs> = async (args) => { + try { + const engine = await resolveEngine(args.path); + const result = await engine.search.lexical({ + query: args.query, + mode: args.mode, + lang: args.lang, + pathPattern: args.path_pattern, + limit: args.limit, + }); + return successResponse(result); + } catch (error) { + return errorResponse(error); + } +}; + +export const handleImplementationContext: ToolHandler<ImplementationContextArgs> = async (args) => { + try { + const engine = await resolveEngine(args.path); + const result = await engine.tasks.implementationContext({ + task: 'implementation_context', + query: args.query, + pathHints: args.path_hints, + symbolHints: args.symbol_hints, + }); + return successResponse(result); + } catch (error) { + return errorResponse(error); + } +}; + +export const handleFindTests: ToolHandler<FindTestsArgs> = async (args) => { + try { + const engine = await resolveEngine(args.path); + const result = await engine.tasks.findTests({ + task:
'find_tests', + query: args.query, + pathHints: args.path_hints, + symbolHints: args.symbol_hints, + }); + return successResponse(result); + } catch (error) { + return errorResponse(error); + } +}; + +export const handleFindImpact: ToolHandler<FindImpactArgs> = async (args) => { + try { + const engine = await resolveEngine(args.path); + const result = await engine.tasks.findImpact({ + task: 'find_impact', + query: args.query, + pathHints: args.path_hints, + symbolHints: args.symbol_hints, + }); + return successResponse(result); + } catch (error) { + return errorResponse(error); + } +}; + +export const handleFindExtensionPoints: ToolHandler<FindExtensionPointsArgs> = async (args) => { + try { + const engine = await resolveEngine(args.path); + const result = await engine.tasks.findExtensionPoints({ + task: 'find_extension_points', + query: args.query, + pathHints: args.path_hints, + symbolHints: args.symbol_hints, + }); + return successResponse(result); + } catch (error) { + return errorResponse(error); + } +}; + +export const handleReviewContextForDiff: ToolHandler<ReviewContextForDiffArgs> = async (args) => { + try { + const engine = await resolveEngine(args.path); + const result = await engine.tasks.reviewContextForDiff({ + task: 'review_pr', + query: args.query, + diffText: args.diff_text, + pathHints: args.path_hints, + symbolHints: args.symbol_hints, + }); + return successResponse(result); + } catch (error) { + return errorResponse(error); + } +}; diff --git a/src/mcp/schemas/index.ts b/src/mcp/schemas/index.ts index fe90592..efc0500 100644 --- a/src/mcp/schemas/index.ts +++ b/src/mcp/schemas/index.ts @@ -2,3 +2,4 @@ export * from './repoSchemas'; export * from './searchSchemas'; export * from './astGraphSchemas'; export * from './fileSchemas'; +export * from './taskSchemas'; diff --git a/src/mcp/schemas/taskSchemas.ts b/src/mcp/schemas/taskSchemas.ts new file mode 100644 index 0000000..5de398f --- /dev/null +++ b/src/mcp/schemas/taskSchemas.ts @@ -0,0 +1,33 @@ +import { z } from 'zod'; + +export const LexicalSearchArgsSchema
= z.object({ + path: z.string().min(1, 'path is required'), + query: z.string().min(1, 'query is required'), + mode: z.enum(['exact', 'substring', 'regex', 'literal']).default('substring'), + lang: z.enum(['all', 'ts', 'java', 'python', 'go', 'rust', 'c', 'markdown', 'yaml']).default('all'), + path_pattern: z.string().optional(), + limit: z.number().int().positive().default(50), +}); + +const TaskBaseArgsSchema = z.object({ + path: z.string().min(1, 'path is required'), + query: z.string().optional(), + path_hints: z.array(z.string()).optional(), + symbol_hints: z.array(z.string()).optional(), +}); + +export const ImplementationContextArgsSchema = TaskBaseArgsSchema; +export const FindTestsArgsSchema = TaskBaseArgsSchema; +export const FindImpactArgsSchema = TaskBaseArgsSchema; +export const FindExtensionPointsArgsSchema = TaskBaseArgsSchema; + +export const ReviewContextForDiffArgsSchema = TaskBaseArgsSchema.extend({ + diff_text: z.string().min(1, 'diff_text is required'), +}); + +export type LexicalSearchArgs = z.infer<typeof LexicalSearchArgsSchema>; +export type ImplementationContextArgs = z.infer<typeof ImplementationContextArgsSchema>; +export type FindTestsArgs = z.infer<typeof FindTestsArgsSchema>; +export type FindImpactArgs = z.infer<typeof FindImpactArgsSchema>; +export type FindExtensionPointsArgs = z.infer<typeof FindExtensionPointsArgsSchema>; +export type ReviewContextForDiffArgs = z.infer<typeof ReviewContextForDiffArgsSchema>; diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 43e3c70..1f02470 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -14,24 +14,24 @@ import { ToolRegistry } from './registry'; import { allTools } from './tools'; import * as schemas from './schemas'; -export interface GitAIV2MCPServerOptions { +export interface CodeContextEngineMCPServerOptions { disableAccessLog?: boolean; transport?: 'stdio' | 'http'; port?: number; stateless?: boolean; } -export class GitAIV2MCPServer { +export class CodeContextEngineMCPServer { private server: Server; private startDir: string; - private options: GitAIV2MCPServerOptions; + private options: CodeContextEngineMCPServerOptions; private registry: ToolRegistry; -
constructor(startDir: string, options: GitAIV2MCPServerOptions = {}) { + constructor(startDir: string, options: CodeContextEngineMCPServerOptions = {}) { this.startDir = path.resolve(startDir); this.options = options; this.server = new Server( - { name: 'git-ai-v2', version: '2.0.0' }, + { name: 'code-context-engine', version: '2.0.0' }, { capabilities: { tools: {} } } ); this.registry = new ToolRegistry(); @@ -82,23 +82,16 @@ export class GitAIV2MCPServer { private setupHandlers() { const schemaMap: Record = { - get_repo: schemas.GetRepoArgsSchema, check_index: schemas.CheckIndexArgsSchema, rebuild_index: schemas.RebuildIndexArgsSchema, - pack_index: schemas.PackIndexArgsSchema, - unpack_index: schemas.UnpackIndexArgsSchema, - list_files: schemas.ListFilesArgsSchema, read_file: schemas.ReadFileArgsSchema, - search_symbols: schemas.SearchSymbolsArgsSchema, - semantic_search: schemas.SemanticSearchArgsSchema, repo_map: schemas.RepoMapArgsSchema, - ast_graph_query: schemas.AstGraphQueryArgsSchema, - ast_graph_find: schemas.AstGraphFindArgsSchema, - ast_graph_children: schemas.AstGraphChildrenArgsSchema, - ast_graph_refs: schemas.AstGraphRefsArgsSchema, - ast_graph_callers: schemas.AstGraphCallersArgsSchema, - ast_graph_callees: schemas.AstGraphCalleesArgsSchema, - ast_graph_chain: schemas.AstGraphChainArgsSchema, + lexical_search: schemas.LexicalSearchArgsSchema, + implementation_context: schemas.ImplementationContextArgsSchema, + find_tests: schemas.FindTestsArgsSchema, + find_impact: schemas.FindImpactArgsSchema, + find_extension_points: schemas.FindExtensionPointsArgsSchema, + review_context_for_diff: schemas.ReviewContextForDiffArgsSchema, }; for (const tool of allTools) { @@ -198,7 +191,7 @@ export class GitAIV2MCPServer { sessionIdGenerator: undefined, }); const serverInstance = new Server( - { name: 'git-ai-v2', version: '2.0.0' }, + { name: 'code-context-engine', version: '2.0.0' }, { capabilities: { tools: {} } } ); this.attachServerHandlers(serverInstance); 
@@ -225,7 +218,7 @@ export class GitAIV2MCPServer { }); const serverInstance = new Server( - { name: 'git-ai-v2', version: '2.0.0' }, + { name: 'code-context-engine', version: '2.0.0' }, { capabilities: { tools: {} } } ); this.attachServerHandlers(serverInstance); diff --git a/src/mcp/tools/index.ts b/src/mcp/tools/index.ts index 5462d45..8c3bb41 100644 --- a/src/mcp/tools/index.ts +++ b/src/mcp/tools/index.ts @@ -1,58 +1,33 @@ import type { ToolDefinition } from '../types'; import { - getRepoDefinition, checkIndexDefinition, rebuildIndexDefinition, - packIndexDefinition, - unpackIndexDefinition } from './repoTools'; +import { readFileDefinition } from './fileTools'; +import { repoMapDefinition } from './searchTools'; import { - listFilesDefinition, - readFileDefinition -} from './fileTools'; -import { - searchSymbolsDefinition, - semanticSearchDefinition, - repoMapDefinition -} from './searchTools'; -import { - astGraphQueryDefinition, - astGraphFindDefinition, - astGraphChildrenDefinition, - astGraphRefsDefinition, - astGraphCallersDefinition, - astGraphCalleesDefinition, - astGraphChainDefinition -} from './astGraphTools'; + findExtensionPointsDefinition, + findImpactDefinition, + findTestsDefinition, + implementationContextDefinition, + lexicalSearchDefinition, + reviewContextForDiffDefinition, +} from './taskTools'; export const allTools: ToolDefinition[] = [ - // Repo tools (5) - getRepoDefinition, checkIndexDefinition, rebuildIndexDefinition, - packIndexDefinition, - unpackIndexDefinition, - - // File tools (2) - listFilesDefinition, readFileDefinition, - - // Search tools (3) - searchSymbolsDefinition, - semanticSearchDefinition, repoMapDefinition, - - // AST graph tools (7) - astGraphQueryDefinition, - astGraphFindDefinition, - astGraphChildrenDefinition, - astGraphRefsDefinition, - astGraphCallersDefinition, - astGraphCalleesDefinition, - astGraphChainDefinition, + lexicalSearchDefinition, + implementationContextDefinition, + findTestsDefinition, + 
findImpactDefinition, + findExtensionPointsDefinition, + reviewContextForDiffDefinition, ]; export * from './repoTools'; export * from './fileTools'; export * from './searchTools'; -export * from './astGraphTools'; +export * from './taskTools'; \ No newline at end of file diff --git a/src/mcp/tools/taskTools.ts b/src/mcp/tools/taskTools.ts new file mode 100644 index 0000000..9f7e0c2 --- /dev/null +++ b/src/mcp/tools/taskTools.ts @@ -0,0 +1,108 @@ +import type { ToolDefinition } from '../types'; +import { + handleFindExtensionPoints, + handleFindImpact, + handleFindTests, + handleImplementationContext, + handleLexicalSearch, + handleReviewContextForDiff, +} from '../handlers/taskHandlers'; + +export const lexicalSearchDefinition: ToolDefinition = { + name: 'lexical_search', + description: 'Lexical-first code retrieval for agents. Supports exact token, substring, regex, literal, path filter, and language filter. Returns structured SearchMatch results.', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Repository root path' }, + query: { type: 'string', description: 'Search query' }, + mode: { type: 'string', enum: ['exact', 'substring', 'regex', 'literal'] }, + lang: { type: 'string', enum: ['all', 'ts', 'java', 'python', 'go', 'rust', 'c', 'markdown', 'yaml'] }, + path_pattern: { type: 'string', description: 'Glob pattern to constrain files' }, + limit: { type: 'number', default: 50 }, + }, + required: ['path', 'query'], + }, + handler: handleLexicalSearch, +}; + +export const implementationContextDefinition: ToolDefinition = { + name: 'implementation_context', + description: 'Build a ContextBundle for coding tasks. 
Primary task-oriented entrypoint for implementation work.', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Repository root path' }, + query: { type: 'string', description: 'Task or symbol query' }, + path_hints: { type: 'array', items: { type: 'string' } }, + symbol_hints: { type: 'array', items: { type: 'string' } }, + }, + required: ['path'], + }, + handler: handleImplementationContext, +}; + +export const findTestsDefinition: ToolDefinition = { + name: 'find_tests', + description: 'Find tests related to a task, symbol, or file path. Returns an EvidenceBundle for agent verification workflows.', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Repository root path' }, + query: { type: 'string', description: 'Task or symbol query' }, + path_hints: { type: 'array', items: { type: 'string' } }, + symbol_hints: { type: 'array', items: { type: 'string' } }, + }, + required: ['path'], + }, + handler: handleFindTests, +}; + +export const findImpactDefinition: ToolDefinition = { + name: 'find_impact', + description: 'Build a ContextBundle describing impacted code paths and related references for a proposed change.', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Repository root path' }, + query: { type: 'string', description: 'Changed symbol or concept' }, + path_hints: { type: 'array', items: { type: 'string' } }, + symbol_hints: { type: 'array', items: { type: 'string' } }, + }, + required: ['path'], + }, + handler: handleFindImpact, +}; + +export const findExtensionPointsDefinition: ToolDefinition = { + name: 'find_extension_points', + description: 'Find extension-oriented code such as interfaces, handlers, registries, hooks, adapters, and plugins.', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Repository root path' }, + query: { type: 'string', description: 'Extension-related query' }, + 
path_hints: { type: 'array', items: { type: 'string' } }, + symbol_hints: { type: 'array', items: { type: 'string' } }, + }, + required: ['path'], + }, + handler: handleFindExtensionPoints, +}; + +export const reviewContextForDiffDefinition: ToolDefinition = { + name: 'review_context_for_diff', + description: 'Build diff-aware review evidence. Extracts touched files, symbols, signature/import/config changes, and related code evidence.', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Repository root path' }, + diff_text: { type: 'string', description: 'Raw unified diff text' }, + query: { type: 'string', description: 'Optional extra review prompt' }, + path_hints: { type: 'array', items: { type: 'string' } }, + symbol_hints: { type: 'array', items: { type: 'string' } }, + }, + required: ['path', 'diff_text'], + }, + handler: handleReviewContextForDiff, +}; diff --git a/src/retrieval/lexical/fileScanner.ts b/src/retrieval/lexical/fileScanner.ts new file mode 100644 index 0000000..a300e60 --- /dev/null +++ b/src/retrieval/lexical/fileScanner.ts @@ -0,0 +1,51 @@ +import path from 'path'; +import { glob } from 'glob'; +import type { RuntimeLanguage } from './types'; + +const DEFAULT_IGNORES = [ + '**/.git/**', + '**/.git-ai/**', + '**/node_modules/**', + '**/dist/**', +]; + +const LANGUAGE_EXTENSIONS: Record, string[]> = { + ts: ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'], + java: ['.java'], + python: ['.py'], + go: ['.go'], + rust: ['.rs'], + c: ['.c', '.h'], + markdown: ['.md', '.mdx'], + yaml: ['.yml', '.yaml'], +}; + +export function inferRuntimeLanguage(filePath: string): RuntimeLanguage | null { + const ext = path.extname(filePath).toLowerCase(); + for (const [lang, exts] of Object.entries(LANGUAGE_EXTENSIONS)) { + if (exts.includes(ext)) { + return lang as RuntimeLanguage; + } + } + return null; +} + +export async function scanFiles( + repoRoot: string, + lang: RuntimeLanguage = 'all', + pathPattern = '**/*', +): 
Promise { + const files = await glob(pathPattern, { + cwd: repoRoot, + nodir: true, + dot: false, + ignore: DEFAULT_IGNORES, + posix: true, + }); + + if (lang === 'all') { + return files.filter((file) => inferRuntimeLanguage(file) !== null); + } + + return files.filter((file) => inferRuntimeLanguage(file) === lang); +} diff --git a/src/retrieval/lexical/lexicalSearch.ts b/src/retrieval/lexical/lexicalSearch.ts new file mode 100644 index 0000000..0a14bc2 --- /dev/null +++ b/src/retrieval/lexical/lexicalSearch.ts @@ -0,0 +1,112 @@ +import fs from 'fs/promises'; +import path from 'path'; +import type { MatchConfidence, MatchType, SearchMatch } from '../../domain/search'; +import type { LexicalSearchRequest, LexicalSearchResponse, LexicalSearchMode } from './types'; +import { inferRuntimeLanguage, scanFiles } from './fileScanner'; + +function escapeRegex(input: string): string { + return input.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function buildMatcher(query: string, mode: LexicalSearchMode): RegExp { + if (mode === 'regex') { + return new RegExp(query, 'g'); + } + + if (mode === 'literal' || mode === 'substring') { + return new RegExp(escapeRegex(query), 'g'); + } + + return new RegExp(`(^|[^A-Za-z0-9_$])(${escapeRegex(query)})(?=$|[^A-Za-z0-9_$])`, 'g'); +} + +function normalizeMode(mode?: LexicalSearchMode): LexicalSearchMode { + return mode ?? 
'substring'; +} + +function inferConfidence(mode: LexicalSearchMode): MatchConfidence { + if (mode === 'exact' || mode === 'literal') return 'high'; + if (mode === 'regex') return 'medium'; + return 'medium'; +} + +function inferScore(mode: LexicalSearchMode): number { + if (mode === 'exact') return 1; + if (mode === 'literal') return 0.98; + if (mode === 'regex') return 0.9; + return 0.8; +} + +function inferMatchType(mode: LexicalSearchMode): MatchType { + if (mode === 'exact') return 'exact_token'; + if (mode === 'literal') return 'literal'; + if (mode === 'regex') return 'regex'; + return 'substring'; +} + +function inferWhyMatched(mode: LexicalSearchMode, filePath: string): string { + if (mode === 'exact') return `Matched exact token in ${filePath}`; + if (mode === 'literal') return `Matched literal string in ${filePath}`; + if (mode === 'regex') return `Matched regex in ${filePath}`; + return `Matched substring in ${filePath}`; +} + +function inferSymbol(candidate: string): string | undefined { + return /^[A-Za-z_$][A-Za-z0-9_$]*$/.test(candidate) ? candidate : undefined; +} + +export async function lexicalSearch( + repoRoot: string, + request: LexicalSearchRequest, +): Promise { + const mode = normalizeMode(request.mode); + const limit = Math.max(1, request.limit ?? 20); + const files = await scanFiles(repoRoot, request.lang ?? 'all', request.pathPattern ?? '**/*'); + const matcher = buildMatcher(request.query, mode); + const matches: SearchMatch[] = []; + + for (const relPath of files) { + if (matches.length >= limit) break; + const absPath = path.join(repoRoot, relPath); + let content = ''; + try { + content = await fs.readFile(absPath, 'utf-8'); + } catch { + continue; + } + + const lines = content.split(/\r?\n/); + for (let lineIndex = 0; lineIndex < lines.length && matches.length < limit; lineIndex++) { + const line = lines[lineIndex] ?? 
''; + const lineMatches = Array.from(line.matchAll(matcher)); + if (lineMatches.length === 0) continue; + + const found = lineMatches[0]!; + const fullMatch = mode === 'exact' ? (found[2] ?? request.query) : found[0]; + const column = (found.index ?? 0) + 1; + matches.push({ + why_matched: inferWhyMatched(mode, relPath), + match_type: inferMatchType(mode), + evidence_type: 'content_match', + score: inferScore(mode), + path: relPath, + range: { + start: { line: lineIndex + 1, column }, + end: { line: lineIndex + 1, column: column + fullMatch.length - 1 }, + }, + symbol: inferSymbol(fullMatch), + preview: line.trim(), + confidence: inferConfidence(mode), + lang: inferRuntimeLanguage(relPath) ?? undefined, + }); + } + } + + return { + repoRoot: path.resolve(repoRoot), + query: request.query, + mode, + matches, + scannedFiles: files.length, + }; +} diff --git a/src/retrieval/lexical/types.ts b/src/retrieval/lexical/types.ts new file mode 100644 index 0000000..c887b0c --- /dev/null +++ b/src/retrieval/lexical/types.ts @@ -0,0 +1,26 @@ +import type { SearchResultSet } from '../../domain/search'; + +export type RuntimeLanguage = + | 'all' + | 'ts' + | 'java' + | 'python' + | 'go' + | 'rust' + | 'c' + | 'markdown' + | 'yaml'; + +export type LexicalSearchMode = 'exact' | 'substring' | 'regex' | 'literal'; + +export interface LexicalSearchRequest { + query: string; + mode?: LexicalSearchMode; + lang?: RuntimeLanguage; + pathPattern?: string; + limit?: number; +} + +export interface LexicalSearchResponse extends SearchResultSet { + scannedFiles: number; +} diff --git a/src/retrieval/runtime.ts b/src/retrieval/runtime.ts new file mode 100644 index 0000000..9d71068 --- /dev/null +++ b/src/retrieval/runtime.ts @@ -0,0 +1,99 @@ +import path from 'path'; +import type { ContextBundle, EvidenceBundle } from '../domain/context'; +import type { DiffTaskRequest, TaskRequest, TaskResult } from '../domain/tasks'; +import type { SearchResultSet } from '../domain/search'; +import type { 
LexicalSearchRequest, LexicalSearchResponse } from './lexical/types'; +import { lexicalSearch } from './lexical/lexicalSearch'; +import { + findContainingScope, + findDefinition, + findExports, + findImplementations, + findImporters, + findReferences, + type ContainingScopeRequest, + type SymbolNavigationRequest, +} from './symbol/navigation'; +import { + buildImplementationContext, + buildImpactContext, + buildReviewContextForDiff, + findExtensionPoints as buildExtensionPointsContext, + findTestsForTask, +} from '../tasks'; + +export interface CodeContextEngineOptions { + repoRoot: string; +} + +export interface CodeContextEngine { + repoRoot: string; + search: { + lexical(request: LexicalSearchRequest): Promise; + }; + navigation: { + findDefinition(request: SymbolNavigationRequest): Promise; + findReferences(request: SymbolNavigationRequest): Promise; + findImplementations(request: SymbolNavigationRequest): Promise; + findImporters(request: SymbolNavigationRequest): Promise; + findExports(request: SymbolNavigationRequest): Promise; + findContainingScope(request: ContainingScopeRequest): Promise; + }; + tasks: { + implementationContext(request: TaskRequest): Promise; + findTests(request: TaskRequest): Promise; + findImpact(request: TaskRequest): Promise; + findExtensionPoints(request: TaskRequest): Promise; + reviewContextForDiff(request: DiffTaskRequest): Promise; + }; +} + +export function createCodeContextEngine(options: CodeContextEngineOptions): CodeContextEngine { + const repoRoot = path.resolve(options.repoRoot); + + return { + repoRoot, + search: { + lexical(request: LexicalSearchRequest) { + return lexicalSearch(repoRoot, request); + }, + }, + navigation: { + findDefinition(request: SymbolNavigationRequest) { + return findDefinition(repoRoot, request); + }, + findReferences(request: SymbolNavigationRequest) { + return findReferences(repoRoot, request); + }, + findImplementations(request: SymbolNavigationRequest) { + return findImplementations(repoRoot, 
request); + }, + findImporters(request: SymbolNavigationRequest) { + return findImporters(repoRoot, request); + }, + findExports(request: SymbolNavigationRequest) { + return findExports(repoRoot, request); + }, + findContainingScope(request: ContainingScopeRequest) { + return findContainingScope(repoRoot, request); + }, + }, + tasks: { + implementationContext(request: TaskRequest) { + return buildImplementationContext(repoRoot, request); + }, + findTests(request: TaskRequest) { + return findTestsForTask(repoRoot, request); + }, + findImpact(request: TaskRequest) { + return buildImpactContext(repoRoot, request); + }, + findExtensionPoints(request: TaskRequest) { + return buildExtensionPointsContext(repoRoot, request); + }, + reviewContextForDiff(request: DiffTaskRequest) { + return buildReviewContextForDiff(repoRoot, request); + }, + }, + }; +} diff --git a/src/retrieval/symbol/graphCapabilities.ts b/src/retrieval/symbol/graphCapabilities.ts new file mode 100644 index 0000000..3b6116d --- /dev/null +++ b/src/retrieval/symbol/graphCapabilities.ts @@ -0,0 +1,94 @@ +import type { RuntimeLanguage } from '../lexical/types'; +import type { SearchMatch, SearchResultSet } from '../../domain/search'; +import { lexicalSearch } from '../lexical/lexicalSearch'; + +export interface SymbolNavigationRequest { + symbol: string; + lang?: RuntimeLanguage | 'auto'; + limit?: number; +} + +function escapeRegex(text: string): string { + return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function dedupeMatches(matches: SearchMatch[]): SearchMatch[] { + const seen = new Set(); + return matches.filter((match) => { + const key = [match.path, match.range.start.line, match.range.start.column, match.symbol, match.why_matched].join(':'); + if (seen.has(key)) return false; + seen.add(key); + return true; + }); +} + +async function heuristicSearch( + repoRoot: string, + request: SymbolNavigationRequest, + query: string, + why: string, + confidence: SearchMatch['confidence'], +): Promise { 
+ const result = await lexicalSearch(repoRoot, { + query, + mode: 'regex', + lang: request.lang === 'auto' ? 'all' : request.lang, + limit: request.limit ?? 25, + }); + + return { + repoRoot, + query: request.symbol, + mode: 'heuristic', + matches: dedupeMatches( + result.matches.map((match) => ({ + ...match, + evidence_type: 'graph_match', + why_matched: why, + confidence, + })), + ), + }; +} + +export async function findImplementationsHeuristically( + repoRoot: string, + request: SymbolNavigationRequest, +): Promise { + const symbol = escapeRegex(request.symbol); + return heuristicSearch( + repoRoot, + request, + `(?:implements|extends)\\s+${symbol}\\b`, + `Heuristic implementation match for ${request.symbol}`, + 'medium', + ); +} + +export async function findImportersHeuristically( + repoRoot: string, + request: SymbolNavigationRequest, +): Promise { + const symbol = escapeRegex(request.symbol); + return heuristicSearch( + repoRoot, + request, + `(?:import|require).*\\b${symbol}\\b`, + `Heuristic importer match for ${request.symbol}`, + 'medium', + ); +} + +export async function findExportsHeuristically( + repoRoot: string, + request: SymbolNavigationRequest, +): Promise { + const symbol = escapeRegex(request.symbol); + return heuristicSearch( + repoRoot, + request, + `(?:export\\s+(?:async\\s+)?(?:function|class|const|interface|type|enum)|module\\.exports|exports\\.).*\\b${symbol}\\b`, + `Heuristic export match for ${request.symbol}`, + 'medium', + ); +} diff --git a/src/retrieval/symbol/navigation.ts b/src/retrieval/symbol/navigation.ts new file mode 100644 index 0000000..943eb00 --- /dev/null +++ b/src/retrieval/symbol/navigation.ts @@ -0,0 +1,199 @@ +import type { SearchMatch, SearchResultSet } from '../../domain/search'; +import type { RuntimeLanguage } from '../lexical/types'; +import { + buildContainingScopeQuery, + buildDefinitionsByNameQuery, + buildFindReferencesQuery, + runAstGraphQuery, +} from '../../core/astGraphQuery'; +import { checkIndex, 
resolveLangs } from '../../core/indexCheck'; +import { + findExportsHeuristically, + findImplementationsHeuristically, + findImportersHeuristically, +} from './graphCapabilities'; + +export interface SymbolNavigationRequest { + symbol: string; + lang?: RuntimeLanguage | 'auto'; + limit?: number; +} + +export interface ContainingScopeRequest { + file: string; + line: number; + lang?: RuntimeLanguage | 'auto'; + limit?: number; +} + +function toGraphLangs(lang?: RuntimeLanguage | 'auto'): Array<'ts' | 'java'> { + if (!lang || lang === 'auto' || lang === 'all') return ['ts', 'java']; + if (lang === 'ts' || lang === 'java') return [lang]; + return []; +} + +function definitionToMatch(row: any[]): SearchMatch { + const [, file, lang, name, kind, signature, startLine, endLine] = row; + return { + why_matched: `AST definition match for ${name}`, + match_type: 'exact_token', + evidence_type: 'symbol_match', + score: 0.99, + path: String(file), + range: { + start: { line: Number(startLine), column: 1 }, + end: { line: Number(endLine), column: 1 }, + }, + symbol: String(name), + preview: [kind, name, signature].filter(Boolean).join(' '), + confidence: 'high', + lang: String(lang), + }; +} + +function referenceToMatch(target: string, row: any[]): SearchMatch { + const [file, line, col, refKind, , fromKind, fromName, fromLang] = row; + return { + why_matched: `AST reference match for ${target}`, + match_type: 'exact_token', + evidence_type: 'graph_match', + score: 0.92, + path: String(file), + range: { + start: { line: Number(line), column: Number(col) }, + end: { line: Number(line), column: Number(col) + String(target).length }, + }, + symbol: String(fromName), + preview: `${String(fromKind)} ${String(fromName)} references ${target} (${String(refKind)})`, + confidence: 'high', + lang: String(fromLang), + }; +} + +function containingScopeToMatch(row: any[]): SearchMatch { + const [file, lang, name, kind, signature, startLine, endLine] = row; + return { + why_matched: 
`Containing scope for line ${startLine}-${endLine}`, + match_type: 'path', + evidence_type: 'symbol_match', + score: 0.9, + path: String(file), + range: { + start: { line: Number(startLine), column: 1 }, + end: { line: Number(endLine), column: 1 }, + }, + symbol: String(name), + preview: [kind, name, signature].filter(Boolean).join(' '), + confidence: 'high', + lang: String(lang), + }; +} + +function uniqueMatches(matches: SearchMatch[]): SearchMatch[] { + const seen = new Set(); + return matches.filter((match) => { + const key = [match.path, match.range.start.line, match.range.end.line, match.symbol, match.preview].join(':'); + if (seen.has(key)) return false; + seen.add(key); + return true; + }); +} + +async function assertGraphReady(repoRoot: string): Promise { + const status = await checkIndex(repoRoot); + if (!status.ok) { + throw new Error('Index incompatible or missing'); + } +} + +export async function findDefinition( + repoRoot: string, + request: SymbolNavigationRequest, +): Promise { + await assertGraphReady(repoRoot); + const langs = toGraphLangs(request.lang); + const rows: any[] = []; + for (const lang of langs) { + const result = await runAstGraphQuery(repoRoot, buildDefinitionsByNameQuery(lang), { + name: request.symbol, + lang, + }); + rows.push(...(((result as any)?.rows ?? []) as any[])); + } + + return { + repoRoot, + query: request.symbol, + mode: 'definition', + matches: uniqueMatches(rows.slice(0, request.limit ?? 25).map(definitionToMatch)), + }; +} + +export async function findReferences( + repoRoot: string, + request: SymbolNavigationRequest, +): Promise { + await assertGraphReady(repoRoot); + const langs = toGraphLangs(request.lang); + const rows: any[] = []; + for (const lang of langs) { + const result = await runAstGraphQuery(repoRoot, buildFindReferencesQuery(lang), { + name: request.symbol, + lang, + }); + rows.push(...(((result as any)?.rows ?? 
[]) as any[])); + } + + return { + repoRoot, + query: request.symbol, + mode: 'references', + matches: uniqueMatches(rows.slice(0, request.limit ?? 50).map((row) => referenceToMatch(request.symbol, row))), + }; +} + +export async function findContainingScope( + repoRoot: string, + request: ContainingScopeRequest, +): Promise { + await assertGraphReady(repoRoot); + const langs = toGraphLangs(request.lang); + const rows: any[] = []; + for (const lang of langs) { + const result = await runAstGraphQuery(repoRoot, buildContainingScopeQuery(lang), { + file: request.file, + line: request.line, + lang, + }); + rows.push(...(((result as any)?.rows ?? []) as any[])); + } + + rows.sort((a, b) => { + const aStart = Number(a[5]); + const aEnd = Number(a[6]); + const bStart = Number(b[5]); + const bEnd = Number(b[6]); + const aSpan = aEnd - aStart; + const bSpan = bEnd - bStart; + return aSpan - bSpan || bStart - aStart; + }); + + return { + repoRoot, + query: `${request.file}:${request.line}`, + mode: 'containing_scope', + matches: uniqueMatches(rows.slice(0, request.limit ?? 
10).map(containingScopeToMatch)), + }; +} + +export async function findImplementations(repoRoot: string, request: SymbolNavigationRequest) { + return findImplementationsHeuristically(repoRoot, request); +} + +export async function findImporters(repoRoot: string, request: SymbolNavigationRequest) { + return findImportersHeuristically(repoRoot, request); +} + +export async function findExports(repoRoot: string, request: SymbolNavigationRequest) { + return findExportsHeuristically(repoRoot, request); +} diff --git a/src/tasks/diff/analyzeDiff.ts b/src/tasks/diff/analyzeDiff.ts new file mode 100644 index 0000000..e5e5642 --- /dev/null +++ b/src/tasks/diff/analyzeDiff.ts @@ -0,0 +1,59 @@ +import type { DiffInsight } from '../../domain/diff'; +import { parseDiff } from './parseDiff'; + +function unique(values: string[]): string[] { + return Array.from(new Set(values.filter(Boolean))); +} + +function extractLiterals(lines: string[]): string[] { + const literals: string[] = []; + const literalPattern = /["'`]([^"'`]+)["'`]/g; + for (const line of lines) { + for (const match of line.matchAll(literalPattern)) { + literals.push(match[1] ?? 
''); + } + } + return unique(literals); +} + +function extractSymbols(lines: string[]): string[] { + const identifiers: string[] = []; + const identifierPattern = /\b[A-Za-z_$][A-Za-z0-9_$]*\b/g; + for (const line of lines) { + for (const match of line.matchAll(identifierPattern)) { + identifiers.push(match[0]); + } + } + return unique(identifiers); +} + +export function analyzeDiff(diffText: string): DiffInsight { + const files = parseDiff(diffText); + const allAdded = files.flatMap((file) => file.addedLines); + const allRemoved = files.flatMap((file) => file.removedLines); + const allContext = files.flatMap((file) => file.contextLines); + const allLines = [...allAdded, ...allRemoved, ...allContext]; + + return { + touched_files: files.map((file) => ({ + path: file.path, + change_type: 'modified' as const, + })), + touched_symbols: extractSymbols(allLines), + signature_changes: unique( + [...allAdded, ...allRemoved].filter((line) => + /(function|class|interface|type|=>|\)\s*\{)/.test(line), + ), + ), + import_changes: unique( + [...allAdded, ...allRemoved].filter((line) => line.trim().startsWith('import ')), + ), + config_changes: unique( + files + .map((file) => file.path) + .filter((file) => /(package\.json|tsconfig|\.ya?ml$|\.json$|\.toml$|\.ini$)/i.test(file)), + ), + added_literals: extractLiterals(allAdded), + removed_literals: extractLiterals(allRemoved), + }; +} diff --git a/src/tasks/diff/parseDiff.ts b/src/tasks/diff/parseDiff.ts new file mode 100644 index 0000000..5b63dc1 --- /dev/null +++ b/src/tasks/diff/parseDiff.ts @@ -0,0 +1,40 @@ +export interface ParsedDiffFile { + path: string; + addedLines: string[]; + removedLines: string[]; + contextLines: string[]; +} + +export function parseDiff(diffText: string): ParsedDiffFile[] { + const files: ParsedDiffFile[] = []; + let current: ParsedDiffFile | null = null; + + for (const rawLine of diffText.split(/\r?\n/)) { + if (rawLine.startsWith('+++ b/')) { + current = { + path: rawLine.slice('+++ 
b/'.length).trim(), + addedLines: [], + removedLines: [], + contextLines: [], + }; + files.push(current); + continue; + } + + if (!current) continue; + if (rawLine.startsWith('@@')) continue; + if (rawLine.startsWith('+') && !rawLine.startsWith('+++')) { + current.addedLines.push(rawLine.slice(1)); + continue; + } + if (rawLine.startsWith('-') && !rawLine.startsWith('---')) { + current.removedLines.push(rawLine.slice(1)); + continue; + } + if (rawLine.startsWith(' ')) { + current.contextLines.push(rawLine.slice(1)); + } + } + + return files; +} diff --git a/src/tasks/extensions/extensionPoints.ts b/src/tasks/extensions/extensionPoints.ts new file mode 100644 index 0000000..38c79a0 --- /dev/null +++ b/src/tasks/extensions/extensionPoints.ts @@ -0,0 +1,27 @@ +import type { ContextBundle } from '../../domain/context'; +import type { TaskRequest } from '../../domain/tasks'; +import { lexicalSearch } from '../../retrieval/lexical/lexicalSearch'; + +export async function findExtensionPoints( + repoRoot: string, + request: TaskRequest, +): Promise { + const query = request.query ?? request.symbolHints?.[0] ?? 
'register'; + const matches = await lexicalSearch(repoRoot, { + query, + mode: 'substring', + limit: 20, + }); + + return { + task: 'find_extension_points', + summary: `Candidate extension points for ${query}`, + sections: [ + { + title: 'Extension Points', + summary: `Files that may expose extension hooks for ${query}`, + evidence: matches.matches, + }, + ], + }; +} diff --git a/src/tasks/impact/impactContext.ts b/src/tasks/impact/impactContext.ts new file mode 100644 index 0000000..97d5f65 --- /dev/null +++ b/src/tasks/impact/impactContext.ts @@ -0,0 +1,71 @@ +import type { ContextBundle } from '../../domain/context'; +import type { TaskRequest } from '../../domain/tasks'; +import { lexicalSearch } from '../../retrieval/lexical/lexicalSearch'; +import { + findImplementations, + findImporters, + findReferences, +} from '../../retrieval/symbol/navigation'; + +export async function buildImpactContext( + repoRoot: string, + request: TaskRequest, +): Promise { + const query = request.symbolHints?.[0] ?? request.query ?? ''; + const matches = await lexicalSearch(repoRoot, { + query, + mode: 'exact', + limit: 20, + }); + const references = query + ? await findReferences(repoRoot, { symbol: query, lang: 'auto', limit: 20 }).catch(() => ({ + repoRoot, + query, + mode: 'references', + matches: [], + })) + : { repoRoot, query, mode: 'references', matches: [] }; + const importers = query + ? await findImporters(repoRoot, { symbol: query, lang: 'auto', limit: 20 }).catch(() => ({ + repoRoot, + query, + mode: 'importers', + matches: [], + })) + : { repoRoot, query, mode: 'importers', matches: [] }; + const implementations = query + ? 
await findImplementations(repoRoot, { symbol: query, lang: 'auto', limit: 20 }).catch(() => ({ + repoRoot, + query, + mode: 'implementations', + matches: [], + })) + : { repoRoot, query, mode: 'implementations', matches: [] }; + + return { + task: 'find_impact', + summary: `Potential impact surface for ${query}`, + sections: [ + { + title: 'Candidate References', + summary: `Potential references for ${query}`, + evidence: matches.matches, + }, + { + title: 'Graph References', + summary: `Reference matches for ${query}`, + evidence: references.matches, + }, + { + title: 'Importers', + summary: `Importer matches for ${query}`, + evidence: importers.matches, + }, + { + title: 'Implementations', + summary: `Implementation matches for ${query}`, + evidence: implementations.matches, + }, + ], + }; +} diff --git a/src/tasks/implementation/implementationContext.ts b/src/tasks/implementation/implementationContext.ts new file mode 100644 index 0000000..bdd9ebb --- /dev/null +++ b/src/tasks/implementation/implementationContext.ts @@ -0,0 +1,78 @@ +import type { ContextBundle, ContextSection } from '../../domain/context'; +import type { TaskRequest } from '../../domain/tasks'; +import { lexicalSearch } from '../../retrieval/lexical/lexicalSearch'; +import { findDefinition, findExports } from '../../retrieval/symbol/navigation'; +import { findTestsForTask } from '../tests/testContext'; + +function toPathPattern(pathHints?: string[]): string { + if (!pathHints || pathHints.length === 0) return '**/*'; + return `${pathHints[0]}/**/*`; +} + +export async function buildImplementationContext( + repoRoot: string, + request: TaskRequest, +): Promise { + const query = request.symbolHints?.[0] ?? request.query ?? ''; + const primary = await lexicalSearch(repoRoot, { + query, + mode: 'exact', + pathPattern: toPathPattern(request.pathHints), + limit: 20, + }); + const definitions = query + ? 
await findDefinition(repoRoot, { symbol: query, lang: 'auto', limit: 10 }).catch(() => ({ + repoRoot, + query, + mode: 'definition', + matches: [], + })) + : { repoRoot, query, mode: 'definition', matches: [] }; + const exports = query + ? await findExports(repoRoot, { symbol: query, lang: 'auto', limit: 10 }).catch(() => ({ + repoRoot, + query, + mode: 'exports', + matches: [], + })) + : { repoRoot, query, mode: 'exports', matches: [] }; + const tests = await findTestsForTask(repoRoot, request); + + const sections: ContextSection[] = [ + { + title: 'Primary Matches', + summary: `Primary implementation matches for ${query}`, + evidence: primary.matches, + }, + ]; + + if (tests.evidence.length > 0) { + sections.push({ + title: 'Related Tests', + summary: `Related tests for ${query}`, + evidence: tests.evidence, + }); + } + + if (definitions.matches.length > 0) { + sections.push({ + title: 'Definitions', + summary: `Definitions for ${query}`, + evidence: definitions.matches, + }); + } + + if (exports.matches.length > 0) { + sections.push({ + title: 'Exports', + summary: `Exports related to ${query}`, + evidence: exports.matches, + }); + } + + return { + task: 'implementation_context', + summary: `Implementation context for ${query}`, + sections, + }; +} diff --git a/src/tasks/index.ts b/src/tasks/index.ts new file mode 100644 index 0000000..2202d9f --- /dev/null +++ b/src/tasks/index.ts @@ -0,0 +1,5 @@ +export { buildImplementationContext } from './implementation/implementationContext'; +export { findTestsForTask } from './tests/testContext'; +export { buildReviewContextForDiff } from './review/reviewEvidenceBuilder'; +export { buildImpactContext } from './impact/impactContext'; +export { findExtensionPoints } from './extensions/extensionPoints'; diff --git a/src/tasks/review/reviewEvidenceBuilder.ts b/src/tasks/review/reviewEvidenceBuilder.ts new file mode 100644 index 0000000..73c4f93 --- /dev/null +++ b/src/tasks/review/reviewEvidenceBuilder.ts @@ -0,0 +1,55 @@ 
+import type { EvidenceBundle } from '../../domain/context'; +import type { DiffTaskRequest, TaskResult } from '../../domain/tasks'; +import type { SearchMatch } from '../../domain/search'; +import { lexicalSearch } from '../../retrieval/lexical/lexicalSearch'; +import { analyzeDiff } from '../diff/analyzeDiff'; +import { findTestsForTask } from '../tests/testContext'; + +function fileEvidence(path: string): SearchMatch { + return { + why_matched: `Touched file from diff: ${path}`, + match_type: 'path', + evidence_type: 'path_match', + score: 0.95, + path, + range: { start: { line: 1, column: 1 }, end: { line: 1, column: 1 } }, + preview: path, + confidence: 'high', + }; +} + +export async function buildReviewContextForDiff( + repoRoot: string, + request: DiffTaskRequest, +): Promise { + const diff = analyzeDiff(request.diffText); + const evidence: SearchMatch[] = diff.touched_files.map((file) => fileEvidence(file.path)); + + if (diff.touched_symbols.length > 0) { + const primary = await lexicalSearch(repoRoot, { + query: diff.touched_symbols[0]!, + mode: 'exact', + pathPattern: diff.touched_files[0]?.path ?? '**/*', + limit: 10, + }); + evidence.push(...primary.matches); + } + + const tests = await findTestsForTask(repoRoot, { + task: 'find_tests', + query: diff.touched_symbols[0], + pathHints: diff.touched_files.map((file) => file.path.replace(/\/[^/]+$/, '')), + symbolHints: diff.touched_symbols.slice(0, 1), + }); + evidence.push(...tests.evidence); + + const bundle: EvidenceBundle = { + task: 'review_pr', + summary: `Review context for ${diff.touched_files.length} changed file(s)`, + evidence, + related_paths: diff.touched_files.map((file) => file.path), + diagnostics: diff.added_literals.length > 0 ? 
[`Added literals: ${diff.added_literals.join(', ')}`] : undefined, + }; + + return { bundle, diff }; +} diff --git a/src/tasks/tests/testContext.ts b/src/tasks/tests/testContext.ts new file mode 100644 index 0000000..b4d9f03 --- /dev/null +++ b/src/tasks/tests/testContext.ts @@ -0,0 +1,63 @@ +import { glob } from 'glob'; +import type { EvidenceBundle } from '../../domain/context'; +import type { TaskRequest } from '../../domain/tasks'; +import type { SearchMatch } from '../../domain/search'; +import { lexicalSearch } from '../../retrieval/lexical/lexicalSearch'; + +function isTestPath(filePath: string): boolean { + return /\.(test|spec)\.[^.]+$/i.test(filePath); +} + +function filterByPathHints(matches: SearchMatch[], pathHints?: string[]): SearchMatch[] { + if (!pathHints || pathHints.length === 0) return matches; + return matches.filter((match) => pathHints.some((hint) => match.path.startsWith(hint))); +} + +export async function findTestsForTask( + repoRoot: string, + request: TaskRequest, +): Promise { + const query = request.symbolHints?.[0] ?? request.query ?? ''; + const lexical = query + ? 
await lexicalSearch(repoRoot, { + query, + mode: 'exact', + pathPattern: '**/*', + limit: 50, + }) + : { matches: [] }; + + let evidence = filterByPathHints( + lexical.matches.filter((match) => isTestPath(match.path)), + request.pathHints, + ); + + if (evidence.length === 0 && request.pathHints?.length) { + const candidates = await glob('**/*', { + cwd: repoRoot, + nodir: true, + posix: true, + ignore: ['**/.git/**', '**/.git-ai/**', '**/node_modules/**', '**/dist/**'], + }); + evidence = candidates + .filter((candidate) => isTestPath(candidate)) + .filter((candidate) => request.pathHints?.some((hint) => candidate.startsWith(hint))) + .map((candidate) => ({ + why_matched: `Matched test file path for ${query || 'task'}`, + match_type: 'path' as const, + evidence_type: 'path_match' as const, + score: 0.7, + path: candidate, + range: { start: { line: 1, column: 1 }, end: { line: 1, column: 1 } }, + preview: candidate, + confidence: 'medium' as const, + })); + } + + return { + task: 'find_tests', + summary: query ? `Found tests related to ${query}` : 'Found candidate tests', + evidence, + related_paths: evidence.map((match) => match.path), + }; +} diff --git a/templates/agents/common/skills/git-ai-code-search/SKILL.md b/templates/agents/common/skills/git-ai-code-search/SKILL.md index c0a4f2d..157643e 100644 --- a/templates/agents/common/skills/git-ai-code-search/SKILL.md +++ b/templates/agents/common/skills/git-ai-code-search/SKILL.md @@ -1,47 +1,39 @@ --- name: git-ai-code-search description: | - Semantic code search and codebase understanding using git-ai MCP tools. Use when: (1) Searching for symbols, functions, or semantic concepts, (2) Understanding project architecture, (3) Analyzing call graphs and code relationships. Triggers: "find X", "search for X", "who calls X", "where is X", "understand this codebase". + Code Context Engine skill for local code retrieval and task-oriented context building. 
Use when searching for code, preparing implementation work, reviewing diffs, finding tests, or analyzing impact in a local repository. --- -# git-ai Code Search - -Semantic code search with AST analysis and change tracking. - -## Quick Start - -**For Agents** - 3-step pattern: -``` -1. check_index({ path }) → verify index exists -2. semantic_search({ path, query }) → find relevant code -3. read_file({ path, file }) → read the actual code -``` - -**For Users** - build index first: -```bash -cd your-repo -git-ai ai index # build index -git-ai ai semantic "authentication logic" # search -``` - -## Core Tools - -| Need | Tool | Example | -|------|------|---------| -| Search by meaning | `semantic_search` | `{ path, query: "error handling", topk: 10 }` | -| Search by name | `search_symbols` | `{ path, query: "handleAuth", mode: "substring" }` | -| Who calls X | `ast_graph_callers` | `{ path, name: "processOrder" }` | -| What X calls | `ast_graph_callees` | `{ path, name: "processOrder" }` | -| Call chain | `ast_graph_chain` | `{ path, name: "main", direction: "downstream" }` | -| Project overview | `repo_map` | `{ path, max_files: 20 }` | - -## Rules - -1. **Always pass `path`** - Every tool requires explicit repository path -2. **Check index first** - Run `check_index` before search tools -3. **Read before modify** - Use `read_file` to understand code before changes - -## References - -- [Tool Documentation](references/tools.md) -- [Behavioral Constraints](references/constraints.md) +# Code Context Engine Skill + +## Recommended Workflow + +1. `check_index({ path })` +2. `repo_map({ path })` when you need a repo overview +3. `lexical_search({ path, query, ... })` for initial precise recall +4. Use the task tools as the main working surface: + - `implementation_context` + - `find_tests` + - `find_impact` + - `find_extension_points` + - `review_context_for_diff` +5. 
`read_file({ path, file })` before making or suggesting edits + +## Retrieval Order + +- lexical / symbol first +- graph expand second +- semantic rerank last + +## Thin MCP Surface + +- `check_index` +- `rebuild_index` +- `read_file` +- `repo_map` +- `lexical_search` +- `implementation_context` +- `find_tests` +- `find_impact` +- `find_extension_points` +- `review_context_for_diff` diff --git a/test/agentBundle.test.ts b/test/agentBundle.test.ts new file mode 100644 index 0000000..cf972ef --- /dev/null +++ b/test/agentBundle.test.ts @@ -0,0 +1,114 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'node:os'; +import path from 'node:path'; +import fs from 'node:fs/promises'; +// eslint-disable-next-line @typescript-eslint/ban-ts-comment +// @ts-ignore dist module has no typings +import { createCodeContextEngine } from '../dist/src/index.js'; + +async function writeFixture(root: string, relPath: string, content: string): Promise { + const absPath = path.join(root, relPath); + await fs.mkdir(path.dirname(absPath), { recursive: true }); + await fs.writeFile(absPath, content, 'utf-8'); +} + +async function createProjectFixture(): Promise { + const root = await fs.mkdtemp(path.join(os.tmpdir(), 'agent-bundle-')); + await writeFixture(root, 'src/user/repository.ts', [ + 'export interface UserRepository {', + ' findById(id: string): Promise;', + ' save(user: User): Promise;', + '}', + '', + 'export interface User {', + ' id: string;', + ' email: string;', + ' name: string;', + '}', + '', + ].join('\n')); + await writeFixture(root, 'src/user/service.ts', [ + "import { UserRepository, User } from './repository';", + '', + 'export class UserService {', + ' constructor(private repo: UserRepository) {}', + '', + ' async getUser(id: string): Promise {', + ' return this.repo.findById(id);', + ' }', + '', + ' async updateEmail(id: string, email: string): Promise {', + ' const user = await this.repo.findById(id);', + " if (!user) throw new 
Error('user not found');", + ' user.email = email;', + ' await this.repo.save(user);', + ' }', + '}', + '', + ].join('\n')); + await writeFixture(root, 'src/user/controller.ts', [ + "import { UserService } from './service';", + '', + 'export class UserController {', + ' constructor(private svc: UserService) {}', + '', + ' async handleGetUser(id: string) {', + ' return this.svc.getUser(id);', + ' }', + '}', + '', + ].join('\n')); + await writeFixture(root, 'test/user/service.test.ts', [ + "import { UserService } from '../../src/user/service';", + '', + "test('getUser delegates to repository', async () => {", + " const mockRepo = { findById: async () => ({ id: '1', email: 'a@b.com', name: 'Alice' }), save: async () => {} };", + ' const svc = new UserService(mockRepo as any);', + " const user = await svc.getUser('1');", + " expect(user?.email).toBe('a@b.com');", + '});', + '', + ].join('\n')); + return root; +} + +test('Agent scenario: review agent builds context from a PR diff', async () => { + const repoRoot = await createProjectFixture(); + const engine = createCodeContextEngine({ repoRoot }); + const diff = [ + 'diff --git a/src/user/service.ts b/src/user/service.ts', + '--- a/src/user/service.ts', + '+++ b/src/user/service.ts', + '@@ -9,6 +9,11 @@ export class UserService {', + ' return this.repo.findById(id);', + ' }', + ' ', + '+ async deleteUser(id: string): Promise {', + '+ const user = await this.repo.findById(id);', + "+ if (!user) throw new Error('user not found');", + '+ // TODO: implement delete logic', + '+ }', + '+', + ' async updateEmail(id: string, email: string): Promise {', + ].join('\n'); + + const result = await engine.tasks.reviewContextForDiff({ task: 'review_pr', diffText: diff }); + assert.ok(result.diff); + assert.ok(result.bundle); + assert.equal(result.bundle?.task, 'review_pr'); +}); + +test('Agent scenario: coding agent builds implementation context', async () => { + const repoRoot = await createProjectFixture(); + const engine = 
createCodeContextEngine({ repoRoot }); + const bundle = await engine.tasks.implementationContext({ + task: 'implementation_context', + query: 'UserRepository', + symbolHints: ['UserRepository', 'findById'], + pathHints: ['src/user'], + }); + const allEvidence = bundle.sections.flatMap((section) => section.evidence); + assert.ok(allEvidence.some((match) => match.path === 'src/user/repository.ts')); + assert.ok(allEvidence.some((match) => match.path === 'src/user/service.ts')); +}); diff --git a/test/diffReviewContext.test.ts b/test/diffReviewContext.test.ts new file mode 100644 index 0000000..8f9bc08 --- /dev/null +++ b/test/diffReviewContext.test.ts @@ -0,0 +1,71 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'node:os'; +import path from 'node:path'; +import fs from 'node:fs/promises'; +// eslint-disable-next-line @typescript-eslint/ban-ts-comment +// @ts-ignore dist module has no typings +import { createCodeContextEngine } from '../dist/src/index.js'; + +async function writeFixture(root: string, relPath: string, content: string): Promise { + const absPath = path.join(root, relPath); + await fs.mkdir(path.dirname(absPath), { recursive: true }); + await fs.writeFile(absPath, content, 'utf-8'); +} + +async function createFixtureRepo(): Promise { + const root = await fs.mkdtemp(path.join(os.tmpdir(), 'code-context-diff-')); + await writeFixture( + root, + 'src/auth/service.ts', + [ + 'export async function authenticateUser(email: string, password: string) {', + " if (!email) throw new Error('missing email');", + " return `${email}:${password}`;", + '}', + '', + ].join('\n'), + ); + await writeFixture( + root, + 'src/auth/service.test.ts', + [ + "import { authenticateUser } from './service';", + '', + "test('authenticateUser rejects empty email', async () => {", + " await expect(authenticateUser('', 'pw')).rejects.toThrow('missing email');", + '});', + '', + ].join('\n'), + ); + return root; +} + +test('Code Context Engine 
builds review context from a diff', async () => { + const repoRoot = await createFixtureRepo(); + const engine = createCodeContextEngine({ repoRoot }); + const diffText = [ + 'diff --git a/src/auth/service.ts b/src/auth/service.ts', + '--- a/src/auth/service.ts', + '+++ b/src/auth/service.ts', + '@@ -1,4 +1,5 @@', + ' export async function authenticateUser(email: string, password: string) {', + " if (!email) throw new Error('missing email');", + '+ if (!password) throw new Error(\"missing password\");', + ' return `${email}:${password}`;', + ' }', + '', + ].join('\n'); + + const result = await engine.tasks.reviewContextForDiff({ + task: 'review_pr', + diffText, + }); + + assert.equal(result.bundle.task, 'review_pr'); + assert.equal(result.diff.touched_files[0]?.path, 'src/auth/service.ts'); + assert.ok(result.diff.touched_symbols.includes('authenticateUser')); + assert.ok(result.diff.added_literals.includes('missing password')); + assert.ok(result.bundle.evidence.some((match: any) => match.path === 'src/auth/service.ts')); + assert.ok(result.bundle.evidence.some((match: any) => match.path === 'src/auth/service.test.ts')); +}); diff --git a/test/mcp.smoke.test.js b/test/mcp.smoke.test.js index 351cf75..2e3f341 100644 --- a/test/mcp.smoke.test.js +++ b/test/mcp.smoke.test.js @@ -5,7 +5,7 @@ const path = require('node:path'); const fs = require('node:fs/promises'); const { spawnSync } = require('node:child_process'); -const CLI = path.resolve(__dirname, '..', 'dist', 'bin', 'git-ai.js'); +const CLI = path.resolve(__dirname, '..', 'dist', 'bin', 'code-context-engine.js'); function runOk(cmd, args, cwd) { const res = spawnSync(cmd, args, { cwd, encoding: 'utf-8' }); @@ -37,17 +37,15 @@ async function createRepo(baseDir, name, files) { return repoDir; } -test('mcp server supports atomic tool calls via path arg', async () => { +test('Code Context Engine MCP exposes thin runtime-centric tools', async () => { const { Client } = await 
import('@modelcontextprotocol/sdk/client/index.js'); const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js'); - const tmp = await fs.mkdtemp(path.join(os.tmpdir(), 'git-ai-mcp-')); + const tmp = await fs.mkdtemp(path.join(os.tmpdir(), 'cce-mcp-')); const repoDir = await createRepo(tmp, 'repo', { 'src/foo.ts': [ - 'export class Foo {', - ' hello(name: string) {', - ' return `hello ${name}`;', - ' }', + 'export interface FooHandler {', + ' handle(name: string): string;', '}', '', 'export function helloWorld() {', @@ -59,11 +57,16 @@ test('mcp server supports atomic tool calls via path arg', async () => { '}', '', ].join('\n'), - 'README.md': '# test repo\n', + 'test/foo.test.ts': [ + "import { helloWorld } from '../src/foo';", + '', + "test('helloWorld', () => {", + " expect(helloWorld()).toContain('hello');", + '});', + '', + ].join('\n'), }); - const repoRootReal = await fs.realpath(repoDir); - // Pre-index the repo since index_repo tool is removed from MCP runOk('node', [CLI, 'ai', 'index', '--dim', '64', '--overwrite'], repoDir); const transport = new StdioClientTransport({ @@ -72,184 +75,28 @@ test('mcp server supports atomic tool calls via path arg', async () => { stderr: 'ignore', }); - const client = new Client({ name: 'git-ai-test', version: '0.0.0' }, { capabilities: {} }); + const client = new Client({ name: 'cce-test', version: '0.0.0' }, { capabilities: {} }); try { await client.connect(transport); const res = await client.listTools(); - const toolNames = new Set((res.tools ?? 
[]).map(t => t.name)); - - assert.ok(toolNames.has('search_symbols')); - assert.ok(toolNames.has('semantic_search')); - assert.ok(toolNames.has('repo_map')); - assert.ok(toolNames.has('get_repo')); - assert.ok(toolNames.has('check_index')); - assert.ok(toolNames.has('pack_index')); - assert.ok(toolNames.has('unpack_index')); - assert.ok(toolNames.has('list_files')); - assert.ok(toolNames.has('read_file')); - assert.ok(toolNames.has('ast_graph_query')); - assert.ok(toolNames.has('ast_graph_find')); - assert.ok(toolNames.has('ast_graph_children')); - assert.ok(toolNames.has('ast_graph_refs')); - assert.ok(toolNames.has('ast_graph_callers')); - assert.ok(toolNames.has('ast_graph_callees')); - assert.ok(toolNames.has('ast_graph_chain')); - - { - const call = await client.callTool({ name: 'get_repo', arguments: { path: repoDir } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.equal(parsed.ok, true); - assert.equal(await fs.realpath(parsed.repoRoot), repoRootReal); - } - - { - const call = await client.callTool({ name: 'check_index', arguments: { path: repoDir } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.equal(parsed.ok, true); - assert.ok(parsed.expected && parsed.expected.index_schema_version); - } - - { - const call = await client.callTool({ - name: 'search_symbols', - arguments: { - path: repoDir, - query: 'hello', - mode: 'substring', - case_insensitive: true, - limit: 10, - }, - }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? 
JSON.parse(text) : null; - assert.ok(parsed && Array.isArray(parsed.rows)); - assert.ok(parsed.rows.length > 0); - } - - { - const call = await client.callTool({ - name: 'search_symbols', - arguments: { - path: repoDir, - query: 'hello', - mode: 'substring', - case_insensitive: true, - limit: 10, - with_repo_map: true, - repo_map_max_files: 5, - repo_map_max_symbols: 2, - }, - }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.ok(parsed && parsed.repo_map && parsed.repo_map.enabled === true); - assert.ok(Array.isArray(parsed.repo_map.files)); - assert.ok(parsed.repo_map.files.length > 0); - } - - { - const call = await client.callTool({ name: 'semantic_search', arguments: { path: repoDir, query: 'hello world', topk: 3 } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.ok(parsed && Array.isArray(parsed.rows)); - assert.ok(parsed.rows.length > 0); - } - - { - const call = await client.callTool({ name: 'repo_map', arguments: { path: repoDir, max_files: 5, max_symbols: 2 } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.ok(parsed && parsed.repo_map && parsed.repo_map.enabled === true); - assert.ok(Array.isArray(parsed.repo_map.files)); - assert.ok(parsed.repo_map.files.length > 0); - } - - { - const call = await client.callTool({ name: 'list_files', arguments: { path: repoDir, pattern: 'src/**/*', limit: 50 } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.ok(parsed && Array.isArray(parsed.files)); - assert.ok(parsed.files.includes('src/foo.ts')); - } - - { - const call = await client.callTool({ name: 'read_file', arguments: { path: repoDir, file: 'src/foo.ts', start_line: 1, end_line: 20 } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? 
JSON.parse(text) : null; - assert.ok(parsed && typeof parsed.text === 'string'); - assert.ok(parsed.text.includes('export class Foo')); - } - - { - const call = await client.callTool({ name: 'ast_graph_query', arguments: { path: repoDir, query: "?[file] := *ast_symbol{ref_id, file, lang, name: 'Foo', kind, signature, start_line, end_line}" } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.ok(parsed && parsed.result && Array.isArray(parsed.result.rows)); - assert.ok(parsed.result.rows.length > 0); - } - - { - const call = await client.callTool({ name: 'ast_graph_find', arguments: { path: repoDir, prefix: 'Fo', limit: 10 } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.ok(parsed && parsed.result && Array.isArray(parsed.result.rows)); - assert.ok(parsed.result.rows.length > 0); - } - - { - const call = await client.callTool({ name: 'ast_graph_children', arguments: { path: repoDir, id: 'src/foo.ts', as_file: true } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.ok(parsed && parsed.result && Array.isArray(parsed.result.rows)); - assert.ok(parsed.result.rows.length > 0); - } - - { - const call = await client.callTool({ name: 'ast_graph_refs', arguments: { path: repoDir, name: 'helloWorld', limit: 50 } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.ok(parsed && parsed.result && Array.isArray(parsed.result.rows)); - assert.ok(parsed.result.rows.some(r => String(r[3] ?? '') === 'call')); - } - - { - const call = await client.callTool({ name: 'ast_graph_callers', arguments: { path: repoDir, name: 'helloWorld', limit: 50 } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? 
JSON.parse(text) : null; - assert.ok(parsed && parsed.result && Array.isArray(parsed.result.rows)); - assert.ok(parsed.result.rows.length > 0); - } - - { - const call = await client.callTool({ name: 'ast_graph_chain', arguments: { path: repoDir, name: 'run', direction: 'downstream', max_depth: 2, limit: 200 } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.ok(parsed && parsed.result && Array.isArray(parsed.result.rows)); - assert.ok(parsed.result.rows.length > 0); - } - - { - const call = await client.callTool({ name: 'pack_index', arguments: { path: repoDir, lfs: true } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.equal(parsed.ok, true); - const stat = await fs.stat(parsed.archivePath); - assert.ok(stat.size > 0); - } - - { - await fs.rm(path.join(repoDir, '.git-ai', 'lancedb'), { recursive: true, force: true }); - const call = await client.callTool({ name: 'unpack_index', arguments: { path: repoDir } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.equal(parsed.ok, true); - const stat = await fs.stat(path.join(repoDir, '.git-ai', 'lancedb')); - assert.ok(stat.isDirectory()); - } + const toolNames = new Set((res.tools ?? 
[]).map((tool) => tool.name)); + assert.deepEqual( + [...toolNames].sort(), + [ + 'check_index', + 'find_extension_points', + 'find_impact', + 'find_tests', + 'implementation_context', + 'lexical_search', + 'read_file', + 'rebuild_index', + 'repo_map', + 'review_context_for_diff', + ].sort(), + ); } finally { - await transport.close(); + await client.close().catch(() => {}); } }); diff --git a/test/runtime.navigation.test.ts b/test/runtime.navigation.test.ts new file mode 100644 index 0000000..7d0ec97 --- /dev/null +++ b/test/runtime.navigation.test.ts @@ -0,0 +1,100 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'node:os'; +import path from 'node:path'; +import fs from 'node:fs/promises'; +import { spawnSync } from 'node:child_process'; +// eslint-disable-next-line @typescript-eslint/ban-ts-comment +// @ts-ignore dist module has no typings +import { createCodeContextEngine } from '../dist/src/index.js'; + +const CLI = path.resolve(process.cwd(), 'dist', 'bin', 'code-context-engine.js'); + +function runOk(cmd: string, args: string[], cwd: string) { + const res = spawnSync(cmd, args, { cwd, encoding: 'utf-8' }); + if (res.error) throw res.error; + if (res.status !== 0) { + throw new Error(`${cmd} ${args.join(' ')} failed\n${res.stdout}\n${res.stderr}`); + } +} + +async function writeFixture(root: string, relPath: string, content: string): Promise { + const absPath = path.join(root, relPath); + await fs.mkdir(path.dirname(absPath), { recursive: true }); + await fs.writeFile(absPath, content, 'utf-8'); +} + +async function createIndexedRepo(): Promise { + const root = await fs.mkdtemp(path.join(os.tmpdir(), 'code-context-nav-')); + runOk('git', ['init', '-b', 'main'], root); + runOk('git', ['config', 'user.email', 'test@example.com'], root); + runOk('git', ['config', 'user.name', 'Test User'], root); + + await writeFixture(root, 'src/auth/provider.ts', [ + 'export interface AuthProvider {', + ' authenticate(token: string): 
Promise;', + '}', + '', + 'export class EmailAuthProvider implements AuthProvider {', + ' async authenticate(token: string): Promise {', + ' return token.length > 0;', + ' }', + '}', + '', + 'export function createProvider(): AuthProvider {', + ' return new EmailAuthProvider();', + '}', + '', + ].join('\n')); + + await writeFixture(root, 'src/auth/service.ts', [ + "import { AuthProvider, createProvider } from './provider';", + '', + 'export async function authenticateUser(provider: AuthProvider, token: string) {', + ' return provider.authenticate(token);', + '}', + '', + 'export async function loginWithDefaultProvider(token: string) {', + ' const provider = createProvider();', + ' return authenticateUser(provider, token);', + '}', + '', + ].join('\n')); + + runOk('git', ['add', '.'], root); + runOk('git', ['commit', '-m', 'init'], root); + runOk('node', [CLI, 'ai', 'index', '--overwrite'], root); + return root; +} + +test('Code Context Engine navigation finds definitions, references, and scopes', async () => { + const repoRoot = await createIndexedRepo(); + const engine = createCodeContextEngine({ repoRoot }); + + const definitions = await engine.navigation.findDefinition({ symbol: 'authenticateUser', lang: 'ts' }); + assert.ok(definitions.matches.some((match) => match.path === 'src/auth/service.ts')); + + const references = await engine.navigation.findReferences({ symbol: 'authenticateUser', lang: 'ts' }); + assert.ok(references.matches.some((match) => match.path === 'src/auth/service.ts')); + + const containingScope = await engine.navigation.findContainingScope({ + file: 'src/auth/service.ts', + line: 7, + lang: 'ts', + }); + assert.ok(containingScope.matches.some((match) => match.symbol === 'loginWithDefaultProvider')); +}); + +test('Code Context Engine navigation finds implementations, importers, and exports', async () => { + const repoRoot = await createIndexedRepo(); + const engine = createCodeContextEngine({ repoRoot }); + + const implementations = await 
engine.navigation.findImplementations({ symbol: 'AuthProvider', lang: 'ts' }); + assert.ok(implementations.matches.some((match) => match.path === 'src/auth/provider.ts')); + + const importers = await engine.navigation.findImporters({ symbol: 'AuthProvider', lang: 'ts' }); + assert.ok(importers.matches.some((match) => match.path === 'src/auth/service.ts')); + + const exports = await engine.navigation.findExports({ symbol: 'createProvider', lang: 'ts' }); + assert.ok(exports.matches.some((match) => match.path === 'src/auth/provider.ts')); +}); diff --git a/test/runtime.search.test.ts b/test/runtime.search.test.ts new file mode 100644 index 0000000..9ec715c --- /dev/null +++ b/test/runtime.search.test.ts @@ -0,0 +1,115 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'node:os'; +import path from 'node:path'; +import fs from 'node:fs/promises'; +// eslint-disable-next-line @typescript-eslint/ban-ts-comment +// @ts-ignore dist module has no typings +import { createCodeContextEngine } from '../dist/src/index.js'; + +async function writeFixture(root: string, relPath: string, content: string): Promise { + const absPath = path.join(root, relPath); + await fs.mkdir(path.dirname(absPath), { recursive: true }); + await fs.writeFile(absPath, content, 'utf-8'); +} + +async function createFixtureRepo(): Promise { + const root = await fs.mkdtemp(path.join(os.tmpdir(), 'code-context-engine-')); + await writeFixture( + root, + 'src/auth/service.ts', + [ + 'export async function authenticateUser(email: string, password: string) {', + " if (!email) throw new Error('missing email');", + " return `${email}:${password}`;", + '}', + '', + ].join('\n'), + ); + await writeFixture( + root, + 'src/payment/client.ts', + [ + 'export async function retryCharge() {', + ' return 3;', + '}', + '', + 'export function failWithLiteral() {', + ' throw new Error("boom?");', + '}', + '', + ].join('\n'), + ); + await writeFixture( + root, + 'docs/notes.md', + [ + 
'# Notes', + '', + 'authenticateUser is documented here.', + 'retryCharge is mentioned for docs only.', + '', + ].join('\n'), + ); + return root; +} + +test('Code Context Engine lexical search returns structured exact-token matches', async () => { + const repoRoot = await createFixtureRepo(); + const engine = createCodeContextEngine({ repoRoot }); + + const result = await engine.search.lexical({ + query: 'authenticateUser', + mode: 'exact', + lang: 'ts', + pathPattern: 'src/auth/**', + limit: 10, + }); + + assert.equal(result.matches.length, 1); + assert.equal(result.matches[0]?.path, 'src/auth/service.ts'); + assert.equal(result.matches[0]?.match_type, 'exact_token'); + assert.equal(result.matches[0]?.evidence_type, 'content_match'); + assert.equal(result.matches[0]?.symbol, 'authenticateUser'); + assert.ok(result.matches[0]?.why_matched.includes('exact token')); + assert.ok(result.matches[0]?.preview.includes('authenticateUser')); + assert.equal(result.matches[0]?.range.start.line, 1); + assert.equal(result.matches[0]?.confidence, 'high'); +}); + +test('Code Context Engine lexical search supports regex with path and language filters', async () => { + const repoRoot = await createFixtureRepo(); + const engine = createCodeContextEngine({ repoRoot }); + + const result = await engine.search.lexical({ + query: 'retry[A-Za-z]+', + mode: 'regex', + lang: 'ts', + pathPattern: 'src/**/*.ts', + limit: 10, + }); + + assert.equal(result.matches.length, 1); + assert.equal(result.matches[0]?.path, 'src/payment/client.ts'); + assert.equal(result.matches[0]?.match_type, 'regex'); + assert.ok(result.matches[0]?.why_matched.includes('regex')); + assert.ok(result.matches.every((match: any) => match.path.endsWith('.ts'))); +}); + +test('Code Context Engine lexical search supports literal string matching', async () => { + const repoRoot = await createFixtureRepo(); + const engine = createCodeContextEngine({ repoRoot }); + + const result = await engine.search.lexical({ + query: 'throw 
new Error("boom?");', + mode: 'literal', + pathPattern: 'src/payment/**', + limit: 10, + }); + + assert.equal(result.matches.length, 1); + assert.equal(result.matches[0]?.path, 'src/payment/client.ts'); + assert.equal(result.matches[0]?.match_type, 'literal'); + assert.ok(result.matches[0]?.preview.includes('boom?')); + assert.ok(result.matches[0]?.why_matched.includes('literal')); +}); diff --git a/test/runtime.tasks.test.ts b/test/runtime.tasks.test.ts new file mode 100644 index 0000000..b94822a --- /dev/null +++ b/test/runtime.tasks.test.ts @@ -0,0 +1,88 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'node:os'; +import path from 'node:path'; +import fs from 'node:fs/promises'; +// eslint-disable-next-line @typescript-eslint/ban-ts-comment +// @ts-ignore dist module has no typings +import { createCodeContextEngine } from '../dist/src/index.js'; + +async function writeFixture(root: string, relPath: string, content: string): Promise { + const absPath = path.join(root, relPath); + await fs.mkdir(path.dirname(absPath), { recursive: true }); + await fs.writeFile(absPath, content, 'utf-8'); +} + +async function createFixtureRepo(): Promise { + const root = await fs.mkdtemp(path.join(os.tmpdir(), 'code-context-tasks-')); + await writeFixture( + root, + 'src/auth/service.ts', + [ + 'export async function authenticateUser(email: string, password: string) {', + " if (!email) throw new Error('missing email');", + " return `${email}:${password}`;", + '}', + '', + ].join('\n'), + ); + await writeFixture( + root, + 'src/auth/controller.ts', + [ + "import { authenticateUser } from './service';", + '', + 'export async function loginController(email: string, password: string) {', + ' return authenticateUser(email, password);', + '}', + '', + ].join('\n'), + ); + await writeFixture( + root, + 'src/auth/service.test.ts', + [ + "import { authenticateUser } from './service';", + '', + "test('authenticateUser returns credentials shape', 
async () => {", + " const result = await authenticateUser('a@b.com', 'pw');", + " expect(result).toContain('a@b.com');", + '});', + '', + ].join('\n'), + ); + return root; +} + +test('Code Context Engine builds implementation context bundles', async () => { + const repoRoot = await createFixtureRepo(); + const engine = createCodeContextEngine({ repoRoot }); + + const bundle = await engine.tasks.implementationContext({ + task: 'implementation_context', + query: 'authenticateUser', + pathHints: ['src/auth'], + symbolHints: ['authenticateUser'], + }); + + assert.equal(bundle.task, 'implementation_context'); + assert.ok(bundle.summary.includes('authenticateUser')); + assert.ok(bundle.sections.length >= 1); + assert.ok(bundle.sections.some((section: any) => section.evidence.some((match: any) => match.path === 'src/auth/service.ts'))); +}); + +test('Code Context Engine finds related tests for implementation work', async () => { + const repoRoot = await createFixtureRepo(); + const engine = createCodeContextEngine({ repoRoot }); + + const bundle = await engine.tasks.findTests({ + task: 'find_tests', + query: 'authenticateUser', + pathHints: ['src/auth'], + symbolHints: ['authenticateUser'], + }); + + assert.equal(bundle.task, 'find_tests'); + assert.ok(bundle.evidence.length >= 1); + assert.ok(bundle.evidence.some((match: any) => match.path === 'src/auth/service.test.ts')); +});