diff --git a/.changeset/launch-readiness.md b/.changeset/launch-readiness.md new file mode 100644 index 0000000..10734aa --- /dev/null +++ b/.changeset/launch-readiness.md @@ -0,0 +1,12 @@ +--- +"@browseragentprotocol/cli": minor +"@browseragentprotocol/client": minor +"@browseragentprotocol/logger": minor +"@browseragentprotocol/mcp": minor +"@browseragentprotocol/protocol": minor +"@browseragentprotocol/server-playwright": minor +--- + +Harden release readiness for public launch by shipping explicit package licenses +and changelogs in npm tarballs, tightening package metadata, improving CLI +browser messaging, and adding stronger CI and release verification. diff --git a/.changeset/session-persistence.md b/.changeset/session-persistence.md new file mode 100644 index 0000000..6b2959e --- /dev/null +++ b/.changeset/session-persistence.md @@ -0,0 +1,8 @@ +--- +"@browseragentprotocol/protocol": minor +"@browseragentprotocol/server-playwright": minor +"@browseragentprotocol/client": minor +"@browseragentprotocol/cli": minor +--- + +Add server-side session persistence for CLI. Browser pages now survive across CLI invocations via a dormant session store. When a client with a `sessionId` disconnects, the server parks browser state instead of destroying it. On reconnect with the same `sessionId`, state is restored transparently. CLI auto-generates `sessionId` as `cli-` with `-s=` override for multi-session use cases. diff --git a/.changeset/webmcp-discovery.md b/.changeset/webmcp-discovery.md new file mode 100644 index 0000000..a4401ed --- /dev/null +++ b/.changeset/webmcp-discovery.md @@ -0,0 +1,8 @@ +--- +"@browseragentprotocol/protocol": minor +"@browseragentprotocol/server-playwright": minor +"@browseragentprotocol/client": minor +"@browseragentprotocol/mcp": minor +--- + +Add WebMCP tool discovery support via new `discovery/discover` protocol method. 
Detects tools exposed by websites through the W3C WebMCP standard (declarative HTML attributes and imperative navigator.modelContext API). Also available through `agent/observe` with opt-in `includeWebMCPTools` parameter. diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..c83d81b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,52 @@ +name: Bug report +description: Report a reproducible bug in BAP CLI, MCP, SDKs, or release tooling. +title: "[Bug]: " +labels: + - bug +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to file this. Please include enough detail for us to reproduce it quickly. + - type: textarea + id: summary + attributes: + label: Summary + description: What happened, and what did you expect instead? + validations: + required: true + - type: dropdown + id: surface + attributes: + label: Surface + options: + - CLI + - MCP + - TypeScript SDK + - Python SDK + - Server (Playwright) + - Docs / Examples + - Release / Packaging + validations: + required: true + - type: textarea + id: reproduction + attributes: + label: Reproduction + description: Paste commands, prompts, code, URLs, or test steps. + render: bash + validations: + required: true + - type: textarea + id: logs + attributes: + label: Logs and screenshots + description: Include stack traces, screenshots, or terminal output if available. + - type: input + id: environment + attributes: + label: Environment + description: OS, Node version, Python version, package version, and browser if relevant. 
+ placeholder: "macOS 15, Node 22, Python 3.12, @browseragentprotocol/cli 0.3.0, Chrome" + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..1bf2b4b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: Security policy + url: https://github.com/browseragentprotocol/bap/blob/main/SECURITY.md + about: Please report vulnerabilities using the security process instead of filing a public issue. + - name: Contributing guide + url: https://github.com/browseragentprotocol/bap/blob/main/CONTRIBUTING.md + about: Check the contributing guide before opening a maintenance or development workflow question. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..77f1df0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,40 @@ +name: Feature request +description: Suggest a product, DX, or ecosystem improvement for BAP. +title: "[Feature]: " +labels: + - enhancement +body: + - type: textarea + id: problem + attributes: + label: Problem + description: What user pain, limitation, or opportunity are you trying to address? + validations: + required: true + - type: textarea + id: proposal + attributes: + label: Proposal + description: Describe the ideal behavior, command, API, or doc improvement. + validations: + required: true + - type: textarea + id: alternatives + attributes: + label: Alternatives considered + description: What workarounds or competing approaches have you tried? 
+ - type: dropdown + id: area + attributes: + label: Area + options: + - Browser automation behavior + - CLI ergonomics + - MCP integrations + - TypeScript SDK + - Python SDK + - Security / auth + - Docs / demos + - Release / packaging + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..09a436d --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,21 @@ +## Summary + +Describe the user-facing or maintainer-facing change. + +## Testing + +- [ ] `pnpm build` +- [ ] `pnpm typecheck` +- [ ] `pnpm lint` +- [ ] `pnpm test` +- [ ] `pnpm check:artifacts` +- [ ] Python SDK checks, if touched + +## Release Notes + +- [ ] Added or updated a changeset when a published package changes +- [ ] Updated docs, demos, or screenshots if the UX changed + +## Risks + +Call out any rollout concerns, backwards-compatibility notes, or follow-up work. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..7565c7c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,17 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + commit-message: + prefix: "chore" + include: "scope" + - package-ecosystem: "npm" + directory: "/" + schedule: + interval: "weekly" + commit-message: + prefix: "chore" + include: "scope" + open-pull-requests-limit: 10 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5d7b016..b43a236 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,26 +5,31 @@ on: branches: [main] pull_request: branches: [main] + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: - build: + node-quality: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + node-version: ["20", "22"] steps: - uses: actions/checkout@v4 - name: Setup pnpm - uses: pnpm/action-setup@v2 + uses: 
pnpm/action-setup@v4 with: version: 9.15.0 - - name: Setup Node.js + - name: Setup Node.js ${{ matrix.node-version }} uses: actions/setup-node@v4 with: - node-version: "22" + node-version: ${{ matrix.node-version }} cache: "pnpm" - name: Install dependencies @@ -39,25 +44,50 @@ jobs: - name: Lint run: pnpm lint - test: + - name: Test + run: pnpm test + + package-artifacts: runs-on: ubuntu-latest - needs: build + needs: node-quality + steps: + - uses: actions/checkout@v4 + + - name: Setup pnpm + uses: pnpm/action-setup@v4 + with: + version: 9.15.0 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: "22" + cache: "pnpm" + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Test + run: pnpm build && pnpm check:artifacts + + smoke-cross-platform: + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - node-version: ["20", "22"] + os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v4 - name: Setup pnpm - uses: pnpm/action-setup@v2 + uses: pnpm/action-setup@v4 with: version: 9.15.0 - - name: Setup Node.js ${{ matrix.node-version }} + - name: Setup Node.js uses: actions/setup-node@v4 with: - node-version: ${{ matrix.node-version }} + node-version: "22" cache: "pnpm" - name: Install dependencies @@ -66,17 +96,51 @@ jobs: - name: Build run: pnpm build - - name: Test - run: pnpm test + - name: Smoke test CLI entrypoints + run: | + node packages/cli/dist/cli.js --help + node packages/mcp/dist/cli.js --help + + python-sdk: + runs-on: ubuntu-latest + needs: package-artifacts + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Python tooling + working-directory: packages/python-sdk + run: | + python -m pip install --upgrade pip + python -m pip install -e .[dev] build twine + + - name: Test Python SDK + working-directory: packages/python-sdk + run: python -m pytest + + - name: Build Python 
SDK + working-directory: packages/python-sdk + run: python -m build + + - name: Validate Python distributions + working-directory: packages/python-sdk + run: | + twine check dist/* + python -m pip install --force-reinstall dist/*.whl + python -c "import browseragentprotocol; print(browseragentprotocol.__version__)" coverage: runs-on: ubuntu-latest - needs: build + needs: node-quality steps: - uses: actions/checkout@v4 - name: Setup pnpm - uses: pnpm/action-setup@v2 + uses: pnpm/action-setup@v4 with: version: 9.15.0 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 27dcaf0..51a3d65 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -3,11 +3,13 @@ name: Release on: push: branches: [main] + workflow_dispatch: concurrency: ${{ github.workflow }}-${{ github.ref }} jobs: release: + if: github.ref == 'refs/heads/main' runs-on: ubuntu-latest environment: pypi permissions: @@ -18,7 +20,7 @@ jobs: - uses: actions/checkout@v4 - name: Setup pnpm - uses: pnpm/action-setup@v2 + uses: pnpm/action-setup@v4 with: version: 9.15.0 @@ -32,8 +34,8 @@ jobs: - name: Install dependencies run: pnpm install --frozen-lockfile - - name: Build - run: pnpm build + - name: Verify release candidates + run: pnpm release:verify - name: Create Release Pull Request or Publish id: changesets @@ -43,10 +45,18 @@ jobs: version: pnpm version-packages title: "chore(release): version packages" commit: "chore(release): version packages" + createGithubReleases: true env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} NPM_TOKEN: ${{ secrets.NPM_TOKEN }} NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + NPM_CONFIG_PROVENANCE: "true" + + - name: Verify npm publication + if: steps.changesets.outputs.published == 'true' + env: + PUBLISHED_NPM_PACKAGES: ${{ steps.changesets.outputs.publishedPackages }} + run: node ./scripts/verify-published-releases.mjs npm # PyPI Publishing - runs after npm packages are published - name: Setup Python @@ -57,15 +67,25 @@ jobs: - 
name: Install Python build tools if: steps.changesets.outputs.published == 'true' - run: pip install build + working-directory: packages/python-sdk + run: | + python -m pip install --upgrade pip + python -m pip install -e .[dev] build twine - name: Build Python package if: steps.changesets.outputs.published == 'true' working-directory: packages/python-sdk - run: python -m build + run: | + python -m pytest + python -m build + twine check dist/* - name: Publish to PyPI if: steps.changesets.outputs.published == 'true' uses: pypa/gh-action-pypi-publish@release/v1 with: packages-dir: packages/python-sdk/dist/ + + - name: Verify PyPI publication + if: steps.changesets.outputs.published == 'true' + run: node ./scripts/verify-published-releases.mjs pypi diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..84dfc09 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,53 @@ +# Code of Conduct + +## Our Commitment + +We are committed to making participation in the Browser Agent Protocol +community a respectful, harassment-free experience for everyone, regardless of +age, body size, disability, ethnicity, gender identity and expression, level +of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Expected Behavior + +Examples of behavior that contributes to a positive environment include: + +- Being respectful of differing viewpoints and experiences +- Giving and receiving constructive feedback gracefully +- Assuming good intent while staying clear and honest +- Focusing on what is best for the community and users +- Showing empathy toward other contributors + +Examples of unacceptable behavior include: + +- Harassment, discrimination, or demeaning conduct +- Trolling, insulting, or derogatory comments +- Public or private intimidation +- Publishing someone else's private information without permission +- Any behavior that would be inappropriate in a professional setting + +## Enforcement Responsibilities + +Project maintainers are responsible for clarifying and enforcing our standards +of acceptable behavior and may take appropriate and fair corrective action in +response to any behavior they deem inappropriate, threatening, offensive, or +harmful. + +## Scope + +This Code of Conduct applies within all project spaces and in public spaces +when an individual is officially representing the project or its community. + +## Reporting + +If you experience or witness unacceptable behavior, please report it through +the contact details in [SECURITY.md](./SECURITY.md) or by opening a private +maintainer outreach request if a public issue is not appropriate. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), +version 2.1. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index db0dcff..91be5b1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,7 +7,8 @@ Thank you for your interest in contributing to BAP. 
This document covers the pro ### Prerequisites - Node.js >= 20.0.0 -- pnpm 9.x (`corepack enable && corepack prepare pnpm@9.15.0 --activate`) +- pnpm 9.x (`corepack enable && corepack prepare pnpm@9.15.0 --activate`, or use `npx pnpm`) +- Python >= 3.10 if you are touching `packages/python-sdk` - Git ### Setup @@ -25,6 +26,7 @@ pnpm build pnpm typecheck # Type checking across all packages pnpm lint # ESLint pnpm test # Vitest test suites +pnpm check:artifacts # Verify npm package contents and Python version alignment ``` ## Repository Structure @@ -66,6 +68,7 @@ pnpm build # Build all packages (respects dependency order) pnpm typecheck # Must pass with zero errors pnpm lint # Must pass with zero errors (warnings are acceptable) pnpm test # All tests must pass +pnpm check:artifacts # Release artifact sanity checks ``` ### 4. Submit a pull request @@ -98,12 +101,23 @@ pnpm --filter @browseragentprotocol/protocol test pnpm test:coverage ``` +For Python SDK changes, also run: + +```bash +cd packages/python-sdk +python -m pip install -e .[dev] +python -m pytest +python -m build +twine check dist/* +``` + ### Test guidelines - Schema validation tests go in `packages/protocol/src/__tests__/` - CLI flag/command tests go in `packages/cli/__tests__/` - MCP tool tests go in `packages/mcp/src/__tests__/` - Integration tests that require a browser go in `packages/server-playwright/src/__tests__/` +- Python SDK tests go in `packages/python-sdk/tests/` ## Protocol Changes diff --git a/README.md b/README.md index 9a9902a..8ec7fa7 100644 --- a/README.md +++ b/README.md @@ -1,140 +1,90 @@ # Browser Agent Protocol (BAP) -[![npm version](https://badge.fury.io/js/@browseragentprotocol%2Fcli.svg)](https://www.npmjs.com/package/@browseragentprotocol/cli) -[![npm version](https://badge.fury.io/js/@browseragentprotocol%2Fmcp.svg)](https://www.npmjs.com/package/@browseragentprotocol/mcp) 
+[![CI](https://github.com/browseragentprotocol/bap/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/browseragentprotocol/bap/actions/workflows/ci.yml) +[![npm: CLI](https://img.shields.io/npm/v/@browseragentprotocol/cli)](https://www.npmjs.com/package/@browseragentprotocol/cli) +[![npm: MCP](https://img.shields.io/npm/v/@browseragentprotocol/mcp)](https://www.npmjs.com/package/@browseragentprotocol/mcp) +[![PyPI](https://img.shields.io/pypi/v/browser-agent-protocol)](https://pypi.org/project/browser-agent-protocol/) [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -An open standard for AI agents to interact with web browsers. Two interfaces: **CLI** for shell-based agents, **MCP** for protocol-native agents. +Semantic browser control for AI agents. -## Claude Code Plugin - -Install BAP as a Claude Code plugin for browser automation: - -``` -/install-plugin https://github.com/browseragentprotocol/bap -``` - -**23 MCP tools** across five categories: - -| Category | Tools | -|----------|-------| -| Navigation | `navigate`, `go_back`, `go_forward`, `reload` | -| Interaction | `click`, `fill`, `type`, `press`, `hover`, `scroll`, `select` | -| Composite | `observe` (see the page), `act` (batch multi-step actions), `extract` (structured JSON) | -| Inspection | `screenshot`, `aria_snapshot`, `accessibility`, `content`, `element` | -| Tabs | `pages`, `activate_page`, `close_page` | - -**Example prompts:** - -- "Go to Hacker News and summarize the top 5 stories" -- "Fill out the login form at example.com" -- "Take a screenshot of the pricing page" -- "Extract all product names and prices from this page as JSON" - -No API key needed — BAP runs a local Playwright browser via `npx`. - ---- +BAP gives you a production-minded browser stack for agents: a shell CLI, an +MCP server, a TypeScript SDK, and a Python SDK on top of a Playwright-backed +runtime. 
It is built for semantic selectors, composite actions, structured +extraction, persistent browser sessions, and fewer agent roundtrips. ```bash -# CLI — any agent that can run shell commands -npx @browseragentprotocol/cli open https://example.com -npx @browseragentprotocol/cli act 'click:text:"More information..."' snapshot +# Primary path: CLI + SKILL.md for coding agents +npm i -g @browseragentprotocol/cli +bap install-skill +bap open https://news.ycombinator.com +bap observe --max=12 +bap act click:role:link:"new" --observe -# MCP — agents with native Model Context Protocol support +# Secondary path: MCP for protocol-native agents npx @browseragentprotocol/mcp ``` -

- BAP Architecture -

+## Why BAP -## Why BAP? +- **Semantic selectors:** use `role:button:"Submit"` and `label:"Email"` instead of brittle CSS. +- **Composite actions:** batch multi-step flows into a single `act` call or command. +- **AI-friendly observation:** accessibility-first snapshots, stable refs, and structured extraction. +- **CLI-first for coding agents:** the default path is shell commands plus `SKILL.md`, not a giant tool list. +- **Persistent real-browser workflow:** BAP CLI prefers installed Chrome, starts headful by default, keeps a long-lived daemon, and reuses browser state across commands. +- **Multiple surfaces:** ship the same mental model through CLI, MCP, TypeScript, and Python. +- **Measured efficiency:** verified benchmark docs show up to 27% fewer tool calls than Playwright MCP in standard flows and up to 55% fewer with fused operations. See [docs/browser-tools-guide.md](./docs/browser-tools-guide.md). -- **Composite Actions**: Execute multi-step flows in one command — 40x fewer tokens than one-action-at-a-time -- **Fused Operations**: Combine navigate+observe, act+observe into single server calls — 50-85% fewer roundtrips -- **Semantic Selectors**: Target elements by purpose (`role:button:"Submit"`) not position — survives redesigns -- **Structured Extraction**: Extract validated JSON from any page with a schema -- **Two Interfaces**: CLI (`bap act`) for shell-based agents, MCP tools for protocol-native agents -- **Accessibility-First**: Built on accessibility tree inspection, designed for AI comprehension -- **Element References**: Stable refs (`@e1`, `e15`) that persist across observations -- **Screenshot Annotation**: Set-of-Marks overlays with numbered badges for vision models +## Recommended Adoption Order + +| Surface | Package | Best for | +| --- | --- | --- | +| CLI + SKILL.md | [`@browseragentprotocol/cli`](./packages/cli) | the main path for coding agents that can run shell commands | +| MCP | [`@browseragentprotocol/mcp`](./packages/mcp) | the 
second option for protocol-native clients that prefer MCP tools | +| TypeScript SDK | [`@browseragentprotocol/client`](./packages/client) | custom agent backends and app integrations | +| Playwright server | [`@browseragentprotocol/server-playwright`](./packages/server-playwright) | running the browser runtime directly | +| Protocol types | [`@browseragentprotocol/protocol`](./packages/protocol) | shared schemas, selectors, and errors | +| Python SDK | [`browser-agent-protocol`](./packages/python-sdk) | Python agents, notebooks, backend jobs | ## Quick Start -### CLI — For AI Agents That Run Shell Commands +### CLI ```bash -# Open a page and observe interactive elements +npm i -g @browseragentprotocol/cli +bap install-skill bap open https://example.com bap observe --max=20 - -# Login flow in ONE command (vs 3+ separate commands) bap act fill:role:textbox:"Email"="user@example.com" \ fill:role:textbox:"Password"="secret" \ click:role:button:"Sign in" - -# Extract structured data -bap extract --fields="title,price,rating" - -# Use semantic selectors -bap click role:button:"Get Started" -bap fill label:"Email" "user@example.com" -``` - -Install globally or use via npx: - -```bash -npm i -g @browseragentprotocol/cli -# or -npx @browseragentprotocol/cli ``` -See the full [CLI documentation](./packages/cli) for all 26 commands, selector reference, and recipes. - -### MCP — For Protocol-Native Agents +This is the recommended production setup for most users: the CLI does the +browser work, and `bap install-skill` installs the BAP `SKILL.md` guidance so +coding agents use better defaults for observation, selectors, and composite +actions. 
-``` -navigate({ url: "https://example.com/login" }) -observe({ includeScreenshot: true }) -act({ - steps: [ - { action: "action/fill", selector: "@e1", value: "user@example.com" }, - { action: "action/fill", selector: "@e2", value: "password123" }, - { action: "action/click", selector: "role:button:Sign in" } - ] -}) -``` +By default, BAP CLI prefers installed Chrome, runs headful, and reuses a +persistent session so agents stay close to a normal user browser workflow +instead of spinning up a fresh browser on every command. Use `--headless` for +CI or background automation. If you need a dedicated automation profile, use +`--profile <dir>` or `--no-profile`. Chrome can restrict direct automation of a +live default profile, so a dedicated profile directory is the most reliable +production setup. -See the [MCP documentation](./packages/mcp) for tool reference and configuration. +### MCP -## Integrations +Use MCP when your agent platform prefers native tool transport over shell +commands. -### Claude Code +Run standalone: -**CLI** (install skill for optimal usage): ```bash -npm i -g @browseragentprotocol/cli -bap install-skill +npx -y @browseragentprotocol/mcp ``` -**MCP server** (one command): -```bash -claude mcp add --transport stdio bap-browser -- npx -y @browseragentprotocol/mcp -``` - -**Plugin** (includes SKILL.md for smarter tool usage): -```bash -claude plugin add --from https://github.com/browseragentprotocol/bap -``` - -

- BAP in Claude Code
- Claude Code browsing Hacker News with BAP -

- -### Claude Desktop - -Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows): +Or add it to any MCP-compatible client: ```json { @@ -147,242 +97,114 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) } ``` -Restart Claude Desktop after saving. - -

- BAP in Claude Desktop
- Claude Desktop browsing Hacker News with BAP -

- -### Codex CLI - -**CLI**: -```bash -npm i -g @browseragentprotocol/cli -bap install-skill -``` +### SDKs -**MCP**: ```bash -codex mcp add bap-browser -- npx -y @browseragentprotocol/mcp -``` - -Or add to `~/.codex/config.toml`: - -```toml -[mcp_servers.bap-browser] -command = "npx" -args = ["-y", "@browseragentprotocol/mcp"] +npm install @browseragentprotocol/client +pip install browser-agent-protocol ``` -

- BAP in Codex CLI
- Codex CLI browsing Hacker News with BAP -

- -### Codex Desktop - -Add to `~/.codex/config.toml`: +```ts +import { BAPClient, role } from "@browseragentprotocol/client"; -```toml -[mcp_servers.bap-browser] -command = "npx" -args = ["-y", "@browseragentprotocol/mcp"] +const client = new BAPClient("ws://localhost:9222"); +await client.connect(); +await client.launch({ browser: "chromium", headless: true }); +await client.createPage({ url: "https://example.com" }); +await client.click(role("button", "Submit")); +await client.close(); ``` -

- BAP in Codex Desktop
- Codex Desktop browsing Hacker News with BAP -

- -### Gemini CLI - -**CLI**: -```bash -npm i -g @browseragentprotocol/cli -bap install-skill -``` +```python +import asyncio +from browseragentprotocol import BAPClient, role -**MCP** — add to `~/.gemini/settings.json`: +async def main() -> None: + async with BAPClient("ws://localhost:9222") as client: + await client.launch(browser="chromium", headless=True) + await client.create_page(url="https://example.com") + await client.click(role("button", "Submit")) -```json -{ - "mcpServers": { - "bap-browser": { - "command": "npx", - "args": ["-y", "@browseragentprotocol/mcp"] - } - } -} +asyncio.run(main()) ``` -### Manus - -Manus supports MCP servers via its web UI (HTTP transport only): - -1. Go to **Settings > Integrations > Custom MCP Servers** -2. Click **Add Server** -3. Set transport to **HTTP** and provide your hosted BAP MCP endpoint URL -4. Save and verify connection +## Demo -> **Note:** Manus requires HTTP/SSE transport. To use BAP with Manus, deploy the MCP server as an HTTP endpoint using a stdio-to-HTTP bridge like [mcp-remote](https://www.npmjs.com/package/mcp-remote), then register the URL in the Manus UI. +Use the repo-local Hacker News demo for launch videos, smoke tests, and README +walkthroughs: -### Other Agents +- [Examples index](./examples/README.md) +- [Hacker News CLI demo](./examples/hacker-news-cli/README.md) -BAP CLI includes a built-in skill installer that supports 13 AI coding agent platforms: +Quick run: ```bash -bap install-skill # Auto-detect and install to all agents -bap install-skill --dry-run # Preview what would be installed +npx pnpm install +npx pnpm build +./examples/hacker-news-cli/run-cli-demo.sh ``` -Supported: Claude Code, Codex CLI, Gemini CLI, Cursor, GitHub Copilot, Windsurf, Roo Code, Amp, Deep Agents, OpenCode, and more. +This writes an observation, screenshot, and accessibility snapshot to +`.bap/demo/hacker-news/`. 
-### Browser Selection +If you are on a fresh machine without Playwright browsers yet, install Chromium +once with `npx playwright install chromium`. -By default, BAP uses your locally installed Chrome. Switch browsers with: - -```bash -# CLI -bap config browser firefox +## Integrations -# MCP — pass via args -npx @browseragentprotocol/mcp --browser firefox -``` +- `bap install-skill` is the main recommended setup and installs or updates BAP guidance for 13 AI coding-agent surfaces. +- The MCP package is the second recommended setup for Claude Code, Claude Desktop, Codex, Gemini CLI, and other MCP clients. +- BAP can discover WebMCP tools when websites expose them, then fall back to browser automation when they do not. See [docs/webmcp-comparison.md](./docs/webmcp-comparison.md). -| Value | Browser | Notes | -|---|---|---| -| `chrome` (default) | Local Chrome | Falls back to bundled Chromium if not installed | -| `chromium` | Bundled Chromium | Playwright's built-in Chromium | -| `firefox` | Firefox | Requires local Firefox | -| `webkit` | WebKit | Playwright's WebKit engine | -| `edge` | Microsoft Edge | Requires local Edge | +## Positioning -## Packages +- **Against Playwright CLI:** BAP CLI is optimized for coding agents, not just human shell users. `bap act`, `bap observe`, `bap extract`, and response tiers reduce tool chatter and keep prompts smaller. +- **Against Playwright MCP:** BAP favors fewer roundtrips and better agent ergonomics over raw per-call latency. When an agent can run shell commands, `CLI + SKILL.md` is the primary recommendation. +- **Against Chrome DevTools:** DevTools/CDP is the low-level browser control plane. BAP is the agent layer on top: semantic selectors, structured extraction, fused operations, and shared browser state. 
-### TypeScript +## Production Readiness -| Package | Description | npm | -|---------|-------------|-----| -| [`@browseragentprotocol/cli`](./packages/cli) | CLI for shell-based AI agents | [![npm](https://img.shields.io/npm/v/@browseragentprotocol/cli)](https://www.npmjs.com/package/@browseragentprotocol/cli) | -| [`@browseragentprotocol/mcp`](./packages/mcp) | MCP integration for protocol-native agents | [![npm](https://img.shields.io/npm/v/@browseragentprotocol/mcp)](https://www.npmjs.com/package/@browseragentprotocol/mcp) | -| [`@browseragentprotocol/client`](./packages/client) | TypeScript client SDK | [![npm](https://img.shields.io/npm/v/@browseragentprotocol/client)](https://www.npmjs.com/package/@browseragentprotocol/client) | -| [`@browseragentprotocol/server-playwright`](./packages/server-playwright) | Server implementation using Playwright | [![npm](https://img.shields.io/npm/v/@browseragentprotocol/server-playwright)](https://www.npmjs.com/package/@browseragentprotocol/server-playwright) | -| [`@browseragentprotocol/protocol`](./packages/protocol) | Protocol types, schemas, and utilities | [![npm](https://img.shields.io/npm/v/@browseragentprotocol/protocol)](https://www.npmjs.com/package/@browseragentprotocol/protocol) | -| [`@browseragentprotocol/logger`](./packages/logger) | Pretty logging utilities | [![npm](https://img.shields.io/npm/v/@browseragentprotocol/logger)](https://www.npmjs.com/package/@browseragentprotocol/logger) | +- **Automated CI:** Node 20/22 validation, cross-platform CLI/MCP smoke tests, npm tarball auditing, and Python build validation in GitHub Actions. +- **Automated releases:** Changesets release PRs, GitHub Releases, npm publication with provenance enabled, and PyPI publication with registry verification. +- **Artifact hygiene:** published packages now ship `README.md`, `CHANGELOG.md`, and `LICENSE`. +- **Security docs:** responsible disclosure process is documented in [SECURITY.md](./SECURITY.md). 
+- **Public repo hygiene:** issue templates, pull request template, Dependabot, contributing guide, and code of conduct are included. -### Python +## Docs -| Package | Description | PyPI | -|---------|-------------|------| -| [`browser-agent-protocol`](./packages/python-sdk) | Python SDK with async/sync APIs | [![PyPI](https://img.shields.io/pypi/v/browser-agent-protocol)](https://pypi.org/project/browser-agent-protocol/) | +- [CLI documentation](./packages/cli/README.md) +- [MCP documentation](./packages/mcp/README.md) +- [TypeScript SDK documentation](./packages/client/README.md) +- [Python SDK documentation](./packages/python-sdk/README.md) +- [Browser automation decision guide](./docs/browser-tools-guide.md) +- [BAP and WebMCP comparison](./docs/webmcp-comparison.md) +- [Release automation guide](./docs/releasing.md) -## Architecture - -``` -AI Agent (shell) AI Agent (MCP-native) - │ │ - ▼ ▼ -@browseragentprotocol/cli @browseragentprotocol/mcp - │ │ - ▼ ▼ -@browseragentprotocol/client ───────┘ - │ - ▼ WebSocket (JSON-RPC 2.0) -@browseragentprotocol/server-playwright - │ - ▼ Playwright -Browser (Chromium / Firefox / WebKit) -``` - -The CLI spawns the server as a background daemon that persists across commands. The MCP bridge runs as a stdio process managed by the host agent. 
- -## Using the SDKs +## Monorepo Packages ### TypeScript -```typescript -import { BAPClient, role } from "@browseragentprotocol/client"; - -const client = new BAPClient("ws://localhost:9222"); -await client.connect(); - -await client.launch({ browser: "chromium", headless: false }); -await client.createPage({ url: "https://example.com" }); - -// Semantic selectors -await client.click(role("button", "Submit")); -await client.fill(role("textbox", "Email"), "user@example.com"); - -// Composite actions -const result = await client.act({ - steps: [ - { action: "action/fill", params: { selector: label("Email"), value: "user@example.com" } }, - { action: "action/fill", params: { selector: label("Password"), value: "secret123" } }, - { action: "action/click", params: { selector: role("button", "Sign In") } }, - ], -}); - -await client.close(); -``` +| Package | Description | +| --- | --- | +| [`@browseragentprotocol/cli`](./packages/cli) | CLI for shell-based AI agents | +| [`@browseragentprotocol/mcp`](./packages/mcp) | MCP server for protocol-native agents | +| [`@browseragentprotocol/client`](./packages/client) | TypeScript client SDK | +| [`@browseragentprotocol/server-playwright`](./packages/server-playwright) | Playwright-backed BAP server | +| [`@browseragentprotocol/protocol`](./packages/protocol) | Protocol types, schemas, selectors, errors | +| [`@browseragentprotocol/logger`](./packages/logger) | Shared logger utilities | ### Python -```bash -pip install browser-agent-protocol -``` - -```python -import asyncio -from browseragentprotocol import BAPClient, role, label - -async def main(): - async with BAPClient("ws://localhost:9222") as client: - await client.launch(browser="chromium", headless=False) - await client.create_page(url="https://example.com") - - await client.click(role("button", "Submit")) - await client.fill(label("Email"), "user@example.com") - -asyncio.run(main()) -``` - -## Selectors - -BAP uses semantic selectors that survive DOM changes: - -| 
Selector | Example | Priority | -|----------|---------|----------| -| `role::""` | `role:button:"Submit"` | Best — ARIA role + accessible name | -| `text:""` | `text:"Sign in"` | Visible text content | -| `label:""` | `label:"Email"` | Form label association | -| `placeholder:""` | `placeholder:"Search..."` | Input placeholder | -| `testid:""` | `testid:"submit-btn"` | data-testid attribute | -| `e` / `@ref` | `e15`, `@e1` | From snapshot/observe (positional) | -| `css:` | `css:.btn-primary` | Last resort — fragile | - -## Development - -```bash -git clone https://github.com/browseragentprotocol/bap.git -cd bap -pnpm install -pnpm build -pnpm typecheck -pnpm lint -pnpm test -``` +| Package | Description | +| --- | --- | +| [`browser-agent-protocol`](./packages/python-sdk) | Async and sync Python SDK for BAP | ## Contributing -We welcome contributions! Please open an issue or submit a pull request on GitHub. +Start with [CONTRIBUTING.md](./CONTRIBUTING.md). For community expectations, see +[CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md). ## License -Apache License 2.0 - see the [LICENSE](LICENSE) file for details. - -## Links - -- [GitHub Repository](https://github.com/browseragentprotocol/bap) -- [npm Organization](https://www.npmjs.com/org/browseragentprotocol) -- [Issue Tracker](https://github.com/browseragentprotocol/bap/issues) +Code in this repo is Apache-2.0. Some bundled assets and documentation have +additional notices described in [LICENSE](./LICENSE). diff --git a/TODO-ADOPTION.md b/TODO-ADOPTION.md new file mode 100644 index 0000000..541b785 --- /dev/null +++ b/TODO-ADOPTION.md @@ -0,0 +1,160 @@ +# BAP Adoption Plan + +**Status:** 3 GitHub stars | **Goal:** 100+ stars, real users, ecosystem traction +**Last updated:** 2026-02-20 + +--- + +## Phase 1: Fix the Basics (Week 1) + +### 1.1 README Rewrite +- [ ] Lead with a 30-second GIF/video, not architecture diagrams +- [ ] First section: "Why BAP?" 
with 3 bullet points (40x fewer tokens, semantic selectors, one-command install) +- [ ] Second section: "Quick Start" — 3 lines max to get running +- [ ] Third section: benchmark comparison table (BAP vs Playwright MCP vs screenshot agents) +- [ ] Push architecture, protocol spec, and roadmap below the fold +- [ ] Add badges: npm version, GitHub stars, license, Discord (when ready) + +### 1.2 One-Command Experience +- [ ] Verify `npx @anthropic/bap` (or equivalent) works end-to-end with zero config +- [ ] Verify `bap install-skill` works for Claude Code, Codex, Gemini CLI (top 3 platforms) +- [ ] Add `npx bap-mcp` as standalone entry point for MCP users +- [ ] Test the cold-start experience on a clean machine — time it, fix any friction + +### 1.3 Plugin Marketplace +- [ ] Follow up on claude-plugins-official submission (submitted 2026-02-20) +- [ ] Once approved: add "Available on Claude Code Plugin Marketplace" badge to README + +--- + +## Phase 2: Demo Content (Week 2-3) + +### 2.1 Terminal Demo Video (Highest Leverage) +- [ ] Record with asciinema or screen capture (NOT Sora/AI-generated) +- [ ] Script the demo as a side-by-side comparison: + ``` + LEFT: Playwright MCP — 10+ messages to fill a form and extract data + RIGHT: BAP — 1 composite action, same result + End card: "3 actions, 1 roundtrip, 40x fewer tokens" + ``` +- [ ] Keep it under 90 seconds +- [ ] Post to: X/Twitter, r/ClaudeAI, r/LocalLLaMA, LinkedIn + +### 2.2 Full Walkthrough Video (Screen Recording) +- [ ] Record a real local session: install BAP, connect to Claude Code, do a real task +- [ ] Show the MCP tool calls in real-time (Claude Code's tool use UI) +- [ ] 3-5 minutes, narrated or with text overlays +- [ ] Post to: YouTube, embed in README + +### 2.3 Benchmark Content +- [ ] Use the existing `benchmarks/` repo to generate real numbers +- [ ] Key metrics to highlight: + - Token cost per task (BAP vs Playwright MCP vs Computer Use) + - Roundtrip count per task + - Task completion time + - 
Success rate on WebVoyager-style benchmarks +- [x] Create a shareable benchmark table/graphic — see `docs/browser-tools-guide.md` (Benchmark Results section) +- [ ] Write a blog post: "We Measured the Token Cost of Browser Agents" + +--- + +## Phase 3: Distribution (Week 3-4) + +### 3.1 Go Where the Users Are +- [ ] **Hacker News:** Post the benchmark blog post as a Show HN +- [ ] **r/ClaudeAI:** Post demo video + "I built an alternative to Playwright MCP" +- [ ] **r/LocalLLaMA:** Position as "works with any agent, not just Claude" +- [ ] **X/Twitter:** Thread format — problem → demo → benchmarks → link +- [ ] **LinkedIn:** More polished version of the X thread +- [ ] **Discord servers:** Claude Code community, AI agents communities + +### 3.2 Integration Partnerships +- [ ] Open PR/issue on popular agent frameworks to add BAP as a browser backend: + - [ ] LangChain / LangGraph + - [ ] CrewAI + - [ ] AutoGen + - [ ] Pydantic AI +- [ ] Write integration guides: "Use BAP with [framework]" — one page each +- [ ] Reach out to agent framework maintainers directly + +### 3.3 Claude Code Ecosystem +- [ ] Plugin marketplace listing (pending) +- [ ] Write a SKILL.md tutorial: "How to write browser automation skills with BAP" +- [ ] Cross-promote from skill-tools and skills.menu + +--- + +## Phase 4: Community (Month 2+) + +### 4.1 Developer Experience +- [ ] Set up Discord or GitHub Discussions for community +- [ ] Add "Examples" directory with 5-10 real-world scripts: + - [ ] Form filling (login flow) + - [ ] Data extraction (scrape a table) + - [ ] Multi-page navigation (e-commerce checkout) + - [ ] Screenshot monitoring (visual regression) + - [ ] PDF generation +- [ ] Improve error messages — every error should suggest a fix +- [ ] Add `bap doctor` command that diagnoses common setup issues + +### 4.2 Documentation Site +- [ ] Stand up a docs site (can reuse skills.menu infra or Astro Starlight) +- [ ] Pages: Getting Started, Selectors Guide, MCP Integration, CLI 
Reference, Python SDK, Benchmarks +- [ ] Include interactive "Try It" playground if feasible + +### 4.3 Thought Leadership +- [ ] Write "Why Semantic Selectors Beat CSS for AI Agents" (dev.to / blog) +- [ ] Write "The Browser Agent Protocol: An Open Standard" (position piece) +- [ ] Give a talk at a local meetup or AI conference + +--- + +## Messaging Guide + +### One-Liner +> Fast, semantic browser control for AI agents. 40x fewer tokens than screenshot-based approaches. + +### Elevator Pitch +> AI agents waste massive tokens on browser tasks — screenshot agents send 50KB images every step, DOM tools need 10+ roundtrips for simple forms. BAP uses the accessibility tree (what screen readers use) as a semantic interface, batches actions into single roundtrips, and works with any agent via MCP or WebSocket. One composite action replaces 10 messages. And when sites expose WebMCP tools, BAP discovers and surfaces them automatically — no agent changes needed. + +### Differentiators (vs. competitors) +| Them | BAP | +|------|-----| +| Screenshots → pixel coordinates | Accessibility tree → semantic selectors | +| One action per roundtrip | Composite actions (batch N steps in 1 call) | +| CSS selectors that break on redesigns | `role:button:"Submit"` that survives redesigns | +| Tied to one agent platform | Vendor-neutral: MCP + WebSocket + CLI | +| No security model | Scope-based auth, domain filtering, credential redaction | +| No WebMCP support | Auto-discovers WebMCP tools, falls back to automation | + +### Target Audiences (in priority order) +1. **Claude Code users** — already using MCP, BAP is a drop-in upgrade over Playwright MCP +2. **AI agent developers (Python)** — building with LangChain, CrewAI, AutoGen; need browser access +3. **AI agent developers (TS)** — building custom agents; need efficient browser protocol +4. 
**DevTools/testing teams** — interested in semantic selectors for more resilient automation + +--- + +## Anti-Patterns to Avoid + +- Don't lead with "protocol" or "standard" — developers adopt tools, not specs +- Don't compare to Playwright directly — Playwright is the engine, BAP is the AI-optimized layer on top +- Don't over-emphasize security features initially — it's important but not what drives first adoption +- Don't make the README longer — make it shorter with better content +- Don't pay for promotion — organic developer content wins long-term +- See `docs/browser-tools-guide.md` for the approved comparison framing — decision guide, not head-to-head battle + +--- + +## Success Metrics + +| Milestone | Target | How to Measure | +|-----------|--------|---------------| +| README converts | >5% visitor → star rate | GitHub traffic analytics | +| Demo video | >1K views in first week | Platform analytics | +| HN post | Front page (>50 points) | HN | +| npm weekly downloads | >100/week | npm stats | +| PyPI weekly downloads | >50/week | PyPI stats | +| GitHub stars | 100+ | GitHub | +| External contributors | 3+ PRs from non-maintainers | GitHub | +| Framework integrations | 2+ frameworks ship BAP support | PRs/docs | diff --git a/docs/browser-tools-guide.md b/docs/browser-tools-guide.md new file mode 100644 index 0000000..a64fc93 --- /dev/null +++ b/docs/browser-tools-guide.md @@ -0,0 +1,265 @@ +# Browser Automation for AI Agents: A Decision Guide + +AI agents increasingly need browser access — to fill forms, extract data, navigate workflows, and interact with web applications. Today, four categories of browser tools exist for agents: + +1. **MCP servers** — expose browser actions as tools via the Model Context Protocol +2. **CLI tools** — shell commands that agents invoke directly (often paired with SKILL.md files) +3. **Browser protocols** — raw Chrome DevTools / CDP access for debugging and custom control +4. 
**Screenshot/vision** — pixel-level interaction via screenshots and coordinates + +This guide covers the MCP and CLI approaches with verifiable facts, focusing on [BAP (Browser Agent Protocol)](https://github.com/browseragentprotocol/bap) and [Playwright MCP](https://github.com/microsoft/playwright-mcp) / [Playwright CLI](https://github.com/microsoft/playwright-cli). All benchmark data is reproducible via the [benchmark suite](https://github.com/browseragentprotocol/benchmarks). + +--- + +## The Landscape + +| Tool | Interface | Publisher | npm Package | License | +|------|-----------|-----------|-------------|---------| +| BAP MCP | MCP (stdio) | [browseragentprotocol](https://github.com/browseragentprotocol) | `@browseragentprotocol/mcp` | Apache-2.0 | +| BAP CLI | Shell commands | [browseragentprotocol](https://github.com/browseragentprotocol) | `@browseragentprotocol/cli` | Apache-2.0 | +| Playwright MCP | MCP (stdio) | [Microsoft](https://github.com/microsoft) | `@playwright/mcp` | Apache-2.0 | +| Playwright CLI | Shell commands | [Microsoft](https://github.com/microsoft) | `@playwright/cli` | Apache-2.0 | +| Chrome DevTools / CDP | Browser protocol | [Google / Chromium](https://developer.chrome.com/docs/devtools/) | built into Chrome | Chromium licenses | + +> **Playwright MCP GitHub stars:** ~27.5k (as of Feb 2026). Microsoft-backed with a large ecosystem. + +BAP, Playwright MCP, and Playwright CLI all use [Playwright](https://playwright.dev/) as the +automation engine. Chrome DevTools / CDP talks directly to Chrome's native +debugging protocol. + +--- + +## Architecture + +### Playwright MCP — Single-Process + +Playwright MCP embeds Playwright directly in the MCP server process. When an agent calls a tool, the server executes the browser action in-process. This means **lower per-call latency** — no inter-process communication overhead. 
+ +### BAP MCP — Two-Process + +BAP MCP uses a bridge architecture: the MCP server communicates with a separate Playwright server over WebSocket (JSON-RPC 2.0). This adds **~50–200ms per call** but enables: + +- **Session persistence** — the browser survives MCP server restarts +- **Multi-client access** — CLI and MCP can control the same browser simultaneously +- **Shared state** — observations, element refs, and cookies persist across interfaces + +### Playwright CLI + +Standalone shell commands. Each invocation is a separate process. The `--install-skills` flag generates a SKILL.md for agent consumption. + +### BAP CLI + +Shell commands that connect to a persistent daemon (shared with MCP). The browser survives across commands, and element refs from `bap observe` remain valid for subsequent `bap act` calls. + +### Chrome DevTools / CDP + +Chrome DevTools Protocol is the lowest-level option in this landscape. It is +excellent for debugging, profiling, network inspection, and custom browser +instrumentation, but it is not an agent-ready workflow on its own. You have to +orchestrate DOM queries, event subscriptions, and multi-step actions manually. +BAP sits above this layer with semantic selectors, `observe`, `act`, `extract`, +response tiers, and shared browser state. + +### What Playwright MCP Recommends + +From the [Playwright MCP README](https://github.com/microsoft/playwright-mcp): + +> _"If you are using a **coding agent**, you might benefit from using the [CLI+SKILLS](https://github.com/microsoft/playwright-cli) instead."_ + +BAP agrees with this guidance — CLI + SKILL.md is the better pattern for coding agents. BAP CLI extends it with composite actions, semantic selectors, and structured extraction. + +--- + +## MCP Server Comparison + +Side-by-side comparison of BAP MCP and Playwright MCP. Every claim links to a verifiable source. 
+ +| Dimension | BAP MCP | Playwright MCP | Source | +|-----------|---------|----------------|--------| +| **Tools** | 23 | 31 (17 core + 6 vision + 5 test + 3 other) | [BAP MCP source](../packages/mcp), [Playwright MCP README](https://github.com/microsoft/playwright-mcp) | +| **Composite actions** | `act` batches N steps in 1 call | No built-in batching | [Playwright MCP README](https://github.com/microsoft/playwright-mcp) (verified: no `batch_execute` or similar) | +| **Observation** | `observe` → structured elements with refs, selectors, action hints | `browser_snapshot` → raw accessibility tree | [Benchmark observe scenario](https://github.com/browseragentprotocol/benchmarks) | +| **Extraction** | `extract` with JSON Schema | `browser_evaluate` with custom JS | [Benchmark extract scenario](https://github.com/browseragentprotocol/benchmarks) | +| **Fused operations** | navigate+observe, act+pre/postObserve in 1 call | Not available | [BAP protocol spec](../packages/protocol) | +| **Response tiers** | full / interactive / minimal | Not available | [BAP protocol spec](../packages/protocol) | +| **WebMCP discovery** | `discover_tools` + observe integration | Not available | [BAP MCP source](../packages/mcp) | +| **Per-call latency** | +50–200ms (WebSocket overhead) | Lower (single-process) | [Benchmark fairness notes](https://github.com/browseragentprotocol/benchmarks#fairness-notes) | +| **Form filling** | `act` composite (N fills + click = 1 call) | `browser_fill_form` (batches fills, separate click) | [Benchmark form scenario](https://github.com/browseragentprotocol/benchmarks) | + +--- + +## Benchmark Results + +All data from the [reproducible benchmark suite](https://github.com/browseragentprotocol/benchmarks). Clone the repo and run `./run.sh` to reproduce. 
+ +### Methodology + +- Both servers spawned via `StdioClientTransport` — identical to how any MCP client connects +- **Real websites** (saucedemo.com, books.toscrape.com, etc.), not synthetic test pages +- **No LLM involved** — measures raw MCP tool efficiency, not prompt quality +- Each scenario: 1 warmup run (excluded) + N measured runs, median selected +- Token estimation: `ceil(responsePayloadBytes / 4)` +- All tool calls timed with `performance.now()` + +### Three-Variant Model + +The benchmarks use three variants to separate BAP's core advantage (composite actions) from its optimization layer (fused operations): + +| Variant | Rules | What it measures | +|---------|-------|-----------------| +| **BAP Standard** | Must observe before acting, use refs from observe output. Re-observe after page navigation. | Apples-to-apples with Playwright | +| **BAP Fused** | Can use semantic selectors without prior observe. Can use fused `navigate(observe:true)` and `act(postObserve:true)`. | BAP's full optimization layer | +| **Playwright** | Standard snapshot-then-act workflow. Uses most efficient tools available (`browser_fill_form`, `browser_evaluate`). | Baseline | + +**The fair comparison is BAP Standard vs Playwright.** BAP Fused is explicitly an optimization layer. 
+ +### Results + +| Scenario | Site | BAP Standard | BAP Fused | Playwright | Std vs PW | Fused vs PW | +|----------|------|:------------:|:---------:|:----------:|:---------:|:-----------:| +| baseline | quotes.toscrape.com | 2 | 2 | 2 | Tie | Tie | +| observe | news.ycombinator.com | 2 | 1 | 2 | Tie | -50% | +| extract | books.toscrape.com | 2 | 2 | 2 | Tie | Tie | +| form | the-internet.herokuapp.com | 4 | 3 | 5 | -20% | -40% | +| **ecommerce** | **saucedemo.com** | **8** | **5** | **11** | **-27%** | **-55%** | +| workflow | books.toscrape.com | 5 | 4 | 5 | Tie | -20% | +| **Total** | | **23** | **17** | **27** | **~-15%** | **~-37%** | + +Source: [`src/scenarios/`](https://github.com/browseragentprotocol/benchmarks/tree/main/src/scenarios) in the benchmarks repo. + +### Where BAP Wins + +- **Composite `act`**: Batching multiple steps (fill+fill+click) into one call is the primary advantage. Most impactful in multi-step flows like ecommerce (8 vs 11 calls). +- **Fused operations**: `navigate(observe:true)` and `act(postObserve:true)` eliminate redundant server roundtrips. Largest impact in ecommerce (-55%). +- **Structured `extract`**: JSON Schema-based extraction vs writing custom JS for `browser_evaluate`. + +### Where Playwright Wins + +- **Per-call latency**: Playwright MCP is a single process. BAP's two-process WebSocket architecture adds ~50–200ms per call. Playwright wins wall-clock time on most scenarios. +- **Element disambiguation**: Playwright's positional snapshot refs uniquely identify elements. BAP's observe can return ambiguous selectors for identical elements (e.g., 6 "Add to cart" buttons on saucedemo.com). +- **Setup simplicity**: `npx @playwright/mcp` — single process, no daemon management. +- **Ecosystem**: 27.5k GitHub stars, Microsoft-backed, extensive testing ecosystem integration. + +### Fairness — Read This + +These benchmarks are designed to be honest, not promotional. 
Important caveats: + +- **BAP Standard is the fair comparison.** BAP Standard follows the same observe-then-act pattern as Playwright (observe the page, get element refs, act on them). BAP Fused shows what's possible with optimization but isn't an apples-to-apples comparison. + +- **Latency favors Playwright.** BAP's two-process architecture adds ~50–200ms WebSocket overhead per call. Playwright MCP is consistently faster on wall-clock time per call. + +- **Token estimation is approximate.** `ceil(bytes / 4)` is a rough heuristic. Screenshots inflate counts due to base64 encoding. + +- **No LLM involved.** All tool arguments are pre-written. In real agent flows, both tools would need additional calls for the LLM to decide what to do. + +- **BAP `extract` uses heuristics.** Playwright's `browser_evaluate` runs precise DOM queries and may return more accurate results. + +- **Playwright uses its most efficient tools.** Each scenario uses `browser_fill_form` for batched fills and `browser_evaluate` for direct JS extraction. We do not artificially inflate Playwright's call counts. + +- **BAP has known limitations.** Identical elements (e.g., 6 "Add to cart" buttons) can produce ambiguous selectors. The cart icon on saucedemo.com has no accessible name, requiring direct URL navigation. See the [benchmark README](https://github.com/browseragentprotocol/benchmarks) for the full list. 
+ +--- + +## CLI Comparison + +| Dimension | BAP CLI | Playwright CLI | Source | +|-----------|---------|----------------|--------| +| **Commands** | 23 | ~70+ (granular: individual storage, network, DevTools cmds) | [BAP CLI docs](../packages/cli), [Playwright CLI README](https://github.com/microsoft/playwright-cli) | +| **Composite actions** | `bap act fill:...=val click:...` (N steps, 1 cmd) | Individual commands | CLI docs | +| **Semantic selectors** | `role:button:"Submit"`, `label:"Email"` | Accessibility tree refs (`e`) | CLI docs | +| **Observation** | `bap observe --tier=interactive` (tiered output) | `playwright-cli snapshot` (full tree) | CLI docs | +| **Extraction** | `bap extract --fields="title,price"` | `playwright-cli eval` (manual JS) | CLI docs | +| **SKILL.md** | Yes (CLI + MCP variants) | Yes (`--install-skills`) | Package repos | +| **Token efficiency** | Composite actions + response tiers | _"Token-efficient. Does not force page data into LLM."_ (official README — no specific numbers) | [Playwright CLI README](https://github.com/microsoft/playwright-cli) | +| **Platform support** | 13 platforms via `bap install-skill` | Claude Code, GitHub Copilot | Package READMEs | + +> **Note on third-party claims:** Some blogs cite specific token reduction numbers for Playwright CLI (e.g., "4x fewer tokens"). These numbers are **not in Microsoft's official README** and we do not cite them here. Microsoft's official claim is: _"Token-efficient. Does not force page data into LLM."_ + +For a detailed command-by-command mapping between Playwright CLI and BAP CLI, see the [migration guide](../packages/cli/skills/bap-browser/references/MIGRATION.md). 
+ +--- + +## BAP vs Chrome DevTools + +| Dimension | BAP CLI / MCP | Chrome DevTools / CDP | +|-----------|----------------|------------------------| +| **Level** | Agent-ready workflow layer | Raw browser debugging protocol | +| **Selectors** | Semantic selectors, refs, structured observe output | Manual DOM / runtime scripting | +| **Multi-step actions** | `act` batches steps and fused observe flows | You compose sequences yourself | +| **Extraction** | `extract` with schema or field hints | Custom JS / protocol calls | +| **Token efficiency** | Response tiers + fewer roundtrips | Depends on your own orchestration | +| **Best for** | Coding agents and repeated browser tasks | Debugging, profiling, low-level custom tooling | + +Use Chrome DevTools when you need raw protocol domains or existing browser +inspection. Use BAP when you want a default browser interface for agents that +need to get work done with fewer calls and less prompt overhead. + +--- + +## What Should You Use? + +### Coding agent (Claude Code, Codex, Gemini CLI, Cursor, etc.)? + +**→ BAP CLI** with `bap install-skill` + +Why: Composite `bap act` batches multi-step flows into one shell command. Semantic selectors (`role:button:"Submit"`) survive page redesigns. Structured `bap extract --fields="title,price"` eliminates writing custom JS. The persistent daemon keeps browser state warm across commands, which is the right shape for coding agents. SKILL.md is available for 13 platforms. + +Alternative: Playwright CLI for simple single-action interactions where composite batching isn't needed. + +### MCP-native agent (Claude Desktop, custom MCP client)? + +**→ BAP MCP** (`npx @browseragentprotocol/mcp`) + +Why: `act` batches steps, `observe` returns structured elements with refs, fused operations (navigate+observe, act+postObserve) cut roundtrips. `extract` with JSON Schema for structured data. 
+ +Alternative: Playwright MCP if per-call latency matters more than total call count, or if you're already embedded in the Playwright testing ecosystem. + +### Need CLI + MCP access to the same browser? + +**→ BAP** — shared server architecture. The CLI daemon and MCP bridge connect to the same Playwright server. Observations, element refs, and cookies persist across both interfaces. + +Playwright MCP and Playwright CLI are separate processes with no shared state. + +### Already deep in the Playwright testing ecosystem? + +**→ Playwright MCP** is the zero-friction add-on for your existing Playwright setup. If you already use Playwright for testing, adding the MCP server requires no new dependencies. + +### Need raw debugger, profiler, or protocol-domain access? + +**→ Chrome DevTools / CDP** + +Use DevTools when you need low-level browser debugging or custom protocol work. +Use BAP when the job is agent automation rather than browser instrumentation. + +--- + +### The Bottom Line + +BAP and Playwright MCP/CLI are built on the same engine (Playwright). BAP adds composite actions, semantic selectors, structured extraction, fused operations, and a CLI-first workflow for coding agents. In benchmarks, BAP Standard uses ~15% fewer tool calls than Playwright MCP in an apples-to-apples comparison, primarily from batching multi-step actions. BAP Fused extends this to ~37% through navigate+observe and act+postObserve fusion. Playwright MCP wins on per-call latency and element disambiguation. Chrome DevTools is lower-level than both: great for debugging, but not the default interface most agents should use for day-to-day browser work. 
+ +--- + +## Getting Started + +### CLI — For coding agents + +```bash +npm i -g @browseragentprotocol/cli +bap install-skill # Auto-detects your agent platform, installs SKILL.md +``` + +### MCP — For protocol-native agents + +```bash +npx @browseragentprotocol/mcp +``` + +### Plugin — For Claude Code + +``` +/install-plugin https://github.com/browseragentprotocol/bap +``` + +--- + +*Last updated: Feb 2026. All star counts, tool counts, and benchmark data verified at time of writing. Run the [benchmark suite](https://github.com/browseragentprotocol/benchmarks) to reproduce.* diff --git a/docs/releasing.md b/docs/releasing.md new file mode 100644 index 0000000..29a2c7d --- /dev/null +++ b/docs/releasing.md @@ -0,0 +1,72 @@ +# Release Automation + +BAP uses automated releases for both npm and PyPI. + +## Release Branch Policy + +`main` is the only release branch. + +- Changesets uses `main` as the base branch. +- CI and release automation publish only from pushes to `main`. +- Release PRs should merge into `main`; do not publish from feature branches, + temporary branches, or pre-release branches. + +## What happens on `main` + +When a pull request with one or more changesets lands on `main`, GitHub Actions +will: + +1. Run the release verification suite (`pnpm release:verify`) +2. Create or update the Changesets release PR +3. On merge of that release PR, publish npm packages +4. Create GitHub Releases for the published npm packages +5. Sync the Python SDK version to the release version +6. Build, validate, and publish `browser-agent-protocol` to PyPI +7. 
Verify that npm and PyPI now expose the expected versions + +## Required GitHub configuration + +### npm + +- Repository secret: `NPM_TOKEN` +- Package permissions on npm for the `@browseragentprotocol` scope +- Trusted publishing / provenance enabled on npm if you want attestation + +### PyPI + +- A PyPI project named `browser-agent-protocol` +- GitHub trusted publishing configured for this repository +- A GitHub environment named `pypi` + +## Local verification before merging + +Run the launch-readiness checks locally: + +```bash +npx pnpm release:verify +``` + +To inspect publishable npm tarballs directly: + +```bash +npx pnpm build +npx pnpm check:artifacts +``` + +## Versioning model + +- Published npm packages are versioned with Changesets +- The Python SDK version is synced automatically to the same release version by + `scripts/sync-python-version.mjs` +- The release workflow verifies both registries after publish +- The current launch train is set up for the next stable minor release from + `0.3.0` to `0.4.0` + +## Common failure modes + +- Missing changeset for a publishable package change +- npm tarball missing `LICENSE`, `README.md`, or `CHANGELOG.md` +- Python version drift between `pyproject.toml`, `package.json`, and + `src/browseragentprotocol/__init__.py` +- PyPI publish blocked because trusted publishing or project permissions are not + configured diff --git a/docs/webmcp-comparison.md b/docs/webmcp-comparison.md new file mode 100644 index 0000000..3a0e85c --- /dev/null +++ b/docs/webmcp-comparison.md @@ -0,0 +1,209 @@ +# BAP vs WebMCP: A Technical Comparison + +This document compares the Browser Agent Protocol (BAP) with WebMCP, the W3C Community Group standard for exposing website tools to AI agents. These are complementary technologies that address different layers of the AI-browser interaction stack. + +## What Is WebMCP? 
+ +WebMCP is a W3C Community Group standard, driven primarily by Google and Microsoft, that allows websites to expose structured tools to AI agents through browser-native APIs. Chrome 146 Canary includes an initial implementation behind an experimental flag. + +WebMCP provides two API surfaces for tool declaration: + +### Declarative API (HTML Attributes) + +Websites annotate existing HTML forms with attributes that describe their purpose to AI agents: + +```html +<form toolname="search-products" tooldescription="Search the product catalog"> + <input type="text" name="query" toolparamdescription="Search keywords" /> + <input type="number" name="maxPrice" toolparamdescription="Maximum price" /> + <button type="submit">Search</button> +</form> +``` + +The browser parses these attributes and surfaces them as structured tool definitions to any connected agent. The `toolname` and `tooldescription` attributes live on `<form>` elements, while `toolparamdescription` annotates individual `<input>` elements. + +### Imperative API (JavaScript) + +For dynamic tools that do not map to static forms, websites register tools programmatically via `navigator.modelContext`: + +```javascript +navigator.modelContext.addTool({ + name: "add-to-cart", + description: "Add a product to the shopping cart", + inputSchema: { + type: "object", + properties: { + productId: { type: "string", description: "Product identifier" }, + quantity: { type: "number", description: "Number of items" } + }, + required: ["productId"] + }, + handler: async ({ productId, quantity }) => { + // Site-defined logic + } +}); +``` + +The imperative API supports richer schemas and dynamic registration/deregistration of tools as the page state changes. + +## The Fundamental Difference + +BAP and WebMCP solve different problems at different layers: + +**WebMCP = Website exposes tools (cooperative model).** The website author explicitly opts in by annotating forms or registering tools via JavaScript. The agent discovers only what the site chooses to expose. This requires adoption: sites that have not added WebMCP attributes or code expose nothing. + +**BAP = Agent controls browser (universal model).** The agent operates on any website through browser automation -- accessibility tree inspection, semantic selectors, screenshot annotation, and structured extraction. No site changes are required. BAP works on the entire web as it exists today. 
+ +| | WebMCP | BAP | +|---|---|---| +| **Who acts** | The website provides tools; the agent calls them | The agent controls the browser directly | +| **Site cooperation** | Required | Not required | +| **Coverage** | Only sites that implement WebMCP | Every website | +| **Interaction model** | Function call (agent invokes a declared tool) | Browser automation (agent observes, clicks, fills, extracts) | + +## Head-to-Head Comparison + +| Dimension | BAP | WebMCP | +|---|---|---| +| **Adoption / Availability** | Works today on any site. Published on npm, available as an MCP server, CLI, and Claude Code plugin. | Chrome 146 Canary behind a flag. Requires per-site adoption by web developers. No production deployments yet. | +| **Security Model** | Sandboxed browser instance controlled by the agent. Supports domain allowlists, scope-based authorization (readonly/standard/full/privileged), and approval workflows for sensitive actions. | Site-defined tool handlers run in the page's security context. The browser mediates tool invocation. Security depends on each site's implementation. | +| **Performance** | Fused operations (navigate+observe, act+observe) cut roundtrips by 50-85%. Incremental observation and response tiers minimize payload size. Composite `act` batches dozens of steps into one call. | Single function call per tool invocation with no browser automation overhead. No DOM traversal or accessibility tree serialization. Potentially lower latency for sites that implement it. | +| **Capabilities** | Full browser control: navigation, form filling, clicking, scrolling, hovering, keyboard input, screenshot capture, accessibility tree inspection, structured data extraction, multi-tab management, cookie/storage access. | Scoped to tools the site explicitly declares. Cannot interact with UI elements outside declared tools. No general-purpose browser automation. 
| +| **Developer Experience** | Install one package (`@browseragentprotocol/mcp` or `@browseragentprotocol/cli`). Works immediately against any site. Semantic selectors survive redesigns. SKILL.md documents guide agent behavior. | Website developers add HTML attributes or JavaScript to their pages. Agent developers call discovered tools by name. Simple invocation model but requires per-site effort. | +| **Ecosystem** | TypeScript SDK, Python SDK, MCP bridge, CLI with 23 commands, plugin system, skill installer supporting 13 AI agent platforms. | W3C Community Group specification. Chrome implementation in progress. No standalone SDK -- the browser is the runtime. | +| **Browser Support** | Chromium, Firefox, WebKit, Chrome, Edge (via Playwright). Cross-browser from day one. | Chrome 146 Canary only (behind flag). Other browsers have not announced implementations. | +| **Works Without Site Changes** | Yes. Operates on the accessibility tree and DOM of any page. | No. Sites must add `toolname`/`tooldescription` attributes or call `navigator.modelContext` APIs. | + +## How BAP CLI and SKILL.md Relate + +BAP's architecture includes a skill system that provides agent-level documentation: + +- **SKILL.md** files describe BAP's tools, selector syntax, efficiency patterns, and recipes in a format optimized for AI agent consumption. They tell agents *how to use BAP well* -- when to observe vs. act, how to batch steps, which response tier to pick. + +- **WebMCP** provides page-level tool exposure. It tells agents *what a specific page offers* -- search this catalog, add this item to a cart, submit this form. 
+ +These operate at different levels of the stack: + +``` +Agent reads SKILL.md --> Knows how to use BAP tools effectively +Agent navigates to page --> BAP observes the page (elements, refs, structure) +Page exposes WebMCP --> Agent discovers site-declared tools +Agent decides strategy --> Use WebMCP tool (if available) OR BAP automation +``` + +SKILL.md enriches agent context at the protocol level. WebMCP enriches agent context at the page level. Both contribute to better agent decision-making without conflicting. + +## Complementary Positioning + +BAP and WebMCP are not competitors. They address different parts of the agent-browser interaction problem: + +- **BAP works on the entire existing web.** It uses accessibility tree inspection, semantic selectors, and browser automation to interact with any page regardless of whether the site was designed for AI agents. This is essential today, when the vast majority of websites have no AI-agent-facing APIs. + +- **WebMCP provides a structured contract for cooperative sites.** When a site implements WebMCP, agents can invoke well-defined tools with explicit schemas, descriptions, and site-managed handlers. This is a higher-fidelity interaction for the subset of sites that adopt it. + +The progression for an agent encountering a page looks like: + +1. **WebMCP tools available?** Use them -- they are the site's intended agent interface with defined semantics and error handling. +2. **No WebMCP tools?** Fall back to BAP's universal browser automation. Observe the page, identify interactive elements, and act. +3. **Partial WebMCP coverage?** Use WebMCP tools for declared functionality, BAP automation for everything else. + +Together, they cover the full spectrum from "site has never heard of AI agents" to "site provides a rich, purpose-built agent API." + +## BAP's WebMCP Integration + +BAP includes first-class protocol support for discovering and surfacing WebMCP tools. 
This means agents using BAP do not need separate WebMCP integration -- BAP bridges the two worlds. + +### `discovery/discover` Protocol Method + +The `discovery/discover` method scans a page for WebMCP tools and returns them as structured data: + +``` +discovery/discover({ + pageId: "page-1", // Optional; defaults to active page + options: { + maxTools: 50, // Cap on returned tools + includeInputSchemas: true // Include JSON schemas for parameters + } +}) +``` + +Returns: + +```json +{ + "tools": [ + { + "name": "search-products", + "description": "Search the product catalog by keyword", + "inputSchema": { "type": "object", "properties": { "query": { "type": "string" } } }, + "source": "webmcp-declarative", + "formSelector": "form[toolname='search-products']" + } + ], + "totalDiscovered": 1, + "apiVersion": "1.0" +} +``` + +Each tool includes a `source` field indicating which API surface exposed it: `"webmcp-declarative"` for HTML attribute-based tools or `"webmcp-imperative"` for JavaScript API-based tools. + +### `agent/observe` with `includeWebMCPTools` + +For agents that want a unified view of a page, the `observe` method accepts an `includeWebMCPTools` flag: + +``` +observe({ + maxElements: 30, + includeWebMCPTools: true +}) +``` + +When enabled, the observation result includes a `webmcpTools` array alongside the standard interactive elements. This fuses page observation and tool discovery into a single call, consistent with BAP's philosophy of minimizing roundtrips. + +### `discover_tools` MCP Tool + +For agents using BAP through the MCP bridge (Claude Code, Claude Desktop, and other MCP-native clients), tool discovery is exposed as a standard MCP tool. Agents call `discover_tools` to scan the current page for WebMCP tools without needing to understand the underlying `discovery/discover` protocol method. + +### Progressive Detection + +BAP's discovery implementation follows a progressive detection strategy: + +1. 
**Declarative scan first.** Query the DOM for `form[toolname]` elements and extract tool metadata from HTML attributes. This is fast and does not require JavaScript execution. +2. **Imperative scan second.** Check for `navigator.modelContext` and enumerate any programmatically registered tools. This catches dynamic tools that do not have DOM representation. +3. **Graceful fallback.** If neither API surface is present, discovery returns an empty tool list with `totalDiscovered: 0`. No errors, no noise -- the agent proceeds with standard BAP automation. + +This layered approach means agents get the best available information from every page without brittle feature detection or version checks. + +## Decision Guide: When to Use What + +| Priority | Approach | Why | +|----------|----------|-----| +| Max speed on cooperative sites | WebMCP tools (via `discover_tools`) | No DOM traversal, native function call — the site handles execution | +| Universal coverage | BAP automation (`observe` + `act`) | Works on any site today, no site changes required | +| Fewest tokens | BAP fused ops + response tiers | 50–85% fewer roundtrips via navigate+observe, act+postObserve; tiered payloads reduce response size | +| Best of all worlds | BAP + `includeWebMCPTools` | Automatic fallback — use WebMCP when available, BAP automation otherwise | + +**Practical guidance:** Today, `discover_tools` returns empty on virtually every website — WebMCP adoption is still in early stages (Chrome 146 Canary behind a flag). But calling `discover_tools` costs nothing: it returns an empty list with `totalDiscovered: 0` and no errors. Call it speculatively on every page. When WebMCP adoption grows, agents automatically benefit without code changes. + +**The recommended pattern:** + +1. Navigate to the page with BAP +2. Call `observe` with `includeWebMCPTools: true` — get interactive elements AND any WebMCP tools in a single fused call +3. 
If WebMCP tools are available, prefer them for supported actions (they represent the site's intended agent interface) +4. For everything else, use BAP's standard automation (click, fill, act, extract) + +This progressive approach ensures agents work everywhere today and get richer interactions as the web evolves. + +For a broader comparison of BAP with Playwright MCP and Playwright CLI, see the [Browser Automation for AI Agents: A Decision Guide](./browser-tools-guide.md). + +## Summary + +| Aspect | BAP | WebMCP | Together | +|---|---|---|---| +| **Works on** | Any website | Opted-in websites | Every website, with richer tools where available | +| **Interaction** | Browser automation | Tool invocation | Agent picks the best approach per-action | +| **Available** | Today (npm, PyPI) | Chrome Canary (experimental) | BAP bridges WebMCP tools as they appear | +| **Site effort** | None | Attributes or JavaScript | Incremental -- sites add WebMCP at their own pace | +| **Agent value** | Full browser control | Structured, site-intended tools | Complete coverage with graceful enhancement | + +BAP provides universal browser automation that works everywhere today. WebMCP provides a cooperative channel for sites that choose to expose structured tools. BAP's built-in WebMCP discovery ensures agents benefit from both without managing two separate integrations. diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..a412902 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,20 @@ +# Examples + +These examples are designed for launch demos, onboarding, and release-smoke +validation. + +## Available examples + +- [`hacker-news-cli`](./hacker-news-cli/README.md): a fast CLI demo that opens + Hacker News, captures an AI-friendly observation, writes a screenshot, and + saves an accessibility snapshot. + +## Running examples from the repo + +1. Install workspace dependencies: `npx pnpm install` +2. Build the packages: `npx pnpm build` +3. 
Follow the example-specific README + +If you want to run the same flows from published packages instead of the local +workspace build, swap the local `node packages/.../dist/...` commands for the +equivalent `npx -y @browseragentprotocol/...` commands shown in each example. diff --git a/examples/hacker-news-cli/README.md b/examples/hacker-news-cli/README.md new file mode 100644 index 0000000..1d51a5e --- /dev/null +++ b/examples/hacker-news-cli/README.md @@ -0,0 +1,59 @@ +# Hacker News CLI Demo + +This is the fastest repo-local demo for BAP launch videos, README walkthroughs, +and smoke validation. It shows the core user story: + +1. Start a clean browser session +2. Open a real public website +3. Capture an AI-friendly observation +4. Save a screenshot and accessibility snapshot + +## Prerequisites + +- Node.js 20+ +- `npx pnpm install` +- `npx pnpm build` + +## Run it + +```bash +./examples/hacker-news-cli/run-cli-demo.sh +``` + +If this is a clean machine without Playwright browsers installed yet, run this +once first: + +```bash +npx playwright install chromium +``` + +The script writes artifacts to `.bap/demo/hacker-news/` by default: + +- `observe.txt` +- `hacker-news.png` +- `hacker-news.yaml` + +You can override the output folder: + +```bash +./examples/hacker-news-cli/run-cli-demo.sh /tmp/bap-hn-demo +``` + +## Use it with published packages + +If you want the exact same flow without cloning the repo, these are the +equivalent commands: + +```bash +npx -y @browseragentprotocol/cli -s=hn-demo --browser=chromium --no-profile --headless open https://news.ycombinator.com +npx -y @browseragentprotocol/cli -s=hn-demo observe --max=12 +npx -y @browseragentprotocol/cli -s=hn-demo screenshot --file=.bap/demo/hacker-news/hacker-news.png +npx -y @browseragentprotocol/cli -s=hn-demo snapshot --file=.bap/demo/hacker-news/hacker-news.yaml +npx -y @browseragentprotocol/cli -s=hn-demo close +``` + +## Why this demo works well + +- It uses a stable, public site that does 
not require credentials +- It exercises both observation and artifact generation +- It produces assets that are easy to show in README screenshots or a video diff --git a/examples/hacker-news-cli/run-cli-demo.sh b/examples/hacker-news-cli/run-cli-demo.sh new file mode 100755 index 0000000..7b715fc --- /dev/null +++ b/examples/hacker-news-cli/run-cli-demo.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd -- "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +OUTPUT_DIR="${1:-"$ROOT_DIR/.bap/demo/hacker-news"}" +SESSION_NAME="hn-demo" + +mkdir -p "$OUTPUT_DIR" + +run_bap() { + node "$ROOT_DIR/packages/cli/dist/cli.js" "-s=$SESSION_NAME" --browser=chromium --no-profile --headless "$@" +} + +if ! run_bap open https://news.ycombinator.com; then + printf 'Failed to launch the Hacker News demo browser.\n' >&2 + printf 'If this is a fresh machine, install Chromium first:\n' >&2 + printf ' npx playwright install chromium\n' >&2 + exit 1 +fi + +run_bap observe --max=12 | tee "$OUTPUT_DIR/observe.txt" +run_bap screenshot "--file=$OUTPUT_DIR/hacker-news.png" +run_bap snapshot "--file=$OUTPUT_DIR/hacker-news.yaml" +run_bap close + +printf 'Demo artifacts written to %s\n' "$OUTPUT_DIR" diff --git a/package.json b/package.json index 2d9f6b8..ba1e72f 100644 --- a/package.json +++ b/package.json @@ -29,11 +29,13 @@ "test": "turbo run test", "test:coverage": "vitest run --coverage", "check": "pnpm typecheck && pnpm lint", + "check:artifacts": "node ./scripts/check-release-artifacts.mjs", "clean": "turbo run clean && rm -rf node_modules", "format": "prettier --write \"packages/*/src/**/*.ts\"", "format:check": "prettier --check \"packages/*/src/**/*.ts\"", "changeset": "changeset", - "version-packages": "changeset version", + "version-packages": "changeset version && node ./scripts/sync-python-version.mjs", + "release:verify": "rm -rf .turbo && pnpm build && pnpm typecheck && pnpm lint && pnpm test && pnpm check:artifacts", "release": "pnpm build && 
changeset publish" }, "devDependencies": { diff --git a/packages/cli/LICENSE b/packages/cli/LICENSE new file mode 100644 index 0000000..f931480 --- /dev/null +++ b/packages/cli/LICENSE @@ -0,0 +1,14 @@ +Browser Agent Protocol package license + +Copyright 2024-2026 Browser Agent Protocol contributors + +This package is licensed under the Apache License, Version 2.0. +You may obtain a copy of the License at: + +https://www.apache.org/licenses/LICENSE-2.0 + +The full Browser Agent Protocol repository licensing details, including +project-level notices for documentation and bundled third-party assets, are +available at: + +https://github.com/browseragentprotocol/bap/blob/main/LICENSE diff --git a/packages/cli/README.md b/packages/cli/README.md index 8e346e7..26a9a28 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -3,7 +3,11 @@ [![npm version](https://badge.fury.io/js/@browseragentprotocol%2Fcli.svg)](https://www.npmjs.com/package/@browseragentprotocol/cli) [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -AI-native browser automation from the command line. Like playwright-cli but with superpowers: composite actions (`bap act`), semantic selectors, and structured extraction. +CLI-first browser automation from the command line. BAP defaults to installed +Chrome, an auto-detected profile when available, and a persistent daemon so +agents can work against a real browser instead of starting fresh every time. +Like playwright-cli but with superpowers: composite actions (`bap act`), +semantic selectors, and structured extraction. ## Quick Start @@ -20,9 +24,15 @@ npm i -g @browseragentprotocol/cli bap open https://example.com ``` +By default, the CLI prefers headful Chrome with a persistent session. Use +`--headless` for CI or `--no-profile` for a fresh automation browser. 
Chrome +can restrict automation of a live default profile, so a dedicated +`--profile ` is the most reliable production setup when you need cookies +and long-lived state. + ## Why BAP CLI? -### Composite Actions — 40x Token Reduction +### Composite Actions — Fewer Commands, Fewer Tokens Execute multi-step flows in **one command** instead of one-at-a-time: @@ -170,9 +180,11 @@ bap skill init # Install skill to current project ``` -s= Named session -p, --port Server port (default: 9222) --b, --browser Browser: chrome, firefox, webkit, edge ---headless Headless mode (default) ---no-headless Show browser window +-b, --browser Browser: chrome, chromium, firefox, webkit, edge +--headless Headless mode for CI/background runs +--no-headless Show browser window (default) +--profile Chrome profile dir (default: auto-detect) +--no-profile Fresh browser, no user profile -v, --verbose Verbose output --observe Fused observation (for goto, act) --diff Incremental observation (for observe) diff --git a/packages/cli/__tests__/flag-parsing.test.ts b/packages/cli/__tests__/flag-parsing.test.ts index 3e0f8bf..6eaa35d 100644 --- a/packages/cli/__tests__/flag-parsing.test.ts +++ b/packages/cli/__tests__/flag-parsing.test.ts @@ -19,6 +19,65 @@ vi.mock("node:os", () => ({ // Import after mocks const { parseArgs } = await import("../src/config/state.js"); +describe("parseArgs session flags", () => { + describe("-s (session)", () => { + it("parses -s=name session flag", () => { + const flags = parseArgs(["-s=my-session", "open", "https://example.com"]); + expect(flags.session).toBe("my-session"); + expect(flags.command).toBe("open"); + }); + + it("parses -s name session flag (space-separated)", () => { + const flags = parseArgs(["-s", "my-session", "open", "https://example.com"]); + expect(flags.session).toBe("my-session"); + expect(flags.command).toBe("open"); + }); + + it("defaults session to undefined when not set", () => { + const flags = parseArgs(["open", "https://example.com"]); + 
expect(flags.session).toBeUndefined(); + }); + }); +}); + +describe("parseArgs profile flags", () => { + describe("--profile", () => { + it("defaults profile to auto from config", () => { + const flags = parseArgs(["goto", "https://example.com"]); + expect(flags.profile).toBe("auto"); + }); + + it("parses --no-profile to none", () => { + const flags = parseArgs(["goto", "https://example.com", "--no-profile"]); + expect(flags.profile).toBe("none"); + }); + + it("parses --profile=", () => { + const flags = parseArgs(["goto", "https://example.com", "--profile=/custom/path"]); + expect(flags.profile).toBe("/custom/path"); + }); + + it("parses --profile (space-separated)", () => { + const flags = parseArgs(["goto", "https://example.com", "--profile", "/custom/path"]); + expect(flags.profile).toBe("/custom/path"); + }); + + it("parses --profile auto explicitly", () => { + const flags = parseArgs(["--no-profile", "goto", "https://example.com", "--profile", "auto"]); + expect(flags.profile).toBe("auto"); + }); + }); +}); + +describe("parseArgs default browser behavior", () => { + it("defaults to visible Chrome with auto profile", () => { + const flags = parseArgs(["open", "https://example.com"]); + expect(flags.browser).toBe("chrome"); + expect(flags.headless).toBe(false); + expect(flags.profile).toBe("auto"); + }); +}); + describe("parseArgs fusion flags", () => { describe("--observe", () => { it("parses --observe flag", () => { diff --git a/packages/cli/__tests__/profile-detection.test.ts b/packages/cli/__tests__/profile-detection.test.ts new file mode 100644 index 0000000..0ec46d0 --- /dev/null +++ b/packages/cli/__tests__/profile-detection.test.ts @@ -0,0 +1,115 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; + +// Mock fs and os before imports +const mockExistsSync = vi.fn(); + +vi.mock("node:fs", () => ({ + default: { + existsSync: (...args: unknown[]) => mockExistsSync(...args), + readFileSync: () => "{}", + writeFileSync: () => {}, + 
mkdirSync: () => {}, + unlinkSync: () => {}, + }, +})); + +vi.mock("node:os", () => ({ + default: { + homedir: () => "/Users/testuser", + }, +})); + +vi.mock("node:net", () => ({ + default: { + createConnection: () => { + const socket = { + setTimeout: vi.fn(), + destroy: vi.fn(), + on: () => socket, + }; + return socket; + }, + }, +})); + +vi.mock("@browseragentprotocol/client", () => ({ + createClient: vi.fn(), +})); + +const { getDefaultChromeProfileDir, resolveProfile } = await import("../src/server/manager.js"); + +describe("getDefaultChromeProfileDir", () => { + beforeEach(() => { + mockExistsSync.mockReset(); + }); + + it("should return macOS Chrome profile path when it exists", () => { + // On macOS (the test environment), check if the path is correct + if (process.platform === "darwin") { + mockExistsSync.mockImplementation((p: string) => { + return p === "/Users/testuser/Library/Application Support/Google/Chrome"; + }); + const result = getDefaultChromeProfileDir(); + expect(result).toBe("/Users/testuser/Library/Application Support/Google/Chrome"); + } + }); + + it("should return undefined when Chrome profile dir does not exist", () => { + mockExistsSync.mockReturnValue(false); + const result = getDefaultChromeProfileDir(); + expect(result).toBeUndefined(); + }); +}); + +describe("resolveProfile", () => { + beforeEach(() => { + mockExistsSync.mockReset(); + }); + + it("should return undefined for firefox regardless of profile setting", () => { + expect(resolveProfile("auto", "firefox")).toBeUndefined(); + expect(resolveProfile("/some/path", "firefox")).toBeUndefined(); + }); + + it("should return undefined for webkit regardless of profile setting", () => { + expect(resolveProfile("auto", "webkit")).toBeUndefined(); + }); + + it("should return undefined when profile is none", () => { + expect(resolveProfile("none", "chrome")).toBeUndefined(); + expect(resolveProfile("none", "edge")).toBeUndefined(); + }); + + it("should return explicit path when it 
exists", () => { + mockExistsSync.mockImplementation((p: string) => p === "/custom/chrome/profile"); + expect(resolveProfile("/custom/chrome/profile", "chrome")).toBe("/custom/chrome/profile"); + }); + + it("should return undefined with warning when explicit path does not exist", () => { + mockExistsSync.mockReturnValue(false); + const stderrSpy = vi.spyOn(process.stderr, "write").mockImplementation(() => true); + const result = resolveProfile("/nonexistent/path", "chrome"); + expect(result).toBeUndefined(); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining("Warning: profile path does not exist") + ); + stderrSpy.mockRestore(); + }); + + it("should resolve auto for chrome to detected profile dir", () => { + if (process.platform === "darwin") { + mockExistsSync.mockImplementation((p: string) => { + return p === "/Users/testuser/Library/Application Support/Google/Chrome"; + }); + const result = resolveProfile("auto", "chrome"); + expect(result).toBe("/Users/testuser/Library/Application Support/Google/Chrome"); + } + }); + + it("should resolve auto for edge to undefined (edge uses different profile dir)", () => { + // Edge uses a different profile dir than Chrome, so auto-detect won't find it + mockExistsSync.mockReturnValue(false); + const result = resolveProfile("auto", "edge"); + expect(result).toBeUndefined(); + }); +}); diff --git a/packages/cli/__tests__/server-manager.test.ts b/packages/cli/__tests__/server-manager.test.ts new file mode 100644 index 0000000..c0c2fba --- /dev/null +++ b/packages/cli/__tests__/server-manager.test.ts @@ -0,0 +1,243 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; + +// Mock child_process, fs, os, net to avoid real server spawning +vi.mock("node:child_process", () => ({ + spawn: vi.fn(() => ({ + pid: 12345, + unref: vi.fn(), + stdout: null, + stderr: null, + })), +})); + +vi.mock("node:fs", () => ({ + default: { + existsSync: () => false, + readFileSync: () => "{}", + writeFileSync: () => {}, + 
mkdirSync: () => {}, + unlinkSync: () => {}, + }, +})); + +vi.mock("node:os", () => ({ + default: { + homedir: () => "/tmp/test-home", + }, +})); + +vi.mock("node:net", () => ({ + default: { + createConnection: () => { + const handlers: Record void> = {}; + const socket = { + setTimeout: vi.fn(), + destroy: vi.fn(), + on: (event: string, handler: () => void) => { + handlers[event] = handler; + // Simulate port in use (server already running) + if (event === "connect") { + setTimeout(() => handler(), 0); + } + return socket; + }, + }; + return socket; + }, + }, +})); + +// Mock the client module +const mockClient = { + listPages: vi.fn(), + launch: vi.fn(), + createPage: vi.fn(), + activatePage: vi.fn(), + close: vi.fn(), +}; + +vi.mock("@browseragentprotocol/client", () => ({ + createClient: vi.fn(() => Promise.resolve(mockClient)), +})); + +const { ServerManager } = await import("../src/server/manager.js"); + +describe("ServerManager.ensureReady", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("should auto-launch browser and create page when no pages exist", async () => { + mockClient.listPages.mockResolvedValue({ pages: [], activePage: "" }); + mockClient.launch.mockResolvedValue({ browserId: "b1", version: "1.0" }); + mockClient.createPage.mockResolvedValue({ id: "page-1", url: "about:blank" }); + + const manager = new ServerManager({ + port: 9222, + browser: "chromium", + headless: true, + verbose: false, + }); + + const client = await manager.ensureReady(); + + expect(client).toBe(mockClient); + expect(mockClient.listPages).toHaveBeenCalledOnce(); + expect(mockClient.launch).toHaveBeenCalledWith({ + browser: "chromium", + channel: undefined, + headless: true, + }); + expect(mockClient.createPage).toHaveBeenCalledOnce(); + expect(mockClient.activatePage).not.toHaveBeenCalled(); + }); + + it("should reuse existing pages from session persistence", async () => { + mockClient.listPages.mockResolvedValue({ + pages: [{ id: "page-abc", url: 
"https://example.com" }], + activePage: "page-abc", + }); + + const manager = new ServerManager({ + port: 9222, + browser: "chromium", + headless: true, + verbose: false, + }); + + const client = await manager.ensureReady(); + + expect(client).toBe(mockClient); + expect(mockClient.activatePage).toHaveBeenCalledWith("page-abc"); + expect(mockClient.launch).not.toHaveBeenCalled(); + expect(mockClient.createPage).not.toHaveBeenCalled(); + }); + + it("should use first page when activePage is empty string", async () => { + mockClient.listPages.mockResolvedValue({ + pages: [ + { id: "page-1", url: "https://one.com" }, + { id: "page-2", url: "https://two.com" }, + ], + activePage: "", + }); + + const manager = new ServerManager({ + port: 9222, + browser: "chromium", + headless: true, + verbose: false, + }); + + await manager.ensureReady(); + + expect(mockClient.activatePage).toHaveBeenCalledWith("page-1"); + }); + + it("should pass chrome channel when browser is chrome", async () => { + mockClient.listPages.mockResolvedValue({ pages: [], activePage: "" }); + mockClient.launch.mockResolvedValue({ browserId: "b1", version: "1.0" }); + mockClient.createPage.mockResolvedValue({ id: "page-1", url: "about:blank" }); + + const manager = new ServerManager({ + port: 9222, + browser: "chrome", + headless: true, + verbose: false, + }); + + await manager.ensureReady(); + + expect(mockClient.launch).toHaveBeenCalledWith({ + browser: "chromium", + channel: "chrome", + headless: true, + }); + }); + + it("should pass msedge channel when browser is edge", async () => { + mockClient.listPages.mockResolvedValue({ pages: [], activePage: "" }); + mockClient.launch.mockResolvedValue({ browserId: "b1", version: "1.0" }); + mockClient.createPage.mockResolvedValue({ id: "page-1", url: "about:blank" }); + + const manager = new ServerManager({ + port: 9222, + browser: "edge", + headless: false, + verbose: false, + }); + + await manager.ensureReady(); + + 
expect(mockClient.launch).toHaveBeenCalledWith({ + browser: "chromium", + channel: "msedge", + headless: false, + }); + }); + + it("should respect headless flag", async () => { + mockClient.listPages.mockResolvedValue({ pages: [], activePage: "" }); + mockClient.launch.mockResolvedValue({ browserId: "b1", version: "1.0" }); + mockClient.createPage.mockResolvedValue({ id: "page-1", url: "about:blank" }); + + const manager = new ServerManager({ + port: 9222, + browser: "firefox", + headless: false, + verbose: false, + }); + + await manager.ensureReady(); + + expect(mockClient.launch).toHaveBeenCalledWith({ + browser: "firefox", + channel: undefined, + headless: false, + }); + }); + + it("should not pass userDataDir when profile is none", async () => { + mockClient.listPages.mockResolvedValue({ pages: [], activePage: "" }); + mockClient.launch.mockResolvedValue({ browserId: "b1", version: "1.0" }); + mockClient.createPage.mockResolvedValue({ id: "page-1", url: "about:blank" }); + + const manager = new ServerManager({ + port: 9222, + browser: "chrome", + headless: true, + verbose: false, + profile: "none", + }); + + await manager.ensureReady(); + + expect(mockClient.launch).toHaveBeenCalledWith({ + browser: "chromium", + channel: "chrome", + headless: true, + }); + }); + + it("should not pass userDataDir for firefox even with profile auto", async () => { + mockClient.listPages.mockResolvedValue({ pages: [], activePage: "" }); + mockClient.launch.mockResolvedValue({ browserId: "b1", version: "1.0" }); + mockClient.createPage.mockResolvedValue({ id: "page-1", url: "about:blank" }); + + const manager = new ServerManager({ + port: 9222, + browser: "firefox", + headless: true, + verbose: false, + profile: "auto", + }); + + await manager.ensureReady(); + + expect(mockClient.launch).toHaveBeenCalledWith({ + browser: "firefox", + channel: undefined, + headless: true, + }); + }); +}); diff --git a/packages/cli/package.json b/packages/cli/package.json index 1aed690..46f8fcd 
100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -23,13 +23,19 @@ "dist", "bin", "skills", - "README.md" + "README.md", + "CHANGELOG.md", + "LICENSE" ], "repository": { "type": "git", "url": "https://github.com/browseragentprotocol/bap.git", "directory": "packages/cli" }, + "homepage": "https://github.com/browseragentprotocol/bap/tree/main/packages/cli", + "bugs": { + "url": "https://github.com/browseragentprotocol/bap/issues" + }, "keywords": [ "bap", "cli", diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index d35492f..d8900dc 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -1,9 +1,9 @@ #!/usr/bin/env node /** - * @fileoverview BAP CLI - AI-native browser automation + * @fileoverview BAP CLI - CLI-first browser automation for coding agents * * Like playwright-cli but with superpowers: - * - Composite actions (bap act) — 40x token reduction + * - Composite actions (bap act) — fewer commands and tokens * - Semantic selectors — resilient to layout changes * - Structured extraction — validated JSON output * @@ -25,7 +25,7 @@ import { ServerManager } from "./server/manager.js"; function printHelp(): void { console.log(` -${pc.bold("BAP CLI")} ${pc.dim("- AI-native browser automation")} +${pc.bold("BAP CLI")} ${pc.dim("- CLI-first browser automation for coding agents")} ${pc.cyan("BASIC COMMANDS")} ${pc.dim("(playwright-cli compatible)")} bap open [url] Open browser (optionally navigate) @@ -98,8 +98,10 @@ ${pc.cyan("CONFIGURATION")} ${pc.cyan("GLOBAL OPTIONS")} -s= Named session -p, --port Server port (default: 9222) - -b, --browser Browser: chrome, firefox, webkit, edge - --headless / --no-headless Headless mode + -b, --browser Browser: chrome, chromium, firefox, webkit, edge + --headless / --no-headless Browser visibility (default: visible) + --profile Chrome profile dir (default: auto-detect) + --no-profile Fresh browser, no user profile -v, --verbose Verbose output -h, --help Show this help -V, 
--version Show version @@ -109,17 +111,30 @@ ${pc.dim("Docs:")} ${pc.cyan("https://github.com/browseragentprotocol/bap")} } function printVersion(): void { - console.log("bap-cli 0.2.0"); + console.log("bap-cli 0.3.0"); } // ============================================================================= -// Commands that don't need a server connection +// Command routing // ============================================================================= +/** Commands that don't need a server connection at all */ const NO_SERVER_COMMANDS = new Set([ "config", "install-skill", "skill", "--help", "-h", ]); +/** + * Commands that need a server connection but manage their own browser/page + * lifecycle. These use ensureClient() (WebSocket only), not ensureReady(). + */ +const CLIENT_ONLY_COMMANDS = new Set([ + "open", // explicitly launches browser + creates page + "close", // tears down browser — don't auto-create one + "close-all", // tears down everything — don't auto-create + "sessions", // informational — just lists contexts + "tabs", // informational — just lists pages +]); + // ============================================================================= // Main // ============================================================================= @@ -157,16 +172,21 @@ async function main(): Promise { } // All other commands need a BAP server + const sessionId = flags.session ?? `cli-${flags.port}`; const serverManager = new ServerManager({ port: flags.port, host: flags.host, browser: flags.browser, headless: flags.headless, verbose: flags.verbose, + sessionId, + profile: flags.profile, }); try { - const client = await serverManager.ensureClient(); + const client = CLIENT_ONLY_COMMANDS.has(flags.command) + ? await serverManager.ensureClient() + : await serverManager.ensureReady(); await handler(flags.args, flags, client); } catch (error) { const message = error instanceof Error ? 
error.message : String(error); diff --git a/packages/cli/src/commands/act.ts b/packages/cli/src/commands/act.ts index 033785b..1e2cadd 100644 --- a/packages/cli/src/commands/act.ts +++ b/packages/cli/src/commands/act.ts @@ -2,7 +2,7 @@ * bap act ... — Execute multiple steps atomically * * This is the killer feature. A login flow that costs playwright-cli - * 3 commands / 3 snapshots / ~6000 tokens costs BAP 1 command / 1 snapshot / ~150 tokens. + * 3 commands / 3 snapshots / 3 LLM reasoning cycles costs BAP 1 command / 1 snapshot / 1 cycle. * * Fusion: --observe flag fuses act + post-observe into 1 server call (50% token reduction) * diff --git a/packages/cli/src/commands/config.ts b/packages/cli/src/commands/config.ts index 470c55f..b65e88f 100644 --- a/packages/cli/src/commands/config.ts +++ b/packages/cli/src/commands/config.ts @@ -38,7 +38,7 @@ async function configCommand( console.log(`${key}: ${val}`); } else { console.error(`Unknown config key: ${key}`); - console.error("Valid keys: browser, headless, timeout, port"); + console.error("Valid keys: browser, headless, timeout, port, profile"); process.exit(1); } return; @@ -64,9 +64,16 @@ async function configCommand( case "port": updates.port = parseInt(value, 10); break; + case "profile": + if (value !== "auto" && value !== "none" && !value.startsWith("/") && !value.includes(":\\")) { + console.error("Valid profile values: auto, none, or an absolute path"); + process.exit(1); + } + updates.profile = value; + break; default: console.error(`Unknown config key: ${key}`); - console.error("Valid keys: browser, headless, timeout, port"); + console.error("Valid keys: browser, headless, timeout, port, profile"); process.exit(1); } diff --git a/packages/cli/src/commands/open.ts b/packages/cli/src/commands/open.ts index e411cb8..b901e11 100644 --- a/packages/cli/src/commands/open.ts +++ b/packages/cli/src/commands/open.ts @@ -5,23 +5,9 @@ import type { BAPClient } from "@browseragentprotocol/client"; import type { 
GlobalFlags } from "../config/state.js"; import { printPageSummary } from "../output/formatter.js"; +import { BROWSER_MAP, CHANNEL_MAP, resolveProfile } from "../server/manager.js"; import { register } from "./registry.js"; -/** Map user-facing browser names to Playwright browser types */ -const BROWSER_MAP: Record = { - chrome: "chromium", - chromium: "chromium", - firefox: "firefox", - webkit: "webkit", - edge: "chromium", -}; - -/** Map user-facing browser names to Playwright channels */ -const CHANNEL_MAP: Record = { - chrome: "chrome", - edge: "msedge", -}; - async function openCommand( args: string[], flags: GlobalFlags, @@ -29,12 +15,14 @@ async function openCommand( ): Promise { const browser = BROWSER_MAP[flags.browser] ?? "chromium"; const channel = CHANNEL_MAP[flags.browser]; + const userDataDir = resolveProfile(flags.profile, flags.browser); // Launch browser await client.launch({ browser, channel, headless: flags.headless, + ...(userDataDir ? { userDataDir } : {}), }); // Create a page diff --git a/packages/cli/src/config/state.ts b/packages/cli/src/config/state.ts index 0d5a865..ee6b710 100644 --- a/packages/cli/src/config/state.ts +++ b/packages/cli/src/config/state.ts @@ -18,6 +18,7 @@ export interface GlobalFlags { host: string; browser: string; headless: boolean; + profile: string; verbose: boolean; help: boolean; version: boolean; @@ -54,13 +55,15 @@ export interface BAPConfig { headless: boolean; timeout: number; port: number; + profile: string; } const DEFAULT_CONFIG: BAPConfig = { browser: "chrome", - headless: true, + headless: false, timeout: 30000, port: 9222, + profile: "auto", }; // ============================================================================= @@ -110,6 +113,7 @@ export function parseArgs(argv: string[]): GlobalFlags { host: "localhost", browser: config.browser, headless: config.headless, + profile: config.profile, verbose: false, help: false, version: false, @@ -139,6 +143,12 @@ export function parseArgs(argv: string[]): 
GlobalFlags { flags.headless = true; } else if (arg === "--no-headless") { flags.headless = false; + } else if (arg === "--no-profile") { + flags.profile = "none"; + } else if (arg.startsWith("--profile=")) { + flags.profile = arg.slice(10); + } else if (arg === "--profile") { + flags.profile = argv[++i] ?? "auto"; } else if (arg.startsWith("-s=")) { flags.session = arg.slice(3); } else if (arg === "-s") { diff --git a/packages/cli/src/server/manager.ts b/packages/cli/src/server/manager.ts index 46a35a5..c2fd25d 100644 --- a/packages/cli/src/server/manager.ts +++ b/packages/cli/src/server/manager.ts @@ -24,6 +24,8 @@ export interface ServerManagerOptions { browser: string; headless: boolean; verbose: boolean; + sessionId?: string; + profile?: string; } // ============================================================================= @@ -135,7 +137,8 @@ function removePidFile(): void { // Browser Name Mapping // ============================================================================= -const BROWSER_MAP: Record = { +/** Map user-facing browser names to Playwright browser types */ +export const BROWSER_MAP: Record = { chrome: "chromium", chromium: "chromium", firefox: "firefox", @@ -143,12 +146,71 @@ const BROWSER_MAP: Record = { edge: "chromium", }; +/** Map user-facing browser names to Playwright channels (e.g., system Chrome) */ +export const CHANNEL_MAP: Record = { + chrome: "chrome", + edge: "msedge", +}; + +// ============================================================================= +// Profile Detection +// ============================================================================= + +/** Browsers that support persistent user data directories */ +const PROFILE_BROWSERS = new Set(["chrome", "chromium", "edge"]); + +/** Detect Chrome user data dir for current platform */ +export function getDefaultChromeProfileDir(): string | undefined { + const home = os.homedir(); + let profileDir: string; + + switch (process.platform) { + case "darwin": + profileDir 
= path.join(home, "Library", "Application Support", "Google", "Chrome"); + break; + case "linux": + profileDir = path.join(home, ".config", "google-chrome"); + break; + case "win32": + profileDir = path.join(process.env.LOCALAPPDATA ?? path.join(home, "AppData", "Local"), "Google", "Chrome", "User Data"); + break; + default: + return undefined; + } + + return fs.existsSync(profileDir) ? profileDir : undefined; +} + +/** Resolve profile setting to concrete userDataDir path */ +export function resolveProfile(profile: string, browser: string): string | undefined { + // Only Chrome/Edge support persistent profiles + if (!PROFILE_BROWSERS.has(browser)) { + return undefined; + } + + if (profile === "none") { + return undefined; + } + + if (profile === "auto") { + return getDefaultChromeProfileDir(); + } + + // Explicit path — validate it exists + if (fs.existsSync(profile)) { + return profile; + } + + process.stderr.write(`[bap] Warning: profile path does not exist: ${profile}\n`); + return undefined; +} + // ============================================================================= // Server Manager // ============================================================================= export class ServerManager { - private options: Required; + private options: Required> & Pick; private client: BAPClient | null = null; constructor(options: ServerManagerOptions) { @@ -172,7 +234,7 @@ export class ServerManager { if (verbose) { process.stderr.write(`[bap] Reusing server on ${host}:${port}\n`); } - this.client = await createClient(url, { name: "bap-cli" }); + this.client = await createClient(url, { name: "bap-cli", sessionId: this.options.sessionId }); return this.client; } @@ -224,10 +286,52 @@ export class ServerManager { process.stderr.write(`[bap] Server ready on ws://${host}:${port}\n`); } - this.client = await createClient(url, { name: "bap-cli" }); + this.client = await createClient(url, { name: "bap-cli", sessionId: this.options.sessionId }); return this.client; } + 
/** + * Get a ready-to-use client with browser and page auto-initialized. + * + * Ensures a browser is launched and at least one page exists. + * Reuses existing pages from session persistence when available. + * Falls back to ensureClient() semantics (WebSocket only) for + * commands that manage their own lifecycle (open, close, sessions). + */ + async ensureReady(): Promise { + const client = await this.ensureClient(); + + // Check if pages already exist (e.g., from session persistence) + const { pages, activePage } = await client.listPages(); + + if (pages.length > 0) { + // Sync client's active page tracking with server state + const targetPage = activePage && activePage.length > 0 + ? activePage + : pages[0]!.id; + await client.activatePage(targetPage); + return client; + } + + // No pages — auto-initialize browser + page + const browserType = BROWSER_MAP[this.options.browser] ?? "chromium"; + const channel = CHANNEL_MAP[this.options.browser]; + const userDataDir = this.options.profile + ? resolveProfile(this.options.profile, this.options.browser) + : undefined; + + await client.launch({ + browser: browserType, + channel, + headless: this.options.headless, + ...(userDataDir ? { userDataDir } : {}), + }); + + await client.createPage(); + + return client; + } + /** * Disconnect the WebSocket client (server keeps running). */ diff --git a/packages/client/LICENSE b/packages/client/LICENSE new file mode 100644 index 0000000..f931480 --- /dev/null +++ b/packages/client/LICENSE @@ -0,0 +1,14 @@ +Browser Agent Protocol package license + +Copyright 2024-2026 Browser Agent Protocol contributors + +This package is licensed under the Apache License, Version 2.0. 
+You may obtain a copy of the License at: + +https://www.apache.org/licenses/LICENSE-2.0 + +The full Browser Agent Protocol repository licensing details, including +project-level notices for documentation and bundled third-party assets, are +available at: + +https://github.com/browseragentprotocol/bap/blob/main/LICENSE diff --git a/packages/client/package.json b/packages/client/package.json index 8a8eb0c..6c7dcfc 100644 --- a/packages/client/package.json +++ b/packages/client/package.json @@ -23,13 +23,19 @@ }, "files": [ "dist", - "README.md" + "README.md", + "CHANGELOG.md", + "LICENSE" ], "repository": { "type": "git", "url": "https://github.com/browseragentprotocol/bap.git", "directory": "packages/client" }, + "homepage": "https://github.com/browseragentprotocol/bap/tree/main/packages/client", + "bugs": { + "url": "https://github.com/browseragentprotocol/bap/issues" + }, "keywords": [ "bap", "client", diff --git a/packages/client/src/__tests__/discover-tools.test.ts b/packages/client/src/__tests__/discover-tools.test.ts new file mode 100644 index 0000000..d3af157 --- /dev/null +++ b/packages/client/src/__tests__/discover-tools.test.ts @@ -0,0 +1,141 @@ +import { describe, it, expect } from "vitest"; +import { BAPClient, type BAPTransport } from "../index.js"; + +/** + * Mock transport for testing client methods + */ +class MockTransport implements BAPTransport { + onMessage: ((message: string) => void) | null = null; + onClose: (() => void) | null = null; + onError: ((error: Error) => void) | null = null; + + sentMessages: string[] = []; + responses: Map = new Map(); + + async send(message: string): Promise { + this.sentMessages.push(message); + const parsed = JSON.parse(message); + + if (parsed.id !== undefined && this.responses.has(parsed.method)) { + queueMicrotask(() => { + this.receiveMessage({ + jsonrpc: "2.0", + id: parsed.id, + result: this.responses.get(parsed.method), + }); + }); + } + } + + async close(): Promise {} + + receiveMessage(message: 
object): void { + if (this.onMessage) { + this.onMessage(JSON.stringify(message)); + } + } + + setAutoResponse(method: string, response: unknown): void { + this.responses.set(method, response); + } + + getLastRequest(): { method: string; params: unknown; id: number } | null { + if (this.sentMessages.length === 0) return null; + return JSON.parse(this.sentMessages[this.sentMessages.length - 1]!); + } +} + +async function createConnectedClient(): Promise<{ client: BAPClient; transport: MockTransport }> { + const transport = new MockTransport(); + + transport.setAutoResponse("initialize", { + protocolVersion: "0.2.0", + serverInfo: { name: "test-server", version: "1.0.0" }, + capabilities: { browsers: ["chromium"] }, + }); + transport.setAutoResponse("notifications/initialized", {}); + transport.setAutoResponse("events/subscribe", { subscribed: [] }); + + const client = new BAPClient(transport); + await client.connect(); + + return { client, transport }; +} + +describe("BAPClient.discoverTools()", () => { + it("sends correct method and params", async () => { + const { client, transport } = await createConnectedClient(); + + transport.setAutoResponse("discovery/discover", { + tools: [], + totalDiscovered: 0, + }); + + const result = await client.discoverTools(); + + const request = transport.getLastRequest(); + expect(request?.method).toBe("discovery/discover"); + expect(request?.params).toEqual({ + pageId: null, + options: undefined, + }); + expect(result.tools).toEqual([]); + expect(result.totalDiscovered).toBe(0); + }); + + it("passes pageId when provided", async () => { + const { client, transport } = await createConnectedClient(); + + transport.setAutoResponse("discovery/discover", { + tools: [], + totalDiscovered: 0, + }); + + await client.discoverTools("page-42"); + + const request = transport.getLastRequest(); + expect(request?.params).toEqual({ + pageId: "page-42", + options: undefined, + }); + }); + + it("passes options when provided", async () => { + const { 
client, transport } = await createConnectedClient(); + + transport.setAutoResponse("discovery/discover", { + tools: [], + totalDiscovered: 0, + }); + + await client.discoverTools(undefined, { maxTools: 10, includeInputSchemas: false }); + + const request = transport.getLastRequest(); + expect(request?.params).toEqual({ + pageId: null, + options: { maxTools: 10, includeInputSchemas: false }, + }); + }); + + it("parses response with tools", async () => { + const { client, transport } = await createConnectedClient(); + + transport.setAutoResponse("discovery/discover", { + tools: [ + { name: "search", source: "webmcp-declarative", formSelector: "#search" }, + { name: "add-to-cart", description: "Add item", source: "webmcp-imperative" }, + ], + totalDiscovered: 2, + apiVersion: "1.0", + }); + + const result = await client.discoverTools(); + + expect(result.tools).toHaveLength(2); + expect(result.tools[0]!.name).toBe("search"); + expect(result.tools[0]!.source).toBe("webmcp-declarative"); + expect(result.tools[1]!.name).toBe("add-to-cart"); + expect(result.totalDiscovered).toBe(2); + expect(result.apiVersion).toBe("1.0"); + }); +}); diff --git a/packages/client/src/__tests__/session-persistence.test.ts b/packages/client/src/__tests__/session-persistence.test.ts new file mode 100644 index 0000000..2efa69d --- /dev/null +++ b/packages/client/src/__tests__/session-persistence.test.ts @@ -0,0 +1,145 @@ +import { describe, it, expect } from "vitest"; +import { BAPClient, type BAPTransport } from "../index.js"; + +/** + * Mock transport for testing session persistence behavior + */ +class MockTransport implements BAPTransport { + onMessage: ((message: string) => void) | null = null; + onClose: (() => void) | null = null; + onError: ((error: Error) => void) | null = null; + + sentMessages: string[] = []; + responses: Map = new Map(); + + async send(message: string): Promise { + this.sentMessages.push(message); + const parsed = JSON.parse(message); + + if (parsed.id !== 
undefined && this.responses.has(parsed.method)) { + queueMicrotask(() => { + this.receiveMessage({ + jsonrpc: "2.0", + id: parsed.id, + result: this.responses.get(parsed.method), + }); + }); + } + } + + async close(): Promise { + // no-op + } + + receiveMessage(message: object): void { + if (this.onMessage) { + this.onMessage(JSON.stringify(message)); + } + } + + setAutoResponse(method: string, response: unknown): void { + this.responses.set(method, response); + } + + getLastRequest(): { method: string; params: unknown; id: number } | null { + if (this.sentMessages.length === 0) return null; + return JSON.parse(this.sentMessages[this.sentMessages.length - 1]!); + } + + getAllRequests(): { method: string; params: unknown; id: number }[] { + return this.sentMessages.map((m) => JSON.parse(m)); + } +} + +function setupTransport(transport: MockTransport): void { + transport.setAutoResponse("initialize", { + protocolVersion: "0.2.0", + serverInfo: { name: "test-server", version: "1.0.0" }, + capabilities: { browsers: ["chromium"] }, + sessionId: "test-session", + }); + transport.setAutoResponse("notifications/initialized", {}); + transport.setAutoResponse("events/subscribe", { subscribed: [] }); +} + +describe("BAPClient - session persistence", () => { + describe("connect()", () => { + it("should include sessionId in initialize params when set", async () => { + const transport = new MockTransport(); + setupTransport(transport); + + const client = new BAPClient(transport, { + sessionId: "my-session", + events: [], + }); + await client.connect(); + + const initRequest = transport.getAllRequests().find( + (r) => r.method === "initialize" + ); + expect(initRequest).toBeDefined(); + const params = initRequest!.params as Record; + expect(params.sessionId).toBe("my-session"); + }); + + it("should not include sessionId in initialize params when not set", async () => { + const transport = new MockTransport(); + setupTransport(transport); + + const client = new 
BAPClient(transport, { events: [] }); + await client.connect(); + + const initRequest = transport.getAllRequests().find( + (r) => r.method === "initialize" + ); + expect(initRequest).toBeDefined(); + const params = initRequest!.params as Record; + expect(params.sessionId).toBeUndefined(); + }); + }); + + describe("close()", () => { + it("should skip shutdown RPC when sessionId is set", async () => { + const transport = new MockTransport(); + setupTransport(transport); + + const client = new BAPClient(transport, { + sessionId: "my-session", + events: [], + }); + await client.connect(); + + // Clear sent messages to only track close behavior + transport.sentMessages = []; + await client.close(); + + const methods = transport.sentMessages.map((m) => JSON.parse(m).method); + expect(methods).not.toContain("shutdown"); + }); + + it("should send shutdown RPC when sessionId is not set", async () => { + const transport = new MockTransport(); + setupTransport(transport); + transport.setAutoResponse("shutdown", {}); + + const client = new BAPClient(transport, { events: [] }); + await client.connect(); + + // Clear sent messages to only track close behavior + transport.sentMessages = []; + await client.close(); + + const methods = transport.sentMessages.map((m) => JSON.parse(m).method); + expect(methods).toContain("shutdown"); + }); + }); + + describe("constructor", () => { + it("should accept sessionId option", () => { + const client = new BAPClient("ws://localhost:9222", { + sessionId: "test-session", + }); + expect(client).toBeDefined(); + }); + }); +}); diff --git a/packages/client/src/__tests__/transport.test.ts b/packages/client/src/__tests__/transport.test.ts index dee844d..53b3169 100644 --- a/packages/client/src/__tests__/transport.test.ts +++ b/packages/client/src/__tests__/transport.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from "vitest"; -import { WebSocketTransport } from "../index.js"; +import { BAPClient, WebSocketTransport } from "../index.js"; 
/** * WebSocketTransport tests @@ -185,4 +185,37 @@ describe("WebSocketTransport", () => { expect(transport).toBeDefined(); }); }); + + describe("token updates", () => { + it("adds the latest token to future connection URLs", () => { + const transport = new WebSocketTransport("ws://localhost:9222"); + transport.updateToken("token-1"); + + expect((transport as any).getConnectionUrl()).toBe( + "ws://localhost:9222/?token=token-1" + ); + }); + + it("replaces the token instead of appending duplicates", () => { + const transport = new WebSocketTransport("ws://localhost:9222?existing=yes"); + transport.updateToken("token-1"); + transport.updateToken("token-2"); + + expect((transport as any).getConnectionUrl()).toBe( + "ws://localhost:9222/?existing=yes&token=token-2" + ); + }); + }); +}); + +describe("BAPClient token updates", () => { + it("propagates updated tokens to the WebSocket transport", () => { + const client = new BAPClient("ws://localhost:9222", { token: "initial-token" }); + client.updateToken("rotated-token"); + + const transport = (client as any).transport as WebSocketTransport; + expect((transport as any).getConnectionUrl()).toBe( + "ws://localhost:9222/?token=rotated-token" + ); + }); }); diff --git a/packages/client/src/index.ts b/packages/client/src/index.ts index 7e6845f..1413ccc 100644 --- a/packages/client/src/index.ts +++ b/packages/client/src/index.ts @@ -83,6 +83,8 @@ import { type ApprovalRequiredParams, type ApprovalRespondParams, type ApprovalRespondResult, + // Discovery types (WebMCP) + type DiscoveryDiscoverResult, } from "@browseragentprotocol/protocol"; // Re-export protocol types and helpers @@ -156,6 +158,8 @@ export class WebSocketTransport implements BAPTransport { private readonly autoReconnect: boolean; private isClosing = false; private isReconnecting = false; + private baseUrl: string; + private token: string | undefined; onMessage: ((message: string) => void) | null = null; onClose: (() => void) | null = null; @@ -166,14 +170,35 @@ 
export class WebSocketTransport implements BAPTransport { onReconnected: (() => void) | null = null; constructor( - private readonly url: string, + url: string, options: WebSocketTransportOptions = {} ) { + this.baseUrl = url; this.maxReconnectAttempts = options.maxReconnectAttempts ?? 5; this.reconnectDelay = options.reconnectDelay ?? 1000; this.autoReconnect = options.autoReconnect ?? false; } + /** + * Get the current connection URL, including token if set. + */ + private getConnectionUrl(): string { + if (!this.token) { + return this.baseUrl; + } + + const url = new URL(this.baseUrl); + url.searchParams.set("token", this.token); + return url.toString(); + } + + /** + * Update the authentication token for future connects/reconnects. + */ + updateToken(newToken: string): void { + this.token = newToken; + } + /** * Connect to the WebSocket server */ @@ -189,7 +214,7 @@ export class WebSocketTransport implements BAPTransport { this.ws = null; } - this.ws = new WebSocket(this.url); + this.ws = new WebSocket(this.getConnectionUrl()); this.ws.on("open", () => { this.reconnectAttempts = 0; @@ -328,6 +353,8 @@ export interface BAPClientOptions { timeout?: number; /** Events to subscribe to */ events?: string[]; + /** Session ID for cross-connection persistence (CLI mode) */ + sessionId?: string; } /** @@ -368,6 +395,7 @@ export class BAPClient extends EventEmitter { version: string; timeout: number; events: string[]; + sessionId?: string; }; constructor(urlOrTransport: string | BAPTransport, options: BAPClientOptions = {}) { @@ -379,16 +407,15 @@ export class BAPClient extends EventEmitter { version: options.version ?? "0.2.0", timeout: options.timeout ?? 30000, events: options.events ?? 
["page", "console", "network", "dialog"], + sessionId: options.sessionId, }; if (typeof urlOrTransport === "string") { - let url = urlOrTransport; + const transport = new WebSocketTransport(urlOrTransport); if (options.token) { - const urlObj = new URL(url); - urlObj.searchParams.set("token", options.token); - url = urlObj.toString(); + transport.updateToken(options.token); } - this.transport = new WebSocketTransport(url); + this.transport = transport; } else { this.transport = urlOrTransport; } @@ -398,6 +425,16 @@ export class BAPClient extends EventEmitter { this.transport.onError = (error) => this.emit("error", error); } + /** + * Update the authentication token for future connects/reconnects. + */ + updateToken(newToken: string): void { + this.options.token = newToken; + if (this.transport instanceof WebSocketTransport) { + this.transport.updateToken(newToken); + } + } + // =========================================================================== // Connection Management // =========================================================================== @@ -410,7 +447,7 @@ export class BAPClient extends EventEmitter { await this.transport.connect(); } - const result = await this.request("initialize", { + const initParams: InitializeParams = { protocolVersion: BAP_VERSION, clientInfo: { name: this.options.name, @@ -421,7 +458,12 @@ export class BAPClient extends EventEmitter { streaming: false, compression: false, }, - } satisfies InitializeParams); + }; + if (this.options.sessionId) { + initParams.sessionId = this.options.sessionId; + } + + const result = await this.request("initialize", initParams); const serverVersion = result.protocolVersion; const serverParts = serverVersion.split(".").map(Number); @@ -464,13 +506,18 @@ export class BAPClient extends EventEmitter { */ async close(): Promise { if (this.initialized) { - try { - await this.request("shutdown", { - saveState: false, - closePages: true, - }); - } catch { - // Ignore errors during shutdown + // When 
sessionId is set, skip shutdown RPC — just close transport. + // This triggers ws.on("close") server-side, which parks the session + // instead of destroying the browser. + if (!this.options.sessionId) { + try { + await this.request("shutdown", { + saveState: false, + closePages: true, + }); + } catch { + // Ignore errors during shutdown + } } } @@ -1326,6 +1373,31 @@ export class BAPClient extends EventEmitter { return this.request("approval/respond", params); } + // =========================================================================== + // Discovery Methods (WebMCP Tool Discovery) + // =========================================================================== + + /** + * Discover WebMCP tools exposed by the current page + * + * @example + * ```typescript + * const result = await client.discoverTools(); + * for (const tool of result.tools) { + * console.log(`${tool.name} (${tool.source}): ${tool.description}`); + * } + * ``` + */ + async discoverTools( + pageId?: string, + options?: { maxTools?: number; includeInputSchemas?: boolean } + ): Promise { + return this.request("discovery/discover", { + pageId: pageId ?? this.activePage, + options, + }); + } + /** * Helper to build an execution step * diff --git a/packages/logger/LICENSE b/packages/logger/LICENSE new file mode 100644 index 0000000..f931480 --- /dev/null +++ b/packages/logger/LICENSE @@ -0,0 +1,14 @@ +Browser Agent Protocol package license + +Copyright 2024-2026 Browser Agent Protocol contributors + +This package is licensed under the Apache License, Version 2.0. 
+You may obtain a copy of the License at: + +https://www.apache.org/licenses/LICENSE-2.0 + +The full Browser Agent Protocol repository licensing details, including +project-level notices for documentation and bundled third-party assets, are +available at: + +https://github.com/browseragentprotocol/bap/blob/main/LICENSE diff --git a/packages/logger/package.json b/packages/logger/package.json index 9c24224..b4c7d93 100644 --- a/packages/logger/package.json +++ b/packages/logger/package.json @@ -23,13 +23,19 @@ }, "files": [ "dist", - "README.md" + "README.md", + "CHANGELOG.md", + "LICENSE" ], "repository": { "type": "git", "url": "https://github.com/browseragentprotocol/bap.git", "directory": "packages/logger" }, + "homepage": "https://github.com/browseragentprotocol/bap/tree/main/packages/logger", + "bugs": { + "url": "https://github.com/browseragentprotocol/bap/issues" + }, "keywords": [ "bap", "browser-agent-protocol", diff --git a/packages/mcp/LICENSE b/packages/mcp/LICENSE new file mode 100644 index 0000000..f931480 --- /dev/null +++ b/packages/mcp/LICENSE @@ -0,0 +1,14 @@ +Browser Agent Protocol package license + +Copyright 2024-2026 Browser Agent Protocol contributors + +This package is licensed under the Apache License, Version 2.0. 
+You may obtain a copy of the License at: + +https://www.apache.org/licenses/LICENSE-2.0 + +The full Browser Agent Protocol repository licensing details, including +project-level notices for documentation and bundled third-party assets, are +available at: + +https://github.com/browseragentprotocol/bap/blob/main/LICENSE diff --git a/packages/mcp/package.json b/packages/mcp/package.json index 593510a..add1b71 100644 --- a/packages/mcp/package.json +++ b/packages/mcp/package.json @@ -26,13 +26,19 @@ }, "files": [ "dist", - "README.md" + "README.md", + "CHANGELOG.md", + "LICENSE" ], "repository": { "type": "git", "url": "https://github.com/browseragentprotocol/bap.git", "directory": "packages/mcp" }, + "homepage": "https://github.com/browseragentprotocol/bap/tree/main/packages/mcp", + "bugs": { + "url": "https://github.com/browseragentprotocol/bap/issues" + }, "keywords": [ "bap", "mcp", @@ -57,7 +63,7 @@ "@browseragentprotocol/client": "workspace:*", "@browseragentprotocol/logger": "workspace:*", "@browseragentprotocol/protocol": "workspace:*", - "@modelcontextprotocol/sdk": "^1.0.0", + "@modelcontextprotocol/sdk": "^1.27.1", "zod": "^3.23.0" }, "devDependencies": { diff --git a/packages/mcp/src/__tests__/discover-tools.test.ts b/packages/mcp/src/__tests__/discover-tools.test.ts new file mode 100644 index 0000000..adb7f40 --- /dev/null +++ b/packages/mcp/src/__tests__/discover-tools.test.ts @@ -0,0 +1,27 @@ +import { describe, it, expect } from "vitest"; + +import { BAPMCPServer, parseSelector } from "../index.js"; + +/** + * Tests for the discover_tools MCP tool definition and observe includeWebMCPTools param. + * These are structural tests that verify tool definitions are correct — + * integration tests with a running server are out of scope here. 
+ */ + +describe("discover_tools MCP tool", () => { + it("module is importable", () => { + expect(BAPMCPServer).toBeDefined(); + }); +}); + +describe("observe tool - includeWebMCPTools param", () => { + it("exports BAPMCPServer class", () => { + expect(typeof BAPMCPServer).toBe("function"); + }); +}); + +describe("parseSelector export", () => { + it("is exported from the module", () => { + expect(typeof parseSelector).toBe("function"); + }); +}); diff --git a/packages/mcp/src/__tests__/validation.test.ts b/packages/mcp/src/__tests__/validation.test.ts new file mode 100644 index 0000000..e8eedb2 --- /dev/null +++ b/packages/mcp/src/__tests__/validation.test.ts @@ -0,0 +1,34 @@ +import { describe, it, expect, vi } from "vitest"; +import { BAPMCPServer } from "../index.js"; + +describe("BAPMCPServer argument validation", () => { + it("rejects malformed navigate URLs before calling navigate", async () => { + const server = new BAPMCPServer(); + const mockClient = { + navigate: vi.fn(), + listPages: vi.fn(), + createPage: vi.fn(), + }; + + (server as any).ensureClient = vi.fn().mockResolvedValue(mockClient); + + await expect( + (server as any).handleToolCall("navigate", { url: "example.com" }) + ).rejects.toThrow("Invalid arguments for 'navigate'"); + expect(mockClient.navigate).not.toHaveBeenCalled(); + }); + + it("rejects empty activate_page ids before calling activatePage", async () => { + const server = new BAPMCPServer(); + const mockClient = { + activatePage: vi.fn(), + }; + + (server as any).ensureClient = vi.fn().mockResolvedValue(mockClient); + + await expect( + (server as any).handleToolCall("activate_page", { pageId: "" }) + ).rejects.toThrow("Invalid arguments for 'activate_page'"); + expect(mockClient.activatePage).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 368e559..0d120ac 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -6,7 +6,7 @@ * Exposes Browser Agent Protocol 
as an MCP (Model Context Protocol) server. * Allows AI agents to control browsers through standardized MCP tools. * - * TODO (MEDIUM): Add input validation on tool arguments before passing to BAP client + * TODO (MEDIUM): Add input validation on tool arguments before passing to BAP client — DONE (critical tools) * TODO (MEDIUM): Enforce session timeout (maxSessionDuration) - currently unused * TODO (MEDIUM): Add resource cleanup on partial failure in ensureClient() — DONE (v0.2.0) * TODO (LOW): parseSelector should validate empty/whitespace-only strings @@ -40,7 +40,89 @@ import { type ExtractionSchema, type AriaRole, type AgentObserveResult, + type WebMCPTool, } from "@browseragentprotocol/protocol"; +import { z } from "zod"; + +// ============================================================================= +// Input Validation +// ============================================================================= + +const nonEmptyString = z.string().min(1, "must be a non-empty string"); + +const urlString = nonEmptyString.refine((value) => { + try { + new URL(value); + return true; + } catch { + return false; + } +}, "must be a valid URL including protocol"); + +const positiveInt = z.number().int().positive(); +const nonNegativeNumber = z.number().nonnegative(); + +const ToolArgSchemas = { + navigate: z.object({ + url: urlString, + waitUntil: z.enum(["load", "domcontentloaded", "networkidle"]).optional(), + observe: z.boolean().optional(), + observeMaxElements: positiveInt.optional(), + }), + click: z.object({ + selector: nonEmptyString, + clickCount: positiveInt.optional(), + }), + type: z.object({ + selector: nonEmptyString, + text: z.string(), + delay: nonNegativeNumber.optional(), + }), + fill: z.object({ + selector: nonEmptyString, + value: z.string(), + }), + press: z.object({ + key: nonEmptyString, + selector: nonEmptyString.optional(), + }), + select: z.object({ + selector: nonEmptyString, + value: nonEmptyString, + }), + hover: z.object({ + selector: 
nonEmptyString, + }), + element: z.object({ + selector: nonEmptyString, + properties: z.array(z.string()).optional(), + }), + activatePage: z.object({ + pageId: nonEmptyString, + }), + extract: z.object({ + instruction: nonEmptyString, + schema: z.object({ type: z.string() }).passthrough(), + mode: z.enum(["single", "list", "table"]).optional(), + selector: nonEmptyString.optional(), + }), +} as const; + +function validateArgs( + toolName: string, + schema: T, + args: Record +): z.infer { + const result = schema.safeParse(args); + if (result.success) { + return result.data; + } + + const issues = result.error.issues + .map((issue) => `${issue.path.join(".")}: ${issue.message}`) + .join("; "); + throw new Error(`Invalid arguments for '${toolName}': ${issues}`); +} // ============================================================================= // Types @@ -620,6 +702,10 @@ RECOMMENDED: Use this before complex interactions to understand the page.`, enum: ["full", "interactive", "minimal"], description: "Response compression tier: 'full' (default, all data), 'interactive' (elements+metadata only), 'minimal' (refs+names only)", }, + includeWebMCPTools: { + type: "boolean", + description: "Include WebMCP tools discovered on the page. WebMCP tools are exposed by cooperative websites for AI agent interaction.", + }, }, }, }, @@ -667,6 +753,28 @@ Works best with standard HTML patterns (ul/ol, tables, cards). For complex pages required: ["instruction", "schema"], }, }, + + // Discovery (WebMCP) + { + name: "discover_tools", + description: `Discover WebMCP tools exposed by the current page. +Returns structured tool definitions that the page makes available for AI agent interaction. +WebMCP tools are exposed by cooperative websites via HTML attributes or the navigator.modelContext API. 
+Returns an empty array on pages without WebMCP support.`, + inputSchema: { + type: "object", + properties: { + maxTools: { + type: "number", + description: "Maximum number of tools to return (default: 50)", + }, + includeInputSchemas: { + type: "boolean", + description: "Include JSON schemas for tool input parameters (default: true)", + }, + }, + }, + }, ]; // ============================================================================= @@ -939,7 +1047,8 @@ export class BAPMCPServer { switch (name) { // Navigation case "navigate": { - const url = args.url as string; + const validated = validateArgs("navigate", ToolArgSchemas.navigate, args); + const url = validated.url; // Security check if (!this.isAllowedDomain(url)) { @@ -963,17 +1072,17 @@ export class BAPMCPServer { }; } - const waitUntil = (args.waitUntil as WaitUntilState) ?? "load"; + const waitUntil = (validated.waitUntil as WaitUntilState) ?? "load"; // Fusion: navigate-observe kernel - const observeFlag = args.observe as boolean | undefined; + const observeFlag = validated.observe; const result = await client.navigate(url, { waitUntil, ...(observeFlag ? { observe: { includeMetadata: true, includeInteractiveElements: true, - maxElements: (args.observeMaxElements as number) ?? 50, + maxElements: validated.observeMaxElements ?? 50, }, } : {}), }); @@ -1005,48 +1114,48 @@ export class BAPMCPServer { // Element Interaction case "click": { - const selector = parseSelector(args.selector as string); - const options = args.clickCount ? { clickCount: args.clickCount as number } : undefined; + const validated = validateArgs("click", ToolArgSchemas.click, args); + const selector = parseSelector(validated.selector); + const options = validated.clickCount ? 
{ clickCount: validated.clickCount } : undefined; await client.click(selector, options); return { - content: [{ type: "text", text: `Clicked: ${args.selector}` }], + content: [{ type: "text", text: `Clicked: ${validated.selector}` }], }; } case "type": { - const selector = parseSelector(args.selector as string); - const text = args.text as string; - const delay = args.delay as number | undefined; - await client.type(selector, text, { delay }); + const validated = validateArgs("type", ToolArgSchemas.type, args); + const selector = parseSelector(validated.selector); + await client.type(selector, validated.text, { delay: validated.delay }); return { - content: [{ type: "text", text: `Typed "${text}" into: ${args.selector}` }], + content: [{ type: "text", text: `Typed "${validated.text}" into: ${validated.selector}` }], }; } case "fill": { - const selector = parseSelector(args.selector as string); - const value = args.value as string; - await client.fill(selector, value); + const validated = validateArgs("fill", ToolArgSchemas.fill, args); + const selector = parseSelector(validated.selector); + await client.fill(selector, validated.value); return { - content: [{ type: "text", text: `Filled "${value}" into: ${args.selector}` }], + content: [{ type: "text", text: `Filled "${validated.value}" into: ${validated.selector}` }], }; } case "press": { - const key = args.key as string; - const selector = args.selector ? parseSelector(args.selector as string) : undefined; - await client.press(key, selector); + const validated = validateArgs("press", ToolArgSchemas.press, args); + const selector = validated.selector ? 
parseSelector(validated.selector) : undefined; + await client.press(validated.key, selector); return { - content: [{ type: "text", text: `Pressed: ${key}` }], + content: [{ type: "text", text: `Pressed: ${validated.key}` }], }; } case "select": { - const selector = parseSelector(args.selector as string); - const value = args.value as string; - await client.select(selector, value); + const validated = validateArgs("select", ToolArgSchemas.select, args); + const selector = parseSelector(validated.selector); + await client.select(selector, validated.value); return { - content: [{ type: "text", text: `Selected "${value}" in: ${args.selector}` }], + content: [{ type: "text", text: `Selected "${validated.value}" in: ${validated.selector}` }], }; } @@ -1061,10 +1170,11 @@ export class BAPMCPServer { } case "hover": { - const selector = parseSelector(args.selector as string); + const validated = validateArgs("hover", ToolArgSchemas.hover, args); + const selector = parseSelector(validated.selector); await client.hover(selector); return { - content: [{ type: "text", text: `Hovered over: ${args.selector}` }], + content: [{ type: "text", text: `Hovered over: ${validated.selector}` }], }; } @@ -1120,8 +1230,9 @@ export class BAPMCPServer { } case "element": { - const selector = parseSelector(args.selector as string); - const properties = (args.properties as ElementProperty[]) ?? ["visible", "enabled"]; + const validated = validateArgs("element", ToolArgSchemas.element, args); + const selector = parseSelector(validated.selector); + const properties = (validated.properties as ElementProperty[]) ?? 
["visible", "enabled"]; const result = await client.element(selector, properties); return { content: [ @@ -1145,10 +1256,10 @@ export class BAPMCPServer { } case "activate_page": { - const pageId = args.pageId as string; - await client.activatePage(pageId); + const validated = validateArgs("activate_page", ToolArgSchemas.activatePage, args); + await client.activatePage(validated.pageId); return { - content: [{ type: "text", text: `Activated page: ${pageId}` }], + content: [{ type: "text", text: `Activated page: ${validated.pageId}` }], }; } @@ -1288,6 +1399,8 @@ export class BAPMCPServer { // Fusion options incremental: args.incremental as boolean | undefined, responseTier: args.responseTier as "full" | "interactive" | "minimal" | undefined, + // WebMCP discovery + includeWebMCPTools: args.includeWebMCPTools as boolean | undefined, }); const content: Array<{ type: "text" | "image"; text?: string; data?: string; mimeType?: string }> = []; @@ -1355,6 +1468,17 @@ export class BAPMCPServer { } } + // WebMCP tools (if discovered) + if (result.webmcpTools && result.webmcpTools.length > 0) { + const toolList = result.webmcpTools + .map((t: WebMCPTool) => `- ${t.name} (${t.source})${t.description ? `: ${t.description}` : ""}`) + .join("\n"); + content.push({ + type: "text", + text: `\nWebMCP Tools (${result.webmcpTools.length}):\n${toolList}`, + }); + } + // Screenshot if (result.screenshot) { const annotatedNote = result.screenshot.annotated ? " (annotated)" : ""; @@ -1373,11 +1497,12 @@ export class BAPMCPServer { } case "extract": { + const validated = validateArgs("extract", ToolArgSchemas.extract, args); const result = await client.extract({ - instruction: args.instruction as string, - schema: args.schema as ExtractionSchema, - mode: args.mode as "single" | "list" | "table" | undefined, - selector: args.selector ? 
parseSelector(args.selector as string) : undefined, + instruction: validated.instruction, + schema: validated.schema as ExtractionSchema, + mode: validated.mode, + selector: validated.selector ? parseSelector(validated.selector) : undefined, }); if (result.success) { @@ -1402,6 +1527,42 @@ export class BAPMCPServer { } } + case "discover_tools": { + const result = await client.discoverTools( + undefined, + { + maxTools: args.maxTools as number | undefined, + includeInputSchemas: args.includeInputSchemas as boolean | undefined, + } + ); + + if (result.tools.length === 0) { + return { + content: [{ + type: "text", + text: "No WebMCP tools found on this page. WebMCP tools are exposed by cooperative websites via HTML attributes or the navigator.modelContext API.", + }], + }; + } + + const toolList = result.tools + .map((t: WebMCPTool) => { + const parts = [`- ${t.name} (${t.source})`]; + if (t.description) parts.push(` ${t.description}`); + if (t.inputSchema) parts.push(` Schema: ${JSON.stringify(t.inputSchema)}`); + if (t.formSelector) parts.push(` Form: ${t.formSelector}`); + return parts.join("\n"); + }) + .join("\n"); + + return { + content: [{ + type: "text", + text: `WebMCP Tools (${result.tools.length}/${result.totalDiscovered})${result.apiVersion ? ` [API v${result.apiVersion}]` : ""}:\n${toolList}`, + }], + }; + } + default: return { content: [{ type: "text", text: `Unknown tool: ${name}` }], diff --git a/packages/protocol/LICENSE b/packages/protocol/LICENSE new file mode 100644 index 0000000..f931480 --- /dev/null +++ b/packages/protocol/LICENSE @@ -0,0 +1,14 @@ +Browser Agent Protocol package license + +Copyright 2024-2026 Browser Agent Protocol contributors + +This package is licensed under the Apache License, Version 2.0. 
+You may obtain a copy of the License at: + +https://www.apache.org/licenses/LICENSE-2.0 + +The full Browser Agent Protocol repository licensing details, including +project-level notices for documentation and bundled third-party assets, are +available at: + +https://github.com/browseragentprotocol/bap/blob/main/LICENSE diff --git a/packages/protocol/package.json b/packages/protocol/package.json index 310d256..160e7f3 100644 --- a/packages/protocol/package.json +++ b/packages/protocol/package.json @@ -35,13 +35,19 @@ }, "files": [ "dist", - "README.md" + "README.md", + "CHANGELOG.md", + "LICENSE" ], "repository": { "type": "git", "url": "https://github.com/browseragentprotocol/bap.git", "directory": "packages/protocol" }, + "homepage": "https://github.com/browseragentprotocol/bap/tree/main/packages/protocol", + "bugs": { + "url": "https://github.com/browseragentprotocol/bap/issues" + }, "keywords": [ "bap", "browser-agent-protocol", diff --git a/packages/protocol/src/__tests__/agent-webmcp.test.ts b/packages/protocol/src/__tests__/agent-webmcp.test.ts new file mode 100644 index 0000000..7b56b7c --- /dev/null +++ b/packages/protocol/src/__tests__/agent-webmcp.test.ts @@ -0,0 +1,52 @@ +import { describe, it, expect } from "vitest"; +import { + AgentObserveParamsSchema, + AgentObserveResultSchema, +} from "../types/agent.js"; + +describe("AgentObserveParams - WebMCP extension", () => { + it("accepts includeWebMCPTools param", () => { + const params = AgentObserveParamsSchema.parse({ + includeWebMCPTools: true, + }); + expect(params.includeWebMCPTools).toBe(true); + }); + + it("defaults includeWebMCPTools to undefined", () => { + const params = AgentObserveParamsSchema.parse({}); + expect(params.includeWebMCPTools).toBeUndefined(); + }); + + it("accepts false for includeWebMCPTools", () => { + const params = AgentObserveParamsSchema.parse({ + includeWebMCPTools: false, + }); + expect(params.includeWebMCPTools).toBe(false); + }); +}); + +describe("AgentObserveResult - 
WebMCP extension", () => { + it("accepts result with webmcpTools", () => { + const result = AgentObserveResultSchema.parse({ + webmcpTools: [ + { name: "search", source: "webmcp-declarative" }, + { name: "checkout", description: "Complete purchase", source: "webmcp-imperative" }, + ], + }); + expect(result.webmcpTools).toHaveLength(2); + expect(result.webmcpTools![0]!.name).toBe("search"); + expect(result.webmcpTools![1]!.source).toBe("webmcp-imperative"); + }); + + it("accepts result without webmcpTools", () => { + const result = AgentObserveResultSchema.parse({}); + expect(result.webmcpTools).toBeUndefined(); + }); + + it("accepts result with empty webmcpTools", () => { + const result = AgentObserveResultSchema.parse({ + webmcpTools: [], + }); + expect(result.webmcpTools).toEqual([]); + }); +}); diff --git a/packages/protocol/src/__tests__/authorization.test.ts b/packages/protocol/src/__tests__/authorization.test.ts index 870578e..eed2aea 100644 --- a/packages/protocol/src/__tests__/authorization.test.ts +++ b/packages/protocol/src/__tests__/authorization.test.ts @@ -29,6 +29,16 @@ describe("hasScope()", () => { expect(hasScope([], "initialize")).toBe(true); expect(hasScope([], "shutdown")).toBe(true); }); + + it("covers context, frame, streaming, approval, agent, and discovery methods", () => { + expect(hasScope(["context:create"], "context/create")).toBe(true); + expect(hasScope(["page:*"], "frame/list")).toBe(true); + expect(hasScope(["observe:*"], "stream/cancel")).toBe(true); + expect(hasScope(["action:*"], "approval/respond")).toBe(true); + expect(hasScope(["action:*"], "agent/act")).toBe(true); + expect(hasScope(["observe:*"], "agent/observe")).toBe(true); + expect(hasScope(["observe:*"], "discovery/discover")).toBe(true); + }); }); describe("wildcard matching", () => { @@ -290,6 +300,17 @@ describe("ScopeProfiles", () => { }); }); +describe("MethodScopes", () => { + it("includes extended server methods used outside the core page/action set", () => { + 
expect(MethodScopes["context/create"]).toEqual(["context:create", "context:*", "*"]); + expect(MethodScopes["frame/list"]).toEqual(["page:read", "page:*", "*"]); + expect(MethodScopes["stream/cancel"]).toEqual(["observe:*", "*"]); + expect(MethodScopes["approval/respond"]).toEqual(["action:*", "*"]); + expect(MethodScopes["agent/act"]).toEqual(["action:*", "*"]); + expect(MethodScopes["discovery/discover"]).toEqual(["observe:*", "*"]); + }); +}); + describe("MethodScopes", () => { it("defines scopes for action methods", () => { expect(MethodScopes["action/click"]).toBeDefined(); diff --git a/packages/protocol/src/__tests__/discovery.test.ts b/packages/protocol/src/__tests__/discovery.test.ts new file mode 100644 index 0000000..99f7abf --- /dev/null +++ b/packages/protocol/src/__tests__/discovery.test.ts @@ -0,0 +1,156 @@ +import { describe, it, expect } from "vitest"; +import { + WebMCPToolSourceSchema, + WebMCPToolSchema, + DiscoveryDiscoverParamsSchema, + DiscoveryDiscoverResultSchema, + DiscoveryDiscoverOptionsSchema, +} from "../types/discovery.js"; + +describe("WebMCPToolSourceSchema", () => { + it("accepts valid source values", () => { + expect(WebMCPToolSourceSchema.parse("webmcp-declarative")).toBe("webmcp-declarative"); + expect(WebMCPToolSourceSchema.parse("webmcp-imperative")).toBe("webmcp-imperative"); + }); + + it("rejects invalid source values", () => { + expect(WebMCPToolSourceSchema.safeParse("unknown").success).toBe(false); + expect(WebMCPToolSourceSchema.safeParse("").success).toBe(false); + expect(WebMCPToolSourceSchema.safeParse(42).success).toBe(false); + }); +}); + +describe("WebMCPToolSchema", () => { + it("accepts a minimal declarative tool", () => { + const tool = WebMCPToolSchema.parse({ + name: "search", + source: "webmcp-declarative", + }); + expect(tool.name).toBe("search"); + expect(tool.source).toBe("webmcp-declarative"); + expect(tool.description).toBeUndefined(); + expect(tool.inputSchema).toBeUndefined(); + 
expect(tool.formSelector).toBeUndefined(); + }); + + it("accepts a fully-specified declarative tool", () => { + const tool = WebMCPToolSchema.parse({ + name: "search-products", + description: "Search the product catalog", + inputSchema: { + type: "object", + properties: { + query: { type: "string" }, + }, + }, + source: "webmcp-declarative", + formSelector: "form[toolname=\"search-products\"]", + }); + expect(tool.name).toBe("search-products"); + expect(tool.description).toBe("Search the product catalog"); + expect(tool.inputSchema).toEqual({ + type: "object", + properties: { query: { type: "string" } }, + }); + expect(tool.formSelector).toBe("form[toolname=\"search-products\"]"); + }); + + it("accepts an imperative tool", () => { + const tool = WebMCPToolSchema.parse({ + name: "add-to-cart", + description: "Add item to cart", + source: "webmcp-imperative", + }); + expect(tool.source).toBe("webmcp-imperative"); + expect(tool.formSelector).toBeUndefined(); + }); + + it("rejects tool without name", () => { + expect(WebMCPToolSchema.safeParse({ + source: "webmcp-declarative", + }).success).toBe(false); + }); + + it("rejects tool without source", () => { + expect(WebMCPToolSchema.safeParse({ + name: "test", + }).success).toBe(false); + }); +}); + +describe("DiscoveryDiscoverOptionsSchema", () => { + it("accepts empty options", () => { + const opts = DiscoveryDiscoverOptionsSchema.parse({}); + expect(opts.maxTools).toBeUndefined(); + expect(opts.includeInputSchemas).toBeUndefined(); + }); + + it("accepts all options", () => { + const opts = DiscoveryDiscoverOptionsSchema.parse({ + maxTools: 25, + includeInputSchemas: false, + }); + expect(opts.maxTools).toBe(25); + expect(opts.includeInputSchemas).toBe(false); + }); +}); + +describe("DiscoveryDiscoverParamsSchema", () => { + it("accepts empty params", () => { + const params = DiscoveryDiscoverParamsSchema.parse({}); + expect(params.pageId).toBeUndefined(); + expect(params.options).toBeUndefined(); + }); + + it("accepts 
params with pageId", () => { + const params = DiscoveryDiscoverParamsSchema.parse({ + pageId: "page-123", + }); + expect(params.pageId).toBe("page-123"); + }); + + it("accepts params with options", () => { + const params = DiscoveryDiscoverParamsSchema.parse({ + options: { maxTools: 10 }, + }); + expect(params.options?.maxTools).toBe(10); + }); +}); + +describe("DiscoveryDiscoverResultSchema", () => { + it("accepts empty result", () => { + const result = DiscoveryDiscoverResultSchema.parse({ + tools: [], + totalDiscovered: 0, + }); + expect(result.tools).toEqual([]); + expect(result.totalDiscovered).toBe(0); + expect(result.apiVersion).toBeUndefined(); + }); + + it("accepts result with tools", () => { + const result = DiscoveryDiscoverResultSchema.parse({ + tools: [ + { name: "search", source: "webmcp-declarative", formSelector: "#search-form" }, + { name: "add-to-cart", description: "Add item", source: "webmcp-imperative" }, + ], + totalDiscovered: 2, + apiVersion: "1.0", + }); + expect(result.tools).toHaveLength(2); + expect(result.totalDiscovered).toBe(2); + expect(result.apiVersion).toBe("1.0"); + }); + + it("rejects result without tools array", () => { + expect(DiscoveryDiscoverResultSchema.safeParse({ + totalDiscovered: 0, + }).success).toBe(false); + }); + + it("rejects result without totalDiscovered", () => { + expect(DiscoveryDiscoverResultSchema.safeParse({ + tools: [], + }).success).toBe(false); + }); +}); diff --git a/packages/protocol/src/authorization.ts b/packages/protocol/src/authorization.ts index 886ea0b..8ddd034 100644 --- a/packages/protocol/src/authorization.ts +++ b/packages/protocol/src/authorization.ts @@ -34,6 +34,12 @@ export type BAPScope = | 'browser:launch' | 'browser:close' + // Context scopes + | 'context:*' + | 'context:create' + | 'context:read' + | 'context:destroy' + // Page scopes | 'page:*' | 'page:read' // list, activate (non-destructive) @@ -148,6 +154,11 @@ export const MethodScopes: Record = { 'browser/launch': 
['browser:launch', 'browser:*', '*'], 'browser/close': ['browser:close', 'browser:*', '*'], + // Context + 'context/create': ['context:create', 'context:*', '*'], + 'context/list': ['context:read', 'context:*', '*'], + 'context/destroy': ['context:destroy', 'context:*', '*'], + // Page 'page/create': ['page:create', 'page:*', '*'], 'page/navigate': ['page:navigate', 'page:*', '*'], @@ -158,6 +169,11 @@ export const MethodScopes: Record = { 'page/list': ['page:read', 'page:*', '*'], 'page/activate': ['page:read', 'page:*', '*'], + // Frame + 'frame/list': ['page:read', 'page:*', '*'], + 'frame/switch': ['page:navigate', 'page:*', '*'], + 'frame/main': ['page:navigate', 'page:*', '*'], + // Actions 'action/click': ['action:click', 'action:*', '*'], 'action/dblclick': ['action:click', 'action:*', '*'], @@ -210,6 +226,20 @@ export const MethodScopes: Record = { // Events 'events/subscribe': ['observe:*', '*'], + + // Discovery + 'discovery/discover': ['observe:*', '*'], + + // Streaming + 'stream/cancel': ['observe:*', '*'], + + // Approval + 'approval/respond': ['action:*', '*'], + + // Agent + 'agent/act': ['action:*', '*'], + 'agent/observe': ['observe:*', '*'], + 'agent/extract': ['observe:*', '*'], }; /** diff --git a/packages/protocol/src/types/agent.ts b/packages/protocol/src/types/agent.ts index 97d7fb3..2185320 100644 --- a/packages/protocol/src/types/agent.ts +++ b/packages/protocol/src/types/agent.ts @@ -11,6 +11,7 @@ import { z } from "zod"; import { BAPSelectorSchema } from "./selectors.js"; import { AccessibilityNodeSchema } from "./common.js"; +import { WebMCPToolSchema } from "./discovery.js"; // ============================================================================= // agent/act - Multi-step action execution @@ -444,6 +445,9 @@ export const AgentObserveParamsSchema = z.object({ /** Return only changes since last observation (added, updated, removed) */ incremental: z.boolean().optional(), + + /** Include WebMCP tools discovered on the page 
(opt-in) */ + includeWebMCPTools: z.boolean().optional(), }); export type AgentObserveParams = z.infer; @@ -514,6 +518,9 @@ export const AgentObserveResultSchema = z.object({ /** Incremental changes since last observation (if incremental: true) */ changes: ObserveChangesSchema.optional(), + + /** WebMCP tools discovered on the page (if includeWebMCPTools: true) */ + webmcpTools: z.array(WebMCPToolSchema).optional(), }); export type AgentObserveResult = z.infer; diff --git a/packages/protocol/src/types/capabilities.ts b/packages/protocol/src/types/capabilities.ts index fb63df8..caf4f89 100644 --- a/packages/protocol/src/types/capabilities.ts +++ b/packages/protocol/src/types/capabilities.ts @@ -118,6 +118,8 @@ export const InitializeParamsSchema = z.object({ protocolVersion: z.string(), clientInfo: ClientInfoSchema, capabilities: ClientCapabilitiesSchema, + /** Optional session ID for cross-connection persistence (CLI mode) */ + sessionId: z.string().optional(), }); export type InitializeParams = z.infer; @@ -128,6 +130,8 @@ export const InitializeResultSchema = z.object({ protocolVersion: z.string(), serverInfo: ServerInfoSchema, capabilities: ServerCapabilitiesSchema, + /** Echoed session ID when server supports session persistence */ + sessionId: z.string().optional(), }); export type InitializeResult = z.infer; diff --git a/packages/protocol/src/types/discovery.ts b/packages/protocol/src/types/discovery.ts new file mode 100644 index 0000000..3c1a572 --- /dev/null +++ b/packages/protocol/src/types/discovery.ts @@ -0,0 +1,88 @@ +/** + * @fileoverview WebMCP discovery types for BAP + * @module @browseragentprotocol/protocol/types/discovery + * + * Types for discovering WebMCP tools exposed by web pages. + * WebMCP (W3C Community Group) lets websites expose structured tools + * to AI agents via browser-native APIs. BAP bridges these tools + * through the discovery/* protocol namespace. 
+ */ + +import { z } from "zod"; + +// ============================================================================= +// WebMCP Tool Types +// ============================================================================= + +/** + * Source API surface that exposed the WebMCP tool + */ +export const WebMCPToolSourceSchema = z.enum([ + "webmcp-declarative", // HTML attributes (form[toolname], tooldescription, toolparamdescription) + "webmcp-imperative", // JavaScript API (navigator.modelContext) +]); +export type WebMCPToolSource = z.infer; + +/** + * A WebMCP tool discovered on a page + */ +export const WebMCPToolSchema = z.object({ + /** Tool name (from toolname attribute or imperative API) */ + name: z.string(), + + /** Human-readable description of what the tool does */ + description: z.string().optional(), + + /** JSON Schema for tool input parameters */ + inputSchema: z.record(z.unknown()).optional(), + + /** Which API surface exposed this tool */ + source: WebMCPToolSourceSchema, + + /** CSS selector for the associated form element (declarative tools only) */ + formSelector: z.string().optional(), +}); +export type WebMCPTool = z.infer; + +// ============================================================================= +// discovery/discover +// ============================================================================= + +/** + * Options for tool discovery + */ +export const DiscoveryDiscoverOptionsSchema = z.object({ + /** Maximum number of tools to return (default: 50) */ + maxTools: z.number().optional(), + + /** Include JSON schemas for tool input parameters (default: true) */ + includeInputSchemas: z.boolean().optional(), +}); +export type DiscoveryDiscoverOptions = z.infer; + +/** + * Parameters for discovery/discover + */ +export const DiscoveryDiscoverParamsSchema = z.object({ + /** Page to discover tools on (defaults to active page) */ + pageId: z.string().optional(), + + /** Discovery options */ + options: DiscoveryDiscoverOptionsSchema.optional(), 
+}); +export type DiscoveryDiscoverParams = z.infer; + +/** + * Result of discovery/discover + */ +export const DiscoveryDiscoverResultSchema = z.object({ + /** Discovered WebMCP tools */ + tools: z.array(WebMCPToolSchema), + + /** Total number of tools discovered (before maxTools limit) */ + totalDiscovered: z.number(), + + /** WebMCP API version detected on the page, if available */ + apiVersion: z.string().optional(), +}); +export type DiscoveryDiscoverResult = z.infer; diff --git a/packages/protocol/src/types/index.ts b/packages/protocol/src/types/index.ts index ca0b990..fe20428 100644 --- a/packages/protocol/src/types/index.ts +++ b/packages/protocol/src/types/index.ts @@ -505,3 +505,17 @@ export { type ExtractionSourceRef, type AgentExtractResult, } from "./agent.js"; + +// Discovery types (WebMCP tool discovery) +export { + WebMCPToolSourceSchema, + WebMCPToolSchema, + DiscoveryDiscoverOptionsSchema, + DiscoveryDiscoverParamsSchema, + DiscoveryDiscoverResultSchema, + type WebMCPToolSource, + type WebMCPTool, + type DiscoveryDiscoverOptions, + type DiscoveryDiscoverParams, + type DiscoveryDiscoverResult, +} from "./discovery.js"; diff --git a/packages/protocol/src/types/methods.ts b/packages/protocol/src/types/methods.ts index d0afd32..f625381 100644 --- a/packages/protocol/src/types/methods.ts +++ b/packages/protocol/src/types/methods.ts @@ -48,6 +48,7 @@ export const BrowserLaunchParamsSchema = z.object({ args: z.array(z.string()).optional(), proxy: ProxyConfigSchema.optional(), downloadsPath: z.string().optional(), + userDataDir: z.string().optional(), }); export type BrowserLaunchParams = z.infer; @@ -994,5 +995,8 @@ export const BAPMethodSchema = z.enum([ "agent/act", "agent/observe", "agent/extract", + + // Discovery methods (WebMCP tool discovery) + "discovery/discover", ]); export type BAPMethod = z.infer; diff --git a/packages/python-sdk/LICENSE b/packages/python-sdk/LICENSE new file mode 100644 index 0000000..f931480 --- /dev/null +++ 
b/packages/python-sdk/LICENSE @@ -0,0 +1,14 @@ +Browser Agent Protocol package license + +Copyright 2024-2026 Browser Agent Protocol contributors + +This package is licensed under the Apache License, Version 2.0. +You may obtain a copy of the License at: + +https://www.apache.org/licenses/LICENSE-2.0 + +The full Browser Agent Protocol repository licensing details, including +project-level notices for documentation and bundled third-party assets, are +available at: + +https://github.com/browseragentprotocol/bap/blob/main/LICENSE diff --git a/packages/python-sdk/package.json b/packages/python-sdk/package.json index 232de4b..06e500a 100644 --- a/packages/python-sdk/package.json +++ b/packages/python-sdk/package.json @@ -1,13 +1,13 @@ { "name": "@browseragentprotocol/python-client", - "version": "0.2.0", + "version": "0.3.0", "private": true, "description": "Python SDK for Browser Agent Protocol (BAP) - build scripts only", "scripts": { "build": "echo 'Python package - use pip install -e . for development'", - "typecheck": "python -m mypy src/browseragentprotocol --ignore-missing-imports || true", - "lint": "python -m ruff check src/browseragentprotocol || true", - "lint:fix": "python -m ruff check --fix src/browseragentprotocol || true", + "typecheck": "node ../../scripts/run-python.mjs -m mypy src/browseragentprotocol --ignore-missing-imports || true", + "lint": "node ../../scripts/run-python.mjs -m ruff check src/browseragentprotocol || true", + "lint:fix": "node ../../scripts/run-python.mjs -m ruff check --fix src/browseragentprotocol || true", "clean": "rm -rf dist build *.egg-info .mypy_cache .ruff_cache __pycache__ src/**/__pycache__" }, "turbo": { diff --git a/packages/python-sdk/pyproject.toml b/packages/python-sdk/pyproject.toml index 7f7fe45..e2f741e 100644 --- a/packages/python-sdk/pyproject.toml +++ b/packages/python-sdk/pyproject.toml @@ -1,9 +1,10 @@ [project] name = "browser-agent-protocol" -version = "0.2.0" +version = "0.3.0" description = "Python SDK 
for the Browser Agent Protocol (BAP) - control browsers with AI agents" readme = "README.md" license = { text = "Apache-2.0" } +license-files = ["LICENSE"] requires-python = ">=3.10" authors = [{ name = "BAP Contributors" }] keywords = [ diff --git a/packages/python-sdk/src/browseragentprotocol/__init__.py b/packages/python-sdk/src/browseragentprotocol/__init__.py index 65fc2d0..32e4b99 100644 --- a/packages/python-sdk/src/browseragentprotocol/__init__.py +++ b/packages/python-sdk/src/browseragentprotocol/__init__.py @@ -54,7 +54,7 @@ async def main(): ``` """ -__version__ = "0.2.0" +__version__ = "0.3.0" # Main client classes from browseragentprotocol.client import BAPClient diff --git a/packages/python-sdk/src/browseragentprotocol/types/selectors.py b/packages/python-sdk/src/browseragentprotocol/types/selectors.py index 47e7023..7f87879 100644 --- a/packages/python-sdk/src/browseragentprotocol/types/selectors.py +++ b/packages/python-sdk/src/browseragentprotocol/types/selectors.py @@ -231,6 +231,10 @@ def test_id(value: str) -> TestIdSelector: return TestIdSelector(type="testId", value=value) +# Prevent pytest from collecting the public selector helper as a test function. 
+test_id.__test__ = False + + def semantic(description: str) -> SemanticSelector: """Create a semantic selector (AI-resolved).""" return SemanticSelector(type="semantic", description=description) diff --git a/packages/python-sdk/tests/test_cli.py b/packages/python-sdk/tests/test_cli.py new file mode 100644 index 0000000..0d37e88 --- /dev/null +++ b/packages/python-sdk/tests/test_cli.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +import sys + +import pytest + +from browseragentprotocol import __version__ +from browseragentprotocol.cli import main + + +def test_version_command_prints_current_package_version(monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None: + monkeypatch.setattr(sys, "argv", ["bap", "version"]) + + with pytest.raises(SystemExit) as exc_info: + main() + + assert exc_info.value.code == 0 + assert capsys.readouterr().out.strip() == f"browseragentprotocol {__version__}" + + +def test_no_args_prints_help(monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None: + monkeypatch.setattr(sys, "argv", ["bap"]) + + with pytest.raises(SystemExit) as exc_info: + main() + + assert exc_info.value.code == 0 + assert "Browser Agent Protocol (BAP) Python SDK CLI" in capsys.readouterr().out diff --git a/packages/python-sdk/tests/test_public_api.py b/packages/python-sdk/tests/test_public_api.py new file mode 100644 index 0000000..9505c14 --- /dev/null +++ b/packages/python-sdk/tests/test_public_api.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from browseragentprotocol import ( + BAPClient, + BAPClientSync, + __version__, + label, + ref, + role, + test_id, +) + + +def test_core_exports_are_available() -> None: + assert BAPClient.__name__ == "BAPClient" + assert BAPClientSync.__name__ == "BAPClientSync" + assert __version__ == "0.3.0" + + +def test_selector_factories_return_expected_shapes() -> None: + submit_button = role("button", "Submit") + email_field = label("Email address") + stable_ref = 
ref("@e1") + test_selector = test_id("login-submit") + + assert submit_button.model_dump() == { + "type": "role", + "role": "button", + "name": "Submit", + "exact": None, + } + assert email_field.model_dump() == { + "type": "label", + "value": "Email address", + "exact": None, + } + assert stable_ref.model_dump() == {"type": "ref", "ref": "@e1"} + assert test_selector.model_dump() == {"type": "testId", "value": "login-submit"} diff --git a/packages/server-playwright/LICENSE b/packages/server-playwright/LICENSE new file mode 100644 index 0000000..f931480 --- /dev/null +++ b/packages/server-playwright/LICENSE @@ -0,0 +1,14 @@ +Browser Agent Protocol package license + +Copyright 2024-2026 Browser Agent Protocol contributors + +This package is licensed under the Apache License, Version 2.0. +You may obtain a copy of the License at: + +https://www.apache.org/licenses/LICENSE-2.0 + +The full Browser Agent Protocol repository licensing details, including +project-level notices for documentation and bundled third-party assets, are +available at: + +https://github.com/browseragentprotocol/bap/blob/main/LICENSE diff --git a/packages/server-playwright/package.json b/packages/server-playwright/package.json index 2ea1d3b..018e1c8 100644 --- a/packages/server-playwright/package.json +++ b/packages/server-playwright/package.json @@ -24,13 +24,19 @@ }, "files": [ "dist", - "README.md" + "README.md", + "CHANGELOG.md", + "LICENSE" ], "repository": { "type": "git", "url": "https://github.com/browseragentprotocol/bap.git", "directory": "packages/server-playwright" }, + "homepage": "https://github.com/browseragentprotocol/bap/tree/main/packages/server-playwright", + "bugs": { + "url": "https://github.com/browseragentprotocol/bap/issues" + }, "keywords": [ "bap", "server", diff --git a/packages/server-playwright/src/__tests__/discovery.test.ts b/packages/server-playwright/src/__tests__/discovery.test.ts new file mode 100644 index 0000000..d91f296 --- /dev/null +++ 
b/packages/server-playwright/src/__tests__/discovery.test.ts @@ -0,0 +1,23 @@ +import { describe, it, expect } from "vitest"; +import { BAPPlaywrightServer } from "../server.js"; + +/** + * Structural tests for discovery/discover method support. + * Full integration tests require a running browser and are out of scope here. + * These tests verify that the server handles the method name correctly. + */ +describe("BAPPlaywrightServer - discovery support", () => { + it("server can be instantiated (discovery handler registered)", () => { + const server = new BAPPlaywrightServer(); + expect(server).toBeInstanceOf(BAPPlaywrightServer); + }); + + it("allows discovery/discover for observe-scoped clients", () => { + const server = new BAPPlaywrightServer(); + const state = { + scopes: ["observe:*"], + }; + + expect(() => (server as any).checkAuthorization(state, "discovery/discover")).not.toThrow(); + }); +}); diff --git a/packages/server-playwright/src/__tests__/session-persistence.test.ts b/packages/server-playwright/src/__tests__/session-persistence.test.ts new file mode 100644 index 0000000..d1d6e4a --- /dev/null +++ b/packages/server-playwright/src/__tests__/session-persistence.test.ts @@ -0,0 +1,209 @@ +import { EventEmitter } from "node:events"; +import type { Browser } from "playwright"; +import type { WebSocket } from "ws"; +import { describe, it, expect, vi } from "vitest"; +import { BAPPlaywrightServer } from "../server.js"; +import type { BAPServerOptions } from "../server.js"; + +/** + * Tests for server-side session persistence (dormant session store). + * + * These are structural/unit tests that verify the server's configuration + * and type-level support for session persistence. Full integration tests + * (with real WebSocket connections and browsers) require a running + * Playwright instance and are out of scope here. 
+ */ +describe("BAPPlaywrightServer - session persistence", () => { + it("accepts dormantSessionTtl in session options", () => { + const server = new BAPPlaywrightServer({ + session: { + dormantSessionTtl: 120, + }, + }); + expect(server).toBeInstanceOf(BAPPlaywrightServer); + }); + + it("creates server with default dormantSessionTtl", () => { + const server = new BAPPlaywrightServer(); + expect(server).toBeInstanceOf(BAPPlaywrightServer); + }); + + it("accepts dormantSessionTtl alongside other session options", () => { + const options: BAPServerOptions = { + session: { + maxDuration: 7200, + idleTimeout: 300, + dormantSessionTtl: 60, + }, + }; + const server = new BAPPlaywrightServer(options); + expect(server).toBeInstanceOf(BAPPlaywrightServer); + }); + + it("accepts all options including dormantSessionTtl", () => { + const options: BAPServerOptions = { + port: 9999, + host: "0.0.0.0", + session: { + maxDuration: 3600, + idleTimeout: 600, + dormantSessionTtl: 300, + }, + limits: { + maxPagesPerClient: 5, + }, + }; + const server = new BAPPlaywrightServer(options); + expect(server).toBeInstanceOf(BAPPlaywrightServer); + }); + + it("routes restored page events to the reconnected client", () => { + const server = new BAPPlaywrightServer(); + const page = Object.assign(new EventEmitter(), { + url: () => "https://example.com", + }); + const browser = { + isConnected: () => true, + close: vi.fn(), + } as unknown as Browser; + const staleWs = { + readyState: 1, + send: vi.fn(), + close: vi.fn(), + } as unknown as WebSocket; + const restoredWs = { + readyState: 1, + send: vi.fn(), + close: vi.fn(), + } as unknown as WebSocket; + + const staleState = { + clientId: "stale", + initialized: true, + browser, + isPersistent: false, + context: null, + contexts: new Map(), + defaultContextId: null, + pages: new Map([["page-1", page]]), + pageToContext: new Map([["page-1", "ctx-1"]]), + activePage: "page-1", + eventSubscriptions: new Set(["page"]), + tracing: false, + scopes: [], 
+ sessionStartTime: Date.now(), + lastActivityTime: Date.now(), + elementRegistries: new Map(), + frameContexts: new Map(), + activeStreams: new Map(), + pendingApprovals: new Map(), + sessionApprovals: new Set(), + sessionId: "cli-9222", + }; + + const restoredState = { + ...staleState, + clientId: "restored", + browser: null, + pages: new Map(), + pageToContext: new Map(), + activePage: null, + elementRegistries: new Map(), + frameContexts: new Map(), + activeStreams: new Map(), + pendingApprovals: new Map(), + sessionApprovals: new Set(), + }; + + (server as any).clients.set(staleWs, staleState); + (server as any).setupPageListeners(page, "page-1"); + (server as any).parkSession(staleState); + (server as any).clients.delete(staleWs); + + const dormant = (server as any).dormantSessions.get("cli-9222"); + expect((server as any).restoreSession(dormant, restoredState)).toBe(true); + (server as any).clients.set(restoredWs, restoredState); + + page.emit("load"); + + expect((restoredWs as any).send).toHaveBeenCalledOnce(); + expect((staleWs as any).send).not.toHaveBeenCalled(); + }); + + it("removes restored pages from the active session when a tab closes externally", () => { + const server = new BAPPlaywrightServer(); + const page = Object.assign(new EventEmitter(), { + url: () => "https://example.com", + }); + const browser = { + isConnected: () => true, + close: vi.fn(), + } as unknown as Browser; + const staleWs = { + readyState: 1, + send: vi.fn(), + close: vi.fn(), + } as unknown as WebSocket; + const restoredWs = { + readyState: 1, + send: vi.fn(), + close: vi.fn(), + } as unknown as WebSocket; + + const staleState = { + clientId: "stale", + initialized: true, + browser, + isPersistent: false, + context: null, + contexts: new Map(), + defaultContextId: null, + pages: new Map([["page-1", page]]), + pageToContext: new Map([["page-1", "ctx-1"]]), + activePage: "page-1", + eventSubscriptions: new Set(["page"]), + tracing: false, + scopes: [], + sessionStartTime: 
Date.now(), + lastActivityTime: Date.now(), + elementRegistries: new Map([["page-1", {} as never]]), + frameContexts: new Map([["page-1", {} as never]]), + activeStreams: new Map(), + pendingApprovals: new Map(), + sessionApprovals: new Set(), + sessionId: "cli-9222", + }; + + const restoredState = { + ...staleState, + clientId: "restored", + browser: null, + pages: new Map(), + pageToContext: new Map(), + activePage: null, + elementRegistries: new Map(), + frameContexts: new Map(), + activeStreams: new Map(), + pendingApprovals: new Map(), + sessionApprovals: new Set(), + }; + + (server as any).clients.set(staleWs, staleState); + (server as any).setupPageListeners(page, "page-1"); + (server as any).parkSession(staleState); + (server as any).clients.delete(staleWs); + + const dormant = (server as any).dormantSessions.get("cli-9222"); + expect((server as any).restoreSession(dormant, restoredState)).toBe(true); + (server as any).clients.set(restoredWs, restoredState); + + page.emit("close"); + + expect(restoredState.pages.has("page-1")).toBe(false); + expect(restoredState.pageToContext.has("page-1")).toBe(false); + expect(restoredState.elementRegistries.has("page-1")).toBe(false); + expect(restoredState.frameContexts.has("page-1")).toBe(false); + expect(restoredState.activePage).toBeNull(); + expect((restoredWs as any).send).toHaveBeenCalledOnce(); + }); +}); diff --git a/packages/server-playwright/src/server.ts b/packages/server-playwright/src/server.ts index e745f77..0776007 100644 --- a/packages/server-playwright/src/server.ts +++ b/packages/server-playwright/src/server.ts @@ -13,7 +13,8 @@ * See BAPScope type for available scopes. 
*/ -import { randomUUID } from "crypto"; +import { randomUUID, timingSafeEqual } from "node:crypto"; +import fs from "node:fs"; import { EventEmitter } from "events"; import * as http from "http"; import * as path from "path"; @@ -111,6 +112,17 @@ import { // Approval types (Human-in-the-Loop) ApprovalRespondParams, ApprovalRespondResult, + // Discovery types (WebMCP) + DiscoveryDiscoverParams, + DiscoveryDiscoverResult, + WebMCPTool, + // Authorization + type BAPScope, + ScopeProfiles, + MethodScopes, + hasScope, + parseScopes, + createAuthorizationError, // Helpers createSuccessResponse, createErrorResponse, @@ -125,143 +137,6 @@ import { type PageElementRegistry, } from "@browseragentprotocol/protocol"; -// ============================================================================= -// Authorization Types (v0.2.0) - Inlined for build compatibility -// ============================================================================= - -/** - * BAP authorization scopes for fine-grained access control - */ -type BAPScope = - | '*' - | 'browser:*' | 'browser:launch' | 'browser:close' - | 'context:*' | 'context:create' | 'context:read' | 'context:destroy' - | 'page:*' | 'page:read' | 'page:create' | 'page:navigate' | 'page:close' - | 'action:*' | 'action:click' | 'action:type' | 'action:fill' | 'action:scroll' - | 'action:select' | 'action:upload' | 'action:drag' - | 'observe:*' | 'observe:screenshot' | 'observe:accessibility' | 'observe:dom' - | 'observe:element' | 'observe:content' | 'observe:pdf' - | 'storage:*' | 'storage:read' | 'storage:write' - | 'network:*' | 'network:intercept' - | 'emulate:*' | 'emulate:viewport' | 'emulate:geolocation' | 'emulate:offline' - | 'trace:*' | 'trace:start' | 'trace:stop'; - -/** Predefined scope profiles for common use cases */ -const ScopeProfiles = { - readonly: ['page:read', 'observe:*'] as BAPScope[], - standard: [ - 'browser:launch', 'browser:close', 'page:*', - 'action:*', - 'observe:*', 'emulate:viewport', - ] as 
BAPScope[], - full: ['browser:*', 'page:*', 'action:*', 'observe:*', 'emulate:*', 'trace:*'] as BAPScope[], - privileged: ['*'] as BAPScope[], -} as const; - -/** Method to required scopes mapping */ -const MethodScopes: Record = { - 'initialize': [], 'shutdown': [], 'notifications/initialized': [], - 'browser/launch': ['browser:launch', 'browser:*', '*'], - 'browser/close': ['browser:close', 'browser:*', '*'], - // Context methods (Multi-Context Support) - 'context/create': ['context:create', 'context:*', '*'], - 'context/list': ['context:read', 'context:*', '*'], - 'context/destroy': ['context:destroy', 'context:*', '*'], - // Page methods - 'page/create': ['page:create', 'page:*', '*'], - 'page/navigate': ['page:navigate', 'page:*', '*'], - 'page/reload': ['page:navigate', 'page:*', '*'], - 'page/goBack': ['page:navigate', 'page:*', '*'], - 'page/goForward': ['page:navigate', 'page:*', '*'], - 'page/close': ['page:close', 'page:*', '*'], - 'page/list': ['page:read', 'page:*', '*'], - 'page/activate': ['page:read', 'page:*', '*'], - // Frame methods (Frame & Shadow DOM Support) - 'frame/list': ['page:read', 'page:*', '*'], - 'frame/switch': ['page:navigate', 'page:*', '*'], - 'frame/main': ['page:navigate', 'page:*', '*'], - 'action/click': ['action:click', 'action:*', '*'], - 'action/dblclick': ['action:click', 'action:*', '*'], - 'action/type': ['action:type', 'action:*', '*'], - 'action/fill': ['action:fill', 'action:*', '*'], - 'action/clear': ['action:fill', 'action:*', '*'], - 'action/press': ['action:type', 'action:*', '*'], - 'action/hover': ['action:click', 'action:*', '*'], - 'action/scroll': ['action:scroll', 'action:*', '*'], - 'action/select': ['action:select', 'action:*', '*'], - 'action/check': ['action:click', 'action:*', '*'], - 'action/uncheck': ['action:click', 'action:*', '*'], - 'action/upload': ['action:upload', 'action:*', '*'], - 'action/drag': ['action:drag', 'action:*', '*'], - 'observe/screenshot': ['observe:screenshot', 'observe:*', 
'*'], - 'observe/accessibility': ['observe:accessibility', 'observe:*', '*'], - 'observe/dom': ['observe:dom', 'observe:*', '*'], - 'observe/element': ['observe:element', 'observe:*', '*'], - 'observe/pdf': ['observe:pdf', 'observe:*', '*'], - 'observe/content': ['observe:content', 'observe:*', '*'], - 'observe/ariaSnapshot': ['observe:accessibility', 'observe:*', '*'], - 'storage/getState': ['storage:read', 'storage:*', '*'], - 'storage/setState': ['storage:write', 'storage:*', '*'], - 'storage/getCookies': ['storage:read', 'storage:*', '*'], - 'storage/setCookies': ['storage:write', 'storage:*', '*'], - 'storage/clearCookies': ['storage:write', 'storage:*', '*'], - 'network/intercept': ['network:intercept', 'network:*', '*'], - 'network/fulfill': ['network:intercept', 'network:*', '*'], - 'network/abort': ['network:intercept', 'network:*', '*'], - 'network/continue': ['network:intercept', 'network:*', '*'], - 'emulate/setViewport': ['emulate:viewport', 'emulate:*', '*'], - 'emulate/setUserAgent': ['emulate:viewport', 'emulate:*', '*'], - 'emulate/setGeolocation': ['emulate:geolocation', 'emulate:*', '*'], - 'emulate/setOffline': ['emulate:offline', 'emulate:*', '*'], - 'dialog/handle': ['action:click', 'action:*', '*'], - 'trace/start': ['trace:start', 'trace:*', '*'], - 'trace/stop': ['trace:stop', 'trace:*', '*'], - 'events/subscribe': ['observe:*', '*'], - // Stream methods (Streaming Responses) - 'stream/cancel': ['observe:*', '*'], - // Approval methods (Human-in-the-Loop) - 'approval/respond': ['action:*', '*'], - // Agent methods (composite actions, observations, and data extraction) - 'agent/act': ['action:*', '*'], - 'agent/observe': ['observe:*', '*'], - 'agent/extract': ['observe:*', '*'], -}; - -/** Check if client has permission for a method */ -function hasScope(grantedScopes: BAPScope[], method: string): boolean { - if (grantedScopes.includes('*')) return true; - const requiredScopes = MethodScopes[method]; - if (!requiredScopes) return 
grantedScopes.includes('*'); - if (requiredScopes.length === 0) return true; - return requiredScopes.some(required => { - if (grantedScopes.includes(required)) return true; - const [category] = required.split(':'); - return grantedScopes.includes(`${category}:*` as BAPScope); - }); -} - -/** Parse scopes from string or array */ -function parseScopes(input: string | string[] | undefined): BAPScope[] { - if (!input) return []; - if (Array.isArray(input)) return input as BAPScope[]; - return input.split(',').map(s => s.trim()) as BAPScope[]; -} - -/** Authorization error code */ -const AuthorizationErrorCode = -32023; - -/** Create an authorization error */ -function createAuthorizationError(method: string, requiredScopes: BAPScope[]) { - return { - code: AuthorizationErrorCode, - message: `Insufficient permissions for '${method}'. Required scopes: ${requiredScopes.join(' or ')}`, - data: { - retryable: false, - details: { method, requiredScopes }, - }, - }; -} - /** Action confirmation event for agent feedback */ interface ActionConfirmationEvent { pageId: string; @@ -369,6 +244,13 @@ export interface BAPSessionOptions { * Sessions are terminated after this period of inactivity */ idleTimeout?: number; + /** + * TTL for dormant sessions in seconds (default: 300 = 5 minutes) + * When a client with a sessionId disconnects, its browser/pages are parked + * in a dormant store. If no client reconnects with the same sessionId within + * this TTL, the dormant session is destroyed. 
+ */ + dormantSessionTtl?: number; } /** @@ -524,6 +406,7 @@ const DEFAULT_OPTIONS: Required; + defaultContextId: string | null; + pages: Map; + pageToContext: Map; + activePage: string | null; + elementRegistries: Map; + frameContexts: Map; + sessionApprovals: Set; + ttlHandle: NodeJS.Timeout; + parkedAt: number; } +type PageOwner = { + ws: WebSocket | null; + state: ClientState | DormantSession; +}; + // ============================================================================= // BAP Server // ============================================================================= @@ -660,6 +572,7 @@ export class BAPPlaywrightServer extends EventEmitter { private httpServer: http.Server | null = null; private wss: WebSocketServer | null = null; private clients = new Map(); + private dormantSessions = new Map(); constructor(options: BAPServerOptions = {}) { super(); @@ -696,7 +609,6 @@ export class BAPPlaywrightServer extends EventEmitter { * SECURITY: Timing-safe token comparison to prevent timing attacks */ private secureTokenCompare(provided: string, expected: string): boolean { - const { timingSafeEqual } = require('crypto'); if (provided.length !== expected.length) { // Still do a comparison to maintain constant time timingSafeEqual(Buffer.from(provided), Buffer.from(provided)); @@ -913,6 +825,21 @@ export class BAPPlaywrightServer extends EventEmitter { } this.clients.clear(); + // Close all dormant sessions + for (const [sessionId, dormant] of this.dormantSessions) { + clearTimeout(dormant.ttlHandle); + try { + if (dormant.isPersistent) { + await dormant.context?.close(); + } else { + await dormant.browser?.close(); + } + } catch { + // Browser may already be closed + } + this.dormantSessions.delete(sessionId); + } + // Close WebSocket server if (this.wss) { this.wss.close(); @@ -973,6 +900,7 @@ export class BAPPlaywrightServer extends EventEmitter { clientId: randomUUID().slice(0, 8), initialized: false, browser: null, + isPersistent: false, context: null, // 
Multi-context support contexts: new Map(), @@ -1029,8 +957,18 @@ export class BAPPlaywrightServer extends EventEmitter { }); ws.on("close", async () => { - this.log("Client disconnected", { clientId: state.clientId }); - await this.cleanupClient(state); + this.log("Client disconnected", { clientId: state.clientId, sessionId: state.sessionId ?? "none" }); + + // Session persistence: park instead of destroy when sessionId is set + const isAlive = state.isPersistent + ? this.isContextAlive(state.context) + : Boolean(state.browser?.isConnected()); + if (state.sessionId && isAlive) { + this.parkSession(state); + } else { + await this.cleanupClient(state); + } + this.clients.delete(ws); }); @@ -1228,6 +1166,10 @@ export class BAPPlaywrightServer extends EventEmitter { case "approval/respond": return this.handleApprovalRespond(state, params as unknown as ApprovalRespondParams); + // Discovery (WebMCP tool discovery) + case "discovery/discover": + return this.handleDiscoveryDiscover(state, params as unknown as DiscoveryDiscoverParams); + // Agent (composite actions, observations, and data extraction) case "agent/act": return this.handleAgentAct(ws, state, params as unknown as AgentActParams); @@ -1247,12 +1189,50 @@ export class BAPPlaywrightServer extends EventEmitter { private async handleInitialize( state: ClientState, - _params: InitializeParams + params: InitializeParams ): Promise { if (state.initialized) { throw new BAPServerError(ErrorCodes.AlreadyInitialized, "Already initialized"); } + const sessionId = params.sessionId; + + // Session persistence: check for conflicts and restore dormant sessions + if (sessionId) { + // If another client still holds this sessionId (its close event hasn't + // been processed yet — race between WebSocket close and new connect), + // force-park its state so we can restore it immediately. 
+ for (const [existingWs, existingState] of this.clients) { + if (existingState !== state && existingState.sessionId === sessionId && existingState.initialized) { + this.log("Force-parking stale session from previous connection", { sessionId }); + const isAlive = existingState.isPersistent + ? this.isContextAlive(existingState.context) + : Boolean(existingState.browser?.isConnected()); + if (isAlive) { + this.parkSession(existingState); + } + if (existingWs.readyState === WebSocket.OPEN || existingWs.readyState === WebSocket.CONNECTING) { + existingWs.close(4001, "Session replaced by newer connection"); + } + // Remove stale client — its close handler will be a no-op + // (browser/pages already nullified by parkSession) + this.clients.delete(existingWs); + break; + } + } + + state.sessionId = sessionId; + + // Try to restore a dormant session + const dormant = this.dormantSessions.get(sessionId); + if (dormant) { + const restored = this.restoreSession(dormant, state); + if (restored) { + this.log("Restored dormant session", { sessionId, clientId: state.clientId }); + } + } + } + state.initialized = true; const capabilities: ServerCapabilities = { @@ -1285,6 +1265,7 @@ export class BAPPlaywrightServer extends EventEmitter { version: "0.2.0", }, capabilities, + sessionId, }; } @@ -1315,7 +1296,6 @@ export class BAPPlaywrightServer extends EventEmitter { // SECURITY FIX (CRIT-4): Validate downloads path to prevent path traversal attacks let validatedDownloadsPath: string | undefined = undefined; if (params.downloadsPath) { - const fs = require('fs'); const allowedDownloadDirs = process.env.BAP_ALLOWED_DOWNLOAD_DIRS?.split(',').filter(Boolean) || []; // Resolve the path first @@ -1368,25 +1348,58 @@ export class BAPPlaywrightServer extends EventEmitter { } const channel = params.channel ?? this.options.defaultChannel; + const headless = params.headless ?? this.options.headless; - state.browser = await launcher.launch({ - headless: params.headless ?? 
this.options.headless, - channel, - args: sanitizedArgs.length > 0 ? sanitizedArgs : undefined, - proxy: params.proxy, - downloadsPath: validatedDownloadsPath, - }); - - // Create the default context - // Force deviceScaleFactor: 1 for consistent screenshot sizes across platforms - // (retina Macs default to 2x, which doubles pixel count and inflates payloads) - const defaultContext = await state.browser.newContext({ - deviceScaleFactor: 1, - }); - const version = state.browser.version(); // Use crypto.randomUUID for unique IDs const contextId = `ctx-${randomUUID().slice(0, 8)}`; + let defaultContext: BrowserContext; + let version: string; + + if (params.userDataDir) { + // Persistent context mode: launchPersistentContext returns a BrowserContext directly + // (no Browser object). Cannot create additional contexts. + try { + defaultContext = await launcher.launchPersistentContext(params.userDataDir, { + headless, + channel, + args: sanitizedArgs.length > 0 ? sanitizedArgs : undefined, + // Force deviceScaleFactor: 1 for consistent screenshot sizes across platforms + deviceScaleFactor: 1, + }); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + if (message.includes("SingletonLock") || message.includes("lock") || message.includes("already running")) { + throw new BAPServerError( + ErrorCodes.ActionFailed, + "Chrome is already using that profile. Close Chrome, choose a dedicated `--profile `, or use `--no-profile` for a fresh browser." + ); + } + throw error; + } + + state.browser = null; + state.isPersistent = true; + version = ""; + } else { + // Normal mode: launch browser, create fresh context + state.browser = await launcher.launch({ + headless, + channel, + args: sanitizedArgs.length > 0 ? 
sanitizedArgs : undefined, + proxy: params.proxy, + downloadsPath: validatedDownloadsPath, + }); + + // Force deviceScaleFactor: 1 for consistent screenshot sizes across platforms + // (retina Macs default to 2x, which doubles pixel count and inflates payloads) + defaultContext = await state.browser.newContext({ + deviceScaleFactor: 1, + }); + version = state.browser.version(); + state.isPersistent = false; + } + // Set up default context (backwards compatible) state.context = defaultContext; state.defaultContextId = contextId; @@ -1414,31 +1427,36 @@ export class BAPPlaywrightServer extends EventEmitter { } private async handleBrowserClose(state: ClientState): Promise { - if (state.browser) { + if (state.isPersistent && state.context) { + // Persistent mode: close the context (which closes the browser process) + await state.context.close(); + } else if (state.browser) { await state.browser.close(); - state.browser = null; - state.context = null; - // Clean up multi-context state - state.contexts.clear(); - state.defaultContextId = null; - state.pages.clear(); - state.pageToContext.clear(); - state.activePage = null; - state.elementRegistries.clear(); - state.frameContexts.clear(); - // Clean up streams - for (const stream of state.activeStreams.values()) { - stream.cancelled = true; - } - state.activeStreams.clear(); - // Clean up pending approvals - for (const pending of state.pendingApprovals.values()) { - clearTimeout(pending.timeoutHandle); - pending.reject(new BAPServerError(ErrorCodes.TargetClosed, "Browser closed")); - } - state.pendingApprovals.clear(); - state.sessionApprovals.clear(); } + + state.browser = null; + state.isPersistent = false; + state.context = null; + // Clean up multi-context state + state.contexts.clear(); + state.defaultContextId = null; + state.pages.clear(); + state.pageToContext.clear(); + state.activePage = null; + state.elementRegistries.clear(); + state.frameContexts.clear(); + // Clean up streams + for (const stream of 
state.activeStreams.values()) { + stream.cancelled = true; + } + state.activeStreams.clear(); + // Clean up pending approvals + for (const pending of state.pendingApprovals.values()) { + clearTimeout(pending.timeoutHandle); + pending.reject(new BAPServerError(ErrorCodes.TargetClosed, "Browser closed")); + } + state.pendingApprovals.clear(); + state.sessionApprovals.clear(); } // =========================================================================== @@ -1446,7 +1464,7 @@ export class BAPPlaywrightServer extends EventEmitter { // =========================================================================== private async handlePageCreate( - ws: WebSocket, + _ws: WebSocket, state: ClientState, params: PageCreateOptions & { contextId?: string } ): Promise { @@ -1490,7 +1508,7 @@ export class BAPPlaywrightServer extends EventEmitter { state.pageToContext.set(pageId, contextId); // Set up event listeners - this.setupPageListeners(ws, state, page, pageId); + this.setupPageListeners(page, pageId); // Apply options if (params.viewport) { @@ -2429,6 +2447,146 @@ export class BAPPlaywrightServer extends EventEmitter { } } + // =========================================================================== + // Discovery Handlers (WebMCP Tool Discovery) + // =========================================================================== + + /** + * Discover WebMCP tools exposed by the current page via progressive feature detection. + * + * 1. Declarative: `` with tooldescription, toolparamdescription attrs + * 2. Imperative: `navigator.modelContext` API (when available) + * + * Returns empty array on pages without WebMCP — always graceful. + */ + private async discoverWebMCPTools( + page: PlaywrightPage, + options?: { maxTools?: number; includeInputSchemas?: boolean } + ): Promise<{ tools: WebMCPTool[]; totalDiscovered: number; apiVersion?: string }> { + const maxTools = options?.maxTools ?? 
50; + const includeInputSchemas = options?.includeInputSchemas !== false; + + // This function runs in browser context where DOM types exist + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const browserFn = (opts: { maxTools: number; includeInputSchemas: boolean }): any => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const doc = (globalThis as any).document; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const nav = (globalThis as any).navigator; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const tools: any[] = []; + let apiVersion: string | undefined; + + // 1. Declarative: forms with toolname attribute + try { + const forms = doc.querySelectorAll("form[toolname]"); + for (const form of forms) { + if (tools.length >= opts.maxTools) break; + + const name = form.getAttribute("toolname"); + if (!name) continue; + + const description = form.getAttribute("tooldescription") || undefined; + + // Build input schema from form inputs + let inputSchema: Record | undefined; + if (opts.includeInputSchemas) { + const properties: Record = {}; + const required: string[] = []; + const inputs = form.querySelectorAll("input[name], textarea[name], select[name]"); + + for (const input of inputs) { + const inputName = input.getAttribute("name"); + if (!inputName) continue; + + const paramDesc = input.getAttribute("toolparamdescription") || undefined; + const inputType = input.getAttribute("type") || "text"; + const schemaType = inputType === "number" ? "number" : inputType === "checkbox" ? "boolean" : "string"; + + properties[inputName] = { type: schemaType, ...(paramDesc ? { description: paramDesc } : {}) }; + + if (input.hasAttribute("required")) { + required.push(inputName); + } + } + + if (Object.keys(properties).length > 0) { + inputSchema = { + type: "object", + properties, + ...(required.length > 0 ? 
{ required } : {}), + }; + } + } + + // Build a CSS selector for this form + const id = form.getAttribute("id"); + const formSelector = id ? `#${id}` : `form[toolname="${name}"]`; + + tools.push({ name, description, inputSchema, source: "webmcp-declarative", formSelector }); + } + } catch { + // Ignore declarative detection errors + } + + // 2. Imperative: navigator.modelContext API + try { + if (typeof nav?.modelContext !== "undefined" && nav.modelContext !== null) { + const mc = nav.modelContext; + + // Detect API version if available + if (typeof mc.version === "string") { + apiVersion = mc.version; + } + + // Try to get tools via the imperative API + if (typeof mc.getTools === "function") { + const imperativeTools = mc.getTools(); + + if (Array.isArray(imperativeTools)) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + for (const tool of imperativeTools as any[]) { + if (tools.length >= opts.maxTools) break; + if (tool && typeof tool.name === "string") { + tools.push({ + name: tool.name, + description: typeof tool.description === "string" ? tool.description : undefined, + inputSchema: opts.includeInputSchemas && tool.inputSchema ? tool.inputSchema : undefined, + source: "webmcp-imperative", + }); + } + } + } + } + } + } catch { + // Ignore imperative detection errors + } + + return { tools, totalDiscovered: tools.length, apiVersion }; + }; + + try { + const result = await page.evaluate(browserFn, { maxTools, includeInputSchemas }); + return result; + } catch { + // Page may have navigated, be in an error state, etc. 
— always graceful + return { tools: [], totalDiscovered: 0 }; + } + } + + /** + * Handle discovery/discover — discover WebMCP tools on the current page + */ + private async handleDiscoveryDiscover( + state: ClientState, + params: DiscoveryDiscoverParams + ): Promise { + const page = this.getPage(state, params.pageId); + return this.discoverWebMCPTools(page, params.options); + } + // =========================================================================== // Agent Handlers (Composite Actions, Observations, and Data Extraction) // =========================================================================== @@ -2955,6 +3113,14 @@ export class BAPPlaywrightServer extends EventEmitter { } } + // WebMCP tool discovery (opt-in) + if (params.includeWebMCPTools) { + const discovery = await this.discoverWebMCPTools(page); + if (discovery.tools.length > 0) { + result.webmcpTools = discovery.tools; + } + } + return result; } @@ -4071,14 +4237,14 @@ export class BAPPlaywrightServer extends EventEmitter { case "ref": { // Look up the element by its stable ref in the registry const pageId = this.getPageId(page); - const state = this.clients.values().next().value; - if (!state) { + const owner = this.findPageOwner(pageId); + if (!owner) { throw new BAPServerError( ErrorCodes.ElementNotFound, `No client state available for ref lookup: ${selector.ref}` ); } - const registry = state.elementRegistries.get(pageId); + const registry = owner.state.elementRegistries.get(pageId); if (!registry) { throw new BAPServerError( ErrorCodes.ElementNotFound, @@ -4139,7 +4305,10 @@ export class BAPPlaywrightServer extends EventEmitter { * Ensure browser is launched */ private ensureBrowser(state: ClientState): void { - if (!state.browser || !state.context) { + if (!state.context) { + throw new BAPServerError(ErrorCodes.BrowserNotLaunched, "Browser not launched"); + } + if (!state.isPersistent && !state.browser) { throw new BAPServerError(ErrorCodes.BrowserNotLaunched, "Browser not launched"); } } 
@@ -4351,139 +4520,199 @@ export class BAPPlaywrightServer extends EventEmitter { return page; } + /** + * Find the connected client that currently owns a page. + */ + private findConnectedClientForPage(pageId: string): { ws: WebSocket; state: ClientState } | null { + for (const [ws, state] of this.clients) { + if (state.pages.has(pageId)) { + return { ws, state }; + } + } + + return null; + } + + /** + * Find a dormant session that currently owns a page. + */ + private findDormantSessionForPage(pageId: string): DormantSession | null { + for (const dormant of this.dormantSessions.values()) { + if (dormant.pages.has(pageId)) { + return dormant; + } + } + + return null; + } + + /** + * Find the current owner of a page across live and dormant sessions. + */ + private findPageOwner(pageId: string): PageOwner | null { + const connectedOwner = this.findConnectedClientForPage(pageId); + if (connectedOwner) { + return connectedOwner; + } + + const dormantOwner = this.findDormantSessionForPage(pageId); + if (dormantOwner) { + return { ws: null, state: dormantOwner }; + } + + return null; + } + + /** + * Remove all page-scoped bookkeeping for a page from its owner. + */ + private removePageFromOwner(state: ClientState | DormantSession, pageId: string): void { + state.pages.delete(pageId); + state.pageToContext.delete(pageId); + state.elementRegistries.delete(pageId); + state.frameContexts.delete(pageId); + + if (state.activePage === pageId) { + state.activePage = state.pages.keys().next().value ?? null; + } + } + + /** + * Emit an event to the active owner of a page if it is subscribed. 
+ */ + private emitOwnedEvent( + pageId: string, + subscription: string, + method: string, + params: Record + ): void { + const owner = this.findConnectedClientForPage(pageId); + if (!owner || !owner.state.eventSubscriptions.has(subscription)) { + return; + } + + this.sendEvent(owner.ws, method, params); + } + /** * Get page ID from a Playwright page object */ private getPageId(page: PlaywrightPage): string { - // Find the page ID by searching the pages map - const state = this.clients.values().next().value; - if (state) { + for (const state of this.clients.values()) { for (const [pageId, p] of state.pages) { if (p === page) { return pageId; } } } + + for (const dormant of this.dormantSessions.values()) { + for (const [pageId, p] of dormant.pages) { + if (p === page) { + return pageId; + } + } + } + throw new BAPServerError(ErrorCodes.PageNotFound, "Page not found in registry"); } /** * Set up event listeners for a page */ - private setupPageListeners( - ws: WebSocket, - state: ClientState, - page: PlaywrightPage, - pageId: string - ): void { + private setupPageListeners(page: PlaywrightPage, pageId: string): void { // Page events page.on("load", () => { - if (state.eventSubscriptions.has("page")) { - this.sendEvent(ws, "events/page", { - type: "load", - pageId, - url: page.url(), - timestamp: Date.now(), - }); - } + this.emitOwnedEvent(pageId, "page", "events/page", { + type: "load", + pageId, + url: page.url(), + timestamp: Date.now(), + }); }); page.on("domcontentloaded", () => { - if (state.eventSubscriptions.has("page")) { - this.sendEvent(ws, "events/page", { - type: "domcontentloaded", - pageId, - url: page.url(), - timestamp: Date.now(), - }); - } + this.emitOwnedEvent(pageId, "page", "events/page", { + type: "domcontentloaded", + pageId, + url: page.url(), + timestamp: Date.now(), + }); }); // Console events page.on("console", (msg: ConsoleMessage) => { - if (state.eventSubscriptions.has("console")) { - this.sendEvent(ws, "events/console", { - pageId, - 
level: msg.type() as "log" | "debug" | "info" | "warn" | "error", - text: msg.text(), - url: msg.location().url, - line: msg.location().lineNumber, - column: msg.location().columnNumber, - timestamp: Date.now(), - }); - } + this.emitOwnedEvent(pageId, "console", "events/console", { + pageId, + level: msg.type() as "log" | "debug" | "info" | "warn" | "error", + text: msg.text(), + url: msg.location().url, + line: msg.location().lineNumber, + column: msg.location().columnNumber, + timestamp: Date.now(), + }); }); // Network events page.on("request", (request: Request) => { - if (state.eventSubscriptions.has("network")) { - this.sendEvent(ws, "events/network", { - type: "request", - requestId: request.url() + "-" + Date.now(), - pageId, - url: request.url(), - method: request.method(), - resourceType: request.resourceType(), - headers: request.headers(), - postData: request.postData(), - timestamp: Date.now(), - }); - } + this.emitOwnedEvent(pageId, "network", "events/network", { + type: "request", + requestId: request.url() + "-" + Date.now(), + pageId, + url: request.url(), + method: request.method(), + resourceType: request.resourceType(), + headers: request.headers(), + postData: request.postData(), + timestamp: Date.now(), + }); }); page.on("response", (response: Response) => { - if (state.eventSubscriptions.has("network")) { - this.sendEvent(ws, "events/network", { - type: "response", - requestId: response.url() + "-" + Date.now(), - pageId, - url: response.url(), - status: response.status(), - headers: response.headers(), - timestamp: Date.now(), - }); - } + this.emitOwnedEvent(pageId, "network", "events/network", { + type: "response", + requestId: response.url() + "-" + Date.now(), + pageId, + url: response.url(), + status: response.status(), + headers: response.headers(), + timestamp: Date.now(), + }); }); // Dialog events page.on("dialog", (dialog: Dialog) => { - if (state.eventSubscriptions.has("dialog")) { - this.sendEvent(ws, "events/dialog", { - pageId, 
- type: dialog.type() as "alert" | "confirm" | "prompt" | "beforeunload", - message: dialog.message(), - defaultValue: dialog.defaultValue(), - timestamp: Date.now(), - }); - } + this.emitOwnedEvent(pageId, "dialog", "events/dialog", { + pageId, + type: dialog.type() as "alert" | "confirm" | "prompt" | "beforeunload", + message: dialog.message(), + defaultValue: dialog.defaultValue(), + timestamp: Date.now(), + }); }); // Download events page.on("download", (download: Download) => { - if (state.eventSubscriptions.has("download")) { - this.sendEvent(ws, "events/download", { - pageId, - url: download.url(), - suggestedFilename: download.suggestedFilename(), - state: "started", - timestamp: Date.now(), - }); - } + this.emitOwnedEvent(pageId, "download", "events/download", { + pageId, + url: download.url(), + suggestedFilename: download.suggestedFilename(), + state: "started", + timestamp: Date.now(), + }); }); // Handle external page close (user closes tab, browser crash, etc.) page.on("close", () => { - // Remove page from state - state.pages.delete(pageId); + const activeOwner = this.findConnectedClientForPage(pageId); + const owner = activeOwner ?? this.findPageOwner(pageId); - // Update active page if this was the active one - if (state.activePage === pageId) { - state.activePage = state.pages.keys().next().value ?? 
null; + if (owner) { + this.removePageFromOwner(owner.state, pageId); } - // Notify client if subscribed to page events - if (state.eventSubscriptions.has("page")) { - this.sendEvent(ws, "events/page", { + if (activeOwner?.state.eventSubscriptions.has("page")) { + this.sendEvent(activeOwner.ws, "events/page", { type: "close", pageId, timestamp: Date.now(), @@ -4498,6 +4727,9 @@ export class BAPPlaywrightServer extends EventEmitter { * Send an event notification to the client */ private sendEvent(ws: WebSocket, method: string, params: Record): void { + if (ws.readyState !== WebSocket.OPEN) { + return; + } const notification = createNotification(method, params); ws.send(JSON.stringify(notification)); } @@ -4546,19 +4778,56 @@ export class BAPPlaywrightServer extends EventEmitter { } /** - * Clean up client state + * Check whether a browser context is still usable. */ - private async cleanupClient(state: ClientState): Promise { - // Clear session timeouts (v0.2.0) - this.clearSessionTimeouts(state); + private isContextAlive(context: BrowserContext | null): boolean { + if (!context) { + return false; + } - // Cancel any pending speculative prefetch + try { + const browser = context.browser(); + if (browser) { + return browser.isConnected(); + } + void context.pages(); + return true; + } catch { + return false; + } + } + + /** + * Clear state tied to a single WebSocket connection before parking or cleanup. 
+ */ + private clearConnectionScopedState(state: ClientState, errorMessage: string): void { if (state.speculativePrefetchTimer) { clearTimeout(state.speculativePrefetchTimer); state.speculativePrefetchTimer = undefined; } state.speculativeObservation = undefined; + for (const stream of state.activeStreams.values()) { + stream.cancelled = true; + } + state.activeStreams.clear(); + + for (const pending of state.pendingApprovals.values()) { + clearTimeout(pending.timeoutHandle); + pending.reject(new BAPServerError(ErrorCodes.TargetClosed, errorMessage)); + } + state.pendingApprovals.clear(); + } + + /** + * Clean up client state + */ + private async cleanupClient(state: ClientState): Promise { + // Clear session timeouts (v0.2.0) + this.clearSessionTimeouts(state); + + this.clearConnectionScopedState(state, "Client disconnected"); + if (state.tracing && state.context) { try { await state.context.tracing.stop(); @@ -4567,7 +4836,13 @@ export class BAPPlaywrightServer extends EventEmitter { } } - if (state.browser) { + if (state.isPersistent && state.context) { + try { + await state.context.close(); + } catch { + // Ignore + } + } else if (state.browser) { try { await state.browser.close(); } catch { @@ -4576,20 +4851,146 @@ export class BAPPlaywrightServer extends EventEmitter { } state.browser = null; + state.isPersistent = false; state.context = null; state.pages.clear(); state.pageToContext.clear(); state.activePage = null; state.elementRegistries.clear(); state.frameContexts.clear(); - state.activeStreams.clear(); - state.pendingApprovals.clear(); state.sessionApprovals.clear(); state.contexts.clear(); state.defaultContextId = null; state.initialized = false; } + // =========================================================================== + // Session Persistence (v0.3.0) + // =========================================================================== + + /** + * Park a client's browser state into the dormant store. 
+ * Called on disconnect when the client has a sessionId and browser is still alive. + * Nullifies browser/pages on state so cleanupClient() becomes a no-op for those. + */ + private parkSession(state: ClientState): void { + const sessionId = state.sessionId!; + + this.clearConnectionScopedState(state, "Client disconnected"); + + // If there's already a dormant session with this ID (shouldn't happen, but be safe), + // expire it first + const existing = this.dormantSessions.get(sessionId); + if (existing) { + clearTimeout(existing.ttlHandle); + try { + if (existing.isPersistent) { + existing.context?.close(); + } else { + existing.browser?.close(); + } + } catch { /* ignore */ } + this.dormantSessions.delete(sessionId); + } + + const ttl = this.options.session.dormantSessionTtl * 1000; + const ttlHandle = setTimeout(() => { + this.expireDormantSession(sessionId); + }, ttl); + + const dormant: DormantSession = { + sessionId, + browser: state.browser, + isPersistent: state.isPersistent, + context: state.context, + contexts: new Map(state.contexts), + defaultContextId: state.defaultContextId, + pages: new Map(state.pages), + pageToContext: new Map(state.pageToContext), + activePage: state.activePage, + elementRegistries: new Map(state.elementRegistries), + frameContexts: new Map(state.frameContexts), + sessionApprovals: new Set(state.sessionApprovals), + ttlHandle, + parkedAt: Date.now(), + }; + + this.dormantSessions.set(sessionId, dormant); + this.log("Session parked", { sessionId, ttl: `${this.options.session.dormantSessionTtl}s` }); + + // Nullify state so cleanupClient() won't destroy the browser/pages + state.browser = null; + state.isPersistent = false; + state.context = null; + state.pages = new Map(); + state.pageToContext = new Map(); + state.activePage = null; + state.elementRegistries = new Map(); + state.frameContexts = new Map(); + state.contexts = new Map(); + state.defaultContextId = null; + } + + /** + * Restore a dormant session into a new client's 
state. + * Returns true if restoration succeeded, false if browser crashed during dormancy. + */ + private restoreSession(dormant: DormantSession, state: ClientState): boolean { + clearTimeout(dormant.ttlHandle); + this.dormantSessions.delete(dormant.sessionId); + + // Verify browser/context is still alive + const isAlive = dormant.isPersistent + ? this.isContextAlive(dormant.context) + : Boolean(dormant.browser?.isConnected()); + if (!isAlive) { + this.log("Dormant session browser crashed, starting fresh", { sessionId: dormant.sessionId }); + try { + if (dormant.isPersistent) { + dormant.context?.close(); + } else { + dormant.browser?.close(); + } + } catch { /* ignore */ } + return false; + } + + state.browser = dormant.browser; + state.isPersistent = dormant.isPersistent; + state.context = dormant.context; + state.contexts = dormant.contexts; + state.defaultContextId = dormant.defaultContextId; + state.pages = dormant.pages; + state.pageToContext = dormant.pageToContext; + state.activePage = dormant.activePage; + state.elementRegistries = dormant.elementRegistries; + state.frameContexts = dormant.frameContexts; + state.sessionApprovals = dormant.sessionApprovals; + + return true; + } + + /** + * Expire and destroy a dormant session after TTL. 
+ */ + private expireDormantSession(sessionId: string): void { + const dormant = this.dormantSessions.get(sessionId); + if (!dormant) return; + + this.log("Dormant session expired", { sessionId }); + this.dormantSessions.delete(sessionId); + + try { + if (dormant.isPersistent) { + dormant.context?.close(); + } else { + dormant.browser?.close(); + } + } catch { + // Browser may already be closed + } + } + /** * Handle errors and convert to appropriate response */ @@ -4643,6 +5044,12 @@ export class BAPPlaywrightServer extends EventEmitter { state: ClientState, params: ContextCreateParams ): Promise { + if (state.isPersistent) { + throw new BAPServerError( + ErrorCodes.InvalidParams, + "Cannot create additional contexts in persistent profile mode" + ); + } if (!state.browser) { throw new BAPServerError(ErrorCodes.BrowserNotLaunched, "Browser not launched"); } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e7e7a2b..b3d5cab 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -108,8 +108,8 @@ importers: specifier: workspace:* version: link:../protocol '@modelcontextprotocol/sdk': - specifier: ^1.0.0 - version: 1.25.3(hono@4.11.7)(zod@3.25.76) + specifier: ^1.27.1 + version: 1.27.1(zod@3.25.76) zod: specifier: ^3.23.0 version: 3.25.76 @@ -440,8 +440,8 @@ packages: resolution: {integrity: sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} - '@hono/node-server@1.19.9': - resolution: {integrity: sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==} + '@hono/node-server@1.19.11': + resolution: {integrity: sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g==} engines: {node: '>=18.14.1'} peerDependencies: hono: ^4 @@ -490,8 +490,8 @@ packages: '@manypkg/get-packages@1.1.3': resolution: {integrity: sha512-fo+QhuU3qE/2TQMQmbVMqaQ6EWbMhi4ABWP+O4AM1NqPBuy0OrApV5LO6BrrgnhtAHS2NH6RrVk9OL181tTi8A==} - 
'@modelcontextprotocol/sdk@1.25.3': - resolution: {integrity: sha512-vsAMBMERybvYgKbg/l4L1rhS7VXV1c0CtyJg72vwxONVX0l4ZfKVAnZEWTQixJGTzKnELjQ59e4NbdFDALRiAQ==} + '@modelcontextprotocol/sdk@1.27.1': + resolution: {integrity: sha512-sr6GbP+4edBwFndLbM60gf07z0FQ79gaExpnsjMGePXqFcSSb7t6iscpjk9DhFhwd+mTEQrzNafGP8/iGGFYaA==} engines: {node: '>=18'} peerDependencies: '@cfworker/json-schema': ^4.1.1 @@ -783,8 +783,8 @@ packages: ajv@6.12.6: resolution: {integrity: sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==} - ajv@8.17.1: - resolution: {integrity: sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==} + ajv@8.18.0: + resolution: {integrity: sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==} ansi-colors@4.1.3: resolution: {integrity: sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==} @@ -1061,8 +1061,8 @@ packages: resolution: {integrity: sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==} engines: {node: '>=12.0.0'} - express-rate-limit@7.5.1: - resolution: {integrity: sha512-7iN8iPMDzOMHPUYllBEsQdWVB6fPDMPqwjBaFrgr4Jgr/+okjvzAy+UHlYYL/Vs0OsOrMkwS6PJDkFlJwoxUnw==} + express-rate-limit@8.3.1: + resolution: {integrity: sha512-D1dKN+cmyPWuvB+G2SREQDzPY1agpBIcTa9sJxOPMCNeH3gwzhqJRDWCXW3gg0y//+LQ/8j52JbMROWyrKdMdw==} engines: {node: '>= 16'} peerDependencies: express: '>= 4.11' @@ -1207,8 +1207,8 @@ packages: resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} engines: {node: '>= 0.4'} - hono@4.11.7: - resolution: {integrity: sha512-l7qMiNee7t82bH3SeyUCt9UF15EVmaBvsppY2zQtrbIhl/yzBTny+YUxsVjSjQ6gaqaeVtZmGocom8TzBlA4Yw==} + hono@4.12.8: + resolution: {integrity: sha512-VJCEvtrezO1IAR+kqEYnxUOoStaQPGrCmX3j4wDTNOcD1uRPFpGlwQUIW8niPuvHXaTUxeOUl5MMDGrl+tmO9A==} engines: {node: '>=16.9.0'} 
html-escaper@2.0.2: @@ -1245,6 +1245,10 @@ packages: inherits@2.0.4: resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + ip-address@10.1.0: + resolution: {integrity: sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==} + engines: {node: '>= 12'} + ipaddr.js@1.9.1: resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==} engines: {node: '>= 0.10'} @@ -1287,8 +1291,8 @@ packages: resolution: {integrity: sha512-HGYWWS/ehqTV3xN10i23tkPkpH46MLCIMFNCaaKNavAXTF1RkqxawEPtnjnGZ6XKSInBKkiOA5BKS+aZiY3AvA==} engines: {node: '>=8'} - jose@6.1.3: - resolution: {integrity: sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==} + jose@6.2.2: + resolution: {integrity: sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==} joycon@3.1.1: resolution: {integrity: sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==} @@ -1587,8 +1591,8 @@ packages: resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==} engines: {node: '>=6'} - qs@6.14.1: - resolution: {integrity: sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ==} + qs@6.15.0: + resolution: {integrity: sha512-mAZTtNCeetKMH+pSjrb76NAM8V9a05I9aBZOHztWy/UqcJdQYNsf59vrRKWnojAT9Y+GbIvoTBC++CPHqpDBhQ==} engines: {node: '>=0.6'} quansync@0.2.11: @@ -2294,9 +2298,9 @@ snapshots: '@eslint/core': 0.17.0 levn: 0.4.1 - '@hono/node-server@1.19.9(hono@4.11.7)': + '@hono/node-server@1.19.11(hono@4.12.8)': dependencies: - hono: 4.11.7 + hono: 4.12.8 '@humanfs/core@0.19.1': {} @@ -2346,26 +2350,26 @@ snapshots: globby: 11.1.0 read-yaml-file: 1.1.0 - '@modelcontextprotocol/sdk@1.25.3(hono@4.11.7)(zod@3.25.76)': + 
'@modelcontextprotocol/sdk@1.27.1(zod@3.25.76)': dependencies: - '@hono/node-server': 1.19.9(hono@4.11.7) - ajv: 8.17.1 - ajv-formats: 3.0.1(ajv@8.17.1) + '@hono/node-server': 1.19.11(hono@4.12.8) + ajv: 8.18.0 + ajv-formats: 3.0.1(ajv@8.18.0) content-type: 1.0.5 cors: 2.8.6 cross-spawn: 7.0.6 eventsource: 3.0.7 eventsource-parser: 3.0.6 express: 5.2.1 - express-rate-limit: 7.5.1(express@5.2.1) - jose: 6.1.3 + express-rate-limit: 8.3.1(express@5.2.1) + hono: 4.12.8 + jose: 6.2.2 json-schema-typed: 8.0.2 pkce-challenge: 5.0.1 raw-body: 3.0.2 zod: 3.25.76 zod-to-json-schema: 3.25.1(zod@3.25.76) transitivePeerDependencies: - - hono - supports-color '@nodelib/fs.scandir@2.1.5': @@ -2633,9 +2637,9 @@ snapshots: acorn@8.15.0: {} - ajv-formats@3.0.1(ajv@8.17.1): + ajv-formats@3.0.1(ajv@8.18.0): optionalDependencies: - ajv: 8.17.1 + ajv: 8.18.0 ajv@6.12.6: dependencies: @@ -2644,7 +2648,7 @@ snapshots: json-schema-traverse: 0.4.1 uri-js: 4.4.1 - ajv@8.17.1: + ajv@8.18.0: dependencies: fast-deep-equal: 3.1.3 fast-uri: 3.1.0 @@ -2691,7 +2695,7 @@ snapshots: http-errors: 2.0.1 iconv-lite: 0.7.2 on-finished: 2.4.1 - qs: 6.14.1 + qs: 6.15.0 raw-body: 3.0.2 type-is: 2.0.1 transitivePeerDependencies: @@ -2933,9 +2937,10 @@ snapshots: expect-type@1.3.0: {} - express-rate-limit@7.5.1(express@5.2.1): + express-rate-limit@8.3.1(express@5.2.1): dependencies: express: 5.2.1 + ip-address: 10.1.0 express@5.2.1: dependencies: @@ -2959,7 +2964,7 @@ snapshots: once: 1.4.0 parseurl: 1.3.3 proxy-addr: 2.0.7 - qs: 6.14.1 + qs: 6.15.0 range-parser: 1.2.1 router: 2.2.0 send: 1.2.1 @@ -3115,7 +3120,7 @@ snapshots: dependencies: function-bind: 1.1.2 - hono@4.11.7: {} + hono@4.12.8: {} html-escaper@2.0.2: {} @@ -3146,6 +3151,8 @@ snapshots: inherits@2.0.4: {} + ip-address@10.1.0: {} + ipaddr.js@1.9.1: {} is-extglob@2.1.1: {} @@ -3179,7 +3186,7 @@ snapshots: html-escaper: 2.0.2 istanbul-lib-report: 3.0.1 - jose@6.1.3: {} + jose@6.2.2: {} joycon@3.1.1: {} @@ -3420,7 +3427,7 @@ snapshots: 
punycode@2.3.1: {} - qs@6.14.1: + qs@6.15.0: dependencies: side-channel: 1.1.0 diff --git a/scripts/check-release-artifacts.mjs b/scripts/check-release-artifacts.mjs new file mode 100644 index 0000000..365c41e --- /dev/null +++ b/scripts/check-release-artifacts.mjs @@ -0,0 +1,193 @@ +#!/usr/bin/env node + +import { execFileSync } from "node:child_process"; +import { existsSync, readFileSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import process from "node:process"; +import { fileURLToPath } from "node:url"; + +const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), ".."); +const issuesUrl = "https://github.com/browseragentprotocol/bap/issues"; + +const npmPackages = [ + { + dir: "packages/cli", + name: "@browseragentprotocol/cli", + homepage: "https://github.com/browseragentprotocol/bap/tree/main/packages/cli", + requiredFiles: [ + "README.md", + "LICENSE", + "CHANGELOG.md", + "package.json", + "bin/bap.js", + "dist/cli.js", + "skills/bap-browser/SKILL.md", + ], + }, + { + dir: "packages/client", + name: "@browseragentprotocol/client", + homepage: "https://github.com/browseragentprotocol/bap/tree/main/packages/client", + requiredFiles: [ + "README.md", + "LICENSE", + "CHANGELOG.md", + "package.json", + "dist/index.js", + "dist/index.cjs", + ], + }, + { + dir: "packages/logger", + name: "@browseragentprotocol/logger", + homepage: "https://github.com/browseragentprotocol/bap/tree/main/packages/logger", + requiredFiles: [ + "README.md", + "LICENSE", + "CHANGELOG.md", + "package.json", + "dist/index.js", + "dist/index.cjs", + ], + }, + { + dir: "packages/mcp", + name: "@browseragentprotocol/mcp", + homepage: "https://github.com/browseragentprotocol/bap/tree/main/packages/mcp", + requiredFiles: [ + "README.md", + "LICENSE", + "CHANGELOG.md", + "package.json", + "dist/cli.js", + "dist/index.js", + ], + }, + { + dir: "packages/protocol", + name: "@browseragentprotocol/protocol", + homepage: 
"https://github.com/browseragentprotocol/bap/tree/main/packages/protocol", + requiredFiles: [ + "README.md", + "LICENSE", + "CHANGELOG.md", + "package.json", + "dist/index.js", + "dist/shared/index.js", + "dist/types/index.js", + ], + }, + { + dir: "packages/server-playwright", + name: "@browseragentprotocol/server-playwright", + homepage: "https://github.com/browseragentprotocol/bap/tree/main/packages/server-playwright", + requiredFiles: [ + "README.md", + "LICENSE", + "CHANGELOG.md", + "package.json", + "dist/cli.js", + "dist/server.js", + ], + }, +]; + +const forbiddenTarballPrefixes = [".turbo/", "src/", "__tests__/", "tests/"]; +const forbiddenTarballFiles = ["CLAUDE.md"]; + +let hasFailure = false; + +function assert(condition, message) { + if (condition) { + console.log(`ok ${message}`); + return; + } + + console.error(`ERR ${message}`); + hasFailure = true; +} + +function readJson(relativePath) { + return JSON.parse(readFileSync(resolve(repoRoot, relativePath), "utf8")); +} + +function readText(relativePath) { + return readFileSync(resolve(repoRoot, relativePath), "utf8"); +} + +function packPackage(relativeDir) { + const stdout = execFileSync("npm", ["pack", "--json", "--dry-run"], { + cwd: resolve(repoRoot, relativeDir), + encoding: "utf8", + }); + const [result] = JSON.parse(stdout); + return result; +} + +function getPackageVersion(relativePath) { + return readJson(relativePath).version; +} + +for (const pkg of npmPackages) { + const packageJsonPath = `${pkg.dir}/package.json`; + const packageJson = readJson(packageJsonPath); + const packed = packPackage(pkg.dir); + const tarballFiles = new Set(packed.files.map((file) => file.path)); + + console.log(`\nChecking ${pkg.name}`); + + assert(packageJson.name === pkg.name, `${pkg.name} has the expected package name`); + assert(packageJson.license === "Apache-2.0", `${pkg.name} declares Apache-2.0`); + assert( + packageJson.publishConfig?.access === "public", + `${pkg.name} publishes with public access`, + ); 
+ assert( + packageJson.repository?.directory === pkg.dir, + `${pkg.name} repository.directory points at ${pkg.dir}`, + ); + assert(packageJson.homepage === pkg.homepage, `${pkg.name} homepage points at its package docs`); + assert(packageJson.bugs?.url === issuesUrl, `${pkg.name} bugs.url points at the issue tracker`); + + for (const requiredFile of pkg.requiredFiles) { + assert(tarballFiles.has(requiredFile), `${pkg.name} tarball includes ${requiredFile}`); + } + + for (const forbiddenPrefix of forbiddenTarballPrefixes) { + assert( + !packed.files.some((file) => file.path.startsWith(forbiddenPrefix)), + `${pkg.name} tarball excludes ${forbiddenPrefix}`, + ); + } + + for (const forbiddenFile of forbiddenTarballFiles) { + assert(!tarballFiles.has(forbiddenFile), `${pkg.name} tarball excludes ${forbiddenFile}`); + } +} + +const pythonPackageJsonVersion = getPackageVersion("packages/python-sdk/package.json"); +const canonicalReleaseVersion = getPackageVersion("packages/cli/package.json"); +const pyprojectToml = readText("packages/python-sdk/pyproject.toml"); +const pythonInit = readText("packages/python-sdk/src/browseragentprotocol/__init__.py"); + +const pyprojectVersion = pyprojectToml.match(/^version = "([^"]+)"$/m)?.[1]; +const initVersion = pythonInit.match(/^__version__ = "([^"]+)"$/m)?.[1]; + +console.log("\nChecking browser-agent-protocol (PyPI)"); +assert(existsSync(resolve(repoRoot, "packages/python-sdk/LICENSE")), "Python SDK includes a LICENSE file"); +assert(existsSync(resolve(repoRoot, "packages/python-sdk/README.md")), "Python SDK includes a README"); +assert(existsSync(resolve(repoRoot, "packages/python-sdk/CHANGELOG.md")), "Python SDK includes a CHANGELOG"); +assert(pyprojectVersion === pythonPackageJsonVersion, "Python SDK pyproject version matches package.json"); +assert(initVersion === pythonPackageJsonVersion, "Python SDK __version__ matches package.json"); +assert( + pythonPackageJsonVersion === canonicalReleaseVersion, + "Python SDK version 
stays aligned with the npm release version", +); +assert( + pyprojectToml.includes('license-files = ["LICENSE"]'), + "Python SDK declares license-files in pyproject.toml", +); + +if (hasFailure) { + process.exit(1); +} diff --git a/scripts/run-python.mjs b/scripts/run-python.mjs new file mode 100644 index 0000000..79954df --- /dev/null +++ b/scripts/run-python.mjs @@ -0,0 +1,36 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import process from "node:process"; + +const scriptArgs = process.argv.slice(2); +const candidates = + process.platform === "win32" + ? [ + ["python"], + ["py", "-3"], + ] + : [ + ["python3"], + ["python"], + ]; + +for (const [command, ...prefixArgs] of candidates) { + const result = spawnSync(command, [...prefixArgs, ...scriptArgs], { + stdio: "inherit", + }); + + if (result.error && result.error.code === "ENOENT") { + continue; + } + + if (result.error) { + console.error(`Failed to launch ${command}: ${result.error.message}`); + process.exit(1); + } + + process.exit(result.status ?? 
// scripts/sync-python-version.mjs
//
// Copies the version from packages/cli/package.json into the three places the
// Python SDK declares its version: its package.json, pyproject.toml and
// __init__.py.
//
// All files are read and validated before anything is written, so a file whose
// version declaration cannot be located aborts the run (exit status 1) without
// leaving the SDK partially updated. Previously such a file was silently
// reported as "already synced".

import { readFileSync, writeFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import process from "node:process";
import { fileURLToPath } from "node:url";

const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
const canonicalPackageJsonPath = resolve(repoRoot, "packages/cli/package.json");
const pythonPackageJsonPath = resolve(repoRoot, "packages/python-sdk/package.json");
const pyprojectPath = resolve(repoRoot, "packages/python-sdk/pyproject.toml");
const pythonInitPath = resolve(
  repoRoot,
  "packages/python-sdk/src/browseragentprotocol/__init__.py",
);

const canonicalVersion = JSON.parse(readFileSync(canonicalPackageJsonPath, "utf8")).version;

if (typeof canonicalVersion !== "string" || canonicalVersion.length === 0) {
  // Without this guard the literal text "undefined" would be written out.
  console.error(`No usable "version" field found in ${canonicalPackageJsonPath}`);
  process.exit(1);
}

// One entry per file: where the version lives and what the line should become.
const targets = [
  {
    filePath: pythonPackageJsonPath,
    pattern: /"version": "([^"]+)"/,
    replacement: `"version": "${canonicalVersion}"`,
  },
  {
    filePath: pyprojectPath,
    pattern: /^version = "([^"]+)"$/m,
    replacement: `version = "${canonicalVersion}"`,
  },
  {
    filePath: pythonInitPath,
    pattern: /^__version__ = "([^"]+)"$/m,
    replacement: `__version__ = "${canonicalVersion}"`,
  },
];

// Phase 1: read every file and compute its updated content without writing.
const plannedUpdates = targets.map(({ filePath, pattern, replacement }) => {
  const original = readFileSync(filePath, "utf8");

  return {
    filePath,
    pattern,
    original,
    matched: pattern.test(original),
    // A replacer function keeps "$" sequences in the version from being
    // interpreted as replacement patterns.
    updated: original.replace(pattern, () => replacement),
  };
});

const unmatched = plannedUpdates.filter((update) => !update.matched);

if (unmatched.length > 0) {
  for (const { filePath, pattern } of unmatched) {
    console.error(`Could not find a version declaration matching ${pattern} in ${filePath}`);
  }

  process.exit(1);
}

// Phase 2: write only the files whose content actually changed.
let updatedSomething = false;

for (const { filePath, original, updated } of plannedUpdates) {
  if (updated !== original) {
    writeFileSync(filePath, updated);
    updatedSomething = true;
  }
}

if (updatedSomething) {
  console.log(`Synced Python SDK version to ${canonicalVersion}`);
} else {
  console.log(`Python SDK already synced at ${canonicalVersion}`);
}

process.exit(0);
// scripts/verify-published-releases.mjs
//
// Confirms that a release actually reached its registry.
//
// Usage: verify-published-releases.mjs <npm|pypi>
//
//   npm   Reads PUBLISHED_NPM_PACKAGES (a JSON array of { name, version }) and
//         checks that each exact name@version is resolvable on the npm
//         registry.
//   pypi  Reads the version from packages/python-sdk/pyproject.toml and checks
//         that exactly that release of browser-agent-protocol exists on PyPI.
//
// Both modes look up the exact expected version instead of the registry's
// "latest" pointer, so releases published under another dist-tag, or older
// than the newest release, are still verified correctly.
//
// Exits 0 on success and 1 (with a message on stderr) on any failure.

import { execFileSync } from "node:child_process";
import { readFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import process from "node:process";
import { fileURLToPath } from "node:url";

const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
const mode = process.argv[2];

/**
 * Prints `message` to stderr and terminates the process with status 1.
 *
 * @param {string} message - Human-readable failure description.
 * @returns {never}
 */
function fail(message) {
  console.error(message);
  process.exit(1);
}

/**
 * Parses and validates the PUBLISHED_NPM_PACKAGES payload.
 *
 * @param {string} raw - JSON text expected to hold an array of { name, version }.
 * @returns {{ name: string, version: string }[]} The validated package list.
 */
function parsePublishedPackages(raw) {
  let parsed;

  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    fail(`PUBLISHED_NPM_PACKAGES is not valid JSON: ${error.message}`);
  }

  if (!Array.isArray(parsed) || parsed.length === 0) {
    fail("No published npm packages were reported by changesets");
  }

  for (const entry of parsed) {
    if (typeof entry?.name !== "string" || typeof entry?.version !== "string") {
      fail(`PUBLISHED_NPM_PACKAGES contains an invalid entry: ${JSON.stringify(entry)}`);
    }
  }

  return parsed;
}

/**
 * Asks the npm registry for one exact version of a package.
 *
 * @param {string} name - Package name.
 * @param {string} version - Exact version expected to be published.
 * @returns {unknown} The version reported by npm, or undefined when npm
 *   printed nothing because no published version matched.
 */
function readPublishedNpmVersion(name, version) {
  let output;

  try {
    output = execFileSync("npm", ["view", `${name}@${version}`, "version", "--json"], {
      encoding: "utf8",
    });
  } catch (error) {
    fail(`npm verification failed for ${name}: expected ${version}, lookup failed: ${error.message}`);
  }

  const trimmed = output.trim();

  if (trimmed === "") {
    return undefined;
  }

  try {
    return JSON.parse(trimmed);
  } catch (error) {
    fail(`npm verification failed for ${name}: unreadable registry response: ${error.message}`);
  }

  return undefined;
}

if (!mode) {
  fail("Expected verification mode: npm or pypi");
}

if (mode === "npm") {
  const raw = process.env.PUBLISHED_NPM_PACKAGES;

  if (!raw) {
    fail("PUBLISHED_NPM_PACKAGES is not set");
  }

  for (const pkg of parsePublishedPackages(raw)) {
    const publishedVersion = readPublishedNpmVersion(pkg.name, pkg.version);

    if (publishedVersion !== pkg.version) {
      fail(`npm verification failed for ${pkg.name}: expected ${pkg.version}, got ${publishedVersion}`);
    }

    console.log(`Verified npm package ${pkg.name}@${pkg.version}`);
  }

  process.exit(0);
}

if (mode === "pypi") {
  const pyprojectToml = readFileSync(resolve(repoRoot, "packages/python-sdk/pyproject.toml"), "utf8");
  const expectedVersion = pyprojectToml.match(/^version = "([^"]+)"$/m)?.[1];

  if (!expectedVersion) {
    fail("Could not determine the expected Python package version");
  }

  // The per-release endpoint answers 404 until this exact version exists,
  // whereas the project-level endpoint only describes the latest release.
  const releaseUrl = `https://pypi.org/pypi/browser-agent-protocol/${encodeURIComponent(expectedVersion)}/json`;

  let response;

  try {
    response = await fetch(releaseUrl);
  } catch (error) {
    fail(`PyPI verification request failed: ${error.message}`);
  }

  if (response.status === 404) {
    fail(
      `PyPI verification failed for browser-agent-protocol: expected ${expectedVersion}, but that release is not published`,
    );
  }

  if (!response.ok) {
    fail(`PyPI verification request failed with status ${response.status}`);
  }

  let payload;

  try {
    payload = await response.json();
  } catch (error) {
    fail(`PyPI verification response could not be parsed: ${error.message}`);
  }

  const publishedVersion = payload?.info?.version;

  if (publishedVersion !== expectedVersion) {
    fail(
      `PyPI verification failed for browser-agent-protocol: expected ${expectedVersion}, got ${publishedVersion}`,
    );
  }

  console.log(`Verified PyPI package browser-agent-protocol==${expectedVersion}`);
  process.exit(0);
}

fail(`Unknown verification mode: ${mode}`);