From 092d9407194d0e646ae40097c40cb7caaa9d51ce Mon Sep 17 00:00:00 2001 From: zhangyue Date: Thu, 19 Mar 2026 06:16:40 +0000 Subject: [PATCH 1/5] feat/nv ci test --- .ci/README.md | 171 ++++++++++++++++++++++++++++ .ci/build.py | 210 +++++++++++++++++++++++++++++++++++ .ci/config.yaml | 36 ++++++ .ci/images/ascend/Dockerfile | 31 ++++++ .ci/images/nvidia/Dockerfile | 26 +++++ .ci/run.py | 195 ++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- tests/test_add.py | 71 ++++++------ tests/test_rms_norm.py | 11 +- 9 files changed, 718 insertions(+), 35 deletions(-) create mode 100644 .ci/README.md create mode 100644 .ci/build.py create mode 100644 .ci/config.yaml create mode 100644 .ci/images/ascend/Dockerfile create mode 100644 .ci/images/nvidia/Dockerfile create mode 100644 .ci/run.py diff --git a/.ci/README.md b/.ci/README.md new file mode 100644 index 0000000..59ee101 --- /dev/null +++ b/.ci/README.md @@ -0,0 +1,171 @@ +# .ci — CI 镜像与流水线 + +本目录管理 CI 所用的 Docker 镜像构建与测试流水线执行。 + +## 目录结构 + +``` +.ci/ +├── config.yaml # 统一配置(registry、镜像、job 定义) +├── build.py # 镜像构建脚本 +├── run.py # CI 流水线执行脚本 +├── README.md +└── images/ + ├── nvidia/Dockerfile # NVIDIA 平台镜像 + └── ascend/Dockerfile # 昇腾平台镜像 +``` + +## 前置依赖 + +- Docker +- Python 3.10+ +- pyyaml (`pip install pyyaml`) + +## 配置文件 `config.yaml` + +```yaml +repo: + url: https://github.com/InfiniTensor/InfiniOps.git + branch: master + +registry: + url: "" # Harbor 地址,本地开发时留空 + project: infiniops + credentials_env: REGISTRY_TOKEN + +images: + nvidia: + dockerfile: .ci/images/nvidia/ + build_args: + BASE_IMAGE: nvcr.io/nvidia/pytorch:24.10-py3 + ascend: + dockerfile: .ci/images/ascend/ + build_args: + BASE_IMAGE: ascendhub.huawei.com/public-ascendhub/ascend-pytorch:24.0.0 + private_sdk: + source: "${PRIVATE_SDK_URL}" + +jobs: + nvidia_gpu: + image: stable # stable | latest | 具体 commit hash + platform: nvidia + resources: + gpu_ids: "0" # GPU 设备 ID,如 "0" "0,2" "all" + gpu_type: A100 + memory: 32GB + timeout: 3600 + setup: pip 
install .[dev] + stages: + - name: test + run: pytest tests/ -v --tb=short --junitxml=/workspace/test-results.xml +``` + +- **`registry.url`** 为空时镜像仅保存在本地,tag 格式为 `-ci/:`。 +- **`images..build_args`** 会作为 `--build-arg` 传入 `docker build`。 +- **`jobs..image`** 支持 `stable`、`latest` 或具体 commit hash。 +- **`resources.gpu_ids`** 指定 GPU 设备 ID,支持 `"0"`、`"0,2"`、`"all"` 等格式,映射为 `docker run --gpus "device=..."`。也可保留 `gpu_count` 按数量分配。 + +## 镜像构建 `build.py` + +```bash +python .ci/build.py [options] +``` + +| 参数 | 默认值 | 说明 | +|---|---|---| +| `--platform` | `all` | 构建平台:`nvidia`、`ascend` 或 `all` | +| `--commit` | `HEAD` | 用于镜像 tag 的 git ref | +| `--push` | — | 构建后推送到 registry | +| `--force` | — | 跳过变更检测,强制构建 | +| `--dry-run` | — | 仅打印命令,不执行 | +| `--config` | `.ci/config.yaml` | 配置文件路径 | + +### 示例 + +```bash +# 构建 nvidia 镜像(自动检测 Dockerfile 变更,无变更则跳过) +python .ci/build.py --platform nvidia + +# 强制构建 +python .ci/build.py --platform nvidia --force + +# 构建全部平台并推送到 registry +python .ci/build.py --push --force + +# 预览实际执行的 docker 命令 +python .ci/build.py --platform nvidia --force --dry-run +``` + +### 构建流程 + +1. 通过 `git diff HEAD~1` 检测 Dockerfile 目录是否有变更(`--force` 跳过此步) +2. `docker build` 构建镜像,同时打 `` 和 `latest` 两个 tag +3. 自动透传宿主机的 `http_proxy`/`https_proxy`/`no_proxy` 到构建容器 +4. 
若指定 `--push`,将两个 tag 推送到 registry + +### 产物 + +| Tag | 说明 | +|---|---| +| `infiniops-ci/:` | 精确追溯到某次构建 | +| `infiniops-ci/:latest` | 最近一次构建 | + +## 流水线执行 `run.py` + +```bash +python .ci/run.py [options] +``` + +| 参数 | 默认值 | 说明 | +|---|---|---| +| `--job` | 配置中第一个 job | 要执行的 job 名称 | +| `--branch` | `config.yaml` 中的 `repo.branch` | 覆盖克隆分支 | +| `--stage` | 全部 | 仅运行指定 stage | +| `--image-tag` | job 中的 `image` 字段 | 覆盖镜像版本 | +| `--gpu-id` | config 中的 `gpu_ids` | GPU 设备 ID,如 `0`、`0,2`、`all` | +| `--dry-run` | — | 仅打印 docker 命令,不执行 | +| `--config` | `.ci/config.yaml` | 配置文件路径 | + +### 示例 + +```bash +# 运行默认 job +python .ci/run.py + +# 指定分支和镜像版本 +python .ci/run.py --branch feature-xxx --image-tag latest + +# 只用 GPU 0 运行 +python .ci/run.py --gpu-id 0 + +# 用 GPU 0 和 2 运行 +python .ci/run.py --gpu-id 0,2 + +# 使用全部 GPU +python .ci/run.py --gpu-id all + +# 只跑 test stage +python .ci/run.py --stage test + +# 预览 docker 命令 +python .ci/run.py --dry-run +``` + +### 执行流程 + +1. 解析 job 配置,拉取对应镜像 +2. `docker run` 启动容器(自动挂载 GPU、限制内存) +3. 容器内 `git clone` → `checkout` → 执行 `setup` 命令 +4. 依次执行各 stage,汇总结果 + +## 代理配置 + +如果网络环境需要代理,在宿主机设置环境变量后即可: + +```bash +export http_proxy=http://localhost:9991 +export https_proxy=http://localhost:9991 +``` + +- **`build.py`** 会自动透传代理到 `docker build`(通过 `--build-arg` + `--network host`)。 +- **`run.py`** 使用 `--network host`,容器内可直接访问宿主机代理。 diff --git a/.ci/build.py b/.ci/build.py new file mode 100644 index 0000000..489ebf0 --- /dev/null +++ b/.ci/build.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python3 +"""CI image builder: detect changes, build, tag, and optionally push Docker images.""" + +import argparse +import json +import os +import subprocess +import sys +from pathlib import Path + +try: + import yaml +except ImportError: + print( + "error: pyyaml is required. 
Install with: pip install pyyaml", file=sys.stderr + ) + sys.exit(1) + + +def load_config(path): + with open(path, encoding="utf-8") as f: + return yaml.safe_load(f) + + +def get_git_commit(ref="HEAD"): + result = subprocess.run( + ["git", "rev-parse", "--short", ref], + capture_output=True, + text=True, + ) + if result.returncode != 0: + print(f"error: failed to get commit hash for `{ref}`", file=sys.stderr) + sys.exit(1) + + return result.stdout.strip() + + +def has_dockerfile_changed(dockerfile_dir, base_ref="HEAD~1"): + """Check if any file under `dockerfile_dir` changed since `base_ref`.""" + result = subprocess.run( + ["git", "diff", "--name-only", base_ref, "--", dockerfile_dir], + capture_output=True, + text=True, + ) + + return bool(result.stdout.strip()) + + +def build_image_tag(registry_url, project, platform, tag): + if registry_url: + return f"{registry_url}/{project}/{platform}:{tag}" + + return f"{project}-ci/{platform}:{tag}" + + +def build_image(platform, platform_cfg, registry_cfg, commit, push, dry_run): + """Build a single platform image. 
Returns True on success.""" + registry_url = registry_cfg.get("url", "") + project = registry_cfg.get("project", "infiniops") + dockerfile_dir = platform_cfg["dockerfile"] + + commit_tag = build_image_tag(registry_url, project, platform, commit) + latest_tag = build_image_tag(registry_url, project, platform, "latest") + + build_args_cfg = platform_cfg.get("build_args", {}) + build_cmd = ["docker", "build", "--network", "host"] + for key, value in build_args_cfg.items(): + build_cmd.extend(["--build-arg", f"{key}={value}"]) + + for proxy_var in ("http_proxy", "https_proxy", "no_proxy"): + proxy_val = os.environ.get(proxy_var) or os.environ.get(proxy_var.upper()) + if proxy_val: + build_cmd.extend(["--build-arg", f"{proxy_var}={proxy_val}"]) + + private_sdk = platform_cfg.get("private_sdk", {}) + if private_sdk: + sdk_url = private_sdk.get("source", "") + if sdk_url.startswith("${") and sdk_url.endswith("}"): + env_var = sdk_url[2:-1] + sdk_url = os.environ.get(env_var, "") + if sdk_url: + build_cmd.extend(["--build-arg", f"PRIVATE_SDK_URL={sdk_url}"]) + + build_cmd.extend(["-t", commit_tag, "-t", latest_tag, dockerfile_dir]) + + if dry_run: + print(f"[dry-run] {' '.join(build_cmd)}") + if push: + print(f"[dry-run] docker push {commit_tag}") + print(f"[dry-run] docker push {latest_tag}") + + return True + + print(f"==> building {platform}: {commit_tag}", file=sys.stderr) + result = subprocess.run(build_cmd) + if result.returncode != 0: + error = { + "stage": "build", + "platform": platform, + "tag": commit_tag, + "exit_code": result.returncode, + } + print(json.dumps(error), file=sys.stderr) + + return False + + if push: + for tag in (commit_tag, latest_tag): + print(f"==> pushing {tag}", file=sys.stderr) + push_result = subprocess.run(["docker", "push", tag]) + if push_result.returncode != 0: + error = { + "stage": "push", + "platform": platform, + "tag": tag, + "exit_code": push_result.returncode, + } + print(json.dumps(error), file=sys.stderr) + + return False + + 
return True + + +def main(): + parser = argparse.ArgumentParser(description="Build CI Docker images") + parser.add_argument( + "--platform", + type=str, + default="all", + help="Platform to build: nvidia, ascend, or all (default: all)", + ) + parser.add_argument( + "--config", + type=Path, + default=Path(__file__).resolve().parent / "config.yaml", + help="Path to config.yaml", + ) + parser.add_argument( + "--commit", + type=str, + default="HEAD", + help="Git ref for tagging the image (default: HEAD)", + ) + parser.add_argument( + "--push", + action="store_true", + help="Push images to registry after building", + ) + parser.add_argument( + "--force", + action="store_true", + help="Skip change detection and force build", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print commands without executing", + ) + args = parser.parse_args() + + config = load_config(args.config) + registry_cfg = config.get("registry", {}) + images_cfg = config.get("images", {}) + + if not images_cfg: + print("error: no `images` section in config", file=sys.stderr) + sys.exit(1) + + if args.platform == "all": + platforms = list(images_cfg.keys()) + else: + if args.platform not in images_cfg: + print( + f"error: platform `{args.platform}` not found in config", + file=sys.stderr, + ) + sys.exit(1) + platforms = [args.platform] + + commit = get_git_commit(args.commit) + failed = False + + for platform in platforms: + platform_cfg = images_cfg[platform] + dockerfile_dir = platform_cfg["dockerfile"] + + if not Path(dockerfile_dir).is_dir(): + print( + f"warning: dockerfile directory `{dockerfile_dir}` does not exist, skipping {platform}", + file=sys.stderr, + ) + continue + + if not args.force and not has_dockerfile_changed(dockerfile_dir): + print(f"==> {platform}: no changes detected, skipping", file=sys.stderr) + continue + + ok = build_image( + platform, platform_cfg, registry_cfg, commit, args.push, args.dry_run + ) + if not ok: + failed = True + + if failed: + 
sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/.ci/config.yaml b/.ci/config.yaml new file mode 100644 index 0000000..fea3f7c --- /dev/null +++ b/.ci/config.yaml @@ -0,0 +1,36 @@ +repo: + url: https://github.com/InfiniTensor/InfiniOps.git + branch: master + +registry: + url: "" # TODO: Harbor not ready yet + project: infiniops + credentials_env: REGISTRY_TOKEN + +images: + nvidia: + dockerfile: .ci/images/nvidia/ + build_args: + BASE_IMAGE: nvcr.io/nvidia/pytorch:24.10-py3 + ascend: # TODO: Ascend image is not ready yet + dockerfile: .ci/images/ascend/ + build_args: + BASE_IMAGE: ascendhub.huawei.com/public-ascendhub/ascend-pytorch:24.0.0 + private_sdk: + source: "${PRIVATE_SDK_URL}" + +jobs: + nvidia_gpu: + image: stable + platform: nvidia + resources: + gpu_ids: "0" # 指定 GPU ID,如 "0" "0,2" "all" + gpu_type: A100 + memory: 32GB + timeout: 3600 + + setup: pip install .[dev] + + stages: + - name: test + run: pytest tests/ -v --tb=short --junitxml=/workspace/test-results.xml diff --git a/.ci/images/ascend/Dockerfile b/.ci/images/ascend/Dockerfile new file mode 100644 index 0000000..87f7c91 --- /dev/null +++ b/.ci/images/ascend/Dockerfile @@ -0,0 +1,31 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + cmake \ + ninja-build \ + curl \ + libclang-dev \ + && rm -rf /var/lib/apt/lists/* + +ARG PRIVATE_SDK_URL +RUN if [ -n "$PRIVATE_SDK_URL" ]; then \ + curl -fSL "$PRIVATE_SDK_URL" -o /tmp/sdk.run && \ + chmod +x /tmp/sdk.run && /tmp/sdk.run --quiet && \ + rm /tmp/sdk.run; \ + fi + +RUN pip install --no-cache-dir \ + scikit-build-core \ + pybind11 \ + libclang \ + pytest \ + pytest-cov \ + pytest-xdist \ + pyyaml + +WORKDIR /workspace diff --git a/.ci/images/nvidia/Dockerfile b/.ci/images/nvidia/Dockerfile new file mode 100644 index 0000000..d89ea91 --- /dev/null +++ b/.ci/images/nvidia/Dockerfile @@ -0,0 +1,26 @@ +ARG BASE_IMAGE +FROM 
${BASE_IMAGE} + +ENV DEBIAN_FRONTEND=noninteractive + +ARG http_proxy +ARG https_proxy + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + cmake \ + ninja-build \ + libclang-dev \ + && rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir \ + scikit-build-core \ + pybind11 \ + libclang \ + pytest \ + pytest-cov \ + pytest-xdist \ + pyyaml + +WORKDIR /workspace diff --git a/.ci/run.py b/.ci/run.py new file mode 100644 index 0000000..0421a56 --- /dev/null +++ b/.ci/run.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 +"""Standalone Docker CI runner: clone repo, setup, run stages. Output to stdout.""" + +import argparse +import subprocess +import sys +from pathlib import Path + +try: + import yaml +except ImportError: + print( + "error: pyyaml is required. Install with: pip install pyyaml", file=sys.stderr + ) + sys.exit(1) + + +def load_config(path): + with open(path, encoding="utf-8") as f: + return yaml.safe_load(f) + + +def resolve_image(config, platform, image_tag): + """Resolve an image reference ('stable', 'latest', or commit hash) to a full URL.""" + registry = config.get("registry", {}) + registry_url = registry.get("url", "") + project = registry.get("project", "infiniops") + + if not registry_url: + return f"{project}-ci/{platform}:{image_tag}" + + return f"{registry_url}/{project}/{platform}:{image_tag}" + + +def build_runner_script(): + return r""" +export https_proxy=http://localhost:9991 +set -e +cd /workspace +git clone "$REPO_URL" repo +cd repo +git checkout "$BRANCH" +echo "========== Setup ==========" +eval "$SETUP_CMD" +set +e +failed=0 +for i in $(seq 1 "$NUM_STAGES"); do + name_var="STAGE_${i}_NAME" + cmd_var="STAGE_${i}_CMD" + name="${!name_var}" + cmd="${!cmd_var}" + echo "========== Stage: $name ==========" + eval "$cmd" || failed=1 +done +echo "========== Summary ==========" +exit $failed +""" + + +def build_docker_args( + config, job_name, repo_url, branch, stages, workdir, image_tag_override, + 
gpu_id_override=None, +): + job = config["jobs"][job_name] + platform = job.get("platform", "nvidia") + image_tag = image_tag_override or job.get("image", "stable") + image = resolve_image(config, platform, image_tag) + resources = job.get("resources", {}) + setup_cmd = job.get("setup", "pip install .[dev]") + + args = [ + "docker", + "run", + "--rm", + "--network", + "host", + "-i", + "-w", + workdir, + "-e", + f"REPO_URL={repo_url}", + "-e", + f"BRANCH={branch}", + "-e", + f"SETUP_CMD={setup_cmd}", + "-e", + f"NUM_STAGES={len(stages)}", + ] + for i, s in enumerate(stages): + args.append("-e") + args.append(f"STAGE_{i + 1}_NAME={s['name']}") + args.append("-e") + args.append(f"STAGE_{i + 1}_CMD={s['run']}") + + gpu_id = gpu_id_override or str(resources.get("gpu_ids", "")) + gpu_count = resources.get("gpu_count", 0) + if gpu_id: + if gpu_id == "all": + args.extend(["--gpus", "all"]) + else: + args.extend(["--gpus", f'"device={gpu_id}"']) + elif gpu_count and gpu_count > 0: + args.extend(["--gpus", f"count={gpu_count}"]) + + memory = resources.get("memory") + if memory: + mem = str(memory).upper().replace("GB", "g").replace("MB", "m") + if not mem.endswith("g") and not mem.endswith("m"): + mem = f"{mem}g" + args.extend(["--memory", mem]) + + timeout_sec = resources.get("timeout") + if timeout_sec: + args.extend(["--stop-timeout", str(timeout_sec)]) + + args.append(image) + args.append("bash") + args.append("-c") + args.append(build_runner_script().strip()) + + return args + + +def main(): + parser = argparse.ArgumentParser(description="Run Docker CI pipeline") + parser.add_argument( + "--config", + type=Path, + default=Path(__file__).resolve().parent / "config.yaml", + help="Path to config.yaml", + ) + parser.add_argument("--branch", type=str, help="Override repo branch") + parser.add_argument("--job", type=str, help="Job name to run (default: first job)") + parser.add_argument( + "--stage", + type=str, + help="Run only this stage name (still runs setup first)", + ) 
+ parser.add_argument( + "--image-tag", + type=str, + help="Override image tag (stable, latest, or commit hash)", + ) + parser.add_argument( + "--gpu-id", + type=str, + help='GPU device IDs to use, e.g. "0", "0,2", "all"', + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print docker command and exit", + ) + args = parser.parse_args() + + config = load_config(args.config) + repo = config.get("repo", {}) + repo_url = repo.get("url", "https://github.com/InfiniTensor/InfiniOps.git") + branch = args.branch or repo.get("branch", "dev-infra") + + jobs = config.get("jobs", {}) + if not jobs: + print("error: no jobs in config", file=sys.stderr) + sys.exit(1) + job_name = args.job or next(iter(jobs)) + if job_name not in jobs: + print(f"error: job {job_name!r} not in config", file=sys.stderr) + sys.exit(1) + + job = jobs[job_name] + all_stages = job.get("stages", []) + if args.stage: + stages = [s for s in all_stages if s["name"] == args.stage] + if not stages: + print(f"error: stage {args.stage!r} not found", file=sys.stderr) + sys.exit(1) + else: + stages = all_stages + + workdir = "/workspace" + docker_args = build_docker_args( + config, job_name, repo_url, branch, stages, workdir, args.image_tag, + gpu_id_override=args.gpu_id, + ) + + if args.dry_run: + print(" ".join(docker_args)) + + return + + sys.exit(subprocess.run(docker_args).returncode) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 765b90a..3dbc186 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "InfiniOps" version = "0.1.0" [project.optional-dependencies] -dev = ["pytest", "pytest-cov", "pytest-xdist", "ruff", "torch"] +dev = ["pytest", "pytest-cov", "pytest-xdist", "ruff", "torch", "pyyaml"] [tool.scikit-build.wheel] install-dir = "infini" diff --git a/tests/test_add.py b/tests/test_add.py index 1c98d91..61d6715 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -4,15 +4,39 @@ from tests.utils import Payload, 
empty_strided, randint_strided, randn_strided -_INT_DTYPES = ( - torch.int16, - torch.uint16, - torch.int32, - torch.uint32, - torch.int64, - torch.uint64, +_INT_DTYPES = tuple( + d + for d in ( + torch.int16, + torch.int32, + torch.int64, + ) + if d is not None ) +_UINT_DTYPES = tuple( + d + for d in ( + getattr(torch, "uint16", None), + getattr(torch, "uint32", None), + getattr(torch, "uint64", None), + ) + if d is not None +) + +def _dtype_parametrize(): + candidates = [ + (torch.float32, 1e-7, 1e-7), + (torch.float16, 1e-3, 1e-3), + (torch.bfloat16, 1e-2, 5e-3), + (torch.int16, 0, 0), + (torch.int32, 0, 0), + (getattr(torch, "uint32", None), 0, 0), + (torch.int64, 0, 0), + (getattr(torch, "uint64", None), 0, 0), + ] + return tuple((d, r, a) for (d, r, a) in candidates if d is not None) + @pytest.mark.auto_act_and_assert @pytest.mark.parametrize( @@ -32,30 +56,11 @@ ((4, 4, 5632), (45056, 5632, 1), (45056, 5632, 1), (45056, 5632, 1)), ), ) -@pytest.mark.parametrize( - ("dtype", "rtol", "atol"), - ( - (torch.float32, 1e-7, 1e-7), - (torch.float16, 1e-3, 1e-3), - (torch.bfloat16, 1e-2, 5e-3), - (torch.int16, 0, 0), - (torch.uint16, 0, 0), - (torch.int32, 0, 0), - (torch.uint32, 0, 0), - (torch.int64, 0, 0), - (torch.uint64, 0, 0), - ), -) -def test_add( - shape, input_strides, other_strides, out_strides, dtype, device, rtol, atol -): - if dtype in _INT_DTYPES: - input = randint_strided( - 0, 100, shape, input_strides, dtype=dtype, device=device - ) - other = randint_strided( - 0, 100, shape, other_strides, dtype=dtype, device=device - ) +@pytest.mark.parametrize(("dtype", "rtol", "atol"), _dtype_parametrize()) +def test_add(shape, input_strides, other_strides, out_strides, dtype, device, rtol, atol): + if dtype in _INT_DTYPES or dtype in _UINT_DTYPES: + input = randint_strided(0, 100, shape, input_strides, dtype=dtype, device=device) + other = randint_strided(0, 100, shape, other_strides, dtype=dtype, device=device) else: input = randn_strided(shape, 
input_strides, dtype=dtype, device=device) other = randn_strided(shape, other_strides, dtype=dtype, device=device) @@ -72,10 +77,10 @@ def _add(input, other, out): def _torch_add(input, other, out): - if input.dtype in (torch.uint16, torch.uint32, torch.uint64): + if input.dtype in _UINT_DTYPES: input = input.to(torch.int64) - if other.dtype in (torch.uint16, torch.uint32, torch.uint64): + if other.dtype in _UINT_DTYPES: other = other.to(torch.int64) res = torch.add(input, other) diff --git a/tests/test_rms_norm.py b/tests/test_rms_norm.py index f447091..b0c9c5d 100644 --- a/tests/test_rms_norm.py +++ b/tests/test_rms_norm.py @@ -59,4 +59,13 @@ def _rms_norm(input, weight, *, eps=1e-6, out=None): def _torch_rms_norm(input, weight, *, eps=1e-6, out=None): - return torch.nn.functional.rms_norm(input, input.shape[-1:], weight=weight, eps=eps) + rms_norm_fn = getattr(torch.nn.functional, "rms_norm", None) + if rms_norm_fn is not None: + return rms_norm_fn(input, input.shape[-1:], weight=weight, eps=eps) + # Fallback for PyTorch < 2.3: RMS norm = (x / sqrt(mean(x^2) + eps)) * weight + rms = torch.sqrt(torch.mean(input * input, dim=-1, keepdim=True) + eps) + result = (input / rms) * weight + if out is not None: + out.copy_(result) + return out + return result From f15e113ff43b566059e35ed91bcc44dd29e85540 Mon Sep 17 00:00:00 2001 From: zhangyue Date: Fri, 20 Mar 2026 07:24:55 +0000 Subject: [PATCH 2/5] feat: ci sys for nv platform --- .ci/README.md | 155 +++++------------- .ci/build.py | 103 ++++++++++-- .ci/config.yaml | 17 +- .ci/images/ascend/Dockerfile | 8 + .ci/images/nvidia/Dockerfile | 5 + .ci/run.py | 117 ++++++++++++-- .ci/tests/__init__.py | 0 .ci/tests/conftest.py | 42 +++++ .ci/tests/test_build.py | 186 ++++++++++++++++++++++ .ci/tests/test_run.py | 298 +++++++++++++++++++++++++++++++++++ 10 files changed, 775 insertions(+), 156 deletions(-) create mode 100644 .ci/tests/__init__.py create mode 100644 .ci/tests/conftest.py create mode 100644 
.ci/tests/test_build.py create mode 100644 .ci/tests/test_run.py diff --git a/.ci/README.md b/.ci/README.md index 59ee101..0bd59bd 100644 --- a/.ci/README.md +++ b/.ci/README.md @@ -1,25 +1,18 @@ # .ci — CI 镜像与流水线 -本目录管理 CI 所用的 Docker 镜像构建与测试流水线执行。 - -## 目录结构 - ``` .ci/ -├── config.yaml # 统一配置(registry、镜像、job 定义) -├── build.py # 镜像构建脚本 -├── run.py # CI 流水线执行脚本 -├── README.md +├── config.yaml # 统一配置(镜像、job 定义) +├── build.py # 镜像构建 +├── run.py # CI 流水线执行 └── images/ - ├── nvidia/Dockerfile # NVIDIA 平台镜像 - └── ascend/Dockerfile # 昇腾平台镜像 + ├── nvidia/Dockerfile + └── ascend/Dockerfile ``` -## 前置依赖 +**前置依赖**:Docker、Python 3.10+、`pip install pyyaml` -- Docker -- Python 3.10+ -- pyyaml (`pip install pyyaml`) +--- ## 配置文件 `config.yaml` @@ -28,144 +21,72 @@ repo: url: https://github.com/InfiniTensor/InfiniOps.git branch: master -registry: - url: "" # Harbor 地址,本地开发时留空 - project: infiniops - credentials_env: REGISTRY_TOKEN - images: nvidia: dockerfile: .ci/images/nvidia/ build_args: BASE_IMAGE: nvcr.io/nvidia/pytorch:24.10-py3 - ascend: - dockerfile: .ci/images/ascend/ - build_args: - BASE_IMAGE: ascendhub.huawei.com/public-ascendhub/ascend-pytorch:24.0.0 - private_sdk: - source: "${PRIVATE_SDK_URL}" jobs: nvidia_gpu: - image: stable # stable | latest | 具体 commit hash + image: latest # latest | platform: nvidia resources: - gpu_ids: "0" # GPU 设备 ID,如 "0" "0,2" "all" - gpu_type: A100 + gpu_ids: "0" # "0" | "0,2" | "all" memory: 32GB - timeout: 3600 + shm_size: 16g # 避免 PyTorch SHMEM 不足 + timeout: 3600 # 容器内脚本最大运行秒数 setup: pip install .[dev] + env: # 可选,注入容器环境变量 + MY_VAR: value stages: - name: test - run: pytest tests/ -v --tb=short --junitxml=/workspace/test-results.xml + run: pytest tests/ -n auto -v --tb=short --junitxml=/workspace/results/test-results.xml ``` -- **`registry.url`** 为空时镜像仅保存在本地,tag 格式为 `-ci/:`。 -- **`images..build_args`** 会作为 `--build-arg` 传入 `docker build`。 -- **`jobs..image`** 支持 `stable`、`latest` 或具体 commit hash。 -- **`resources.gpu_ids`** 指定 GPU 设备 ID,支持 
`"0"`、`"0,2"`、`"all"` 等格式,映射为 `docker run --gpus "device=..."`。也可保留 `gpu_count` 按数量分配。 +--- ## 镜像构建 `build.py` -```bash -python .ci/build.py [options] -``` - -| 参数 | 默认值 | 说明 | -|---|---|---| -| `--platform` | `all` | 构建平台:`nvidia`、`ascend` 或 `all` | -| `--commit` | `HEAD` | 用于镜像 tag 的 git ref | -| `--push` | — | 构建后推送到 registry | -| `--force` | — | 跳过变更检测,强制构建 | -| `--dry-run` | — | 仅打印命令,不执行 | -| `--config` | `.ci/config.yaml` | 配置文件路径 | - -### 示例 +| 参数 | 说明 | +|---|---| +| `--platform nvidia\|ascend\|all` | 构建平台,默认 `all` | +| `--force` | 跳过 Dockerfile 变更检测 | +| `--dry-run` | 打印命令不执行 | ```bash -# 构建 nvidia 镜像(自动检测 Dockerfile 变更,无变更则跳过) +# 检测变更后构建(无变更自动跳过) python .ci/build.py --platform nvidia # 强制构建 python .ci/build.py --platform nvidia --force - -# 构建全部平台并推送到 registry -python .ci/build.py --push --force - -# 预览实际执行的 docker 命令 -python .ci/build.py --platform nvidia --force --dry-run ``` -### 构建流程 +构建产物以宿主机本地镜像 tag 存储:`infiniops-ci/:` 和 `:latest`。 +代理、`no_proxy` 自动从宿主机环境变量透传到 `docker build`。 -1. 通过 `git diff HEAD~1` 检测 Dockerfile 目录是否有变更(`--force` 跳过此步) -2. `docker build` 构建镜像,同时打 `` 和 `latest` 两个 tag -3. 自动透传宿主机的 `http_proxy`/`https_proxy`/`no_proxy` 到构建容器 -4. 
若指定 `--push`,将两个 tag 推送到 registry +> `--push` 为预留功能,需在 `config.yaml` 中配置 `registry` 段后方可使用。 -### 产物 - -| Tag | 说明 | -|---|---| -| `infiniops-ci/:` | 精确追溯到某次构建 | -| `infiniops-ci/:latest` | 最近一次构建 | +--- ## 流水线执行 `run.py` -```bash -python .ci/run.py [options] -``` - -| 参数 | 默认值 | 说明 | -|---|---|---| -| `--job` | 配置中第一个 job | 要执行的 job 名称 | -| `--branch` | `config.yaml` 中的 `repo.branch` | 覆盖克隆分支 | -| `--stage` | 全部 | 仅运行指定 stage | -| `--image-tag` | job 中的 `image` 字段 | 覆盖镜像版本 | -| `--gpu-id` | config 中的 `gpu_ids` | GPU 设备 ID,如 `0`、`0,2`、`all` | -| `--dry-run` | — | 仅打印 docker 命令,不执行 | -| `--config` | `.ci/config.yaml` | 配置文件路径 | - -### 示例 +| 参数 | 说明 | +|---|---| +| `--branch` | 覆盖克隆分支 | +| `--stage` | 只运行指定 stage | +| `--image-tag` | 覆盖镜像 tag | +| `--gpu-id` | 覆盖 GPU 设备 ID | +| `--results-dir` | 宿主机目录,挂载到容器 `/workspace/results` | +| `--dry-run` | 打印 docker 命令不执行 | ```bash # 运行默认 job -python .ci/run.py - -# 指定分支和镜像版本 -python .ci/run.py --branch feature-xxx --image-tag latest - -# 只用 GPU 0 运行 -python .ci/run.py --gpu-id 0 - -# 用 GPU 0 和 2 运行 -python .ci/run.py --gpu-id 0,2 - -# 使用全部 GPU -python .ci/run.py --gpu-id all - -# 只跑 test stage -python .ci/run.py --stage test +python .ci/run.py --branch feat/my-feature --results-dir ./ci-results -# 预览 docker 命令 -python .ci/run.py --dry-run -``` - -### 执行流程 - -1. 解析 job 配置,拉取对应镜像 -2. `docker run` 启动容器(自动挂载 GPU、限制内存) -3. 容器内 `git clone` → `checkout` → 执行 `setup` 命令 -4. 
依次执行各 stage,汇总结果 - -## 代理配置 - -如果网络环境需要代理,在宿主机设置环境变量后即可: - -```bash -export http_proxy=http://localhost:9991 -export https_proxy=http://localhost:9991 +# 只跑 test stage,预览命令 +python .ci/run.py --stage test --dry-run ``` -- **`build.py`** 会自动透传代理到 `docker build`(通过 `--build-arg` + `--network host`)。 -- **`run.py`** 使用 `--network host`,容器内可直接访问宿主机代理。 +容器内执行流程:`git clone` → `checkout` → `setup` → stages。 +代理从宿主机透传,测试结果写入 `--results-dir`。每次运行均为干净环境(不挂载宿主机 pip 缓存)。 diff --git a/.ci/build.py b/.ci/build.py index 489ebf0..2339319 100644 --- a/.ci/build.py +++ b/.ci/build.py @@ -4,6 +4,7 @@ import argparse import json import os +import shlex import subprocess import sys from pathlib import Path @@ -28,6 +29,7 @@ def get_git_commit(ref="HEAD"): capture_output=True, text=True, ) + if result.returncode != 0: print(f"error: failed to get commit hash for `{ref}`", file=sys.stderr) sys.exit(1) @@ -43,9 +45,61 @@ def has_dockerfile_changed(dockerfile_dir, base_ref="HEAD~1"): text=True, ) + if result.returncode != 0: + print( + "warning: git diff failed (shallow clone or initial commit?);" + " assuming Dockerfile changed", + file=sys.stderr, + ) + return True + return bool(result.stdout.strip()) +def docker_login(registry_cfg, dry_run): + """Log in to the registry using `credentials_env` token. + + Returns True on success. + + NOTE: Registry support is currently unused (`config.yaml` has no registry + section). Retained for future integration with an external image management + system. 
+ """ + credentials_env = registry_cfg.get("credentials_env") + registry_url = registry_cfg.get("url", "") + + if not credentials_env or not registry_url: + return True + + token = os.environ.get(credentials_env) + + if not token: + print( + f"error: {credentials_env} not set, cannot login", + file=sys.stderr, + ) + return False + + if dry_run: + print( + f"[dry-run] echo | docker login {registry_url}" + " --username token --password-stdin" + ) + return True + + result = subprocess.run( + ["docker", "login", registry_url, "--username", "token", "--password-stdin"], + input=token, + text=True, + ) + + if result.returncode != 0: + print("error: docker login failed", file=sys.stderr) + return False + + return True + + def build_image_tag(registry_url, project, platform, tag): if registry_url: return f"{registry_url}/{project}/{platform}:{tag}" @@ -53,46 +107,53 @@ def build_image_tag(registry_url, project, platform, tag): return f"{project}-ci/{platform}:{tag}" -def build_image(platform, platform_cfg, registry_cfg, commit, push, dry_run): +def build_image(platform, platform_cfg, registry_cfg, commit, push, dry_run, logged_in): """Build a single platform image. 
Returns True on success.""" registry_url = registry_cfg.get("url", "") project = registry_cfg.get("project", "infiniops") dockerfile_dir = platform_cfg["dockerfile"] - commit_tag = build_image_tag(registry_url, project, platform, commit) latest_tag = build_image_tag(registry_url, project, platform, "latest") build_args_cfg = platform_cfg.get("build_args", {}) build_cmd = ["docker", "build", "--network", "host"] + for key, value in build_args_cfg.items(): build_cmd.extend(["--build-arg", f"{key}={value}"]) - for proxy_var in ("http_proxy", "https_proxy", "no_proxy"): - proxy_val = os.environ.get(proxy_var) or os.environ.get(proxy_var.upper()) + for proxy_var in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"): + proxy_val = os.environ.get(proxy_var) or os.environ.get(proxy_var.lower()) + if proxy_val: build_cmd.extend(["--build-arg", f"{proxy_var}={proxy_val}"]) + build_cmd.extend(["--build-arg", f"{proxy_var.lower()}={proxy_val}"]) private_sdk = platform_cfg.get("private_sdk", {}) + if private_sdk: - sdk_url = private_sdk.get("source", "") - if sdk_url.startswith("${") and sdk_url.endswith("}"): - env_var = sdk_url[2:-1] - sdk_url = os.environ.get(env_var, "") + source_env = private_sdk.get("source_env", "") + sdk_url = os.environ.get(source_env, "") if source_env else "" + if sdk_url: build_cmd.extend(["--build-arg", f"PRIVATE_SDK_URL={sdk_url}"]) build_cmd.extend(["-t", commit_tag, "-t", latest_tag, dockerfile_dir]) if dry_run: - print(f"[dry-run] {' '.join(build_cmd)}") + print(f"[dry-run] {shlex.join(build_cmd)}") + if push: - print(f"[dry-run] docker push {commit_tag}") - print(f"[dry-run] docker push {latest_tag}") + if not logged_in: + print("[dry-run] (skipping push: docker login failed)") + else: + print(f"[dry-run] docker push {commit_tag}") + print(f"[dry-run] docker push {latest_tag}") return True print(f"==> building {platform}: {commit_tag}", file=sys.stderr) result = subprocess.run(build_cmd) + if result.returncode != 0: error = { "stage": "build", @@ 
-105,9 +166,14 @@ def build_image(platform, platform_cfg, registry_cfg, commit, push, dry_run): return False if push: + if not logged_in: + print("error: docker login failed, cannot push", file=sys.stderr) + return False + for tag in (commit_tag, latest_tag): print(f"==> pushing {tag}", file=sys.stderr) push_result = subprocess.run(["docker", "push", tag]) + if push_result.returncode != 0: error = { "stage": "push", @@ -145,7 +211,7 @@ def main(): parser.add_argument( "--push", action="store_true", - help="Push images to registry after building", + help="Push images to registry after building (requires registry in config)", ) parser.add_argument( "--force", @@ -179,6 +245,7 @@ def main(): platforms = [args.platform] commit = get_git_commit(args.commit) + logged_in = docker_login(registry_cfg, args.dry_run) if args.push else True failed = False for platform in platforms: @@ -187,7 +254,8 @@ def main(): if not Path(dockerfile_dir).is_dir(): print( - f"warning: dockerfile directory `{dockerfile_dir}` does not exist, skipping {platform}", + f"warning: dockerfile directory `{dockerfile_dir}` does not exist," + f" skipping {platform}", file=sys.stderr, ) continue @@ -197,8 +265,15 @@ def main(): continue ok = build_image( - platform, platform_cfg, registry_cfg, commit, args.push, args.dry_run + platform, + platform_cfg, + registry_cfg, + commit, + args.push, + args.dry_run, + logged_in=logged_in, ) + if not ok: failed = True diff --git a/.ci/config.yaml b/.ci/config.yaml index fea3f7c..c80c47d 100644 --- a/.ci/config.yaml +++ b/.ci/config.yaml @@ -2,12 +2,7 @@ repo: url: https://github.com/InfiniTensor/InfiniOps.git branch: master -registry: - url: "" # TODO: Harbor not ready yet - project: infiniops - credentials_env: REGISTRY_TOKEN - -images: +images: nvidia: dockerfile: .ci/images/nvidia/ build_args: @@ -17,20 +12,22 @@ images: build_args: BASE_IMAGE: ascendhub.huawei.com/public-ascendhub/ascend-pytorch:24.0.0 private_sdk: - source: "${PRIVATE_SDK_URL}" + source_env: 
PRIVATE_SDK_URL jobs: nvidia_gpu: - image: stable + image: latest platform: nvidia resources: gpu_ids: "0" # 指定 GPU ID,如 "0" "0,2" "all" - gpu_type: A100 memory: 32GB + shm_size: 16g # 避免 PyTorch 默认 64MB SHMEM 不足 timeout: 3600 setup: pip install .[dev] + # env: # 可选,注入容器环境变量 + # MY_VAR: value stages: - name: test - run: pytest tests/ -v --tb=short --junitxml=/workspace/test-results.xml + run: pytest tests/ -n auto -v --tb=short --junitxml=/workspace/results/test-results.xml diff --git a/.ci/images/ascend/Dockerfile b/.ci/images/ascend/Dockerfile index 87f7c91..66392eb 100644 --- a/.ci/images/ascend/Dockerfile +++ b/.ci/images/ascend/Dockerfile @@ -3,11 +3,19 @@ FROM ${BASE_IMAGE} ENV DEBIAN_FRONTEND=noninteractive +ARG HTTP_PROXY +ARG HTTPS_PROXY +ARG NO_PROXY +ARG http_proxy +ARG https_proxy +ARG no_proxy + RUN apt-get update && \ apt-get install -y --no-install-recommends \ git \ cmake \ ninja-build \ + coreutils \ curl \ libclang-dev \ && rm -rf /var/lib/apt/lists/* diff --git a/.ci/images/nvidia/Dockerfile b/.ci/images/nvidia/Dockerfile index d89ea91..74ccfd1 100644 --- a/.ci/images/nvidia/Dockerfile +++ b/.ci/images/nvidia/Dockerfile @@ -3,14 +3,19 @@ FROM ${BASE_IMAGE} ENV DEBIAN_FRONTEND=noninteractive +ARG HTTP_PROXY +ARG HTTPS_PROXY +ARG NO_PROXY ARG http_proxy ARG https_proxy +ARG no_proxy RUN apt-get update && \ apt-get install -y --no-install-recommends \ git \ cmake \ ninja-build \ + coreutils \ libclang-dev \ && rm -rf /var/lib/apt/lists/* diff --git a/.ci/run.py b/.ci/run.py index 0421a56..3f25afa 100644 --- a/.ci/run.py +++ b/.ci/run.py @@ -2,8 +2,11 @@ """Standalone Docker CI runner: clone repo, setup, run stages. 
Output to stdout.""" import argparse +import os +import shlex import subprocess import sys +from datetime import datetime from pathlib import Path try: @@ -20,8 +23,35 @@ def load_config(path): return yaml.safe_load(f) +def get_git_commit(ref="HEAD"): + result = subprocess.run( + ["git", "rev-parse", "--short", ref], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + return "unknown" + + return result.stdout.strip() + + +def build_results_dir(base, platform, stages, commit): + """Build a results directory path: `{base}/{platform}_{stages}_{commit}_{timestamp}`.""" + stage_names = "+".join(s["name"] for s in stages) + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + dirname = f"{platform}_{stage_names}_{commit}_{timestamp}" + + return Path(base) / dirname + + def resolve_image(config, platform, image_tag): - """Resolve an image reference ('stable', 'latest', or commit hash) to a full URL.""" + """Resolve an image reference to a full image name. + + Accepts `stable`, `latest`, or a commit hash as `image_tag`. When config + contains a registry section, returns a registry-prefixed URL. Otherwise + returns a local tag (current default). 
+ """ registry = config.get("registry", {}) registry_url = registry.get("url", "") project = registry.get("project", "infiniops") @@ -34,9 +64,9 @@ def resolve_image(config, platform, image_tag): def build_runner_script(): return r""" -export https_proxy=http://localhost:9991 set -e cd /workspace +mkdir -p /workspace/results git clone "$REPO_URL" repo cd repo git checkout "$BRANCH" @@ -58,15 +88,27 @@ def build_runner_script(): def build_docker_args( - config, job_name, repo_url, branch, stages, workdir, image_tag_override, + config, + job_name, + repo_url, + branch, + stages, + workdir, + image_tag_override, gpu_id_override=None, + results_dir=None, ): job = config["jobs"][job_name] platform = job.get("platform", "nvidia") - image_tag = image_tag_override or job.get("image", "stable") + image_tag = image_tag_override or job.get("image", "latest") image = resolve_image(config, platform, image_tag) resources = job.get("resources", {}) - setup_cmd = job.get("setup", "pip install .[dev]") + setup_raw = job.get("setup", "pip install .[dev]") + + if isinstance(setup_raw, list): + setup_cmd = "\n".join(setup_raw) + else: + setup_cmd = setup_raw args = [ "docker", @@ -86,6 +128,20 @@ def build_docker_args( "-e", f"NUM_STAGES={len(stages)}", ] + + for proxy_var in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"): + proxy_val = os.environ.get(proxy_var) or os.environ.get(proxy_var.lower()) + + if proxy_val: + args.extend(["-e", f"{proxy_var}={proxy_val}"]) + args.extend(["-e", f"{proxy_var.lower()}={proxy_val}"]) + + for key, value in job.get("env", {}).items(): + args.extend(["-e", f"{key}={value}"]) + + if results_dir: + args.extend(["-v", f"{results_dir.resolve()}:/workspace/results"]) + for i, s in enumerate(stages): args.append("-e") args.append(f"STAGE_{i + 1}_NAME={s['name']}") @@ -94,6 +150,7 @@ def build_docker_args( gpu_id = gpu_id_override or str(resources.get("gpu_ids", "")) gpu_count = resources.get("gpu_count", 0) + if gpu_id: if gpu_id == "all": 
args.extend(["--gpus", "all"]) @@ -103,20 +160,28 @@ def build_docker_args( args.extend(["--gpus", f"count={gpu_count}"]) memory = resources.get("memory") + if memory: - mem = str(memory).upper().replace("GB", "g").replace("MB", "m") + mem = str(memory).lower().replace("gb", "g").replace("mb", "m") + if not mem.endswith("g") and not mem.endswith("m"): mem = f"{mem}g" + args.extend(["--memory", mem]) + shm_size = resources.get("shm_size") + + if shm_size: + args.extend(["--shm-size", str(shm_size)]) + timeout_sec = resources.get("timeout") + args.append(image) + if timeout_sec: - args.extend(["--stop-timeout", str(timeout_sec)]) + # Requires coreutils `timeout` inside the container image. + args.extend(["timeout", str(timeout_sec)]) - args.append(image) - args.append("bash") - args.append("-c") - args.append(build_runner_script().strip()) + args.extend(["bash", "-c", build_runner_script().strip()]) return args @@ -146,6 +211,12 @@ def main(): type=str, help='GPU device IDs to use, e.g. "0", "0,2", "all"', ) + parser.add_argument( + "--results-dir", + type=Path, + default=Path("ci-results"), + help="Base directory for test results (default: ./ci-results)", + ) parser.add_argument( "--dry-run", action="store_true", @@ -156,38 +227,54 @@ def main(): config = load_config(args.config) repo = config.get("repo", {}) repo_url = repo.get("url", "https://github.com/InfiniTensor/InfiniOps.git") - branch = args.branch or repo.get("branch", "dev-infra") + branch = args.branch or repo.get("branch", "master") jobs = config.get("jobs", {}) + if not jobs: print("error: no jobs in config", file=sys.stderr) sys.exit(1) + job_name = args.job or next(iter(jobs)) + if job_name not in jobs: print(f"error: job {job_name!r} not in config", file=sys.stderr) sys.exit(1) job = jobs[job_name] all_stages = job.get("stages", []) + if args.stage: stages = [s for s in all_stages if s["name"] == args.stage] + if not stages: print(f"error: stage {args.stage!r} not found", file=sys.stderr) sys.exit(1) 
else: stages = all_stages + platform = job.get("platform", "nvidia") + commit = get_git_commit() + results_dir = build_results_dir(args.results_dir, platform, stages, commit) + workdir = "/workspace" docker_args = build_docker_args( - config, job_name, repo_url, branch, stages, workdir, args.image_tag, + config, + job_name, + repo_url, + branch, + stages, + workdir, + args.image_tag, gpu_id_override=args.gpu_id, + results_dir=results_dir, ) if args.dry_run: - print(" ".join(docker_args)) - + print(shlex.join(docker_args)) return + results_dir.mkdir(parents=True, exist_ok=True) sys.exit(subprocess.run(docker_args).returncode) diff --git a/.ci/tests/__init__.py b/.ci/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.ci/tests/conftest.py b/.ci/tests/conftest.py new file mode 100644 index 0000000..98079cd --- /dev/null +++ b/.ci/tests/conftest.py @@ -0,0 +1,42 @@ +import sys +from pathlib import Path + +# Allow `import run` and `import build` directly. +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +import pytest + + +@pytest.fixture +def minimal_config(): + return { + "repo": { + "url": "https://github.com/InfiniTensor/InfiniOps.git", + "branch": "master", + }, + "images": { + "nvidia": { + "dockerfile": ".ci/images/nvidia/", + "build_args": {"BASE_IMAGE": "nvcr.io/nvidia/pytorch:24.10-py3"}, + } + }, + "jobs": { + "nvidia_gpu": { + "image": "latest", + "platform": "nvidia", + "resources": { + "gpu_ids": "0", + "memory": "32GB", + "shm_size": "16g", + "timeout": 3600, + }, + "setup": "pip install .[dev]", + "stages": [ + { + "name": "test", + "run": "pytest tests/ -v", + } + ], + } + }, + } diff --git a/.ci/tests/test_build.py b/.ci/tests/test_build.py new file mode 100644 index 0000000..fa2f292 --- /dev/null +++ b/.ci/tests/test_build.py @@ -0,0 +1,186 @@ +import build + + +# --------------------------------------------------------------------------- +# build_image_tag +# 
--------------------------------------------------------------------------- + + +def test_build_image_tag_with_registry(): + tag = build.build_image_tag("localhost:5000", "infiniops", "nvidia", "latest") + assert tag == "localhost:5000/infiniops/nvidia:latest" + + +def test_build_image_tag_without_registry(): + tag = build.build_image_tag("", "infiniops", "nvidia", "abc1234") + assert tag == "infiniops-ci/nvidia:abc1234" + + +def test_build_image_tag_commit_hash(): + tag = build.build_image_tag( + "registry.example.com:5000", "proj", "ascend", "deadbeef" + ) + assert tag == "registry.example.com:5000/proj/ascend:deadbeef" + + +# --------------------------------------------------------------------------- +# has_dockerfile_changed +# --------------------------------------------------------------------------- + + +def test_has_dockerfile_changed_true_when_stdout_nonempty(mocker): + mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=0, stdout="Dockerfile\n"), + ) + assert build.has_dockerfile_changed(".ci/images/nvidia/") is True + + +def test_has_dockerfile_changed_false_when_stdout_empty(mocker): + mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=0, stdout=""), + ) + assert build.has_dockerfile_changed(".ci/images/nvidia/") is False + + +def test_has_dockerfile_changed_true_on_git_error(mocker): + # Shallow clone or initial commit: `git diff` returns non-zero. 
+ mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=128, stdout=""), + ) + assert build.has_dockerfile_changed(".ci/images/nvidia/") is True + + +# --------------------------------------------------------------------------- +# docker_login +# --------------------------------------------------------------------------- + + +def test_docker_login_no_credentials_env(mocker): + run_mock = mocker.patch("subprocess.run") + result = build.docker_login({"url": "localhost:5000"}, dry_run=False) + assert result is True + run_mock.assert_not_called() + + +def test_docker_login_token_not_set(mocker, monkeypatch, capsys): + monkeypatch.delenv("REGISTRY_TOKEN", raising=False) + run_mock = mocker.patch("subprocess.run") + cfg = {"url": "localhost:5000", "credentials_env": "REGISTRY_TOKEN"} + result = build.docker_login(cfg, dry_run=False) + assert result is False + run_mock.assert_not_called() + + +def test_docker_login_dry_run_does_not_call_subprocess(mocker, monkeypatch): + monkeypatch.setenv("REGISTRY_TOKEN", "mytoken") + run_mock = mocker.patch("subprocess.run") + cfg = {"url": "localhost:5000", "credentials_env": "REGISTRY_TOKEN"} + result = build.docker_login(cfg, dry_run=True) + assert result is True + run_mock.assert_not_called() + + +def test_docker_login_success(mocker, monkeypatch): + monkeypatch.setenv("REGISTRY_TOKEN", "mytoken") + run_mock = mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=0), + ) + cfg = {"url": "localhost:5000", "credentials_env": "REGISTRY_TOKEN"} + result = build.docker_login(cfg, dry_run=False) + assert result is True + run_mock.assert_called_once() + cmd = run_mock.call_args[0][0] + assert "docker" in cmd + assert "login" in cmd + + +# --------------------------------------------------------------------------- +# build_image — dry_run and proxy +# --------------------------------------------------------------------------- + + +def _platform_cfg(): + return { + "dockerfile": ".ci/images/nvidia/", + 
"build_args": {"BASE_IMAGE": "nvcr.io/nvidia/pytorch:24.10-py3"}, + } + + +def _registry_cfg(): + return {"url": "localhost:5000", "project": "infiniops"} + + +def test_build_image_dry_run_no_subprocess(mocker, monkeypatch, capsys): + monkeypatch.delenv("HTTP_PROXY", raising=False) + run_mock = mocker.patch("subprocess.run") + build.build_image( + "nvidia", + _platform_cfg(), + _registry_cfg(), + "abc1234", + push=False, + dry_run=True, + logged_in=True, + ) + run_mock.assert_not_called() + captured = capsys.readouterr() + assert "[dry-run]" in captured.out + + +def test_build_image_dry_run_output_contains_image_tag(mocker, monkeypatch, capsys): + monkeypatch.delenv("HTTP_PROXY", raising=False) + mocker.patch("subprocess.run") + build.build_image( + "nvidia", + _platform_cfg(), + _registry_cfg(), + "abc1234", + push=False, + dry_run=True, + logged_in=True, + ) + captured = capsys.readouterr() + assert "abc1234" in captured.out + + +def test_build_image_proxy_in_build_args(mocker, monkeypatch): + monkeypatch.setenv("HTTP_PROXY", "http://proxy.test:3128") + run_mock = mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=0), + ) + build.build_image( + "nvidia", + _platform_cfg(), + _registry_cfg(), + "abc1234", + push=False, + dry_run=False, + logged_in=True, + ) + called_cmd = run_mock.call_args[0][0] + joined = " ".join(called_cmd) + assert "HTTP_PROXY=http://proxy.test:3128" in joined + assert "http_proxy=http://proxy.test:3128" in joined + + +def test_build_image_returns_false_on_docker_error(mocker, monkeypatch): + monkeypatch.delenv("HTTP_PROXY", raising=False) + mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=1), + ) + result = build.build_image( + "nvidia", + _platform_cfg(), + _registry_cfg(), + "abc1234", + push=False, + dry_run=False, + logged_in=True, + ) + assert result is False diff --git a/.ci/tests/test_run.py b/.ci/tests/test_run.py new file mode 100644 index 0000000..075546e --- /dev/null +++ 
b/.ci/tests/test_run.py @@ -0,0 +1,298 @@ +from pathlib import Path + +import pytest + +import run + + +# --------------------------------------------------------------------------- +# resolve_image +# --------------------------------------------------------------------------- + + +def test_resolve_image_with_registry(): + cfg = {"registry": {"url": "localhost:5000", "project": "infiniops"}} + img = run.resolve_image(cfg, "nvidia", "latest") + assert img == "localhost:5000/infiniops/nvidia:latest" + + +def test_resolve_image_without_registry(minimal_config): + img = run.resolve_image(minimal_config, "nvidia", "abc1234") + assert img == "infiniops-ci/nvidia:abc1234" + + +# --------------------------------------------------------------------------- +# build_runner_script +# --------------------------------------------------------------------------- + + +def test_runner_script_contains_git_clone(): + script = run.build_runner_script() + assert "git clone" in script + + +def test_runner_script_contains_setup_cmd(): + script = run.build_runner_script() + assert "SETUP_CMD" in script + + +def test_runner_script_exits_on_failure(): + script = run.build_runner_script() + assert "exit $failed" in script + + +def test_runner_script_creates_results_dir(): + script = run.build_runner_script() + assert "mkdir -p /workspace/results" in script + + +# --------------------------------------------------------------------------- +# build_docker_args — basic structure +# --------------------------------------------------------------------------- + + +def test_docker_args_basic_structure(minimal_config): + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + ) + assert args[0] == "docker" + assert "run" in args + assert "--rm" in args + + +def test_docker_args_correct_image(minimal_config): + args = run.build_docker_args( + minimal_config, + 
"nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + ) + assert "infiniops-ci/nvidia:latest" in args + + +def test_docker_args_image_tag_override(minimal_config): + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + "abc1234", + ) + assert "infiniops-ci/nvidia:abc1234" in args + + +# --------------------------------------------------------------------------- +# build_docker_args — proxy passthrough +# --------------------------------------------------------------------------- + + +def test_docker_args_proxy_present_when_set(minimal_config, monkeypatch): + monkeypatch.setenv("HTTP_PROXY", "http://proxy.example.com:8080") + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + ) + assert "-e" in args + assert "HTTP_PROXY=http://proxy.example.com:8080" in args + assert "http_proxy=http://proxy.example.com:8080" in args + + +def test_docker_args_proxy_absent_when_not_set(minimal_config, monkeypatch): + monkeypatch.delenv("HTTP_PROXY", raising=False) + monkeypatch.delenv("http_proxy", raising=False) + monkeypatch.delenv("HTTPS_PROXY", raising=False) + monkeypatch.delenv("https_proxy", raising=False) + monkeypatch.delenv("NO_PROXY", raising=False) + monkeypatch.delenv("no_proxy", raising=False) + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + ) + + for arg in args: + assert not arg.startswith("HTTP_PROXY=") + assert not arg.startswith("http_proxy=") + assert not arg.startswith("HTTPS_PROXY=") + assert not arg.startswith("https_proxy=") + assert not 
arg.startswith("NO_PROXY=") + assert not arg.startswith("no_proxy=") + + +def test_docker_args_proxy_lowercase_fallback(minimal_config, monkeypatch): + monkeypatch.delenv("HTTP_PROXY", raising=False) + monkeypatch.setenv("http_proxy", "http://lowercase.proxy:3128") + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + ) + assert "HTTP_PROXY=http://lowercase.proxy:3128" in args + assert "http_proxy=http://lowercase.proxy:3128" in args + + +# --------------------------------------------------------------------------- +# build_docker_args — GPU flags +# --------------------------------------------------------------------------- + + +def _make_args(config, gpu_id_override=None): + return run.build_docker_args( + config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + gpu_id_override=gpu_id_override, + ) + + +def test_docker_args_gpu_device(minimal_config): + args = _make_args(minimal_config) + idx = args.index("--gpus") + assert "device=0" in args[idx + 1] + + +def test_docker_args_gpu_all(minimal_config): + minimal_config["jobs"]["nvidia_gpu"]["resources"]["gpu_ids"] = "all" + args = _make_args(minimal_config) + idx = args.index("--gpus") + assert args[idx + 1] == "all" + + +def test_docker_args_no_gpu(minimal_config): + minimal_config["jobs"]["nvidia_gpu"]["resources"]["gpu_ids"] = "" + minimal_config["jobs"]["nvidia_gpu"]["resources"].pop("gpu_count", None) + args = _make_args(minimal_config) + assert "--gpus" not in args + + +def test_docker_args_gpu_override(minimal_config): + args = _make_args(minimal_config, gpu_id_override="2,3") + idx = args.index("--gpus") + assert "2,3" in args[idx + 1] + + +# --------------------------------------------------------------------------- +# build_docker_args — memory format +# 
--------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "raw,expected", + [ + ("32GB", "32g"), + ("512MB", "512m"), + ("8", "8g"), + ("16gb", "16g"), + ("256mb", "256m"), + ], +) +def test_docker_args_memory_format(minimal_config, raw, expected): + minimal_config["jobs"]["nvidia_gpu"]["resources"]["memory"] = raw + args = _make_args(minimal_config) + idx = args.index("--memory") + assert args[idx + 1] == expected + + +# --------------------------------------------------------------------------- +# build_docker_args — stages encoding +# --------------------------------------------------------------------------- + + +def test_docker_args_num_stages(minimal_config): + args = _make_args(minimal_config) + assert "NUM_STAGES=1" in args + + +def test_docker_args_stage_name_cmd(minimal_config): + args = _make_args(minimal_config) + assert "STAGE_1_NAME=test" in args + assert any(a.startswith("STAGE_1_CMD=") for a in args) + + +def test_docker_args_multiple_stages(minimal_config): + minimal_config["jobs"]["nvidia_gpu"]["stages"] = [ + {"name": "lint", "run": "ruff check ."}, + {"name": "test", "run": "pytest tests/"}, + ] + args = _make_args(minimal_config) + assert "NUM_STAGES=2" in args + assert "STAGE_1_NAME=lint" in args + assert "STAGE_2_NAME=test" in args + + +# --------------------------------------------------------------------------- +# build_docker_args — results_dir mount +# --------------------------------------------------------------------------- + + +def test_docker_args_results_dir(minimal_config, tmp_path): + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + results_dir=tmp_path, + ) + joined = " ".join(str(a) for a in args) + assert "-v" in args + assert "/workspace/results" in joined + + +# 
--------------------------------------------------------------------------- +# build_results_dir +# --------------------------------------------------------------------------- + + +def test_build_results_dir_contains_platform(): + stages = [{"name": "test", "run": "pytest"}] + d = run.build_results_dir("ci-results", "nvidia", stages, "abc1234") + assert "nvidia" in d.name + + +def test_build_results_dir_contains_commit(): + stages = [{"name": "test", "run": "pytest"}] + d = run.build_results_dir("ci-results", "nvidia", stages, "abc1234") + assert "abc1234" in d.name + + +def test_build_results_dir_contains_stage_names(): + stages = [{"name": "lint", "run": "ruff"}, {"name": "test", "run": "pytest"}] + d = run.build_results_dir("ci-results", "nvidia", stages, "abc1234") + assert "lint+test" in d.name + + +def test_build_results_dir_under_base(): + stages = [{"name": "test", "run": "pytest"}] + d = run.build_results_dir("/tmp/my-results", "ascend", stages, "def5678") + assert d.parent == Path("/tmp/my-results") From 63dbafca7019902ab35f7b47a8ed72c68655c613 Mon Sep 17 00:00:00 2001 From: zhangyue Date: Fri, 20 Mar 2026 08:00:22 +0000 Subject: [PATCH 3/5] fix(ci): fix results dir permissions and reduce parallel workers - Pass host UID/GID into container and `chown` results after tests, so mounted `ci-results/` is accessible by the host user. - Limit `pytest-xdist` workers from `-n auto` to `-n 8` to prevent OOM worker crashes on high-core-count machines. 
Co-Authored-By: Claude Opus 4.6 --- .ci/config.yaml | 2 +- .ci/run.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.ci/config.yaml b/.ci/config.yaml index c80c47d..a86174a 100644 --- a/.ci/config.yaml +++ b/.ci/config.yaml @@ -30,4 +30,4 @@ jobs: stages: - name: test - run: pytest tests/ -n auto -v --tb=short --junitxml=/workspace/results/test-results.xml + run: pytest tests/ -n 8 -v --tb=short --junitxml=/workspace/results/test-results.xml diff --git a/.ci/run.py b/.ci/run.py index 3f25afa..0c8d648 100644 --- a/.ci/run.py +++ b/.ci/run.py @@ -83,6 +83,9 @@ def build_runner_script(): eval "$cmd" || failed=1 done echo "========== Summary ==========" +if [ -n "$HOST_UID" ] && [ -n "$HOST_GID" ]; then + chown -R "$HOST_UID:$HOST_GID" /workspace/results 2>/dev/null || true +fi exit $failed """ @@ -127,6 +130,10 @@ def build_docker_args( f"SETUP_CMD={setup_cmd}", "-e", f"NUM_STAGES={len(stages)}", + "-e", + f"HOST_UID={os.getuid()}", + "-e", + f"HOST_GID={os.getgid()}", ] for proxy_var in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"): From 497b2552b1077d03ee7c1a7b7e1c0e5832dcd7f6 Mon Sep 17 00:00:00 2001 From: zhangyue Date: Mon, 23 Mar 2026 03:27:06 +0000 Subject: [PATCH 4/5] refactor(ci): Refactor code structure for improved readability and maintainability --- .ci/README.md | 207 ++++++- .ci/agent.py | 971 ++++++++++++++++++++++++++++++++ .ci/build.py | 27 +- .ci/ci_resource.py | 241 ++++++++ .ci/config.yaml | 89 ++- .ci/github_status.py | 98 ++++ .ci/images/iluvatar/Dockerfile | 53 ++ .ci/images/nvidia/Dockerfile | 21 +- .ci/run.py | 56 +- .ci/tests/conftest.py | 44 +- .ci/tests/test_agent.py | 503 +++++++++++++++++ .ci/tests/test_github_status.py | 144 +++++ .ci/tests/test_resource.py | 324 +++++++++++ .ci/tests/test_utils.py | 90 +++ .ci/utils.py | 101 ++++ 15 files changed, 2833 insertions(+), 136 deletions(-) create mode 100644 .ci/agent.py create mode 100644 .ci/ci_resource.py create mode 100644 .ci/github_status.py create mode 100644 
.ci/images/iluvatar/Dockerfile create mode 100644 .ci/tests/test_agent.py create mode 100644 .ci/tests/test_github_status.py create mode 100644 .ci/tests/test_resource.py create mode 100644 .ci/tests/test_utils.py create mode 100644 .ci/utils.py diff --git a/.ci/README.md b/.ci/README.md index 0bd59bd..33841ca 100644 --- a/.ci/README.md +++ b/.ci/README.md @@ -2,11 +2,16 @@ ``` .ci/ -├── config.yaml # 统一配置(镜像、job 定义) +├── config.yaml # 统一配置(镜像、job、Agent 定义) +├── utils.py # 共享工具(load_config、get_git_commit) +├── agent.py # Runner Agent(调度、Webhook、远程触发) ├── build.py # 镜像构建 -├── run.py # CI 流水线执行 +├── run.py # CI 流水线执行(Docker 层) +├── ci_resource.py # GPU/内存资源检测与分配 +├── github_status.py # GitHub Commit Status 上报 └── images/ ├── nvidia/Dockerfile + ├── iluvatar/Dockerfile └── ascend/Dockerfile ``` @@ -16,41 +21,88 @@ ## 配置文件 `config.yaml` +配置以 **platform** 为顶级结构,每个平台包含镜像定义、平台级默认值和 job 列表。 +加载时自动展平为 `{platform}_{job}` 格式(如 `nvidia_gpu`)。 + ```yaml repo: url: https://github.com/InfiniTensor/InfiniOps.git branch: master -images: +platforms: nvidia: - dockerfile: .ci/images/nvidia/ - build_args: - BASE_IMAGE: nvcr.io/nvidia/pytorch:24.10-py3 - -jobs: - nvidia_gpu: - image: latest # latest | - platform: nvidia - resources: - gpu_ids: "0" # "0" | "0,2" | "all" - memory: 32GB - shm_size: 16g # 避免 PyTorch SHMEM 不足 - timeout: 3600 # 容器内脚本最大运行秒数 + image: # 镜像定义 + dockerfile: .ci/images/nvidia/ + build_args: + BASE_IMAGE: nvcr.io/nvidia/pytorch:24.10-py3 + setup: pip install .[dev] # 平台级默认值,job 可覆盖 + jobs: + gpu: # 展平后为 nvidia_gpu + resources: + gpu_ids: "0" # "0" | "0,2" | "all" + memory: 32GB + shm_size: 16g + timeout: 3600 + stages: + - name: test + run: pytest tests/ -n 8 -v --tb=short --junitxml=/workspace/results/test-results.xml + + iluvatar: + image: + dockerfile: .ci/images/iluvatar/ + build_args: + BASE_IMAGE: corex:qs_pj20250825 + APT_MIRROR: http://archive.ubuntu.com/ubuntu + PIP_INDEX_URL: https://pypi.org/simple + docker_args: # 平台级 docker 参数,所有 job 继承 + - 
"--privileged" + - "--cap-add=ALL" + - "--pid=host" + - "--ipc=host" + volumes: + - /dev:/dev + - /lib/firmware:/lib/firmware + - /usr/src:/usr/src + - /lib/modules:/lib/modules setup: pip install .[dev] - env: # 可选,注入容器环境变量 - MY_VAR: value - stages: - - name: test - run: pytest tests/ -n auto -v --tb=short --junitxml=/workspace/results/test-results.xml + jobs: + gpu: # 展平后为 iluvatar_gpu + resources: + gpu_ids: "0" + gpu_style: none # CoreX 设备通过 --privileged + /dev 挂载 + memory: 32GB + shm_size: 16g + timeout: 3600 + stages: + - name: test + run: pytest tests/ -n 8 -v --tb=short --junitxml=/workspace/results/test-results.xml ``` +### 配置层级说明 + +| 层级 | 字段 | 说明 | +|---|---|---| +| **平台级** | `image` | 镜像定义(dockerfile、build_args) | +| | `image_tag` | 默认镜像 tag(默认 `latest`) | +| | `docker_args` | 额外 docker run 参数(如 `--privileged`) | +| | `volumes` | 额外挂载卷 | +| | `setup` | 容器内 setup 命令 | +| | `env` | 注入容器环境变量 | +| **Job 级** | `resources.gpu_ids` | GPU 设备 ID | +| | `resources.gpu_style` | GPU 透传方式:`nvidia`(默认)或 `none` | +| | `resources.memory` | 容器内存限制 | +| | `resources.shm_size` | 共享内存大小 | +| | `resources.timeout` | 容器内脚本最大运行秒数 | +| | `stages` | 执行阶段列表 | +| | 以上平台级字段 | Job 可覆盖任意平台级默认值 | + --- ## 镜像构建 `build.py` | 参数 | 说明 | |---|---| -| `--platform nvidia\|ascend\|all` | 构建平台,默认 `all` | +| `--platform nvidia\|iluvatar\|ascend\|all` | 构建平台,默认 `all` | | `--force` | 跳过 Dockerfile 变更检测 | | `--dry-run` | 打印命令不执行 | @@ -58,8 +110,11 @@ jobs: # 检测变更后构建(无变更自动跳过) python .ci/build.py --platform nvidia -# 强制构建 -python .ci/build.py --platform nvidia --force +# 构建 Iluvatar 镜像 +python .ci/build.py --platform iluvatar --force + +# 强制构建全部 +python .ci/build.py --force ``` 构建产物以宿主机本地镜像 tag 存储:`infiniops-ci/:` 和 `:latest`。 @@ -73,20 +128,116 @@ python .ci/build.py --platform nvidia --force | 参数 | 说明 | |---|---| +| `--job` | 指定 job 名称(默认第一个) | | `--branch` | 覆盖克隆分支 | | `--stage` | 只运行指定 stage | | `--image-tag` | 覆盖镜像 tag | -| `--gpu-id` | 覆盖 GPU 设备 ID | +| `--gpu-id` | 覆盖 GPU 设备 ID(仅 nvidia 
gpu_style) | | `--results-dir` | 宿主机目录,挂载到容器 `/workspace/results` | | `--dry-run` | 打印 docker 命令不执行 | ```bash -# 运行默认 job -python .ci/run.py --branch feat/my-feature --results-dir ./ci-results +# 运行 NVIDIA job +python .ci/run.py --job nvidia_gpu --branch master + +# 运行 Iluvatar job +python .ci/run.py --job iluvatar_gpu --branch feat/ci-nvidia # 只跑 test stage,预览命令 -python .ci/run.py --stage test --dry-run +python .ci/run.py --job iluvatar_gpu --stage test --dry-run ``` 容器内执行流程:`git clone` → `checkout` → `setup` → stages。 代理从宿主机透传,测试结果写入 `--results-dir`。每次运行均为干净环境(不挂载宿主机 pip 缓存)。 + +--- + +## 平台差异 + +| 平台 | GPU 透传方式 | 基础镜像 | 备注 | +|---|---|---|---| +| NVIDIA | `--gpus` (NVIDIA Container Toolkit) | `nvcr.io/nvidia/pytorch:24.10-py3` | 标准 CUDA | +| Iluvatar | `--privileged` + `/dev` 挂载 | `corex:qs_pj20250825` | CoreX 运行时,CUDA 兼容 | +| Ascend | TODO | `ascend-pytorch:24.0.0` | 待完善 | + +--- + +## Runner Agent `agent.py` + +Runner Agent 支持 CLI 手动触发、GitHub Webhook 自动触发、资源感知的动态调度,以及跨机器远程触发。 + +### CLI 手动执行 + +```bash +# 运行所有 job(本地 + 远程 Agent) +python .ci/agent.py run --branch master + +# 运行指定 job +python .ci/agent.py run --branch master --job nvidia_gpu + +# 按平台运行 +python .ci/agent.py run --branch master --platform nvidia + +# 预览命令 +python .ci/agent.py run --branch master --dry-run --no-status +``` + +| 参数 | 说明 | +|---|---| +| `--branch` | 测试分支(必填) | +| `--job` | 指定 job 名称 | +| `--platform` | 按平台过滤 job | +| `--commit` | 覆盖 commit SHA | +| `--image-tag` | 覆盖镜像 tag | +| `--results-dir` | 结果目录(默认 `ci-results`) | +| `--utilization-threshold` | GPU 空闲阈值百分比(默认 10) | +| `--no-status` | 跳过 GitHub Status 上报 | +| `--dry-run` | 预览模式 | + +### Webhook 服务 + +每台平台机器部署一个 Agent 实例: + +```bash +# NVIDIA 机器 +python .ci/agent.py serve --platform nvidia --port 8080 + +# Iluvatar 机器 +python .ci/agent.py serve --platform iluvatar --port 8080 +``` + +| 端点 | 方法 | 说明 | +|---|---|---| +| `/webhook` | POST | GitHub Webhook(push/pull_request) | +| `/api/run` | POST | 远程触发 job | +| `/api/job/{id}` | GET 
| 查询 job 状态 | +| `/health` | GET | 健康检查 | +| `/status` | GET | 队列 + 资源状态 | + +Webhook 支持 `X-Hub-Signature-256` 签名验证,通过 `--webhook-secret` 或 `WEBHOOK_SECRET` 环境变量配置。 + +### 远程 Agent 配置 + +在 `config.yaml` 中配置各平台 Agent 地址,CLI 执行时自动将远程 job 分发到对应 Agent: + +```yaml +agents: + nvidia: + url: http://nvidia-host:8080 + iluvatar: + url: http://iluvatar-host:8080 +``` + +### 资源调度 + +Agent 自动检测 GPU 利用率和系统内存,动态决定并行度: +- GPU 利用率 < 阈值(默认 10%)且未被 Agent 分配 → 可用 +- 资源不足时 job 自动排队,已完成 job 释放资源后自动调度排队任务 + +### GitHub Status + +设置 `GITHUB_TOKEN` 环境变量后,Agent 会自动上报 commit status: +- `pending` — job 开始执行 +- `success` / `failure` — job 执行完成 + +Status context 格式:`ci/infiniops/{job_name}` diff --git a/.ci/agent.py b/.ci/agent.py new file mode 100644 index 0000000..3696ce2 --- /dev/null +++ b/.ci/agent.py @@ -0,0 +1,971 @@ +#!/usr/bin/env python3 +"""CI Runner Agent: webhook server, resource-aware scheduler, GitHub status reporting. + +Usage: + # Run jobs locally (or dispatch to remote agents) + python .ci/agent.py run --branch master + python .ci/agent.py run --branch master --job nvidia_gpu --dry-run + + # Start webhook server + python .ci/agent.py serve --platform nvidia --port 8080 +""" + +import argparse +import collections +import hashlib +import hmac +import json +import os +import shlex +import subprocess +import sys +import threading +import time +import urllib.error +import urllib.request +import uuid +from concurrent.futures import ThreadPoolExecutor +from datetime import datetime +from http.server import BaseHTTPRequestHandler, HTTPServer +from pathlib import Path + +try: + import yaml +except ImportError: + print( + "error: pyyaml is required. 
Install with: pip install pyyaml", file=sys.stderr + ) + sys.exit(1) + +import ci_resource as res +import github_status as gh +import run + +# Maximum POST body size (1 MB) to prevent memory exhaustion +MAX_CONTENT_LENGTH = 1 * 1024 * 1024 + +# Job states +STATE_QUEUED = "queued" +STATE_RUNNING = "running" +STATE_PENDING = "pending" +STATE_SUCCESS = "success" +STATE_FAILURE = "failure" +STATE_ERROR = "error" + +# urllib helpers (module-level for easier mocking in tests) +urllib_request = urllib.request.Request +urllib_urlopen = urllib.request.urlopen + + +# --------------------------------------------------------------------------- +# Data classes +# --------------------------------------------------------------------------- + + +class JobRequest: + """Describes a CI job to be executed.""" + + def __init__(self, job_name, branch, commit_sha, config, image_tag=None, results_dir=None): + self.job_id = str(uuid.uuid4())[:8] + self.job_name = job_name + self.branch = branch + self.commit_sha = commit_sha + self.config = config + self.image_tag = image_tag + self.results_dir = results_dir or Path("ci-results") + self.created_at = datetime.now().isoformat() + + job = config["jobs"][job_name] + self.platform = job.get("platform", "nvidia") + + def to_dict(self): + return { + "job_id": self.job_id, + "job_name": self.job_name, + "branch": self.branch, + "commit_sha": self.commit_sha, + "platform": self.platform, + "created_at": self.created_at, + } + + +class JobResult: + """Outcome of a completed job.""" + + def __init__(self, job_id, job_name, commit_sha, returncode, results_dir, duration): + self.job_id = job_id + self.job_name = job_name + self.commit_sha = commit_sha + self.returncode = returncode + self.results_dir = results_dir + self.duration = duration + + self.state = STATE_SUCCESS if returncode == 0 else STATE_FAILURE + + def to_dict(self): + return { + "job_id": self.job_id, + "job_name": self.job_name, + "commit_sha": self.commit_sha, + "state": self.state, + 
"returncode": self.returncode, + "results_dir": str(self.results_dir), + "duration_seconds": round(self.duration, 1), + } + + +# --------------------------------------------------------------------------- +# Job selection and routing +# --------------------------------------------------------------------------- + + +def select_jobs(config, platform=None, job_name=None): + """Return list of job names to run.""" + jobs = config.get("jobs", {}) + + if job_name: + if job_name not in jobs: + raise ValueError(f"job {job_name!r} not in config") + + return [job_name] + + if platform: + return [ + name for name, job in jobs.items() if job.get("platform") == platform + ] + + return list(jobs.keys()) + + +def route_jobs(config, job_names, local_platform=None): + """Split jobs into local and remote. + + Returns (local_jobs, remote_jobs) where remote_jobs is a list of + (job_name, agent_url) tuples. + """ + agents = config.get("agents", {}) + jobs = config.get("jobs", {}) + local = [] + remote = [] + + for name in job_names: + job = jobs.get(name, {}) + platform = job.get("platform", "") + + if not local_platform: + local.append(name) + elif platform == local_platform: + local.append(name) + elif platform in agents: + remote.append((name, agents[platform].get("url", ""))) + else: + local.append(name) + + return local, remote + + +# --------------------------------------------------------------------------- +# Scheduler +# --------------------------------------------------------------------------- + + +class Scheduler: + """Resource-aware job scheduler with dynamic parallelism.""" + + def __init__( + self, + config, + platform, + resource_pool, + results_dir=None, + max_workers=4, + no_status=False, + dry_run=False, + ): + self._config = config + self._platform = platform + self._resource_pool = resource_pool + self._results_dir = results_dir or Path("ci-results") + self._no_status = no_status + self._dry_run = dry_run + self._queue = collections.deque() + self._jobs: dict[str, 
dict] = {} # job_id -> {request, result, state, gpu_ids} + self._executor = ThreadPoolExecutor(max_workers=max_workers) + self._lock = threading.Lock() + self._done_event = threading.Event() + + # GitHub config + github_cfg = config.get("github", {}) + self._status_prefix = github_cfg.get("status_context_prefix", "ci/infiniops") + repo = config.get("repo", {}) + repo_url = repo.get("url", "") + self._owner, self._repo = gh.parse_repo_url(repo_url) + + def submit(self, job_request): + """Add a job to the queue and attempt to schedule it. + + Returns the job_id. + """ + with self._lock: + self._jobs[job_request.job_id] = { + "request": job_request, + "result": None, + "state": STATE_QUEUED, + "gpu_ids": [], + } + self._queue.append(job_request) + + self._try_schedule() + return job_request.job_id + + def get_job(self, job_id): + """Get job info by ID.""" + with self._lock: + entry = self._jobs.get(job_id) + + if not entry: + return None + + info = entry["request"].to_dict() + info["state"] = entry["state"] + + if entry["result"]: + info.update(entry["result"].to_dict()) + + return info + + def get_status(self): + """Return scheduler status for the /status endpoint.""" + with self._lock: + queued = [ + self._jobs[r.job_id]["request"].to_dict() + for r in self._queue + ] + running = [] + completed = [] + + for entry in self._jobs.values(): + state = entry["state"] + + if state == STATE_RUNNING: + running.append({**entry["request"].to_dict(), "gpu_ids": entry["gpu_ids"]}) + elif state in (STATE_SUCCESS, STATE_FAILURE): + completed.append(entry["result"].to_dict()) + + return { + "queued": queued, + "running": running, + "completed": completed[-20:], # Last 20 + "resources": self._resource_pool.get_status(), + } + + def wait_all(self): + """Block until all submitted jobs are done. 
Returns list of JobResult.""" + while True: + with self._lock: + pending = any( + e["state"] in (STATE_QUEUED, STATE_RUNNING) for e in self._jobs.values() + ) + + if not pending: + break + + self._done_event.wait(timeout=2.0) + self._done_event.clear() + + with self._lock: + return [ + e["result"] + for e in self._jobs.values() + if e["result"] is not None + ] + + def _try_schedule(self): + """Try to run queued jobs that have enough resources. + + Resource allocation and job submission are split: allocation decisions + are made under the lock, but executor.submit() happens outside to + prevent deadlock when the thread pool is saturated. + """ + to_launch = [] # [(req, gpu_ids), ...] + + with self._lock: + remaining = collections.deque() + + while self._queue: + req = self._queue.popleft() + job_cfg = self._config["jobs"].get(req.job_name, {}) + gpu_count = res.parse_gpu_requirement(job_cfg) + memory_mb = res.parse_memory_requirement(job_cfg) + + if self._dry_run: + # In dry-run mode, skip resource checks + gpu_ids, ok = [], True + else: + gpu_ids, ok = self._resource_pool.allocate(gpu_count, memory_mb) + + if ok: + self._jobs[req.job_id]["state"] = STATE_RUNNING + self._jobs[req.job_id]["gpu_ids"] = gpu_ids + to_launch.append((req, gpu_ids)) + else: + remaining.append(req) + + self._queue = remaining + + # Submit outside the lock to avoid deadlock with ThreadPoolExecutor + for req, gpu_ids in to_launch: + self._executor.submit(self._run_job, req, gpu_ids) + + def _run_job(self, req, gpu_ids): + """Execute a single job in a worker thread. + + Wrapped in try/finally to guarantee GPU resources are always released + and job state is updated even on unexpected exceptions. 
+ """ + context = gh.build_status_context(self._status_prefix, req.job_name) + result = None + + try: + # Post pending status + if not self._no_status: + gh.post_commit_status( + self._owner, + self._repo, + req.commit_sha, + STATE_PENDING, + context, + f"Running {req.job_name}...", + ) + + job_cfg = self._config["jobs"][req.job_name] + all_stages = job_cfg.get("stages", []) + repo_url = self._config.get("repo", {}).get("url", "") + commit_short = req.commit_sha[:7] if len(req.commit_sha) > 7 else req.commit_sha + results_dir = run.build_results_dir( + req.results_dir, req.platform, all_stages, commit_short + ) + + gpu_id_str = ",".join(str(g) for g in gpu_ids) if gpu_ids else None + docker_args = run.build_docker_args( + self._config, + req.job_name, + repo_url, + req.branch, + all_stages, + "/workspace", + req.image_tag, + gpu_id_override=gpu_id_str, + results_dir=results_dir, + ) + + start = time.monotonic() + + if self._dry_run: + print(f"[dry-run] {req.job_name}: {shlex.join(docker_args)}") + returncode = 0 + else: + results_dir.mkdir(parents=True, exist_ok=True) + proc = subprocess.run(docker_args) + returncode = proc.returncode + + duration = time.monotonic() - start + + result = JobResult( + job_id=req.job_id, + job_name=req.job_name, + commit_sha=req.commit_sha, + returncode=returncode, + results_dir=results_dir, + duration=duration, + ) + + # Post final status + if not self._no_status: + gh.post_commit_status( + self._owner, + self._repo, + req.commit_sha, + result.state, + context, + f"{req.job_name}: {result.state} in {duration:.0f}s", + ) + except Exception as e: + print(f"error: job {req.job_name} failed with exception: {e}", file=sys.stderr) + + if result is None: + result = JobResult( + job_id=req.job_id, + job_name=req.job_name, + commit_sha=req.commit_sha, + returncode=-1, + results_dir=req.results_dir, + duration=0, + ) + + if not self._no_status: + gh.post_commit_status( + self._owner, + self._repo, + req.commit_sha, + STATE_ERROR, + context, + 
f"{req.job_name}: internal error", + ) + finally: + # Always release resources and update state + self._resource_pool.release(gpu_ids) + + with self._lock: + self._jobs[req.job_id]["result"] = result + self._jobs[req.job_id]["state"] = result.state if result else STATE_FAILURE + + self._done_event.set() + self._try_schedule() + + return result + + +# --------------------------------------------------------------------------- +# Webhook server +# --------------------------------------------------------------------------- + + +def verify_signature(secret, body, signature_header): + """Verify GitHub webhook HMAC-SHA256 signature.""" + if not signature_header: + return False + + expected = "sha256=" + hmac.new( + secret.encode("utf-8"), body, hashlib.sha256 + ).hexdigest() + return hmac.compare_digest(expected, signature_header) + + +def _verify_api_token(handler): + """Check Bearer token for /api/run authentication. + + Returns True if authenticated, False (and sends 401) if not. + When no api_token is configured on the server, all requests are allowed. 
+ """ + api_token = getattr(handler.server, "api_token", None) + + if not api_token: + return True + + auth_header = handler.headers.get("Authorization", "") + + if auth_header == f"Bearer {api_token}": + return True + + handler._respond_json(401, {"error": "unauthorized"}) + return False + + +class WebhookHandler(BaseHTTPRequestHandler): + """HTTP handler for GitHub webhooks and API endpoints.""" + + def log_message(self, format, *args): + print(f"[agent] {args[0]}", file=sys.stderr) + + def do_GET(self): + if self.path == "/health": + self._respond_json(200, {"status": "ok", "platform": self.server.platform}) + elif self.path == "/status": + status = self.server.scheduler.get_status() + self._respond_json(200, status) + elif self.path.startswith("/api/job/"): + self._handle_api_job() + else: + self._respond_json(404, {"error": "not found"}) + + def do_POST(self): + content_length = int(self.headers.get("Content-Length", 0)) + + if content_length > MAX_CONTENT_LENGTH: + self._respond_json(413, {"error": "payload too large"}) + return + + body = self.rfile.read(content_length) + + if self.path == "/webhook": + self._handle_webhook(body) + elif self.path == "/api/run": + self._handle_api_run(body) + else: + self._respond_json(404, {"error": "not found"}) + + def _handle_webhook(self, body): + # Verify signature if secret is configured + if self.server.webhook_secret: + sig = self.headers.get("X-Hub-Signature-256", "") + + if not verify_signature(self.server.webhook_secret, body, sig): + self._respond_json(401, {"error": "invalid signature"}) + return + + event_type = self.headers.get("X-GitHub-Event", "") + + if event_type == "ping": + self._respond_json(200, {"msg": "pong"}) + return + + try: + payload = json.loads(body) + except json.JSONDecodeError: + self._respond_json(400, {"error": "invalid JSON"}) + return + + if event_type == "push": + branch, sha = self._parse_push(payload) + elif event_type == "pull_request": + action = payload.get("action", "") + + if 
action not in ("opened", "synchronize"): + self._respond_json(200, {"msg": f"ignored PR action: {action}"}) + return + + branch, sha = self._parse_pull_request(payload) + else: + self._respond_json(200, {"msg": f"ignored event: {event_type}"}) + return + + if not branch or not sha: + self._respond_json(400, {"error": "could not extract branch/sha"}) + return + + job_ids = self._submit_jobs(branch, sha) + self._respond_json(200, {"accepted": True, "job_ids": job_ids}) + + def _handle_api_run(self, body): + """Handle /api/run: remote job trigger (requires Bearer token auth).""" + if not _verify_api_token(self): + return + + try: + payload = json.loads(body) + except json.JSONDecodeError: + self._respond_json(400, {"error": "invalid JSON"}) + return + + branch = payload.get("branch", "") + sha = payload.get("commit_sha", "") + job_name = payload.get("job") + image_tag = payload.get("image_tag") + + if not branch: + self._respond_json(400, {"error": "branch is required"}) + return + + if not sha: + sha = run.get_git_commit() + + job_ids = self._submit_jobs(branch, sha, job_name=job_name, image_tag=image_tag) + self._respond_json(200, {"accepted": True, "job_ids": job_ids}) + + def _handle_api_job(self): + """Handle GET /api/job/{id}.""" + parts = self.path.split("/") + + if len(parts) < 4: + self._respond_json(400, {"error": "missing job_id"}) + return + + job_id = parts[3] + info = self.server.scheduler.get_job(job_id) + + if info is None: + self._respond_json(404, {"error": f"job {job_id} not found"}) + else: + self._respond_json(200, info) + + def _parse_push(self, payload): + branch = payload.get("ref", "").removeprefix("refs/heads/") + sha = payload.get("after", "") + return branch, sha + + def _parse_pull_request(self, payload): + pr = payload.get("pull_request", {}) + head = pr.get("head", {}) + branch = head.get("ref", "") + sha = head.get("sha", "") + return branch, sha + + def _submit_jobs(self, branch, sha, job_name=None, image_tag=None): + config = 
self.server.config + job_names = select_jobs(config, platform=self.server.platform, job_name=job_name) + job_ids = [] + + for name in job_names: + req = JobRequest( + job_name=name, + branch=branch, + commit_sha=sha, + config=config, + image_tag=image_tag, + results_dir=self.server.results_dir, + ) + jid = self.server.scheduler.submit(req) + job_ids.append(jid) + + return job_ids + + def _respond_json(self, status_code, data): + body = json.dumps(data, indent=2).encode("utf-8") + self.send_response(status_code) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + +class AgentServer(HTTPServer): + """HTTP server with scheduler and config context.""" + + def __init__( + self, + host, + port, + config, + scheduler, + platform, + webhook_secret=None, + api_token=None, + results_dir=None, + ): + super().__init__((host, port), WebhookHandler) + self.config = config + self.scheduler = scheduler + self.platform = platform + self.webhook_secret = webhook_secret + self.api_token = api_token + self.results_dir = results_dir or Path("ci-results") + + +# --------------------------------------------------------------------------- +# Remote job dispatch (for CLI triggering remote agents) +# --------------------------------------------------------------------------- + + +def dispatch_remote_job(agent_url, job_name, branch, commit_sha, image_tag=None, api_token=None): + """Send a job to a remote agent via HTTP API. 
Returns job_id or None.""" + url = f"{agent_url.rstrip('/')}/api/run" + body = { + "branch": branch, + "commit_sha": commit_sha, + "job": job_name, + } + + if image_tag: + body["image_tag"] = image_tag + + data = json.dumps(body).encode("utf-8") + headers = {"Content-Type": "application/json"} + + if api_token: + headers["Authorization"] = f"Bearer {api_token}" + + req = urllib_request(url, data=data, headers=headers, method="POST") + + try: + with urllib_urlopen(req, timeout=30) as resp: + result = json.loads(resp.read()) + job_ids = result.get("job_ids", []) + return job_ids[0] if job_ids else None + except Exception as e: + print(f"error: failed to dispatch to {agent_url}: {e}", file=sys.stderr) + return None + + +def poll_remote_job(agent_url, job_id, interval=5.0, timeout=7200): + """Poll a remote agent for job completion. Returns final state dict or None.""" + url = f"{agent_url.rstrip('/')}/api/job/{job_id}" + deadline = time.monotonic() + timeout + + while time.monotonic() < deadline: + try: + req = urllib_request(url) + + with urllib_urlopen(req, timeout=10) as resp: + info = json.loads(resp.read()) + + state = info.get("state", "") + + if state in (STATE_SUCCESS, STATE_FAILURE): + return info + except Exception: + pass + + time.sleep(interval) + + return None + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def cmd_run(args): + """Handle 'run' subcommand: execute jobs locally and/or remotely.""" + config = run.load_config(args.config) + commit_sha = args.commit or run.get_git_commit(short=False) + + # Determine which jobs to run + try: + job_names = select_jobs(config, platform=args.platform, job_name=args.job) + except ValueError as e: + print(f"error: {e}", file=sys.stderr) + sys.exit(1) + + if not job_names: + print("error: no matching jobs found", file=sys.stderr) + sys.exit(1) + + # Detect local platform (if running serve on 
this machine, use that; otherwise guess) + local_platform = args.platform + local_jobs, remote_jobs = route_jobs(config, job_names, local_platform) + + # Run local jobs + local_results = [] + + if local_jobs: + pool = res.ResourcePool( + local_platform or "unknown", + utilization_threshold=args.utilization_threshold, + ) + scheduler = Scheduler( + config, + local_platform or "unknown", + pool, + results_dir=args.results_dir, + no_status=args.no_status, + dry_run=args.dry_run, + ) + + for name in local_jobs: + req = JobRequest( + job_name=name, + branch=args.branch, + commit_sha=commit_sha, + config=config, + image_tag=args.image_tag, + results_dir=args.results_dir, + ) + scheduler.submit(req) + + local_results = scheduler.wait_all() + + # Dispatch remote jobs + remote_results = [] + api_token = os.environ.get("AGENT_API_TOKEN", "") + + if remote_jobs and not args.dry_run: + # Dispatch all remote jobs first, then poll concurrently + dispatched = [] # [(name, agent_url, job_id)] + + for name, agent_url in remote_jobs: + if not agent_url: + print(f"warning: no agent URL for {name}, skipping", file=sys.stderr) + remote_results.append({"job_name": name, "state": "error"}) + continue + + print(f"==> dispatching {name} to {agent_url}", file=sys.stderr) + job_id = dispatch_remote_job( + agent_url, name, args.branch, commit_sha, args.image_tag, + api_token=api_token or None, + ) + + if job_id: + print(f" job_id: {job_id}", file=sys.stderr) + dispatched.append((name, agent_url, job_id)) + else: + print(f" failed to dispatch {name}", file=sys.stderr) + remote_results.append({"job_name": name, "state": "error"}) + + # Poll all dispatched jobs concurrently + if dispatched: + with ThreadPoolExecutor(max_workers=len(dispatched)) as executor: + futures = { + executor.submit(poll_remote_job, url, jid): (name, url, jid) + for name, url, jid in dispatched + } + + for future in futures: + name, _, _ = futures[future] + result = future.result() + + if result: + 
remote_results.append(result) + else: + print(f" timeout waiting for {name}", file=sys.stderr) + remote_results.append({"job_name": name, "state": "timeout"}) + + elif remote_jobs and args.dry_run: + for name, agent_url in remote_jobs: + print(f"[dry-run] dispatch {name} to {agent_url}") + + # Summary + print("\n========== Results ==========") + all_ok = True + + for r in local_results: + status = "PASS" if r.returncode == 0 else "FAIL" + + if r.returncode != 0: + all_ok = False + + print(f" {status} {r.job_name} ({r.duration:.0f}s) {r.results_dir}") + + for r in remote_results: + state = r.get("state", "unknown") + name = r.get("job_name", "?") + status = "PASS" if state == STATE_SUCCESS else "FAIL" + + if state != STATE_SUCCESS: + all_ok = False + + duration = r.get("duration_seconds", 0) + print(f" {status} {name} ({duration:.0f}s) [remote]") + + if not all_ok: + sys.exit(1) + + +def cmd_serve(args): + """Handle 'serve' subcommand: start webhook server.""" + config = run.load_config(args.config) + + pool = res.ResourcePool( + args.platform, + utilization_threshold=args.utilization_threshold, + ) + scheduler = Scheduler( + config, + args.platform, + pool, + results_dir=args.results_dir, + ) + + webhook_secret = args.webhook_secret or os.environ.get("WEBHOOK_SECRET", "") + api_token = args.api_token or os.environ.get("AGENT_API_TOKEN", "") + + if not webhook_secret: + print( + "WARNING: No webhook secret configured. Webhook endpoint accepts " + "unsigned requests. Set --webhook-secret or WEBHOOK_SECRET for production.", + file=sys.stderr, + ) + + if not api_token: + print( + "WARNING: No API token configured. /api/run endpoint is unauthenticated. 
" + "Set --api-token or AGENT_API_TOKEN for production.", + file=sys.stderr, + ) + + server = AgentServer( + args.host, + args.port, + config, + scheduler, + args.platform, + webhook_secret=webhook_secret or None, + api_token=api_token or None, + results_dir=args.results_dir, + ) + + print( + f"Agent serving on {args.host}:{args.port} (platform={args.platform})", + file=sys.stderr, + ) + print(f" POST /webhook — GitHub webhook", file=sys.stderr) + print(f" POST /api/run — remote job trigger", file=sys.stderr) + print(f" GET /health — health check", file=sys.stderr) + print(f" GET /status — queue & resource status", file=sys.stderr) + print(f" GET /api/job/{{id}} — job status", file=sys.stderr) + + try: + server.serve_forever() + except KeyboardInterrupt: + print("\nShutting down...", file=sys.stderr) + server.shutdown() + + +def main(): + parser = argparse.ArgumentParser( + description="CI Runner Agent: run jobs locally, dispatch remotely, or serve webhooks", + ) + subparsers = parser.add_subparsers(dest="command") + + # --- run subcommand --- + run_parser = subparsers.add_parser("run", help="Run CI jobs") + run_parser.add_argument( + "--config", + type=Path, + default=Path(__file__).resolve().parent / "config.yaml", + ) + run_parser.add_argument("--branch", type=str, required=True, help="Branch to test") + run_parser.add_argument("--job", type=str, help="Specific job name") + run_parser.add_argument("--platform", type=str, help="Filter jobs by platform") + run_parser.add_argument("--image-tag", type=str, help="Override image tag") + run_parser.add_argument("--commit", type=str, help="Override commit SHA") + run_parser.add_argument( + "--results-dir", + type=Path, + default=Path("ci-results"), + ) + run_parser.add_argument( + "--utilization-threshold", + type=int, + default=10, + help="GPU utilization threshold (%%) to consider free (default: 10)", + ) + run_parser.add_argument("--no-status", action="store_true", help="Skip GitHub status") + 
run_parser.add_argument("--dry-run", action="store_true") + + # --- serve subcommand --- + serve_parser = subparsers.add_parser("serve", help="Start webhook server") + serve_parser.add_argument( + "--config", + type=Path, + default=Path(__file__).resolve().parent / "config.yaml", + ) + serve_parser.add_argument( + "--platform", + type=str, + required=True, + help="Platform this agent handles (nvidia, iluvatar, etc.)", + ) + serve_parser.add_argument("--port", type=int, default=8080) + serve_parser.add_argument("--host", type=str, default="0.0.0.0") + serve_parser.add_argument("--webhook-secret", type=str) + serve_parser.add_argument( + "--api-token", + type=str, + help="Bearer token for /api/run authentication (or AGENT_API_TOKEN env var)", + ) + serve_parser.add_argument( + "--results-dir", + type=Path, + default=Path("ci-results"), + ) + serve_parser.add_argument( + "--utilization-threshold", + type=int, + default=10, + ) + + args = parser.parse_args() + + if args.command == "run": + cmd_run(args) + elif args.command == "serve": + cmd_serve(args) + else: + parser.print_help() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/.ci/build.py b/.ci/build.py index 2339319..7953209 100644 --- a/.ci/build.py +++ b/.ci/build.py @@ -9,32 +9,7 @@ import sys from pathlib import Path -try: - import yaml -except ImportError: - print( - "error: pyyaml is required. 
Install with: pip install pyyaml", file=sys.stderr - ) - sys.exit(1) - - -def load_config(path): - with open(path, encoding="utf-8") as f: - return yaml.safe_load(f) - - -def get_git_commit(ref="HEAD"): - result = subprocess.run( - ["git", "rev-parse", "--short", ref], - capture_output=True, - text=True, - ) - - if result.returncode != 0: - print(f"error: failed to get commit hash for `{ref}`", file=sys.stderr) - sys.exit(1) - - return result.stdout.strip() +from utils import get_git_commit, load_config def has_dockerfile_changed(dockerfile_dir, base_ref="HEAD~1"): diff --git a/.ci/ci_resource.py b/.ci/ci_resource.py new file mode 100644 index 0000000..f3dbfb1 --- /dev/null +++ b/.ci/ci_resource.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +"""Resource detection and allocation for CI Runner Agent.""" + +import os +import subprocess +import threading +from dataclasses import dataclass, field + +# GPU passthrough styles +GPU_STYLE_NVIDIA = "nvidia" +GPU_STYLE_NONE = "none" + + +@dataclass +class GpuInfo: + index: int + memory_used_mb: float + memory_total_mb: float + utilization_pct: float + + +@dataclass +class SystemResources: + total_memory_mb: float + available_memory_mb: float + cpu_count: int + + +class ResourcePool: + """Thread-safe GPU and system resource manager. + + Detects available GPUs via platform-specific tools (nvidia-smi, ixsmi) + and tracks allocations to enable dynamic parallel scheduling. 
+ """ + + GPU_QUERY_TOOLS = { + "nvidia": "nvidia-smi", + "iluvatar": "ixsmi", + } + + def __init__(self, platform, utilization_threshold=10): + self._platform = platform + self._utilization_threshold = utilization_threshold + self._allocated: set[int] = set() + self._lock = threading.Lock() + + @property + def platform(self): + return self._platform + + @property + def allocated(self): + with self._lock: + return set(self._allocated) + + def detect_gpus(self) -> list[GpuInfo]: + """Query GPU status via platform-specific CLI tool.""" + tool = self.GPU_QUERY_TOOLS.get(self._platform) + + if not tool: + return [] + + try: + result = subprocess.run( + [ + tool, + "--query-gpu=index,memory.used,memory.total,utilization.gpu", + "--format=csv,noheader,nounits", + ], + capture_output=True, + text=True, + timeout=10, + ) + except (FileNotFoundError, subprocess.TimeoutExpired): + return [] + + if result.returncode != 0: + return [] + + gpus = [] + + for line in result.stdout.strip().splitlines(): + parts = [p.strip() for p in line.split(",")] + + if len(parts) < 4: + continue + + try: + gpus.append( + GpuInfo( + index=int(parts[0]), + memory_used_mb=float(parts[1]), + memory_total_mb=float(parts[2]), + utilization_pct=float(parts[3]), + ) + ) + except (ValueError, IndexError): + continue + + return gpus + + def detect_system_resources(self) -> SystemResources: + """Read system memory from /proc/meminfo and CPU count.""" + total_mb = 0.0 + available_mb = 0.0 + + try: + with open("/proc/meminfo", encoding="utf-8") as f: + for line in f: + if line.startswith("MemTotal:"): + total_mb = float(line.split()[1]) / 1024 + elif line.startswith("MemAvailable:"): + available_mb = float(line.split()[1]) / 1024 + except OSError: + pass + + return SystemResources( + total_memory_mb=total_mb, + available_memory_mb=available_mb, + cpu_count=os.cpu_count() or 1, + ) + + def get_free_gpus(self) -> list[int]: + """Return GPU indices with utilization below threshold.""" + gpus = 
self.detect_gpus() + return [ + g.index + for g in gpus + if g.utilization_pct < self._utilization_threshold + ] + + def allocate(self, gpu_count, memory_mb=0) -> tuple[list[int], bool]: + """Try to allocate GPUs and check memory. + + Returns (allocated_gpu_ids, success). On failure returns ([], False). + GPU detection and memory checks run outside the lock to avoid blocking + other threads while subprocess.run (nvidia-smi) executes. + """ + if gpu_count <= 0: + if memory_mb > 0: + sys_res = self.detect_system_resources() + + if sys_res.available_memory_mb < memory_mb: + return ([], False) + + return ([], True) + + # Detect GPUs and memory outside the lock (subprocess.run can block) + free_gpus = set(self.get_free_gpus()) + sys_res = self.detect_system_resources() if memory_mb > 0 else None + + with self._lock: + available = free_gpus - self._allocated + + if len(available) < gpu_count: + return ([], False) + + if sys_res is not None and sys_res.available_memory_mb < memory_mb: + return ([], False) + + selected = sorted(available)[:gpu_count] + self._allocated.update(selected) + return (selected, True) + + def release(self, gpu_ids): + """Return GPUs to the free pool.""" + with self._lock: + self._allocated -= set(gpu_ids) + + def get_status(self) -> dict: + """Return current resource status for API endpoints.""" + gpus = self.detect_gpus() + sys_res = self.detect_system_resources() + + with self._lock: + allocated = sorted(self._allocated) + + return { + "platform": self._platform, + "gpus": [ + { + "index": g.index, + "memory_used_mb": g.memory_used_mb, + "memory_total_mb": g.memory_total_mb, + "utilization_pct": g.utilization_pct, + "allocated_by_agent": g.index in allocated, + } + for g in gpus + ], + "allocated_gpu_ids": allocated, + "system": { + "total_memory_mb": round(sys_res.total_memory_mb, 1), + "available_memory_mb": round(sys_res.available_memory_mb, 1), + "cpu_count": sys_res.cpu_count, + }, + "utilization_threshold": self._utilization_threshold, + } 
+ + +def parse_gpu_requirement(job_config) -> int: + """Extract GPU count requirement from a job config.""" + resources = job_config.get("resources", {}) + gpu_style = resources.get("gpu_style", GPU_STYLE_NVIDIA) + + if gpu_style == GPU_STYLE_NONE: + return 0 + + gpu_ids = str(resources.get("gpu_ids", "")) + + if not gpu_ids: + return resources.get("gpu_count", 0) + + if gpu_ids == "all": + return 0 # "all" means use all available, don't reserve specific count + + return len(gpu_ids.split(",")) + + +def parse_memory_requirement(job_config) -> float: + """Extract memory requirement in MB from a job config.""" + resources = job_config.get("resources", {}) + memory = str(resources.get("memory", "")) + + if not memory: + return 0 + + memory = memory.lower().strip() + + if memory.endswith("gb"): + return float(memory[:-2]) * 1024 + elif memory.endswith("g"): + return float(memory[:-1]) * 1024 + elif memory.endswith("mb"): + return float(memory[:-2]) + elif memory.endswith("m"): + return float(memory[:-1]) + + try: + return float(memory) * 1024 # Default: GB + except ValueError: + return 0 diff --git a/.ci/config.yaml b/.ci/config.yaml index a86174a..e62bc07 100644 --- a/.ci/config.yaml +++ b/.ci/config.yaml @@ -2,32 +2,69 @@ repo: url: https://github.com/InfiniTensor/InfiniOps.git branch: master -images: - nvidia: - dockerfile: .ci/images/nvidia/ - build_args: - BASE_IMAGE: nvcr.io/nvidia/pytorch:24.10-py3 - ascend: # TODO: Ascend image is not ready yet - dockerfile: .ci/images/ascend/ - build_args: - BASE_IMAGE: ascendhub.huawei.com/public-ascendhub/ascend-pytorch:24.0.0 - private_sdk: - source_env: PRIVATE_SDK_URL +github: + status_context_prefix: "ci/infiniops" # GitHub Commit Status context 前缀 + +# agents: # 远程 Agent 地址(CLI 跨机器触发用) +# nvidia: +# url: http://nvidia-host:8080 +# iluvatar: +# url: http://iluvatar-host:8080 -jobs: - nvidia_gpu: - image: latest - platform: nvidia - resources: - gpu_ids: "0" # 指定 GPU ID,如 "0" "0,2" "all" - memory: 32GB - shm_size: 16g # 
避免 PyTorch 默认 64MB SHMEM 不足 - timeout: 3600 +platforms: + nvidia: + image: + dockerfile: .ci/images/nvidia/ + build_args: + BASE_IMAGE: nvcr.io/nvidia/pytorch:24.10-py3 + setup: pip install .[dev] --no-build-isolation + jobs: + gpu: + resources: + gpu_ids: "0" # 指定 GPU ID,如 "0" "0,2" "all" + memory: 32GB + shm_size: 16g # 避免 PyTorch 默认 64MB SHMEM 不足 + timeout: 3600 + # env: # 可选,注入容器环境变量 + # MY_VAR: value + stages: + - name: test + run: pytest tests/ -n 8 -v --tb=short --junitxml=/workspace/results/test-results.xml - setup: pip install .[dev] - # env: # 可选,注入容器环境变量 - # MY_VAR: value + iluvatar: + image: + dockerfile: .ci/images/iluvatar/ + build_args: + BASE_IMAGE: corex:qs_pj20250825 + APT_MIRROR: http://archive.ubuntu.com/ubuntu + PIP_INDEX_URL: https://pypi.org/simple + docker_args: + - "--privileged" + - "--cap-add=ALL" + - "--pid=host" + - "--ipc=host" + volumes: + - /dev:/dev + - /lib/firmware:/lib/firmware + - /usr/src:/usr/src + - /lib/modules:/lib/modules + setup: pip install .[dev] --no-build-isolation + jobs: + gpu: + resources: + gpu_ids: "0" # 通过 CUDA_VISIBLE_DEVICES 控制可见 GPU + gpu_style: none # CoreX 设备通过 --privileged + /dev 挂载透传 + memory: 32GB + shm_size: 16g + timeout: 3600 + stages: + - name: test + run: pytest tests/ -n 8 -v --tb=short --junitxml=/workspace/results/test-results.xml - stages: - - name: test - run: pytest tests/ -n 8 -v --tb=short --junitxml=/workspace/results/test-results.xml + ascend: # TODO: Ascend image is not ready yet + image: + dockerfile: .ci/images/ascend/ + build_args: + BASE_IMAGE: ascendhub.huawei.com/public-ascendhub/ascend-pytorch:24.0.0 + private_sdk: + source_env: PRIVATE_SDK_URL diff --git a/.ci/github_status.py b/.ci/github_status.py new file mode 100644 index 0000000..a7abb8f --- /dev/null +++ b/.ci/github_status.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +"""GitHub Commit Status API wrapper using urllib (zero external dependencies).""" + +import json +import os +import re +import sys +import urllib.error +import 
def parse_repo_url(url):
    """Return (owner, repo) parsed from a GitHub remote URL.

    Supports the HTTPS form (https://github.com/Owner/Repo[.git]) and the
    SSH form (git@github.com:Owner/Repo[.git]); any other input yields
    ("", "").
    """
    patterns = (
        r"https?://[^/]+/([^/]+)/([^/]+?)(?:\.git)?$",  # HTTPS remotes
        r"git@[^:]+:([^/]+)/([^/]+?)(?:\.git)?$",  # SSH remotes
    )

    for pattern in patterns:
        match = re.match(pattern, url)

        if match:
            return match.group(1), match.group(2)

    return "", ""


def build_status_context(prefix, job_name):
    """Compose a status context string, e.g. 'ci/infiniops/nvidia_gpu'."""
    return f"{prefix}/{job_name}"


def post_commit_status(
    owner,
    repo,
    sha,
    state,
    context,
    description,
    target_url=None,
    token=None,
):
    """Send a single commit status to the GitHub REST API.

    Args:
        owner: Repository owner (user or organization).
        repo: Repository name.
        sha: Commit SHA the status attaches to.
        state: One of 'pending', 'success', 'failure', 'error'.
        context: Status context label shown in the GitHub UI.
        description: Human-readable summary (truncated to 140 chars,
            the API limit).
        target_url: Optional link attached to the status.
        token: GitHub token; falls back to the GITHUB_TOKEN env var.

    Returns:
        True when GitHub accepted the status, False otherwise (missing
        token/coordinates, or any HTTP/network failure — each case is
        reported as a warning on stderr rather than raised).
    """
    auth_token = token or os.environ.get("GITHUB_TOKEN", "")

    if not auth_token:
        print("warning: GITHUB_TOKEN not set, skipping status update", file=sys.stderr)
        return False

    if not (owner and repo and sha):
        print("warning: missing owner/repo/sha, skipping status update", file=sys.stderr)
        return False

    payload = {
        "state": state,
        "context": context,
        "description": description[:140],
    }

    if target_url:
        payload["target_url"] = target_url

    request = urllib.request.Request(
        f"https://api.github.com/repos/{owner}/{repo}/statuses/{sha}",
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Authorization": f"token {auth_token}",
            "Accept": "application/vnd.github.v3+json",
            "Content-Type": "application/json",
        },
        method="POST",
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            return 200 <= resp.status < 300
    except urllib.error.HTTPError as e:
        print(
            f"warning: GitHub status API returned {e.code}: {e.reason}",
            file=sys.stderr,
        )
        return False
    except urllib.error.URLError as e:
        print(f"warning: GitHub status API error: {e.reason}", file=sys.stderr)
        return False
100644 --- a/.ci/images/nvidia/Dockerfile +++ b/.ci/images/nvidia/Dockerfile @@ -10,7 +10,11 @@ ARG http_proxy ARG https_proxy ARG no_proxy -RUN apt-get update && \ +ARG APT_MIRROR +RUN if [ -n "$APT_MIRROR" ]; then \ + sed -i "s|http://[^/]*/ubuntu|${APT_MIRROR}|g" /etc/apt/sources.list; \ + fi && \ + apt-get update && \ apt-get install -y --no-install-recommends \ git \ cmake \ @@ -19,13 +23,24 @@ RUN apt-get update && \ libclang-dev \ && rm -rf /var/lib/apt/lists/* -RUN pip install --no-cache-dir \ + +ARG PIP_INDEX_URL +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir \ + ${PIP_INDEX_URL:+--index-url "$PIP_INDEX_URL"} \ scikit-build-core \ pybind11 \ libclang \ pytest \ pytest-cov \ pytest-xdist \ - pyyaml + pyyaml \ + ruff==0.15.7 + +# Pin pre-installed torch to prevent pip from replacing it with a different version +RUN pip show torch >/dev/null 2>&1 && \ + echo "torch==$(pip show torch | grep '^Version:' | awk '{print $2}')" > /etc/pip-constraints.txt || \ + touch /etc/pip-constraints.txt +ENV PIP_CONSTRAINT=/etc/pip-constraints.txt WORKDIR /workspace diff --git a/.ci/run.py b/.ci/run.py index 0c8d648..2575781 100644 --- a/.ci/run.py +++ b/.ci/run.py @@ -9,31 +9,8 @@ from datetime import datetime from pathlib import Path -try: - import yaml -except ImportError: - print( - "error: pyyaml is required. 
Install with: pip install pyyaml", file=sys.stderr - ) - sys.exit(1) - - -def load_config(path): - with open(path, encoding="utf-8") as f: - return yaml.safe_load(f) - - -def get_git_commit(ref="HEAD"): - result = subprocess.run( - ["git", "rev-parse", "--short", ref], - capture_output=True, - text=True, - ) - - if result.returncode != 0: - return "unknown" - - return result.stdout.strip() +from ci_resource import GPU_STYLE_NVIDIA, GPU_STYLE_NONE +from utils import get_git_commit, load_config def build_results_dir(base, platform, stages, commit): @@ -155,16 +132,29 @@ def build_docker_args( args.append("-e") args.append(f"STAGE_{i + 1}_CMD={s['run']}") + # Platform-specific device access + for flag in job.get("docker_args", []): + args.append(flag) + + for vol in job.get("volumes", []): + args.extend(["-v", vol]) + gpu_id = gpu_id_override or str(resources.get("gpu_ids", "")) gpu_count = resources.get("gpu_count", 0) - - if gpu_id: - if gpu_id == "all": - args.extend(["--gpus", "all"]) - else: - args.extend(["--gpus", f'"device={gpu_id}"']) - elif gpu_count and gpu_count > 0: - args.extend(["--gpus", f"count={gpu_count}"]) + gpu_style = resources.get("gpu_style", GPU_STYLE_NVIDIA) + + if gpu_style == GPU_STYLE_NVIDIA: + if gpu_id: + if gpu_id == "all": + args.extend(["--gpus", "all"]) + else: + args.extend(["--gpus", f'"device={gpu_id}"']) + elif gpu_count and gpu_count > 0: + args.extend(["--gpus", f"count={gpu_count}"]) + elif gpu_style == GPU_STYLE_NONE and gpu_id and gpu_id != "all": + # For platforms like Iluvatar/CoreX that use --privileged + /dev mount, + # control visible GPUs via CUDA_VISIBLE_DEVICES. 
+ args.extend(["-e", f"CUDA_VISIBLE_DEVICES={gpu_id}"]) memory = resources.get("memory") diff --git a/.ci/tests/conftest.py b/.ci/tests/conftest.py index 98079cd..38ed716 100644 --- a/.ci/tests/conftest.py +++ b/.ci/tests/conftest.py @@ -6,37 +6,41 @@ import pytest +from utils import normalize_config + @pytest.fixture def minimal_config(): - return { + """Minimal platform-centric config, normalized to flat format.""" + raw = { "repo": { "url": "https://github.com/InfiniTensor/InfiniOps.git", "branch": "master", }, - "images": { + "platforms": { "nvidia": { - "dockerfile": ".ci/images/nvidia/", - "build_args": {"BASE_IMAGE": "nvcr.io/nvidia/pytorch:24.10-py3"}, - } - }, - "jobs": { - "nvidia_gpu": { - "image": "latest", - "platform": "nvidia", - "resources": { - "gpu_ids": "0", - "memory": "32GB", - "shm_size": "16g", - "timeout": 3600, + "image": { + "dockerfile": ".ci/images/nvidia/", + "build_args": {"BASE_IMAGE": "nvcr.io/nvidia/pytorch:24.10-py3"}, }, "setup": "pip install .[dev]", - "stages": [ - { - "name": "test", - "run": "pytest tests/ -v", + "jobs": { + "gpu": { + "resources": { + "gpu_ids": "0", + "memory": "32GB", + "shm_size": "16g", + "timeout": 3600, + }, + "stages": [ + { + "name": "test", + "run": "pytest tests/ -v", + } + ], } - ], + }, } }, } + return normalize_config(raw) diff --git a/.ci/tests/test_agent.py b/.ci/tests/test_agent.py new file mode 100644 index 0000000..5741385 --- /dev/null +++ b/.ci/tests/test_agent.py @@ -0,0 +1,503 @@ +import hashlib +import hmac +import json +import threading +import time +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +import agent +import ci_resource as res +from utils import normalize_config + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def agent_config(): + raw = { + "repo": { + "url": 
"https://github.com/InfiniTensor/InfiniOps.git", + "branch": "master", + }, + "github": { + "status_context_prefix": "ci/infiniops", + }, + "agents": { + "nvidia": {"url": "http://nvidia-host:8080"}, + "iluvatar": {"url": "http://iluvatar-host:8080"}, + }, + "platforms": { + "nvidia": { + "image": { + "dockerfile": ".ci/images/nvidia/", + "build_args": {"BASE_IMAGE": "nvcr.io/nvidia/pytorch:24.10-py3"}, + }, + "setup": "pip install .[dev]", + "jobs": { + "gpu": { + "resources": { + "gpu_ids": "0", + "memory": "32GB", + "shm_size": "16g", + "timeout": 3600, + }, + "stages": [{"name": "test", "run": "pytest tests/ -v"}], + }, + }, + }, + "iluvatar": { + "image": { + "dockerfile": ".ci/images/iluvatar/", + "build_args": {"BASE_IMAGE": "corex:qs_pj20250825"}, + }, + "setup": "pip install .[dev]", + "jobs": { + "gpu": { + "resources": { + "gpu_ids": "0", + "gpu_style": "none", + "memory": "32GB", + "shm_size": "16g", + "timeout": 3600, + }, + "stages": [{"name": "test", "run": "pytest tests/ -v"}], + }, + }, + }, + }, + } + return normalize_config(raw) + + +@pytest.fixture +def mock_resource_pool(): + pool = MagicMock(spec=res.ResourcePool) + pool.platform = "nvidia" + pool.allocate.return_value = ([0], True) + pool.release.return_value = None + pool.get_status.return_value = {"platform": "nvidia", "gpus": [], "allocated_gpu_ids": [], "system": {}} + return pool + + +# --------------------------------------------------------------------------- +# select_jobs +# --------------------------------------------------------------------------- + + +def test_select_jobs_by_name(agent_config): + jobs = agent.select_jobs(agent_config, job_name="nvidia_gpu") + assert jobs == ["nvidia_gpu"] + + +def test_select_jobs_by_platform(agent_config): + jobs = agent.select_jobs(agent_config, platform="nvidia") + assert jobs == ["nvidia_gpu"] + + +def test_select_jobs_by_platform_iluvatar(agent_config): + jobs = agent.select_jobs(agent_config, platform="iluvatar") + assert jobs == 
["iluvatar_gpu"] + + +def test_select_jobs_all(agent_config): + jobs = agent.select_jobs(agent_config) + assert set(jobs) == {"nvidia_gpu", "iluvatar_gpu"} + + +def test_select_jobs_invalid_name(agent_config): + with pytest.raises(ValueError, match="not_exist"): + agent.select_jobs(agent_config, job_name="not_exist") + + +# --------------------------------------------------------------------------- +# route_jobs +# --------------------------------------------------------------------------- + + +def test_route_jobs_local(agent_config): + local, remote = agent.route_jobs(agent_config, ["nvidia_gpu"], local_platform="nvidia") + assert local == ["nvidia_gpu"] + assert remote == [] + + +def test_route_jobs_remote(agent_config): + local, remote = agent.route_jobs(agent_config, ["iluvatar_gpu"], local_platform="nvidia") + assert local == [] + assert len(remote) == 1 + assert remote[0][0] == "iluvatar_gpu" + assert remote[0][1] == "http://iluvatar-host:8080" + + +def test_route_jobs_mixed(agent_config): + local, remote = agent.route_jobs( + agent_config, ["nvidia_gpu", "iluvatar_gpu"], local_platform="nvidia" + ) + assert local == ["nvidia_gpu"] + assert len(remote) == 1 + + +def test_route_jobs_no_platform(agent_config): + local, remote = agent.route_jobs(agent_config, ["nvidia_gpu", "iluvatar_gpu"]) + assert len(local) == 2 + assert remote == [] + + +# --------------------------------------------------------------------------- +# verify_signature +# --------------------------------------------------------------------------- + + +def test_verify_signature_valid(): + secret = "my-secret" + body = b'{"action": "push"}' + sig = "sha256=" + hmac.new(secret.encode(), body, hashlib.sha256).hexdigest() + assert agent.verify_signature(secret, body, sig) is True + + +def test_verify_signature_invalid(): + assert agent.verify_signature("secret", b"body", "sha256=wrong") is False + + +def test_verify_signature_empty(): + assert agent.verify_signature("secret", b"body", "") is False 
+ + +# --------------------------------------------------------------------------- +# JobRequest / JobResult +# --------------------------------------------------------------------------- + + +def test_job_request_fields(agent_config): + req = agent.JobRequest("nvidia_gpu", "master", "abc123", agent_config) + assert req.job_name == "nvidia_gpu" + assert req.platform == "nvidia" + assert req.commit_sha == "abc123" + assert len(req.job_id) == 8 + d = req.to_dict() + assert d["job_name"] == "nvidia_gpu" + + +def test_job_result_success(): + r = agent.JobResult("id1", "nvidia_gpu", "abc", 0, Path("/tmp/res"), 42.5) + assert r.state == "success" + + +def test_job_result_failure(): + r = agent.JobResult("id1", "nvidia_gpu", "abc", 1, Path("/tmp/res"), 10.0) + assert r.state == "failure" + + +# --------------------------------------------------------------------------- +# Scheduler +# --------------------------------------------------------------------------- + + +def test_scheduler_submit_and_run(agent_config, mock_resource_pool, monkeypatch): + monkeypatch.setattr("subprocess.run", lambda cmd, **kw: MagicMock(returncode=0)) + monkeypatch.setattr("agent.gh.post_commit_status", lambda *a, **kw: True) + + scheduler = agent.Scheduler( + agent_config, "nvidia", mock_resource_pool, + results_dir=Path("/tmp/test-results"), + no_status=True, dry_run=True, + ) + req = agent.JobRequest("nvidia_gpu", "master", "abc123", agent_config, + results_dir=Path("/tmp/test-results")) + jid = scheduler.submit(req) + results = scheduler.wait_all() + assert len(results) == 1 + assert results[0].state == "success" + + +def test_scheduler_queues_when_no_resources(agent_config, monkeypatch): + pool = MagicMock(spec=res.ResourcePool) + pool.allocate.return_value = ([], False) + pool.get_status.return_value = {"platform": "nvidia", "gpus": [], "allocated_gpu_ids": [], "system": {}} + + scheduler = agent.Scheduler( + agent_config, "nvidia", pool, + no_status=True, dry_run=False, + ) + + req = 
agent.JobRequest("nvidia_gpu", "master", "abc123", agent_config) + scheduler.submit(req) + + info = scheduler.get_job(req.job_id) + assert info["state"] == "queued" + + +def test_scheduler_get_status(agent_config, mock_resource_pool): + scheduler = agent.Scheduler( + agent_config, "nvidia", mock_resource_pool, + no_status=True, dry_run=True, + ) + + status = scheduler.get_status() + assert "queued" in status + assert "running" in status + assert "completed" in status + assert "resources" in status + + +# --------------------------------------------------------------------------- +# WebhookHandler — push event parsing +# --------------------------------------------------------------------------- + + +def test_webhook_parse_push(): + handler = agent.WebhookHandler.__new__(agent.WebhookHandler) + payload = {"ref": "refs/heads/feat/test", "after": "abc123def456"} + branch, sha = handler._parse_push(payload) + assert branch == "feat/test" + assert sha == "abc123def456" + + +def test_webhook_parse_pr(): + handler = agent.WebhookHandler.__new__(agent.WebhookHandler) + payload = { + "pull_request": { + "head": { + "ref": "feat/pr-branch", + "sha": "def789", + } + } + } + branch, sha = handler._parse_pull_request(payload) + assert branch == "feat/pr-branch" + assert sha == "def789" + + +# --------------------------------------------------------------------------- +# Integration-style: webhook HTTP test +# --------------------------------------------------------------------------- + + +def _urlopen_no_proxy(url_or_req, **kwargs): + """urlopen that bypasses any HTTP_PROXY.""" + import urllib.request + + opener = urllib.request.build_opener(urllib.request.ProxyHandler({})) + return opener.open(url_or_req, **kwargs) + + +def test_health_endpoint(agent_config, mock_resource_pool): + scheduler = agent.Scheduler( + agent_config, "nvidia", mock_resource_pool, + no_status=True, + ) + server = agent.AgentServer( + "127.0.0.1", 0, agent_config, scheduler, "nvidia", + ) + port = 
server.server_address[1] + + t = threading.Thread(target=server.handle_request, daemon=True) + t.start() + + try: + resp = _urlopen_no_proxy(f"http://127.0.0.1:{port}/health", timeout=5) + data = json.loads(resp.read()) + assert data["status"] == "ok" + assert data["platform"] == "nvidia" + finally: + server.server_close() + + +def test_api_run_endpoint(agent_config, mock_resource_pool, monkeypatch): + monkeypatch.setattr("agent.gh.post_commit_status", lambda *a, **kw: True) + + scheduler = agent.Scheduler( + agent_config, "nvidia", mock_resource_pool, + no_status=True, dry_run=True, + ) + server = agent.AgentServer( + "127.0.0.1", 0, agent_config, scheduler, "nvidia", + results_dir=Path("/tmp/test-results"), + ) + port = server.server_address[1] + + t = threading.Thread(target=server.handle_request, daemon=True) + t.start() + + import urllib.request + + body = json.dumps({"branch": "master", "commit_sha": "abc123"}).encode() + req = urllib.request.Request( + f"http://127.0.0.1:{port}/api/run", + data=body, + headers={"Content-Type": "application/json"}, + ) + + try: + resp = _urlopen_no_proxy(req, timeout=5) + data = json.loads(resp.read()) + assert data["accepted"] is True + assert len(data["job_ids"]) >= 1 + finally: + server.server_close() + + +def test_webhook_with_signature(agent_config, mock_resource_pool, monkeypatch): + monkeypatch.setattr("agent.gh.post_commit_status", lambda *a, **kw: True) + + scheduler = agent.Scheduler( + agent_config, "nvidia", mock_resource_pool, + no_status=True, dry_run=True, + ) + secret = "test-secret" + server = agent.AgentServer( + "127.0.0.1", 0, agent_config, scheduler, "nvidia", + webhook_secret=secret, + results_dir=Path("/tmp/test-results"), + ) + port = server.server_address[1] + + t = threading.Thread(target=server.handle_request, daemon=True) + t.start() + + import urllib.request + + payload = json.dumps({ + "ref": "refs/heads/master", + "after": "abc123def456", + }).encode() + sig = "sha256=" + 
hmac.new(secret.encode(), payload, hashlib.sha256).hexdigest() + + req = urllib.request.Request( + f"http://127.0.0.1:{port}/webhook", + data=payload, + headers={ + "Content-Type": "application/json", + "X-GitHub-Event": "push", + "X-Hub-Signature-256": sig, + }, + ) + + try: + resp = _urlopen_no_proxy(req, timeout=5) + data = json.loads(resp.read()) + assert data["accepted"] is True + finally: + server.server_close() + + +def test_webhook_invalid_signature(agent_config, mock_resource_pool): + scheduler = agent.Scheduler( + agent_config, "nvidia", mock_resource_pool, + no_status=True, + ) + server = agent.AgentServer( + "127.0.0.1", 0, agent_config, scheduler, "nvidia", + webhook_secret="real-secret", + ) + port = server.server_address[1] + + t = threading.Thread(target=server.handle_request, daemon=True) + t.start() + + import urllib.error + import urllib.request + + payload = b'{"ref": "refs/heads/master", "after": "abc"}' + req = urllib.request.Request( + f"http://127.0.0.1:{port}/webhook", + data=payload, + headers={ + "Content-Type": "application/json", + "X-GitHub-Event": "push", + "X-Hub-Signature-256": "sha256=invalid", + }, + ) + + try: + with pytest.raises(urllib.error.HTTPError) as exc_info: + _urlopen_no_proxy(req, timeout=5) + + assert exc_info.value.code == 401 + finally: + server.server_close() + + +# --------------------------------------------------------------------------- +# API token authentication +# --------------------------------------------------------------------------- + + +def test_api_run_requires_token(agent_config, mock_resource_pool, monkeypatch): + """When api_token is set, /api/run rejects requests without valid token.""" + monkeypatch.setattr("agent.gh.post_commit_status", lambda *a, **kw: True) + + scheduler = agent.Scheduler( + agent_config, "nvidia", mock_resource_pool, + no_status=True, dry_run=True, + ) + server = agent.AgentServer( + "127.0.0.1", 0, agent_config, scheduler, "nvidia", + api_token="my-secret-token", + 
results_dir=Path("/tmp/test-results"), + ) + port = server.server_address[1] + + t = threading.Thread(target=server.handle_request, daemon=True) + t.start() + + import urllib.error + import urllib.request + + body = json.dumps({"branch": "master", "commit_sha": "abc123"}).encode() + req = urllib.request.Request( + f"http://127.0.0.1:{port}/api/run", + data=body, + headers={"Content-Type": "application/json"}, + ) + + try: + with pytest.raises(urllib.error.HTTPError) as exc_info: + _urlopen_no_proxy(req, timeout=5) + + assert exc_info.value.code == 401 + finally: + server.server_close() + + +def test_api_run_accepts_valid_token(agent_config, mock_resource_pool, monkeypatch): + """When api_token is set, /api/run accepts requests with correct Bearer token.""" + monkeypatch.setattr("agent.gh.post_commit_status", lambda *a, **kw: True) + + scheduler = agent.Scheduler( + agent_config, "nvidia", mock_resource_pool, + no_status=True, dry_run=True, + ) + server = agent.AgentServer( + "127.0.0.1", 0, agent_config, scheduler, "nvidia", + api_token="my-secret-token", + results_dir=Path("/tmp/test-results"), + ) + port = server.server_address[1] + + t = threading.Thread(target=server.handle_request, daemon=True) + t.start() + + import urllib.request + + body = json.dumps({"branch": "master", "commit_sha": "abc123"}).encode() + req = urllib.request.Request( + f"http://127.0.0.1:{port}/api/run", + data=body, + headers={ + "Content-Type": "application/json", + "Authorization": "Bearer my-secret-token", + }, + ) + + try: + resp = _urlopen_no_proxy(req, timeout=5) + data = json.loads(resp.read()) + assert data["accepted"] is True + finally: + server.server_close() diff --git a/.ci/tests/test_github_status.py b/.ci/tests/test_github_status.py new file mode 100644 index 0000000..0efa36e --- /dev/null +++ b/.ci/tests/test_github_status.py @@ -0,0 +1,144 @@ +import json +from unittest.mock import MagicMock, patch + +import pytest + +import github_status as gh + + +# 
--------------------------------------------------------------------------- +# parse_repo_url +# --------------------------------------------------------------------------- + + +def test_parse_repo_url_https(): + owner, repo = gh.parse_repo_url("https://github.com/InfiniTensor/InfiniOps.git") + assert owner == "InfiniTensor" + assert repo == "InfiniOps" + + +def test_parse_repo_url_https_no_git(): + owner, repo = gh.parse_repo_url("https://github.com/Owner/Repo") + assert owner == "Owner" + assert repo == "Repo" + + +def test_parse_repo_url_ssh(): + owner, repo = gh.parse_repo_url("git@github.com:Owner/Repo.git") + assert owner == "Owner" + assert repo == "Repo" + + +def test_parse_repo_url_invalid(): + owner, repo = gh.parse_repo_url("not-a-url") + assert owner == "" + assert repo == "" + + +# --------------------------------------------------------------------------- +# build_status_context +# --------------------------------------------------------------------------- + + +def test_build_status_context(): + ctx = gh.build_status_context("ci/infiniops", "nvidia_gpu") + assert ctx == "ci/infiniops/nvidia_gpu" + + +# --------------------------------------------------------------------------- +# post_commit_status +# --------------------------------------------------------------------------- + + +def test_post_status_no_token(monkeypatch): + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + result = gh.post_commit_status("owner", "repo", "abc123", "success", "ctx", "desc") + assert result is False + + +def test_post_status_missing_owner(): + result = gh.post_commit_status("", "repo", "abc123", "success", "ctx", "desc", token="tok") + assert result is False + + +def test_post_status_success(monkeypatch): + mock_response = MagicMock() + mock_response.status = 201 + mock_response.__enter__ = MagicMock(return_value=mock_response) + mock_response.__exit__ = MagicMock(return_value=False) + + captured_req = {} + + def mock_urlopen(req, **kwargs): + captured_req["url"] = 
req.full_url + captured_req["data"] = json.loads(req.data) + captured_req["headers"] = dict(req.headers) + return mock_response + + monkeypatch.setattr("urllib.request.urlopen", mock_urlopen) + + result = gh.post_commit_status( + "InfiniTensor", + "InfiniOps", + "abc123def", + "success", + "ci/infiniops/nvidia_gpu", + "Tests passed", + token="ghp_test_token", + ) + + assert result is True + assert "abc123def" in captured_req["url"] + assert captured_req["data"]["state"] == "success" + assert captured_req["data"]["context"] == "ci/infiniops/nvidia_gpu" + assert "ghp_test_token" in captured_req["headers"]["Authorization"] + + +def test_post_status_http_error(monkeypatch): + import urllib.error + + def mock_urlopen(req, **kwargs): + raise urllib.error.HTTPError( + url="", code=422, msg="Unprocessable", hdrs=None, fp=None + ) + + monkeypatch.setattr("urllib.request.urlopen", mock_urlopen) + + result = gh.post_commit_status( + "owner", "repo", "sha", "success", "ctx", "desc", token="tok" + ) + assert result is False + + +def test_post_status_url_error(monkeypatch): + import urllib.error + + def mock_urlopen(req, **kwargs): + raise urllib.error.URLError("connection refused") + + monkeypatch.setattr("urllib.request.urlopen", mock_urlopen) + + result = gh.post_commit_status( + "owner", "repo", "sha", "success", "ctx", "desc", token="tok" + ) + assert result is False + + +def test_post_status_truncates_description(monkeypatch): + mock_response = MagicMock() + mock_response.status = 201 + mock_response.__enter__ = MagicMock(return_value=mock_response) + mock_response.__exit__ = MagicMock(return_value=False) + + captured = {} + + def mock_urlopen(req, **kwargs): + captured["data"] = json.loads(req.data) + return mock_response + + monkeypatch.setattr("urllib.request.urlopen", mock_urlopen) + + long_desc = "x" * 200 + gh.post_commit_status("o", "r", "sha", "success", "ctx", long_desc, token="tok") + + assert len(captured["data"]["description"]) == 140 diff --git 
a/.ci/tests/test_resource.py b/.ci/tests/test_resource.py new file mode 100644 index 0000000..b75043c --- /dev/null +++ b/.ci/tests/test_resource.py @@ -0,0 +1,324 @@ +import threading + +import pytest + +import ci_resource as res + + +# --------------------------------------------------------------------------- +# GpuInfo / SystemResources +# --------------------------------------------------------------------------- + + +def test_gpu_info_fields(): + g = res.GpuInfo(index=0, memory_used_mb=1000, memory_total_mb=8000, utilization_pct=50) + assert g.index == 0 + assert g.memory_total_mb == 8000 + + +def test_system_resources_fields(): + s = res.SystemResources(total_memory_mb=32000, available_memory_mb=16000, cpu_count=8) + assert s.cpu_count == 8 + + +# --------------------------------------------------------------------------- +# detect_gpus +# --------------------------------------------------------------------------- + + +def test_detect_gpus_nvidia_parses_csv(monkeypatch): + csv_output = "0, 512, 8192, 5\n1, 1024, 8192, 80\n" + + def mock_run(cmd, **kwargs): + class R: + returncode = 0 + stdout = csv_output + + return R() + + monkeypatch.setattr("subprocess.run", mock_run) + + pool = res.ResourcePool("nvidia") + gpus = pool.detect_gpus() + assert len(gpus) == 2 + assert gpus[0].index == 0 + assert gpus[0].memory_used_mb == 512 + assert gpus[0].utilization_pct == 5 + assert gpus[1].index == 1 + assert gpus[1].utilization_pct == 80 + + +def test_detect_gpus_empty_on_failure(monkeypatch): + def mock_run(cmd, **kwargs): + class R: + returncode = 1 + stdout = "" + + return R() + + monkeypatch.setattr("subprocess.run", mock_run) + + pool = res.ResourcePool("nvidia") + assert pool.detect_gpus() == [] + + +def test_detect_gpus_unknown_platform(): + pool = res.ResourcePool("unknown_platform") + assert pool.detect_gpus() == [] + + +def test_detect_gpus_file_not_found(monkeypatch): + def mock_run(cmd, **kwargs): + raise FileNotFoundError("nvidia-smi not found") + + 
monkeypatch.setattr("subprocess.run", mock_run) + + pool = res.ResourcePool("nvidia") + assert pool.detect_gpus() == [] + + +# --------------------------------------------------------------------------- +# detect_system_resources +# --------------------------------------------------------------------------- + + +def test_detect_system_resources(monkeypatch, tmp_path): + meminfo = tmp_path / "meminfo" + meminfo.write_text( + "MemTotal: 32000000 kB\n" + "MemFree: 10000000 kB\n" + "MemAvailable: 20000000 kB\n" + ) + + import io + _real_open = open + + def fake_open(path, **kw): + if str(path) == "/proc/meminfo": + return _real_open(str(meminfo), **kw) + return _real_open(path, **kw) + + monkeypatch.setattr("builtins.open", fake_open) + + pool = res.ResourcePool("nvidia") + sys_res = pool.detect_system_resources() + assert abs(sys_res.total_memory_mb - 32000000 / 1024) < 1 + assert abs(sys_res.available_memory_mb - 20000000 / 1024) < 1 + assert sys_res.cpu_count > 0 + + +# --------------------------------------------------------------------------- +# get_free_gpus +# --------------------------------------------------------------------------- + + +def test_get_free_gpus_filters_by_utilization(monkeypatch): + csv_output = "0, 100, 8192, 5\n1, 4000, 8192, 95\n2, 200, 8192, 8\n" + + def mock_run(cmd, **kwargs): + class R: + returncode = 0 + stdout = csv_output + + return R() + + monkeypatch.setattr("subprocess.run", mock_run) + + pool = res.ResourcePool("nvidia", utilization_threshold=10) + free = pool.get_free_gpus() + assert 0 in free + assert 2 in free + assert 1 not in free + + +# --------------------------------------------------------------------------- +# allocate / release +# --------------------------------------------------------------------------- + + +def test_allocate_success(monkeypatch): + csv_output = "0, 100, 8192, 5\n1, 200, 8192, 3\n" + + def mock_run(cmd, **kwargs): + class R: + returncode = 0 + stdout = csv_output + + return R() + + 
monkeypatch.setattr("subprocess.run", mock_run) + + pool = res.ResourcePool("nvidia", utilization_threshold=10) + gpu_ids, ok = pool.allocate(1) + assert ok is True + assert len(gpu_ids) == 1 + assert gpu_ids[0] in (0, 1) + + +def test_allocate_insufficient_gpus(monkeypatch): + csv_output = "0, 100, 8192, 5\n" + + def mock_run(cmd, **kwargs): + class R: + returncode = 0 + stdout = csv_output + + return R() + + monkeypatch.setattr("subprocess.run", mock_run) + + pool = res.ResourcePool("nvidia", utilization_threshold=10) + gpu_ids, ok = pool.allocate(3) + assert ok is False + assert gpu_ids == [] + + +def test_allocate_zero_gpus(): + pool = res.ResourcePool("unknown") + gpu_ids, ok = pool.allocate(0) + assert ok is True + assert gpu_ids == [] + + +def test_release_frees_gpus(monkeypatch): + csv_output = "0, 100, 8192, 5\n1, 200, 8192, 3\n" + + def mock_run(cmd, **kwargs): + class R: + returncode = 0 + stdout = csv_output + + return R() + + monkeypatch.setattr("subprocess.run", mock_run) + + pool = res.ResourcePool("nvidia", utilization_threshold=10) + gpu_ids, ok = pool.allocate(2) + assert ok is True + assert len(gpu_ids) == 2 + + # All GPUs allocated, next allocation should fail + _, ok2 = pool.allocate(1) + assert ok2 is False + + # Release one + pool.release([gpu_ids[0]]) + gpu_ids2, ok3 = pool.allocate(1) + assert ok3 is True + assert gpu_ids2 == [gpu_ids[0]] + + +def test_allocate_excludes_allocated(monkeypatch): + csv_output = "0, 100, 8192, 5\n1, 200, 8192, 3\n" + + def mock_run(cmd, **kwargs): + class R: + returncode = 0 + stdout = csv_output + + return R() + + monkeypatch.setattr("subprocess.run", mock_run) + + pool = res.ResourcePool("nvidia", utilization_threshold=10) + gpu_ids1, _ = pool.allocate(1) + gpu_ids2, _ = pool.allocate(1) + + assert gpu_ids1 != gpu_ids2 + assert set(gpu_ids1 + gpu_ids2) == {0, 1} + + +def test_thread_safety(monkeypatch): + csv_output = "0, 0, 8192, 0\n1, 0, 8192, 0\n2, 0, 8192, 0\n3, 0, 8192, 0\n" + + def mock_run(cmd, 
**kwargs): + class R: + returncode = 0 + stdout = csv_output + + return R() + + monkeypatch.setattr("subprocess.run", mock_run) + + pool = res.ResourcePool("nvidia", utilization_threshold=50) + allocated_all = [] + lock = threading.Lock() + + def allocate_one(): + ids, ok = pool.allocate(1) + + if ok: + with lock: + allocated_all.extend(ids) + + threads = [threading.Thread(target=allocate_one) for _ in range(4)] + + for t in threads: + t.start() + + for t in threads: + t.join() + + assert len(allocated_all) == 4 + assert len(set(allocated_all)) == 4 + + +# --------------------------------------------------------------------------- +# get_status +# --------------------------------------------------------------------------- + + +def test_get_status(monkeypatch): + csv_output = "0, 512, 8192, 5\n" + + def mock_run(cmd, **kwargs): + class R: + returncode = 0 + stdout = csv_output + + return R() + + monkeypatch.setattr("subprocess.run", mock_run) + + pool = res.ResourcePool("nvidia") + status = pool.get_status() + assert status["platform"] == "nvidia" + assert len(status["gpus"]) == 1 + assert "system" in status + + +# --------------------------------------------------------------------------- +# parse_gpu_requirement / parse_memory_requirement +# --------------------------------------------------------------------------- + + +def test_parse_gpu_requirement_nvidia(): + job = {"resources": {"gpu_ids": "0,1", "gpu_style": "nvidia"}} + assert res.parse_gpu_requirement(job) == 2 + + +def test_parse_gpu_requirement_none(): + job = {"resources": {"gpu_style": "none"}} + assert res.parse_gpu_requirement(job) == 0 + + +def test_parse_gpu_requirement_all(): + job = {"resources": {"gpu_ids": "all"}} + assert res.parse_gpu_requirement(job) == 0 + + +def test_parse_gpu_requirement_default(): + job = {"resources": {"gpu_ids": "0"}} + assert res.parse_gpu_requirement(job) == 1 + + +def test_parse_memory_requirement_gb(): + assert res.parse_memory_requirement({"resources": {"memory": 
"32GB"}}) == 32 * 1024 + + +def test_parse_memory_requirement_mb(): + assert res.parse_memory_requirement({"resources": {"memory": "512MB"}}) == 512 + + +def test_parse_memory_requirement_empty(): + assert res.parse_memory_requirement({"resources": {}}) == 0 diff --git a/.ci/tests/test_utils.py b/.ci/tests/test_utils.py new file mode 100644 index 0000000..2a930d3 --- /dev/null +++ b/.ci/tests/test_utils.py @@ -0,0 +1,90 @@ +from utils import normalize_config + + +def test_normalize_creates_flat_jobs(): + raw = { + "repo": {"url": "https://github.com/org/repo.git"}, + "platforms": { + "nvidia": { + "image": {"dockerfile": ".ci/images/nvidia/"}, + "setup": "pip install .", + "docker_args": ["--gpus", "all"], + "jobs": { + "gpu": { + "resources": {"gpu_ids": "0"}, + "stages": [{"name": "test", "run": "pytest"}], + }, + "multi_gpu": { + "resources": {"gpu_ids": "0,1"}, + "stages": [{"name": "test", "run": "pytest"}], + }, + }, + }, + }, + } + config = normalize_config(raw) + + assert "nvidia_gpu" in config["jobs"] + assert "nvidia_multi_gpu" in config["jobs"] + assert config["jobs"]["nvidia_gpu"]["platform"] == "nvidia" + assert config["jobs"]["nvidia_gpu"]["setup"] == "pip install ." 
+ assert config["jobs"]["nvidia_gpu"]["docker_args"] == ["--gpus", "all"] + assert config["jobs"]["nvidia_gpu"]["resources"]["gpu_ids"] == "0" + assert config["jobs"]["nvidia_multi_gpu"]["resources"]["gpu_ids"] == "0,1" + + +def test_normalize_extracts_images(): + raw = { + "platforms": { + "nvidia": { + "image": { + "dockerfile": ".ci/images/nvidia/", + "build_args": {"BASE_IMAGE": "pytorch:latest"}, + }, + "jobs": {}, + }, + }, + } + config = normalize_config(raw) + assert config["images"]["nvidia"]["dockerfile"] == ".ci/images/nvidia/" + assert config["images"]["nvidia"]["build_args"]["BASE_IMAGE"] == "pytorch:latest" + + +def test_normalize_job_overrides_platform_defaults(): + raw = { + "platforms": { + "nvidia": { + "setup": "default setup", + "jobs": { + "special": { + "setup": "custom setup", + "stages": [], + }, + }, + }, + }, + } + config = normalize_config(raw) + assert config["jobs"]["nvidia_special"]["setup"] == "custom setup" + + +def test_normalize_preserves_top_level_keys(): + raw = { + "repo": {"url": "https://github.com/org/repo.git"}, + "github": {"status_context_prefix": "ci/test"}, + "agents": {"nvidia": {"url": "http://host:8080"}}, + "platforms": {}, + } + config = normalize_config(raw) + assert config["repo"]["url"] == "https://github.com/org/repo.git" + assert config["github"]["status_context_prefix"] == "ci/test" + assert config["agents"]["nvidia"]["url"] == "http://host:8080" + + +def test_normalize_passthrough_flat_config(): + """Old flat format without 'platforms' key is returned as-is.""" + flat = { + "images": {"nvidia": {}}, + "jobs": {"nvidia_gpu": {"platform": "nvidia"}}, + } + assert normalize_config(flat) is flat diff --git a/.ci/utils.py b/.ci/utils.py new file mode 100644 index 0000000..7932ba6 --- /dev/null +++ b/.ci/utils.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +"""Shared utilities for the CI toolchain.""" + +import subprocess +import sys + +try: + import yaml +except ImportError: + print( + "error: pyyaml is required. 
Install with: pip install pyyaml", file=sys.stderr + ) + sys.exit(1) + + +def normalize_config(raw): + """Convert platform-centric config to flat images/jobs format. + + Input (new format): + platforms: + nvidia: + image: {dockerfile: ..., build_args: ...} + setup: pip install .[dev] + jobs: + gpu: {resources: ..., stages: ...} + + Output (flat format consumed by run.py / build.py / agent.py): + images: + nvidia: {dockerfile: ..., build_args: ...} + jobs: + nvidia_gpu: {platform: nvidia, setup: ..., resources: ..., stages: ...} + + If the config already uses the flat format (no 'platforms' key), returns as-is. + """ + if "platforms" not in raw: + return raw + + config = {} + + for key in ("repo", "github", "agents"): + if key in raw: + config[key] = raw[key] + + config["images"] = {} + config["jobs"] = {} + + for platform, pcfg in raw.get("platforms", {}).items(): + # Image config + if "image" in pcfg: + config["images"][platform] = pcfg["image"] + + # Platform-level defaults inherited by jobs + defaults = {} + + for key in ("image_tag", "docker_args", "volumes", "setup", "env"): + if key in pcfg: + defaults[key] = pcfg[key] + + # Flatten jobs: {platform}_{job_name} + for job_name, job_cfg in pcfg.get("jobs", {}).items(): + full_name = f"{platform}_{job_name}" + flat = { + "platform": platform, + "image": defaults.get("image_tag", "latest"), + } + + # Apply platform defaults + for key in ("docker_args", "volumes", "setup", "env"): + if key in defaults: + flat[key] = defaults[key] + + # Job-level overrides + flat.update(job_cfg) + + config["jobs"][full_name] = flat + + return config + + +def load_config(path): + """Load a YAML config file and normalize to flat format.""" + with open(path, encoding="utf-8") as f: + raw = yaml.safe_load(f) + + return normalize_config(raw) + + +def get_git_commit(ref="HEAD", short=True): + """Get git commit SHA. 
Returns 'unknown' on failure.""" + cmd = ["git", "rev-parse"] + + if short: + cmd.append("--short") + + cmd.append(ref) + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode != 0: + return "unknown" + + return result.stdout.strip() From 8da3bc0643e26d82acc15c35c52c4cc525ab97fe Mon Sep 17 00:00:00 2001 From: zhangyue Date: Mon, 23 Mar 2026 06:03:23 +0000 Subject: [PATCH 5/5] docs: add multi-machine deployment guide for NVIDIA and Iluvatar platform --- .ci/README.md | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/.ci/README.md b/.ci/README.md index 33841ca..4e826e8 100644 --- a/.ci/README.md +++ b/.ci/README.md @@ -241,3 +241,154 @@ Agent 自动检测 GPU 利用率和系统内存,动态决定并行度: - `success` / `failure` — job 执行完成 Status context 格式:`ci/infiniops/{job_name}` + +--- + +## 多机部署指南 + +以 NVIDIA + Iluvatar 双平台为例,说明如何在两台机器上部署 Agent 并实现跨平台并行测试。 + +### 前置条件(两台机器共同) + +```bash +# 1. Python 3.10+ 和依赖 +pip install pyyaml + +# 2. Docker 已安装 +docker --version + +# 3. 克隆仓库 +git clone https://github.com/InfiniTensor/InfiniOps.git +cd InfiniOps +``` + +### NVIDIA 机器配置 + +```bash +# 1. 安装 NVIDIA Container Toolkit +# 参考: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html + +# 2. 验证 GPU 可见 +nvidia-smi + +# 3. 构建 CI 镜像 +python .ci/build.py --platform nvidia +``` + +### Iluvatar 机器配置 + +```bash +# 1. 确认 CoreX 运行时已安装 +ixsmi + +# 2. 确认基础镜像已导入(非公开镜像,需提前准备) +docker images | grep corex # 应有 corex:qs_pj20250825 + +# 3. 
构建 CI 镜像 +python .ci/build.py --platform iluvatar +``` + +### 启动 Agent 服务 + +在各自机器上启动 Agent: + +```bash +# NVIDIA 机器 +python .ci/agent.py serve --platform nvidia --port 8080 + +# Iluvatar 机器 +python .ci/agent.py serve --platform iluvatar --port 8080 +``` + +验证连通性: + +```bash +curl http://<NVIDIA机器IP>:8080/health +curl http://<Iluvatar机器IP>:8080/health +``` + +### 配置远程 Agent 地址 + +在触发端的 `config.yaml` 中添加 `agents` 段: + +```yaml +agents: + nvidia: + url: http://<NVIDIA机器IP>:8080 + iluvatar: + url: http://<Iluvatar机器IP>:8080 +``` + +### 触发跨平台测试 + +```bash +# 一键运行所有平台的 job +python .ci/agent.py run --branch master + +# 预览模式(不实际执行) +python .ci/agent.py run --branch master --dry-run --no-status + +# 只运行指定平台 +python .ci/agent.py run --branch master --platform nvidia +``` + +### 可选配置 + +#### GitHub Status 上报 + +两台机器均设置环境变量,各自上报所属平台的测试状态: + +```bash +export GITHUB_TOKEN=ghp_xxxxxxxxxxxx +``` + +#### API Token 认证 + +Agent 暴露在非可信网络时,建议启用 Token 认证: + +```bash +# 启动 Agent 时指定 token +python .ci/agent.py serve --platform nvidia --port 8080 --api-token <token> + +# 或通过环境变量 +export API_TOKEN=<token> +``` + +#### GitHub Webhook 自动触发 + +在 GitHub repo → Settings → Webhooks 中为每台机器添加 Webhook: + +| 字段 | 值 | +|---|---| +| Payload URL | `http://<机器IP>:8080/webhook` | +| Content type | `application/json` | +| Secret | 与 `--webhook-secret` 一致 | +| Events | `push` 和 `pull_request` | + +启动时配置 secret: + +```bash +python .ci/agent.py serve --platform nvidia --port 8080 --webhook-secret <secret> + +# 或通过环境变量 +export WEBHOOK_SECRET=<secret> +``` + +### 验证清单 + +```bash +# 1. 各机器单独 dry-run +python .ci/agent.py run --branch master --platform nvidia --dry-run --no-status +python .ci/agent.py run --branch master --platform iluvatar --dry-run --no-status + +# 2. 健康检查 +curl http://<NVIDIA机器IP>:8080/health +curl http://<Iluvatar机器IP>:8080/health + +# 3. 查看资源状态 +curl http://<NVIDIA机器IP>:8080/status +curl http://<Iluvatar机器IP>:8080/status + +# 4. 跨平台一键测试 +python .ci/agent.py run --branch master +```