From 641a90dfc0d03198ac00bfd3addf7d34a9f18c86 Mon Sep 17 00:00:00 2001 From: plusNew001 Date: Wed, 25 Mar 2026 11:41:12 +0000 Subject: [PATCH 01/11] fix xpu ci bug --- .github/workflows/_xpu_4cards_case_test.yml | 15 +++++++ .github/workflows/_xpu_8cards_case_test.yml | 15 +++++++ tests/xpu_ci/conftest.py | 50 +++++++++++++++++++++ 3 files changed, 80 insertions(+) diff --git a/.github/workflows/_xpu_4cards_case_test.yml b/.github/workflows/_xpu_4cards_case_test.yml index 2c4d6deb9e1..4927ee0c503 100644 --- a/.github/workflows/_xpu_4cards_case_test.yml +++ b/.github/workflows/_xpu_4cards_case_test.yml @@ -193,8 +193,14 @@ jobs: echo "============================开始运行pytest测试============================" export PYTHONPATH=/workspace/FastDeploy/ export PYTHONPATH=$(pwd)/tests/xpu_ci:$PYTHONPATH + mkdir -p case_logs + set +e python -m pytest -v -s --tb=short tests/xpu_ci/4cards_cases/ exit_code=$? + set -e + + # 修改case_logs权限,确保Docker外部的runner用户可以读取并上传 + chmod -R a+rX case_logs/ 2>/dev/null || true if [ $exit_code -eq 0 ]; then echo "============================4卡cases测试通过!============================" @@ -203,3 +209,12 @@ jobs: exit $exit_code fi ' + + - name: Upload case logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: xpu-4cards-case-logs + path: FastDeploy/case_logs/ + retention-days: 7 + if-no-files-found: ignore diff --git a/.github/workflows/_xpu_8cards_case_test.yml b/.github/workflows/_xpu_8cards_case_test.yml index b4e5b85083f..10a33c09613 100644 --- a/.github/workflows/_xpu_8cards_case_test.yml +++ b/.github/workflows/_xpu_8cards_case_test.yml @@ -182,8 +182,14 @@ jobs: echo "============================开始运行pytest测试============================" export PYTHONPATH=/workspace/FastDeploy/ export PYTHONPATH=$(pwd)/tests/xpu_ci:$PYTHONPATH + mkdir -p case_logs + set +e python -m pytest -v -s --tb=short tests/xpu_ci/8cards_cases/ exit_code=$? + set -e + + # 修改case_logs权限,确保Docker外部的runner用户可以读取并上传 + chmod -R a+rX case_logs/ 2>/dev/null || true if [ $exit_code -eq 0 ]; then echo "============================8卡cases测试通过!============================" @@ -192,3 +198,12 @@ jobs: exit $exit_code fi ' + + - name: Upload case logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: xpu-8cards-case-logs + path: FastDeploy/case_logs/ + retention-days: 7 + if-no-files-found: ignore diff --git a/tests/xpu_ci/conftest.py b/tests/xpu_ci/conftest.py index 62496bff16f..0b4ee09a46b 100644 --- a/tests/xpu_ci/conftest.py +++ b/tests/xpu_ci/conftest.py @@ -23,6 +23,7 @@ 4. 环境配置 - 设置XPU相关环境变量 """ +import glob import json import os import shutil @@ -32,6 +33,9 @@ import pytest +CASE_LOGS_DIR = os.path.join(os.getcwd(), "case_logs") + + def get_xpu_id(): """获取XPU_ID环境变量""" return int(os.getenv("XPU_ID", "0")) @@ -457,3 +461,49 @@ def setup_logprobs_zmq_env(): os.environ[key] = value print(f"设置环境变量: {key}={value}") return original_values + + +# ============ 日志归档 pytest hook ============ + + +def _archive_case_logs(test_name): + """ + 将当前工作目录下所有 log 开头的文件夹和 server.log 复制到 case_logs/{test_name}/ 下 + """ + dest_dir = os.path.join(CASE_LOGS_DIR, test_name) + os.makedirs(dest_dir, exist_ok=True) + + # 复制所有 log* 目录 + for entry in glob.glob("log*"): + if os.path.isdir(entry): + shutil.copytree(entry, os.path.join(dest_dir, entry), dirs_exist_ok=True) + elif os.path.isfile(entry): + # 处理 server.log 等 log 开头的文件 + shutil.copy2(entry, os.path.join(dest_dir, entry)) + + # 单独处理 server.log(不以 log 开头但也是关键日志) + if os.path.exists("server.log") and not os.path.exists(os.path.join(dest_dir, "server.log")): + shutil.copy2("server.log", os.path.join(dest_dir, "server.log")) + + print(f"\n日志已归档到: {dest_dir}") + # 列出归档内容 + for root, dirs, files in os.walk(dest_dir): + for f in files: + rel = os.path.relpath(os.path.join(root, f), dest_dir) + print(f" 归档文件: {rel}") + + +@pytest.hookimpl(hookwrapper=True, trylast=True) +def pytest_runtest_makereport(item, call): + """每个测试阶段结束后归档日志(仅在 call 阶段后执行)""" + outcome = yield + report = outcome.get_result() + + if report.when == "call": + # 使用测试文件名(不含 .py)作为归档目录名 + test_file = os.path.basename(item.fspath) + test_name = os.path.splitext(test_file)[0] + try: + _archive_case_logs(test_name) + except Exception as e: + print(f"\n警告: 日志归档失败: {e}") From 3fdcec05dfcf06894a0bd6dfe45a291725b81e2e Mon Sep 17 00:00:00 2001 From: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com> Date: Wed, 25 Mar 2026 19:52:36 +0800 Subject: [PATCH 02/11] Remove unnecessary blank line in conftest.py --- tests/xpu_ci/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/xpu_ci/conftest.py b/tests/xpu_ci/conftest.py index 0b4ee09a46b..1466dab34cf 100644 --- a/tests/xpu_ci/conftest.py +++ b/tests/xpu_ci/conftest.py @@ -32,7 +32,6 @@ import pytest - CASE_LOGS_DIR = os.path.join(os.getcwd(), "case_logs") From 1c3c73ad3fabc3889938779cc48a6bb5b8025f28 Mon Sep 17 00:00:00 2001 From: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com> Date: Thu, 26 Mar 2026 11:10:39 +0800 Subject: [PATCH 03/11] Update upload-artifact action to version 6 --- .github/workflows/_xpu_4cards_case_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_xpu_4cards_case_test.yml b/.github/workflows/_xpu_4cards_case_test.yml index 4927ee0c503..e90e5f06193 100644 --- a/.github/workflows/_xpu_4cards_case_test.yml +++ b/.github/workflows/_xpu_4cards_case_test.yml @@ -212,7 +212,7 @@ jobs: - name: Upload case logs if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: xpu-4cards-case-logs path: FastDeploy/case_logs/ From cdbb8633f50b9887d4f6eee6c1654c2838e9e4c1 Mon Sep 17 00:00:00 2001 From: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com> Date: Thu, 26 Mar 2026 11:10:55 +0800 Subject: [PATCH 04/11] Update _xpu_8cards_case_test.yml --- .github/workflows/_xpu_8cards_case_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_xpu_8cards_case_test.yml b/.github/workflows/_xpu_8cards_case_test.yml index 10a33c09613..c9ed0fa2314 100644 --- a/.github/workflows/_xpu_8cards_case_test.yml +++ b/.github/workflows/_xpu_8cards_case_test.yml @@ -201,7 +201,7 @@ jobs: - name: Upload case logs if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: xpu-8cards-case-logs path: FastDeploy/case_logs/ From e1f921a483987f776f245b887cdc3efd57e96c1c Mon Sep 17 00:00:00 2001 From: plusNew001 Date: Thu, 26 Mar 2026 03:31:37 +0000 Subject: [PATCH 05/11] fix ci bug --- tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp1.py | 2 +- tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4.py | 2 +- tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4_cudagraph.py | 2 +- tests/xpu_ci/8cards_cases/test_pd_p_tp4ep4_d_tp1ep4.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp1.py b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp1.py index d7d75090561..2429b8c1458 100644 --- a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp1.py +++ b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp1.py @@ -109,7 +109,7 @@ def print_pd_logs_on_failure(): log_dirs = ["log_router", "log_prefill", "log_decode"] for log_dir in log_dirs: - nohup_path = os.path.join(log_dir, "log_0/worklog.0") + nohup_path = os.path.join(log_dir, "log_0/workerlog.0") if os.path.exists(nohup_path): print(f"\n========== {nohup_path} ==========") with open(nohup_path, "r") as f: diff --git a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4.py b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4.py index 919a5c39942..c1f804e6466 100644 --- a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4.py +++ b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4.py @@ -109,7 +109,7 @@ def print_pd_logs_on_failure(): log_dirs = ["log_router", "log_prefill", "log_decode"] for log_dir in log_dirs: - nohup_path = os.path.join(log_dir, "log_0/worklog.0") + nohup_path = os.path.join(log_dir, "log_0/workerlog.0") if os.path.exists(nohup_path): print(f"\n========== {nohup_path} ==========") with open(nohup_path, "r") as f: diff --git a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4_cudagraph.py b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4_cudagraph.py index 14fcb358f8f..280870f0a55 100644 --- a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4_cudagraph.py +++ b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4_cudagraph.py @@ -109,7 +109,7 @@ def print_pd_logs_on_failure(): log_dirs = ["log_router", "log_prefill", "log_decode"] for log_dir in log_dirs: - nohup_path = os.path.join(log_dir, "log_0/worklog.0") + nohup_path = os.path.join(log_dir, "log_0/workerlog.0") if os.path.exists(nohup_path): print(f"\n========== {nohup_path} ==========") with open(nohup_path, "r") as f: diff --git a/tests/xpu_ci/8cards_cases/test_pd_p_tp4ep4_d_tp1ep4.py b/tests/xpu_ci/8cards_cases/test_pd_p_tp4ep4_d_tp1ep4.py index 2904749a10c..936bc8b6371 100644 --- a/tests/xpu_ci/8cards_cases/test_pd_p_tp4ep4_d_tp1ep4.py +++ b/tests/xpu_ci/8cards_cases/test_pd_p_tp4ep4_d_tp1ep4.py @@ -110,7 +110,7 @@ def print_pd_logs_on_failure(): log_dirs = ["log_router", "log_prefill", "log_decode"] for log_dir in log_dirs: - nohup_path = os.path.join(log_dir, "log_0/worklog.0") + nohup_path = os.path.join(log_dir, "log_0/workerlog.0") if os.path.exists(nohup_path): print(f"\n========== {nohup_path} ==========") with open(nohup_path, "r") as f: From d73a8a9a20cbc4abe18aafb8ebfd5d86a53045ff Mon Sep 17 00:00:00 2001 From: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com> Date: Thu, 26 Mar 2026 14:32:49 +0800 Subject: [PATCH 06/11] Change exit code on test failure to 1 --- .github/workflows/_xpu_4cards_case_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_xpu_4cards_case_test.yml b/.github/workflows/_xpu_4cards_case_test.yml index e90e5f06193..0b819040b5e 100644 --- a/.github/workflows/_xpu_4cards_case_test.yml +++ b/.github/workflows/_xpu_4cards_case_test.yml @@ -206,7 +206,7 @@ jobs: echo "============================4卡cases测试通过!============================" else echo "============================4卡cases测试失败,请检查日志!============================" - exit $exit_code + exit 1 fi ' From 20e47c2749fbb30ce997a49aec76eda473176e69 Mon Sep 17 00:00:00 2001 From: plusNew001 Date: Thu, 26 Mar 2026 07:12:52 +0000 Subject: [PATCH 07/11] fix ci bug --- tests/xpu_ci/conftest.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/xpu_ci/conftest.py b/tests/xpu_ci/conftest.py index 1466dab34cf..162971d4510 100644 --- a/tests/xpu_ci/conftest.py +++ b/tests/xpu_ci/conftest.py @@ -484,12 +484,7 @@ def _archive_case_logs(test_name): if os.path.exists("server.log") and not os.path.exists(os.path.join(dest_dir, "server.log")): shutil.copy2("server.log", os.path.join(dest_dir, "server.log")) - print(f"\n日志已归档到: {dest_dir}") - # 列出归档内容 - for root, dirs, files in os.walk(dest_dir): - for f in files: - rel = os.path.relpath(os.path.join(root, f), dest_dir) - print(f" 归档文件: {rel}") + @pytest.hookimpl(hookwrapper=True, trylast=True) @@ -504,5 +499,5 @@ def pytest_runtest_makereport(item, call): test_name = os.path.splitext(test_file)[0] try: _archive_case_logs(test_name) - except Exception as e: - print(f"\n警告: 日志归档失败: {e}") + except Exception: + pass From b5e3f8fba1418a5c211f820bed85d3db3ffb03d1 Mon Sep 17 00:00:00 2001 From: plusNew001 Date: Thu, 26 Mar 2026 07:15:11 +0000 Subject: [PATCH 08/11] fix ci bug --- .github/workflows/_xpu_4cards_case_test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_xpu_4cards_case_test.yml b/.github/workflows/_xpu_4cards_case_test.yml index 0b819040b5e..e73217589a3 100644 --- a/.github/workflows/_xpu_4cards_case_test.yml +++ b/.github/workflows/_xpu_4cards_case_test.yml @@ -204,9 +204,10 @@ jobs: if [ $exit_code -eq 0 ]; then echo "============================4卡cases测试通过!============================" + exit $1 else echo "============================4卡cases测试失败,请检查日志!============================" - exit 1 + exit $exit_code fi ' From 5ef57d02d2156d4acdddf536db1492a5093995dc Mon Sep 17 00:00:00 2001 From: plusNew001 Date: Thu, 26 Mar 2026 08:45:04 +0000 Subject: [PATCH 09/11] fix ci bug --- .github/workflows/_xpu_4cards_case_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_xpu_4cards_case_test.yml b/.github/workflows/_xpu_4cards_case_test.yml index e73217589a3..8a51cd6d11c 100644 --- a/.github/workflows/_xpu_4cards_case_test.yml +++ b/.github/workflows/_xpu_4cards_case_test.yml @@ -204,7 +204,7 @@ jobs: if [ $exit_code -eq 0 ]; then echo "============================4卡cases测试通过!============================" - exit $1 + exit 1 else echo "============================4卡cases测试失败,请检查日志!============================" exit $exit_code From b112f1a8a126f485b74dc097970efcaa83857a64 Mon Sep 17 00:00:00 2001 From: plusNew001 Date: Thu, 26 Mar 2026 10:46:08 +0000 Subject: [PATCH 10/11] fix ci bug --- .github/workflows/_xpu_4cards_case_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_xpu_4cards_case_test.yml b/.github/workflows/_xpu_4cards_case_test.yml index 8a51cd6d11c..f3c97f40dc6 100644 --- a/.github/workflows/_xpu_4cards_case_test.yml +++ b/.github/workflows/_xpu_4cards_case_test.yml @@ -204,7 +204,7 @@ jobs: if [ $exit_code -eq 0 ]; then echo "============================4卡cases测试通过!============================" - exit 1 + exit $exit_code else echo "============================4卡cases测试失败,请检查日志!============================" exit $exit_code From b142c220dec38e95faffc21ff817c23955f0dcfe Mon Sep 17 00:00:00 2001 From: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com> Date: Thu, 26 Mar 2026 20:57:22 +0800 Subject: [PATCH 11/11] Update conftest.py --- tests/xpu_ci/conftest.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/xpu_ci/conftest.py b/tests/xpu_ci/conftest.py index 162971d4510..ae0c95d727a 100644 --- a/tests/xpu_ci/conftest.py +++ b/tests/xpu_ci/conftest.py @@ -485,8 +485,6 @@ def _archive_case_logs(test_name): shutil.copy2("server.log", os.path.join(dest_dir, "server.log")) - - @pytest.hookimpl(hookwrapper=True, trylast=True) def pytest_runtest_makereport(item, call): """每个测试阶段结束后归档日志(仅在 call 阶段后执行)"""