From cfba529c359e4412e2210aa31e80f2ad14a5d297 Mon Sep 17 00:00:00 2001 From: Mengci Cai Date: Sat, 14 Mar 2026 13:08:09 +0800 Subject: [PATCH] feat: add copyright check functionality Added copyright check module that automatically verifies copyright and license headers in staged files during commit process. The system checks for copyright notices, validates copyright years, and ensures license declarations are present. For new files, it examines staged content while for modified files it checks working directory content. Users are prompted to confirm continuation if copyright issues are detected. The implementation includes file type detection, content extraction from git staging area, and comprehensive copyright pattern matching. Uses chrono crate for current year comparison to ensure copyright dates are up-to-date. Log: Added automated copyright checking during commit process Influence: 1. Test copyright check with files containing valid copyright headers 2. Verify detection of missing copyright notices in new files 3. Test copyright year validation with outdated years 4. Check license declaration detection for various license types 5. Verify user prompt behavior when copyright issues are found 6. Test skip functionality when using amend mode 7. Validate file type detection for different file extensions 8. 
Test content extraction from both staged and working directory files --- Cargo.lock | 123 ++++++++++++- Cargo.toml | 1 + src/commit.rs | 37 +++- src/copyright_check.rs | 409 +++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 1 + 6 files changed, 566 insertions(+), 6 deletions(-) create mode 100644 src/copyright_check.rs diff --git a/Cargo.lock b/Cargo.lock index 87220a6..e2b82da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "0.6.18" @@ -224,6 +233,19 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link 0.2.1", +] + [[package]] name = "clap" version = "4.5.39" @@ -599,6 +621,7 @@ dependencies = [ "assert_cmd", "async-trait", "base64 0.22.1", + "chrono", "clap", "clap_complete", "config", @@ -869,6 +892,30 @@ dependencies = [ "windows-registry", ] +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.0.0" @@ -1176,6 +1223,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "object" version = "0.36.7" @@ -2342,20 +2398,61 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-registry" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ - "windows-result", - "windows-strings", + "windows-result 0.3.4", + "windows-strings 0.3.1", "windows-targets 0.53.0", ] @@ -2365,7 +2462,16 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - "windows-link", + "windows-link 0.1.1", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link 0.2.1", ] [[package]] @@ -2374,7 +2480,16 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" dependencies = [ - "windows-link", + "windows-link 0.1.1", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link 0.2.1", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 6ffac0e..a2e51ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ anyhow = "1.0" async-trait = "0.1" config = "0.13" regex = "1.10" +chrono = "0.4" textwrap = "0.16" clap = { version = "4.4", features = ["derive"] } clap_complete = { version = "4.5", features = ["unstable-dynamic"] } diff --git a/src/commit.rs b/src/commit.rs index 9e7c194..94a1ada 100644 --- a/src/commit.rs +++ b/src/commit.rs @@ -2,6 +2,8 @@ use regex::Regex; use crate::ai_service; use crate::config; use crate::git; +use crate::review; +use 
crate::copyright_check; /// 从提交消息中提取 Change-Id fn extract_change_id(message: &str) -> Option { @@ -718,9 +720,8 @@ impl CommitMessage { } } -use crate::review; use dialoguer::Confirm; -use log::{debug, info}; +use log::{debug, info, warn}; use std::process::Command; pub async fn generate_commit_message( @@ -786,6 +787,38 @@ pub async fn generate_commit_message( let config = config::Config::load()?; + // 执行版权检查(对于 amend 模式,我们跳过检查) + if !amend { + info!("正在进行版权检查..."); + + // 优先使用 AI 检查,如果失败则回退到硬编码检查 + let check_result = match copyright_check::check_copyright_with_ai(&config).await { + Ok(result) => { + info!("AI 版权检查完成"); + result + } + Err(e) => { + warn!("AI 版权检查失败,回退到硬编码检查: {}", e); + copyright_check::check_copyright()? + } + }; + + let formatted = copyright_check::format_copyright_result(&check_result); + println!("\n{}\n", formatted); + + // 如果有版权问题,询问用户是否继续 + if check_result.has_issues { + if !Confirm::with_theme(&dialoguer::theme::ColorfulTheme::default()) + .with_prompt("发现版权问题,是否继续提交?") + .default(false) + .interact()? 
+            {
+                println!("已取消提交");
+                return Ok(());
+            }
+        }
+    }
+
     // 在确认有差异内容后执行代码审查(对于 amend 模式,我们跳过审查,因为是对已有提交的修改)
     if !amend && !no_review && config.ai_review {
         info!("正在进行代码审查...");
diff --git a/src/copyright_check.rs b/src/copyright_check.rs
new file mode 100644
index 0000000..5be2450
--- /dev/null
+++ b/src/copyright_check.rs
@@ -0,0 +1,441 @@
+//! Copyright and license header checks for staged files.
+
+use anyhow::Result;
+use chrono::Datelike;
+use log::{debug, info, warn};
+use serde_json::Value;
+use std::path::Path;
+use std::process::Command;
+use std::sync::OnceLock;
+
+/// Number of leading lines inspected when looking for a copyright header.
+const HEADER_LINES: usize = 50;
+
+/// A staged file together with the header text used for the copyright check.
+#[derive(Debug, Clone)]
+pub struct FileInfo {
+    /// Path reported by `git diff --cached` (the destination for renames/copies).
+    pub path: String,
+    /// Display name of the language, derived from the file extension.
+    pub file_type: String,
+    /// First `HEADER_LINES` lines of the file; empty when it could not be read.
+    pub content: String,
+    /// Whether git reports the file as newly added (status `A`).
+    pub is_new: bool,
+}
+
+/// Outcome of a copyright check over the staged files.
+#[derive(Debug)]
+pub struct CopyrightCheckResult {
+    /// `true` when `issues` is non-empty.
+    pub has_issues: bool,
+    /// One human-readable message per detected problem.
+    pub issues: Vec<String>,
+}
+
+/// Collects every staged file, except deletions, with its type and header text.
+///
+/// # Errors
+/// Fails when `git` cannot be spawned or `git diff --cached` exits unsuccessfully.
+pub fn get_staged_files() -> Result<Vec<FileInfo>> {
+    // `-z` makes git emit NUL-separated, unquoted fields, so paths containing
+    // spaces or non-ASCII characters survive parsing.
+    let output = Command::new("git")
+        .args(["diff", "--cached", "--name-status", "-z"])
+        .output()?;
+
+    if !output.status.success() {
+        return Err(anyhow::anyhow!("获取暂存文件列表失败"));
+    }
+
+    let listing = String::from_utf8_lossy(&output.stdout);
+    let mut files_info = Vec::new();
+
+    for (status, path) in parse_name_status(&listing) {
+        let is_new = status == "A";
+        let file_type = get_file_type(&path);
+        let content = get_file_content(&path, is_new)?;
+
+        files_info.push(FileInfo {
+            path,
+            file_type,
+            content,
+            is_new,
+        });
+    }
+
+    Ok(files_info)
+}
+
+/// Parses `git diff --name-status -z` output into `(status, path)` pairs.
+///
+/// Deleted entries are dropped because nothing is left to inspect. Renames and
+/// copies carry two paths, of which only the destination is kept.
+fn parse_name_status(listing: &str) -> Vec<(String, String)> {
+    let mut entries = Vec::new();
+    let mut fields = listing.split('\0').filter(|field| !field.is_empty());
+
+    while let Some(status) = fields.next() {
+        let Some(mut path) = fields.next() else { break };
+        if status.starts_with('R') || status.starts_with('C') {
+            let Some(destination) = fields.next() else { break };
+            path = destination;
+        }
+        if !status.starts_with('D') {
+            entries.push((status.to_string(), path.to_string()));
+        }
+    }
+
+    entries
+}
+
+/// Returns the extension of `path`, if it has one that is valid UTF-8.
+fn extension_of(path: &str) -> Option<&str> {
+    Path::new(path).extension().and_then(|ext| ext.to_str())
+}
+
+/// Maps an extension (case-insensitive) to the language it denotes.
+///
+/// Only extensions listed here are subject to the copyright check.
+fn language_for_extension(ext: &str) -> Option<&'static str> {
+    let language = match ext.to_lowercase().as_str() {
+        "rs" => "Rust",
+        "py" => "Python",
+        "js" | "ts" => "JavaScript/TypeScript",
+        "java" => "Java",
+        "c" | "h" => "C",
+        "cpp" | "hpp" | "cc" | "cxx" => "C++",
+        "go" => "Go",
+        "qml" => "QML",
+        _ => return None,
+    };
+    Some(language)
+}
+
+/// Returns a display name for the language of `path`.
+fn get_file_type(path: &str) -> String {
+    match extension_of(path) {
+        Some(ext) => language_for_extension(ext)
+            .map_or_else(|| format!("Unknown ({})", ext), String::from),
+        None => "Unknown".to_string(),
+    }
+}
+
+/// Reports whether `file_path` is a source file that should carry a header.
+fn needs_copyright_check(file_path: &str) -> bool {
+    extension_of(file_path)
+        .and_then(language_for_extension)
+        .is_some()
+}
+
+/// Keeps only the first `HEADER_LINES` lines of `text`.
+fn head_lines(text: &str) -> String {
+    text.lines()
+        .take(HEADER_LINES)
+        .collect::<Vec<_>>()
+        .join("\n")
+}
+
+/// Reads the header of `path` from the git index, or `None` when `git show` fails.
+fn read_staged(path: &str) -> Result<Option<String>> {
+    let spec = format!(":{}", path);
+    let output = Command::new("git")
+        .args(["show", spec.as_str()])
+        .output()?;
+
+    // Decode lossily: a staged binary file must not abort the whole check.
+    Ok(output
+        .status
+        .success()
+        .then(|| head_lines(&String::from_utf8_lossy(&output.stdout))))
+}
+
+/// Reads the header of `path` from the working tree, or `None` if it is unreadable.
+fn read_worktree(path: &str) -> Option<String> {
+    // NOTE(review): git reports paths relative to the repository root, but this read is
+    // relative to the current directory — confirm the tool always runs from the root.
+    std::fs::read(path)
+        .ok()
+        .map(|bytes| head_lines(&String::from_utf8_lossy(&bytes)))
+}
+
+/// Returns the first `HEADER_LINES` lines of a staged file.
+///
+/// New files are read from the index first; modified files are read from the
+/// working tree first. Each falls back to the other source, and an unreadable
+/// file yields an empty string.
+///
+/// # Errors
+/// Fails only when `git` itself cannot be spawned.
+fn get_file_content(path: &str, is_new: bool) -> Result<String> {
+    let content = if is_new {
+        match read_staged(path)? {
+            Some(staged) => Some(staged),
+            None => read_worktree(path),
+        }
+    } else {
+        match read_worktree(path) {
+            Some(current) => Some(current),
+            None => read_staged(path)?,
+        }
+    };
+
+    Ok(content.unwrap_or_default())
+}
+
+/// Returns the lazily compiled pattern that extracts copyright years.
+///
+/// Group 1 is the (start) year; group 2 is the end year of a `2023 - 2026`
+/// style range, written with either a hyphen or an en dash.
+fn year_pattern() -> &'static regex::Regex {
+    static PATTERN: OnceLock<regex::Regex> = OnceLock::new();
+    PATTERN.get_or_init(|| {
+        regex::Regex::new(r"(?:Copyright|copyright|版权).*?(\d{4})(?:\s*[-–]\s*(\d{4}))?")
+            .expect("copyright year pattern is a valid regex")
+    })
+}
+
+/// Checks one file's header with keyword heuristics and returns its issues.
+///
+/// Flags a missing copyright notice, a copyright year older than
+/// `current_year`, and a missing license declaration.
+fn check_file_copyright(file: &FileInfo, current_year: i32) -> Vec<String> {
+    let mut issues = Vec::new();
+    let content = &file.content;
+
+    let has_copyright = ["Copyright", "copyright", "版权"]
+        .iter()
+        .any(|keyword| content.contains(*keyword));
+    if !has_copyright {
+        issues.push(format!("文件 {} 缺少版权声明", file.path));
+    }
+
+    if let Some(caps) = year_pattern().captures(content) {
+        let parse_year = |group: usize| {
+            caps.get(group)
+                .and_then(|year| year.as_str().parse::<i32>().ok())
+        };
+        if let Some(start_year) = parse_year(1) {
+            // For a range the end year is the one that must be current.
+            let year_to_check = parse_year(2).unwrap_or(start_year);
+            if year_to_check < current_year {
+                issues.push(format!(
+                    "文件 {} 的版权年份 {} 可能需要更新到 {}",
+                    file.path, year_to_check, current_year
+                ));
+            }
+        }
+    }
+
+    let has_license = ["License", "license", "MIT", "GPL", "Apache", "BSD"]
+        .iter()
+        .any(|keyword| content.contains(*keyword));
+    if !has_license {
+        issues.push(format!("文件 {} 缺少许可证声明", file.path));
+    }
+
+    issues
+}
+
+/// Logs a summary of `issues` and wraps them in a `CopyrightCheckResult`.
+fn finish_check(issues: Vec<String>) -> CopyrightCheckResult {
+    let has_issues = !issues.is_empty();
+
+    if has_issues {
+        warn!("发现 {} 个版权相关问题", issues.len());
+        for issue in &issues {
+            warn!(" - {}", issue);
+        }
+    } else {
+        info!("所有文件的版权检查通过");
+    }
+
+    CopyrightCheckResult { has_issues, issues }
+}
+
+/// Checks every staged source file with the built-in keyword heuristics.
+///
+/// Files whose extension is not a recognised source type are skipped, matching
+/// the behaviour of `check_copyright_with_ai`.
+///
+/// # Errors
+/// Propagates failures from `get_staged_files`.
+pub fn check_copyright() -> Result<CopyrightCheckResult> {
+    info!("开始检查暂存文件的版权信息...");
+
+    let files = get_staged_files()?;
+    let current_year = chrono::Local::now().year();
+    let mut all_issues = Vec::new();
+
+    for file in &files {
+        if !needs_copyright_check(&file.path) {
+            debug!("跳过不需要版权检查的文件: {} (类型: {})", file.path, file.file_type);
+            continue;
+        }
+
+        debug!("检查文件: {}", file.path);
+        all_issues.extend(check_file_copyright(file, current_year));
+    }
+
+    Ok(finish_check(all_issues))
+}
+
+/// Strips a surrounding Markdown code fence from an AI response, if present.
+///
+/// The opening fence line (including any language tag) is removed, and the
+/// closing fence is removed only when it is actually there.
+fn clean_ai_response(response: &str) -> String {
+    let trimmed = response.trim();
+
+    let Some(after_fence) = trimmed.strip_prefix("```") else {
+        return trimmed.to_string();
+    };
+    // Everything up to the first newline is the fence's language tag.
+    let Some((_, body)) = after_fence.split_once('\n') else {
+        return trimmed.to_string();
+    };
+
+    let body = body.trim_end();
+    body.strip_suffix("```").unwrap_or(body).trim().to_string()
+}
+
+/// Converts the AI's JSON verdict for `path` into issue messages.
+///
+/// Returns `None` when the response is not a JSON object, so the caller can
+/// fall back to the keyword heuristics.
+fn parse_ai_issues(ai_response: &str, path: &str, current_year: i32) -> Option<Vec<String>> {
+    let json: Value = serde_json::from_str(&clean_ai_response(ai_response)).ok()?;
+    if !json.is_object() {
+        return None;
+    }
+
+    let flag = |key: &str| json.get(key).and_then(Value::as_bool);
+    let mut issues = Vec::new();
+
+    if flag("has_copyright") == Some(false) {
+        issues.push(format!("文件 {} 缺少版权声明", path));
+    }
+    if flag("has_license") == Some(false) {
+        issues.push(format!("文件 {} 缺少许可证声明", path));
+    }
+    if flag("year_needs_update") == Some(true) {
+        // Use the locally computed year: the model may echo it as a number,
+        // as placeholder text, or omit it entirely.
+        issues.push(format!(
+            "文件 {} 的版权年份可能需要更新到 {}",
+            path, current_year
+        ));
+    }
+    if let Some(extra) = json.get("issues").and_then(Value::as_array) {
+        issues.extend(
+            extra
+                .iter()
+                .filter_map(Value::as_str)
+                .map(|issue| format!("文件 {} - {}", path, issue)),
+        );
+    }
+
+    Some(issues)
+}
+
+/// Checks every staged source file by asking the configured AI service.
+///
+/// Files that the AI cannot assess (request failure or malformed reply) are
+/// checked with the keyword heuristics instead. The AI service is only created
+/// when at least one staged file needs checking.
+///
+/// # Errors
+/// Propagates failures from `get_staged_files` and from creating the AI service.
+pub async fn check_copyright_with_ai(
+    config: &crate::config::Config,
+) -> Result<CopyrightCheckResult> {
+    info!("开始使用 AI 检查暂存文件的版权信息...");
+
+    let files = get_staged_files()?;
+    let current_year = chrono::Local::now().year();
+
+    let mut candidates = Vec::new();
+    for file in &files {
+        if needs_copyright_check(&file.path) {
+            candidates.push(file);
+        } else {
+            debug!("跳过不需要版权检查的文件: {} (类型: {})", file.path, file.file_type);
+        }
+    }
+    if candidates.is_empty() {
+        return Ok(finish_check(Vec::new()));
+    }
+
+    let translator = crate::ai_service::create_translator(config).await?;
+    let mut all_issues = Vec::new();
+
+    for file in candidates {
+        debug!("使用 AI 检查文件: {} (类型: {})", file.path, file.file_type);
+
+        let prompt = build_copyright_check_prompt(&file.content, &file.file_type, current_year);
+
+        let issues = match translator.chat("你是一个版权声明审核助手。", &prompt).await {
+            Ok(ai_response) => {
+                debug!("AI 响应: {}", ai_response);
+                parse_ai_issues(&ai_response, &file.path, current_year).unwrap_or_else(|| {
+                    warn!("AI 返回格式错误,回退到硬编码检查: {}", file.path);
+                    check_file_copyright(file, current_year)
+                })
+            }
+            Err(e) => {
+                warn!("AI 检查失败,回退到硬编码检查: {} - {}", file.path, e);
+                check_file_copyright(file, current_year)
+            }
+        };
+        all_issues.extend(issues);
+    }
+
+    Ok(finish_check(all_issues))
+}
+
+/// Builds the prompt asking the AI to assess a file header and reply in JSON.
+fn build_copyright_check_prompt(file_content: &str, file_type: &str, current_year: i32) -> String {
+    format!(
+        r#"请检查以下文件内容中的版权声明。
+
+文件类型:{}
+当前年份:{}
+
+请检查以下内容:
+1. 是否包含版权声明(Copyright、copyright、版权等关键词)
+2. 版权年份是否需要更新到当前年份 {}
+3. 是否包含许可证声明(License、MIT、GPL、Apache、BSD 等)
+
+文件内容(前50行):
+{}
+
+请以 JSON 格式返回检查结果,格式如下:
+{{
+  "has_copyright": true/false,
+  "has_license": true/false,
+  "year_needs_update": true/false,
+  "current_year": "当前年份",
+  "issues": ["其他问题的描述列表"]
+}}
+
+注意:
+- 只返回 JSON,不要包含其他文字说明
+- 如果版权年份小于当前年份,year_needs_update 应为 true
+- issues 数组中可以包含其他需要提醒的问题
+- 这是 {} 代码文件,请根据该语言的特点检查版权声明"#,
+        file_type, current_year, current_year, file_content, file_type
+    )
+}
+
+/// Renders a check result as text suitable for printing to the terminal.
+pub fn format_copyright_result(result: &CopyrightCheckResult) -> String {
+    if !result.has_issues {
+        return "✓ 版权检查通过".to_string();
+    }
+
+    let mut output = String::from("✗ 版权检查发现问题:\n");
+    for issue in &result.issues {
+        output.push_str(&format!(" - {}\n", issue));
+    }
+    output
+}
diff --git a/src/lib.rs b/src/lib.rs
index b925d3b..32ef8b1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,7 @@ pub mod ai_service;
 pub mod auth;
 pub mod commit;
 pub mod config;
+pub mod copyright_check;
 pub mod debug;
 pub mod git;
 pub mod github;
diff --git a/src/main.rs b/src/main.rs index a957999..58edbcd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,6 +15,7 @@ mod install; mod commit; mod review; mod ai_service; +mod copyright_check; #[derive(Parser)] #[command(name = "git-commit-helper")]