From 800b47ce750d39ba29026d21df37bda3708d2979 Mon Sep 17 00:00:00 2001 From: brijeshp56 Date: Thu, 26 Mar 2026 12:03:40 +0530 Subject: [PATCH 01/12] PDP-981 : Update copyrightcheck.py for multiline exclude list Updated code to support both single line and multi line exclude list. --- scripts/copyrightcheck.py | 60 +++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index 85c1cbc..d4b7dfc 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -54,33 +54,49 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: # Reset file pointer to beginning f.seek(0) + current_multiline_key = None for line_num, line in enumerate(f, 1): line = line.strip() - # Skip empty lines and comments - if not line or line.startswith('#'): + # Empty line ends any active multi-line block + if not line: + current_multiline_key = None continue - # Parse key:value pairs + # Skip comments + if line.startswith('#'): + continue + + # Detect key:value pairs — key must be a simple word (no path chars) if ':' in line: - key, value = line.split(':', 1) - key = key.strip().lower() - value = value.strip() - - if key == 'startyear': - try: - config['startyear'] = int(value) - except ValueError: - print(f"Error: Invalid start year '{value}'. Must be a valid integer.") - sys.exit(1) - - elif key == 'filesexcluded': - # Parse comma-separated list or single file - if value: - files = [f.strip() for f in value.split(',')] - config['filesexcluded'] = [f for f in files if f] - else: - config['filesexcluded'] = [] + key_part, value_part = line.split(':', 1) + key_candidate = key_part.strip().lower() + if re.match(r'^[a-z][a-z0-9]*$', key_candidate): + current_multiline_key = None + key = key_candidate + value = value_part.strip() + + if key == 'startyear': + try: + config['startyear'] = int(value) + except ValueError: + print(f"Error: Invalid start year '{value}'. Must be a valid integer.") + sys.exit(1) + + elif key == 'filesexcluded': + if value: + # Single-line: comma-separated or single entry + files = [f.strip() for f in value.split(',')] + config['filesexcluded'] = [f for f in files if f] + else: + # Multi-line: collect subsequent lines as entries + config['filesexcluded'] = [] + current_multiline_key = 'filesexcluded' + continue + + # Continuation line for an active multi-line key + if current_multiline_key == 'filesexcluded': + config['filesexcluded'].append(line) print("✅ Parsed configuration:") for key, value in config.items(): @@ -448,4 +464,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() From 2b49a642bea893b22c069cc113f7e2f330ac3f85 Mon Sep 17 00:00:00 2001 From: brijeshp56 Date: Thu, 26 Mar 2026 12:23:46 +0530 Subject: [PATCH 02/12] PDP-981 : Update copyrightcheck.py to have exclude file in multiple lines Update copyrightcheck.py to have exclude file in multiple lines --- scripts/copyrightcheck.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index d4b7dfc..5d754f8 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -95,8 +95,10 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: continue # Continuation line for an active multi-line key + # Each line may contain one or more comma-separated entries if current_multiline_key == 'filesexcluded': - config['filesexcluded'].append(line) + entries = [e.strip() for e in line.split(',')] + config['filesexcluded'].extend([e for e in entries if e]) print("✅ Parsed configuration:") for key, value in config.items(): From 2da58a3da1715fd68c345016721acdb316add6da Mon Sep 17 00:00:00 2001 From: brijeshp56 Date: Thu, 26 Mar 2026 16:21:57 +0530 Subject: [PATCH 03/12] PDP-981 : Update scripts/copyrightcheck.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/copyrightcheck.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index 5d754f8..b8b507c 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -58,9 +58,8 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: for line_num, line in enumerate(f, 1): line = line.strip() - # Empty line ends any active multi-line block + # Empty line: skip but keep any active multi-line block if not line: - current_multiline_key = None continue # Skip comments From c2492b77842c1c63f3b7551a443e8c394d8ae60c Mon Sep 17 00:00:00 2001 From: brijeshp56 Date: Thu, 26 Mar 2026 19:54:41 +0530 Subject: [PATCH 04/12] PDP-981 : Update copyrightcheck.py Update copyrightcheck.py --- scripts/copyrightcheck.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index b8b507c..5d754f8 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -58,8 +58,9 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: for line_num, line in enumerate(f, 1): line = line.strip() - # Empty line: skip but keep any active multi-line block + # Empty line ends any active multi-line block if not line: + current_multiline_key = None continue # Skip comments From 62b07d75de6e6247434cdb1b58d5639ea1f5b221 Mon Sep 17 00:00:00 2001 From: brijeshp56 Date: Thu, 26 Mar 2026 21:06:18 +0530 Subject: [PATCH 05/12] PDP-981 : Update copyrightcheck.py Update copyrightcheck.py --- scripts/copyrightcheck.py | 243 +++++--------------------------------- 1 file changed, 32 insertions(+), 211 deletions(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index 5d754f8..0d31dc4 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -38,7 +38,25 @@ def __init__(self, config_file: str): self.excluded_files = set(excluded_files_list) def _load_config(self, config_file: str) -> Dict[str, Any]: - """Load configuration from plain text file.""" + """Load configuration from plain text file. + + Supports both single-line and multiline filesexcluded values: + + Single-line: + filesexcluded: README.MD,.github/* + + Mixed (inline value + continuation lines): + filesexcluded: README.MD + .github/* + src/scripts/brijeshtest.py + + Multiline only (empty inline value): + filesexcluded: + .github/* + src/scripts/brijeshtest.py + + Continuation lines are collected until an empty line or a new key: is found. + """ config = {} print(f"📋 Loading copyright config from: {config_file}") @@ -84,14 +102,13 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: sys.exit(1) elif key == 'filesexcluded': - if value: - # Single-line: comma-separated or single entry - files = [f.strip() for f in value.split(',')] - config['filesexcluded'] = [f for f in files if f] - else: - # Multi-line: collect subsequent lines as entries - config['filesexcluded'] = [] - current_multiline_key = 'filesexcluded' + # Always initialise the list and activate multiline mode. + # This supports: + # - empty inline value → purely multiline + # - non-empty inline value → inline entries + optional continuation lines + files = [f.strip() for f in value.split(',') if f.strip()] if value else [] + config['filesexcluded'] = files + current_multiline_key = 'filesexcluded' continue # Continuation line for an active multi-line key @@ -124,7 +141,7 @@ def _is_excluded(self, relative_path: str) -> bool: # Always exclude dotfiles (files starting with .) filename = os.path.basename(relative_path) - if filename.startswith('.'): + if filename.startswith('.'): print(f"🚫 Excluding dotfile: {relative_path}") return True @@ -145,7 +162,7 @@ def _is_excluded(self, relative_path: str) -> bool: print(f"✅ Including: {relative_path}") return False - + def _get_expected_copyright(self) -> str: """Generate expected copyright header.""" @@ -265,205 +282,9 @@ def validate_files(self, file_paths: List[str], relative_paths: List[str] = None 'error': None, 'found_copyright': None }) - continue - - # Use absolute path for file operations - result = self.validate_file(file_path) - result['relative_path'] = relative_path - results.append(result) + else: + result = self.validate_file(file_path) + result['relative_path'] = relative_path + results.append(result) return results - - def print_results(self, results: List[Dict[str, Any]], verbose: bool = False): - """Print validation results.""" - MARKER_START = "<<>>" - MARKER_END = "<<>>" - total_files = len(results) - valid_files = sum(1 for r in results if r['valid'] and not r['excluded']) - excluded_files = sum(1 for r in results if r['excluded']) - invalid_files = sum(1 for r in results if not r['valid'] and not r['excluded']) - - LIST_LIMIT = 200 # safety cap - - print(MARKER_START) - ts = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC') - commit_sha = os.environ.get('COPYRIGHT_CHECK_COMMIT_SHA') - print("**Copyright Validation Results**") - counts_line = f"Total: {total_files} | Passed: {valid_files} | Failed: {invalid_files} | Skipped: {excluded_files}" - if commit_sha: - counts_line += f" | at: {ts} | commit: {commit_sha[:12]}" - else: - counts_line += f" | at: {ts}" - print(counts_line) - print() - - has_invalid = invalid_files > 0 - if has_invalid: - print("### ❌ Failed Files") - for result in results: - if result['valid'] or result['excluded']: - continue - # Prefer relative path for display - display_path = result.get('relative_path') or result['file'] - print(f"- {display_path}") - print() # blank line for visual spacing before error details - err_msg = result.get('error') or 'Invalid header' - # Error label small + bold - print(" Error:") - print(" ```diff") - print(f" - {err_msg}") - print(" ```") - expected_line = result['expected_copyright'] - # Expected header label small + bold - print(" Expected header:") - print(" ```") - print(f" {expected_line}") - print(" ```") - print() - - excluded_list = [r for r in results if r['excluded']] - if excluded_list: - print("### ⏭️ Skipped (Excluded) Files") - for r in excluded_list[:LIST_LIMIT]: - display_path = r.get('relative_path') or r['file'] - print(f"- {display_path}") - if len(excluded_list) > LIST_LIMIT: - print(f"- … ({len(excluded_list) - LIST_LIMIT} more omitted)") - print() - - valid_list = [r for r in results if r['valid'] and not r['excluded']] - if valid_list: - print("### ✅ Valid Files") - for r in valid_list[:LIST_LIMIT]: - display_path = r.get('relative_path') or r['file'] - print(f"- {display_path}") - if len(valid_list) > LIST_LIMIT: - print(f"- … ({len(valid_list) - LIST_LIMIT} more omitted)") - print() - - # Moved Guidance section here (after all file lists, before success/timestamp) - if has_invalid: - print("### 🛠️ Guidance") - print("Follow these steps to fix the failed files:") - print("1. Insert the expected header at the very top (within first 20 lines) of each failed file.") - print("2. Ensure the year range matches the configuration (start year through current year).") - print("3. Do not alter spacing or punctuation in the header line.") - print("4. Commit and push the changes to update this check.") - print() - - if not has_invalid: - print("✅ All files have valid copyright headers!\n") - - print(MARKER_END) - - -def main(): - """Main function.""" - parser = argparse.ArgumentParser( - description="Validate copyright headers in source files", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - python copyrightcheck.py -c config.yml file1.py file2.js - python copyrightcheck.py -c config.yml --files-from-stdin - echo "file1.py\nfile2.js" | python copyrightcheck.py -c config.yml --files-from-stdin - """ - ) - - parser.add_argument( - '-c', '--config', - required=True, - help='Path to copyright configuration file' - ) - - parser.add_argument( - '-w', '--working-dir', - help='Working directory for resolving relative file paths (default: current directory)' - ) - - parser.add_argument( - 'files', - nargs='*', - help='Files to check for copyright headers (relative to working-dir if specified)' - ) - - parser.add_argument( - '--files-from-stdin', - action='store_true', - help='Read file paths from standard input (one per line)' - ) - - parser.add_argument( - '-v', '--verbose', - action='store_true', - help='Show detailed output including valid and excluded files' - ) - - parser.add_argument( - '--origins-file', - help='Optional file containing origin metadata for each file (ignored by validator)', - required=False - ) - - args = parser.parse_args() - - # Get file paths - file_paths = [] - - if args.files_from_stdin: - # Read file paths from stdin - for line in sys.stdin: - file_path = line.strip() - if file_path: - file_paths.append(file_path) - else: - file_paths = args.files - - if not file_paths: - print("Error: No files specified. Use positional arguments or --files-from-stdin.") - sys.exit(1) - - # Initialize validator - validator = CopyrightValidator(args.config) - - # Set working directory if specified - working_dir = args.working_dir or os.getcwd() - if args.working_dir: - print(f"📂 Working directory: {working_dir}") - - # Convert file paths to absolute paths for file operations - # but keep relative paths for exclusion checking - absolute_file_paths = [] - relative_file_paths = [] - - for file_path in file_paths: - if os.path.isabs(file_path): - # Already absolute - convert to relative for exclusion checking - try: - relative_path = os.path.relpath(file_path, working_dir) - absolute_file_paths.append(file_path) - relative_file_paths.append(relative_path) - except ValueError: - # If relpath fails, use as-is - absolute_file_paths.append(file_path) - relative_file_paths.append(file_path) - else: - # Relative path - resolve to absolute for file operations - absolute_path = os.path.join(working_dir, file_path) - absolute_file_paths.append(absolute_path) - relative_file_paths.append(file_path) - - # Validate files using absolute paths for file ops, relative for exclusion - results = validator.validate_files(absolute_file_paths, relative_file_paths) - - # Print results - validator.print_results(results, verbose=args.verbose) - - # Exit with error code if any files are invalid - invalid_count = sum(1 for r in results if not r['valid'] and not r['excluded']) - if invalid_count > 0: - sys.exit(1) - - -if __name__ == '__main__': - main() From 7841d83899bedbc87cae0365e4da41451acb0b67 Mon Sep 17 00:00:00 2001 From: brijeshp56 Date: Thu, 26 Mar 2026 21:28:52 +0530 Subject: [PATCH 06/12] Update copyrightcheck.py with new copyright validation script --- scripts/copyrightcheck.py | 208 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 202 insertions(+), 6 deletions(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index 0d31dc4..b1621de 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -141,7 +141,7 @@ def _is_excluded(self, relative_path: str) -> bool: # Always exclude dotfiles (files starting with .) filename = os.path.basename(relative_path) - if filename.startswith('.'): + if filename.startswith('.'): print(f"🚫 Excluding dotfile: {relative_path}") return True @@ -162,7 +162,7 @@ def _is_excluded(self, relative_path: str) -> bool: print(f"✅ Including: {relative_path}") return False - + def _get_expected_copyright(self) -> str: """Generate expected copyright header.""" @@ -282,9 +282,205 @@ def validate_files(self, file_paths: List[str], relative_paths: List[str] = None 'error': None, 'found_copyright': None }) - else: - result = self.validate_file(file_path) - result['relative_path'] = relative_path - results.append(result) + continue + + # Use absolute path for file operations + result = self.validate_file(file_path) + result['relative_path'] = relative_path + results.append(result) return results + + def print_results(self, results: List[Dict[str, Any]], verbose: bool = False): + """Print validation results.""" + MARKER_START = "<<>>" + MARKER_END = "<<>>" + total_files = len(results) + valid_files = sum(1 for r in results if r['valid'] and not r['excluded']) + excluded_files = sum(1 for r in results if r['excluded']) + invalid_files = sum(1 for r in results if not r['valid'] and not r['excluded']) + + LIST_LIMIT = 200 # safety cap + + print(MARKER_START) + ts = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC') + commit_sha = os.environ.get('COPYRIGHT_CHECK_COMMIT_SHA') + print("**Copyright Validation Results**") + counts_line = f"Total: {total_files} | Passed: {valid_files} | Failed: {invalid_files} | Skipped: {excluded_files}" + if commit_sha: + counts_line += f" | at: {ts} | commit: {commit_sha[:12]}" + else: + counts_line += f" | at: {ts}" + print(counts_line) + print() + + has_invalid = invalid_files > 0 + if has_invalid: + print("### ❌ Failed Files") + for result in results: + if result['valid'] or result['excluded']: + continue + # Prefer relative path for display + display_path = result.get('relative_path') or result['file'] + print(f"- {display_path}") + print() # blank line for visual spacing before error details + err_msg = result.get('error') or 'Invalid header' + # Error label small + bold + print(" Error:") + print(" ```diff") + print(f" - {err_msg}") + print(" ```") + expected_line = result['expected_copyright'] + # Expected header label small + bold + print(" Expected header:") + print(" ```") + print(f" {expected_line}") + print(" ```") + print() + + excluded_list = [r for r in results if r['excluded']] + if excluded_list: + print("### ⏭️ Skipped (Excluded) Files") + for r in excluded_list[:LIST_LIMIT]: + display_path = r.get('relative_path') or r['file'] + print(f"- {display_path}") + if len(excluded_list) > LIST_LIMIT: + print(f"- … ({len(excluded_list) - LIST_LIMIT} more omitted)") + print() + + valid_list = [r for r in results if r['valid'] and not r['excluded']] + if valid_list: + print("### ✅ Valid Files") + for r in valid_list[:LIST_LIMIT]: + display_path = r.get('relative_path') or r['file'] + print(f"- {display_path}") + if len(valid_list) > LIST_LIMIT: + print(f"- … ({len(valid_list) - LIST_LIMIT} more omitted)") + print() + + # Moved Guidance section here (after all file lists, before success/timestamp) + if has_invalid: + print("### 🛠️ Guidance") + print("Follow these steps to fix the failed files:") + print("1. Insert the expected header at the very top (within first 20 lines) of each failed file.") + print("2. Ensure the year range matches the configuration (start year through current year).") + print("3. Do not alter spacing or punctuation in the header line.") + print("4. Commit and push the changes to update this check.") + print() + + if not has_invalid: + print("✅ All files have valid copyright headers!\n") + + print(MARKER_END) + + +def main(): + """Main function.""" + parser = argparse.ArgumentParser( + description="Validate copyright headers in source files", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python copyrightcheck.py -c config.yml file1.py file2.js + python copyrightcheck.py -c config.yml --files-from-stdin + echo "file1.py\nfile2.js" | python copyrightcheck.py -c config.yml --files-from-stdin + """ + ) + + parser.add_argument( + '-c', '--config', + required=True, + help='Path to copyright configuration file' + ) + + parser.add_argument( + '-w', '--working-dir', + help='Working directory for resolving relative file paths (default: current directory)' + ) + + parser.add_argument( + 'files', + nargs='*', + help='Files to check for copyright headers (relative to working-dir if specified)' + ) + + parser.add_argument( + '--files-from-stdin', + action='store_true', + help='Read file paths from standard input (one per line)' + ) + + parser.add_argument( + '-v', '--verbose', + action='store_true', + help='Show detailed output including valid and excluded files' + ) + + parser.add_argument( + '--origins-file', + help='Optional file containing origin metadata for each file (ignored by validator)', + required=False + ) + + args = parser.parse_args() + + # Get file paths + file_paths = [] + + if args.files_from_stdin: + # Read file paths from stdin + for line in sys.stdin: + file_path = line.strip() + if file_path: + file_paths.append(file_path) + else: + file_paths = args.files + + if not file_paths: + print("Error: No files specified. Use positional arguments or --files-from-stdin.") + sys.exit(1) + + # Initialize validator + validator = CopyrightValidator(args.config) + + # Set working directory if specified + working_dir = args.working_dir or os.getcwd() + if args.working_dir: + print(f"📂 Working directory: {working_dir}") + + # Convert file paths to absolute paths for file operations + # but keep relative paths for exclusion checking + absolute_file_paths = [] + relative_file_paths = [] + + for file_path in file_paths: + if os.path.isabs(file_path): + # Already absolute - convert to relative for exclusion checking + try: + relative_path = os.path.relpath(file_path, working_dir) + absolute_file_paths.append(file_path) + relative_file_paths.append(relative_path) + except ValueError: + # If relpath fails, use as-is + absolute_file_paths.append(file_path) + relative_file_paths.append(file_path) + else: + # Relative path - resolve to absolute for file operations + absolute_path = os.path.join(working_dir, file_path) + absolute_file_paths.append(absolute_path) + relative_file_paths.append(file_path) + + # Validate files using absolute paths for file ops, relative for exclusion + results = validator.validate_files(absolute_file_paths, relative_file_paths) + + # Print results + validator.print_results(results, verbose=args.verbose) + + # Exit with error code if any files are invalid + invalid_count = sum(1 for r in results if not r['valid'] and not r['excluded']) + if invalid_count > 0: + sys.exit(1) + + +if __name__ == '__main__': + main() \ No newline at end of file From f36a10e84435e8b08d997c9dc2d733cec51ac7b3 Mon Sep 17 00:00:00 2001 From: brijeshp56 Date: Thu, 26 Mar 2026 21:34:36 +0530 Subject: [PATCH 07/12] Update scripts/copyrightcheck.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/copyrightcheck.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index b1621de..261be9c 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -48,12 +48,12 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: Mixed (inline value + continuation lines): filesexcluded: README.MD .github/* - src/scripts/brijeshtest.py + src/scripts/example.py Multiline only (empty inline value): filesexcluded: .github/* - src/scripts/brijeshtest.py + src/scripts/example.py Continuation lines are collected until an empty line or a new key: is found. """ From 5738e21cd1f40d5a282c2f60efe0ce3e5ab97383 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:05:13 +0000 Subject: [PATCH 08/12] Initial plan From e1189a46579ddfb77a30b3e0ef1c7bce61bd2f27 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:05:28 +0000 Subject: [PATCH 09/12] Initial plan From d5734a16c5476c899c4015d650bb688edd6a8eb1 Mon Sep 17 00:00:00 2001 From: brijeshp56 Date: Thu, 26 Mar 2026 21:35:29 +0530 Subject: [PATCH 10/12] Update scripts/copyrightcheck.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/copyrightcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index 261be9c..3e01b87 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -141,7 +141,7 @@ def _is_excluded(self, relative_path: str) -> bool: # Always exclude dotfiles (files starting with .) filename = os.path.basename(relative_path) - if filename.startswith('.'): + if filename.startswith('.'): print(f"🚫 Excluding dotfile: {relative_path}") return True From a11d379d2c1cf2950c0e851c0e9966263112c7a4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:06:09 +0000 Subject: [PATCH 11/12] Remove trailing whitespace from line 315 in copyrightcheck.py Agent-Logs-Url: https://github.com/marklogic/pr-workflows/sessions/25a254e4-623b-4097-8ead-02ba997e61eb Co-authored-by: brijeshp56 <203762578+brijeshp56@users.noreply.github.com> --- scripts/copyrightcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index 261be9c..cc27c88 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -312,7 +312,7 @@ def print_results(self, results: List[Dict[str, Any]], verbose: bool = False): else: counts_line += f" | at: {ts}" print(counts_line) - print() + print() has_invalid = invalid_files > 0 if has_invalid: From 31ff308306181629518fef65913801f5d436a78b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:07:11 +0000 Subject: [PATCH 12/12] Remove trailing whitespace from copyrightcheck.py Agent-Logs-Url: https://github.com/marklogic/pr-workflows/sessions/8161bf32-06e4-47c5-b637-c6c6e1ca0037 Co-authored-by: brijeshp56 <203762578+brijeshp56@users.noreply.github.com> --- scripts/copyrightcheck.py | 140 +++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index 261be9c..863be3f 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -17,10 +17,10 @@ class CopyrightValidator: """Validates copyright headers in source files.""" - + # Common patterns for comment block terminators TRAILING_COMMENT_TERMINATORS = r'(\*/|-->|:\))\s*$' - + def __init__(self, config_file: str): """Initialize validator with configuration file.""" self.config = self._load_config(config_file) @@ -29,38 +29,38 @@ def __init__(self, config_file: str): if self.start_year is None: print("Error: 'startyear' must be specified in the configuration file.") sys.exit(1) - + # Get excluded files from config, default to empty set if not specified excluded_files_list = self.config.get('filesexcluded') if excluded_files_list is None: self.excluded_files = set() else: self.excluded_files = set(excluded_files_list) - + def _load_config(self, config_file: str) -> Dict[str, Any]: """Load configuration from plain text file. - + Supports both single-line and multiline filesexcluded values: - + Single-line: filesexcluded: README.MD,.github/* - + Mixed (inline value + continuation lines): filesexcluded: README.MD .github/* src/scripts/example.py - + Multiline only (empty inline value): filesexcluded: .github/* src/scripts/example.py - + Continuation lines are collected until an empty line or a new key: is found. """ config = {} - + print(f"📋 Loading copyright config from: {config_file}") - + try: with open(config_file, 'r') as f: content = f.read() @@ -68,23 +68,23 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: for line_num, line in enumerate(content.split('\n'), 1): print(f" {line_num:2d}: {line}") print() - + # Reset file pointer to beginning f.seek(0) - + current_multiline_key = None for line_num, line in enumerate(f, 1): line = line.strip() - + # Empty line ends any active multi-line block if not line: current_multiline_key = None continue - + # Skip comments if line.startswith('#'): continue - + # Detect key:value pairs — key must be a simple word (no path chars) if ':' in line: key_part, value_part = line.split(':', 1) @@ -93,14 +93,14 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: current_multiline_key = None key = key_candidate value = value_part.strip() - + if key == 'startyear': try: config['startyear'] = int(value) except ValueError: print(f"Error: Invalid start year '{value}'. Must be a valid integer.") sys.exit(1) - + elif key == 'filesexcluded': # Always initialise the list and activate multiline mode. # This supports: @@ -110,65 +110,65 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: config['filesexcluded'] = files current_multiline_key = 'filesexcluded' continue - + # Continuation line for an active multi-line key # Each line may contain one or more comma-separated entries if current_multiline_key == 'filesexcluded': entries = [e.strip() for e in line.split(',')] config['filesexcluded'].extend([e for e in entries if e]) - + print("✅ Parsed configuration:") for key, value in config.items(): print(f" {key}: {value}") print() - + return config - + except FileNotFoundError: print(f"Error: Configuration file '{config_file}' not found.") sys.exit(1) except Exception as e: print(f"Error reading configuration file: {e}") sys.exit(1) - + def _is_excluded(self, relative_path: str) -> bool: """Check if file should be excluded from copyright validation. - + Args: relative_path: File path relative to repository root """ relative_path = os.path.normpath(relative_path) - + # Always exclude dotfiles (files starting with .) filename = os.path.basename(relative_path) - if filename.startswith('.'): + if filename.startswith('.'): print(f"🚫 Excluding dotfile: {relative_path}") return True - + for excluded_pattern in self.excluded_files: excluded_pattern = os.path.normpath(excluded_pattern) - + # Check for exact match if relative_path == excluded_pattern: print(f"🚫 Excluding (exact match): {relative_path} matches {excluded_pattern}") return True - + # Check for pattern match (simple glob-like matching) if '*' in excluded_pattern: pattern = excluded_pattern.replace('*', '.*') if re.match(pattern, relative_path): print(f"🚫 Excluding (pattern match): {relative_path} matches {excluded_pattern}") return True - + print(f"✅ Including: {relative_path}") return False - - + + def _get_expected_copyright(self) -> str: """Generate expected copyright header.""" year_range = f"{self.start_year}-{self.current_year}" if self.start_year != self.current_year else str(self.current_year) return f"Copyright (c) {year_range} Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved." - + def _extract_copyright_from_content(self, content: str) -> str: """Extract copyright line from file content.""" lines = content.split('\n') @@ -181,7 +181,7 @@ def _extract_copyright_from_content(self, content: str) -> str: if cleaned_line.lower().startswith('copyright'): return cleaned_line return "" - + def _validate_copyright_format(self, copyright_line: str) -> bool: """Validate copyright line. Accepts any header of the form: @@ -211,7 +211,7 @@ def _validate_copyright_format(self, copyright_line: str) -> bool: return False # All conditions satisfied return True - + def validate_file(self, file_path: str) -> Dict[str, Any]: """Validate copyright in a single file.""" result = { @@ -222,55 +222,55 @@ def validate_file(self, file_path: str) -> Dict[str, Any]: 'found_copyright': '', 'expected_copyright': self._get_expected_copyright() } - + # Check if file is excluded if self._is_excluded(file_path): result['excluded'] = True result['valid'] = True # Excluded files are considered valid return result - + try: # Check if file exists if not os.path.exists(file_path): result['error'] = f"File not found: {file_path}" return result - + # Read file content with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: content = f.read() - + # Extract copyright line copyright_line = self._extract_copyright_from_content(content) result['found_copyright'] = copyright_line - + if not copyright_line: result['error'] = "No copyright header found" return result - + # Validate copyright format result['valid'] = self._validate_copyright_format(copyright_line) - + if not result['valid']: result['error'] = "Copyright format does not match expected format" - + except Exception as e: result['error'] = f"Error reading file: {str(e)}" - + return result - + def validate_files(self, file_paths: List[str], relative_paths: List[str] = None) -> List[Dict[str, Any]]: """Validate copyright in multiple files. - + Args: file_paths: Absolute paths to files for file operations relative_paths: Relative paths for exclusion checking (optional) """ results = [] - + # If no relative paths provided, use file_paths as-is if relative_paths is None: relative_paths = file_paths - + for file_path, relative_path in zip(file_paths, relative_paths): # Use relative path for exclusion checking if self._is_excluded(relative_path): @@ -283,14 +283,14 @@ def validate_files(self, file_paths: List[str], relative_paths: List[str] = None 'found_copyright': None }) continue - + # Use absolute path for file operations result = self.validate_file(file_path) result['relative_path'] = relative_path results.append(result) - + return results - + def print_results(self, results: List[Dict[str, Any]], verbose: bool = False): """Print validation results.""" MARKER_START = "<<>>" @@ -312,7 +312,7 @@ def print_results(self, results: List[Dict[str, Any]], verbose: bool = False): else: counts_line += f" | at: {ts}" print(counts_line) - print() + print() has_invalid = invalid_files > 0 if has_invalid: @@ -366,7 +366,7 @@ def print_results(self, results: List[Dict[str, Any]], verbose: bool = False): print("2. Ensure the year range matches the configuration (start year through current year).") print("3. Do not alter spacing or punctuation in the header line.") print("4. Commit and push the changes to update this check.") - print() + print() if not has_invalid: print("✅ All files have valid copyright headers!\n") @@ -386,47 +386,47 @@ def main(): echo "file1.py\nfile2.js" | python copyrightcheck.py -c config.yml --files-from-stdin """ ) - + parser.add_argument( '-c', '--config', required=True, help='Path to copyright configuration file' ) - + parser.add_argument( '-w', '--working-dir', help='Working directory for resolving relative file paths (default: current directory)' ) - + parser.add_argument( 'files', nargs='*', help='Files to check for copyright headers (relative to working-dir if specified)' ) - + parser.add_argument( '--files-from-stdin', action='store_true', help='Read file paths from standard input (one per line)' ) - + parser.add_argument( '-v', '--verbose', action='store_true', help='Show detailed output including valid and excluded files' ) - + parser.add_argument( '--origins-file', help='Optional file containing origin metadata for each file (ignored by validator)', required=False ) - + args = parser.parse_args() - + # Get file paths file_paths = [] - + if args.files_from_stdin: # Read file paths from stdin for line in sys.stdin: @@ -435,24 +435,24 @@ def main(): file_paths.append(file_path) else: file_paths = args.files - + if not file_paths: print("Error: No files specified. Use positional arguments or --files-from-stdin.") sys.exit(1) - + # Initialize validator validator = CopyrightValidator(args.config) - + # Set working directory if specified working_dir = args.working_dir or os.getcwd() if args.working_dir: print(f"📂 Working directory: {working_dir}") - + # Convert file paths to absolute paths for file operations # but keep relative paths for exclusion checking absolute_file_paths = [] relative_file_paths = [] - + for file_path in file_paths: if os.path.isabs(file_path): # Already absolute - convert to relative for exclusion checking @@ -469,13 +469,13 @@ def main(): absolute_path = os.path.join(working_dir, file_path) absolute_file_paths.append(absolute_path) relative_file_paths.append(file_path) - + # Validate files using absolute paths for file ops, relative for exclusion results = validator.validate_files(absolute_file_paths, relative_file_paths) - + # Print results validator.print_results(results, verbose=args.verbose) - + # Exit with error code if any files are invalid invalid_count = sum(1 for r in results if not r['valid'] and not r['excluded']) if invalid_count > 0: