diff --git a/scripts/copyrightcheck.py b/scripts/copyrightcheck.py index 85c1cbc..863be3f 100644 --- a/scripts/copyrightcheck.py +++ b/scripts/copyrightcheck.py @@ -17,10 +17,10 @@ class CopyrightValidator: """Validates copyright headers in source files.""" - + # Common patterns for comment block terminators TRAILING_COMMENT_TERMINATORS = r'(\*/|-->|:\))\s*$' - + def __init__(self, config_file: str): """Initialize validator with configuration file.""" self.config = self._load_config(config_file) @@ -29,20 +29,38 @@ def __init__(self, config_file: str): if self.start_year is None: print("Error: 'startyear' must be specified in the configuration file.") sys.exit(1) - + # Get excluded files from config, default to empty set if not specified excluded_files_list = self.config.get('filesexcluded') if excluded_files_list is None: self.excluded_files = set() else: self.excluded_files = set(excluded_files_list) - + def _load_config(self, config_file: str) -> Dict[str, Any]: - """Load configuration from plain text file.""" + """Load configuration from plain text file. + + Supports both single-line and multiline filesexcluded values: + + Single-line: + filesexcluded: README.MD,.github/* + + Mixed (inline value + continuation lines): + filesexcluded: README.MD + .github/* + src/scripts/example.py + + Multiline only (empty inline value): + filesexcluded: + .github/* + src/scripts/example.py + + Continuation lines are collected until an empty line or a new key: is found. + """ config = {} - + print(f"📋 Loading copyright config from: {config_file}") - + try: with open(config_file, 'r') as f: content = f.read() @@ -50,90 +68,107 @@ def _load_config(self, config_file: str) -> Dict[str, Any]: for line_num, line in enumerate(content.split('\n'), 1): print(f" {line_num:2d}: {line}") print() - + # Reset file pointer to beginning f.seek(0) - + + current_multiline_key = None for line_num, line in enumerate(f, 1): line = line.strip() - - # Skip empty lines and comments - if not line or line.startswith('#'): + + # Empty line ends any active multi-line block + if not line: + current_multiline_key = None + continue + + # Skip comments + if line.startswith('#'): continue - - # Parse key:value pairs + + # Detect key:value pairs — key must be a simple word (no path chars) if ':' in line: - key, value = line.split(':', 1) - key = key.strip().lower() - value = value.strip() - - if key == 'startyear': - try: - config['startyear'] = int(value) - except ValueError: - print(f"Error: Invalid start year '{value}'. Must be a valid integer.") - sys.exit(1) - - elif key == 'filesexcluded': - # Parse comma-separated list or single file - if value: - files = [f.strip() for f in value.split(',')] - config['filesexcluded'] = [f for f in files if f] - else: - config['filesexcluded'] = [] - + key_part, value_part = line.split(':', 1) + key_candidate = key_part.strip().lower() + if re.match(r'^[a-z][a-z0-9]*$', key_candidate): + current_multiline_key = None + key = key_candidate + value = value_part.strip() + + if key == 'startyear': + try: + config['startyear'] = int(value) + except ValueError: + print(f"Error: Invalid start year '{value}'. Must be a valid integer.") + sys.exit(1) + + elif key == 'filesexcluded': + # Always initialise the list and activate multiline mode. + # This supports: + # - empty inline value → purely multiline + # - non-empty inline value → inline entries + optional continuation lines + files = [f.strip() for f in value.split(',') if f.strip()] if value else [] + config['filesexcluded'] = files + current_multiline_key = 'filesexcluded' + continue + + # Continuation line for an active multi-line key + # Each line may contain one or more comma-separated entries + if current_multiline_key == 'filesexcluded': + entries = [e.strip() for e in line.split(',')] + config['filesexcluded'].extend([e for e in entries if e]) + print("✅ Parsed configuration:") for key, value in config.items(): print(f" {key}: {value}") print() - + return config - + except FileNotFoundError: print(f"Error: Configuration file '{config_file}' not found.") sys.exit(1) except Exception as e: print(f"Error reading configuration file: {e}") sys.exit(1) - + def _is_excluded(self, relative_path: str) -> bool: """Check if file should be excluded from copyright validation. - + Args: relative_path: File path relative to repository root """ relative_path = os.path.normpath(relative_path) - + # Always exclude dotfiles (files starting with .) filename = os.path.basename(relative_path) if filename.startswith('.'): print(f"🚫 Excluding dotfile: {relative_path}") return True - + for excluded_pattern in self.excluded_files: excluded_pattern = os.path.normpath(excluded_pattern) - + # Check for exact match if relative_path == excluded_pattern: print(f"🚫 Excluding (exact match): {relative_path} matches {excluded_pattern}") return True - + # Check for pattern match (simple glob-like matching) if '*' in excluded_pattern: pattern = excluded_pattern.replace('*', '.*') if re.match(pattern, relative_path): print(f"🚫 Excluding (pattern match): {relative_path} matches {excluded_pattern}") return True - + print(f"✅ Including: {relative_path}") return False - - + + def _get_expected_copyright(self) -> str: """Generate expected copyright header.""" year_range = f"{self.start_year}-{self.current_year}" if self.start_year != self.current_year else str(self.current_year) return f"Copyright (c) {year_range} Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved." - + def _extract_copyright_from_content(self, content: str) -> str: """Extract copyright line from file content.""" lines = content.split('\n') @@ -146,7 +181,7 @@ def _extract_copyright_from_content(self, content: str) -> str: if cleaned_line.lower().startswith('copyright'): return cleaned_line return "" - + def _validate_copyright_format(self, copyright_line: str) -> bool: """Validate copyright line. Accepts any header of the form: @@ -176,7 +211,7 @@ def _validate_copyright_format(self, copyright_line: str) -> bool: return False # All conditions satisfied return True - + def validate_file(self, file_path: str) -> Dict[str, Any]: """Validate copyright in a single file.""" result = { @@ -187,55 +222,55 @@ def validate_file(self, file_path: str) -> Dict[str, Any]: 'found_copyright': '', 'expected_copyright': self._get_expected_copyright() } - + # Check if file is excluded if self._is_excluded(file_path): result['excluded'] = True result['valid'] = True # Excluded files are considered valid return result - + try: # Check if file exists if not os.path.exists(file_path): result['error'] = f"File not found: {file_path}" return result - + # Read file content with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: content = f.read() - + # Extract copyright line copyright_line = self._extract_copyright_from_content(content) result['found_copyright'] = copyright_line - + if not copyright_line: result['error'] = "No copyright header found" return result - + # Validate copyright format result['valid'] = self._validate_copyright_format(copyright_line) - + if not result['valid']: result['error'] = "Copyright format does not match expected format" - + except Exception as e: result['error'] = f"Error reading file: {str(e)}" - + return result - + def validate_files(self, file_paths: List[str], relative_paths: List[str] = None) -> List[Dict[str, Any]]: """Validate copyright in multiple files. - + Args: file_paths: Absolute paths to files for file operations relative_paths: Relative paths for exclusion checking (optional) """ results = [] - + # If no relative paths provided, use file_paths as-is if relative_paths is None: relative_paths = file_paths - + for file_path, relative_path in zip(file_paths, relative_paths): # Use relative path for exclusion checking if self._is_excluded(relative_path): @@ -248,14 +283,14 @@ def validate_files(self, file_paths: List[str], relative_paths: List[str] = None 'found_copyright': None }) continue - + # Use absolute path for file operations result = self.validate_file(file_path) result['relative_path'] = relative_path results.append(result) - + return results - + def print_results(self, results: List[Dict[str, Any]], verbose: bool = False): """Print validation results.""" MARKER_START = "<<>>" @@ -351,47 +386,47 @@ def main(): echo "file1.py\nfile2.js" | python copyrightcheck.py -c config.yml --files-from-stdin """ ) - + parser.add_argument( '-c', '--config', required=True, help='Path to copyright configuration file' ) - + parser.add_argument( '-w', '--working-dir', help='Working directory for resolving relative file paths (default: current directory)' ) - + parser.add_argument( 'files', nargs='*', help='Files to check for copyright headers (relative to working-dir if specified)' ) - + parser.add_argument( '--files-from-stdin', action='store_true', help='Read file paths from standard input (one per line)' ) - + parser.add_argument( '-v', '--verbose', action='store_true', help='Show detailed output including valid and excluded files' ) - + parser.add_argument( '--origins-file', help='Optional file containing origin metadata for each file (ignored by validator)', required=False ) - + args = parser.parse_args() - + # Get file paths file_paths = [] - + if args.files_from_stdin: # Read file paths from stdin for line in sys.stdin: @@ -400,24 +435,24 @@ def main(): file_paths.append(file_path) else: file_paths = args.files - + if not file_paths: print("Error: No files specified. Use positional arguments or --files-from-stdin.") sys.exit(1) - + # Initialize validator validator = CopyrightValidator(args.config) - + # Set working directory if specified working_dir = args.working_dir or os.getcwd() if args.working_dir: print(f"📂 Working directory: {working_dir}") - + # Convert file paths to absolute paths for file operations # but keep relative paths for exclusion checking absolute_file_paths = [] relative_file_paths = [] - + for file_path in file_paths: if os.path.isabs(file_path): # Already absolute - convert to relative for exclusion checking @@ -434,13 +469,13 @@ def main(): absolute_path = os.path.join(working_dir, file_path) absolute_file_paths.append(absolute_path) relative_file_paths.append(file_path) - + # Validate files using absolute paths for file ops, relative for exclusion results = validator.validate_files(absolute_file_paths, relative_file_paths) - + # Print results validator.print_results(results, verbose=args.verbose) - + # Exit with error code if any files are invalid invalid_count = sum(1 for r in results if not r['valid'] and not r['excluded']) if invalid_count > 0: