diff --git a/README.md b/README.md index 3425d53..97f9c91 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,8 @@ for gitpull: - `-h, --help`: Show help message and exit - `-v, --verbose`: Enable verbose output +- `--norepo`: Do not keep Git history +- `--createdirs`: Create `target_path` if needed ### Examples for gitpull @@ -108,9 +110,9 @@ or - `git checkout -f origin/main` - Restore state - `git switch main` - - Remove untracked files - force, include directories & ignored (.zip) files + - Remove untracked files - force, include directories, & ignored (.zip) files - `git clean -fdx` -- **The eBook folder will now be a Git repository**: but only for updated eBooks +- **The eBook folder will now be a Git repository, unless `--norepo` was used** - **It does not update the database**: It is assumed that the chron-dopush.sh call to autodelete.py will do that diff --git a/gitpull.py b/gitpull.py index 75bd59e..7f93da2 100755 --- a/gitpull.py +++ b/gitpull.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 """ -from gutenbergtools/pglaf-gitpull: Update a folder with the latest files from a Git repository +from gutenbergtools/gitpull: Update a folder with the latest files from a Git repository -This tool clones or pulls the latest changes from a Git repository into a +This tool clones or pulls the latest changes from a PG Git eBook repository into a specified target folder. 
def remove_git_history(target_path):
    """
    Remove Git history from the target path.

    Deletes the .git directory and common Git-related files like .gitignore,
    .gitattributes, README.md, and LICENSE.txt if they exist.
    It might be cleaner to use "git archive" to export only the files without
    Git history, but our server does not support the protocol. Would also need
    to remove untracked files.
    Any existing unchanged files should not be updated.

    Args:
        target_path: Folder (str or Path) that holds the checked-out eBook.

    Returns:
        True if everything that had to go was removed (or was already
        absent); False if any removal failed. Failures are logged rather
        than raised so the caller can report them through its exit status.
    """
    target = Path(target_path)
    ok = True

    git_dir = target / ".git"
    if git_dir.is_dir():
        try:
            shutil.rmtree(git_dir)
        except OSError as e:
            logger.error(f"Failed to remove Git history {git_dir}: {e}")
            ok = False
        else:
            logger.info("Git history removed successfully")
    else:
        logger.info("No Git history found to remove")

    for filename in (".gitignore", ".gitattributes", "README.md", "LICENSE.txt"):
        file_path = target / filename
        try:
            # Try the removal directly instead of exists()+unlink(), which
            # races with concurrent changes.
            file_path.unlink()
        except FileNotFoundError:
            continue
        except OSError as e:
            # e.g. permission denied, or the name is unexpectedly a directory
            logger.error(f"Failed to remove {file_path}: {e}")
            ok = False
        else:
            logger.info(f"{filename} removed successfully")
    return ok
exist_ok=True) + except Exception as e: + logger.error(f"Failed to create target directory: {e}") + print(f"Failed: unable to create target directory {target_path}, see log.") + sys.exit(1) + else: + logger.error(f"{args.target_path} does not exist or is not a directory") + print(f"Failed: {args.target_path} does not exist or is not a directory") + sys.exit(1) # Update the directory - origin = f"https://r.pglaf.org/git/{args.ebook_number}.git/" + origin = f"{UPSTREAM_REPO_DIR}/{args.ebook_number}.git/" # destination is a directory named with the ebook number under the target path destination = f"{args.target_path}/{args.ebook_number}" logger.info(f"Pulling from {origin} to {destination}") success = update_folder(origin, destination) + # Remove Git history if not needed, but only if the update was successful to avoid + # deleting existing files on failure + if args.norepo and success: + success = remove_git_history(destination) + if success: + print(f"Success: eBook {args.ebook_number} copied to {destination}.") + else: + print(f"Failed: unable to copy eBook {args.ebook_number}, see log.") sys.exit(0 if success else 1) diff --git a/puller.py b/puller.py index 04bbf54..1c05cf7 100644 --- a/puller.py +++ b/puller.py @@ -48,8 +48,6 @@ def scan_dopull_log(): """ Scan the dopull log directory for new files. 
#!/usr/bin/env python3
"""
from gutenbergtools/gitpull: Push an updated eBook out to the mirror hosts

For every mirror this tool runs gitpull.py over SSH (without Git history,
creating the target directory if needed) so the mirror fetches the eBook into
its ~/ftp tree. It then touches a <number>.zip.trig file on ibiblio.

Settings (PRIVATE, IBIBLIO_BIN, MIRROR_BIN) come from the environment, which
main() first populates from a .env file in the current directory.
"""
import argparse
import os
import posixpath
import re
import subprocess
import logging
import sys

# Configure logging
logging.basicConfig(filename='updatehosts.log', level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Initial values from the process environment. load_env_file() refreshes
# them, because the .env file is only read after this module is imported.
PRIVATE = os.getenv('PRIVATE') or ''
# These are where .zip.trig files go on ibiblio :
DOPULL_LOG_DIR = posixpath.join(PRIVATE, 'logs', 'dopull')
IBIBLIO_BIN = os.getenv('IBIBLIO_BIN') or ''
MIRROR_BIN = os.getenv('MIRROR_BIN') or ''

ibiblio = "gutenberg.login.ibiblio.org"
mirrors = ["inferno.pglaf.org",
           "aleph.pglaf.org",
           "readingroo.ms"]


def _refresh_settings():
    """Re-read the module-level path settings from os.environ."""
    global PRIVATE, DOPULL_LOG_DIR, IBIBLIO_BIN, MIRROR_BIN
    PRIVATE = os.getenv('PRIVATE') or ''
    DOPULL_LOG_DIR = posixpath.join(PRIVATE, 'logs', 'dopull')
    IBIBLIO_BIN = os.getenv('IBIBLIO_BIN') or ''
    MIRROR_BIN = os.getenv('MIRROR_BIN') or ''


def load_env_file(env_file='.env'):
    """
    Load environment variables from a .env file.

    The file contains key=value pairs, one per line. Empty lines, lines
    starting with # and lines without '=' are skipped. After loading, the
    module-level settings (PRIVATE, DOPULL_LOG_DIR, IBIBLIO_BIN, MIRROR_BIN)
    are recomputed so that the values from the file take effect.

    Returns:
        True if the file was read, False if it does not exist.
    """
    if not os.path.exists(env_file):
        logger.warning(f".env file not found: {env_file}")
        return False
    with open(env_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, value = line.split('=', 1)
            key = key.strip()
            if not key:
                # os.environ rejects an empty variable name
                logger.warning(f"Skipping .env line without a key: {line}")
                continue
            os.environ[key] = value.strip()
            logger.info(f"Loaded env var: {key}")
    _refresh_settings()
    return True


def run_python_script_via_ssh(host, script_path, script_args=None, timeout=60):
    """
    Run a Python script on a remote server via SSH.

    Args:
        host: Remote host name.
        script_path: Path of the script on the remote host.
        script_args: Optional list of argument strings for the script.
        timeout: Seconds to wait before giving up.

    Returns:
        The script's standard output.

    Raises:
        Whatever run_ssh_command raises; the error is logged first.
    """
    if script_args is None:
        script_args = []

    remote_command = f"python3 {script_path}"
    try:
        logger.info(f"[START] Running Python script on {host}: {remote_command} {' '.join(script_args)}")
        output = run_ssh_command(host, remote_command, arguments=script_args, timeout=timeout)
        logger.info(f"[SUCCESS] Output from {host}: {output}")
        return output
    except Exception as e:
        logger.error(f"[ERROR] Failed to run Python script on {host}: {str(e)}")
        raise


def run_ssh_command(host, command, arguments=None, timeout=60):
    """
    Run a shell command on a remote host via SSH with optional arguments.

    The command and its arguments are joined with spaces and sent as one
    string, which the remote shell interprets (this is what lets a leading
    "~/" in an argument expand). Arguments are therefore NOT quoted here:
    pass only trusted, validated values.

    Returns:
        The command's standard output.

    Raises:
        subprocess.TimeoutExpired: the command ran longer than `timeout`.
        subprocess.CalledProcessError: the command exited non-zero.
    """
    if arguments is None:
        arguments = []

    # Append arguments to the command
    full_command = ' '.join([command, *arguments])
    try:
        logger.info(f"[START] Running command on {host}: {full_command}")
        result = subprocess.run(
            ["ssh", host, full_command],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=timeout
        )
        logger.info(f"[SUCCESS] Command output from {host}: {result.stdout}")
        if result.stderr:
            logger.warning(f"[WARNING] Command stderr from {host}: {result.stderr}")
        return result.stdout
    except subprocess.TimeoutExpired:
        logger.error(f"[TIMEOUT] Command timed out after {timeout} seconds on {host}: {full_command}")
        raise
    except subprocess.CalledProcessError as e:
        logger.error(f"[FAILURE] Command failed on {host}: {e.stderr}")
        raise
    except Exception as e:
        logger.error(f"[ERROR] Unexpected error while running command on {host}: {str(e)}")
        raise


def get_ebook_path(number):
    """
    Get PG directory path: 12345 --> 1/2/3/4/

    Every digit except the last becomes one directory level. Single digit
    numbers live under '0/'. An empty string yields an empty path.
    """
    number = str(number)
    if len(number) <= 1:
        return '0/' if number else ''
    return '/'.join(number[:-1]) + '/'


def get_trigger_path(number):
    """Return the path of the .zip.trig trigger file for eBook `number`."""
    return posixpath.join(DOPULL_LOG_DIR, f"{number}.zip.trig")


def update_gitpull_to_hosts():
    """
    Update the gitpull script on all hosts.

    Copies 'gitpull.py' from the current local directory to MIRROR_BIN on
    each mirror and to IBIBLIO_BIN on ibiblio, using a locally run scp.
    Stops at the first host that fails.

    Returns:
        0 on success, 1 on any failure.
    """
    if not os.path.exists('gitpull.py'):
        print("gitpull.py script not found in the current directory.")
        return 1
    if not IBIBLIO_BIN or not MIRROR_BIN:
        print("IBIBLIO_BIN or MIRROR_BIN environment variable not set.")
        return 1

    for host in mirrors + [ibiblio]:
        logger.info(f"Updating gitpull.py script on {host}...")
        dest_dir = IBIBLIO_BIN if host == ibiblio else MIRROR_BIN
        try:
            # scp must run locally: the source file is on this machine
            subprocess.run(
                ["scp", "gitpull.py", f"{host}:{dest_dir}"],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=60
            )
        except (subprocess.SubprocessError, OSError) as e:
            message = f"Failed to update gitpull script on {host}: {str(e)}"
            logger.error(message)
            print(message)
            return 1
        print(f"Successfully updated gitpull script on {host}")
    return 0


def main():
    """Main entry point for the script."""
    parser = argparse.ArgumentParser(
        description="Update an eBook directory on the mirrors with the latest files from the Git repository",
        epilog="Example: %(prog)s 12345"
    )
    parser.add_argument(
        "ebook_number",
        help="Number of the eBook Git repository to pull from"
    )
    args = parser.parse_args()

    # The number is spliced into remote shell commands, so accept digits only
    if not re.fullmatch(r"[0-9]+", args.ebook_number):
        parser.error("ebook_number must consist of digits only")

    if not load_env_file():  # Load .env variables before using any setting
        print("Failed to load environment variables.")
        sys.exit(1)
    if not MIRROR_BIN:
        print("MIRROR_BIN environment variable not set.")
        sys.exit(1)

    # Get the destination path for the eBook number
    destination = get_ebook_path(args.ebook_number)
    print(f"{args.ebook_number} goes to {destination}\n")
    destination = "~/ftp/" + destination
    try:
        for host in mirrors:
            print("Copying to " + host + "...")
            # Call gitpull.py on the host, creating the target directory if
            # it doesn't exist, no history
            sargs = ["--norepo", "--createdirs", f"{args.ebook_number}", f"{destination}"]
            run_python_script_via_ssh(host, f"{MIRROR_BIN}/gitpull.py", sargs)
            print("Success!\n")

        # ibiblio is a special case, it needs to trigger other actions after the pull,
        # so we just trigger the pull there and let it do the rest
        print(f"Trigger processing of #{args.ebook_number} on ibiblio...")
        run_ssh_command(ibiblio, "touch", [get_trigger_path(args.ebook_number)])
        print("Success!\n")
    except (subprocess.SubprocessError, OSError) as e:
        # Details are already in updatehosts.log
        print(f"Failed: {e}, see log.")
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()