# /// script
# requires-python = ">=3.14"
# dependencies = [
#     "click",
# ]
# ///

"""Rename epub files to 'Author - Title.epub' format.

The author and title are read from the metadata stored inside each epub file.

Usage::

    uv run https://kzuraw.github.io/tools/python/rename_epubs.py FOLDER [--dry-run]
"""

import re
import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path

import click

# Translation table used by sanitize_filename(): maps every character that
# must be dropped from a filename to None.
_STRIP_TABLE: dict[int, None] = {ord(ch): None for ch in '<>:"/\\|?*'}
# Non-whitespace control characters (e.g. NUL) are dropped too: an embedded
# NUL makes the rename call raise ValueError. Whitespace control characters
# are kept here so they can be collapsed into a single space afterwards.
_STRIP_TABLE.update(
    {code: None for code in range(32) if not chr(code).isspace()}
)


def _element_text(element: ET.Element | None) -> str | None:
    """Return the stripped text of *element*, or None if it is missing or empty."""
    if element is None or not element.text:
        return None
    return element.text.strip() or None


def extract_metadata(epub_path: Path) -> tuple[str | None, str | None]:
    """Extract author and title from epub metadata.

    Args:
        epub_path: Path to the epub (zip) archive to inspect.

    Returns:
        An ``(author, title)`` tuple taken from the first ``dc:creator`` and
        ``dc:title`` elements of the package document. An item is ``None``
        when its element is missing or empty. Both are ``None`` when the
        archive cannot be read; the error is then reported on stderr.
    """
    try:
        with zipfile.ZipFile(epub_path, "r") as zf:
            # META-INF/container.xml names the OPF file (contains metadata).
            container_root = ET.fromstring(zf.read("META-INF/container.xml"))
            ns = {"c": "urn:oasis:names:tc:opendocument:xmlns:container"}
            rootfile = container_root.find(".//c:rootfile", ns)
            if rootfile is None:
                return None, None
            opf_path = rootfile.get("full-path")
            if not opf_path:
                return None, None

            # Parse OPF for metadata.
            opf_root = ET.fromstring(zf.read(opf_path))
            dc = {"dc": "http://purl.org/dc/elements/1.1/"}

            title = _element_text(opf_root.find(".//dc:title", dc))
            author = _element_text(opf_root.find(".//dc:creator", dc))

            return author, title
    except (zipfile.BadZipFile, ET.ParseError, KeyError) as e:
        # KeyError is what ZipFile.read raises for a missing archive member.
        click.echo(f"Error reading {epub_path.name}: {e}", err=True)
        return None, None
    except Exception as e:
        # Deliberate best-effort boundary: one unreadable file must not stop
        # the rest of the batch from being processed.
        click.echo(f"Unexpected error reading {epub_path.name}: {e}", err=True)
        return None, None


def sanitize_filename(name: str) -> str:
    """Remove characters that are invalid in filenames.

    Drops ``<>:"/\\|?*`` and non-whitespace control characters, collapses
    every run of whitespace into a single space and strips the ends.

    Args:
        name: Raw text to turn into (part of) a filename.

    Returns:
        The cleaned text; may be empty if nothing usable remains.
    """
    # Delete first, then collapse: removing a character can leave two spaces
    # next to each other (e.g. "a : b").
    cleaned = name.translate(_STRIP_TABLE)
    return re.sub(r"\s+", " ", cleaned).strip()


# The docstring of main() is printed by click as the --help text, so the
# implementation notes live here instead:
# - Files are processed in sorted order so the output is reproducible.
# - A file is never renamed onto a different existing file, because
#   Path.rename silently replaces the target on Unix.
# - A failing rename is reported and counted as skipped rather than aborting
#   the remaining files.
@click.command()
@click.argument("folder", type=click.Path(exists=True, file_okay=False, path_type=Path))
@click.option("--dry-run", is_flag=True, help="Preview changes without renaming")
def main(folder: Path, dry_run: bool) -> None:
    """Rename epub files in FOLDER to 'Author - Title.epub' format."""
    epubs = sorted(folder.glob("*.epub"))

    if not epubs:
        click.echo("No epub files found.")
        return

    renamed_count = 0
    skipped_count = 0
    # Targets already taken during this run. In --dry-run nothing changes on
    # disk, so this is the only way to notice two sources that would collide.
    claimed: set[Path] = set()

    for epub_path in epubs:
        author, title = extract_metadata(epub_path)

        if not author or not title:
            click.echo(f"Skipping {epub_path.name}: missing metadata")
            skipped_count += 1
            continue

        safe_author = sanitize_filename(author)
        safe_title = sanitize_filename(title)
        if not safe_author or not safe_title:
            # e.g. a title of "???" would otherwise become "Author - .epub".
            click.echo(
                f"Skipping {epub_path.name}: metadata has no usable filename characters"
            )
            skipped_count += 1
            continue

        new_name = f"{safe_author} - {safe_title}.epub"
        new_path = folder / new_name

        if new_path == epub_path:
            click.echo(f"Already named correctly: {epub_path.name}")
            continue

        # samefile() lets a case-only rename through on case-insensitive
        # filesystems, where the "existing" target is the source itself.
        target_taken = new_path in claimed or (
            new_path.exists() and not new_path.samefile(epub_path)
        )
        if target_taken:
            click.echo(f"Skipping {epub_path.name}: target already exists: {new_name}")
            skipped_count += 1
            continue

        if dry_run:
            click.echo(f"Would rename: {epub_path.name} → {new_name}")
        else:
            try:
                epub_path.rename(new_path)
            except OSError as e:
                click.echo(f"Error renaming {epub_path.name}: {e}", err=True)
                skipped_count += 1
                continue
            click.echo(f"Renamed: {epub_path.name} → {new_name}")

        claimed.add(new_path)
        renamed_count += 1

    if renamed_count == 0 and skipped_count == 0:
        click.echo("\nNo files needed renaming.")
    elif dry_run:
        click.echo(
            f"\nDry run complete. {renamed_count} file(s) would be renamed, "
            f"{skipped_count} skipped."
        )
    else:
        click.echo(f"\nRenamed {renamed_count} file(s), {skipped_count} skipped.")


if __name__ == "__main__":
    main()