Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ The tool automatically extracts file extensions from HTTP headers to ensure file

**Repository filtering** for repo files/assets handles renamed and transferred repositories gracefully. URLs are included if they either match the current repository name directly, or redirect to it (e.g., ``willmcgugan/rich`` redirects to ``Textualize/rich`` after transfer).

**Fine-grained token limitation:** Due to a GitHub platform limitation, fine-grained personal access tokens (``github_pat_...``) cannot download attachments from private repositories directly. This affects both ``/assets/`` (images) and ``/files/`` (documents) URLs. The tool implements a workaround for image attachments using GitHub's Markdown API, which converts URLs to temporary JWT-signed URLs that can be downloaded. However, this workaround only works for images; document attachments (PDFs, text files, etc.) will still fail with 404 errors when using fine-grained tokens on private repos. For full attachment support on private repositories, use a classic token (``-t``) instead of a fine-grained token (``-f``). See `#477 <https://github.com/josegonzalez/python-github-backup/issues/477>`_ for details.


Run in Docker container
-----------------------
Expand Down
10 changes: 10 additions & 0 deletions github_backup/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ def main():
"Use -t/--token or -f/--token-fine to authenticate."
)

# Issue #477: Fine-grained PATs cannot download all attachment types from
# private repos. Image attachments will be retried via Markdown API workaround.
if args.include_attachments and args.token_fine:
logger.warning(
"Using --attachments with fine-grained token. Due to GitHub platform "
"limitations, file attachments (PDFs, etc.) from private repos may fail. "
"Image attachments will be retried via workaround. For full attachment "
"support, use --token-classic instead."
)

if args.quiet:
logger.setLevel(logging.WARNING)

Expand Down
108 changes: 100 additions & 8 deletions github_backup/github_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -1062,6 +1062,65 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False):
return metadata


def get_jwt_signed_url_via_markdown_api(url, token, repo_context):
    """Convert a user-attachments/assets URL to a JWT-signed URL via Markdown API.

    GitHub's Markdown API renders image URLs and returns HTML containing
    JWT-signed private-user-images.githubusercontent.com URLs that work
    without token authentication.

    This is a workaround for issue #477 where fine-grained PATs cannot
    download user-attachments URLs from private repos directly.

    Limitations:
    - Only works for /assets/ URLs (images)
    - Does NOT work for /files/ URLs (PDFs, text files, etc.)
    - JWT URLs expire after ~5 minutes

    Args:
        url: The github.com/user-attachments/assets/UUID URL
        token: Raw fine-grained PAT (github_pat_...)
        repo_context: Repository context as "owner/repo"

    Returns:
        str: JWT-signed URL from private-user-images.githubusercontent.com
        None: If conversion fails (HTTP error, any other exception, or the
            rendered HTML does not contain a JWT-signed image URL)
    """
    # Local import: only this function needs it, and it keeps the block
    # self-contained without touching the module's import section.
    from html import unescape

    try:
        payload = json.dumps(
            {"text": f"![img]({url})", "mode": "gfm", "context": repo_context}
        ).encode("utf-8")

        request = Request("https://api.github.com/markdown", data=payload, method="POST")
        request.add_header("Authorization", f"token {token}")
        request.add_header("Content-Type", "application/json")
        request.add_header("Accept", "application/vnd.github+json")

        # Use the response as a context manager so the underlying connection
        # is always closed, even if read()/decode() raises.
        with urlopen(request, timeout=30) as response:
            rendered = response.read().decode("utf-8")

        # Parse JWT-signed URL from HTML response
        # Format: <img src="https://private-user-images.githubusercontent.com/...?jwt=..." ...>
        if match := re.search(
            r'src="(https://private-user-images\.githubusercontent\.com/[^"]+)"',
            rendered,
        ):
            # The value comes from an HTML attribute, so entity references
            # such as "&amp;" must be decoded to recover the real URL.
            jwt_url = unescape(match.group(1))
            logger.debug("Converted attachment URL to JWT-signed URL via Markdown API")
            return jwt_url

        logger.debug("Markdown API response did not contain JWT-signed URL")
        return None

    except HTTPError as e:
        logger.debug(
            "Markdown API request failed with HTTP {0}: {1}".format(e.code, e.reason)
        )
        return None
    except Exception as e:
        # Deliberate best-effort: any failure here just means the workaround
        # is unavailable, and the caller handles the None result.
        logger.debug("Markdown API request failed: {0}".format(str(e)))
        return None


def extract_attachment_urls(item_data, issue_number=None, repository_full_name=None):
"""Extract GitHub-hosted attachment URLs from issue/PR body and comments.

Expand Down Expand Up @@ -1415,15 +1474,46 @@ def download_attachments(
filename = get_attachment_filename(url)
filepath = os.path.join(attachments_dir, filename)

# Download and get metadata
metadata = download_attachment_file(
url,
filepath,
get_auth(args, encode=not args.as_app),
as_app=args.as_app,
fine=args.token_fine is not None,
# Issue #477: Fine-grained PATs cannot download user-attachments/assets
# from private repos directly (404). Use Markdown API workaround to get
# a JWT-signed URL. Only works for /assets/ (images), not /files/.
needs_jwt = (
args.token_fine is not None
and repository.get("private", False)
and "github.com/user-attachments/assets/" in url
)

if not needs_jwt:
# NORMAL download path
metadata = download_attachment_file(
url,
filepath,
get_auth(args, encode=not args.as_app),
as_app=args.as_app,
fine=args.token_fine is not None,
)
elif jwt_url := get_jwt_signed_url_via_markdown_api(
url, args.token_fine, repository["full_name"]
):
# JWT needed and extracted, download via JWT
metadata = download_attachment_file(
jwt_url, filepath, auth=None, as_app=False, fine=False
)
metadata["url"] = url # Apply back the original URL
metadata["jwt_workaround"] = True
else:
# Markdown API workaround failed - skip download we know will fail
metadata = {
"url": url,
"success": False,
"skipped_at": datetime.now(timezone.utc).isoformat(),
"error": "Fine-grained token cannot download private repo attachments. "
"Markdown API workaround failed. Use --token-classic instead.",
}
logger.warning(
"Skipping attachment {0}: {1}".format(url, metadata["error"])
)

# If download succeeded but we got an extension from Content-Disposition,
# we may need to rename the file to add the extension
if metadata["success"] and metadata.get("original_filename"):
Expand Down Expand Up @@ -1951,7 +2041,9 @@ def backup_security_advisories(args, repo_cwd, repository, repos_template):
logger.info("Retrieving {0} security advisories".format(repository["full_name"]))
mkdir_p(repo_cwd, advisory_cwd)

template = "{0}/{1}/security-advisories".format(repos_template, repository["full_name"])
template = "{0}/{1}/security-advisories".format(
repos_template, repository["full_name"]
)

_advisories = retrieve_data(args, template)

Expand Down
25 changes: 25 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Shared pytest fixtures for github-backup tests."""

import pytest

from github_backup.github_backup import parse_args


@pytest.fixture
def create_args():
    """Provide a factory that builds an args namespace from real CLI defaults.

    The factory runs the project's actual argument parser, so any newly
    added CLI option shows up with its default value and tests need no
    changes to keep working.

    Usage:
        def test_something(self, create_args):
            args = create_args(include_releases=True, user="myuser")
    """

    def build_args(**overrides):
        # Parse a minimal command line to obtain the parser's defaults.
        parsed = parse_args(["testuser"])
        # Apply caller-supplied values on top of those defaults.
        for attr_name in overrides:
            setattr(parsed, attr_name, overrides[attr_name])
        return parsed

    return build_args
62 changes: 9 additions & 53 deletions tests/test_all_starred.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Tests for --all-starred flag behavior (issue #225)."""

import pytest
from unittest.mock import Mock, patch
from unittest.mock import patch

from github_backup import github_backup

Expand All @@ -12,58 +12,14 @@ class TestAllStarredCloning:
Issue #225: --all-starred should clone starred repos without requiring --repositories.
"""

def _create_mock_args(self, **overrides):
"""Create a mock args object with sensible defaults."""
args = Mock()
args.user = "testuser"
args.output_directory = "/tmp/backup"
args.include_repository = False
args.include_everything = False
args.include_gists = False
args.include_starred_gists = False
args.all_starred = False
args.skip_existing = False
args.bare_clone = False
args.lfs_clone = False
args.no_prune = False
args.include_wiki = False
args.include_issues = False
args.include_issue_comments = False
args.include_issue_events = False
args.include_pulls = False
args.include_pull_comments = False
args.include_pull_commits = False
args.include_pull_details = False
args.include_labels = False
args.include_hooks = False
args.include_milestones = False
args.include_security_advisories = False
args.include_releases = False
args.include_assets = False
args.include_attachments = False
args.incremental = False
args.incremental_by_files = False
args.github_host = None
args.prefer_ssh = False
args.token_classic = None
args.token_fine = None
args.as_app = False
args.osx_keychain_item_name = None
args.osx_keychain_item_account = None

for key, value in overrides.items():
setattr(args, key, value)

return args

@patch('github_backup.github_backup.fetch_repository')
@patch('github_backup.github_backup.get_github_repo_url')
def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_fetch):
def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_fetch, create_args):
"""--all-starred should clone starred repos without --repositories flag.

This is the core fix for issue #225.
"""
args = self._create_mock_args(all_starred=True)
args = create_args(all_starred=True)
mock_get_url.return_value = "https://github.com/otheruser/awesome-project.git"

# A starred repository (is_starred flag set by retrieve_repositories)
Expand All @@ -88,9 +44,9 @@ def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_f

@patch('github_backup.github_backup.fetch_repository')
@patch('github_backup.github_backup.get_github_repo_url')
def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mock_fetch):
def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mock_fetch, create_args):
"""Starred repos should NOT be cloned if --all-starred is not set."""
args = self._create_mock_args(all_starred=False)
args = create_args(all_starred=False)
mock_get_url.return_value = "https://github.com/otheruser/awesome-project.git"

starred_repo = {
Expand All @@ -111,9 +67,9 @@ def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mo

@patch('github_backup.github_backup.fetch_repository')
@patch('github_backup.github_backup.get_github_repo_url')
def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, mock_fetch):
def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, mock_fetch, create_args):
"""Non-starred repos should NOT be cloned when only --all-starred is set."""
args = self._create_mock_args(all_starred=True)
args = create_args(all_starred=True)
mock_get_url.return_value = "https://github.com/testuser/my-project.git"

# A regular (non-starred) repository
Expand All @@ -135,9 +91,9 @@ def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, m

@patch('github_backup.github_backup.fetch_repository')
@patch('github_backup.github_backup.get_github_repo_url')
def test_repositories_flag_still_works(self, mock_get_url, mock_fetch):
def test_repositories_flag_still_works(self, mock_get_url, mock_fetch, create_args):
"""--repositories flag should still clone repos as before."""
args = self._create_mock_args(include_repository=True)
args = create_args(include_repository=True)
mock_get_url.return_value = "https://github.com/testuser/my-project.git"

regular_repo = {
Expand Down
Loading