From 44831544ec01242bc11d0209e68b2f933e308a9f Mon Sep 17 00:00:00 2001 From: 0xmrma Date: Fri, 27 Feb 2026 05:50:20 +0200 Subject: [PATCH] Fix OS command injection in scrape_github (CWE-78) --- thepipe/scraper.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/thepipe/scraper.py b/thepipe/scraper.py index 6141ca5..942f8d6 100644 --- a/thepipe/scraper.py +++ b/thepipe/scraper.py @@ -28,7 +28,6 @@ chunk_by_length, chunk_agentic, ) -import tempfile import mimetypes import dotenv from magika import Magika @@ -1048,14 +1047,18 @@ def scrape_github( # make new tempdir for cloned repo with tempfile.TemporaryDirectory() as temp_dir: # requires git - exit_code = os.system( - f'git clone --branch "{branch}" --single-branch {github_url} "{temp_dir}" --quiet' - ) - if exit_code != 0: + import subprocess + + try: + subprocess.run( + ["git", "clone", "--branch", branch, "--single-branch", github_url, temp_dir, "--quiet"], + check=True, + ) + except subprocess.CalledProcessError as e: raise RuntimeError( f"git clone failed for {github_url} at branch '{branch}'. " "Verify the repository URL and branch name." - ) + ) from e files_contents = scrape_directory( dir_path=temp_dir, inclusion_pattern=inclusion_pattern,