From 2aeb42321343890b6d5d6fbe7d94442bdda51e3b Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Mon, 16 Feb 2026 11:30:42 +0000
Subject: [PATCH] Add GitHub repository investigator tool

This tool allows searching GitHub repositories by keywords with optional
OR logic, and classifies the results by language, stars, and topics.
It uses the standard library `urllib` to minimize dependencies.
Includes unit tests.

Co-authored-by: muumuu8181 <87556753+muumuu8181@users.noreply.github.com>
---
 github_investigator.py      | 132 ++++++++++++++++++++++++++++++++++++
 test_github_investigator.py | 108 +++++++++++++++++++++++++++++
 2 files changed, 240 insertions(+)
 create mode 100644 github_investigator.py
 create mode 100644 test_github_investigator.py

diff --git a/github_investigator.py b/github_investigator.py
new file mode 100644
index 0000000..3ca0d71
--- /dev/null
+++ b/github_investigator.py
@@ -0,0 +1,132 @@
+"""Search GitHub repositories by keyword and summarize the results."""
+
+import argparse
+import json
+import os
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+from collections import Counter
+
+# Seconds to wait for the GitHub API before giving up.
+REQUEST_TIMEOUT = 10
+
+
+def search_repositories(keywords, use_or=False):
+    """
+    Search GitHub repositories based on keywords.
+
+    Each keyword is URL-encoded; the encoded keywords are joined with "+",
+    or with "+OR+" when use_or is true. If the GITHUB_TOKEN environment
+    variable is set, it is sent in the Authorization header.
+
+    Returns the "items" list from the API response, or an empty list when
+    the request fails or the response body is not valid JSON.
+    """
+    base_url = "https://api.github.com/search/repositories"
+
+    # URL encode keywords to handle special characters
+    encoded_keywords = [urllib.parse.quote(k) for k in keywords]
+    q = ("+OR+" if use_or else "+").join(encoded_keywords)
+    url = f"{base_url}?q={q}"
+
+    req = urllib.request.Request(url)
+    req.add_header("Accept", "application/vnd.github.v3+json")
+
+    token = os.environ.get('GITHUB_TOKEN')
+    if token:
+        req.add_header("Authorization", f"token {token}")
+
+    try:
+        # A timeout keeps the tool from hanging on a stalled connection.
+        with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as response:
+            data = json.load(response)
+    except urllib.error.HTTPError as e:
+        print(f"HTTP Error: {e.code} - {e.reason}", file=sys.stderr)
+        return []
+    except urllib.error.URLError as e:
+        print(f"URL Error: {e.reason}", file=sys.stderr)
+        return []
+    except OSError as e:
+        # Errors raised while reading the body (e.g. a timeout) are not URLError.
+        print(f"Network Error: {e}", file=sys.stderr)
+        return []
+    except ValueError as e:
+        # Covers json.JSONDecodeError and undecodable response bytes.
+        print(f"Invalid JSON in response: {e}", file=sys.stderr)
+        return []
+
+    return data.get('items', [])
+
+
+def classify_results(repositories):
+    """
+    Classify and print the results.
+
+    Prints repository counts per language and per star range, followed by
+    the ten most common topics. An empty list prints a short notice only.
+    """
+    if not repositories:
+        print("No repositories found.")
+        return
+
+    print(f"\nFound {len(repositories)} repositories.")
+
+    languages = Counter()
+    stars = Counter()
+    topics = Counter()
+
+    for repo in repositories:
+        # A missing or null language is reported as "Unknown".
+        languages[repo.get('language') or 'Unknown'] += 1
+
+        # Treat a missing or null star count as zero.
+        stargazers_count = repo.get('stargazers_count') or 0
+        if stargazers_count < 100:
+            stars['< 100'] += 1
+        elif stargazers_count < 1000:
+            stars['100 - 1000'] += 1
+        elif stargazers_count < 10000:
+            stars['1000 - 10000'] += 1
+        else:
+            stars['> 10000'] += 1
+
+        # "topics" may be missing or null; count nothing in that case.
+        topics.update(repo.get('topics') or [])
+
+    print("\n--- Classification by Language ---")
+    for lang, count in languages.most_common():
+        print(f"{lang}: {count}")
+
+    print("\n--- Classification by Stars ---")
+    # Sort order for stars
+    star_order = ['< 100', '100 - 1000', '1000 - 10000', '> 10000']
+    for range_key in star_order:
+        if stars[range_key] > 0:
+            print(f"{range_key}: {stars[range_key]}")
+
+    print("\n--- Top 10 Topics ---")
+    for topic, count in topics.most_common(10):
+        print(f"{topic}: {count}")
+
+
+def main():
+    """Parse the command line, run the search, and print the summary."""
+    parser = argparse.ArgumentParser(description='GitHub Repository Investigator')
+    parser.add_argument('keywords', nargs='+', help='Search keywords')
+    parser.add_argument('--or', dest='use_or', action='store_true', help='Use OR search logic')
+    args = parser.parse_args()
+
+    token = os.environ.get('GITHUB_TOKEN')
+    if not token:
+        print("Warning: GITHUB_TOKEN not found. Requests may be rate-limited.", file=sys.stderr)
+
+    print(f"Searching for: {args.keywords} (OR: {args.use_or})")
+
+    results = search_repositories(args.keywords, args.use_or)
+    classify_results(results)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test_github_investigator.py b/test_github_investigator.py
new file mode 100644
index 0000000..8eb04b8
--- /dev/null
+++ b/test_github_investigator.py
@@ -0,0 +1,108 @@
+import contextlib
+import io
+import json
+import unittest
+from unittest.mock import patch, MagicMock
+
+import github_investigator
+
+
+class TestGithubInvestigator(unittest.TestCase):
+
+    def setUp(self):
+        # Sample response data
+        self.sample_data = {
+            "items": [
+                {
+                    "language": "Python",
+                    "stargazers_count": 50,
+                    "topics": ["python", "search"]
+                },
+                {
+                    "language": "JavaScript",
+                    "stargazers_count": 500,
+                    "topics": ["javascript", "web"]
+                },
+                {
+                    "language": "Python",
+                    "stargazers_count": 2000,
+                    "topics": ["python", "data-science"]
+                },
+                {
+                    "language": None,
+                    "stargazers_count": 15000,
+                    "topics": []
+                }
+            ]
+        }
+
+    @patch('urllib.request.urlopen')
+    def test_search_repositories_and(self, mock_urlopen):
+        # Mock response
+        mock_response = MagicMock()
+        mock_response.read.return_value = json.dumps(self.sample_data).encode('utf-8')
+        mock_response.__enter__.return_value = mock_response
+        mock_urlopen.return_value = mock_response
+
+        # Call function
+        results = github_investigator.search_repositories(['test', 'query'], use_or=False)
+
+        # Verify URL construction
+        args, _ = mock_urlopen.call_args
+        req = args[0]
+        self.assertIn('q=test+query', req.full_url)
+        self.assertEqual(results, self.sample_data['items'])
+
+    @patch('urllib.request.urlopen')
+    def test_search_repositories_or(self, mock_urlopen):
+        # Mock response
+        mock_response = MagicMock()
+        mock_response.read.return_value = json.dumps(self.sample_data).encode('utf-8')
+        mock_response.__enter__.return_value = mock_response
+        mock_urlopen.return_value = mock_response
+
+        # Call function
+        results = github_investigator.search_repositories(['test', 'query'], use_or=True)
+
+        # Verify URL construction for OR
+        args, _ = mock_urlopen.call_args
+        req = args[0]
+        self.assertIn('q=test+OR+query', req.full_url)
+        self.assertEqual(results, self.sample_data['items'])
+
+    def test_classify_results(self):
+        # Capture stdout; redirect_stdout restores it even if the call raises
+        captured_output = io.StringIO()
+        with contextlib.redirect_stdout(captured_output):
+            github_investigator.classify_results(self.sample_data['items'])
+
+        output = captured_output.getvalue()
+
+        # Check language classification
+        self.assertIn("Python: 2", output)
+        self.assertIn("JavaScript: 1", output)
+        self.assertIn("Unknown: 1", output)
+
+        # Check stars classification
+        self.assertIn("< 100: 1", output)  # 50
+        self.assertIn("100 - 1000: 1", output)  # 500
+        self.assertIn("1000 - 10000: 1", output)  # 2000
+        self.assertIn("> 10000: 1", output)  # 15000
+
+        # Check topics
+        self.assertIn("python: 2", output)
+        self.assertIn("web: 1", output)
+
+    def test_classify_results_tolerates_null_fields(self):
+        repos = [{"language": None, "stargazers_count": None, "topics": None}]
+        captured_output = io.StringIO()
+        with contextlib.redirect_stdout(captured_output):
+            github_investigator.classify_results(repos)
+
+        output = captured_output.getvalue()
+        self.assertIn("Unknown: 1", output)
+        self.assertIn("< 100: 1", output)
+
+
+if __name__ == '__main__':
+    unittest.main()