Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions github_investigator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import argparse
import os
import sys
import json
import urllib.request
import urllib.parse
from collections import Counter

def search_repositories(keywords, use_or=False, timeout=10):
    """
    Search GitHub repositories based on keywords.

    Args:
        keywords: Iterable of search terms. Each term is percent-encoded
            before being placed in the query string.
        use_or: When true, terms are joined with ``+OR+``; otherwise they
            are joined with ``+``.
        timeout: Seconds to wait on the network before giving up, passed
            straight to ``urllib.request.urlopen``.

    Returns:
        The list stored under ``items`` in the JSON response, or an empty
        list when the request fails, the body is not a JSON object, or
        ``items`` is missing or null. Failures are reported on stderr.
    """
    base_url = "https://api.github.com/search/repositories"

    # URL encode keywords to handle special characters
    separator = "+OR+" if use_or else "+"
    q = separator.join(urllib.parse.quote(k) for k in keywords)
    url = f"{base_url}?q={q}"

    req = urllib.request.Request(url)
    req.add_header("Accept", "application/vnd.github.v3+json")

    # The token is optional; when present it is sent as an Authorization header.
    token = os.environ.get('GITHUB_TOKEN')
    if token:
        req.add_header("Authorization", f"token {token}")

    try:
        # Without an explicit timeout a stalled connection would block forever.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            data = json.load(response)
    except urllib.error.HTTPError as e:
        print(f"HTTP Error: {e.code} - {e.reason}", file=sys.stderr)
        return []
    except urllib.error.URLError as e:
        print(f"URL Error: {e.reason}", file=sys.stderr)
        return []
    except OSError as e:
        # A timeout while reading the body can surface as a plain OSError
        # rather than being wrapped in URLError.
        print(f"Network Error: {e}", file=sys.stderr)
        return []
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        print(f"Invalid JSON response: {e}", file=sys.stderr)
        return []

    if not isinstance(data, dict):
        return []
    # "or []" also covers an explicit null under "items".
    return data.get('items') or []

def classify_results(repositories):
    """
    Classify and print the results.

    Tallies the given repository dicts by language, by star-count bucket
    and by topic, then prints one section per tally to stdout.

    Args:
        repositories: Sequence of dicts. The keys read are ``language``,
            ``stargazers_count`` and ``topics``; a missing or null value is
            treated as "Unknown", 0 and no topics respectively.

    Returns:
        None. All output goes to stdout.
    """
    if not repositories:
        print("No repositories found.")
        return

    print(f"\nFound {len(repositories)} repositories.")

    # (exclusive upper bound, label) pairs in display order. Anything at or
    # above the last bound falls into top_bucket.
    star_buckets = [
        (100, '< 100'),
        (1000, '100 - 1000'),
        (10000, '1000 - 10000'),
    ]
    top_bucket = '> 10000'

    languages = Counter()
    stars = Counter()
    topics = Counter()

    for repo in repositories:
        # Language: any falsy value (None, "") is reported as Unknown.
        languages[repo.get('language') or 'Unknown'] += 1

        # Stars: "or 0" makes an explicit null behave like a missing key
        # instead of raising TypeError on the comparison below.
        star_count = repo.get('stargazers_count') or 0
        label = next(
            (name for limit, name in star_buckets if star_count < limit),
            top_bucket,
        )
        stars[label] += 1

        # Topics: "or []" guards against an explicit null.
        topics.update(repo.get('topics') or [])

    print("\n--- Classification by Language ---")
    for lang, count in languages.most_common():
        print(f"{lang}: {count}")

    print("\n--- Classification by Stars ---")
    # Print buckets in ascending order, skipping empty ones.
    for _, range_key in star_buckets + [(None, top_bucket)]:
        if stars[range_key] > 0:
            print(f"{range_key}: {stars[range_key]}")

    print("\n--- Top 10 Topics ---")
    for topic, count in topics.most_common(10):
        print(f"{topic}: {count}")

def main():
    """
    Command-line entry point.

    Parses the keywords and the optional ``--or`` flag, warns on stderr when
    ``GITHUB_TOKEN`` is not set, then runs the search and prints the
    classification of whatever it returns.
    """
    cli = argparse.ArgumentParser(description='GitHub Repository Investigator')
    cli.add_argument('keywords', nargs='+', help='Search keywords')
    cli.add_argument('--or', dest='use_or', action='store_true', help='Use OR search logic')
    options = cli.parse_args()

    # The token is only checked here to warn the user; it is not passed on.
    if not os.environ.get('GITHUB_TOKEN'):
        print("Warning: GITHUB_TOKEN not found. Requests may be rate-limited.", file=sys.stderr)

    print(f"Searching for: {options.keywords} (OR: {options.use_or})")

    classify_results(search_repositories(options.keywords, options.use_or))

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
97 changes: 97 additions & 0 deletions test_github_investigator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import unittest
from unittest.mock import patch, MagicMock
import json
import io
import sys
import github_investigator

class TestGithubInvestigator(unittest.TestCase):
    """Unit tests for ``search_repositories`` and ``classify_results``."""

    def setUp(self):
        # Sample response data: one repository per star bucket, with one
        # entry lacking a language and topics.
        self.sample_data = {
            "items": [
                {
                    "language": "Python",
                    "stargazers_count": 50,
                    "topics": ["python", "search"],
                },
                {
                    "language": "JavaScript",
                    "stargazers_count": 500,
                    "topics": ["javascript", "web"],
                },
                {
                    "language": "Python",
                    "stargazers_count": 2000,
                    "topics": ["python", "data-science"],
                },
                {
                    "language": None,
                    "stargazers_count": 15000,
                    "topics": [],
                },
            ]
        }

    def _make_response(self):
        """Return a mock HTTP response whose body is the JSON-encoded sample data."""
        mock_response = MagicMock()
        mock_response.read.return_value = json.dumps(self.sample_data).encode('utf-8')
        # urlopen's result is used as a context manager, so __enter__ must
        # hand back the same object.
        mock_response.__enter__.return_value = mock_response
        return mock_response

    @patch('urllib.request.urlopen')
    def test_search_repositories_and(self, mock_urlopen):
        """Keywords are joined with ``+`` when ``use_or`` is false."""
        mock_urlopen.return_value = self._make_response()

        # Call function
        results = github_investigator.search_repositories(['test', 'query'], use_or=False)

        # Verify URL construction
        args, _ = mock_urlopen.call_args
        req = args[0]
        self.assertIn('q=test+query', req.full_url)
        self.assertEqual(results, self.sample_data['items'])

    @patch('urllib.request.urlopen')
    def test_search_repositories_or(self, mock_urlopen):
        """Keywords are joined with ``+OR+`` when ``use_or`` is true."""
        mock_urlopen.return_value = self._make_response()

        # Call function
        results = github_investigator.search_repositories(['test', 'query'], use_or=True)

        # Verify URL construction for OR
        args, _ = mock_urlopen.call_args
        req = args[0]
        self.assertIn('q=test+OR+query', req.full_url)
        self.assertEqual(results, self.sample_data['items'])

    def test_classify_results(self):
        """The printed report contains the expected language, star and topic counts."""
        # Capture stdout with a context manager so the previous stream is
        # restored even if classify_results raises.
        with patch('sys.stdout', new=io.StringIO()) as captured_output:
            github_investigator.classify_results(self.sample_data['items'])

        output = captured_output.getvalue()

        # Check language classification
        self.assertIn("Python: 2", output)
        self.assertIn("JavaScript: 1", output)
        self.assertIn("Unknown: 1", output)

        # Check stars classification
        self.assertIn("< 100: 1", output)  # 50
        self.assertIn("100 - 1000: 1", output)  # 500
        self.assertIn("1000 - 10000: 1", output)  # 2000
        self.assertIn("> 10000: 1", output)  # 15000

        # Check topics
        self.assertIn("python: 2", output)
        self.assertIn("web: 1", output)

# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()