diff --git a/.bandit b/.bandit new file mode 100644 index 0000000..6465be2 --- /dev/null +++ b/.bandit @@ -0,0 +1,102 @@ +# Bandit configuration for tf-shell +# This file configures Bandit security scanner for Python code + +[bandit] +# Directories to exclude from scanning +exclude_dirs = [ + "*/test/*", + "*/tests/*", + "bazel-*", + ".git", + "__pycache__", + "*.venv", + "venv", + "env" +] + +# Files to skip +skips = [ + # Skip test files that may contain intentionally insecure code for testing + "*/test_*.py", + "*/*_test.py" +] + +# Security tests to run (all by default, but can be customized) +tests = [ + "B101", # assert_used + "B102", # exec_used + "B103", # set_bad_file_permissions + "B104", # hardcoded_bind_all_interfaces + "B105", # hardcoded_password_string + "B106", # hardcoded_password_funcarg + "B107", # hardcoded_password_default + "B108", # hardcoded_tmp_directory + "B110", # try_except_pass + "B112", # try_except_continue + "B201", # flask_debug_true + "B301", # pickle + "B302", # marshal + "B303", # md5 + "B304", # des + "B305", # cipher + "B306", # mktemp_q + "B307", # eval + "B308", # mark_safe + "B309", # httpsconnection + "B310", # urllib_urlopen + "B311", # random + "B312", # telnetlib + "B313", # xml_bad_cElementTree + "B314", # xml_bad_ElementTree + "B315", # xml_bad_expatreader + "B316", # xml_bad_expatbuilder + "B317", # xml_bad_sax + "B318", # xml_bad_minidom + "B319", # xml_bad_pulldom + "B320", # xml_bad_etree + "B321", # ftplib + "B322", # input + "B323", # unverified_context + "B324", # hashlib_new_insecure_functions + "B325", # tempnam + "B401", # import_telnetlib + "B402", # import_ftplib + "B403", # import_pickle + "B404", # import_subprocess + "B405", # import_xml_etree + "B406", # import_xml_sax + "B407", # import_xml_expat + "B408", # import_xml_minidom + "B409", # import_xml_pulldom + "B410", # import_lxml + "B411", # import_xmlrpclib + "B412", # import_httpoxy + "B413", # import_pycrypto + "B501", # 
request_with_no_cert_validation + "B502", # ssl_with_bad_version + "B503", # ssl_with_bad_defaults + "B504", # ssl_with_no_version + "B505", # weak_cryptographic_key + "B506", # yaml_load + "B507", # ssh_no_host_key_verification + "B601", # paramiko_calls + "B602", # subprocess_popen_with_shell_equals_true + "B603", # subprocess_without_shell_equals_false + "B604", # any_other_function_with_shell_equals_true + "B605", # start_process_with_a_shell + "B606", # start_process_with_no_shell + "B607", # start_process_with_partial_path + "B608", # hardcoded_sql_expressions + "B609", # linux_commands_wildcard_injection + "B610", # django_extra_used + "B611", # django_rawsql_used + "B701", # jinja2_autoescape_false + "B702", # use_of_mako_templates + "B703" # django_mark_safe +] + +# Confidence levels to report (LOW, MEDIUM, HIGH) +confidence = ["HIGH", "MEDIUM"] + +# Severity levels to report (LOW, MEDIUM, HIGH) +severity = ["MEDIUM", "HIGH"] diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..033244d --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,71 @@ +# Dependabot configuration for tf-shell +# This file configures Dependabot to automatically create pull requests +# for dependency updates, including security patches. 
+ +version: 2 +updates: + # Monitor Python dependencies + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + day: "monday" + time: "09:00" + # Allow both direct and indirect dependency updates + allow: + - dependency-type: "direct" + - dependency-type: "indirect" + # Cap the number of simultaneously open dependency update PRs + open-pull-requests-limit: 10 + # Group related updates together + groups: + tensorflow: + patterns: + - "tensorflow*" + security-updates: + patterns: + - "*" + update-types: + - "security" + # Custom commit message + commit-message: + prefix: "deps" + prefix-development: "deps-dev" + include: "scope" + # Reviewers for dependency updates + reviewers: + - "google/tf-shell-maintainers" + # Labels to apply to PRs + labels: + - "dependencies" + - "security" + + # Monitor GitHub Actions + - package-ecosystem: "github-actions" + directory: "/.github/workflows" + schedule: + interval: "weekly" + day: "monday" + time: "09:00" + open-pull-requests-limit: 5 + commit-message: + prefix: "ci" + include: "scope" + labels: + - "github-actions" + - "ci" + + # Monitor Docker base images used by the dev container + - package-ecosystem: "docker" + directory: "/.devcontainer" + schedule: + interval: "weekly" + day: "monday" + time: "09:00" + open-pull-requests-limit: 3 + commit-message: + prefix: "docker" + include: "scope" + labels: + - "docker" + - "devcontainer" diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000..7854a2e --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,201 @@ +name: Security Vulnerability Scanning + +on: + # Run on every push to main and pull requests + push: + branches: [ main ] + pull_request: + branches: [ main ] + # Run weekly security scans + schedule: + - cron: '0 6 * * 1' # Every Monday at 6 AM UTC + # Allow manual trigger + workflow_dispatch: + +permissions: + # Required for security scanning + contents: read + security-events: write + actions: read + +jobs: + # 
Python dependency vulnerability scanning + python-security-scan: + name: Python Security Scan + runs-on: ubuntu-22.04 + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install security scanning tools + run: | + python -m pip install --upgrade pip + pip install safety bandit semgrep + + - name: Run Safety (Python dependency vulnerability scanner) + run: | + # Scan requirements files for known vulnerabilities + for req_file in requirements*.txt; do + if [ -f "$req_file" ]; then + echo "Scanning $req_file..." + safety check -r "$req_file" --json --output safety-report-$(basename $req_file .txt).json || true + fi + done + continue-on-error: true + + - name: Run Bandit (Python code security scanner) + run: | + # Scan Python code for security issues + bandit -r tf_shell/ tf_shell_ml/ -f json -o bandit-report.json || true + bandit -r tf_shell/ tf_shell_ml/ -f txt -o bandit-report.txt || true + continue-on-error: true + + - name: Run Semgrep (Static analysis security scanner) + run: | + # Run Semgrep with security rules + semgrep --config=auto --json --output=semgrep-report.json tf_shell/ tf_shell_ml/ || true + continue-on-error: true + + - name: Upload security scan results + uses: actions/upload-artifact@v4 + if: always() + with: + name: python-security-reports + path: | + *-report.json + *-report.txt + retention-days: 30 + + # CodeQL analysis for comprehensive code scanning + codeql-analysis: + name: CodeQL Analysis + runs-on: ubuntu-22.04 + permissions: + actions: read + contents: read + security-events: write + strategy: + fail-fast: false + matrix: + language: [ 'python', 'cpp' ] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + # Use default queries plus security-extended for more comprehensive scanning + queries: 
security-extended,security-and-quality + + - name: Setup Bazel for C++ analysis + if: matrix.language == 'cpp' + run: | + # Install bazelisk for building C++ components + curl -LO "https://github.com/bazelbuild/bazelisk/releases/latest/download/bazelisk-linux-amd64" + chmod +x bazelisk-linux-amd64 + sudo mv bazelisk-linux-amd64 /usr/local/bin/bazelisk + + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" + + # Dependency review for pull requests + dependency-review: + name: Dependency Review + runs-on: ubuntu-22.04 + if: github.event_name == 'pull_request' + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Dependency Review + uses: actions/dependency-review-action@v4 + with: + # Fail the build if high or critical vulnerabilities are found + fail-on-severity: high + # Allow licenses commonly used in ML/crypto projects + allow-licenses: Apache-2.0, MIT, BSD-2-Clause, BSD-3-Clause + + # Secret scanning (for any accidentally committed secrets) + secret-scan: + name: Secret Scanning + runs-on: ubuntu-22.04 + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + # Fetch full history for secret scanning + fetch-depth: 0 + + - name: Run TruffleHog (Secret scanner) + uses: trufflesecurity/trufflehog@main + with: + path: ./ + base: main + head: HEAD + extra_args: --debug --only-verified + + # Security summary and reporting + security-summary: + name: Security Summary + runs-on: ubuntu-22.04 + needs: [python-security-scan, codeql-analysis, secret-scan] + if: always() + steps: + - name: Download security reports + uses: actions/download-artifact@v4 + with: + name: python-security-reports + path: ./security-reports/ + continue-on-error: true + + - name: Generate Security Summary + run: | + echo "# Security Scan Summary" > security-summary.md + echo "" >> security-summary.md + echo "## Scan Results" 
>> security-summary.md + echo "" >> security-summary.md + + # Check if reports exist and summarize + if [ -f "./security-reports/bandit-report.json" ]; then + echo "### Bandit (Python Code Security)" >> security-summary.md + echo "- Report generated successfully" >> security-summary.md + fi + + if [ -f "./security-reports/safety-report-requirements_3_10.json" ]; then + echo "### Safety (Dependency Vulnerabilities)" >> security-summary.md + echo "- Dependency scan completed" >> security-summary.md + fi + + echo "" >> security-summary.md + echo "## Recommendations" >> security-summary.md + echo "- Review all security findings before merging" >> security-summary.md + echo "- Update dependencies with known vulnerabilities" >> security-summary.md + echo "- Follow security best practices outlined in SECURITY.md" >> security-summary.md + + - name: Comment PR with security summary + if: github.event_name == 'pull_request' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + if (fs.existsSync('security-summary.md')) { + const summary = fs.readFileSync('security-summary.md', 'utf8'); + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: summary + }); + } diff --git a/.gitignore b/.gitignore index 806735a..6d2ad01 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,12 @@ bazel-testlogs __pycache__ **.venv *.whl -MODULE.bazel.lock \ No newline at end of file +MODULE.bazel.lock + +# Security scan reports (contain sensitive information) +security-reports/ +*.security-report.* +bandit-report.* +safety-report.* +semgrep-report.* +pip-audit-report.* \ No newline at end of file diff --git a/README.md b/README.md index 11e3c3a..2a5a0f4 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,53 @@ this case, `c++filt` will help to decode the mangled symbol name and `nm --defined-only .../libtensorflow_framework.so | grep ...` may help find what the symbol changed to, and 
which dependency is causing the error. +## Security + +### Security Vulnerability Scanning + +tf-shell includes comprehensive security scanning tools to identify potential vulnerabilities in code and dependencies. + +#### Running Security Scans Locally + +Install security scanning tools: + +```bash +pip install -r requirements-security.txt +``` + +Run all security scans: + +```bash +python tools/security_scan.py +``` + +Or using Bazel: + +```bash +bazel run //tools:security_scanner +``` + +#### Available Security Tools + +- **Safety**: Scans Python dependencies for known vulnerabilities +- **Bandit**: Analyzes Python code for security issues +- **Semgrep**: Static analysis security scanner +- **pip-audit**: Audits installed packages for vulnerabilities +- **CodeQL**: GitHub's semantic code analysis (runs in CI) +- **Dependabot**: Automated dependency updates + +#### Security Reports + +Security scan reports are saved to the `security-reports/` directory and include: +- Detailed vulnerability findings +- Severity assessments +- Remediation recommendations +- Summary reports in JSON format + +#### Reporting Security Issues + +Please see our [Security Policy](SECURITY.md) for information on how to report security vulnerabilities responsibly. + ## Contributing See [`CONTRIBUTING.md`](CONTRIBUTING.md) for details. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..4efbb81 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,81 @@ +# Security Policy + +## Supported Versions + +We actively support the following versions of tf-shell with security updates: + +| Version | Supported | +| ------- | ------------------ | +| 0.3.x | :white_check_mark: | +| < 0.3 | :x: | + +## Reporting a Vulnerability + +We take security vulnerabilities seriously. If you discover a security vulnerability in tf-shell, please report it responsibly. 
+ +### How to Report + +**Please do NOT report security vulnerabilities through public GitHub issues.** + +Instead, please report security vulnerabilities to: +- **Email**: security@google.com +- **Subject**: [tf-shell] Security Vulnerability Report + +### What to Include + +When reporting a vulnerability, please include: + +1. **Description**: A clear description of the vulnerability +2. **Impact**: Potential impact and attack scenarios +3. **Reproduction**: Step-by-step instructions to reproduce the issue +4. **Environment**: + - tf-shell version + - Python version + - Operating system + - TensorFlow version +5. **Proof of Concept**: If applicable, include a minimal proof of concept +6. **Suggested Fix**: If you have ideas for fixing the vulnerability + +### Response Timeline + +- **Initial Response**: Within 48 hours of report +- **Status Update**: Within 7 days with preliminary assessment +- **Resolution**: Security patches will be released as soon as possible after verification + +### Security Best Practices + +When using tf-shell in production: + +1. **Keep Dependencies Updated**: Regularly update tf-shell and its dependencies +2. **Secure Key Management**: + - Never hardcode encryption keys in source code + - Use secure key storage mechanisms + - Rotate keys regularly +3. **Input Validation**: Always validate inputs before encryption/decryption +4. **Secure Communication**: Use TLS for network communication +5. **Access Control**: Implement proper access controls for encrypted data +6. **Audit Logging**: Log security-relevant events for monitoring + +### Vulnerability Disclosure Policy + +1. We will acknowledge receipt of vulnerability reports within 48 hours +2. We will provide regular updates on the progress of fixing the vulnerability +3. We will publicly disclose vulnerabilities after they have been fixed +4. We may coordinate disclosure with other affected parties if necessary +5. 
We will credit reporters in our security advisories (unless they prefer to remain anonymous) + +### Security Contacts + +For security-related questions or concerns: +- **Security Team**: security@google.com +- **Project Maintainers**: See [CONTRIBUTORS.md](CONTRIBUTORS.md) + +### Hall of Fame + +We recognize and thank security researchers who responsibly disclose vulnerabilities: + + + +--- + +This security policy is based on industry best practices and may be updated as needed. diff --git a/docs/security-testing-guide.md b/docs/security-testing-guide.md new file mode 100644 index 0000000..98c33ae --- /dev/null +++ b/docs/security-testing-guide.md @@ -0,0 +1,337 @@ +# Security Scanning System Testing Guide + +This document provides a comprehensive testing plan for the security vulnerability scanning system implemented in tf-shell. + +## Overview + +The security scanning system includes multiple tools and processes: +- **Safety**: Python dependency vulnerability scanning +- **Bandit**: Python code security analysis +- **Semgrep**: Static analysis security scanning +- **pip-audit**: Package vulnerability auditing +- **CodeQL**: GitHub's semantic code analysis +- **Dependabot**: Automated dependency updates +- **TruffleHog**: Secret scanning + +## Testing Prerequisites + +### 1. Install Security Tools + +```bash +# Install security scanning dependencies +pip install -r requirements-security.txt + +# Verify installations +safety --version +bandit --version +semgrep --version +pip-audit --version +``` + +### 2. Set Up Test Environment + +```bash +# Create a test branch +git checkout -b test/security-scanning + +# Ensure clean working directory +git status +``` + +## Manual Testing Procedures + +### Test 1: Local Security Scan Execution + +**Objective**: Verify that the security scanning script runs successfully. + +**Steps**: +1. Run the security scanner: + ```bash + python tools/security_scan.py --output-dir test-security-reports + ``` + +2. 
Verify output directory creation: + ```bash + ls -la test-security-reports/ + ``` + +3. Check for expected report files: + - `security_summary_*.json` + - `bandit_*.json` + - `safety_*.json` + - `semgrep_*.json` + - `pip_audit_*.json` + +**Expected Results**: +- Script completes without errors +- All report files are generated +- Summary report contains scan statistics + +### Test 2: Bazel Integration Test + +**Objective**: Verify Bazel can execute the security scanner. + +**Steps**: +1. Run via Bazel: + ```bash + bazel run //tools:security_scanner -- --output-dir bazel-security-reports + ``` + +2. Verify execution and output generation + +**Expected Results**: +- Bazel successfully builds and runs the security scanner +- Reports are generated in the specified directory + +### Test 3: Individual Tool Testing + +**Objective**: Test each security tool independently. + +#### Test 3a: Safety (Dependency Scanning) +```bash +# Test Safety on requirements files +safety check -r requirements_3_10.txt --json +``` + +#### Test 3b: Bandit (Code Security) +```bash +# Test Bandit on Python code +bandit -r tf_shell/ -f json +``` + +#### Test 3c: Semgrep (Static Analysis) +```bash +# Test Semgrep security rules +semgrep --config=auto tf_shell/ tf_shell_ml/ +``` + +#### Test 3d: pip-audit (Package Audit) +```bash +# Test pip-audit +pip-audit --format=json +``` + +### Test 4: Configuration File Testing + +**Objective**: Verify configuration files work correctly. + +**Steps**: +1. Test Bandit configuration: + ```bash + bandit -r tf_shell/ -c .bandit + ``` + +2. Verify exclusions and test selections work as expected + +**Expected Results**: +- Test files are excluded from scanning +- Only specified security tests are executed + +### Test 5: Error Handling Testing + +**Objective**: Test system behavior with missing tools or invalid inputs. + +**Steps**: +1. 
Test with missing tool: + ```bash + # Temporarily rename a tool + mv $(which safety) $(which safety).bak + python tools/security_scan.py + mv $(which safety).bak $(which safety) + ``` + +2. Test with invalid directory: + ```bash + python tools/security_scan.py --output-dir /invalid/path + ``` + +**Expected Results**: +- Graceful error handling +- Informative error messages +- Non-zero exit codes for failures + +## GitHub Actions Testing + +### Test 6: CI/CD Pipeline Testing + +**Objective**: Verify the security workflow runs correctly in GitHub Actions. + +**Steps**: +1. Create a test PR with security scanning changes +2. Monitor the security workflow execution +3. Verify all jobs complete successfully +4. Check for security reports in workflow artifacts + +**Expected Results**: +- All security scanning jobs pass +- Reports are uploaded as artifacts +- PR comments include security summary (if applicable) + +### Test 7: Dependabot Testing + +**Objective**: Verify Dependabot configuration works correctly. + +**Steps**: +1. Wait for Dependabot to create dependency update PRs +2. Verify PRs have correct labels and formatting +3. Check that security updates are prioritized + +**Expected Results**: +- Dependabot creates weekly update PRs +- Security updates are properly labeled +- PRs follow configured commit message format + +## Security Test Cases + +### Test 8: Vulnerability Detection Testing + +**Objective**: Verify the system can detect known vulnerabilities. + +**Steps**: +1. Temporarily add a package with known vulnerabilities to requirements +2. Run security scans +3. 
Verify vulnerabilities are detected and reported + +**Example vulnerable packages for testing**: +``` +# Add to requirements-security.txt temporarily for testing +django==1.11.0 # Known to have security vulnerabilities +requests==2.6.0 # Older version with known issues +``` + +**Expected Results**: +- Safety detects vulnerable dependencies +- Reports include vulnerability details and remediation advice + +### Test 9: Code Security Issue Detection + +**Objective**: Test detection of insecure code patterns. + +**Steps**: +1. Create a test file with intentionally insecure code: + ```python + # test_insecure.py + import os + password = "hardcoded_password" # Should trigger B105 + os.system("rm -rf /") # Should trigger B605 + ``` + +2. Run Bandit scan +3. Verify issues are detected + +**Expected Results**: +- Bandit detects hardcoded passwords +- Bandit detects dangerous system calls +- Issues are properly categorized by severity + +## Performance Testing + +### Test 10: Scan Performance Testing + +**Objective**: Measure scanning performance and resource usage. + +**Steps**: +1. Time the security scanning process: + ```bash + time python tools/security_scan.py + ``` + +2. Monitor resource usage during scans +3. Test with different project sizes + +**Expected Results**: +- Scans complete within reasonable time limits (< 10 minutes) +- Memory usage remains within acceptable bounds +- Performance is consistent across runs + +## Regression Testing + +### Test 11: Baseline Security Report + +**Objective**: Establish baseline security posture for regression testing. + +**Steps**: +1. Run complete security scan on clean main branch +2. Save reports as baseline +3. 
Compare future scans against baseline + +**Expected Results**: +- Baseline reports show current security status +- New vulnerabilities are detected in subsequent scans +- False positives are minimized + +## Test Automation + +### Test 12: Automated Test Suite + +Create automated tests for the security scanning system: + +```python +# tests/test_security_scanner.py +import unittest +import subprocess +import json +from pathlib import Path + +class TestSecurityScanner(unittest.TestCase): + def test_security_scanner_execution(self): + """Test that security scanner runs without errors.""" + result = subprocess.run([ + "python", "tools/security_scan.py", + "--output-dir", "test-reports" + ], capture_output=True, text=True) + + self.assertEqual(result.returncode, 0) + self.assertTrue(Path("test-reports").exists()) + + def test_report_generation(self): + """Test that all expected reports are generated.""" + # Implementation details... + pass +``` + +## Troubleshooting Guide + +### Common Issues and Solutions + +1. **Tool Installation Failures**: + - Ensure Python version compatibility + - Check for system dependencies + - Use virtual environments + +2. **Permission Errors**: + - Verify write permissions for output directory + - Check file system permissions + +3. **Network Issues**: + - Verify internet connectivity for vulnerability databases + - Check proxy settings if applicable + +4. 
**False Positives**: + - Review and update tool configurations + - Add appropriate exclusions + - Document accepted risks + +## Reporting and Documentation + +### Test Results Documentation + +For each test run, document: +- Test environment details +- Tool versions used +- Test execution results +- Any issues encountered +- Remediation actions taken + +### Security Metrics Tracking + +Track the following metrics over time: +- Number of vulnerabilities detected +- Time to remediation +- False positive rates +- Scan execution time +- Tool effectiveness + +## Conclusion + +This testing guide ensures the security scanning system is robust, reliable, and effective at detecting vulnerabilities in the tf-shell project. Regular execution of these tests will maintain the security posture and catch regressions early. diff --git a/requirements-security.txt b/requirements-security.txt new file mode 100644 index 0000000..84958c2 --- /dev/null +++ b/requirements-security.txt @@ -0,0 +1,25 @@ +# Security scanning tools for tf-shell development +# Install with: pip install -r requirements-security.txt + +# Python code security scanner +bandit[toml]==1.7.5 + +# Python dependency vulnerability scanner +safety==3.0.1 + +# Static analysis security scanner +semgrep==1.45.0 + +# Additional security tools +pip-audit==2.6.1 # Audit Python packages for known vulnerabilities +cyclonedx-bom==4.0.4 # Generate Software Bill of Materials (SBOM) + +# Code quality tools that help with security +pylint==3.0.2 # Code analysis for potential issues +mypy==1.7.1 # Static type checking +flake8==6.1.0 # Style guide enforcement +flake8-bandit==4.1.1 # Bandit integration for flake8 + +# Documentation and reporting +jinja2==3.1.2 # For generating security reports +markdown==3.5.1 # For markdown report generation diff --git a/tools/BUILD b/tools/BUILD index 6674417..9c47f97 100644 --- a/tools/BUILD +++ b/tools/BUILD @@ -1,6 +1,22 @@ +load("@pip//:requirements.bzl", "requirement") 
#!/usr/bin/env python3
"""
Security vulnerability scanning script for tf-shell project.

This script runs various security scans and generates a comprehensive report.
It can be used by developers locally or in CI/CD pipelines.

The process exit code is 1 when a required tool is missing, when scanning is
interrupted or crashes, or when at least one high-severity finding is
reported; otherwise it is 0.

Usage:
    python tools/security_scan.py [--output-dir OUTPUT_DIR] [--format FORMAT]

Requirements:
    pip install -r requirements-security.txt
"""

import argparse
import json
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


class SecurityScanner:
    """Main security scanner class that orchestrates different security tools.

    Every ``scan_*`` / ``audit_*`` method returns a dict of the form
    ``{"tool": <name>, "findings": [...], "errors": [...]}``.
    """

    def __init__(self, output_dir: str = "security-reports", format_type: str = "json"):
        """
        Initialize the security scanner.

        Args:
            output_dir: Directory to store security reports. It is created
                (including missing parent directories) if it does not exist.
            format_type: Output format (json, txt, html). The value is only
                stored on the instance; reports are currently written as JSON.
        """
        self.output_dir = Path(output_dir)
        self.format_type = format_type
        # parents=True so nested paths such as "out/security/run1" work.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    def run_command(self, command: List[str], description: str) -> Tuple[bool, str, str]:
        """
        Run a command (without a shell) and return the result.

        Args:
            command: Command to run as list of strings
            description: Description of the command for logging

        Returns:
            Tuple of (success, stdout, stderr). ``success`` is True only when
            the process exits with status 0. Scanners commonly exit non-zero
            when they report findings, so callers should not treat False as
            "no usable output".
        """
        print(f"Running {description}...")
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=300  # 5 minute timeout
            )
        except subprocess.TimeoutExpired:
            return False, "", f"Command timed out: {' '.join(command)}"
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            # OSError covers a missing or non-executable binary.
            return False, "", f"Error running command: {e}"
        return result.returncode == 0, result.stdout, result.stderr

    def _run_and_load_report(
        self,
        command: List[str],
        description: str,
        report_path: Path,
        errors: List[str],
        tool_name: str,
        failure_label: str,
    ) -> Optional[Any]:
        """Run a tool that writes a JSON report file and return the parsed report.

        The report is read even when the tool exits non-zero, because a
        non-zero exit usually means "findings present". An error is appended
        to ``errors`` (and None returned) only if the report cannot be read.
        """
        success, _stdout, stderr = self.run_command(command, description)
        try:
            with open(report_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            if success:
                errors.append(f"Failed to read {tool_name} output: {e}")
            else:
                errors.append(f"{failure_label} failed: {stderr}")
            return None

    @staticmethod
    def _existing_dirs(candidates: List[str]) -> List[str]:
        """Return the entries of ``candidates`` that exist on disk."""
        return [d for d in candidates if Path(d).exists()]

    @staticmethod
    def _safety_findings(safety_data: Any) -> List[Any]:
        """Extract the vulnerability list from parsed Safety JSON output.

        Accepts both a top-level list and a dict carrying a
        ``"vulnerabilities"`` list; anything else yields no findings.
        """
        if isinstance(safety_data, list):
            return safety_data
        if isinstance(safety_data, dict):
            vulnerabilities = safety_data.get("vulnerabilities", [])
            if isinstance(vulnerabilities, list):
                return vulnerabilities
        return []

    @staticmethod
    def _pip_audit_findings(audit_data: Any) -> List[Dict[str, Any]]:
        """Flatten a parsed pip-audit JSON report into one finding per vulnerability.

        Accepts either ``{"dependencies": [...]}`` or a bare list of
        dependency entries. Each finding is the vulnerability dict extended
        with the affected ``package`` name and ``version``.
        """
        if isinstance(audit_data, dict):
            dependencies = audit_data.get("dependencies", [])
        elif isinstance(audit_data, list):
            dependencies = audit_data
        else:
            dependencies = []

        findings: List[Dict[str, Any]] = []
        for dep in dependencies:
            if not isinstance(dep, dict):
                continue
            # Skipped dependencies carry no "vulns" key.
            for vuln in dep.get("vulns", []) or []:
                if isinstance(vuln, dict):
                    findings.append(
                        {"package": dep.get("name"), "version": dep.get("version"), **vuln}
                    )
        return findings

    @staticmethod
    def _finding_severity(finding: Any) -> str:
        """Classify a finding as ``"high"``, ``"medium"`` or ``"low"``.

        Looks at ``severity``, then ``issue_severity``, then
        ``extra.severity`` and uses the first string value found. Findings
        without a recognizable severity are classified as ``"low"``.
        """
        if not isinstance(finding, dict):
            return "low"

        candidates = [finding.get("severity"), finding.get("issue_severity")]
        extra = finding.get("extra")
        if isinstance(extra, dict):
            candidates.append(extra.get("severity"))

        for value in candidates:
            if not isinstance(value, str):
                continue
            label = value.lower()
            if "high" in label or "critical" in label or label == "error":
                return "high"
            if "medium" in label or label == "warning":
                return "medium"
            return "low"
        return "low"

    def scan_python_dependencies(self) -> Dict:
        """Scan Python dependencies for known vulnerabilities using Safety."""
        print("\n=== Python Dependency Vulnerability Scan ===")
        results: Dict[str, Any] = {"tool": "safety", "findings": [], "errors": []}

        # Find all requirements files (sorted for a stable scan order)
        for req_file in sorted(Path(".").glob("requirements*.txt")):
            print(f"Scanning {req_file}...")

            command = ["safety", "check", "-r", str(req_file), "--json"]
            success, stdout, stderr = self.run_command(command, f"Safety scan for {req_file}")

            # The exit status is not used to decide whether output is usable:
            # a non-zero status is expected when vulnerabilities are found.
            if not stdout.strip():
                results["errors"].append(f"Safety scan failed for {req_file}: {stderr}")
                continue

            try:
                safety_data = json.loads(stdout)
            except json.JSONDecodeError:
                if success:
                    results["errors"].append(f"Failed to parse Safety output for {req_file}")
                else:
                    results["errors"].append(f"Safety scan failed for {req_file}: {stderr}")
                continue

            results["findings"].extend(self._safety_findings(safety_data))

            # Save detailed report
            output_file = self.output_dir / f"safety_{req_file.stem}_{self.timestamp}.json"
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(safety_data, f, indent=2)

        return results

    def scan_python_code(self) -> Dict:
        """Scan Python code for security issues using Bandit."""
        print("\n=== Python Code Security Scan ===")
        results: Dict[str, Any] = {"tool": "bandit", "findings": [], "errors": []}

        # Directories to scan
        existing_dirs = self._existing_dirs(["tf_shell", "tf_shell_ml", "tools"])
        if not existing_dirs:
            results["errors"].append("No Python directories found to scan")
            return results

        report_path = self.output_dir / f"bandit_{self.timestamp}.json"
        command = [
            "bandit", "-r", *existing_dirs,
            "-f", "json",
            "-o", str(report_path)
        ]

        bandit_data = self._run_and_load_report(
            command, "Bandit security scan", report_path,
            results["errors"], "Bandit", "Bandit scan",
        )
        if isinstance(bandit_data, dict):
            results["findings"] = bandit_data.get("results", [])

        return results

    def scan_with_semgrep(self) -> Dict:
        """Run Semgrep static analysis security scanner."""
        print("\n=== Semgrep Static Analysis Scan ===")
        results: Dict[str, Any] = {"tool": "semgrep", "findings": [], "errors": []}

        # Only pass targets that exist, mirroring scan_python_code().
        targets = self._existing_dirs(["tf_shell/", "tf_shell_ml/"])
        if not targets:
            results["errors"].append("No Python directories found to scan")
            return results

        report_path = self.output_dir / f"semgrep_{self.timestamp}.json"
        command = [
            "semgrep", "--config=auto", "--json",
            "--output", str(report_path),
            *targets
        ]

        semgrep_data = self._run_and_load_report(
            command, "Semgrep security scan", report_path,
            results["errors"], "Semgrep", "Semgrep scan",
        )
        if isinstance(semgrep_data, dict):
            results["findings"] = semgrep_data.get("results", [])

        return results

    def audit_pip_packages(self) -> Dict:
        """Audit installed pip packages for vulnerabilities."""
        print("\n=== Pip Package Audit ===")
        results: Dict[str, Any] = {"tool": "pip-audit", "findings": [], "errors": []}

        report_path = self.output_dir / f"pip_audit_{self.timestamp}.json"
        command = ["pip-audit", "--format=json", "--output", str(report_path)]

        audit_data = self._run_and_load_report(
            command, "Pip package audit", report_path,
            results["errors"], "pip-audit", "Pip audit",
        )
        if audit_data is not None:
            results["findings"] = self._pip_audit_findings(audit_data)

        return results

    def generate_summary_report(self, scan_results: List[Dict]) -> int:
        """Generate a summary report of all security scans.

        Writes ``security_summary_<timestamp>.json`` into the output
        directory and prints the totals.

        Args:
            scan_results: Result dicts as returned by the scan methods.

        Returns:
            1 if at least one high-severity finding was counted, else 0.
        """
        print("\n=== Generating Security Summary Report ===")

        counts = {
            "total_findings": 0,
            "high_severity": 0,
            "medium_severity": 0,
            "low_severity": 0,
            "errors": 0
        }
        summary = {
            "scan_timestamp": datetime.now().isoformat(),
            "project": "tf-shell",
            "total_scans": len(scan_results),
            "scans": scan_results,
            "summary": counts
        }

        # Count findings, errors and severity levels
        for scan in scan_results:
            findings = scan.get("findings", [])
            counts["total_findings"] += len(findings)
            counts["errors"] += len(scan.get("errors", []))
            for finding in findings:
                counts[f"{self._finding_severity(finding)}_severity"] += 1

        # Save summary report
        summary_file = self.output_dir / f"security_summary_{self.timestamp}.json"
        with open(summary_file, "w", encoding="utf-8") as f:
            json.dump(summary, f, indent=2)

        # Print summary to console
        print("\n=== Security Scan Summary ===")
        print(f"Total scans completed: {summary['total_scans']}")
        print(f"Total findings: {counts['total_findings']}")
        print(f"High severity: {counts['high_severity']}")
        print(f"Medium severity: {counts['medium_severity']}")
        print(f"Low severity: {counts['low_severity']}")
        print(f"Errors: {counts['errors']}")
        print(f"\nDetailed reports saved to: {self.output_dir}")

        # Return non-zero exit code if high severity issues found
        if counts["high_severity"] > 0:
            print("\n⚠️ HIGH SEVERITY VULNERABILITIES FOUND!")
            print("Please review and address these issues before proceeding.")
            return 1
        if counts["medium_severity"] > 0:
            print("\n⚠️ Medium severity vulnerabilities found.")
            print("Consider addressing these issues.")

        return 0

    def run_all_scans(self) -> int:
        """Run all security scans and generate reports.

        Returns:
            The exit code from generate_summary_report(), or 1 if scanning
            was interrupted or raised an unexpected error.
        """
        print("Starting comprehensive security scan for tf-shell...")
        print(f"Reports will be saved to: {self.output_dir}")

        scan_results = []

        # Run all scans
        try:
            scan_results.append(self.scan_python_dependencies())
            scan_results.append(self.scan_python_code())
            scan_results.append(self.scan_with_semgrep())
            scan_results.append(self.audit_pip_packages())
        except KeyboardInterrupt:
            print("\nScan interrupted by user.")
            return 1
        except Exception as e:  # top-level boundary: report and fail the run
            print(f"Unexpected error during scanning: {e}")
            return 1

        # Generate summary report
        return self.generate_summary_report(scan_results)


def main():
    """Main entry point for the security scanner.

    Returns:
        Process exit code: 1 if a required tool is missing, otherwise the
        result of SecurityScanner.run_all_scans().
    """
    parser = argparse.ArgumentParser(
        description="Run security vulnerability scans for tf-shell project"
    )
    parser.add_argument(
        "--output-dir",
        default="security-reports",
        help="Directory to store security reports (default: security-reports)"
    )
    parser.add_argument(
        "--format",
        choices=["json", "txt", "html"],
        default="json",
        help="Output format for reports (default: json)"
    )

    args = parser.parse_args()

    # Check if required tools are installed
    required_tools = ["safety", "bandit", "semgrep", "pip-audit"]
    missing_tools = []

    for tool in required_tools:
        try:
            subprocess.run([tool, "--version"], capture_output=True, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            missing_tools.append(tool)

    if missing_tools:
        print(f"Error: Missing required security tools: {', '.join(missing_tools)}")
        print("Please install them with: pip install -r requirements-security.txt")
        return 1

    # Run security scans
    scanner = SecurityScanner(args.output_dir, args.format)
    return scanner.run_all_scans()


if __name__ == "__main__":
    sys.exit(main())