diff --git a/.github/workflows/test-database-processing.yml b/.github/workflows/test-database-processing.yml
new file mode 100644
index 0000000..3f6b609
--- /dev/null
+++ b/.github/workflows/test-database-processing.yml
@@ -0,0 +1,96 @@
+# Smoke-tests the database pipeline: extracts latest.7z, runs
+# add_primary_keys.py and create_views.sh on latest.db, then checks the result.
+name: Test Database Processing
+
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  test-database-processing:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          lfs: true
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+
+      - name: Install dependencies (7z)
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y p7zip-full unzip
+
+      - name: Install SQLite from official release
+        run: |
+          # Ubuntu's SQLite 3.45.1 has a bug causing segfaults with complex views
+          # Download official precompiled SQLite 3.50.4 instead
+          SQLITE_VERSION=3500400
+          SQLITE_YEAR=2025
+          wget "https://www.sqlite.org/${SQLITE_YEAR}/sqlite-tools-linux-x64-${SQLITE_VERSION}.zip"
+          unzip "sqlite-tools-linux-x64-${SQLITE_VERSION}.zip"
+          ls -la
+          sudo cp sqlite3 /usr/local/bin/
+          sudo chmod +x /usr/local/bin/sqlite3
+          # Make sure /usr/local/bin is in PATH first
+          echo "/usr/local/bin" >> "$GITHUB_PATH"
+
+      - name: Verify installations and SQLite versions
+        run: |
+          echo "=== 7z version ==="
+          7z --help | head -5
+          echo ""
+          echo "=== System sqlite3 version ==="
+          /usr/local/bin/sqlite3 --version
+          echo ""
+          echo "=== Python sqlite3 module version ==="
+          python -c "import sqlite3; print('Python sqlite3 module uses SQLite version:', sqlite3.sqlite_version)"
+
+      - name: Extract latest.7z
+        run: |
+          echo "Extracting latest.7z..."
+          7z x latest.7z
+          echo "Extraction complete."
+          ls -lh latest.db
+
+      - name: Run add_primary_keys.py
+        run: |
+          echo "Running add_primary_keys.py..."
+          python scripts/add_primary_keys.py --db latest.db
+          echo "Primary keys added successfully."
+
+      - name: Run create_views.sh
+        run: |
+          echo "Running create_views.sh..."
+          chmod +x scripts/create_views.sh
+          scripts/create_views.sh latest.db
+          echo "Views created successfully."
+
+      - name: Verify database integrity
+        run: |
+          echo "Checking database integrity..."
+          # PRAGMA integrity_check reports problems as result rows while sqlite3
+          # still exits 0, so compare the output against the single "ok" row.
+          integrity_result="$(sqlite3 latest.db "PRAGMA integrity_check;")"
+          echo "$integrity_result"
+          if [ "$integrity_result" != "ok" ]; then
+            echo "ERROR: database integrity check failed" >&2
+            exit 1
+          fi
+          echo "Verifying views exist..."
+          sqlite3 latest.db "SELECT name FROM sqlite_master WHERE type='view' ORDER BY name;"
+          echo "All checks passed!"
+
+      - name: Upload database for debugging
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: debug-db
+          path: latest.db
+          retention-days: 7
diff --git a/scripts/create_views.sh b/scripts/create_views.sh
index 8fcd223..9c70a25 100755
--- a/scripts/create_views.sh
+++ b/scripts/create_views.sh
@@ -995,23 +995,44 @@ echo "Finished view View_StatusData."
 
 echo "Created CBDB views in '$DB_PATH'."
 echo "Running sanity counts on views..."
-sqlite3 "$DB_PATH" <<'SQL'
-SELECT 'View_AltnameData' AS view_name, COUNT(*) AS row_count FROM View_AltnameData;
-SELECT 'View_Association' AS view_name, COUNT(*) AS row_count FROM View_Association;
-SELECT 'View_BiogAddrData' AS view_name, COUNT(*) AS row_count FROM View_BiogAddrData;
-SELECT 'View_BiogInstAddrData' AS view_name, COUNT(*) AS row_count FROM View_BiogInstAddrData;
-SELECT 'View_BiogInstData' AS view_name, COUNT(*) AS row_count FROM View_BiogInstData;
-SELECT 'View_BiogSourceData' AS view_name, COUNT(*) AS row_count FROM View_BiogSourceData;
-SELECT 'View_BiogTextData' AS view_name, COUNT(*) AS row_count FROM View_BiogTextData;
-SELECT 'View_Entry' AS view_name, COUNT(*) AS row_count FROM View_Entry;
-SELECT 'View_EventAddr' AS view_name, COUNT(*) AS row_count FROM View_EventAddr;
-SELECT 'View_EventData' AS view_name, COUNT(*) AS row_count FROM View_EventData;
-SELECT 'View_KinAddr' AS view_name, COUNT(*) AS row_count FROM View_KinAddr;
-SELECT 'View_People' AS view_name, COUNT(*) AS row_count FROM View_People;
-SELECT 'View_PeopleAddr' AS view_name, COUNT(*) AS row_count FROM View_PeopleAddr;
-SELECT 'View_Possessions' AS view_name, COUNT(*) AS row_count FROM View_Possessions;
-SELECT 'View_PossessionsAddr' AS view_name, COUNT(*) AS row_count FROM View_PossessionsAddr;
-SELECT 'View_PostingAddr' AS view_name, COUNT(*) AS row_count FROM View_PostingAddr;
-SELECT 'View_PostingOffice' AS view_name, COUNT(*) AS row_count FROM View_PostingOffice;
-SELECT 'View_StatusData' AS view_name, COUNT(*) AS row_count FROM View_StatusData;
-SQL
+
+# List of views to check
+VIEWS=(
+  "View_AltnameData"
+  "View_Association"
+  "View_BiogAddrData"
+  "View_BiogInstAddrData"
+  "View_BiogInstData"
+  "View_BiogSourceData"
+  "View_BiogTextData"
+  "View_Entry"
+  "View_EventAddr"
+  "View_EventData"
+  "View_KinAddr"
+  "View_People"
+  "View_PeopleAddr"
+  "View_Possessions"
+  "View_PossessionsAddr"
+  "View_PostingAddr"
+  "View_PostingOffice"
+  "View_StatusData"
+)
+
+# Check each view individually to identify which one causes issues
+# mmap_size=0 disables SQLite memory-mapped I/O for each check
+# (presumably part of isolating the crash -- confirm before removing).
+for view in "${VIEWS[@]}"; do
+  echo "Checking view: $view..."
+  if sqlite3 "$DB_PATH" "PRAGMA mmap_size=0; SELECT '$view' AS view_name, COUNT(*) AS row_count FROM $view;" 2>&1; then
+    echo " ✓ $view completed successfully"
+  else
+    # Capture the status first: any later command would overwrite $?.
+    EXIT_CODE=$?
+    echo " ✗ ERROR: $view failed with exit code $EXIT_CODE"
+    echo " Memory info:"
+    free -h 2>/dev/null || true
+    exit "$EXIT_CODE"
+  fi
+done
+
+echo "All sanity checks passed!"