From 5346bb8307befb700ca19ed31a7110641e78da3a Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Fri, 31 Oct 2025 12:08:38 -0700 Subject: [PATCH 1/9] Add GitHub workflow to validate the DB processing scripts. --- .../workflows/test-database-processing.yml | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 .github/workflows/test-database-processing.yml diff --git a/.github/workflows/test-database-processing.yml b/.github/workflows/test-database-processing.yml new file mode 100644 index 0000000..e576aaa --- /dev/null +++ b/.github/workflows/test-database-processing.yml @@ -0,0 +1,60 @@ +name: Test Database Processing + +on: + push: + branches: + - master + - update-views + pull_request: + branches: + - master + workflow_dispatch: + +jobs: + test-database-processing: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install dependencies (7z and sqlite3) + run: | + sudo apt-get update + sudo apt-get install -y p7zip-full sqlite3 + + - name: Verify 7z installation + run: 7z --help + + - name: Extract latest.7z + run: | + echo "Extracting latest.7z..." + 7z x latest.7z + echo "Extraction complete." + ls -lh latest.db + + - name: Run add_primary_keys.py + run: | + echo "Running add_primary_keys.py..." + python scripts/add_primary_keys.py --db latest.db + echo "Primary keys added successfully." + + - name: Run create_views.sh + run: | + echo "Running create_views.sh..." + chmod +x scripts/create_views.sh + scripts/create_views.sh latest.db + echo "Views created successfully." + + - name: Verify database integrity + run: | + echo "Checking database integrity..." + sqlite3 latest.db "PRAGMA integrity_check;" + echo "Verifying views exist..." + sqlite3 latest.db "SELECT name FROM sqlite_master WHERE type='view' ORDER BY name;" + echo "All checks passed!" From 86a930fa3f63124468992c1a9df303b557627d63 Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Fri, 31 Oct 2025 12:18:37 -0700 Subject: [PATCH 2/9] Update workflow to run on all branches --- .github/workflows/test-database-processing.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/test-database-processing.yml b/.github/workflows/test-database-processing.yml index e576aaa..bd2a5cd 100644 --- a/.github/workflows/test-database-processing.yml +++ b/.github/workflows/test-database-processing.yml @@ -2,12 +2,7 @@ name: Test Database Processing on: push: - branches: - - master - - update-views pull_request: - branches: - - master workflow_dispatch: jobs: From d82ef2b012b334341588f7f26d07ff87e619d6d3 Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Fri, 31 Oct 2025 12:20:53 -0700 Subject: [PATCH 3/9] Fix LFS file checkout in workflow --- .github/workflows/test-database-processing.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test-database-processing.yml b/.github/workflows/test-database-processing.yml index bd2a5cd..ede7b08 100644 --- a/.github/workflows/test-database-processing.yml +++ b/.github/workflows/test-database-processing.yml @@ -12,6 +12,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + with: + lfs: true - name: Set up Python uses: actions/setup-python@v5 From a566d863210d37f6e8a1b81b6ecf1de7e29a31f0 Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Fri, 31 Oct 2025 12:32:40 -0700 Subject: [PATCH 4/9] Update the sanity check logic to query one view at a time --- scripts/create_views.sh | 58 +++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/scripts/create_views.sh b/scripts/create_views.sh index 8fcd223..53f1d7a 100755 --- a/scripts/create_views.sh +++ b/scripts/create_views.sh @@ -995,23 +995,41 @@ echo "Finished view View_StatusData." echo "Created CBDB views in '$DB_PATH'." echo "Running sanity counts on views..." -sqlite3 "$DB_PATH" <<'SQL' -SELECT 'View_AltnameData' AS view_name, COUNT(*) AS row_count FROM View_AltnameData; -SELECT 'View_Association' AS view_name, COUNT(*) AS row_count FROM View_Association; -SELECT 'View_BiogAddrData' AS view_name, COUNT(*) AS row_count FROM View_BiogAddrData; -SELECT 'View_BiogInstAddrData' AS view_name, COUNT(*) AS row_count FROM View_BiogInstAddrData; -SELECT 'View_BiogInstData' AS view_name, COUNT(*) AS row_count FROM View_BiogInstData; -SELECT 'View_BiogSourceData' AS view_name, COUNT(*) AS row_count FROM View_BiogSourceData; -SELECT 'View_BiogTextData' AS view_name, COUNT(*) AS row_count FROM View_BiogTextData; -SELECT 'View_Entry' AS view_name, COUNT(*) AS row_count FROM View_Entry; -SELECT 'View_EventAddr' AS view_name, COUNT(*) AS row_count FROM View_EventAddr; -SELECT 'View_EventData' AS view_name, COUNT(*) AS row_count FROM View_EventData; -SELECT 'View_KinAddr' AS view_name, COUNT(*) AS row_count FROM View_KinAddr; -SELECT 'View_People' AS view_name, COUNT(*) AS row_count FROM View_People; -SELECT 'View_PeopleAddr' AS view_name, COUNT(*) AS row_count FROM View_PeopleAddr; -SELECT 'View_Possessions' AS view_name, COUNT(*) AS row_count FROM View_Possessions; -SELECT 'View_PossessionsAddr' AS view_name, COUNT(*) AS row_count FROM View_PossessionsAddr; -SELECT 'View_PostingAddr' AS view_name, COUNT(*) AS row_count FROM View_PostingAddr; -SELECT 'View_PostingOffice' AS view_name, COUNT(*) AS row_count FROM View_PostingOffice; -SELECT 'View_StatusData' AS view_name, COUNT(*) AS row_count FROM View_StatusData; -SQL + +# List of views to check +VIEWS=( + "View_AltnameData" + "View_Association" + "View_BiogAddrData" + "View_BiogInstAddrData" + "View_BiogInstData" + "View_BiogSourceData" + "View_BiogTextData" + "View_Entry" + "View_EventAddr" + "View_EventData" + "View_KinAddr" + "View_People" + "View_PeopleAddr" + "View_Possessions" + "View_PossessionsAddr" + "View_PostingAddr" + "View_PostingOffice" + "View_StatusData" +) + +# Check each view individually to identify which one causes issues +for view in "${VIEWS[@]}"; do + echo "Checking view: $view..." + if sqlite3 "$DB_PATH" "SELECT '$view' AS view_name, COUNT(*) AS row_count FROM $view;" 2>&1; then + echo " ✓ $view completed successfully" + else + EXIT_CODE=$? + echo " ✗ ERROR: $view failed with exit code $EXIT_CODE" + echo " Memory info:" + free -h 2>/dev/null || true + exit $EXIT_CODE + fi +done + +echo "All sanity checks passed!" From c304c72e3b8a99c922916e231774d59700b2fb1b Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Fri, 31 Oct 2025 12:41:44 -0700 Subject: [PATCH 5/9] Disable mmap_size when querying the views --- scripts/create_views.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/create_views.sh b/scripts/create_views.sh index 53f1d7a..9c70a25 100755 --- a/scripts/create_views.sh +++ b/scripts/create_views.sh @@ -1021,7 +1021,7 @@ VIEWS=( # Check each view individually to identify which one causes issues for view in "${VIEWS[@]}"; do echo "Checking view: $view..." - if sqlite3 "$DB_PATH" "SELECT '$view' AS view_name, COUNT(*) AS row_count FROM $view;" 2>&1; then + if sqlite3 "$DB_PATH" "PRAGMA mmap_size=0; SELECT '$view' AS view_name, COUNT(*) AS row_count FROM $view;" 2>&1; then echo " ✓ $view completed successfully" else EXIT_CODE=$? From 6598492bd30ce137e619569d8618d555a1e98370 Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Fri, 31 Oct 2025 12:48:55 -0700 Subject: [PATCH 6/9] Upload the db even upon failure --- .github/workflows/test-database-processing.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test-database-processing.yml b/.github/workflows/test-database-processing.yml index ede7b08..e6e2e8c 100644 --- a/.github/workflows/test-database-processing.yml +++ b/.github/workflows/test-database-processing.yml @@ -55,3 +55,11 @@ jobs: echo "Verifying views exist..." sqlite3 latest.db "SELECT name FROM sqlite_master WHERE type='view' ORDER BY name;" echo "All checks passed!" + + - name: Upload database for debugging + if: always() + uses: actions/upload-artifact@v4 + with: + name: debug-db + path: latest.db + retention-days: 7 From e4191856afb4efe0318daefdc1ec211475879cc6 Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Fri, 31 Oct 2025 13:07:34 -0700 Subject: [PATCH 7/9] Use official SQLite 3.50.4 to fix Ubuntu's segfault bug --- .../workflows/test-database-processing.yml | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-database-processing.yml b/.github/workflows/test-database-processing.yml index e6e2e8c..dd88193 100644 --- a/.github/workflows/test-database-processing.yml +++ b/.github/workflows/test-database-processing.yml @@ -20,13 +20,34 @@ jobs: with: python-version: '3.x' - - name: Install dependencies (7z and sqlite3) + - name: Install dependencies (7z) run: | sudo apt-get update - sudo apt-get install -y p7zip-full sqlite3 + sudo apt-get install -y p7zip-full unzip - - name: Verify 7z installation - run: 7z --help + - name: Install SQLite from official release + run: | + # Ubuntu's SQLite 3.45.1 has a bug causing segfaults with complex views + # Download official precompiled SQLite 3.50.4 instead + SQLITE_VERSION=3500400 + SQLITE_YEAR=2025 + wget https://www.sqlite.org/${SQLITE_YEAR}/sqlite-tools-linux-x64-${SQLITE_VERSION}.zip + unzip -q sqlite-tools-linux-x64-${SQLITE_VERSION}.zip + sudo cp sqlite-tools-linux-x64-${SQLITE_VERSION}/sqlite3 /usr/local/bin/ + sudo chmod +x /usr/local/bin/sqlite3 + # Make sure /usr/local/bin is in PATH first + echo "/usr/local/bin" >> $GITHUB_PATH + + - name: Verify installations and SQLite versions + run: | + echo "=== 7z version ===" + 7z --help | head -5 + echo "" + echo "=== System sqlite3 version ===" + /usr/local/bin/sqlite3 --version + echo "" + echo "=== Python sqlite3 module version ===" + python -c "import sqlite3; print('SQLite version:', sqlite3.sqlite_version); print('Module version:', sqlite3.version)" - name: Extract latest.7z run: | From 7468e1009d7761129fcddde38972701b216a1430 Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Fri, 31 Oct 2025 13:10:54 -0700 Subject: [PATCH 8/9] Fix SQLite file path after extraction --- .github/workflows/test-database-processing.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-database-processing.yml b/.github/workflows/test-database-processing.yml index dd88193..6ca2e63 100644 --- a/.github/workflows/test-database-processing.yml +++ b/.github/workflows/test-database-processing.yml @@ -32,8 +32,9 @@ jobs: SQLITE_VERSION=3500400 SQLITE_YEAR=2025 wget https://www.sqlite.org/${SQLITE_YEAR}/sqlite-tools-linux-x64-${SQLITE_VERSION}.zip - unzip -q sqlite-tools-linux-x64-${SQLITE_VERSION}.zip - sudo cp sqlite-tools-linux-x64-${SQLITE_VERSION}/sqlite3 /usr/local/bin/ + unzip sqlite-tools-linux-x64-${SQLITE_VERSION}.zip + ls -la + sudo cp sqlite3 /usr/local/bin/ sudo chmod +x /usr/local/bin/sqlite3 # Make sure /usr/local/bin is in PATH first echo "/usr/local/bin" >> $GITHUB_PATH From 1fff849e15f7e8f2ea7c01d043a6bcf61bd626e3 Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Fri, 31 Oct 2025 13:14:42 -0700 Subject: [PATCH 9/9] Fix deprecated sqlite3.version attribute check --- .github/workflows/test-database-processing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-database-processing.yml b/.github/workflows/test-database-processing.yml index 6ca2e63..3f6b609 100644 --- a/.github/workflows/test-database-processing.yml +++ b/.github/workflows/test-database-processing.yml @@ -48,7 +48,7 @@ jobs: /usr/local/bin/sqlite3 --version echo "" echo "=== Python sqlite3 module version ===" - python -c "import sqlite3; print('SQLite version:', sqlite3.sqlite_version); print('Module version:', sqlite3.version)" + python -c "import sqlite3; print('Python sqlite3 module uses SQLite version:', sqlite3.sqlite_version)" - name: Extract latest.7z run: |