From 81760f6843876f8958a15d27953b9474c1c2fc4c Mon Sep 17 00:00:00 2001 From: Marco Bambini Date: Wed, 15 Oct 2025 06:27:46 +0200 Subject: [PATCH 1/3] Increased performance in cloudsync_refill_metatable --- src/cloudsync.c | 23 +++++++++++++++++------ src/cloudsync.h | 2 +- test/unit.c | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/cloudsync.c b/src/cloudsync.c index b4806ae..da6741a 100644 --- a/src/cloudsync.c +++ b/src/cloudsync.c @@ -1712,24 +1712,34 @@ int cloudsync_refill_metatable (sqlite3 *db, cloudsync_context *data, const char char *pkdecodeval = (pkdecode) ? pkdecode : "cloudsync_pk_decode(pk, 1) AS rowid"; cloudsync_memory_free(sql); + /* not needed in VERSION 2 sql = cloudsync_memory_mprintf("SELECT group_concat('\"' || format('%%w', name) || '\"' || ' = cloudsync_pk_decode(pk, ' || pk || ')', ' AND ') FROM pragma_table_info('%q') WHERE pk>0 ORDER BY pk;", table_name); char *pkonclause = dbutils_text_select(db, sql); char *pkonclauseval = (pkonclause) ? pkonclause : "rowid = cloudsync_pk_decode(pk, 1) AS rowid"; cloudsync_memory_free(sql); - + */ + sql = cloudsync_memory_mprintf("SELECT cloudsync_insert('%q', %s) FROM (SELECT %s FROM \"%w\" EXCEPT SELECT %s FROM \"%w_cloudsync\");", table_name, pkvalues_identifiers, pkvalues_identifiers, table_name, pkdecodeval, table_name); int rc = sqlite3_exec(db, sql, NULL, NULL, NULL); cloudsync_memory_free(sql); if (rc != SQLITE_OK) goto finalize; - + // fill missing colums // for each non-pk column: - sql = cloudsync_memory_mprintf("SELECT cloudsync_pk_encode(%s) FROM \"%w\" LEFT JOIN \"%w_cloudsync\" ON %s AND \"%w_cloudsync\".col_name = ? WHERE \"%w_cloudsync\".db_version IS NULL", pkvalues_identifiers, table_name, table_name, pkonclauseval, table_name, table_name); - rc = sqlite3_prepare(db, sql, -1, &vm, NULL); + // VERSION 2 (October 15, 2025) + // previous query WAS + // sql = cloudsync_memory_mprintf("SELECT cloudsync_pk_encode(%s) FROM \"%w\" LEFT JOIN \"%w_cloudsync\" ON %s AND \"%w_cloudsync\".col_name = ? WHERE \"%w_cloudsync\".db_version IS NULL", pkvalues_identifiers, table_name, table_name, pkonclauseval, table_name, table_name); + // but it takes hours with large tables (100,000 rows, 25 columns, of which 18 was primary keys) + // + // The new query does 1 encode per source row and one indexed NOT-EXISTS probe. + // The old plan does many decodes per candidate and can’t use an index to rule out matches quickly—so it burns CPU and I/O. + + sql = cloudsync_memory_mprintf("WITH _cstemp1 AS (SELECT cloudsync_pk_encode(%s) AS pk FROM \"%w\") SELECT _cstemp1.pk FROM _cstemp1 WHERE NOT EXISTS (SELECT 1 FROM \"%w_cloudsync\" _cstemp2 WHERE _cstemp2.pk = _cstemp1.pk AND _cstemp2.col_name = ?);", pkvalues_identifiers, table_name, table_name); + rc = sqlite3_prepare_v3(db, sql, -1, SQLITE_PREPARE_PERSISTENT, &vm, NULL); cloudsync_memory_free(sql); if (rc != SQLITE_OK) goto finalize; - + for (int i=0; incols; ++i) { char *col_name = table->col_name[i]; @@ -1758,7 +1768,8 @@ int cloudsync_refill_metatable (sqlite3 *db, cloudsync_context *data, const char if (rc != SQLITE_OK) DEBUG_ALWAYS("cloudsync_refill_metatable error: %s", sqlite3_errmsg(db)); if (pkclause_identifiers) cloudsync_memory_free(pkclause_identifiers); if (pkdecode) cloudsync_memory_free(pkdecode); - if (pkonclause) cloudsync_memory_free(pkonclause); + // not needed in VERSION 2 + // if (pkonclause) cloudsync_memory_free(pkonclause); if (vm) sqlite3_finalize(vm); return rc; } diff --git a/src/cloudsync.h b/src/cloudsync.h index c92f40f..0f4be61 100644 --- a/src/cloudsync.h +++ b/src/cloudsync.h @@ -20,7 +20,7 @@ extern "C" { #endif -#define CLOUDSYNC_VERSION "0.8.41" +#define CLOUDSYNC_VERSION "0.8.50" int sqlite3_cloudsync_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi); diff --git a/test/unit.c b/test/unit.c index 80851bf..cee4206 100644 --- a/test/unit.c +++ b/test/unit.c @@ -5930,7 +5930,7 @@ int main(int argc, const char * argv[]) { cloudsync_set_payload_apply_callback(db, unittest_payload_apply_rls_callback); printf("Testing CloudSync version %s\n", CLOUDSYNC_VERSION); - printf("===============================\n"); + printf("=================================\n"); result += test_report("PK Test:", do_test_pk(db, 10000, print_result)); result += test_report("UUID Test:", do_test_uuid(db, 1000, print_result)); From 0d76e66050e3cd85cd17847a8cedb4695d90b6cc Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 15 Oct 2025 15:55:33 -0600 Subject: [PATCH 2/3] chore: remove old code --- src/cloudsync.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/cloudsync.c b/src/cloudsync.c index da6741a..76598b1 100644 --- a/src/cloudsync.c +++ b/src/cloudsync.c @@ -1711,13 +1711,6 @@ int cloudsync_refill_metatable (sqlite3 *db, cloudsync_context *data, const char char *pkdecode = dbutils_text_select(db, sql); char *pkdecodeval = (pkdecode) ? pkdecode : "cloudsync_pk_decode(pk, 1) AS rowid"; cloudsync_memory_free(sql); - - /* not needed in VERSION 2 - sql = cloudsync_memory_mprintf("SELECT group_concat('\"' || format('%%w', name) || '\"' || ' = cloudsync_pk_decode(pk, ' || pk || ')', ' AND ') FROM pragma_table_info('%q') WHERE pk>0 ORDER BY pk;", table_name); - char *pkonclause = dbutils_text_select(db, sql); - char *pkonclauseval = (pkonclause) ? pkonclause : "rowid = cloudsync_pk_decode(pk, 1) AS rowid"; - cloudsync_memory_free(sql); - */ sql = cloudsync_memory_mprintf("SELECT cloudsync_insert('%q', %s) FROM (SELECT %s FROM \"%w\" EXCEPT SELECT %s FROM \"%w_cloudsync\");", table_name, pkvalues_identifiers, pkvalues_identifiers, table_name, pkdecodeval, table_name); int rc = sqlite3_exec(db, sql, NULL, NULL, NULL); @@ -1726,12 +1719,6 @@ int cloudsync_refill_metatable (sqlite3 *db, cloudsync_context *data, const char // fill missing colums // for each non-pk column: - - // VERSION 2 (October 15, 2025) - // previous query WAS - // sql = cloudsync_memory_mprintf("SELECT cloudsync_pk_encode(%s) FROM \"%w\" LEFT JOIN \"%w_cloudsync\" ON %s AND \"%w_cloudsync\".col_name = ? WHERE \"%w_cloudsync\".db_version IS NULL", pkvalues_identifiers, table_name, table_name, pkonclauseval, table_name, table_name); - // but it takes hours with large tables (100,000 rows, 25 columns, of which 18 was primary keys) - // // The new query does 1 encode per source row and one indexed NOT-EXISTS probe. // The old plan does many decodes per candidate and can’t use an index to rule out matches quickly—so it burns CPU and I/O. @@ -1768,8 +1755,6 @@ int cloudsync_refill_metatable (sqlite3 *db, cloudsync_context *data, const char if (rc != SQLITE_OK) DEBUG_ALWAYS("cloudsync_refill_metatable error: %s", sqlite3_errmsg(db)); if (pkclause_identifiers) cloudsync_memory_free(pkclause_identifiers); if (pkdecode) cloudsync_memory_free(pkdecode); - // not needed in VERSION 2 - // if (pkonclause) cloudsync_memory_free(pkonclause); if (vm) sqlite3_finalize(vm); return rc; } From a7f1862098addc7d023ee33097dfc0c39ebaf1a8 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 15 Oct 2025 16:04:11 -0600 Subject: [PATCH 3/3] chore: bump version --- src/cloudsync.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cloudsync.h b/src/cloudsync.h index 0f4be61..55bd604 100644 --- a/src/cloudsync.h +++ b/src/cloudsync.h @@ -20,7 +20,7 @@ extern "C" { #endif -#define CLOUDSYNC_VERSION "0.8.50" +#define CLOUDSYNC_VERSION "0.8.51" int sqlite3_cloudsync_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi);