From 29028effd4daebdc21b549e337cf700540d5e301 Mon Sep 17 00:00:00 2001 From: Orual Date: Fri, 28 Nov 2025 17:34:10 -0500 Subject: [PATCH 01/40] rewrite the damn thing part the first --- Cargo.lock | 882 ++++++++++- crates/pattern_db/.gitignore | 4 + crates/pattern_db/Cargo.toml | 59 + crates/pattern_db/src/connection.rs | 172 ++ crates/pattern_db/src/error.rs | 88 ++ crates/pattern_db/src/lib.rs | 70 + crates/pattern_db/src/models/agent.rs | 139 ++ crates/pattern_db/src/models/coordination.rs | 277 ++++ crates/pattern_db/src/models/memory.rs | 275 ++++ crates/pattern_db/src/models/message.rs | 141 ++ crates/pattern_db/src/models/mod.rs | 19 + crates/pattern_db/src/queries/agent.rs | 322 ++++ crates/pattern_db/src/queries/coordination.rs | 519 ++++++ crates/pattern_db/src/queries/memory.rs | 334 ++++ crates/pattern_db/src/queries/message.rs | 360 +++++ crates/pattern_db/src/queries/mod.rs | 17 + crates/pattern_db/src/vector.rs | 520 ++++++ .../partner-init-implementation.md | 81 - .../partner-initialization-plan.md | 191 --- docs/refactoring/v2-api-surface.md | 687 ++++++++ docs/refactoring/v2-constellation-forking.md | 230 +++ docs/refactoring/v2-database-design.md | 905 +++++++++++ docs/refactoring/v2-dialect-implementation.md | 1174 ++++++++++++++ docs/refactoring/v2-memory-system.md | 1408 +++++++++++++++++ docs/refactoring/v2-migration-path.md | 783 +++++++++ docs/refactoring/v2-overview.md | 159 ++ docs/refactoring/v2-pattern-db-status.md | 186 +++ docs/refactoring/v2-pattern-dialect.md | 468 ++++++ docs/refactoring/v2-remote-presence.md | 838 ++++++++++ nix/modules/devshell.nix | 2 + nix/modules/rust.nix | 17 +- 31 files changed, 11040 insertions(+), 287 deletions(-) create mode 100644 crates/pattern_db/.gitignore create mode 100644 crates/pattern_db/Cargo.toml create mode 100644 crates/pattern_db/src/connection.rs create mode 100644 crates/pattern_db/src/error.rs create mode 100644 crates/pattern_db/src/lib.rs create mode 100644 crates/pattern_db/src/models/agent.rs create mode 100644 crates/pattern_db/src/models/coordination.rs create mode 100644 crates/pattern_db/src/models/memory.rs create mode 100644 crates/pattern_db/src/models/message.rs create mode 100644 crates/pattern_db/src/models/mod.rs create mode 100644 crates/pattern_db/src/queries/agent.rs create mode 100644 crates/pattern_db/src/queries/coordination.rs create mode 100644 crates/pattern_db/src/queries/memory.rs create mode 100644 crates/pattern_db/src/queries/message.rs create mode 100644 crates/pattern_db/src/queries/mod.rs create mode 100644 crates/pattern_db/src/vector.rs delete mode 100644 docs/refactoring/partner-init-implementation.md delete mode 100644 docs/refactoring/partner-initialization-plan.md create mode 100644 docs/refactoring/v2-api-surface.md create mode 100644 docs/refactoring/v2-constellation-forking.md create mode 100644 docs/refactoring/v2-database-design.md create mode 100644 docs/refactoring/v2-dialect-implementation.md create mode 100644 docs/refactoring/v2-memory-system.md create mode 100644 docs/refactoring/v2-migration-path.md create mode 100644 docs/refactoring/v2-overview.md create mode 100644 docs/refactoring/v2-pattern-db-status.md create mode 100644 docs/refactoring/v2-pattern-dialect.md create mode 100644 docs/refactoring/v2-remote-presence.md diff --git a/Cargo.lock b/Cargo.lock index 2ffada5..03e3d95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -196,6 +196,12 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "append-only-bytes" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac436601d6bdde674a0d7fb593e829ffe7b3387c351b356dd20e2d40f5bf3ee5" + [[package]] name = "approx" version = "0.4.0" @@ -250,6 +256,12 @@ dependencies = [ "serde", ] +[[package]] +name = "arref" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ccd462b64c3c72f1be8305905a85d85403d768e8690c9b8bd3b9009a5761679" + [[package]] name = "ascii-canvas" version = "3.0.0" @@ -430,6 +442,24 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-polyfill" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4" +dependencies = [ + "critical-section", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -815,6 +845,18 @@ name = "bitflags" version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +dependencies = [ + "serde", +] + +[[package]] +name = "bitmaps" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2" +dependencies = [ + "typenum", +] [[package]] name = "bitvec" @@ -1312,7 +1354,7 @@ version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.106", @@ -1324,6 +1366,15 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" +[[package]] +name = "cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror 2.0.17", +] + [[package]] name = "colorchoice" version = "1.0.4" @@ -1526,6 +1577,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.5.0" @@ -1535,6 +1601,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + [[package]] name = "crokey" version = "1.3.0" @@ -1865,6 +1937,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ "const-oid", + "pem-rfc7468", "zeroize", ] @@ -2195,6 +2268,9 @@ name = "either" version = "1.15.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +dependencies = [ + "serde", +] [[package]] name = "elliptic-curve" @@ -2214,6 +2290,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + [[package]] name = "ena" version = "0.14.3" @@ -2244,13 +2332,43 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" +[[package]] +name = "ensure-cov" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33753185802e107b8fa907192af1f0eca13b1fb33327a59266d650fef29b2b4e" + +[[package]] +name = "enum-as-inner" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "enum-as-inner" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" dependencies = [ - "heck", + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "enum_dispatch" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" +dependencies = [ + "once_cell", "proc-macro2", "quote", "syn 2.0.106", @@ -2281,6 +2399,17 @@ dependencies = [ "cc", ] +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + [[package]] name = "event-listener" version = "5.4.1" @@ -2539,6 +2668,17 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + [[package]] name = "futures-io" version = "0.3.31" @@ -2880,6 +3020,20 @@ dependencies = [ "value-ext", ] +[[package]] +name = "generator" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows 0.61.3", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -2891,6 +3045,20 @@ dependencies = [ "zeroize", ] +[[package]] +name = "generic-btree" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c1bce85c110ab718fd139e0cc89c51b63bd647b14a767e24bdfc77c83df79b" +dependencies = [ + "arref", + "heapless 0.9.2", + "itertools 0.11.0", + "loro-thunderdome", + "proc-macro2", + "rustc-hash", +] + [[package]] name = "geo" version = "0.28.0" @@ -3047,6 +3215,15 @@ 
dependencies = [ "rand_distr", ] +[[package]] +name = "hash32" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" +dependencies = [ + "byteorder", +] + [[package]] name = "hash32" version = "0.3.1" @@ -3088,6 +3265,15 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "headers" version = "0.4.1" @@ -3112,16 +3298,46 @@ dependencies = [ "http 1.3.1", ] +[[package]] +name = "heapless" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f" +dependencies = [ + "atomic-polyfill", + "hash32 0.2.1", + "rustc_version", + "serde", + "spin", + "stable_deref_trait", +] + [[package]] name = "heapless" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" dependencies = [ - "hash32", + "hash32 0.3.1", + "stable_deref_trait", +] + +[[package]] +name = "heapless" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af2455f757db2b292a9b1768c4b70186d443bcb3b316252d6b540aec1cd89ed" +dependencies = [ + "hash32 0.3.1", "stable_deref_trait", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -3170,7 +3386,7 @@ dependencies = [ "async-trait", "cfg-if", "data-encoding", - "enum-as-inner", + "enum-as-inner 0.6.1", "futures-channel", "futures-io", "futures-util", @@ -3206,6 +3422,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + [[package]] name = "hmac" version = "0.12.1" @@ -3215,6 +3440,15 @@ dependencies = [ "digest", ] +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.1", +] + [[package]] name = "html2md" version = "0.2.15" @@ -3597,6 +3831,21 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "im" +version = "15.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0acd33ff0285af998aaf9b57342af478078f53492322fafc47450e09397e0e9" +dependencies = [ + "bitmaps", + "rand_core 0.6.4", + "rand_xoshiro", + "serde", + "sized-chunks", + "typenum", + "version_check", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -3786,6 +4035,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -4011,6 +4269,15 
@@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] + +[[package]] +name = "leb128" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" [[package]] name = "levenshtein" @@ -4060,6 +4327,17 @@ dependencies = [ "redox_syscall", ] +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linfa-linalg" version = "0.1.0" @@ -4118,6 +4396,156 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "serde", + "serde_json", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "loro" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "218f2e4429474eae53a7fc71fac8e4566e0765fd1ec043546cf8ec62b3dc8902" +dependencies = [ + "enum-as-inner 0.6.1", + "generic-btree", + "loro-common", + "loro-delta", + "loro-internal", + "loro-kv-store", + "rustc-hash", + "tracing", +] + +[[package]] +name = "loro-common" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70363ea05a9c507fd9d58b65dc414bf515f636d69d8ab53e50ecbe8d27eef90c" +dependencies = [ + "arbitrary", + "enum-as-inner 0.6.1", + "leb128", + "loro-rle", + "nonmax", + "rustc-hash", + "serde", + "serde_columnar", + "serde_json", + "thiserror 1.0.69", +] + +[[package]] +name = "loro-delta" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eafa788a72c1cbf0b7dc08a862cd7cc31b96d99c2ef749cdc94c2330f9494d3" +dependencies = [ + "arrayvec", + "enum-as-inner 0.5.1", + "generic-btree", + "heapless 0.8.0", +] + +[[package]] +name = "loro-internal" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c99e5b3617d0a2b586704de6086e986a4fae2194c7cd7ac17f2fdf70a48740" +dependencies = [ + "append-only-bytes", + "arref", + "bytes", + "either", + "ensure-cov", + "enum-as-inner 0.6.1", + "enum_dispatch", + "generic-btree", + "getrandom 0.2.16", + "im", + "itertools 0.12.1", + "leb128", + "loom", + "loro-common", + "loro-delta", + "loro-kv-store", + "loro-rle", + "loro_fractional_index", + "md5", + "nonmax", + "num", + "num-traits", + "once_cell", + "postcard", + "pretty_assertions", + "rand 0.8.5", + "rustc-hash", + "serde", + "serde_columnar", + "serde_json", + "smallvec", + "thiserror 1.0.69", + "thread_local", + "tracing", + "wasm-bindgen", + "xxhash-rust", +] + +[[package]] +name = "loro-kv-store" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78beebc933a33c26495c9a98f05b38bc0a4c0a337ecfbd3146ce1f9437eec71f" +dependencies = [ + "bytes", + "ensure-cov", + "loro-common", + "lz4_flex", + "once_cell", + "quick_cache 0.6.16", + "rustc-hash", + "tracing", + "xxhash-rust", +] + 
+[[package]] +name = "loro-rle" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76400c3eea6bb39b013406acce964a8db39311534e308286c8d8721baba8ee20" +dependencies = [ + "append-only-bytes", + "num", + "smallvec", +] + +[[package]] +name = "loro-thunderdome" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f3d053a135388e6b1df14e8af1212af5064746e9b87a06a345a7a779ee9695a" + +[[package]] +name = "loro_fractional_index" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "427c8ea186958094052b971fe7e322a934b034c3bf62f0458ccea04fcd687ba1" +dependencies = [ + "once_cell", + "rand 0.8.5", + "serde", +] + [[package]] name = "lru" version = "0.12.5" @@ -4142,6 +4570,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash", +] + [[package]] name = "mac" version = "0.1.1" @@ -4315,6 +4752,12 @@ dependencies = [ "digest", ] +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + [[package]] name = "memchr" version = "2.7.6" @@ -4718,6 +5161,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nonmax" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "610a5acd306ec67f907abe5567859a3c693fb9886eb1f012ab8f2a47bef3db51" + [[package]] name = "notify" version = "7.0.0" @@ -4788,6 +5237,22 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -5338,6 +5803,26 @@ dependencies = [ "uuid", ] +[[package]] +name = "pattern_db" +version = "0.4.0" +dependencies = [ + "chrono", + "libsqlite3-sys", + "loro", + "miette 7.6.0", + "serde", + "serde_json", + "sqlite-vec", + "sqlx", + "tempfile", + "thiserror 1.0.69", + "tokio", + "tracing", + "uuid", + "zerocopy", +] + [[package]] name = "pbkdf2" version = "0.12.2" @@ -5360,6 +5845,15 @@ dependencies = [ "serde", ] +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -5502,10 +5996,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] -name = "pin-utils" -version = "0.1.0" +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] [[package]] name = "pkg-config" @@ -5532,6 +6047,19 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +[[package]] +name = "postcard" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "heapless 0.7.17", + "serde", +] + [[package]] name = "potential_utf" version = "0.1.3" @@ -5921,6 +6449,15 @@ dependencies = [ "rand 0.9.2", ] +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core 0.6.4", +] + [[package]] name = "ratatui" version = "0.29.0" @@ -6464,13 +7001,33 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "rsa" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40a0376c50d0358279d9d643e4bf7b7be212f1f4ff1da9070a7b54d22ef75c88" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "signature", + "spki", + "subtle", + "zeroize", +] + [[package]] name = "rstar" version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" dependencies = [ - "heapless", + "heapless 0.8.0", "num-traits", "smallvec", ] @@ -6782,6 +7339,12 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" @@ -6963,6 +7526,31 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_columnar" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a16e404f17b16d0273460350e29b02d76ba0d70f34afdc9a4fa034c97d6c6eb" +dependencies = [ + "itertools 0.11.0", + "postcard", + "serde", + "serde_columnar_derive", + "thiserror 1.0.69", +] + +[[package]] +name = "serde_columnar_derive" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45958fce4903f67e871fbf15ac78e289269b21ebd357d6fecacdba233629112e" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -7308,6 +7896,16 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +[[package]] +name = "sized-chunks" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16d69225bde7a69b235da73377861095455d298f2b970996eec25ddbb42b3d1e" +dependencies = [ + "bitmaps", + "typenum", +] + [[package]] 
name = "slab" version = "0.4.11" @@ -7379,6 +7977,16 @@ dependencies = [ "lock_api", ] +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "spm_precompiled" version = "0.1.4" @@ -7391,6 +7999,207 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "sqlite-vec" +version = "0.1.7-alpha.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2388d9b97b32baa48a059df2f15a9bb49217fa1f9fb076e98c89e8fc02c8f2c4" +dependencies = [ + "cc", +] + +[[package]] +name = "sqlx" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" +dependencies = [ + "base64 0.22.1", + "bytes", + "chrono", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashbrown 0.15.5", + "hashlink", + "indexmap 2.11.4", + "log", + "memchr", + "once_cell", + "percent-encoding", + "serde", + "serde_json", + "sha2", + "smallvec", + "thiserror 2.0.17", + "tokio", + "tokio-stream", + "tracing", + "url", +] + +[[package]] +name = "sqlx-macros" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn 2.0.106", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" +dependencies = [ + "dotenvy", + "either", + "heck 0.5.0", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn 2.0.106", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" +dependencies = [ + "atoi", + "base64 0.22.1", + "bitflags 2.9.4", + "byteorder", + "bytes", + "chrono", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.5", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.17", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" +dependencies = [ + "atoi", + "base64 0.22.1", + "bitflags 2.9.4", + "byteorder", + "chrono", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", 
+ "once_cell", + "rand 0.8.5", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.17", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" +dependencies = [ + "atoi", + "chrono", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "serde_urlencoded", + "sqlx-core", + "thiserror 2.0.17", + "tracing", + "url", +] + [[package]] name = "sse-stream" version = "0.2.1" @@ -7478,6 +8287,17 @@ dependencies = [ "quote", ] +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + [[package]] name = "strobe-rs" version = "0.10.0" @@ -7512,7 +8332,7 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "rustversion", @@ -7777,7 +8597,7 @@ checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" dependencies = [ "bitflags 2.9.4", "byteorder", - "enum-as-inner", + "enum-as-inner 0.6.1", "libc", "thiserror 1.0.69", "walkdir", @@ -7791,7 +8611,7 @@ checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" dependencies = [ "bitflags 2.9.4", "byteorder", - "enum-as-inner", + "enum-as-inner 0.6.1", "libc", "thiserror 1.0.69", "walkdir", @@ -8666,6 +9486,12 @@ dependencies = [ "utf-8", ] +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + [[package]] name = "typemap_rev" version = "0.3.0" @@ -8722,6 +9548,12 @@ version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + [[package]] name = "unicode-ident" version = "1.0.19" @@ -8752,6 +9584,12 @@ dependencies = [ "smallvec", ] +[[package]] +name = "unicode-properties" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" + [[package]] name = "unicode-script" version = "0.5.7" @@ -8965,6 +9803,12 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + [[package]] name = "wasm-bindgen" version = "0.2.104" @@ -9119,6 +9963,16 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "whoami" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +dependencies = [ + "libredox", + "wasite", +] + [[package]] name = "widestring" version = "1.2.0" 
@@ -9669,6 +10523,12 @@ dependencies = [
  "markup5ever 0.12.1",
 ]
 
+[[package]]
+name = "xxhash-rust"
+version = "0.8.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
+
 [[package]]
 name = "yaml-rust"
 version = "0.4.5"
diff --git a/crates/pattern_db/.gitignore b/crates/pattern_db/.gitignore
new file mode 100644
index 0000000..5219f19
--- /dev/null
+++ b/crates/pattern_db/.gitignore
@@ -0,0 +1,4 @@
+# Development database for sqlx prepare
+dev.db
+dev.db-shm
+dev.db-wal
diff --git a/crates/pattern_db/Cargo.toml b/crates/pattern_db/Cargo.toml
new file mode 100644
index 0000000..09ee05e
--- /dev/null
+++ b/crates/pattern_db/Cargo.toml
@@ -0,0 +1,59 @@
+[package]
+name = "pattern_db"
+version.workspace = true
+edition.workspace = true
+authors.workspace = true
+license.workspace = true
+repository.workspace = true
+description = "SQLite storage backend for Pattern"
+
+[dependencies]
+# Async runtime
+tokio = { workspace = true }
+
+# Database - bundled SQLite for consistent builds and extension support.
+# The "sqlite" feature bundles SQLite; "sqlite-unbundled" would use the system lib.
+sqlx = { version = "0.8", features = [
+    "runtime-tokio",
+    "sqlite",
+    "migrate",
+    "json",
+    "chrono",
+] }
+
+# Serialization
+serde = { workspace = true }
+serde_json = { workspace = true }
+
+# Error handling
+thiserror = { workspace = true }
+miette = { workspace = true }
+
+# Logging
+tracing = { workspace = true }
+
+# Utilities
+chrono = { workspace = true, features = ["serde"] }
+uuid = { workspace = true }
+
+# Loro for CRDT memory blocks
+loro = "1.6"
+
+# Vector search extension - bundles C source, compiles via cc
+sqlite-vec = "0.1.7-alpha.2"
+
+# Pin to match sqlx's bundled SQLite (linkage is semver-exempt per sqlx docs).
+# Required for sqlite3_auto_extension to register sqlite-vec globally.
+libsqlite3-sys = "=0.30.1"
+
+# For efficient vector serialization (zero-copy f32 slices to bytes)
+zerocopy = { version = "0.8", features = ["derive"] }
+
+[dev-dependencies]
+tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
+tempfile = "3"
+
+[features]
+default = ["vector-search"]
+# Enable sqlite-vec for vector search
+vector-search = []
diff --git a/crates/pattern_db/src/connection.rs b/crates/pattern_db/src/connection.rs
new file mode 100644
index 0000000..51aadbf
--- /dev/null
+++ b/crates/pattern_db/src/connection.rs
@@ -0,0 +1,172 @@
+//! Database connection management.
+
+use std::path::Path;
+
+use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePool, SqlitePoolOptions};
+use tracing::{debug, info};
+
+use crate::error::DbResult;
+
+/// Connection to a constellation's database.
+///
+/// Each constellation has its own SQLite database file, providing physical
+/// isolation between constellations.
+#[derive(Debug, Clone)]
+pub struct ConstellationDb {
+    pool: SqlitePool,
+}
+
+impl ConstellationDb {
+    /// Open or create a constellation database at the given path.
+    ///
+    /// This will:
+    /// 1. Register the sqlite-vec extension globally (if not already done)
+    /// 2. Create the database file if it doesn't exist
+    /// 3. Configure SQLite for performance (WAL mode, pragmas)
+    /// 4. Run any pending migrations
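+    ///
+    /// Typical startup (illustrative sketch; the path shown is hypothetical):
+    ///
+    /// ```rust,ignore
+    /// let db = ConstellationDb::open("constellations/main.db").await?;
+    /// db.health_check().await?;
+    /// ```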
+    pub async fn open(path: impl AsRef<Path>) -> DbResult<Self> {
+        // Register sqlite-vec before any connections are created.
+        // This is idempotent - safe to call multiple times.
+        crate::vector::init_sqlite_vec();
+
+        let path = path.as_ref();
+
+        // Ensure parent directory exists
+        if let Some(parent) = path.parent() {
+            if !parent.exists() {
+                std::fs::create_dir_all(parent)?;
+            }
+        }
+
+        let path_str = path.to_string_lossy();
+        info!("Opening constellation database: {}", path_str);
+
+        let options = SqliteConnectOptions::new()
+            .filename(path)
+            .create_if_missing(true)
+            .journal_mode(SqliteJournalMode::Wal)
+            // Recommended SQLite pragmas for performance
+            .pragma("cache_size", "-64000") // 64MB cache
+            .pragma("synchronous", "NORMAL") // Safe with WAL
+            .pragma("temp_store", "MEMORY")
+            .pragma("mmap_size", "268435456") // 256MB mmap
+            .pragma("foreign_keys", "ON");
+
+        let pool = SqlitePoolOptions::new()
+            .max_connections(5) // SQLite is single-writer, but readers can parallelize
+            .connect_with(options)
+            .await?;
+
+        debug!("Database connection established");
+
+        // Run migrations
+        Self::run_migrations(&pool).await?;
+
+        Ok(Self { pool })
+    }
+
+    /// Open an in-memory database (for testing).
+    pub async fn open_in_memory() -> DbResult<Self> {
+        // Register sqlite-vec before any connections are created.
+        crate::vector::init_sqlite_vec();
+
+        let options = SqliteConnectOptions::new()
+            .filename(":memory:")
+            .journal_mode(SqliteJournalMode::Wal)
+            .pragma("foreign_keys", "ON");
+
+        let pool = SqlitePoolOptions::new()
+            .max_connections(1) // In-memory must be a single connection to share state
+            .connect_with(options)
+            .await?;
+
+        Self::run_migrations(&pool).await?;
+
+        Ok(Self { pool })
+    }
+
+    /// Run database migrations.
+    async fn run_migrations(pool: &SqlitePool) -> DbResult<()> {
+        debug!("Running database migrations");
+        sqlx::migrate!("./migrations").run(pool).await?;
+        info!("Database migrations complete");
+        Ok(())
+    }
+
+    /// Get a reference to the connection pool.
+    pub fn pool(&self) -> &SqlitePool {
+        &self.pool
+    }
+
+    /// Close the database connection.
+    pub async fn close(&self) {
+        self.pool.close().await;
+    }
+
+    /// Check if the database is healthy.
+    pub async fn health_check(&self) -> DbResult<()> {
+        sqlx::query("SELECT 1").execute(&self.pool).await?;
+        Ok(())
+    }
+
+    /// Get database statistics.
+    pub async fn stats(&self) -> DbResult<DbStats> {
+        let agents: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM agents")
+            .fetch_one(&self.pool)
+            .await?;
+
+        let messages: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM messages")
+            .fetch_one(&self.pool)
+            .await?;
+
+        let memory_blocks: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM memory_blocks")
+            .fetch_one(&self.pool)
+            .await?;
+
+        Ok(DbStats {
+            agent_count: agents.0 as u64,
+            message_count: messages.0 as u64,
+            memory_block_count: memory_blocks.0 as u64,
+        })
+    }
+
+    /// Vacuum the database to reclaim space.
+    pub async fn vacuum(&self) -> DbResult<()> {
+        info!("Vacuuming database");
+        sqlx::query("VACUUM").execute(&self.pool).await?;
+        Ok(())
+    }
+
+    /// Checkpoint the WAL file.
+    pub async fn checkpoint(&self) -> DbResult<()> {
+        debug!("Checkpointing WAL");
+        sqlx::query("PRAGMA wal_checkpoint(TRUNCATE)")
+            .execute(&self.pool)
+            .await?;
+        Ok(())
+    }
+}
+
+/// Database statistics.
+#[derive(Debug, Clone)]
+pub struct DbStats {
+    pub agent_count: u64,
+    pub message_count: u64,
+    pub memory_block_count: u64,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn test_open_in_memory() {
+        let db = ConstellationDb::open_in_memory().await.unwrap();
+        db.health_check().await.unwrap();
+
+        let stats = db.stats().await.unwrap();
+        assert_eq!(stats.agent_count, 0);
+        assert_eq!(stats.message_count, 0);
+        assert_eq!(stats.memory_block_count, 0);
+    }
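+
+    // Illustrative sketch added alongside the original test: exercises the
+    // on-disk path, including parent-directory creation. Uses the `tempfile`
+    // dev-dependency; the nested file name is arbitrary.
+    #[tokio::test]
+    async fn test_open_on_disk() {
+        let dir = tempfile::tempdir().unwrap();
+        let path = dir.path().join("nested").join("constellation.db");
+
+        let db = ConstellationDb::open(&path).await.unwrap();
+        db.health_check().await.unwrap();
+        db.close().await;
+
+        assert!(path.exists());
+    }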
+}
diff --git a/crates/pattern_db/src/error.rs b/crates/pattern_db/src/error.rs
new file mode 100644
index 0000000..627841c
--- /dev/null
+++ b/crates/pattern_db/src/error.rs
@@ -0,0 +1,88 @@
+//! Error types for the database layer.
+
+use miette::Diagnostic;
+use thiserror::Error;
+
+/// Result type alias for database operations.
+pub type DbResult<T> = Result<T, DbError>;
+
+/// Database error types.
+#[derive(Debug, Error, Diagnostic)]
+pub enum DbError {
+    /// SQLite/sqlx error
+    #[error("Database error: {0}")]
+    Sqlx(#[from] sqlx::Error),
+
+    /// Migration error
+    #[error("Migration error: {0}")]
+    Migration(#[from] sqlx::migrate::MigrateError),
+
+    /// Loro document error
+    #[error("Loro error: {0}")]
+    Loro(String),
+
+    /// Entity not found
+    #[error("{entity_type} not found: {id}")]
+    NotFound {
+        entity_type: &'static str,
+        id: String,
+    },
+
+    /// Duplicate entity
+    #[error("{entity_type} already exists: {id}")]
+    AlreadyExists {
+        entity_type: &'static str,
+        id: String,
+    },
+
+    /// Invalid data
+    #[error("Invalid data: {message}")]
+    InvalidData { message: String },
+
+    /// Serialization error
+    #[error("Serialization error: {0}")]
+    Serialization(#[from] serde_json::Error),
+
+    /// IO error (for filesystem operations if needed)
+    #[error("IO error: {0}")]
+    Io(#[from] std::io::Error),
+
+    /// Constraint violation
+    #[error("Constraint violation: {message}")]
+    ConstraintViolation { message: String },
+
+    /// SQLite extension error
+    #[error("Extension error: {0}")]
+    #[diagnostic(help("Ensure sqlite-vec is properly initialized before database operations"))]
+    Extension(String),
+}
+
+impl DbError {
+    /// Create a not found error.
+    pub fn not_found(entity_type: &'static str, id: impl Into<String>) -> Self {
+        Self::NotFound {
+            entity_type,
+            id: id.into(),
+        }
+    }
+
+    /// Create an already exists error.
+    pub fn already_exists(entity_type: &'static str, id: impl Into<String>) -> Self {
+        Self::AlreadyExists {
+            entity_type,
+            id: id.into(),
+        }
+    }
+
+    /// Create an invalid data error.
+    pub fn invalid_data(message: impl Into<String>) -> Self {
+        Self::InvalidData {
+            message: message.into(),
+        }
+    }
+
+    /// Create a Loro error.
+    pub fn loro(message: impl Into<String>) -> Self {
+        Self::Loro(message.into())
+    }
+}
diff --git a/crates/pattern_db/src/lib.rs b/crates/pattern_db/src/lib.rs
new file mode 100644
index 0000000..4e44a4e
--- /dev/null
+++ b/crates/pattern_db/src/lib.rs
@@ -0,0 +1,70 @@
+//! Pattern Database Layer
+//!
+//! SQLite-based storage backend for Pattern constellations.
+//!
+//! # Architecture
+//!
+//! - **One database per constellation** - Physical isolation, no cross-constellation leaks
+//! - **Loro CRDT for memory blocks** - Versioned, mergeable documents
+//! - **sqlite-vec for vectors** - Semantic search over memories
+//! - **FTS5 for text search** - Full-text search over messages and memories
+//!
+//! # Usage
+//!
+//! ```rust,ignore
+//! use pattern_db::ConstellationDb;
+//!
+//! let db = ConstellationDb::open("path/to/constellation.db").await?;
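+//!
+//! // Optional follow-ups (both methods are defined on `ConstellationDb` in this crate):
+//! db.health_check().await?;
+//! let stats = db.stats().await?;
+//! println!("{} agents, {} messages", stats.agent_count, stats.message_count);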
+//! ```
+
+pub mod connection;
+pub mod error;
+pub mod models;
+pub mod queries;
+pub mod vector;
+
+pub use connection::ConstellationDb;
+pub use error::{DbError, DbResult};
+
+// Re-export vector module types
+pub use vector::{
+    ContentType, DEFAULT_EMBEDDING_DIMENSIONS, EmbeddingStats, VectorSearchResult, init_sqlite_vec,
+    verify_sqlite_vec,
+};
+
+// Re-export key model types for convenience (alphabetical; see the `models`
+// submodules for the per-domain groupings)
+pub use models::{
+    ActivityEvent,
+    ActivityEventType,
+    Agent,
+    AgentGroup,
+    AgentStatus,
+    AgentSummary,
+    ArchivalEntry,
+    ArchiveSummary,
+    ConstellationSummary,
+    CoordinationState,
+    CoordinationTask,
+    EventImportance,
+    GroupMember,
+    GroupMemberRole,
+    HandoffNote,
+    MemoryBlock,
+    MemoryBlockCheckpoint,
+    MemoryBlockType,
+    MemoryGate,
+    MemoryOp,
+    MemoryPermission,
+    Message,
+    MessageRole,
+    MessageSummary,
+    NotableEvent,
+    PatternType,
+    SharedBlockAttachment,
+    TaskPriority,
+    TaskStatus,
+};
diff --git a/crates/pattern_db/src/models/agent.rs b/crates/pattern_db/src/models/agent.rs
new file mode 100644
index 0000000..5a30494
--- /dev/null
+++ b/crates/pattern_db/src/models/agent.rs
@@ -0,0 +1,139 @@
+//! Agent-related models.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::FromRow;
+use sqlx::types::Json;
+
+/// An agent in the constellation.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct Agent {
+    /// Unique identifier
+    pub id: String,
+
+    /// Human-readable name (unique within constellation)
+    pub name: String,
+
+    /// Optional description
+    pub description: Option<String>,
+
+    /// Model provider: 'anthropic', 'openai', 'google', etc.
+    pub model_provider: String,
+
+    /// Model name: 'claude-3-5-sonnet', 'gpt-4o', etc.
+    pub model_name: String,
+
+    /// System prompt / base instructions
+    pub system_prompt: String,
+
+    /// Agent configuration as JSON.
+    /// Contains: max_messages, compression_threshold, temperature, etc.
+    pub config: Json<serde_json::Value>,
+
+    /// List of enabled tool names
+    pub enabled_tools: Json<Vec<String>>,
+
+    /// Tool-specific rules as JSON (optional)
+    pub tool_rules: Option<Json<serde_json::Value>>,
+
+    /// Agent status
+    pub status: AgentStatus,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+
+    /// Last update timestamp
+    pub updated_at: DateTime<Utc>,
+}
+
+/// Agent status.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "lowercase")]
+#[serde(rename_all = "lowercase")]
+pub enum AgentStatus {
+    /// Agent is active and can process messages
+    Active,
+    /// Agent is hibernated (not processing, but data preserved)
+    Hibernated,
+    /// Agent is archived (read-only)
+    Archived,
+}
+
+impl Default for AgentStatus {
+    fn default() -> Self {
+        Self::Active
+    }
+}
+
+/// An agent group for coordination.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct AgentGroup {
+    /// Unique identifier
+    pub id: String,
+
+    /// Human-readable name (unique within constellation)
+    pub name: String,
+
+    /// Optional description
+    pub description: Option<String>,
+
+    /// Coordination pattern type
+    pub pattern_type: PatternType,
+
+    /// Pattern-specific configuration as JSON
+    pub pattern_config: Json<serde_json::Value>,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+
+    /// Last update timestamp
+    pub updated_at: DateTime<Utc>,
+}
+
+/// Coordination pattern types.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum PatternType {
+    /// Round-robin message distribution
+    RoundRobin,
+    /// Dynamic routing based on selector
+    Dynamic,
+    /// Pipeline of sequential processing
+    Pipeline,
+    /// Supervisor delegates to workers
+    Supervisor,
+    /// Voting-based consensus
+    Voting,
+    /// Background monitoring (sleeptime)
+    Sleeptime,
+}
+
+/// Group membership.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct GroupMember {
+    /// Group ID
+    pub group_id: String,
+
+    /// Agent ID
+    pub agent_id: String,
+
+    /// Role within the group (pattern-specific)
+    pub role: Option<GroupMemberRole>,
+
+    /// When the agent joined the group
+    pub joined_at: DateTime<Utc>,
+}
+
+/// Member roles within a group.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "lowercase")]
+#[serde(rename_all = "lowercase")]
+pub enum GroupMemberRole {
+    /// Supervisor role (for supervisor pattern)
+    Supervisor,
+    /// Worker role
+    Worker,
+    /// Observer (receives messages but doesn't respond)
+    Observer,
+}
diff --git a/crates/pattern_db/src/models/coordination.rs b/crates/pattern_db/src/models/coordination.rs
new file mode 100644
index 0000000..4c2e248
--- /dev/null
+++ b/crates/pattern_db/src/models/coordination.rs
@@ -0,0 +1,277 @@
+//! Coordination-related models.
+//!
+//! These models support cross-agent coordination:
+//! - Activity stream for constellation-wide event logging
+//! - Summaries for agent catch-up after hibernation
+//! - Tasks for structured work assignment
+//! - Handoff notes for agent-to-agent communication
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::FromRow;
+use sqlx::types::Json;
+
+/// An event in the constellation's activity stream.
+///
+/// The activity stream provides a unified timeline of events for
+/// coordinating agents and enabling catch-up for returning agents.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct ActivityEvent {
+    /// Unique identifier
+    pub id: String,
+
+    /// When the event occurred
+    pub timestamp: DateTime<Utc>,
+
+    /// Agent that caused the event (None for system events)
+    pub agent_id: Option<String>,
+
+    /// Event type
+    pub event_type: ActivityEventType,
+
+    /// Event-specific details as JSON
+    pub details: Json<serde_json::Value>,
+
+    /// Importance level for filtering
+    pub importance: Option<EventImportance>,
+}
+
+/// Activity event types.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum ActivityEventType {
+    /// Agent sent a message
+    MessageSent,
+    /// Agent used a tool
+    ToolUsed,
+    /// Memory was updated
+    MemoryUpdated,
+    /// Task was created/updated
+    TaskChanged,
+    /// Agent status changed (activated, hibernated, etc.)
+    AgentStatusChanged,
+    /// External event (Discord message, Bluesky post, etc.)
+    ExternalEvent,
+    /// Coordination event (handoff, delegation, etc.)
+    Coordination,
+    /// System event (startup, shutdown, error, etc.)
+    System,
+}
+
+/// Event importance levels.
+#[derive(
+    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, sqlx::Type,
+)]
+#[sqlx(type_name = "TEXT", rename_all = "lowercase")]
+#[serde(rename_all = "lowercase")]
+pub enum EventImportance {
+    /// Routine event, can be skipped in summaries
+    Low,
+    /// Normal event, included in standard summaries
+    Medium,
+    /// Important event, always included in summaries
+    High,
+    /// Critical event, requires attention
+    Critical,
+}
+
+impl Default for EventImportance {
+    fn default() -> Self {
+        Self::Medium
+    }
+}
+
+/// Per-agent activity summary.
+///
+/// LLM-generated summary of an agent's recent activity,
+/// used to help other agents understand what this agent has been doing.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct AgentSummary {
+    /// Agent this summary is for (also the primary key)
+    pub agent_id: String,
+
+    /// LLM-generated summary
+    pub summary: String,
+
+    /// Number of messages covered by this summary
+    pub messages_covered: i64,
+
+    /// When this summary was generated
+    pub generated_at: DateTime<Utc>,
+
+    /// When the agent was last active
+    pub last_active: DateTime<Utc>,
+}
+
+/// Constellation-wide summary.
+///
+/// Periodic roll-up of activity across all agents,
+/// used for long-term context and catch-up.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct ConstellationSummary {
+    /// Unique identifier
+    pub id: String,
+
+    /// Start of the summarized period
+    pub period_start: DateTime<Utc>,
+
+    /// End of the summarized period
+    pub period_end: DateTime<Utc>,
+
+    /// LLM-generated summary
+    pub summary: String,
+
+    /// Key decisions made during this period
+    pub key_decisions: Option<Json<Vec<String>>>,
+
+    /// Open threads/topics that need follow-up
+    pub open_threads: Option<Json<Vec<String>>>,
+
+    /// When this summary was created
+    pub created_at: DateTime<Utc>,
+}
+
+/// A notable event flagged for long-term memory.
+///
+/// Unlike regular activity events, notable events are explicitly
+/// preserved for historical context and agent training.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct NotableEvent {
+    /// Unique identifier
+    pub id: String,
+
+    /// When the event occurred
+    pub timestamp: DateTime<Utc>,
+
+    /// Type of event
+    pub event_type: String,
+
+    /// Human-readable description
+    pub description: String,
+
+    /// Agents involved in this event
+    pub agents_involved: Option<Json<Vec<String>>>,
+
+    /// Importance level
+    pub importance: EventImportance,
+
+    /// When this was recorded
+    pub created_at: DateTime<Utc>,
+}
+
+/// A coordination task.
+///
+/// Structured task assignment for cross-agent work.
+/// More formal than handoff notes, used for tracked deliverables.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct CoordinationTask {
+    /// Unique identifier
+    pub id: String,
+
+    /// Task description
+    pub description: String,
+
+    /// Agent assigned to this task (None = unassigned)
+    pub assigned_to: Option<String>,
+
+    /// Task status
+    pub status: TaskStatus,
+
+    /// Task priority
+    pub priority: TaskPriority,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+
+    /// Last update timestamp
+    pub updated_at: DateTime<Utc>,
+}
+
+/// Task status.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum TaskStatus {
+    /// Task is pending, not yet started
+    Pending,
+    /// Task is in progress
+    InProgress,
+    /// Task is completed
+    Completed,
+    /// Task was cancelled
+    Cancelled,
+}
+
+impl Default for TaskStatus {
+    fn default() -> Self {
+        Self::Pending
+    }
+}
+
+/// Task priority.
+#[derive(
+    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, sqlx::Type,
+)]
+#[sqlx(type_name = "TEXT", rename_all = "lowercase")]
+#[serde(rename_all = "lowercase")]
+pub enum TaskPriority {
+    /// Low priority
+    Low,
+    /// Medium priority (default)
+    Medium,
+    /// High priority
+    High,
+    /// Urgent priority
+    Urgent,
+}
+
+impl Default for TaskPriority {
+    fn default() -> Self {
+        Self::Medium
+    }
+}
+
+/// A handoff note from one agent to another.
+///
+/// Used for informal agent-to-agent communication,
+/// like leaving a note for the next shift.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct HandoffNote {
+    /// Unique identifier
+    pub id: String,
+
+    /// Agent that left the note
+    pub from_agent: String,
+
+    /// Target agent (None = for any agent)
+    pub to_agent: Option<String>,
+
+    /// Note content
+    pub content: String,
+
+    /// When the note was created
+    pub created_at: DateTime<Utc>,
+
+    /// When the note was read (None = unread)
+    pub read_at: Option<DateTime<Utc>>,
+}
+
+/// Coordination key-value state entry.
+///
+/// Flexible shared state for coordination patterns.
+/// Used for things like round-robin counters, vote tallies, etc.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct CoordinationState {
+    /// Key for this state entry
+    pub key: String,
+
+    /// Value as JSON
+    pub value: Json<serde_json::Value>,
+
+    /// When this was last updated
+    pub updated_at: DateTime<Utc>,
+
+    /// Who updated it last
+    pub updated_by: Option<String>,
+}
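+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Illustrative sketch (not part of the original patch): the rename_all
+    // attributes above determine the strings written to JSON (and, via the
+    // matching sqlx attributes, to TEXT columns), so pin a few of them down.
+    #[test]
+    fn wire_representations() {
+        assert_eq!(
+            serde_json::to_string(&TaskStatus::InProgress).unwrap(),
+            "\"in_progress\""
+        );
+        assert_eq!(
+            serde_json::to_string(&EventImportance::Critical).unwrap(),
+            "\"critical\""
+        );
+        assert_eq!(
+            serde_json::to_string(&ActivityEventType::MemoryUpdated).unwrap(),
+            "\"memory_updated\""
+        );
+    }
+}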
diff --git a/crates/pattern_db/src/models/memory.rs b/crates/pattern_db/src/models/memory.rs
new file mode 100644
index 0000000..ff37172
--- /dev/null
+++ b/crates/pattern_db/src/models/memory.rs
@@ -0,0 +1,275 @@
+//! Memory-related models.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::FromRow;
+use sqlx::types::Json;
+
+/// A memory block belonging to an agent.
+///
+/// Memory blocks are stored as Loro CRDT documents, enabling versioning,
+/// time-travel, and potential future merging.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct MemoryBlock {
+    /// Unique identifier
+    pub id: String,
+
+    /// Owning agent ID
+    pub agent_id: String,
+
+    /// Semantic label: "persona", "human", "scratchpad", etc.
+    pub label: String,
+
+    /// Description for the LLM (critical for proper usage)
+    pub description: String,
+
+    /// Block type determines context inclusion behavior
+    pub block_type: MemoryBlockType,
+
+    /// Character limit for the block
+    pub char_limit: i64,
+
+    /// Permission level for this block
+    pub permission: MemoryPermission,
+
+    /// Whether this block is pinned (can't be swapped out of context)
+    pub pinned: bool,
+
+    /// Loro document snapshot (binary blob)
+    pub loro_snapshot: Vec<u8>,
+
+    /// Quick content preview without deserializing Loro
+    pub content_preview: Option<String>,
+
+    /// Additional metadata
+    pub metadata: Option<Json<serde_json::Value>>,
+
+    /// Embedding model used (if embedded)
+    pub embedding_model: Option<String>,
+
+    /// Whether this block is active (false = soft deleted)
+    pub is_active: bool,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+
+    /// Last update timestamp
+    pub updated_at: DateTime<Utc>,
+}
+
+/// Memory block types.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "lowercase")]
+#[serde(rename_all = "lowercase")]
+pub enum MemoryBlockType {
+    /// Always in context, critical for agent identity.
+    /// Examples: persona, human, system guidelines
+    Core,
+
+    /// Working memory, can be swapped in/out based on relevance.
+    /// Examples: scratchpad, current_task, session_notes
+    Working,
+
+    /// Long-term storage, NOT in context by default.
+    /// Retrieved via recall/search tools using semantic search
+    Archival,
+
+    /// System-maintained logs (read-only to agent).
+    /// Recent entries shown in context, older entries searchable
+    Log,
+}
+
+impl Default for MemoryBlockType {
+    fn default() -> Self {
+        Self::Working
+    }
+}
+
+/// Permission levels for memory operations.
+///
+/// Ordered from most restrictive to least restrictive.
+/// This determines what operations an agent can perform on a block.
+#[derive(
+    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, sqlx::Type,
+)]
+#[sqlx(type_name = "TEXT", rename_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum MemoryPermission {
+    /// Can only read, no modifications allowed
+    ReadOnly,
+    /// Requires permission from partner (owner) to write
+    Partner,
+    /// Requires permission from any human to write
+    Human,
+    /// Can append to existing content, but not overwrite
+    Append,
+    /// Can modify content freely (default)
+    ReadWrite,
+    /// Total control, including delete
+    Admin,
+}
+
+impl Default for MemoryPermission {
+    fn default() -> Self {
+        Self::ReadWrite
+    }
+}
+
+impl std::fmt::Display for MemoryPermission {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::ReadOnly => write!(f, "Read Only"),
+            Self::Partner => write!(f, "Requires Partner permission to write"),
+            Self::Human => write!(f, "Requires Human permission to write"),
+            Self::Append => write!(f, "Append Only"),
+            Self::ReadWrite => write!(f, "Read, Append, Write"),
+            Self::Admin => write!(f, "Read, Write, Delete"),
+        }
+    }
+}
+
+/// Memory operation types for permission gating.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum MemoryOp {
+    Read,
+    Append,
+    Overwrite,
+    Delete,
+}
+
+/// Result of a permission check for a memory operation.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum MemoryGate {
+    /// Operation can proceed without additional consent.
+    Allow,
+    /// Operation may proceed with human/partner consent.
+    RequireConsent { reason: String },
+    /// Operation is not allowed under current policy.
+ Deny { reason: String }, +} + +impl MemoryGate { + /// Check whether an operation is allowed under a permission level. + /// + /// Policy: + /// - Read: always allowed + /// - Append: allowed for Append/ReadWrite/Admin; Human/Partner require consent; ReadOnly denied + /// - Overwrite: allowed for ReadWrite/Admin; Human/Partner require consent; ReadOnly/Append denied + /// - Delete: allowed for Admin only; others denied + pub fn check(op: MemoryOp, perm: MemoryPermission) -> Self { + match op { + MemoryOp::Read => Self::Allow, + MemoryOp::Append => match perm { + MemoryPermission::Append + | MemoryPermission::ReadWrite + | MemoryPermission::Admin => Self::Allow, + MemoryPermission::Human => Self::RequireConsent { + reason: "Requires human approval to append".into(), + }, + MemoryPermission::Partner => Self::RequireConsent { + reason: "Requires partner approval to append".into(), + }, + MemoryPermission::ReadOnly => Self::Deny { + reason: "Block is read-only; appending is not allowed".into(), + }, + }, + MemoryOp::Overwrite => match perm { + MemoryPermission::ReadWrite | MemoryPermission::Admin => Self::Allow, + MemoryPermission::Human => Self::RequireConsent { + reason: "Requires human approval to overwrite".into(), + }, + MemoryPermission::Partner => Self::RequireConsent { + reason: "Requires partner approval to overwrite".into(), + }, + MemoryPermission::Append | MemoryPermission::ReadOnly => Self::Deny { + reason: "Insufficient permission (append-only or read-only) for overwrite" + .into(), + }, + }, + MemoryOp::Delete => match perm { + MemoryPermission::Admin => Self::Allow, + _ => Self::Deny { + reason: "Deleting memory requires admin permission".into(), + }, + }, + } + } + + /// Check if the gate allows the operation. + pub fn is_allowed(&self) -> bool { + matches!(self, Self::Allow) + } + + /// Check if the gate requires consent. + pub fn requires_consent(&self) -> bool { + matches!(self, Self::RequireConsent { .. }) + } + + /// Check if the gate denies the operation. + pub fn is_denied(&self) -> bool { + matches!(self, Self::Deny { .. }) + } +} + +/// Checkpoint of a memory block (for history/rollback). +#[derive(Debug, Clone, FromRow, Serialize, Deserialize)] +pub struct MemoryBlockCheckpoint { + /// Auto-incrementing ID + pub id: i64, + + /// Block this checkpoint belongs to + pub block_id: String, + + /// Full Loro snapshot at this checkpoint + pub snapshot: Vec, + + /// When this checkpoint was created + pub created_at: DateTime, + + /// How many updates were consolidated into this checkpoint + pub updates_consolidated: i64, +} + +/// An archival memory entry. +/// +/// Separate from blocks - these are individual searchable entries +/// the agent can store/retrieve. Useful for fine-grained memories. +#[derive(Debug, Clone, FromRow, Serialize, Deserialize)] +pub struct ArchivalEntry { + /// Unique identifier + pub id: String, + + /// Owning agent ID + pub agent_id: String, + + /// Content of the entry + pub content: String, + + /// Optional structured metadata + pub metadata: Option>, + + /// For chunked large content + pub chunk_index: i64, + + /// Links chunks together + pub parent_entry_id: Option, + + /// Creation timestamp + pub created_at: DateTime, +} + +/// Shared block attachment (when blocks are shared between agents). 
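+///
+/// The attachment carries its own `permission`, which is the level that
+/// should be fed to `MemoryGate::check` when the attached agent operates on
+/// the block. A minimal gating sketch (assumes an attachment already loaded
+/// from the database):
+///
+/// ```ignore
+/// use crate::models::{MemoryGate, MemoryOp};
+///
+/// match MemoryGate::check(MemoryOp::Append, attachment.permission) {
+///     MemoryGate::Allow => { /* apply the append */ }
+///     MemoryGate::RequireConsent { reason } => { /* ask the human/partner */ }
+///     MemoryGate::Deny { reason } => { /* reject, citing the reason */ }
+/// }
+/// ```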
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)] +pub struct SharedBlockAttachment { + /// The shared block + pub block_id: String, + + /// Agent gaining access + pub agent_id: String, + + /// Permission level for this attachment (may differ from block's inherent permission) + pub permission: MemoryPermission, + + /// When the attachment was created + pub attached_at: DateTime, +} diff --git a/crates/pattern_db/src/models/message.rs b/crates/pattern_db/src/models/message.rs new file mode 100644 index 0000000..ab7bdb0 --- /dev/null +++ b/crates/pattern_db/src/models/message.rs @@ -0,0 +1,141 @@ +//! Message-related models. + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use sqlx::FromRow; +use sqlx::types::Json; + +/// A message in an agent's conversation history. +/// +/// Messages use Snowflake IDs for absolute ordering across all messages, +/// with batch tracking for atomic request/response cycles. +#[derive(Debug, Clone, FromRow, Serialize, Deserialize)] +pub struct Message { + /// Unique identifier + pub id: String, + + /// Owning agent ID + pub agent_id: String, + + /// Snowflake ID as string for sorting (absolute ordering) + pub position: String, + + /// Groups request/response cycles together + pub batch_id: Option, + + /// Order within a batch (0 = first message) + pub sequence_in_batch: Option, + + /// Message role + pub role: MessageRole, + + /// Message content (may be null for tool-only messages) + pub content: Option, + + /// Tool call ID (for tool messages) + pub tool_call_id: Option, + + /// Tool name (for tool calls/results) + pub tool_name: Option, + + /// Tool arguments as JSON (for tool calls) + pub tool_args: Option>, + + /// Tool result as JSON (for tool responses) + pub tool_result: Option>, + + /// Source of the message: 'cli', 'discord', 'bluesky', 'api', etc. + pub source: Option, + + /// Source-specific metadata (channel ID, message ID, etc.) + pub source_metadata: Option>, + + /// Whether this message has been archived (compressed into a summary) + pub is_archived: bool, + + /// Creation timestamp + pub created_at: DateTime, +} + +/// Message roles. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)] +#[sqlx(type_name = "TEXT", rename_all = "lowercase")] +#[serde(rename_all = "lowercase")] +pub enum MessageRole { + /// User/human message + User, + /// Assistant/agent response + Assistant, + /// System message (instructions, context) + System, + /// Tool call or result + Tool, +} + +impl Default for MessageRole { + fn default() -> Self { + Self::User + } +} + +impl std::fmt::Display for MessageRole { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::User => write!(f, "user"), + Self::Assistant => write!(f, "assistant"), + Self::System => write!(f, "system"), + Self::Tool => write!(f, "tool"), + } + } +} + +/// An archive summary replacing a range of messages. +/// +/// When conversation history grows too long, older messages are compressed +/// into summaries. The original messages are marked as archived but retained +/// for search and history purposes. 
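+///
+/// A typical compression pass (a sketch; `cutoff` is a hypothetical Snowflake
+/// position and `summary` a pre-built `ArchiveSummary` whose text came from
+/// the LLM):
+///
+/// ```ignore
+/// // Mark everything before the cutoff as archived...
+/// let archived = archive_messages(&pool, agent_id, &cutoff).await?;
+/// // ...then record the summary that stands in for those messages.
+/// create_archive_summary(&pool, &summary).await?;
+/// ```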
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)] +pub struct ArchiveSummary { + /// Unique identifier + pub id: String, + + /// Owning agent ID + pub agent_id: String, + + /// LLM-generated summary of the archived messages + pub summary: String, + + /// Starting position (Snowflake ID) of summarized range + pub start_position: String, + + /// Ending position (Snowflake ID) of summarized range + pub end_position: String, + + /// Number of messages summarized + pub message_count: i64, + + /// Creation timestamp + pub created_at: DateTime, +} + +/// Lightweight message projection for listing/searching. +#[derive(Debug, Clone, FromRow, Serialize, Deserialize)] +pub struct MessageSummary { + /// Message ID + pub id: String, + + /// Position for ordering + pub position: String, + + /// Message role + pub role: MessageRole, + + /// Truncated content preview + pub content_preview: Option, + + /// Source platform + pub source: Option, + + /// Creation timestamp + pub created_at: DateTime, +} diff --git a/crates/pattern_db/src/models/mod.rs b/crates/pattern_db/src/models/mod.rs new file mode 100644 index 0000000..ca5600e --- /dev/null +++ b/crates/pattern_db/src/models/mod.rs @@ -0,0 +1,19 @@ +//! Database models. +//! +//! These structs map directly to database tables via sqlx. + +mod agent; +mod coordination; +mod memory; +mod message; + +pub use agent::{Agent, AgentGroup, AgentStatus, GroupMember, GroupMemberRole, PatternType}; +pub use coordination::{ + ActivityEvent, ActivityEventType, AgentSummary, ConstellationSummary, CoordinationState, + CoordinationTask, EventImportance, HandoffNote, NotableEvent, TaskPriority, TaskStatus, +}; +pub use memory::{ + ArchivalEntry, MemoryBlock, MemoryBlockCheckpoint, MemoryBlockType, MemoryGate, MemoryOp, + MemoryPermission, SharedBlockAttachment, +}; +pub use message::{ArchiveSummary, Message, MessageRole, MessageSummary}; diff --git a/crates/pattern_db/src/queries/agent.rs b/crates/pattern_db/src/queries/agent.rs new file mode 100644 index 0000000..645ab1b --- /dev/null +++ b/crates/pattern_db/src/queries/agent.rs @@ -0,0 +1,322 @@ +//! Agent-related database queries. + +use sqlx::SqlitePool; + +use crate::error::DbResult; +use crate::models::{Agent, AgentGroup, AgentStatus, GroupMember, GroupMemberRole, PatternType}; + +/// Get an agent by ID. +pub async fn get_agent(pool: &SqlitePool, id: &str) -> DbResult> { + let agent = sqlx::query_as!( + Agent, + r#" + SELECT + id as "id!", + name as "name!", + description, + model_provider as "model_provider!", + model_name as "model_name!", + system_prompt as "system_prompt!", + config as "config!: _", + enabled_tools as "enabled_tools!: _", + tool_rules as "tool_rules: _", + status as "status!: AgentStatus", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM agents WHERE id = ? + "#, + id + ) + .fetch_optional(pool) + .await?; + Ok(agent) +} + +/// Get an agent by name. +pub async fn get_agent_by_name(pool: &SqlitePool, name: &str) -> DbResult> { + let agent = sqlx::query_as!( + Agent, + r#" + SELECT + id as "id!", + name as "name!", + description, + model_provider as "model_provider!", + model_name as "model_name!", + system_prompt as "system_prompt!", + config as "config!: _", + enabled_tools as "enabled_tools!: _", + tool_rules as "tool_rules: _", + status as "status!: AgentStatus", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM agents WHERE name = ? + "#, + name + ) + .fetch_optional(pool) + .await?; + Ok(agent) +} + +/// List all agents. 
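+///
+/// A short usage sketch, assuming an open pool:
+///
+/// ```ignore
+/// let agents = list_agents(&pool).await?;
+/// for agent in &agents {
+///     println!("{} ({})", agent.name, agent.id);
+/// }
+/// ```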
+pub async fn list_agents(pool: &SqlitePool) -> DbResult> { + let agents = sqlx::query_as!( + Agent, + r#" + SELECT + id as "id!", + name as "name!", + description, + model_provider as "model_provider!", + model_name as "model_name!", + system_prompt as "system_prompt!", + config as "config!: _", + enabled_tools as "enabled_tools!: _", + tool_rules as "tool_rules: _", + status as "status!: AgentStatus", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM agents ORDER BY name + "# + ) + .fetch_all(pool) + .await?; + Ok(agents) +} + +/// List agents with a specific status. +pub async fn list_agents_by_status(pool: &SqlitePool, status: AgentStatus) -> DbResult> { + let agents = sqlx::query_as!( + Agent, + r#" + SELECT + id as "id!", + name as "name!", + description, + model_provider as "model_provider!", + model_name as "model_name!", + system_prompt as "system_prompt!", + config as "config!: _", + enabled_tools as "enabled_tools!: _", + tool_rules as "tool_rules: _", + status as "status!: AgentStatus", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM agents WHERE status = ? ORDER BY name + "#, + status + ) + .fetch_all(pool) + .await?; + Ok(agents) +} + +/// Create a new agent. +pub async fn create_agent(pool: &SqlitePool, agent: &Agent) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO agents (id, name, description, model_provider, model_name, + system_prompt, config, enabled_tools, tool_rules, + status, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + "#, + agent.id, + agent.name, + agent.description, + agent.model_provider, + agent.model_name, + agent.system_prompt, + agent.config, + agent.enabled_tools, + agent.tool_rules, + agent.status, + agent.created_at, + agent.updated_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Update an agent's status. +pub async fn update_agent_status(pool: &SqlitePool, id: &str, status: AgentStatus) -> DbResult<()> { + sqlx::query!( + "UPDATE agents SET status = ?, updated_at = datetime('now') WHERE id = ?", + status, + id + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Delete an agent. +pub async fn delete_agent(pool: &SqlitePool, id: &str) -> DbResult<()> { + sqlx::query!("DELETE FROM agents WHERE id = ?", id) + .execute(pool) + .await?; + Ok(()) +} + +/// Get an agent group by ID. +pub async fn get_group(pool: &SqlitePool, id: &str) -> DbResult> { + let group = sqlx::query_as!( + AgentGroup, + r#" + SELECT + id as "id!", + name as "name!", + description, + pattern_type as "pattern_type!: PatternType", + pattern_config as "pattern_config!: _", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM agent_groups WHERE id = ? + "#, + id + ) + .fetch_optional(pool) + .await?; + Ok(group) +} + +/// Get an agent group by name. +pub async fn get_group_by_name(pool: &SqlitePool, name: &str) -> DbResult> { + let group = sqlx::query_as!( + AgentGroup, + r#" + SELECT + id as "id!", + name as "name!", + description, + pattern_type as "pattern_type!: PatternType", + pattern_config as "pattern_config!: _", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM agent_groups WHERE name = ? + "#, + name + ) + .fetch_optional(pool) + .await?; + Ok(group) +} + +/// List all agent groups. 
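+///
+/// Sketch: walk every group and its membership (assumes an open pool):
+///
+/// ```ignore
+/// for group in list_groups(&pool).await? {
+///     let members = get_group_members(&pool, &group.id).await?;
+///     println!("{}: {} member(s)", group.name, members.len());
+/// }
+/// ```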
+pub async fn list_groups(pool: &SqlitePool) -> DbResult> { + let groups = sqlx::query_as!( + AgentGroup, + r#" + SELECT + id as "id!", + name as "name!", + description, + pattern_type as "pattern_type!: PatternType", + pattern_config as "pattern_config!: _", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM agent_groups ORDER BY name + "# + ) + .fetch_all(pool) + .await?; + Ok(groups) +} + +/// Create a new agent group. +pub async fn create_group(pool: &SqlitePool, group: &AgentGroup) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO agent_groups (id, name, description, pattern_type, pattern_config, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + "#, + group.id, + group.name, + group.description, + group.pattern_type, + group.pattern_config, + group.created_at, + group.updated_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Get members of a group. +pub async fn get_group_members(pool: &SqlitePool, group_id: &str) -> DbResult> { + let members = sqlx::query_as!( + GroupMember, + r#" + SELECT + group_id as "group_id!", + agent_id as "agent_id!", + role as "role: GroupMemberRole", + joined_at as "joined_at!: _" + FROM group_members WHERE group_id = ? + "#, + group_id + ) + .fetch_all(pool) + .await?; + Ok(members) +} + +/// Add an agent to a group. +pub async fn add_group_member(pool: &SqlitePool, member: &GroupMember) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO group_members (group_id, agent_id, role, joined_at) + VALUES (?, ?, ?, ?) + "#, + member.group_id, + member.agent_id, + member.role, + member.joined_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Remove an agent from a group. +pub async fn remove_group_member( + pool: &SqlitePool, + group_id: &str, + agent_id: &str, +) -> DbResult<()> { + sqlx::query!( + "DELETE FROM group_members WHERE group_id = ? AND agent_id = ?", + group_id, + agent_id + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Get all groups an agent belongs to. +pub async fn get_agent_groups(pool: &SqlitePool, agent_id: &str) -> DbResult> { + let groups = sqlx::query_as!( + AgentGroup, + r#" + SELECT + g.id as "id!", + g.name as "name!", + g.description, + g.pattern_type as "pattern_type!: PatternType", + g.pattern_config as "pattern_config!: _", + g.created_at as "created_at!: _", + g.updated_at as "updated_at!: _" + FROM agent_groups g + INNER JOIN group_members m ON g.id = m.group_id + WHERE m.agent_id = ? + ORDER BY g.name + "#, + agent_id + ) + .fetch_all(pool) + .await?; + Ok(groups) +} diff --git a/crates/pattern_db/src/queries/coordination.rs b/crates/pattern_db/src/queries/coordination.rs new file mode 100644 index 0000000..051094a --- /dev/null +++ b/crates/pattern_db/src/queries/coordination.rs @@ -0,0 +1,519 @@ +//! Coordination-related database queries. + +use sqlx::SqlitePool; + +use crate::error::DbResult; +use crate::models::{ + ActivityEvent, ActivityEventType, AgentSummary, ConstellationSummary, CoordinationState, + CoordinationTask, EventImportance, HandoffNote, NotableEvent, TaskPriority, TaskStatus, +}; + +// ============================================================================ +// Activity Events +// ============================================================================ + +/// Get recent activity events. 
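+///
+/// Sketch of a "what just happened" digest (assumes an open pool):
+///
+/// ```ignore
+/// for event in get_recent_activity(&pool, 50).await? {
+///     println!("{} {:?} {:?}", event.timestamp, event.agent_id, event.event_type);
+/// }
+/// ```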
+pub async fn get_recent_activity(pool: &SqlitePool, limit: i64) -> DbResult> { + let events = sqlx::query_as!( + ActivityEvent, + r#" + SELECT + id as "id!", + timestamp as "timestamp!: _", + agent_id, + event_type as "event_type!: ActivityEventType", + details as "details!: _", + importance as "importance: EventImportance" + FROM activity_events + ORDER BY timestamp DESC + LIMIT ? + "#, + limit + ) + .fetch_all(pool) + .await?; + Ok(events) +} + +/// Get recent activity events with minimum importance. +pub async fn get_recent_activity_by_importance( + pool: &SqlitePool, + limit: i64, + min_importance: EventImportance, +) -> DbResult> { + let events = sqlx::query_as!( + ActivityEvent, + r#" + SELECT + id as "id!", + timestamp as "timestamp!: _", + agent_id, + event_type as "event_type!: ActivityEventType", + details as "details!: _", + importance as "importance: EventImportance" + FROM activity_events + WHERE importance >= ? + ORDER BY timestamp DESC + LIMIT ? + "#, + min_importance, + limit + ) + .fetch_all(pool) + .await?; + Ok(events) +} + +/// Get activity events for a specific agent. +pub async fn get_agent_activity( + pool: &SqlitePool, + agent_id: &str, + limit: i64, +) -> DbResult> { + let events = sqlx::query_as!( + ActivityEvent, + r#" + SELECT + id as "id!", + timestamp as "timestamp!: _", + agent_id, + event_type as "event_type!: ActivityEventType", + details as "details!: _", + importance as "importance: EventImportance" + FROM activity_events + WHERE agent_id = ? + ORDER BY timestamp DESC + LIMIT ? + "#, + agent_id, + limit + ) + .fetch_all(pool) + .await?; + Ok(events) +} + +/// Create an activity event. +pub async fn create_activity_event(pool: &SqlitePool, event: &ActivityEvent) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO activity_events (id, timestamp, agent_id, event_type, details, importance) + VALUES (?, ?, ?, ?, ?, ?) + "#, + event.id, + event.timestamp, + event.agent_id, + event.event_type, + event.details, + event.importance, + ) + .execute(pool) + .await?; + Ok(()) +} + +// ============================================================================ +// Agent Summaries +// ============================================================================ + +/// Get an agent's summary. +pub async fn get_agent_summary( + pool: &SqlitePool, + agent_id: &str, +) -> DbResult> { + let summary = sqlx::query_as!( + AgentSummary, + r#" + SELECT + agent_id as "agent_id!", + summary as "summary!", + messages_covered as "messages_covered!", + generated_at as "generated_at!: _", + last_active as "last_active!: _" + FROM agent_summaries + WHERE agent_id = ? + "#, + agent_id + ) + .fetch_optional(pool) + .await?; + Ok(summary) +} + +/// Upsert an agent summary. +pub async fn upsert_agent_summary(pool: &SqlitePool, summary: &AgentSummary) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO agent_summaries (agent_id, summary, messages_covered, generated_at, last_active) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT(agent_id) DO UPDATE SET + summary = excluded.summary, + messages_covered = excluded.messages_covered, + generated_at = excluded.generated_at, + last_active = excluded.last_active + "#, + summary.agent_id, + summary.summary, + summary.messages_covered, + summary.generated_at, + summary.last_active, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Get all agent summaries. 
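+///
+/// Sketch: render a constellation-wide status view from the per-agent
+/// summaries (assumes an open pool):
+///
+/// ```ignore
+/// for s in get_all_agent_summaries(&pool).await? {
+///     println!("{} (last active {}): {}", s.agent_id, s.last_active, s.summary);
+/// }
+/// ```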
+pub async fn get_all_agent_summaries(pool: &SqlitePool) -> DbResult> { + let summaries = sqlx::query_as!( + AgentSummary, + r#" + SELECT + agent_id as "agent_id!", + summary as "summary!", + messages_covered as "messages_covered!", + generated_at as "generated_at!: _", + last_active as "last_active!: _" + FROM agent_summaries + ORDER BY last_active DESC + "# + ) + .fetch_all(pool) + .await?; + Ok(summaries) +} + +// ============================================================================ +// Constellation Summaries +// ============================================================================ + +/// Get the latest constellation summary. +pub async fn get_latest_constellation_summary( + pool: &SqlitePool, +) -> DbResult> { + let summary = sqlx::query_as!( + ConstellationSummary, + r#" + SELECT + id as "id!", + period_start as "period_start!: _", + period_end as "period_end!: _", + summary as "summary!", + key_decisions as "key_decisions: _", + open_threads as "open_threads: _", + created_at as "created_at!: _" + FROM constellation_summaries + ORDER BY period_end DESC + LIMIT 1 + "# + ) + .fetch_optional(pool) + .await?; + Ok(summary) +} + +/// Create a constellation summary. +pub async fn create_constellation_summary( + pool: &SqlitePool, + summary: &ConstellationSummary, +) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO constellation_summaries (id, period_start, period_end, summary, key_decisions, open_threads, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + "#, + summary.id, + summary.period_start, + summary.period_end, + summary.summary, + summary.key_decisions, + summary.open_threads, + summary.created_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +// ============================================================================ +// Notable Events +// ============================================================================ + +/// Get recent notable events. +pub async fn get_notable_events(pool: &SqlitePool, limit: i64) -> DbResult> { + let events = sqlx::query_as!( + NotableEvent, + r#" + SELECT + id as "id!", + timestamp as "timestamp!: _", + event_type as "event_type!", + description as "description!", + agents_involved as "agents_involved: _", + importance as "importance!: EventImportance", + created_at as "created_at!: _" + FROM notable_events + ORDER BY timestamp DESC + LIMIT ? + "#, + limit + ) + .fetch_all(pool) + .await?; + Ok(events) +} + +/// Create a notable event. +pub async fn create_notable_event(pool: &SqlitePool, event: &NotableEvent) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO notable_events (id, timestamp, event_type, description, agents_involved, importance, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + "#, + event.id, + event.timestamp, + event.event_type, + event.description, + event.agents_involved, + event.importance, + event.created_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +// ============================================================================ +// Coordination Tasks +// ============================================================================ + +/// Get a coordination task by ID. +pub async fn get_task(pool: &SqlitePool, id: &str) -> DbResult> { + let task = sqlx::query_as!( + CoordinationTask, + r#" + SELECT + id as "id!", + description as "description!", + assigned_to, + status as "status!: TaskStatus", + priority as "priority!: TaskPriority", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM coordination_tasks + WHERE id = ? 
+ "#, + id + ) + .fetch_optional(pool) + .await?; + Ok(task) +} + +/// Get tasks by status. +pub async fn get_tasks_by_status( + pool: &SqlitePool, + status: TaskStatus, +) -> DbResult> { + let tasks = sqlx::query_as!( + CoordinationTask, + r#" + SELECT + id as "id!", + description as "description!", + assigned_to, + status as "status!: TaskStatus", + priority as "priority!: TaskPriority", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM coordination_tasks + WHERE status = ? + ORDER BY priority DESC, created_at + "#, + status + ) + .fetch_all(pool) + .await?; + Ok(tasks) +} + +/// Get tasks assigned to an agent. +pub async fn get_tasks_for_agent( + pool: &SqlitePool, + agent_id: &str, +) -> DbResult> { + let tasks = sqlx::query_as!( + CoordinationTask, + r#" + SELECT + id as "id!", + description as "description!", + assigned_to, + status as "status!: TaskStatus", + priority as "priority!: TaskPriority", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM coordination_tasks + WHERE assigned_to = ? + ORDER BY priority DESC, created_at + "#, + agent_id + ) + .fetch_all(pool) + .await?; + Ok(tasks) +} + +/// Create a coordination task. +pub async fn create_task(pool: &SqlitePool, task: &CoordinationTask) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO coordination_tasks (id, description, assigned_to, status, priority, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + "#, + task.id, + task.description, + task.assigned_to, + task.status, + task.priority, + task.created_at, + task.updated_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Update task status. +pub async fn update_task_status(pool: &SqlitePool, id: &str, status: TaskStatus) -> DbResult<()> { + sqlx::query!( + "UPDATE coordination_tasks SET status = ?, updated_at = datetime('now') WHERE id = ?", + status, + id + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Assign a task to an agent. +pub async fn assign_task(pool: &SqlitePool, id: &str, agent_id: Option<&str>) -> DbResult<()> { + sqlx::query!( + "UPDATE coordination_tasks SET assigned_to = ?, updated_at = datetime('now') WHERE id = ?", + agent_id, + id + ) + .execute(pool) + .await?; + Ok(()) +} + +// ============================================================================ +// Handoff Notes +// ============================================================================ + +/// Get unread handoff notes for an agent. +pub async fn get_unread_handoffs(pool: &SqlitePool, agent_id: &str) -> DbResult> { + let notes = sqlx::query_as!( + HandoffNote, + r#" + SELECT + id as "id!", + from_agent as "from_agent!", + to_agent, + content as "content!", + created_at as "created_at!: _", + read_at as "read_at: _" + FROM handoff_notes + WHERE (to_agent = ? OR to_agent IS NULL) AND read_at IS NULL + ORDER BY created_at + "#, + agent_id + ) + .fetch_all(pool) + .await?; + Ok(notes) +} + +/// Create a handoff note. +pub async fn create_handoff(pool: &SqlitePool, note: &HandoffNote) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO handoff_notes (id, from_agent, to_agent, content, created_at, read_at) + VALUES (?, ?, ?, ?, ?, ?) + "#, + note.id, + note.from_agent, + note.to_agent, + note.content, + note.created_at, + note.read_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Mark a handoff note as read. 
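+///
+/// Typical consumption loop (sketch; assumes an open pool):
+///
+/// ```ignore
+/// for note in get_unread_handoffs(&pool, agent_id).await? {
+///     // ...deliver note.content into the agent's context...
+///     mark_handoff_read(&pool, &note.id).await?;
+/// }
+/// ```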
+pub async fn mark_handoff_read(pool: &SqlitePool, id: &str) -> DbResult<()> { + sqlx::query!( + "UPDATE handoff_notes SET read_at = datetime('now') WHERE id = ?", + id + ) + .execute(pool) + .await?; + Ok(()) +} + +// ============================================================================ +// Coordination State (Key-Value) +// ============================================================================ + +/// Get a coordination state value. +pub async fn get_state(pool: &SqlitePool, key: &str) -> DbResult> { + let state = sqlx::query_as!( + CoordinationState, + r#" + SELECT + key as "key!", + value as "value!: _", + updated_at as "updated_at!: _", + updated_by + FROM coordination_state + WHERE key = ? + "#, + key + ) + .fetch_optional(pool) + .await?; + Ok(state) +} + +/// Set a coordination state value. +pub async fn set_state(pool: &SqlitePool, state: &CoordinationState) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO coordination_state (key, value, updated_at, updated_by) + VALUES (?, ?, ?, ?) + ON CONFLICT(key) DO UPDATE SET + value = excluded.value, + updated_at = excluded.updated_at, + updated_by = excluded.updated_by + "#, + state.key, + state.value, + state.updated_at, + state.updated_by, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Delete a coordination state value. +pub async fn delete_state(pool: &SqlitePool, key: &str) -> DbResult<()> { + sqlx::query!("DELETE FROM coordination_state WHERE key = ?", key) + .execute(pool) + .await?; + Ok(()) +} diff --git a/crates/pattern_db/src/queries/memory.rs b/crates/pattern_db/src/queries/memory.rs new file mode 100644 index 0000000..c5370a9 --- /dev/null +++ b/crates/pattern_db/src/queries/memory.rs @@ -0,0 +1,334 @@ +//! Memory-related database queries. + +use sqlx::SqlitePool; + +use crate::error::DbResult; +use crate::models::{ + ArchivalEntry, MemoryBlock, MemoryBlockCheckpoint, MemoryBlockType, MemoryPermission, +}; + +/// Get a memory block by ID. +pub async fn get_block(pool: &SqlitePool, id: &str) -> DbResult> { + let block = sqlx::query_as!( + MemoryBlock, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + label as "label!", + description as "description!", + block_type as "block_type!: MemoryBlockType", + char_limit as "char_limit!", + permission as "permission!: MemoryPermission", + pinned as "pinned!: bool", + loro_snapshot as "loro_snapshot!", + content_preview, + metadata as "metadata: _", + embedding_model, + is_active as "is_active!: bool", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM memory_blocks WHERE id = ? + "#, + id + ) + .fetch_optional(pool) + .await?; + Ok(block) +} + +/// Get a memory block by agent ID and label. +pub async fn get_block_by_label( + pool: &SqlitePool, + agent_id: &str, + label: &str, +) -> DbResult> { + let block = sqlx::query_as!( + MemoryBlock, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + label as "label!", + description as "description!", + block_type as "block_type!: MemoryBlockType", + char_limit as "char_limit!", + permission as "permission!: MemoryPermission", + pinned as "pinned!: bool", + loro_snapshot as "loro_snapshot!", + content_preview, + metadata as "metadata: _", + embedding_model, + is_active as "is_active!: bool", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM memory_blocks WHERE agent_id = ? AND label = ? + "#, + agent_id, + label + ) + .fetch_optional(pool) + .await?; + Ok(block) +} + +/// List all memory blocks for an agent. 
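+///
+/// Sketch: show each active block's preview without touching the Loro
+/// snapshot (assumes an open pool):
+///
+/// ```ignore
+/// for block in list_blocks(&pool, agent_id).await? {
+///     let preview = block.content_preview.as_deref().unwrap_or("<no preview>");
+///     println!("[{}] {}", block.label, preview);
+/// }
+/// ```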
+pub async fn list_blocks(pool: &SqlitePool, agent_id: &str) -> DbResult> { + let blocks = sqlx::query_as!( + MemoryBlock, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + label as "label!", + description as "description!", + block_type as "block_type!: MemoryBlockType", + char_limit as "char_limit!", + permission as "permission!: MemoryPermission", + pinned as "pinned!: bool", + loro_snapshot as "loro_snapshot!", + content_preview, + metadata as "metadata: _", + embedding_model, + is_active as "is_active!: bool", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM memory_blocks WHERE agent_id = ? AND is_active = 1 ORDER BY label + "#, + agent_id + ) + .fetch_all(pool) + .await?; + Ok(blocks) +} + +/// List memory blocks by type. +pub async fn list_blocks_by_type( + pool: &SqlitePool, + agent_id: &str, + block_type: MemoryBlockType, +) -> DbResult> { + let blocks = sqlx::query_as!( + MemoryBlock, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + label as "label!", + description as "description!", + block_type as "block_type!: MemoryBlockType", + char_limit as "char_limit!", + permission as "permission!: MemoryPermission", + pinned as "pinned!: bool", + loro_snapshot as "loro_snapshot!", + content_preview, + metadata as "metadata: _", + embedding_model, + is_active as "is_active!: bool", + created_at as "created_at!: _", + updated_at as "updated_at!: _" + FROM memory_blocks WHERE agent_id = ? AND block_type = ? AND is_active = 1 ORDER BY label + "#, + agent_id, + block_type + ) + .fetch_all(pool) + .await?; + Ok(blocks) +} + +/// Create a new memory block. +pub async fn create_block(pool: &SqlitePool, block: &MemoryBlock) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO memory_blocks (id, agent_id, label, description, block_type, char_limit, + permission, pinned, loro_snapshot, content_preview, metadata, + embedding_model, is_active, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + "#, + block.id, + block.agent_id, + block.label, + block.description, + block.block_type, + block.char_limit, + block.permission, + block.pinned, + block.loro_snapshot, + block.content_preview, + block.metadata, + block.embedding_model, + block.is_active, + block.created_at, + block.updated_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Update a memory block's Loro snapshot and preview. +pub async fn update_block_content( + pool: &SqlitePool, + id: &str, + loro_snapshot: &[u8], + content_preview: Option<&str>, +) -> DbResult<()> { + sqlx::query!( + r#" + UPDATE memory_blocks + SET loro_snapshot = ?, content_preview = ?, updated_at = datetime('now') + WHERE id = ? + "#, + loro_snapshot, + content_preview, + id + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Soft-delete a memory block. +pub async fn deactivate_block(pool: &SqlitePool, id: &str) -> DbResult<()> { + sqlx::query!( + "UPDATE memory_blocks SET is_active = 0, updated_at = datetime('now') WHERE id = ?", + id + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Create a checkpoint for a memory block. +pub async fn create_checkpoint( + pool: &SqlitePool, + checkpoint: &MemoryBlockCheckpoint, +) -> DbResult { + let result = sqlx::query!( + r#" + INSERT INTO memory_block_checkpoints (block_id, snapshot, created_at, updates_consolidated) + VALUES (?, ?, ?, ?) 
+ "#, + checkpoint.block_id, + checkpoint.snapshot, + checkpoint.created_at, + checkpoint.updates_consolidated, + ) + .execute(pool) + .await?; + Ok(result.last_insert_rowid()) +} + +/// Get the latest checkpoint for a block. +pub async fn get_latest_checkpoint( + pool: &SqlitePool, + block_id: &str, +) -> DbResult> { + let checkpoint = sqlx::query_as!( + MemoryBlockCheckpoint, + r#" + SELECT + id as "id!", + block_id as "block_id!", + snapshot as "snapshot!", + created_at as "created_at!: _", + updates_consolidated as "updates_consolidated!" + FROM memory_block_checkpoints WHERE block_id = ? ORDER BY created_at DESC LIMIT 1 + "#, + block_id + ) + .fetch_optional(pool) + .await?; + Ok(checkpoint) +} + +/// Get an archival entry by ID. +pub async fn get_archival_entry(pool: &SqlitePool, id: &str) -> DbResult> { + let entry = sqlx::query_as!( + ArchivalEntry, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + content as "content!", + metadata as "metadata: _", + chunk_index as "chunk_index!", + parent_entry_id, + created_at as "created_at!: _" + FROM archival_entries WHERE id = ? + "#, + id + ) + .fetch_optional(pool) + .await?; + Ok(entry) +} + +/// List archival entries for an agent. +pub async fn list_archival_entries( + pool: &SqlitePool, + agent_id: &str, + limit: i64, + offset: i64, +) -> DbResult> { + let entries = sqlx::query_as!( + ArchivalEntry, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + content as "content!", + metadata as "metadata: _", + chunk_index as "chunk_index!", + parent_entry_id, + created_at as "created_at!: _" + FROM archival_entries WHERE agent_id = ? ORDER BY created_at DESC LIMIT ? OFFSET ? + "#, + agent_id, + limit, + offset + ) + .fetch_all(pool) + .await?; + Ok(entries) +} + +/// Create a new archival entry. +pub async fn create_archival_entry(pool: &SqlitePool, entry: &ArchivalEntry) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO archival_entries (id, agent_id, content, metadata, chunk_index, parent_entry_id, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + "#, + entry.id, + entry.agent_id, + entry.content, + entry.metadata, + entry.chunk_index, + entry.parent_entry_id, + entry.created_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Delete an archival entry. +pub async fn delete_archival_entry(pool: &SqlitePool, id: &str) -> DbResult<()> { + sqlx::query!("DELETE FROM archival_entries WHERE id = ?", id) + .execute(pool) + .await?; + Ok(()) +} + +/// Count archival entries for an agent. +pub async fn count_archival_entries(pool: &SqlitePool, agent_id: &str) -> DbResult { + let result = sqlx::query!( + "SELECT COUNT(*) as count FROM archival_entries WHERE agent_id = ?", + agent_id + ) + .fetch_one(pool) + .await?; + Ok(result.count) +} diff --git a/crates/pattern_db/src/queries/message.rs b/crates/pattern_db/src/queries/message.rs new file mode 100644 index 0000000..b29da3e --- /dev/null +++ b/crates/pattern_db/src/queries/message.rs @@ -0,0 +1,360 @@ +//! Message-related database queries. + +use sqlx::SqlitePool; + +use crate::error::DbResult; +use crate::models::{ArchiveSummary, Message, MessageRole, MessageSummary}; + +/// Get a message by ID. 
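+///
+/// A minimal lookup sketch ("msg_123" is a hypothetical ID):
+///
+/// ```ignore
+/// match get_message(&pool, "msg_123").await? {
+///     Some(msg) => println!("{}: {:?}", msg.role, msg.content),
+///     None => println!("no such message"),
+/// }
+/// ```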
+pub async fn get_message(pool: &SqlitePool, id: &str) -> DbResult> { + let msg = sqlx::query_as!( + Message, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + position as "position!", + batch_id, + sequence_in_batch, + role as "role!: MessageRole", + content, + tool_call_id, + tool_name, + tool_args as "tool_args: _", + tool_result as "tool_result: _", + source, + source_metadata as "source_metadata: _", + is_archived as "is_archived!: bool", + created_at as "created_at!: _" + FROM messages WHERE id = ? + "#, + id + ) + .fetch_optional(pool) + .await?; + Ok(msg) +} + +/// Get messages for an agent, ordered by position (not archived). +pub async fn get_messages(pool: &SqlitePool, agent_id: &str, limit: i64) -> DbResult> { + let messages = sqlx::query_as!( + Message, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + position as "position!", + batch_id, + sequence_in_batch, + role as "role!: MessageRole", + content, + tool_call_id, + tool_name, + tool_args as "tool_args: _", + tool_result as "tool_result: _", + source, + source_metadata as "source_metadata: _", + is_archived as "is_archived!: bool", + created_at as "created_at!: _" + FROM messages + WHERE agent_id = ? AND is_archived = 0 + ORDER BY position DESC + LIMIT ? + "#, + agent_id, + limit + ) + .fetch_all(pool) + .await?; + Ok(messages) +} + +/// Get messages for an agent including archived. +pub async fn get_messages_with_archived( + pool: &SqlitePool, + agent_id: &str, + limit: i64, +) -> DbResult> { + let messages = sqlx::query_as!( + Message, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + position as "position!", + batch_id, + sequence_in_batch, + role as "role!: MessageRole", + content, + tool_call_id, + tool_name, + tool_args as "tool_args: _", + tool_result as "tool_result: _", + source, + source_metadata as "source_metadata: _", + is_archived as "is_archived!: bool", + created_at as "created_at!: _" + FROM messages + WHERE agent_id = ? + ORDER BY position DESC + LIMIT ? + "#, + agent_id, + limit + ) + .fetch_all(pool) + .await?; + Ok(messages) +} + +/// Get messages after a specific position. +pub async fn get_messages_after( + pool: &SqlitePool, + agent_id: &str, + after_position: &str, + limit: i64, +) -> DbResult> { + let messages = sqlx::query_as!( + Message, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + position as "position!", + batch_id, + sequence_in_batch, + role as "role!: MessageRole", + content, + tool_call_id, + tool_name, + tool_args as "tool_args: _", + tool_result as "tool_result: _", + source, + source_metadata as "source_metadata: _", + is_archived as "is_archived!: bool", + created_at as "created_at!: _" + FROM messages + WHERE agent_id = ? AND position > ? + ORDER BY position ASC + LIMIT ? + "#, + agent_id, + after_position, + limit + ) + .fetch_all(pool) + .await?; + Ok(messages) +} + +/// Get messages in a specific batch. +pub async fn get_batch_messages(pool: &SqlitePool, batch_id: &str) -> DbResult> { + let messages = sqlx::query_as!( + Message, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + position as "position!", + batch_id, + sequence_in_batch, + role as "role!: MessageRole", + content, + tool_call_id, + tool_name, + tool_args as "tool_args: _", + tool_result as "tool_result: _", + source, + source_metadata as "source_metadata: _", + is_archived as "is_archived!: bool", + created_at as "created_at!: _" + FROM messages + WHERE batch_id = ? 
+ ORDER BY sequence_in_batch + "#, + batch_id + ) + .fetch_all(pool) + .await?; + Ok(messages) +} + +/// Create a new message. +pub async fn create_message(pool: &SqlitePool, msg: &Message) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO messages (id, agent_id, position, batch_id, sequence_in_batch, + role, content, tool_call_id, tool_name, tool_args, tool_result, + source, source_metadata, is_archived, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + "#, + msg.id, + msg.agent_id, + msg.position, + msg.batch_id, + msg.sequence_in_batch, + msg.role, + msg.content, + msg.tool_call_id, + msg.tool_name, + msg.tool_args, + msg.tool_result, + msg.source, + msg.source_metadata, + msg.is_archived, + msg.created_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Mark messages as archived. +pub async fn archive_messages( + pool: &SqlitePool, + agent_id: &str, + before_position: &str, +) -> DbResult { + let result = sqlx::query!( + "UPDATE messages SET is_archived = 1 WHERE agent_id = ? AND position < ? AND is_archived = 0", + agent_id, + before_position + ) + .execute(pool) + .await?; + Ok(result.rows_affected()) +} + +/// Delete messages (hard delete, use with caution). +pub async fn delete_messages( + pool: &SqlitePool, + agent_id: &str, + before_position: &str, +) -> DbResult { + let result = sqlx::query!( + "DELETE FROM messages WHERE agent_id = ? AND position < ?", + agent_id, + before_position + ) + .execute(pool) + .await?; + Ok(result.rows_affected()) +} + +/// Get archive summary by ID. +pub async fn get_archive_summary(pool: &SqlitePool, id: &str) -> DbResult> { + let summary = sqlx::query_as!( + ArchiveSummary, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + summary as "summary!", + start_position as "start_position!", + end_position as "end_position!", + message_count as "message_count!", + created_at as "created_at!: _" + FROM archive_summaries WHERE id = ? + "#, + id + ) + .fetch_optional(pool) + .await?; + Ok(summary) +} + +/// Get archive summaries for an agent. +pub async fn get_archive_summaries( + pool: &SqlitePool, + agent_id: &str, +) -> DbResult> { + let summaries = sqlx::query_as!( + ArchiveSummary, + r#" + SELECT + id as "id!", + agent_id as "agent_id!", + summary as "summary!", + start_position as "start_position!", + end_position as "end_position!", + message_count as "message_count!", + created_at as "created_at!: _" + FROM archive_summaries WHERE agent_id = ? ORDER BY start_position + "#, + agent_id + ) + .fetch_all(pool) + .await?; + Ok(summaries) +} + +/// Create an archive summary. +pub async fn create_archive_summary(pool: &SqlitePool, summary: &ArchiveSummary) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO archive_summaries (id, agent_id, summary, start_position, end_position, message_count, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + "#, + summary.id, + summary.agent_id, + summary.summary, + summary.start_position, + summary.end_position, + summary.message_count, + summary.created_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Count messages for an agent (excluding archived). +pub async fn count_messages(pool: &SqlitePool, agent_id: &str) -> DbResult { + let result = sqlx::query!( + "SELECT COUNT(*) as count FROM messages WHERE agent_id = ? AND is_archived = 0", + agent_id + ) + .fetch_one(pool) + .await?; + Ok(result.count) +} + +/// Count all messages for an agent (including archived). 
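+///
+/// Pairs with `count_messages` to report how much history has been
+/// compressed (sketch; assumes an open pool):
+///
+/// ```ignore
+/// let live = count_messages(&pool, agent_id).await?;
+/// let total = count_all_messages(&pool, agent_id).await?;
+/// println!("{} live, {} archived", live, total - live);
+/// ```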
+pub async fn count_all_messages(pool: &SqlitePool, agent_id: &str) -> DbResult { + let result = sqlx::query!( + "SELECT COUNT(*) as count FROM messages WHERE agent_id = ?", + agent_id + ) + .fetch_one(pool) + .await?; + Ok(result.count) +} + +/// Get message summaries (lightweight projection for listing). +pub async fn get_message_summaries( + pool: &SqlitePool, + agent_id: &str, + limit: i64, +) -> DbResult> { + let summaries = sqlx::query_as!( + MessageSummary, + r#" + SELECT + id as "id!", + position as "position!", + role as "role!: MessageRole", + CAST(CASE WHEN LENGTH(content) > 100 THEN SUBSTR(content, 1, 100) || '...' ELSE content END AS TEXT) as "content_preview: _", + source, + created_at as "created_at!: _" + FROM messages + WHERE agent_id = ? AND is_archived = 0 + ORDER BY position DESC + LIMIT ? + "#, + agent_id, + limit + ) + .fetch_all(pool) + .await?; + Ok(summaries) +} diff --git a/crates/pattern_db/src/queries/mod.rs b/crates/pattern_db/src/queries/mod.rs new file mode 100644 index 0000000..642ecf4 --- /dev/null +++ b/crates/pattern_db/src/queries/mod.rs @@ -0,0 +1,17 @@ +//! Database query functions. +//! +//! Organized by domain: +//! - `agent`: Agent CRUD and queries +//! - `memory`: Memory block operations +//! - `message`: Message history operations +//! - `coordination`: Cross-agent coordination queries + +mod agent; +mod coordination; +mod memory; +mod message; + +pub use agent::*; +pub use coordination::*; +pub use memory::*; +pub use message::*; diff --git a/crates/pattern_db/src/vector.rs b/crates/pattern_db/src/vector.rs new file mode 100644 index 0000000..956202f --- /dev/null +++ b/crates/pattern_db/src/vector.rs @@ -0,0 +1,520 @@ +//! Vector search functionality using sqlite-vec. +//! +//! This module provides vector storage and KNN search capabilities for +//! semantic search over memories, messages, and other content. +//! +//! The sqlite-vec extension is registered globally via `sqlite3_auto_extension` +//! before any database connections are opened. This means all connections +//! automatically have access to vector functions and virtual tables. +//! +//! # Why Runtime Queries +//! +//! Unlike the rest of pattern_db, this module uses runtime `sqlx::query_as()` +//! instead of compile-time `sqlx::query_as!()` macros. This is intentional: +//! +//! 1. **Virtual table syntax** - `WHERE embedding MATCH ? AND k = ?` is +//! sqlite-vec specific, not standard SQL. sqlx's compile-time checker +//! doesn't understand it. +//! +//! 2. **Table created at runtime** - The `embeddings` virtual table is created +//! via `ensure_embeddings_table()`, not in migrations. sqlx's offline mode +//! can't see it. +//! +//! 3. **Dynamic dimensions** - Table definition uses `float[{dimensions}]` +//! which varies per constellation. +//! +//! 4. **Extension-specific types** - Vector columns and the magic `distance` +//! column from KNN queries don't map to sqlx-known types. +//! +//! The tradeoff is acceptable: vector queries are isolated here, patterns are +//! simple and stable, and we test at runtime anyway. + +use std::ffi::c_char; +use std::sync::Once; + +use sqlx::SqlitePool; +use zerocopy::IntoBytes; + +use crate::error::{DbError, DbResult}; + +/// Default embedding dimensions (bge-small-en-v1.5). +/// Configurable per constellation if using different models. +pub const DEFAULT_EMBEDDING_DIMENSIONS: usize = 384; + +static INIT: Once = Once::new(); + +/// Initialize sqlite-vec extension globally. 
+/// +/// This registers the extension via `sqlite3_auto_extension`, which means +/// it will be automatically loaded for ALL SQLite connections created after +/// this call. Safe to call multiple times - only runs once. +/// +/// # Safety +/// +/// This function contains unsafe code to register the C extension. The unsafe +/// block is contained here to keep it in one place. The extension init function +/// is provided by the sqlite-vec crate which bundles and compiles the C source. +pub fn init_sqlite_vec() { + INIT.call_once(|| { + unsafe { + // sqlite-vec exports sqlite3_vec_init with a slightly wrong signature. + // We transmute to the correct sqlite3_auto_extension callback type. + // This is the same pattern used in the sqlite-vec docs and confirmed + // working in sqlx issue #3147. + let init_fn = sqlite_vec::sqlite3_vec_init as *const (); + let init_fn: unsafe extern "C" fn( + *mut libsqlite3_sys::sqlite3, + *mut *mut c_char, + *const libsqlite3_sys::sqlite3_api_routines, + ) -> std::ffi::c_int = std::mem::transmute(init_fn); + libsqlite3_sys::sqlite3_auto_extension(Some(init_fn)); + } + tracing::debug!("sqlite-vec extension registered globally"); + }); +} + +/// Types of content that can have embeddings. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ContentType { + /// Memory block content + MemoryBlock, + /// Message content + Message, + /// Archival entry + ArchivalEntry, + /// File passage + FilePassage, +} + +impl ContentType { + pub fn as_str(&self) -> &'static str { + match self { + ContentType::MemoryBlock => "memory_block", + ContentType::Message => "message", + ContentType::ArchivalEntry => "archival_entry", + ContentType::FilePassage => "file_passage", + } + } + + pub fn from_str(s: &str) -> Option { + match s { + "memory_block" => Some(ContentType::MemoryBlock), + "message" => Some(ContentType::Message), + "archival_entry" => Some(ContentType::ArchivalEntry), + "file_passage" => Some(ContentType::FilePassage), + _ => None, + } + } +} + +/// Result of a KNN vector search. +#[derive(Debug, Clone)] +pub struct VectorSearchResult { + /// The content ID + pub content_id: String, + /// Distance from query vector (lower = more similar) + pub distance: f32, + /// Content type + pub content_type: ContentType, + /// Chunk index if applicable + pub chunk_index: Option, +} + +/// Statistics about stored embeddings. +#[derive(Debug, Clone, Default)] +pub struct EmbeddingStats { + pub total_embeddings: u64, + pub by_content_type: Vec<(ContentType, u64)>, +} + +/// Verify that sqlite-vec is loaded and working. +pub async fn verify_sqlite_vec(pool: &SqlitePool) -> DbResult { + let version: (String,) = sqlx::query_as("SELECT vec_version()") + .fetch_one(pool) + .await + .map_err(|e| DbError::Extension(format!("sqlite-vec not loaded: {}", e)))?; + Ok(version.0) +} + +/// Create the embeddings virtual table if it doesn't exist. +/// +/// Virtual tables can't be created via sqlx migrations (they use +/// extension-specific syntax), so we create them programmatically. 
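+///
+/// Call order matters: register the extension before any connection exists,
+/// then create the table once per database. A setup sketch using this
+/// crate's own helpers:
+///
+/// ```ignore
+/// init_sqlite_vec(); // idempotent; must precede pool creation
+/// let db = crate::ConstellationDb::open_in_memory().await?;
+/// ensure_embeddings_table(db.pool(), DEFAULT_EMBEDDING_DIMENSIONS).await?;
+/// ```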
+pub async fn ensure_embeddings_table(pool: &SqlitePool, dimensions: usize) -> DbResult<()> { + // Create the unified embeddings table using vec0 + // The + prefix on columns makes them "auxiliary" columns stored alongside vectors + let create_sql = format!( + r#" + CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vec0( + embedding float[{dimensions}], + +content_type TEXT NOT NULL, + +content_id TEXT NOT NULL, + +chunk_index INTEGER, + +content_hash TEXT + ) + "#, + ); + + sqlx::query(&create_sql).execute(pool).await?; + tracing::debug!(dimensions, "ensured embeddings virtual table exists"); + Ok(()) +} + +/// Insert an embedding into the database. +pub async fn insert_embedding( + pool: &SqlitePool, + content_type: ContentType, + content_id: &str, + embedding: &[f32], + chunk_index: Option, + content_hash: Option<&str>, +) -> DbResult { + let embedding_bytes = embedding.as_bytes(); + + let rowid = sqlx::query_scalar::<_, i64>( + r#" + INSERT INTO embeddings (embedding, content_type, content_id, chunk_index, content_hash) + VALUES (?, ?, ?, ?, ?) + RETURNING rowid + "#, + ) + .bind(embedding_bytes) + .bind(content_type.as_str()) + .bind(content_id) + .bind(chunk_index) + .bind(content_hash) + .fetch_one(pool) + .await?; + + Ok(rowid) +} + +/// Delete embeddings for a content item. +pub async fn delete_embeddings( + pool: &SqlitePool, + content_type: ContentType, + content_id: &str, +) -> DbResult { + let result = sqlx::query("DELETE FROM embeddings WHERE content_type = ? AND content_id = ?") + .bind(content_type.as_str()) + .bind(content_id) + .execute(pool) + .await?; + + Ok(result.rows_affected()) +} + +/// Update embedding for a content item (delete old, insert new). +pub async fn update_embedding( + pool: &SqlitePool, + content_type: ContentType, + content_id: &str, + embedding: &[f32], + chunk_index: Option, + content_hash: Option<&str>, +) -> DbResult { + delete_embeddings(pool, content_type, content_id).await?; + insert_embedding( + pool, + content_type, + content_id, + embedding, + chunk_index, + content_hash, + ) + .await +} + +/// Perform KNN search over embeddings. +/// +/// Note: vec0 virtual tables don't support WHERE constraints on auxiliary +/// columns during KNN queries. If `content_type_filter` is specified, we +/// fetch more results and filter post-query. This means the actual number +/// of results may be less than `limit` when filtering. +pub async fn knn_search( + pool: &SqlitePool, + query_embedding: &[f32], + limit: i64, + content_type_filter: Option, +) -> DbResult> { + let query_bytes = query_embedding.as_bytes(); + + // When filtering by content type, fetch more results to account for + // post-filtering. This is a tradeoff - we can't filter during KNN. + let fetch_limit = if content_type_filter.is_some() { + limit * 3 // Fetch 3x to have enough after filtering + } else { + limit + }; + + let results = sqlx::query_as::<_, (String, f32, String, Option)>( + r#" + SELECT content_id, distance, content_type, chunk_index + FROM embeddings + WHERE embedding MATCH ? AND k = ? 
+ ORDER BY distance + "#, + ) + .bind(query_bytes) + .bind(fetch_limit) + .fetch_all(pool) + .await?; + + let mut results: Vec = results + .into_iter() + .filter_map(|(content_id, distance, content_type, chunk_index)| { + let ct = ContentType::from_str(&content_type)?; + // Apply content type filter if specified + if let Some(filter_ct) = content_type_filter { + if ct != filter_ct { + return None; + } + } + Some(VectorSearchResult { + content_id, + distance, + content_type: ct, + chunk_index, + }) + }) + .collect(); + + // Truncate to requested limit + results.truncate(limit as usize); + Ok(results) +} + +/// Search for similar content within a specific type. +pub async fn search_similar( + pool: &SqlitePool, + query_embedding: &[f32], + content_type: ContentType, + limit: i64, + max_distance: Option, +) -> DbResult> { + let mut results = knn_search(pool, query_embedding, limit, Some(content_type)).await?; + + // Filter by maximum distance if specified + if let Some(max_dist) = max_distance { + results.retain(|r| r.distance <= max_dist); + } + + Ok(results) +} + +/// Check if an embedding exists and is up-to-date. +pub async fn embedding_is_current( + pool: &SqlitePool, + content_type: ContentType, + content_id: &str, + current_hash: &str, +) -> DbResult { + let result: Option<(String,)> = sqlx::query_as( + "SELECT content_hash FROM embeddings WHERE content_type = ? AND content_id = ? LIMIT 1", + ) + .bind(content_type.as_str()) + .bind(content_id) + .fetch_optional(pool) + .await?; + + Ok(result.map(|(h,)| h == current_hash).unwrap_or(false)) +} + +/// Get embedding statistics. +pub async fn get_embedding_stats(pool: &SqlitePool) -> DbResult { + let total: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM embeddings") + .fetch_one(pool) + .await?; + + let by_type: Vec<(String, i64)> = + sqlx::query_as("SELECT content_type, COUNT(*) FROM embeddings GROUP BY content_type") + .fetch_all(pool) + .await?; + + Ok(EmbeddingStats { + total_embeddings: total.0 as u64, + by_content_type: by_type + .into_iter() + .filter_map(|(ct, count)| ContentType::from_str(&ct).map(|t| (t, count as u64))) + .collect(), + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_content_type_roundtrip() { + for ct in [ + ContentType::MemoryBlock, + ContentType::Message, + ContentType::ArchivalEntry, + ContentType::FilePassage, + ] { + let s = ct.as_str(); + assert_eq!(ContentType::from_str(s), Some(ct)); + } + } + + #[test] + fn test_content_type_unknown() { + assert_eq!(ContentType::from_str("unknown"), None); + } + + #[test] + fn test_init_sqlite_vec_idempotent() { + // Should be safe to call multiple times + init_sqlite_vec(); + init_sqlite_vec(); + init_sqlite_vec(); + } + + #[tokio::test] + async fn test_sqlite_vec_loaded() { + // Open a connection (which registers sqlite-vec) + let db = crate::ConstellationDb::open_in_memory().await.unwrap(); + + // Verify sqlite-vec is available + let version = verify_sqlite_vec(db.pool()).await.unwrap(); + assert!(!version.is_empty()); + assert!( + version.starts_with("v"), + "version should start with 'v': {}", + version + ); + } + + #[tokio::test] + async fn test_embeddings_table_creation() { + let db = crate::ConstellationDb::open_in_memory().await.unwrap(); + + // Create the embeddings table + ensure_embeddings_table(db.pool(), 384).await.unwrap(); + + // Should be idempotent + ensure_embeddings_table(db.pool(), 384).await.unwrap(); + } + + #[tokio::test] + async fn test_embedding_insert_and_search() { + let db = 
crate::ConstellationDb::open_in_memory().await.unwrap(); + ensure_embeddings_table(db.pool(), 4).await.unwrap(); + + // Insert a test embedding + let embedding = vec![1.0f32, 0.0, 0.0, 0.0]; + let rowid = insert_embedding( + db.pool(), + ContentType::Message, + "msg_123", + &embedding, + None, + Some("abc123"), + ) + .await + .unwrap(); + // vec0 rowids start at 0 + assert!(rowid >= 0); + + // Insert another + let embedding2 = vec![0.9f32, 0.1, 0.0, 0.0]; // Similar to first + insert_embedding( + db.pool(), + ContentType::Message, + "msg_456", + &embedding2, + None, + None, + ) + .await + .unwrap(); + + // Insert a dissimilar one + let embedding3 = vec![0.0f32, 0.0, 1.0, 0.0]; + insert_embedding( + db.pool(), + ContentType::MemoryBlock, + "block_789", + &embedding3, + Some(0), + None, + ) + .await + .unwrap(); + + // Search for similar to first embedding + let query = vec![1.0f32, 0.0, 0.0, 0.0]; + let results = knn_search(db.pool(), &query, 3, None).await.unwrap(); + + assert_eq!(results.len(), 3); + // First result should be exact match + assert_eq!(results[0].content_id, "msg_123"); + assert!(results[0].distance < 0.01); + // Second should be similar + assert_eq!(results[1].content_id, "msg_456"); + + // Search with content type filter + let results = knn_search(db.pool(), &query, 3, Some(ContentType::Message)) + .await + .unwrap(); + assert_eq!(results.len(), 2); + assert!( + results + .iter() + .all(|r| r.content_type == ContentType::Message) + ); + } + + #[tokio::test] + async fn test_embedding_delete() { + let db = crate::ConstellationDb::open_in_memory().await.unwrap(); + ensure_embeddings_table(db.pool(), 4).await.unwrap(); + + let embedding = vec![1.0f32, 0.0, 0.0, 0.0]; + insert_embedding( + db.pool(), + ContentType::Message, + "msg_delete_me", + &embedding, + None, + None, + ) + .await + .unwrap(); + + let deleted = delete_embeddings(db.pool(), ContentType::Message, "msg_delete_me") + .await + .unwrap(); + assert_eq!(deleted, 1); + + // Should find nothing now + let results = knn_search(db.pool(), &embedding, 10, None).await.unwrap(); + assert!(results.is_empty()); + } + + #[tokio::test] + async fn test_embedding_stats() { + let db = crate::ConstellationDb::open_in_memory().await.unwrap(); + ensure_embeddings_table(db.pool(), 4).await.unwrap(); + + // Initially empty + let stats = get_embedding_stats(db.pool()).await.unwrap(); + assert_eq!(stats.total_embeddings, 0); + + // Add some embeddings + let emb = vec![1.0f32, 0.0, 0.0, 0.0]; + insert_embedding(db.pool(), ContentType::Message, "m1", &emb, None, None) + .await + .unwrap(); + insert_embedding(db.pool(), ContentType::Message, "m2", &emb, None, None) + .await + .unwrap(); + insert_embedding(db.pool(), ContentType::MemoryBlock, "b1", &emb, None, None) + .await + .unwrap(); + + let stats = get_embedding_stats(db.pool()).await.unwrap(); + assert_eq!(stats.total_embeddings, 3); + assert_eq!(stats.by_content_type.len(), 2); + } +} diff --git a/docs/refactoring/partner-init-implementation.md b/docs/refactoring/partner-init-implementation.md deleted file mode 100644 index d263e5a..0000000 --- a/docs/refactoring/partner-init-implementation.md +++ /dev/null @@ -1,81 +0,0 @@ -# Partner Pre-Initialization Implementation Summary - -## Completed Work (2025-07-05) - -### 1. 
Configuration Updates ✅ -- Added `PartnersConfig` and `PartnerUser` structs to `src/config.rs` -- Added `partners` field to main `Config` struct -- Updated `pattern.toml` with partner configuration: - ```toml - [partners] - [[partners.users]] - discord_id = "549170854458687509" - name = "orual" - auto_initialize = true - ``` - -### 2. Service-Level Initialization ✅ -- Added `initialize_partners()` method to `PatternService` -- Partners are initialized after multi-agent system creation -- Only partners with `auto_initialize = true` are pre-initialized at boot -- Errors during partner initialization don't crash the service - -### 3. MultiAgentSystem Updates ✅ -- Added `initialize_partner()` method that: - - Parses Discord ID from string to u64 - - Gets or creates database user by Discord ID - - Checks if already initialized (fast path) - - Initializes constellation if needed -- Added `is_user_initialized()` method to check agent existence -- Updated `build_system_prompt()` to accept partner_name and discord_id - -### 4. Discord Bot Fast Path ✅ -- Updated `process_message()` to use fast path checking -- Only initializes constellation on first interaction -- Subsequent messages skip initialization for 20-30s speedup - -### 5. System Prompt Updates ✅ -- Modified `build_system_prompt()` to handle partner placeholders -- Replaces `{partner_name}` and `{discord_id}` in prompts -- Falls back to "Unknown" when partner info not available - -## What Works Now - -1. **Boot-time Initialization**: Partners listed in `pattern.toml` with `auto_initialize = true` get their constellations created at startup -2. **Fast Discord Responses**: Pre-initialized partners get instant responses instead of 20-30s delays -3. **Flexible Configuration**: Easy to add/remove partners via config file -4. **Error Resilience**: Failed partner init doesn't crash the service - -## Known Limitations - -1. **Partner Info in Prompts**: Currently passes `None` for partner info in most contexts - - Only the boot-time initialization has access to partner details - - Regular user initialization doesn't have partner name/discord_id - - Would need to store partner info in database to fully implement - -2. **No /partner Commands**: Slash commands for dynamic partner management not yet implemented - -3. **No Hot Reload**: Adding partners requires service restart - -## Next Steps - -To fully complete the partner system: - -1. **Store Partner Info in Database**: - - Add `is_partner` and `partner_name` fields to users table - - Update `initialize_partner()` to mark users as partners - - Retrieve partner info when building prompts - -2. **Implement /partner Slash Commands**: - - `/partner add @user` - Add a new partner - - `/partner list` - Show all partners - - `/partner remove @user` - Remove a partner - -3. **Thread Partner Info Through Agent Creation**: - - Pass user info through `create_agent()` flow - - Retrieve partner details from database - - Include in system prompt generation - -4. 
**Add Partner Status to Groups**: - - Groups could have different behavior for partners vs conversants - - Partner-specific memory blocks or tools \ No newline at end of file diff --git a/docs/refactoring/partner-initialization-plan.md b/docs/refactoring/partner-initialization-plan.md deleted file mode 100644 index 3c45787..0000000 --- a/docs/refactoring/partner-initialization-plan.md +++ /dev/null @@ -1,191 +0,0 @@ -# Partner Constellation Pre-Initialization Plan - -## Problem Statement -Currently, agent constellations are created on first Discord message, causing significant delays: -- Agent creation (6 agents × ~1-2s each) -- Memory block creation (3 blocks × ~0.5s each) -- Group creation (4 groups × ~2-3s each) -- Total initialization time: 20-30 seconds - -## Proposed Solution -Pre-create partner constellations at boot time for designated partners. - -## Implementation Plan - -### Phase 1: Configuration Changes - -1. **Update pattern.toml structure**: -```toml -[discord] -token = "..." -application_id = ... - -[partners] -# Define partners who get pre-initialized constellations -[[partners.users]] -discord_id = "549170854458687509" -name = "primary_partner" -auto_initialize = true - -[[partners.users]] -discord_id = "123456789012345678" -name = "secondary_partner" -auto_initialize = false # Can be initialized via slash command -``` - -2. **Add to Config struct**: -```rust -#[derive(Debug, Clone, Deserialize)] -pub struct PartnersConfig { - pub users: Vec, -} - -#[derive(Debug, Clone, Deserialize)] -pub struct PartnerUser { - pub discord_id: String, - pub name: String, - pub auto_initialize: bool, -} -``` - -### Phase 2: Boot-Time Initialization - -1. **Modify PatternService::start()**: - - After multi-agent system init, check for partners config - - For each partner with `auto_initialize = true`: - - Create database user if not exists - - Initialize full constellation (agents, memory, groups) - - Log initialization progress - -2. **Add method to MultiAgentSystem**: -```rust -pub async fn initialize_partner(&self, discord_id: &str, name: &str) -> Result { - // Get or create database user - let user = self.db.get_or_create_user_by_discord_id( - discord_id.parse()?, - name - ).await?; - - let user_id = UserId(user.id); - - // Initialize constellation - self.initialize_user(user_id).await?; - - info!("Partner {} initialized with user_id {}", name, user_id.0); - Ok(user_id) -} -``` - -### Phase 3: Slash Command for Partner Management - -1. **Add new slash commands**: - - `/partner add @user` - Add someone as a partner and initialize their constellation - - `/partner remove @user` - Remove partner status (keeps agents) - - `/partner list` - Show all configured partners and their status - -2. **Implement partner management**: -```rust -async fn handle_partner_command(&self, ctx: &Context, command: &CommandInteraction) { - let subcommand = // parse subcommand - - match subcommand { - "add" => { - let mentioned_user = // get mentioned user - let discord_id = mentioned_user.id.to_string(); - - // Add to database - self.db.add_partner(&discord_id, &mentioned_user.name).await?; - - // Initialize constellation - self.multi_agent_system.initialize_partner(&discord_id, &mentioned_user.name).await?; - - // Update config file (optional - for persistence across restarts) - } - // ... other subcommands - } -} -``` - -### Phase 4: Optimize Discord Message Handling - -1. 
**Fast path for initialized partners**: -```rust -async fn process_message(&self, ctx: &Context, msg: &Message) -> Result { - let state = self.state.read().await; - - // Get database user - let db_user = state.db - .get_or_create_user_by_discord_id(msg.author.id.get(), &msg.author.name) - .await?; - - let user_id = UserId(db_user.id); - - // Check if already initialized (fast path) - if state.multi_agent_system.is_user_initialized(user_id).await { - // Skip initialization, go straight to message processing - return state.multi_agent_system.send_message_to_agent(...).await; - } - - // Slow path: initialize on demand for non-partners - state.multi_agent_system.initialize_user(user_id).await?; - // ... continue as before -} -``` - -2. **Add initialization check method**: -```rust -impl MultiAgentSystem { - pub async fn is_user_initialized(&self, user_id: UserId) -> bool { - // Check if user has agents in cache - self.user_agents.read().await.contains_key(&user_id) - } -} -``` - -### Phase 5: Database Schema Updates - -1. **Add partners table** (optional - for persistence): -```sql -CREATE TABLE partners ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - user_id INTEGER NOT NULL, - added_by INTEGER, - added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - FOREIGN KEY (user_id) REFERENCES users(id), - FOREIGN KEY (added_by) REFERENCES users(id), - UNIQUE(user_id) -); -``` - -### Phase 6: Performance Monitoring - -1. **Add timing logs**: - - Boot-time initialization duration - - Per-message processing time - - Cache hit/miss rates - -2. **Metrics to track**: - - Time from Discord message to agent response - - Number of pre-initialized vs on-demand initializations - - Memory usage with pre-initialized constellations - -## Benefits - -1. **Immediate Response**: Partners get instant responses without initialization delay -2. **Predictable Load**: Heavy initialization happens at boot, not during user interaction -3. **Better UX**: No more "initializing..." messages for partners -4. **Scalability**: Can control which users get pre-initialized constellations - -## Migration Path - -1. Deploy with auto_initialize = false for all partners -2. Test with single partner via slash command -3. Enable auto_initialize for primary partners -4. Monitor performance and adjust - -## Future Enhancements - -1. **Lazy agent loading**: Initialize core agents first, specialists on demand -2. **Agent hibernation**: Unload inactive agents after timeout -3. **Warm standby pool**: Pre-create generic agents that can be assigned to users -4. **Constellation templates**: Different agent sets for different partner types \ No newline at end of file diff --git a/docs/refactoring/v2-api-surface.md b/docs/refactoring/v2-api-surface.md new file mode 100644 index 0000000..75bb355 --- /dev/null +++ b/docs/refactoring/v2-api-surface.md @@ -0,0 +1,687 @@ +# Pattern v2: API Surface + +## Overview + +Pattern v2 exposes multiple interfaces: + +1. **HTTP API** - REST endpoints for web clients, external integrations +2. **ACP (Agent Client Protocol)** - For editor integration (Zed, JetBrains, etc.) +3. **CLI** - Direct local interaction (remains important as trusted interface) + +## HTTP API + +Built on Axum, serving the pattern_server crate. + +### Authentication + +``` +POST /api/v1/auth/login +POST /api/v1/auth/refresh +POST /api/v1/auth/logout +``` + +Existing v1 JWT-based auth can be preserved. 
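+
+As a sketch of the intended flow (the request/response field names - `username`, `password`, `token` - are assumptions here, not a fixed contract):
+
+```rust
+use serde_json::json;
+
+// Log in, then call an authenticated endpoint with the returned JWT.
+async fn login_and_list(base: &str) -> Result<(), reqwest::Error> {
+    let client = reqwest::Client::new();
+
+    let body: serde_json::Value = client
+        .post(format!("{base}/api/v1/auth/login"))
+        .json(&json!({ "username": "orual", "password": "..." }))
+        .send()
+        .await?
+        .json()
+        .await?;
+    // Assumed response shape: { "token": "<jwt>", ... }
+    let jwt = body["token"].as_str().unwrap_or_default();
+
+    // Subsequent requests carry the JWT as a bearer token
+    let constellations = client
+        .get(format!("{base}/api/v1/constellations"))
+        .bearer_auth(jwt)
+        .send()
+        .await?
+        .text()
+        .await?;
+    println!("{constellations}");
+    Ok(())
+}
+```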
+ +### Constellation Management + +``` +GET /api/v1/constellations # List user's constellations +POST /api/v1/constellations # Create new constellation +GET /api/v1/constellations/:id # Get constellation details +DELETE /api/v1/constellations/:id # Delete constellation +``` + +### Agent Management + +``` +GET /api/v1/constellations/:cid/agents # List agents +POST /api/v1/constellations/:cid/agents # Create agent +GET /api/v1/constellations/:cid/agents/:aid # Get agent +PATCH /api/v1/constellations/:cid/agents/:aid # Update agent config +DELETE /api/v1/constellations/:cid/agents/:aid # Delete agent +``` + +### Sessions (Conversations) + +``` +POST /api/v1/constellations/:cid/agents/:aid/sessions # Create session +GET /api/v1/constellations/:cid/agents/:aid/sessions/:sid # Get session +DELETE /api/v1/constellations/:cid/agents/:aid/sessions/:sid # End session + +# Send message and get streaming response +POST /api/v1/constellations/:cid/agents/:aid/sessions/:sid/messages + Content-Type: application/json + Accept: text/event-stream + + { "content": "Hello", "source": "api" } + + Response: SSE stream of ResponseEvents +``` + +### Memory Operations + +``` +# Memory blocks +GET /api/v1/constellations/:cid/agents/:aid/memory # List blocks +GET /api/v1/constellations/:cid/agents/:aid/memory/:label # Get block +PUT /api/v1/constellations/:cid/agents/:aid/memory/:label # Update block +DELETE /api/v1/constellations/:cid/agents/:aid/memory/:label # Delete block + +# Block history (via Loro) +GET /api/v1/constellations/:cid/agents/:aid/memory/:label/history +POST /api/v1/constellations/:cid/agents/:aid/memory/:label/rollback + { "version": "frontiers_json" } + +# Archival search +POST /api/v1/constellations/:cid/agents/:aid/memory/search + { "query": "search terms", "limit": 10 } + +# Shared blocks +POST /api/v1/constellations/:cid/agents/:aid/memory/:label/share + { "target_agent_id": "...", "access": "read_only" } +``` + +### Groups + +``` +GET /api/v1/constellations/:cid/groups # List groups +POST /api/v1/constellations/:cid/groups # Create group +GET /api/v1/constellations/:cid/groups/:gid # Get group +DELETE /api/v1/constellations/:cid/groups/:gid # Delete group + +POST /api/v1/constellations/:cid/groups/:gid/members + { "agent_id": "...", "role": "worker" } +DELETE /api/v1/constellations/:cid/groups/:gid/members/:aid +``` + +### Export/Import + +``` +POST /api/v1/constellations/:cid/export + Response: application/octet-stream (CAR file) + +POST /api/v1/constellations/import + Content-Type: application/octet-stream + Body: CAR file +``` + +--- + +## ACP Integration + +The Agent Client Protocol enables Pattern to work with any ACP-compatible editor (Zed, JetBrains, Neovim, etc.). + +### What is ACP? + +ACP is an open standard (from Zed) that decouples agents from editors: +- **Agents** implement the `Agent` trait and run as subprocesses +- **Clients** (editors) spawn agents and communicate via JSON-RPC over stdio +- Agents can request file access, terminal execution, permissions from the client + +### Pattern as an ACP Agent + +Pattern would implement the `agent-client-protocol` crate's `Agent` trait: + +```rust +use agent_client_protocol::{ + Agent, AgentSideConnection, + InitializeRequest, InitializeResponse, + NewSessionRequest, NewSessionResponse, + PromptRequest, PromptResponse, + // ... 
+}; + +pub struct PatternAcpAgent { + /// The Pattern constellation/agent this wraps + db: Arc, + agent_id: AgentId, + + /// Active sessions + sessions: DashMap, +} + +#[async_trait] +impl Agent for PatternAcpAgent { + async fn initialize(&self, req: InitializeRequest) -> Result { + Ok(InitializeResponse { + protocol_version: V1, + capabilities: AgentCapabilities { + load_session: true, // Pattern has persistent sessions + mcp: Some(McpCapabilities { /* ... */ }), + // ... + }, + implementation: Implementation { + name: "pattern".into(), + version: env!("CARGO_PKG_VERSION").into(), + title: Some("Pattern ADHD Support Agent".into()), + }, + // ... + }) + } + + async fn new_session(&self, req: NewSessionRequest) -> Result { + // Create or load Pattern agent session + let session = PatternSession::new( + &self.db, + &self.agent_id, + req.mcp_servers, // Forward MCP server configs + ).await?; + + let session_id = SessionId::new(); + self.sessions.insert(session_id.clone(), session); + + Ok(NewSessionResponse { + session_id, + available_modes: vec![ + SessionMode { id: "default".into(), name: "Default".into() }, + SessionMode { id: "adhd".into(), name: "ADHD Support".into() }, + ], + // ... + }) + } + + async fn prompt(&self, req: PromptRequest) -> Result { + let session = self.sessions.get(&req.session_id) + .ok_or_else(|| Error::session_not_found())?; + + // Convert ACP content blocks to Pattern message format + let message = convert_acp_to_pattern(&req.messages); + + // Get connection for sending updates back to client + let conn = /* ... */; + + // Process with Pattern agent, streaming updates via ACP notifications + let result = session.agent + .process_message_stream(message) + .await; + + // Stream tool calls, content chunks, etc. via session/update notifications + while let Some(event) = result.next().await { + match event { + ResponseEvent::Text(chunk) => { + conn.notify(SessionNotification { + session_id: req.session_id.clone(), + update: SessionUpdate::ContentChunk(ContentChunk { + role: Role::Assistant, + content: ContentBlock::Text(TextContent { text: chunk }), + }), + }).await?; + } + ResponseEvent::ToolCall { id, name, args } => { + conn.notify(SessionNotification { + session_id: req.session_id.clone(), + update: SessionUpdate::ToolCall(ToolCall { + id: ToolCallId::new(id), + name, + kind: ToolKind::Function, + // ... + }), + }).await?; + } + // ... + } + } + + Ok(PromptResponse { + stop_reason: StopReason::EndTurn, + }) + } + + async fn cancel(&self, req: CancelNotification) -> Result<()> { + if let Some(session) = self.sessions.get(&req.session_id) { + session.cancel().await; + } + Ok(()) + } + + async fn load_session(&self, req: LoadSessionRequest) -> Result { + // Pattern has full session persistence - can restore from DB + let session = PatternSession::load(&self.db, &req.session_id).await?; + + // Stream history back to client via notifications + for message in session.history().await? { + // Send as ContentChunk notifications + } + + self.sessions.insert(req.session_id.clone(), session); + Ok(LoadSessionResponse { /* ... */ }) + } +} +``` + +### Running as ACP Agent + +Pattern would provide a binary that speaks ACP over stdio: + +```bash +# Invoked by editor (Zed, etc.) +pattern-acp --constellation my-constellation --agent my-agent +``` + +The binary: +1. Reads JSON-RPC from stdin +2. Dispatches to `PatternAcpAgent` implementation +3. 
Writes responses/notifications to stdout + +### ACP Client Capabilities Pattern Can Use + +When the client (editor) supports these, Pattern can: + +```rust +// Read files from user's workspace +let content = conn.request(ReadTextFileRequest { + path: "/path/to/file.rs".into(), +}).await?; + +// Write files (with permission) +conn.request(WriteTextFileRequest { + path: "/path/to/file.rs".into(), + content: new_content, +}).await?; + +// Execute terminal commands +let terminal = conn.request(CreateTerminalRequest { + command: "cargo".into(), + args: vec!["test".into()], + cwd: Some("/project".into()), + env: vec![], +}).await?; + +// Wait for command and get output +let result = conn.request(WaitForTerminalExitRequest { + terminal_id: terminal.id, +}).await?; + +// Request permission for dangerous operations +let permission = conn.request(RequestPermissionRequest { + tool_call_id: call_id, + tool_name: "shell_execute".into(), + description: "Run rm -rf on /tmp/build".into(), + options: vec![ + PermissionOption { id: "allow".into(), label: "Allow".into(), kind: PermissionOptionKind::Allow }, + PermissionOption { id: "deny".into(), label: "Deny".into(), kind: PermissionOptionKind::Deny }, + ], +}).await?; +``` + +### ACP vs Remote Presence Connector + +These solve different problems: + +| Aspect | ACP | Remote Presence Connector | +|--------|-----|---------------------------| +| Direction | Editor → Agent | Agent (on server) → User's machine | +| Transport | stdio (local subprocess) | WebSocket (network) | +| Use case | Editor integration | Server-hosted agent accessing local files | +| Trust | Agent trusted by editor | Connector authenticated as partner | + +They can coexist: +- Local Pattern: Runs as ACP agent, accessed by editor directly +- Remote Pattern: Runs on server, uses connector for file access, could still speak ACP to local editor via tunnel + +--- + +## WebSocket API + +For real-time bidirectional communication (Discord bot, live updates): + +``` +WS /api/v1/constellations/:cid/ws + +# Client → Server messages +{ "type": "subscribe", "agent_id": "..." } +{ "type": "message", "agent_id": "...", "content": "...", "source": "discord" } +{ "type": "unsubscribe", "agent_id": "..." } + +# Server → Client messages +{ "type": "response_event", "agent_id": "...", "event": { ... } } +{ "type": "memory_update", "agent_id": "...", "block": "...", "content": "..." } +{ "type": "error", "message": "..." 
}
+```
+
+---
+
+## CLI Commands (Preserved/Enhanced)
+
+The CLI remains the trusted local interface:
+
+```bash
+# Agent operations
+pattern agent list
+pattern agent create --name "MyAgent" --model anthropic/claude-3-5-sonnet
+pattern agent status MyAgent
+pattern agent export MyAgent -o agent.car
+
+# Interactive chat
+pattern chat --agent MyAgent
+pattern chat --group MyGroup
+
+# Memory inspection
+pattern debug list-core --agent MyAgent
+pattern debug search-archival --agent MyAgent --query "important"
+pattern debug show-context --agent MyAgent
+
+# Memory history (new in v2)
+pattern memory history --agent MyAgent --block persona
+pattern memory rollback --agent MyAgent --block persona --version <version>
+
+# Export/Import
+pattern export constellation MyConstellation -o constellation.car
+pattern import --from constellation.car
+
+# ACP mode (new in v2)
+pattern acp --agent MyAgent  # Run as ACP agent for editor integration
+```
+
+---
+
+## Trust Levels and Tool Access
+
+Different API surfaces have different trust levels:
+
+| Interface | Trust Level | Default Tool Access |
+|-----------|-------------|---------------------|
+| CLI | Partner | All tools |
+| ACP (local editor) | Partner | All tools (editor controls permissions) |
+| HTTP API (authenticated) | Partner | All tools |
+| Remote Connector | Partner | All tools (connector authenticated) |
+| Discord | Conversant | Safe tools only |
+| Bluesky | Conversant | Safe tools only |
+| Unauthenticated | Untrusted | Read-only |
+
+Tool access can be further restricted per-agent via config.
+
+---
+
+## MCP Integration
+
+Pattern already has MCP client support. With ACP integration:
+
+1. **Editor provides MCP servers** - Via `NewSessionRequest.mcp_servers`
+2. **Pattern connects to them** - Uses existing `pattern_mcp` client
+3. **Tools from MCP servers** - Registered in Pattern's tool registry
+4. **Pattern can also expose MCP** - For other tools to call Pattern
+
+```
+┌─────────────┐  ACP   ┌─────────────┐  MCP   ┌─────────────┐
+│     Zed     │◄──────►│   Pattern   │◄──────►│ MCP Server  │
+│  (Client)   │ stdio  │   (Agent)   │        │  (e.g. gh)  │
+└─────────────┘        └─────────────┘        └─────────────┘
+```
+
+---
+
+## ACP Connection Model
+
+One ACP connection = one Pattern entity:
+
+```bash
+# Connect to a single agent
+pattern acp --agent MyAgent
+
+# Connect to a group (coordination pattern routes internally)
+pattern acp --group MyGroup
+```
+
+When connecting to a **group**, the ACP session maps to the group's coordination pattern:
+- Messages route through the pattern manager (round-robin, dynamic, supervisor, etc.)
+- `session/update` notifications include which agent is responding
+- The group appears as a single "agent" to the editor
+
+This keeps the ACP interface simple while allowing Pattern's multi-agent coordination to work underneath.
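+
+A minimal sketch of that dispatch (the `agent_handle`/`group_manager` lookups and the `respond`/`route` methods are hypothetical, named here only to show the shape):
+
+```rust
+/// One ACP connection maps to exactly one Pattern entity.
+pub enum AcpTarget {
+    Agent(AgentId),
+    Group(GroupId),
+}
+
+pub async fn dispatch(target: &AcpTarget, message: PatternMessage) -> Result<ResponseStream> {
+    match target {
+        // Single agent: the message goes straight to it
+        AcpTarget::Agent(id) => agent_handle(id).respond(message).await,
+        // Group: the coordination pattern (round-robin, dynamic, supervisor, ...)
+        // picks the responding member(s); session/update notifications tag
+        // which agent actually answered
+        AcpTarget::Group(id) => group_manager(id).route(message).await,
+    }
+}
+```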
+
+### Pattern vs Typical ACP Session Semantics
+
+**Important distinction:** Pattern's model differs from typical ACP agents.
+
+| Aspect | Typical ACP Agent | Pattern |
+|--------|-------------------|---------|
+| Session | Ephemeral conversation | Connection to persistent constellation |
+| History | Per-session, loaded on `session/load` | Always persistent in constellation DB |
+| `session/new` | Creates fresh context | Connects to existing agent, maybe specifies memory blocks to surface |
+| `session/load` | Restores saved conversation | Mostly no-op - constellation is always "loaded" |
+| Identity | Session-scoped | Long-term relationship with partner |
+
+Pattern would implement ACP sessions as "connection handles" rather than "conversation containers":
+
+```rust
+async fn new_session(&self, req: NewSessionRequest) -> Result<NewSessionResponse> {
+    // "Session" is just a connection to the persistent constellation
+    // No new conversation context created - agent already has full history
+
+    let session = AcpSessionHandle {
+        constellation_id: self.constellation_id.clone(),
+        agent_id: self.agent_id.clone(),
+        // Optional: which memory blocks to emphasize in this connection
+        active_blocks: req.config.get("active_blocks").cloned(),
+        // MCP servers from editor
+        mcp_servers: req.mcp_servers,
+    };
+
+    // Don't replay history - it's already in the agent's context
+    // Editor can request specific history via extension methods if needed
+
+    Ok(NewSessionResponse {
+        session_id: SessionId::generate(),
+        // ...
+    })
+}
+
+async fn load_session(&self, req: LoadSessionRequest) -> Result<LoadSessionResponse> {
+    // Pattern doesn't really "load" sessions - constellation is persistent
+    // This could be used to switch which agent/group we're connected to
+    // or to specify a different memory configuration
+
+    Ok(LoadSessionResponse { /* ... */ })
+}
+```
+
+This means Pattern agents maintain continuity across ACP connections - reconnecting picks up where you left off, because the agent never "forgot" anything.
+
+---
+
+## Deployment Model
+
+### Single Canonical Server
+
+Pattern runs on a **single server** - this could be:
+- A home server (low latency, always-on, your hardware)
+- A cloud VPS (accessible from anywhere)
+- Your local machine (simplest, but not always-on)
+
+The key insight: **LLM API response latency dominates everything**. Network latency for file operations over iroh is negligible by comparison. So "local vs remote" for the server matters less than you'd think.
+
+### Multiple Entry Points, One Truth
+
+The server handles multiple concurrent connections:
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                Pattern Server (e.g., home server)               │
+│                                                                 │
+│  ┌────────────────────────────────────────────────────────┐    │
+│  │ Constellation                                          │    │
+│  │  └── Agents (Pattern, Flux, Entropy, Anchor, Archive)  │    │
+│  │  └── constellation.db (single source of truth)         │    │
+│  │  └── Shared context, coordination state                │    │
+│  └────────────────────────────────────────────────────────┘    │
+│        ▲            ▲            ▲            ▲                 │
+│        │            │            │            │                 │
+│    Connector    Connector     Discord      Bluesky              │
+│    (laptop)     (desktop)       Bot        Firehose             │
+│        │            │            │            │                 │
+└────────┼────────────┼────────────┼────────────┼─────────────────┘
+         │            │            │            │
+    ┌────▼────┐  ┌────▼────┐       │            │
+    │ Laptop  │  │ Desktop │    Discord      Bluesky
+    │   Zed   │  │   Zed   │    servers      servers
+    └─────────┘  └─────────┘
+```
+
+All entry points hit the same server, same constellation, same agents. No sync problem because there's one database.
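+
+Concretely, "one database" means every entry point funnels through the same insert path, with ordering supplied by the Snowflake `position` column (a sketch against the messages schema in v2-database-design.md; `snowflake_now()` is a hypothetical monotonic ID generator):
+
+```rust
+// Every inbound message, whatever its source, gets a Snowflake position
+// at arrival time. Final history order is simply position order.
+async fn record_inbound(
+    pool: &sqlx::SqlitePool,
+    agent_id: &str,
+    batch_id: &str,
+    source: &str, // 'discord', 'bluesky', 'acp', ...
+    content: &str,
+) -> Result<(), sqlx::Error> {
+    let position = snowflake_now().to_string(); // hypothetical
+    sqlx::query(
+        "INSERT INTO messages (id, agent_id, position, batch_id, role, content, source, created_at)
+         VALUES (?, ?, ?, ?, 'user', ?, ?, datetime('now'))",
+    )
+    .bind(uuid::Uuid::new_v4().to_string())
+    .bind(agent_id)
+    .bind(position)
+    .bind(batch_id)
+    .bind(content)
+    .bind(source)
+    .execute(pool)
+    .await?;
+    Ok(())
+}
+```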
+ +### Concurrent Message Handling + +When messages arrive simultaneously from different sources, Pattern handles them with **batch isolation**: + +``` +Timeline (actual processing): + t=0: Discord msg arrives, batch A starts + t=1: Bluesky mention arrives, batch B starts + t=3: Connector request arrives, batch C starts + t=5: batch B completes (Bluesky was a quick reply) + t=8: batch C completes + t=12: batch A completes (Discord needed more thought) + +Agent's view during batch A: + [history up to t=0] + [batch A in progress] + (batches B and C are invisible) + +Final history (reconstructed by arrival order): + [prior history] + batch A (Discord) - arrived t=0 + batch B (Bluesky) - arrived t=1 + batch C (Connector) - arrived t=3 +``` + +This is **implicit forking** - each batch gets isolated context, results merge back ordered by arrival time (via Snowflake IDs). The agent experiences it as fast sequential processing. + +### Deployment Configurations + +#### Home Server (Recommended for Power Users) + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Home Network │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Home Server (always-on) │ │ +│ │ └── Pattern server process │ │ +│ │ └── constellation.db │ │ +│ │ └── iroh endpoint (accepts connectors) │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ ▲ │ +│ │ iroh (local network = very fast) │ +│ │ │ +│ ┌──────┴──────────────────────────────────────────────────┐ │ +│ │ Workstation │ │ +│ │ └── pattern-connector daemon │ │ +│ │ └── Zed/Editor (ACP via connector) │ │ +│ └─────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ + │ + │ iroh (NAT-traversed when away from home) + ▼ + ┌─────────┐ + │ Laptop │ (on the go, connector still works) + └─────────┘ +``` + +**Pros**: Always-on, low latency on home network, NAT traversal when remote +**Cons**: Requires home server setup + +#### Cloud VPS + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Cloud VPS │ +│ └── Pattern server │ +│ └── constellation.db │ +│ └── Discord bot, Bluesky firehose │ +└───────────────────────────┬─────────────────────────────────────┘ + │ iroh + ┌─────────────┼─────────────┐ + ▼ ▼ ▼ + Laptop Desktop Phone + (connector) (connector) (web UI) +``` + +**Pros**: Accessible from anywhere, no home server needed +**Cons**: More latency for file ops, cloud costs + +#### Local Only (Simplest) + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Your Machine │ +│ └── Pattern server (runs when you're working) │ +│ └── constellation.db │ +│ └── Direct file access (LocalConnector) │ +│ └── Zed connects via ACP directly │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Pros**: Simplest setup, no network +**Cons**: Not always-on, no Discord/Bluesky when machine is off + +--- + +## ACP Modes + +### Direct ACP (Local) + +Pattern runs as subprocess, editor spawns it directly: + +```bash +# In editor's agent config +{ + "command": "pattern-acp", + "args": ["--constellation", "my-constellation", "--agent", "coder"] +} +``` + +Pattern binary speaks ACP over stdio, has direct file access. 
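+
+A rough shape for that binary's entry point (everything here is a sketch: `open_constellation_db`, `resolve_agent_id`, and `serve_acp_stdio` are stand-ins, the last for whatever serve API the `agent-client-protocol` crate actually exposes):
+
+```rust
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    // --constellation <name> --agent <name>, as in the editor config above
+    let args: Vec<String> = std::env::args().collect();
+    let constellation = flag_value(&args, "--constellation").expect("missing --constellation");
+    let agent = flag_value(&args, "--agent").expect("missing --agent");
+
+    let db = open_constellation_db(&constellation).await?; // hypothetical
+    let agent_id = resolve_agent_id(&db, &agent).await?;   // hypothetical
+    let agent_impl = PatternAcpAgent::new(db, agent_id);
+
+    // Bridge the Agent impl to JSON-RPC over stdin/stdout
+    serve_acp_stdio(agent_impl).await
+}
+
+/// Tiny helper: the value following a flag, if present.
+fn flag_value(args: &[String], flag: &str) -> Option<String> {
+    args.iter()
+        .position(|a| a == flag)
+        .and_then(|i| args.get(i + 1).cloned())
+}
+```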
+
+### Proxied ACP (Remote via Connector)
+
+Editor connects to local connector daemon, which proxies to remote Pattern:
+
+```bash
+# Connector runs in background
+$ pattern-connector --daemon
+
+# Editor connects to connector's ACP socket
+{
+  "command": "pattern-connector",
+  "args": ["acp-proxy"]
+}
+```
+
+The connector:
+1. Receives ACP messages from editor via stdio
+2. Forwards to Pattern server via iroh
+3. Handles file/env requests from server
+4. Returns responses to editor
+
+### ACP Session Mapping
+
+| ACP Concept | Pattern Mapping |
+|-------------|-----------------|
+| `session/new` | Connect to existing agent/group (no new context created) |
+| `session/load` | Reconnect to same agent (mostly no-op) |
+| `session_id` | Connection handle, not conversation container |
+| `prompt` | Send message to agent, get streaming response |
+| `cancel` | Cancel current agent processing |
+
+Pattern's persistent memory means the agent maintains context across sessions. The "session" is just a connection, not a conversation boundary.
+
+---
+
+## Open Questions
+
+1. **Local/server sync** - For hybrid mode, how to sync constellation state?
+   - Shared database via network?
+   - CAR-based sync?
+   - Conflict resolution for concurrent edits?
+
+2. **Rate limiting** - Different limits for different trust levels?
+   - Partner (CLI, ACP): no limits
+   - Conversant (Discord, Bluesky): per-user rate limits
+   - API: token bucket per API key
+
+3. **Group mode in ACP** - Should `session/set_mode` switch coordination patterns?
+   - Or expose as separate capability?
+
+4. **Connector auth in hybrid mode** - Local Pattern talking to server - same iroh auth flow?
diff --git a/docs/refactoring/v2-constellation-forking.md b/docs/refactoring/v2-constellation-forking.md
new file mode 100644
index 0000000..310478d
--- /dev/null
+++ b/docs/refactoring/v2-constellation-forking.md
@@ -0,0 +1,230 @@
+# Pattern v2.1: Constellation Forking (Future)
+
+> **Status**: Concept for future implementation. Not part of v2 initial release.
+
+## Overview
+
+Explicit forking and merging of constellations (or subsets thereof) for isolated work with optional reintegration.
+
+## Use Cases
+
+1. **Experimental work** - Try something risky without polluting main constellation history
+2. **Focused task** - Spin off agents for a specific project, less noise from other activity
+3. **Collaboration** - Fork, share with someone, merge their contributions back
+4. **Rollback** - If the fork goes badly, just discard it
+5. **Templates** - Fork a "clean" constellation as starting point for new projects
+
+## Fork Specification
+
+```rust
+pub struct ForkSpec {
+    /// Source constellation
+    pub source: ConstellationId,
+
+    /// Which agents to include (None = all)
+    pub agents: Option<Vec<AgentId>>,
+
+    /// How much history to bring
+    pub history: HistorySpec,
+
+    /// Which memory blocks to copy
+    pub memory: MemorySpec,
+
+    /// Include shared resources (folders, coordination state)?
+    pub include_shared: bool,
+}
+
+pub enum HistorySpec {
+    /// No history, just current memory state
+    None,
+    /// Last N messages per agent
+    Recent(usize),
+    /// Everything since timestamp
+    Since(DateTime<Utc>),
+    /// Full history
+    Full,
+}
+
+pub enum MemorySpec {
+    /// Core blocks only (persona, human)
+    CoreOnly,
+    /// Core + working
+    CoreAndWorking,
+    /// Everything including archival
+    Full,
+    /// Specific blocks by label
+    Specific(Vec<String>),
+}
+```
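+
+The fork created in the CLI walkthrough later in this document (`fork my-constellation --agents coder entropy`) would correspond to roughly this spec (`fork_constellation` is a hypothetical entry point):
+
+```rust
+let spec = ForkSpec {
+    source: main_constellation_id,
+    agents: Some(vec![coder_id, entropy_id]), // subset, not the whole constellation
+    history: HistorySpec::Recent(100),        // last 100 messages per agent
+    memory: MemorySpec::CoreAndWorking,
+    include_shared: false,
+};
+let fork_id = fork_constellation(&db, spec).await?; // hypothetical
+```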
+
+## Merge Specification
+
+```rust
+pub struct MergeSpec {
+    /// Fork to merge from
+    pub source: ConstellationId,
+
+    /// Target constellation
+    pub target: ConstellationId,
+
+    /// What to merge
+    pub merge: MergeContent,
+
+    /// Conflict resolution strategy
+    pub conflicts: ConflictResolution,
+}
+
+pub enum MergeContent {
+    /// Just memories (learnings), not conversation history
+    MemoriesOnly,
+    /// Memories + LLM-generated summary of what happened
+    MemoriesAndSummary,
+    /// Full history appended as delineated section
+    FullHistory,
+}
+
+pub enum ConflictResolution {
+    /// Fork wins (overwrite target)
+    PreferFork,
+    /// Target wins (keep target, discard fork conflicts)
+    PreferTarget,
+    /// Interactive resolution (prompt user)
+    Interactive,
+    /// Keep both versions (create separate blocks)
+    KeepBoth,
+}
+```
+
+## Memory Block Merge Semantics
+
+Loro CRDT enables automatic merging of memory blocks:
+
+```rust
+async fn merge_memory_block(
+    source_block: &MemoryBlock,
+    target_block: &MemoryBlock,
+) -> Result<MemoryBlock> {
+    // Loro documents can merge!
+    let mut merged_doc = target_block.document.clone();
+    merged_doc.merge(&source_block.document)?;
+
+    // Result contains both histories, conflicts auto-resolved
+    Ok(MemoryBlock {
+        document: merged_doc,
+        ..target_block.clone()
+    })
+}
+```
+
+Merge rules:
+- **Block only in fork** → Copy to target
+- **Block only in target** → Keep as-is
+- **Block in both** → Loro CRDT merge (or interactive if structural conflict)
+
+## History Merge Options
+
+### Option 1: Append as Fork Session
+
+Clearly delineated in history:
+
+```
+[normal history]
+--- Fork: refactor-task (3 days) ---
+[fork history]
+--- End Fork ---
+[continues]
+```
+
+### Option 2: Summarize (Recommended Default)
+
+LLM generates summary of fork activity:
+
+```
+[normal history]
+[System: During "refactor-task" fork, coder refactored the memory system
+ and entropy broke it into 12 subtasks. Key learnings were saved to archival.]
+[continues]
+```
+
+### Option 3: Memories Only
+
+Don't merge history at all, just memory block updates. Fork history is discarded.
+
+## CLI Interface
+
+```bash
+# Create fork
+$ pattern fork my-constellation --agents coder entropy --name refactor-task
+Created fork: refactor-task (2 agents, core+working memory, recent 100 messages)
+
+# Work in fork
+$ pattern chat --constellation refactor-task --agent coder
+
+# Check fork status
+$ pattern fork status refactor-task
+Fork: refactor-task
+Source: my-constellation
+Created: 2 days ago
+Agents: coder (847 new messages), entropy (234 new messages)
+Memory changes: 12 blocks modified, 3 new archival entries
+
+# Merge back
+$ pattern merge refactor-task --into my-constellation --summarize
+Merging fork...
+- 12 memory blocks merged (Loro CRDT)
+- 3 archival entries copied
+- Summary generated and inserted into history
+Done.
+
+ +# Or discard +$ pattern fork delete refactor-task +Deleted fork: refactor-task +``` + +## Dialect Integration + +Agents could fork/merge via dialect: + +``` +/fork agents coder entropy as "refactor-task" +/fork status "refactor-task" +/merge "refactor-task" summarize +/fork delete "refactor-task" +``` + +This would require appropriate permissions - probably partner-only. + +## Implementation Considerations + +1. **Storage** - Fork creates new constellation DB (same as regular constellation) +2. **Tracking** - Source constellation ID stored in fork metadata +3. **Divergence tracking** - Record "fork point" for merge operations +4. **Shared resources** - Folders, data sources need explicit handling +5. **Coordination state** - Probably shouldn't be forked (or reset to clean state) + +## Relationship to Existing Concurrent Processing + +Pattern already handles concurrent message processing via batch isolation: + +``` +Timeline (actual): + t=0: msg A arrives, batch A starts processing + t=1: msg B arrives, batch B starts processing + t=5: batch B completes (faster response) + t=8: batch A completes + +Agent's history (reconstructed): + [prior history] + batch A messages (t=0) + batch B messages (t=1) +``` + +This is **implicit, automatic forking** for concurrent requests. The explicit forking described in this document is for **intentional, longer-lived divergence** with controlled merge back. + +## Open Questions + +1. **Nested forks** - Can you fork a fork? +2. **Partial merge** - Merge some agents/blocks but not others? +3. **Fork sharing** - Export fork for someone else to work on? +4. **Long-lived forks** - At what point is it just a new constellation? +5. **Sync during fork** - Should fork see updates from source? (Probably not - that's merge) diff --git a/docs/refactoring/v2-database-design.md b/docs/refactoring/v2-database-design.md new file mode 100644 index 0000000..c08a2ee --- /dev/null +++ b/docs/refactoring/v2-database-design.md @@ -0,0 +1,905 @@ +# Pattern v2: Database Design + +## Overview + +Moving from SurrealDB to SQLite + sqlx. One database file per constellation provides physical isolation. + +## Why SQLite + +### Problems with SurrealDB + +1. **Immature ecosystem** - Constant API changes, missing features, weird edge cases +2. **Live query limitations** - Can't use parameters in WHERE clauses, forces string interpolation +3. **Type system friction** - `chrono::DateTime` vs `surrealdb::Datetime`, bracket-wrapped IDs +4. **No row-level security in practice** - We couldn't get permissions working reliably +5. **Custom entity macro** - We built a 1000+ line proc macro just to work around serialization issues +6. **Debugging difficulty** - Hard to inspect what's actually in the database + +### SQLite Benefits + +1. **Battle-tested** - Decades of production use, known behavior +2. **Extensions for everything** - sqlite-vec for vectors, FTS5 for full-text search +3. **One file = one database** - Natural isolation per constellation +4. **sqlx is solid** - Compile-time checked queries, good async support +5. **Easy debugging** - Can open in any SQLite browser +6. **Portable** - Database files are self-contained, easy backup/restore + +## Architecture + +### One Database Per Constellation + +``` +data/ +├── constellations/ +│ ├── {constellation_id}/ +│ │ ├── pattern.db # Main SQLite database +│ │ ├── pattern.db-shm # SQLite shared memory (temp) +│ │ ├── pattern.db-wal # Write-ahead log (temp) +│ │ └── exports/ # CAR exports +│ └── {another_constellation_id}/ +│ └── ... 
+└── global.db # User accounts, constellation registry +``` + +**Benefits:** +- No cross-constellation data leaks possible at DB level +- SQLite's single-writer is fine (one constellation = sequential operations) +- Easy to backup/restore/migrate individual constellations +- Can run multiple constellations in parallel (separate DB connections) + +### Global Database + +Small database for cross-constellation concerns: + +```sql +-- User accounts (for server mode) +CREATE TABLE users ( + id TEXT PRIMARY KEY, + username TEXT UNIQUE NOT NULL, + email TEXT UNIQUE, + password_hash TEXT, -- NULL for OAuth-only users + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +-- OAuth identities +CREATE TABLE user_identities ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + provider TEXT NOT NULL, -- 'atproto', 'discord', etc. + provider_id TEXT NOT NULL, + access_token TEXT, + refresh_token TEXT, + expires_at TEXT, + created_at TEXT NOT NULL, + UNIQUE(provider, provider_id), + FOREIGN KEY (user_id) REFERENCES users(id) +); + +-- Constellation registry +CREATE TABLE constellations ( + id TEXT PRIMARY KEY, + owner_id TEXT NOT NULL, + name TEXT NOT NULL, + db_path TEXT NOT NULL, -- Relative path to constellation DB + created_at TEXT NOT NULL, + last_accessed_at TEXT NOT NULL, + FOREIGN KEY (owner_id) REFERENCES users(id) +); +``` + +## Constellation Database Schema + +### Core Tables + +```sql +-- Agents in this constellation +CREATE TABLE agents ( + id TEXT PRIMARY KEY, + name TEXT UNIQUE NOT NULL, + description TEXT, + + -- Model configuration + model_provider TEXT NOT NULL, -- 'anthropic', 'openai', 'google' + model_name TEXT NOT NULL, + + -- System prompt and config + system_prompt TEXT NOT NULL, + config JSON NOT NULL, -- Temperature, max tokens, etc. + + -- Tool configuration + enabled_tools JSON NOT NULL, -- Array of tool names + tool_rules JSON, -- Tool-specific rules + + -- Status + status TEXT NOT NULL DEFAULT 'active', -- 'active', 'hibernated', 'archived' + + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +-- Agent groups for coordination +CREATE TABLE agent_groups ( + id TEXT PRIMARY KEY, + name TEXT UNIQUE NOT NULL, + description TEXT, + + -- Coordination pattern + pattern_type TEXT NOT NULL, -- 'round_robin', 'dynamic', 'supervisor', etc. + pattern_config JSON NOT NULL, + + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +-- Group membership +CREATE TABLE group_members ( + group_id TEXT NOT NULL, + agent_id TEXT NOT NULL, + role TEXT, -- 'supervisor', 'worker', etc. 
(pattern-specific) + joined_at TEXT NOT NULL, + PRIMARY KEY (group_id, agent_id), + FOREIGN KEY (group_id) REFERENCES agent_groups(id), + FOREIGN KEY (agent_id) REFERENCES agents(id) +); +``` + +### Memory Tables + +```sql +-- Memory blocks (see v2-memory-system.md for details) +CREATE TABLE memory_blocks ( + id TEXT PRIMARY KEY, + agent_id TEXT NOT NULL, + label TEXT NOT NULL, + description TEXT NOT NULL, + + block_type TEXT NOT NULL, -- 'core', 'working', 'archival', 'log' + char_limit INTEGER NOT NULL DEFAULT 5000, + read_only INTEGER NOT NULL DEFAULT 0, + + -- Loro document stored as blob + loro_snapshot BLOB NOT NULL, + + -- Quick access without deserializing + content_preview TEXT, + + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + + UNIQUE(agent_id, label), + FOREIGN KEY (agent_id) REFERENCES agents(id) +); + +-- Pending Loro updates (between snapshots) +CREATE TABLE memory_block_updates ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + block_id TEXT NOT NULL, + loro_update BLOB NOT NULL, + created_at TEXT NOT NULL, + FOREIGN KEY (block_id) REFERENCES memory_blocks(id) ON DELETE CASCADE +); + +-- Shared blocks (blocks that multiple agents can access) +CREATE TABLE shared_block_agents ( + block_id TEXT NOT NULL, + agent_id TEXT NOT NULL, + write_access INTEGER NOT NULL DEFAULT 0, + attached_at TEXT NOT NULL, + PRIMARY KEY (block_id, agent_id), + FOREIGN KEY (block_id) REFERENCES memory_blocks(id) ON DELETE CASCADE, + FOREIGN KEY (agent_id) REFERENCES agents(id) +); + +-- Block history metadata (for UI, supplements Loro's internal history) +CREATE TABLE memory_block_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + block_id TEXT NOT NULL, + version_frontiers TEXT NOT NULL, -- JSON: Loro frontiers + change_summary TEXT, + changed_by TEXT NOT NULL, -- 'agent:{id}', 'user', 'system' + timestamp TEXT NOT NULL, + FOREIGN KEY (block_id) REFERENCES memory_blocks(id) ON DELETE CASCADE +); + +-- Archival memory entries (separate from blocks) +-- These are individual searchable entries the agent can store/retrieve +CREATE TABLE archival_entries ( + id TEXT PRIMARY KEY, + agent_id TEXT NOT NULL, + + -- Content + content TEXT NOT NULL, + metadata JSON, -- Optional structured metadata + + -- For chunked large content + chunk_index INTEGER DEFAULT 0, + parent_entry_id TEXT, -- Links chunks together + + created_at TEXT NOT NULL, + + FOREIGN KEY (agent_id) REFERENCES agents(id), + FOREIGN KEY (parent_entry_id) REFERENCES archival_entries(id) +); + +CREATE INDEX idx_archival_agent ON archival_entries(agent_id); +CREATE INDEX idx_archival_parent ON archival_entries(parent_entry_id); +``` + +### Message Tables + +```sql +-- Messages (conversation history) +CREATE TABLE messages ( + id TEXT PRIMARY KEY, + agent_id TEXT NOT NULL, + + -- Snowflake-based ordering + position TEXT NOT NULL, -- Snowflake ID as string for sorting + batch_id TEXT, -- Groups request/response cycles + sequence_in_batch INTEGER, + + -- Message content + role TEXT NOT NULL, -- 'user', 'assistant', 'system', 'tool' + content TEXT, + + -- For tool messages + tool_call_id TEXT, + tool_name TEXT, + tool_args JSON, + tool_result JSON, + + -- Metadata + source TEXT, -- 'cli', 'discord', 'bluesky', 'api', etc. + source_metadata JSON, -- Channel ID, message ID, etc. 
+ + -- Status + is_archived INTEGER NOT NULL DEFAULT 0, + + created_at TEXT NOT NULL, + + FOREIGN KEY (agent_id) REFERENCES agents(id) +); + +CREATE INDEX idx_messages_agent_position ON messages(agent_id, position); +CREATE INDEX idx_messages_agent_batch ON messages(agent_id, batch_id); +CREATE INDEX idx_messages_archived ON messages(agent_id, is_archived, position); + +-- Archive summaries +CREATE TABLE archive_summaries ( + id TEXT PRIMARY KEY, + agent_id TEXT NOT NULL, + + summary TEXT NOT NULL, + + -- What messages this summarizes + start_position TEXT NOT NULL, + end_position TEXT NOT NULL, + message_count INTEGER NOT NULL, + + created_at TEXT NOT NULL, + + FOREIGN KEY (agent_id) REFERENCES agents(id) +); +``` + +### Vector Search (sqlite-vec) + +```sql +-- Vector table for semantic search over memories +CREATE VIRTUAL TABLE memory_embeddings USING vec0( + embedding float[384], -- Adjust dimension for your model + +block_id TEXT, + +chunk_index INTEGER, -- For blocks split into chunks + +content_hash TEXT -- Detect if content changed +); + +-- Vector table for message search +CREATE VIRTUAL TABLE message_embeddings USING vec0( + embedding float[384], + +message_id TEXT, + +content_hash TEXT +); +``` + +### Full-Text Search (FTS5) + +```sql +-- Full-text search over memories +CREATE VIRTUAL TABLE memory_fts USING fts5( + block_id, + label, + content, + content='memory_blocks', + content_rowid='rowid' +); + +-- Triggers to keep FTS in sync +CREATE TRIGGER memory_fts_insert AFTER INSERT ON memory_blocks BEGIN + INSERT INTO memory_fts(rowid, block_id, label, content) + VALUES (new.rowid, new.id, new.label, new.content_preview); +END; + +CREATE TRIGGER memory_fts_delete AFTER DELETE ON memory_blocks BEGIN + INSERT INTO memory_fts(memory_fts, rowid, block_id, label, content) + VALUES ('delete', old.rowid, old.id, old.label, old.content_preview); +END; + +CREATE TRIGGER memory_fts_update AFTER UPDATE ON memory_blocks BEGIN + INSERT INTO memory_fts(memory_fts, rowid, block_id, label, content) + VALUES ('delete', old.rowid, old.id, old.label, old.content_preview); + INSERT INTO memory_fts(rowid, block_id, label, content) + VALUES (new.rowid, new.id, new.label, new.content_preview); +END; + +-- Full-text search over messages +CREATE VIRTUAL TABLE message_fts USING fts5( + message_id, + content, + content='messages', + content_rowid='rowid' +); +``` + +### Tasks and Events + +```sql +-- Tasks (ADHD support) +CREATE TABLE tasks ( + id TEXT PRIMARY KEY, + agent_id TEXT, -- NULL for constellation-level tasks + + title TEXT NOT NULL, + description TEXT, + + status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'in_progress', 'completed', 'cancelled' + priority TEXT NOT NULL DEFAULT 'medium', -- 'low', 'medium', 'high', 'urgent' + + -- Optional scheduling + due_at TEXT, + scheduled_at TEXT, + completed_at TEXT, + + -- Hierarchy + parent_task_id TEXT, + + -- Embedding for semantic search + -- (stored in vec table, linked by task_id) + + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + + FOREIGN KEY (agent_id) REFERENCES agents(id), + FOREIGN KEY (parent_task_id) REFERENCES tasks(id) +); + +-- Events/reminders +CREATE TABLE events ( + id TEXT PRIMARY KEY, + agent_id TEXT, + + title TEXT NOT NULL, + description TEXT, + + starts_at TEXT NOT NULL, + ends_at TEXT, + + -- Recurrence (iCal RRULE format) + rrule TEXT, + + -- Reminder settings + reminder_minutes INTEGER, -- Minutes before event + + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + + FOREIGN KEY (agent_id) REFERENCES agents(id) +); 
+``` + +### Data Sources + +```sql +-- External data source configurations +CREATE TABLE data_sources ( + id TEXT PRIMARY KEY, + name TEXT UNIQUE NOT NULL, + source_type TEXT NOT NULL, -- 'file', 'bluesky', 'discord', 'rss', etc. + config JSON NOT NULL, + + -- Polling/sync state + last_sync_at TEXT, + sync_cursor TEXT, -- Source-specific position marker + + enabled INTEGER NOT NULL DEFAULT 1, + + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +-- Which agents receive from which sources +CREATE TABLE agent_data_sources ( + agent_id TEXT NOT NULL, + source_id TEXT NOT NULL, + + -- How to handle incoming data + notification_template TEXT, + + PRIMARY KEY (agent_id, source_id), + FOREIGN KEY (agent_id) REFERENCES agents(id), + FOREIGN KEY (source_id) REFERENCES data_sources(id) +); +``` + +### Folders (File Access) + +```sql +-- File folders for agent access +CREATE TABLE folders ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + description TEXT, + path_type TEXT NOT NULL, -- 'local', 'virtual', 'remote' + path_value TEXT, -- filesystem path or URL + embedding_model TEXT NOT NULL, + created_at TEXT NOT NULL +); + +-- Files within folders +CREATE TABLE folder_files ( + id TEXT PRIMARY KEY, + folder_id TEXT NOT NULL, + name TEXT NOT NULL, + content_type TEXT, + size_bytes INTEGER, + content BLOB, -- for virtual folders + uploaded_at TEXT NOT NULL, + indexed_at TEXT, + UNIQUE(folder_id, name), + FOREIGN KEY (folder_id) REFERENCES folders(id) ON DELETE CASCADE +); + +-- File passages (chunks with embeddings) +CREATE TABLE file_passages ( + id TEXT PRIMARY KEY, + file_id TEXT NOT NULL, + content TEXT NOT NULL, + start_line INTEGER, + end_line INTEGER, + created_at TEXT NOT NULL, + FOREIGN KEY (file_id) REFERENCES folder_files(id) ON DELETE CASCADE +); + +-- Passage embeddings (sqlite-vec) +CREATE VIRTUAL TABLE file_passage_embeddings USING vec0( + embedding float[384], + +passage_id TEXT, + +file_id TEXT, + +folder_id TEXT +); + +-- Folder attachments to agents +CREATE TABLE folder_attachments ( + folder_id TEXT NOT NULL, + agent_id TEXT NOT NULL, + access TEXT NOT NULL, -- 'read', 'read_write' + attached_at TEXT NOT NULL, + PRIMARY KEY (folder_id, agent_id), + FOREIGN KEY (folder_id) REFERENCES folders(id) ON DELETE CASCADE, + FOREIGN KEY (agent_id) REFERENCES agents(id) +); +``` + +### Activity Stream & Shared Context + +```sql +-- Activity stream events +CREATE TABLE activity_events ( + id TEXT PRIMARY KEY, + timestamp TEXT NOT NULL, + agent_id TEXT, -- NULL for system events + event_type TEXT NOT NULL, + details JSON NOT NULL, + importance TEXT, -- 'low', 'medium', 'high', 'critical' + FOREIGN KEY (agent_id) REFERENCES agents(id) +); + +CREATE INDEX idx_activity_timestamp ON activity_events(timestamp DESC); +CREATE INDEX idx_activity_agent ON activity_events(agent_id); +CREATE INDEX idx_activity_type ON activity_events(event_type); + +-- Per-agent activity summaries (LLM-generated) +CREATE TABLE agent_summaries ( + agent_id TEXT PRIMARY KEY, + summary TEXT NOT NULL, + messages_covered INTEGER, + generated_at TEXT NOT NULL, + last_active TEXT NOT NULL, + FOREIGN KEY (agent_id) REFERENCES agents(id) +); + +-- Constellation-wide summaries (periodic roll-ups) +CREATE TABLE constellation_summaries ( + id TEXT PRIMARY KEY, + period_start TEXT NOT NULL, + period_end TEXT NOT NULL, + summary TEXT NOT NULL, + key_decisions JSON, -- array of strings + open_threads JSON, -- array of strings + created_at TEXT NOT NULL +); + +-- Notable events (flagged for long-term memory) +CREATE TABLE 
notable_events (
+    id TEXT PRIMARY KEY,
+    timestamp TEXT NOT NULL,
+    event_type TEXT NOT NULL,
+    description TEXT NOT NULL,
+    agents_involved JSON, -- array of agent IDs
+    importance TEXT NOT NULL,
+    created_at TEXT NOT NULL
+);
+
+CREATE INDEX idx_notable_timestamp ON notable_events(timestamp DESC);
+CREATE INDEX idx_notable_importance ON notable_events(importance);
+```
+
+### Coordination State
+
+```sql
+-- Coordination key-value store (flexible shared state)
+CREATE TABLE coordination_state (
+    key TEXT PRIMARY KEY,
+    value JSON NOT NULL,
+    updated_at TEXT NOT NULL,
+    updated_by TEXT -- agent ID or 'system' or 'user'
+);
+
+-- Task assignments (structured coordination)
+CREATE TABLE coordination_tasks (
+    id TEXT PRIMARY KEY,
+    description TEXT NOT NULL,
+    assigned_to TEXT, -- agent ID, NULL = unassigned
+    status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'in_progress', 'completed', 'cancelled'
+    priority TEXT NOT NULL DEFAULT 'medium',
+    created_at TEXT NOT NULL,
+    updated_at TEXT NOT NULL,
+    FOREIGN KEY (assigned_to) REFERENCES agents(id)
+);
+
+-- Handoff notes between agents
+CREATE TABLE handoff_notes (
+    id TEXT PRIMARY KEY,
+    from_agent TEXT NOT NULL,
+    to_agent TEXT, -- NULL = for any agent
+    content TEXT NOT NULL,
+    created_at TEXT NOT NULL,
+    read_at TEXT,
+    FOREIGN KEY (from_agent) REFERENCES agents(id),
+    FOREIGN KEY (to_agent) REFERENCES agents(id)
+);
+
+CREATE INDEX idx_handoff_to ON handoff_notes(to_agent, read_at);
+```
+
+### Migration Audit
+
+```sql
+-- Records of v1->v2 migration decisions
+CREATE TABLE migration_audit (
+    id TEXT PRIMARY KEY,
+    imported_at TEXT NOT NULL,
+    source_file TEXT NOT NULL,
+    source_version INTEGER NOT NULL,
+    issues_found INTEGER NOT NULL,
+    issues_resolved INTEGER NOT NULL,
+    audit_log JSON NOT NULL -- Full decision log
+);
+```
+
+## sqlx Patterns
+
+### No Entity Macro
+
+Instead of the v1 `#[derive(Entity)]` proc macro, we use plain sqlx:
+
+```rust
+// Simple struct, derives what sqlx needs
+#[derive(Debug, Clone, sqlx::FromRow)]
+pub struct Agent {
+    pub id: String,
+    pub name: String,
+    pub description: Option<String>,
+    pub model_provider: String,
+    pub model_name: String,
+    pub system_prompt: String,
+    pub config: sqlx::types::Json<serde_json::Value>,
+    pub enabled_tools: sqlx::types::Json<Vec<String>>,
+    pub tool_rules: Option<sqlx::types::Json<serde_json::Value>>,
+    pub status: String,
+    pub created_at: String,
+    pub updated_at: String,
+}
+
+// Queries are compile-time checked
+pub async fn get_agent(pool: &SqlitePool, id: &str) -> Result<Option<Agent>> {
+    sqlx::query_as!(
+        Agent,
+        r#"SELECT * FROM agents WHERE id = ?"#,
+        id
+    )
+    .fetch_optional(pool)
+    .await
+    .map_err(Into::into)
+}
+
+pub async fn create_agent(pool: &SqlitePool, agent: &Agent) -> Result<()> {
+    sqlx::query!(
+        r#"
+        INSERT INTO agents (id, name, description, model_provider, model_name,
+                            system_prompt, config, enabled_tools, tool_rules,
+                            status, created_at, updated_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        "#,
+        agent.id,
+        agent.name,
+        agent.description,
+        agent.model_provider,
+        agent.model_name,
+        agent.system_prompt,
+        agent.config,
+        agent.enabled_tools,
+        agent.tool_rules,
+        agent.status,
+        agent.created_at,
+        agent.updated_at
+    )
+    .execute(pool)
+    .await?;
+    Ok(())
+}
+```
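+
+The same plain-sqlx style covers the hot paths too - for example, pulling recent history (a sketch against the messages schema above; assuming fixed-width Snowflake `position` strings, lexicographic `DESC` matches arrival order):
+
+```rust
+#[derive(Debug, sqlx::FromRow)]
+pub struct MessageRow {
+    pub id: String,
+    pub role: String,
+    pub content: Option<String>,
+    pub position: String,
+}
+
+/// Most recent N non-archived messages for an agent, newest first.
+pub async fn recent_messages(
+    pool: &sqlx::SqlitePool,
+    agent_id: &str,
+    limit: i64,
+) -> Result<Vec<MessageRow>, sqlx::Error> {
+    sqlx::query_as::<_, MessageRow>(
+        r#"
+        SELECT id, role, content, position
+        FROM messages
+        WHERE agent_id = ? AND is_archived = 0
+        ORDER BY position DESC
+        LIMIT ?
+        "#,
+    )
+    .bind(agent_id)
+    .bind(limit)
+    .fetch_all(pool)
+    .await
+}
+```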
+ "#, + agent.id, + agent.name, + agent.description, + agent.model_provider, + agent.model_name, + agent.system_prompt, + agent.config, + agent.enabled_tools, + agent.tool_rules, + agent.status, + agent.created_at, + agent.updated_at + ) + .execute(pool) + .await?; + Ok(()) +} +``` + +### Connection Management + +```rust +use sqlx::sqlite::{SqlitePool, SqlitePoolOptions}; + +pub struct ConstellationDb { + pool: SqlitePool, + constellation_id: String, +} + +impl ConstellationDb { + pub async fn open(path: &Path) -> Result { + let pool = SqlitePoolOptions::new() + .max_connections(5) // SQLite is single-writer anyway + .connect(&format!("sqlite:{}?mode=rwc", path.display())) + .await?; + + // Run migrations + sqlx::migrate!("./migrations") + .run(&pool) + .await?; + + // Load sqlite-vec extension + sqlx::query("SELECT load_extension('vec0')") + .execute(&pool) + .await?; + + Ok(Self { pool, constellation_id }) + } + + pub fn pool(&self) -> &SqlitePool { + &self.pool + } +} +``` + +### Transactions + +```rust +pub async fn create_agent_with_default_blocks( + db: &ConstellationDb, + agent: &Agent, +) -> Result<()> { + let mut tx = db.pool().begin().await?; + + // Create agent + sqlx::query!(/* ... */) + .execute(&mut *tx) + .await?; + + // Create default memory blocks + for block in default_blocks(agent.id) { + sqlx::query!(/* ... */) + .execute(&mut *tx) + .await?; + } + + tx.commit().await?; + Ok(()) +} +``` + +## Migration Strategy + +### From v1 SurrealDB + +1. Export constellation to CAR file (v1 format) +2. Parse CAR, extract entities +3. Transform to v2 schema (agent_id on memories, etc.) +4. Import into fresh SQLite database +5. Generate Loro documents for memory blocks + +See [v2-migration-path.md](./v2-migration-path.md) for details. + +### Schema Migrations + +Using sqlx migrations in `migrations/` directory: + +``` +migrations/ +├── 20250101000000_initial.sql +├── 20250102000000_add_task_embeddings.sql +└── ... +``` + +## Performance Considerations + +### Indexes + +Key indexes for common query patterns: + +```sql +-- Agent lookups +CREATE INDEX idx_agents_name ON agents(name); +CREATE INDEX idx_agents_status ON agents(status); + +-- Memory block lookups +CREATE INDEX idx_memory_blocks_agent ON memory_blocks(agent_id); +CREATE INDEX idx_memory_blocks_type ON memory_blocks(agent_id, block_type); + +-- Message queries (most common) +CREATE INDEX idx_messages_agent_position ON messages(agent_id, position DESC); +CREATE INDEX idx_messages_batch ON messages(batch_id); +``` + +### Blob Storage + +Loro snapshots are stored as BLOBs. For large documents: + +```sql +-- Consider separate table for large blobs +CREATE TABLE large_blobs ( + id TEXT PRIMARY KEY, + data BLOB NOT NULL +); + +-- Reference from memory_blocks +ALTER TABLE memory_blocks ADD COLUMN large_blob_id TEXT REFERENCES large_blobs(id); +``` + +### Connection Pooling + +SQLite with WAL mode handles concurrent reads well: + +```rust +SqlitePoolOptions::new() + .max_connections(10) // Readers can be parallel + .connect("sqlite:pattern.db?mode=rwc&_journal_mode=WAL") +``` + +## Design Decisions + +### Embeddings + +**Decision**: Support multiple embedding models, pursue local model as default. 
+ +- Store embedding dimension in config per constellation/folder +- Create vec tables dynamically based on configured dimension +- Default to a local model (e.g., `all-MiniLM-L6-v2` at 384 dims) to avoid cloud costs +- Cloud embeddings (OpenAI, Google) available as opt-in for higher quality +- This matches current v1 approach but with local-first default + +### Blob Compression & Loro Snapshots + +**Decision**: Compress large snapshots, periodic checkpoints, incremental updates. + +Strategy: +1. **Frequent ops**: Store Loro update deltas (small, fast) +2. **Periodic checkpoints**: Full document snapshot that acts as new root + - Triggered by: update count threshold, total update size, or time interval + - Old updates before checkpoint can be pruned +3. **Compression**: zstd compress snapshots over N bytes (e.g., 10KB) +4. **History**: Checkpoint acts as "shallow clone" point - history before it is summarized/discarded + +```rust +pub struct LoroStorageConfig { + /// Compress snapshots larger than this + pub compression_threshold_bytes: usize, // default: 10KB + /// Checkpoint after this many updates + pub checkpoint_update_count: usize, // default: 100 + /// Checkpoint if updates exceed this size + pub checkpoint_update_size: usize, // default: 50KB + /// Maximum time between checkpoints + pub checkpoint_interval: Duration, // default: 1 hour +} +``` + +### Backup Strategy + +**Decision**: CAR files for cold storage/migration, SQLite backup API for hot backups. + +- **CAR exports**: Cross-version portable, human-reviewable structure, used for migration and archival +- **SQLite backups**: Fast, consistent snapshots for disaster recovery +- **Recommendation**: Automated SQLite backups (daily), manual CAR exports for major milestones + +```bash +# Hot backup (fast, for disaster recovery) +pattern-cli backup --constellation "MyConstellation" -o backup.db + +# Cold export (portable, for migration/archival) +pattern-cli export constellation --name "MyConstellation" -o archive.car +``` + +### sqlite-vec + +**Decision**: Bundle sqlite-vec, explicit migration via CLI. + +- sqlite-vec compiled and bundled with pattern binaries +- On startup, check schema version against expected +- If outdated, refuse to run and prompt: `pattern-cli db migrate` +- Migrations are explicit user action, not automatic (safety) + +### Multi-Process Access + +**Decision**: Single canonical writer process ("server"), one DB per constellation, concurrency via threading. 
+ +Architecture: +``` +┌─────────────────────────────────────────────────────────────┐ +│ Pattern Server Process │ +├─────────────────────────────────────────────────────────────┤ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │Constellation│ │Constellation│ │Constellation│ │ +│ │ Thread A │ │ Thread B │ │ Thread C │ │ +│ │ │ │ │ │ │ │ +│ │ agents.db │ │ agents.db │ │ agents.db │ │ +│ │ (exclusive) │ │ (exclusive) │ │ (exclusive) │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +├─────────────────────────────────────────────────────────────┤ +│ Shared: LLM API clients, config, global.db (user accounts) │ +└─────────────────────────────────────────────────────────────┘ +``` + +- Each constellation gets its own thread with exclusive DB access +- No SQLite concurrency concerns - single writer per DB +- LLM API is the bottleneck (99.9999% of the time), not SQLite +- CLI in "server mode" fills this role currently; explicit server binary later +- CLI can also run standalone for single-constellation local use + +### Vacuum Schedule + +**Decision**: Manual trigger, with recommendations. + +- `pattern-cli db vacuum --constellation "MyConstellation"` +- Recommend after: migration, bulk deletes, major archival operations +- Could add `--auto-vacuum` flag for background maintenance mode +- SQLite's auto_vacuum pragma is an option but has tradeoffs + +## Remaining Open Questions + +1. **Default local embedding model** - Which specific model to bundle/recommend? + - `all-MiniLM-L6-v2` (384 dims, fast, decent quality) + - `bge-small-en-v1.5` (384 dims, better quality) + - `nomic-embed-text-v1` (768 dims, good quality, larger) + +2. **Checkpoint history retention** - How many checkpoints to keep? + - Keep last N checkpoints for rollback? + - Or just current + one previous? + +3. **Server process management** - Systemd service? Docker? Just CLI? 
+   - For local use: CLI is fine
+   - For always-on: need proper service management
diff --git a/docs/refactoring/v2-dialect-implementation.md b/docs/refactoring/v2-dialect-implementation.md
new file mode 100644
index 0000000..a002efc
--- /dev/null
+++ b/docs/refactoring/v2-dialect-implementation.md
@@ -0,0 +1,1174 @@
+# Pattern Dialect - Implementation Notes
+
+## Extensibility Architecture
+
+### Verb Registration Trait
+
+```rust
+pub trait DialectVerb: Send + Sync {
+    /// Canonical name
+    fn name(&self) -> &str;
+
+    /// Aliases that map to this verb
+    fn aliases(&self) -> &[&str];
+
+    /// How strict fuzzy matching should be
+    fn strictness(&self) -> Strictness { Strictness::Normal }
+
+    /// Parse arguments into structured intent
+    fn parse_args(&self, args: &str, ctx: &ParseContext) -> Result<Intent, ParseHint>;
+
+    /// Execute the intent (or delegate to existing tool)
+    async fn execute(&self, intent: Intent, meta: &ExecutionMeta) -> Result<ActionResult>;
+
+    /// Short description for agent instructions
+    fn description(&self) -> &str;
+
+    /// Examples for few-shot prompting
+    fn examples(&self) -> Vec<String>;
+
+    /// Required authority level (if any)
+    fn authority_required(&self) -> Option<AuthorityLevel> { None }
+}
+
+pub enum Strictness {
+    /// Loose matching - maximize accessibility (recall, search)
+    Loose,
+    /// Normal matching - reasonable typo tolerance
+    Normal,
+    /// Strict matching - avoid accidental triggers (approve, deny, halt)
+    Strict,
+}
+
+/// Hint for recoverable parse failures
+pub enum ParseHint {
+    Ambiguous { options: Vec<String>, question: String },
+    MissingArg { name: String, suggestion: Option<String> },
+    UnknownModifier { got: String, similar: Vec<String> },
+}
+```
+
+### Config-Driven Extensions
+
+```toml
+# pattern.toml or agents/my_agent.toml
+
+# Extend existing verbs with more aliases
+[dialect.verbs.recall]
+extra_aliases = ["memorize", "jot down", "note to self"]
+
+# Create shorthand verbs that delegate to existing tools
+[dialect.verbs.notify]
+aliases = ["alert", "ping", "heads up"]
+maps_to = "send_message"
+default_target = { type = "agent", id = "anchor" }
+description = "Quick notification to the coordinator"
+examples = [
+    "/notify something weird happened",
+    "/heads up partner seems stressed",
+]
+
+# Platform-specific shortcuts
+[dialect.verbs.toot]
+aliases = ["mastodon"]
+maps_to = "send"
+default_target = { type = "mastodon" }
+description = "Post to Mastodon"
+
+# Agent-specific custom verbs
+[dialect.verbs.shame]
+aliases = ["call out", "roast"]
+maps_to = "send"
+default_target = { type = "bluesky" }
+template = "shame post template: {content}"
+requires_permission = true
+```
+
+### Intent Types
+
+```rust
+pub enum Intent {
+    Recall(RecallIntent),
+    Context(ContextIntent),
+    Search(SearchIntent),
+    Send(SendIntent),
+    Fetch(FetchIntent),
+    Web(WebIntent),
+    Calc(CalcIntent),
+    Authority(AuthorityIntent),
+    Custom(CustomIntent), // for config-defined verbs
+}
+
+pub enum RecallIntent {
+    Read { label: String },
+    Search { query: String },
+    Insert { label: Option<String>, content: String },
+    Append { label: String, content: String },
+    Delete { label: String },
+    Patch { label: String, patch: DiffPatch },
+}
+
+pub struct CustomIntent {
+    pub verb: String,
+    pub maps_to: String,
+    pub args: serde_json::Value,
+    pub template_applied: Option<String>,
+}
+```
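+
+Something has to turn those `[dialect.verbs]` tables into entries the matcher can see at startup. A minimal sketch, assuming hypothetical `VerbConfig`/`DialectConfig` shapes whose fields mirror the TOML keys (none of this is final API):
+
+```rust
+use std::collections::HashMap;
+use serde::Deserialize;
+
+/// Mirrors one `[dialect.verbs.<name>]` table (hypothetical shape).
+#[derive(Debug, Deserialize)]
+pub struct VerbConfig {
+    #[serde(default)]
+    pub aliases: Vec<String>,
+    #[serde(default)]
+    pub extra_aliases: Vec<String>,
+    /// Existing tool this shorthand delegates to, if any
+    pub maps_to: Option<String>,
+    pub description: Option<String>,
+    #[serde(default)]
+    pub examples: Vec<String>,
+    #[serde(default)]
+    pub requires_permission: bool,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct DialectConfig {
+    #[serde(default)]
+    pub verbs: HashMap<String, VerbConfig>,
+}
+
+/// Merge config on top of the built-in verb table.
+pub fn apply_config(builtin: &mut Vec<VerbSpec>, config: &DialectConfig) {
+    for (name, vc) in &config.verbs {
+        if let Some(existing) = builtin.iter_mut().find(|v| v.canonical == *name) {
+            // Extending a built-in verb: just add aliases
+            existing.aliases.extend(vc.extra_aliases.iter().cloned());
+        } else if vc.maps_to.is_some() {
+            // New shorthand verb delegating to an existing tool
+            // (VerbSpec::custom is a hypothetical constructor)
+            builtin.push(VerbSpec::custom(name, vc));
+        }
+    }
+}
+```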
+
+### Wiring to Existing Tools
+
+```rust
+impl DialectVerb for RecallVerb {
+    async fn execute(&self, intent: Intent, meta: &ExecutionMeta) -> Result<ActionResult> {
+        let Intent::Recall(recall_intent) = intent else {
+            unreachable!()
+        };
+
+        // Convert to existing tool input
+        let tool_input = match recall_intent {
+            RecallIntent::Read { label } => RecallInput {
+                operation: ArchivalMemoryOperationType::Read,
+                label: Some(label),
+                content: None,
+            },
+            RecallIntent::Search { query } => {
+                // This maps to SearchTool instead
+                return self.handle.search_archival(&query, 20, false).await
+                    .map(ActionResult::from);
+            },
+            RecallIntent::Insert { label, content } => RecallInput {
+                operation: ArchivalMemoryOperationType::Insert,
+                label,
+                content: Some(content),
+            },
+            RecallIntent::Append { label, content } => RecallInput {
+                operation: ArchivalMemoryOperationType::Append,
+                label: Some(label),
+                content: Some(content),
+            },
+            RecallIntent::Delete { label } => RecallInput {
+                operation: ArchivalMemoryOperationType::Delete,
+                label: Some(label),
+                content: None,
+            },
+            RecallIntent::Patch { label, patch } => {
+                // Load, apply patch, save
+                let current = self.handle.get_archival_memory_by_label(&label).await?;
+                let patched = patch.apply(&current.value)?;
+                // Update via replace or direct write
+                return self.handle.update_archival_memory(&label, &patched).await
+                    .map(ActionResult::from);
+            }
+        };
+
+        self.recall_tool.execute(tool_input, meta).await
+            .map(ActionResult::from)
+    }
+}
+```
+
+---
+
+## Fuzzy Verb Matching
+
+### Matching Tiers
+
+The matcher tries these in order, returning the first confident match:
+
+```
+1. Exact match (canonical or alias)
+2. Case-insensitive exact match
+3. Morphological variants (plurals, tenses, spellings)
+4. Levenshtein distance (typos)
+5. Phonetic similarity (soundex/metaphone)
+6. [Optional] Semantic similarity (embeddings)
+```
+
+### Tier 1-2: Exact Matching
+
+```rust
+fn exact_match(input: &str, verbs: &[VerbSpec]) -> Option<&VerbSpec> {
+    for verb in verbs {
+        if verb.canonical == input || verb.canonical.eq_ignore_ascii_case(input) {
+            return Some(verb);
+        }
+        for alias in &verb.aliases {
+            if *alias == input || alias.eq_ignore_ascii_case(input) {
+                return Some(verb);
+            }
+        }
+    }
+    None
+}
+```
+
+### Tier 3: Morphological Variants
+
+Handle common English variations without needing explicit aliases:
+
+```rust
+pub struct MorphologicalMatcher {
+    // Irregular forms we need to know about
+    irregulars: HashMap<&'static str, &'static str>,
+}
+
+impl MorphologicalMatcher {
+    pub fn new() -> Self {
+        let mut irregulars = HashMap::new();
+        // Irregular verbs relevant to our domain
+        irregulars.insert("sent", "send");
+        irregulars.insert("told", "tell");
+        irregulars.insert("found", "find");
+        irregulars.insert("forgot", "forget");
+        irregulars.insert("wrote", "write");
+        irregulars.insert("thought", "think");
+        Self { irregulars }
+    }
+
+    pub fn normalize(&self, word: &str) -> String {
+        let word = word.to_lowercase();
+
+        // Check irregulars first
+        if let Some(base) = self.irregulars.get(word.as_str()) {
+            return base.to_string();
+        }
+
+        // Regular patterns
+        let normalized = word
+            // -ing: searching -> search
+            .strip_suffix("ing")
+            .map(|s| {
+                // Handle doubling: stopping -> stop
+                if s.len() > 2 && s.chars().last() == s.chars().nth(s.len() - 2) {
+                    s[..s.len() - 1].to_string()
+                } else if s.ends_with('e') {
+                    // seeing -> see (we stripped 'ing', add back 'e')
+                    format!("{}e", s)
+                } else {
+                    s.to_string()
+                }
+            })
+            // -ed: searched -> search
+            .or_else(|| word.strip_suffix("ed").map(|s| {
+                if s.ends_with('i') {
+                    // tried -> try
+                    format!("{}y", &s[..s.len() - 1])
+                } else {
+                    s.to_string()
+                }
+            }))
+            // -ies: queries -> query (must run before the plain -es/-s strips)
+            .or_else(|| word.strip_suffix("ies").map(|s| format!("{}y", s)))
+            // -s/-es: searches -> search
+            .or_else(|| word.strip_suffix("es").map(|s| s.to_string()))
+            .or_else(|| word.strip_suffix("s").map(|s| s.to_string()));
+
+        normalized.unwrap_or(word)
+    }
+
+    pub fn spelling_variants(&self, word: &str) -> Vec<String> {
+        let mut variants = vec![word.to_string()];
+
+        // British/American spelling
+        if word.contains("ise") {
+            variants.push(word.replace("ise", "ize"));
+        }
+        if word.contains("ize") {
+            variants.push(word.replace("ize", "ise"));
+        }
+        if word.contains("our") {
+            variants.push(word.replace("our", "or"));
+        }
+        if word.contains("or") && !word.contains("our") {
+            // Be careful not to create nonsense
+            let replaced = word.replace("or", "our");
+            if replaced != word {
+                variants.push(replaced);
+            }
+        }
+
+        // Common misspellings
+        if word.contains("mem") {
+            variants.push(word.replace("mem", "rem")); // remember/memory confusion
+        }
+
+        variants
+    }
+}
+```
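+
+A few spot checks of the normalizer as sketched (note that the `-ies` branch has to run before the plain `-es`/`-s` strips, or "queries" would normalize to "queri"):
+
+```rust
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn normalizes_common_variants() {
+        let m = MorphologicalMatcher::new();
+        assert_eq!(m.normalize("searching"), "search");
+        assert_eq!(m.normalize("stopping"), "stop"); // doubled consonant
+        assert_eq!(m.normalize("tried"), "try");
+        assert_eq!(m.normalize("queries"), "query");
+        assert_eq!(m.normalize("sent"), "send"); // irregular table
+    }
+}
+```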
+
+### Tier 4: Levenshtein Distance
+
+```rust
+pub fn levenshtein_match(
+    input: &str,
+    verbs: &[VerbSpec],
+    max_distance: usize,
+) -> Vec<(VerbSpec, usize)> {
+    let mut matches = Vec::new();
+
+    for verb in verbs {
+        // Adjust max distance by strictness
+        let allowed = match verb.strictness {
+            Strictness::Strict => 1,
+            Strictness::Normal => max_distance,
+            Strictness::Loose => max_distance + 1,
+        };
+
+        let dist = levenshtein(&input.to_lowercase(), verb.canonical);
+        if dist <= allowed {
+            matches.push((verb.clone(), dist));
+        }
+
+        // Also check aliases
+        for alias in &verb.aliases {
+            let dist = levenshtein(&input.to_lowercase(), alias);
+            if dist <= allowed && dist < matches.iter()
+                .find(|(v, _)| v.canonical == verb.canonical)
+                .map(|(_, d)| *d)
+                .unwrap_or(usize::MAX)
+            {
+                // Better match via alias
+                if let Some(existing) = matches.iter_mut()
+                    .find(|(v, _)| v.canonical == verb.canonical)
+                {
+                    existing.1 = dist;
+                } else {
+                    matches.push((verb.clone(), dist));
+                }
+            }
+        }
+    }
+
+    matches.sort_by_key(|(_, dist)| *dist);
+    matches
+}
+
+fn levenshtein(a: &str, b: &str) -> usize {
+    // Standard Levenshtein implementation
+    // Could use `strsim` crate instead
+    let a: Vec<char> = a.chars().collect();
+    let b: Vec<char> = b.chars().collect();
+
+    let mut matrix = vec![vec![0; b.len() + 1]; a.len() + 1];
+
+    for i in 0..=a.len() { matrix[i][0] = i; }
+    for j in 0..=b.len() { matrix[0][j] = j; }
+
+    for i in 1..=a.len() {
+        for j in 1..=b.len() {
+            let cost = if a[i-1] == b[j-1] { 0 } else { 1 };
+            matrix[i][j] = (matrix[i-1][j] + 1)
+                .min(matrix[i][j-1] + 1)
+                .min(matrix[i-1][j-1] + cost);
+        }
+    }
+
+    matrix[a.len()][b.len()]
+}
+```
+
+### Tier 5: Phonetic Similarity
+
+For when someone types "recal" or "serch" - sounds right but spelled wrong:
+
+```rust
+pub fn phonetic_match(input: &str, verbs: &[VerbSpec]) -> Vec<(VerbSpec, f32)> {
+    let input_code = soundex(input);
+    let mut matches = Vec::new();
+
+    for verb in verbs {
+        if verb.strictness == Strictness::Strict {
+            continue; // Don't use phonetic for strict verbs
+        }
+
+        let verb_code = soundex(verb.canonical);
+        if input_code == verb_code {
+            matches.push((verb.clone(), 0.8)); // High but not perfect confidence
+        }
+
+        for alias in &verb.aliases {
+            if soundex(alias) == input_code {
+                matches.push((verb.clone(), 0.8));
+                break;
+            }
+        }
+    }
+
+    matches
+}
+
+fn soundex(s: &str) -> String {
+    // Standard Soundex algorithm
+    // Could use `phonetics` crate instead
+    if s.is_empty() { return String::new(); }
+
+    let s = s.to_uppercase();
+    let mut chars = s.chars();
+    let first = chars.next().unwrap();
+
+    let codes: String = chars
+        .filter_map(|c| match c {
+            'B' | 'F' | 'P' | 'V' => Some('1'),
+            'C' | 'G' | 'J' | 'K' | 'Q' | 'S' | 'X' | 'Z' => Some('2'),
+            'D' | 'T' => Some('3'),
+            'L' => Some('4'),
+            'M' | 'N' => Some('5'),
+            'R' => Some('6'),
+            _ => None,
+        })
+        .collect();
+
+    // Remove consecutive duplicates
+    let mut result = String::new();
+    result.push(first);
+    let mut last = ' ';
+    for c in codes.chars() {
+        if c != last {
+            result.push(c);
+            last = c;
+        }
+        if result.len() >= 4 { break; }
+    }
+
+    // Pad to 4 characters
+    while result.len() < 4 {
+        result.push('0');
+    }
+
+    result
+}
+```
+
+### Tier 6: Semantic Similarity (Optional/Advanced)
+
+For when an agent says `/retrieve` instead of `/recall` - different word, same meaning:
+
+```rust
+pub struct SemanticMatcher<E: Embedder> {
+    embedder: E,
+    verb_embeddings: HashMap<String, Vec<f32>>,
+    similarity_threshold: f32,
+}
+
+impl<E: Embedder> SemanticMatcher<E> {
+    pub async fn new(embedder: E, verbs: &[VerbSpec]) -> Result<Self> {
+        let mut verb_embeddings = HashMap::new();
+
+        // Pre-compute embeddings for all verbs and aliases
+        for verb in verbs {
+            if verb.strictness == Strictness::Strict {
+                continue; // Don't use semantic matching for strict verbs
+            }
+
+            // Embed the canonical name and use it as representative
+            let embedding = embedder.embed(&[verb.canonical.to_string()]).await?;
+            verb_embeddings.insert(verb.canonical.to_string(), embedding[0].clone());
+        }
+
+        Ok(Self {
+            embedder,
+            verb_embeddings,
+            similarity_threshold: 0.85, // High threshold to avoid false positives
+        })
+    }
+
+    pub async fn find_similar(&self, input: &str) -> Result<Vec<(String, f32)>> {
+        let input_embedding = self.embedder.embed(&[input.to_string()]).await?;
+        let input_vec = &input_embedding[0];
+
+        let mut matches: Vec<(String, f32)> = self.verb_embeddings
+            .iter()
+            .map(|(verb, verb_vec)| {
+                let similarity = cosine_similarity(input_vec, verb_vec);
+                (verb.clone(), similarity)
+            })
+            .filter(|(_, sim)| *sim >= self.similarity_threshold)
+            .collect();
+
+        matches.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+        Ok(matches)
+    }
+}
+
+fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
+    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
+    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
+    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
+    dot / (norm_a * norm_b)
+}
+```
+
+### Combined Matcher
+
+```rust
+pub struct VerbMatcher {
+    verbs: Vec<VerbSpec>,
+    morphological: MorphologicalMatcher,
+    // Boxed embedder so VerbMatcher itself doesn't need to be generic
+    semantic: Option<SemanticMatcher<Box<dyn Embedder>>>,
+}
+
+impl VerbMatcher {
+    pub async fn match_verb(&self, input: &str) -> MatchResult {
+        // Tier 1-2: Exact match
+        if let Some(verb) = self.exact_match(input) {
+            return MatchResult::Exact(verb.clone());
+        }
+
+        // Tier 3: Morphological normalization then exact match
+        let normalized = self.morphological.normalize(input);
+        if normalized != input {
+            if let Some(verb) = self.exact_match(&normalized) {
+                return MatchResult::Normalized(verb.clone(), normalized);
+            }
+        }
+
+        // Also try spelling variants
+        for variant in self.morphological.spelling_variants(input) {
+            if let Some(verb) = self.exact_match(&variant) {
+                return MatchResult::SpellingVariant(verb.clone(), variant);
+            }
+        }
+
+        // Tier 4: Levenshtein
+        let lev_matches = levenshtein_match(input, &self.verbs, 2);
+        if let Some((verb, dist)) = lev_matches.first() {
+            if *dist <= 1 {
+                return MatchResult::Typo(verb.clone(), *dist);
+            }
+        }
+
+        // Tier 5: Phonetic
+        let phonetic_matches = phonetic_match(input, &self.verbs);
+        if let Some((verb, conf)) = phonetic_matches.first() {
+            return MatchResult::Phonetic(verb.clone(), *conf);
+        }
+
+        // Tier 6: Semantic (if enabled)
+        if let Some(ref semantic) = self.semantic {
+            if let Ok(semantic_matches) = semantic.find_similar(input).await {
+                if let Some((verb_name, conf)) = semantic_matches.first() {
+                    if let Some(verb) = self.verbs.iter().find(|v| v.canonical == *verb_name) {
+                        return MatchResult::Semantic(verb.clone(), *conf);
+                    }
+                }
+            }
+        }
+
+        // No match - return candidates for error message
+        let mut candidates: Vec<_> = lev_matches.into_iter()
+            .map(|(v, d)| (v.canonical.to_string(), 1.0 - (d as f32 / 5.0)))
+            .collect();
+        candidates.extend(phonetic_matches.into_iter()
+            .map(|(v, c)| (v.canonical.to_string(), c)));
+        candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+        candidates.dedup_by(|a, b| a.0 == b.0);
+        candidates.truncate(3);
+
+        MatchResult::NoMatch {
+            input: input.to_string(),
+            candidates,
+        }
+    }
+}
+
+pub enum MatchResult {
+    Exact(VerbSpec),
+    Normalized(VerbSpec, String),
+    SpellingVariant(VerbSpec, String),
+    Typo(VerbSpec, usize),
+    Phonetic(VerbSpec, f32),
+    Semantic(VerbSpec, f32),
+    NoMatch { input: String, candidates: Vec<(String, f32)> },
+}
+```
+
+---
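+
+What the caller does with a `MatchResult` is left open above. One plausible policy (the `Resolution` enum and `resolve` are hypothetical, not final API) is to execute confident tiers, confirm the fuzzy ones, and turn `NoMatch` candidates into the error the agent sees:
+
+```rust
+pub enum Resolution {
+    Execute(VerbSpec),
+    ExecuteWithNote { verb: VerbSpec, note: String },
+    Confirm { verb: VerbSpec, confidence: f32 },
+    Error(String),
+}
+
+pub fn resolve(result: MatchResult) -> Resolution {
+    match result {
+        // Confident: run it
+        MatchResult::Exact(v)
+        | MatchResult::Normalized(v, _)
+        | MatchResult::SpellingVariant(v, _) => Resolution::Execute(v),
+
+        // Small typo: run it, but note the correction in the result
+        MatchResult::Typo(v, distance) => Resolution::ExecuteWithNote {
+            verb: v,
+            note: format!("(interpreted as a typo, distance {})", distance),
+        },
+
+        // Fuzzy tiers: ask before acting
+        MatchResult::Phonetic(v, conf) | MatchResult::Semantic(v, conf) => {
+            Resolution::Confirm { verb: v, confidence: conf }
+        }
+
+        // Nothing plausible: echo the best candidates back to the agent
+        MatchResult::NoMatch { input, candidates } => Resolution::Error(format!(
+            "unknown verb '{}'; did you mean: {}?",
+            input,
+            candidates
+                .iter()
+                .map(|(name, _)| name.as_str())
+                .collect::<Vec<_>>()
+                .join(", ")
+        )),
+    }
+}
+```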
+
+## Parse Failure Logging
+
+### Data Model
+
+```rust
+pub struct ParseFailure {
+    pub id: ParseFailureId,
+    pub raw_input: String,
+    pub agent_id: AgentId,
+    pub model: String,
+    pub timestamp: DateTime<Utc>,
+    pub failure_type: ParseFailureType,
+    pub match_result: Option<MatchResult>, // What the matcher found
+    pub context_hint: Option<String>,      // What the agent was trying to do
+    pub session_id: Option<SessionId>,     // For grouping related failures
+}
+
+pub enum ParseFailureType {
+    UnknownVerb {
+        attempted: String,
+        candidates: Vec<(String, f32)>,
+    },
+    AmbiguousArgs {
+        verb: String,
+        args: String,
+        possibilities: Vec<String>,
+    },
+    MalformedSyntax {
+        verb: String,
+        issue: String,
+        position: Option<usize>,
+    },
+    InvalidReference {
+        reference: String,
+        available: Vec<String>,
+    },
+    UnrecognizedModifier {
+        verb: String,
+        modifier: String,
+        valid_modifiers: Vec<String>,
+    },
+    PermissionMarkerInvalid {
+        marker: String,
+    },
+}
+```
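+
+The log methods below assume a `parse_failures` table. A possible shape, mirroring `ParseFailure` with JSON for the enum payloads (columns are hypothetical):
+
+```sql
+CREATE TABLE parse_failures (
+  id TEXT PRIMARY KEY,
+  raw_input TEXT NOT NULL,
+  agent_id TEXT NOT NULL,
+  model TEXT NOT NULL,
+  timestamp TEXT NOT NULL,
+  failure_type TEXT NOT NULL,    -- variant name: 'unknown_verb', ...
+  failure_detail JSON NOT NULL,  -- variant payload (candidates, etc.)
+  match_result JSON,             -- what the matcher found, if anything
+  context_hint TEXT,
+  session_id TEXT,
+  FOREIGN KEY (agent_id) REFERENCES agents(id)
+);
+
+CREATE INDEX idx_parse_failures_type ON parse_failures(failure_type, timestamp DESC);
+CREATE INDEX idx_parse_failures_model ON parse_failures(model, timestamp DESC);
+```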
+
+### Storage & Queries
+
+```rust
+pub struct ParseFailureLog {
+    db: DatabaseConnection,
+}
+
+impl ParseFailureLog {
+    pub async fn log(&self, failure: ParseFailure) -> Result<()> {
+        // Insert into parse_failures table
+    }
+
+    /// What unknown verbs are agents trying to use?
+    pub async fn unknown_verbs_frequency(
+        &self,
+        since: DateTime<Utc>,
+    ) -> Result<Vec<(String, usize, Vec<String>)>> {
+        // Returns: (attempted_verb, count, example_contexts)
+    }
+
+    /// Failures grouped by model
+    pub async fn by_model(
+        &self,
+        model: &str,
+        limit: usize,
+    ) -> Result<Vec<ParseFailure>> {
+    }
+
+    /// Most common failure patterns
+    pub async fn common_patterns(
+        &self,
+        since: DateTime<Utc>,
+    ) -> Result<Vec<FailurePattern>> {
+    }
+
+    /// Suggest new aliases based on failed attempts
+    pub async fn suggest_aliases(&self) -> Result<Vec<AliasCandidate>> {
+        // Find unknown verbs that:
+        // 1. Occur frequently
+        // 2. Have high semantic similarity to existing verbs
+        // 3. Aren't already aliases
+    }
+}
+
+pub struct AliasCandidate {
+    pub new_alias: String,
+    pub target_verb: String,
+    pub occurrence_count: usize,
+    pub confidence: f32,
+    pub example_usages: Vec<String>,
+}
+
+pub struct FailurePattern {
+    pub pattern_type: ParseFailureType,
+    pub count: usize,
+    pub affected_models: Vec<String>,
+    pub suggested_fix: Option<String>,
+}
+```
+
+### Auto-Suggestion Pipeline
+
+```rust
+pub async fn analyze_and_suggest(
+    log: &ParseFailureLog,
+    matcher: &VerbMatcher,
+    min_occurrences: usize,
+    min_confidence: f32,
+) -> Result<Vec<AliasCandidate>> {
+    let unknown_verbs = log.unknown_verbs_frequency(
+        Utc::now() - Duration::days(7)
+    ).await?;
+
+    let mut candidates = Vec::new();
+
+    for (attempted, count, examples) in unknown_verbs {
+        if count < min_occurrences {
+            continue;
+        }
+
+        // Try semantic matching
+        if let Some(ref semantic) = matcher.semantic {
+            let similar = semantic.find_similar(&attempted).await?;
+            if let Some((verb, confidence)) = similar.first() {
+                if *confidence >= min_confidence {
+                    candidates.push(AliasCandidate {
+                        new_alias: attempted,
+                        target_verb: verb.clone(),
+                        occurrence_count: count,
+                        confidence: *confidence,
+                        example_usages: examples,
+                    });
+                }
+            }
+        }
+    }
+
+    // Sort by potential impact (count * confidence)
+    candidates.sort_by(|a, b| {
+        let score_a = a.occurrence_count as f32 * a.confidence;
+        let score_b = b.occurrence_count as f32 * b.confidence;
+        score_b.partial_cmp(&score_a).unwrap_or(std::cmp::Ordering::Equal)
+    });
+
+    Ok(candidates)
+}
+```
+
+---
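+
+Closing the loop, a maintenance command could print these as ready-to-paste config fragments; a sketch (the function and its thresholds are illustrative, not a committed CLI surface):
+
+```rust
+pub async fn print_alias_suggestions(
+    log: &ParseFailureLog,
+    matcher: &VerbMatcher,
+) -> Result<()> {
+    // Thresholds: seen at least 3 times, >= 0.85 semantic similarity
+    let candidates = analyze_and_suggest(log, matcher, 3, 0.85).await?;
+
+    for c in candidates {
+        println!(
+            "# seen {}x, confidence {:.2}, e.g. {:?}",
+            c.occurrence_count, c.confidence, c.example_usages.first()
+        );
+        println!("[dialect.verbs.{}]", c.target_verb);
+        println!("extra_aliases = [\"{}\"]\n", c.new_alias);
+    }
+    Ok(())
+}
+```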
+
+## Diff/Patch Support
+
+Using the `similar` crate for unified diff parsing and application:
+
+```rust
+use similar::{TextDiff, ChangeTag};
+
+pub struct DiffPatch {
+    pub hunks: Vec<Hunk>,
+}
+
+pub struct Hunk {
+    pub old_start: usize,
+    pub old_count: usize,
+    pub new_start: usize,
+    pub new_count: usize,
+    pub changes: Vec<Change>,
+}
+
+pub enum Change {
+    Context(String),
+    Delete(String),
+    Insert(String),
+}
+
+impl DiffPatch {
+    /// Parse unified diff format
+    pub fn parse(patch_text: &str) -> Result<Self> {
+        let mut hunks = Vec::new();
+        let mut lines = patch_text.lines().peekable();
+
+        while let Some(line) = lines.next() {
+            // Skip header lines
+            if line.starts_with("---") || line.starts_with("+++") {
+                continue;
+            }
+
+            // Parse hunk header: @@ -start,count +start,count @@
+            if line.starts_with("@@") {
+                let hunk = Self::parse_hunk(line, &mut lines)?;
+                hunks.push(hunk);
+            }
+        }
+
+        Ok(Self { hunks })
+    }
+
+    fn parse_hunk(
+        header: &str,
+        lines: &mut std::iter::Peekable<std::str::Lines<'_>>,
+    ) -> Result<Hunk> {
+        // Parse @@ -old_start,old_count +new_start,new_count @@
+        let re = regex::Regex::new(r"@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@").unwrap();
+        let caps = re.captures(header)
+            .ok_or(PatchParseError::InvalidHunkHeader)?;
+
+        let old_start: usize = caps[1].parse()?;
+        let old_count: usize = caps.get(2)
+            .map(|m| m.as_str().parse().unwrap_or(1))
+            .unwrap_or(1);
+        let new_start: usize = caps[3].parse()?;
+        let new_count: usize = caps.get(4)
+            .map(|m| m.as_str().parse().unwrap_or(1))
+            .unwrap_or(1);
+
+        let mut changes = Vec::new();
+
+        while let Some(line) = lines.peek() {
+            if line.starts_with("@@") || line.starts_with("---") || line.starts_with("+++") {
+                break;
+            }
+
+            let line = lines.next().unwrap();
+
+            if let Some(content) = line.strip_prefix('-') {
+                changes.push(Change::Delete(content.to_string()));
+            } else if let Some(content) = line.strip_prefix('+') {
+                changes.push(Change::Insert(content.to_string()));
+            } else if let Some(content) = line.strip_prefix(' ') {
+                changes.push(Change::Context(content.to_string()));
+            } else if !line.is_empty() {
+                // Treat as context if no prefix
+                changes.push(Change::Context(line.to_string()));
+            }
+        }
+
+        Ok(Hunk {
+            old_start,
+            old_count,
+            new_start,
+            new_count,
+            changes,
+        })
+    }
+
+    /// Apply patch to original text
+    pub fn apply(&self, original: &str) -> Result<String> {
+        // Own the lines so hunk content spliced in below can outlive each iteration
+        let mut lines: Vec<String> = original.lines().map(String::from).collect();
+
+        // Apply hunks in reverse order to preserve line numbers
+        for hunk in self.hunks.iter().rev() {
+            let start = hunk.old_start.saturating_sub(1); // Convert to 0-indexed
+
+            // Verify context matches (fuzzy - allow some drift)
+            // ...
+
+            // Collect replacement lines: context and inserts stay, deletes drop out
+            let mut new_lines: Vec<String> = Vec::new();
+            for change in &hunk.changes {
+                match change {
+                    Change::Context(s) => new_lines.push(s.clone()),
+                    Change::Delete(_) => {}
+                    Change::Insert(s) => new_lines.push(s.clone()),
+                }
+            }
+
+            // Replace the range in original
+            let end = (start + hunk.old_count).min(lines.len());
+            lines.splice(start..end, new_lines);
+        }
+
+        Ok(lines.join("\n"))
+    }
+}
+
+/// Create a diff between two strings
+pub fn create_diff(original: &str, modified: &str) -> String {
+    let diff = TextDiff::from_lines(original, modified);
+
+    let mut output = String::new();
+
+    for hunk in diff.unified_diff().iter_hunks() {
+        output.push_str(&format!("{}", hunk));
+    }
+
+    output
+}
+```
+
+---
+
+## Action Detection & Execution
+
+### Detection Rules
+
+Actions are detected anywhere in agent output, but only executed under specific conditions.
+
+```rust
+pub struct DetectedAction {
+    pub action: ParsedAction,
+    pub span: Span,           // position in message
+    pub is_trailing: bool,    // no non-whitespace after
+    pub has_do_marker: bool,  // prefixed with /do
+}
+
+pub struct ActionDetector {
+    sigil: char,              // '/'
+    do_marker: &'static str,  // "do"
+}
+
+impl ActionDetector {
+    pub fn detect_all(&self, message: &str) -> Vec<DetectedAction> {
+        // Find all /verb patterns in message
+        // Mark each with position, trailing status, /do presence
+    }
+
+    pub fn executable_actions(&self, message: &str) -> Vec<ParsedAction> {
+        self.detect_all(message)
+            .into_iter()
+            .filter(|a| should_execute(a, message))
+            .map(|a| a.action)
+            .collect()
+    }
+}
+```
+
+### Execution Decision
+
+```rust
+pub fn should_execute(
+    action: &DetectedAction,
+    message: &str,
+) -> bool {
+    // Must be trailing or explicitly marked with /do
+    if !action.is_trailing && !action.has_do_marker {
+        return false;
+    }
+
+    // Check for reconsideration in text after the action
+    let text_after = &message[action.span.end..];
+    if has_reconsideration(text_after) {
+        return false;
+    }
+
+    true
+}
+
+fn has_reconsideration(text: &str) -> bool {
+    const PATTERNS: &[&str] = &[
+        "actually",
+        "wait",
+        "never mind",
+        "nevermind",
+        "scratch that",
+        "ignore that",
+        "don't need",
+        "won't need",
+        "shouldn't",
+        "hold on",
+        "let me think",
+        "on second thought",
+        "no,",
+        "nope",
+        "nah",
+        "forget that",
+        "disregard",
+        "not necessary",
+        "don't bother",
+    ];
+
+    let lower = text.to_lowercase();
+    PATTERNS.iter().any(|p| lower.contains(p))
+}
+```
+
+### Examples
+
+```
+# EXECUTED - trailing action, no reconsideration
+Let me check what we discussed.
+/recall project deadlines
+
+# NOT EXECUTED - reconsideration after
+Let me check that.
+/recall project deadlines
+Actually wait, you just told me. Never mind.
+
+# EXECUTED - explicit /do marker
+/do /recall project deadlines
+I'll keep talking while that runs.
+
+# NOT EXECUTED - mid-message, no marker
+I could /recall the notes but let's think first...
+
+# NOT EXECUTED - /do but then reconsideration
+/do /recall project deadlines
+Hmm, actually scratch that, wrong label.
+
+# EXECUTED - multiple trailing actions
+Let me gather some context.
+/recall project notes
+/search conversations about deadline
+```
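+
+`detect_all` is a stub above. A minimal regex-based sketch covering the examples (only line-leading actions; `parse_action` and the `Span` fields are assumed, not final API):
+
+```rust
+use regex::Regex;
+
+pub fn detect_all_sketch(message: &str) -> Vec<DetectedAction> {
+    // A /verb at the start of a line, optionally prefixed with /do,
+    // with args running to end of line
+    let re = Regex::new(r"(?m)^(/do\s+)?/([a-z_]+)(?:\s+(.*))?$").unwrap();
+    let mut actions = Vec::new();
+
+    for caps in re.captures_iter(message) {
+        let whole = caps.get(0).unwrap();
+        let has_do_marker = caps.get(1).is_some();
+        let verb = &caps[2];
+        let args = caps.get(3).map(|m| m.as_str()).unwrap_or("");
+
+        // Trailing = nothing after it except whitespace or further actions
+        let rest = &message[whole.end()..];
+        let is_trailing = rest.trim().is_empty() || rest.trim_start().starts_with('/');
+
+        actions.push(DetectedAction {
+            action: parse_action(verb, args), // fuzzy verb match + arg parse (hypothetical)
+            span: Span { start: whole.start(), end: whole.end() }, // assumed fields
+            is_trailing,
+            has_do_marker,
+        });
+    }
+
+    actions
+}
+```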
+
+### Current vs Future Streaming Behavior
+
+**Current (no streaming):**
+- Full message available before any execution
+- Detect all actions, filter by rules, execute survivors
+- Simple and safe
+
+**Future (with streaming):**
+- Partial responses shown to user as they generate
+- Execution still waits for message completion by default
+- Exception: `/do` + safe action subset + token window
+
+```rust
+pub enum ExecutionTiming {
+    /// Wait for full message (default, always safe)
+    PostCompletion,
+
+    /// Stream-execute with safeguards (opt-in, restricted)
+    Streaming {
+        /// Additional tokens to wait after action for reconsideration
+        confirmation_window: usize, // e.g., 50 tokens
+        /// Only these action types allowed
+        allowed_verbs: HashSet<Verb>,
+    },
+}
+
+/// Actions safe for stream-execution (no side effects, cancellable)
+pub fn streaming_safe_verbs() -> HashSet<Verb> {
+    // hashset! from the maplit crate
+    hashset! {
+        Verb::Recall, // read-only recall (not insert/delete)
+        Verb::Search,
+        Verb::Calc,
+    }
+}
+
+/// Full stream-execution rules (future)
+pub async fn maybe_stream_execute(
+    action: &DetectedAction,
+    token_stream: &mut TokenStream,
+    timing: ExecutionTiming,
+) -> Option<ActionResult> {
+    match timing {
+        ExecutionTiming::PostCompletion => None, // handled elsewhere
+
+        ExecutionTiming::Streaming { confirmation_window, allowed_verbs } => {
+            // Must have explicit /do marker
+            if !action.has_do_marker {
+                return None;
+            }
+
+            // Must be in safe subset
+            if !allowed_verbs.contains(&action.action.verb) {
+                return None;
+            }
+
+            // Must be read-only variant of the verb
+            if !action.action.intent.is_read_only() {
+                return None;
+            }
+
+            // Wait for confirmation window
+            let mut tokens_seen = 0;
+            while tokens_seen < confirmation_window {
+                match token_stream.next().await {
+                    Some(token) => {
+                        if has_reconsideration(&token) {
+                            return None; // cancelled
+                        }
+                        tokens_seen += 1;
+                    }
+                    None => break, // stream ended
+                }
+            }
+
+            // Window passed, no reconsideration - execute
+            Some(execute(action.action.clone()).await)
+        }
+    }
+}
+```
+
+### Integration with Message Flow
+
+```
+Agent generates response
+        │
+        ▼
+┌───────────────────────────────────────┐
+│ Full message received                 │
+│ (or streamed to completion)           │
+└───────────────────┬───────────────────┘
+                    │
+                    ▼
+┌───────────────────────────────────────┐
+│ ActionDetector.detect_all()           │
+│ Find all /verb patterns               │
+└───────────────────┬───────────────────┘
+                    │
+                    ▼
+┌───────────────────────────────────────┐
+│ Filter: trailing OR /do marked        │
+│ Filter: no reconsideration after      │
+└───────────────────┬───────────────────┘
+                    │
+                    ▼
+┌───────────────────────────────────────┐
+│ Parse surviving actions               │
+│ (fuzzy verb match, arg extraction)    │
+└───────────────────┬───────────────────┘
+                    │
+                    ▼
+┌───────────────────────────────────────┐
+│ Permission check                      │
+│ (implicit + explicit markers)         │
+└───────────────────┬───────────────────┘
+                    │
+        ┌───────────┴───────────┐
+        ▼                       ▼
+    Permitted              Needs approval
+        │                       │
+        ▼                       ▼
+   Execute via             Queue for
+   existing tools           authority
+        │                       │
+        ▼                       ▼
+   Format result            Wait for
+   for agent                decision
+        │                       │
+   
└───────────┬───────────┘ + │ + ▼ +┌───────────────────────────────────────┐ +│ Return results to agent │ +│ (or error/permission denied msg) │ +└───────────────────────────────────────┘ +``` + +--- + +## Crate Dependencies + +```toml +[dependencies] +# Fuzzy string matching +strsim = "0.10" # Levenshtein, Jaro-Winkler, etc. + +# Phonetic matching (optional) +phonetics = "0.1" # Soundex, Metaphone + +# Diff/patch +similar = "2.0" # Text diffing + +# Regex for parsing +regex = "1.0" + +# Async trait +async-trait = "0.1" +``` diff --git a/docs/refactoring/v2-memory-system.md b/docs/refactoring/v2-memory-system.md new file mode 100644 index 0000000..aa9f5ea --- /dev/null +++ b/docs/refactoring/v2-memory-system.md @@ -0,0 +1,1408 @@ +# Pattern v2: Memory System Design + +## Research Summary + +### Loro CRDT Library + +Loro is a Rust-native CRDT library with first-class support for: + +**Data Types:** +- `LoroText` - Rich text with Fugue algorithm (minimizes interleaving) +- `LoroList` - Ordered list with move support +- `LoroMovableList` - List where items can be moved +- `LoroMap` - Last-write-wins map +- `LoroTree` - Hierarchical tree with move support +- `LoroCounter` - Numeric counter + +**Key Features:** +- **Snapshot + Updates model** - Full snapshots for periodic saves, delta updates for frequent saves +- **Time Travel** - `doc.checkout(frontiers)` jumps to any version +- **Version Vectors** - Track what each peer has seen +- **Shallow Snapshots** - Like git shallow clone, archive old history +- **JSONPath queries** - Query document structure +- **Built-in checksums** - Validates imports, rejects corrupted data + +**Persistence Pattern:** +```rust +// Periodic: export full snapshot +let snapshot = doc.export(ExportMode::Snapshot); +store_to_db(snapshot); + +// Frequent: export delta updates +let updates = doc.export(ExportMode::Updates { from: last_version }); +append_to_log(updates); + +// Load: import snapshot + all updates +let doc = LoroDoc::new(); +doc.import(snapshot); +for update in updates { + doc.import(update); +} +``` + +**Detached State Consideration:** +When you `checkout()` to a historical version, the document becomes read-only. Must call `attach()` to return to editing. This is intentional - you're viewing history, not rewriting it. + +### Letta Memory Blocks + +Letta's memory system uses named blocks with clear semantics: + +**Block Structure:** +- `label` - Unique identifier (e.g., "human", "persona", "organization") +- `description` - Crucial for LLM to understand purpose +- `value` - The actual content +- `limit` - Character limit +- `read_only` - Whether agent can modify + +**Key Design Principles:** +1. **Description is critical** - The LLM uses the description to understand what to store +2. **Labels are semantic** - "human", "persona", "scratchpad", "organization" +3. **Blocks are shareable** - Multiple agents can attach to the same block +4. **Always in context** - No retrieval needed, blocks are always visible +5. 
**Agent-managed** - Agents autonomously organize based on labels + +**Default Descriptions:** +- `persona`: "Stores details about your current persona, guiding how you behave and respond" +- `human`: "Stores key details about the person you are conversing with" + +**Use Cases from Letta:** +- Tool usage guidelines (avoid past mistakes) +- Working memory / scratchpad +- Mirror external state (user's current document) +- Read-only policies shared across agents +- Multi-agent coordination (watch subagent result blocks) +- Emergent behaviour (`performance_tracking`, `emotional_state`) + +### sqlite-vec + +Vector search extension for SQLite: + +**Features:** +- Virtual table: `CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[384])` +- KNN queries with `match` and `order by distance` +- Supports float, int8, and binary vectors +- Metadata columns, partition keys, auxiliary columns +- Pure C, no dependencies, runs anywhere SQLite runs + +**Usage:** +```sql +CREATE VIRTUAL TABLE vec_memories USING vec0( + embedding float[384], + +memory_id TEXT, -- metadata column + +block_label TEXT -- for filtering +); + +-- KNN query +SELECT memory_id, distance +FROM vec_memories +WHERE embedding MATCH ? + AND block_label = 'archival' +ORDER BY distance +LIMIT 10; +``` + +--- + +## Existing v1 Tool Structure (Preserving) + +The current tools are well-designed and should be preserved with minimal changes: + +### `context` Tool +Manages **Core** and **Working** memory - blocks always/usually in context: +- `append` - Add content to a block +- `replace` - Find and replace content within a block +- `archive` - Move working memory to archival (frees context space) +- `load` - Bring archival memory into working memory +- `swap` - Exchange working and archival blocks + +### `recall` Tool +Manages **Archival** memory - long-term storage not in context: +- `insert` - Create new archival entry +- `append` - Add to existing archival entry +- `read` - Retrieve by label +- `delete` - Remove archival entry + +### `search` Tool +Unified search across domains: +- `archival_memory` - Search archival blocks +- `conversations` - Search message history +- `constellation_messages` - Search across all agents in constellation +- `all` - Search everything + +**Key insight:** The tool interface is solid. What needs to change is the storage backend and the isolation model. + +### What to Preserve +- Tool names and basic operations +- Core/Working/Archival distinction +- Search across multiple domains +- Permission system (ACL checks) + +### What to Change/Improve +- **Storage backend** - SQLite + Loro instead of SurrealDB + DashMap +- **Ownership model** - Agent-scoped instead of User-scoped +- **Versioning** - Loro gives us history/rollback for free +- **Descriptions** - Add Letta-style descriptions to guide LLM usage +- **Templates** - Structured schemas for common block patterns +- **Rolling logs** - System-maintained logs the agent doesn't manage +- **Archival entries** - Separate table from blocks for fine-grained storage +- **Search** - sqlite-vec for vectors, FTS5 for full-text (replacing SurrealDB's BM25) + +--- + +## v2 Memory Architecture + +### Core Principles + +1. **Loro documents as memory blocks** - Each memory block is a Loro document +2. **Agent-scoped, not user-scoped** - Memories belong to agents, not users (KEY CHANGE from v1) +3. **Labels with semantic descriptions** - Following Letta's pattern +4. **Versioned by default** - Every change tracked via Loro +5. 
**No in-memory cache** - SQLite + Loro documents are the source of truth +6. **Preserve tool interface** - Same tools, new backend + +### Memory Block Types + +```rust +pub enum MemoryBlockType { + /// Always in context, critical for agent identity + /// Examples: persona, human, system guidelines + Core, + + /// Working memory, can be swapped in/out based on relevance + /// Examples: scratchpad, current_task, session_notes + Working, + + /// Long-term storage, NOT in context by default + /// Retrieved via recall/search tools using semantic search + /// Examples: past conversations, learned facts, reference material + Archival, + + /// System-maintained logs (read-only to agent) + /// Recent entries shown in context, older entries searchable + /// Examples: tool_execution_log, event_log, compression_summaries + Log, +} +``` + +### Context Inclusion Model + +**Always in context:** +- `Core` blocks - Always included, agent identity depends on these +- `Log` blocks - Recent N entries included (configurable) + +**Available via tool call:** +- `Archival` blocks - Agent uses `recall` or `search` tools to retrieve +- `Working` blocks marked as swapped out + +**The key insight:** Core memory is the agent's "working memory" that's always visible. Archival memory is "long-term storage" that requires explicit retrieval. This mirrors how MemGPT/Letta works - the agent has limited context but can search its full history. + +``` +┌─────────────────────────────────────────────────────────┐ +│ Context Window │ +├─────────────────────────────────────────────────────────┤ +│ System Prompt │ +├─────────────────────────────────────────────────────────┤ +│ Core Memory Blocks (always present) │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ persona │ │ human │ │ guidelines │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +├─────────────────────────────────────────────────────────┤ +│ Working Memory (if active) │ +│ ┌─────────────┐ ┌─────────────┐ │ +│ │ scratchpad │ │current_task │ │ +│ └─────────────┘ └─────────────┘ │ +├─────────────────────────────────────────────────────────┤ +│ Recent Log Entries (last N) │ +│ - tool call: read_file("/src/main.rs") → success │ +│ - tool call: search("auth") → 3 results │ +├─────────────────────────────────────────────────────────┤ +│ Conversation Messages │ +│ (with compression/summarization as needed) │ +└─────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────┐ +│ Archival Memory (via tool access) │ +│ ┌─────────────────────────────────────────────────┐ │ +│ │ Searchable via: recall(query), search(query) │ │ +│ │ - Past conversation summaries │ │ +│ │ - Learned facts about partner │ │ +│ │ - Reference documentation │ │ +│ │ - Old log entries │ │ +│ └─────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────┘ +``` + +### Block Schema + +```rust +pub struct MemoryBlock { + /// Unique identifier + pub id: MemoryBlockId, + + /// Owning agent (NOT user - this is the key change from v1) + pub agent_id: AgentId, + + /// Semantic label: "persona", "human", "scratchpad", etc. 
+    pub label: String,
+
+    /// Description for the LLM (critical for proper usage)
+    pub description: String,
+
+    /// Block type determines context inclusion behavior
+    pub block_type: MemoryBlockType,
+
+    /// Character limit for the block
+    pub limit: usize,
+
+    /// Whether the agent can modify this block
+    pub read_only: bool,
+
+    /// The Loro document containing the block content
+    /// Stored as snapshot blob in SQLite
+    pub document: LoroDoc,
+
+    /// Embedding for semantic search (archival blocks)
+    pub embedding: Option<Vec<f32>>,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+
+    /// Last modified timestamp
+    pub updated_at: DateTime<Utc>,
+}
+```
+
+### Loro Document Structure for Blocks
+
+Each memory block's `LoroDoc` contains:
+
+```rust
+// For simple text blocks (persona, human)
+doc.get_text("content")
+
+// For structured blocks (could support complex data)
+doc.get_map("metadata")
+doc.get_text("content")
+doc.get_list("entries") // for log-style blocks
+```
+
+### Rolling Logs (System-Maintained)
+
+New concept: blocks that the system appends to, agent observes but doesn't manage:
+
+```rust
+pub struct RollingLog {
+    /// Uses LoroList under the hood
+    entries: LoroList,
+
+    /// Max entries before oldest are pruned
+    max_entries: usize,
+
+    /// Entry schema
+    entry_type: LogEntryType,
+}
+
+pub enum LogEntryType {
+    /// Tool calls and results
+    ToolExecution { tool: String, result: String, timestamp: DateTime<Utc> },
+
+    /// Significant events
+    Event { event_type: String, details: String, timestamp: DateTime<Utc> },
+
+    /// Summaries from compression
+    ContextSummary { summary: String, messages_summarized: usize, timestamp: DateTime<Utc> },
+}
+```
+
+The agent sees a read-only view of recent entries in context, but doesn't have to manage the log.
+
+### Templated Memory Blocks
+
+Pre-defined block schemas for common patterns:
+
+```rust
+pub enum BlockTemplate {
+    /// Agent's personality and behavior
+    Persona {
+        name: String,
+        traits: Vec<String>,
+        style: String,
+        guidelines: String,
+    },
+
+    /// Information about the human
+    Human {
+        name: Option<String>,
+        preferences: Vec<String>,
+        context: String,
+    },
+
+    /// Working scratchpad
+    Scratchpad {
+        current_task: Option<String>,
+        notes: String,
+    },
+
+    /// Shared organizational info (read-only)
+    Organization {
+        name: String,
+        policies: String,
+    },
+
+    /// Custom free-form
+    Custom {
+        schema: Option<serde_json::Value>,
+    },
+}
+```
+
+Templates generate the description automatically and can validate content.
+
+### Versioning & Rollback
+
+Thanks to Loro, every memory block has full history:
+
+```rust
+impl MemoryBlock {
+    /// Get all versions of this block
+    pub fn get_history(&self) -> Vec<VersionSummary> {
+        // getAllChanges is the JS API name; the Rust equivalent may differ
+        let changes = self.document.get_all_changes();
+        // Convert to version summaries
+    }
+
+    /// View block at a specific version (read-only)
+    pub fn checkout(&self, version: Frontiers) -> MemoryBlockView {
+        let mut doc = self.document.clone();
+        doc.checkout(version);
+        MemoryBlockView { doc }
+    }
+
+    /// Roll back to a previous version (creates new change)
+    pub fn rollback_to(&mut self, version: Frontiers) {
+        let old_content = self.checkout(version).content();
+        self.document.attach(); // Return to head
+        // Set content to old value - this creates a new change
+        let text = self.document.get_text("content");
+        let current_len = text.len_unicode();
+        text.delete(0, current_len);
+        text.insert(0, &old_content);
+    }
+}
+```
+
+### Shared Blocks Between Agents
+
+**Key distinction from v1:** Sharing is now *explicit and intentional*, not accidental.
+
+v1 problem: Memories were owned by the User, so any agent in the constellation could accidentally overwrite another agent's "persona" block if they used the same label.
+
+v2 solution: Memories are owned by the Agent by default. Sharing requires explicit attachment.
+
+```rust
+/// A memory block can be shared with other agents via explicit attachment
+pub struct SharedBlockAttachment {
+    /// The block being shared (has a primary owner agent)
+    pub block_id: MemoryBlockId,
+
+    /// Agent gaining access (not the owner)
+    pub agent_id: AgentId,
+
+    /// What this agent can do with the block
+    pub access: SharedAccess,
+
+    /// When the attachment was created
+    pub attached_at: DateTime<Utc>,
+}
+
+pub enum SharedAccess {
+    /// Can read but not modify
+    ReadOnly,
+    /// Can append but not overwrite
+    AppendOnly,
+    /// Full read/write access
+    ReadWrite,
+}
+```
+
+**How sharing works:**
+
+```rust
+// Agent A creates a block (A owns it)
+let block = memory_store.create_block(
+    &agent_a_id,
+    "shared_task_board",
+    "Shared task tracking for the constellation",
+    MemoryBlockType::Working,
+    "## Tasks\n- [ ] Initial task",
+).await?;
+
+// Explicitly share with Agent B (read-write)
+memory_store.share_block(
+    &block.id,
+    &agent_b_id,
+    SharedAccess::ReadWrite,
+).await?;
+
+// Share with Agent C (read-only)
+memory_store.share_block(
+    &block.id,
+    &agent_c_id,
+    SharedAccess::ReadOnly,
+).await?;
+```
+
+**Constellation-level shared blocks:**
+
+For blocks that should be visible to ALL agents in a constellation:
+
+```rust
+/// Special owner_id indicating constellation-level ownership
+pub const CONSTELLATION_OWNER: &str = "_constellation_";
+
+// Create a constellation-wide block
+let org_block = memory_store.create_constellation_block(
+    "organization",
+    "Read-only information about the organization and its policies",
+    MemoryBlockType::Core,
+    "Organization: Pattern Project\nPolicies: Be helpful, be honest...",
+    SharedAccess::ReadOnly, // All agents get this access level
+).await?;
+```
+
+**Use cases:**
+- `organization` - Read-only policies shared across all agents
+- `partner_profile` - Info about the human; all agents can read, one can write
+- `task_board` - Shared task tracking, multiple agents can write
+- `handoff_notes` - Agent A writes context for Agent B during handoffs
+
+**What this prevents:**
+- Agent A's "persona" accidentally overwriting Agent B's "persona" (different blocks now)
+- New agents inheriting memories they shouldn't have
+- Confusion about which agent's data is which
+
+---
+
+## SQLite Schema
+
+```sql
+-- Memory blocks table
+CREATE TABLE memory_blocks (
+    id TEXT PRIMARY KEY,
+    agent_id TEXT NOT NULL,
+    label TEXT NOT NULL,
+    description TEXT NOT NULL,
+    block_type TEXT NOT NULL, -- 'core', 'working', 'archival', 'log'
+    char_limit INTEGER NOT NULL DEFAULT 5000,
+    read_only INTEGER NOT NULL DEFAULT 0,
+
+    -- Loro document stored as blob
+    loro_snapshot BLOB NOT NULL,
+
+    -- For quick content access without deserializing Loro
+    content_preview TEXT, -- First N chars
+
+    created_at TEXT NOT NULL,
+    updated_at TEXT NOT NULL,
+
+    UNIQUE(agent_id, label),
+    FOREIGN KEY (agent_id) REFERENCES agents(id)
+);
+
+-- Pending updates (between snapshots)
+CREATE TABLE memory_block_updates (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    block_id TEXT NOT NULL,
+    loro_update BLOB NOT NULL,
+    created_at TEXT NOT NULL,
+    FOREIGN KEY (block_id) REFERENCES memory_blocks(id)
+);
+
+-- Vector search for archival blocks (via sqlite-vec)
+CREATE VIRTUAL TABLE memory_embeddings USING vec0(
+  embedding float[384],
+  +block_id TEXT,
+  +content_hash TEXT
+);
+
+-- Shared block attachments
+CREATE TABLE shared_block_agents (
+    block_id TEXT NOT NULL,
+    agent_id TEXT NOT NULL,
+    write_access INTEGER NOT NULL DEFAULT 0,
+    attached_at TEXT NOT NULL,
+    PRIMARY KEY (block_id, agent_id),
+    FOREIGN KEY (block_id) REFERENCES memory_blocks(id),
+    FOREIGN KEY (agent_id) REFERENCES agents(id)
+);
+
+-- Block history metadata (for UI, not full Loro history)
+CREATE TABLE memory_block_history (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    block_id TEXT NOT NULL,
+    version_frontiers TEXT NOT NULL, -- JSON serialized
+    change_summary TEXT,
+    changed_by TEXT, -- 'agent', 'user', 'system'
+    timestamp TEXT NOT NULL,
+    FOREIGN KEY (block_id) REFERENCES memory_blocks(id)
+);
+```
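+
+For reference, the KNN lookup against this schema mirrors the earlier `vec_memories` example; a sketch joining matches back to their owning blocks:
+
+```sql
+-- Top-10 nearest archival blocks for a query embedding
+SELECT b.label, b.content_preview, v.distance
+FROM memory_embeddings v
+JOIN memory_blocks b ON b.id = v.block_id
+WHERE v.embedding MATCH :query_embedding
+ORDER BY v.distance
+LIMIT 10;
+```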
+
+---
+
+## Memory Operations API
+
+```rust
+pub trait MemoryStore {
+    /// Create a new memory block for an agent
+    async fn create_block(
+        &self,
+        agent_id: &AgentId,
+        label: &str,
+        description: &str,
+        block_type: MemoryBlockType,
+        initial_content: &str,
+    ) -> Result<MemoryBlock>;
+
+    /// Get a block by agent and label
+    async fn get_block(
+        &self,
+        agent_id: &AgentId,
+        label: &str,
+    ) -> Result<Option<MemoryBlock>>;
+
+    /// Update block content (creates new Loro change)
+    async fn update_block_content(
+        &self,
+        block_id: &MemoryBlockId,
+        new_content: &str,
+        changed_by: &str,
+    ) -> Result<()>;
+
+    /// Get all blocks for an agent
+    async fn list_agent_blocks(
+        &self,
+        agent_id: &AgentId,
+    ) -> Result<Vec<MemoryBlock>>;
+
+    /// Semantic search across archival blocks
+    async fn search_archival(
+        &self,
+        agent_id: &AgentId,
+        query_embedding: &[f32],
+        limit: usize,
+    ) -> Result<Vec<ArchivalSearchResult>>;
+
+    /// Get block history
+    async fn get_block_history(
+        &self,
+        block_id: &MemoryBlockId,
+        limit: usize,
+    ) -> Result<Vec<VersionSummary>>;
+
+    /// Rollback block to previous version
+    async fn rollback_block(
+        &self,
+        block_id: &MemoryBlockId,
+        version: Frontiers,
+    ) -> Result<()>;
+
+    /// Attach shared block to agent
+    async fn attach_shared_block(
+        &self,
+        block_id: &MemoryBlockId,
+        agent_id: &AgentId,
+        write_access: bool,
+    ) -> Result<()>;
+
+    /// Insert content into archival memory
+    async fn archival_insert(
+        &self,
+        agent_id: &AgentId,
+        content: &str,
+        metadata: Option<serde_json::Value>,
+    ) -> Result<ArchivalEntryId>;
+}
+
+pub struct ArchivalSearchResult {
+    pub entry_id: ArchivalEntryId,
+    pub content: String,
+    pub metadata: Option<serde_json::Value>,
+    pub relevance_score: f32,
+    pub created_at: DateTime<Utc>,
+}
+```
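+
+A quick tour of the trait as sketched, tying in the explicit-sharing flow from earlier (`store`, `embedder`, and the agent IDs are assumed inputs; error handling elided):
+
+```rust
+async fn demo(
+    store: &impl MemoryStore,
+    embedder: &impl Embedder,
+    agent_a: &AgentId,
+    agent_b: &AgentId,
+) -> Result<()> {
+    // Agent-owned working block
+    let board = store.create_block(
+        agent_a,
+        "shared_task_board",
+        "Shared task tracking for the constellation",
+        MemoryBlockType::Working,
+        "## Tasks\n- [ ] Initial task",
+    ).await?;
+
+    // Explicit sharing: agent B gets write access
+    store.attach_shared_block(&board.id, agent_b, true).await?;
+
+    // Save a long-term fact, then retrieve it semantically
+    store.archival_insert(agent_a, "Partner prefers short check-ins before noon", None).await?;
+    let query = embedder.embed(&["morning preferences".to_string()]).await?;
+    for hit in store.search_archival(agent_a, &query[0], 5).await? {
+        println!("{:.2}: {}", hit.relevance_score, hit.content);
+    }
+    Ok(())
+}
+```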
+
+---
+
+## Agent Memory Tools
+
+The agent interacts with memory through built-in tools:
+
+### Core Memory Tools
+
+```rust
+/// Update a core memory block
+/// Only works on Core/Working blocks the agent has write access to
+pub struct CoreMemoryUpdate {
+    /// Which block to update: "persona", "human", etc.
+    pub label: String,
+    /// New content (replaces existing)
+    pub content: String,
+}
+
+/// Append to a core memory block (useful for incremental updates)
+pub struct CoreMemoryAppend {
+    pub label: String,
+    pub content: String,
+}
+```
+
+### Archival Memory Tools
+
+```rust
+/// Search archival memory using semantic similarity
+/// Returns relevant entries from long-term storage
+pub struct ArchivalSearch {
+    /// Natural language query
+    pub query: String,
+    /// Max results to return
+    pub limit: Option<usize>,
+}
+
+/// Insert new content into archival memory
+/// Use for facts worth remembering long-term
+pub struct ArchivalInsert {
+    /// Content to store
+    pub content: String,
+    /// Optional structured metadata
+    pub metadata: Option<serde_json::Value>,
+}
+```
+
+### Recall Tool
+
+```rust
+/// Recall searches BOTH conversation history AND archival memory
+/// More comprehensive than archival_search alone
+pub struct Recall {
+    /// Natural language query
+    pub query: String,
+    /// Max results
+    pub limit: Option<usize>,
+}
+```
+
+### Tool Descriptions for LLM
+
+These descriptions help the agent understand when to use each tool:
+
+```
+core_memory_update: Update your core memory blocks (persona, human, etc.).
+Use this to modify persistent information about yourself or the person
+you're talking to. Changes are immediately reflected in your context.
+
+core_memory_append: Add content to an existing core memory block without
+replacing it. Useful for incrementally building up information.
+
+archival_search: Search your long-term archival memory using semantic
+similarity. Use this when you need to recall specific facts, past
+conversations, or reference material that isn't in your current context.
+
+archival_insert: Save important information to your archival memory for
+future reference. Use this for facts worth remembering that don't belong
+in core memory. Good for: learned preferences, important events,
+reference material.
+
+recall: Comprehensive search across your conversation history AND archival
+memory. Use this when you're trying to remember something but aren't sure
+if it was in a conversation or saved to archival.
+```
+
+---
+
+## Context Building
+
+When building context for an LLM request:
+
+```rust
+pub struct ContextBuilder {
+    agent_id: AgentId,
+    memory_store: Arc<dyn MemoryStore>,
+}
+
+impl ContextBuilder {
+    pub async fn build_memory_section(&self) -> Result<String> {
+        let blocks = self.memory_store.list_agent_blocks(&self.agent_id).await?;
+
+        let mut sections = Vec::new();
+
+        for block in blocks {
+            // Skip archival blocks (retrieved on demand)
+            if block.block_type == MemoryBlockType::Archival {
+                continue;
+            }
+
+            let content = block.document.get_text("content").to_string();
+
+            // Format with label and description for the LLM
+            sections.push(format!(
+                "<{}>\n{}\n\n{}\n</{}>",
+                block.label,
+                block.description,
+                content,
+                block.label
+            ));
+        }
+
+        Ok(sections.join("\n\n"))
+    }
+}
+```
+
+---
+
+## Migration from v1
+
+Memory blocks migrate via CAR export:
+
+```rust
+// v1 export includes:
+// - Raw content
+// - Label
+// - MemoryType (maps to block_type)
+// - Pinned status (maps to Core block_type)
+// - Permission (maps to read_only)
+
+// v2 import creates:
+// - New Loro document with content
+// - Sets agent_id based on import context
+// - Generates description from label if missing
+```
+
+---
+
+## Constellation Resources
+
+Beyond agent-owned memory blocks, Pattern manages shared resources at the constellation level. This is a key differentiator from systems like Letta, which focus on single-agent memory.
+
+### Resource Types
+
+```rust
+pub enum ConstellationResource {
+    /// Shared memory blocks (explicit sharing, has an owner)
+    SharedMemory {
+        block_id: MemoryBlockId,
+        owner: AgentId,
+        access_policy: AccessPolicy,
+    },
+
+    /// Folders of files with automatic embedding
+    Folder {
+        id: FolderId,
+        name: String,
+        description: String,
+        embedding_config: EmbeddingConfig,
+    },
+
+    /// Activity stream - system-maintained, read-only to agents
+    ActivityStream {
+        context_window: usize, // recent events shown in context
+        retention: Duration,   // full history searchable
+    },
+
+    /// Cross-agent shared context (summaries, notable events)
+    SharedContext {
+        summaries: Vec<ConstellationSummary>,
+        summary_interval: Duration,
+        max_summaries: usize,
+    },
+
+    /// Coordination state - explicit shared mutable state
+    Coordination {
+        state: CoordinationState,
+    },
+
+    /// External data sources (bluesky, discord, etc.)
+    DataSource {
+        id: DataSourceId,
+        source_type: String,
+    },
+}
+```
+
+### Why Differentiate From Memory Blocks?
+
+v1 presented the activity log as a memory block, which confused agents - it *looked* like something they should edit but wasn't meant to be edited. The same problem applies to shared summaries, activity streams, etc.
+
+Clear differentiation:
+- **Memory blocks** - Agent-owned, agent-editable (with permissions)
+- **Constellation resources** - System-managed, agents consume/observe
+
+This reduces cognitive load on agents while giving them access to rich shared context.
+
+---
+
+## Folders (File Access)
+
+Inspired by Letta's filesystem, but integrated with Pattern's multi-agent model.
+
+### Folder Structure
+
+```rust
+pub struct Folder {
+    pub id: FolderId,
+    pub name: String,
+    pub description: String, // helps agents understand what's in it
+    pub path: FolderPath,    // local filesystem or virtual
+    pub embedding_config: EmbeddingConfig,
+    pub created_at: DateTime<Utc>,
+}
+
+pub enum FolderPath {
+    /// Local filesystem path
+    Local(PathBuf),
+    /// Virtual folder (files stored in DB)
+    Virtual,
+    /// Remote (future: S3, etc.)
+    Remote { url: String, credentials: String },
+}
+
+pub struct FolderFile {
+    pub id: FileId,
+    pub folder_id: FolderId,
+    pub name: String,
+    pub content_type: String,
+    pub size_bytes: u64,
+    pub uploaded_at: DateTime<Utc>,
+    pub indexed_at: Option<DateTime<Utc>>,
+}
+
+pub struct FilePassage {
+    pub id: PassageId,
+    pub file_id: FileId,
+    pub content: String,
+    pub start_line: usize,
+    pub end_line: usize,
+    pub embedding: Vec<f32>,
+}
+```
+
+### Folder Attachment
+
+When a folder is attached to an agent, they gain access to file tools:
+
+```rust
+pub struct FolderAttachment {
+    pub folder_id: FolderId,
+    pub agent_id: AgentId,
+    pub access: FolderAccess,
+    pub attached_at: DateTime<Utc>,
+}
+
+pub enum FolderAccess {
+    Read,      // can open, grep, search
+    ReadWrite, // can also upload, modify
+}
+```
+
+Tools automatically available when folders are attached:
+- `/open <file>` - Open file, show window in context
+- `/read <file> [lines]` - Read specific lines
+- `/grep <pattern> [path]` - Regex search
+- `/search <query>` - Semantic search via embeddings
+
+Multiple agents can attach to the same folder with different access levels.
+
+### File Windowing
+
+Large files aren't dumped into context. Instead, a "window" is shown:
+
+```rust
+pub struct FileWindow {
+    pub file_id: FileId,
+    pub start_line: usize,
+    pub end_line: usize,
+    pub content: String,
+    pub has_more_before: bool,
+    pub has_more_after: bool,
+}
+```
+
+Agent can navigate: `/read file.rs lines 50-100` or `/read file.rs next` to scroll.
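+
+A sketch of producing such a window (the helper is hypothetical; it clamps a 1-indexed line range to the file and reports whether more lines exist on either side):
+
+```rust
+pub fn window(file_id: FileId, content: &str, start_line: usize, end_line: usize) -> FileWindow {
+    let lines: Vec<&str> = content.lines().collect();
+    if lines.is_empty() {
+        return FileWindow {
+            file_id, start_line: 0, end_line: 0,
+            content: String::new(), has_more_before: false, has_more_after: false,
+        };
+    }
+
+    // Clamp the requested (1-indexed) range to the file
+    let start = start_line.max(1).min(lines.len());
+    let end = end_line.max(start).min(lines.len());
+
+    FileWindow {
+        file_id,
+        start_line: start,
+        end_line: end,
+        content: lines[start - 1..end].join("\n"),
+        has_more_before: start > 1,
+        has_more_after: end < lines.len(),
+    }
+}
+```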
Instead, a "window" is shown:
+
+```rust
+pub struct FileWindow {
+ pub file_id: FileId,
+ pub start_line: usize,
+ pub end_line: usize,
+ pub content: String,
+ pub has_more_before: bool,
+ pub has_more_after: bool,
+}
+```
+
+Agent can navigate: `/read file.rs lines 50-100` or `/read file.rs next` to scroll.
+
+### Future: Virtual Shell
+
+For more complex file operations, a sandboxed virtual shell could provide composable commands:
+
+```
+/sh ls project_docs/
+/sh grep -r "TODO" . | head -20
+/sh find . -name "*.rs" | wc -l
+```
+
+This is deferred for now but the folder/file infrastructure supports it.
+
+---
+
+## Activity Stream
+
+System-maintained log of constellation activity. Agents observe but don't manage.
+
+### Event Types
+
+```rust
+pub struct ActivityEvent {
+ pub id: ActivityEventId,
+ pub timestamp: DateTime<Utc>,
+ pub agent_id: Option<AgentId>, // None = system event
+ pub event_type: ActivityEventType,
+ pub details: serde_json::Value,
+}
+
+pub enum ActivityEventType {
+ // Agent lifecycle
+ AgentActivated,
+ AgentDeactivated,
+
+ // Communication
+ MessageReceived { source: String, summary: String },
+ MessageSent { target: String, summary: String },
+
+ // Tool usage
+ ToolExecuted { tool: String, success: bool },
+
+ // Memory changes
+ MemoryBlockUpdated { label: String, change_type: String },
+
+ // File access
+ FileAccessed { folder: String, path: String, operation: String },
+
+ // Coordination
+ TaskAssigned { task: String, assignee: AgentId },
+ HandoffInitiated { from: AgentId, to: AgentId },
+
+ // Notable events (flagged by agents or system)
+ Notable { description: String, importance: Importance },
+}
+```
+
+### Context Inclusion
+
+Recent activity events are included in agent context:
+
+```
+
+[2 hours ago] Flux responded to Bluesky thread about async Rust
+[4 hours ago] Entropy broke down task "refactor memory system" into 5 subtasks
+[yesterday] Partner updated persona block with new preferences
+[yesterday] Anchor flagged Flux's post for tone review
+
+```
+
+Older events are searchable via `/search activity "keyword"`.
+
+---
+
+## Shared Context
+
+Cross-agent memory for constellation coherence, especially important for agents activated infrequently. 
+
+### Structure
+
+```rust
+pub struct SharedContext {
+ /// Per-agent activity summaries
+ pub agent_summaries: HashMap<AgentId, AgentActivitySummary>,
+
+ /// Constellation-wide periodic summaries
+ pub constellation_summaries: Vec<ConstellationSummary>,
+
+ /// Key events worth long-term remembering
+ pub notable_events: Vec<NotableEvent>,
+}
+
+pub struct AgentActivitySummary {
+ pub agent_id: AgentId,
+ pub agent_name: String,
+ pub last_active: DateTime<Utc>,
+ pub recent_summary: String, // LLM-generated
+ pub generated_at: DateTime<Utc>,
+ pub messages_covered: usize,
+}
+
+pub struct ConstellationSummary {
+ pub period_start: DateTime<Utc>,
+ pub period_end: DateTime<Utc>,
+ pub summary: String,
+ pub key_decisions: Vec<String>,
+ pub open_threads: Vec<String>,
+}
+
+pub struct NotableEvent {
+ pub timestamp: DateTime<Utc>,
+ pub event_type: String,
+ pub description: String,
+ pub agents_involved: Vec<AgentId>,
+ pub importance: Importance,
+}
+
+pub enum Importance {
+ Low,
+ Medium,
+ High,
+ Critical,
+}
+```
+
+### Context Building for Returning Agents
+
+When an agent is activated after a period of inactivity:
+
+```rust
+impl SharedContextManager {
+ pub async fn build_context_for_agent(
+ &self,
+ agent_id: &AgentId,
+ ) -> Result<Option<String>> {
+ let last_active = self.db.get_agent_last_active(agent_id).await?;
+ let time_away = Utc::now() - last_active;
+
+ // Recently active - minimal or no catch-up needed
+ if time_away < Duration::hours(1) {
+ return Ok(None);
+ }
+
+ // Build catch-up context
+ let mut context = String::new();
+
+ context.push_str(&format!(
+ "## Constellation Update (you were last active {})\n\n",
+ humanize_duration(time_away)
+ ));
+
+ // What's happened since
+ let events_since = self.db
+ .get_activity_events_since(last_active)
+ .await?;
+ context.push_str(&self.format_events_summary(&events_since));
+
+ // Key decisions
+ let decisions = self.db
+ .get_notable_events_since(last_active, Importance::Medium)
+ .await?;
+ if !decisions.is_empty() {
+ context.push_str("\n### Key Decisions\n");
+ for decision in decisions {
+ context.push_str(&format!("- {}\n", decision.description));
+ }
+ }
+
+ // Per-agent summaries
+ context.push_str("\n### Agent Activity\n");
+ let summaries = self.db.get_all_agent_summaries().await?;
+ for summary in summaries {
+ if summary.agent_id != *agent_id {
+ context.push_str(&format!(
+ "**{}** (last active: {})\n{}\n\n",
+ summary.agent_name,
+ humanize_time(summary.last_active),
+ summary.recent_summary
+ ));
+ }
+ }
+
+ Ok(Some(context))
+ }
+}
+```
+
+### Example Output
+
+For an agent waking up after a week:
+
+```
+## Constellation Update (you were last active 7 days ago)
+
+### What's Happened
+- 47 messages processed across the constellation
+- Flux handled 12 Bluesky threads
+- 3 tasks completed, 2 new tasks created
+- Partner had 2 direct conversations with Entropy
+
+### Key Decisions
+- Partner decided to pause project-X until next month
+- New policy: no engagement with political content on Bluesky
+- Memory system redesign approved, work started
+
+### Open Threads
+- Partner mentioned wanting to revisit medication adjustments
+- Ongoing discussion about v2 architecture
+
+### Agent Activity
+**Flux** (last active: 2 hours ago)
+Primarily handling Bluesky engagement. Responded to threads about Rust async
+patterns and ADHD coping strategies. Tone was flagged once by Anchor.
+
+**Entropy** (last active: yesterday)
+Task breakdown focus. Created detailed subtasks for pattern v2 refactor.
+Helped partner organize project priorities.
+
+**Anchor** (last active: 3 days ago)
+Reviewed 8 of Flux's public posts. 
Flagged 2 for tone adjustment, both resolved.
+No escalations to partner needed.
+```
+
+### Summary Generation
+
+Summaries are generated by LLM calls on a schedule:
+
+```rust
+pub struct SharedContextManager {
+ summarizer: Arc<dyn Summarizer>,
+ db: DatabaseConnection,
+}
+
+impl SharedContextManager {
+ /// Called periodically or on agent deactivation
+ pub async fn refresh_agent_summary(&self, agent_id: &AgentId) -> Result<()> {
+ let since = self.db.get_last_summary_time(agent_id).await?;
+ let messages = self.db
+ .get_agent_messages_since(agent_id, since)
+ .await?;
+
+ if messages.len() < 5 {
+ return Ok(()); // not enough new activity
+ }
+
+ let summary = self.summarizer.summarize(
+ &messages,
+ "Summarize this agent's recent activity in 2-3 sentences. \
+ Focus on what they worked on and any notable outcomes."
+ ).await?;
+
+ self.db.update_agent_summary(agent_id, &summary).await
+ }
+
+ /// Called on schedule (daily/weekly)
+ pub async fn generate_constellation_summary(&self) -> Result<()> {
+ let agent_summaries = self.db.get_all_agent_summaries().await?;
+ let notable = self.db.get_recent_notable_events(50).await?;
+
+ let summary = self.summarizer.summarize_constellation(
+ &agent_summaries,
+ &notable,
+ ).await?;
+
+ self.db.insert_constellation_summary(summary).await
+ }
+}
+```
+
+---
+
+## Coordination State
+
+Explicit shared mutable state for multi-agent coordination.
+
+```rust
+pub struct CoordinationState {
+ /// Task assignments
+ pub tasks: HashMap<TaskId, TaskAssignment>,
+
+ /// Which agents are currently active
+ pub active_agents: HashSet<AgentId>,
+
+ /// Handoff notes between agents, keyed by recipient
+ pub handoff_notes: HashMap<AgentId, Vec<HandoffNote>>,
+
+ /// Custom fields (constellation-configurable)
+ pub custom: serde_json::Value,
+}
+
+pub struct TaskAssignment {
+ pub task_id: TaskId,
+ pub description: String,
+ pub assigned_to: Option<AgentId>,
+ pub status: TaskStatus,
+ pub created_at: DateTime<Utc>,
+ pub updated_at: DateTime<Utc>,
+}
+
+pub struct HandoffNote {
+ pub from_agent: AgentId,
+ pub content: String,
+ pub created_at: DateTime<Utc>,
+}
+```
+
+Agents interact via dialect:
+
+```
+/coord status # view current state
+/coord assign "fix bug" to @entropy # assign task
+/coord complete task-123 # mark done
+/coord handoff @anchor "context..." 
# leave notes for another agent +/coord note "important observation" # add to shared notes +``` + +--- + +## SQLite Schema Updates + +```sql +-- Folders +CREATE TABLE folders ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + description TEXT, + path_type TEXT NOT NULL, -- 'local', 'virtual', 'remote' + path_value TEXT, -- filesystem path or URL + embedding_model TEXT NOT NULL, + created_at TEXT NOT NULL +); + +-- Files within folders +CREATE TABLE folder_files ( + id TEXT PRIMARY KEY, + folder_id TEXT NOT NULL REFERENCES folders(id), + name TEXT NOT NULL, + content_type TEXT, + size_bytes INTEGER, + content BLOB, -- for virtual folders + uploaded_at TEXT NOT NULL, + indexed_at TEXT, + UNIQUE(folder_id, name) +); + +-- File passages (chunks with embeddings) +CREATE TABLE file_passages ( + id TEXT PRIMARY KEY, + file_id TEXT NOT NULL REFERENCES folder_files(id), + content TEXT NOT NULL, + start_line INTEGER, + end_line INTEGER, + created_at TEXT NOT NULL +); + +-- Passage embeddings (sqlite-vec) +CREATE VIRTUAL TABLE file_passage_embeddings USING vec0( + embedding float[384], + +passage_id TEXT, + +file_id TEXT, + +folder_id TEXT +); + +-- Folder attachments to agents +CREATE TABLE folder_attachments ( + folder_id TEXT NOT NULL REFERENCES folders(id), + agent_id TEXT NOT NULL REFERENCES agents(id), + access TEXT NOT NULL, -- 'read', 'read_write' + attached_at TEXT NOT NULL, + PRIMARY KEY (folder_id, agent_id) +); + +-- Activity stream +CREATE TABLE activity_events ( + id TEXT PRIMARY KEY, + timestamp TEXT NOT NULL, + agent_id TEXT REFERENCES agents(id), + event_type TEXT NOT NULL, + details TEXT NOT NULL, -- JSON + importance TEXT +); + +CREATE INDEX idx_activity_timestamp ON activity_events(timestamp); +CREATE INDEX idx_activity_agent ON activity_events(agent_id); + +-- Agent activity summaries +CREATE TABLE agent_summaries ( + agent_id TEXT PRIMARY KEY REFERENCES agents(id), + summary TEXT NOT NULL, + messages_covered INTEGER, + generated_at TEXT NOT NULL, + last_active TEXT NOT NULL +); + +-- Constellation summaries +CREATE TABLE constellation_summaries ( + id TEXT PRIMARY KEY, + period_start TEXT NOT NULL, + period_end TEXT NOT NULL, + summary TEXT NOT NULL, + key_decisions TEXT, -- JSON array + open_threads TEXT, -- JSON array + created_at TEXT NOT NULL +); + +-- Notable events +CREATE TABLE notable_events ( + id TEXT PRIMARY KEY, + timestamp TEXT NOT NULL, + event_type TEXT NOT NULL, + description TEXT NOT NULL, + agents_involved TEXT, -- JSON array of agent IDs + importance TEXT NOT NULL, + created_at TEXT NOT NULL +); + +-- Coordination state (simple key-value for flexibility) +CREATE TABLE coordination_state ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL, -- JSON + updated_at TEXT NOT NULL, + updated_by TEXT -- agent ID or 'system' +); + +-- Task assignments +CREATE TABLE tasks ( + id TEXT PRIMARY KEY, + description TEXT NOT NULL, + assigned_to TEXT REFERENCES agents(id), + status TEXT NOT NULL, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +-- Handoff notes +CREATE TABLE handoff_notes ( + id TEXT PRIMARY KEY, + from_agent TEXT NOT NULL REFERENCES agents(id), + to_agent TEXT REFERENCES agents(id), -- NULL = for anyone + content TEXT NOT NULL, + created_at TEXT NOT NULL, + read_at TEXT +); +``` + +--- + +## Open Questions + +1. **Embedding updates** - When block content changes, when do we regenerate embeddings? + - Option A: On every save (expensive) + - Option B: Background job (eventual consistency) + - Option C: On-demand when searching (lazy) + +2. 
**Loro snapshot frequency** - How often to consolidate updates into snapshots?
+ - Could be time-based (every N minutes)
+ - Or change-count based (every N updates)
+ - Or size-based (when updates exceed N bytes)
+
+3. **History retention** - How much Loro history to keep?
+ - Shallow snapshots after N days?
+ - Full history forever?
+ - Configurable per constellation?
+
+4. **Block templates** - Should templates be part of core, or an optional layer?
+
+5. **Diff visibility to agents** - Should agents see what changed between versions?
+ - Could add a "recent_changes" read-only block updated by system
+
+6. **Summary generation costs** - LLM calls for summaries add up. How to balance freshness vs cost?
+ - Generate on agent deactivation (natural breakpoint)
+ - Generate on schedule with batching
+ - Skip if activity below threshold
+
+7. **Shared context size** - How much catch-up context is too much?
+ - Configurable per agent role?
+ - Adaptive based on time away?
diff --git a/docs/refactoring/v2-migration-path.md b/docs/refactoring/v2-migration-path.md
new file mode 100644
index 0000000..028b453
--- /dev/null
+++ b/docs/refactoring/v2-migration-path.md
@@ -0,0 +1,783 @@
+# Pattern v2: Migration Path
+
+## Overview
+
+Migration from v1 to v2 uses the existing CAR (Content Addressable aRchive) export/import system. The compatibility layer lives at this boundary - v1 exports, v2 imports and transforms.
+
+## Existing CAR Export Structure
+
+v1 already has a solid export format:
+
+### Export Types
+- `Agent` - Single agent with messages and memories
+- `Group` - Agent group with member references
+- `Constellation` - Full constellation with all groups and agents
+
+### Data Structures
+
+```rust
+// Manifest (root of CAR file)
+ExportManifest {
+ version: u32, // Currently 1
+ exported_at: DateTime<Utc>,
+ export_type: ExportType,
+ stats: ExportStats,
+ data_cid: Cid, // Points to actual export data
+}
+
+// Agent export
+AgentRecordExport {
+ id, name, agent_type,
+ model_id, model_config,
+ base_instructions,
+ // ... config fields ...
+ owner_id: UserId, // v1 uses user ownership
+ message_chunks: Vec<MessageChunk>,
+ memory_chunks: Vec<MemoryChunk>,
+}
+
+// Message chunk
+MessageChunk {
+ chunk_id: u32,
+ start_position: String, // Snowflake ID
+ end_position: String,
+ messages: Vec<(Message, AgentMessageRelation)>,
+ next_chunk: Option<Cid>,
+}
+
+// Memory chunk
+MemoryChunk {
+ chunk_id: u32,
+ memories: Vec<(MemoryBlock, AgentMemoryRelation)>,
+ next_chunk: Option<Cid>,
+}
+```
+
+## Migration Strategy
+
+### Phase 1: Export from v1
+
+Use existing `pattern-cli export` commands:
+
+```bash
+# Export single agent
+pattern-cli export agent --name "MyAgent" -o agent.car
+
+# Export group
+pattern-cli export group --name "MyGroup" -o group.car
+
+# Export full constellation
+pattern-cli export constellation --name "MyConstellation" -o constellation.car
+```
+
+### Phase 2: Transform During Import
+
+The v2 importer reads v1 CAR files and transforms:
+
+```rust
+pub struct V2Importer {
+ db: ConstellationDb,
+}
+
+impl V2Importer {
+ pub async fn import_v1_car(&self, car_path: &Path) -> Result<ImportResult> {
+ // 1. Read CAR file
+ let car_reader = CarReader::new(File::open(car_path)?).await?;
+
+ // 2. Parse manifest
+ let manifest = self.read_manifest(&car_reader).await?;
+
+ // 3. 
Transform based on export type + match manifest.export_type { + ExportType::Agent => self.import_agent(&car_reader, &manifest).await, + ExportType::Group => self.import_group(&car_reader, &manifest).await, + ExportType::Constellation => self.import_constellation(&car_reader, &manifest).await, + } + } +} +``` + +### Key Transformations + +#### 1. Memory Ownership Change + +v1: Memories owned by User, accessed by Agent via relation +v2: Memories owned by Agent directly + +```rust +fn transform_memory( + v1_memory: &V1MemoryBlock, + v1_relation: &AgentMemoryRelation, + target_agent_id: &AgentId, +) -> V2MemoryBlock { + // Create Loro document with content + let doc = LoroDoc::new(); + doc.get_text("content").insert(0, &v1_memory.value); + + V2MemoryBlock { + id: MemoryBlockId::generate(), + agent_id: target_agent_id.clone(), // Now agent-owned + label: v1_memory.label.clone(), + description: generate_description(&v1_memory.label), // Add description + block_type: map_memory_type(v1_memory.memory_type), + char_limit: 5000, // Default + read_only: v1_relation.access_level < MemoryPermission::Append, + loro_snapshot: doc.export(ExportMode::Snapshot), + content_preview: truncate(&v1_memory.value, 200), + created_at: v1_memory.created_at, + updated_at: v1_memory.updated_at, + } +} + +fn generate_description(label: &str) -> String { + match label { + "persona" => "Stores details about your current persona, guiding how you behave and respond.".into(), + "human" => "Stores key details about the person you are conversing with.".into(), + label if label.starts_with("archival_") => format!("Archival memory entry: {}", label), + _ => format!("Memory block: {}", label), + } +} + +fn map_memory_type(v1_type: V1MemoryType) -> V2MemoryBlockType { + match v1_type { + V1MemoryType::Core => V2MemoryBlockType::Core, + V1MemoryType::Working => V2MemoryBlockType::Working, + V1MemoryType::Archival => V2MemoryBlockType::Archival, + } +} +``` + +#### 2. Message Migration + +Messages stay largely the same, but move to SQLite: + +```rust +fn transform_message( + v1_msg: &V1Message, + v1_relation: &AgentMessageRelation, + target_agent_id: &AgentId, +) -> V2Message { + V2Message { + id: v1_msg.id.to_string(), + agent_id: target_agent_id.clone(), + position: v1_relation.position.to_string(), + batch_id: v1_relation.batch.map(|b| b.to_string()), + sequence_in_batch: v1_relation.sequence_num, + role: v1_msg.role.to_string(), + content: v1_msg.content.clone(), + tool_call_id: v1_msg.tool_call_id.clone(), + tool_name: v1_msg.tool_name.clone(), + tool_args: v1_msg.tool_args.clone(), + tool_result: v1_msg.tool_result.clone(), + source: None, // Lost in v1 export + source_metadata: None, + is_archived: v1_relation.message_type == MessageRelationType::Archived, + created_at: v1_msg.created_at.to_rfc3339(), + } +} +``` + +#### 3. 
Agent Configuration
+
+Agent config maps mostly 1:1, but stored differently:
+
+```rust
+fn transform_agent(v1_agent: &AgentRecordExport) -> V2Agent {
+ V2Agent {
+ id: v1_agent.id.to_string(),
+ name: v1_agent.name.clone(),
+ description: None, // New field, not in v1
+ model_provider: extract_provider(&v1_agent.model_id),
+ model_name: extract_model(&v1_agent.model_id),
+ system_prompt: v1_agent.base_instructions.clone(),
+ config: json!({
+ "max_messages": v1_agent.max_messages,
+ "max_message_age_hours": v1_agent.max_message_age_hours,
+ "compression_threshold": v1_agent.compression_threshold,
+ "memory_char_limit": v1_agent.memory_char_limit,
+ "enable_thinking": v1_agent.enable_thinking,
+ }),
+ enabled_tools: vec!["context", "recall", "search", "send_message"], // Defaults
+ tool_rules: transform_tool_rules(&v1_agent.tool_rules),
+ status: "active".into(),
+ created_at: v1_agent.created_at.to_rfc3339(),
+ updated_at: v1_agent.updated_at.to_rfc3339(),
+ }
+}
+```
+
+#### 4. Group/Constellation Structure
+
+Groups and constellations map cleanly:
+
+```rust
+fn transform_constellation(
+ v1_const: &V1Constellation,
+ v1_groups: &[V1GroupExport],
+) -> (V2Constellation, Vec<V2Group>) {
+ // Constellation becomes the database directory
+ let constellation = V2Constellation {
+ id: v1_const.id.to_string(),
+ owner_id: v1_const.owner_id.to_string(),
+ name: v1_const.name.clone(),
+ db_path: format!("constellations/{}", v1_const.id),
+ created_at: v1_const.created_at.to_rfc3339(),
+ last_accessed_at: Utc::now().to_rfc3339(),
+ };
+
+ let groups = v1_groups.iter().map(|g| transform_group(g)).collect();
+
+ (constellation, groups)
+}
+```
+
+### Phase 3: Verification
+
+After import, verify data integrity:
+
+```rust
+pub struct ImportVerifier {
+ db: ConstellationDb,
+}
+
+impl ImportVerifier {
+ pub async fn verify(&self, result: &ImportResult) -> Result<VerificationReport> {
+ let mut report = VerificationReport::default();
+
+ // Check agent count matches
+ let agents = sqlx::query!("SELECT COUNT(*) as count FROM agents")
+ .fetch_one(self.db.pool())
+ .await?;
+ report.agents_imported = agents.count as usize;
+ report.agents_expected = result.expected_agents;
+
+ // Check message count
+ let messages = sqlx::query!("SELECT COUNT(*) as count FROM messages")
+ .fetch_one(self.db.pool())
+ .await?;
+ report.messages_imported = messages.count as usize;
+ report.messages_expected = result.expected_messages;
+
+ // Check memory blocks
+ let blocks = sqlx::query!("SELECT COUNT(*) as count FROM memory_blocks")
+ .fetch_one(self.db.pool())
+ .await?;
+ report.blocks_imported = blocks.count as usize;
+ report.blocks_expected = result.expected_blocks;
+
+ // Sample content verification
+ report.sample_checks = self.verify_samples(result).await?;
+
+ Ok(report)
+ }
+}
+```
+
+## CLI Commands
+
+```bash
+# v2 import command
+pattern-cli import --from v1.car --constellation "MyConstellation"
+
+# With verification
+pattern-cli import --from v1.car --constellation "MyConstellation" --verify
+
+# Dry run (parse and transform, don't write)
+pattern-cli import --from v1.car --dry-run
+
+# Import with explicit version
+pattern-cli import --from v1.car --version 1
+```
+
+## Export Version Bumping
+
+v2 exports will use version 2 format:
+
+```rust
+pub const EXPORT_VERSION: u32 = 2;
+
+// v2 export changes:
+// - Includes Loro snapshots instead of raw content
+// - Agent-scoped memories (no separate relation)
+// - SQLite-native types
+```
+
+v2 importer will handle both:
+
+```rust
+match manifest.version {
+ 1 => 
self.import_v1(&car_reader).await,
+ 2 => self.import_v2(&car_reader).await,
+ v => Err(CoreError::UnsupportedExportVersion(v)),
+}
+```
+
+## Rollback Strategy
+
+If v2 import fails or has issues:
+
+1. Original v1 CAR file is preserved (never modified)
+2. v2 constellation DB can be deleted and re-imported
+3. v1 system remains functional until migration verified
+
+```bash
+# Keep v1 running alongside v2 during transition
+pattern-cli-v1 chat --agent "MyAgent" # Uses SurrealDB
+pattern-cli-v2 chat --agent "MyAgent" # Uses SQLite
+
+# Once verified, decommission v1
+```
+
+## Interactive Migration
+
+v1 accumulated data quality issues that can't be automatically fixed:
+- Memory cross-contamination between agents
+- Attribution errors (wrong agent_id on memories)
+- Duplicate/conflicting blocks with same label
+- Orphaned data without valid agent references
+
+The migration tool provides an **interactive review mode** to resolve these.
+
+### Issue Detection
+
+During import, the migrator scans for problems:
+
+```rust
+pub struct MigrationIssue {
+ pub id: IssueId,
+ pub severity: IssueSeverity,
+ pub issue_type: IssueType,
+ pub description: String,
+ pub affected_items: Vec<String>,
+ pub suggested_actions: Vec<SuggestedAction>,
+}
+
+pub enum IssueSeverity {
+ /// Blocks import until resolved
+ Critical,
+ /// Should review, has default resolution
+ Warning,
+ /// Informational, auto-resolved
+ Info,
+}
+
+pub enum IssueType {
+ /// Same label exists for multiple agents
+ DuplicateLabel {
+ label: String,
+ agents: Vec<AgentId>,
+ contents: Vec<String>,
+ },
+
+ /// Memory content suggests wrong attribution
+ SuspiciousAttribution {
+ memory_id: MemoryBlockId,
+ current_agent: AgentId,
+ likely_agent: AgentId,
+ evidence: String, // why we think it's wrong
+ },
+
+ /// Memory references agent that doesn't exist
+ OrphanedMemory {
+ memory_id: MemoryBlockId,
+ referenced_agent: String,
+ },
+
+ /// Message has no valid agent reference
+ OrphanedMessage {
+ message_id: MessageId,
+ context: String,
+ },
+
+ /// Persona/human block has content from wrong agent
+ CrossContamination {
+ block_label: String,
+ owning_agent: AgentId,
+ contaminating_agent: AgentId,
+ contaminated_content: String,
+ },
+
+ /// Content looks corrupted or truncated
+ CorruptContent {
+ item_type: String,
+ item_id: String,
+ issue: String,
+ },
+
+ /// Timestamp ordering issues
+ TimestampAnomaly {
+ item_type: String,
+ description: String,
+ },
+}
+
+pub enum SuggestedAction {
+ /// Keep as-is, assign to this agent
+ AssignTo(AgentId),
+ /// Delete this item
+ Delete,
+ /// Merge with another item
+ MergeWith(String),
+ /// Split into multiple items
+ Split(Vec<String>),
+ /// Keep both versions
+ KeepBoth,
+ /// Manual edit required
+ ManualEdit,
+ /// Skip/ignore
+ Skip,
+}
+```
+
+### Detection Heuristics
+
+```rust
+impl MigrationAnalyzer {
+ /// Detect suspicious attribution based on content
+ fn detect_attribution_issues(&self, memories: &[V1MemoryExport]) -> Vec<MigrationIssue> {
+ let mut issues = Vec::new();
+
+ for memory in memories {
+ // Check if persona block mentions another agent's name
+ if memory.label == "persona" {
+ for agent in &self.all_agents {
+ if agent.id != memory.agent_id
+ && memory.value.contains(&agent.name)
+ && memory.value.contains("I am")
+ {
+ issues.push(MigrationIssue {
+ severity: IssueSeverity::Warning,
+ issue_type: IssueType::SuspiciousAttribution {
+ memory_id: memory.id.clone(),
+ current_agent: memory.agent_id.clone(),
+ likely_agent: agent.id.clone(),
+ evidence: format!(
+ "Persona block says 'I am' and mentions '{}' but 
belongs to '{}'",
+ agent.name,
+ self.get_agent_name(&memory.agent_id)
+ ),
+ },
+ suggested_actions: vec![
+ SuggestedAction::AssignTo(agent.id.clone()),
+ SuggestedAction::Delete,
+ SuggestedAction::ManualEdit,
+ ],
+ // ...
+ });
+ }
+ }
+ }
+
+ // Check for duplicate labels across agents
+ let same_label: Vec<_> = memories.iter()
+ .filter(|m| m.label == memory.label && m.id != memory.id)
+ .collect();
+
+ if !same_label.is_empty() && memory.label != "human" && memory.label != "persona" {
+ // Duplicate archival labels are suspicious
+ issues.push(MigrationIssue {
+ severity: IssueSeverity::Warning,
+ issue_type: IssueType::DuplicateLabel {
+ label: memory.label.clone(),
+ agents: same_label.iter().map(|m| m.agent_id.clone()).collect(),
+ contents: same_label.iter().map(|m| truncate(&m.value, 100)).collect(),
+ },
+ // ...
+ });
+ }
+ }
+
+ issues
+ }
+
+ /// Detect cross-contamination patterns
+ fn detect_cross_contamination(&self, memories: &[V1MemoryExport]) -> Vec<MigrationIssue> {
+ let mut issues = Vec::new();
+
+ // Group by label
+ let by_label: HashMap<String, Vec<&V1MemoryExport>> = memories.iter()
+ .fold(HashMap::new(), |mut acc, m| {
+ acc.entry(m.label.clone()).or_default().push(m);
+ acc
+ });
+
+ // For core blocks, check if content matches the owning agent
+ for (label, blocks) in &by_label {
+ if label == "persona" || label == "human" {
+ for block in blocks {
+ let agent = self.get_agent(&block.agent_id);
+
+ // Persona should reference the agent's own name
+ if label == "persona" && !block.value.to_lowercase().contains(&agent.name.to_lowercase()) {
+ // Might be contaminated - check if it matches another agent
+ for other_agent in &self.all_agents {
+ if other_agent.id != agent.id
+ && block.value.to_lowercase().contains(&other_agent.name.to_lowercase())
+ {
+ issues.push(MigrationIssue {
+ severity: IssueSeverity::Critical,
+ issue_type: IssueType::CrossContamination {
+ block_label: label.clone(),
+ owning_agent: agent.id.clone(),
+ contaminating_agent: other_agent.id.clone(),
+ contaminated_content: truncate(&block.value, 200),
+ },
+ // ...
+ });
+ }
+ }
+ }
+ }
+ }
+ }
+
+ issues
+ }
+}
+```
+
+### Interactive Review UI
+
+The CLI provides an interactive review session:
+
+```
+$ pattern-cli import --from constellation.car --interactive
+
+Analyzing export file...
+Found: 5 agents, 2847 messages, 156 memory blocks
+
+Detected 7 issues requiring review:
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+[1/7] CRITICAL: Cross-contamination detected
+
+Block: persona (owned by: Flux)
+Content preview:
+ "I am Entropy, a task-focused agent specializing in breaking down
+ complex problems into manageable steps..."
+
+This persona block belongs to Flux but contains Entropy's identity.
+
+Options:
+ [1] Reassign to Entropy
+ [2] Delete this block (Flux will get default persona)
+ [3] Edit content manually
+ [4] Keep as-is (not recommended)
+ [?] Show full content
+
+Your choice: 1
+
+✓ Will reassign to Entropy
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+[2/7] WARNING: Duplicate label across agents
+
+Label: "project_notes"
+Found in:
+ - Flux: "## Project Status\n- Working on bluesky integration..."
+ - Entropy: "## Project Status\n- Task breakdown complete..." 
+ +These might be: + - Intentionally separate (each agent's own notes) + - Accidentally duplicated (should be one shared block) + +Options: + [1] Keep both as separate agent-owned blocks + [2] Merge into shared block (pick primary owner) + [3] Keep Flux's version, delete Entropy's + [4] Keep Entropy's version, delete Flux's + [5] View full content of each + [?] Help + +Your choice: 1 + +✓ Will keep as separate blocks + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +[3/7] WARNING: Suspicious attribution + +Block: archival_user_preferences (owned by: Anchor) +Content preview: + "User prefers: morning standups, async communication, + detailed task breakdowns from Entropy..." + +This archival block mentions Entropy's role but is owned by Anchor. +Possibly should belong to Entropy or be a shared block. + +Options: + [1] Keep with Anchor (constellation-wide info is reasonable) + [2] Reassign to Entropy + [3] Convert to shared block (Anchor owns, all can read) + [4] Delete + [?] Show full content + +Your choice: 3 + +✓ Will convert to shared block + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +... (4 more issues) + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +Review complete. Summary of changes: + - 2 blocks reassigned + - 1 block converted to shared + - 3 blocks kept as-is + - 1 block deleted + +Proceed with import? [y/N]: y + +Importing... +✓ Created constellation database +✓ Imported 5 agents +✓ Imported 2847 messages +✓ Imported 155 memory blocks (1 deleted per review) +✓ Created 1 shared block attachment +✓ Generated embeddings for archival blocks + +Import complete! +``` + +### Non-Interactive Mode + +For scripted migrations, issues can be exported and resolved via config: + +```bash +# Export issues to JSON +pattern-cli import --from constellation.car --analyze-only > issues.json + +# Edit issues.json to add resolutions... + +# Import with resolutions +pattern-cli import --from constellation.car --resolutions issues.json +``` + +Resolution file format: + +```json +{ + "resolutions": [ + { + "issue_id": "issue_001", + "action": "assign_to", + "target_agent": "entropy_abc123" + }, + { + "issue_id": "issue_002", + "action": "keep_both" + }, + { + "issue_id": "issue_003", + "action": "delete" + }, + { + "issue_id": "issue_004", + "action": "manual_edit", + "new_content": "Corrected content here..." + } + ] +} +``` + +### Batch Operations + +For constellations with many similar issues: + +``` +$ pattern-cli import --from constellation.car --interactive + +Found 47 duplicate "archival_*" labels across agents. + +Apply batch resolution? 
+ [1] Keep all as separate agent-owned blocks
+ [2] Review each individually
+ [3] Delete all duplicates (keep first occurrence)
+
+Your choice: 1
+
+✓ Applied to 47 items
+
+Remaining issues: 3 (require individual review)
+```
+
+### Audit Log
+
+All migration decisions are logged:
+
+```rust
+pub struct MigrationAuditEntry {
+ pub timestamp: DateTime<Utc>,
+ pub issue_id: IssueId,
+ pub issue_type: String,
+ pub resolution: String,
+ pub affected_items: Vec<String>,
+ pub resolved_by: String, // "user", "auto", "batch"
+}
+```
+
+Stored in the constellation DB for future reference:
+
+```sql
+CREATE TABLE migration_audit (
+ id TEXT PRIMARY KEY,
+ imported_at TEXT NOT NULL,
+ source_file TEXT NOT NULL,
+ source_version INTEGER NOT NULL,
+ issues_found INTEGER NOT NULL,
+ issues_resolved INTEGER NOT NULL,
+ audit_log JSON NOT NULL -- Full decision log
+);
+```
+
+### Post-Migration Cleanup Tools
+
+After import, additional tools help clean up:
+
+```bash
+# Find remaining anomalies
+pattern-cli db analyze --constellation "MyConstellation"
+
+# Bulk reassign memories
+pattern-cli db reassign-memories --from-agent flux --to-agent entropy --label "task_*"
+
+# Merge duplicate blocks
+pattern-cli db merge-blocks --keep block_id_1 --merge block_id_2
+
+# Delete orphaned data
+pattern-cli db cleanup --remove-orphans --dry-run
+pattern-cli db cleanup --remove-orphans
+
+# Regenerate embeddings for specific blocks
+pattern-cli db reindex --agent flux --type archival
+```
+
+---
+
+## Known Limitations
+
+1. **Embeddings** - May need regeneration if model changed
+2. **Live queries** - v1 subscriptions don't migrate (not persisted)
+3. **Timestamps** - Some precision may be lost in conversion
+4. **Custom metadata** - Unrecognized fields in v1 config will be dropped
+5. **Semantic analysis limits** - Heuristics can't catch all attribution errors; user review is essential
+
+## Migration Checklist
+
+- [ ] Export all constellations from v1
+- [ ] Backup v1 SurrealDB (just in case)
+- [ ] Install v2 pattern-cli
+- [ ] Run analysis on each export: `pattern-cli import --analyze-only`
+- [ ] Review and resolve issues interactively or via resolution file
+- [ ] Import each constellation with `--interactive` or `--resolutions`
+- [ ] Verify import counts match
+- [ ] Run `pattern-cli db analyze` on imported constellation
+- [ ] Test agent interactions
+- [ ] Regenerate embeddings if needed: `pattern-cli db reindex`
+- [ ] Update any external integrations (Discord bot config, etc.)
+- [ ] Keep v1 running in parallel during verification period
+- [ ] Decommission v1 after verification complete
diff --git a/docs/refactoring/v2-overview.md b/docs/refactoring/v2-overview.md
new file mode 100644
index 0000000..b8b22cb
--- /dev/null
+++ b/docs/refactoring/v2-overview.md
@@ -0,0 +1,159 @@
+# Pattern v2: Architecture Rework
+
+## Background
+
+Pattern v1 was written in "a fit of madness" - rapid prototyping that taught us what the actual requirements are. This document captures the vision for v2, which takes those lessons and rebuilds the core data layer properly.
+
+## Problems with v1
+
+### Database (SurrealDB)
+
+1. **Global memory live query** - `subscribe_to_agent_memory_updates` ignores agent_id entirely, watching ALL memory blocks. Any memory update from any agent triggers updates to all agents.
+
+2. **No row-level security** - All isolation is application-level query filtering. One bad query = data bleed between agents.
+
+3. **Memory ownership confusion** - Memories belong to User, not Agent. 
Multiple agents can share memory blocks if labels collide. + +4. **Edge direction inconsistencies** - `group_members` is backwards from all other edge patterns. + +5. **Accumulated gotchas** - Datetime serialization issues, ID bracket parsing, LIVE SELECT parameter limitations, etc. + +6. **Custom entity macro nightmare** - `#[derive(Entity)]` was built to work around SurrealDB pain points but became its own source of complexity. + +### Memory System (DashMap-based) + +1. **In-memory caching creates sync problems** - DashMap holds memory state, DB holds another copy, they can desync. + +2. **Namespace collisions** - Same label across agents in constellation = last write wins. + +3. **No history/versioning** - Memory updates are destructive, no way to see what changed or roll back. + +4. **Agents must manually maintain logs** - No system-level rolling logs. + +5. **No templated/structured memories** - Everything is opaque strings. + +## v2 Goals + +### Database Layer + +- **SQLite + sqlx** - Boring, reliable, well-understood +- **One database per constellation** - Physical isolation, cross-contamination impossible at storage level +- **Vector search via sqlite-vec** - Extension-based, proven approach +- **No custom entity macro** - Just sqlx queries, maybe light derive helpers +- **Simple relational model** - Foreign keys and junction tables, no graph magic + +### Memory Layer + +- **Loro CRDT-backed documents** - Every edit tracked, mergeable, time-travel capable +- **No in-memory caching** - DB is single source of truth, read when needed, write immediately +- **Versioned memories** - Agents can see edit history, roll back bad updates +- **Templated memories** - Structured schemas for common patterns +- **Rolling logs** - System-maintained, agents observe but don't manage +- **Diffing/rollback exposed to users** - Maybe to agents too + +### Agent Runtime + +- **Pattern owns LLM calls** - Not an MCP tool bolted onto something else +- **Context building stays internal** - Deep integration into the context maintenance process +- **Coding harness** - Pattern can be a coding agent runtime +- **ACP support** - Agent Client Protocol for editor integration (Zed, JetBrains, Neovim, etc.) + - Pattern implements the `Agent` trait from `agent-client-protocol` crate + - Runs as subprocess, communicates via JSON-RPC over stdio + - Editors can spawn Pattern agents and interact through standard protocol + - See v2-api-surface.md for details + +### Interface Architecture + +v1 had the CLI as the primary interface - it directly instantiated agents, hit the DB, made LLM calls. The server was a stub for future multi-user hosting. + +v2 rethinks this: + +- **CLI remains important** - Trusted local interaction point. Agent knows CLI input is from their partner human, unlike Discord/Bluesky input from strangers. +- **Server as coordination layer** - HTTP API for external integrations, multi-user hosting, but not the only way to run agents +- **Remote presence connectors** - For coding harness and ACP, agents on a server need to reach into the partner's local environment: + - Read/write files on partner's machine + - Connect to local editor instances (LSP integration) + - CLI could work over such a connector too + - Enables "agent runs on server, acts on local dev environment" model + +The trust model matters: CLI = partner (trusted), Discord/Bluesky = conversant (verify), Remote connector = partner's environment (trusted proxy). 
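+
+A rough sketch of how that source-to-trust mapping could look (the `TrustLevel` enum is detailed in v2-remote-presence.md; `InputSource` and its variants here are illustrative, not settled API):
+
+```rust
+/// Illustrative only: where input came from determines how much
+/// the agent is allowed to do in response to it.
+pub enum InputSource {
+    Cli,
+    RemoteConnector { authenticated_as_partner: bool },
+    Discord,
+    Bluesky,
+}
+
+impl InputSource {
+    pub fn trust_level(&self) -> TrustLevel {
+        match self {
+            // The local CLI and an authenticated connector both speak for the partner
+            InputSource::Cli => TrustLevel::Partner,
+            InputSource::RemoteConnector { authenticated_as_partner: true } => TrustLevel::Partner,
+            // An unauthenticated connector gets nothing
+            InputSource::RemoteConnector { .. } => TrustLevel::Untrusted,
+            // Strangers on social platforms are conversants
+            InputSource::Discord | InputSource::Bluesky => TrustLevel::Conversant,
+        }
+    }
+}
+```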
+ +## Non-Goals + +- Backwards compatibility at the API level (CAR export/import is the migration path) +- Supporting SurrealDB alongside SQLite +- Preserving the entity macro system + +## Migration Path + +Existing constellations migrate via: +1. CAR file export from v1 +2. CAR file import to v2 + +The compatibility layer lives at the export/import boundary, not in the runtime. + +## Key Design Decisions + +### One DB Per Constellation + +Benefits: +- Physical isolation - no query can accidentally leak across constellations +- SQLite's single-writer limitation becomes a non-issue (one constellation = one writer) +- Easy backup/restore per constellation +- Natural sharding if we ever need horizontal scale + +Trade-offs: +- Cross-constellation queries require opening multiple DBs +- Slightly more complex connection management + +### Loro for Memory Documents + +Benefits: +- Built-in versioning and history +- CRDT means eventual consistency if we ever need it +- Rich document primitives (text, lists, maps, trees) +- Diff visibility built-in + +Trade-offs: +- Another dependency +- Learning curve for the Loro model +- Need to understand how it persists/snapshots + +### No In-Memory Cache + +Benefits: +- Single source of truth +- No sync bugs possible +- Simpler mental model + +Trade-offs: +- More DB reads +- Need to think about query efficiency + +## Open Questions + +1. ~~How does Loro persistence work?~~ **Answered**: Snapshots + delta updates, both as byte blobs. We store snapshots in SQLite, updates in a separate table, consolidate periodically. +2. ~~What memory primitives does Letta use?~~ **Answered**: Labeled blocks with descriptions, read-only option, shared blocks between agents. See v2-memory-system.md. +3. How do we handle the constellation-level shared state (activity tracker, etc.)? +4. What's the right API surface for the HTTP server? +5. Should agents have write access to their own version history, or is that user-only? +6. Remote presence connector protocol - what does the interface look like? +7. Trust levels for different input sources - how does this affect tool access? + +## Related Documents + +- [v2-database-design.md](./v2-database-design.md) - SQLite schema, sqlx patterns +- [v2-memory-system.md](./v2-memory-system.md) - Loro integration, templates, shared context +- [v2-migration-path.md](./v2-migration-path.md) - CAR export/import, interactive migration +- [v2-api-surface.md](./v2-api-surface.md) - HTTP endpoints, ACP integration, deployment modes +- [v2-remote-presence.md](./v2-remote-presence.md) - iroh-based connector protocol +- [v2-pattern-dialect.md](./v2-pattern-dialect.md) - Action language for agents +- [v2-dialect-implementation.md](./v2-dialect-implementation.md) - Dialect parser implementation notes + +### Future (v2.1+) + +- [v2-constellation-forking.md](./v2-constellation-forking.md) - Explicit fork/merge for isolated work + +## Existing Infrastructure to Preserve/Adapt + +- `realtime.rs` - Event sink/tap system for streaming agent responses to multiple consumers. Good foundation for remote presence streaming. diff --git a/docs/refactoring/v2-pattern-db-status.md b/docs/refactoring/v2-pattern-db-status.md new file mode 100644 index 0000000..a301e9e --- /dev/null +++ b/docs/refactoring/v2-pattern-db-status.md @@ -0,0 +1,186 @@ +# Pattern v2: pattern_db Implementation Status + +## Overview + +`pattern_db` is the new SQLite-based storage backend for Pattern v2, replacing the SurrealDB-based system. It uses sqlx with compile-time query checking. 
+
+**Current State**: Core implementation complete, ready for integration.
+
+## What's Implemented
+
+### Crate Structure
+
+```
+crates/pattern_db/
+├── src/
+│ ├── lib.rs # Public exports
+│ ├── connection.rs # ConstellationDb, pool management
+│ ├── error.rs # DbError, DbResult types
+│ ├── models/
+│ │ ├── mod.rs # Re-exports
+│ │ ├── agent.rs # Agent, AgentGroup, GroupMember, AgentStatus
+│ │ ├── memory.rs # MemoryBlock, MemoryBlockUpdate, BlockType, etc.
+│ │ ├── message.rs # Message, ArchiveSummary, MessageRole
+│ │ └── coordination.rs # ActivityEvent, AgentSummary, ConstellationSummary, etc.
+│ └── queries/
+│ ├── mod.rs # Re-exports
+│ ├── agent.rs # CRUD for agents, groups, members
+│ ├── memory.rs # CRUD for memory blocks
+│ ├── message.rs # Message queries with batching
+│ └── coordination.rs # Activity events, summaries, tasks, handoffs
+├── migrations/
+│ └── 0001_initial.sql # Full schema creation
+├── .env # DATABASE_URL for dev
+├── .gitignore # Ignores dev.db
+└── Cargo.toml
+```
+
+### Models
+
+All models use `sqlx::FromRow` with proper type mappings:
+
+| Model | Key Features |
+|-------|--------------|
+| `Agent` | JSON fields via `sqlx::types::Json` for config, tools, rules |
+| `AgentGroup` | Coordination patterns stored as JSON |
+| `MemoryBlock` | Loro snapshots as `Vec<u8>` blobs |
+| `Message` | Snowflake ID ordering, batch tracking, tool call/response pairing |
+| `ActivityEvent` | Importance levels as sqlx enum |
+| `ConstellationSummary` | JSON arrays for key_decisions, open_threads |
+
+### Queries
+
+Most queries use compile-time checked macros (`sqlx::query!`, `sqlx::query_as!`):
+
+- **agent.rs**: Full CRUD, group management, status updates
+- **memory.rs**: Block CRUD, shared blocks, updates, history
+- **message.rs**: Message insert/query, batch operations, archival
+- **coordination.rs**: Activity events, summaries, tasks, handoffs (uses runtime queries)
+
+### Schema
+
+The migration creates all tables from v2-database-design.md:
+
+- Core: `agents`, `agent_groups`, `group_members`
+- Memory: `memory_blocks`, `memory_block_updates`, `shared_block_agents`, `memory_block_history`
+- Messages: `messages`, `archive_summaries`
+- Coordination: `activity_events`, `agent_summaries`, `constellation_summaries`, `notable_events`, `coordination_state`, `coordination_tasks`, `handoff_notes`
+
+**Not yet implemented** (requires sqlite-vec extension):
+- `memory_embeddings` (virtual table)
+- `message_embeddings` (virtual table)
+- `memory_fts`, `message_fts` (FTS5)
+
+### Build System
+
+- **Offline mode**: `.sqlx/` contains cached query metadata for CI builds
+- **Dev database**: `dev.db` in crate root for compile-time checking
+- **Editor integration**: Set `SQLX_OFFLINE=true` in environment to silence LSP false positives
+
+## Technical Decisions
+
+### JSON Fields
+
+Using `sqlx::types::Json` wrapper instead of `#[sqlx(json)]` attribute:
+
+```rust
+pub struct Agent {
+ pub config: Json<serde_json::Value>,
+ pub enabled_tools: Json<Vec<String>>,
+ pub tool_rules: Option<Json<Vec<ToolRule>>>,
+}
+```
+
+### SQLite Integer Types
+
+All integer columns use `i64` (SQLite INTEGER is always 64-bit internally). 
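+
+For illustration (hypothetical struct, not one of the crate's actual models):
+
+```rust
+/// SQLite INTEGER is a 64-bit signed value, so counts and sizes map to i64
+/// even when the logical type is smaller or unsigned.
+pub struct FileRow {
+    pub size_bytes: i64,  // not u64: SQLite has no unsigned integer type
+    pub chunk_count: i64, // not u32: avoids lossy casts at the query boundary
+}
+```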
+ +### Column Type Annotations + +For compile-time macros with nullable or custom-typed columns: + +```rust +sqlx::query_as!( + Agent, + r#"SELECT + id as "id!", + status as "status!: AgentStatus", + config as "config!: _" + FROM agents WHERE id = ?"#, + id +) +``` + +### Connection Management + +`ConstellationDb` wraps a `SqlitePool` with: +- WAL mode for concurrent reads +- 5 max connections (SQLite is single-writer) +- Automatic migration on open +- Pragmas for performance (cache_size, mmap_size, etc.) + +## What's Missing + +### High Priority + +1. **Integration with pattern_core** - Replace SurrealDB calls with pattern_db +2. **sqlite-vec extension** - Vector tables for semantic search +3. **FTS5 setup** - Full-text search tables and triggers +4. **More tests** - Query function unit tests, integration tests + +### Medium Priority + +1. **coordination.rs macro conversion** - Currently uses runtime queries +2. **Archival entries table/queries** - Defined in schema, not in models +3. **Data sources table/queries** - Defined in schema, not in models +4. **Folders/file passages** - Defined in schema, not in models + +### Lower Priority + +1. **Global database** - User accounts, constellation registry (server mode) +2. **Migration tooling** - CAR import to new schema +3. **Backup/vacuum commands** - CLI integration + +## Usage + +### Development + +```bash +# Create/update dev database +cd crates/pattern_db +sqlx database create +sqlx migrate run + +# Regenerate offline data after query changes +DATABASE_URL="sqlite:crates/pattern_db/dev.db" cargo sqlx prepare --workspace + +# Build (uses offline data) +SQLX_OFFLINE=true cargo check -p pattern_db +``` + +### In Code + +```rust +use pattern_db::{ConstellationDb, queries, models::*}; + +// Open database (creates if missing, runs migrations) +let db = ConstellationDb::open("path/to/constellation.db").await?; + +// Create an agent +let agent = Agent { /* ... */ }; +queries::agent::create_agent(db.pool(), &agent).await?; + +// Query messages with batch ordering +let messages = queries::message::get_recent_messages(db.pool(), "agent_id", 50).await?; +``` + +## Files Changed Since Last Session + +- `src/connection.rs` - Removed unused `DbError` import +- `.sqlx/*.json` - Generated offline query data (40+ files) + +## Next Steps + +1. **Integration spike** - Try using pattern_db from pattern_core for one agent operation +2. **sqlite-vec** - Research bundling strategy, create virtual table migration +3. **Test coverage** - Add tests for critical query paths diff --git a/docs/refactoring/v2-pattern-dialect.md b/docs/refactoring/v2-pattern-dialect.md new file mode 100644 index 0000000..b34a480 --- /dev/null +++ b/docs/refactoring/v2-pattern-dialect.md @@ -0,0 +1,468 @@ +# Pattern Dialect - Action Language for Agents + +## Overview + +Pattern Dialect is a lightweight action language designed to replace structured tool calls (JSON/XML) with something that: + +1. **LLMs can produce reliably** - even smaller/cheaper models +2. **Is easy to parse** - sigil-based structure with fuzzy argument matching +3. **Meets models where they are** - dense aliases for common operations +4. **Integrates with Pattern's permission system** - explicit markers for consent flows + +The core insight: LLMs already have "intuitions" about how to express actions. Rather than forcing them into rigid schemas, we provide a flexible surface that maps many expressions to the same underlying operations. 
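+
+For example, all of the following should land on the same underlying archival search (the exact phrasings are illustrative; the alias tables below define what actually matches):
+
+```
+/recall "what did we decide about the project deadline?"
+/remember project deadline decision
+/search in archival project deadline
+```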
+ +## Design Principles + +### Hard to Fail, Hard to Be Dangerous + +- **Fuzzy matching** on verbs and arguments +- **Context inference** when targets are ambiguous +- **Sensible defaults** for omitted parameters +- **Permission gates invisible in syntax** - the system always checks, agents don't need to specify + +### Meet Models Where They Are + +- Dense alias tables for common operations +- Multiple syntactic forms for the same action +- Accept natural language fragments when unambiguous +- Intent inference from argument shape (e.g., `/recall "query text"` vs `/recall block_name`) + +### Explicit Permission Requests + +When an agent *knows* something might need approval: + +| Marker | Meaning | +|--------|---------| +| (none) | Normal - system checks silently | +| `.` | "This is routine, don't bug them" | +| `?` | "This might need checking" | +| `?ask` | "Please confirm with partner" | +| `!` | "This is serious, definitely confirm" | + +## Syntax + +### Basic Structure + +``` +/VERB [ARGUMENTS] [PERMISSION_MARKER] +``` + +- `/` sigil marks an action (familiar from Discord/Slack/IRC) +- `VERB` is fuzzy-matched against known verbs + aliases +- `ARGUMENTS` are parsed based on verb signature +- Optional permission marker at end + +### Chaining + +``` +/action1 -> /action2 -> /action3 +``` + +`->` means "then prompt me again with the result". Not a pipe - the agent gets control back between each action. + +### References + +``` +@last # last message in context +@last.message # same, more explicit +@last.output # output of last action +@thread # current thread/conversation +@parent # message being replied to +@[uri] # specific resource by URI + +that, this, it # implicit references resolved from context +``` + +### Content Blocks + +Any of these are recognized as "this is the content": + +``` +/post bluesky "single line content" +/post bluesky """ +multi-line +content +""" +/post bluesky ``` +code block style +``` +/post bluesky +> block quote +> also works +``` + +Unquoted content is fine when unambiguous: + +``` +/reply sounds good to me +/recall user mentioned project deadlines +``` + +## Verb Reference + +### Memory Operations + +#### `/recall` - Long-term Memory + +The most aliased verb. Intent is inferred from argument shape. + +| Form | Interpretation | +|------|----------------| +| `/recall block_name` | Read block by exact name | +| `/recall "fuzzy query"` | Vector search | +| `/recall + "content"` | Insert new block | +| `/recall block_name + "content"` | Append to existing | +| `/recall -block_name` | Delete block | +| `/recall patch block_name \`\`\`diff...` | Structured edit | + +**Aliases**: `remember`, `store`, `save`, `archive`, `forget` (→ delete) + +**Subcommand forms** (equivalent to modifiers): +- `insert`, `add`, `save` → Insert +- `append`, `+` → Append +- `read`, `get`, `check`, `show` → Read +- `delete`, `remove`, `-` → Delete +- `patch`, `edit` → Patch (for capable models) + +#### `/context` - Working Memory + +Operations on memory blocks that are always in context (persona, human, working notes). 
+ +| Form | Interpretation | +|------|----------------| +| `/context block_name + "content"` | Append to block | +| `/context block_name "old" -> "new"` | Replace text | +| `/context archive block_name` | Move to archival | +| `/context load archival_label` | Load from archival | +| `/context swap block archival_label` | Swap working ↔ archival | + +**Aliases**: `note`, `update`, `remember` (context-aware disambiguation) + +#### `/search` - Query Across Domains + +| Form | Interpretation | +|------|----------------| +| `/search query terms` | Search all, return grouped | +| `/search in archival query` | Archival memory only | +| `/search in conversations query` | Conversation history | +| `/search in constellation query` | All agents' history | +| `/search query > 2 weeks` | Time-filtered | +| `/search query role:assistant` | Role-filtered | + +**Aliases**: `find`, `query`, `look`, `lookup` + +**Domain modifiers**: `in archival`, `in conversations`, `in constellation`, `in all`, `everywhere` + +### Communication + +#### `/send` - Explicit Target Form + +``` +/send @domain @target "message" +``` + +Domains: `@user`, `@agent`, `@channel`, `@bluesky`, `@discord` + +Examples: +``` +/send @agent @entropy "can you help break this down?" +/send @bluesky @at://did:plc:xxx/post/yyy "replying here" +/send @discord @general "hey everyone" +/send @user "here's what I found" +``` + +#### Shorthands + +| Verb | Target | Use | +|------|--------|-----| +| `/reply` | Current thread | Reply to the message being responded to | +| `/post` | Platform | Post to Bluesky, Discord, etc. | +| `/dm` | User | Direct message | +| `/tell` | Agent | Agent-to-agent communication | + +Examples: +``` +/reply sounds good . # routine reply +/post bluesky "thoughts on this" ?public # flag as public +/dm @user "private info" ? # might need checking +/tell @anchor "should I do this?" ?ask # request approval +``` + +### External Operations + +#### `/fetch` - Get Web Content + +``` +/fetch https://example.com # as markdown +/fetch https://example.com html # as raw html +/fetch continue # continue paginated read +``` + +**Aliases**: `get` (url context) + +#### `/web` - Web Search + +``` +/web rust async patterns +/web site:docs.rs tokio runtime +``` + +**Aliases**: `search web`, `google`, `look up` + +### Utilities + +#### `/calc` - Calculation + +``` +/calc 5 feet to meters +/calc radius = 5; pi * radius^2 +/calc 20% of 350 +``` + +**Aliases**: `calculate`, `compute`, `math` + +#### `/data` - Data Sources + +``` +/data list # show configured sources +/data read source_id # read from source +/data search source_id "query" # search in source +/data monitor source_id # start notifications +/data pause source_id # pause notifications +``` + +### Authority Operations + +Only available to agents with authority roles. 
+ +#### `/approve` - Grant Permission + +``` +/approve [request-id] +/approve [request-id] always # create permanent rule +``` + +**Aliases**: `allow`, `permit`, `ok`, `yes` + +#### `/deny` - Deny Permission + +``` +/deny [request-id] +/deny [request-id] "reason" +/deny [request-id] always # create permanent block +``` + +**Aliases**: `reject`, `block`, `no` + +#### `/escalate` - Punt to Higher Authority + +``` +/escalate [request-id] +/escalate [request-id] "this needs human review" +``` + +#### `/cancel` - Cancel Pending Action + +``` +/cancel # cancel current chain +/cancel [request-id] # cancel specific pending +``` + +**Aliases**: `abort`, `nevermind`, `stop` + +### Emergency + +#### `/halt` - Emergency Stop + +``` +/halt "reason for emergency stop" ! +``` + +Only for system integrity agents. Terminates the process. + +## Parser Architecture + +### Intent Resolution Flow + +``` +1. Find sigil (/) +2. Extract verb token +3. Fuzzy match verb → canonical + aliases +4. Based on verb, parse arguments using signature +5. Extract permission markers +6. Resolve references (@last, that, etc.) +7. Infer intent from argument shape if needed +8. Return structured action +``` + +### Fuzzy Matching + +```rust +fn match_verb(input: &str) -> Option<(Verb, f32)> { + // 1. Exact match against canonical + // 2. Exact match against aliases + // 3. Levenshtein distance (weighted by verb strictness) + // 4. Return best match above threshold +} +``` + +Each verb has a `strictness` level: +- **Strict**: `approve`, `deny`, `halt` - don't want accidental matches +- **Normal**: most verbs +- **Loose**: `recall`, `search` - maximize accessibility + +### Argument Shape Inference + +For `/recall`: +```rust +fn interpret_recall(args: &str) -> RecallIntent { + if has_insert_modifier(args) { + RecallIntent::Insert(extract_content(args)) + } else if has_delete_modifier(args) { + RecallIntent::Delete(extract_label(args)) + } else if has_patch_modifier(args) { + RecallIntent::Patch(extract_patch(args)) + } else if is_exact_block_match(args) { + RecallIntent::Read(args.to_string()) + } else { + // No exact match - treat as semantic search + RecallIntent::Search(args.to_string()) + } +} +``` + +### Error Recovery + +When parsing fails or is ambiguous, return natural language: + +``` +"I couldn't tell if you meant /post or /reply - which one?" +"'recall' found multiple blocks matching 'notes' - did you mean project_notes or meeting_notes?" +"No permission to post to bluesky DMs - want me to ask?" 
+```
+
+## Permission Integration
+
+### Implicit Checks (Always Run)
+
+- Platform access
+- Rate limits
+- Content policies
+- DM vs public context
+- Sensitivity heuristics
+
+### Authority Resolution
+
+```rust
+enum Authority {
+ Partner, // Human owner
+ Agent(AgentId), // Supervisor agent
+ Chain(Vec<Authority>), // Try in order, escalate
+}
+```
+
+Configured per action pattern:
+- `/post bluesky` public → `Chain([Agent(anchor), Partner])`
+- `/recall` sensitive → `Partner`
+- `/send dm` → `Partner`
+
+### Tiered Model Support
+
+Routine permission checks can run on smaller/cheaper models:
+- Local model handles 95% of traffic instantly
+- Larger model handles edge cases
+- Partner only sees truly novel situations
+
+The small model can signal uncertainty:
+```
+/uncertain "this looks like sarcasm but I can't tell"
+/escalate "content seems political, above my pay grade"
+```
+
+## Agent Instructions
+
+The model-facing documentation is minimal:
+
+```
+Actions start with /
+
+Common verbs:
+ /recall - memory (read, search, store)
+ /search - find things
+ /reply - respond to messages
+ /post - publish to platforms
+ /tell - message other agents
+
+Chain actions with ->
+End with ? to request permission, ! if serious, . if routine
+
+Examples:
+ /recall project deadlines
+ /reply sounds good .
+ /post bluesky "hello" ?public
+ /recall meeting notes -> /summarize -> /tell @entropy
+```
+
+## Implementation Notes
+
+### Crate Location
+
+`pattern_core::dialect` or separate `pattern_dialect` crate.
+
+### Key Types
+
+```rust
+pub struct ParsedAction {
+ pub verb: Verb,
+ pub arguments: Arguments,
+ pub permission_marker: Option<PermissionMarker>,
+ pub chain: Option<Vec<ParsedAction>>,
+}
+
+pub enum Arguments {
+ Recall(RecallIntent),
+ Send(SendTarget, Content),
+ Search(SearchQuery),
+ // ...
+}
+
+pub struct DialectParser {
+ verb_specs: HashMap<String, VerbSpec>,
+ block_registry: BlockRegistry, // for exact match detection
+}
+```
+
+### Integration with Existing Tools
+
+The dialect parser produces structured actions that map directly to existing tool invocations. The `BuiltinTools` implementations remain unchanged - dialect is a new frontend, not a replacement of the execution layer.
+
+```
+Agent Output → Dialect Parser → ParsedAction → Tool Execution → Result
+```
+
+## Future Considerations
+
+### Learning from Usage
+
+Track which phrasings agents attempt:
+- Add successful novel phrasings as aliases
+- Identify common failure patterns
+- Per-model alias tuning
+
+### Visual/TUI Representation
+
+The dialect is text-first but could render nicely:
+- Syntax highlighting in logs
+- Structured display in partner UI
+- Action history with grouping
+
+### Multi-Action Batching
+
+Beyond chaining, explicit parallel execution:
+```
+/batch {
+ /recall project_notes
+ /search conversations about project
+ /fetch https://project-docs.example.com
+}
+```
diff --git a/docs/refactoring/v2-remote-presence.md b/docs/refactoring/v2-remote-presence.md
new file mode 100644
index 0000000..ec5b26d
--- /dev/null
+++ b/docs/refactoring/v2-remote-presence.md
@@ -0,0 +1,838 @@
+# Pattern v2: Remote Presence Connector
+
+## The Problem
+
+Pattern agents often run on a server (for reliability, always-on presence), but need to interact with the partner's local environment:
+
+- Read/write files on partner's machine
+- Connect to local editor (LSP integration)
+- Execute commands in partner's terminal
+- Access local development tools
+
+This is especially important for:
+1. **Coding harness** - Agent needs to see code, run tests, use dev tools
+2. 
**ACP (Agent Communication Protocol)** - Agents coordinating across environments
+3. **ADHD support** - Agent helping with tasks that involve local files/apps
+
+## Current State
+
+`realtime.rs` has event sink traits for streaming agent responses:
+
+```rust
+#[async_trait]
+pub trait AgentEventSink: Send + Sync {
+    async fn on_event(&self, event: ResponseEvent, ctx: AgentEventContext);
+}
+```
+
+This is one-way (agent → observer). We need bidirectional communication.
+
+## Proposed Architecture
+
+### Connector Trait
+
+```rust
+/// A connector provides access to an environment (local or remote)
+#[async_trait]
+pub trait EnvironmentConnector: Send + Sync {
+    /// Read a file from the environment
+    async fn read_file(&self, path: &Path) -> Result<String>;
+
+    /// Write a file to the environment
+    async fn write_file(&self, path: &Path, content: &str) -> Result<()>;
+
+    /// List directory contents
+    async fn list_dir(&self, path: &Path) -> Result<Vec<DirEntry>>;
+
+    /// Execute a command
+    async fn exec(&self, command: &str, args: &[&str]) -> Result<ExecResult>;
+
+    /// Check if path exists
+    async fn exists(&self, path: &Path) -> Result<bool>;
+
+    /// Get environment info (working directory, OS, etc.)
+    async fn env_info(&self) -> Result<EnvInfo>;
+
+    /// Open a bidirectional stream (for LSP, etc.)
+    /// (`ConnectorStream` is a placeholder name for the stream trait)
+    async fn open_stream(&self, target: &str) -> Result<Box<dyn ConnectorStream>>;
+}
+
+pub struct ExecResult {
+    pub exit_code: i32,
+    pub stdout: String,
+    pub stderr: String,
+}
+
+pub struct EnvInfo {
+    pub working_dir: PathBuf,
+    pub os: String,
+    pub hostname: String,
+    /// Trust level of this connector
+    pub trust_level: TrustLevel,
+}
+```
+
+### Trust Levels
+
+Different input sources have different trust levels:
+
+```rust
+pub enum TrustLevel {
+    /// Partner's own environment - full trust
+    /// CLI, remote connector authenticated as partner
+    Partner,
+
+    /// Known friend/collaborator - high trust
+    /// Could allow some file operations
+    Friend,
+
+    /// Public conversant - limited trust
+    /// Discord/Bluesky strangers
+    Conversant,
+
+    /// Untrusted - read-only, sandboxed
+    Untrusted,
+}
+```
+
+Trust level affects what tools the agent can use:
+
+```rust
+impl Agent {
+    // `ToolName` stands in for the real tool handle type
+    fn allowed_tools(&self, trust: TrustLevel) -> Vec<ToolName> {
+        match trust {
+            TrustLevel::Partner => self.all_tools(),
+            TrustLevel::Friend => self.tools_except(&["shell", "file_write"]),
+            TrustLevel::Conversant => self.safe_tools_only(),
+            TrustLevel::Untrusted => vec![],
+        }
+    }
+}
+```
+
+### Implementations
+
+#### LocalConnector
+
+For when agent runs on same machine:
+
+```rust
+pub struct LocalConnector {
+    working_dir: PathBuf,
+    trust_level: TrustLevel,
+}
+
+#[async_trait]
+impl EnvironmentConnector for LocalConnector {
+    async fn read_file(&self, path: &Path) -> Result<String> {
+        tokio::fs::read_to_string(path).await.map_err(Into::into)
+    }
+
+    async fn exec(&self, command: &str, args: &[&str]) -> Result<ExecResult> {
+        let output = tokio::process::Command::new(command)
+            .args(args)
+            .current_dir(&self.working_dir)
+            .output()
+            .await?;
+
+        Ok(ExecResult {
+            exit_code: output.status.code().unwrap_or(-1),
+            stdout: String::from_utf8_lossy(&output.stdout).into(),
+            stderr: String::from_utf8_lossy(&output.stderr).into(),
+        })
+    }
+
+    // ...
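+
+    // Hedged sketch of two of the elided methods (not in the original doc).
+    // Assumes tokio 1.24+ for `tokio::fs::try_exists`, the `hostname` crate,
+    // and that `TrustLevel` derives `Copy`.
+    async fn exists(&self, path: &Path) -> Result<bool> {
+        Ok(tokio::fs::try_exists(path).await?)
+    }
+
+    async fn env_info(&self) -> Result<EnvInfo> {
+        Ok(EnvInfo {
+            working_dir: self.working_dir.clone(),
+            os: std::env::consts::OS.to_string(),
+            hostname: hostname::get()?.to_string_lossy().into_owned(),
+            trust_level: self.trust_level,
+        })
+    }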
+}
+```
+
+#### RemoteConnector
+
+For when agent runs on server, partner's machine is remote:
+
+```rust
+pub struct RemoteConnector {
+    /// WebSocket or other transport to partner's client
+    transport: Box<dyn ConnectorTransport>,
+    trust_level: TrustLevel,
+}
+
+#[async_trait]
+impl EnvironmentConnector for RemoteConnector {
+    async fn read_file(&self, path: &Path) -> Result<String> {
+        let request = ConnectorRequest::ReadFile { path: path.to_owned() };
+        let response = self.transport.request(request).await?;
+        match response {
+            ConnectorResponse::FileContent(content) => Ok(content),
+            ConnectorResponse::Error(e) => Err(e.into()),
+            _ => Err(anyhow!("unexpected response")),
+        }
+    }
+
+    // ...
+}
+```
+
+### Wire Protocol
+
+JSON-RPC style messages over WebSocket:
+
+```rust
+#[derive(Serialize, Deserialize)]
+#[serde(tag = "method", content = "params")]
+pub enum ConnectorRequest {
+    ReadFile { path: PathBuf },
+    WriteFile { path: PathBuf, content: String },
+    ListDir { path: PathBuf },
+    Exec { command: String, args: Vec<String> },
+    Exists { path: PathBuf },
+    EnvInfo,
+    OpenStream { target: String },
+}
+
+#[derive(Serialize, Deserialize)]
+#[serde(tag = "type", content = "data")]
+pub enum ConnectorResponse {
+    FileContent(String),
+    DirEntries(Vec<DirEntry>),
+    ExecResult(ExecResult),
+    Bool(bool),
+    EnvInfo(EnvInfo),
+    StreamOpened { stream_id: String },
+    Error(String),
+}
+```
+
+### Client-Side Daemon
+
+Partner runs a small daemon that:
+1. Connects to Pattern server via WebSocket
+2. Authenticates (proves they're the partner)
+3. Handles `ConnectorRequest`s from their agent
+4. Enforces local security policies
+
+```rust
+// pattern-connector daemon
+async fn main() -> Result<()> {
+    let config = load_config()?;
+
+    let ws = connect_to_server(&config.server_url).await?;
+    authenticate(&ws, &config.credentials).await?;
+
+    loop {
+        let request: ConnectorRequest = ws.recv().await?;
+
+        // Check local policy
+        if !policy_allows(&request) {
+            ws.send(ConnectorResponse::Error("denied by policy".into())).await?;
+            continue;
+        }
+
+        let response = handle_request(request).await;
+        ws.send(response).await?;
+    }
+}
+```
+
+### Integration with Tools
+
+Agent tools can use the connector:
+
+```rust
+pub struct FileReadTool {
+    connector: Arc<dyn EnvironmentConnector>,
+}
+
+#[async_trait]
+impl Tool for FileReadTool {
+    async fn execute(&self, args: ToolArgs) -> Result<ToolResult> {
+        let path = args.get::<PathBuf>("path")?;
+        let content = self.connector.read_file(&path).await?;
+        Ok(ToolResult::text(content))
+    }
+}
+```
+
+### LSP Integration
+
+For editor integration, the connector can proxy LSP:
+
+```rust
+impl RemoteConnector {
+    async fn connect_lsp(&self, language: &str) -> Result<LspClient> {
+        let stream = self.open_stream(&format!("lsp:{}", language)).await?;
+        Ok(LspClient::new(stream))
+    }
+}
+```
+
+The client-side daemon would spawn the appropriate LSP server and bridge the connection.
+
+## Security Considerations
+
+1. **Authentication** - Connector must prove it's the partner
+   - Could use same auth as API (JWT)
+   - Or separate long-lived connector tokens
+
+2. **Authorization** - Even partner may want limits
+   - Configurable allowed paths
+   - Command allowlist/denylist
+   - Confirmation prompts for destructive operations
+
+3. **Sandboxing** - For non-partner connectors
+   - Read-only access
+   - No command execution
+   - Limited to specific directories
+
+4. 
**Audit logging** - Track all connector operations + - Who, what, when + - Useful for debugging and trust building + +## Relationship to ACP + +Agent Communication Protocol would use similar transport: +- Agent A on server X wants to coordinate with Agent B on server Y +- Each agent has connector access to their partner's environment +- ACP messages flow server-to-server +- File/environment access flows through connectors + +``` +┌─────────────────┐ ┌─────────────────┐ +│ Partner A's │ │ Partner B's │ +│ Machine │ │ Machine │ +│ ┌───────────┐ │ │ ┌───────────┐ │ +│ │ Connector │◄─┼─────────┼──┤ Connector │ │ +│ │ Daemon │ │ WS │ │ Daemon │ │ +│ └─────┬─────┘ │ │ └─────┬─────┘ │ +│ │ │ │ │ │ +│ ▼ │ │ ▼ │ +│ Local Files │ │ Local Files │ +└─────────────────┘ └─────────────────┘ + ▲ ▲ + │ │ + │ ┌─────────────────┐ │ + │ │ Pattern Server │ │ + │ │ ┌───────────┐ │ │ + └────┼──┤ Agent A │ │ │ + │ └─────┬─────┘ │ │ + │ │ ACP │ │ + │ ▼ │ │ + │ ┌───────────┐ │ │ + │ │ Agent B ├──┼────┘ + │ └───────────┘ │ + └─────────────────┘ +``` + +## Transport: iroh + +**Decision**: Use [iroh](https://iroh.computer/) for connector transport. + +### Why iroh + +- **Peer-to-peer** - No central relay required (though uses relays for NAT traversal) +- **Encryption built-in** - QUIC-based, TLS 1.3 +- **Cryptographic identity** - Stable node IDs (public keys) for auth +- **Rust-native** - `iroh-net`, `iroh-rpc`, `iroh-bytes` crates, async, well-maintained +- **Handles reconnection** - Connection state management built in +- **NAT traversal** - STUN/TURN equivalent via n0 discovery + +### iroh Protocol Stack + +| Layer | iroh Crate | Use in Pattern | +|-------|------------|----------------| +| Transport | `iroh-net` | QUIC connections, node identity, NAT traversal | +| RPC | `iroh-rpc` | Request/response for connector operations | +| Streaming | `iroh-bytes` | Large file transfers, ACP message streams | + +**Primary primitive**: `iroh-rpc` for most connector operations (file read/write, exec, etc.) 
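+
+The request/response sketches below all lean on a pair of small framing helpers, `write_json` and `read_json`, which are assumed here rather than provided by iroh. A minimal sketch, assuming length-prefixed JSON frames (our framing choice, not iroh's) and stream halves that implement tokio's `AsyncRead`/`AsyncWrite`:
+
+```rust
+use serde::{Serialize, de::DeserializeOwned};
+use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
+
+/// Write one length-prefixed JSON frame to a stream.
+pub async fn write_json<S, T>(stream: &mut S, value: &T) -> anyhow::Result<()>
+where
+    S: AsyncWrite + Unpin,
+    T: Serialize,
+{
+    let bytes = serde_json::to_vec(value)?;
+    // 4-byte big-endian length prefix, then the JSON payload
+    stream.write_all(&(bytes.len() as u32).to_be_bytes()).await?;
+    stream.write_all(&bytes).await?;
+    Ok(())
+}
+
+/// Read one length-prefixed JSON frame from a stream.
+pub async fn read_json<S, T>(stream: &mut S) -> anyhow::Result<T>
+where
+    S: AsyncRead + Unpin,
+    T: DeserializeOwned,
+{
+    let mut len_buf = [0u8; 4];
+    stream.read_exact(&mut len_buf).await?;
+    let mut buf = vec![0u8; u32::from_be_bytes(len_buf) as usize];
+    stream.read_exact(&mut buf).await?;
+    Ok(serde_json::from_slice(&buf)?)
+}
+```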
+
+**Streaming**: `iroh-bytes` or raw QUIC streams for:
+- Large file transfers (chunked)
+- ACP message proxying
+- LSP bidirectional streams
+
+### Architecture with iroh
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│ Partner's Machine                                           │
+│  ┌─────────────────────────────────────────────────────┐   │
+│  │ pattern-connector daemon                            │   │
+│  │                                                     │   │
+│  │  iroh Endpoint                                      │   │
+│  │  - persistent node ID (keypair)                     │   │
+│  │  - connects to Pattern server                       │   │
+│  │  - handles bidirectional streams                    │   │
+│  │                                                     │   │
+│  │  Request handlers:                                  │   │
+│  │  - file read/write (within allowed paths)           │   │
+│  │  - command execution (within allowed commands)      │   │
+│  │  - LSP proxy (spawn local LSP, bridge streams)      │   │
+│  │  - file watching (push notifications)               │   │
+│  └─────────────────────────────────────────────────────┘   │
+└───────────────────────────┬─────────────────────────────────┘
+                            │ QUIC (iroh)
+                            │ encrypted, NAT-traversing
+                            │
+┌───────────────────────────▼─────────────────────────────────┐
+│ Pattern Server                                               │
+│  ┌─────────────────────────────────────────────────────┐    │
+│  │ iroh Endpoint                                       │    │
+│  │  - accepts connector connections                    │    │
+│  │  - validates node ID against registered partners    │    │
+│  │                                                     │    │
+│  │ RemoteConnector                                     │    │
+│  │  - implements EnvironmentConnector trait            │    │
+│  │  - sends requests over iroh connection              │    │
+│  │  - used by agents for file/env access               │    │
+│  └─────────────────────────────────────────────────────┘    │
+└──────────────────────────────────────────────────────────────┘
+```
+
+### Authentication via Node ID
+
+iroh peers have cryptographic node IDs (public keys). We use this for auth:
+
+```rust
+pub struct ConnectorAuth {
+    /// Partner's iroh node ID (Ed25519 public key)
+    pub partner_node_id: iroh_net::NodeId,
+
+    /// Constellation this connector is authorized for
+    pub constellation_id: ConstellationId,
+
+    /// When this auth was established
+    pub paired_at: DateTime<Utc>,
+
+    /// Optional: human-readable name for this connector
+    pub name: Option<String>,
+}
+
+impl PatternServer {
+    async fn on_connector_connect(&self, conn: iroh_net::Connection) -> Result<()> {
+        let peer_id = conn.remote_node_id();
+
+        // Check if this node ID is registered
+        match self.db.get_connector_auth(&peer_id).await? {
+            Some(auth) => {
+                // Known partner - create connector
+                let connector = RemoteConnector::new(conn, auth.constellation_id);
+                self.register_connector(auth.constellation_id, connector).await;
+                tracing::info!("Connector {} connected for constellation {}",
+                    peer_id, auth.constellation_id);
+            }
+            None => {
+                // Unknown node - reject
+                tracing::warn!("Unknown connector attempted connection: {}", peer_id);
+                conn.close(0u8.into(), b"unknown node ID");
+            }
+        }
+
+        Ok(())
+    }
+}
+```
+
+### Pairing Flow
+
+First-time connector setup:
+
+```
+┌──────────────────────────────────────────────────────────────────┐
+│ 1. Partner authenticates via web UI or CLI                       │
+│    $ pattern auth login                                          │
+│                                                                  │
+│ 2. Partner requests pairing code                                 │
+│    $ pattern connector pair --generate                           │
+│    Pairing code: ABC-123-XYZ (expires in 10 minutes)             │
+│                                                                  │
+│ 3. On partner's machine, run connector with code                 │
+│    $ pattern-connector pair --code ABC-123-XYZ                   │
+│                                                                  │
+│ 4. Connector generates keypair, sends node ID to server          │
+│    POST /api/v1/connector/pair { code: "...", node_id: "..." }   │
+│                                                                  │
+│ 5. Server associates node ID with partner's constellation        │
+│    Connector auth saved to DB                                    │
+│                                                                  │
+│ 6. 
Future connections: node ID is sufficient for auth │ +│ No tokens, no passwords - just cryptographic identity │ +└──────────────────────────────────────────────────────────────────┘ +``` + +### Connector Daemon Implementation + +```rust +// pattern-connector binary +use iroh_net::{Endpoint, NodeAddr, SecretKey}; +use std::path::PathBuf; + +const PATTERN_ALPN: &[u8] = b"pattern-connector/1"; + +#[tokio::main] +async fn main() -> Result<()> { + let config = ConnectorConfig::load()?; + + // Load or generate persistent identity + let secret_key = match std::fs::read(&config.key_path) { + Ok(bytes) => SecretKey::try_from_bytes(&bytes)?, + Err(_) => { + let key = SecretKey::generate(); + std::fs::write(&config.key_path, key.to_bytes())?; + key + } + }; + + // Create iroh endpoint + let endpoint = Endpoint::builder() + .secret_key(secret_key) + .discovery_n0() // NAT traversal via n0 infrastructure + .bind() + .await?; + + println!("Connector node ID: {}", endpoint.node_id()); + + // Connect to Pattern server + let server_addr: NodeAddr = config.server_addr.parse()?; + let conn = endpoint.connect(server_addr, PATTERN_ALPN).await?; + + println!("Connected to Pattern server"); + + // Set up file watcher for push notifications + let (watch_tx, watch_rx) = tokio::sync::mpsc::channel(100); + if let Some(watch_paths) = &config.watch_paths { + spawn_file_watcher(watch_paths.clone(), watch_tx); + } + + // Handle incoming requests and outgoing notifications + tokio::select! { + r = handle_requests(&conn, &config) => r?, + r = push_file_changes(&conn, watch_rx) => r?, + } + + Ok(()) +} + +async fn handle_requests( + conn: &iroh_net::Connection, + config: &ConnectorConfig +) -> Result<()> { + loop { + let (mut send, mut recv) = conn.accept_bi().await?; + let config = config.clone(); + + tokio::spawn(async move { + let request: ConnectorRequest = read_json(&mut recv).await?; + + // Check against local policy + if !config.policy.allows(&request) { + let response = ConnectorResponse::Error { + code: "POLICY_DENIED".into(), + message: format!("Local policy denies: {:?}", request), + }; + write_json(&mut send, &response).await?; + return Ok(()); + } + + // Handle request + let response = match request { + ConnectorRequest::ReadFile { path } => { + match tokio::fs::read_to_string(&path).await { + Ok(content) => ConnectorResponse::FileContent(content), + Err(e) => ConnectorResponse::Error { + code: "READ_ERROR".into(), + message: e.to_string(), + }, + } + } + ConnectorRequest::WriteFile { path, content } => { + match tokio::fs::write(&path, &content).await { + Ok(()) => ConnectorResponse::Ok, + Err(e) => ConnectorResponse::Error { + code: "WRITE_ERROR".into(), + message: e.to_string(), + }, + } + } + ConnectorRequest::Exec { command, args, cwd } => { + let mut cmd = tokio::process::Command::new(&command); + cmd.args(&args); + if let Some(cwd) = cwd { + cmd.current_dir(cwd); + } + + match cmd.output().await { + Ok(output) => ConnectorResponse::ExecResult(ExecResult { + exit_code: output.status.code().unwrap_or(-1), + stdout: String::from_utf8_lossy(&output.stdout).into(), + stderr: String::from_utf8_lossy(&output.stderr).into(), + }), + Err(e) => ConnectorResponse::Error { + code: "EXEC_ERROR".into(), + message: e.to_string(), + }, + } + } + ConnectorRequest::ListDir { path } => { + // ... similar pattern + } + ConnectorRequest::OpenLsp { language } => { + // Spawn LSP server, return stream ID for bidirectional comms + } + // ... 
other requests
+            };
+
+            write_json(&mut send, &response).await?;
+            Ok::<_, anyhow::Error>(())
+        });
+    }
+}
+```
+
+### Local Policy Configuration
+
+Partner controls what the connector allows:
+
+```toml
+# ~/.config/pattern-connector/config.toml
+
+[server]
+address = "pattern.example.com"
+node_id = "abc123..."  # Server's iroh node ID
+
+[policy]
+# Allowed paths for file operations (glob patterns)
+allowed_paths = [
+    "~/Projects/**",
+    "~/Documents/pattern/**",
+]
+
+# Explicitly denied paths
+denied_paths = [
+    "~/.ssh/**",
+    "~/.gnupg/**",
+    "**/.env",
+    "**/secrets/**",
+]
+
+# Allowed commands
+allowed_commands = [
+    "cargo",
+    "npm",
+    "git",
+    "rg",
+    "fd",
+    "cat",
+    "ls",
+]
+
+# Denied commands (takes precedence)
+denied_commands = [
+    "rm",
+    "sudo",
+    "chmod",
+]
+
+# Require confirmation for these operations
+confirm_operations = [
+    "write_file",
+    "exec",
+]
+
+[watch]
+# Paths to watch for changes (push to server)
+paths = [
+    "~/Projects/current/**/*.rs",
+    "~/Projects/current/**/*.toml",
+]
+```
+
+### File Change Notifications
+
+Connector can push file change events to the server:
+
+```rust
+#[derive(Serialize, Deserialize)]
+pub enum ConnectorNotification {
+    FileChanged {
+        path: PathBuf,
+        change_type: FileChangeType,
+    },
+    FileCreated {
+        path: PathBuf,
+    },
+    FileDeleted {
+        path: PathBuf,
+    },
+    ConnectorStatus {
+        status: ConnectorStatus,
+    },
+}
+
+#[derive(Serialize, Deserialize)]
+pub enum FileChangeType {
+    Modified,
+    Renamed { from: PathBuf },
+}
+
+async fn push_file_changes(
+    conn: &iroh_net::Connection,
+    mut watch_rx: mpsc::Receiver<notify::Event>,
+) -> Result<()> {
+    while let Some(event) = watch_rx.recv().await {
+        let notification = match event.kind {
+            notify::EventKind::Modify(_) => {
+                ConnectorNotification::FileChanged {
+                    path: event.paths[0].clone(),
+                    change_type: FileChangeType::Modified,
+                }
+            }
+            notify::EventKind::Create(_) => {
+                ConnectorNotification::FileCreated {
+                    path: event.paths[0].clone(),
+                }
+            }
+            // ... etc
+        };
+
+        // Open unidirectional stream for notification
+        let mut send = conn.open_uni().await?;
+        write_json(&mut send, &notification).await?;
+        send.finish().await?;
+    }
+
+    Ok(())
+}
+```
+
+### Reconnection Handling
+
+iroh handles most reconnection automatically, but we add application-level resilience:
+
+```rust
+impl RemoteConnector {
+    async fn request(
+        &self,
+        req: ConnectorRequest
+    ) -> Result<ConnectorResponse> {
+        // Retry with backoff on connection errors
+        let mut attempts = 0;
+        loop {
+            match self.try_request(&req).await {
+                Ok(response) => return Ok(response),
+                Err(e) if e.is_connection_error() && attempts < 3 => {
+                    attempts += 1;
+                    let backoff = Duration::from_millis(100 * 2u64.pow(attempts));
+                    tokio::time::sleep(backoff).await;
+
+                    // iroh may have reconnected automatically
+                    // if not, this will fail again
+                    continue;
+                }
+                Err(e) => return Err(e),
+            }
+        }
+    }
+
+    /// Check if connector is currently connected
+    pub fn is_connected(&self) -> bool {
+        // iroh connection state
+        self.conn.close_reason().is_none()
+    }
+
+    /// Wait for reconnection (iroh handles this)
+    pub async fn wait_connected(&self) -> Result<()> {
+        // iroh will attempt to reconnect automatically
+        // we just need to wait for it
+        tokio::time::timeout(
+            Duration::from_secs(30),
+            self.conn.closed()
+        ).await?;
+
+        Ok(())
+    }
+}
+```
+
+### Integration with ACP
+
+When Pattern runs as an ACP agent locally, it uses `LocalConnector`. 
When it runs on a server but the editor is local, we can tunnel ACP over iroh: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Partner's Machine │ +│ │ +│ ┌─────────────┐ ┌─────────────────────────────────┐ │ +│ │ Zed │◄──ACP──►│ pattern-connector │ │ +│ │ (Editor) │ stdio │ - proxies ACP over iroh │ │ +│ └─────────────┘ │ - handles file/env requests │ │ +│ └──────────────┬──────────────────┘ │ +└─────────────────────────────────────────┼───────────────────────┘ + │ iroh +┌─────────────────────────────────────────▼───────────────────────┐ +│ Pattern Server │ +│ ┌────────────────────────────────────────────────────────┐ │ +│ │ Pattern Agent │ │ +│ │ - receives ACP messages via iroh │ │ +│ │ - uses RemoteConnector for file access │ │ +│ │ - full constellation persistence │ │ +│ └────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +The connector daemon can spawn a local ACP shim that the editor connects to: + +```rust +// In pattern-connector, optional ACP proxy mode +async fn run_acp_proxy( + conn: &iroh_net::Connection, +) -> Result<()> { + // Spawn local process that speaks ACP over stdio + // Forward messages to Pattern server over iroh + // Return responses back to editor + + let mut child = tokio::process::Command::new("pattern-connector") + .arg("acp-shim") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn()?; + + // Bridge stdio <-> iroh + let stdin = child.stdin.take().unwrap(); + let stdout = child.stdout.take().unwrap(); + + tokio::select! { + // Editor -> Server + r = forward_stdin_to_iroh(stdout, conn) => r?, + // Server -> Editor + r = forward_iroh_to_stdout(conn, stdin) => r?, + } + + Ok(()) +} +``` + +## Open Questions + +1. **LSP multiplexing** - Multiple LSP servers over single iroh connection? Probably separate streams per LSP. + +2. **Offline operation** - Cache file contents on server for when connector disconnects? + - Could cache recently-accessed files + - Mark cached data as potentially stale + - Agent sees "connector offline, using cached data from X minutes ago" + +3. **Multiple connectors** - Partner with multiple machines. Options: + - Route to most-recently-active connector + - Route based on request (file path → which machine has it) + - Allow agent to specify target connector + - Probably: default to most-recent, allow explicit targeting + +4. **Connector discovery** - Use iroh's discovery vs explicit config? + - Explicit config is simpler and more secure + - Discovery could help with "find my connectors" UX + - Leaning toward: explicit config required, discovery as optional convenience + +5. **iroh-rpc schema** - Define connector RPC interface formally? 
+ - Could generate from protobuf/schema + - Or just use serde JSON over iroh-rpc + - Leaning toward: start with serde, formalize later if needed diff --git a/nix/modules/devshell.nix b/nix/modules/devshell.nix index 6057bfb..be2777d 100644 --- a/nix/modules/devshell.nix +++ b/nix/modules/devshell.nix @@ -24,6 +24,7 @@ config.pre-commit.devShell # See ./nix/modules/pre-commit.nix ]; + DATABASE_URL = "sqlite:crates/pattern_db/dev.db"; packages = with pkgsWithUnfree; [ just nixd # Nix language server @@ -36,6 +37,7 @@ jujutsu git gh + sqlx-cli ]; }; }; diff --git a/nix/modules/rust.nix b/nix/modules/rust.nix index 582de79..92e5894 100644 --- a/nix/modules/rust.nix +++ b/nix/modules/rust.nix @@ -50,6 +50,19 @@ }; }; }; + "pattern-db" = { + imports = [ globalCrateConfig ]; + autoWire = [ "crate" "clippy" ]; + path = ./../../crates/pattern_db; + crane = { + args = { + buildInputs = + commonBuildInputs + ++ [ + ]; + }; + }; + }; "pattern-nd" = { imports = [ globalCrateConfig ]; @@ -73,7 +86,6 @@ }; }; - "pattern-cli" = { imports = [ globalCrateConfig ]; autoWire = [ "crate" "clippy" ]; @@ -107,9 +119,6 @@ }; }; }; - - - }; }; From 28162cc304c7e67b5240723a998f764174734012 Mon Sep 17 00:00:00 2001 From: Orual Date: Tue, 23 Dec 2025 12:33:55 -0500 Subject: [PATCH 02/40] db module done enough --- crates/pattern_core/src/embeddings/ollama.rs | 2 + crates/pattern_db/src/fts.rs | 557 +++++++++++++++ crates/pattern_db/src/lib.rs | 41 ++ crates/pattern_db/src/models/agent.rs | 98 +++ crates/pattern_db/src/models/event.rs | 114 +++ crates/pattern_db/src/models/folder.rs | 161 +++++ crates/pattern_db/src/models/memory.rs | 80 +++ crates/pattern_db/src/models/message.rs | 11 + crates/pattern_db/src/models/migration.rs | 123 ++++ crates/pattern_db/src/models/mod.rs | 21 +- crates/pattern_db/src/models/source.rs | 143 ++++ crates/pattern_db/src/models/task.rs | 187 +++++ crates/pattern_db/src/queries/event.rs | 323 +++++++++ crates/pattern_db/src/queries/folder.rs | 372 ++++++++++ crates/pattern_db/src/queries/memory.rs | 328 ++++++++- crates/pattern_db/src/queries/message.rs | 14 +- crates/pattern_db/src/queries/mod.rs | 12 + crates/pattern_db/src/queries/source.rs | 281 ++++++++ crates/pattern_db/src/queries/task.rs | 393 +++++++++++ crates/pattern_db/src/search.rs | 699 +++++++++++++++++++ docs/refactoring/v2-database-design.md | 103 +++ docs/refactoring/v2-pattern-db-status.md | 336 +++++++-- 22 files changed, 4339 insertions(+), 60 deletions(-) create mode 100644 crates/pattern_db/src/fts.rs create mode 100644 crates/pattern_db/src/models/event.rs create mode 100644 crates/pattern_db/src/models/folder.rs create mode 100644 crates/pattern_db/src/models/migration.rs create mode 100644 crates/pattern_db/src/models/source.rs create mode 100644 crates/pattern_db/src/models/task.rs create mode 100644 crates/pattern_db/src/queries/event.rs create mode 100644 crates/pattern_db/src/queries/folder.rs create mode 100644 crates/pattern_db/src/queries/source.rs create mode 100644 crates/pattern_db/src/queries/task.rs create mode 100644 crates/pattern_db/src/search.rs diff --git a/crates/pattern_core/src/embeddings/ollama.rs b/crates/pattern_core/src/embeddings/ollama.rs index 183a3e9..3098221 100644 --- a/crates/pattern_core/src/embeddings/ollama.rs +++ b/crates/pattern_core/src/embeddings/ollama.rs @@ -157,6 +157,7 @@ mod tests { } #[tokio::test] + #[ignore = "requires running ollama server with all-minilm model"] async fn test_ollama_embed() { let embedder = OllamaEmbedder::new( "all-minilm".to_string(), @@ -172,6 
+173,7 @@ mod tests {
 
     #[tokio::test]
     async fn test_ollama_empty_input() {
+        // This test doesn't need a server - it fails before making the request
         let embedder = OllamaEmbedder::new(
             "all-minilm".to_string(),
             "http://localhost:11434".to_string(),
diff --git a/crates/pattern_db/src/fts.rs b/crates/pattern_db/src/fts.rs
new file mode 100644
index 0000000..d642480
--- /dev/null
+++ b/crates/pattern_db/src/fts.rs
@@ -0,0 +1,557 @@
+//! Full-text search functionality using FTS5.
+//!
+//! This module provides full-text search over messages, memory blocks, and
+//! archival entries. FTS5 is built into SQLite, no extension loading required.
+//!
+//! Unlike sqlite-vec, FTS5 uses standard SQL syntax that sqlx understands,
+//! so we can use compile-time checked queries here.
+//!
+//! # External Content Tables
+//!
+//! The FTS tables are configured as "external content" tables, meaning they
+//! index data from the main tables but don't store a copy of the content.
+//! Triggers keep the FTS indexes in sync with the source tables.
+//!
+//! # FTS5 Query Syntax
+//!
+//! - Basic search: `word1 word2` (matches documents containing both)
+//! - Phrase search: `"exact phrase"`
+//! - OR search: `word1 OR word2`
+//! - NOT search: `word1 NOT word2`
+//! - Prefix search: `prefix*`
+//! - Column filter: `column:word` (not used since our tables are single-column)
+//!
+//! See: https://www.sqlite.org/fts5.html
+
+use sqlx::SqlitePool;
+
+use crate::error::{DbError, DbResult};
+
+/// Result of a full-text search.
+#[derive(Debug, Clone)]
+pub struct FtsSearchResult {
+    /// Rowid of the matching record in the source table
+    pub rowid: i64,
+    /// Relevance rank (lower is better, typically negative)
+    pub rank: f64,
+    /// Optional highlighted snippet
+    pub snippet: Option<String>,
+}
+
+/// FTS match with the original content ID.
+#[derive(Debug, Clone)]
+pub struct FtsMatch {
+    /// The content ID from the source table
+    pub id: String,
+    /// The matched content
+    pub content: String,
+    /// Relevance rank (lower is better)
+    pub rank: f64,
+}
+
+/// Search messages using full-text search.
+///
+/// Returns messages matching the FTS5 query, ordered by relevance.
+/// The query uses FTS5 syntax (see module docs).
+pub async fn search_messages(
+    pool: &SqlitePool,
+    query: &str,
+    agent_id: Option<&str>,
+    limit: i64,
+) -> DbResult<Vec<FtsMatch>> {
+    // Note: We use runtime query here because we need to join with the source
+    // table to get the full content and filter by agent_id.
+    //
+    // FTS5's MATCH is supported by sqlx since PR #396 (June 2020), but the
+    // bm25() ranking function and complex joins are easier with runtime queries.
+    let results = if let Some(agent_id) = agent_id {
+        sqlx::query_as::<_, (String, Option<String>, f64)>(
+            r#"
+            SELECT m.id, m.content, bm25(messages_fts) as rank
+            FROM messages_fts
+            JOIN messages m ON messages_fts.rowid = m.rowid
+            WHERE messages_fts MATCH ?
+              AND m.agent_id = ?
+            ORDER BY rank
+            LIMIT ?
+            "#,
+        )
+        .bind(query)
+        .bind(agent_id)
+        .bind(limit)
+        .fetch_all(pool)
+        .await?
+    } else {
+        sqlx::query_as::<_, (String, Option<String>, f64)>(
+            r#"
+            SELECT m.id, m.content, bm25(messages_fts) as rank
+            FROM messages_fts
+            JOIN messages m ON messages_fts.rowid = m.rowid
+            WHERE messages_fts MATCH ?
+            ORDER BY rank
+            LIMIT ?
+            "#,
+        )
+        .bind(query)
+        .bind(limit)
+        .fetch_all(pool)
+        .await?
+    };
+
+    Ok(results
+        .into_iter()
+        .map(|(id, content, rank)| FtsMatch {
+            id,
+            content: content.unwrap_or_default(),
+            rank,
+        })
+        .collect())
+}
+
+/// Search memory blocks using full-text search.
+///
+/// Searches the content_preview field of memory blocks.
+pub async fn search_memory_blocks(
+    pool: &SqlitePool,
+    query: &str,
+    agent_id: Option<&str>,
+    limit: i64,
+) -> DbResult<Vec<FtsMatch>> {
+    let results = if let Some(agent_id) = agent_id {
+        sqlx::query_as::<_, (String, Option<String>, f64)>(
+            r#"
+            SELECT mb.id, mb.content_preview, bm25(memory_blocks_fts) as rank
+            FROM memory_blocks_fts
+            JOIN memory_blocks mb ON memory_blocks_fts.rowid = mb.rowid
+            WHERE memory_blocks_fts MATCH ?
+              AND mb.agent_id = ?
+            ORDER BY rank
+            LIMIT ?
+            "#,
+        )
+        .bind(query)
+        .bind(agent_id)
+        .bind(limit)
+        .fetch_all(pool)
+        .await?
+    } else {
+        sqlx::query_as::<_, (String, Option<String>, f64)>(
+            r#"
+            SELECT mb.id, mb.content_preview, bm25(memory_blocks_fts) as rank
+            FROM memory_blocks_fts
+            JOIN memory_blocks mb ON memory_blocks_fts.rowid = mb.rowid
+            WHERE memory_blocks_fts MATCH ?
+            ORDER BY rank
+            LIMIT ?
+            "#,
+        )
+        .bind(query)
+        .bind(limit)
+        .fetch_all(pool)
+        .await?
+    };
+
+    Ok(results
+        .into_iter()
+        .map(|(id, content, rank)| FtsMatch {
+            id,
+            content: content.unwrap_or_default(),
+            rank,
+        })
+        .collect())
+}
+
+/// Search archival entries using full-text search.
+pub async fn search_archival(
+    pool: &SqlitePool,
+    query: &str,
+    agent_id: Option<&str>,
+    limit: i64,
+) -> DbResult<Vec<FtsMatch>> {
+    let results = if let Some(agent_id) = agent_id {
+        sqlx::query_as::<_, (String, String, f64)>(
+            r#"
+            SELECT ae.id, ae.content, bm25(archival_fts) as rank
+            FROM archival_fts
+            JOIN archival_entries ae ON archival_fts.rowid = ae.rowid
+            WHERE archival_fts MATCH ?
+              AND ae.agent_id = ?
+            ORDER BY rank
+            LIMIT ?
+            "#,
+        )
+        .bind(query)
+        .bind(agent_id)
+        .bind(limit)
+        .fetch_all(pool)
+        .await?
+    } else {
+        sqlx::query_as::<_, (String, String, f64)>(
+            r#"
+            SELECT ae.id, ae.content, bm25(archival_fts) as rank
+            FROM archival_fts
+            JOIN archival_entries ae ON archival_fts.rowid = ae.rowid
+            WHERE archival_fts MATCH ?
+            ORDER BY rank
+            LIMIT ?
+            "#,
+        )
+        .bind(query)
+        .bind(limit)
+        .fetch_all(pool)
+        .await?
+    };
+
+    Ok(results
+        .into_iter()
+        .map(|(id, content, rank)| FtsMatch { id, content, rank })
+        .collect())
+}
+
+/// Search across all content types.
+///
+/// Performs separate searches on messages, memory blocks, and archival entries,
+/// then merges results by rank.
+pub async fn search_all(
+    pool: &SqlitePool,
+    query: &str,
+    agent_id: Option<&str>,
+    limit: i64,
+) -> DbResult<Vec<(FtsMatch, FtsContentType)>> {
+    // Search each type concurrently
+    let (messages, blocks, archival) = tokio::try_join!(
+        search_messages(pool, query, agent_id, limit),
+        search_memory_blocks(pool, query, agent_id, limit),
+        search_archival(pool, query, agent_id, limit),
+    )?;
+
+    // Merge and sort by rank
+    let mut all: Vec<(FtsMatch, FtsContentType)> = messages
+        .into_iter()
+        .map(|m| (m, FtsContentType::Message))
+        .chain(blocks.into_iter().map(|m| (m, FtsContentType::MemoryBlock)))
+        .chain(
+            archival
+                .into_iter()
+                .map(|m| (m, FtsContentType::ArchivalEntry)),
+        )
+        .collect();
+
+    // Sort by rank (lower is better)
+    all.sort_by(|a, b| {
+        a.0.rank
+            .partial_cmp(&b.0.rank)
+            .unwrap_or(std::cmp::Ordering::Equal)
+    });
+
+    // Truncate to limit
+    all.truncate(limit as usize);
+
+    Ok(all)
+}
+
+/// Content types for FTS search.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum FtsContentType {
+    Message,
+    MemoryBlock,
+    ArchivalEntry,
+}
+
+impl FtsContentType {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            FtsContentType::Message => "message",
+            FtsContentType::MemoryBlock => "memory_block",
+            FtsContentType::ArchivalEntry => "archival_entry",
+        }
+    }
+}
+
+/// Rebuild the FTS index for messages.
+///
+/// Use this after bulk imports or if the index gets out of sync.
+pub async fn rebuild_messages_fts(pool: &SqlitePool) -> DbResult<()> {
+    // FTS5 rebuild command
+    sqlx::query("INSERT INTO messages_fts(messages_fts) VALUES('rebuild')")
+        .execute(pool)
+        .await?;
+    Ok(())
+}
+
+/// Rebuild the FTS index for memory blocks.
+pub async fn rebuild_memory_blocks_fts(pool: &SqlitePool) -> DbResult<()> {
+    sqlx::query("INSERT INTO memory_blocks_fts(memory_blocks_fts) VALUES('rebuild')")
+        .execute(pool)
+        .await?;
+    Ok(())
+}
+
+/// Rebuild the FTS index for archival entries.
+pub async fn rebuild_archival_fts(pool: &SqlitePool) -> DbResult<()> {
+    sqlx::query("INSERT INTO archival_fts(archival_fts) VALUES('rebuild')")
+        .execute(pool)
+        .await?;
+    Ok(())
+}
+
+/// Rebuild all FTS indexes.
+pub async fn rebuild_all_fts(pool: &SqlitePool) -> DbResult<()> {
+    tokio::try_join!(
+        rebuild_messages_fts(pool),
+        rebuild_memory_blocks_fts(pool),
+        rebuild_archival_fts(pool),
+    )?;
+    Ok(())
+}
+
+/// Get FTS index statistics.
+#[derive(Debug, Clone, Default)]
+pub struct FtsStats {
+    pub messages_indexed: u64,
+    pub memory_blocks_indexed: u64,
+    pub archival_entries_indexed: u64,
+}
+
+/// Get statistics about FTS indexes.
+pub async fn get_fts_stats(pool: &SqlitePool) -> DbResult<FtsStats> {
+    // Count indexed rows in each FTS table
+    let messages: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM messages_fts")
+        .fetch_one(pool)
+        .await?;
+
+    let memory_blocks: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM memory_blocks_fts")
+        .fetch_one(pool)
+        .await?;
+
+    let archival: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM archival_fts")
+        .fetch_one(pool)
+        .await?;
+
+    Ok(FtsStats {
+        messages_indexed: messages.0 as u64,
+        memory_blocks_indexed: memory_blocks.0 as u64,
+        archival_entries_indexed: archival.0 as u64,
+    })
+}
+
+/// Validate FTS query syntax.
+///
+/// Returns an error if the query contains invalid FTS5 syntax.
+pub fn validate_fts_query(query: &str) -> DbResult<()> {
+    // Basic validation - FTS5 will give better errors at runtime,
+    // but we can catch obvious issues early.
+
+    // Empty queries are invalid
+    if query.trim().is_empty() {
+        return Err(DbError::invalid_data("FTS query cannot be empty"));
+    }
+
+    // Unbalanced quotes
+    let quote_count = query.chars().filter(|c| *c == '"').count();
+    if quote_count % 2 != 0 {
+        return Err(DbError::invalid_data("Unbalanced quotes in FTS query"));
+    }
+
+    // Unbalanced parentheses
+    let open_parens = query.chars().filter(|c| *c == '(').count();
+    let close_parens = query.chars().filter(|c| *c == ')').count();
+    if open_parens != close_parens {
+        return Err(DbError::invalid_data("Unbalanced parentheses in FTS query"));
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ConstellationDb;
+
+    /// Helper to create a test agent for foreign key constraints.
+ async fn create_test_agent(pool: &SqlitePool, id: &str) { + sqlx::query( + r#" + INSERT INTO agents (id, name, model_provider, model_name, system_prompt, config, enabled_tools, status, created_at, updated_at) + VALUES (?, ?, 'anthropic', 'claude-3', 'test prompt', '{}', '[]', 'active', datetime('now'), datetime('now')) + "#, + ) + .bind(id) + .bind(format!("{}_name", id)) + .execute(pool) + .await + .unwrap(); + } + + #[test] + fn test_validate_fts_query() { + // Valid queries + assert!(validate_fts_query("hello world").is_ok()); + assert!(validate_fts_query("\"exact phrase\"").is_ok()); + assert!(validate_fts_query("hello OR world").is_ok()); + assert!(validate_fts_query("prefix*").is_ok()); + assert!(validate_fts_query("(hello OR world) AND foo").is_ok()); + + // Invalid queries + assert!(validate_fts_query("").is_err()); + assert!(validate_fts_query(" ").is_err()); + assert!(validate_fts_query("\"unbalanced").is_err()); + assert!(validate_fts_query("(unbalanced").is_err()); + } + + #[test] + fn test_fts_content_type() { + assert_eq!(FtsContentType::Message.as_str(), "message"); + assert_eq!(FtsContentType::MemoryBlock.as_str(), "memory_block"); + assert_eq!(FtsContentType::ArchivalEntry.as_str(), "archival_entry"); + } + + #[tokio::test] + async fn test_fts_tables_exist() { + let db = ConstellationDb::open_in_memory().await.unwrap(); + + // FTS tables should be created by migration + let stats = get_fts_stats(db.pool()).await.unwrap(); + assert_eq!(stats.messages_indexed, 0); + assert_eq!(stats.memory_blocks_indexed, 0); + assert_eq!(stats.archival_entries_indexed, 0); + } + + #[tokio::test] + async fn test_fts_message_search() { + let db = ConstellationDb::open_in_memory().await.unwrap(); + + // Create agent first (foreign key constraint) + create_test_agent(db.pool(), "agent_1").await; + + // Insert test messages + sqlx::query( + r#" + INSERT INTO messages (id, agent_id, position, role, content, is_archived, created_at) + VALUES ('msg_1', 'agent_1', '1', 'user', 'hello world this is a test message', false, datetime('now')) + "#, + ) + .execute(db.pool()) + .await + .unwrap(); + + sqlx::query( + r#" + INSERT INTO messages (id, agent_id, position, role, content, is_archived, created_at) + VALUES ('msg_2', 'agent_1', '2', 'assistant', 'goodbye cruel world', false, datetime('now')) + "#, + ) + .execute(db.pool()) + .await + .unwrap(); + + // Search for "hello" - should find msg_1 + let results = search_messages(db.pool(), "hello", None, 10).await.unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "msg_1"); + assert!(results[0].content.contains("hello")); + + // Search for "world" - should find both + let results = search_messages(db.pool(), "world", None, 10).await.unwrap(); + assert_eq!(results.len(), 2); + + // Search with agent filter + let results = search_messages(db.pool(), "world", Some("agent_1"), 10) + .await + .unwrap(); + assert_eq!(results.len(), 2); + + let results = search_messages(db.pool(), "world", Some("agent_other"), 10) + .await + .unwrap(); + assert_eq!(results.len(), 0); + } + + #[tokio::test] + async fn test_fts_rebuild() { + let db = ConstellationDb::open_in_memory().await.unwrap(); + + // Create agent first + create_test_agent(db.pool(), "agent_1").await; + + // Insert a message + sqlx::query( + r#" + INSERT INTO messages (id, agent_id, position, role, content, is_archived, created_at) + VALUES ('msg_rebuild', 'agent_1', '1', 'user', 'rebuild test message', false, datetime('now')) + "#, + ) + .execute(db.pool()) + .await + .unwrap(); + + // 
Rebuild should not error + rebuild_messages_fts(db.pool()).await.unwrap(); + + // Should still be searchable + let results = search_messages(db.pool(), "rebuild", None, 10) + .await + .unwrap(); + assert_eq!(results.len(), 1); + } + + #[tokio::test] + async fn test_fts_phrase_search() { + let db = ConstellationDb::open_in_memory().await.unwrap(); + + // Create agent first + create_test_agent(db.pool(), "agent_1").await; + + sqlx::query( + r#" + INSERT INTO messages (id, agent_id, position, role, content, is_archived, created_at) + VALUES ('msg_phrase', 'agent_1', '1', 'user', 'the quick brown fox jumps over the lazy dog', false, datetime('now')) + "#, + ) + .execute(db.pool()) + .await + .unwrap(); + + // Exact phrase search + let results = search_messages(db.pool(), "\"quick brown fox\"", None, 10) + .await + .unwrap(); + assert_eq!(results.len(), 1); + + // Non-matching phrase + let results = search_messages(db.pool(), "\"brown quick fox\"", None, 10) + .await + .unwrap(); + assert_eq!(results.len(), 0); + } + + #[tokio::test] + async fn test_fts_prefix_search() { + let db = ConstellationDb::open_in_memory().await.unwrap(); + + // Create agent first + create_test_agent(db.pool(), "agent_1").await; + + sqlx::query( + r#" + INSERT INTO messages (id, agent_id, position, role, content, is_archived, created_at) + VALUES ('msg_prefix', 'agent_1', '1', 'user', 'programming is fun', false, datetime('now')) + "#, + ) + .execute(db.pool()) + .await + .unwrap(); + + // Prefix search + let results = search_messages(db.pool(), "prog*", None, 10).await.unwrap(); + assert_eq!(results.len(), 1); + + let results = search_messages(db.pool(), "program*", None, 10) + .await + .unwrap(); + assert_eq!(results.len(), 1); + + let results = search_messages(db.pool(), "xyz*", None, 10).await.unwrap(); + assert_eq!(results.len(), 0); + } +} diff --git a/crates/pattern_db/src/lib.rs b/crates/pattern_db/src/lib.rs index 4e44a4e..b5c50df 100644 --- a/crates/pattern_db/src/lib.rs +++ b/crates/pattern_db/src/lib.rs @@ -19,8 +19,10 @@ pub mod connection; pub mod error; +pub mod fts; pub mod models; pub mod queries; +pub mod search; pub mod vector; pub use connection::ConstellationDb; @@ -32,6 +34,15 @@ pub use vector::{ verify_sqlite_vec, }; +// Re-export FTS module types +pub use fts::{FtsContentType, FtsMatch, FtsSearchResult, FtsStats}; + +// Re-export hybrid search types +pub use search::{ + ContentFilter, FusionMethod, HybridSearchBuilder, ScoreBreakdown, SearchContentType, + SearchMode, SearchResult, +}; + // Re-export key model types for convenience pub use models::{ // Coordination models @@ -39,6 +50,8 @@ pub use models::{ ActivityEventType, // Agent models Agent, + // Source models + AgentDataSource, AgentGroup, AgentStatus, AgentSummary, @@ -49,10 +62,24 @@ pub use models::{ ConstellationSummary, CoordinationState, CoordinationTask, + DataSource, + // Migration models + EntityImport, + // Event models + Event, EventImportance, + EventOccurrence, + // Folder models + FilePassage, + Folder, + FolderAccess, + FolderAttachment, + FolderFile, + FolderPathType, GroupMember, GroupMemberRole, HandoffNote, + IssueSeverity, MemoryBlock, MemoryBlockCheckpoint, MemoryBlockType, @@ -62,9 +89,23 @@ pub use models::{ Message, MessageRole, MessageSummary, + MigrationAudit, + MigrationIssue, + MigrationLog, + MigrationStats, + ModelRoutingConfig, + ModelRoutingRule, NotableEvent, + OccurrenceStatus, PatternType, + RoutingCondition, SharedBlockAttachment, + SourceType, + // Task models (ADHD) + Task, TaskPriority, TaskStatus, 
+    TaskSummary,
+    UserTaskPriority,
+    UserTaskStatus,
+};
diff --git a/crates/pattern_db/src/models/agent.rs b/crates/pattern_db/src/models/agent.rs
index 5a30494..c96439f 100644
--- a/crates/pattern_db/src/models/agent.rs
+++ b/crates/pattern_db/src/models/agent.rs
@@ -5,6 +5,104 @@ use serde::{Deserialize, Serialize};
 use sqlx::FromRow;
 use sqlx::types::Json;
 
+// ============================================================================
+// Model Routing Configuration
+// ============================================================================
+
+/// Configuration for dynamic model routing.
+///
+/// Allows agents to switch between models from the same provider
+/// based on rules (cost, latency, capability requirements).
+/// Stored as JSON in the agent's config field.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ModelRoutingConfig {
+    /// Fallback models to try if primary fails (in order of preference)
+    #[serde(default)]
+    pub fallback_models: Vec<String>,
+
+    /// Rules for dynamic model selection
+    #[serde(default)]
+    pub rules: Vec<ModelRoutingRule>,
+
+    /// Whether to allow automatic fallback on rate limits
+    #[serde(default = "default_true")]
+    pub fallback_on_rate_limit: bool,
+
+    /// Whether to allow automatic fallback on context length exceeded
+    #[serde(default = "default_true")]
+    pub fallback_on_context_overflow: bool,
+
+    /// Maximum retries before giving up
+    #[serde(default = "default_max_retries")]
+    pub max_retries: u32,
+}
+
+fn default_true() -> bool {
+    true
+}
+
+fn default_max_retries() -> u32 {
+    2
+}
+
+/// A rule for selecting which model to use.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ModelRoutingRule {
+    /// Condition that triggers this rule
+    pub condition: RoutingCondition,
+
+    /// Model to use when condition matches
+    pub model: String,
+
+    /// Optional: override other settings when this rule matches
+    pub temperature_override: Option<f32>,
+    pub max_tokens_override: Option<u32>,
+}
+
+/// Conditions for model routing rules.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum RoutingCondition {
+    /// Use this model when estimated cost exceeds threshold
+    CostThreshold {
+        /// Maximum cost in USD before switching
+        max_usd: f32,
+    },
+
+    /// Use this model when context length exceeds threshold
+    ContextLength {
+        /// Minimum tokens to trigger this rule
+        min_tokens: u32,
+    },
+
+    /// Use this model for specific tool calls
+    ToolCall {
+        /// Tool names that trigger this rule
+        tools: Vec<String>,
+    },
+
+    /// Use this model during specific time windows (e.g., off-peak for expensive models)
+    TimeWindow {
+        /// Start hour (0-23, UTC)
+        start_hour: u8,
+        /// End hour (0-23, UTC)
+        end_hour: u8,
+    },
+
+    /// Use this model for specific source types
+    Source {
+        /// Source types that trigger this rule
+        sources: Vec<String>,
+    },
+
+    /// Always use this model (useful as a catch-all)
+    Always,
+}
+
+// ============================================================================
+// Agent Models
+// ============================================================================
+
 /// An agent in the constellation.
 #[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
 pub struct Agent {
diff --git a/crates/pattern_db/src/models/event.rs b/crates/pattern_db/src/models/event.rs
new file mode 100644
index 0000000..b628f40
--- /dev/null
+++ b/crates/pattern_db/src/models/event.rs
@@ -0,0 +1,114 @@
+//! Event and reminder models.
+//!
+//! Calendar events with optional recurrence and reminder support.
+//! 
Used for time-based triggers and ADHD-friendly scheduling.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::FromRow;
+
+/// A calendar event or reminder.
+///
+/// Events can be one-time or recurring, and can trigger agent actions
+/// via the Timer data source.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct Event {
+    /// Unique identifier
+    pub id: String,
+
+    /// Agent associated with this event (None = constellation-level)
+    pub agent_id: Option<String>,
+
+    /// Event title
+    pub title: String,
+
+    /// Event description
+    pub description: Option<String>,
+
+    /// When the event starts
+    pub starts_at: DateTime<Utc>,
+
+    /// When the event ends (None = point-in-time event)
+    pub ends_at: Option<DateTime<Utc>>,
+
+    /// Recurrence rule in iCal RRULE format
+    /// Examples:
+    /// - "FREQ=DAILY" (every day)
+    /// - "FREQ=WEEKLY;BYDAY=MO,WE,FR" (Mon/Wed/Fri)
+    /// - "FREQ=MONTHLY;BYMONTHDAY=1" (1st of each month)
+    pub rrule: Option<String>,
+
+    /// Minutes before event to trigger reminder (None = no reminder)
+    pub reminder_minutes: Option<i64>,
+
+    /// Whether the event is all-day (vs specific time)
+    pub all_day: bool,
+
+    /// Event location (physical or virtual)
+    pub location: Option<String>,
+
+    /// External calendar source ID (for sync)
+    pub external_id: Option<String>,
+
+    /// External calendar source type (google, ical, etc.)
+    pub external_source: Option<String>,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+
+    /// Last update timestamp
+    pub updated_at: DateTime<Utc>,
+}
+
+/// Event occurrence for recurring events.
+///
+/// When a recurring event fires, we may want to track individual occurrences
+/// (e.g., for marking attendance, snoozing, or noting outcomes).
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct EventOccurrence {
+    /// Unique identifier
+    pub id: String,
+
+    /// Parent event
+    pub event_id: String,
+
+    /// When this occurrence starts
+    pub starts_at: DateTime<Utc>,
+
+    /// When this occurrence ends
+    pub ends_at: Option<DateTime<Utc>>,
+
+    /// Status of this occurrence
+    pub status: OccurrenceStatus,
+
+    /// Notes for this specific occurrence
+    pub notes: Option<String>,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+}
+
+/// Status of an event occurrence.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum OccurrenceStatus {
+    /// Upcoming, not yet happened
+    Scheduled,
+    /// Currently happening
+    Active,
+    /// Completed as planned
+    Completed,
+    /// Skipped this occurrence
+    Skipped,
+    /// Reminder was snoozed
+    Snoozed,
+    /// Cancelled this occurrence (but not the series)
+    Cancelled,
+}
+
+impl Default for OccurrenceStatus {
+    fn default() -> Self {
+        Self::Scheduled
+    }
+}
diff --git a/crates/pattern_db/src/models/folder.rs b/crates/pattern_db/src/models/folder.rs
new file mode 100644
index 0000000..769e758
--- /dev/null
+++ b/crates/pattern_db/src/models/folder.rs
@@ -0,0 +1,161 @@
+//! Folder and file models.
+//!
+//! Manages file access for agents with semantic search over file contents.
+//! Files are chunked into passages for embedding and retrieval.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::FromRow;
+
+/// A folder containing files accessible to agents.
+///
+/// Folders can be:
+/// - Local filesystem paths
+/// - Virtual (content stored in DB)
+/// - Remote (URLs, cloud storage)
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct Folder {
+    /// Unique identifier
+    pub id: String,
+
+    /// Human-readable name (unique within constellation)
+    pub name: String,
+
+    /// Description of folder contents/purpose
+    pub description: Option<String>,
+
+    /// Type of folder path
+    pub path_type: FolderPathType,
+
+    /// Actual path or URL (interpretation depends on path_type)
+    pub path_value: Option<String>,
+
+    /// Embedding model used for this folder's passages
+    pub embedding_model: String,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+}
+
+/// Folder path types.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum FolderPathType {
+    /// Local filesystem path
+    Local,
+    /// Content stored in database (no external path)
+    Virtual,
+    /// Remote URL or cloud storage path
+    Remote,
+}
+
+impl std::fmt::Display for FolderPathType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Local => write!(f, "local"),
+            Self::Virtual => write!(f, "virtual"),
+            Self::Remote => write!(f, "remote"),
+        }
+    }
+}
+
+/// A file within a folder.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct FolderFile {
+    /// Unique identifier
+    pub id: String,
+
+    /// Parent folder
+    pub folder_id: String,
+
+    /// Filename (unique within folder)
+    pub name: String,
+
+    /// MIME type
+    pub content_type: Option<String>,
+
+    /// File size in bytes
+    pub size_bytes: Option<i64>,
+
+    /// File content (for virtual folders)
+    pub content: Option<Vec<u8>>,
+
+    /// When the file was uploaded/detected
+    pub uploaded_at: DateTime<Utc>,
+
+    /// When the file was last indexed (passages generated)
+    pub indexed_at: Option<DateTime<Utc>>,
+}
+
+/// A passage (chunk) of a file for semantic search.
+///
+/// Files are split into passages for embedding. Passages are the unit
+/// of retrieval - when an agent searches, they get relevant passages.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct FilePassage {
+    /// Unique identifier
+    pub id: String,
+
+    /// Parent file
+    pub file_id: String,
+
+    /// Passage content (text chunk)
+    pub content: String,
+
+    /// Starting line in source file (for code files)
+    pub start_line: Option<i64>,
+
+    /// Ending line in source file
+    pub end_line: Option<i64>,
+
+    /// Chunk index within file (0-based)
+    pub chunk_index: i64,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+}
+
+/// Attachment linking a folder to an agent.
+///
+/// Determines what access level an agent has to a folder's files.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct FolderAttachment {
+    /// Folder being attached
+    pub folder_id: String,
+
+    /// Agent gaining access
+    pub agent_id: String,
+
+    /// Access level
+    pub access: FolderAccess,
+
+    /// When the attachment was created
+    pub attached_at: DateTime<Utc>,
+}
+
+/// Folder access levels.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum FolderAccess {
+    /// Can read files but not modify
+    Read,
+    /// Can read and write files
+    ReadWrite,
+}
+
+impl Default for FolderAccess {
+    fn default() -> Self {
+        Self::Read
+    }
+}
+
+impl std::fmt::Display for FolderAccess {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Read => write!(f, "read"),
+            Self::ReadWrite => write!(f, "read_write"),
+        }
+    }
+}
diff --git a/crates/pattern_db/src/models/memory.rs b/crates/pattern_db/src/models/memory.rs
index ff37172..09452e7 100644
--- a/crates/pattern_db/src/models/memory.rs
+++ b/crates/pattern_db/src/models/memory.rs
@@ -50,6 +50,12 @@ pub struct MemoryBlock {
     /// Whether this block is active (false = soft deleted)
     pub is_active: bool,
 
+    /// Loro frontier for version tracking (serialized)
+    pub frontier: Option<Vec<u8>>,
+
+    /// Last assigned sequence number for updates
+    pub last_seq: i64,
+
     /// Creation timestamp
     pub created_at: DateTime<Utc>,
@@ -228,6 +234,9 @@ pub struct MemoryBlockCheckpoint {
     /// How many updates were consolidated into this checkpoint
     pub updates_consolidated: i64,
+
+    /// Loro frontier at this checkpoint (for version tracking)
+    pub frontier: Option<Vec<u8>>,
 }
 
 /// An archival memory entry.
@@ -273,3 +282,74 @@ pub struct SharedBlockAttachment {
     /// When the attachment was created
     pub attached_at: DateTime<Utc>,
 }
+
+/// An incremental update to a memory block.
+///
+/// Updates are Loro deltas stored between checkpoints. On read, the checkpoint
+/// is loaded and updates are applied in seq order to reconstruct current state.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct MemoryBlockUpdate {
+    /// Auto-incrementing ID
+    pub id: i64,
+
+    /// Block this update belongs to
+    pub block_id: String,
+
+    /// Sequence number within the block (monotonically increasing)
+    pub seq: i64,
+
+    /// Loro update blob (delta)
+    pub update_blob: Vec<u8>,
+
+    /// Size of update_blob in bytes (for consolidation decisions)
+    pub byte_size: i64,
+
+    /// Source of this update
+    pub source: Option<String>,
+
+    /// When this update was created
+    pub created_at: DateTime<Utc>,
+}
+
+/// Update source types.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum UpdateSource {
+    /// Update from agent action
+    Agent,
+    /// Update from sync with another instance
+    Sync,
+    /// Update from v1->v2 migration
+    Migration,
+    /// Manual update (user/admin)
+    Manual,
+}
+
+impl UpdateSource {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::Agent => "agent",
+            Self::Sync => "sync",
+            Self::Migration => "migration",
+            Self::Manual => "manual",
+        }
+    }
+}
+
+impl std::fmt::Display for UpdateSource {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.as_str())
+    }
+}
+
+/// Statistics about pending updates for a block.
+///
+/// Used for consolidation decisions (e.g., consolidate when count > N or bytes > M).
+#[derive(Debug, Clone, Default)]
+pub struct UpdateStats {
+    /// Number of pending updates
+    pub count: i64,
+    /// Total bytes of all pending updates
+    pub total_bytes: i64,
+    /// Highest seq number (or 0 if no updates)
+    pub max_seq: i64,
+}
diff --git a/crates/pattern_db/src/models/message.rs b/crates/pattern_db/src/models/message.rs
index ab7bdb0..1ff2e35 100644
--- a/crates/pattern_db/src/models/message.rs
+++ b/crates/pattern_db/src/models/message.rs
@@ -94,6 +94,9 @@ impl std::fmt::Display for MessageRole {
 /// When conversation history grows too long, older messages are compressed
 /// into summaries. The original messages are marked as archived but retained
 /// for search and history purposes.
+///
+/// Summaries can be chained: when multiple summaries accumulate, they can be
+/// summarized again into a higher-level summary (summary of summaries).
 #[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
 pub struct ArchiveSummary {
     /// Unique identifier
@@ -114,6 +117,14 @@ pub struct ArchiveSummary {
     /// Number of messages summarized
     pub message_count: i64,
 
+    /// Previous summary this one extends (for chaining)
+    /// When summarizing summaries, this links to the prior summary
+    /// that was incorporated into this one.
+    pub previous_summary_id: Option<String>,
+
+    /// Depth of summary chain (0 = direct message summary, 1+ = summary of summaries)
+    pub depth: i64,
+
     /// Creation timestamp
     pub created_at: DateTime<Utc>,
 }
diff --git a/crates/pattern_db/src/models/migration.rs b/crates/pattern_db/src/models/migration.rs
new file mode 100644
index 0000000..6f901ed
--- /dev/null
+++ b/crates/pattern_db/src/models/migration.rs
@@ -0,0 +1,123 @@
+//! Migration audit models.
+//!
+//! Tracks v1 → v2 migration decisions and issues for debugging and rollback.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::FromRow;
+use sqlx::types::Json;
+
+/// Record of a v1 to v2 migration operation.
+///
+/// Each CAR file import creates an audit record tracking what was imported,
+/// any issues found, and how they were resolved.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct MigrationAudit {
+    /// Unique identifier
+    pub id: String,
+
+    /// When the import occurred
+    pub imported_at: DateTime<Utc>,
+
+    /// Source CAR file path
+    pub source_file: String,
+
+    /// Source format version
+    pub source_version: i64,
+
+    /// Number of issues detected during import
+    pub issues_found: i64,
+
+    /// Number of issues that were automatically resolved
+    pub issues_resolved: i64,
+
+    /// Full audit log as JSON
+    /// Contains detailed record of:
+    /// - Entities imported
+    /// - Transformations applied
+    /// - Issues and resolutions
+    /// - Skipped items with reasons
+    pub audit_log: Json<MigrationLog>,
+}
+
+/// Detailed migration log structure.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MigrationLog {
+    /// Summary statistics
+    pub stats: MigrationStats,
+
+    /// Individual entity import records
+    pub entities: Vec<EntityImport>,
+
+    /// Issues encountered during import
+    pub issues: Vec<MigrationIssue>,
+}
+
+/// Migration statistics.
diff --git a/crates/pattern_db/src/models/migration.rs b/crates/pattern_db/src/models/migration.rs
new file mode 100644
index 0000000..6f901ed
--- /dev/null
+++ b/crates/pattern_db/src/models/migration.rs
@@ -0,0 +1,123 @@
+//! Migration audit models.
+//!
+//! Tracks v1 → v2 migration decisions and issues for debugging and rollback.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::FromRow;
+use sqlx::types::Json;
+
+/// Record of a v1 to v2 migration operation.
+///
+/// Each CAR file import creates an audit record tracking what was imported,
+/// any issues found, and how they were resolved.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct MigrationAudit {
+    /// Unique identifier
+    pub id: String,
+
+    /// When the import occurred
+    pub imported_at: DateTime<Utc>,
+
+    /// Source CAR file path
+    pub source_file: String,
+
+    /// Source format version
+    pub source_version: i64,
+
+    /// Number of issues detected during import
+    pub issues_found: i64,
+
+    /// Number of issues that were automatically resolved
+    pub issues_resolved: i64,
+
+    /// Full audit log as JSON.
+    /// Contains detailed record of:
+    /// - Entities imported
+    /// - Transformations applied
+    /// - Issues and resolutions
+    /// - Skipped items with reasons
+    pub audit_log: Json<MigrationLog>,
+}
+
+/// Detailed migration log structure.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MigrationLog {
+    /// Summary statistics
+    pub stats: MigrationStats,
+
+    /// Individual entity import records
+    pub entities: Vec<EntityImport>,
+
+    /// Issues encountered during import
+    pub issues: Vec<MigrationIssue>,
+}
+
+/// Migration statistics.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct MigrationStats {
+    /// Number of agents imported
+    pub agents: i64,
+    /// Number of memory blocks imported
+    pub memory_blocks: i64,
+    /// Number of messages imported
+    pub messages: i64,
+    /// Number of archival entries imported
+    pub archival_entries: i64,
+    /// Number of entities skipped
+    pub skipped: i64,
+    /// Total duration in milliseconds
+    pub duration_ms: i64,
+}
+
+/// Record of a single entity import.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EntityImport {
+    /// Entity type (agent, memory_block, message, etc.)
+    pub entity_type: String,
+    /// Original v1 ID
+    pub source_id: String,
+    /// New v2 ID (may be same or different)
+    pub target_id: String,
+    /// Whether any transformation was applied
+    pub transformed: bool,
+    /// Description of transformation if applied
+    pub transformation: Option<String>,
+}
+
+/// An issue encountered during migration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MigrationIssue {
+    /// Issue severity
+    pub severity: IssueSeverity,
+    /// Entity type involved
+    pub entity_type: Option<String>,
+    /// Entity ID involved
+    pub entity_id: Option<String>,
+    /// Description of the issue
+    pub description: String,
+    /// How it was resolved (if at all)
+    pub resolution: Option<String>,
+    /// Whether the issue was automatically resolved
+    pub auto_resolved: bool,
+}
+
+/// Migration issue severity levels.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum IssueSeverity {
+    /// Informational, no action needed
+    Info,
+    /// Warning, migration continued but may need review
+    Warning,
+    /// Error, entity was skipped or partially imported
+    Error,
+    /// Critical, migration may be incomplete
+    Critical,
+}
+
+impl Default for IssueSeverity {
+    fn default() -> Self {
+        Self::Info
+    }
+}
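How the audit pieces compose: an importer collects EntityImport and MigrationIssue records as it goes, rolls them up into MigrationStats, and persists the whole MigrationLog as the audit_log JSON column. A hedged sketch; the ID and file path are placeholders, and the real importer elsewhere in this series may differ:

fn build_audit(entities: Vec<EntityImport>, issues: Vec<MigrationIssue>) -> MigrationAudit {
    let stats = MigrationStats {
        agents: entities.iter().filter(|e| e.entity_type == "agent").count() as i64,
        memory_blocks: entities
            .iter()
            .filter(|e| e.entity_type == "memory_block")
            .count() as i64,
        ..Default::default()
    };
    MigrationAudit {
        id: "migration_example".to_string(), // placeholder ID
        imported_at: chrono::Utc::now(),
        source_file: "constellation.car".to_string(), // placeholder path
        source_version: 1,
        issues_found: issues.len() as i64,
        issues_resolved: issues.iter().filter(|i| i.auto_resolved).count() as i64,
        audit_log: sqlx::types::Json(MigrationLog { stats, entities, issues }),
    }
}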
diff --git a/crates/pattern_db/src/models/mod.rs b/crates/pattern_db/src/models/mod.rs
index ca5600e..0ffba38 100644
--- a/crates/pattern_db/src/models/mod.rs
+++ b/crates/pattern_db/src/models/mod.rs
@@ -4,16 +4,31 @@
 
 mod agent;
 mod coordination;
+mod event;
+mod folder;
 mod memory;
 mod message;
+mod migration;
+mod source;
+mod task;
 
-pub use agent::{Agent, AgentGroup, AgentStatus, GroupMember, GroupMemberRole, PatternType};
+pub use agent::{
+    Agent, AgentGroup, AgentStatus, GroupMember, GroupMemberRole, ModelRoutingConfig,
+    ModelRoutingRule, PatternType, RoutingCondition,
+};
 pub use coordination::{
     ActivityEvent, ActivityEventType, AgentSummary, ConstellationSummary, CoordinationState,
     CoordinationTask, EventImportance, HandoffNote, NotableEvent, TaskPriority, TaskStatus,
 };
+pub use event::{Event, EventOccurrence, OccurrenceStatus};
+pub use folder::{FilePassage, Folder, FolderAccess, FolderAttachment, FolderFile, FolderPathType};
 pub use memory::{
-    ArchivalEntry, MemoryBlock, MemoryBlockCheckpoint, MemoryBlockType, MemoryGate, MemoryOp,
-    MemoryPermission, SharedBlockAttachment,
+    ArchivalEntry, MemoryBlock, MemoryBlockCheckpoint, MemoryBlockType, MemoryBlockUpdate,
+    MemoryGate, MemoryOp, MemoryPermission, SharedBlockAttachment, UpdateSource, UpdateStats,
 };
 pub use message::{ArchiveSummary, Message, MessageRole, MessageSummary};
+pub use migration::{
+    EntityImport, IssueSeverity, MigrationAudit, MigrationIssue, MigrationLog, MigrationStats,
+};
+pub use source::{AgentDataSource, DataSource, SourceType};
+pub use task::{Task, TaskSummary, UserTaskPriority, UserTaskStatus};
diff --git a/crates/pattern_db/src/models/source.rs b/crates/pattern_db/src/models/source.rs
new file mode 100644
index 0000000..ad2b4ed
--- /dev/null
+++ b/crates/pattern_db/src/models/source.rs
@@ -0,0 +1,143 @@
+//! Data source models.
+//!
+//! Data sources represent external integrations that feed content into the constellation:
+//! - File watchers
+//! - Discord channels
+//! - Bluesky feeds
+//! - RSS feeds
+//! - etc.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::FromRow;
+use sqlx::types::Json;
+
+/// A configured data source.
+///
+/// Data sources can push content into the constellation, which gets
+/// routed to subscribed agents based on notification templates.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct DataSource {
+    /// Unique identifier
+    pub id: String,
+
+    /// Human-readable name (unique within constellation)
+    pub name: String,
+
+    /// Type of data source
+    pub source_type: SourceType,
+
+    /// Source-specific configuration as JSON.
+    /// Contents vary by source_type:
+    /// - file: { path, patterns, recursive }
+    /// - discord: { guild_id, channel_ids, event_types }
+    /// - bluesky: { dids, lists, feeds }
+    /// - rss: { urls, poll_interval }
+    pub config: Json<serde_json::Value>,
+
+    /// When the source was last synced
+    pub last_sync_at: Option<DateTime<Utc>>,
+
+    /// Source-specific position marker for incremental sync:
+    /// - file: last modified timestamp or inode
+    /// - discord: last message snowflake
+    /// - bluesky: cursor from firehose
+    /// - rss: ETag or last-modified header
+    pub sync_cursor: Option<String>,
+
+    /// Whether the source is enabled
+    pub enabled: bool,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+
+    /// Last update timestamp
+    pub updated_at: DateTime<Utc>,
+}
+
+/// Types of data sources.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum SourceType {
+    // ===== File & Code =====
+    /// Local or remote file watching
+    File,
+    /// Version control events (Git, Jujutsu, Mercurial, etc.)
+    Vcs,
+    /// Code hosting platforms (GitHub, GitLab, Forgejo, etc.)
+    CodeHost,
+    /// Language Server Protocol events (diagnostics, completions)
+    LanguageServer,
+    /// Terminal/shell output capture
+    Terminal,
+
+    // ===== Communication =====
+    /// Group chat platforms (Discord servers, Slack workspaces, etc.)
+    GroupChat,
+    /// Direct messaging (Discord DMs, etc.)
+    DirectChat,
+    /// Bluesky/ATProto firehose or feed
+    Bluesky,
+    /// Email (IMAP/SMTP)
+    Email,
+
+    // ===== Scheduling & Time =====
+    /// Calendar integration (Google Calendar, iCal, etc.)
+    Calendar,
+    /// Scheduled/periodic triggers (pomodoro, reminders)
+    Timer,
+
+    // ===== Integration =====
+    /// MCP server as data source
+    Mcp,
+    /// Agent-to-agent notifications (supervisor patterns)
+    Agent,
+    /// Generic HTTP polling (RSS, Atom, JSON APIs)
+    Http,
+    /// Webhook receiver
+    Webhook,
+    /// Manual/API push
+    Manual,
+}
+
+impl std::fmt::Display for SourceType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::File => write!(f, "file"),
+            Self::Vcs => write!(f, "vcs"),
+            Self::CodeHost => write!(f, "code_host"),
+            Self::LanguageServer => write!(f, "language_server"),
+            Self::Terminal => write!(f, "terminal"),
+            Self::GroupChat => write!(f, "group_chat"),
+            Self::DirectChat => write!(f, "direct_chat"),
+            Self::Bluesky => write!(f, "bluesky"),
+            Self::Email => write!(f, "email"),
+            Self::Calendar => write!(f, "calendar"),
+            Self::Timer => write!(f, "timer"),
+            Self::Mcp => write!(f, "mcp"),
+            Self::Agent => write!(f, "agent"),
+            Self::Http => write!(f, "http"),
+            Self::Webhook => write!(f, "webhook"),
+            Self::Manual => write!(f, "manual"),
+        }
+    }
+}
+
+/// Subscription linking an agent to a data source.
+///
+/// When the data source receives content, it gets formatted using
+/// the notification template and sent to the agent.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct AgentDataSource {
+    /// Agent receiving notifications
+    pub agent_id: String,
+
+    /// Data source providing content
+    pub source_id: String,
+
+    /// Template for formatting notifications.
+    /// Uses mustache-style placeholders: {{content}}, {{source}}, {{timestamp}}.
+    /// If None, uses a default template based on source type.
+    pub notification_template: Option<String>,
+}
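To make the notification_template contract concrete: a naive renderer for the mustache-style placeholders named above. Illustrative only; the default template string here is an assumption, and a real template engine may be used instead:

/// Render a source notification, falling back to a generic default template.
fn render_notification(
    template: Option<&str>,
    source_name: &str,
    content: &str,
    timestamp: &str,
) -> String {
    let template = template.unwrap_or("[{{source}} @ {{timestamp}}] {{content}}");
    template
        .replace("{{content}}", content)
        .replace("{{source}}", source_name)
        .replace("{{timestamp}}", timestamp)
}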
diff --git a/crates/pattern_db/src/models/task.rs b/crates/pattern_db/src/models/task.rs
new file mode 100644
index 0000000..2835f3c
--- /dev/null
+++ b/crates/pattern_db/src/models/task.rs
@@ -0,0 +1,187 @@
+//! ADHD task models.
+//!
+//! User-facing task management with ADHD-aware features:
+//! - Hierarchical breakdown (big tasks → small steps)
+//! - Flexible scheduling (due dates, scheduled times)
+//! - Priority levels with urgency distinction
+//!
+//! Distinct from CoordinationTask, which is for internal agent work assignment.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::FromRow;
+use sqlx::types::Json;
+
+/// A user-facing task.
+///
+/// Tasks can be assigned to agents or be constellation-level.
+/// They support hierarchical breakdown, which is crucial for ADHD:
+/// large overwhelming tasks can be broken into smaller, actionable steps.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct Task {
+    /// Unique identifier
+    pub id: String,
+
+    /// Agent responsible for this task (None = constellation-level)
+    pub agent_id: Option<String>,
+
+    /// Task title (short, actionable)
+    pub title: String,
+
+    /// Detailed description (optional)
+    pub description: Option<String>,
+
+    /// Current status
+    pub status: UserTaskStatus,
+
+    /// Priority level
+    pub priority: UserTaskPriority,
+
+    /// When the task is due (hard deadline)
+    pub due_at: Option<DateTime<Utc>>,
+
+    /// When the task is scheduled to be worked on
+    pub scheduled_at: Option<DateTime<Utc>>,
+
+    /// When the task was completed
+    pub completed_at: Option<DateTime<Utc>>,
+
+    /// Parent task for hierarchy (None = top-level)
+    pub parent_task_id: Option<String>,
+
+    /// Optional tags/labels as JSON array
+    pub tags: Option<Json<Vec<String>>>,
+
+    /// Estimated duration in minutes (for time-boxing)
+    pub estimated_minutes: Option<i64>,
+
+    /// Actual duration in minutes (filled on completion)
+    pub actual_minutes: Option<i64>,
+
+    /// Optional notes/context
+    pub notes: Option<String>,
+
+    /// Creation timestamp
+    pub created_at: DateTime<Utc>,
+
+    /// Last update timestamp
+    pub updated_at: DateTime<Utc>,
+}
+
+/// User task status.
+///
+/// More nuanced than coordination task status to support ADHD workflows.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
+#[sqlx(type_name = "TEXT", rename_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum UserTaskStatus {
+    /// Task exists but isn't ready to work on yet
+    /// (e.g., waiting for something, needs breakdown)
+    Backlog,
+
+    /// Task is ready to be worked on
+    Pending,
+
+    /// Currently being worked on
+    InProgress,
+
+    /// Blocked by external factor
+    Blocked,
+
+    /// Task is done
+    Completed,
+
+    /// Task was intentionally skipped/dropped
+    Cancelled,
+
+    /// Task was deferred to a later time
+    Deferred,
+}
+
+impl Default for UserTaskStatus {
+    fn default() -> Self {
+        Self::Pending
+    }
+}
+
+impl std::fmt::Display for UserTaskStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Backlog => write!(f, "backlog"),
+            Self::Pending => write!(f, "pending"),
+            Self::InProgress => write!(f, "in progress"),
+            Self::Blocked => write!(f, "blocked"),
+            Self::Completed => write!(f, "completed"),
+            Self::Cancelled => write!(f, "cancelled"),
+            Self::Deferred => write!(f, "deferred"),
+        }
+    }
+}
+
+/// User task priority.
+///
+/// Distinguishes between importance and urgency (Eisenhower matrix style).
+#[derive(
+    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, sqlx::Type,
+)]
+#[sqlx(type_name = "TEXT", rename_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum UserTaskPriority {
+    /// Can wait, nice to have
+    Low,
+
+    /// Normal priority, should get done
+    Medium,
+
+    /// Important, prioritize this
+    High,
+
+    /// Time-sensitive AND important - do this now
+    Urgent,
+
+    /// Critical blocker - everything else waits
+    Critical,
+}
+
+impl Default for UserTaskPriority {
+    fn default() -> Self {
+        Self::Medium
+    }
+}
+
+impl std::fmt::Display for UserTaskPriority {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Low => write!(f, "low"),
+            Self::Medium => write!(f, "medium"),
+            Self::High => write!(f, "high"),
+            Self::Urgent => write!(f, "urgent"),
+            Self::Critical => write!(f, "critical"),
+        }
+    }
+}
+
+/// Lightweight task projection for lists.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct TaskSummary {
+    /// Task ID
+    pub id: String,
+
+    /// Task title
+    pub title: String,
+
+    /// Current status
+    pub status: UserTaskStatus,
+
+    /// Priority level
+    pub priority: UserTaskPriority,
+
+    /// Due date if set
+    pub due_at: Option<DateTime<Utc>>,
+
+    /// Parent task ID for hierarchy display
+    pub parent_task_id: Option<String>,
+
+    /// Number of subtasks (computed)
+    pub subtask_count: Option<i64>,
+}
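Because UserTaskPriority derives Ord with variants declared lowest-first, it can drive triage sorting directly. A small example of the intended ordering (not part of this patch):

/// Sort tasks most-urgent-first; earlier due dates break ties.
/// Note: Option's derived Ord puts None (no due date) before any Some.
fn triage_order(tasks: &mut [Task]) {
    tasks.sort_by(|a, b| {
        b.priority
            .cmp(&a.priority) // Critical > Urgent > High > Medium > Low
            .then_with(|| a.due_at.cmp(&b.due_at))
    });
}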
diff --git a/crates/pattern_db/src/queries/event.rs b/crates/pattern_db/src/queries/event.rs
new file mode 100644
index 0000000..4d2e290
--- /dev/null
+++ b/crates/pattern_db/src/queries/event.rs
@@ -0,0 +1,323 @@
+//! Event and reminder queries.
+
+use chrono::{DateTime, Utc};
+use sqlx::SqlitePool;
+
+use crate::error::DbResult;
+use crate::models::{Event, EventOccurrence, OccurrenceStatus};
+
+// ============================================================================
+// Event CRUD
+// ============================================================================
+
+/// Create a new event.
+pub async fn create_event(pool: &SqlitePool, event: &Event) -> DbResult<()> {
+    sqlx::query!(
+        r#"
+        INSERT INTO events (id, agent_id, title, description, starts_at, ends_at, rrule, reminder_minutes, created_at, updated_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        "#,
+        event.id,
+        event.agent_id,
+        event.title,
+        event.description,
+        event.starts_at,
+        event.ends_at,
+        event.rrule,
+        event.reminder_minutes,
+        event.created_at,
+        event.updated_at,
+    )
+    .execute(pool)
+    .await?;
+    Ok(())
+}
+
+/// Get an event by ID.
+pub async fn get_event(pool: &SqlitePool, id: &str) -> DbResult<Option<Event>> {
+    let event = sqlx::query_as!(
+        Event,
+        r#"
+        SELECT
+            id as "id!",
+            agent_id,
+            title as "title!",
+            description,
+            starts_at as "starts_at!: _",
+            ends_at as "ends_at: _",
+            rrule,
+            reminder_minutes,
+            all_day as "all_day!: bool",
+            location,
+            external_id,
+            external_source,
+            created_at as "created_at!: _",
+            updated_at as "updated_at!: _"
+        FROM events WHERE id = ?
+        "#,
+        id
+    )
+    .fetch_optional(pool)
+    .await?;
+    Ok(event)
+}
+
+/// List events for an agent (or constellation-level).
+pub async fn list_events(pool: &SqlitePool, agent_id: Option<&str>) -> DbResult<Vec<Event>> {
+    let events = match agent_id {
+        Some(aid) => {
+            sqlx::query_as!(
+                Event,
+                r#"
+                SELECT
+                    id as "id!",
+                    agent_id,
+                    title as "title!",
+                    description,
+                    starts_at as "starts_at!: _",
+                    ends_at as "ends_at: _",
+                    rrule,
+                    reminder_minutes,
+                    all_day as "all_day!: bool",
+                    location,
+                    external_id,
+                    external_source,
+                    created_at as "created_at!: _",
+                    updated_at as "updated_at!: _"
+                FROM events WHERE agent_id = ? ORDER BY starts_at ASC
+                "#,
+                aid
+            )
+            .fetch_all(pool)
+            .await?
+        }
+        None => {
+            sqlx::query_as!(
+                Event,
+                r#"
+                SELECT
+                    id as "id!",
+                    agent_id,
+                    title as "title!",
+                    description,
+                    starts_at as "starts_at!: _",
+                    ends_at as "ends_at: _",
+                    rrule,
+                    reminder_minutes,
+                    all_day as "all_day!: bool",
+                    location,
+                    external_id,
+                    external_source,
+                    created_at as "created_at!: _",
+                    updated_at as "updated_at!: _"
+                FROM events WHERE agent_id IS NULL ORDER BY starts_at ASC
+                "#
+            )
+            .fetch_all(pool)
+            .await?
+        }
+    };
+    Ok(events)
+}
+/// Get events in a time range.
+pub async fn get_events_in_range(
+    pool: &SqlitePool,
+    start: DateTime<Utc>,
+    end: DateTime<Utc>,
+) -> DbResult<Vec<Event>> {
+    let events = sqlx::query_as!(
+        Event,
+        r#"
+        SELECT
+            id as "id!",
+            agent_id,
+            title as "title!",
+            description,
+            starts_at as "starts_at!: _",
+            ends_at as "ends_at: _",
+            rrule,
+            reminder_minutes,
+            all_day as "all_day!: bool",
+            location,
+            external_id,
+            external_source,
+            created_at as "created_at!: _",
+            updated_at as "updated_at!: _"
+        FROM events
+        WHERE starts_at >= ? AND starts_at <= ?
+        ORDER BY starts_at ASC
+        "#,
+        start,
+        end
+    )
+    .fetch_all(pool)
+    .await?;
+    Ok(events)
+}
+
+/// Get upcoming events (starting within N hours).
+pub async fn get_upcoming_events(pool: &SqlitePool, hours: i64) -> DbResult<Vec<Event>> {
+    let now = Utc::now();
+    let deadline = now + chrono::Duration::hours(hours);
+    let events = sqlx::query_as!(
+        Event,
+        r#"
+        SELECT
+            id as "id!",
+            agent_id,
+            title as "title!",
+            description,
+            starts_at as "starts_at!: _",
+            ends_at as "ends_at: _",
+            rrule,
+            reminder_minutes,
+            all_day as "all_day!: bool",
+            location,
+            external_id,
+            external_source,
+            created_at as "created_at!: _",
+            updated_at as "updated_at!: _"
+        FROM events
+        WHERE starts_at >= ? AND starts_at <= ?
+        ORDER BY starts_at ASC
+        "#,
+        now,
+        deadline
+    )
+    .fetch_all(pool)
+    .await?;
+    Ok(events)
+}
+
+/// Get events needing reminders (reminder time is now or past, but event hasn't started).
+pub async fn get_events_needing_reminders(pool: &SqlitePool) -> DbResult<Vec<Event>> {
+    let now = Utc::now();
+    // This query finds events where: starts_at - reminder_minutes <= now < starts_at
+    let events = sqlx::query_as!(
+        Event,
+        r#"
+        SELECT
+            id as "id!",
+            agent_id,
+            title as "title!",
+            description,
+            starts_at as "starts_at!: _",
+            ends_at as "ends_at: _",
+            rrule,
+            reminder_minutes,
+            all_day as "all_day!: bool",
+            location,
+            external_id,
+            external_source,
+            created_at as "created_at!: _",
+            updated_at as "updated_at!: _"
+        FROM events
+        WHERE reminder_minutes IS NOT NULL
+          AND starts_at > ?
+          AND datetime(starts_at, '-' || reminder_minutes || ' minutes') <= ?
+        ORDER BY starts_at ASC
+        "#,
+        now,
+        now
+    )
+    .fetch_all(pool)
+    .await?;
+    Ok(events)
+}
+
+/// Update an event.
+pub async fn update_event(pool: &SqlitePool, event: &Event) -> DbResult<bool> {
+    let result = sqlx::query!(
+        r#"
+        UPDATE events
+        SET title = ?, description = ?, starts_at = ?, ends_at = ?,
+            rrule = ?, reminder_minutes = ?, updated_at = ?
+        WHERE id = ?
+        "#,
+        event.title,
+        event.description,
+        event.starts_at,
+        event.ends_at,
+        event.rrule,
+        event.reminder_minutes,
+        event.updated_at,
+        event.id,
+    )
+    .execute(pool)
+    .await?;
+    Ok(result.rows_affected() > 0)
+}
+
+/// Delete an event.
+pub async fn delete_event(pool: &SqlitePool, id: &str) -> DbResult<bool> {
+    let result = sqlx::query!("DELETE FROM events WHERE id = ?", id)
+        .execute(pool)
+        .await?;
+    Ok(result.rows_affected() > 0)
+}
+
+// ============================================================================
+// EventOccurrence (for recurring events)
+// ============================================================================
+
+/// Create an event occurrence.
+pub async fn create_occurrence(pool: &SqlitePool, occurrence: &EventOccurrence) -> DbResult<()> {
+    sqlx::query!(
+        r#"
+        INSERT INTO event_occurrences (id, event_id, starts_at, ends_at, status, notes, created_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?)
+ "#, + occurrence.id, + occurrence.event_id, + occurrence.starts_at, + occurrence.ends_at, + occurrence.status, + occurrence.notes, + occurrence.created_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Get occurrences for an event. +pub async fn get_event_occurrences( + pool: &SqlitePool, + event_id: &str, +) -> DbResult> { + let occurrences = sqlx::query_as!( + EventOccurrence, + r#" + SELECT + id as "id!", + event_id as "event_id!", + starts_at as "starts_at!: _", + ends_at as "ends_at: _", + status as "status!: OccurrenceStatus", + notes, + created_at as "created_at!: _" + FROM event_occurrences WHERE event_id = ? ORDER BY starts_at ASC + "#, + event_id + ) + .fetch_all(pool) + .await?; + Ok(occurrences) +} + +/// Update occurrence status. +pub async fn update_occurrence_status( + pool: &SqlitePool, + id: &str, + status: OccurrenceStatus, +) -> DbResult { + let result = sqlx::query!( + "UPDATE event_occurrences SET status = ? WHERE id = ?", + status, + id, + ) + .execute(pool) + .await?; + Ok(result.rows_affected() > 0) +} diff --git a/crates/pattern_db/src/queries/folder.rs b/crates/pattern_db/src/queries/folder.rs new file mode 100644 index 0000000..de261de --- /dev/null +++ b/crates/pattern_db/src/queries/folder.rs @@ -0,0 +1,372 @@ +//! Folder and file queries. + +use chrono::Utc; +use sqlx::SqlitePool; + +use crate::error::DbResult; +use crate::models::{ + FilePassage, Folder, FolderAccess, FolderAttachment, FolderFile, FolderPathType, +}; + +// ============================================================================ +// Folder CRUD +// ============================================================================ + +/// Create a new folder. +pub async fn create_folder(pool: &SqlitePool, folder: &Folder) -> DbResult<()> { + sqlx::query!( + r#" + INSERT INTO folders (id, name, description, path_type, path_value, embedding_model, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + "#, + folder.id, + folder.name, + folder.description, + folder.path_type, + folder.path_value, + folder.embedding_model, + folder.created_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Get a folder by ID. +pub async fn get_folder(pool: &SqlitePool, id: &str) -> DbResult> { + let folder = sqlx::query_as!( + Folder, + r#" + SELECT + id as "id!", + name as "name!", + description, + path_type as "path_type!: FolderPathType", + path_value, + embedding_model as "embedding_model!", + created_at as "created_at!: _" + FROM folders WHERE id = ? + "#, + id + ) + .fetch_optional(pool) + .await?; + Ok(folder) +} + +/// Get a folder by name. +pub async fn get_folder_by_name(pool: &SqlitePool, name: &str) -> DbResult> { + let folder = sqlx::query_as!( + Folder, + r#" + SELECT + id as "id!", + name as "name!", + description, + path_type as "path_type!: FolderPathType", + path_value, + embedding_model as "embedding_model!", + created_at as "created_at!: _" + FROM folders WHERE name = ? + "#, + name + ) + .fetch_optional(pool) + .await?; + Ok(folder) +} + +/// List all folders. +pub async fn list_folders(pool: &SqlitePool) -> DbResult> { + let folders = sqlx::query_as!( + Folder, + r#" + SELECT + id as "id!", + name as "name!", + description, + path_type as "path_type!: FolderPathType", + path_value, + embedding_model as "embedding_model!", + created_at as "created_at!: _" + FROM folders ORDER BY name + "# + ) + .fetch_all(pool) + .await?; + Ok(folders) +} + +/// Delete a folder (cascades to files and passages). 
+/// Delete a folder (cascades to files and passages).
+pub async fn delete_folder(pool: &SqlitePool, id: &str) -> DbResult<bool> {
+    let result = sqlx::query!("DELETE FROM folders WHERE id = ?", id)
+        .execute(pool)
+        .await?;
+    Ok(result.rows_affected() > 0)
+}
+
+// ============================================================================
+// FolderFile CRUD
+// ============================================================================
+
+/// Create or update a file in a folder.
+pub async fn upsert_file(pool: &SqlitePool, file: &FolderFile) -> DbResult<()> {
+    sqlx::query!(
+        r#"
+        INSERT INTO folder_files (id, folder_id, name, content_type, size_bytes, content, uploaded_at, indexed_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+        ON CONFLICT(folder_id, name) DO UPDATE SET
+            content_type = excluded.content_type,
+            size_bytes = excluded.size_bytes,
+            content = excluded.content,
+            uploaded_at = excluded.uploaded_at
+        "#,
+        file.id,
+        file.folder_id,
+        file.name,
+        file.content_type,
+        file.size_bytes,
+        file.content,
+        file.uploaded_at,
+        file.indexed_at,
+    )
+    .execute(pool)
+    .await?;
+    Ok(())
+}
+
+/// Get a file by ID.
+pub async fn get_file(pool: &SqlitePool, id: &str) -> DbResult<Option<FolderFile>> {
+    let file = sqlx::query_as!(
+        FolderFile,
+        r#"
+        SELECT
+            id as "id!",
+            folder_id as "folder_id!",
+            name as "name!",
+            content_type,
+            size_bytes,
+            content,
+            uploaded_at as "uploaded_at!: _",
+            indexed_at as "indexed_at: _"
+        FROM folder_files WHERE id = ?
+        "#,
+        id
+    )
+    .fetch_optional(pool)
+    .await?;
+    Ok(file)
+}
+
+/// Get a file by folder and name.
+pub async fn get_file_by_name(
+    pool: &SqlitePool,
+    folder_id: &str,
+    name: &str,
+) -> DbResult<Option<FolderFile>> {
+    let file = sqlx::query_as!(
+        FolderFile,
+        r#"
+        SELECT
+            id as "id!",
+            folder_id as "folder_id!",
+            name as "name!",
+            content_type,
+            size_bytes,
+            content,
+            uploaded_at as "uploaded_at!: _",
+            indexed_at as "indexed_at: _"
+        FROM folder_files WHERE folder_id = ? AND name = ?
+        "#,
+        folder_id,
+        name
+    )
+    .fetch_optional(pool)
+    .await?;
+    Ok(file)
+}
+
+/// List files in a folder.
+pub async fn list_files_in_folder(pool: &SqlitePool, folder_id: &str) -> DbResult<Vec<FolderFile>> {
+    let files = sqlx::query_as!(
+        FolderFile,
+        r#"
+        SELECT
+            id as "id!",
+            folder_id as "folder_id!",
+            name as "name!",
+            content_type,
+            size_bytes,
+            content,
+            uploaded_at as "uploaded_at!: _",
+            indexed_at as "indexed_at: _"
+        FROM folder_files WHERE folder_id = ? ORDER BY name
+        "#,
+        folder_id
+    )
+    .fetch_all(pool)
+    .await?;
+    Ok(files)
+}
+
+/// Mark a file as indexed.
+pub async fn mark_file_indexed(pool: &SqlitePool, file_id: &str) -> DbResult<bool> {
+    let now = Utc::now();
+    let result = sqlx::query!(
+        "UPDATE folder_files SET indexed_at = ? WHERE id = ?",
+        now,
+        file_id,
+    )
+    .execute(pool)
+    .await?;
+    Ok(result.rows_affected() > 0)
+}
+
+/// Delete a file (cascades to passages).
+pub async fn delete_file(pool: &SqlitePool, id: &str) -> DbResult<bool> {
+    let result = sqlx::query!("DELETE FROM folder_files WHERE id = ?", id)
+        .execute(pool)
+        .await?;
+    Ok(result.rows_affected() > 0)
+}
+
+// ============================================================================
+// FilePassage CRUD
+// ============================================================================
+
+/// Create a file passage.
+pub async fn create_passage(pool: &SqlitePool, passage: &FilePassage) -> DbResult<()> {
+    sqlx::query!(
+        r#"
+        INSERT INTO file_passages (id, file_id, content, start_line, end_line, chunk_index, created_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?)
+ "#, + passage.id, + passage.file_id, + passage.content, + passage.start_line, + passage.end_line, + passage.created_at, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Get passages for a file. +pub async fn get_file_passages(pool: &SqlitePool, file_id: &str) -> DbResult> { + let passages = sqlx::query_as!( + FilePassage, + r#" + SELECT + id as "id!", + file_id as "file_id!", + content as "content!", + start_line, + end_line, + chunk_index as "chunk_index!", + created_at as "created_at!: _" + FROM file_passages WHERE file_id = ? ORDER BY chunk_index + "#, + file_id + ) + .fetch_all(pool) + .await?; + Ok(passages) +} + +/// Delete passages for a file (used before re-indexing). +pub async fn delete_file_passages(pool: &SqlitePool, file_id: &str) -> DbResult { + let result = sqlx::query!("DELETE FROM file_passages WHERE file_id = ?", file_id) + .execute(pool) + .await?; + Ok(result.rows_affected()) +} + +// ============================================================================ +// FolderAttachment (agent access) +// ============================================================================ + +/// Attach a folder to an agent. +pub async fn attach_folder_to_agent( + pool: &SqlitePool, + folder_id: &str, + agent_id: &str, + access: FolderAccess, +) -> DbResult<()> { + let now = Utc::now(); + sqlx::query!( + r#" + INSERT INTO folder_attachments (folder_id, agent_id, access, attached_at) + VALUES (?, ?, ?, ?) + ON CONFLICT(folder_id, agent_id) DO UPDATE SET access = excluded.access + "#, + folder_id, + agent_id, + access, + now, + ) + .execute(pool) + .await?; + Ok(()) +} + +/// Detach a folder from an agent. +pub async fn detach_folder_from_agent( + pool: &SqlitePool, + folder_id: &str, + agent_id: &str, +) -> DbResult { + let result = sqlx::query!( + "DELETE FROM folder_attachments WHERE folder_id = ? AND agent_id = ?", + folder_id, + agent_id, + ) + .execute(pool) + .await?; + Ok(result.rows_affected() > 0) +} + +/// Get folders attached to an agent. +pub async fn get_agent_folders( + pool: &SqlitePool, + agent_id: &str, +) -> DbResult> { + let attachments = sqlx::query_as!( + FolderAttachment, + r#" + SELECT + folder_id as "folder_id!", + agent_id as "agent_id!", + access as "access!: FolderAccess", + attached_at as "attached_at!: _" + FROM folder_attachments WHERE agent_id = ? + "#, + agent_id + ) + .fetch_all(pool) + .await?; + Ok(attachments) +} + +/// Get agents with access to a folder. +pub async fn get_folder_agents( + pool: &SqlitePool, + folder_id: &str, +) -> DbResult> { + let attachments = sqlx::query_as!( + FolderAttachment, + r#" + SELECT + folder_id as "folder_id!", + agent_id as "agent_id!", + access as "access!: FolderAccess", + attached_at as "attached_at!: _" + FROM folder_attachments WHERE folder_id = ? + "#, + folder_id + ) + .fetch_all(pool) + .await?; + Ok(attachments) +} diff --git a/crates/pattern_db/src/queries/memory.rs b/crates/pattern_db/src/queries/memory.rs index c5370a9..80b438c 100644 --- a/crates/pattern_db/src/queries/memory.rs +++ b/crates/pattern_db/src/queries/memory.rs @@ -12,7 +12,7 @@ pub async fn get_block(pool: &SqlitePool, id: &str) -> DbResult DbResult DbResult DbResult DbResult<() r#" INSERT INTO memory_blocks (id, agent_id, label, description, block_type, char_limit, permission, pinned, loro_snapshot, content_preview, metadata, - embedding_model, is_active, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
diff --git a/crates/pattern_db/src/queries/memory.rs b/crates/pattern_db/src/queries/memory.rs
index c5370a9..80b438c 100644
--- a/crates/pattern_db/src/queries/memory.rs
+++ b/crates/pattern_db/src/queries/memory.rs
@@ -12,7 +12,7 @@ pub async fn get_block(pool: &SqlitePool, id: &str) -> DbResult<Option<MemoryBlock>> {
[...]
         r#"
         INSERT INTO memory_blocks (id, agent_id, label, description, block_type,
             char_limit, permission, pinned, loro_snapshot, content_preview, metadata,
-            embedding_model, is_active, created_at, updated_at)
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            embedding_model, is_active, frontier, last_seq, created_at, updated_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
         "#,
         block.id,
         block.agent_id,
@@ -159,6 +167,8 @@ pub async fn create_block(pool: &SqlitePool, block: &MemoryBlock) -> DbResult<()> {
         block.metadata,
         block.embedding_model,
         block.is_active,
+        block.frontier,
+        block.last_seq,
         block.created_at,
         block.updated_at,
     )
@@ -176,7 +186,7 @@ pub async fn update_block_content(
 ) -> DbResult<()> {
     sqlx::query!(
         r#"
-        UPDATE memory_blocks 
+        UPDATE memory_blocks
         SET loro_snapshot = ?, content_preview = ?, updated_at = datetime('now')
         WHERE id = ?
         "#,
@@ -207,13 +217,14 @@ pub async fn create_checkpoint(
 ) -> DbResult<i64> {
     let result = sqlx::query!(
         r#"
-        INSERT INTO memory_block_checkpoints (block_id, snapshot, created_at, updates_consolidated)
-        VALUES (?, ?, ?, ?)
+        INSERT INTO memory_block_checkpoints (block_id, snapshot, created_at, updates_consolidated, frontier)
+        VALUES (?, ?, ?, ?, ?)
         "#,
         checkpoint.block_id,
         checkpoint.snapshot,
         checkpoint.created_at,
         checkpoint.updates_consolidated,
+        checkpoint.frontier,
     )
     .execute(pool)
     .await?;
@@ -228,12 +239,13 @@ pub async fn get_latest_checkpoint(
     let checkpoint = sqlx::query_as!(
         MemoryBlockCheckpoint,
         r#"
-        SELECT 
+        SELECT
            id as "id!",
            block_id as "block_id!",
            snapshot as "snapshot!",
            created_at as "created_at!: _",
-           updates_consolidated as "updates_consolidated!"
+           updates_consolidated as "updates_consolidated!",
+           frontier
         FROM memory_block_checkpoints WHERE block_id = ? ORDER BY created_at DESC LIMIT 1
         "#,
         block_id
@@ -248,7 +260,7 @@ pub async fn get_archival_entry(pool: &SqlitePool, id: &str) -> DbResult<Option<ArchivalEntry>> {
[...]
     .await?;
     Ok(result.count)
 }
+
+// ============================================================================
+// Memory Block Updates (Delta Storage)
+// ============================================================================
+
+use crate::models::{MemoryBlockUpdate, UpdateStats};
+use chrono::Utc;
+
+/// Store a new incremental update for a memory block.
+///
+/// Atomically assigns the next sequence number and persists the update.
+/// Returns the assigned sequence number.
+pub async fn store_update(
+    pool: &SqlitePool,
+    block_id: &str,
+    update_blob: &[u8],
+    source: Option<&str>,
+) -> DbResult<i64> {
+    let now = Utc::now();
+    let byte_size = update_blob.len() as i64;
+
+    // Use a transaction to atomically increment last_seq and insert
+    let mut tx = pool.begin().await?;
+
+    // Get and increment the sequence number
+    let row = sqlx::query!(
+        "UPDATE memory_blocks SET last_seq = last_seq + 1, updated_at = ? WHERE id = ? RETURNING last_seq",
+        now,
+        block_id
+    )
+    .fetch_one(&mut *tx)
+    .await?;
+
+    let seq = row.last_seq;
+
+    // Insert the update
+    sqlx::query!(
+        r#"
+        INSERT INTO memory_block_updates (block_id, seq, update_blob, byte_size, source, created_at)
+        VALUES (?, ?, ?, ?, ?, ?)
+        "#,
+        block_id,
+        seq,
+        update_blob,
+        byte_size,
+        source,
+        now,
+    )
+    .execute(&mut *tx)
+    .await?;
+
+    tx.commit().await?;
+    Ok(seq)
+}
+/// Get the latest checkpoint and all pending updates for a block.
+///
+/// Used for full reconstruction on cache miss.
+pub async fn get_checkpoint_and_updates(
+    pool: &SqlitePool,
+    block_id: &str,
+) -> DbResult<(Option<MemoryBlockCheckpoint>, Vec<MemoryBlockUpdate>)> {
+    // Get latest checkpoint
+    let checkpoint = get_latest_checkpoint(pool, block_id).await?;
+
+    // Get all updates (or updates since checkpoint if we have one)
+    let updates = if let Some(ref cp) = checkpoint {
+        // Get updates created after the checkpoint
+        sqlx::query_as!(
+            MemoryBlockUpdate,
+            r#"
+            SELECT
+                id as "id!",
+                block_id as "block_id!",
+                seq as "seq!",
+                update_blob as "update_blob!",
+                byte_size as "byte_size!",
+                source,
+                created_at as "created_at!: _"
+            FROM memory_block_updates
+            WHERE block_id = ? AND created_at > ?
+            ORDER BY seq ASC
+            "#,
+            block_id,
+            cp.created_at
+        )
+        .fetch_all(pool)
+        .await?
+    } else {
+        // No checkpoint, get all updates
+        sqlx::query_as!(
+            MemoryBlockUpdate,
+            r#"
+            SELECT
+                id as "id!",
+                block_id as "block_id!",
+                seq as "seq!",
+                update_blob as "update_blob!",
+                byte_size as "byte_size!",
+                source,
+                created_at as "created_at!: _"
+            FROM memory_block_updates
+            WHERE block_id = ?
+            ORDER BY seq ASC
+            "#,
+            block_id
+        )
+        .fetch_all(pool)
+        .await?
+    };
+
+    Ok((checkpoint, updates))
+}
+
+/// Get updates after a given sequence number.
+///
+/// Used for cache refresh when we already have some state.
+pub async fn get_updates_since(
+    pool: &SqlitePool,
+    block_id: &str,
+    after_seq: i64,
+) -> DbResult<Vec<MemoryBlockUpdate>> {
+    let updates = sqlx::query_as!(
+        MemoryBlockUpdate,
+        r#"
+        SELECT
+            id as "id!",
+            block_id as "block_id!",
+            seq as "seq!",
+            update_blob as "update_blob!",
+            byte_size as "byte_size!",
+            source,
+            created_at as "created_at!: _"
+        FROM memory_block_updates
+        WHERE block_id = ? AND seq > ?
+        ORDER BY seq ASC
+        "#,
+        block_id,
+        after_seq
+    )
+    .fetch_all(pool)
+    .await?;
+    Ok(updates)
+}
+
+/// Check if there are updates after a given sequence number.
+///
+/// Lightweight check without fetching update data.
+pub async fn has_updates_since(
+    pool: &SqlitePool,
+    block_id: &str,
+    after_seq: i64,
+) -> DbResult<bool> {
+    let result = sqlx::query!(
+        "SELECT EXISTS(SELECT 1 FROM memory_block_updates WHERE block_id = ? AND seq > ?) as has_updates",
+        block_id,
+        after_seq
+    )
+    .fetch_one(pool)
+    .await?;
+    Ok(result.has_updates != 0)
+}
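// The cache-refresh path these three queries support: fast-forward an
// already-reconstructed doc with only the new deltas, tracking the newest
// seq seen. Sketch only; assumes loro's LoroDoc::import (exact API differs
// across loro versions). A consolidation pass would similarly combine
// get_pending_update_stats and consolidate_checkpoint, defined just below.
async fn refresh_block_doc(
    pool: &SqlitePool,
    block_id: &str,
    doc: &loro::LoroDoc,
    last_seen_seq: i64,
) -> DbResult<i64> {
    let mut latest = last_seen_seq;
    if has_updates_since(pool, block_id, last_seen_seq).await? {
        for update in get_updates_since(pool, block_id, last_seen_seq).await? {
            // Errors from the CRDT layer are ignored here for brevity.
            let _ = doc.import(&update.update_blob);
            latest = update.seq;
        }
    }
    Ok(latest)
}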
+/// Atomically consolidate updates into a new checkpoint.
+///
+/// Creates a new checkpoint with the merged state and deletes updates up to `up_to_seq`.
+/// Updates arriving during the merge (with seq > up_to_seq) are preserved.
+pub async fn consolidate_checkpoint(
+    pool: &SqlitePool,
+    block_id: &str,
+    new_snapshot: &[u8],
+    new_frontier: Option<&[u8]>,
+    up_to_seq: i64,
+) -> DbResult<()> {
+    let now = Utc::now();
+
+    let mut tx = pool.begin().await?;
+
+    // Count updates being consolidated
+    let count_result = sqlx::query!(
+        "SELECT COUNT(*) as count FROM memory_block_updates WHERE block_id = ? AND seq <= ?",
+        block_id,
+        up_to_seq
+    )
+    .fetch_one(&mut *tx)
+    .await?;
+    let updates_consolidated = count_result.count;
+
+    // Create new checkpoint
+    sqlx::query!(
+        r#"
+        INSERT INTO memory_block_checkpoints (block_id, snapshot, created_at, updates_consolidated, frontier)
+        VALUES (?, ?, ?, ?, ?)
+        "#,
+        block_id,
+        new_snapshot,
+        now,
+        updates_consolidated,
+        new_frontier,
+    )
+    .execute(&mut *tx)
+    .await?;
+
+    // Delete consolidated updates
+    sqlx::query!(
+        "DELETE FROM memory_block_updates WHERE block_id = ? AND seq <= ?",
+        block_id,
+        up_to_seq
+    )
+    .execute(&mut *tx)
+    .await?;
+
+    // Update the block's loro_snapshot and frontier
+    sqlx::query!(
+        r#"
+        UPDATE memory_blocks
+        SET loro_snapshot = ?, frontier = ?, updated_at = ?
+        WHERE id = ?
+        "#,
+        new_snapshot,
+        new_frontier,
+        now,
+        block_id,
+    )
+    .execute(&mut *tx)
+    .await?;
+
+    tx.commit().await?;
+    Ok(())
+}
+
+/// Get statistics about pending updates for consolidation decisions.
+pub async fn get_pending_update_stats(pool: &SqlitePool, block_id: &str) -> DbResult<UpdateStats> {
+    let result = sqlx::query!(
+        r#"
+        SELECT
+            COUNT(*) as count,
+            COALESCE(SUM(byte_size), 0) as total_bytes,
+            COALESCE(MAX(seq), 0) as max_seq
+        FROM memory_block_updates
+        WHERE block_id = ?
+        "#,
+        block_id
+    )
+    .fetch_one(pool)
+    .await?;
+
+    Ok(UpdateStats {
+        count: result.count,
+        total_bytes: result.total_bytes,
+        max_seq: result.max_seq,
+    })
+}
+
+/// Update a block's frontier without creating an update record.
+///
+/// Used when applying updates from external sources where we just need to track version.
+pub async fn update_block_frontier(
+    pool: &SqlitePool,
+    block_id: &str,
+    frontier: &[u8],
+) -> DbResult<()> {
+    let now = Utc::now();
+    sqlx::query!(
+        "UPDATE memory_blocks SET frontier = ?, updated_at = ? WHERE id = ?",
+        frontier,
+        now,
+        block_id,
+    )
+    .execute(pool)
+    .await?;
+    Ok(())
+}
+
+/// Get a lightweight view of a block for cache lookups.
+///
+/// Returns just the ID and last_seq without loading the full snapshot.
+pub async fn get_block_version_info(
+    pool: &SqlitePool,
+    block_id: &str,
+) -> DbResult<Option<(String, i64)>> {
+    let result = sqlx::query!(
+        r#"SELECT id as "id!", last_seq FROM memory_blocks WHERE id = ?"#,
+        block_id
+    )
+    .fetch_optional(pool)
+    .await?;
+
+    Ok(result.map(|r| (r.id, r.last_seq)))
+}
diff --git a/crates/pattern_db/src/queries/message.rs b/crates/pattern_db/src/queries/message.rs
index b29da3e..383253b 100644
--- a/crates/pattern_db/src/queries/message.rs
+++ b/crates/pattern_db/src/queries/message.rs
@@ -246,13 +246,15 @@ pub async fn get_archive_summary(pool: &SqlitePool, id: &str) -> DbResult