From 3645a40cf79a6e798c197de3575cf328859d92bb Mon Sep 17 00:00:00 2001 From: mathieu Date: Fri, 24 Oct 2025 13:34:52 +1100 Subject: [PATCH 1/7] wip: add metadata along with vectors --- src/client.rs | 3 +- src/index/flat.rs | 41 ++++++++++---------- src/index/hnsw.rs | 48 +++++++++++++---------- src/lib.rs | 80 ++++++++++++++++++++++++++++++++++----- src/persistence.rs | 8 ++-- src/server.rs | 2 + tests/integration_test.rs | 1 + 7 files changed, 127 insertions(+), 56 deletions(-) diff --git a/src/client.rs b/src/client.rs index df48b5d..2dd95d2 100644 --- a/src/client.rs +++ b/src/client.rs @@ -310,7 +310,7 @@ impl Collection { let embedding = embedding_function.generate_embedding(text) .map_err(|e| e.to_string())?; - let vector = Vector { id, values: embedding }; + let vector = Vector { id, values: embedding, metadata: None }; // Acquire write lock only for the index operation let mut index = self.index.write().map_err(|_| "Failed to acquire write lock")?; @@ -658,6 +658,7 @@ mod tests { let vector = Vector { id: 42, values: vec![1.0, 2.0, 3.0], + metadata: None, }; client.add_vector_to_collection("test_collection", vector).unwrap(); diff --git a/src/index/flat.rs b/src/index/flat.rs index 44cfb77..a40718d 100644 --- a/src/index/flat.rs +++ b/src/index/flat.rs @@ -24,7 +24,7 @@ //! //! # fn example() -> Result<(), Box> { //! let mut index = FlatIndex::new(3, Vec::new()); -//! let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0] }; +//! let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; //! //! index.add(vector)?; //! 
let results = index.search(&[1.1, 2.1, 3.1], 5, SimilarityMetric::Cosine); @@ -49,7 +49,7 @@ use serde::{Serialize, Deserialize}; /// /// # fn example() -> Result<(), Box> { /// let mut index = FlatIndex::new(3, Vec::new()); -/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0] }; +/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; /// /// index.add(vector)?; /// let results = index.search(&[1.1, 2.1, 3.1], 5, SimilarityMetric::Cosine); @@ -100,7 +100,8 @@ impl VectorIndex for FlatIndex { .iter() .map(|e| SearchResult { id: e.id, - score: similarity_metric.calculate(&e.values, query) + score: similarity_metric.calculate(&e.values, query), + metadata: e.metadata.clone() }) .collect(); @@ -136,9 +137,9 @@ mod tests { fn test_serialization_deserialization() { // Create a FlatIndex with some data let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0] }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0] }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0] }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], metadata: None }, ]; let flat_index = FlatIndex::new(3, vectors); @@ -177,9 +178,9 @@ mod tests { #[test] fn test_flat_index_with_cosine_similarity() { let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0] }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0] }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0] }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], metadata: None }, ]; let index = FlatIndex::new(3, vectors); @@ -194,9 +195,9 @@ mod tests { #[test] fn test_flat_index_with_euclidean_similarity() { let vectors = vec![ - Vector { id: 1, values: vec![0.0, 0.0] }, - Vector { id: 2, values: vec![3.0, 4.0] }, - Vector { id: 3, values: vec![6.0, 8.0] }, + Vector { id: 
1, values: vec![0.0, 0.0], metadata: None }, + Vector { id: 2, values: vec![3.0, 4.0], metadata: None }, + Vector { id: 3, values: vec![6.0, 8.0], metadata: None }, ]; let index = FlatIndex::new(2, vectors); @@ -211,9 +212,9 @@ mod tests { #[test] fn test_flat_index_with_manhattan_similarity() { let vectors = vec![ - Vector { id: 1, values: vec![0.0, 0.0] }, - Vector { id: 2, values: vec![3.0, 4.0] }, - Vector { id: 3, values: vec![6.0, 8.0] }, + Vector { id: 1, values: vec![0.0, 0.0], metadata: None }, + Vector { id: 2, values: vec![3.0, 4.0], metadata: None }, + Vector { id: 3, values: vec![6.0, 8.0], metadata: None }, ]; let index = FlatIndex::new(2, vectors); @@ -228,9 +229,9 @@ mod tests { #[test] fn test_flat_index_with_dot_product() { let vectors = vec![ - Vector { id: 1, values: vec![1.0, 2.0] }, - Vector { id: 2, values: vec![2.0, 1.0] }, - Vector { id: 3, values: vec![0.0, 0.0] }, + Vector { id: 1, values: vec![1.0, 2.0], metadata: None }, + Vector { id: 2, values: vec![2.0, 1.0], metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0], metadata: None }, ]; let index = FlatIndex::new(2, vectors); @@ -245,8 +246,8 @@ mod tests { #[test] fn test_flat_index_change_similarity_metric() { let vectors = vec![ - Vector { id: 1, values: vec![1.0, 2.0] }, - Vector { id: 2, values: vec![2.0, 1.0] }, + Vector { id: 1, values: vec![1.0, 2.0], metadata: None }, + Vector { id: 2, values: vec![2.0, 1.0], metadata: None }, ]; let index = FlatIndex::new(2, vectors); diff --git a/src/index/hnsw.rs b/src/index/hnsw.rs index 22ade3b..4efad49 100644 --- a/src/index/hnsw.rs +++ b/src/index/hnsw.rs @@ -31,7 +31,7 @@ //! //! # fn example() -> Result<(), Box> { //! let mut index = HNSWIndex::new(384); -//! let vector = Vector { id: 1, values: vec![0.1; 384] }; +//! let vector = Vector { id: 1, values: vec![0.1; 384], metadata: None }; //! //! index.add(vector)?; //! 
let results = index.search(&[0.1; 384], 5, SimilarityMetric::Cosine); @@ -235,7 +235,11 @@ impl VectorIndex for HNSWIndex { self.index_to_id.get(&n.index).and_then(|&custom_id| { self.vectors.get(&custom_id).map(|vector| { let score = similarity_metric.calculate(&vector.values, query); - SearchResult { id: custom_id, score } + SearchResult { + id: custom_id, + score, + metadata: vector.metadata.clone() + } }) }) }) @@ -282,6 +286,7 @@ fn test_add_vector() { let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], + metadata: None, }; assert!(hnsw.add(vector).is_ok()); @@ -295,6 +300,7 @@ fn test_add_vector_dimension_mismatch() { let vector = Vector { id: 1, values: vec![1.0, 2.0], // Wrong dimension + metadata: None, }; assert!(hnsw.add(vector).is_err()); @@ -306,10 +312,10 @@ fn test_search_basic() { let mut hnsw = HNSWIndex::new(3); let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0] }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0] }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0] }, - Vector { id: 4, values: vec![1.0, 1.0, 0.0] }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], metadata: None }, + Vector { id: 4, values: vec![1.0, 1.0, 0.0], metadata: None }, ]; for vector in vectors { @@ -346,10 +352,10 @@ fn test_id_mapping() { // Add vectors with custom IDs let vectors = vec![ - Vector { id: 100, values: vec![1.0, 0.0, 0.0] }, - Vector { id: 200, values: vec![0.0, 1.0, 0.0] }, - Vector { id: 300, values: vec![0.0, 0.0, 1.0] }, - Vector { id: 400, values: vec![1.0, 1.0, 0.0] }, + Vector { id: 100, values: vec![1.0, 0.0, 0.0], metadata: None }, + Vector { id: 200, values: vec![0.0, 1.0, 0.0], metadata: None }, + Vector { id: 300, values: vec![0.0, 0.0, 1.0], metadata: None }, + Vector { id: 400, values: vec![1.0, 1.0, 0.0], metadata: None }, ]; for vector in vectors { @@ -376,8 +382,8 @@ fn test_id_mapping() { fn 
test_duplicate_id_error() { let mut hnsw = HNSWIndex::new(3); - let vector1 = Vector { id: 1, values: vec![1.0, 2.0, 3.0] }; - let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0] }; // Same ID + let vector1 = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; + let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0], metadata: None }; // Same ID assert!(hnsw.add(vector1).is_ok()); assert!(hnsw.add(vector2).is_err()); // Should fail with duplicate ID @@ -387,7 +393,7 @@ fn test_duplicate_id_error() { fn test_delete_vector() { let mut hnsw = HNSWIndex::new(3); - let vector = Vector { id: 42, values: vec![1.0, 2.0, 3.0] }; + let vector = Vector { id: 42, values: vec![1.0, 2.0, 3.0], metadata: None }; assert!(hnsw.add(vector).is_ok()); assert_eq!(hnsw.len(), 1); @@ -421,9 +427,9 @@ fn test_serialization_deserialization() { // Create an HNSW index with some data let mut hnsw = HNSWIndex::new(3); let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0] }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0] }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0] }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], metadata: None }, ]; for vector in vectors { @@ -454,7 +460,7 @@ fn test_serialization_deserialization() { assert_eq!(vector3.values, vec![0.0, 0.0, 1.0]); // Test that we can add a new vector to the deserialized index - let new_vector = Vector { id: 4, values: vec![1.0, 1.0, 1.0] }; + let new_vector = Vector { id: 4, values: vec![1.0, 1.0, 1.0], metadata: None }; assert!(deserialized.add(new_vector).is_ok()); assert_eq!(deserialized.len(), 4); @@ -505,7 +511,7 @@ fn test_empty_hnsw_serialization_deserialization() { assert!(deserialized.is_empty()); // Test that we can add vectors to the deserialized empty index - let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0] }; + let vector = Vector { id: 1, values: vec![1.0, 
2.0, 3.0], metadata: None }; assert!(deserialized.add(vector).is_ok()); assert_eq!(deserialized.len(), 1); assert!(!deserialized.is_empty()); @@ -517,9 +523,9 @@ fn test_search_with_limited_vectors() { // Add only 3 vectors let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0] }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0] }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0] }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], metadata: None }, ]; for vector in vectors { diff --git a/src/lib.rs b/src/lib.rs index 9bb1f5d..1f2ce53 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,10 +114,16 @@ pub const DEFAULT_VECTOR_DIMENSION: usize = 768; /// /// ```rust /// use vectorlite::Vector; +/// use serde_json::json; /// /// let vector = Vector { /// id: 1, /// values: vec![0.1, 0.2, 0.3, 0.4], +/// metadata: Some(json!({ +/// "title": "Sample Document", +/// "category": "example", +/// "tags": ["demo", "test"] +/// })), /// }; /// ``` #[derive(Debug, Clone, Serialize, Deserialize)] @@ -126,9 +132,12 @@ pub struct Vector { pub id: u64, /// The vector values (embedding coordinates) pub values: Vec, + /// Optional metadata associated with the vector + /// Can contain arbitrary JSON data for flexible schema-less storage + pub metadata: Option, } -/// Search result containing a vector ID and similarity score +/// Search result containing a vector ID, similarity score, and optional metadata /// /// Results are typically sorted by score in descending order (highest similarity first). 
/// @@ -136,10 +145,12 @@ pub struct Vector { /// /// ```rust /// use vectorlite::SearchResult; +/// use serde_json::json; /// /// let result = SearchResult { /// id: 42, /// score: 0.95, +/// metadata: Some(json!({"title": "Document Title"})), /// }; /// ``` #[derive(Debug, Clone, Serialize, Deserialize)] @@ -148,6 +159,8 @@ pub struct SearchResult { pub id: u64, /// Similarity score (higher is more similar) pub score: f64, + /// Optional metadata from the matching vector + pub metadata: Option, } /// Trait for vector indexing implementations @@ -162,7 +175,7 @@ pub struct SearchResult { /// /// # fn example() -> Result<(), Box> { /// let mut index = FlatIndex::new(3, Vec::new()); -/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0] }; +/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; /// /// index.add(vector)?; /// let results = index.search(&[1.1, 2.1, 3.1], 5, SimilarityMetric::Cosine); @@ -207,7 +220,7 @@ pub trait VectorIndex { /// let mut wrapper = VectorIndexWrapper::Flat(FlatIndex::new(3, Vec::new())); /// /// // Add a vector -/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0] }; +/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; /// wrapper.add(vector)?; /// /// // Search using the wrapper @@ -598,8 +611,8 @@ mod tests { #[test] fn test_vector_store_creation() { let vectors = vec![ - Vector { id: 0, values: vec![1.0, 2.0, 3.0] }, - Vector { id: 1, values: vec![4.0, 5.0, 6.0] }, + Vector { id: 0, values: vec![1.0, 2.0, 3.0], metadata: None }, + Vector { id: 1, values: vec![4.0, 5.0, 6.0], metadata: None }, ]; let store = FlatIndex::new(3, vectors); assert_eq!(store.len(), 2); @@ -609,9 +622,9 @@ mod tests { #[test] fn test_vector_store_search() { let vectors = vec![ - Vector { id: 0, values: vec![1.0, 0.0, 0.0] }, - Vector { id: 1, values: vec![0.0, 1.0, 0.0] }, - Vector { id: 2, values: vec![0.0, 0.0, 1.0] }, + Vector { id: 0, values: vec![1.0, 0.0, 0.0], metadata: None }, + 
Vector { id: 1, values: vec![0.0, 1.0, 0.0], metadata: None }, + Vector { id: 2, values: vec![0.0, 0.0, 1.0], metadata: None }, ]; let store = FlatIndex::new(3, vectors); let query = vec![1.0, 0.0, 0.0]; @@ -628,8 +641,8 @@ mod tests { // Test FlatIndex wrapper let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0] }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0] }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, ]; let flat_index = FlatIndex::new(3, vectors); let wrapper = VectorIndexWrapper::Flat(flat_index); @@ -652,4 +665,51 @@ mod tests { assert_eq!(results[0].id, 1); } + #[test] + fn test_vector_metadata_functionality() { + use serde_json::json; + + // Test vector with metadata + let metadata = json!({ + "title": "Test Document", + "category": "example", + "tags": ["test", "metadata"], + "nested": { + "value": 42, + "enabled": true + } + }); + + let vector = Vector { + id: 1, + values: vec![1.0, 2.0, 3.0], + metadata: Some(metadata.clone()), + }; + + // Test that metadata is preserved + assert!(vector.metadata.is_some()); + let stored_metadata = vector.metadata.as_ref().unwrap(); + assert_eq!(stored_metadata["title"], "Test Document"); + assert_eq!(stored_metadata["category"], "example"); + assert_eq!(stored_metadata["nested"]["value"], 42); + assert_eq!(stored_metadata["nested"]["enabled"], true); + + // Test vector without metadata + let vector_no_metadata = Vector { + id: 2, + values: vec![4.0, 5.0, 6.0], + metadata: None, + }; + + assert!(vector_no_metadata.metadata.is_none()); + + // Test serialization/deserialization with metadata + let serialized = serde_json::to_string(&vector).expect("Serialization should work"); + let deserialized: Vector = serde_json::from_str(&serialized).expect("Deserialization should work"); + + assert_eq!(deserialized.id, vector.id); + assert_eq!(deserialized.values, vector.values); + assert_eq!(deserialized.metadata, vector.metadata); 
+ } + } \ No newline at end of file diff --git a/src/persistence.rs b/src/persistence.rs index fc43618..9a49f3b 100644 --- a/src/persistence.rs +++ b/src/persistence.rs @@ -168,8 +168,8 @@ mod tests { fn create_test_collection() -> Collection { let vectors = vec![ - Vector { id: 0, values: vec![1.0, 2.0, 3.0] }, - Vector { id: 1, values: vec![4.0, 5.0, 6.0] }, + Vector { id: 0, values: vec![1.0, 2.0, 3.0], metadata: None }, + Vector { id: 1, values: vec![4.0, 5.0, 6.0], metadata: None }, ]; let flat_index = FlatIndex::new(3, vectors); let index = VectorIndexWrapper::Flat(flat_index); @@ -257,8 +257,8 @@ mod tests { let collection = Collection::new("test_hnsw_collection".to_string(), index); // Add some vectors - let vector1 = Vector { id: 0, values: vec![1.0, 2.0, 3.0] }; - let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0] }; + let vector1 = Vector { id: 0, values: vec![1.0, 2.0, 3.0], metadata: None }; + let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0], metadata: None }; collection.add_vector(vector1).unwrap(); collection.add_vector(vector2).unwrap(); diff --git a/src/server.rs b/src/server.rs index 328a7de..ff4d94f 100644 --- a/src/server.rs +++ b/src/server.rs @@ -98,6 +98,7 @@ pub struct AddTextResponse { pub struct AddVectorRequest { pub id: u64, pub values: Vec, + pub metadata: Option, } #[derive(Debug, Serialize)] @@ -321,6 +322,7 @@ async fn add_vector( let vector = Vector { id: payload.id, values: payload.values, + metadata: payload.metadata, }; let client = state.read().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 174eea9..564fe64 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -26,6 +26,7 @@ pub fn load_test_dataset(path: &str, dimension: usize) -> Result Date: Fri, 24 Oct 2025 14:32:33 +1100 Subject: [PATCH 2/7] wip --- README.md | 30 +++++++++----- src/client.rs | 63 ++++++++++++++++++++--------- src/lib.rs | 73 
+++++++++++++++++++++++----------- src/server.rs | 6 ++- tests/http_integration_test.rs | 8 ++-- 5 files changed, 124 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index ce992b8..eecba7f 100644 --- a/README.md +++ b/README.md @@ -67,8 +67,8 @@ docker build \ | **List collections** | `GET /collections` | – | | **Create collection** | `POST /collections` | `{"name": "docs", "index_type": "hnsw"}` | | **Delete collection** | `DELETE /collections/{name}` | – | -| **Add text** | `POST /collections/{name}/text` | `{"text": "Hello world"}` | -| **Add vector** | `POST /collections/{name}/vector` | `{"id": 1, "values": [0.1, 0.2, ...]}` | +| **Add text** | `POST /collections/{name}/text` | `{"text": "Hello world", "metadata": {...}}`| +| **Add vector** | `POST /collections/{name}/vector` | `{"id": 1, "values": [0.1, 0.2, ...], "metadata": {...}}` | | **Search (text)** | `POST /collections/{name}/search/text` | `{"query": "hello", "k": 5}` | | **Search (vector)** | `POST /collections/{name}/search/vector` | `{"query": [0.1, 0.2, ...], "k": 5}` | | **Get vector** | `GET /collections/{name}/vectors/{id}` | – | @@ -104,26 +104,38 @@ cargo build --features memory-optimized - **Manhattan**: L1 norm, robust to outliers - **Dot Product**: Raw similarity, requires consistent vector scaling - ## Rust SDK Example ```rust use vectorlite::{VectorLiteClient, EmbeddingGenerator, IndexType, SimilarityMetric}; +use serde_json::json; fn main() -> Result<(), Box> { let client = VectorLiteClient::new(Box::new(EmbeddingGenerator::new()?)); - client.create_collection("documents", IndexType::HNSW)?; - let id = client.add_text_to_collection("documents", "Hello world")?; + client.create_collection("quotes", IndexType::HNSW)?; + + let id = client.add_text_to_collection( + "quotes", + "I just want to lie on the beach and eat hot dogs", + Some(json!({ + "author": "Kevin Malone", + "tags": ["the-office", "s3:e23"], + "year": 2005, + })) + )?; let results = 
client.search_text_in_collection( - "documents", - "hello", - 5, + "quotes", + "beach games", + 3, SimilarityMetric::Cosine, )?; - println!("{:?}", results); + for result in &results { + println!("ID: {}, Score: {:.4}", result.id, result.score); + } + Ok(()) } ``` diff --git a/src/client.rs b/src/client.rs index f47bea5..790bd11 100644 --- a/src/client.rs +++ b/src/client.rs @@ -20,7 +20,7 @@ //! client.create_collection("documents", IndexType::HNSW)?; //! //! // Add text (auto-generates embedding) -//! let id = client.add_text_to_collection("documents", "Hello world")?; +//! let id = client.add_text_to_collection("documents", "Hello world", None)?; //! //! // Search for similar text //! let results = client.search_text_in_collection( @@ -121,11 +121,11 @@ impl VectorLiteClient { self.collections.contains_key(name) } - pub fn add_text_to_collection(&self, collection_name: &str, text: &str) -> VectorLiteResult { + pub fn add_text_to_collection(&self, collection_name: &str, text: &str, metadata: Option) -> VectorLiteResult { let collection = self.collections.get(collection_name) .ok_or_else(|| VectorLiteError::CollectionNotFound { name: collection_name.to_string() })?; - collection.add_text(text, self.embedding_function.as_ref()) + collection.add_text_with_metadata(text, metadata, self.embedding_function.as_ref()) } pub fn add_vector_to_collection(&self, collection_name: &str, vector: Vector) -> VectorLiteResult<()> { @@ -331,6 +331,33 @@ impl Collection { Ok(id) } + pub fn add_text_with_metadata(&self, text: &str, metadata: Option, embedding_function: &dyn EmbeddingFunction) -> VectorLiteResult { + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + + // Generate embedding outside the lock + let embedding = embedding_function.generate_embedding(text)?; + + let vector = Vector { id, values: embedding, metadata }; + let vector_dimension = vector.values.len(); + let vector_id = vector.id; + + // Acquire write lock only for the index operation + let mut index = 
self.index.write().map_err(|_| VectorLiteError::LockError("Failed to acquire write lock for add_text_with_metadata".to_string()))?; + index.add(vector).map_err(|e| { + if e.contains("dimension") { + VectorLiteError::DimensionMismatch { + expected: index.dimension(), + actual: vector_dimension + } + } else if e.contains("already exists") { + VectorLiteError::DuplicateVectorId { id: vector_id } + } else { + VectorLiteError::InternalError(e) + } + })?; + Ok(id) + } + pub fn add_vector(&self, vector: Vector) -> VectorLiteResult<()> { let vector_dimension = vector.values.len(); let vector_id = vector.id; @@ -426,7 +453,7 @@ impl Collection { /// # fn example() -> Result<(), Box> { /// let mut client = VectorLiteClient::new(Box::new(EmbeddingGenerator::new()?)); /// client.create_collection("docs", IndexType::HNSW)?; - /// client.add_text_to_collection("docs", "Hello world")?; + /// client.add_text_to_collection("docs", "Hello world", None)?; /// /// let collection = client.get_collection("docs").unwrap(); /// collection.save_to_file(Path::new("./docs.vlc"))?; @@ -578,13 +605,13 @@ mod tests { client.create_collection("test_collection", IndexType::Flat).unwrap(); // Add text - let result = client.add_text_to_collection("test_collection", "Hello world"); + let result = client.add_text_to_collection("test_collection", "Hello world", None); assert!(result.is_ok()); let id = result.unwrap(); assert_eq!(id, 0); // First ID is 0 // Add another text - let result = client.add_text_to_collection("test_collection", "Another text"); + let result = client.add_text_to_collection("test_collection", "Another text", None); assert!(result.is_ok()); let id = result.unwrap(); assert_eq!(id, 1); @@ -600,7 +627,7 @@ mod tests { let client = VectorLiteClient::new(Box::new(embedding_fn)); // Try to add to non-existent collection - let result = client.add_text_to_collection("non_existent", "Hello world"); + let result = client.add_text_to_collection("non_existent", "Hello world", None); 
assert!(result.is_err()); assert!(matches!(result.unwrap_err(), VectorLiteError::CollectionNotFound { .. })); } @@ -620,7 +647,7 @@ mod tests { assert_eq!(info.name, "test_collection"); // Add text - let id = client.add_text_to_collection("test_collection", "Hello world").unwrap(); + let id = client.add_text_to_collection("test_collection", "Hello world", None).unwrap(); assert_eq!(id, 0); let info = client.get_collection_info("test_collection").unwrap(); @@ -628,7 +655,7 @@ mod tests { assert_eq!(info.count, 1); // Add another text - let id = client.add_text_to_collection("test_collection", "Another text").unwrap(); + let id = client.add_text_to_collection("test_collection", "Another text", None).unwrap(); assert_eq!(id, 1); let info = client.get_collection_info("test_collection").unwrap(); @@ -664,8 +691,8 @@ mod tests { client.create_collection("hnsw_collection", IndexType::HNSW).unwrap(); // Add some text - let id1 = client.add_text_to_collection("hnsw_collection", "First document").unwrap(); - let id2 = client.add_text_to_collection("hnsw_collection", "Second document").unwrap(); + let id1 = client.add_text_to_collection("hnsw_collection", "First document", None).unwrap(); + let id2 = client.add_text_to_collection("hnsw_collection", "Second document", None).unwrap(); assert_eq!(id1, 0); assert_eq!(id2, 1); @@ -711,8 +738,8 @@ mod tests { client.create_collection("test_collection", IndexType::Flat).unwrap(); // Add some vectors - client.add_text_to_collection("test_collection", "Hello world").unwrap(); - client.add_text_to_collection("test_collection", "Another text").unwrap(); + client.add_text_to_collection("test_collection", "Hello world", None).unwrap(); + client.add_text_to_collection("test_collection", "Another text", None).unwrap(); // Search with vector directly let query_vector = vec![1.0, 2.0, 3.0]; @@ -732,8 +759,8 @@ mod tests { // Create collection and add some data client.create_collection("test_collection", IndexType::Flat).unwrap(); - 
client.add_text_to_collection("test_collection", "Hello world").unwrap(); - client.add_text_to_collection("test_collection", "Another text").unwrap(); + client.add_text_to_collection("test_collection", "Hello world", None).unwrap(); + client.add_text_to_collection("test_collection", "Another text", None).unwrap(); let collection = client.get_collection("test_collection").unwrap(); @@ -768,8 +795,8 @@ mod tests { // Create HNSW collection and add some data client.create_collection("test_hnsw_collection", IndexType::HNSW).unwrap(); - client.add_text_to_collection("test_hnsw_collection", "First document").unwrap(); - client.add_text_to_collection("test_hnsw_collection", "Second document").unwrap(); + client.add_text_to_collection("test_hnsw_collection", "First document", None).unwrap(); + client.add_text_to_collection("test_hnsw_collection", "Second document", None).unwrap(); let collection = client.get_collection("test_hnsw_collection").unwrap(); @@ -815,7 +842,7 @@ mod tests { let mut client = VectorLiteClient::new(Box::new(embedding_fn)); client.create_collection("test_collection", IndexType::Flat).unwrap(); - client.add_text_to_collection("test_collection", "Hello world").unwrap(); + client.add_text_to_collection("test_collection", "Hello world", None).unwrap(); let collection = client.get_collection("test_collection").unwrap(); diff --git a/src/lib.rs b/src/lib.rs index 90a720e..598c0ea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,32 @@ //! # VectorLite //! -//! A high-performance, in-memory vector database optimized for AI agent workloads with HTTP API and thread-safe concurrency. +//! **A tiny, in-process Rust vector store with built-in embeddings for sub-millisecond semantic search.** //! -//! ## Overview +//! VectorLite is a high-performance, **in-memory vector database** optimized for **AI agent** and **edge** workloads. +//! 
It co-locates model inference (via [Candle](https://github.com/huggingface/candle)) with a low-latency vector index, making it ideal for **session-scoped**, **single-instance**, or **privacy-sensitive** environments. //! -//! ## Overview +//! ## Why VectorLite? //! -//! VectorLite is designed for **single-instance, low-latency vector operations** in AI agent environments. It prioritizes **sub-millisecond search performance** over distributed scalability, making it ideal for: +//! | Feature | Description | +//! |----------|-------------| +//! | **Sub-millisecond search** | In-memory HNSW or flat search tuned for real-time agent loops. | +//! | **Built-in embeddings** | Runs [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) locally using Candle, or any other model of your choice. No external API calls. | +//! | **Single-binary simplicity** | No dependencies, no servers to orchestrate. Start instantly via CLI or Docker. | +//! | **Session-scoped collections** | Perfect for ephemeral agent sessions or sidecars. | +//! | **Thread-safe concurrency** | RwLock-based access and atomic ID generation for multi-threaded workloads. | +//! | **Instant persistence** | Save or restore collection snapshots in one call. | +//! +//! VectorLite trades distributed scalability for deterministic performance, perfect for use cases where latency matters more than millions of vectors. +//! +//! ## When to Use It +//! +//! | Scenario | Why VectorLite fits | +//! |-----------|--------------------| +//! | **AI agent sessions** | Keep short-lived embeddings per conversation. No network latency. | +//! 
| **Edge or embedded AI** | Run fully offline with model + index in one binary. | +//! | **Realtime search / personalization** | Sub-ms search for pre-computed embeddings. | +//! | **Local prototyping & CI** | Rust-native, no external services. | +//! | **Single-tenant microservices** | Lightweight sidecar for semantic capabilities. | //! //! ## Key Features //! @@ -23,26 +40,36 @@ //! //! ```rust //! use vectorlite::{VectorLiteClient, EmbeddingGenerator, IndexType, SimilarityMetric}; +//! use serde_json::json; //! -//! # async fn example() -> Result<(), Box> { -//! // Create client with embedding function -//! let mut client = VectorLiteClient::new(Box::new(EmbeddingGenerator::new()?)); +//! fn main() -> Result<(), Box> { +//! let mut client = VectorLiteClient::new(Box::new(EmbeddingGenerator::new()?)); //! -//! // Create collection -//! client.create_collection("documents", IndexType::HNSW)?; +//! client.create_collection("quotes", IndexType::HNSW)?; +//! +//! let id = client.add_text_to_collection( +//! "quotes", +//! "I just want to lie on the beach and eat hot dogs", +//! Some(json!({ +//! "author": "Kevin Malone", +//! "tags": ["the-office", "s3:e23"], +//! "year": 2005, +//! })) +//! )?; //! -//! // Add text (auto-generates embedding and ID) -//! let id = client.add_text_to_collection("documents", "Hello world")?; +//! let results = client.search_text_in_collection( +//! "quotes", +//! "beach games", +//! 3, +//! SimilarityMetric::Cosine, +//! )?; //! -//! // Search -//! let results = client.search_text_in_collection( -//! "documents", -//! "hello", -//! 5, -//! SimilarityMetric::Cosine -//! )?; -//! # Ok(()) -//! # } +//! for result in &results { +//! println!("ID: {}, Score: {:.4}", result.id, result.score); +//! } +//! +//! Ok(()) +//! } //! ``` //! //! ## Index Types diff --git a/src/server.rs b/src/server.rs index 1f194b5..ff43a91 100644 --- a/src/server.rs +++ b/src/server.rs @@ -15,7 +15,7 @@ //! 
- `DELETE /collections/{name}` - Delete a collection //! //! ## Vector Operations -//! - `POST /collections/{name}/text` - Add text (auto-generates embedding) +//! - `POST /collections/{name}/text` - Add text (auto-generates embedding, optional metadata) //! - `POST /collections/{name}/vector` - Add raw vector //! - `POST /collections/{name}/search/text` - Search by text //! - `POST /collections/{name}/search/vector` - Search by vector @@ -85,6 +85,7 @@ pub struct CreateCollectionResponse { #[derive(Debug, Deserialize)] pub struct AddTextRequest { pub text: String, + pub metadata: Option, } #[derive(Debug, Serialize)] @@ -266,7 +267,7 @@ async fn add_text( Json(payload): Json, ) -> Result, StatusCode> { let client = state.read().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; - match client.add_text_to_collection(&collection_name, &payload.text) { + match client.add_text_to_collection(&collection_name, &payload.text, payload.metadata) { Ok(id) => { info!("Added text to collection '{}' with ID: {}", collection_name, id); Ok(Json(AddTextResponse { @@ -280,6 +281,7 @@ async fn add_text( } } + async fn add_vector( State(state): State, Path(collection_name): Path, diff --git a/tests/http_integration_test.rs b/tests/http_integration_test.rs index 24a4813..01802cd 100644 --- a/tests/http_integration_test.rs +++ b/tests/http_integration_test.rs @@ -208,7 +208,7 @@ async fn test_add_vector_to_collection() { async fn test_search_text() { let mut client = create_test_client(); client.create_collection("test_collection", vectorlite::IndexType::Flat).unwrap(); - client.add_text_to_collection("test_collection", "Hello world").unwrap(); + client.add_text_to_collection("test_collection", "Hello world", None).unwrap(); let app = create_app(std::sync::Arc::new(std::sync::RwLock::new(client))); let payload = json!({ @@ -238,7 +238,7 @@ async fn test_search_text() { async fn test_search_vector() { let mut client = create_test_client(); client.create_collection("test_collection", 
vectorlite::IndexType::Flat).unwrap(); - client.add_text_to_collection("test_collection", "Hello world").unwrap(); + client.add_text_to_collection("test_collection", "Hello world", None).unwrap(); let app = create_app(std::sync::Arc::new(std::sync::RwLock::new(client))); let payload = json!({ @@ -268,7 +268,7 @@ async fn test_search_vector() { async fn test_get_vector() { let mut client = create_test_client(); client.create_collection("test_collection", vectorlite::IndexType::Flat).unwrap(); - client.add_text_to_collection("test_collection", "Hello world").unwrap(); + client.add_text_to_collection("test_collection", "Hello world", None).unwrap(); let app = create_app(std::sync::Arc::new(std::sync::RwLock::new(client))); let request = Request::builder() @@ -290,7 +290,7 @@ async fn test_get_vector() { async fn test_delete_vector() { let mut client = create_test_client(); client.create_collection("test_collection", vectorlite::IndexType::Flat).unwrap(); - client.add_text_to_collection("test_collection", "Hello world").unwrap(); + client.add_text_to_collection("test_collection", "Hello world", None).unwrap(); let app = create_app(std::sync::Arc::new(std::sync::RwLock::new(client))); let request = Request::builder() From 5534f77ee6b0ff64280565a6b7f6ea95d4464cd6 Mon Sep 17 00:00:00 2001 From: mathieu Date: Fri, 24 Oct 2025 14:35:04 +1100 Subject: [PATCH 3/7] fix-clippy --- src/server.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/server.rs b/src/server.rs index ff43a91..21bca1a 100644 --- a/src/server.rs +++ b/src/server.rs @@ -449,10 +449,9 @@ async fn load_collection( Ok(collection) => collection, Err(e) => { // Check if it's a file not found error - if let crate::persistence::PersistenceError::Io(io_err) = &e { - if io_err.kind() == std::io::ErrorKind::NotFound { - return Err(VectorLiteError::FileNotFound(format!("File not found: {}", payload.file_path)).status_code()); - } + if let crate::persistence::PersistenceError::Io(io_err) = 
&e + && io_err.kind() == std::io::ErrorKind::NotFound { + return Err(VectorLiteError::FileNotFound(format!("File not found: {}", payload.file_path)).status_code()); } return Err(VectorLiteError::from(e).status_code()); } @@ -479,7 +478,7 @@ async fn load_collection( let new_collection = crate::Collection::new(collection_name.clone(), index); // Add the collection to the client - if let Err(_) = client.add_collection(new_collection) { + if client.add_collection(new_collection).is_err() { return Err(StatusCode::INTERNAL_SERVER_ERROR); } From f0d3e6237329c9a358db5a4e96fa7b38e4ada25a Mon Sep 17 00:00:00 2001 From: mathieu Date: Fri, 24 Oct 2025 14:48:24 +1100 Subject: [PATCH 4/7] add support for base text --- src/client.rs | 15 +++++++++++++-- src/index/flat.rs | 35 ++++++++++++++++++----------------- src/index/hnsw.rs | 43 +++++++++++++++++++++++-------------------- src/lib.rs | 28 ++++++++++++++++++---------- src/persistence.rs | 8 ++++---- src/server.rs | 2 ++ 6 files changed, 78 insertions(+), 53 deletions(-) diff --git a/src/client.rs b/src/client.rs index 790bd11..b8c7014 100644 --- a/src/client.rs +++ b/src/client.rs @@ -310,7 +310,12 @@ impl Collection { // Generate embedding outside the lock let embedding = embedding_function.generate_embedding(text)?; - let vector = Vector { id, values: embedding, metadata: None }; + let vector = Vector { + id, + values: embedding, + text: Some(text.to_string()), + metadata: None + }; let vector_dimension = vector.values.len(); let vector_id = vector.id; @@ -337,7 +342,12 @@ impl Collection { // Generate embedding outside the lock let embedding = embedding_function.generate_embedding(text)?; - let vector = Vector { id, values: embedding, metadata }; + let vector = Vector { + id, + values: embedding, + text: Some(text.to_string()), + metadata + }; let vector_dimension = vector.values.len(); let vector_id = vector.id; @@ -716,6 +726,7 @@ mod tests { let vector = Vector { id: 42, values: vec![1.0, 2.0, 3.0], + text: Some("Test 
vector text".to_string()), metadata: None, }; diff --git a/src/index/flat.rs b/src/index/flat.rs index a40718d..97881bc 100644 --- a/src/index/flat.rs +++ b/src/index/flat.rs @@ -101,6 +101,7 @@ impl VectorIndex for FlatIndex { .map(|e| SearchResult { id: e.id, score: similarity_metric.calculate(&e.values, query), + text: e.text.clone(), metadata: e.metadata.clone() }) .collect(); @@ -137,9 +138,9 @@ mod tests { fn test_serialization_deserialization() { // Create a FlatIndex with some data let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0], metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, ]; let flat_index = FlatIndex::new(3, vectors); @@ -178,9 +179,9 @@ mod tests { #[test] fn test_flat_index_with_cosine_similarity() { let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0], metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, ]; let index = FlatIndex::new(3, vectors); @@ -195,9 +196,9 @@ mod tests { #[test] fn test_flat_index_with_euclidean_similarity() { let vectors = vec![ - Vector { id: 1, values: vec![0.0, 0.0], metadata: None }, - Vector { id: 2, values: vec![3.0, 4.0], metadata: None }, - Vector { id: 3, values: vec![6.0, 8.0], metadata: None }, + Vector { id: 1, values: vec![0.0, 0.0], text: None, metadata: None }, + Vector { id: 2, values: vec![3.0, 4.0], text: None, metadata: None }, + 
Vector { id: 3, values: vec![6.0, 8.0], text: None, metadata: None }, ]; let index = FlatIndex::new(2, vectors); @@ -212,9 +213,9 @@ mod tests { #[test] fn test_flat_index_with_manhattan_similarity() { let vectors = vec![ - Vector { id: 1, values: vec![0.0, 0.0], metadata: None }, - Vector { id: 2, values: vec![3.0, 4.0], metadata: None }, - Vector { id: 3, values: vec![6.0, 8.0], metadata: None }, + Vector { id: 1, values: vec![0.0, 0.0], text: None, metadata: None }, + Vector { id: 2, values: vec![3.0, 4.0], text: None, metadata: None }, + Vector { id: 3, values: vec![6.0, 8.0], text: None, metadata: None }, ]; let index = FlatIndex::new(2, vectors); @@ -229,9 +230,9 @@ mod tests { #[test] fn test_flat_index_with_dot_product() { let vectors = vec![ - Vector { id: 1, values: vec![1.0, 2.0], metadata: None }, - Vector { id: 2, values: vec![2.0, 1.0], metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0], metadata: None }, + Vector { id: 1, values: vec![1.0, 2.0], text: None, metadata: None }, + Vector { id: 2, values: vec![2.0, 1.0], text: None, metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0], text: None, metadata: None }, ]; let index = FlatIndex::new(2, vectors); @@ -246,8 +247,8 @@ mod tests { #[test] fn test_flat_index_change_similarity_metric() { let vectors = vec![ - Vector { id: 1, values: vec![1.0, 2.0], metadata: None }, - Vector { id: 2, values: vec![2.0, 1.0], metadata: None }, + Vector { id: 1, values: vec![1.0, 2.0], text: None, metadata: None }, + Vector { id: 2, values: vec![2.0, 1.0], text: None, metadata: None }, ]; let index = FlatIndex::new(2, vectors); diff --git a/src/index/hnsw.rs b/src/index/hnsw.rs index 4efad49..07e5573 100644 --- a/src/index/hnsw.rs +++ b/src/index/hnsw.rs @@ -31,7 +31,7 @@ //! //! # fn example() -> Result<(), Box> { //! let mut index = HNSWIndex::new(384); -//! let vector = Vector { id: 1, values: vec![0.1; 384], metadata: None }; +//! 
let vector = Vector { id: 1, values: vec![0.1; 384], text: None, metadata: None }; //! //! index.add(vector)?; //! let results = index.search(&[0.1; 384], 5, SimilarityMetric::Cosine); @@ -238,6 +238,7 @@ impl VectorIndex for HNSWIndex { SearchResult { id: custom_id, score, + text: vector.text.clone(), metadata: vector.metadata.clone() } }) @@ -286,6 +287,7 @@ fn test_add_vector() { let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], + text: None, metadata: None, }; @@ -300,6 +302,7 @@ fn test_add_vector_dimension_mismatch() { let vector = Vector { id: 1, values: vec![1.0, 2.0], // Wrong dimension + text: None, metadata: None, }; @@ -312,10 +315,10 @@ fn test_search_basic() { let mut hnsw = HNSWIndex::new(3); let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0], metadata: None }, - Vector { id: 4, values: vec![1.0, 1.0, 0.0], metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, + Vector { id: 4, values: vec![1.0, 1.0, 0.0], text: None, metadata: None }, ]; for vector in vectors { @@ -352,10 +355,10 @@ fn test_id_mapping() { // Add vectors with custom IDs let vectors = vec![ - Vector { id: 100, values: vec![1.0, 0.0, 0.0], metadata: None }, - Vector { id: 200, values: vec![0.0, 1.0, 0.0], metadata: None }, - Vector { id: 300, values: vec![0.0, 0.0, 1.0], metadata: None }, - Vector { id: 400, values: vec![1.0, 1.0, 0.0], metadata: None }, + Vector { id: 100, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, + Vector { id: 200, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, + Vector { id: 300, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, + Vector { id: 400, values: vec![1.0, 1.0, 0.0], text: 
None, metadata: None }, ]; for vector in vectors { @@ -382,8 +385,8 @@ fn test_id_mapping() { fn test_duplicate_id_error() { let mut hnsw = HNSWIndex::new(3); - let vector1 = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; - let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0], metadata: None }; // Same ID + let vector1 = Vector { id: 1, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }; + let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: None, metadata: None }; // Same ID assert!(hnsw.add(vector1).is_ok()); assert!(hnsw.add(vector2).is_err()); // Should fail with duplicate ID @@ -393,7 +396,7 @@ fn test_duplicate_id_error() { fn test_delete_vector() { let mut hnsw = HNSWIndex::new(3); - let vector = Vector { id: 42, values: vec![1.0, 2.0, 3.0], metadata: None }; + let vector = Vector { id: 42, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }; assert!(hnsw.add(vector).is_ok()); assert_eq!(hnsw.len(), 1); @@ -427,9 +430,9 @@ fn test_serialization_deserialization() { // Create an HNSW index with some data let mut hnsw = HNSWIndex::new(3); let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0], metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, ]; for vector in vectors { @@ -460,7 +463,7 @@ fn test_serialization_deserialization() { assert_eq!(vector3.values, vec![0.0, 0.0, 1.0]); // Test that we can add a new vector to the deserialized index - let new_vector = Vector { id: 4, values: vec![1.0, 1.0, 1.0], metadata: None }; + let new_vector = Vector { id: 4, values: vec![1.0, 1.0, 1.0], text: None, metadata: None }; assert!(deserialized.add(new_vector).is_ok()); 
assert_eq!(deserialized.len(), 4); @@ -511,7 +514,7 @@ fn test_empty_hnsw_serialization_deserialization() { assert!(deserialized.is_empty()); // Test that we can add vectors to the deserialized empty index - let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; + let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }; assert!(deserialized.add(vector).is_ok()); assert_eq!(deserialized.len(), 1); assert!(!deserialized.is_empty()); @@ -523,9 +526,9 @@ fn test_search_with_limited_vectors() { // Add only 3 vectors let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0], metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, ]; for vector in vectors { diff --git a/src/lib.rs b/src/lib.rs index 598c0ea..03c4724 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,7 +65,7 @@ //! )?; //! //! for result in &results { -//! println!("ID: {}, Score: {:.4}", result.id, result.score); +//! println!("ID: {}, Score: {:.4}, Text: {:?}", result.id, result.score, result.text); //! } //! //! 
Ok(()) @@ -136,7 +136,7 @@ use serde::{Serialize, Deserialize}; /// Default vector dimension for embedding models pub const DEFAULT_VECTOR_DIMENSION: usize = 768; -/// Represents a vector with an ID and floating-point values +/// Represents a vector with an ID, floating-point values, and original text /// /// # Examples /// @@ -147,6 +147,7 @@ pub const DEFAULT_VECTOR_DIMENSION: usize = 768; /// let vector = Vector { /// id: 1, /// values: vec![0.1, 0.2, 0.3, 0.4], +/// text: Some("Sample document text".to_string()), /// metadata: Some(json!({ /// "title": "Sample Document", /// "category": "example", @@ -160,12 +161,14 @@ pub struct Vector { pub id: u64, /// The vector values (embedding coordinates) pub values: Vec, + /// The original text that was embedded to create this vector + pub text: Option, /// Optional metadata associated with the vector /// Can contain arbitrary JSON data for flexible schema-less storage pub metadata: Option, } -/// Search result containing a vector ID, similarity score, and optional metadata +/// Search result containing a vector ID, similarity score, original text, and optional metadata /// /// Results are typically sorted by score in descending order (highest similarity first). 
/// @@ -178,6 +181,7 @@ pub struct Vector { /// let result = SearchResult { /// id: 42, /// score: 0.95, +/// text: Some("Document content text".to_string()), /// metadata: Some(json!({"title": "Document Title"})), /// }; /// ``` @@ -187,6 +191,8 @@ pub struct SearchResult { pub id: u64, /// Similarity score (higher is more similar) pub score: f64, + /// The original text that was embedded to create this vector + pub text: Option, /// Optional metadata from the matching vector pub metadata: Option, } @@ -639,8 +645,8 @@ mod tests { #[test] fn test_vector_store_creation() { let vectors = vec![ - Vector { id: 0, values: vec![1.0, 2.0, 3.0], metadata: None }, - Vector { id: 1, values: vec![4.0, 5.0, 6.0], metadata: None }, + Vector { id: 0, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }, + Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: None, metadata: None }, ]; let store = FlatIndex::new(3, vectors); assert_eq!(store.len(), 2); @@ -650,9 +656,9 @@ mod tests { #[test] fn test_vector_store_search() { let vectors = vec![ - Vector { id: 0, values: vec![1.0, 0.0, 0.0], metadata: None }, - Vector { id: 1, values: vec![0.0, 1.0, 0.0], metadata: None }, - Vector { id: 2, values: vec![0.0, 0.0, 1.0], metadata: None }, + Vector { id: 0, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, + Vector { id: 1, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, + Vector { id: 2, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, ]; let store = FlatIndex::new(3, vectors); let query = vec![1.0, 0.0, 0.0]; @@ -669,8 +675,8 @@ mod tests { // Test FlatIndex wrapper let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, ]; let flat_index = FlatIndex::new(3, vectors); let wrapper = 
VectorIndexWrapper::Flat(flat_index); @@ -711,6 +717,7 @@ mod tests { let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], + text: Some("Test document text".to_string()), metadata: Some(metadata.clone()), }; @@ -726,6 +733,7 @@ mod tests { let vector_no_metadata = Vector { id: 2, values: vec![4.0, 5.0, 6.0], + text: None, metadata: None, }; diff --git a/src/persistence.rs b/src/persistence.rs index 9a49f3b..28ac715 100644 --- a/src/persistence.rs +++ b/src/persistence.rs @@ -168,8 +168,8 @@ mod tests { fn create_test_collection() -> Collection { let vectors = vec![ - Vector { id: 0, values: vec![1.0, 2.0, 3.0], metadata: None }, - Vector { id: 1, values: vec![4.0, 5.0, 6.0], metadata: None }, + Vector { id: 0, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }, + Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: None, metadata: None }, ]; let flat_index = FlatIndex::new(3, vectors); let index = VectorIndexWrapper::Flat(flat_index); @@ -257,8 +257,8 @@ mod tests { let collection = Collection::new("test_hnsw_collection".to_string(), index); // Add some vectors - let vector1 = Vector { id: 0, values: vec![1.0, 2.0, 3.0], metadata: None }; - let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0], metadata: None }; + let vector1 = Vector { id: 0, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }; + let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: None, metadata: None }; collection.add_vector(vector1).unwrap(); collection.add_vector(vector2).unwrap(); diff --git a/src/server.rs b/src/server.rs index 21bca1a..5697da8 100644 --- a/src/server.rs +++ b/src/server.rs @@ -98,6 +98,7 @@ pub struct AddTextResponse { pub struct AddVectorRequest { pub id: u64, pub values: Vec, + pub text: Option, pub metadata: Option, } @@ -290,6 +291,7 @@ async fn add_vector( let vector = Vector { id: payload.id, values: payload.values, + text: payload.text, metadata: payload.metadata, }; From 243682d4df320160d598b3751431686ced3cbbb6 Mon Sep 17 00:00:00 
2001 From: mathieu Date: Fri, 24 Oct 2025 15:16:34 +1100 Subject: [PATCH 5/7] considering removing manual vector option --- README.md | 2 - src/client.rs | 88 ++--------------------------- src/index/flat.rs | 38 ++++++------- src/index/hnsw.rs | 44 +++++++-------- src/lib.rs | 30 +++++----- src/persistence.rs | 34 ++++++++--- src/server.rs | 100 ++------------------------------- tests/http_integration_test.rs | 61 +------------------- tests/integration_test.rs | 1 + 9 files changed, 97 insertions(+), 301 deletions(-) diff --git a/README.md b/README.md index eecba7f..e26262b 100644 --- a/README.md +++ b/README.md @@ -68,9 +68,7 @@ docker build \ | **Create collection** | `POST /collections` | `{"name": "docs", "index_type": "hnsw"}` | | **Delete collection** | `DELETE /collections/{name}` | – | | **Add text** | `POST /collections/{name}/text` | `{"text": "Hello world", "metadata": {...}}`| -| **Add vector** | `POST /collections/{name}/vector` | `{"id": 1, "values": [0.1, 0.2, ...], "metadata": {...}}` | | **Search (text)** | `POST /collections/{name}/search/text` | `{"query": "hello", "k": 5}` | -| **Search (vector)** | `POST /collections/{name}/search/vector` | `{"query": [0.1, 0.2, ...], "k": 5}` | | **Get vector** | `GET /collections/{name}/vectors/{id}` | – | | **Delete vector** | `DELETE /collections/{name}/vectors/{id}` | – | | **Save collection** | `POST /collections/{name}/save` | `{"file_path": "./collection.vlc"}` | diff --git a/src/client.rs b/src/client.rs index b8c7014..19679fb 100644 --- a/src/client.rs +++ b/src/client.rs @@ -128,12 +128,6 @@ impl VectorLiteClient { collection.add_text_with_metadata(text, metadata, self.embedding_function.as_ref()) } - pub fn add_vector_to_collection(&self, collection_name: &str, vector: Vector) -> VectorLiteResult<()> { - let collection = self.collections.get(collection_name) - .ok_or_else(|| VectorLiteError::CollectionNotFound { name: collection_name.to_string() })?; - - collection.add_vector(vector) - } pub fn 
search_text_in_collection(&self, collection_name: &str, query_text: &str, k: usize, similarity_metric: SimilarityMetric) -> VectorLiteResult> { let collection = self.collections.get(collection_name) @@ -142,12 +136,6 @@ impl VectorLiteClient { collection.search_text(query_text, k, similarity_metric, self.embedding_function.as_ref()) } - pub fn search_vector_in_collection(&self, collection_name: &str, query_vector: &[f64], k: usize, similarity_metric: SimilarityMetric) -> VectorLiteResult> { - let collection = self.collections.get(collection_name) - .ok_or_else(|| VectorLiteError::CollectionNotFound { name: collection_name.to_string() })?; - - collection.search_vector(query_vector, k, similarity_metric) - } pub fn delete_from_collection(&self, collection_name: &str, id: u64) -> VectorLiteResult<()> { let collection = self.collections.get(collection_name) @@ -313,7 +301,7 @@ impl Collection { let vector = Vector { id, values: embedding, - text: Some(text.to_string()), + text: text.to_string(), metadata: None }; let vector_dimension = vector.values.len(); @@ -345,7 +333,7 @@ impl Collection { let vector = Vector { id, values: embedding, - text: Some(text.to_string()), + text: text.to_string(), metadata }; let vector_dimension = vector.values.len(); @@ -368,23 +356,6 @@ impl Collection { Ok(id) } - pub fn add_vector(&self, vector: Vector) -> VectorLiteResult<()> { - let vector_dimension = vector.values.len(); - let vector_id = vector.id; - let mut index = self.index.write().map_err(|_| VectorLiteError::LockError("Failed to acquire write lock for add_vector".to_string()))?; - index.add(vector).map_err(|e| { - if e.contains("dimension") { - VectorLiteError::DimensionMismatch { - expected: index.dimension(), - actual: vector_dimension - } - } else if e.contains("already exists") { - VectorLiteError::DuplicateVectorId { id: vector_id } - } else { - VectorLiteError::InternalError(e) - } - }) - } pub fn delete(&self, id: u64) -> VectorLiteResult<()> { let mut index = 
self.index.write().map_err(|_| VectorLiteError::LockError("Failed to acquire write lock for delete".to_string()))?; @@ -406,10 +377,6 @@ impl Collection { Ok(index.search(&query_embedding, k, similarity_metric)) } - pub fn search_vector(&self, query_vector: &[f64], k: usize, similarity_metric: SimilarityMetric) -> VectorLiteResult> { - let index = self.index.read().map_err(|_| VectorLiteError::LockError("Failed to acquire read lock for search_vector".to_string()))?; - Ok(index.search(query_vector, k, similarity_metric)) - } pub fn get_vector(&self, id: u64) -> VectorLiteResult> { let index = self.index.read().map_err(|_| VectorLiteError::LockError("Failed to acquire read lock for get_vector".to_string()))?; @@ -715,53 +682,7 @@ mod tests { assert_eq!(results.len(), 1); } - #[test] - fn test_add_vector_directly() { - let embedding_fn = MockEmbeddingFunction::new(3); - let mut client = VectorLiteClient::new(Box::new(embedding_fn)); - - client.create_collection("test_collection", IndexType::Flat).unwrap(); - - // Add vector directly - let vector = Vector { - id: 42, - values: vec![1.0, 2.0, 3.0], - text: Some("Test vector text".to_string()), - metadata: None, - }; - - client.add_vector_to_collection("test_collection", vector).unwrap(); - - let info = client.get_collection_info("test_collection").unwrap(); - assert_eq!(info.count, 1); - - // Verify vector exists - let retrieved = client.get_vector_from_collection("test_collection", 42).unwrap(); - assert!(retrieved.is_some()); - assert_eq!(retrieved.unwrap().id, 42); - } - #[test] - fn test_search_vector_directly() { - let embedding_fn = MockEmbeddingFunction::new(3); - let mut client = VectorLiteClient::new(Box::new(embedding_fn)); - - client.create_collection("test_collection", IndexType::Flat).unwrap(); - - // Add some vectors - client.add_text_to_collection("test_collection", "Hello world", None).unwrap(); - client.add_text_to_collection("test_collection", "Another text", None).unwrap(); - - // Search with vector 
directly - let query_vector = vec![1.0, 2.0, 3.0]; - let results = client.search_vector_in_collection("test_collection", &query_vector, 2, SimilarityMetric::Cosine).unwrap(); - - assert_eq!(results.len(), 2); - // Results should be sorted by score (highest first) - for i in 1..results.len() { - assert!(results[i-1].score >= results[i].score); - } - } #[test] fn test_collection_save_and_load() { @@ -794,8 +715,9 @@ mod tests { assert_eq!(info.dimension, 3); assert!(!info.is_empty); - // Test search functionality - let results = loaded_collection.search_vector(&[1.0, 2.0, 3.0], 2, SimilarityMetric::Cosine).unwrap(); + // Test search functionality using text search + let test_embedding_fn = MockEmbeddingFunction::new(3); + let results = loaded_collection.search_text("Hello", 2, SimilarityMetric::Cosine, &test_embedding_fn).unwrap(); assert_eq!(results.len(), 2); } diff --git a/src/index/flat.rs b/src/index/flat.rs index 97881bc..46e0be8 100644 --- a/src/index/flat.rs +++ b/src/index/flat.rs @@ -24,7 +24,7 @@ //! //! # fn example() -> Result<(), Box> { //! let mut index = FlatIndex::new(3, Vec::new()); -//! let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; +//! let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], text: "test".to_string(), metadata: None }; //! //! index.add(vector)?; //! 
let results = index.search(&[1.1, 2.1, 3.1], 5, SimilarityMetric::Cosine); @@ -49,7 +49,7 @@ use serde::{Serialize, Deserialize}; /// /// # fn example() -> Result<(), Box> { /// let mut index = FlatIndex::new(3, Vec::new()); -/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; +/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], text: "test".to_string(), metadata: None }; /// /// index.add(vector)?; /// let results = index.search(&[1.1, 2.1, 3.1], 5, SimilarityMetric::Cosine); @@ -138,9 +138,9 @@ mod tests { fn test_serialization_deserialization() { // Create a FlatIndex with some data let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: "test".to_string(), metadata: None }, ]; let flat_index = FlatIndex::new(3, vectors); @@ -179,9 +179,9 @@ mod tests { #[test] fn test_flat_index_with_cosine_similarity() { let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: "test".to_string(), metadata: None }, ]; let index = FlatIndex::new(3, vectors); @@ -196,9 +196,9 @@ mod tests { #[test] fn test_flat_index_with_euclidean_similarity() { let vectors = vec![ - Vector { id: 1, values: 
vec![0.0, 0.0], text: None, metadata: None }, - Vector { id: 2, values: vec![3.0, 4.0], text: None, metadata: None }, - Vector { id: 3, values: vec![6.0, 8.0], text: None, metadata: None }, + Vector { id: 1, values: vec![0.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 2, values: vec![3.0, 4.0], text: "test".to_string(), metadata: None }, + Vector { id: 3, values: vec![6.0, 8.0], text: "test".to_string(), metadata: None }, ]; let index = FlatIndex::new(2, vectors); @@ -213,9 +213,9 @@ mod tests { #[test] fn test_flat_index_with_manhattan_similarity() { let vectors = vec![ - Vector { id: 1, values: vec![0.0, 0.0], text: None, metadata: None }, - Vector { id: 2, values: vec![3.0, 4.0], text: None, metadata: None }, - Vector { id: 3, values: vec![6.0, 8.0], text: None, metadata: None }, + Vector { id: 1, values: vec![0.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 2, values: vec![3.0, 4.0], text: "test".to_string(), metadata: None }, + Vector { id: 3, values: vec![6.0, 8.0], text: "test".to_string(), metadata: None }, ]; let index = FlatIndex::new(2, vectors); @@ -230,9 +230,9 @@ mod tests { #[test] fn test_flat_index_with_dot_product() { let vectors = vec![ - Vector { id: 1, values: vec![1.0, 2.0], text: None, metadata: None }, - Vector { id: 2, values: vec![2.0, 1.0], text: None, metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0], text: None, metadata: None }, + Vector { id: 1, values: vec![1.0, 2.0], text: "test".to_string(), metadata: None }, + Vector { id: 2, values: vec![2.0, 1.0], text: "test".to_string(), metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0], text: "test".to_string(), metadata: None }, ]; let index = FlatIndex::new(2, vectors); @@ -247,8 +247,8 @@ mod tests { #[test] fn test_flat_index_change_similarity_metric() { let vectors = vec![ - Vector { id: 1, values: vec![1.0, 2.0], text: None, metadata: None }, - Vector { id: 2, values: vec![2.0, 1.0], text: None, metadata: None }, + Vector { 
id: 1, values: vec![1.0, 2.0], text: "test".to_string(), metadata: None }, + Vector { id: 2, values: vec![2.0, 1.0], text: "test".to_string(), metadata: None }, ]; let index = FlatIndex::new(2, vectors); diff --git a/src/index/hnsw.rs b/src/index/hnsw.rs index 07e5573..c7c4d34 100644 --- a/src/index/hnsw.rs +++ b/src/index/hnsw.rs @@ -31,7 +31,7 @@ //! //! # fn example() -> Result<(), Box> { //! let mut index = HNSWIndex::new(384); -//! let vector = Vector { id: 1, values: vec![0.1; 384], text: None, metadata: None }; +//! let vector = Vector { id: 1, values: vec![0.1; 384], text: "test".to_string(), metadata: None }; //! //! index.add(vector)?; //! let results = index.search(&[0.1; 384], 5, SimilarityMetric::Cosine); @@ -287,7 +287,7 @@ fn test_add_vector() { let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], - text: None, + text: "test".to_string(), metadata: None, }; @@ -302,7 +302,7 @@ fn test_add_vector_dimension_mismatch() { let vector = Vector { id: 1, values: vec![1.0, 2.0], // Wrong dimension - text: None, + text: "test".to_string(), metadata: None, }; @@ -315,10 +315,10 @@ fn test_search_basic() { let mut hnsw = HNSWIndex::new(3); let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, - Vector { id: 4, values: vec![1.0, 1.0, 0.0], text: None, metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: "test".to_string(), metadata: None }, + Vector { id: 4, values: vec![1.0, 1.0, 0.0], text: "test".to_string(), metadata: None }, ]; for vector in vectors { @@ -355,10 +355,10 @@ fn test_id_mapping() { // Add vectors with custom IDs let vectors = vec![ - Vector { id: 
100, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, - Vector { id: 200, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, - Vector { id: 300, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, - Vector { id: 400, values: vec![1.0, 1.0, 0.0], text: None, metadata: None }, + Vector { id: 100, values: vec![1.0, 0.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 200, values: vec![0.0, 1.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 300, values: vec![0.0, 0.0, 1.0], text: "test".to_string(), metadata: None }, + Vector { id: 400, values: vec![1.0, 1.0, 0.0], text: "test".to_string(), metadata: None }, ]; for vector in vectors { @@ -385,8 +385,8 @@ fn test_id_mapping() { fn test_duplicate_id_error() { let mut hnsw = HNSWIndex::new(3); - let vector1 = Vector { id: 1, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }; - let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: None, metadata: None }; // Same ID + let vector1 = Vector { id: 1, values: vec![1.0, 2.0, 3.0], text: "test".to_string(), metadata: None }; + let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: "test".to_string(), metadata: None }; // Same ID assert!(hnsw.add(vector1).is_ok()); assert!(hnsw.add(vector2).is_err()); // Should fail with duplicate ID @@ -396,7 +396,7 @@ fn test_duplicate_id_error() { fn test_delete_vector() { let mut hnsw = HNSWIndex::new(3); - let vector = Vector { id: 42, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }; + let vector = Vector { id: 42, values: vec![1.0, 2.0, 3.0], text: "test".to_string(), metadata: None }; assert!(hnsw.add(vector).is_ok()); assert_eq!(hnsw.len(), 1); @@ -430,9 +430,9 @@ fn test_serialization_deserialization() { // Create an HNSW index with some data let mut hnsw = HNSWIndex::new(3); let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: 
None }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: "test".to_string(), metadata: None }, ]; for vector in vectors { @@ -463,7 +463,7 @@ fn test_serialization_deserialization() { assert_eq!(vector3.values, vec![0.0, 0.0, 1.0]); // Test that we can add a new vector to the deserialized index - let new_vector = Vector { id: 4, values: vec![1.0, 1.0, 1.0], text: None, metadata: None }; + let new_vector = Vector { id: 4, values: vec![1.0, 1.0, 1.0], text: "test".to_string(), metadata: None }; assert!(deserialized.add(new_vector).is_ok()); assert_eq!(deserialized.len(), 4); @@ -514,7 +514,7 @@ fn test_empty_hnsw_serialization_deserialization() { assert!(deserialized.is_empty()); // Test that we can add vectors to the deserialized empty index - let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }; + let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], text: "test".to_string(), metadata: None }; assert!(deserialized.add(vector).is_ok()); assert_eq!(deserialized.len(), 1); assert!(!deserialized.is_empty()); @@ -526,9 +526,9 @@ fn test_search_with_limited_vectors() { // Add only 3 vectors let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, - Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: "test".to_string(), metadata: None }, + Vector { id: 3, values: vec![0.0, 0.0, 1.0], text: "test".to_string(), metadata: None }, ]; for vector in vectors { diff --git a/src/lib.rs b/src/lib.rs index 
03c4724..0e00594 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -147,7 +147,7 @@ pub const DEFAULT_VECTOR_DIMENSION: usize = 768; /// let vector = Vector { /// id: 1, /// values: vec![0.1, 0.2, 0.3, 0.4], -/// text: Some("Sample document text".to_string()), +/// text: "Sample document text".to_string(), /// metadata: Some(json!({ /// "title": "Sample Document", /// "category": "example", @@ -162,7 +162,7 @@ pub struct Vector { /// The vector values (embedding coordinates) pub values: Vec, /// The original text that was embedded to create this vector - pub text: Option, + pub text: String, /// Optional metadata associated with the vector /// Can contain arbitrary JSON data for flexible schema-less storage pub metadata: Option, @@ -181,7 +181,7 @@ pub struct Vector { /// let result = SearchResult { /// id: 42, /// score: 0.95, -/// text: Some("Document content text".to_string()), +/// text: "Document content text".to_string(), /// metadata: Some(json!({"title": "Document Title"})), /// }; /// ``` @@ -192,7 +192,7 @@ pub struct SearchResult { /// Similarity score (higher is more similar) pub score: f64, /// The original text that was embedded to create this vector - pub text: Option, + pub text: String, /// Optional metadata from the matching vector pub metadata: Option, } @@ -209,7 +209,7 @@ pub struct SearchResult { /// /// # fn example() -> Result<(), Box> { /// let mut index = FlatIndex::new(3, Vec::new()); -/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; +/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], text: "test".to_string(), metadata: None }; /// /// index.add(vector)?; /// let results = index.search(&[1.1, 2.1, 3.1], 5, SimilarityMetric::Cosine); @@ -254,7 +254,7 @@ pub trait VectorIndex { /// let mut wrapper = VectorIndexWrapper::Flat(FlatIndex::new(3, Vec::new())); /// /// // Add a vector -/// let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], metadata: None }; +/// let vector = Vector { id: 1, values: 
vec![1.0, 2.0, 3.0], text: "test".to_string(), metadata: None }; /// wrapper.add(vector)?; /// /// // Search using the wrapper @@ -645,8 +645,8 @@ mod tests { #[test] fn test_vector_store_creation() { let vectors = vec![ - Vector { id: 0, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }, - Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: None, metadata: None }, + Vector { id: 0, values: vec![1.0, 2.0, 3.0], text: "test1".to_string(), metadata: None }, + Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: "test2".to_string(), metadata: None }, ]; let store = FlatIndex::new(3, vectors); assert_eq!(store.len(), 2); @@ -656,9 +656,9 @@ mod tests { #[test] fn test_vector_store_search() { let vectors = vec![ - Vector { id: 0, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, - Vector { id: 1, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, - Vector { id: 2, values: vec![0.0, 0.0, 1.0], text: None, metadata: None }, + Vector { id: 0, values: vec![1.0, 0.0, 0.0], text: "test1".to_string(), metadata: None }, + Vector { id: 1, values: vec![0.0, 1.0, 0.0], text: "test2".to_string(), metadata: None }, + Vector { id: 2, values: vec![0.0, 0.0, 1.0], text: "test3".to_string(), metadata: None }, ]; let store = FlatIndex::new(3, vectors); let query = vec![1.0, 0.0, 0.0]; @@ -675,8 +675,8 @@ mod tests { // Test FlatIndex wrapper let vectors = vec![ - Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: None, metadata: None }, - Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: None, metadata: None }, + Vector { id: 1, values: vec![1.0, 0.0, 0.0], text: "test1".to_string(), metadata: None }, + Vector { id: 2, values: vec![0.0, 1.0, 0.0], text: "test2".to_string(), metadata: None }, ]; let flat_index = FlatIndex::new(3, vectors); let wrapper = VectorIndexWrapper::Flat(flat_index); @@ -717,7 +717,7 @@ mod tests { let vector = Vector { id: 1, values: vec![1.0, 2.0, 3.0], - text: Some("Test document text".to_string()), + text: "Test document 
text".to_string(), metadata: Some(metadata.clone()), }; @@ -733,7 +733,7 @@ mod tests { let vector_no_metadata = Vector { id: 2, values: vec![4.0, 5.0, 6.0], - text: None, + text: "Test text".to_string(), metadata: None, }; diff --git a/src/persistence.rs b/src/persistence.rs index 28ac715..29a708c 100644 --- a/src/persistence.rs +++ b/src/persistence.rs @@ -166,10 +166,31 @@ mod tests { use crate::{FlatIndex, HNSWIndex, Vector, SimilarityMetric}; use tempfile::TempDir; + // Mock embedding function for testing + struct MockEmbeddingFunction { + dimension: usize, + } + + impl MockEmbeddingFunction { + fn new(dimension: usize) -> Self { + Self { dimension } + } + } + + impl crate::embeddings::EmbeddingFunction for MockEmbeddingFunction { + fn generate_embedding(&self, _text: &str) -> crate::embeddings::Result> { + Ok(vec![1.0; self.dimension]) + } + + fn dimension(&self) -> usize { + self.dimension + } + } + fn create_test_collection() -> Collection { let vectors = vec![ - Vector { id: 0, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }, - Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: None, metadata: None }, + Vector { id: 0, values: vec![1.0, 2.0, 3.0], text: "test".to_string(), metadata: None }, + Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: "test".to_string(), metadata: None }, ]; let flat_index = FlatIndex::new(3, vectors); let index = VectorIndexWrapper::Flat(flat_index); @@ -256,12 +277,9 @@ mod tests { let collection = Collection::new("test_hnsw_collection".to_string(), index); - // Add some vectors - let vector1 = Vector { id: 0, values: vec![1.0, 2.0, 3.0], text: None, metadata: None }; - let vector2 = Vector { id: 1, values: vec![4.0, 5.0, 6.0], text: None, metadata: None }; - - collection.add_vector(vector1).unwrap(); - collection.add_vector(vector2).unwrap(); + // Add vectors using text method + collection.add_text("test1", &MockEmbeddingFunction::new(3)).unwrap(); + collection.add_text("test2", 
&MockEmbeddingFunction::new(3)).unwrap(); // Save and load save_collection_to_file(&collection, &file_path).unwrap(); diff --git a/src/server.rs b/src/server.rs index 5697da8..efbd121 100644 --- a/src/server.rs +++ b/src/server.rs @@ -16,9 +16,7 @@ //! //! ## Vector Operations //! - `POST /collections/{name}/text` - Add text (auto-generates embedding, optional metadata) -//! - `POST /collections/{name}/vector` - Add raw vector //! - `POST /collections/{name}/search/text` - Search by text -//! - `POST /collections/{name}/search/vector` - Search by vector //! - `GET /collections/{name}/vectors/{id}` - Get vector by ID //! - `DELETE /collections/{name}/vectors/{id}` - Delete vector by ID //! @@ -67,7 +65,7 @@ use tower_http::cors::CorsLayer; use tower_http::trace::TraceLayer; use tracing::{info, error}; -use crate::{VectorLiteClient, Vector, SearchResult, SimilarityMetric, IndexType}; +use crate::{VectorLiteClient, SearchResult, SimilarityMetric, IndexType}; use crate::errors::{VectorLiteError, VectorLiteResult}; // Request/Response types @@ -79,7 +77,7 @@ pub struct CreateCollectionRequest { #[derive(Debug, Serialize)] pub struct CreateCollectionResponse { - pub message: String, + pub name: String, } #[derive(Debug, Deserialize)] @@ -91,21 +89,8 @@ pub struct AddTextRequest { #[derive(Debug, Serialize)] pub struct AddTextResponse { pub id: Option, - pub message: String, -} - -#[derive(Debug, Deserialize)] -pub struct AddVectorRequest { - pub id: u64, - pub values: Vec, - pub text: Option, - pub metadata: Option, } -#[derive(Debug, Serialize)] -pub struct AddVectorResponse { - pub message: String, -} #[derive(Debug, Deserialize)] pub struct SearchTextRequest { @@ -114,23 +99,15 @@ pub struct SearchTextRequest { pub similarity_metric: Option, } -#[derive(Debug, Deserialize)] -pub struct SearchVectorRequest { - pub query: Vec, - pub k: Option, - pub similarity_metric: Option, -} #[derive(Debug, Serialize)] pub struct SearchResponse { pub results: Option>, - pub message: 
String, } #[derive(Debug, Serialize)] pub struct CollectionInfoResponse { pub info: Option, - pub message: String, } #[derive(Debug, Serialize)] @@ -150,7 +127,6 @@ pub struct SaveCollectionRequest { #[derive(Debug, Serialize)] pub struct SaveCollectionResponse { - pub message: String, pub file_path: Option, } @@ -162,7 +138,6 @@ pub struct LoadCollectionRequest { #[derive(Debug, Serialize)] pub struct LoadCollectionResponse { - pub message: String, pub collection_name: Option, } @@ -222,7 +197,7 @@ async fn create_collection( Ok(_) => { info!("Created collection: {}", payload.name); Ok(Json(CreateCollectionResponse { - message: format!("Collection '{}' created successfully", payload.name), + name: payload.name, })) } Err(e) => { @@ -240,7 +215,6 @@ async fn get_collection_info( match client.get_collection_info(&collection_name) { Ok(info) => Ok(Json(CollectionInfoResponse { info: Some(info), - message: "Collection info retrieved successfully".to_string(), })), Err(e) => Err(e.status_code()), } @@ -255,7 +229,7 @@ async fn delete_collection( Ok(_) => { info!("Deleted collection: {}", collection_name); Ok(Json(CreateCollectionResponse { - message: format!("Collection '{}' deleted successfully", collection_name), + name: collection_name, })) } Err(e) => Err(e.status_code()), @@ -273,7 +247,6 @@ async fn add_text( info!("Added text to collection '{}' with ID: {}", collection_name, id); Ok(Json(AddTextResponse { id: Some(id), - message: "Text added successfully".to_string(), })) } Err(e) => { @@ -283,31 +256,6 @@ async fn add_text( } -async fn add_vector( - State(state): State, - Path(collection_name): Path, - Json(payload): Json, -) -> Result, StatusCode> { - let vector = Vector { - id: payload.id, - values: payload.values, - text: payload.text, - metadata: payload.metadata, - }; - - let client = state.read().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; - match client.add_vector_to_collection(&collection_name, vector) { - Ok(_) => { - info!("Added vector to 
collection '{}' with ID: {}", collection_name, payload.id); - Ok(Json(AddVectorResponse { - message: "Vector added successfully".to_string(), - })) - } - Err(e) => { - Err(e.status_code()) - } - } -} async fn search_text( State(state): State, @@ -331,41 +279,12 @@ async fn search_text( info!("Search completed for collection '{}' with {} results", collection_name, results.len()); Ok(Json(SearchResponse { results: Some(results), - message: "Search completed successfully".to_string(), })) } Err(e) => Err(e.status_code()), } } -async fn search_vector( - State(state): State, - Path(collection_name): Path, - Json(payload): Json, -) -> Result, StatusCode> { - let k = payload.k.unwrap_or(10); - let similarity_metric = match payload.similarity_metric { - Some(metric) => match parse_similarity_metric(&metric) { - Ok(m) => m, - Err(e) => { - return Err(e.status_code()); - } - }, - None => SimilarityMetric::Cosine, - }; - - let client = state.read().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; - match client.search_vector_in_collection(&collection_name, &payload.query, k, similarity_metric) { - Ok(results) => { - info!("Vector search completed for collection '{}' with {} results", collection_name, results.len()); - Ok(Json(SearchResponse { - results: Some(results), - message: "Vector search completed successfully".to_string(), - })) - } - Err(e) => Err(e.status_code()), - } -} async fn get_vector( State(state): State, @@ -375,8 +294,7 @@ async fn get_vector( match client.get_vector_from_collection(&collection_name, vector_id) { Ok(Some(vector)) => { Ok(Json(serde_json::json!({ - "vector": vector, - "message": "Vector retrieved successfully" + "vector": vector }))) } Ok(None) => { @@ -396,9 +314,7 @@ async fn delete_vector( match client.delete_from_collection(&collection_name, vector_id) { Ok(_) => { info!("Deleted vector {} from collection '{}'", vector_id, collection_name); - Ok(Json(serde_json::json!({ - "message": "Vector deleted successfully" - }))) + 
Ok(Json(serde_json::json!({}))) } Err(e) => { Err(e.status_code()) @@ -429,7 +345,6 @@ async fn save_collection( Ok(_) => { info!("Saved collection '{}' to file: {}", collection_name, payload.file_path); Ok(Json(SaveCollectionResponse { - message: format!("Collection '{}' saved successfully", collection_name), file_path: Some(payload.file_path), })) } @@ -486,7 +401,6 @@ async fn load_collection( info!("Loaded collection '{}' from file: {}", collection_name, payload.file_path); Ok(Json(LoadCollectionResponse { - message: format!("Collection '{}' loaded successfully", collection_name), collection_name: Some(collection_name), })) } @@ -499,9 +413,7 @@ pub fn create_app(state: AppState) -> Router { .route("/collections/:name", get(get_collection_info)) .route("/collections/:name", delete(delete_collection)) .route("/collections/:name/text", post(add_text)) - .route("/collections/:name/vector", post(add_vector)) .route("/collections/:name/search/text", post(search_text)) - .route("/collections/:name/search/vector", post(search_vector)) .route("/collections/:name/vectors/:id", get(get_vector)) .route("/collections/:name/vectors/:id", delete(delete_vector)) .route("/collections/:name/save", post(save_collection)) diff --git a/tests/http_integration_test.rs b/tests/http_integration_test.rs index 01802cd..21116ec 100644 --- a/tests/http_integration_test.rs +++ b/tests/http_integration_test.rs @@ -94,7 +94,7 @@ async fn test_create_collection() { let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap(); let json: serde_json::Value = serde_json::from_slice(&body).unwrap(); - assert!(json["message"].as_str().unwrap().contains("created successfully")); + assert_eq!(json["name"], "test_collection"); } #[tokio::test] @@ -175,34 +175,8 @@ async fn test_add_text_to_collection() { let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap(); let json: serde_json::Value = serde_json::from_slice(&body).unwrap(); assert_eq!(json["id"], 0); - 
assert!(json["message"].as_str().unwrap().contains("added successfully")); } -#[tokio::test] -async fn test_add_vector_to_collection() { - let mut client = create_test_client(); - client.create_collection("test_collection", vectorlite::IndexType::Flat).unwrap(); - let app = create_app(std::sync::Arc::new(std::sync::RwLock::new(client))); - - let payload = json!({ - "id": 42, - "values": [1.0, 2.0, 3.0] - }); - - let request = Request::builder() - .uri("/collections/test_collection/vector") - .method("POST") - .header("content-type", "application/json") - .body(Body::from(serde_json::to_vec(&payload).unwrap())) - .unwrap(); - - let response = app.oneshot(request).await.unwrap(); - assert_eq!(response.status(), StatusCode::OK); - - let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap(); - let json: serde_json::Value = serde_json::from_slice(&body).unwrap(); - assert!(json["message"].as_str().unwrap().contains("added successfully")); -} #[tokio::test] async fn test_search_text() { @@ -234,35 +208,6 @@ async fn test_search_text() { assert_eq!(json["results"][0]["id"], 0); } -#[tokio::test] -async fn test_search_vector() { - let mut client = create_test_client(); - client.create_collection("test_collection", vectorlite::IndexType::Flat).unwrap(); - client.add_text_to_collection("test_collection", "Hello world", None).unwrap(); - let app = create_app(std::sync::Arc::new(std::sync::RwLock::new(client))); - - let payload = json!({ - "query": [1.0, 2.0, 3.0], - "k": 5, - "similarity_metric": "cosine" - }); - - let request = Request::builder() - .uri("/collections/test_collection/search/vector") - .method("POST") - .header("content-type", "application/json") - .body(Body::from(serde_json::to_vec(&payload).unwrap())) - .unwrap(); - - let response = app.oneshot(request).await.unwrap(); - assert_eq!(response.status(), StatusCode::OK); - - let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap(); - let json: serde_json::Value = 
serde_json::from_slice(&body).unwrap(); - assert!(json["results"].is_array()); - assert_eq!(json["results"].as_array().unwrap().len(), 1); - assert_eq!(json["results"][0]["id"], 0); -} #[tokio::test] async fn test_get_vector() { @@ -304,7 +249,7 @@ async fn test_delete_vector() { let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap(); let json: serde_json::Value = serde_json::from_slice(&body).unwrap(); - assert!(json["message"].as_str().unwrap().contains("deleted successfully")); + assert!(json.is_object()); } #[tokio::test] @@ -324,5 +269,5 @@ async fn test_delete_collection() { let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap(); let json: serde_json::Value = serde_json::from_slice(&body).unwrap(); - assert!(json["message"].as_str().unwrap().contains("deleted successfully")); + assert_eq!(json["name"], "test_collection"); } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 564fe64..76c543a 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -26,6 +26,7 @@ pub fn load_test_dataset(path: &str, dimension: usize) -> Result Date: Fri, 24 Oct 2025 15:20:04 +1100 Subject: [PATCH 6/7] fix documentation in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e26262b..52984f1 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ cargo build --features memory-optimized ## Rust SDK Example -```rust +```rust,no_run use vectorlite::{VectorLiteClient, EmbeddingGenerator, IndexType, SimilarityMetric}; use serde_json::json; From 8dc4722da5fb89d2b08810a2edc805ae7ee762d7 Mon Sep 17 00:00:00 2001 From: mathieu Date: Fri, 24 Oct 2025 15:23:36 +1100 Subject: [PATCH 7/7] no run in lib.rs --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 0e00594..55b773b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,7 +38,7 @@ //! //! ## Quick Start //! -//! ```rust +//! 
```rust,no_run //! use vectorlite::{VectorLiteClient, EmbeddingGenerator, IndexType, SimilarityMetric}; //! use serde_json::json; //!