diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 25cb750..f655aab 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -95,7 +95,7 @@ dependencies = [ [[package]] name = "bbqvec" -version = "0.0.9" +version = "0.0.10" dependencies = [ "anyhow", "argminmax", diff --git a/rust/src/backend.rs b/rust/src/backend.rs index c2bd90c..d52e773 100644 --- a/rust/src/backend.rs +++ b/rust/src/backend.rs @@ -2,6 +2,7 @@ use anyhow::Result; use crate::{Basis, Bitmap, ResultSet, Vector, ID}; +#[allow(clippy::module_name_repetitions)] pub struct BackendInfo { pub quantization: String, pub has_index_data: bool, @@ -10,6 +11,7 @@ pub struct BackendInfo { pub vector_count: usize, } +#[allow(clippy::missing_errors_doc, clippy::module_name_repetitions)] pub trait VectorBackend { fn put_vector(&mut self, id: ID, v: &Vector) -> Result<()>; fn compute_similarity(&self, target: &Vector, target_id: ID) -> Result; @@ -18,6 +20,8 @@ pub trait VectorBackend { fn vector_exists(&self, id: ID) -> bool; fn close(self) -> Result<()>; + /// # Errors + /// `compute_similarity` can fail fn find_nearest(&self, target: &Vector, k: usize) -> Result { let mut set = ResultSet::new(k); for id in self.iter_vector_ids() { @@ -30,14 +34,20 @@ pub trait VectorBackend { fn load_bases(&self) -> Result>>; fn load_bitmap(&mut self, basis: usize, index: i32) -> Result>; + /// # Errors + /// none fn save_bases(&mut self, _bases: &[Basis]) -> Result<()> { Ok(()) } + /// # Errors + /// none fn save_bitmap(&mut self, _basis: usize, _index: usize, _bitmap: &impl Bitmap) -> Result<()> { Ok(()) } + /// # Errors + /// none fn sync(&self) -> Result<()> { Ok(()) } diff --git a/rust/src/backend_memory.rs b/rust/src/backend_memory.rs index b935ef9..7104480 100644 --- a/rust/src/backend_memory.rs +++ b/rust/src/backend_memory.rs @@ -22,6 +22,8 @@ pub struct QuantizedMemoryBackend { pub type MemoryBackend = QuantizedMemoryBackend; impl QuantizedMemoryBackend { + /// # Errors + /// none pub fn new(dimensions: usize, n_basis: usize) -> Result { Ok(Self { vecs: Vec::new(), @@ -41,10 +43,11 @@ impl VectorBackend for QuantizedMemoryBackend { if v.len() != self.dimensions { return Err(anyhow!("dimensions don't match")); } + #[allow(clippy::cast_possible_truncation)] let uid = id as usize; if self.vecs.len() <= uid { if self.vecs.capacity() == uid { - self.vecs.reserve(min(self.vecs.capacity(), 1024 * 1024)) + self.vecs.reserve(min(self.vecs.capacity(), 1024 * 1024)); } self.vecs.resize(uid + 1, None); } @@ -57,6 +60,7 @@ impl VectorBackend for QuantizedMemoryBackend { fn compute_similarity(&self, target: &Vector, target_id: crate::ID) -> Result { // Make sure it's normalized! + #[allow(clippy::cast_possible_truncation)] let v = self.vecs[target_id as usize] .as_ref() .ok_or(anyhow!("No vector present"))?; @@ -82,6 +86,7 @@ impl VectorBackend for QuantizedMemoryBackend { } fn vector_exists(&self, id: ID) -> bool { + #[allow(clippy::cast_possible_truncation)] let v = self.vecs.get(id as usize); match v { Some(x) => x.is_some(), diff --git a/rust/src/bitmaps.rs b/rust/src/bitmaps.rs index 56d242b..6dd5d8f 100644 --- a/rust/src/bitmaps.rs +++ b/rust/src/bitmaps.rs @@ -26,25 +26,28 @@ impl Bitmap for roaring::RoaringBitmap { self.is_empty() } + #[allow(clippy::cast_possible_truncation)] fn count(&self) -> usize { self.len() as usize } fn add(&mut self, id: ID) { + #[allow(clippy::cast_possible_truncation)] self.insert(id as u32); } fn iter_elems(&self) -> impl Iterator { + #[allow(clippy::cast_lossless)] self.iter().map(|x| x as ID) } fn and_not(&mut self, rhs: &Self) { - self.sub_assign(rhs) + self.sub_assign(rhs); } fn or(&mut self, rhs: &Self) { - self.bitor_assign(rhs) + self.bitor_assign(rhs); } fn xor(&mut self, rhs: &Self) { - self.bitxor_assign(rhs) + self.bitxor_assign(rhs); } fn estimate_size(&self) -> usize { self.serialized_size() @@ -64,11 +67,12 @@ impl Bitmap for bitvec::prelude::BitVec { self.is_empty() } + #[allow(clippy::cast_possible_truncation)] fn add(&mut self, id: ID) { if self.len() <= id as usize { - self.resize((id + 1) as usize, false) + self.resize((id + 1) as usize, false); } - self.set(id as usize, true) + self.set(id as usize, true); } fn iter_elems(&self) -> impl Iterator { @@ -78,24 +82,24 @@ impl Bitmap for bitvec::prelude::BitVec { #[inline] fn and_not(&mut self, rhs: &Self) { for elem in self.as_raw_mut_slice().iter_mut().zip(rhs.as_raw_slice()) { - *elem.0 &= !elem.1 + *elem.0 &= !elem.1; } } #[inline] fn or(&mut self, rhs: &Self) { if self.len() < rhs.len() { - self.resize(rhs.len(), false) + self.resize(rhs.len(), false); } - self.bitor_assign(rhs) + self.bitor_assign(rhs); } #[inline] fn xor(&mut self, rhs: &Self) { if self.len() < rhs.len() { - self.resize(rhs.len(), false) + self.resize(rhs.len(), false); } - self.bitxor_assign(rhs) + self.bitxor_assign(rhs); } fn estimate_size(&self) -> usize { @@ -112,28 +116,31 @@ impl Bitmap for croaring::Bitmap { self.is_empty() } + #[allow(clippy::cast_possible_truncation)] fn count(&self) -> usize { self.cardinality() as usize } fn add(&mut self, id: ID) { - self.add(id as u32) + #[allow(clippy::cast_possible_truncation)] + self.add(id as u32); } fn iter_elems(&self) -> impl Iterator { + #[allow(clippy::cast_lossless)] self.iter().map(|x| x as ID) } fn and_not(&mut self, rhs: &Self) { - self.andnot_inplace(rhs) + self.andnot_inplace(rhs); } fn or(&mut self, rhs: &Self) { - self.or_inplace(rhs) + self.or_inplace(rhs); } fn xor(&mut self, rhs: &Self) { - self.xor_inplace(rhs) + self.xor_inplace(rhs); } fn estimate_size(&self) -> usize { diff --git a/rust/src/counting_bitmap.rs b/rust/src/counting_bitmap.rs index bed8f4d..87a5745 100644 --- a/rust/src/counting_bitmap.rs +++ b/rust/src/counting_bitmap.rs @@ -33,7 +33,10 @@ impl CountingBitmap { } pub fn cardinalities(&self) -> Vec { - self.bitmaps.iter().map(|b| b.count()).collect::>() + self.bitmaps + .iter() + .map(super::bitmaps::Bitmap::count) + .collect::>() } pub fn top_k(&self, search_k: usize) -> Option<&B> { diff --git a/rust/src/helpers.rs b/rust/src/helpers.rs index 21532d4..dbacd23 100644 --- a/rust/src/helpers.rs +++ b/rust/src/helpers.rs @@ -2,15 +2,17 @@ use rand::Rng; use crate::{Vector, ID}; +#[must_use] pub fn create_random_vector(dimensions: usize) -> Vector { let mut out = Vec::new(); for _ in 0..dimensions { - out.push(rand::thread_rng().gen_range(-1.0..1.0)) + out.push(rand::thread_rng().gen_range(-1.0..1.0)); } crate::vector::normalize(&mut out); out } +#[must_use] pub fn create_vector_set(dimensions: usize, count: usize) -> Vec { std::iter::repeat_with(|| create_random_vector(dimensions)) .take(count) diff --git a/rust/src/quantization.rs b/rust/src/quantization.rs index 960c1b2..ce105c9 100644 --- a/rust/src/quantization.rs +++ b/rust/src/quantization.rs @@ -2,6 +2,7 @@ use crate::{vector::distance, Vector}; use anyhow::Result; use half::{bf16, vec::HalfFloatVecExt}; +#[allow(clippy::module_name_repetitions)] pub trait Quantization: Default { type Lower: Clone; fn similarity(x: &Self::Lower, y: &Self::Lower) -> Result; @@ -13,6 +14,7 @@ pub trait Quantization: Default { fn name() -> &'static str; } +#[allow(clippy::module_name_repetitions)] #[derive(Default)] pub struct NoQuantization {} @@ -42,15 +44,21 @@ impl Quantization for NoQuantization { fn marshal(v: &Self::Lower, array: &mut [u8]) -> Result<()> { for (i, f) in v.iter().enumerate() { let bytes = f.to_le_bytes(); - let _ = &array[i * 4..i * 4 + 4].copy_from_slice(&bytes); + let () = &array[i * 4..i * 4 + 4].copy_from_slice(&bytes); } Ok(()) } fn unmarshal(array: &[u8]) -> Result { - let mut vec = Vec::new(); - for i in (0..array.len()).step_by(4) { - let bytes = &array[i..i + 4]; + let (capacity, _remainder) = (array.len() / 4, array.len() % 4); + /* + if remainder != 0 { + can return err rather than panic from unwrap + also if do so can make it chunks_exact instead of chunks + } + */ + let mut vec = Vec::with_capacity(capacity); + for bytes in array.chunks(4) { let f: f32 = f32::from_le_bytes(bytes.try_into().unwrap()); vec.push(f); } @@ -58,6 +66,7 @@ impl Quantization for NoQuantization { } } +#[allow(clippy::module_name_repetitions)] #[derive(Default)] pub struct BF16Quantization {} @@ -90,15 +99,14 @@ impl Quantization for BF16Quantization { fn marshal(v: &Self::Lower, array: &mut [u8]) -> Result<()> { for (i, f) in v.iter().enumerate() { let bytes = f.to_le_bytes(); - let _ = &array[i * 2..i * 2 + 2].copy_from_slice(&bytes); + let () = &array[i * 2..i * 2 + 2].copy_from_slice(&bytes); } Ok(()) } fn unmarshal(array: &[u8]) -> Result { - let mut vec = Vec::new(); - for i in (0..array.len()).step_by(2) { - let bytes = &array[i..i + 2]; + let mut vec = Vec::with_capacity(array.len() / 2); + for bytes in array.chunks(2) { let f: bf16 = bf16::from_le_bytes(bytes.try_into().unwrap()); vec.push(f); } diff --git a/rust/src/result.rs b/rust/src/result.rs index 9b06ed2..3490c96 100644 --- a/rust/src/result.rs +++ b/rust/src/result.rs @@ -1,11 +1,13 @@ use crate::ID; +#[allow(clippy::module_name_repetitions)] #[derive(Debug)] pub struct SearchResult { pub similarity: f32, pub id: ID, } +#[allow(clippy::module_name_repetitions)] #[derive(Debug)] pub struct ResultSet { sims: Vec, @@ -15,6 +17,7 @@ pub struct ResultSet { } impl ResultSet { + #[must_use] pub fn new(k: usize) -> Self { Self { sims: Vec::with_capacity(k), @@ -24,14 +27,18 @@ impl ResultSet { } } + #[must_use] pub fn len(&self) -> usize { self.sims.len() } + #[must_use] pub fn is_empty(&self) -> bool { self.sims.is_empty() } + #[allow(clippy::cast_precision_loss, clippy::cast_lossless)] + #[must_use] pub fn compute_recall(&self, baseline: &ResultSet, at: usize) -> f64 { let mut found = 0; for x in baseline.ids.iter().take(at) { diff --git a/rust/src/spaces/simple.rs b/rust/src/spaces/simple.rs index 910ac80..3d9b50c 100644 --- a/rust/src/spaces/simple.rs +++ b/rust/src/spaces/simple.rs @@ -1,7 +1,10 @@ +#[allow(clippy::wildcard_imports)] #[cfg(target_arch = "x86_64")] use super::simple_avx::*; +#[allow(clippy::wildcard_imports)] #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] use super::simple_neon::*; +#[allow(clippy::wildcard_imports)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use super::simple_sse::*; use crate::unaligned_f32::UnalignedF32Slice; diff --git a/rust/src/spaces/simple_avx.rs b/rust/src/spaces/simple_avx.rs index f1fbe31..c497d5e 100644 --- a/rust/src/spaces/simple_avx.rs +++ b/rust/src/spaces/simple_avx.rs @@ -1,3 +1,4 @@ +#[allow(clippy::wildcard_imports)] use std::arch::x86_64::*; use std::ptr::read_unaligned; @@ -12,6 +13,12 @@ unsafe fn hsum256_ps_avx(x: __m256) -> f32 { _mm_cvtss_f32(x32) } +#[allow( + clippy::ptr_as_ptr, + clippy::cast_ptr_alignment, + clippy::many_single_char_names, + clippy::similar_names +)] #[target_feature(enable = "avx")] #[target_feature(enable = "fma")] pub(crate) unsafe fn euclid_similarity_avx(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { @@ -61,6 +68,11 @@ pub(crate) unsafe fn euclid_similarity_avx(v1: &UnalignedF32Slice, v2: &Unaligne result } +#[allow( + clippy::ptr_as_ptr, + clippy::cast_ptr_alignment, + clippy::many_single_char_names +)] #[target_feature(enable = "avx")] #[target_feature(enable = "fma")] pub(crate) unsafe fn dot_similarity_avx(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { @@ -140,11 +152,11 @@ mod tests { let euclid_simd = unsafe { euclid_similarity_avx(v1, v2) }; let euclid = euclidean_distance_non_optimized(v1, v2); - assert_eq!(euclid_simd, euclid); + assert!((euclid_simd - euclid).abs() < f32::EPSILON); let dot_simd = unsafe { dot_similarity_avx(v1, v2) }; let dot = dot_product_non_optimized(v1, v2); - assert_eq!(dot_simd, dot); + assert!((dot_simd - dot).abs() < f32::EPSILON); // let cosine_simd = unsafe { cosine_preprocess_avx(v1.clone()) }; // let cosine = cosine_preprocess(v1); diff --git a/rust/src/spaces/simple_sse.rs b/rust/src/spaces/simple_sse.rs index 8e000c6..a546ba5 100644 --- a/rust/src/spaces/simple_sse.rs +++ b/rust/src/spaces/simple_sse.rs @@ -1,5 +1,7 @@ +#[allow(clippy::wildcard_imports)] #[cfg(target_arch = "x86")] use std::arch::x86::*; +#[allow(clippy::wildcard_imports)] #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; use std::ptr::read_unaligned; @@ -13,6 +15,12 @@ unsafe fn hsum128_ps_sse(x: __m128) -> f32 { _mm_cvtss_f32(x32) } +#[allow( + clippy::ptr_as_ptr, + clippy::cast_ptr_alignment, + clippy::many_single_char_names, + clippy::similar_names +)] #[allow(dead_code)] #[target_feature(enable = "sse")] pub(crate) unsafe fn euclid_similarity_sse(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { @@ -58,6 +66,11 @@ pub(crate) unsafe fn euclid_similarity_sse(v1: &UnalignedF32Slice, v2: &Unaligne result } +#[allow( + clippy::ptr_as_ptr, + clippy::cast_ptr_alignment, + clippy::many_single_char_names +)] #[target_feature(enable = "sse")] pub(crate) unsafe fn dot_similarity_sse(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { // It is safe to load unaligned floats from a pointer. @@ -134,11 +147,11 @@ mod tests { let euclid_simd = unsafe { euclid_similarity_sse(v1, v2) }; let euclid = euclidean_distance_non_optimized(v1, v2); - assert_eq!(euclid_simd, euclid); + assert!((euclid_simd - euclid).abs() < f32::EPSILON); let dot_simd = unsafe { dot_similarity_sse(v1, v2) }; let dot = dot_product_non_optimized(v1, v2); - assert_eq!(dot_simd, dot); + assert!((dot_simd - dot).abs() < f32::EPSILON); // let cosine_simd = unsafe { cosine_preprocess_sse(v1.clone()) }; // let cosine = cosine_preprocess(v1); diff --git a/rust/src/unaligned_f32.rs b/rust/src/unaligned_f32.rs index 292014d..60d4fc6 100644 --- a/rust/src/unaligned_f32.rs +++ b/rust/src/unaligned_f32.rs @@ -9,6 +9,7 @@ use std::{ use bytemuck::cast_slice; use byteorder::ByteOrder; +#[allow(clippy::module_name_repetitions)] /// A wrapper struct that is used to read unaligned floats directly from memory. #[repr(transparent)] pub struct UnalignedF32Slice([u8]); @@ -17,6 +18,7 @@ impl UnalignedF32Slice { /// Creates an unaligned slice of f32 wrapper from a slice of bytes. pub fn from_bytes(bytes: &[u8]) -> anyhow::Result<&Self> { if bytes.len() % size_of::() == 0 { + #[allow(clippy::transmute_ptr_to_ptr, clippy::missing_transmute_annotations)] Ok(unsafe { transmute(bytes) }) } else { Err(anyhow::anyhow!("Byte size mismatch to f32")) @@ -61,7 +63,7 @@ impl UnalignedF32Slice { impl<'a> From<&'a crate::Vector> for &'a UnalignedF32Slice { fn from(value: &'a Vec) -> Self { - UnalignedF32Slice::from_slice(value.as_slice()) + UnalignedF32Slice::from_slice(value) } } diff --git a/rust/src/vector.rs b/rust/src/vector.rs index ed5173a..4135d1f 100644 --- a/rust/src/vector.rs +++ b/rust/src/vector.rs @@ -1,5 +1,6 @@ use crate::Vector; +#[allow(clippy::inline_always)] #[inline(always)] pub fn normalize(vec: &mut Vector) { let s = crate::unaligned_f32::UnalignedF32Slice::from_slice(vec.as_slice()); @@ -7,11 +8,14 @@ pub fn normalize(vec: &mut Vector) { vec.iter_mut().for_each(|v| *v /= norm); } +#[allow(clippy::inline_always)] #[inline(always)] +#[must_use] pub fn dot_product(vec: &Vector, other: &Vector) -> f32 { crate::spaces::simple::dot_product(vec.into(), other.into()) } +#[allow(clippy::inline_always)] #[inline(always)] pub fn subtract_into(vec: &mut Vector, other: &Vector) { for (v, o) in vec.iter_mut().zip(other.iter()) { @@ -19,7 +23,9 @@ pub fn subtract_into(vec: &mut Vector, other: &Vector) { } } +#[allow(clippy::inline_always)] #[inline(always)] +#[must_use] pub fn distance(vec: &Vector, other: &Vector) -> f32 { vec.iter() .zip(other.iter()) diff --git a/rust/src/vector_store.rs b/rust/src/vector_store.rs index 3a25e44..403a91d 100644 --- a/rust/src/vector_store.rs +++ b/rust/src/vector_store.rs @@ -19,28 +19,38 @@ pub struct VectorStore { } impl VectorStore { + /// # Errors + /// loading bases and bitmaps from backend can fail pub fn new(backend: E) -> Result { VectorStore::new_vector_store(backend) } + /// # Errors + /// loading bases and bitmaps from backend can fail pub fn new_croaring_bitmap(backend: E) -> Result { VectorStore::new_vector_store(backend) } } impl VectorStore { + /// # Errors + /// loading bases and bitmaps from backend can fail pub fn new_roaring_bitmap(backend: E) -> Result { VectorStore::new_vector_store(backend) } } impl VectorStore { + /// # Errors + /// loading bases and bitmaps from backend can fail pub fn new_bitvec_bitmap(backend: E) -> Result { VectorStore::new_vector_store(backend) } } impl VectorStore { + /// # Errors + /// loading bases and bitmaps from backend can fail pub fn new_vector_store(mut backend: E) -> Result { let info = backend.info(); let bases = match backend.load_bases()? { @@ -57,11 +67,16 @@ impl VectorStore { Ok(out) } + /// # Errors + /// `put_vector` can fail for the backend as can `add_to_bitmaps` + #[allow(clippy::inline_always)] #[inline(always)] pub fn add_vector(&mut self, id: ID, vector: &Vector) -> Result<()> { self.add_vector_iter(vec![(id, vector)].into_iter()) } + /// # Errors + /// `put_vector` can fail for the backend as can `add_to_bitmaps` pub fn add_vector_iter<'a>( &mut self, iter: impl Iterator, @@ -73,6 +88,8 @@ impl VectorStore { Ok(()) } + /// # Errors + /// finding a counting layer or `compute_similarity` can fail pub fn find_nearest( &self, target: &Vector, @@ -88,6 +105,9 @@ impl VectorStore { self.find_nearest_internal(target, k, search_k, sp) } + /// # Errors + /// finding a counting layer or `compute_similarity` can fail + #[allow(clippy::inline_always)] #[inline(always)] fn find_nearest_internal( &self, @@ -103,9 +123,9 @@ impl VectorStore { let mut spill_into = B::new(); proj.clear(); for b in basis { - proj.push(dot_product(target, b)) + proj.push(dot_product(target, b)); } - for _s in 0..(spill + 1) { + for _s in 0..=spill { let face_idx = find_face_idx(&proj); if let Some(bm) = self.bitmaps[i].get(&face_idx) { spill_into.or(bm); @@ -124,7 +144,9 @@ impl VectorStore { Ok(rs) } - #[allow(unused)] + /// # Errors + /// no actual error as of now + #[allow(unused, clippy::unnecessary_wraps)] fn add_to_bitmaps(&mut self, id: ID, vec: &Vector) -> Result<()> { let mut proj = Vec::with_capacity(self.dimensions); for (bi, basis) in self.bases.iter().enumerate() { @@ -138,11 +160,14 @@ impl VectorStore { Ok(()) } + /// # Errors + /// `find_nearest` can fail because `compute_similarity` can fail pub fn full_table_scan(&self, vec: &Vector, k: usize) -> Result { self.backend.find_nearest(vec, k) } } +#[allow(clippy::unnecessary_wraps, clippy::similar_names)] fn make_basis(n_basis: usize, dimensions: usize) -> Result> { let mut bases = Vec::::with_capacity(n_basis); for _n in 0..n_basis { @@ -189,6 +214,7 @@ fn load_all_bitmaps(be: &mut impl VectorBackend) -> Result::new(); for x in 0..info.dimensions { + #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] let index = x as i32; let bit = be.load_bitmap::(i, index)?; if let Some(bitmap) = bit { @@ -203,19 +229,22 @@ fn load_all_bitmaps(be: &mut impl VectorBackend) -> Result i32 { let (min_idx, max_idx) = projection.argminmax(); + #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] let idx = if projection[max_idx].abs() >= projection[min_idx].abs() { max_idx as i32 } else { min_idx as i32 }; + #[allow(clippy::cast_sign_loss)] if projection[idx as usize] > 0.0 { idx + 1 } else { diff --git a/rust/tests/search.rs b/rust/tests/search.rs index ba3779a..fe3b4bb 100644 --- a/rust/tests/search.rs +++ b/rust/tests/search.rs @@ -3,7 +3,7 @@ use bbqvec::{self, IndexIDIterator}; #[test] fn search_index() -> Result<()> { - let data = bbqvec::create_vector_set(10, 100000); + let data = bbqvec::create_vector_set(10, 100_000); let mem = bbqvec::MemoryBackend::new(10, 10)?; let mut store = bbqvec::VectorStore::new_croaring_bitmap(mem)?; println!("Made store");