Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions rust/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use anyhow::Result;

use crate::{Basis, Bitmap, ResultSet, Vector, ID};

#[allow(clippy::module_name_repetitions)]
pub struct BackendInfo {
pub quantization: String,
pub has_index_data: bool,
Expand All @@ -10,6 +11,7 @@ pub struct BackendInfo {
pub vector_count: usize,
}

#[allow(clippy::missing_errors_doc, clippy::module_name_repetitions)]
pub trait VectorBackend {
fn put_vector(&mut self, id: ID, v: &Vector) -> Result<()>;
fn compute_similarity(&self, target: &Vector, target_id: ID) -> Result<f32>;
Expand All @@ -18,6 +20,8 @@ pub trait VectorBackend {
fn vector_exists(&self, id: ID) -> bool;
fn close(self) -> Result<()>;

/// # Errors
/// `compute_similarity` can fail
fn find_nearest(&self, target: &Vector, k: usize) -> Result<ResultSet> {
let mut set = ResultSet::new(k);
for id in self.iter_vector_ids() {
Expand All @@ -30,14 +34,20 @@ pub trait VectorBackend {
fn load_bases(&self) -> Result<Option<Vec<Basis>>>;
fn load_bitmap<B: Bitmap>(&mut self, basis: usize, index: i32) -> Result<Option<B>>;

/// Hook for storing a set of bases in the backend.
///
/// The default implementation ignores `_bases` and does nothing.
///
/// # Errors
/// The default implementation never fails; it always returns `Ok(())`.
fn save_bases(&mut self, _bases: &[Basis]) -> Result<()> {
Ok(())
}

/// Hook for storing one bitmap, addressed by `_basis` and `_index`.
///
/// The default implementation ignores all of its arguments and does nothing.
///
/// # Errors
/// The default implementation never fails; it always returns `Ok(())`.
fn save_bitmap(&mut self, _basis: usize, _index: usize, _bitmap: &impl Bitmap) -> Result<()> {
Ok(())
}

/// Hook for flushing backend state.
///
/// The default implementation does nothing.
///
/// # Errors
/// The default implementation never fails; it always returns `Ok(())`.
fn sync(&self) -> Result<()> {
Ok(())
}
Expand Down
7 changes: 6 additions & 1 deletion rust/src/backend_memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ pub struct QuantizedMemoryBackend<Q: Quantization> {
pub type MemoryBackend = QuantizedMemoryBackend<crate::quantization::NoQuantization>;

impl<Q: Quantization> QuantizedMemoryBackend<Q> {
/// # Errors
/// none
pub fn new(dimensions: usize, n_basis: usize) -> Result<Self> {
Ok(Self {
vecs: Vec::new(),
Expand All @@ -41,10 +43,11 @@ impl<Q: Quantization> VectorBackend for QuantizedMemoryBackend<Q> {
if v.len() != self.dimensions {
return Err(anyhow!("dimensions don't match"));
}
#[allow(clippy::cast_possible_truncation)]
let uid = id as usize;
if self.vecs.len() <= uid {
if self.vecs.capacity() == uid {
self.vecs.reserve(min(self.vecs.capacity(), 1024 * 1024))
self.vecs.reserve(min(self.vecs.capacity(), 1024 * 1024));
}
self.vecs.resize(uid + 1, None);
}
Expand All @@ -57,6 +60,7 @@ impl<Q: Quantization> VectorBackend for QuantizedMemoryBackend<Q> {

fn compute_similarity(&self, target: &Vector, target_id: crate::ID) -> Result<f32> {
// Make sure it's normalized!
#[allow(clippy::cast_possible_truncation)]
let v = self.vecs[target_id as usize]
.as_ref()
.ok_or(anyhow!("No vector present"))?;
Expand All @@ -82,6 +86,7 @@ impl<Q: Quantization> VectorBackend for QuantizedMemoryBackend<Q> {
}

fn vector_exists(&self, id: ID) -> bool {
#[allow(clippy::cast_possible_truncation)]
let v = self.vecs.get(id as usize);
match v {
Some(x) => x.is_some(),
Expand Down
35 changes: 21 additions & 14 deletions rust/src/bitmaps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,25 +26,28 @@ impl Bitmap for roaring::RoaringBitmap {
self.is_empty()
}

/// Returns the number of values stored in the bitmap.
///
/// The underlying length is converted with `usize::try_from`; if it does not
/// fit in `usize` (possible on targets with a narrow `usize`) the result
/// saturates at `usize::MAX` instead of silently wrapping.
fn count(&self) -> usize {
    usize::try_from(self.len()).unwrap_or(usize::MAX)
}

/// Inserts `id` into the bitmap.
///
/// `id` is narrowed to `u32` with an `as` cast before insertion.
/// NOTE(review): if `ID` is wider than `u32` (the lint allowance suggests so),
/// ids above `u32::MAX` are truncated and alias a smaller id — confirm callers
/// never pass such values.
fn add(&mut self, id: ID) {
// The narrowing cast is deliberate; the lint is silenced for this statement only.
#[allow(clippy::cast_possible_truncation)]
self.insert(id as u32);
}

fn iter_elems(&self) -> impl Iterator<Item = ID> {
#[allow(clippy::cast_lossless)]
self.iter().map(|x| x as ID)
}
/// Removes from `self` every value that is present in `rhs` (in-place difference).
fn and_not(&mut self, rhs: &Self) {
    self.sub_assign(rhs);
}
/// Adds to `self` every value that is present in `rhs` (in-place union).
fn or(&mut self, rhs: &Self) {
    self.bitor_assign(rhs);
}
/// Replaces `self` with the symmetric difference of `self` and `rhs`.
///
/// The operation must be applied exactly once: applying it twice with the
/// same `rhs` restores the original contents.
fn xor(&mut self, rhs: &Self) {
    self.bitxor_assign(rhs);
}
fn estimate_size(&self) -> usize {
self.serialized_size()
Expand All @@ -64,11 +67,12 @@ impl Bitmap for bitvec::prelude::BitVec {
self.is_empty()
}

/// Sets the bit at position `id`, growing the vector when it is too short.
///
/// When `id` is past the current length the vector is resized to `id + 1`
/// bits, with the new bits cleared, before the target bit is set.
#[allow(clippy::cast_possible_truncation)]
fn add(&mut self, id: ID) {
    // Convert once so the bounds check, the resize and the write agree on the index.
    let index = id as usize;
    if self.len() <= index {
        self.resize(index + 1, false);
    }
    self.set(index, true);
}

fn iter_elems(&self) -> impl Iterator<Item = ID> {
Expand All @@ -78,24 +82,24 @@ impl Bitmap for bitvec::prelude::BitVec {
#[inline]
fn and_not(&mut self, rhs: &Self) {
for elem in self.as_raw_mut_slice().iter_mut().zip(rhs.as_raw_slice()) {
*elem.0 &= !elem.1
*elem.0 &= !elem.1;
}
}

/// ORs `rhs` into `self`.
///
/// When `self` is shorter than `rhs` it is first extended to `rhs.len()`
/// with cleared bits.
#[inline]
fn or(&mut self, rhs: &Self) {
    let wanted = rhs.len();
    if self.len() < wanted {
        self.resize(wanted, false);
    }
    self.bitor_assign(rhs);
}

/// XORs `rhs` into `self`.
///
/// When `self` is shorter than `rhs` it is first extended to `rhs.len()`
/// with cleared bits. The XOR must be applied exactly once: applying it
/// twice with the same `rhs` would cancel out.
#[inline]
fn xor(&mut self, rhs: &Self) {
    let wanted = rhs.len();
    if self.len() < wanted {
        self.resize(wanted, false);
    }
    self.bitxor_assign(rhs);
}

fn estimate_size(&self) -> usize {
Expand All @@ -112,28 +116,31 @@ impl Bitmap for croaring::Bitmap {
self.is_empty()
}

/// Returns the cardinality of the bitmap.
///
/// The cardinality is converted with `usize::try_from`; if it does not fit
/// in `usize` (possible on targets with a narrow `usize`) the result
/// saturates at `usize::MAX` instead of silently wrapping.
fn count(&self) -> usize {
    usize::try_from(self.cardinality()).unwrap_or(usize::MAX)
}

/// Inserts `id` into the bitmap.
///
/// `id` is narrowed to `u32` with an `as` cast before insertion.
/// NOTE(review): if `ID` is wider than `u32` (the lint allowance suggests so),
/// ids above `u32::MAX` are truncated — confirm callers never pass such values.
fn add(&mut self, id: ID) {
    // The narrowing cast is deliberate; the lint is silenced for this statement only.
    #[allow(clippy::cast_possible_truncation)]
    self.add(id as u32);
}

fn iter_elems(&self) -> impl Iterator<Item = ID> {
#[allow(clippy::cast_lossless)]
self.iter().map(|x| x as ID)
}

/// Removes from `self` every value that is present in `rhs` (in-place difference).
fn and_not(&mut self, rhs: &Self) {
    self.andnot_inplace(rhs);
}

/// Adds to `self` every value that is present in `rhs` (in-place union).
fn or(&mut self, rhs: &Self) {
    self.or_inplace(rhs);
}

/// Replaces `self` with the symmetric difference of `self` and `rhs`.
///
/// The operation must be applied exactly once: applying it twice with the
/// same `rhs` restores the original contents.
fn xor(&mut self, rhs: &Self) {
    self.xor_inplace(rhs);
}

fn estimate_size(&self) -> usize {
Expand Down
5 changes: 4 additions & 1 deletion rust/src/counting_bitmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ impl<B: Bitmap> CountingBitmap<B> {
}

/// Returns the `count()` of every bitmap in `self.bitmaps`, in iteration order.
pub fn cardinalities(&self) -> Vec<usize> {
    self.bitmaps
        .iter()
        .map(super::bitmaps::Bitmap::count)
        .collect()
}

pub fn top_k(&self, search_k: usize) -> Option<&B> {
Expand Down
4 changes: 3 additions & 1 deletion rust/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@ use rand::Rng;

use crate::{Vector, ID};

/// Builds a vector of `dimensions` components drawn from `-1.0..1.0` and
/// passes it through `crate::vector::normalize` before returning it.
///
/// Exactly one component is generated per dimension, so the result has
/// `dimensions` entries.
#[must_use]
pub fn create_random_vector(dimensions: usize) -> Vector {
    // Fetch the thread-local generator once instead of once per component.
    let mut rng = rand::thread_rng();
    let mut out: Vector = (0..dimensions)
        .map(|_| rng.gen_range(-1.0..1.0))
        .collect();
    crate::vector::normalize(&mut out);
    out
}

#[must_use]
pub fn create_vector_set(dimensions: usize, count: usize) -> Vec<Vector> {
std::iter::repeat_with(|| create_random_vector(dimensions))
.take(count)
Expand Down
24 changes: 16 additions & 8 deletions rust/src/quantization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::{vector::distance, Vector};
use anyhow::Result;
use half::{bf16, vec::HalfFloatVecExt};

#[allow(clippy::module_name_repetitions)]
pub trait Quantization: Default {
type Lower: Clone;
fn similarity(x: &Self::Lower, y: &Self::Lower) -> Result<f32>;
Expand All @@ -13,6 +14,7 @@ pub trait Quantization: Default {
fn name() -> &'static str;
}

/// Marker type with no fields that implements [`Quantization`].
///
/// NOTE(review): the name and the `f32` byte handling in its `marshal` /
/// `unmarshal` suggest vectors are stored unquantized — confirm against the
/// full `impl`.
#[allow(clippy::module_name_repetitions)]
#[derive(Default)]
pub struct NoQuantization {}

Expand Down Expand Up @@ -42,22 +44,29 @@ impl Quantization for NoQuantization {
fn marshal(v: &Self::Lower, array: &mut [u8]) -> Result<()> {
for (i, f) in v.iter().enumerate() {
let bytes = f.to_le_bytes();
let _ = &array[i * 4..i * 4 + 4].copy_from_slice(&bytes);
let () = &array[i * 4..i * 4 + 4].copy_from_slice(&bytes);
}
Ok(())
}

fn unmarshal(array: &[u8]) -> Result<Self::Lower> {
let mut vec = Vec::new();
for i in (0..array.len()).step_by(4) {
let bytes = &array[i..i + 4];
let (capacity, _remainder) = (array.len() / 4, array.len() % 4);
/*
if remainder != 0 {
can return err rather than panic from unwrap
also if do so can make it chunks_exact instead of chunks
}
*/
let mut vec = Vec::with_capacity(capacity);
for bytes in array.chunks(4) {
let f: f32 = f32::from_le_bytes(bytes.try_into().unwrap());
vec.push(f);
}
Ok(vec)
}
}

/// Marker type with no fields that implements [`Quantization`].
///
/// NOTE(review): the name and the 2-byte `bf16` handling in its `marshal` /
/// `unmarshal` suggest vectors are stored as `bf16` — confirm against the
/// full `impl`.
#[allow(clippy::module_name_repetitions)]
#[derive(Default)]
pub struct BF16Quantization {}

Expand Down Expand Up @@ -90,15 +99,14 @@ impl Quantization for BF16Quantization {
fn marshal(v: &Self::Lower, array: &mut [u8]) -> Result<()> {
for (i, f) in v.iter().enumerate() {
let bytes = f.to_le_bytes();
let _ = &array[i * 2..i * 2 + 2].copy_from_slice(&bytes);
let () = &array[i * 2..i * 2 + 2].copy_from_slice(&bytes);
}
Ok(())
}

fn unmarshal(array: &[u8]) -> Result<Self::Lower> {
let mut vec = Vec::new();
for i in (0..array.len()).step_by(2) {
let bytes = &array[i..i + 2];
let mut vec = Vec::with_capacity(array.len() / 2);
for bytes in array.chunks(2) {
let f: bf16 = bf16::from_le_bytes(bytes.try_into().unwrap());
vec.push(f);
}
Expand Down
7 changes: 7 additions & 0 deletions rust/src/result.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
use crate::ID;

/// A single result entry pairing an `id` with a `similarity` value.
#[allow(clippy::module_name_repetitions)]
#[derive(Debug)]
pub struct SearchResult {
/// Similarity value recorded for `id` (presumably relative to the search target — confirm).
pub similarity: f32,
/// Identifier this entry refers to.
pub id: ID,
}

#[allow(clippy::module_name_repetitions)]
#[derive(Debug)]
pub struct ResultSet {
sims: Vec<f32>,
Expand All @@ -15,6 +17,7 @@ pub struct ResultSet {
}

impl ResultSet {
#[must_use]
pub fn new(k: usize) -> Self {
Self {
sims: Vec::with_capacity(k),
Expand All @@ -24,14 +27,18 @@ impl ResultSet {
}
}

/// Returns the number of entries currently held, i.e. the length of `sims`.
#[must_use]
pub fn len(&self) -> usize {
self.sims.len()
}

/// Returns `true` when no entries are held, i.e. `sims` is empty.
#[must_use]
pub fn is_empty(&self) -> bool {
self.sims.is_empty()
}

#[allow(clippy::cast_precision_loss, clippy::cast_lossless)]
#[must_use]
pub fn compute_recall(&self, baseline: &ResultSet, at: usize) -> f64 {
let mut found = 0;
for x in baseline.ids.iter().take(at) {
Expand Down
3 changes: 3 additions & 0 deletions rust/src/spaces/simple.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#[allow(clippy::wildcard_imports)]
#[cfg(target_arch = "x86_64")]
use super::simple_avx::*;
#[allow(clippy::wildcard_imports)]
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use super::simple_neon::*;
#[allow(clippy::wildcard_imports)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use super::simple_sse::*;
use crate::unaligned_f32::UnalignedF32Slice;
Expand Down
Loading