From dfde62e498b0fe8fcfb5405848abb87e9bc1b527 Mon Sep 17 00:00:00 2001 From: Evan West Date: Fri, 26 May 2023 22:43:13 +0000 Subject: [PATCH 01/10] SplinterDB rust wrapper: initial commit --- rust/.gitignore | 2 + rust/Cargo.toml | 5 + rust/splinterdb-rs/Cargo.toml | 13 ++ rust/splinterdb-rs/src/lib.rs | 299 ++++++++++++++++++++++++++++++++ rust/splinterdb-rs/src/tests.rs | 220 +++++++++++++++++++++++ rust/splinterdb-sys/Cargo.toml | 12 ++ rust/splinterdb-sys/build.rs | 39 +++++ rust/splinterdb-sys/src/lib.rs | 60 +++++++ rust/splinterdb-sys/wrapper.h | 6 + 9 files changed, 656 insertions(+) create mode 100755 rust/.gitignore create mode 100755 rust/Cargo.toml create mode 100644 rust/splinterdb-rs/Cargo.toml create mode 100644 rust/splinterdb-rs/src/lib.rs create mode 100755 rust/splinterdb-rs/src/tests.rs create mode 100644 rust/splinterdb-sys/Cargo.toml create mode 100755 rust/splinterdb-sys/build.rs create mode 100644 rust/splinterdb-sys/src/lib.rs create mode 100755 rust/splinterdb-sys/wrapper.h diff --git a/rust/.gitignore b/rust/.gitignore new file mode 100755 index 000000000..fa8d85ac5 --- /dev/null +++ b/rust/.gitignore @@ -0,0 +1,2 @@ +Cargo.lock +target diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100755 index 000000000..c8f42e829 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,5 @@ +[workspace] +members = [ + "splinterdb-sys", + "splinterdb-rs", +] diff --git a/rust/splinterdb-rs/Cargo.toml b/rust/splinterdb-rs/Cargo.toml new file mode 100644 index 000000000..479c8bcf9 --- /dev/null +++ b/rust/splinterdb-rs/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "splinterdb-rs" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +splinterdb-sys = { path = "../splinterdb-sys" } +serde = { version = "1.0", optional = true, features = ["derive"] } + +[dev-dependencies] +tempfile = "3.2.0" diff --git a/rust/splinterdb-rs/src/lib.rs 
b/rust/splinterdb-rs/src/lib.rs new file mode 100644 index 000000000..df4a0bc40 --- /dev/null +++ b/rust/splinterdb-rs/src/lib.rs @@ -0,0 +1,299 @@ +use std::io::{Error, Result}; +use std::path::Path; + +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug)] +pub struct DBConfig { + pub cache_size_bytes: usize, + pub disk_size_bytes: usize, + pub max_key_size: u8, + pub max_value_size: u8, +} + +#[derive(Debug)] +pub struct SplinterDB { + _inner: *mut splinterdb_sys::splinterdb, + sdb_cfg: splinterdb_sys::splinterdb_config, + data_cfg: splinterdb_sys::data_config, +} + +unsafe impl Sync for SplinterDB {} +unsafe impl Send for SplinterDB {} + +impl Drop for SplinterDB { + fn drop(&mut self) { + unsafe { splinterdb_sys::splinterdb_close(&mut self._inner) }; + } +} + +#[derive(Debug, PartialEq)] +pub enum LookupResult { + Found(Vec), + FoundTruncated(Vec), + NotFound, +} + +fn as_result(rc: ::std::os::raw::c_int) -> Result<()> { + if rc != 0 { + Err(Error::from_raw_os_error(rc)) + } else { + Ok(()) + } +} + +fn create_splinter_slice(ref v: &[u8]) -> splinterdb_sys::slice { + unsafe { + splinterdb_sys::slice { + length: v.len() as u64, + data: ::std::mem::transmute(v.as_ptr()), + } + } +} + +#[derive(Debug)] +pub struct IteratorResult<'a> { + pub key: &'a [u8], + pub value: &'a [u8], +} + +#[derive(Debug)] +pub struct RangeIterator<'a> { + _inner: *mut splinterdb_sys::splinterdb_iterator, + _marker: ::std::marker::PhantomData, + _parent_marker: ::std::marker::PhantomData<&'a splinterdb_sys::splinterdb>, + state: Option>, +} + +impl<'a> Drop for RangeIterator<'a> { + fn drop(&mut self) { + unsafe { splinterdb_sys::splinterdb_iterator_deinit(self._inner) } + } +} + +impl<'a> RangeIterator<'a> { + pub fn new(iter: *mut splinterdb_sys::splinterdb_iterator) -> RangeIterator<'a> { + RangeIterator { + _inner: iter, + _marker: ::std::marker::PhantomData, + _parent_marker: ::std::marker::PhantomData, + state: None, + } + } + + // stashes 
current state of the iterator from the C API + fn _stash_current(&mut self) { + let mut key_out: splinterdb_sys::slice = splinterdb_sys::slice { + length: 0, + data: ::std::ptr::null(), + }; + let mut val_out: splinterdb_sys::slice = splinterdb_sys::slice { + length: 0, + data: ::std::ptr::null(), + }; + + let (key, value): (&[u8], &[u8]) = unsafe { + // get key and value + splinterdb_sys::splinterdb_iterator_get_current( + self._inner, + &mut key_out, + &mut val_out, + ); + // parse key and value into rust slices + ( + ::std::slice::from_raw_parts( + ::std::mem::transmute(key_out.data), + key_out.length as usize, + ), + ::std::slice::from_raw_parts( + ::std::mem::transmute(val_out.data), + val_out.length as usize, + ), + ) + }; + let r = IteratorResult { key, value }; + self.state = Some(r); + } + + fn _inner_advance(&mut self) { + unsafe { splinterdb_sys::splinterdb_iterator_next(self._inner) }; + } + + // almost an iterator, but we need to be able to return errors + // and retain ownership of the result + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Result> { + // Rust iterator expects to start just before the first element + // but Splinter iterators start at the first element + // so we only call _inner_advance if its our first iteration + if self.state.is_some() { + self._inner_advance(); + } + + let valid = unsafe { splinterdb_sys::splinterdb_iterator_valid(self._inner) }; + if !valid { + let rc = unsafe { splinterdb_sys::splinterdb_iterator_status(self._inner) }; + as_result(rc)?; + return Ok(None); + } + + self._stash_current(); + match self.state { + None => Ok(None), + Some(ref r) => Ok(Some(r)), + } + } +} + +fn path_as_cstring>(path: P) -> std::ffi::CString { + let as_os_str = path.as_ref().as_os_str(); + let as_str = as_os_str.to_str().unwrap(); + std::ffi::CString::new(as_str).unwrap() +} + +impl SplinterDB { + // Create a SplinterDB object. This is uninitialized. 
+ pub fn create_uninit_obj() -> SplinterDB { + SplinterDB { + _inner: std::ptr::null_mut(), + sdb_cfg: unsafe { std::mem::zeroed() }, + data_cfg: unsafe { std::mem::zeroed() }, + } + } + + fn db_create_or_open>( + &mut self, + path: &P, + cfg: &DBConfig, + open_existing: bool, + ) -> Result<()> { + let path = path_as_cstring(path); // don't drop until init is done + + self.sdb_cfg.filename = path.as_ptr(); + self.sdb_cfg.cache_size = cfg.cache_size_bytes as u64; + self.sdb_cfg.disk_size = cfg.disk_size_bytes as u64; + self.sdb_cfg.data_cfg = &mut self.data_cfg; + + unsafe { + splinterdb_sys::default_data_config_init( + cfg.max_key_size as u64, + self.sdb_cfg.data_cfg, + ); + }; + + let rc = if open_existing { + unsafe { splinterdb_sys::splinterdb_open(&self.sdb_cfg, &mut self._inner) } + } else { + unsafe { splinterdb_sys::splinterdb_create(&self.sdb_cfg, &mut self._inner) } + }; + as_result(rc) + } + + pub fn db_create>(&mut self, path: &P, cfg: &DBConfig) -> Result<()> { + self.db_create_or_open(path, cfg, false) + } + + pub fn db_open>(&mut self, path: &P, cfg: &DBConfig) -> Result<()> { + self.db_create_or_open(path, cfg, true) + } + + pub fn register_thread(&self) { + unsafe { splinterdb_sys::splinterdb_register_thread(self._inner) }; + } + + pub fn deregister_thread(&self) { + unsafe { splinterdb_sys::splinterdb_deregister_thread(self._inner) }; + } + + pub fn insert(&self, key: &[u8], value: &[u8]) -> Result<()> { + let key_slice: splinterdb_sys::slice = create_splinter_slice(key); + let val_slice: splinterdb_sys::slice = create_splinter_slice(value); + + let rc = unsafe { + splinterdb_sys::splinterdb_insert( + self._inner, + key_slice, + val_slice, + ) + }; + as_result(rc) + } + + pub fn delete(&self, key: &[u8]) -> Result<()> { + let rc = unsafe { + splinterdb_sys::splinterdb_delete( + self._inner, + create_splinter_slice(key), + ) + }; + as_result(rc) + } + + pub fn lookup(&self, key: &[u8]) -> Result { + unsafe { + let mut lr: 
splinterdb_sys::splinterdb_lookup_result = std::mem::zeroed(); + splinterdb_sys::splinterdb_lookup_result_init( + self._inner, + &mut lr, + 0, + std::ptr::null_mut(), + ); + + let rc = splinterdb_sys::splinterdb_lookup( + self._inner, + create_splinter_slice(key), + &mut lr, + ); + as_result(rc)?; + + let found = splinterdb_sys::splinterdb_lookup_found(&lr); + if !found { + return Ok(LookupResult::NotFound); + } + + let mut val: splinterdb_sys::slice = splinterdb_sys::slice{ + length: 0, + data: std::mem::zeroed(), + }; + let rc = splinterdb_sys::splinterdb_lookup_result_value( + &lr, + &mut val, + ); + as_result(rc)?; + + // TODO: Can we avoid this memory init and copy? + let mut value: Vec = vec![0; val.length as usize]; + std::ptr::copy( + val.data, + std::mem::transmute(value.as_mut_ptr()), + val.length as usize, + ); + Ok(LookupResult::Found(value)) + } + } + + pub fn range(&self, start_key: Option<&[u8]>) -> Result { + let mut iter: *mut splinterdb_sys::splinterdb_iterator = std::ptr::null_mut(); + + let rc = unsafe { + let start_slice: splinterdb_sys::slice = match start_key { + Some(s) => splinterdb_sys::slice { + length: s.len() as u64, + data: ::std::mem::transmute(s.as_ptr()), + }, + None => splinterdb_sys::slice { + length: 0, + data: ::std::ptr::null(), + }, + }; + splinterdb_sys::splinterdb_iterator_init( + self._inner, + &mut iter, + start_slice, + ) + }; + as_result(rc)?; + Ok(RangeIterator::new(iter)) + } +} + +mod tests; diff --git a/rust/splinterdb-rs/src/tests.rs b/rust/splinterdb-rs/src/tests.rs new file mode 100755 index 000000000..56ed7602c --- /dev/null +++ b/rust/splinterdb-rs/src/tests.rs @@ -0,0 +1,220 @@ +// Tests of the splinterdb-rs library +// +// If you add a new function to the public API of this library, add a test here +// (or extend an existing test) to demonstrate how to use it. 
+ +#[cfg(test)] +mod tests { + + // Test of performing two insertions and lookup + #[test] + fn ins_test() -> std::io::Result<()> { + use splinterdb_sys::slice; + use tempfile::tempdir; + println!("BEGINNING TEST!"); + + let mut sdb = crate::SplinterDB::create_uninit_obj(); + + let data_dir = tempdir()?; // is removed on drop + let data_file = data_dir.path().join("db.splinterdb"); + + sdb.db_create( + &data_file, + &crate::DBConfig { + cache_size_bytes: 1024 * 1024, + disk_size_bytes: 30 * 1024 * 1024, + max_key_size: 23, + max_value_size: 100, + }, + )?; + + println!("SUCCESSFULLY CREATED DB!"); + + let key = b"some-key-0".to_vec(); + let value = b"some-value-0".to_vec(); + + // verify that we can correctly create a splinter-slice from these keys and values + let ks: slice = crate::create_splinter_slice(&key); + let vs: slice = crate::create_splinter_slice(&value); + assert_eq!(ks.length, key.len() as u64); + assert_eq!(vs.length, value.len() as u64); + unsafe { + for i in 0..key.len() { + assert_eq!(*((ks.data as *const u8).offset(i as isize)), key[i]); + } + for i in 0..value.len() { + assert_eq!(*((vs.data as *const u8).offset(i as isize)), value[i]); + } + } + + sdb.insert(&key, &value)?; + sdb.insert(&(b"some-key-4".to_vec()), &(b"some-value-4".to_vec()))?; + println!("SUCCESSFULLY INSERTED TO DB!"); + + let res = sdb.lookup(&key)?; + match res { + crate::LookupResult::NotFound => panic!("inserted key not found"), + crate::LookupResult::FoundTruncated(_) => panic!("inserted key found but truncated"), + crate::LookupResult::Found(v) => assert_eq!(v, value), + } + + println!("SUCCESSFULLY PERFORMED LOOKUP!"); + + println!("Dropping SplinterDB!"); + drop(sdb); + println!("Drop done! 
Exiting"); + Ok(()) + } + + // Insert and delete, then lookup + #[test] + fn ins_and_del_test() -> std::io::Result<()> { + use tempfile::tempdir; + println!("BEGINNING TEST!"); + + let mut sdb = crate::SplinterDB::create_uninit_obj(); + + let data_dir = tempdir()?; // is removed on drop + let data_file = data_dir.path().join("db.splinterdb"); + + sdb.db_create( + &data_file, + &crate::DBConfig { + cache_size_bytes: 1024 * 1024, + disk_size_bytes: 30 * 1024 * 1024, + max_key_size: 23, + max_value_size: 100, + }, + )?; + + println!("SUCCESSFULLY CREATED DB!"); + + let key = b"some-key-0".to_vec(); + let value = b"some-value-0".to_vec(); + sdb.insert(&key, &value)?; + sdb.insert(&(b"some-key-1".to_vec()), &(b"some-value-1".to_vec()))?; + sdb.insert(&(b"some-key-2".to_vec()), &(b"some-value-2".to_vec()))?; + println!("SUCCESSFULLY PERFORMED INSERTIONS!"); + + sdb.delete(&(b"some-key-1".to_vec()))?; + sdb.delete(&(b"some-key-2".to_vec()))?; + + // lookup key that should not be present + let res = sdb.lookup(&(b"some-key-1".to_vec()))?; + match res { + crate::LookupResult::NotFound => println!("Good!"), + crate::LookupResult::FoundTruncated(_) => panic!("Should not have found this key!"), + crate::LookupResult::Found(_) => panic!("Should not have found this key!"), + } + + // lookup key that should still be present + let res = sdb.lookup(&key)?; + match res { + crate::LookupResult::NotFound => panic!("inserted key not found"), + crate::LookupResult::FoundTruncated(_) => panic!("inserted key found but truncated"), + crate::LookupResult::Found(v) => assert_eq!(v, value), + } + + println!("SUCCESSFULLY PERFORMED LOOKUPS!"); + + println!("Dropping SplinterDB!"); + drop(sdb); + println!("Drop done! 
Exiting"); + Ok(()) + } + + #[test] + fn overwrite_test() -> std::io::Result<()> { + use tempfile::tempdir; + println!("BEGINNING TEST!"); + + let mut sdb = crate::SplinterDB::create_uninit_obj(); + + let data_dir = tempdir()?; // is removed on drop + let data_file = data_dir.path().join("db.splinterdb"); + + sdb.db_create( + &data_file, + &crate::DBConfig { + cache_size_bytes: 1024 * 1024, + disk_size_bytes: 30 * 1024 * 1024, + max_key_size: 23, + max_value_size: 100, + }, + )?; + println!("SUCCESSFULLY CREATED DB!"); + + let key = b"some-key-0".to_vec(); + let value = b"some-value-0".to_vec(); + let nval = b"some-value-1".to_vec(); + sdb.insert(&key, &value)?; + sdb.insert(&key, &nval)?; + + // lookup key + let res = sdb.lookup(&key)?; + match res { + crate::LookupResult::NotFound => panic!("inserted key not found"), + crate::LookupResult::FoundTruncated(_) => panic!("inserted key found but truncated"), + crate::LookupResult::Found(v) => { + assert_eq!(v, nval); + }, + } + println!("SUCCESSFULLY PERFORMED LOOKUP!"); + + println!("Dropping SplinterDB!"); + drop(sdb); + println!("Drop done! 
Exiting"); + Ok(()) + } + + #[test] + fn range_lookup_test() -> std::io::Result<()> { + use tempfile::tempdir; + println!("BEGINNING TEST!"); + + let mut sdb = crate::SplinterDB::create_uninit_obj(); + + let data_dir = tempdir()?; // is removed on drop + let data_file = data_dir.path().join("db.splinterdb"); + + sdb.db_create( + &data_file, + &crate::DBConfig { + cache_size_bytes: 1024 * 1024, + disk_size_bytes: 30 * 1024 * 1024, + max_key_size: 23, + max_value_size: 100, + }, + )?; + println!("SUCCESSFULLY CREATED DB!"); + + sdb.insert(&(b"some-key-0".to_vec()), &(b"some-value-0".to_vec()))?; + sdb.insert(&(b"some-key-3".to_vec()), &(b"some-value-3".to_vec()))?; + sdb.insert(&(b"some-key-5".to_vec()), &(b"some-value-5".to_vec()))?; + sdb.insert(&(b"some-key-6".to_vec()), &(b"some-value-6".to_vec()))?; + + let mut found: Vec<(Vec, Vec)> = Vec::new(); // to collect results + let mut iter = sdb.range(None)?; + loop { + match iter.next() { + Ok(Some(r)) => found.push((r.key.to_vec(), r.value.to_vec())), + Ok(None) => break, + Err(e) => return Err(e), + } + } + + println!("Found {} results", found.len()); + + assert_eq!(found[0], (b"some-key-0".to_vec(), b"some-value-0".to_vec())); + assert_eq!(found[1], (b"some-key-3".to_vec(), b"some-value-3".to_vec())); + assert_eq!(found[2], (b"some-key-5".to_vec(), b"some-value-5".to_vec())); + assert_eq!(found[3], (b"some-key-6".to_vec(), b"some-value-6".to_vec())); + + drop(iter); + + println!("Dropping SplinterDB!"); + drop(sdb); + println!("Drop done! 
Exiting"); + Ok(()) + } +} diff --git a/rust/splinterdb-sys/Cargo.toml b/rust/splinterdb-sys/Cargo.toml new file mode 100644 index 000000000..b52f1a60e --- /dev/null +++ b/rust/splinterdb-sys/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "splinterdb-sys" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[build-dependencies] +bindgen = "0.65.1" + +[dependencies] +tempfile = "3.2.0" diff --git a/rust/splinterdb-sys/build.rs b/rust/splinterdb-sys/build.rs new file mode 100755 index 000000000..c64752d20 --- /dev/null +++ b/rust/splinterdb-sys/build.rs @@ -0,0 +1,39 @@ +extern crate bindgen; + +use std::env; +use std::path::PathBuf; + +fn main() { + // Tell cargo to look for shared libraries in the specified directory + println!("cargo:rustc-link-search=/usr/local/lib"); + + // Tell cargo to tell rustc to link to splinterdb shared lib + println!("cargo:rustc-link-lib=splinterdb"); + + // Tell cargo to invalidate the built crate whenever the wrapper changes + println!("cargo:rerun-if-changed=wrapper.h"); + + // The bindgen::Builder is the main entry point + // to bindgen, and lets you build up options for + // the resulting bindings. + let bindings = bindgen::Builder::default() + .no_copy("splinterdb.*") + .no_copy("writable_buffer") + .no_copy("data_config") + .allowlist_type("splinterdb.*") + .allowlist_function("splinterdb.*") + .allowlist_function("default_data_config.*") + .allowlist_var("SPLINTERDB.*") + .allowlist_var(".*_SIZE") + .clang_arg("-DSPLINTERDB_PLATFORM_DIR=platform_linux") + .header("wrapper.h") + .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .generate() + .expect("Unable to generate bindings"); + + // Write the bindings to the $OUT_DIR/bindings.rs file. 
+ let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); + bindings + .write_to_file(out_path.join("bindings.rs")) + .expect("Couldn't write bindings!"); +} diff --git a/rust/splinterdb-sys/src/lib.rs b/rust/splinterdb-sys/src/lib.rs new file mode 100644 index 000000000..7af08008e --- /dev/null +++ b/rust/splinterdb-sys/src/lib.rs @@ -0,0 +1,60 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +include!(concat!(env!("OUT_DIR"), "/bindings.rs")); + +#[cfg(test)] +mod tests { + + fn path_as_cstring>(path: P) -> std::ffi::CString { + let as_os_str = path.as_ref().as_os_str(); + let as_str = as_os_str.to_str().unwrap(); + std::ffi::CString::new(as_str).unwrap() + } + + // Really basic "smoke" test of the generated code, just to see that the + // C library actually links. + #[test] + fn invoke_things() { + use tempfile::tempdir; + + let data_dir = tempdir().unwrap(); // is removed on drop + let data_file = data_dir.path().join("db.splinterdb"); + let path = path_as_cstring(data_file); // don't drop until init is done + + println!("path = {:?}", path); + + let mut data_config: super::data_config = unsafe { std::mem::zeroed() }; + let mut cfg: super::splinterdb_config = unsafe { std::mem::zeroed() }; + cfg.filename = path.as_ptr(); + cfg.cache_size = 200 * 1024 * 1024; + cfg.disk_size = 400 * 1024 * 1024; + cfg.data_cfg = &mut data_config; + + println!("CONFIG CREATED!"); + + let mut splinterdb: *mut super::splinterdb = std::ptr::null_mut(); + + println!("SPLINTER POINTER CREATED!"); + + unsafe { + super::default_data_config_init( + 32, + cfg.data_cfg, + ) + }; + + println!("CONFIG INIT!"); + + let rc = unsafe { + super::splinterdb_create(&cfg, &mut splinterdb) + }; + assert_eq!(rc, 0); + + println!("SPLINTER CREATED!"); + + unsafe { super::splinterdb_close(&mut splinterdb) }; + + println!("SPLINTER CLOSED!"); + } +} \ No newline at end of file diff --git a/rust/splinterdb-sys/wrapper.h b/rust/splinterdb-sys/wrapper.h 
new file mode 100755 index 000000000..fc5a2bde0 --- /dev/null +++ b/rust/splinterdb-sys/wrapper.h @@ -0,0 +1,6 @@ +// This file lists the C headers that bindgen will process +// when creating the rust interface to splinterdb +// See regenerate.sh for details + +#include +#include From 497a96678253a8e24935c3ab73ebce661339428d Mon Sep 17 00:00:00 2001 From: Evan West Date: Sat, 27 May 2023 19:50:01 +0000 Subject: [PATCH 02/10] SplinterDB rust wrapper: add documentation --- rust/README.md | 36 +++++++++++++++++++++++++++++++++++ rust/splinterdb-rs/README.md | 6 ++++++ rust/splinterdb-sys/README.md | 21 ++++++++++++++++++++ rust/splinterdb-sys/wrapper.h | 1 - 4 files changed, 63 insertions(+), 1 deletion(-) create mode 100755 rust/README.md create mode 100755 rust/splinterdb-rs/README.md create mode 100755 rust/splinterdb-sys/README.md diff --git a/rust/README.md b/rust/README.md new file mode 100755 index 000000000..af434d3d4 --- /dev/null +++ b/rust/README.md @@ -0,0 +1,36 @@ +# Rust Wrapper for SplinterDB + +These docs assume some basic familiarity with the Rust language and tools, particularly the [Rust build tool `cargo`](https://doc.rust-lang.org/book/ch01-03-hello-cargo.html) + +## Overview +Rust may be suitable for developing applications that use SplinterDB, and for writing certain types of tests of SplinterDB. + +This directory contains Rust bindings for SplinterDB +- `splinterdb-sys`: Lowest level, unsafe Rust declarations for a subset of the SplinterDB public API. +- `splinterdb-rs`: A safe and ergonomic Rust wrapper, intended for use by other Rust libraries and Rust applications. + +## Usage +Ensure you have Rust and Cargo available, e.g. use [rustup](https://rustup.rs/). + +Next, [build and install the SplinterDB C library](../../docs/build.md) **using `clang-13`**, +e.g.: +```sh +CC=clang-13 LD=clang-13 make -C .. && make install -C .. 
+``` + +Then from this directory, run +```sh +cargo build +cargo test +``` + +Cargo builds into the `target/debug` subdirectory. For release builds, add `--release` to the above commands and look in `target/release`. + + +## Why does this only build with `clang` and not `gcc`? +Short answer: because of link time optimization (LTO). + +Longer answer: +To use LTO across languages, e.g. C with Rust, all compilation units must be built using the same toolchain. +The Rust compiler is based on LLVM, not GCC. Therefore, SplinterDB must be built with `clang` (or +without LTO), in order to be usable from Rust. diff --git a/rust/splinterdb-rs/README.md b/rust/splinterdb-rs/README.md new file mode 100755 index 000000000..4aa9f6779 --- /dev/null +++ b/rust/splinterdb-rs/README.md @@ -0,0 +1,6 @@ +# `splinterdb-rs` + +This crate aims to be a safe and ergonomic Rust wrapper SplinterDB's public API. + +Currently, it exposes a simple key/value abstraction, by using the `splinterdb` and +`default_data_config` modules. diff --git a/rust/splinterdb-sys/README.md b/rust/splinterdb-sys/README.md new file mode 100755 index 000000000..19cb8c0ce --- /dev/null +++ b/rust/splinterdb-sys/README.md @@ -0,0 +1,21 @@ +# `splinterdb-sys` + +Lowest level, unsafe Rust declarations for (some of) the C functions exported from SplinterDB. + +The exported headers are listed in `wrapper.h`. In order to build, the splinterdb shared libraries must be built and present at `\usr\local\lib`. The location where the shared libraries are found can be changed by modifying `build.rs`. + +If the shared libraries change, `cargo build` should automatically detect the change and rebuild this package. + +## Generating the Wrapper +The wrapper code is generated automatically upon a call to `cargo build` based upon the files listed in `wrapper.h`. + +If it is necessary to expand the functionality of `splinterdb-sys` simply add more files to `wrapper.h` and build again. 
+ +## Verifying the Wrapper +From this directory run +```sh +cargo build +cargo test +``` + +This runs the smoke test for this library, ensuring that the C bindings are created successfully. If errors occur ensure that `\usr\local\lib` is in `LD_LIBRARY_PATH` and the shared libraries have been successfully installed at that location. diff --git a/rust/splinterdb-sys/wrapper.h b/rust/splinterdb-sys/wrapper.h index fc5a2bde0..2e8382987 100755 --- a/rust/splinterdb-sys/wrapper.h +++ b/rust/splinterdb-sys/wrapper.h @@ -1,6 +1,5 @@ // This file lists the C headers that bindgen will process // when creating the rust interface to splinterdb -// See regenerate.sh for details #include #include From d9f51634afac501b2b8403e9887ea2a9aea86298 Mon Sep 17 00:00:00 2001 From: Evan West Date: Tue, 6 Jun 2023 19:16:19 +0000 Subject: [PATCH 03/10] Use rust functions in the data config + rust client --- rust/Cargo.toml | 1 + rust/splinterdb-cli/Cargo.toml | 13 + rust/splinterdb-cli/README.md | 52 ++++ rust/splinterdb-cli/src/main.rs | 411 +++++++++++++++++++++++++++++ rust/splinterdb-rs/Cargo.toml | 2 + rust/splinterdb-rs/src/lib.rs | 107 ++++++-- rust/splinterdb-rs/src/rust_cfg.rs | 240 +++++++++++++++++ rust/splinterdb-rs/src/tests.rs | 177 ++++++++++++- rust/splinterdb-sys/build.rs | 1 + rust/splinterdb-sys/wrapper.h | 1 + 10 files changed, 971 insertions(+), 34 deletions(-) create mode 100644 rust/splinterdb-cli/Cargo.toml create mode 100644 rust/splinterdb-cli/README.md create mode 100644 rust/splinterdb-cli/src/main.rs create mode 100755 rust/splinterdb-rs/src/rust_cfg.rs diff --git a/rust/Cargo.toml b/rust/Cargo.toml index c8f42e829..d8c6539ec 100755 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -2,4 +2,5 @@ members = [ "splinterdb-sys", "splinterdb-rs", + "splinterdb-cli", ] diff --git a/rust/splinterdb-cli/Cargo.toml b/rust/splinterdb-cli/Cargo.toml new file mode 100644 index 000000000..9ba98e4d1 --- /dev/null +++ b/rust/splinterdb-cli/Cargo.toml @@ -0,0 +1,13 @@ +[package] 
+name = "splinterdb-cli" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { version = "3.0.7", features = ["derive"] } +crossbeam-utils = "0.8.5" +rand = "0.8.4" +rand_pcg = "0.3.1" +serde = { version = "1.0", optional = true, features = ["derive"] } +serde_json = "1.0" +splinterdb-rs = { path = "../splinterdb-rs", features = ["serde"] } diff --git a/rust/splinterdb-cli/README.md b/rust/splinterdb-cli/README.md new file mode 100644 index 000000000..cb33a3547 --- /dev/null +++ b/rust/splinterdb-cli/README.md @@ -0,0 +1,52 @@ +# SplinterDB Client +A simple command line utility for SplinterDB. + +It also serves as an example of how to build an application using the Rust wrapper for SplinterDB. + +For build instructions, see the [README in the parent directory](../README.md). + +For usage, run `target/debug/splinterdb-cli --help`: + +## Walkthrough +Initialize a new database file on disk +``` +$ target/debug/splinterdb-cli --file /tmp/my-db init-db --disk-mb 2000 --key-size 20 --value-size 110 +``` +Note this creates both the named `/tmp/my-db` file and an extra metadata +file `/tmp/my-db.meta`. Both files must be present for the other +commands to work. + +List contents (currently empty) +``` +$ target/debug/splinterdb-cli --file /tmp/my-db list +``` + +Add some data +``` +$ target/debug/splinterdb-cli --file /tmp/my-db insert -k "key1" -v "value1" +$ target/debug/splinterdb-cli --file /tmp/my-db insert -k "key2" -v "value2" +$ target/debug/splinterdb-cli --file /tmp/my-db insert -k "key3" -v "value3" +``` + +List contents again +``` +$ target/debug/splinterdb-cli --file /tmp/my-db list +``` + +Delete a key/value pair +``` +$ target/debug/splinterdb-cli --file /tmp/my-db delete --key "key2" +``` + +Lookup a single value +``` +$ target/debug/splinterdb-cli --file /tmp/my-db get -k "key1" +``` + +## Performance testing +The same tool may be used for testing the performance of SplinterDB. 
+ +This will overwrite the chosen file or block device with random data, and print results at the end +``` +$ target/debug/splinterdb-cli --file /tmp/test perf --threads 8 --writes-per-thread 50000 +``` diff --git a/rust/splinterdb-cli/src/main.rs b/rust/splinterdb-cli/src/main.rs new file mode 100644 index 000000000..d67a33228 --- /dev/null +++ b/rust/splinterdb-cli/src/main.rs @@ -0,0 +1,411 @@ +use clap::Parser; + +/// SplinterDB command line tool +#[derive(Parser)] +#[clap(name = "splinterdb-cli", version = "0.1")] +struct Opts { + #[clap(short, long)] + file: String, + + #[clap(subcommand)] + subcmd: SubCommand, +} + +#[derive(Parser)] +enum SubCommand { + InitDB(InitDB), + Insert(Insert), + Delete(Delete), + Get(Get), + List(List), + Perf(Perf), +} + +/// Insert a key-value pair into an existing database +#[derive(Parser)] +struct Insert { + /// Key to insert + #[clap(short, long)] + pub key: String, + + /// Value to insert + #[clap(short, long)] + pub value: String, +} + +/// Delete a key and its value from an existing database +#[derive(Parser)] +struct Delete { + /// Key to delete + #[clap(short, long)] + pub key: String, +} + +/// Get the value for a key from an existing database +#[derive(Parser)] +struct Get { + /// Key to lookup + #[clap(short, long)] + pub key: String, +} + +/// List all keys and values in an existing database +#[derive(Parser)] +struct List {} + +/// Initialize a new database file +#[derive(Parser)] +struct InitDB { + /// Size of in-memory cache, in MB + #[clap(short, long, default_value = "30")] + pub cache_mb: u16, + + /// Size of file to use on disk, in MB + #[clap(short, long, default_value = "100")] + pub disk_mb: u16, + + /// Maximum length of keys, in bytes + #[clap(short, long, default_value = "16")] + pub key_size: usize, + + /// Maximum length of values, in bytes + #[clap(short, long, default_value = "100")] + pub value_size: usize, +} + +const MB: usize = 1024 * 1024; + +type CLIResult = Result>; + +use std::fs::File; +use 
std::path::Path; +use splinterdb_rs::*; + +// Simple implementation of some merge behavior for performance testing +// When an update is performed, simply make the value the larger of the two +struct SimpleMerge {} +impl SdbRustDataFuncs for SimpleMerge { + // leave all functions but merges as default + + fn merge(_key: &[u8], old_msg: SdbMessage, new_msg: SdbMessage) -> std::io::Result + { + let old_val = old_msg.data; + let new_val = new_msg.data; + + let upd_val = if old_val >= new_val { + old_val + } else { + new_val + }; + + // if old insert and new update -> insert + // otherwise -> update + match old_msg.msg_type { + SdbMessageType::INSERT => Ok(SdbMessage { + msg_type: SdbMessageType::INSERT, + data: upd_val, + }), + SdbMessageType::UPDATE => Ok(SdbMessage { + msg_type: SdbMessageType::UPDATE, + data: upd_val, + }), + _ => panic!("Expected INSERT or UPDATE"), + } + } + fn merge_final(_key: &[u8], oldest_msg: SdbMessage) -> std::io::Result + { + // Simply label this message as an insertion + Ok(SdbMessage { + msg_type: SdbMessageType::INSERT, + data: oldest_msg.data, + }) + } +} + +fn get_metadata_path(db_path: &str) -> String { + format!("{}.meta", db_path) +} + +fn meta_save(db_path: &str, db_config: &splinterdb_rs::DBConfig) -> CLIResult<()> { + let meta_file = File::create(Path::new(&get_metadata_path(db_path)))?; + ::serde_json::ser::to_writer(meta_file, db_config)?; + Ok(()) +} + +fn meta_load(db_path: &str) -> CLIResult { + let mut meta_file = File::open(Path::new(&get_metadata_path(db_path)))?; + let mut bytes = Vec::new(); + use std::io::Read; + meta_file.read_to_end(&mut bytes)?; + let db_config = serde_json::from_slice(&bytes)?; + Ok(db_config) +} + +impl InitDB { + fn run(&self, opts: &Opts) -> CLIResult<()> { + let db_config = splinterdb_rs::DBConfig { + cache_size_bytes: (self.cache_mb as usize) * MB, + disk_size_bytes: (self.disk_mb as usize) * MB, + max_key_size: self.key_size, + max_value_size: self.value_size, + }; + meta_save(&opts.file, 
&db_config)?; + + let mut db = splinterdb_rs::SplinterDB::new::(); + db.db_create(&opts.file, &db_config)?; + drop(db); + Ok(()) + } +} + +impl Get { + fn run(&self, opts: &Opts) -> CLIResult<()> { + let db_config = meta_load(&opts.file)?; + let mut db = splinterdb_rs::SplinterDB::new::(); + db.db_open(&opts.file, &db_config)?; + let res = db.lookup(self.key.as_bytes())?; + match res { + splinterdb_rs::LookupResult::NotFound => Err("key not found".into()), + splinterdb_rs::LookupResult::FoundTruncated(_) => { + Err("value truncated: this is a bug".into()) + } + splinterdb_rs::LookupResult::Found(v) => { + let v = std::str::from_utf8(&v)?; + println!("{}", v); + Ok(()) + } + } + } +} + +impl Insert { + fn run(&self, opts: &Opts) -> CLIResult<()> { + let db_config = meta_load(&opts.file)?; + let mut db = splinterdb_rs::SplinterDB::new::(); + db.db_open(&opts.file, &db_config)?; + let key = self.key.as_bytes(); + let val = self.value.as_bytes(); + db.insert(key, val)?; + Ok(()) + } +} + +impl Delete { + fn run(&self, opts: &Opts) -> CLIResult<()> { + let db_config = meta_load(&opts.file)?; + let mut db = splinterdb_rs::SplinterDB::new::(); + db.db_open(&opts.file, &db_config)?; + let key = self.key.as_bytes(); + db.delete(key)?; + Ok(()) + } +} + +impl List { + fn run(&self, opts: &Opts) -> CLIResult<()> { + let db_config = meta_load(&opts.file)?; + let mut db = splinterdb_rs::SplinterDB::new::(); + db.db_open(&opts.file, &db_config)?; + let mut iter = db.range(None)?; + loop { + match iter.next() { + Ok(Some(&splinterdb_rs::IteratorResult { key, value })) => { + let key = std::str::from_utf8(key)?; + let value = std::str::from_utf8(value)?; + println!("\t{} : {}", key, value) + } + Ok(None) => { + println!(""); + break; + } + Err(e) => { + println!("got error: {:?}", e); + break; + } + } + } + Ok(()) + } +} + +use rand::{Rng, SeedableRng}; +use rand_pcg::Pcg64; + +use crossbeam_utils::thread; +use std::time::Instant; + +/// Test performance. 
Will overwrite the target file with random data. +#[derive(Parser)] +pub struct Perf { + /// Number of insert threads + #[clap(short, long, default_value = "1")] + threads: u32, + + /// Number of writes to do on each thread + #[clap(short, long, default_value = "10000")] + writes_per_thread: u32, + + /// Random seed + #[clap(long, default_value = "0")] + seed: u64, + + /// Size of in-memory cache, in MB + #[clap(long, default_value = "400")] + cache_mb: u16, + + /// Size of file to use on disk, in MB + #[clap(long, default_value = "9000")] + disk_mb: u32, +} + +impl Perf { + const KEY_SIZE: usize = 32; + const VALUE_SIZE: usize = 64; + const REPORT_PERIOD: u32 = 500000; + + pub fn run(&self, file: String) -> CLIResult<()> { + let db_config = splinterdb_rs::DBConfig { + cache_size_bytes: self.cache_mb as usize * MB, + disk_size_bytes: self.disk_mb as usize * MB, + max_key_size: Perf::KEY_SIZE, + max_value_size: Perf::VALUE_SIZE, + }; + let path = file; + + let mut db = splinterdb_rs::SplinterDB::new::(); + db.db_create(&path, &db_config)?; + + eprint!("Inserts "); + + let start_time = Instant::now(); + // spawn several threads within a "scope" + // the scope guarantees that all threads have joined before + // control leaves the scope + thread::scope(|s| { + for i in 0..self.threads { + let db = &db; + let i = i; + let num_writes = self.writes_per_thread; + + s.spawn(move |_| { + // closure, work done on this thread + // on each thread, register it with splinterdb + db.register_thread(); + let mut rng = Pcg64::seed_from_u64(i as u64); + + // do num_writes into splinterdb + for count in 0..num_writes { + let mut key = [0u8; Perf::KEY_SIZE as usize]; + let mut value = [0u8; Perf::VALUE_SIZE as usize]; + Perf::rand_fill_buffer(&mut rng, &mut key); + Perf::rand_fill_buffer(&mut rng, &mut value); + db.insert(&key, &value).unwrap(); + + if (count+1) % Perf::REPORT_PERIOD == 0 { + eprint!("."); + } + } + db.deregister_thread(); + }); + } + }) // all threads have joined at 
this point + .unwrap(); + drop(db); // flush all caches to disk + + let write_complete_time = Instant::now(); + let total_write_time = (write_complete_time - start_time).as_secs_f32(); + + eprintln!(""); + eprint!("Updates "); + + let mut db = splinterdb_rs::SplinterDB::new::(); + db.db_create(&path, &db_config)?; + + let update_time = Instant::now(); + thread::scope(|s| { + for i in 0..self.threads { + let db = &db; + let i = i; + let num_writes = self.writes_per_thread; + + s.spawn(move |_| { + // closure, work done on this thread + // on each thread, register it with splinterdb + db.register_thread(); + let mut rng = Pcg64::seed_from_u64(i as u64); + + // do num_writes into splinterdb + for count in 0..num_writes { + let mut key = [0u8; Perf::KEY_SIZE as usize]; + let mut value = [0u8; Perf::VALUE_SIZE as usize]; + Perf::rand_fill_buffer(&mut rng, &mut key); + Perf::rand_fill_buffer(&mut rng, &mut value); + db.update(&key, &value).unwrap(); + + if (count+1) % Perf::REPORT_PERIOD == 0 { + eprint!("."); + } + } + db.deregister_thread(); + }); + } + }) // all threads have joined at this point + .unwrap(); + drop(db); // flush all caches to disk + + let update_complete_time = Instant::now(); + let total_update_time = (update_complete_time - update_time).as_secs_f32(); + + let total_writes = self.threads as u64 * self.writes_per_thread as u64; + let mb_written = + total_writes * (Perf::KEY_SIZE as u64 + Perf::VALUE_SIZE as u64) / MB as u64; + + eprintln!( + "\n{:>8} {:>12} {:>12} {:>8} {:>8} {:>15}", + "threads", "inserts", "MB_inserted", "seconds", "bw_MBps", "inserts/sec" + ); + println!( + "{:>8} {:>12} {:>12} {:>8.2} {:>8.2} {:>15.2}", + self.threads, + total_writes, + mb_written, + total_write_time, + mb_written as f32 / total_write_time, + total_writes as f32 / total_write_time, + ); + eprintln!( + "\n{:>8} {:>12} {:>12} {:>8} {:>8} {:>15}", + "threads", "updates", "MB_updated", "seconds", "bw_MBps", "updates/sec" + ); + println!( + "{:>8} {:>12} {:>12} {:>8.2} 
{:>8.2} {:>15.2}", + self.threads, + total_writes, + mb_written, + total_update_time, + mb_written as f32 / total_update_time, + total_writes as f32 / total_update_time, + ); + + Ok(()) + } + + fn rand_fill_buffer(rng: &mut Pcg64, to_fill: &mut [u8]) { + for x in to_fill.iter_mut() { + *x = rng.gen(); + } + } +} + +fn main() -> CLIResult<()> { + let opts: Opts = Opts::parse(); + + match opts.subcmd { + SubCommand::InitDB(ref init_db) => init_db.run(&opts), + SubCommand::Insert(ref insert) => insert.run(&opts), + SubCommand::Delete(ref delete) => delete.run(&opts), + SubCommand::Get(ref get) => get.run(&opts), + SubCommand::List(ref list) => list.run(&opts), + SubCommand::Perf(ref perf) => perf.run(opts.file), + } +} diff --git a/rust/splinterdb-rs/Cargo.toml b/rust/splinterdb-rs/Cargo.toml index 479c8bcf9..0a0c8f1db 100644 --- a/rust/splinterdb-rs/Cargo.toml +++ b/rust/splinterdb-rs/Cargo.toml @@ -8,6 +8,8 @@ edition = "2021" [dependencies] splinterdb-sys = { path = "../splinterdb-sys" } serde = { version = "1.0", optional = true, features = ["derive"] } +hex = "0.4" +xxhash-rust = { version = "0.8.6", features = ["xxh32"] } [dev-dependencies] tempfile = "3.2.0" diff --git a/rust/splinterdb-rs/src/lib.rs b/rust/splinterdb-rs/src/lib.rs index df4a0bc40..314db97b3 100644 --- a/rust/splinterdb-rs/src/lib.rs +++ b/rust/splinterdb-rs/src/lib.rs @@ -1,13 +1,39 @@ use std::io::{Error, Result}; use std::path::Path; +#[derive(Debug)] +pub enum CompareResult { + LESS, // first less than second + EQUAL, // first and second equal + GREATER, // first greater than second +} + +#[derive(Debug)] +pub enum SdbMessageType { + INVALID, + INSERT, + UPDATE, + DELETE, + OTHER, // TODO: IS THIS POSSIBLE? 
+} + +// Rust side representation of a splinterDB message +#[derive(Debug)] +pub struct SdbMessage { + pub msg_type: SdbMessageType, + pub data: Vec, +} + +pub mod rust_cfg; +pub use rust_cfg::*; + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug)] pub struct DBConfig { pub cache_size_bytes: usize, pub disk_size_bytes: usize, - pub max_key_size: u8, - pub max_value_size: u8, + pub max_key_size: usize, + pub max_value_size: usize, } #[derive(Debug)] @@ -64,14 +90,18 @@ pub struct RangeIterator<'a> { state: Option>, } -impl<'a> Drop for RangeIterator<'a> { - fn drop(&mut self) { +impl<'a> Drop for RangeIterator<'a> +{ + fn drop(&mut self) + { unsafe { splinterdb_sys::splinterdb_iterator_deinit(self._inner) } } } -impl<'a> RangeIterator<'a> { - pub fn new(iter: *mut splinterdb_sys::splinterdb_iterator) -> RangeIterator<'a> { +impl<'a> RangeIterator<'a> +{ + pub fn new(iter: *mut splinterdb_sys::splinterdb_iterator) -> RangeIterator<'a> + { RangeIterator { _inner: iter, _marker: ::std::marker::PhantomData, @@ -81,7 +111,8 @@ impl<'a> RangeIterator<'a> { } // stashes current state of the iterator from the C API - fn _stash_current(&mut self) { + fn _stash_current(&mut self) + { let mut key_out: splinterdb_sys::slice = splinterdb_sys::slice { length: 0, data: ::std::ptr::null(), @@ -114,14 +145,16 @@ impl<'a> RangeIterator<'a> { self.state = Some(r); } - fn _inner_advance(&mut self) { + fn _inner_advance(&mut self) + { unsafe { splinterdb_sys::splinterdb_iterator_next(self._inner) }; } // almost an iterator, but we need to be able to return errors // and retain ownership of the result #[allow(clippy::should_implement_trait)] - pub fn next(&mut self) -> Result> { + pub fn next(&mut self) -> Result> + { // Rust iterator expects to start just before the first element // but Splinter iterators start at the first element // so we only call _inner_advance if its our first iteration @@ -144,19 +177,22 @@ impl<'a> RangeIterator<'a> { } 
} -fn path_as_cstring>(path: P) -> std::ffi::CString { +fn path_as_cstring>(path: P) -> std::ffi::CString +{ let as_os_str = path.as_ref().as_os_str(); let as_str = as_os_str.to_str().unwrap(); std::ffi::CString::new(as_str).unwrap() } -impl SplinterDB { - // Create a SplinterDB object. This is uninitialized. - pub fn create_uninit_obj() -> SplinterDB { +impl SplinterDB +{ + // Create a new SplinterDB object. This is uninitialized. + pub fn new() -> SplinterDB + { SplinterDB { _inner: std::ptr::null_mut(), sdb_cfg: unsafe { std::mem::zeroed() }, - data_cfg: unsafe { std::mem::zeroed() }, + data_cfg: rust_cfg::new_sdb_data_config::(0), } } @@ -168,18 +204,16 @@ impl SplinterDB { ) -> Result<()> { let path = path_as_cstring(path); // don't drop until init is done + // set up the splinterdb config self.sdb_cfg.filename = path.as_ptr(); self.sdb_cfg.cache_size = cfg.cache_size_bytes as u64; self.sdb_cfg.disk_size = cfg.disk_size_bytes as u64; self.sdb_cfg.data_cfg = &mut self.data_cfg; - unsafe { - splinterdb_sys::default_data_config_init( - cfg.max_key_size as u64, - self.sdb_cfg.data_cfg, - ); - }; + // set key bytes + self.data_cfg.max_key_size = cfg.max_key_size as u64; + // Open or create the database let rc = if open_existing { unsafe { splinterdb_sys::splinterdb_open(&self.sdb_cfg, &mut self._inner) } } else { @@ -196,15 +230,18 @@ impl SplinterDB { self.db_create_or_open(path, cfg, true) } - pub fn register_thread(&self) { + pub fn register_thread(&self) + { unsafe { splinterdb_sys::splinterdb_register_thread(self._inner) }; } - pub fn deregister_thread(&self) { + pub fn deregister_thread(&self) + { unsafe { splinterdb_sys::splinterdb_deregister_thread(self._inner) }; } - pub fn insert(&self, key: &[u8], value: &[u8]) -> Result<()> { + pub fn insert(&self, key: &[u8], value: &[u8]) -> Result<()> + { let key_slice: splinterdb_sys::slice = create_splinter_slice(key); let val_slice: splinterdb_sys::slice = create_splinter_slice(value); @@ -218,7 +255,23 @@ impl 
SplinterDB { as_result(rc) } - pub fn delete(&self, key: &[u8]) -> Result<()> { + pub fn update(&self, key: &[u8], delta: &[u8]) -> Result<()> + { + let key_slice: splinterdb_sys::slice = create_splinter_slice(key); + let delta_slice: splinterdb_sys::slice = create_splinter_slice(delta); + + let rc = unsafe { + splinterdb_sys::splinterdb_update( + self._inner, + key_slice, + delta_slice, + ) + }; + as_result(rc) + } + + pub fn delete(&self, key: &[u8]) -> Result<()> + { let rc = unsafe { splinterdb_sys::splinterdb_delete( self._inner, @@ -228,7 +281,8 @@ impl SplinterDB { as_result(rc) } - pub fn lookup(&self, key: &[u8]) -> Result { + pub fn lookup(&self, key: &[u8]) -> Result + { unsafe { let mut lr: splinterdb_sys::splinterdb_lookup_result = std::mem::zeroed(); splinterdb_sys::splinterdb_lookup_result_init( @@ -271,7 +325,8 @@ impl SplinterDB { } } - pub fn range(&self, start_key: Option<&[u8]>) -> Result { + pub fn range(&self, start_key: Option<&[u8]>) -> Result + { let mut iter: *mut splinterdb_sys::splinterdb_iterator = std::ptr::null_mut(); let rc = unsafe { diff --git a/rust/splinterdb-rs/src/rust_cfg.rs b/rust/splinterdb-rs/src/rust_cfg.rs new file mode 100755 index 000000000..fe112c20d --- /dev/null +++ b/rust/splinterdb-rs/src/rust_cfg.rs @@ -0,0 +1,240 @@ +use std::io::Result; +use splinterdb_sys::*; +use crate::{create_splinter_slice, SdbMessageType, SdbMessage, CompareResult}; +use xxhash_rust::xxh32::xxh32; + +fn sdb_slice_to_vec(s: &slice) -> Vec +{ + unsafe { + std::slice::from_raw_parts(s.data as *const u8, s.length as usize).to_vec() + } +} + +fn raw_to_vec(data: *const ::std::os::raw::c_void, length: usize) -> Vec +{ + unsafe { + std::slice::from_raw_parts(data as *const u8, length as usize).to_vec() + } +} + +fn int_to_msg_type(i: ::std::os::raw::c_uint) -> SdbMessageType +{ + match i { + 0 => SdbMessageType::INVALID, + 1 => SdbMessageType::INSERT, + 2 => SdbMessageType::UPDATE, + 3 => SdbMessageType::DELETE, + _ => SdbMessageType::OTHER, + } 
+} + +fn create_sdb_message(msg: &message) -> SdbMessage +{ + SdbMessage { + msg_type: int_to_msg_type(msg.type_), + data: sdb_slice_to_vec(&msg.data), + } +} + +fn sdb_msg_from_acc(ma: &merge_accumulator) -> SdbMessage +{ + unsafe { + SdbMessage { + msg_type: int_to_msg_type(splinterdb_sys::merge_accumulator_message_class(ma)), + data: sdb_slice_to_vec(&splinterdb_sys::merge_accumulator_to_slice(ma)), + } + } +} + +pub trait SdbRustDataFuncs { + fn key_comp(key1: &[u8], key2: &[u8]) -> CompareResult + { + if key1 < key2 { + return CompareResult::LESS; + } else if key1 == key2 { + return CompareResult::EQUAL; + } + return CompareResult::GREATER; + } + + fn key_hash(key: &[u8], seed: u32) -> u32 + { + xxh32(key, seed) + } + + // By default we do not implement merge functionality + fn merge(_key: &[u8], _old_msg: SdbMessage, new_msg: SdbMessage) -> Result + { + Ok(new_msg) + } + fn merge_final(_key: &[u8], oldest_msg: SdbMessage) -> Result + { + Ok(oldest_msg) + } + + fn str_key(key: &[u8], dst: &mut [u8]) -> () + { + // 2 characters per byte + if 2 * key.len() > dst.len() as usize { + panic!("Key too long to convert to string!"); + } + let hex_str: String = hex::encode(key); + for (i, c) in hex_str.chars().enumerate() { + dst[i] = c as u8; + } + } + + fn str_msg(msg: SdbMessage, dst: &mut [u8]) -> () + { + if 2 * msg.data.len() > dst.len() as usize { + panic!("Msg too long to convert to string!"); + } + let hex_str: String = hex::encode(msg.data); + for (i, c) in hex_str.chars().enumerate() { + dst[i] = c as u8; + } + } +} + +pub fn new_sdb_data_config(key_size: u64) -> data_config +{ + data_config { + max_key_size: key_size, + key_compare: Some(key_compare::), + key_hash: Some(key_hash::), + merge_tuples: Some(merge_tuples::), + merge_tuples_final: Some(merge_tuples_final::), + key_to_string: Some(key_to_string::), + message_to_string: Some(message_to_string::), + } +} + +// Implement all the default data functions +pub struct DefaultSdb {} +impl SdbRustDataFuncs 
for DefaultSdb {} + +// These functions are templatized by the SdbRustDataFuncs a structure specified by the +// user that implements the key functions in rust. +// +// These functions act as a wrapper for the data config functions converting from +// the SplinterDB C API to a rust friendly API. +pub extern "C" fn key_compare( + _cfg: *const data_config, + key1: slice, + key2: slice, +) -> ::std::os::raw::c_int +{ + let res: CompareResult = T::key_comp(&sdb_slice_to_vec(&key1), &sdb_slice_to_vec(&key2)); + match res { + CompareResult::LESS => -1, + CompareResult::EQUAL => 0, + CompareResult::GREATER => 1, + } +} + +pub extern "C" fn key_hash( + input: *const ::std::os::raw::c_void, + length: usize, + seed: uint32, +) -> uint32 +{ + T::key_hash(&raw_to_vec(input, length), seed) +} + +pub extern "C" fn merge_tuples( + _cfg: *const data_config, + key: slice, + old_message: message, + new_message: *mut merge_accumulator, +) -> ::std::os::raw::c_int +{ + // convert the merge_accumulator to a message + let new_msg: SdbMessage = unsafe { + sdb_msg_from_acc(&*new_message) + }; + + // pass the old message and new message to user's merge() function + let res: SdbMessage = match T::merge( + &sdb_slice_to_vec(&key), + create_sdb_message(&old_message), + new_msg + ) + { + Ok(r) => r, + Err(..) 
=> return -1, + }; + + // update the merge_accumulator with the results of the user's func + unsafe { + splinterdb_sys::merge_accumulator_copy_message( + new_message, + message { + type_: res.msg_type as ::std::os::raw::c_uint, + data: create_splinter_slice(&res.data), + } + ); + } + + return 0; +} + +pub extern "C" fn merge_tuples_final( + _cfg: *const data_config, + key: slice, + oldest_message: *mut merge_accumulator, +) -> ::std::os::raw::c_int +{ + // convert the accumulator to a message + let new_msg: SdbMessage = unsafe { + sdb_msg_from_acc(&*oldest_message) + }; + + // call user's merge_final() function + let res: SdbMessage = match T::merge_final( + &sdb_slice_to_vec(&key), + new_msg, + ) + { + Ok(r) => r, + Err(..) => return -1, + }; + + // update the merge_accumulator with results of merge_final() + unsafe { + splinterdb_sys::merge_accumulator_copy_message( + oldest_message, + message { + type_: res.msg_type as ::std::os::raw::c_uint, + data: create_splinter_slice(&res.data), + } + ); + } + + return 0; +} + +pub extern "C" fn key_to_string( + _cfg: *const data_config, + key: slice, + str_: *mut ::std::os::raw::c_char, + max_len: uint64, +) -> () +{ + T::str_key( + &sdb_slice_to_vec(&key), + &mut raw_to_vec(str_ as *const ::std::os::raw::c_void, max_len as usize) + ); +} + +pub extern "C" fn message_to_string( + _cfg: *const data_config, + msg: message, + str_: *mut ::std::os::raw::c_char, + max_len: uint64, +) -> () +{ + T::str_msg( + create_sdb_message(&msg), + &mut raw_to_vec(str_ as *const ::std::os::raw::c_void, max_len as usize) + ); +} diff --git a/rust/splinterdb-rs/src/tests.rs b/rust/splinterdb-rs/src/tests.rs index 56ed7602c..0544382b7 100755 --- a/rust/splinterdb-rs/src/tests.rs +++ b/rust/splinterdb-rs/src/tests.rs @@ -5,15 +5,17 @@ #[cfg(test)] mod tests { + use crate::{SdbMessage, SdbMessageType, SdbRustDataFuncs}; + use std::io::Result; // Test of performing two insertions and lookup #[test] - fn ins_test() -> std::io::Result<()> { + fn 
ins_test() -> Result<()> { use splinterdb_sys::slice; use tempfile::tempdir; println!("BEGINNING TEST!"); - let mut sdb = crate::SplinterDB::create_uninit_obj(); + let mut sdb = crate::SplinterDB::new::(); let data_dir = tempdir()?; // is removed on drop let data_file = data_dir.path().join("db.splinterdb"); @@ -68,11 +70,11 @@ mod tests { // Insert and delete, then lookup #[test] - fn ins_and_del_test() -> std::io::Result<()> { + fn ins_and_del_test() -> Result<()> { use tempfile::tempdir; println!("BEGINNING TEST!"); - let mut sdb = crate::SplinterDB::create_uninit_obj(); + let mut sdb = crate::SplinterDB::new::(); let data_dir = tempdir()?; // is removed on drop let data_file = data_dir.path().join("db.splinterdb"); @@ -123,12 +125,66 @@ mod tests { Ok(()) } + // Many inserts and lookup test #[test] - fn overwrite_test() -> std::io::Result<()> { + fn many_ins_lookup() -> Result<()> { use tempfile::tempdir; println!("BEGINNING TEST!"); - let mut sdb = crate::SplinterDB::create_uninit_obj(); + let mut sdb = crate::SplinterDB::new::(); + + let data_dir = tempdir()?; // is removed on drop + let data_file = data_dir.path().join("db.splinterdb"); + + sdb.db_create( + &data_file, + &crate::DBConfig { + cache_size_bytes: 1024 * 1024, + disk_size_bytes: 30 * 1024 * 1024, + max_key_size: 23, + max_value_size: 100, + }, + )?; + + println!("SUCCESSFULLY CREATED DB!"); + + for i in 0..=100 { + let key = ("some-key-".to_owned() + &i.to_string()).into_bytes().to_vec(); + let value = ("some-value-".to_owned() + &i.to_string()).into_bytes().to_vec(); + sdb.insert(&key, &value)?; + } + println!("SUCCESSFULLY PERFORMED INSERTIONS!"); + + // lookup key that should not be present + let res = sdb.lookup(&(b"some-key-101".to_vec()))?; + match res { + crate::LookupResult::NotFound => println!("Good!"), + crate::LookupResult::FoundTruncated(_) => panic!("Should not have found this key!"), + crate::LookupResult::Found(_) => panic!("Should not have found this key!"), + } + + // lookup key 
that should still be present + let res = sdb.lookup(&(b"some-key-56".to_vec()))?; + match res { + crate::LookupResult::NotFound => panic!("inserted key not found"), + crate::LookupResult::FoundTruncated(_) => panic!("inserted key found but truncated"), + crate::LookupResult::Found(v) => assert_eq!(v, b"some-value-56".to_vec()), + } + + println!("SUCCESSFULLY PERFORMED LOOKUPS!"); + + println!("Dropping SplinterDB!"); + drop(sdb); + println!("Drop done! Exiting"); + Ok(()) + } + + #[test] + fn overwrite_test() -> Result<()> { + use tempfile::tempdir; + println!("BEGINNING TEST!"); + + let mut sdb = crate::SplinterDB::new::(); let data_dir = tempdir()?; // is removed on drop let data_file = data_dir.path().join("db.splinterdb"); @@ -168,11 +224,11 @@ mod tests { } #[test] - fn range_lookup_test() -> std::io::Result<()> { + fn range_lookup_test() -> Result<()> { use tempfile::tempdir; println!("BEGINNING TEST!"); - let mut sdb = crate::SplinterDB::create_uninit_obj(); + let mut sdb = crate::SplinterDB::new::(); let data_dir = tempdir()?; // is removed on drop let data_file = data_dir.path().join("db.splinterdb"); @@ -217,4 +273,109 @@ mod tests { println!("Drop done! 
Exiting"); Ok(()) } + + // Simple implementation of some merge behavior for testing + // When an update is performed, simply make the value the larger of the two + struct SimpleMerge {} + impl SdbRustDataFuncs for SimpleMerge { + // leave all functions but merges as default + + fn merge(_key: &[u8], old_msg: SdbMessage, new_msg: SdbMessage) -> Result + { + let old_val = old_msg.data; + let new_val = new_msg.data; + + let upd_val = if old_val >= new_val { + old_val + } else { + new_val + }; + + // if old insert and new update -> insert + // otherwise -> update + match old_msg.msg_type { + SdbMessageType::INSERT => Ok(SdbMessage { + msg_type: SdbMessageType::INSERT, + data: upd_val, + }), + SdbMessageType::UPDATE => Ok(SdbMessage { + msg_type: SdbMessageType::UPDATE, + data: upd_val, + }), + _ => panic!("Expected INSERT or UPDATE"), + } + } + fn merge_final(_key: &[u8], oldest_msg: SdbMessage) -> Result + { + // Simply label this message as an insertion + Ok(SdbMessage { + msg_type: SdbMessageType::INSERT, + data: oldest_msg.data, + }) + } + } + + #[test] + fn simple_merge_test() -> Result<()> { + use tempfile::tempdir; + println!("BEGINNING TEST!"); + + let mut sdb = crate::SplinterDB::new::(); + + let data_dir = tempdir()?; // is removed on drop + let data_file = data_dir.path().join("db.splinterdb"); + + sdb.db_create( + &data_file, + &crate::DBConfig { + cache_size_bytes: 1024 * 1024, + disk_size_bytes: 30 * 1024 * 1024, + max_key_size: 23, + max_value_size: 100, + }, + )?; + println!("SUCCESSFULLY CREATED DB!"); + + sdb.insert(&(b"some-key-0".to_vec()), &(b"some-value-0".to_vec()))?; + sdb.insert(&(b"some-key-3".to_vec()), &(b"some-value-3".to_vec()))?; + sdb.insert(&(b"some-key-5".to_vec()), &(b"some-value-5".to_vec()))?; + sdb.insert(&(b"some-key-6".to_vec()), &(b"some-value-6".to_vec()))?; + + println!("SUCCESSFULLY PERFORMED INSERTIONS!"); + + sdb.update(&(b"some-key-0".to_vec()), &(b"some-value-3".to_vec()))?; + sdb.update(&(b"some-key-3".to_vec()), 
&(b"some-value-2".to_vec()))?; + sdb.update(&(b"some-key-5".to_vec()), &(b"some-value-5".to_vec()))?; + sdb.update(&(b"some-key-6".to_vec()), &(b"some-value-9999999999999999999".to_vec()))?; + + // also issue update to key that does not exist + sdb.update(&(b"some-key-2".to_vec()), &(b"some-value-2".to_vec()))?; + + println!("SUCCESSFULLY PERFORMED MERGES!"); + + let mut found: Vec<(Vec, Vec)> = Vec::new(); // to collect results + let mut iter = sdb.range(None)?; + loop { + match iter.next() { + Ok(Some(r)) => found.push((r.key.to_vec(), r.value.to_vec())), + Ok(None) => break, + Err(e) => return Err(e), + } + } + + println!("Found {} results", found.len()); + + assert_eq!(found[0], (b"some-key-0".to_vec(), b"some-value-3".to_vec())); + assert_eq!(found[1], (b"some-key-2".to_vec(), b"some-value-2".to_vec())); + assert_eq!(found[2], (b"some-key-3".to_vec(), b"some-value-3".to_vec())); + assert_eq!(found[3], (b"some-key-5".to_vec(), b"some-value-5".to_vec())); + assert_eq!(found[4], (b"some-key-6".to_vec(), b"some-value-9999999999999999999".to_vec())); + + drop(iter); + + println!("Dropping SplinterDB!"); + drop(sdb); + println!("Drop done! 
Exiting"); + Ok(()) + } } diff --git a/rust/splinterdb-sys/build.rs b/rust/splinterdb-sys/build.rs index c64752d20..01427765a 100755 --- a/rust/splinterdb-sys/build.rs +++ b/rust/splinterdb-sys/build.rs @@ -23,6 +23,7 @@ fn main() { .allowlist_type("splinterdb.*") .allowlist_function("splinterdb.*") .allowlist_function("default_data_config.*") + .allowlist_function("merge.*") .allowlist_var("SPLINTERDB.*") .allowlist_var(".*_SIZE") .clang_arg("-DSPLINTERDB_PLATFORM_DIR=platform_linux") diff --git a/rust/splinterdb-sys/wrapper.h b/rust/splinterdb-sys/wrapper.h index 2e8382987..fe07f7830 100755 --- a/rust/splinterdb-sys/wrapper.h +++ b/rust/splinterdb-sys/wrapper.h @@ -3,3 +3,4 @@ #include #include +#include From b9a9bc40dd6c68d273be4f8df2dc9f1fc1fd3691 Mon Sep 17 00:00:00 2001 From: Evan West Date: Tue, 6 Jun 2023 19:20:19 +0000 Subject: [PATCH 04/10] rust documentation update --- rust/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/README.md b/rust/README.md index af434d3d4..0837a2b02 100755 --- a/rust/README.md +++ b/rust/README.md @@ -8,6 +8,8 @@ Rust may be suitable for developing applications that use SplinterDB, and for wr This directory contains Rust bindings for SplinterDB - `splinterdb-sys`: Lowest level, unsafe Rust declarations for a subset of the SplinterDB public API. - `splinterdb-rs`: A safe and ergonomic Rust wrapper, intended for use by other Rust libraries and Rust applications. +- `splinterdb-cli`: A simple command line utility that provides a limited key/value interface. + It serves as an example of how to build a Rust application that uses SplinterDB as a library, and can be used for basic performance testing. ## Usage Ensure you have Rust and Cargo available, e.g. use [rustup](https://rustup.rs/). 
From 0408f717a63b98e7cb9d401f6c172c97cd873b35 Mon Sep 17 00:00:00 2001 From: Evan West Date: Tue, 6 Jun 2023 20:23:25 +0000 Subject: [PATCH 05/10] Rust wrapper: documentation for rust callbacks and access adjustments --- rust/splinterdb-rs/README.md | 75 ++++++++++++++++++++++++++++-- rust/splinterdb-rs/src/lib.rs | 28 ++--------- rust/splinterdb-rs/src/rust_cfg.rs | 42 +++++++++++++++-- 3 files changed, 113 insertions(+), 32 deletions(-) diff --git a/rust/splinterdb-rs/README.md b/rust/splinterdb-rs/README.md index 4aa9f6779..c6f24cd12 100755 --- a/rust/splinterdb-rs/README.md +++ b/rust/splinterdb-rs/README.md @@ -1,6 +1,75 @@ # `splinterdb-rs` -This crate aims to be a safe and ergonomic Rust wrapper SplinterDB's public API. +This crate aims to be a safe and ergonomic Rust wrapper for SplinterDB's public API. -Currently, it exposes a simple key/value abstraction, by using the `splinterdb` and -`default_data_config` modules. +By default, it exposes a simple key/value abstraction, akin to that of the SplinterDB `default_data_config`. + +## The `splinterdb-rs` API +#### splinterdb-rs::new::\() +Returns a new (uninitialized) SplinterDB object `sdb`. +Here `T` is a struct that implements the `SdbRustDataFuncs` trait, i.e. the rust callbacks for SplinterDB's data_config. If you do not wish to define your own callbacks, use the default callbacks: `splinterdb-rs::new::();`. + +#### sdb.db_create(path, cfg) +Create a database or overwrite an existing one at `path` and configure `sdb` using `cfg`. + +#### sdb.db_open(path, cfg) +Open an existing database at `path` and configure `sdb` using `cfg`. + +#### sdb.insert(key, value) +Insert a new key/value pair or overwrite an existing key/value pair. + +#### sdb.update(key, delta) +Perform an update of key using delta. See the rust callbacks for defining the semantics of this update. + +#### sdb.delete(key) +Delete a key from the database. + +#### sdb.lookup(key) +Lookup the current value of a key. 
+Returns a `LookupResult = enum { Found(Vec), + FoundTruncated(Vec), + NotFound }`. + +#### sdb.range(start_key) +Perform a range query beginning at `start_key`. +This function returns `ri: RangeIterator` from which key/value pairs may be iteratively extracted using `ri.next()`. Thus the range is defined by the `start_key` and the number of calls to `ri.next()`. + +## Rust Callbacks for SplinterDB's data_config +To implement merge/update functionality for SplinterDB or, for example, to use a custom `key_compare` function requires the user to implement rust functions for SplinterDB to call. + +These functions are defined by the `SdbRustDataFuncs` trait. See `src/rust_cfg.rs` for the definition of the trait, comments on each function, and the default implementations. Define a new set of callbacks as follows: +``` +struct Callbacks {} +impl SdbRustDataFuncs for Callbacks +{ + fn key_comp(key1: &[u8], key2: &[u8]) -> CompareResult + { + // ... + } + + fn key_hash(key: &[u8], seed: u32) -> u32 + { + // ... + } + fn merge(_key: &[u8], _old_msg: SdbMessage, new_msg: SdbMessage) -> Result + { + // ... + } + fn merge_final(_key: &[u8], oldest_msg: SdbMessage) -> Result + { + // ... + } + fn str_key(key: &[u8], dst: &mut [u8]) -> () + { + // ... + } + fn str_msg(msg: SdbMessage, dst: &mut [u8]) -> () + { + // ... + } +} +``` +Then use these callbacks when creating a new SplinterDB object: `sdb = splinterdb-rs::new::();` + + +If any of the default implementations are sufficient for your purposes then those functions may be excluded from your `impl`. See `SimpleMerge` in `src/tests.rs` for an example. 
\ No newline at end of file diff --git a/rust/splinterdb-rs/src/lib.rs b/rust/splinterdb-rs/src/lib.rs index 314db97b3..9d3eb481d 100644 --- a/rust/splinterdb-rs/src/lib.rs +++ b/rust/splinterdb-rs/src/lib.rs @@ -1,31 +1,9 @@ use std::io::{Error, Result}; use std::path::Path; -#[derive(Debug)] -pub enum CompareResult { - LESS, // first less than second - EQUAL, // first and second equal - GREATER, // first greater than second -} - -#[derive(Debug)] -pub enum SdbMessageType { - INVALID, - INSERT, - UPDATE, - DELETE, - OTHER, // TODO: IS THIS POSSIBLE? -} - -// Rust side representation of a splinterDB message -#[derive(Debug)] -pub struct SdbMessage { - pub msg_type: SdbMessageType, - pub data: Vec, -} - pub mod rust_cfg; -pub use rust_cfg::*; +pub use rust_cfg::{CompareResult, SdbMessageType, SdbMessage, SdbRustDataFuncs, DefaultSdb}; +use rust_cfg::new_sdb_data_config; #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug)] @@ -192,7 +170,7 @@ impl SplinterDB SplinterDB { _inner: std::ptr::null_mut(), sdb_cfg: unsafe { std::mem::zeroed() }, - data_cfg: rust_cfg::new_sdb_data_config::(0), + data_cfg: new_sdb_data_config::(0), } } diff --git a/rust/splinterdb-rs/src/rust_cfg.rs b/rust/splinterdb-rs/src/rust_cfg.rs index fe112c20d..9bf1eb978 100755 --- a/rust/splinterdb-rs/src/rust_cfg.rs +++ b/rust/splinterdb-rs/src/rust_cfg.rs @@ -1,8 +1,31 @@ use std::io::Result; use splinterdb_sys::*; -use crate::{create_splinter_slice, SdbMessageType, SdbMessage, CompareResult}; +use crate::create_splinter_slice; use xxhash_rust::xxh32::xxh32; +#[derive(Debug)] +pub enum CompareResult { + LESS, // first less than second + EQUAL, // first and second equal + GREATER, // first greater than second +} + +#[derive(Debug)] +pub enum SdbMessageType { + INVALID, + INSERT, + UPDATE, + DELETE, + OTHER, // TODO: IS THIS POSSIBLE? 
+} + +// Rust side representation of a splinterDB message +#[derive(Debug)] +pub struct SdbMessage { + pub msg_type: SdbMessageType, + pub data: Vec, +} + fn sdb_slice_to_vec(s: &slice) -> Vec { unsafe { @@ -46,7 +69,10 @@ fn sdb_msg_from_acc(ma: &merge_accumulator) -> SdbMessage } } +// Trait defining the rust callbacks for SplinterDB's data_config +// By default we do not implement merge functionality pub trait SdbRustDataFuncs { + // Compare two keys, returning if key1 is less than/equal/greater than key2 fn key_comp(key1: &[u8], key2: &[u8]) -> CompareResult { if key1 < key2 { @@ -56,22 +82,29 @@ pub trait SdbRustDataFuncs { } return CompareResult::GREATER; } - - fn key_hash(key: &[u8], seed: u32) -> u32 + // Return the hash of key, seeding the hash with seed + fn key_hash(key: &[u8], seed: u32) -> u32 { xxh32(key, seed) } - // By default we do not implement merge functionality + // Combine two splinterDB messages into one given that + // 1. new_msg is of type UPDATE + // 2. old_msg is of type INSERT or UPDATE + // The returned message may be either an update or an insert fn merge(_key: &[u8], _old_msg: SdbMessage, new_msg: SdbMessage) -> Result { Ok(new_msg) } + + // Resolve an update message when there is no older record to apply the update to. 
+ // Must return either an INSERT or DELETE type message fn merge_final(_key: &[u8], oldest_msg: SdbMessage) -> Result { Ok(oldest_msg) } + // Convert a key to a string fn str_key(key: &[u8], dst: &mut [u8]) -> () { // 2 characters per byte @@ -84,6 +117,7 @@ pub trait SdbRustDataFuncs { } } + // Convert a SplinterDB message to a string fn str_msg(msg: SdbMessage, dst: &mut [u8]) -> () { if 2 * msg.data.len() > dst.len() as usize { From 6429152ca8c07768e56d9cb825b1630d7b692023 Mon Sep 17 00:00:00 2001 From: Evan West Date: Tue, 6 Jun 2023 20:50:05 +0000 Subject: [PATCH 06/10] rust wrapper fix: bindings may treat c bools as rust i32 --- rust/splinterdb-rs/src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/splinterdb-rs/src/lib.rs b/rust/splinterdb-rs/src/lib.rs index 9d3eb481d..941b47ccc 100644 --- a/rust/splinterdb-rs/src/lib.rs +++ b/rust/splinterdb-rs/src/lib.rs @@ -140,8 +140,8 @@ impl<'a> RangeIterator<'a> self._inner_advance(); } - let valid = unsafe { splinterdb_sys::splinterdb_iterator_valid(self._inner) }; - if !valid { + let valid = unsafe { splinterdb_sys::splinterdb_iterator_valid(self._inner) } as i32; + if valid == 0 { let rc = unsafe { splinterdb_sys::splinterdb_iterator_status(self._inner) }; as_result(rc)?; return Ok(None); @@ -277,8 +277,8 @@ impl SplinterDB ); as_result(rc)?; - let found = splinterdb_sys::splinterdb_lookup_found(&lr); - if !found { + let found = splinterdb_sys::splinterdb_lookup_found(&lr) as i32; + if found == 0 { return Ok(LookupResult::NotFound); } From 93c4a36bc4ad0ce85e4d35d7e4b3bc763bace342 Mon Sep 17 00:00:00 2001 From: Evan West Date: Fri, 9 Jun 2023 02:34:54 +0000 Subject: [PATCH 07/10] rust wrapper: Change to standard rust Ordering --- rust/splinterdb-rs/src/rust_cfg.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/rust/splinterdb-rs/src/rust_cfg.rs b/rust/splinterdb-rs/src/rust_cfg.rs index 9bf1eb978..f59b96be6 100755 --- 
a/rust/splinterdb-rs/src/rust_cfg.rs +++ b/rust/splinterdb-rs/src/rust_cfg.rs @@ -1,4 +1,5 @@ use std::io::Result; +use std::cmp::Ordering; use splinterdb_sys::*; use crate::create_splinter_slice; use xxhash_rust::xxh32::xxh32; @@ -73,14 +74,9 @@ fn sdb_msg_from_acc(ma: &merge_accumulator) -> SdbMessage // By default we do not implement merge functionality pub trait SdbRustDataFuncs { // Compare two keys, returning if key1 is less than/equal/greater than key2 - fn key_comp(key1: &[u8], key2: &[u8]) -> CompareResult + fn key_comp(key1: &[u8], key2: &[u8]) -> Ordering { - if key1 < key2 { - return CompareResult::LESS; - } else if key1 == key2 { - return CompareResult::EQUAL; - } - return CompareResult::GREATER; + key1.cmp(&key2) } // Return the hash of key, seeding the hash with seed fn key_hash(key: &[u8], seed: u32) -> u32 @@ -158,11 +154,11 @@ pub extern "C" fn key_compare( key2: slice, ) -> ::std::os::raw::c_int { - let res: CompareResult = T::key_comp(&sdb_slice_to_vec(&key1), &sdb_slice_to_vec(&key2)); + let res: Ordering = T::key_comp(&sdb_slice_to_vec(&key1), &sdb_slice_to_vec(&key2)); match res { - CompareResult::LESS => -1, - CompareResult::EQUAL => 0, - CompareResult::GREATER => 1, + Ordering::Less => -1, + Ordering::Equal => 0, + Ordering::Greater => 1, } } From 245c02069a6907d1c46b57cc3441dcabca0005d1 Mon Sep 17 00:00:00 2001 From: Evan West Date: Thu, 3 Aug 2023 18:46:51 +0000 Subject: [PATCH 08/10] add support for bidirectional iterators to rust wrapper --- rust/splinterdb-cli/src/main.rs | 67 ++++++++++++++++++++++++-- rust/splinterdb-rs/src/lib.rs | 84 +++++++++++++++++++++------------ rust/splinterdb-rs/src/tests.rs | 39 +++++++++++++-- 3 files changed, 151 insertions(+), 39 deletions(-) diff --git a/rust/splinterdb-cli/src/main.rs b/rust/splinterdb-cli/src/main.rs index d67a33228..aef74edff 100644 --- a/rust/splinterdb-cli/src/main.rs +++ b/rust/splinterdb-cli/src/main.rs @@ -18,6 +18,7 @@ enum SubCommand { Delete(Delete), Get(Get), List(List), + 
RevList(RevList), Perf(Perf), } @@ -53,6 +54,11 @@ struct Get { #[derive(Parser)] struct List {} +/// List all keys and values in an existing database +/// in descending order +#[derive(Parser)] +struct RevList {} + /// Initialize a new database file #[derive(Parser)] struct InitDB { @@ -207,14 +213,66 @@ impl List { let mut db = splinterdb_rs::SplinterDB::new::(); db.db_open(&opts.file, &db_config)?; let mut iter = db.range(None)?; + loop { + let res_opt = iter.get_curr(); + match res_opt { + None => break, + Some(result) => { + let key = std::str::from_utf8(result.key)?; + let value = std::str::from_utf8(result.value)?; + println!("\t{} : {}", key, value) + } + } + match iter.next() { + Ok(true) => (), + Ok(false) => { + println!(""); + break; + } + Err(e) => { + println!("got error: {:?}", e); + break; + } + } + } + Ok(()) + } +} + +impl RevList { + fn run(&self, opts: &Opts) -> CLIResult<()> { + let db_config = meta_load(&opts.file)?; + let mut db = splinterdb_rs::SplinterDB::new::(); + db.db_open(&opts.file, &db_config)?; + let mut iter = db.range(None)?; + // find the end of the list loop { match iter.next() { - Ok(Some(&splinterdb_rs::IteratorResult { key, value })) => { - let key = std::str::from_utf8(key)?; - let value = std::str::from_utf8(value)?; + Ok(true) => (), + Ok(false) => break, + Err(e) => { + println!("got error: {:?}", e); + return Ok(()); + } + } + } + // move back one element to end of range + iter.prev()?; + + // iterate list backwards + loop { + let res_opt = iter.get_curr(); + match res_opt { + None => break, + Some(result) => { + let key = std::str::from_utf8(result.key)?; + let value = std::str::from_utf8(result.value)?; println!("\t{} : {}", key, value) } - Ok(None) => { + } + match iter.prev() { + Ok(true) => (), + Ok(false) => { println!(""); break; } @@ -406,6 +464,7 @@ fn main() -> CLIResult<()> { SubCommand::Delete(ref delete) => delete.run(&opts), SubCommand::Get(ref get) => get.run(&opts), SubCommand::List(ref list) => 
list.run(&opts), + SubCommand::RevList(ref revlist) => revlist.run(&opts), SubCommand::Perf(ref perf) => perf.run(opts.file), } } diff --git a/rust/splinterdb-rs/src/lib.rs b/rust/splinterdb-rs/src/lib.rs index 941b47ccc..3eab8763a 100644 --- a/rust/splinterdb-rs/src/lib.rs +++ b/rust/splinterdb-rs/src/lib.rs @@ -61,14 +61,14 @@ pub struct IteratorResult<'a> { } #[derive(Debug)] -pub struct RangeIterator<'a> { +pub struct SplinterCursor<'a> { _inner: *mut splinterdb_sys::splinterdb_iterator, _marker: ::std::marker::PhantomData, _parent_marker: ::std::marker::PhantomData<&'a splinterdb_sys::splinterdb>, state: Option>, } -impl<'a> Drop for RangeIterator<'a> +impl<'a> Drop for SplinterCursor<'a> { fn drop(&mut self) { @@ -76,21 +76,36 @@ impl<'a> Drop for RangeIterator<'a> } } -impl<'a> RangeIterator<'a> +// Bidirectional cursor for SplinterDB +// can return errors and retains ownership of the result +impl<'a> SplinterCursor<'a> { - pub fn new(iter: *mut splinterdb_sys::splinterdb_iterator) -> RangeIterator<'a> + pub fn new(iter: *mut splinterdb_sys::splinterdb_iterator) + -> Result> { - RangeIterator { + Ok(SplinterCursor { _inner: iter, _marker: ::std::marker::PhantomData, _parent_marker: ::std::marker::PhantomData, - state: None, - } + state: Self::_get_current(iter)?, + }) } - // stashes current state of the iterator from the C API - fn _stash_current(&mut self) + // returns the current state of the iterator from the C API + fn _get_current(it: *mut splinterdb_sys::splinterdb_iterator) + -> Result>> { + let valid: i32 = unsafe { + splinterdb_sys::splinterdb_iterator_valid(it) + } as i32; + + if valid == 0 { + // cannot access the current element, check status + let rc = unsafe { splinterdb_sys::splinterdb_iterator_status(it) }; + as_result(rc)?; + return Ok(None); + } + let mut key_out: splinterdb_sys::slice = splinterdb_sys::slice { length: 0, data: ::std::ptr::null(), @@ -103,7 +118,7 @@ impl<'a> RangeIterator<'a> let (key, value): (&[u8], &[u8]) = unsafe { // 
get key and value splinterdb_sys::splinterdb_iterator_get_current( - self._inner, + it, &mut key_out, &mut val_out, ); @@ -120,38 +135,45 @@ impl<'a> RangeIterator<'a> ) }; let r = IteratorResult { key, value }; - self.state = Some(r); + Ok(Some(r)) } - fn _inner_advance(&mut self) + pub fn get_curr(&self) -> Option<&IteratorResult> { - unsafe { splinterdb_sys::splinterdb_iterator_next(self._inner) }; + match self.state { + None => None, + Some(ref r) => Some(r), + } } - // almost an iterator, but we need to be able to return errors - // and retain ownership of the result #[allow(clippy::should_implement_trait)] - pub fn next(&mut self) -> Result> + pub fn next(&mut self) -> Result { - // Rust iterator expects to start just before the first element - // but Splinter iterators start at the first element - // so we only call _inner_advance if its our first iteration - if self.state.is_some() { - self._inner_advance(); + let can_next = unsafe { splinterdb_sys::splinterdb_iterator_can_next(self._inner) } as i32; + if can_next == 0 { + let rc = unsafe { splinterdb_sys::splinterdb_iterator_status(self._inner) }; + as_result(rc)?; + return Ok(false); } + unsafe { splinterdb_sys::splinterdb_iterator_next(self._inner); } - let valid = unsafe { splinterdb_sys::splinterdb_iterator_valid(self._inner) } as i32; - if valid == 0 { + self.state = Self::_get_current(self._inner)?; + Ok(true) + } + + #[allow(clippy::should_implement_trait)] + pub fn prev(&mut self) -> Result + { + let can_prev = unsafe { splinterdb_sys::splinterdb_iterator_can_prev(self._inner) } as i32; + if can_prev == 0 { let rc = unsafe { splinterdb_sys::splinterdb_iterator_status(self._inner) }; as_result(rc)?; - return Ok(None); + return Ok(false); } + unsafe { splinterdb_sys::splinterdb_iterator_prev(self._inner); } - self._stash_current(); - match self.state { - None => Ok(None), - Some(ref r) => Ok(Some(r)), - } + self.state = Self::_get_current(self._inner)?; + Ok(true) } } @@ -303,7 +325,7 @@ impl 
SplinterDB } } - pub fn range(&self, start_key: Option<&[u8]>) -> Result + pub fn range(&self, start_key: Option<&[u8]>) -> Result { let mut iter: *mut splinterdb_sys::splinterdb_iterator = std::ptr::null_mut(); @@ -325,7 +347,7 @@ impl SplinterDB ) }; as_result(rc)?; - Ok(RangeIterator::new(iter)) + return SplinterCursor::new(iter); } } diff --git a/rust/splinterdb-rs/src/tests.rs b/rust/splinterdb-rs/src/tests.rs index 0544382b7..e989be02f 100755 --- a/rust/splinterdb-rs/src/tests.rs +++ b/rust/splinterdb-rs/src/tests.rs @@ -249,12 +249,17 @@ mod tests { sdb.insert(&(b"some-key-5".to_vec()), &(b"some-value-5".to_vec()))?; sdb.insert(&(b"some-key-6".to_vec()), &(b"some-value-6".to_vec()))?; + println!("SUCCESSFULLY PERFORMED INSERTIONS!"); let mut found: Vec<(Vec, Vec)> = Vec::new(); // to collect results let mut iter = sdb.range(None)?; loop { + match iter.get_curr() { + None => break, + Some(r) => found.push((r.key.to_vec(), r.value.to_vec())), + } match iter.next() { - Ok(Some(r)) => found.push((r.key.to_vec(), r.value.to_vec())), - Ok(None) => break, + Ok(true) => (), + Ok(false) => break, Err(e) => return Err(e), } } @@ -266,6 +271,28 @@ mod tests { assert_eq!(found[2], (b"some-key-5".to_vec(), b"some-value-5".to_vec())); assert_eq!(found[3], (b"some-key-6".to_vec(), b"some-value-6".to_vec())); + // try going backwards as well + found = Vec::new(); + iter.prev()?; + loop { + match iter.get_curr() { + None => break, + Some(r) => found.push((r.key.to_vec(), r.value.to_vec())), + } + match iter.prev() { + Ok(true) => (), + Ok(false) => break, + Err(e) => return Err(e), + } + } + + println!("Found {} results", found.len()); + + assert_eq!(found[0], (b"some-key-6".to_vec(), b"some-value-6".to_vec())); + assert_eq!(found[1], (b"some-key-5".to_vec(), b"some-value-5".to_vec())); + assert_eq!(found[2], (b"some-key-3".to_vec(), b"some-value-3".to_vec())); + assert_eq!(found[3], (b"some-key-0".to_vec(), b"some-value-0".to_vec())); + drop(iter); println!("Dropping 
SplinterDB!"); @@ -356,9 +383,13 @@ mod tests { let mut found: Vec<(Vec, Vec)> = Vec::new(); // to collect results let mut iter = sdb.range(None)?; loop { + match iter.get_curr() { + None => break, + Some(r) => found.push((r.key.to_vec(), r.value.to_vec())), + } match iter.next() { - Ok(Some(r)) => found.push((r.key.to_vec(), r.value.to_vec())), - Ok(None) => break, + Ok(true) => (), + Ok(false) => break, Err(e) => return Err(e), } } From 86f1a5fb4de28e7c222fd97d3a8f352fc0db131b Mon Sep 17 00:00:00 2001 From: Evan West Date: Thu, 3 Aug 2023 19:50:09 +0000 Subject: [PATCH 09/10] add rust wrapper tests to unit tests --- test.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test.sh b/test.sh index dd0db1bef..96d85078c 100755 --- a/test.sh +++ b/test.sh @@ -661,6 +661,31 @@ function run_other_driver_tests() { "$BINDIR"/driver_test filter_test --seed "$SEED" } +# ################################################################## +# Run the tests of the Rust wrapper +# Exits without running tests if rustc or cargo is not installed +# ################################################################## +function run_rust_wrapper_tests() { + set +x + rustc --version > /dev/null + have_rustc=$? + cargo --version > /dev/null + have_cargo=$? + + if [ "$have_rustc" -ne 0 ] || [ "$have_cargo" -ne 0 ]; then + echo "Rust and cargo not installed... skipping tests" + set -x + return 0 + fi + set -x + cd rust + cargo test --tests --release + ret=$? 
+ cd - + return $ret +} + + # ################################################################## # main() begins here # ################################################################## @@ -740,6 +765,8 @@ if [ "$INCLUDE_SLOW_TESTS" != "true" ]; then run_with_timing "Smoke tests" run_fast_unit_tests + run_with_timing "Rust wrapper tests" run_rust_wrapper_tests + if [ "$RUN_MAKE_TESTS" == "true" ]; then run_with_timing "Basic build-and-test tests" test_make_run_tests fi @@ -771,6 +798,9 @@ fi # Run all the unit-tests first, to get basic coverage run_with_timing "Fast unit tests" "$BINDIR"/unit_test +# Run rust wrapper tests +run_with_timing "Rust wrapper tests" run_rust_wrapper_tests + # ------------------------------------------------------------------------ # Run mini-unit-tests that were excluded from bin/unit_test binary: # ------------------------------------------------------------------------ From 9d10ce846285fbb92428c8acd65a87754e8f039a Mon Sep 17 00:00:00 2001 From: Evan West Date: Fri, 4 Aug 2023 16:28:26 +0000 Subject: [PATCH 10/10] switch to which command in test.sh --- test.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test.sh b/test.sh index 96d85078c..4272f1885 100755 --- a/test.sh +++ b/test.sh @@ -667,13 +667,13 @@ function run_other_driver_tests() { # ################################################################## function run_rust_wrapper_tests() { set +x - rustc --version > /dev/null + which rustc have_rustc=$? - cargo --version > /dev/null + which cargo have_cargo=$? if [ "$have_rustc" -ne 0 ] || [ "$have_cargo" -ne 0 ]; then - echo "Rust and cargo not installed... skipping tests" + echo "Rust or cargo not installed... skipping tests" set -x return 0 fi @@ -685,7 +685,6 @@ function run_rust_wrapper_tests() { return $ret } - # ################################################################## # main() begins here # ##################################################################