From cc9eb35d4e251e79976d4c1728387a46393b6916 Mon Sep 17 00:00:00 2001 From: dzmitry-lahoda Date: Mon, 13 Jan 2025 16:53:34 +0000 Subject: [PATCH] feat: borsh support and compile time limits to schemas --- CHANGELOG.md | 3 +- Cargo.toml | 2 + README.md | 4 + src/bounded_vec.rs | 238 +++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 236 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2c603f..ddefc0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] - ReleaseDate - prevent out of bound construction and define empty vs nonempty at compile time - +- optional `borsh` support + ## [0.7.1] - 2022-08-01 ### Added - fix `Abrbitrary` impl to honor upper(U) and lower(L) bounds; diff --git a/Cargo.toml b/Cargo.toml index 5ac0615..4f6f942 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,11 +15,13 @@ serde = { version = "1.0.123", default-features = false, features = [ schemars = { version = ">=0.8,<1", default-features = false, optional = true } thiserror = { version = "2", default-features = false } proptest = { version = "1.0.0", optional = true } +borsh = { version = "1.5.4", default-features = false, features = ["unstable__schema"], optional = true} [features] serde = ["dep:serde"] schema = ["serde", "dep:schemars"] arbitrary = ["proptest"] +borsh = ["dep:borsh"] [dev-dependencies] proptest = { version = "1.0.0" } diff --git a/README.md b/README.md index 42a27c2..feb41c8 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ ## bounded-vec `BoundedVec` - Non-empty rust `std::vec::Vec` wrapper with type guarantees on lower(`L`) and upper(`U`) bounds for items quantity. Inspired by [vec1](https://github.com/rustonaut/vec1). +`EmptyBoundedVec` if only upper bound `U` is needed. This crate is `#![no_std]` compatible with `alloc`. 
## Example @@ -24,6 +25,9 @@ assert_eq!(data, [2u8,4].into()); - optional(non-default) `serde` feature that adds serialization to `BoundedVec`. - optional(non-default) `schema` feature that adds JSON schema support via `schemars` (requires `serde`). - optional(non-default) `arbitrary` feature that adds `proptest::Arbitrary` implementation to `BoundedVec`. +- optional(non-default) `borsh` feature that adds `borsh` binary encoding, decoding and schema +- optional(non-default) `arbitrary` for `proptest` support + ## Changelog See [CHANGELOG.md](CHANGELOG.md). diff --git a/src/bounded_vec.rs b/src/bounded_vec.rs index 3461ff6..7171a1c 100644 --- a/src/bounded_vec.rs +++ b/src/bounded_vec.rs @@ -8,7 +8,7 @@ use thiserror::Error; /// /// # Type Parameters /// -/// * `W` - witness type to prove vector ranges and shape if interface accordingly +/// * `W` - witness type to prove vector ranges and shape its interface accordingly #[derive(PartialEq, Eq, Debug, Clone, Hash, PartialOrd, Ord)] pub struct BoundedVec> { inner: Vec, @@ -38,8 +38,11 @@ pub enum BoundedVecOutOfBounds { /// Module for type witnesses used to prove vector bounds at compile time pub mod witnesses { - - // NOTE: we can have proves if needed for some cases like 8/16/32/64 upper bound, so can make memory and serde more compile safe and efficient + // NOTE: + // we can have proofs if needed for some cases like 8/16/32/64 upper bound and operating range, + // and make memory layout more efficient: + // - decide stackalloc or smallvec or std::vec, depending on range * size_of at compile time + // - make some values of vec to be not usize, but other numbers /// Compile-time proof of valid bounds. Must be consturcted with same bounds to instantiate `BoundedVec`. 
#[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -59,13 +62,30 @@ pub mod witnesses { panic!("L must be less than or equal to U") } + serde::(); NonEmpty::(()) } } + const fn serde() { + #[cfg(feature = "schema")] + if U as u128 > u32::MAX as u128 { + // there is no const-safe way to cast usize to u32, nor to any wider integer type + panic!("`schemars` encodes `maxLength` as u32, so `U` must be less than or equal to `u32::MAX`") + } + + #[cfg(feature = "borsh")] + if U as u128 > u32::MAX as u128 { + panic!("`borsh` specifies size of dynamic containers as u32, so `U` must be less than or equal to `u32::MAX`") + } + } + /// Type a compile-time proof for possibly empty vector with upper bound pub const fn empty() -> Empty { - const { Empty::(()) } + const { + serde::(); + Empty::(()) + } } } @@ -88,7 +108,7 @@ impl BoundedVec> { /// BoundedVec::<_, 0, 8, witnesses::Empty<8>>::from_vec(vec![1u8, 2]).unwrap(); /// ``` pub fn from_vec(items: Vec) -> Result { - let _witness = witnesses::empty::(); + let _ = witnesses::empty::(); let len = items.len(); if len > U { Err(BoundedVecOutOfBounds::UpperBoundError { @@ -238,7 +258,7 @@ impl BoundedVec>::from_vec(vec![1u8, 2]).unwrap(); /// ``` pub fn from_vec(items: Vec) -> Result { - let _witness = witnesses::non_empty::(); + let _ = witnesses::non_empty::(); let len = items.len(); if len < L { Err(BoundedVecOutOfBounds::LowerBoundError { @@ -578,6 +598,184 @@ impl OptBoundedVecToVec } } +/// Supports encoding and decoding with [borsh](https://crates.io/crates/borsh), and BorshSchema. +/// +/// By default Borsh uses u32 as length prefix for sequences. +/// For `BoundedVec` we use u8, u16 or u32 depending on `U`. +/// Increasing or decreasing U may not always be backward compatible. 
+#[cfg(feature = "borsh")] +mod borsh_impl { + use super::*; + use alloc::collections::btree_map::{BTreeMap, Entry}; + use borsh::{BorshDeserialize, BorshSchema, BorshSerialize}; + + impl BorshSerialize + for BoundedVec + { + fn serialize( + &self, + writer: &mut Writer, + ) -> borsh::io::Result<()> { + let len = self.inner.len(); + if U <= usize::from(u8::MAX) { + #[expect(clippy::expect_used)] + let len: u8 = len.try_into().expect("proved by design"); + len.serialize(writer)?; + } else if U <= usize::from(u16::MAX) { + #[expect(clippy::expect_used)] + let len: u16 = len.try_into().expect("proved by design"); + len.serialize(writer)?; + } else { + #[expect(clippy::expect_used)] + let len: u32 = len.try_into().expect("proved by design"); + len.serialize(writer)?; + }; + + // adapted from internals of borsh-rs + let data = self.as_slice(); + if let Some(u8_slice) = T::u8_slice(data) { + writer.write_all(u8_slice)?; + } else { + for item in data { + item.serialize(writer)?; + } + } + Ok(()) + } + } + + impl BorshDeserialize + for BoundedVec + { + fn deserialize_reader(reader: &mut R) -> borsh::io::Result { + let len = if U <= usize::from(u8::MAX) { + usize::from(u8::deserialize_reader(reader)?) + } else if U <= usize::from(u16::MAX) { + usize::from(u16::deserialize_reader(reader)?) + } else { + let len = u32::deserialize_reader(reader)?; + usize::try_from(len).map_err(|_| { + borsh::io::Error::new( + borsh::io::ErrorKind::Other, + alloc::format!("Length overflow: got {}", len), + ) + })? 
+ }; + if len < L { + return Err(borsh::io::Error::new( + borsh::io::ErrorKind::Other, + alloc::format!("Lower bound violation: got {} (expected >= {})", len, L), + )); + } else if len > U { + return Err(borsh::io::Error::new( + borsh::io::ErrorKind::Other, + alloc::format!("Upper bound violation: got {} (expected <= {})", len, U), + )); + } + // adapted from internals for borsh-rs + let data = if len == 0 { + Vec::new() + } else if let Some(vec_bytes) = T::vec_from_reader(len as u32, reader)? { + vec_bytes + } else { + let el_size = core::mem::size_of::() as u32; + let cautious = + core::cmp::max(core::cmp::min(len as u32, 4096 / el_size), 1) as usize; + + // TODO(16): return capacity allocation when we can safely do that. + let mut result = Vec::with_capacity(cautious); + for _ in 0..len { + result.push(T::deserialize_reader(reader)?); + } + result + }; + + Ok(Self { + inner: data, + _marker: core::marker::PhantomData, + }) + } + } + + impl BorshSchema for BoundedVec { + fn add_definitions_recursively( + definitions: &mut BTreeMap, + ) { + let len_width = if U <= usize::from(u8::MAX) { + 1 + } else if U <= usize::from(u16::MAX) { + 2 + } else { + 4 // proven by design + }; + + let definition = borsh::schema::Definition::Sequence { + length_width: len_width, + #[expect(clippy::expect_used)] + length_range: core::ops::RangeInclusive::::new( + u64::try_from(L).expect("proved by design"), + u64::try_from(U).expect("proved by design"), + ), + elements: T::declaration(), + }; + match definitions.entry(Self::declaration()) { + Entry::Occupied(occ) => { + let existing_def = occ.get(); + assert_eq!( + existing_def, + &definition, + "Redefining type schema for {}. 
Types with the same names are not supported.", + occ.key() + ); + } + Entry::Vacant(vac) => { + vac.insert(definition); + } + } + T::add_definitions_recursively(definitions); + } + + fn declaration() -> borsh::schema::Declaration { + alloc::format!("BoundedVec<{}, {}, {}>", T::declaration(), L, U) + } + } + + #[cfg(test)] + mod tests { + use borsh::schema::BorshSchemaContainer; + + use super::*; + #[test] + #[allow(clippy::expect_used)] + fn borsh_encdec() { + let data: BoundedVec = vec![1u8, 2].try_into().expect("borsh works"); + let buf = &mut Vec::new(); + data.serialize(buf).expect("borsh works"); + let decoded = + BoundedVec::::deserialize(&mut buf.as_slice()).expect("borsh works"); + let compatible_decoded = + BoundedVec::::deserialize(&mut buf.as_slice()).expect("borsh works"); + assert_eq!(data.get(0), decoded.get(0)); + assert_eq!(data.get(1), decoded.get(1)); + assert_eq!(data.get(0), compatible_decoded.get(0)); + assert_eq!(data.get(1), compatible_decoded.get(1)); + assert!(BoundedVec::::deserialize(&mut buf.as_slice()).is_err()); + + let schema = BorshSchemaContainer::for_type::>(); + let schema = schema + .get_definition("BoundedVec") + .expect("borsh works"); + assert!(matches!( + schema, + borsh::schema::Definition::Sequence { + length_width: 1, + .. 
+ } + )); + } + } +} + #[allow(clippy::unwrap_used)] #[cfg(feature = "arbitrary")] mod arbitrary { @@ -653,8 +851,8 @@ mod serde_impl { use schemars::schema::{InstanceType, SchemaObject}; use schemars::JsonSchema; - // we cannot use attributes, because the do not work with `const`, only numeric literals supported - impl JsonSchema for BoundedVec { + // we cannot use `serde` attributes, because these do not work with `const`, only numeric literals supported + impl JsonSchema for BoundedVec { fn schema_name() -> alloc::string::String { alloc::format!("BoundedVec{}Min{}Max{}", T::schema_name(), L, U) } @@ -666,8 +864,14 @@ mod serde_impl { items: Some(schemars::schema::SingleOrVec::Single( T::json_schema(gen).into(), )), - min_items: Some(L as u32), - max_items: Some(U as u32), + #[expect(clippy::expect_used)] // design time failure + min_items: Some( + u32::try_from(L).expect("JSON schema does not support so large ranges"), + ), + #[expect(clippy::expect_used)] // design time failure + max_items: Some( + u32::try_from(U).expect("JSON schema does not support so large ranges"), + ), ..Default::default() })), ..Default::default() @@ -675,6 +879,20 @@ mod serde_impl { .into() } } + + #[cfg(test)] + mod tests { + use super::*; + use schemars::schema_for; + #[test] + fn json_schema() { + let schema = schema_for!(BoundedVec); + let min_items = schema.schema.array.as_ref().unwrap().min_items.unwrap(); + let max_items = schema.schema.array.as_ref().unwrap().max_items.unwrap(); + assert_eq!(min_items, 2); + assert_eq!(max_items, 8); + } + } } }