From d8566a6a39393c0d2b828c0e4d53b6c59d75c376 Mon Sep 17 00:00:00 2001 From: Yuri Edward Date: Sun, 11 May 2025 21:31:00 +0200 Subject: [PATCH 1/3] Added new MemBufStr --- Cargo.toml | 2 +- src/format.rs | 150 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 150 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e1c196c..0f9df82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bp3d-util" -version = "2.1.1" +version = "2.2.0" authors = ["Yuri Edward "] edition = "2021" description = "OS independant Rust language utilities." diff --git a/src/format.rs b/src/format.rs index ee3c1a8..9445eec 100644 --- a/src/format.rs +++ b/src/format.rs @@ -30,6 +30,88 @@ use std::mem::MaybeUninit; +/// A structure which acts similar to a [FixedBufStr] but borrows from a buffer instead of owning a +/// stack allocation. +pub struct MemBufStr<'a> { + len: &'a mut usize, + buffer: &'a mut [MaybeUninit], +} + +impl<'a> MemBufStr<'a> { + /// Wraps a memory buffer with its length in a new string buffer. + /// + /// # Safety + /// + /// It is UB to construct a [MemBufStr] if `len` is not a valid position in the buffer `buffer`. + /// It is also UB to construct a [MemBufStr] from a `buffer` which does not contain only UTF-8 + /// bytes. If `len` points to uninitialized memory in `buffer` constructing [MemBufStr] is UB. + pub unsafe fn wrap_uninit(len: &'a mut usize, buffer: &'a mut [MaybeUninit]) -> MemBufStr<'a> { + MemBufStr { + buffer, + len + } + } + + /// Wraps a memory buffer with its length in a new string buffer. + /// + /// # Safety + /// + /// It is UB to construct a [MemBufStr] if `len` is not a valid position in the buffer `buffer`. + /// It is also UB to construct a [MemBufStr] from a `buffer` which does not contain only UTF-8 + /// bytes. + pub unsafe fn wrap(len: &'a mut usize, buffer: &'a mut [u8]) -> MemBufStr<'a> { + MemBufStr { + buffer: std::mem::transmute(buffer), + len + } + } + + /// Extracts the string from this buffer. + //type inference works so why should the code look awfully more complex? + #[allow(clippy::missing_transmute_annotations)] + pub fn str(&self) -> &str { + unsafe { std::str::from_utf8_unchecked(std::mem::transmute(&self.buffer[..*self.len])) } + } + + /// Appends a raw byte buffer at the end of this string buffer. + /// + /// Returns the number of bytes written. + /// + /// # Arguments + /// + /// * `buf`: the raw byte buffer to append. + /// + /// returns: usize + /// + /// # Safety + /// + /// * [MemBufStr](MemBufStr) contains only valid UTF-8 strings so buf must contain only valid UTF-8 + /// bytes. + /// * If buf contains invalid UTF-8 bytes, further operations on the log message buffer may + /// result in UB. + //type inference works so why should the code look awfully more complex? + #[allow(clippy::missing_transmute_annotations)] + pub unsafe fn write(&mut self, buf: &[u8]) -> usize { + let len = utf8_max(buf, self.buffer.len() - *self.len); + unsafe { + std::ptr::copy_nonoverlapping( + buf.as_ptr(), + std::mem::transmute(self.buffer.as_mut_ptr().add(*self.len)), + len, + ); + } + *self.len += len; + len + } +} + +impl<'a> std::fmt::Write for MemBufStr<'a> { + fn write_str(&mut self, value: &str) -> std::fmt::Result { + unsafe { self.write(value.as_bytes()) }; + Ok(()) + } +} + /// Fixed length string buffer. #[derive(Clone, Debug)] pub struct FixedBufStr { @@ -192,8 +274,9 @@ impl std::io::Write for IoToFmt { #[cfg(test)] mod tests { - use crate::format::FixedBufStr; + use crate::format::{FixedBufStr, MemBufStr}; use std::fmt::Write; + use std::mem::MaybeUninit; #[test] fn basic() { @@ -205,6 +288,18 @@ mod tests { assert_eq!(msg.str(), "this is a test"); } + #[test] + fn basic_mem() { + let mut buf: [MaybeUninit; 64] = unsafe { MaybeUninit::uninit().assume_init() }; + let mut len = 0; + let mut msg = unsafe { MemBufStr::wrap_uninit(&mut len, &mut buf) }; + let _ = write!(msg, "this"); + let _ = write!(msg, " is"); + let _ = write!(msg, " a"); + let _ = write!(msg, " test"); + assert_eq!(msg.str(), "this is a test"); + } + #[test] fn truncate_ascii() { let mut msg: FixedBufStr<4> = FixedBufStr::new(); @@ -216,6 +311,19 @@ mod tests { assert_eq!(msg.str(), "this"); } + #[test] + fn truncate_ascii_mem() { + let mut buf = [0; 4]; + let mut len = 0; + let mut msg = unsafe { MemBufStr::wrap(&mut len, &mut buf) }; + let _ = write!(msg, "this"); + let _ = write!(msg, " is"); + let _ = write!(msg, " a"); + let _ = write!(msg, " test"); + assert_eq!(msg.str().len(), 4); + assert_eq!(msg.str(), "this"); + } + #[test] fn truncate_utf8_exact() { let mut msg: FixedBufStr<3> = FixedBufStr::new(); @@ -224,6 +332,16 @@ mod tests { assert_eq!(msg.str(), "我"); } + #[test] + fn truncate_utf8_exact_mem() { + let mut buf = [0; 3]; + let mut len = 0; + let mut msg = unsafe { MemBufStr::wrap(&mut len, &mut buf) }; + let _ = write!(msg, "我"); + assert_eq!(msg.str().len(), 3); + assert_eq!(msg.str(), "我"); + } + #[test] fn truncate_utf8_exact2() { let mut msg: FixedBufStr<6> = FixedBufStr::new(); @@ -232,6 +350,16 @@ mod tests { assert_eq!(msg.str(), "我是"); } + #[test] + fn truncate_utf8_exact2_mem() { + let mut buf = [0; 6]; + let mut len = 0; + let mut msg = unsafe { MemBufStr::wrap(&mut len, &mut buf) }; + let _ = write!(msg, "我是"); + assert_eq!(msg.str().len(), 6); + assert_eq!(msg.str(), "我是"); + } + #[test] fn truncate_utf8_exact3() { let mut msg: FixedBufStr<6> = FixedBufStr::new(); @@ -240,6 +368,16 @@ mod tests { assert_eq!(msg.str(), "我abc"); } + #[test] + fn truncate_utf8_exact3_mem() { + let mut buf = [0; 6]; + let mut len = 0; + let mut msg = unsafe { MemBufStr::wrap(&mut len, &mut buf) }; + let _ = write!(msg, "我abcd"); + assert_eq!(msg.str().len(), 6); + assert_eq!(msg.str(), "我abc"); + } + #[test] fn truncate_utf8() { let mut msg: FixedBufStr<4> = FixedBufStr::new(); @@ -247,4 +385,14 @@ mod tests { assert_eq!(msg.str().len(), 3); assert_eq!(msg.str(), "我"); } + + #[test] + fn truncate_utf8_mem() { + let mut buf = [0; 4]; + let mut len = 0; + let mut msg = unsafe { MemBufStr::wrap(&mut len, &mut buf) }; + let _ = write!(msg, "我是"); + assert_eq!(msg.str().len(), 3); + assert_eq!(msg.str(), "我"); + } } From de295c69acb010e1c983dcbb6a72490be61ed4ad Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 11 May 2025 19:35:46 +0000 Subject: [PATCH 2/3] Format Rust code using rustfmt --- src/format.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/format.rs b/src/format.rs index 9445eec..91cef5b 100644 --- a/src/format.rs +++ b/src/format.rs @@ -45,11 +45,11 @@ impl<'a> MemBufStr<'a> { /// It is UB to construct a [MemBufStr] if `len` is not a valid position in the buffer `buffer`. /// It is also UB to construct a [MemBufStr] from a `buffer` which does not contain only UTF-8 /// bytes. If `len` points to uninitialized memory in `buffer` constructing [MemBufStr] is UB. - pub unsafe fn wrap_uninit(len: &'a mut usize, buffer: &'a mut [MaybeUninit]) -> MemBufStr<'a> { - MemBufStr { - buffer, - len - } + pub unsafe fn wrap_uninit( + len: &'a mut usize, + buffer: &'a mut [MaybeUninit], + ) -> MemBufStr<'a> { + MemBufStr { buffer, len } } /// Wraps a memory buffer with its length in a new string buffer. @@ -62,7 +62,7 @@ impl<'a> MemBufStr<'a> { pub unsafe fn wrap(len: &'a mut usize, buffer: &'a mut [u8]) -> MemBufStr<'a> { MemBufStr { buffer: std::mem::transmute(buffer), - len + len, } } From 89e3d58b18c7c038d05979d03766466fa1edfa72 Mon Sep 17 00:00:00 2001 From: Yuri Edward Date: Sun, 11 May 2025 21:37:47 +0200 Subject: [PATCH 3/3] Fixed most clippy warnings --- src/format.rs | 3 ++- src/string.rs | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/format.rs b/src/format.rs index 91cef5b..0881d6e 100644 --- a/src/format.rs +++ b/src/format.rs @@ -61,6 +61,7 @@ impl<'a> MemBufStr<'a> { /// bytes. pub unsafe fn wrap(len: &'a mut usize, buffer: &'a mut [u8]) -> MemBufStr<'a> { MemBufStr { + #[allow(clippy::missing_transmute_annotations)] buffer: std::mem::transmute(buffer), len, } @@ -105,7 +106,7 @@ impl<'a> MemBufStr<'a> { } } -impl<'a> std::fmt::Write for MemBufStr<'a> { +impl std::fmt::Write for MemBufStr<'_> { fn write_str(&mut self, value: &str) -> std::fmt::Result { unsafe { self.write(value.as_bytes()) }; Ok(()) diff --git a/src/string.rs b/src/string.rs index d7fef38..04c24ad 100644 --- a/src/string.rs +++ b/src/string.rs @@ -96,7 +96,6 @@ fn utf8_min(buf: &[u8], start: usize) -> &[u8] { if unsafe { buf.get_unchecked(start) } & 0x80 == 0x00 { &buf[start..] } else { - let start = start; let mut i = start; unsafe { while i < buf.len() && buf.get_unchecked(i) & 0xC0 == 0x80 {