Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "bp3d-util"
version = "1.4.2"
version = "2.0.0"
authors = ["Yuri Edward <yuri6037@outlook.com>"]
edition = "2021"
description = "OS independent Rust language utilities."
Expand All @@ -24,6 +24,7 @@ result = ["extension"]
path = ["extension"]
extension = []
index-map = []
string = []

[package.metadata.docs.rs]
all-features = true
Expand Down
24 changes: 14 additions & 10 deletions src/extension.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2024, BlockProject 3D
// Copyright (c) 2025, BlockProject 3D
//
// All rights reserved.
//
Expand Down Expand Up @@ -32,19 +32,23 @@
#[macro_export]
macro_rules! extension {
(
$(#[$meta: meta])*
pub extension $name: ident $(<$($generic: ident),*>)?: $ty: ident$(<$($generic1: ident),*>)? {
$($tokens: tt)*
}
$(
$(#[$meta: meta])*
pub extension $name: ident $(<$($generic: ident),*>)?: $ty: ty {
$($tokens: tt)*
}
)*
) => {
mod sealing {
pub trait Sealed {}
}
impl$(<$($generic1),*>)? sealing::Sealed for $ty $(<$($generic1),*>)? {}
$(
impl$(<$($generic),*>)? sealing::Sealed for $ty {}

$(#[$meta])*
pub trait $name $(<$($generic),*>)? : sealing::Sealed {
$($tokens)*
}
$(#[$meta])*
pub trait $name $(<$($generic),*>)? : sealing::Sealed {
$($tokens)*
}
)*
};
}
5 changes: 4 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2024, BlockProject 3D
// Copyright (c) 2025, BlockProject 3D
//
// All rights reserved.
//
Expand Down Expand Up @@ -54,3 +54,6 @@ pub mod extension;

#[cfg(feature = "index-map")]
pub mod index_map;

#[cfg(feature = "string")]
pub mod string;
6 changes: 3 additions & 3 deletions src/result.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2024, BlockProject 3D
// Copyright (c) 2025, BlockProject 3D
//
// All rights reserved.
//
Expand Down Expand Up @@ -33,7 +33,7 @@ use std::error::Error;

extension! {
/// Result extensions designed to simplify console based tools.
pub extension ResultExt<T>: Result<T, E> {
pub extension ResultExt<T, E>: Result<T, E> {
/// Expects a given result to unwrap without issues, in case the result is an error,
/// this function exits the program.
///
Expand All @@ -47,7 +47,7 @@ extension! {
}
}

impl<T, E: Error> ResultExt<T> for Result<T, E> {
impl<T, E: Error> ResultExt<T, E> for Result<T, E> {
fn expect_exit(self, msg: &str, code: i32) -> T {
match self {
Ok(v) => v,
Expand Down
266 changes: 266 additions & 0 deletions src/string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
// Copyright (c) 2025, BlockProject 3D
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of BlockProject 3D nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

//! String utilities.

use crate::extension;
use std::borrow::Cow;

/// The range trait that represents all supported range types for the
/// [sub_nearest](StrTools::sub_nearest) method.
///
/// This module implements it for `std::ops::Range<usize>`, `std::ops::RangeTo<usize>` and
/// `std::ops::RangeFrom<usize>`.
pub trait Range {
/// The actual implementation of nearest substring, see [sub_nearest](StrTools::sub_nearest) for
/// more information.
///
/// `obj` is the string to cut; the returned slice shares its lifetime.
fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str;
}

extension! {
/// The main StrTools extension trait, implemented for `str`.
pub extension StrTools: str {
/// A substring method which truncates strings at the nearest UTF-8 character boundary rather
/// than panicking.
///
/// The actual work is delegated to the [Range] implementation of the given range.
///
/// # Panics
///
/// This function still panics if the given range is out of bounds. It however does not panic
/// if the passed range falls within a UTF-8 encoded character.
fn sub_nearest(&self, range: impl Range) -> &str;

/// A string capitalize function which operates on UTF-8 strings.
///
/// The result is borrowed when the string is empty or already starts with an uppercase
/// character, and owned otherwise.
fn capitalise(&self) -> Cow<str>;

/// A string decapitalize function which operates on UTF-8 strings. This essentially does
/// the inverse of the [capitalise](StrTools::capitalise) function.
///
/// The result is owned when the string starts with an uppercase character, and borrowed
/// otherwise.
fn decapitalise(&self) -> Cow<str>;
}

/// The main string tools operating on raw byte slices, implemented for `[u8]`.
pub extension BufTools: [u8] {
/// A string capitalize function which operates on ASCII only strings.
///
/// The result is borrowed when the slice is empty or its first byte is already an ASCII
/// uppercase letter, and an owned copy otherwise.
fn capitalise_ascii(&self) -> Cow<[u8]>;

/// A string decapitalize function which operates on ASCII only strings. This essentially does
/// the inverse of the [capitalise_ascii](BufTools::capitalise_ascii) function.
///
/// The result is an owned copy when the first byte is an ASCII uppercase letter, and
/// borrowed otherwise.
fn decapitalise_ascii(&self) -> Cow<[u8]>;
}
}

/// Returns the longest prefix of `buf` that is at most `max` bytes long and does not end in
/// the middle of a UTF-8 encoded character.
///
/// `buf` is expected to hold valid UTF-8 (it is always fed from `str::as_bytes` in this
/// module). When `max` falls inside a multi-byte character the cut is moved back to the first
/// byte of that character, so the partial character is dropped entirely.
///
/// # Panics
///
/// Panics if `max` is greater than `buf.len()`.
fn utf8_max(buf: &[u8], max: usize) -> &[u8] {
    // Slice up-front so an out-of-bounds `max` panics before any byte is inspected.
    let head = &buf[..max];
    if max == buf.len() {
        // The end of a valid UTF-8 buffer is always a character boundary.
        return head;
    }
    // An index is a character boundary exactly when the byte stored there is not a
    // continuation byte (`0b10xx_xxxx`); walk back until that holds. Index 0 is always a
    // boundary, which also makes `max == 0` safe.
    let mut end = max;
    while end > 0 && buf[end] & 0xC0 == 0x80 {
        end -= 1;
    }
    &buf[..end]
}

/// Returns the suffix of `buf` that starts at the first character boundary at or after
/// `start`.
///
/// `buf` is expected to hold valid UTF-8 (it is always fed from `str::as_bytes` in this
/// module). When `start` falls inside a multi-byte character, the remaining continuation
/// bytes of that character are skipped so the result begins with the next full character.
///
/// # Panics
///
/// Panics if `start` is greater than `buf.len()`.
fn utf8_min(buf: &[u8], start: usize) -> &[u8] {
    // Checked slicing: `start == buf.len()` yields an empty tail, anything larger panics.
    let tail = &buf[start..];
    // Continuation bytes have the form `0b10xx_xxxx`; they belong to a character that began
    // before `start`, so they cannot open the result.
    let skip = tail
        .iter()
        .take_while(|&&byte| byte & 0xC0 == 0x80)
        .count();
    &tail[skip..]
}

impl Range for std::ops::Range<usize> {
    /// Cuts `obj` to `start..end`, moving the end backwards and the start forwards when they
    /// fall inside a multi-byte character.
    ///
    /// Returns an empty string when nothing is left after both ends were adjusted.
    ///
    /// # Panics
    ///
    /// Panics if `end` is out of bounds for `obj` or if `start` is greater than `end`.
    fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str {
        let head = utf8_max(obj.as_bytes(), self.end);
        if self.start >= head.len() {
            // Either the range was empty to begin with, or trimming the end removed every
            // byte at or after `start`. Never hand `utf8_min` an index past the prefix.
            // Inverted ranges keep panicking, like regular slicing does.
            assert!(
                self.start <= self.end,
                "slice index starts at {} but ends at {}",
                self.start,
                self.end
            );
            return "";
        }
        let bytes = utf8_min(head, self.start);
        // SAFETY: `obj` is valid UTF-8 and this relies on `utf8_max` / `utf8_min` cutting
        // only at character boundaries, so `bytes` is valid UTF-8 as well.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}

impl Range for std::ops::RangeTo<usize> {
    /// Cuts `obj` to `..end`; the position of the cut is decided by `utf8_max`.
    fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str {
        let prefix = utf8_max(obj.as_bytes(), self.end);
        // SAFETY: relies on `utf8_max` returning a prefix of `obj` that ends on a character
        // boundary, which keeps the bytes valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(prefix) }
    }
}

impl Range for std::ops::RangeFrom<usize> {
    /// Cuts `obj` to `start..`; the position of the cut is decided by `utf8_min`.
    fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str {
        let suffix = utf8_min(obj.as_bytes(), self.start);
        // SAFETY: relies on `utf8_min` returning a suffix of `obj` that starts on a character
        // boundary, which keeps the bytes valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(suffix) }
    }
}

impl StrTools for str {
    /// Forwards to the [Range] implementation of `range`.
    fn sub_nearest(&self, range: impl Range) -> &str {
        range.sub_nearest(self)
    }

    /// Upper-cases the first character of the string.
    ///
    /// Returns the string borrowed when it is empty or already starts with an uppercase
    /// character; otherwise returns an owned copy with the first character replaced by its
    /// uppercase mapping (which may be more than one character, e.g. `ß` becomes `SS`).
    fn capitalise(&self) -> Cow<str> {
        match self.chars().next() {
            Some(first) if !first.is_uppercase() => {
                let mut out = String::with_capacity(self.len());
                out.extend(first.to_uppercase());
                // Split after the whole first character, not after its first byte, so
                // multi-byte characters are neither dropped nor cut in half.
                out.push_str(&self[first.len_utf8()..]);
                Cow::Owned(out)
            }
            _ => Cow::Borrowed(self),
        }
    }

    /// Lower-cases the first character of the string.
    ///
    /// Returns an owned copy with the first character replaced by its lowercase mapping when
    /// the string starts with an uppercase character; otherwise returns the string borrowed.
    fn decapitalise(&self) -> Cow<str> {
        match self.chars().next() {
            Some(first) if first.is_uppercase() => {
                let mut out = String::with_capacity(self.len());
                out.extend(first.to_lowercase());
                // Same split rule as `capitalise`: skip the full first character.
                out.push_str(&self[first.len_utf8()..]);
                Cow::Owned(out)
            }
            _ => Cow::Borrowed(self),
        }
    }
}

impl BufTools for [u8] {
    /// Upper-cases the first byte of an ASCII buffer.
    ///
    /// Borrows the input when it is empty or already starts with `A..=Z`; otherwise returns
    /// an owned copy whose first byte went through ASCII upper-casing.
    fn capitalise_ascii(&self) -> Cow<[u8]> {
        match self.first() {
            Some(head) if !head.is_ascii_uppercase() => {
                let mut owned = self.to_vec();
                owned[0].make_ascii_uppercase();
                Cow::Owned(owned)
            }
            _ => Cow::Borrowed(self),
        }
    }

    /// Lower-cases the first byte of an ASCII buffer.
    ///
    /// Returns an owned copy with the first byte lower-cased when the input starts with
    /// `A..=Z`; otherwise borrows the input unchanged.
    fn decapitalise_ascii(&self) -> Cow<[u8]> {
        match self.first() {
            Some(head) if head.is_ascii_uppercase() => {
                let mut owned = self.to_vec();
                owned[0].make_ascii_lowercase();
                Cow::Owned(owned)
            }
            _ => Cow::Borrowed(self),
        }
    }
}

// Unit tests for the string utilities. Byte offsets below refer to the UTF-8 encoding of the
// test strings; each CJK character used here occupies 3 bytes.
#[cfg(test)]
mod tests {
use crate::string::{BufTools, StrTools};
use std::borrow::Cow;

// Pure ASCII input split at offset 1, using the open-ended range forms.
#[test]
fn sub_basic() {
let str = "Hello";
assert_eq!(str.sub_nearest(..1), "H");
assert_eq!(str.sub_nearest(1..), "ello");
}

// On ASCII input sub_nearest must agree with regular slicing.
#[test]
fn truncate_ascii() {
let s = "this is a test";
assert_eq!(s.sub_nearest(..4), "this");
assert_eq!(&s[4..7], " is");
assert_eq!(s.sub_nearest(4..7), " is");
}

// A single 3-byte character: any cut inside it leaves nothing on that side.
#[test]
fn truncate_utf8() {
let msg = "我";
assert_eq!(msg.sub_nearest(..3), "我");
assert_eq!(msg.sub_nearest(..1), "");
assert_eq!(msg.sub_nearest(1..), "");
}

// Two 3-byte characters: an end inside the second drops it, a start inside the first
// skips it.
#[test]
fn truncate_utf82() {
let msg = "我是";
assert_eq!(msg.sub_nearest(..6), "我是");
assert_eq!(msg.sub_nearest(..5), "我");
assert_eq!(msg.sub_nearest(1..), "是");
}

// A 3-byte character followed by ASCII, exercising bounded ranges whose start lies inside
// the first character.
#[test]
fn truncate_utf83() {
let msg = "我abcd";
assert_eq!(msg.sub_nearest(..6), "我abc");
assert_eq!(msg.sub_nearest(1..), "abcd");
assert_eq!(msg.sub_nearest(1..2), "");
assert_eq!(msg.sub_nearest(1..4), "a");
assert_eq!(msg.sub_nearest(1..5), "ab");
assert_eq!(msg.sub_nearest(1..msg.len()), "abcd");
assert_eq!(msg.sub_nearest(1..msg.len() - 1), "abc");
}

// capitalise / decapitalise on ASCII text; an already capitalised input must be borrowed.
#[test]
fn basic_capitalize() {
let msg = "abc";
let msg1 = "Abc";
assert_eq!(msg.capitalise(), "Abc");
assert_eq!(msg1.capitalise(), "Abc");
assert!(matches!(msg1.capitalise(), Cow::Borrowed(_)));
assert_eq!(msg1.decapitalise(), "abc");
}

// Same checks as basic_capitalize, for the byte-slice variants.
#[test]
fn ascii_capitalize() {
let msg = "abc";
let msg1 = "Abc";
assert_eq!(&*msg.as_bytes().capitalise_ascii(), b"Abc");
assert_eq!(&*msg1.as_bytes().capitalise_ascii(), b"Abc");
assert!(matches!(
msg1.as_bytes().capitalise_ascii(),
Cow::Borrowed(_)
));
assert_eq!(&*msg1.as_bytes().decapitalise_ascii(), b"abc");
}
}