Add optional simd utf8 validation

This commit is contained in:
Nikolay Kim 2022-01-09 19:43:49 +06:00
parent 42e140b805
commit eac1f068fb
9 changed files with 60 additions and 23 deletions

View file

@ -1,5 +1,9 @@
# Changes # Changes
## [0.1.9] (2022-01-10)
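* Add optional simd utf8 validation (enabled with the `simd` feature)
* Breaking: `TryFrom<&[u8]>`, `TryFrom<Vec<u8>>`, `TryFrom<Bytes>` and `TryFrom<BytesMut>` for `ByteString` now use `()` as the error type instead of `str::Utf8Error`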
## [0.1.8] (2021-12-18) ## [0.1.8] (2021-12-18)
* Remove futures patch dependency * Remove futures patch dependency

View file

@ -1,6 +1,6 @@
[package] [package]
name = "ntex-bytes" name = "ntex-bytes"
version = "0.1.8" version = "0.1.9"
license = "MIT" license = "MIT"
authors = ["Nikolay Kim <fafhrd91@gmail.com>", "Carl Lerche <me@carllerche.com>"] authors = ["Nikolay Kim <fafhrd91@gmail.com>", "Carl Lerche <me@carllerche.com>"]
description = "Types and traits for working with bytes (bytes crate fork)" description = "Types and traits for working with bytes (bytes crate fork)"
@ -11,13 +11,20 @@ keywords = ["buffers", "zero-copy", "io"]
categories = ["network-programming", "data-structures"] categories = ["network-programming", "data-structures"]
edition = "2018" edition = "2018"
[features]
default = []
# simd utf8 check support
simd = ["simdutf8"]
[dependencies] [dependencies]
bitflags = "1.3" bitflags = "1.3"
bytes = "1.0.0" bytes = "1.0.0"
serde = "1.0.0" serde = "1.0.0"
futures-core = { version = "0.3", default-features = false, features = ["alloc"] } futures-core = { version = "0.3", default-features = false, features = ["alloc"] }
simdutf8 = { version = "0.1.3", optional = true }
[dev-dependencies] [dev-dependencies]
serde_test = "1.0" serde_test = "1.0"
serde_json = "1.0" serde_json = "1.0"
ntex = "0.5.0-b.0" ntex = "0.5.0"

View file

@ -179,7 +179,7 @@ impl ops::Deref for ByteString {
fn deref(&self) -> &str { fn deref(&self) -> &str {
let bytes = self.0.as_ref(); let bytes = self.0.as_ref();
// SAFETY: // SAFETY:
// UTF-8 validity is guaranteed at during construction. // UTF-8 validity is guaranteed during construction.
unsafe { str::from_utf8_unchecked(bytes) } unsafe { str::from_utf8_unchecked(bytes) }
} }
} }
@ -213,42 +213,54 @@ impl<'a> From<borrow::Cow<'a, str>> for ByteString {
} }
impl TryFrom<&[u8]> for ByteString { impl TryFrom<&[u8]> for ByteString {
type Error = str::Utf8Error; type Error = ();
#[inline] #[inline]
fn try_from(value: &[u8]) -> Result<Self, Self::Error> { fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
let _ = str::from_utf8(value)?; if utf8::is_valid(&value) {
Ok(ByteString(Bytes::copy_from_slice(value))) Ok(ByteString(Bytes::copy_from_slice(value)))
} else {
Err(())
}
} }
} }
impl TryFrom<Vec<u8>> for ByteString { impl TryFrom<Vec<u8>> for ByteString {
type Error = str::Utf8Error; type Error = ();
#[inline] #[inline]
fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> { fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
let buf = String::from_utf8(value).map_err(|err| err.utf8_error())?; if utf8::is_valid(&value) {
Ok(ByteString(Bytes::from(buf))) Ok(ByteString(Bytes::from(value)))
} else {
Err(())
}
} }
} }
impl TryFrom<Bytes> for ByteString { impl TryFrom<Bytes> for ByteString {
type Error = str::Utf8Error; type Error = ();
#[inline] #[inline]
fn try_from(value: Bytes) -> Result<Self, Self::Error> { fn try_from(value: Bytes) -> Result<Self, Self::Error> {
let _ = str::from_utf8(value.as_ref())?; if utf8::is_valid(&value) {
Ok(ByteString(value)) Ok(ByteString(value))
} else {
Err(())
}
} }
} }
impl TryFrom<BytesMut> for ByteString { impl TryFrom<BytesMut> for ByteString {
type Error = str::Utf8Error; type Error = ();
#[inline] #[inline]
fn try_from(value: crate::BytesMut) -> Result<Self, Self::Error> { fn try_from(value: crate::BytesMut) -> Result<Self, Self::Error> {
let _ = str::from_utf8(&value)?; if utf8::is_valid(&value) {
Ok(ByteString(value.freeze())) Ok(ByteString(value.freeze()))
} else {
Err(())
}
} }
} }
@ -291,6 +303,20 @@ mod serde {
} }
} }
#[cfg(feature = "simd")]
mod utf8 {
    use simdutf8::basic::from_utf8;

    /// Reports whether `input` is well-formed UTF-8.
    ///
    /// Variant compiled when the `simd` feature is enabled: the check is
    /// delegated to the `simdutf8` crate's `basic` API. Only the success
    /// flag is kept; the error value is discarded.
    pub(super) fn is_valid(input: &[u8]) -> bool {
        let checked = from_utf8(input);
        checked.is_ok()
    }
}
#[cfg(not(feature = "simd"))]
mod utf8 {
    use std::str;

    /// Reports whether `input` is well-formed UTF-8.
    ///
    /// Variant compiled when the `simd` feature is disabled: the check is
    /// delegated to the standard library's `str::from_utf8`. Only the
    /// success flag is kept; the error value is discarded.
    pub(super) fn is_valid(input: &[u8]) -> bool {
        let checked = str::from_utf8(input);
        checked.is_ok()
    }
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::borrow::ToOwned; use std::borrow::ToOwned;

View file

@ -16,4 +16,4 @@ name = "ntex_codec"
path = "src/lib.rs" path = "src/lib.rs"
[dependencies] [dependencies]
ntex-bytes = "0.1" ntex-bytes = "0.1.9"

View file

@ -17,7 +17,7 @@ path = "src/lib.rs"
[dependencies] [dependencies]
ntex-codec = "0.6.0" ntex-codec = "0.6.0"
ntex-bytes = "0.1.8" ntex-bytes = "0.1.9"
ntex-util = "0.1.8" ntex-util = "0.1.8"
ntex-service = "0.3.1" ntex-service = "0.3.1"

View file

@ -18,7 +18,7 @@ default = ["http"]
[dependencies] [dependencies]
serde = "1.0" serde = "1.0"
ntex-bytes = "0.1" ntex-bytes = "0.1.9"
log = "0.4" log = "0.4"
http = { version = "0.2", optional = true } http = { version = "0.2", optional = true }
regex = { version = "1.5.4", default-features = false, features = ["std"] } regex = { version = "1.5.4", default-features = false, features = ["std"] }

View file

@ -25,7 +25,7 @@ openssl = ["tls_openssl"]
rustls = ["tls_rust"] rustls = ["tls_rust"]
[dependencies] [dependencies]
ntex-bytes = "0.1.8" ntex-bytes = "0.1.9"
ntex-io = "0.1.2" ntex-io = "0.1.2"
ntex-util = "0.1.8" ntex-util = "0.1.8"
ntex-service = "0.3.1" ntex-service = "0.3.1"

View file

@ -16,9 +16,9 @@ name = "ntex_tokio"
path = "src/lib.rs" path = "src/lib.rs"
[dependencies] [dependencies]
ntex-bytes = "0.1.8" ntex-bytes = "0.1.9"
ntex-io = "0.1.0" ntex-io = "0.1.1"
ntex-util = "0.1.6" ntex-util = "0.1.8"
log = "0.4" log = "0.4"
pin-project-lite = "0.2" pin-project-lite = "0.2"
tokio = { version = "1", default-features = false, features = ["rt", "net", "sync", "signal"] } tokio = { version = "1", default-features = false, features = ["rt", "net", "sync", "signal"] }

View file

@ -50,7 +50,7 @@ ntex-router = "0.5.1"
ntex-service = "0.3.1" ntex-service = "0.3.1"
ntex-macros = "0.1.3" ntex-macros = "0.1.3"
ntex-util = "0.1.8" ntex-util = "0.1.8"
ntex-bytes = "0.1.8" ntex-bytes = "0.1.9"
ntex-tls = "0.1.1" ntex-tls = "0.1.1"
ntex-rt = "0.4.1" ntex-rt = "0.4.1"
ntex-io = "0.1.2" ntex-io = "0.1.2"