From e4a2c26e09e1bcc46eb373186b115e779369ae74 Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 18:20:12 +0800 Subject: [PATCH 01/13] featperf: :zap: use simd for is_latin --- rust/fury/src/meta/meta_string.rs | 4 +- rust/fury/src/meta/mod.rs | 1 + rust/fury/src/meta/string_util.rs | 145 ++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 rust/fury/src/meta/string_util.rs diff --git a/rust/fury/src/meta/meta_string.rs b/rust/fury/src/meta/meta_string.rs index 3b46265e8d..627433c9fd 100644 --- a/rust/fury/src/meta/meta_string.rs +++ b/rust/fury/src/meta/meta_string.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +use crate::meta::string_util; + #[derive(Debug, PartialEq)] pub enum Encoding { Utf8 = 0x00, @@ -102,7 +104,7 @@ impl MetaStringEncoder { } fn is_latin(&self, s: &str) -> bool { - s.bytes().all(|b| b.is_ascii()) + string_util::is_latin(s) } pub fn encode(&self, input: &str) -> Result { diff --git a/rust/fury/src/meta/mod.rs b/rust/fury/src/meta/mod.rs index 4e4d40b29a..02871e8257 100644 --- a/rust/fury/src/meta/mod.rs +++ b/rust/fury/src/meta/mod.rs @@ -16,4 +16,5 @@ // under the License. mod meta_string; +mod string_util; pub use meta_string::{Encoding, MetaStringDecoder, MetaStringEncoder}; diff --git a/rust/fury/src/meta/string_util.rs b/rust/fury/src/meta/string_util.rs new file mode 100644 index 0000000000..e920b03b2a --- /dev/null +++ b/rust/fury/src/meta/string_util.rs @@ -0,0 +1,145 @@ +#[cfg(target_feature = "neon")] +use std::arch::aarch64::*; + +#[cfg(target_feature = "avx2")] +use std::arch::x86_64::*; + + +#[cfg(target_feature = "sse2")] +use std::arch::x86_64::*; + +#[cfg(target_arch = "x86_64")] +pub(crate) const MIN_DIM_SIZE_AVX: usize = 32; + +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + all(target_arch = "aarch64", target_feature = "neon") +))] +pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16; + +#[cfg(target_feature = "avx2")] +unsafe fn is_latin_avx(s: &str) -> bool { + let bytes = s.as_bytes(); + let len = bytes.len(); + let mut i = 0; + + let ascii_mask = _mm256_set1_epi8(0x80u8 as i8); // 0x80 = 1000 0000 + + while i + MIN_DIM_SIZE_AVX <= len { + let chunk = _mm256_loadu_si256(bytes.as_ptr().add(i) as *const __m256i); + let masked = _mm256_and_si256(chunk, ascii_mask); + let cmp = _mm256_cmpeq_epi8(masked, _mm256_setzero_si256()); + + if _mm256_movemask_epi8(cmp) != -1 { + return false; + } + + i += MIN_DIM_SIZE_AVX; + } + + while i < len { + if bytes[i] & 0x80 != 0 { + return false; + } + i += 1; + } + + true +} + + +#[cfg(target_feature = "sse2")] +unsafe fn is_latin_sse(s: &str) -> bool { + let bytes = s.as_bytes(); + let len = bytes.len(); + let mut i = 0; + + let ascii_mask = _mm_set1_epi8(0x80u8 as i8); // 0x80 = 1000 0000 + + while i + MIN_DIM_SIZE_SIMD <= len { + let chunk = _mm_loadu_si128(bytes.as_ptr().add(i) as *const __m128i); + let masked = _mm_and_si128(chunk, ascii_mask); + let cmp = _mm_cmpeq_epi8(masked, _mm_setzero_si128()); + + if _mm_movemask_epi8(cmp) != 0xFFFF { + return false; + } + + i += MIN_DIM_SIZE_SIMD; + } + + while i < len { + if bytes[i] & 0x80 != 0 { + return false; + } + i += 1; + } + + true +} + + + + +unsafe fn is_latin_neon(s: &str) -> bool { + let bytes = s.as_bytes(); + let len = bytes.len(); + let mut i = 0; + + let ascii_mask = vdupq_n_u8(0x80); + while i + MIN_DIM_SIZE_SIMD <= len { + let chunk = vld1q_u8(bytes.as_ptr().add(i)); + let masked = vandq_u8(chunk, ascii_mask); + let cmp = vceqq_u8(masked, vdupq_n_u8(0)); + + if vminvq_u8(cmp) == 0 { + return false; + } + i += MIN_DIM_SIZE_SIMD; + } + + while i < len { + if bytes[i] & 0x80 != 0 { + return false; + } + i += 1; + } + + true +} + +fn is_latin_standard(s: &str) -> bool { + s.bytes().all(|b| b.is_ascii()) +} + + + +pub(crate) fn is_latin(s: &str) -> bool { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && s.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { is_latin_avx(s) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && s.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { is_latin_sse(s)}; + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && s.len() >= MIN_DIM_SIZE_SIMD { + return unsafe {is_latin_neon(s)}; + } + } + is_latin_standard(s) + + +} \ No newline at end of file From 7367d97d869ddd95b30929552bc5995f656af2e3 Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 21:55:01 +0800 Subject: [PATCH 02/13] perf: optimize simd for is_latin --- rust/fury/Cargo.toml | 9 ++++ rust/fury/benches/simd_bench.rs | 90 +++++++++++++++++++++++++++++++ rust/fury/src/meta/string_util.rs | 67 +++++++++-------------- 3 files changed, 123 insertions(+), 43 deletions(-) create mode 100644 rust/fury/benches/simd_bench.rs diff --git a/rust/fury/Cargo.toml b/rust/fury/Cargo.toml index d39d537572..83424ebaf5 100644 --- a/rust/fury/Cargo.toml +++ b/rust/fury/Cargo.toml @@ -30,3 +30,12 @@ lazy_static = { version = "1.4" } byteorder = { version = "1.4" } chrono = "0.4" thiserror = { default-features = false, version = "1.0" } + + +[[bench]] +name = "simd_bench" +harness = false + + +[dev-dependencies] +criterion = "0.5.1" \ No newline at end of file diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs new file mode 100644 index 0000000000..110c659336 --- /dev/null +++ b/rust/fury/benches/simd_bench.rs @@ -0,0 +1,90 @@ + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +const MIN_DIM_SIZE_SIMD: usize = 16; +const MIN_DIM_SIZE_AVX: usize = 32; + +#[target_feature(enable = "sse2")] +unsafe fn is_latin_sse(s: &str) -> bool { + let bytes = s.as_bytes(); + let len = s.len(); + let ascii_mask = _mm_set1_epi8(0x80u8 as i8); + let remaining = len % MIN_DIM_SIZE_SIMD; + let range_end= len - remaining; + for i in (0..range_end).step_by(MIN_DIM_SIZE_SIMD) { + let chunk = _mm_loadu_si128(bytes.as_ptr().add(i) as *const __m128i); + let masked = _mm_and_si128(chunk, ascii_mask); + let cmp = _mm_cmpeq_epi8(masked, _mm_setzero_si128()); + if _mm_movemask_epi8(cmp) != 0xFFFF { + return false; + } + + } + for i in range_end..len { + if ! bytes[i].is_ascii() {} + return false; + } + true +} + +#[cfg(target_arch = "x86_64")] +unsafe fn is_latin_avx(s: &str) -> bool { + let bytes = s.as_bytes(); + let len = s.len(); + let ascii_mask = _mm256_set1_epi8(0x80u8 as i8); + let remaining = len % MIN_DIM_SIZE_AVX; + + for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_AVX) { + let chunk = _mm256_loadu_si256(bytes.as_ptr().add(i) as *const __m256i); + let masked = _mm256_and_si256(chunk, ascii_mask); + let cmp = _mm256_cmpeq_epi8(masked, _mm256_setzero_si256()); + if _mm256_movemask_epi8(cmp) != 0xFFFF { + return false; + } + + } + for i in (len - remaining)..len { + if ! bytes[i].is_ascii() {} + return false; + } + true +} + + +fn is_latin_std(s: &str) -> bool { + s.bytes().all(|b| b.is_ascii()) +} + +fn criterion_benchmark(c: &mut Criterion) { + let test_str_short = "Hello, World!"; + let test_str_long = "Hello, World! ".repeat(1000); + + c.bench_function("SIMD sse short", |b| { + b.iter(|| unsafe { is_latin_sse(black_box(test_str_short)) }) + }); + + c.bench_function("SIMD sse long", |b| { + b.iter(|| unsafe { is_latin_sse(black_box(&test_str_long)) }) + }); + + c.bench_function("SIMD avx short", |b| { + b.iter(|| unsafe { is_latin_avx(black_box(test_str_short)) }) + }); + + c.bench_function("SIMD avx long", |b| { + b.iter(|| unsafe { is_latin_avx(black_box(&test_str_long)) }) + }); + + c.bench_function("Standard short", |b| { + b.iter(|| is_latin_std(black_box(test_str_short))) + }); + + c.bench_function("Standard long", |b| { + b.iter(|| is_latin_std(black_box(&test_str_long))) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/rust/fury/src/meta/string_util.rs b/rust/fury/src/meta/string_util.rs index e920b03b2a..c25a10532e 100644 --- a/rust/fury/src/meta/string_util.rs +++ b/rust/fury/src/meta/string_util.rs @@ -18,33 +18,26 @@ pub(crate) const MIN_DIM_SIZE_AVX: usize = 32; ))] pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16; -#[cfg(target_feature = "avx2")] +#[cfg(target_arch = "x86_64")] unsafe fn is_latin_avx(s: &str) -> bool { let bytes = s.as_bytes(); let len = bytes.len(); - let mut i = 0; - - let ascii_mask = _mm256_set1_epi8(0x80u8 as i8); // 0x80 = 1000 0000 + let ascii_mask = _mm256_set1_epi8(0x80u8 as i8); + let remaining = len % MIN_DIM_SIZE_SIMD; - while i + MIN_DIM_SIZE_AVX <= len { + for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_SIMD) { let chunk = _mm256_loadu_si256(bytes.as_ptr().add(i) as *const __m256i); let masked = _mm256_and_si256(chunk, ascii_mask); let cmp = _mm256_cmpeq_epi8(masked, _mm256_setzero_si256()); - - if _mm256_movemask_epi8(cmp) != -1 { + if _mm256_movemask_epi8(cmp) != 0xFFFF { return false; } - i += MIN_DIM_SIZE_AVX; } - - while i < len { - if bytes[i] & 0x80 != 0 { - return false; - } - i += 1; + for i in (len - remaining)..len { + if ! bytes[i].is_ascii() {} + return false; } - true } @@ -53,59 +46,47 @@ unsafe fn is_latin_avx(s: &str) -> bool { unsafe fn is_latin_sse(s: &str) -> bool { let bytes = s.as_bytes(); let len = bytes.len(); - let mut i = 0; - - let ascii_mask = _mm_set1_epi8(0x80u8 as i8); // 0x80 = 1000 0000 + let ascii_mask = _mm_set1_epi8(0x80u8 as i8); + let remaining = len % MIN_DIM_SIZE_SIMD; - while i + MIN_DIM_SIZE_SIMD <= len { + for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_SIMD) { let chunk = _mm_loadu_si128(bytes.as_ptr().add(i) as *const __m128i); let masked = _mm_and_si128(chunk, ascii_mask); let cmp = _mm_cmpeq_epi8(masked, _mm_setzero_si128()); - if _mm_movemask_epi8(cmp) != 0xFFFF { return false; } - i += MIN_DIM_SIZE_SIMD; } - - while i < len { - if bytes[i] & 0x80 != 0 { - return false; - } - i += 1; + for i in (len - remaining)..len { + if ! bytes[i].is_ascii() {} + return false; } - true } - +#[cfg(target_feature = "neon")] unsafe fn is_latin_neon(s: &str) -> bool { let bytes = s.as_bytes(); let len = bytes.len(); - let mut i = 0; + let ascii_mask = vdupq_n_u8(0x80u8 as i8); + let remaining = len % MIN_DIM_SIZE_SIMD; - let ascii_mask = vdupq_n_u8(0x80); - while i + MIN_DIM_SIZE_SIMD <= len { + for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_SIMD) { let chunk = vld1q_u8(bytes.as_ptr().add(i)); let masked = vandq_u8(chunk, ascii_mask); - let cmp = vceqq_u8(masked, vdupq_n_u8(0)); - - if vminvq_u8(cmp) == 0 { + let cmp = vceqq_u8(masked,vdupq_n_u8(0)); + if vminvq_u8(cmp) == 0 { return false; } - i += MIN_DIM_SIZE_SIMD; + } - - while i < len { - if bytes[i] & 0x80 != 0 { - return false; - } - i += 1; + for i in (len - remaining)..len { + if ! bytes[i].is_ascii() {} + return false; } - true } From b96b698e6eb9db9ed6dabeecd2d29d2f60b3ae88 Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 22:07:20 +0800 Subject: [PATCH 03/13] fix: Code Style Check --- rust/fury/benches/simd_bench.rs | 17 +++++++++++++++++ rust/fury/src/meta/string_util.rs | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs index 110c659336..222cf08240 100644 --- a/rust/fury/benches/simd_bench.rs +++ b/rust/fury/benches/simd_bench.rs @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + use criterion::{black_box, criterion_group, criterion_main, Criterion}; #[cfg(target_arch = "x86_64")] diff --git a/rust/fury/src/meta/string_util.rs b/rust/fury/src/meta/string_util.rs index c25a10532e..93e8039c67 100644 --- a/rust/fury/src/meta/string_util.rs +++ b/rust/fury/src/meta/string_util.rs @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + #[cfg(target_feature = "neon")] use std::arch::aarch64::*; From 47eed8397dd83a79d1a870263351e317de7a313b Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 22:10:29 +0800 Subject: [PATCH 04/13] fix: rust format --- rust/fury/benches/simd_bench.rs | 16 ++++---- rust/fury/src/meta/string_util.rs | 66 ++++++++++++++----------------- 2 files changed, 36 insertions(+), 46 deletions(-) diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs index 222cf08240..b388dabb27 100644 --- a/rust/fury/benches/simd_bench.rs +++ b/rust/fury/benches/simd_bench.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. - use criterion::{black_box, criterion_group, criterion_main, Criterion}; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; @@ -29,7 +28,7 @@ unsafe fn is_latin_sse(s: &str) -> bool { let len = s.len(); let ascii_mask = _mm_set1_epi8(0x80u8 as i8); let remaining = len % MIN_DIM_SIZE_SIMD; - let range_end= len - remaining; + let range_end = len - remaining; for i in (0..range_end).step_by(MIN_DIM_SIZE_SIMD) { let chunk = _mm_loadu_si128(bytes.as_ptr().add(i) as *const __m128i); let masked = _mm_and_si128(chunk, ascii_mask); @@ -37,11 +36,11 @@ unsafe fn is_latin_sse(s: &str) -> bool { if _mm_movemask_epi8(cmp) != 0xFFFF { return false; } - } for i in range_end..len { - if ! bytes[i].is_ascii() {} - return false; + if !bytes[i].is_ascii() { + return false; + } } true } @@ -60,16 +59,15 @@ unsafe fn is_latin_avx(s: &str) -> bool { if _mm256_movemask_epi8(cmp) != 0xFFFF { return false; } - } for i in (len - remaining)..len { - if ! bytes[i].is_ascii() {} - return false; + if !bytes[i].is_ascii() { + return false; + } } true } - fn is_latin_std(s: &str) -> bool { s.bytes().all(|b| b.is_ascii()) } diff --git a/rust/fury/src/meta/string_util.rs b/rust/fury/src/meta/string_util.rs index 93e8039c67..6141e0d8e8 100644 --- a/rust/fury/src/meta/string_util.rs +++ b/rust/fury/src/meta/string_util.rs @@ -21,7 +21,6 @@ use std::arch::aarch64::*; #[cfg(target_feature = "avx2")] use std::arch::x86_64::*; - #[cfg(target_feature = "sse2")] use std::arch::x86_64::*; @@ -49,16 +48,15 @@ unsafe fn is_latin_avx(s: &str) -> bool { if _mm256_movemask_epi8(cmp) != 0xFFFF { return false; } - } for i in (len - remaining)..len { - if ! bytes[i].is_ascii() {} - return false; + if !bytes[i].is_ascii() { + return false; + } } true } - #[cfg(target_feature = "sse2")] unsafe fn is_latin_sse(s: &str) -> bool { let bytes = s.as_bytes(); @@ -73,17 +71,15 @@ unsafe fn is_latin_sse(s: &str) -> bool { if _mm_movemask_epi8(cmp) != 0xFFFF { return false; } - } for i in (len - remaining)..len { - if ! bytes[i].is_ascii() {} - return false; + if !bytes[i].is_ascii() { + return false; + } } true } - - #[cfg(target_feature = "neon")] unsafe fn is_latin_neon(s: &str) -> bool { let bytes = s.as_bytes(); @@ -94,15 +90,15 @@ unsafe fn is_latin_neon(s: &str) -> bool { for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_SIMD) { let chunk = vld1q_u8(bytes.as_ptr().add(i)); let masked = vandq_u8(chunk, ascii_mask); - let cmp = vceqq_u8(masked,vdupq_n_u8(0)); - if vminvq_u8(cmp) == 0 { + let cmp = vceqq_u8(masked, vdupq_n_u8(0)); + if vminvq_u8(cmp) == 0 { return false; } - } for i in (len - remaining)..len { - if ! bytes[i].is_ascii() {} - return false; + if !bytes[i].is_ascii() { + return false; + } } true } @@ -111,33 +107,29 @@ fn is_latin_standard(s: &str) -> bool { s.bytes().all(|b| b.is_ascii()) } - - pub(crate) fn is_latin(s: &str) -> bool { #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && s.len() >= MIN_DIM_SIZE_AVX { - if is_x86_feature_detected!("avx") - && is_x86_feature_detected!("fma") - && s.len() >= MIN_DIM_SIZE_AVX - { - return unsafe { is_latin_avx(s) }; - } + return unsafe { is_latin_avx(s) }; } + } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - if is_x86_feature_detected!("sse") && s.len() >= MIN_DIM_SIZE_SIMD { - return unsafe { is_latin_sse(s)}; - } + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && s.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { is_latin_sse(s) }; } + } - #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] - { - if std::arch::is_aarch64_feature_detected!("neon") && s.len() >= MIN_DIM_SIZE_SIMD { - return unsafe {is_latin_neon(s)}; - } + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && s.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { is_latin_neon(s) }; } - is_latin_standard(s) - - -} \ No newline at end of file + } + is_latin_standard(s) +} From 4057df042539fb06d009c84be58189d7b68394b5 Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 22:57:37 +0800 Subject: [PATCH 05/13] fix: ci bug & add test --- rust/fury/Cargo.toml | 3 +- rust/fury/benches/simd_bench.rs | 10 ++-- rust/fury/src/meta/string_util.rs | 85 ++++++++++++++++++++++++++----- 3 files changed, 79 insertions(+), 19 deletions(-) diff --git a/rust/fury/Cargo.toml b/rust/fury/Cargo.toml index 83424ebaf5..bba04afb73 100644 --- a/rust/fury/Cargo.toml +++ b/rust/fury/Cargo.toml @@ -38,4 +38,5 @@ harness = false [dev-dependencies] -criterion = "0.5.1" \ No newline at end of file +criterion = "0.5.1" +rand = "0.8.5" \ No newline at end of file diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs index b388dabb27..aeb777a184 100644 --- a/rust/fury/benches/simd_bench.rs +++ b/rust/fury/benches/simd_bench.rs @@ -37,8 +37,8 @@ unsafe fn is_latin_sse(s: &str) -> bool { return false; } } - for i in range_end..len { - if !bytes[i].is_ascii() { + for item in bytes.iter().take(range_end).skip(range_end){ + if item.is_ascii() { return false; } } @@ -51,7 +51,7 @@ unsafe fn is_latin_avx(s: &str) -> bool { let len = s.len(); let ascii_mask = _mm256_set1_epi8(0x80u8 as i8); let remaining = len % MIN_DIM_SIZE_AVX; - + let range_end = len-remaining; for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_AVX) { let chunk = _mm256_loadu_si256(bytes.as_ptr().add(i) as *const __m256i); let masked = _mm256_and_si256(chunk, ascii_mask); @@ -60,8 +60,8 @@ unsafe fn is_latin_avx(s: &str) -> bool { return false; } } - for i in (len - remaining)..len { - if !bytes[i].is_ascii() { + for item in bytes.iter().take(range_end).skip(range_end){ + if item.is_ascii() { return false; } } diff --git a/rust/fury/src/meta/string_util.rs b/rust/fury/src/meta/string_util.rs index 6141e0d8e8..e1d815e750 100644 --- a/rust/fury/src/meta/string_util.rs +++ b/rust/fury/src/meta/string_util.rs @@ -39,18 +39,19 @@ unsafe fn is_latin_avx(s: &str) -> bool { let bytes = s.as_bytes(); let len = bytes.len(); let ascii_mask = _mm256_set1_epi8(0x80u8 as i8); - let remaining = len % MIN_DIM_SIZE_SIMD; + let remaining = len % MIN_DIM_SIZE_AVX; + let range_end = len - remaining; - for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_SIMD) { + for i in (0..range_end).step_by(MIN_DIM_SIZE_AVX) { let chunk = _mm256_loadu_si256(bytes.as_ptr().add(i) as *const __m256i); let masked = _mm256_and_si256(chunk, ascii_mask); let cmp = _mm256_cmpeq_epi8(masked, _mm256_setzero_si256()); - if _mm256_movemask_epi8(cmp) != 0xFFFF { + if _mm256_movemask_epi8(cmp) != -1 { return false; } } - for i in (len - remaining)..len { - if !bytes[i].is_ascii() { + for item in bytes.iter().take(len).skip(range_end){ + if item.is_ascii() { return false; } } @@ -63,8 +64,8 @@ unsafe fn is_latin_sse(s: &str) -> bool { let len = bytes.len(); let ascii_mask = _mm_set1_epi8(0x80u8 as i8); let remaining = len % MIN_DIM_SIZE_SIMD; - - for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_SIMD) { + let range_end = len - remaining; + for i in (0..range_end).step_by(MIN_DIM_SIZE_SIMD) { let chunk = _mm_loadu_si128(bytes.as_ptr().add(i) as *const __m128i); let masked = _mm_and_si128(chunk, ascii_mask); let cmp = _mm_cmpeq_epi8(masked, _mm_setzero_si128()); @@ -72,8 +73,8 @@ unsafe fn is_latin_sse(s: &str) -> bool { return false; } } - for i in (len - remaining)..len { - if !bytes[i].is_ascii() { + for item in bytes.iter().take(len).skip(range_end){ + if item.is_ascii() { return false; } } @@ -86,8 +87,8 @@ unsafe fn is_latin_neon(s: &str) -> bool { let len = bytes.len(); let ascii_mask = vdupq_n_u8(0x80u8 as i8); let remaining = len % MIN_DIM_SIZE_SIMD; - - for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_SIMD) { + let range_end = len - remaining; + for i in (0..range_end).step_by(MIN_DIM_SIZE_SIMD) { let chunk = vld1q_u8(bytes.as_ptr().add(i)); let masked = vandq_u8(chunk, ascii_mask); let cmp = vceqq_u8(masked, vdupq_n_u8(0)); @@ -95,8 +96,8 @@ unsafe fn is_latin_neon(s: &str) -> bool { return false; } } - for i in (len - remaining)..len { - if !bytes[i].is_ascii() { + for item in bytes.iter().take(len).skip(range_end){ + if item.is_ascii() { return false; } } @@ -133,3 +134,61 @@ pub(crate) fn is_latin(s: &str) -> bool { } is_latin_standard(s) } + +#[cfg(test)] +mod tests { + // 导入外部模块中的内容 + use super::*; + use rand::Rng; + + fn generate_random_string(length: usize) -> String { + const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + let mut rng = rand::thread_rng(); + + let result: String = (0..length) + .map(|_| { + let idx = rng.gen_range(0..CHARSET.len()); + CHARSET[idx] as char + }) + .collect(); + + result + } + + #[test] + fn test_is_latin() { + let s = generate_random_string(1000); + let not_latin_str = generate_random_string(1000) + "abc\u{1234}"; + + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") && is_x86_feature_detected!("fma") { + assert!(unsafe { is_latin_avx(&s) }); + assert!(!unsafe { is_latin_avx(¬_latin_str) }); + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && s.len() >= MIN_DIM_SIZE_SIMD { + assert!(unsafe { is_latin_sse(&s) }); + assert!(!unsafe { is_latin_sse(¬_latin_str) }); + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && s.len() >= MIN_DIM_SIZE_SIMD { + + assert!(unsafe { is_latin_neon(&s) }); + assert!(!unsafe { is_latin_neon(¬_latin_str) }); + } + } + assert!(is_latin_standard(&s)); + assert!(!is_latin_standard(¬_latin_str)); + + + + + } +} From 8bde36ce44d62cb7832b3f93bd17bd99d6d16720 Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 23:00:50 +0800 Subject: [PATCH 06/13] fix: CI code format --- rust/fury/benches/simd_bench.rs | 6 +++--- rust/fury/src/meta/string_util.rs | 11 +++-------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs index aeb777a184..2650be4f7e 100644 --- a/rust/fury/benches/simd_bench.rs +++ b/rust/fury/benches/simd_bench.rs @@ -37,7 +37,7 @@ unsafe fn is_latin_sse(s: &str) -> bool { return false; } } - for item in bytes.iter().take(range_end).skip(range_end){ + for item in bytes.iter().take(range_end).skip(range_end) { if item.is_ascii() { return false; } @@ -51,7 +51,7 @@ unsafe fn is_latin_avx(s: &str) -> bool { let len = s.len(); let ascii_mask = _mm256_set1_epi8(0x80u8 as i8); let remaining = len % MIN_DIM_SIZE_AVX; - let range_end = len-remaining; + let range_end = len - remaining; for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_AVX) { let chunk = _mm256_loadu_si256(bytes.as_ptr().add(i) as *const __m256i); let masked = _mm256_and_si256(chunk, ascii_mask); @@ -60,7 +60,7 @@ unsafe fn is_latin_avx(s: &str) -> bool { return false; } } - for item in bytes.iter().take(range_end).skip(range_end){ + for item in bytes.iter().take(range_end).skip(range_end) { if item.is_ascii() { return false; } diff --git a/rust/fury/src/meta/string_util.rs b/rust/fury/src/meta/string_util.rs index e1d815e750..97b8fb3899 100644 --- a/rust/fury/src/meta/string_util.rs +++ b/rust/fury/src/meta/string_util.rs @@ -50,7 +50,7 @@ unsafe fn is_latin_avx(s: &str) -> bool { return false; } } - for item in bytes.iter().take(len).skip(range_end){ + for item in bytes.iter().take(len).skip(range_end) { if item.is_ascii() { return false; } @@ -73,7 +73,7 @@ unsafe fn is_latin_sse(s: &str) -> bool { return false; } } - for item in bytes.iter().take(len).skip(range_end){ + for item in bytes.iter().take(len).skip(range_end) { if item.is_ascii() { return false; } @@ -96,7 +96,7 @@ unsafe fn is_latin_neon(s: &str) -> bool { return false; } } - for item in bytes.iter().take(len).skip(range_end){ + for item in bytes.iter().take(len).skip(range_end) { if item.is_ascii() { return false; } @@ -179,16 +179,11 @@ mod tests { #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { if std::arch::is_aarch64_feature_detected!("neon") && s.len() >= MIN_DIM_SIZE_SIMD { - assert!(unsafe { is_latin_neon(&s) }); assert!(!unsafe { is_latin_neon(¬_latin_str) }); } } assert!(is_latin_standard(&s)); assert!(!is_latin_standard(¬_latin_str)); - - - - } } From 9bd674072be1995521bfd9160b2a2d76610ab0b9 Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 23:17:00 +0800 Subject: [PATCH 07/13] fix: CI macos bug --- rust/fury/src/meta/string_util.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/fury/src/meta/string_util.rs b/rust/fury/src/meta/string_util.rs index 97b8fb3899..de30c6e489 100644 --- a/rust/fury/src/meta/string_util.rs +++ b/rust/fury/src/meta/string_util.rs @@ -85,7 +85,7 @@ unsafe fn is_latin_sse(s: &str) -> bool { unsafe fn is_latin_neon(s: &str) -> bool { let bytes = s.as_bytes(); let len = bytes.len(); - let ascii_mask = vdupq_n_u8(0x80u8 as i8); + let ascii_mask = vdupq_n_u8(0x80); let remaining = len % MIN_DIM_SIZE_SIMD; let range_end = len - remaining; for i in (0..range_end).step_by(MIN_DIM_SIZE_SIMD) { From e2f0c9f3005935d2d3bd3a1c2f7286b0f57ea9a2 Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 23:23:26 +0800 Subject: [PATCH 08/13] fix: CI bench bug --- rust/fury/benches/simd_bench.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs index 2650be4f7e..4fc0109678 100644 --- a/rust/fury/benches/simd_bench.rs +++ b/rust/fury/benches/simd_bench.rs @@ -16,11 +16,21 @@ // under the License. use criterion::{black_box, criterion_group, criterion_main, Criterion}; -#[cfg(target_arch = "x86_64")] +#[cfg(target_feature = "avx2")] +use std::arch::x86_64::*; + +#[cfg(target_feature = "sse2")] use std::arch::x86_64::*; -const MIN_DIM_SIZE_SIMD: usize = 16; -const MIN_DIM_SIZE_AVX: usize = 32; +#[cfg(target_arch = "x86_64")] +pub(crate) const MIN_DIM_SIZE_AVX: usize = 32; + +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + all(target_arch = "aarch64", target_feature = "neon") +))] +pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16; #[target_feature(enable = "sse2")] unsafe fn is_latin_sse(s: &str) -> bool { @@ -45,7 +55,7 @@ unsafe fn is_latin_sse(s: &str) -> bool { true } -#[cfg(target_arch = "x86_64")] +#[cfg(target_feature = "avx2")] unsafe fn is_latin_avx(s: &str) -> bool { let bytes = s.as_bytes(); let len = s.len(); From e93bd3262ce3fa1220de9b4cea96879205b21d1b Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 23:26:20 +0800 Subject: [PATCH 09/13] fix: CI bench bug --- rust/fury/benches/simd_bench.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs index 4fc0109678..dab796fc16 100644 --- a/rust/fury/benches/simd_bench.rs +++ b/rust/fury/benches/simd_bench.rs @@ -32,7 +32,7 @@ pub(crate) const MIN_DIM_SIZE_AVX: usize = 32; ))] pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16; -#[target_feature(enable = "sse2")] +#[cfg(target_feature = "sse2")] unsafe fn is_latin_sse(s: &str) -> bool { let bytes = s.as_bytes(); let len = s.len(); @@ -86,18 +86,19 @@ fn criterion_benchmark(c: &mut Criterion) { let test_str_short = "Hello, World!"; let test_str_long = "Hello, World! ".repeat(1000); + #[cfg(target_feature = "sse2")] c.bench_function("SIMD sse short", |b| { b.iter(|| unsafe { is_latin_sse(black_box(test_str_short)) }) }); - + #[cfg(target_feature = "sse2")] c.bench_function("SIMD sse long", |b| { b.iter(|| unsafe { is_latin_sse(black_box(&test_str_long)) }) }); - + #[cfg(target_feature = "avx2")] c.bench_function("SIMD avx short", |b| { b.iter(|| unsafe { is_latin_avx(black_box(test_str_short)) }) }); - + #[cfg(target_feature = "avx2")] c.bench_function("SIMD avx long", |b| { b.iter(|| unsafe { is_latin_avx(black_box(&test_str_long)) }) }); From f214c334a1a006b1fcbe203066edf6b4aca2cb51 Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 23:29:32 +0800 Subject: [PATCH 10/13] fix: CI bench bug --- rust/fury/benches/simd_bench.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs index dab796fc16..428b6cc5c8 100644 --- a/rust/fury/benches/simd_bench.rs +++ b/rust/fury/benches/simd_bench.rs @@ -22,7 +22,7 @@ use std::arch::x86_64::*; #[cfg(target_feature = "sse2")] use std::arch::x86_64::*; -#[cfg(target_arch = "x86_64")] +#[cfg(target_feature = "avx2")] pub(crate) const MIN_DIM_SIZE_AVX: usize = 32; #[cfg(any( From bad9609a9108e727fafa88342c1d8d64f629f60f Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 23:32:10 +0800 Subject: [PATCH 11/13] fix: CI bench bug --- rust/fury/benches/simd_bench.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs index 428b6cc5c8..749facaf8c 100644 --- a/rust/fury/benches/simd_bench.rs +++ b/rust/fury/benches/simd_bench.rs @@ -27,8 +27,7 @@ pub(crate) const MIN_DIM_SIZE_AVX: usize = 32; #[cfg(any( target_arch = "x86", - target_arch = "x86_64", - all(target_arch = "aarch64", target_feature = "neon") + target_arch = "x86_64" ))] pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16; From 599d6a6201d656af342515f0cdf8f36cfdf82f21 Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 23:34:04 +0800 Subject: [PATCH 12/13] fix: code format --- rust/fury/benches/simd_bench.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs index 749facaf8c..1ca08152eb 100644 --- a/rust/fury/benches/simd_bench.rs +++ b/rust/fury/benches/simd_bench.rs @@ -25,10 +25,7 @@ use std::arch::x86_64::*; #[cfg(target_feature = "avx2")] pub(crate) const MIN_DIM_SIZE_AVX: usize = 32; -#[cfg(any( - target_arch = "x86", - target_arch = "x86_64" -))] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16; #[cfg(target_feature = "sse2")] From 48efeaaface39b08f010fb11102ea843ee323c58 Mon Sep 17 00:00:00 2001 From: hezz Date: Mon, 22 Jul 2024 23:44:09 +0800 Subject: [PATCH 13/13] fix: cargo test --- rust/fury/benches/simd_bench.rs | 4 ++-- rust/fury/src/meta/string_util.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs index 1ca08152eb..7649533fc3 100644 --- a/rust/fury/benches/simd_bench.rs +++ b/rust/fury/benches/simd_bench.rs @@ -44,7 +44,7 @@ unsafe fn is_latin_sse(s: &str) -> bool { } } for item in bytes.iter().take(range_end).skip(range_end) { - if item.is_ascii() { + if !item.is_ascii() { return false; } } @@ -67,7 +67,7 @@ unsafe fn is_latin_avx(s: &str) -> bool { } } for item in bytes.iter().take(range_end).skip(range_end) { - if item.is_ascii() { + if !item.is_ascii() { return false; } } diff --git a/rust/fury/src/meta/string_util.rs b/rust/fury/src/meta/string_util.rs index de30c6e489..ea8659110b 100644 --- a/rust/fury/src/meta/string_util.rs +++ b/rust/fury/src/meta/string_util.rs @@ -51,7 +51,7 @@ unsafe fn is_latin_avx(s: &str) -> bool { } } for item in bytes.iter().take(len).skip(range_end) { - if item.is_ascii() { + if !item.is_ascii() { return false; } } @@ -74,7 +74,7 @@ unsafe fn is_latin_sse(s: &str) -> bool { } } for item in bytes.iter().take(len).skip(range_end) { - if item.is_ascii() { + if !item.is_ascii() { return false; } } @@ -97,7 +97,7 @@ unsafe fn is_latin_neon(s: &str) -> bool { } } for item in bytes.iter().take(len).skip(range_end) { - if item.is_ascii() { + if !item.is_ascii() { return false; } }