Skip to content

Commit e836b85

Browse files
committed
[deps] Update is_utf8 to version 1.3.2
1 parent 8f34cc8 commit e836b85

File tree

2 files changed

+49
-24
lines changed

2 files changed

+49
-24
lines changed

deps/is_utf8/CMakeLists.txt

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.15)
33
project(is_utf8
44
DESCRIPTION "Fast UTF-8 Validation"
55
LANGUAGES CXX
6-
VERSION 1.3.1
6+
VERSION 1.3.2
77
)
88

99
include(GNUInstallDirs)
@@ -20,12 +20,16 @@ if (NOT CMAKE_BUILD_TYPE)
2020
endif()
2121
endif()
2222

23-
set(CMAKE_CXX_STANDARD 14)
23+
# We compile tools, tests, etc. with C++11 by default. To use a different
24+
# standard, override it on the specific target.
25+
set(IS_UTF8_CXX_STANDARD 11 CACHE STRING "the C++ standard to use for is_utf8")
26+
27+
set(CMAKE_CXX_STANDARD ${IS_UTF8_CXX_STANDARD})
2428
set(CMAKE_CXX_STANDARD_REQUIRED ON)
2529
set(CMAKE_CXX_EXTENSIONS OFF)
2630
set(CMAKE_MACOSX_RPATH OFF)
2731

28-
set(IS_UTF8_LIB_VERSION "1.3.1" CACHE STRING "is_utf8 library version")
32+
set(IS_UTF8_LIB_VERSION "1.3.2" CACHE STRING "is_utf8 library version")
2933
set(IS_UTF8_LIB_SOVERSION "1" CACHE STRING "is_utf8 library soversion")
3034

3135
set(IS_UTF8_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
@@ -40,6 +44,8 @@ endif(BUILD_TESTING)
4044

4145

4246
add_subdirectory(benchmarks)
47+
48+
message(STATUS "Compiling using the C++ standard:" ${CMAKE_CXX_STANDARD})
4349
# ---- Install rules ----
4450
add_library(is_utf8::is_utf8 ALIAS is_utf8)
4551

deps/is_utf8/src/is_utf8.cpp

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -872,8 +872,13 @@ template <typename T> std::string toBinaryString(T b) {
872872
#ifndef IS_UTF8_IMPLEMENTATION_ARM64
873873
#define IS_UTF8_IMPLEMENTATION_ARM64 (IS_UTF8_IS_ARM64)
874874
#endif
875-
#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 \
876-
IS_UTF8_IMPLEMENTATION_ARM64 &&IS_UTF8_IS_ARM64
875+
876+
#if IS_UTF8_IMPLEMENTATION_ARM64 &&IS_UTF8_IS_ARM64
877+
#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 1
878+
#else
879+
#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 0
880+
#endif
881+
877882
878883
#if IS_UTF8_IMPLEMENTATION_ARM64
879884
@@ -1116,8 +1121,9 @@ template <typename T, typename Mask = simd8<bool>> struct base_u8 {
11161121
return *this_cast;
11171122
}
11181123
1119-
is_utf8_really_inline Mask operator==(const simd8<T> other) const {
1120-
return vceqq_u8(*this, other);
1124+
friend is_utf8_really_inline Mask operator==(const simd8<T> lhs,
1125+
const simd8<T> rhs) {
1126+
return vceqq_u8(lhs, rhs);
11211127
}
11221128
11231129
template <int N = 1>
@@ -2172,7 +2178,7 @@ namespace icelake {} // namespace icelake
21722178
// We should not get warnings while including <x86intrin.h> yet we do
21732179
// under some versions of GCC.
21742180
// If the x86intrin.h header has uninitialized values that are problematic,
2175-
// it is a GCC issue, we want to ignore these warnigns.
2181+
// it is a GCC issue, we want to ignore these warnings.
21762182
IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized)
21772183
#endif
21782184
@@ -2342,8 +2348,11 @@ IS_UTF8_POP_DISABLE_WARNINGS
23422348
#endif
23432349
// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this
23442350
// next line, see https://github.com/simdutf/simdutf/issues/1247
2345-
#define IS_UTF8_CAN_ALWAYS_RUN_HASWELL \
2346-
((IS_UTF8_IMPLEMENTATION_HASWELL) && (IS_UTF8_IS_X86_64) && (__AVX2__))
2351+
#if ((IS_UTF8_IMPLEMENTATION_HASWELL) && (IS_UTF8_IS_X86_64) && (__AVX2__))
2352+
#define IS_UTF8_CAN_ALWAYS_RUN_HASWELL 1
2353+
#else
2354+
#define IS_UTF8_CAN_ALWAYS_RUN_HASWELL 0
2355+
#endif
23472356

23482357
#if IS_UTF8_IMPLEMENTATION_HASWELL
23492358

@@ -2398,7 +2407,7 @@ class implementation final : public is_utf8_internals::implementation {
23982407
// We should not get warnings while including <x86intrin.h> yet we do
23992408
// under some versions of GCC.
24002409
// If the x86intrin.h header has uninitialized values that are problematic,
2401-
// it is a GCC issue, we want to ignore these warnigns.
2410+
// it is a GCC issue, we want to ignore these warnings.
24022411
IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized)
24032412
#endif
24042413

@@ -2539,8 +2548,9 @@ struct base8 : base<simd8<T>> {
25392548
is_utf8_really_inline T last() const {
25402549
return _mm256_extract_epi8(*this, 31);
25412550
}
2542-
is_utf8_really_inline Mask operator==(const simd8<T> other) const {
2543-
return _mm256_cmpeq_epi8(*this, other);
2551+
friend is_utf8_really_inline Mask operator==(const simd8<T> lhs,
2552+
const simd8<T> rhs) {
2553+
return _mm256_cmpeq_epi8(lhs, rhs);
25442554
}
25452555

25462556
static const int SIZE = sizeof(base<T>::value);
@@ -2965,8 +2975,9 @@ struct base16 : base<simd16<T>> {
29652975
is_utf8_really_inline base16(const Pointer *ptr)
29662976
: base16(_mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr))) {}
29672977

2968-
is_utf8_really_inline Mask operator==(const simd16<T> other) const {
2969-
return _mm256_cmpeq_epi16(*this, other);
2978+
friend is_utf8_really_inline Mask operator==(const simd16<T> lhs,
2979+
const simd16<T> rhs) {
2980+
return _mm256_cmpeq_epi16(lhs, rhs);
29702981
}
29712982

29722983
/// the size of vector in bytes
@@ -3340,9 +3351,11 @@ IS_UTF8_UNTARGET_REGION
33403351

33413352
#endif
33423353

3343-
#define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE \
3344-
(IS_UTF8_IMPLEMENTATION_WESTMERE && IS_UTF8_IS_X86_64 && __SSE4_2__ && \
3345-
__PCLMUL__)
3354+
#if IS_UTF8_IMPLEMENTATION_WESTMERE && IS_UTF8_IS_X86_64 && __SSE4_2__ && __PCLMUL__
3355+
#define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE 1
3356+
#else
3357+
#define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE 0
3358+
#endif
33463359

33473360
#if IS_UTF8_IMPLEMENTATION_WESTMERE
33483361

@@ -3395,7 +3408,7 @@ class implementation final : public is_utf8_internals::implementation {
33953408
// We should not get warnings while including <x86intrin.h> yet we do
33963409
// under some versions of GCC.
33973410
// If the x86intrin.h header has uninitialized values that are problematic,
3398-
// it is a GCC issue, we want to ignore these warnigns.
3411+
// it is a GCC issue, we want to ignore these warnings.
33993412
IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized)
34003413
#endif
34013414

@@ -3517,8 +3530,9 @@ struct base8 : base<simd8<T>> {
35173530
is_utf8_really_inline base8() : base<simd8<T>>() {}
35183531
is_utf8_really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
35193532

3520-
is_utf8_really_inline Mask operator==(const simd8<T> other) const {
3521-
return _mm_cmpeq_epi8(*this, other);
3533+
friend is_utf8_really_inline Mask operator==(const simd8<T> lhs,
3534+
const simd8<T> rhs) {
3535+
return _mm_cmpeq_epi8(lhs, rhs);
35223536
}
35233537

35243538
static const int SIZE = sizeof(base<simd8<T>>::value);
@@ -4032,8 +4046,9 @@ struct base16 : base<simd16<T>> {
40324046
is_utf8_really_inline base16(const Pointer *ptr)
40334047
: base16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr))) {}
40344048

4035-
is_utf8_really_inline Mask operator==(const simd16<T> other) const {
4036-
return _mm_cmpeq_epi16(*this, other);
4049+
friend is_utf8_really_inline Mask operator==(const simd16<T> lhs,
4050+
const simd16<T> rhs) {
4051+
return _mm_cmpeq_epi16(lhs, rhs);
40374052
}
40384053

40394054
static const int SIZE = sizeof(base<simd16<T>>::value);
@@ -4407,7 +4422,11 @@ IS_UTF8_UNTARGET_REGION
44074422
#endif
44084423
#endif
44094424

4410-
#define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK (IS_UTF8_IMPLEMENTATION_FALLBACK)
4425+
#if IS_UTF8_IMPLEMENTATION_FALLBACK
4426+
#define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK 1
4427+
#else
4428+
#define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK 0
4429+
#endif
44114430

44124431
#if IS_UTF8_IMPLEMENTATION_FALLBACK
44134432

0 commit comments

Comments
 (0)