From f3d48dfd65c5127d03d776ec36eafedd4fed76dc Mon Sep 17 00:00:00 2001 From: wumeibanfa <904360463@qq.com> Date: Mon, 27 Oct 2025 20:18:48 +0800 Subject: [PATCH 1/2] 1 --- be/src/runtime/primitive_type.h | 14 +-- be/src/vec/columns/column_varbinary.cpp | 6 +- be/src/vec/columns/column_varbinary.h | 18 ++-- .../{string_view.cpp => string_container.cpp} | 6 +- .../{string_view.h => string_container.h} | 40 ++++----- be/src/vec/core/field.h | 8 +- be/src/vec/data_types/data_type_varbinary.cpp | 6 +- be/src/vec/data_types/data_type_varbinary.h | 6 +- be/src/vec/functions/function_varbinary.cpp | 8 +- be/src/vec/utils/varbinaryop_subbinary.h | 8 +- be/test/vec/columns/column_varbinary_test.cpp | 48 +++++------ ...iew_test.cpp => string_container_test.cpp} | 86 +++++++++---------- .../data_types/data_type_varbinary_test.cpp | 8 +- be/test/vec/function/function_test_util.h | 6 +- 14 files changed, 134 insertions(+), 134 deletions(-) rename be/src/vec/common/{string_view.cpp => string_container.cpp} (92%) rename be/src/vec/common/{string_view.h => string_container.h} (80%) rename be/test/vec/common/{string_view_test.cpp => string_container_test.cpp} (78%) diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index 6dd0fed82be38f..d89fa1a0f160d8 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -28,7 +28,7 @@ #include "olap/decimal12.h" #include "olap/uint24.h" #include "runtime/define_primitive_type.h" -#include "vec/common/string_view.h" +#include "vec/common/string_container.h" #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/runtime/vdatetime_value.h" @@ -634,15 +634,15 @@ struct PrimitiveTypeTraits { }; template <> struct PrimitiveTypeTraits { - using CppType = doris::StringView; + using CppType = doris::StringContainer; using StorageFieldType = CppType; - using CppNativeType = doris::StringView; - using ColumnItemType = doris::StringView; + using CppNativeType = doris::StringContainer; + using ColumnItemType = doris::StringContainer; using DataType = vectorized::DataTypeVarbinary; using ColumnType = vectorized::ColumnVarbinary; - using NearestFieldType = doris::StringView; - using AvgNearestFieldType = doris::StringView; - using AvgNearestFieldType256 = doris::StringView; + using NearestFieldType = doris::StringContainer; + using AvgNearestFieldType = doris::StringContainer; + using AvgNearestFieldType256 = doris::StringContainer; static constexpr PrimitiveType NearestPrimitiveType = TYPE_VARBINARY; static constexpr PrimitiveType AvgNearestPrimitiveType = TYPE_VARBINARY; static constexpr PrimitiveType AvgNearestPrimitiveType256 = TYPE_VARBINARY; diff --git a/be/src/vec/columns/column_varbinary.cpp b/be/src/vec/columns/column_varbinary.cpp index 00698921e60205..51b517d483af5b 100644 --- a/be/src/vec/columns/column_varbinary.cpp +++ b/be/src/vec/columns/column_varbinary.cpp @@ -115,7 +115,7 @@ size_t ColumnVarbinary::filter(const IColumn::Filter& filter) { } else { auto val = src_vec.get_data()[i]; const auto* dst = _arena.insert(val.data(), val.size()); - _data[pos] = doris::StringView(dst, val.size()); + _data[pos] = doris::StringContainer(dst, val.size()); } pos++; } @@ -144,7 +144,7 @@ MutableColumnPtr ColumnVarbinary::permute(const IColumn::Permutation& perm, size continue; } const auto* dst = const_cast(_arena).insert(val.data(), val.size()); - res_data[i] = doris::StringView(dst, val.size()); + res_data[i] = doris::StringContainer(dst, val.size()); } return res; @@ -159,7 +159,7 @@ void ColumnVarbinary::replace_column_data(const IColumn& rhs, size_t row, size_t return; } const auto* dst = _arena.insert(val.data(), val.size()); - _data[self_row] = doris::StringView(dst, val.size()); + _data[self_row] = doris::StringContainer(dst, val.size()); } #include "common/compile_check_end.h" diff --git a/be/src/vec/columns/column_varbinary.h b/be/src/vec/columns/column_varbinary.h index 6411eb26ec0850..4aec2ba17dcff0 100644 --- a/be/src/vec/columns/column_varbinary.h +++ b/be/src/vec/columns/column_varbinary.h @@ -26,7 +26,7 @@ #include "vec/columns/column.h" #include "vec/common/arena.h" #include "vec/common/assert_cast.h" -#include "vec/common/string_view.h" +#include "vec/common/string_container.h" namespace doris::vectorized { #include "common/compile_check_begin.h" @@ -36,7 +36,7 @@ class ColumnVarbinary final : public COWHelper { friend class COWHelper; public: - using Container = PaddedPODArray; + using Container = PaddedPODArray; ColumnVarbinary() = default; ColumnVarbinary(const size_t n) : _data(n) {} @@ -73,7 +73,7 @@ class ColumnVarbinary final : public COWHelper { char* alloc(size_t length) { return _arena.alloc(length); } void insert(const Field& x) override { - auto value = vectorized::get(x); + auto value = vectorized::get(x); insert_data(value.data(), value.size()); } @@ -84,7 +84,7 @@ class ColumnVarbinary final : public COWHelper { } void insert_data(const char* pos, size_t length) override { - if (length <= doris::StringView::kInlineSize) { + if (length <= doris::StringContainer::kInlineSize) { insert_inline_data(pos, length); } else { insert_to_buffer(pos, length); @@ -92,16 +92,16 @@ class ColumnVarbinary final : public COWHelper { } void insert_inline_data(const char* pos, size_t length) { - DCHECK(length <= doris::StringView::kInlineSize); - _data.push_back(doris::StringView(pos, cast_set(length))); + DCHECK(length <= doris::StringContainer::kInlineSize); + _data.push_back(doris::StringContainer(pos, cast_set(length))); } void insert_to_buffer(const char* pos, size_t length) { const char* dst = _arena.insert(pos, length); - _data.push_back(doris::StringView(dst, cast_set(length))); + _data.push_back(doris::StringContainer(dst, cast_set(length))); } - void insert_default() override { _data.push_back(doris::StringView()); } + void insert_default() override { _data.push_back(doris::StringContainer()); } int compare_at(size_t n, size_t m, const IColumn& rhs_, int /*nan_direction_hint*/) const override { @@ -131,7 +131,7 @@ class ColumnVarbinary final : public COWHelper { size_t allocated_bytes() const override { return _data.allocated_bytes() + _arena.size(); } size_t byte_size() const override { - size_t bytes = _data.size() * sizeof(doris::StringView); + size_t bytes = _data.size() * sizeof(doris::StringContainer); return bytes + _arena.used_size(); } diff --git a/be/src/vec/common/string_view.cpp b/be/src/vec/common/string_container.cpp similarity index 92% rename from be/src/vec/common/string_view.cpp rename to be/src/vec/common/string_container.cpp index 657e4585f22e54..804359f945d103 100644 --- a/be/src/vec/common/string_view.cpp +++ b/be/src/vec/common/string_container.cpp @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include "vec/common/string_view.h" +#include "vec/common/string_container.h" namespace doris { -bool StringView::operator==(const StringView& other) const { +bool StringContainer::operator==(const StringContainer& other) const { // Compare lengths and first 4 characters. if (size_and_prefix_as_int64() != other.size_and_prefix_as_int64()) { return false; @@ -34,7 +34,7 @@ bool StringView::operator==(const StringView& other) const { return memcmp(value_.data + kPrefixSize, other.value_.data + kPrefixSize, size_ - kPrefixSize) == 0; } -int32_t StringView::compare(const StringView& other) const { +int32_t StringContainer::compare(const StringContainer& other) const { if (prefix_as_int() != other.prefix_as_int()) { // The result is decided on prefix. The shorter will be less because the // prefix is padded with zeros. diff --git a/be/src/vec/common/string_view.h b/be/src/vec/common/string_container.h similarity index 80% rename from be/src/vec/common/string_view.h rename to be/src/vec/common/string_container.h index 5cd560aad4a871..72f52366c64d26 100644 --- a/be/src/vec/common/string_view.h +++ b/be/src/vec/common/string_container.h @@ -35,22 +35,22 @@ namespace doris { // exposes a subset of the interface. If the string is 12 characters // or less, it is inlined and no reference is held. If it is longer, a // reference to the string is held and the 4 first characters are -// cached in the StringView. This allows failing comparisons early and +// cached in the StringContainer. This allows failing comparisons early and // reduces the CPU cache working set when dealing with short strings. -class StringView { +class StringContainer { #include "common/compile_check_begin.h" public: using value_type = char; static constexpr size_t kPrefixSize = 4 * sizeof(char); static constexpr size_t kInlineSize = 12; - StringView() { - static_assert(sizeof(StringView) == 16); - memset(this, 0, sizeof(StringView)); + StringContainer() { + static_assert(sizeof(StringContainer) == 16); + memset(this, 0, sizeof(StringContainer)); } - StringView(const char* data, uint32_t len) : size_(len) { + StringContainer(const char* data, uint32_t len) : size_(len) { DCHECK_GE(len, 0); DCHECK(data || len == 0); if (isInline()) { @@ -72,20 +72,20 @@ class StringView { } } - StringView(unsigned char* data, uint32_t len) - : StringView(reinterpret_cast(data), len) {} + StringContainer(unsigned char* data, uint32_t len) + : StringContainer(reinterpret_cast(data), len) {} bool isInline() const { return isInline(size_); } ALWAYS_INLINE static constexpr bool isInline(uint32_t size) { return size <= kInlineSize; } - explicit StringView(std::string&& value) = delete; - explicit StringView(const std::string& value) - : StringView(value.data(), cast_set(value.size())) {} - explicit StringView(std::string_view value) - : StringView(value.data(), cast_set(value.size())) {} - /* implicit */ StringView(const char* data) - : StringView(data, cast_set(strlen(data))) {} + explicit StringContainer(std::string&& value) = delete; + explicit StringContainer(const std::string& value) + : StringContainer(value.data(), cast_set(value.size())) {} + explicit StringContainer(std::string_view value) + : StringContainer(value.data(), cast_set(value.size())) {} + /* implicit */ StringContainer(const char* data) + : StringContainer(data, cast_set(strlen(data))) {} doris::StringRef to_string_ref() const { return {data(), size()}; } operator std::string_view() && = delete; @@ -101,12 +101,12 @@ class StringView { void set_size(uint32_t size) { size_ = size; } - bool operator==(const StringView& other) const; - friend std::ostream& operator<<(std::ostream& os, const StringView& stringView) { - os.write(stringView.data(), stringView.size()); + bool operator==(const StringContainer& other) const; + friend std::ostream& operator<<(std::ostream& os, const StringContainer& StringContainer) { + os.write(StringContainer.data(), StringContainer.size()); return os; } - auto operator<=>(const StringView& other) const { + auto operator<=>(const StringContainer& other) const { const auto cmp = compare(other); return cmp < 0 ? std::strong_ordering::less : cmp > 0 ? std::strong_ordering::greater @@ -116,7 +116,7 @@ class StringView { // Returns 0, if this == other // < 0, if this < other // > 0, if this > other - int32_t compare(const StringView& other) const; + int32_t compare(const StringContainer& other) const; const char* begin() && = delete; const char* begin() const& { return data(); } diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 1131ecfc4f7844..fac05715e8d7dd 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -38,7 +38,7 @@ #include "olap/hll.h" #include "util/bitmap_value.h" #include "util/quantile_state.h" -#include "vec/common/string_view.h" +#include "vec/common/string_container.h" #include "vec/common/uint128.h" #include "vec/core/types.h" #include "vec/json/path_in_data.h" @@ -388,7 +388,7 @@ class Field { case PrimitiveType::TYPE_VARCHAR: return get() <=> rhs.get(); case PrimitiveType::TYPE_VARBINARY: - return get() <=> rhs.get(); + return get() <=> rhs.get(); case PrimitiveType::TYPE_DECIMAL32: return get() <=> rhs.get(); case PrimitiveType::TYPE_DECIMAL64: @@ -436,7 +436,7 @@ class Field { f(field.template get()); return; case PrimitiveType::TYPE_VARBINARY: - f(field.template get()); + f(field.template get()); return; case PrimitiveType::TYPE_JSONB: f(field.template get()); @@ -490,7 +490,7 @@ class Field { DBMS_MIN_FIELD_SIZE - sizeof(PrimitiveType), Null, UInt64, UInt128, Int64, Int128, IPv6, Float64, String, JsonbField, Array, Tuple, Map, VariantMap, DecimalField, DecimalField, DecimalField, DecimalField, - DecimalField, BitmapValue, HyperLogLog, QuantileState, doris::StringView> + DecimalField, BitmapValue, HyperLogLog, QuantileState, doris::StringContainer> storage; PrimitiveType type; diff --git a/be/src/vec/data_types/data_type_varbinary.cpp b/be/src/vec/data_types/data_type_varbinary.cpp index 440b79f112e090..94efc7b10afb65 100644 --- a/be/src/vec/data_types/data_type_varbinary.cpp +++ b/be/src/vec/data_types/data_type_varbinary.cpp @@ -35,7 +35,7 @@ #include "vec/common/assert_cast.h" #include "vec/common/string_buffer.hpp" #include "vec/common/string_ref.h" -#include "vec/common/string_view.h" +#include "vec/common/string_container.h" #include "vec/core/field.h" #include "vec/core/types.h" @@ -43,7 +43,7 @@ namespace doris::vectorized { #include "common/compile_check_begin.h" Field DataTypeVarbinary::get_default() const { - return Field::create_field(StringView()); + return Field::create_field(StringContainer()); } MutableColumnPtr DataTypeVarbinary::create_column() const { @@ -132,7 +132,7 @@ FieldWithDataType DataTypeVarbinary::get_field_with_data_type(const IColumn& col const auto& column_data = assert_cast(column); return FieldWithDataType {.field = Field::create_field( - doris::StringView(column_data.get_data_at(row_num))), + doris::StringContainer(column_data.get_data_at(row_num))), .base_scalar_type_id = get_primitive_type()}; } diff --git a/be/src/vec/data_types/data_type_varbinary.h b/be/src/vec/data_types/data_type_varbinary.h index fa13d19287d458..848ad502cf94b5 100644 --- a/be/src/vec/data_types/data_type_varbinary.h +++ b/be/src/vec/data_types/data_type_varbinary.h @@ -27,7 +27,7 @@ #include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" #include "serde/data_type_string_serde.h" -#include "vec/common/string_view.h" +#include "vec/common/string_container.h" #include "vec/core/field.h" #include "vec/data_types/data_type.h" #include "vec/data_types/serde/data_type_serde.h" @@ -40,7 +40,7 @@ class IColumn; class DataTypeVarbinary : public IDataType { public: using ColumnType = ColumnVarbinary; - using FieldType = doris::StringView; + using FieldType = doris::StringContainer; static constexpr PrimitiveType PType = TYPE_VARBINARY; @@ -72,7 +72,7 @@ class DataTypeVarbinary : public IDataType { Field get_field(const TExprNode& node) const override { DCHECK_EQ(node.node_type, TExprNodeType::VARBINARY_LITERAL); DCHECK(node.__isset.varbinary_literal); - return Field::create_field(doris::StringView(node.varbinary_literal.value)); + return Field::create_field(doris::StringContainer(node.varbinary_literal.value)); } FieldWithDataType get_field_with_data_type(const IColumn& column, diff --git a/be/src/vec/functions/function_varbinary.cpp b/be/src/vec/functions/function_varbinary.cpp index ec592c236bc14f..fadde6dfb39261 100644 --- a/be/src/vec/functions/function_varbinary.cpp +++ b/be/src/vec/functions/function_varbinary.cpp @@ -64,7 +64,7 @@ class FunctionToBinary : public IFunction { auto col_res = ColumnVarbinary::create(); const auto& data = col->get_chars(); const auto& offsets = col->get_offsets(); - col_res->get_data().assign(input_rows_count, StringView()); + col_res->get_data().assign(input_rows_count, StringContainer()); for (int i = 0; i < input_rows_count; ++i) { const auto* source = reinterpret_cast(&data[offsets[i - 1]]); @@ -155,7 +155,7 @@ struct VarbinaryLengthImpl { return {std::make_shared()}; } - static Status vector(const PaddedPODArray& data, + static Status vector(const PaddedPODArray& data, PaddedPODArray& res) { size_t rows_count = data.size(); res.resize(rows_count); @@ -174,7 +174,7 @@ struct ToBase64BinaryImpl { using ColumnType = ColumnString; static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY; - static Status vector(const PaddedPODArray& data, + static Status vector(const PaddedPODArray& data, ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) { auto rows_count = data.size(); dst_offsets.resize(rows_count); @@ -221,7 +221,7 @@ struct FromBase64BinaryImpl { static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, ColumnVarbinary* res, NullMap& null_map) { auto rows_count = offsets.size(); - res->get_data().assign(rows_count, StringView()); + res->get_data().assign(rows_count, StringContainer()); for (size_t i = 0; i < rows_count; i++) { const auto* source = reinterpret_cast(&data[offsets[i - 1]]); diff --git a/be/src/vec/utils/varbinaryop_subbinary.h b/be/src/vec/utils/varbinaryop_subbinary.h index ef4d0b1d20a2ad..2ffc394f168ec7 100644 --- a/be/src/vec/utils/varbinaryop_subbinary.h +++ b/be/src/vec/utils/varbinaryop_subbinary.h @@ -30,17 +30,17 @@ namespace doris::vectorized { constexpr auto SIZE_OF_UINT = sizeof(uint32_t); struct VarBinaryOP { - static void check_and_insert_data(doris::StringView& sView, const char* data, uint32_t len, + static void check_and_insert_data(doris::StringContainer& sView, const char* data, uint32_t len, bool before_is_inline) { if (before_is_inline) { sView.set_size(len); } else { - sView = doris::StringView(data, len); + sView = doris::StringContainer(data, len); } } static std::pair alloc(ColumnVarbinary* res_col, size_t index, uint32_t len) { - bool is_inline = StringView::isInline(len); + bool is_inline = StringContainer::isInline(len); char* dst = nullptr; if (is_inline) { dst = reinterpret_cast(&(res_col->get_data()[index])) + SIZE_OF_UINT; @@ -90,7 +90,7 @@ struct SubBinaryUtil { res->get_data().reserve(size); for (size_t i = 0; i < size; ++i) { - doris::StringView binary = binarys->get_data()[index_check_const(i)]; + doris::StringContainer binary = binarys->get_data()[index_check_const(i)]; int binary_size = static_cast(binary.size()); int start_value = start->get_data()[index_check_const(i)]; diff --git a/be/test/vec/columns/column_varbinary_test.cpp b/be/test/vec/columns/column_varbinary_test.cpp index f593c1d20017b2..08a79feebb27b5 100644 --- a/be/test/vec/columns/column_varbinary_test.cpp +++ b/be/test/vec/columns/column_varbinary_test.cpp @@ -33,7 +33,7 @@ #include "vec/columns/column_string.h" #include "vec/common/assert_cast.h" #include "vec/common/string_ref.h" -#include "vec/common/string_view.h" +#include "vec/common/string_container.h" #include "vec/core/types.h" namespace doris::vectorized { @@ -64,9 +64,9 @@ TEST_F(ColumnVarbinaryTest, BasicInsertGetPopClear) { EXPECT_EQ(col->get_name(), std::string("ColumnVarbinary")); EXPECT_EQ(col->size(), 0U); - const size_t inline_len = std::min(doris::StringView::kInlineSize, 8); + const size_t inline_len = std::min(doris::StringContainer::kInlineSize, 8); const std::string small = make_bytes(inline_len, 0x11); - const std::string big = make_bytes(doris::StringView::kInlineSize + 32, 0x22); + const std::string big = make_bytes(doris::StringContainer::kInlineSize + 32, 0x22); size_t before_bytes = col->byte_size(); @@ -78,7 +78,7 @@ TEST_F(ColumnVarbinaryTest, BasicInsertGetPopClear) { ASSERT_EQ(memcmp(r0.data, small.data(), small.size()), 0); size_t after_small_bytes = col->byte_size(); - ASSERT_EQ(after_small_bytes - before_bytes, sizeof(doris::StringView)); + ASSERT_EQ(after_small_bytes - before_bytes, sizeof(doris::StringContainer)); ASSERT_EQ(after_small_bytes - before_bytes, 16); col->insert_default(); @@ -94,11 +94,11 @@ TEST_F(ColumnVarbinaryTest, BasicInsertGetPopClear) { ASSERT_EQ(memcmp(r2.data, big.data(), big.size()), 0); size_t after_big_bytes = col->byte_size(); - // big insert adds one StringView slot + big payload in arena (Arena may add alignment/overhead) + // big insert adds one StringContainer slot + big payload in arena (Arena may add alignment/overhead) size_t diff = after_big_bytes - after_small_bytes; std::cout << "after_big_bytes: " << after_big_bytes << " after_small_bytes: " << after_small_bytes << " diff: " << diff << std::endl; - ASSERT_GE(diff, sizeof(doris::StringView) + big.size()); + ASSERT_GE(diff, sizeof(doris::StringContainer) + big.size()); // pop_back col->pop_back(1); @@ -115,7 +115,7 @@ TEST_F(ColumnVarbinaryTest, BasicInsertGetPopClear) { TEST_F(ColumnVarbinaryTest, InsertFromAndRanges) { auto src = ColumnVarbinary::create(); std::vector vals = {make_bytes(1, 0x01), make_bytes(2, 0x02), - make_bytes(doris::StringView::kInlineSize + 5, 0x03), + make_bytes(doris::StringContainer::kInlineSize + 5, 0x03), make_bytes(0, 0x00), make_bytes(7, 0x05)}; for (auto& v : vals) { src->insert_data(v.data(), v.size()); @@ -161,11 +161,11 @@ TEST_F(ColumnVarbinaryTest, FilterBothModes) { // Mix inline (small) and non-inline (large > kInlineSize) values std::vector vals = { make_bytes(1, 0x10), // inline - make_bytes(doris::StringView::kInlineSize + 5, 0x91), // non-inline (dropped) + make_bytes(doris::StringContainer::kInlineSize + 5, 0x91), // non-inline (dropped) make_bytes(3, 0x12), // inline - make_bytes(doris::StringView::kInlineSize + 7, 0x92), // non-inline + make_bytes(doris::StringContainer::kInlineSize + 7, 0x92), // non-inline make_bytes(0, 0x00), // empty (dropped) - make_bytes(doris::StringView::kInlineSize + 9, 0x93) // non-inline + make_bytes(doris::StringContainer::kInlineSize + 9, 0x93) // non-inline }; for (auto& v : vals) { col->insert_data(v.data(), v.size()); @@ -206,9 +206,9 @@ TEST_F(ColumnVarbinaryTest, Permute) { // Include large (non-inline) entries to exercise arena path std::vector vals = { make_bytes(1, 0x20), // inline - make_bytes(doris::StringView::kInlineSize + 3, 0xA0), // non-inline + make_bytes(doris::StringContainer::kInlineSize + 3, 0xA0), // non-inline make_bytes(3, 0x22), // inline - make_bytes(doris::StringView::kInlineSize + 8, 0xA1) // non-inline + make_bytes(doris::StringContainer::kInlineSize + 8, 0xA1) // non-inline }; for (auto& v : vals) { col->insert_data(v.data(), v.size()); @@ -242,7 +242,7 @@ TEST_F(ColumnVarbinaryTest, Permute) { TEST_F(ColumnVarbinaryTest, CloneResized) { auto col = ColumnVarbinary::create(); std::vector vals = {make_bytes(1, 0x30), make_bytes(0, 0x00), - make_bytes(doris::StringView::kInlineSize + 1, 0x31)}; + make_bytes(doris::StringContainer::kInlineSize + 1, 0x31)}; for (auto& v : vals) { col->insert_data(v.data(), v.size()); } @@ -277,7 +277,7 @@ TEST_F(ColumnVarbinaryTest, ReplaceColumnData) { // mix inline and non-inline std::vector vals = { make_bytes(2, 0x40), // inline - make_bytes(doris::StringView::kInlineSize + 4, 0xB0), // non-inline + make_bytes(doris::StringContainer::kInlineSize + 4, 0xB0), // non-inline make_bytes(4, 0x42) // inline }; for (auto& v : vals) { @@ -286,7 +286,7 @@ TEST_F(ColumnVarbinaryTest, ReplaceColumnData) { auto rhs = ColumnVarbinary::create(); std::vector rhs_vals = { - make_bytes(doris::StringView::kInlineSize + 7, 0xC0), // non-inline + make_bytes(doris::StringContainer::kInlineSize + 7, 0xC0), // non-inline make_bytes(1, 0x51) // inline }; for (auto& v : rhs_vals) { @@ -308,7 +308,7 @@ TEST_F(ColumnVarbinaryTest, ReplaceColumnData) { TEST_F(ColumnVarbinaryTest, SerializeDeserializeRoundtripManual) { auto col = ColumnVarbinary::create(); - std::string v = make_bytes(doris::StringView::kInlineSize + 17, 0x60); + std::string v = make_bytes(doris::StringContainer::kInlineSize + 17, 0x60); std::vector buf; auto len = static_cast(v.size()); @@ -339,7 +339,7 @@ TEST_F(ColumnVarbinaryTest, FieldAccessOperatorAndGet) { auto col = ColumnVarbinary::create(); std::vector vals = { make_bytes(1, 0x11), make_bytes(0, 0x00), - make_bytes(doris::StringView::kInlineSize + 6, 0x12)}; // include non-inline + make_bytes(doris::StringContainer::kInlineSize + 6, 0x12)}; // include non-inline for (auto& v : vals) { col->insert_data(v.data(), v.size()); } @@ -347,13 +347,13 @@ TEST_F(ColumnVarbinaryTest, FieldAccessOperatorAndGet) { for (size_t i = 0; i < vals.size(); ++i) { // operator[] Field f = (*col)[i]; - auto sv = vectorized::get(f); + auto sv = vectorized::get(f); ASSERT_EQ(sv.size(), vals[i].size()); ASSERT_EQ(memcmp(sv.data(), vals[i].data(), sv.size()), 0); // get(size_t, Field&) Field f2; col->get(i, f2); - auto sv2 = vectorized::get(f2); + auto sv2 = vectorized::get(f2); ASSERT_EQ(sv2.size(), vals[i].size()); ASSERT_EQ(memcmp(sv2.data(), vals[i].data(), sv2.size()), 0); } @@ -363,12 +363,12 @@ TEST_F(ColumnVarbinaryTest, InsertField) { auto col = ColumnVarbinary::create(); // prepare inline and non-inline fields std::string inline_v = make_bytes(2, 0x21); - std::string big_v = make_bytes(doris::StringView::kInlineSize + 10, 0x22); + std::string big_v = make_bytes(doris::StringContainer::kInlineSize + 10, 0x22); Field f_inline = Field::create_field( - doris::StringView(inline_v.data(), inline_v.size())); + doris::StringContainer(inline_v.data(), inline_v.size())); Field f_big = - Field::create_field(doris::StringView(big_v.data(), big_v.size())); + Field::create_field(doris::StringContainer(big_v.data(), big_v.size())); col->insert(f_inline); col->insert(f_big); @@ -385,7 +385,7 @@ TEST_F(ColumnVarbinaryTest, InsertField) { TEST_F(ColumnVarbinaryTest, SerializeValueIntoArenaAndImpl) { auto col = ColumnVarbinary::create(); std::string small = make_bytes(3, 0x31); // inline - std::string big = make_bytes(doris::StringView::kInlineSize + 12, 0x32); // non-inline + std::string big = make_bytes(doris::StringContainer::kInlineSize + 12, 0x32); // non-inline col->insert_data(small.data(), small.size()); col->insert_data(big.data(), big.size()); @@ -425,7 +425,7 @@ TEST_F(ColumnVarbinaryTest, AllocatedBytesAndHasEnoughCapacity) { } // Force some non-inline values to ensure arena usage for (int i = 0; i < 3; ++i) { - auto big = make_bytes(doris::StringView::kInlineSize + 20 + i, 0x90 + i); + auto big = make_bytes(doris::StringContainer::kInlineSize + 20 + i, 0x90 + i); dest->insert_data(big.data(), big.size()); } // Capture capacity & size diff --git a/be/test/vec/common/string_view_test.cpp b/be/test/vec/common/string_container_test.cpp similarity index 78% rename from be/test/vec/common/string_view_test.cpp rename to be/test/vec/common/string_container_test.cpp index 4bfd8c25ea6664..eb96a0e58e4e32 100644 --- a/be/test/vec/common/string_view_test.cpp +++ b/be/test/vec/common/string_container_test.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "vec/common/string_view.h" +#include "vec/common/string_container.h" #include @@ -29,7 +29,7 @@ namespace doris { -class StringViewTest : public ::testing::Test {}; +class StringContainerTest : public ::testing::Test {}; static std::string make_bytes(size_t n, uint8_t seed = 0x30) { std::string s; @@ -44,68 +44,68 @@ static std::string make_bytes(size_t n, uint8_t seed = 0x30) { return s; } -TEST_F(StringViewTest, EmptyAndBasics) { - StringView sv; +TEST_F(StringContainerTest, EmptyAndBasics) { + StringContainer sv; EXPECT_TRUE(sv.empty()); EXPECT_EQ(sv.size(), 0U); EXPECT_TRUE(sv.isInline()); - StringView a("abc"); + StringContainer a("abc"); EXPECT_FALSE(a.empty()); EXPECT_EQ(a.size(), 3U); EXPECT_TRUE(a.isInline()); EXPECT_EQ(std::string(a), std::string("abc")); std::string s12(12, 'x'); - StringView b(s12); + StringContainer b(s12); EXPECT_TRUE(b.isInline()); EXPECT_EQ(b.size(), 12U); std::string s13(13, 'y'); - StringView c(s13); + StringContainer c(s13); EXPECT_FALSE(c.isInline()); EXPECT_EQ(c.size(), 13U); } -TEST_F(StringViewTest, DataPointerInlineVsOutline) { +TEST_F(StringContainerTest, DataPointerInlineVsOutline) { std::string small = "hello"; - StringView si(small); + StringContainer si(small); EXPECT_TRUE(si.isInline()); EXPECT_NE(si.data(), small.data()); // inline stores its own bytes std::string big = make_bytes(16); - StringView so(big); + StringContainer so(big); EXPECT_FALSE(so.isInline()); EXPECT_EQ(so.data(), big.data()); // outline holds external pointer } -TEST_F(StringViewTest, EqualityAndCompare) { - StringView a("abcd"); - StringView b("abcd"); +TEST_F(StringContainerTest, EqualityAndCompare) { + StringContainer a("abcd"); + StringContainer b("abcd"); EXPECT_TRUE(a == b); EXPECT_EQ(a.compare(b), 0); - StringView c("abce"); + StringContainer c("abce"); EXPECT_FALSE(a == c); EXPECT_LT(a.compare(c), 0); // 'd' < 'e' // different length, same prefix - StringView d("ab"); - StringView e("abc"); + StringContainer d("ab"); + StringContainer e("abc"); EXPECT_LT(d.compare(e), 0); EXPECT_GT(e.compare(d), 0); // same first 4 bytes, differ later (exercise non-prefix compare path) std::string s1 = std::string("abcd") + std::string("XXXX"); std::string s2 = std::string("abcd") + std::string("YYYY"); - StringView x(s1); - StringView y(s2); + StringContainer x(s1); + StringContainer y(s2); EXPECT_NE(x.compare(y), 0); } -TEST_F(StringViewTest, EmbeddedNulls) { +TEST_F(StringContainerTest, EmbeddedNulls) { std::string raw = std::string("ab\0cd\0ef", 8); - StringView sv(raw); + StringContainer sv(raw); EXPECT_EQ(sv.size(), 8U); // string conversion preserves bytes std::string s = static_cast(sv); @@ -113,14 +113,14 @@ TEST_F(StringViewTest, EmbeddedNulls) { EXPECT_EQ(::memcmp(s.data(), raw.data(), 8), 0); // equality with same content containing nulls - StringView sv2(raw); + StringContainer sv2(raw); EXPECT_TRUE(sv == sv2); EXPECT_EQ(sv.compare(sv2), 0); } -TEST_F(StringViewTest, ConversionsAndIteration) { +TEST_F(StringContainerTest, ConversionsAndIteration) { std::string src = make_bytes(10); - const StringView sv(src); + const StringContainer sv(src); // to_string_ref auto ref = sv.to_string_ref(); @@ -138,9 +138,9 @@ TEST_F(StringViewTest, ConversionsAndIteration) { EXPECT_EQ(::memcmp(via_iter.data(), sv.data(), sv.size()), 0); } -TEST_F(StringViewTest, OstreamWrite) { +TEST_F(StringContainerTest, OstreamWrite) { std::string raw = std::string("12\0\0", 4); - StringView sv(raw); + StringContainer sv(raw); std::ostringstream oss; oss << sv; // write() respects size; embedded nulls are preserved std::string out = oss.str(); @@ -148,12 +148,12 @@ TEST_F(StringViewTest, OstreamWrite) { EXPECT_EQ(::memcmp(out.data(), raw.data(), raw.size()), 0); } -TEST_F(StringViewTest, NonInlineEqualityAndCompare) { +TEST_F(StringContainerTest, NonInlineEqualityAndCompare) { // Create two large (> kInlineSize) equal strings std::string base_a = make_bytes(24, 0x41); // length 24 std::string base_b = base_a; // identical - StringView sva(base_a); - StringView svb(base_b); + StringContainer sva(base_a); + StringContainer svb(base_b); EXPECT_FALSE(sva.isInline()); EXPECT_FALSE(svb.isInline()); EXPECT_TRUE(sva == svb); @@ -164,8 +164,8 @@ TEST_F(StringViewTest, NonInlineEqualityAndCompare) { std::string diff1 = base_a; std::string diff2 = base_a; diff2[15] ^= 0x01; // change one byte after prefix region - StringView svd1(diff1); - StringView svd2(diff2); + StringContainer svd1(diff1); + StringContainer svd2(diff2); EXPECT_NE(svd1.compare(svd2), 0); EXPECT_NE(svd1 == svd2, true); @@ -173,15 +173,15 @@ TEST_F(StringViewTest, NonInlineEqualityAndCompare) { std::string p1 = base_a; std::string p2 = base_a; p2[0] = static_cast(p2[0] + 1); - StringView svp1(p1), svp2(p2); + StringContainer svp1(p1), svp2(p2); int cmp = svp1.compare(svp2); EXPECT_LT(cmp, 0); EXPECT_TRUE((svp1 <=> svp2) == std::strong_ordering::less); } -TEST_F(StringViewTest, StrConversionInlineAndNonInline) { +TEST_F(StringContainerTest, StrConversionInlineAndNonInline) { std::string inl = "abcd"; // inline - StringView svi(inl); + StringContainer svi(inl); std::string out_inl = svi.str(); EXPECT_EQ(out_inl.size(), inl.size()); EXPECT_EQ(out_inl, inl); @@ -190,19 +190,19 @@ TEST_F(StringViewTest, StrConversionInlineAndNonInline) { std::string big = make_bytes(20, 0x50); // ensure > 12 big[5] = '\0'; big[14] = '\0'; - StringView svb(big); + StringContainer svb(big); EXPECT_FALSE(svb.isInline()); std::string out_big = svb.str(); EXPECT_EQ(out_big.size(), big.size()); EXPECT_EQ(::memcmp(out_big.data(), big.data(), big.size()), 0); } -TEST_F(StringViewTest, ThreeWayComparisonOrdering) { - StringView a("abcd"); // inline - StringView b("abce"); // inline > a +TEST_F(StringContainerTest, ThreeWayComparisonOrdering) { + StringContainer a("abcd"); // inline + StringContainer b("abce"); // inline > a auto tmp_long = make_bytes(30); // create std::string first (avoid rvalue deleted ctor) - StringView c(tmp_long); // non-inline - StringView d(c); // identical non-inline + StringContainer c(tmp_long); // non-inline + StringContainer d(c); // identical non-inline // a vs b EXPECT_TRUE((a <=> b) == std::strong_ordering::less); EXPECT_TRUE((b <=> a) == std::strong_ordering::greater); @@ -220,20 +220,20 @@ TEST_F(StringViewTest, ThreeWayComparisonOrdering) { } } -TEST_F(StringViewTest, DumpHex) { +TEST_F(StringContainerTest, DumpHex) { // Empty - StringView empty; + StringContainer empty; EXPECT_EQ(empty.dump_hex(), "X''"); // Inline with known bytes const unsigned char bytes_inline[] = {0x00, 0x01, 0x0A, 0x1F, 0x7F}; - StringView svi(reinterpret_cast(bytes_inline), sizeof(bytes_inline)); + StringContainer svi(reinterpret_cast(bytes_inline), sizeof(bytes_inline)); EXPECT_TRUE(svi.isInline()); EXPECT_EQ(svi.dump_hex(), "X'00010A1F7F'"); // Non-inline, length > 12 std::string big = make_bytes(16, 0x20); // bytes 0x20,0x21,... - StringView svb(big); + StringContainer svb(big); EXPECT_FALSE(svb.isInline()); // Build expected std::ostringstream oss; diff --git a/be/test/vec/data_types/data_type_varbinary_test.cpp b/be/test/vec/data_types/data_type_varbinary_test.cpp index 1049a60b991582..c62ef4719d8274 100644 --- a/be/test/vec/data_types/data_type_varbinary_test.cpp +++ b/be/test/vec/data_types/data_type_varbinary_test.cpp @@ -34,7 +34,7 @@ #include "vec/common/assert_cast.h" #include "vec/common/string_buffer.hpp" #include "vec/common/string_ref.h" -#include "vec/common/string_view.h" +#include "vec/common/string_container.h" #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/common_data_type_serder_test.h" @@ -87,7 +87,7 @@ TEST_F(DataTypeVarbinaryTest, CreateColumnAndCheckColumn) { TEST_F(DataTypeVarbinaryTest, GetDefaultField) { DataTypeVarbinary dt; Field def = dt.get_default(); - const auto& sv = get(def); + const auto& sv = get(def); EXPECT_EQ(sv.size(), 0U); } @@ -175,7 +175,7 @@ TEST_F(DataTypeVarbinaryTest, GetFieldWithDataType) { auto fwd = dt.get_field_with_data_type(*col, 0); EXPECT_EQ(fwd.base_scalar_type_id, PrimitiveType::TYPE_VARBINARY); - const auto& sv = get(fwd.field); + const auto& sv = get(fwd.field); ASSERT_EQ(sv.size(), v.size()); ASSERT_EQ(memcmp(sv.data(), v.data(), sv.size()), 0); } @@ -188,7 +188,7 @@ TEST_F(DataTypeVarbinaryTest, GetFieldFromTExprNode) { node.__isset.varbinary_literal = true; Field f = dt.get_field(node); - const auto& sv = get(f); + const auto& sv = get(f); ASSERT_EQ(sv.size(), 5U); ASSERT_EQ(memcmp(sv.data(), "hello", 5), 0); } diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 7e2799c9dd5704..9fdf88eb50ee7f 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -104,7 +104,7 @@ using VARCHAR = std::string; using CHAR = std::string; using STRING = std::string; -using VARBINARY = doris::StringView; +using VARBINARY = doris::StringContainer; using DOUBLE = double; using FLOAT = float; @@ -133,8 +133,8 @@ struct ut_input_type { }; template <> struct ut_input_type { - using type = doris::StringView; - inline static type default_value = doris::StringView("test_default"); + using type = doris::StringContainer; + inline static type default_value = doris::StringContainer("test_default"); }; template <> struct ut_input_type { From 20f4159544a509f7f1bba651fcb0872014c16d60 Mon Sep 17 00:00:00 2001 From: wumeibanfa <904360463@qq.com> Date: Mon, 27 Oct 2025 20:24:49 +0800 Subject: [PATCH 2/2] checkstyle --- be/src/vec/core/field.h | 11 +++++----- be/src/vec/data_types/data_type_varbinary.cpp | 2 +- be/src/vec/data_types/data_type_varbinary.h | 3 ++- be/test/vec/columns/column_varbinary_test.cpp | 20 +++++++++---------- be/test/vec/common/string_container_test.cpp | 8 ++++---- .../data_types/data_type_varbinary_test.cpp | 2 +- 6 files changed, 24 insertions(+), 22 deletions(-) diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index fac05715e8d7dd..de6e7f9bdde6ba 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -486,11 +486,12 @@ class Field { std::string_view as_string_view() const; private: - std::aligned_union_t< - DBMS_MIN_FIELD_SIZE - sizeof(PrimitiveType), Null, UInt64, UInt128, Int64, Int128, IPv6, - Float64, String, JsonbField, Array, Tuple, Map, VariantMap, DecimalField, - DecimalField, DecimalField, DecimalField, - DecimalField, BitmapValue, HyperLogLog, QuantileState, doris::StringContainer> + std::aligned_union_t, DecimalField, + DecimalField, DecimalField, + DecimalField, BitmapValue, HyperLogLog, QuantileState, + doris::StringContainer> storage; PrimitiveType type; diff --git a/be/src/vec/data_types/data_type_varbinary.cpp b/be/src/vec/data_types/data_type_varbinary.cpp index 94efc7b10afb65..8394645eb7cb47 100644 --- a/be/src/vec/data_types/data_type_varbinary.cpp +++ b/be/src/vec/data_types/data_type_varbinary.cpp @@ -34,8 +34,8 @@ #include "vec/columns/column_varbinary.h" #include "vec/common/assert_cast.h" #include "vec/common/string_buffer.hpp" -#include "vec/common/string_ref.h" #include "vec/common/string_container.h" +#include "vec/common/string_ref.h" #include "vec/core/field.h" #include "vec/core/types.h" diff --git a/be/src/vec/data_types/data_type_varbinary.h b/be/src/vec/data_types/data_type_varbinary.h index 848ad502cf94b5..4ec908e8dc7306 100644 --- a/be/src/vec/data_types/data_type_varbinary.h +++ b/be/src/vec/data_types/data_type_varbinary.h @@ -72,7 +72,8 @@ class DataTypeVarbinary : public IDataType { Field get_field(const TExprNode& node) const override { DCHECK_EQ(node.node_type, TExprNodeType::VARBINARY_LITERAL); DCHECK(node.__isset.varbinary_literal); - return Field::create_field(doris::StringContainer(node.varbinary_literal.value)); + return Field::create_field( + doris::StringContainer(node.varbinary_literal.value)); } FieldWithDataType get_field_with_data_type(const IColumn& column, diff --git a/be/test/vec/columns/column_varbinary_test.cpp b/be/test/vec/columns/column_varbinary_test.cpp index 08a79feebb27b5..0e1d2b9d917f67 100644 --- a/be/test/vec/columns/column_varbinary_test.cpp +++ b/be/test/vec/columns/column_varbinary_test.cpp @@ -32,8 +32,8 @@ #include "vec/columns/column.h" #include "vec/columns/column_string.h" #include "vec/common/assert_cast.h" -#include "vec/common/string_ref.h" #include "vec/common/string_container.h" +#include "vec/common/string_ref.h" #include "vec/core/types.h" namespace doris::vectorized { @@ -160,11 +160,11 @@ TEST_F(ColumnVarbinaryTest, FilterBothModes) { auto col = ColumnVarbinary::create(); // Mix inline (small) and non-inline (large > kInlineSize) values std::vector vals = { - make_bytes(1, 0x10), // inline + make_bytes(1, 0x10), // inline make_bytes(doris::StringContainer::kInlineSize + 5, 0x91), // non-inline (dropped) - make_bytes(3, 0x12), // inline + make_bytes(3, 0x12), // inline make_bytes(doris::StringContainer::kInlineSize + 7, 0x92), // non-inline - make_bytes(0, 0x00), // empty (dropped) + make_bytes(0, 0x00), // empty (dropped) make_bytes(doris::StringContainer::kInlineSize + 9, 0x93) // non-inline }; for (auto& v : vals) { @@ -205,9 +205,9 @@ TEST_F(ColumnVarbinaryTest, Permute) { auto col = ColumnVarbinary::create(); // Include large (non-inline) entries to exercise arena path std::vector vals = { - make_bytes(1, 0x20), // inline + make_bytes(1, 0x20), // inline make_bytes(doris::StringContainer::kInlineSize + 3, 0xA0), // non-inline - make_bytes(3, 0x22), // inline + make_bytes(3, 0x22), // inline make_bytes(doris::StringContainer::kInlineSize + 8, 0xA1) // non-inline }; for (auto& v : vals) { @@ -276,9 +276,9 @@ TEST_F(ColumnVarbinaryTest, ReplaceColumnData) { auto col = ColumnVarbinary::create(); // mix inline and non-inline std::vector vals = { - make_bytes(2, 0x40), // inline + make_bytes(2, 0x40), // inline make_bytes(doris::StringContainer::kInlineSize + 4, 0xB0), // non-inline - make_bytes(4, 0x42) // inline + make_bytes(4, 0x42) // inline }; for (auto& v : vals) { col->insert_data(v.data(), v.size()); @@ -287,7 +287,7 @@ TEST_F(ColumnVarbinaryTest, ReplaceColumnData) { auto rhs = ColumnVarbinary::create(); std::vector rhs_vals = { make_bytes(doris::StringContainer::kInlineSize + 7, 0xC0), // non-inline - make_bytes(1, 0x51) // inline + make_bytes(1, 0x51) // inline }; for (auto& v : rhs_vals) { rhs->insert_data(v.data(), v.size()); @@ -384,7 +384,7 @@ TEST_F(ColumnVarbinaryTest, InsertField) { TEST_F(ColumnVarbinaryTest, SerializeValueIntoArenaAndImpl) { auto col = ColumnVarbinary::create(); - std::string small = make_bytes(3, 0x31); // inline + std::string small = make_bytes(3, 0x31); // inline std::string big = make_bytes(doris::StringContainer::kInlineSize + 12, 0x32); // non-inline col->insert_data(small.data(), small.size()); col->insert_data(big.data(), big.size()); diff --git a/be/test/vec/common/string_container_test.cpp b/be/test/vec/common/string_container_test.cpp index eb96a0e58e4e32..27d1455d455895 100644 --- a/be/test/vec/common/string_container_test.cpp +++ b/be/test/vec/common/string_container_test.cpp @@ -198,11 +198,11 @@ TEST_F(StringContainerTest, StrConversionInlineAndNonInline) { } TEST_F(StringContainerTest, ThreeWayComparisonOrdering) { - StringContainer a("abcd"); // inline - StringContainer b("abce"); // inline > a + StringContainer a("abcd"); // inline + StringContainer b("abce"); // inline > a auto tmp_long = make_bytes(30); // create std::string first (avoid rvalue deleted ctor) - StringContainer c(tmp_long); // non-inline - StringContainer d(c); // identical non-inline + StringContainer c(tmp_long); // non-inline + StringContainer d(c); // identical non-inline // a vs b EXPECT_TRUE((a <=> b) == std::strong_ordering::less); EXPECT_TRUE((b <=> a) == std::strong_ordering::greater); diff --git a/be/test/vec/data_types/data_type_varbinary_test.cpp b/be/test/vec/data_types/data_type_varbinary_test.cpp index c62ef4719d8274..d74c0424e107f3 100644 --- a/be/test/vec/data_types/data_type_varbinary_test.cpp +++ b/be/test/vec/data_types/data_type_varbinary_test.cpp @@ -33,8 +33,8 @@ #include "vec/columns/column_varbinary.h" #include "vec/common/assert_cast.h" #include "vec/common/string_buffer.hpp" -#include "vec/common/string_ref.h" #include "vec/common/string_container.h" +#include "vec/common/string_ref.h" #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/common_data_type_serder_test.h"