From 9d802189dbeef5d208003e1bb35c1c0cd023ed4b Mon Sep 17 00:00:00 2001 From: yiguolei Date: Fri, 5 Sep 2025 14:35:48 +0800 Subject: [PATCH 1/5] [bugfix](compaction) the avg segment size should always be less than input rows --- be/src/olap/compaction.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 3fbd59b41a33af..9aae2f7b5c9591 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -297,11 +297,14 @@ int64_t Compaction::get_avg_segment_rows() { const auto& meta = _tablet->tablet_meta(); if (meta->compaction_policy() == CUMULATIVE_TIME_SERIES_POLICY) { int64_t compaction_goal_size_mbytes = meta->time_series_compaction_goal_size_mbytes(); - return (compaction_goal_size_mbytes * 1024 * 1024 * 2) / - (_input_rowsets_data_size / (_input_row_num + 1) + 1); + // The output segment rows should be less than total input rows + return std::max((compaction_goal_size_mbytes * 1024 * 1024 * 2) / + (_input_rowsets_data_size / (_input_row_num + 1) + 1), + _input_row_num + 1); } - return config::vertical_compaction_max_segment_size / - (_input_rowsets_data_size / (_input_row_num + 1) + 1); + return std::max(config::vertical_compaction_max_segment_size / + (_input_rowsets_data_size / (_input_row_num + 1) + 1), + _input_row_num + 1); } CompactionMixin::CompactionMixin(StorageEngine& engine, TabletSharedPtr tablet, From a39e8768ab92066632c576ba87b231eec372b1ce Mon Sep 17 00:00:00 2001 From: yiguolei Date: Fri, 5 Sep 2025 14:48:41 +0800 Subject: [PATCH 2/5] f --- be/test/olap/base_compaction_test.cpp | 34 ++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/be/test/olap/base_compaction_test.cpp b/be/test/olap/base_compaction_test.cpp index 7d9abe54ed2163..edea2360a2e4cf 100644 --- a/be/test/olap/base_compaction_test.cpp +++ b/be/test/olap/base_compaction_test.cpp @@ -39,11 +39,12 @@ namespace doris { class TestBaseCompaction : public testing::Test {}; static RowsetSharedPtr create_rowset(Version version, int num_segments, bool overlapping, - int data_size) { + int data_size, int rows_num) { auto rs_meta = std::make_shared(); rs_meta->set_rowset_type(BETA_ROWSET); // important rs_meta->_rowset_meta_pb.set_start_version(version.first); rs_meta->_rowset_meta_pb.set_end_version(version.second); + rs_meta->_rowset_meta_pb.set_rows_num(rows_num); rs_meta->set_num_segments(num_segments); rs_meta->set_segments_overlap(overlapping ? OVERLAPPING : NONOVERLAPPING); rs_meta->set_total_disk_size(data_size); @@ -66,10 +67,10 @@ TEST_F(TestBaseCompaction, filter_input_rowset) { BaseCompaction compaction(engine, tablet); //std::vector rowsets; - RowsetSharedPtr init_rs = create_rowset({0, 1}, 1, false, 0); + RowsetSharedPtr init_rs = create_rowset({0, 1}, 1, false, 0, 100); tablet->_rs_version_map.emplace(init_rs->version(), init_rs); for (int i = 2; i < 30; ++i) { - RowsetSharedPtr rs = create_rowset({i, i}, 1, false, 1024); + RowsetSharedPtr rs = create_rowset({i, i}, 1, false, 1024, 100); tablet->_rs_version_map.emplace(rs->version(), rs); } Status st = compaction.pick_rowsets_to_compact(); @@ -81,4 +82,31 @@ TEST_F(TestBaseCompaction, filter_input_rowset) { EXPECT_EQ(compaction._input_rowsets.back()->end_version(), 21); } +TEST_F(TestBaseCompaction, zero_input_rows) { + StorageEngine engine({}); + TabletMetaSharedPtr tablet_meta; + tablet_meta.reset(new TabletMeta(2, 3, 15674, 15675, 6, 7, TTabletSchema(), 6, {{7, 8}}, + UniqueId(9, 10), TTabletType::TABLET_TYPE_DISK, + TCompressionType::LZ4F)); + TabletSharedPtr tablet(new Tablet(engine, tablet_meta, nullptr, CUMULATIVE_SIZE_BASED_POLICY)); + tablet->_cumulative_point = 25; + BaseCompaction compaction(engine, tablet); + //std::vector rowsets; + + RowsetSharedPtr init_rs = create_rowset({0, 1}, 1, false, 0, 0); + tablet->_rs_version_map.emplace(init_rs->version(), init_rs); + for (int i = 2; i < 30; ++i) { + RowsetSharedPtr rs = create_rowset({i, i}, 1, false, 1024, 0); + tablet->_rs_version_map.emplace(rs->version(), rs); + } + Status st = compaction.pick_rowsets_to_compact(); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(compaction._input_rowsets.front()->start_version(), 0); + EXPECT_EQ(compaction._input_rowsets.front()->end_version(), 1); + + EXPECT_EQ(compaction._input_rowsets.back()->start_version(), 21); + EXPECT_EQ(compaction._input_rowsets.back()->end_version(), 21); + std::cout << "input rowsets: " << compaction.get_avg_segment_rows() << std::endl; +} + } // namespace doris From 812fdc344495ab3cb2d2c3bb8e266ff4f6d978dc Mon Sep 17 00:00:00 2001 From: yiguolei Date: Fri, 5 Sep 2025 15:32:11 +0800 Subject: [PATCH 3/5] f --- be/test/olap/base_compaction_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/test/olap/base_compaction_test.cpp b/be/test/olap/base_compaction_test.cpp index edea2360a2e4cf..3ac0c49c34c58a 100644 --- a/be/test/olap/base_compaction_test.cpp +++ b/be/test/olap/base_compaction_test.cpp @@ -44,7 +44,7 @@ static RowsetSharedPtr create_rowset(Version version, int num_segments, bool ove rs_meta->set_rowset_type(BETA_ROWSET); // important rs_meta->_rowset_meta_pb.set_start_version(version.first); rs_meta->_rowset_meta_pb.set_end_version(version.second); - rs_meta->_rowset_meta_pb.set_rows_num(rows_num); + rs_meta->_rowset_meta_pb.set_num_rows(rows_num); rs_meta->set_num_segments(num_segments); rs_meta->set_segments_overlap(overlapping ? OVERLAPPING : NONOVERLAPPING); rs_meta->set_total_disk_size(data_size); From 683cb0ebfdfa00b677def7e293464551965cc359 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Fri, 5 Sep 2025 15:51:30 +0800 Subject: [PATCH 4/5] f --- be/src/olap/compaction.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 9aae2f7b5c9591..6638d0bc4464cf 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -298,11 +298,11 @@ int64_t Compaction::get_avg_segment_rows() { if (meta->compaction_policy() == CUMULATIVE_TIME_SERIES_POLICY) { int64_t compaction_goal_size_mbytes = meta->time_series_compaction_goal_size_mbytes(); // The output segment rows should be less than total input rows - return std::max((compaction_goal_size_mbytes * 1024 * 1024 * 2) / + return std::min((compaction_goal_size_mbytes * 1024 * 1024 * 2) / (_input_rowsets_data_size / (_input_row_num + 1) + 1), _input_row_num + 1); } - return std::max(config::vertical_compaction_max_segment_size / + return std::min(config::vertical_compaction_max_segment_size / (_input_rowsets_data_size / (_input_row_num + 1) + 1), _input_row_num + 1); } From f176f23be74e64c6049ff3c1d949c819b215e5f6 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Fri, 5 Sep 2025 15:55:10 +0800 Subject: [PATCH 5/5] f --- be/test/olap/base_compaction_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/test/olap/base_compaction_test.cpp b/be/test/olap/base_compaction_test.cpp index 3ac0c49c34c58a..f529e11e7ed676 100644 --- a/be/test/olap/base_compaction_test.cpp +++ b/be/test/olap/base_compaction_test.cpp @@ -106,7 +106,7 @@ TEST_F(TestBaseCompaction, zero_input_rows) { EXPECT_EQ(compaction._input_rowsets.back()->start_version(), 21); EXPECT_EQ(compaction._input_rowsets.back()->end_version(), 21); - std::cout << "input rowsets: " << compaction.get_avg_segment_rows() << std::endl; + EXPECT_EQ(compaction.get_avg_segment_rows(), 1); } } // namespace doris