Skip to content

Commit 39f9074

Browse files
authored
branch-3.0.4: [fix](array index) Correct null bitmap writing for inverted index #47846 (#48180)
cherry pick from #47846
1 parent 919f041 commit 39f9074

File tree

5 files changed

+905
-79
lines changed

5 files changed

+905
-79
lines changed

be/src/olap/rowset/segment_v2/column_writer.cpp

+4-6
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,9 @@ Status ScalarColumnWriter::init() {
469469
return Status::OK();
470470
}
471471
Status add_nulls(uint32_t count) override { return Status::OK(); }
472-
Status add_array_nulls(uint32_t row_id) override { return Status::OK(); }
472+
Status add_array_nulls(const uint8_t* null_map, size_t num_rows) override {
473+
return Status::OK();
474+
}
473475
Status finish() override { return Status::OK(); }
474476
int64_t size() const override { return 0; }
475477
void close_on_error() override {}
@@ -951,11 +953,7 @@ Status ArrayColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t
951953
RETURN_IF_ERROR(append_data(ptr, num_rows));
952954
if (is_nullable()) {
953955
if (_opts.need_inverted_index) {
954-
for (int row_id = 0; row_id < num_rows; row_id++) {
955-
if (null_map[row_id] == 1) {
956-
RETURN_IF_ERROR(_inverted_index_builder->add_array_nulls(row_id));
957-
}
958-
}
956+
RETURN_IF_ERROR(_inverted_index_builder->add_array_nulls(null_map, num_rows));
959957
}
960958
RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows));
961959
}

be/src/olap/rowset/segment_v2/inverted_index_writer.cpp

+38-14
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,26 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
303303
return Status::OK();
304304
}
305305

306-
Status add_array_nulls(uint32_t row_id) override {
307-
_null_bitmap.add(row_id);
306+
Status add_array_nulls(const uint8_t* null_map, size_t num_rows) override {
307+
DCHECK(_rid >= num_rows);
308+
if (num_rows == 0 || null_map == nullptr) {
309+
return Status::OK();
310+
}
311+
std::vector<uint32_t> null_indices;
312+
null_indices.reserve(num_rows / 8);
313+
314+
// because _rid is the row id in block, not segment, and we add data before we add nulls,
315+
// so we need to subtract num_rows to get the row id in segment
316+
for (size_t i = 0; i < num_rows; i++) {
317+
if (null_map[i] == 1) {
318+
null_indices.push_back(_rid - num_rows + static_cast<uint32_t>(i));
319+
}
320+
}
321+
322+
if (!null_indices.empty()) {
323+
_null_bitmap.addMany(null_indices.size(), null_indices.data());
324+
}
325+
308326
return Status::OK();
309327
}
310328

@@ -378,8 +396,9 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
378396
return Status::OK();
379397
}
380398

381-
Status add_array_values(size_t field_size, const void* value_ptr, const uint8_t* null_map,
382-
const uint8_t* offsets_ptr, size_t count) override {
399+
Status add_array_values(size_t field_size, const void* value_ptr,
400+
const uint8_t* nested_null_map, const uint8_t* offsets_ptr,
401+
size_t count) override {
383402
DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_array_values_count_is_zero",
384403
{ count = 0; })
385404
if (count == 0) {
@@ -404,7 +423,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
404423
lucene::document::Field* new_field = nullptr;
405424
CL_NS(analysis)::TokenStream* ts = nullptr;
406425
for (auto j = start_off; j < start_off + array_elem_size; ++j) {
407-
if (null_map[j] == 1) {
426+
if (nested_null_map && nested_null_map[j] == 1) {
408427
continue;
409428
}
410429
auto* v = (Slice*)((const uint8_t*)value_ptr + j * field_size);
@@ -500,7 +519,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
500519
for (int i = 0; i < count; ++i) {
501520
auto array_elem_size = offsets[i + 1] - offsets[i];
502521
for (size_t j = start_off; j < start_off + array_elem_size; ++j) {
503-
if (null_map[j] == 1) {
522+
if (nested_null_map && nested_null_map[j] == 1) {
504523
continue;
505524
}
506525
const CppType* p = &reinterpret_cast<const CppType*>(value_ptr)[j];
@@ -520,7 +539,8 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
520539
DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_array_values_field_is_nullptr",
521540
{ _field = nullptr; })
522541
DBUG_EXECUTE_IF(
523-
"InvertedIndexColumnWriterImpl::add_array_values_index_writer_is_nullptr",
542+
"InvertedIndexColumnWriterImpl::add_array_values_index_writer_is_"
543+
"nullptr",
524544
{ _index_writer = nullptr; })
525545
if (_field == nullptr || _index_writer == nullptr) {
526546
LOG(ERROR) << "field or index writer is null in inverted index writer.";
@@ -582,9 +602,10 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
582602
std::string new_value;
583603
size_t value_length = sizeof(CppType);
584604

585-
DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_value_bkd_writer_add_throw_error", {
586-
_CLTHROWA(CL_ERR_IllegalArgument, ("packedValue should be length=xxx"));
587-
});
605+
DBUG_EXECUTE_IF(
606+
"InvertedIndexColumnWriterImpl::add_value_bkd_writer_add_throw_"
607+
"error",
608+
{ _CLTHROWA(CL_ERR_IllegalArgument, ("packedValue should be length=xxx")); });
588609

589610
_value_key_coder->full_encode_ascending(&value, &new_value);
590611
_bkd_writer->add((const uint8_t*)new_value.c_str(), value_length, _rid);
@@ -643,8 +664,8 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
643664
_bkd_writer->finish(data_out.get(), index_out.get()),
644665
int(field_type));
645666
} else {
646-
LOG(WARNING)
647-
<< "Inverted index writer create output error occurred: nullptr";
667+
LOG(WARNING) << "Inverted index writer create output error "
668+
"occurred: nullptr";
648669
_CLTHROWA(CL_ERR_IO, "Create output error with nullptr");
649670
}
650671
} else if constexpr (field_is_slice_type(field_type)) {
@@ -653,9 +674,12 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
653674
InvertedIndexDescriptor::get_temporary_null_bitmap_file_name()));
654675
write_null_bitmap(null_bitmap_out.get());
655676
DBUG_EXECUTE_IF(
656-
"InvertedIndexWriter._throw_clucene_error_in_fulltext_writer_close", {
677+
"InvertedIndexWriter._throw_clucene_error_in_fulltext_"
678+
"writer_close",
679+
{
657680
_CLTHROWA(CL_ERR_IO,
658-
"debug point: test throw error in fulltext index writer");
681+
"debug point: test throw error in fulltext "
682+
"index writer");
659683
});
660684
}
661685
} catch (CLuceneError& e) {

be/src/olap/rowset/segment_v2/inverted_index_writer.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class InvertedIndexColumnWriter {
6464
size_t count) = 0;
6565

6666
virtual Status add_nulls(uint32_t count) = 0;
67-
virtual Status add_array_nulls(uint32_t row_id) = 0;
67+
virtual Status add_array_nulls(const uint8_t* null_map, size_t num_rows) = 0;
6868

6969
virtual Status finish() = 0;
7070

be/src/olap/task/index_builder.cpp

+18-23
Original file line numberDiff line numberDiff line change
@@ -589,9 +589,9 @@ Status IndexBuilder::_write_inverted_index_data(TabletSchemaSPtr tablet_schema,
589589
return converted_result.first;
590590
}
591591
const auto* ptr = (const uint8_t*)converted_result.second->get_data();
592-
if (converted_result.second->get_nullmap()) {
593-
RETURN_IF_ERROR(_add_nullable(column_name, writer_sign, field.get(),
594-
converted_result.second->get_nullmap(), &ptr,
592+
const auto* null_map = converted_result.second->get_nullmap();
593+
if (null_map) {
594+
RETURN_IF_ERROR(_add_nullable(column_name, writer_sign, field.get(), null_map, &ptr,
595595
block->rows()));
596596
} else {
597597
RETURN_IF_ERROR(_add_data(column_name, writer_sign, field.get(), &ptr, block->rows()));
@@ -606,18 +606,6 @@ Status IndexBuilder::_add_nullable(const std::string& column_name,
606606
const std::pair<int64_t, int64_t>& index_writer_sign,
607607
Field* field, const uint8_t* null_map, const uint8_t** ptr,
608608
size_t num_rows) {
609-
size_t offset = 0;
610-
auto next_run_step = [&]() {
611-
size_t step = 1;
612-
for (auto i = offset + 1; i < num_rows; ++i) {
613-
if (null_map[offset] == null_map[i]) {
614-
step++;
615-
} else {
616-
break;
617-
}
618-
}
619-
return step;
620-
};
621609
// TODO: need to process null data for inverted index
622610
if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
623611
DCHECK(field->get_sub_field_count() == 1);
@@ -638,20 +626,27 @@ Status IndexBuilder::_add_nullable(const std::string& column_name,
638626
DBUG_EXECUTE_IF("IndexBuilder::_add_nullable_add_array_values_error", {
639627
_CLTHROWA(CL_ERR_IO, "debug point: _add_nullable_add_array_values_error");
640628
})
629+
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_nulls(null_map,
630+
num_rows));
641631
} catch (const std::exception& e) {
642632
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
643633
"CLuceneError occured: {}", e.what());
644634
}
645-
// we should refresh nullmap for array
646-
for (int row_id = 0; row_id < num_rows; row_id++) {
647-
if (null_map && null_map[row_id] == 1) {
648-
RETURN_IF_ERROR(
649-
_inverted_index_builders[index_writer_sign]->add_array_nulls(row_id));
650-
}
651-
}
635+
652636
return Status::OK();
653637
}
654-
638+
size_t offset = 0;
639+
auto next_run_step = [&]() {
640+
size_t step = 1;
641+
for (auto i = offset + 1; i < num_rows; ++i) {
642+
if (null_map[offset] == null_map[i]) {
643+
step++;
644+
} else {
645+
break;
646+
}
647+
}
648+
return step;
649+
};
655650
try {
656651
do {
657652
auto step = next_run_step();

0 commit comments

Comments
 (0)