40
40
#include " common/Tracer.h"
41
41
#include " common/Types.h"
42
42
#include " google/protobuf/message_lite.h"
43
+ #include " index/VectorIndex.h"
43
44
#include " index/VectorMemIndex.h"
44
45
#include " mmap/ChunkedColumn.h"
45
46
#include " mmap/Utils.h"
@@ -115,7 +116,8 @@ ChunkedSegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
115
116
if (get_bit (field_data_ready_bitset_, field_id)) {
116
117
fields_.erase (field_id);
117
118
set_bit (field_data_ready_bitset_, field_id, false );
118
- } else if (get_bit (binlog_index_bitset_, field_id)) {
119
+ }
120
+ if (get_bit (binlog_index_bitset_, field_id)) {
119
121
set_bit (binlog_index_bitset_, field_id, false );
120
122
vector_indexings_.drop_field_indexing (field_id);
121
123
}
@@ -136,8 +138,7 @@ ChunkedSegmentSealedImpl::WarmupChunkCache(const FieldId field_id,
136
138
auto & field_meta = schema_->operator [](field_id);
137
139
AssertInfo (field_meta.is_vector (), " vector field is not vector type" );
138
140
139
- if (!get_bit (index_ready_bitset_, field_id) &&
140
- !get_bit (binlog_index_bitset_, field_id)) {
141
+ if (!get_bit (index_ready_bitset_, field_id)) {
141
142
return ;
142
143
}
143
144
@@ -496,21 +497,13 @@ ChunkedSegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) {
496
497
insert_record_.seal_pks ();
497
498
}
498
499
499
- bool use_temp_index = false ;
500
500
{
501
- // update num_rows to build temperate binlog index
501
+ // update num_rows before building the temporary interim index
502
502
std::unique_lock lck (mutex_);
503
503
update_row_count (num_rows);
504
504
}
505
505
506
- if (generate_interim_index (field_id)) {
507
- std::unique_lock lck (mutex_);
508
- fields_.erase (field_id);
509
- set_bit (field_data_ready_bitset_, field_id, false );
510
- use_temp_index = true ;
511
- }
512
-
513
- if (!use_temp_index) {
506
+ if (!generate_interim_index (field_id)) {
514
507
std::unique_lock lck (mutex_);
515
508
set_bit (field_data_ready_bitset_, field_id, true );
516
509
}
@@ -1746,8 +1739,12 @@ ChunkedSegmentSealedImpl::bulk_subscript(FieldId field_id,
1746
1739
if (count == 0 ) {
1747
1740
return fill_with_empty (field_id, count);
1748
1741
}
1749
-
1750
- if (HasIndex (field_id)) {
1742
+ AssertInfo (HasFieldData (field_id) || HasIndex (field_id),
1743
+ " Not found raw data in index or field data struct." );
1744
+ if (HasFieldData (field_id)) {
1745
+ Assert (get_bit (field_data_ready_bitset_, field_id));
1746
+ return get_raw_data (field_id, field_meta, seg_offsets, count);
1747
+ } else {
1751
1748
// if field has load scalar index, reverse raw data from index
1752
1749
if (!IsVectorDataType (field_meta.get_data_type ())) {
1753
1750
// AssertInfo(num_chunk() == 1,
@@ -1761,10 +1758,6 @@ ChunkedSegmentSealedImpl::bulk_subscript(FieldId field_id,
1761
1758
}
1762
1759
return get_vector (field_id, seg_offsets, count);
1763
1760
}
1764
-
1765
- Assert (get_bit (field_data_ready_bitset_, field_id));
1766
-
1767
- return get_raw_data (field_id, field_meta, seg_offsets, count);
1768
1761
}
1769
1762
1770
1763
std::unique_ptr<DataArray>
@@ -1821,15 +1814,22 @@ ChunkedSegmentSealedImpl::HasRawData(int64_t field_id) const {
1821
1814
auto fieldID = FieldId (field_id);
1822
1815
const auto & field_meta = schema_->operator [](fieldID);
1823
1816
if (IsVectorDataType (field_meta.get_data_type ())) {
1824
- if (get_bit (index_ready_bitset_, fieldID) |
1825
- get_bit (binlog_index_bitset_, fieldID)) {
1817
+ if (get_bit (index_ready_bitset_, fieldID)) {
1826
1818
AssertInfo (vector_indexings_.is_ready (fieldID),
1827
1819
" vector index is not ready" );
1828
1820
auto field_indexing = vector_indexings_.get_field_indexing (fieldID);
1829
1821
auto vec_index = dynamic_cast <index ::VectorIndex*>(
1830
1822
field_indexing->indexing_ .get ());
1831
1823
return vec_index->HasRawData ();
1832
1824
}
1825
+ } else if (get_bit (binlog_index_bitset_, fieldID)) {
1826
+ AssertInfo (vector_indexings_.is_ready (fieldID),
1827
+ " vector index is not ready" );
1828
+ auto field_indexing = vector_indexings_.get_field_indexing (fieldID);
1829
+ auto vec_index =
1830
+ dynamic_cast <index ::VectorIndex*>(field_indexing->indexing_ .get ());
1831
+ return vec_index->HasRawData () ||
1832
+ get_bit (field_data_ready_bitset_, fieldID);
1833
1833
} else {
1834
1834
auto scalar_index = scalar_indexings_.find (fieldID);
1835
1835
if (scalar_index != scalar_indexings_.end ()) {
@@ -2020,6 +2020,8 @@ ChunkedSegmentSealedImpl::generate_interim_index(const FieldId field_id) {
2020
2020
}
2021
2021
// check data type
2022
2022
if (field_meta.get_data_type () != DataType::VECTOR_FLOAT &&
2023
+ field_meta.get_data_type () != DataType::VECTOR_FLOAT16 &&
2024
+ field_meta.get_data_type () != DataType::VECTOR_BFLOAT16 &&
2023
2025
!is_sparse) {
2024
2026
return false ;
2025
2027
}
@@ -2065,16 +2067,50 @@ ChunkedSegmentSealedImpl::generate_interim_index(const FieldId field_id) {
2065
2067
is_sparse
2066
2068
? dynamic_cast <ChunkedSparseFloatColumn*>(vec_data.get ())->Dim ()
2067
2069
: field_meta.get_dim ();
2070
+ auto index_metric = field_binlog_config->GetMetricType ();
2071
+ std::unique_ptr<index ::VectorIndex> vec_index = nullptr ;
2072
+ if (!is_sparse) {
2073
+ knowhere::ViewDataOp view_data = [field_raw_data_ptr =
2074
+ vec_data](size_t id) {
2075
+ return field_raw_data_ptr->ValueAt (id);
2076
+ };
2077
+ if (field_meta.get_data_type () == DataType::VECTOR_FLOAT) {
2078
+ vec_index = std::make_unique<index ::VectorMemIndex<float >>(
2079
+ field_binlog_config->GetIndexType (),
2080
+ index_metric,
2081
+ knowhere::Version::GetCurrentVersion ().VersionNumber (),
2082
+ view_data);
2083
+ } else if (field_meta.get_data_type () == DataType::VECTOR_FLOAT16) {
2084
+ vec_index =
2085
+ std::make_unique<index ::VectorMemIndex<knowhere::fp16>>(
2086
+ field_binlog_config->GetIndexType (),
2087
+ index_metric,
2088
+ knowhere::Version::GetCurrentVersion ().VersionNumber (),
2089
+ view_data);
2090
+ } else if (field_meta.get_data_type () ==
2091
+ DataType::VECTOR_BFLOAT16) {
2092
+ vec_index =
2093
+ std::make_unique<index ::VectorMemIndex<knowhere::bf16>>(
2094
+ field_binlog_config->GetIndexType (),
2095
+ index_metric,
2096
+ knowhere::Version::GetCurrentVersion ().VersionNumber (),
2097
+ view_data);
2098
+ }
2099
+ } else {
2100
+ vec_index = std::make_unique<index ::VectorMemIndex<float >>(
2101
+ field_binlog_config->GetIndexType (),
2102
+ index_metric,
2103
+ knowhere::Version::GetCurrentVersion ().VersionNumber ());
2104
+ }
2105
+ if (vec_index == nullptr ) {
2106
+ LOG_INFO (" fail to generate intermin index, invalid data type." );
2107
+ return false ;
2108
+ }
2068
2109
2069
2110
auto build_config = field_binlog_config->GetBuildBaseParams ();
2070
2111
build_config[knowhere::meta::DIM] = std::to_string (dim);
2071
2112
build_config[knowhere::meta::NUM_BUILD_THREAD] = std::to_string (1 );
2072
- auto index_metric = field_binlog_config->GetMetricType ();
2073
2113
2074
- auto vec_index = std::make_unique<index ::VectorMemIndex<float >>(
2075
- field_binlog_config->GetIndexType (),
2076
- index_metric,
2077
- knowhere::Version::GetCurrentVersion ().VersionNumber ());
2078
2114
auto num_chunk = vec_data->num_chunks ();
2079
2115
for (int i = 0 ; i < num_chunk; ++i) {
2080
2116
auto dataset = knowhere::GenDataSet (
@@ -2091,19 +2127,26 @@ ChunkedSegmentSealedImpl::generate_interim_index(const FieldId field_id) {
2091
2127
2092
2128
if (enable_binlog_index ()) {
2093
2129
std::unique_lock lck (mutex_);
2130
+ if (vec_index->HasRawData ()) {
2131
+ fields_.erase (field_id);
2132
+ set_bit (field_data_ready_bitset_, field_id, false );
2133
+ } else {
2134
+ // some knowhere view-data indexes do not hold raw data, so keep the field data marked ready
2135
+ set_bit (field_data_ready_bitset_, field_id, true );
2136
+ }
2094
2137
vector_indexings_.append_field_indexing (
2095
2138
field_id, index_metric, std::move (vec_index));
2096
2139
2097
2140
vec_binlog_config_[field_id] = std::move (field_binlog_config);
2098
2141
set_bit (binlog_index_bitset_, field_id, true );
2099
2142
LOG_INFO (
2100
- " replace binlog with binlog index in segment {}, field {}." ,
2143
+ " replace binlog with intermin index in segment {}, field {}." ,
2101
2144
this ->get_segment_id (),
2102
2145
field_id.get ());
2103
2146
}
2104
2147
return true ;
2105
2148
} catch (std::exception & e) {
2106
- LOG_WARN (" fail to generate binlog index, because {}" , e.what ());
2149
+ LOG_WARN (" fail to generate intermin index, because {}" , e.what ());
2107
2150
return false ;
2108
2151
}
2109
2152
}
0 commit comments