17
17
package org .apache .lucene .document ;
18
18
19
19
import com .carrotsearch .randomizedtesting .annotations .TimeoutSuite ;
20
- import java .nio .file .Path ;
21
20
import org .apache .lucene .index .DirectoryReader ;
22
21
import org .apache .lucene .index .IndexWriter ;
23
22
import org .apache .lucene .index .IndexWriterConfig ;
24
23
import org .apache .lucene .index .TieredMergePolicy ;
25
24
import org .apache .lucene .index .VectorSimilarityFunction ;
26
25
import org .apache .lucene .search .IndexSearcher ;
27
26
import org .apache .lucene .search .KnnFloatVectorQuery ;
28
- import org .apache .lucene .search .MatchAllDocsQuery ;
29
- import org .apache .lucene .search .MatchNoDocsQuery ;
30
- import org .apache .lucene .search .Query ;
31
- import org .apache .lucene .search .SeededKnnFloatVectorQuery ;
32
27
import org .apache .lucene .search .TopDocs ;
33
28
import org .apache .lucene .store .Directory ;
34
29
import org .apache .lucene .store .FSDirectory ;
35
30
import org .apache .lucene .tests .codecs .vector .ConfigurableMCodec ;
36
31
import org .apache .lucene .tests .util .LuceneTestCase ;
37
32
import org .apache .lucene .tests .util .LuceneTestCase .Monster ;
38
- import org .junit .BeforeClass ;
39
33
40
34
@ TimeoutSuite (millis = 86_400_000 ) // 24 hour timeout
41
35
@ Monster ("takes ~10 minutes and needs extra heap, disk space, file handles" )
42
36
public class TestManyKnnDocs extends LuceneTestCase {
43
37
// gradlew -p lucene/core test --tests TestManyKnnDocs -Ptests.heapsize=16g -Dtests.monster=true
44
38
45
- private static Path testDir ;
46
-
47
- @ BeforeClass
48
- public static void init_index () throws Exception {
39
+ public void testLargeSegment () throws Exception {
49
40
IndexWriterConfig iwc = new IndexWriterConfig ();
50
41
iwc .setCodec (
51
42
new ConfigurableMCodec (
@@ -55,138 +46,27 @@ public static void init_index() throws Exception {
55
46
mp .setMaxMergeAtOnce (256 ); // avoid intermediate merges (waste of time with HNSW?)
56
47
mp .setSegmentsPerTier (256 ); // only merge once at the end when we ask
57
48
iwc .setMergePolicy (mp );
49
+ String fieldName = "field" ;
58
50
VectorSimilarityFunction similarityFunction = VectorSimilarityFunction .DOT_PRODUCT ;
59
51
60
- try (Directory dir = FSDirectory .open (testDir = createTempDir ("ManyKnnVectorDocs" ));
52
+ try (Directory dir = FSDirectory .open (createTempDir ("ManyKnnVectorDocs" ));
61
53
IndexWriter iw = new IndexWriter (dir , iwc )) {
62
54
63
55
int numVectors = 2088992 ;
56
+ float [] vector = new float [1 ];
57
+ Document doc = new Document ();
58
+ doc .add (new KnnFloatVectorField (fieldName , vector , similarityFunction ));
64
59
for (int i = 0 ; i < numVectors ; i ++) {
65
- float [] vector = new float [1 ];
66
- Document doc = new Document ();
67
60
vector [0 ] = (i % 256 );
68
- doc .add (new KnnFloatVectorField ("field" , vector , similarityFunction ));
69
- doc .add (new KeywordField ("int" , "" + i , org .apache .lucene .document .Field .Store .YES ));
70
- doc .add (new StoredField ("intValue" , i ));
71
61
iw .addDocument (doc );
72
62
}
73
63
74
64
// merge to single segment and then verify
75
65
iw .forceMerge (1 );
76
66
iw .commit ();
77
- }
78
- }
79
-
80
- public void testLargeSegmentKnn () throws Exception {
81
- try (Directory dir = FSDirectory .open (testDir )) {
82
67
IndexSearcher searcher = new IndexSearcher (DirectoryReader .open (dir ));
83
- for (int i = 0 ; i < 256 ; i ++) {
84
- Query filterQuery = new MatchAllDocsQuery ();
85
- float [] vector = new float [128 ];
86
- vector [0 ] = i ;
87
- vector [1 ] = 1 ;
88
- TopDocs docs =
89
- searcher .search (new KnnFloatVectorQuery ("field" , vector , 10 , filterQuery ), 5 );
90
- assertEquals (5 , docs .scoreDocs .length );
91
- Document d = searcher .storedFields ().document (docs .scoreDocs [0 ].doc );
92
- String s = "" ;
93
- for (int j = 0 ; j < docs .scoreDocs .length - 1 ; j ++) {
94
- s += docs .scoreDocs [j ].doc + " " + docs .scoreDocs [j ].score + "\n " ;
95
- }
96
- assertEquals (s , i + 256 , d .getField ("intValue" ).numericValue ());
97
- }
98
- }
99
- }
100
-
101
- public void testLargeSegmentSeededExact () throws Exception {
102
- try (Directory dir = FSDirectory .open (testDir )) {
103
- IndexSearcher searcher = new IndexSearcher (DirectoryReader .open (dir ));
104
- for (int i = 0 ; i < 256 ; i ++) {
105
- Query seedQuery = KeywordField .newExactQuery ("int" , "" + (i + 256 ));
106
- Query filterQuery = new MatchAllDocsQuery ();
107
- float [] vector = new float [128 ];
108
- vector [0 ] = i ;
109
- vector [1 ] = 1 ;
110
- TopDocs docs =
111
- searcher .search (
112
- new SeededKnnFloatVectorQuery ("field" , vector , 10 , filterQuery , seedQuery ), 5 );
113
- assertEquals (5 , docs .scoreDocs .length );
114
- String s = "" ;
115
- for (int j = 0 ; j < docs .scoreDocs .length - 1 ; j ++) {
116
- s += docs .scoreDocs [j ].doc + " " + docs .scoreDocs [j ].score + "\n " ;
117
- }
118
- Document d = searcher .storedFields ().document (docs .scoreDocs [0 ].doc );
119
- assertEquals (s , i + 256 , d .getField ("intValue" ).numericValue ());
120
- }
121
- }
122
- }
123
-
124
- public void testLargeSegmentSeededNearby () throws Exception {
125
- try (Directory dir = FSDirectory .open (testDir )) {
126
- IndexSearcher searcher = new IndexSearcher (DirectoryReader .open (dir ));
127
- for (int i = 0 ; i < 256 ; i ++) {
128
- Query seedQuery = KeywordField .newExactQuery ("int" , "" + i );
129
- Query filterQuery = new MatchAllDocsQuery ();
130
- float [] vector = new float [128 ];
131
- vector [0 ] = i ;
132
- vector [1 ] = 1 ;
133
- TopDocs docs =
134
- searcher .search (
135
- new SeededKnnFloatVectorQuery ("field" , vector , 10 , filterQuery , seedQuery ), 5 );
136
- assertEquals (5 , docs .scoreDocs .length );
137
- String s = "" ;
138
- for (int j = 0 ; j < docs .scoreDocs .length - 1 ; j ++) {
139
- s += docs .scoreDocs [j ].doc + " " + docs .scoreDocs [j ].score + "\n " ;
140
- }
141
- Document d = searcher .storedFields ().document (docs .scoreDocs [0 ].doc );
142
- assertEquals (s , i + 256 , d .getField ("intValue" ).numericValue ());
143
- }
144
- }
145
- }
146
-
147
- public void testLargeSegmentSeededDistant () throws Exception {
148
- try (Directory dir = FSDirectory .open (testDir )) {
149
- IndexSearcher searcher = new IndexSearcher (DirectoryReader .open (dir ));
150
- for (int i = 0 ; i < 256 ; i ++) {
151
- Query seedQuery = KeywordField .newExactQuery ("int" , "" + (i + 128 ));
152
- Query filterQuery = new MatchAllDocsQuery ();
153
- float [] vector = new float [128 ];
154
- vector [0 ] = i ;
155
- vector [1 ] = 1 ;
156
- TopDocs docs =
157
- searcher .search (
158
- new SeededKnnFloatVectorQuery ("field" , vector , 10 , filterQuery , seedQuery ), 5 );
159
- assertEquals (5 , docs .scoreDocs .length );
160
- Document d = searcher .storedFields ().document (docs .scoreDocs [0 ].doc );
161
- String s = "" ;
162
- for (int j = 0 ; j < docs .scoreDocs .length - 1 ; j ++) {
163
- s += docs .scoreDocs [j ].doc + " " + docs .scoreDocs [j ].score + "\n " ;
164
- }
165
- assertEquals (s , i + 256 , d .getField ("intValue" ).numericValue ());
166
- }
167
- }
168
- }
169
-
170
- public void testLargeSegmentSeededNone () throws Exception {
171
- try (Directory dir = FSDirectory .open (testDir )) {
172
- IndexSearcher searcher = new IndexSearcher (DirectoryReader .open (dir ));
173
- for (int i = 0 ; i < 256 ; i ++) {
174
- Query seedQuery = new MatchNoDocsQuery ();
175
- Query filterQuery = new MatchAllDocsQuery ();
176
- float [] vector = new float [128 ];
177
- vector [0 ] = i ;
178
- vector [1 ] = 1 ;
179
- TopDocs docs =
180
- searcher .search (
181
- new SeededKnnFloatVectorQuery ("field" , vector , 10 , filterQuery , seedQuery ), 5 );
182
- assertEquals (5 , docs .scoreDocs .length );
183
- Document d = searcher .storedFields ().document (docs .scoreDocs [0 ].doc );
184
- String s = "" ;
185
- for (int j = 0 ; j < docs .scoreDocs .length - 1 ; j ++) {
186
- s += docs .scoreDocs [j ].doc + " " + docs .scoreDocs [j ].score + "\n " ;
187
- }
188
- assertEquals (s , i + 256 , d .getField ("intValue" ).numericValue ());
189
- }
68
+ TopDocs docs = searcher .search (new KnnFloatVectorQuery ("field" , new float [] {120 }, 10 ), 5 );
69
+ assertEquals (5 , docs .scoreDocs .length );
190
70
}
191
71
}
192
72
}
0 commit comments