Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
af115b9
Setup no-op Lucene90RandomAcessDictionaryPostingsFormat
Sep 19, 2023
554c34c
Merge branch 'apache:main' into ramdon_access_term_dict
Tony-X Oct 16, 2023
88afec3
Rename Lucene90RandomAcessDictionaryPostingsFormat to Lucene90RandomA…
Oct 16, 2023
d16c501
restrict class visibility
Oct 16, 2023
3299fe0
Support per-type term index based on FST<Long>
Oct 16, 2023
137d5d3
Move the code to be under sandbox
Oct 26, 2023
b758ec5
Add interfaces for encoding/decoding TermStates
Oct 26, 2023
7d35ed2
Make the concrete TermStateCodecComponents singletons
Oct 27, 2023
6a1506b
Fix the expected export module check
Oct 30, 2023
e06f303
Implment TermStateCodecComponent.getBitWidth for monotonically increa…
Oct 30, 2023
ea2c76f
Implement a codec (not Lucene Codec) for IntBlockTermState
Nov 2, 2023
c87713c
Add more javadoc and minor re-naming
Nov 2, 2023
322a0f0
TestTermStateCodecImpl to decode at non-block starting positions
Nov 2, 2023
0976ce7
Implement compact BitUnpacker
Nov 6, 2023
a90f608
Fix typo and improve error reporting
Nov 6, 2023
4374b1d
Merge branch 'apache:main' into ramdon_access_term_dict
Tony-X Nov 6, 2023
a5160ab
Rename the module from lucene90.* to lucene99.* to reflect upstream C…
Nov 6, 2023
ece7710
Implement compact generic byte-oriented BitPacker
Nov 7, 2023
0f3b5a1
Fix issues identfied by precommit checks
Nov 7, 2023
cc0751f
Remove unused member field `totalNumBytesWritten`
Nov 7, 2023
39e9e08
Test TermStateCodecImpl with real compact bit-packer
Nov 7, 2023
12f9c83
Implement TermStateCodecImpl.getCodec for (TermType, IndexOptions)
Nov 7, 2023
402965f
Implement term (type, ord) lookup in TermsIndex
Nov 9, 2023
3ce5ea9
create sub-package `termdict` to hold term dictionary implementions
Nov 13, 2023
fd9beca
Revert "create sub-package `termdict` to hold term dictionary impleme…
Nov 13, 2023
b3bf288
Setup sketch implementations for RandomAccessTermsDict
Nov 16, 2023
877d1cf
remove unneeded initialization of int to 0.
Nov 16, 2023
2cdfb04
Support serialize/deserialize for TermsStats
Nov 16, 2023
777c40d
Explictlty mark the generic type arugment of FSTCompiler<Long> in Ter…
Nov 16, 2023
8a0b1cc
Implement writing random-access term dictionary
Nov 18, 2023
644e57d
Merge branch 'apache:main' into ramdon_access_term_dict
Tony-X Nov 18, 2023
8396830
Fix build after mering from apahce:main
Nov 18, 2023
96d6e33
Test serailize/deserialize multiple fields' term dictionary
Nov 19, 2023
622e56f
Remove unused member in RandomAccessTermsDictWriter
Nov 20, 2023
cf1104d
Implement Lucene99RandomAccessTermsReader/Writer
Nov 20, 2023
4df3ad1
Move the package from sandbox to codecs
Nov 20, 2023
f57ddbb
Fix bugs in writing Lucene99RandomAccessTermsW
Nov 20, 2023
c66808d
Implement TermsEnum for Lucene99RandomAccess
Nov 21, 2023
ea572a3
Fix bugs found in tests
Nov 22, 2023
5a8efd3
Reduce index cloning calls when looking up terms
Nov 22, 2023
73601d7
Merge branch 'apache:main' into ramdon_access_term_dict
Tony-X Nov 22, 2023
8ab9139
Add Lucene99RandomAccessTermDictCodec
Nov 22, 2023
10d4181
Fix build after mering from apache:main
Nov 22, 2023
ac1b77f
Add missing javadoc
Nov 22, 2023
aa0074d
Optimize for less allocation
Nov 25, 2023
46b46e6
Make decode TermState allocation-free
Nov 25, 2023
2c875e7
Use ThreadLocal to reuse TermDataReader data objects
Nov 26, 2023
6a71a81
Forked FST.java to work with primtive long
Nov 26, 2023
e06665e
Allocate only one set of buffers in TermDataReader instead of one set…
Nov 26, 2023
35af1d2
Make TermDataReader lazily init its buffer and clone IndexInput
Nov 26, 2023
79c0fb3
Implement BytesRefPrimitiveLongFSTEnum that works with a primitive lo…
Nov 27, 2023
b74a05d
Fix getFirstArc() bug in PrimitiveLongFST.
Nov 27, 2023
f328e9f
Reuse single IntBlockTermState in TermDataReader
Nov 27, 2023
0aadef5
Don't create slice description when requesting random-access input slice
Nov 27, 2023
e70e712
Use primitive long FST for term lookup to avoid allocation from boxin…
Nov 27, 2023
3d21b1a
Make RAFDirectory resilient to `null` description when slicing
Nov 27, 2023
cd60a4f
Implement interesect
Nov 28, 2023
9239247
Lazy decode termstate in IntersectEnum
Nov 29, 2023
05743d9
Minor non-functionarly change for TermsIndexBuilder
Dec 9, 2023
93ed998
implement FST + FSA intersection that leverages fast addressing of ar…
Dec 9, 2023
c7e1568
Reuse stack frames to avoid allocating too many Arc and Transitions
Dec 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions lucene/codecs/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,19 @@
exports org.apache.lucene.codecs.simpletext;
exports org.apache.lucene.codecs.uniformsplit;
exports org.apache.lucene.codecs.uniformsplit.sharedterms;
exports org.apache.lucene.sandbox.codecs.lucene99.randomaccess;
exports org.apache.lucene.sandbox.codecs.lucene99.randomaccess.bitpacking;

provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat,
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat,
org.apache.lucene.codecs.memory.DirectPostingsFormat,
org.apache.lucene.codecs.memory.FSTPostingsFormat,
org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat,
org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPostingsFormat;
org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPostingsFormat,
org.apache.lucene.sandbox.codecs.lucene99.randomaccess
.Lucene99RandomAccessDictionaryPostingsFormat;
provides org.apache.lucene.codecs.Codec with
org.apache.lucene.codecs.simpletext.SimpleTextCodec;
org.apache.lucene.codecs.simpletext.SimpleTextCodec,
org.apache.lucene.sandbox.codecs.lucene99.randomaccess.Lucene99RandomAccessTermDictCodec;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.sandbox.codecs.lucene99.randomaccess;

import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BitUtil;

/**
 * A {@link ByteSlice} backed by an on-heap {@code byte[]}.
 *
 * <p>The array handed to the constructor is used directly; it is not copied. Positions are
 * declared as {@code long} to satisfy {@link ByteSlice}, but they must fit in an {@code int}
 * because they index a Java array. A position outside the {@code int} range is rejected with an
 * {@link ArithmeticException} instead of being silently truncated (a plain {@code (int)} cast
 * could wrap onto a valid index and return unrelated bytes).
 */
final class ByteArrayByteSlice implements ByteSlice {
  private final byte[] bytes;

  /**
   * Wraps the given array.
   *
   * @param bytes backing array; retained by reference
   */
  ByteArrayByteSlice(byte[] bytes) {
    this.bytes = bytes;
  }

  /** Returns the length of the backing array. */
  @Override
  public long size() {
    return bytes.length;
  }

  /** Writes the whole backing array to {@code output}. */
  @Override
  public void writeAll(DataOutput output) throws IOException {
    output.writeBytes(bytes, bytes.length);
  }

  /**
   * Reads a {@code long} at {@code pos} through {@link BitUtil#VH_LE_LONG}.
   *
   * @throws ArithmeticException if {@code pos} does not fit in an {@code int}
   */
  @Override
  public long getLong(long pos) {
    return (long) BitUtil.VH_LE_LONG.get(bytes, Math.toIntExact(pos));
  }

  /**
   * Returns a freshly allocated copy of {@code length} bytes starting at {@code pos}.
   *
   * @throws ArithmeticException if {@code length} is non-zero and {@code pos} does not fit in an
   *     {@code int}
   */
  @Override
  public byte[] getBytes(long pos, int length) {
    if (length == 0) {
      // Nothing to copy; pos is intentionally not inspected in this case.
      return new byte[0];
    }
    byte[] result = new byte[length];
    System.arraycopy(bytes, Math.toIntExact(pos), result, 0, length);
    return result;
  }

  /**
   * Copies {@code length} bytes starting at {@code pos} into {@code destination}, beginning at
   * index 0 of {@code destination}.
   *
   * @throws ArithmeticException if {@code length} is non-zero and {@code pos} does not fit in an
   *     {@code int}
   */
  @Override
  public void readBytesTo(byte[] destination, long pos, int length) {
    if (length == 0) {
      // Nothing to copy; pos is intentionally not inspected in this case.
      return;
    }
    System.arraycopy(bytes, Math.toIntExact(pos), destination, 0, length);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.sandbox.codecs.lucene99.randomaccess;

import java.io.IOException;
import org.apache.lucene.store.DataOutput;

/**
 * A slice of bytes that can report its size, be written out in full, and be read at a given
 * position.
 *
 * <p>Positions are {@code long}. Every read method may throw {@link IOException}; the array-backed
 * {@code ByteArrayByteSlice} implementation in this package does not declare it.
 */
interface ByteSlice {
/** Returns the size of this slice. {@code ByteArrayByteSlice} reports its array length. */
long size();

/**
 * Writes the content of this slice to {@code output}.
 *
 * @param output destination to write to
 * @throws IOException if writing fails
 */
void writeAll(DataOutput output) throws IOException;

/**
 * Reads a {@code long} starting at position {@code pos}.
 *
 * <p>NOTE(review): {@code ByteArrayByteSlice} reads through {@code BitUtil.VH_LE_LONG};
 * presumably every implementation is expected to use the same byte order -- confirm.
 *
 * @param pos position to read from
 * @throws IOException if reading fails
 */
long getLong(long pos) throws IOException;

/**
 * Returns {@code length} bytes starting at position {@code pos}.
 *
 * @param pos position of the first byte
 * @param length number of bytes to return
 * @throws IOException if reading fails
 */
byte[] getBytes(long pos, int length) throws IOException;

/**
 * Reads {@code length} bytes starting at position {@code pos} into {@code destination}.
 *
 * <p>{@code ByteArrayByteSlice} places them starting at index 0 of {@code destination}.
 *
 * @param destination array receiving the bytes
 * @param pos position of the first byte to read
 * @param length number of bytes to read
 * @throws IOException if reading fails
 */
void readBytesTo(byte[] destination, long pos, int length) throws IOException;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.sandbox.codecs.lucene99.randomaccess;

import java.io.IOException;

/**
 * Factory of {@link ByteSlice}.
 *
 * <p>Declared as a functional interface, so it can be supplied as a lambda or method reference.
 */
@FunctionalInterface
interface ByteSliceProvider {
/**
 * Produces a {@link ByteSlice}.
 *
 * <p>NOTE(review): the name suggests a fresh slice per call, but nothing here enforces it --
 * confirm against implementations.
 *
 * @throws IOException if the slice cannot be created
 */
ByteSlice newByteSlice() throws IOException;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.codecs.lucene99.randomaccess;

import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99PostingsReader;
import org.apache.lucene.codecs.lucene99.Lucene99PostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;

/**
 * Similar to {@link Lucene99PostingsFormat} but with a different term dictionary implementation.
 *
 * <p>Postings are written and read with {@link Lucene99PostingsWriter} and {@link
 * Lucene99PostingsReader}; the terms dictionary is handled by {@code
 * Lucene99RandomAccessTermsWriter} and {@code Lucene99RandomAccessTermsReader}.
 *
 * @lucene.experimental
 */
public final class Lucene99RandomAccessDictionaryPostingsFormat extends PostingsFormat {
  // Codec names for the headers of the term dictionary files.
  // These are constants: declared final so they cannot be reassigned at runtime.
  static final String TERM_DICT_META_HEADER_CODEC_NAME = "RandomAccessTermsDict";
  static final String TERM_INDEX_HEADER_CODEC_NAME = "RandomAccessTermsDictIndex";
  static final String TERM_DATA_META_HEADER_CODEC_NAME_PREFIX = "RandomAccessTermsDictTermDataMeta";
  static final String TERM_DATA_HEADER_CODEC_NAME_PREFIX = "RandomAccessTermsDictTermData";

  // File extensions (or extension prefixes) of the term dictionary files.
  static final String TERM_DICT_META_INFO_EXTENSION = "tmeta";
  static final String TERM_INDEX_EXTENSION = "tidx";
  static final String TERM_DATA_META_EXTENSION_PREFIX = "tdm";
  static final String TERM_DATA_EXTENSION_PREFIX = "tdd";

  // Increment version to change it
  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = VERSION_START;

  /** Creates {@code Lucene99RandomAccessDictionaryPostingsFormat} */
  public Lucene99RandomAccessDictionaryPostingsFormat() {
    super("Lucene99RandomAccess");
  }

  @Override
  public String toString() {
    return getName();
  }

  /**
   * Creates the postings writer and wraps it in the random-access terms writer.
   *
   * <p>If constructing the terms writer fails, the already opened postings writer is closed
   * before the failure propagates.
   */
  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    Lucene99PostingsWriter postingsWriter = new Lucene99PostingsWriter(state);
    boolean success = false;
    try {
      FieldsConsumer ret = new Lucene99RandomAccessTermsWriter(state, postingsWriter);
      success = true;
      return ret;
    } finally {
      if (!success) {
        // Do not leak the postings writer; suppress secondary exceptions from close().
        IOUtils.closeWhileHandlingException(postingsWriter);
      }
    }
  }

  /**
   * Creates the postings reader and wraps it in the random-access terms reader.
   *
   * <p>If constructing the terms reader fails, the already opened postings reader is closed
   * before the failure propagates.
   */
  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    Lucene99PostingsReader postingsReader = new Lucene99PostingsReader(state);
    boolean success = false;
    try {
      FieldsProducer ret = new Lucene99RandomAccessTermsReader(postingsReader, state);
      success = true;
      return ret;
    } finally {
      if (!success) {
        // Do not leak the postings reader; suppress secondary exceptions from close().
        IOUtils.closeWhileHandlingException(postingsReader);
      }
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.sandbox.codecs.lucene99.randomaccess;

import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;

/**
 * A Codec that uses {@link Lucene99RandomAccessDictionaryPostingsFormat} on top of {@link
 * Lucene99Codec}
 *
 * <p>Every field is mapped to the same {@link Lucene99RandomAccessDictionaryPostingsFormat}
 * instance through a {@link PerFieldPostingsFormat}; all other formats are delegated to the
 * wrapped {@link Lucene99Codec}.
 */
public class Lucene99RandomAccessTermDictCodec extends FilterCodec {
  private final Lucene99RandomAccessDictionaryPostingsFormat lucene99RandomAccessPostingsFormat =
      new Lucene99RandomAccessDictionaryPostingsFormat();

  // Built once and reused, instead of allocating a new anonymous instance on every
  // postingsFormat() call.
  private final PostingsFormat perFieldPostingsFormat =
      new PerFieldPostingsFormat() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
          return lucene99RandomAccessPostingsFormat;
        }
      };

  /** Creates the codec, delegating to a new {@link Lucene99Codec}. */
  public Lucene99RandomAccessTermDictCodec() {
    super("Lucene99RandomAccessTermDict", new Lucene99Codec());
  }

  /** Returns the per-field postings format that routes every field to the random-access format. */
  @Override
  public PostingsFormat postingsFormat() {
    return perFieldPostingsFormat;
  }
}
Loading