From 16cc34e810b391b80a34c599019d220f16e672ca Mon Sep 17 00:00:00 2001 From: twosom Date: Sun, 20 Jul 2025 15:32:14 +0900 Subject: [PATCH 1/4] feat(nori): add metadata support to Korean tokenizer --- .../lucene/analysis/ko/DecompoundToken.java | 15 ++++++- .../lucene/analysis/ko/DictionaryToken.java | 18 ++++++++ .../lucene/analysis/ko/KoreanTokenizer.java | 3 ++ .../org/apache/lucene/analysis/ko/Token.java | 2 + .../apache/lucene/analysis/ko/Viterbi.java | 15 ++++++- .../lucene/analysis/ko/dict/KoMorphData.java | 6 ++- .../analysis/ko/dict/UserDictionary.java | 13 +++++- .../analysis/ko/dict/UserMorphData.java | 12 +++++- .../ko/tokenattributes/MetadataAttribute.java | 22 ++++++++++ .../MetadataAttributeImpl.java | 43 +++++++++++++++++++ .../analysis/ko/TestKoreanTokenizer.java | 22 ++++++++++ .../apache/lucene/analysis/ko/userdict.txt | 4 ++ 12 files changed, 168 insertions(+), 7 deletions(-) create mode 100644 lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java create mode 100644 lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java index bdd30ee28128..d58cec2dd7e6 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java @@ -22,6 +22,7 @@ /** A token that was generated from a compound. */ public class DecompoundToken extends Token { private final POS.Tag posTag; + private final String metadata; /** * Creates a new DecompoundToken @@ -31,11 +32,18 @@ public class DecompoundToken extends Token { * @param startOffset The start offset of the token in the analyzed text. * @param endOffset The end offset of the token in the analyzed text. * @param type The type of this token. 
+ * @param metadata The metadata of this token. */ public DecompoundToken( - POS.Tag posTag, String surfaceForm, int startOffset, int endOffset, TokenType type) { + POS.Tag posTag, + String surfaceForm, + int startOffset, + int endOffset, + TokenType type, + String metadata) { super(surfaceForm.toCharArray(), 0, surfaceForm.length(), startOffset, endOffset, type); this.posTag = posTag; + this.metadata = metadata; } @Override @@ -77,4 +85,9 @@ public String getReading() { public KoMorphData.Morpheme[] getMorphemes() { return null; } + + @Override + public String getMetadata() { + return metadata; + } } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java index 3d04515f2f44..5ac4be6fe213 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java @@ -23,6 +23,7 @@ public class DictionaryToken extends Token { private final int wordId; private final KoMorphData morphAtts; + private String metadata = null; public DictionaryToken( TokenType type, @@ -108,4 +109,21 @@ public String getReading() { public KoMorphData.Morpheme[] getMorphemes() { return morphAtts.getMorphemes(wordId, getSurfaceForm(), getOffset(), getLength()); } + + @Override + public String getMetadata() { + return this.metadata; + } + + public void setMetadata(String metadata) { + this.metadata = metadata; + } + + public int getWordId() { + return wordId; + } + + public KoMorphData getMorphAtts() { + return morphAtts; + } } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java index 15bdb3422ad7..3fcac7c28d12 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java +++ 
b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.ko.dict.TokenInfoFST; import org.apache.lucene.analysis.ko.dict.UnknownDictionary; import org.apache.lucene.analysis.ko.dict.UserDictionary; +import org.apache.lucene.analysis.ko.tokenattributes.MetadataAttribute; import org.apache.lucene.analysis.ko.tokenattributes.PartOfSpeechAttribute; import org.apache.lucene.analysis.ko.tokenattributes.ReadingAttribute; import org.apache.lucene.analysis.morph.GraphvizFormatter; @@ -77,6 +78,7 @@ public enum DecompoundMode { private final Viterbi viterbi; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final MetadataAttribute metadataAtt = addAttribute(MetadataAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); @@ -233,6 +235,7 @@ public boolean incrementToken() throws IOException { // System.out.println("off=" + token.getOffset() + " len=" + length + " vs " + // token.getSurfaceForm().length); termAtt.copyBuffer(token.getSurfaceForm(), token.getOffset(), length); + metadataAtt.setToken(token); offsetAtt.setOffset(correctOffset(token.getStartOffset()), correctOffset(token.getEndOffset())); posAtt.setToken(token); readingAtt.setToken(token); diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java index 480d4abf4101..7fd530c977b1 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java @@ -44,4 +44,6 @@ protected Token( * token. 
*/ public abstract KoMorphData.Morpheme[] getMorphemes(); + + public abstract String getMetadata(); } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java index 8ba9cb36979f..f9dc5ff5d7d2 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java @@ -18,11 +18,13 @@ import java.io.IOException; import java.util.EnumMap; + import org.apache.lucene.analysis.ko.dict.CharacterDefinition; import org.apache.lucene.analysis.ko.dict.KoMorphData; import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary; import org.apache.lucene.analysis.ko.dict.UnknownDictionary; import org.apache.lucene.analysis.ko.dict.UserDictionary; +import org.apache.lucene.analysis.ko.dict.UserMorphData; import org.apache.lucene.analysis.morph.ConnectionCosts; import org.apache.lucene.analysis.morph.Dictionary; import org.apache.lucene.analysis.morph.GraphvizFormatter; @@ -248,6 +250,10 @@ protected void backtrace(Position endPosData, int fromIDX) { if (token.getPOSType() == POS.Type.MORPHEME || mode == KoreanTokenizer.DecompoundMode.NONE) { if (shouldFilterToken(token) == false) { + if (token.getMorphAtts() instanceof UserMorphData userMorphData) { + final String metadata = userMorphData.metadatas[token.getWordId()]; + token.setMetadata(metadata); + } pending.add(token); if (VERBOSE) { System.out.println(" add token=" + pending.get(pending.size() - 1)); @@ -264,9 +270,11 @@ protected void backtrace(Position endPosData, int fromIDX) { int endOffset = backWordPos + length; int posLen = 0; // decompose the compound + String metadata = null; for (int i = morphemes.length - 1; i >= 0; i--) { final KoMorphData.Morpheme morpheme = morphemes[i]; final Token compoundToken; + metadata = morpheme.metadata(); if (token.getPOSType() == POS.Type.COMPOUND) { assert endOffset - 
morpheme.surfaceForm().length() >= 0; compoundToken = @@ -275,7 +283,8 @@ protected void backtrace(Position endPosData, int fromIDX) { morpheme.surfaceForm(), endOffset - morpheme.surfaceForm().length(), endOffset, - backType); + backType, + metadata); } else { compoundToken = new DecompoundToken( @@ -283,7 +292,8 @@ protected void backtrace(Position endPosData, int fromIDX) { morpheme.surfaceForm(), token.getStartOffset(), token.getEndOffset(), - backType); + backType, + metadata); } if (i == 0 && mode == KoreanTokenizer.DecompoundMode.MIXED) { compoundToken.setPositionIncrement(0); @@ -297,6 +307,7 @@ protected void backtrace(Position endPosData, int fromIDX) { } if (mode == KoreanTokenizer.DecompoundMode.MIXED) { token.setPositionLength(Math.max(1, posLen)); + token.setMetadata(metadata); pending.add(token); if (VERBOSE) { System.out.println(" add token=" + pending.get(pending.size() - 1)); diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java index 2b2c53a4d6db..530dda3c0636 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java @@ -22,7 +22,11 @@ /** Represents Korean morphological information. */ public interface KoMorphData extends MorphData { /** A morpheme extracted from a compound token. 
*/ - record Morpheme(POS.Tag posTag, String surfaceForm) {} + record Morpheme(POS.Tag posTag, String surfaceForm, String metadata) { + public Morpheme(POS.Tag posTag, String surfaceForm) { + this(posTag, surfaceForm, null); + } + } /** * Get the {@link org.apache.lucene.analysis.ko.POS.Type} of specified word (morpheme, compound, diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java index 00767e0f8dd6..9c7d893abe55 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java @@ -34,6 +34,7 @@ * (세종시 세종 시). */ public final class UserDictionary implements Dictionary { + public static final String METADATA_SEPARATOR = " >> "; // text -> wordID private final TokenInfoFST fst; @@ -82,10 +83,19 @@ private UserDictionary(List entries) throws IOException { String lastToken = null; List _segmentations = new ArrayList<>(entries.size()); + List _metadatas = new ArrayList<>(entries.size()); short[] rightIds = new short[entries.size()]; long ord = 0; int entryIndex = 0; for (String entry : entries) { + if (entry.contains(METADATA_SEPARATOR)) { + var split = entry.split(METADATA_SEPARATOR); + entry = split[0]; + var metadata = split[1]; + _metadatas.add(metadata); + } else { + _metadatas.add(null); + } String[] splits = entry.split("\\s+"); String token = splits[0]; if (token.equals(lastToken)) { @@ -138,7 +148,8 @@ private UserDictionary(List entries) throws IOException { this.fst = new TokenInfoFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader())); int[][] segmentations = _segmentations.toArray(new int[_segmentations.size()][]); - this.morphAtts = new UserMorphData(segmentations, rightIds); + String[] metadatas = _metadatas.toArray(String[]::new); + this.morphAtts = new UserMorphData(segmentations, 
rightIds, metadatas); } public TokenInfoFST getFST() { diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java index 2056bd0829c9..6783f0f9d123 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java @@ -19,7 +19,7 @@ import org.apache.lucene.analysis.ko.POS; /** Morphological information for user dictionary. */ -final class UserMorphData implements KoMorphData { +public final class UserMorphData implements KoMorphData { private static final int WORD_COST = -100000; // NNG left @@ -28,12 +28,18 @@ final class UserMorphData implements KoMorphData { // length, length... indexed by compound ID or null for simple noun private final int[][] segmentations; private final short[] rightIds; + public String[] metadatas; UserMorphData(int[][] segmentations, short[] rightIds) { this.segmentations = segmentations; this.rightIds = rightIds; } + UserMorphData(int[][] segmentations, short[] rightIds, String[] metadatas) { + this(segmentations, rightIds); + this.metadatas = metadatas; + } + @Override public int getLeftId(int morphId) { return LEFT_ID; @@ -79,10 +85,12 @@ public Morpheme[] getMorphemes(int morphId, char[] surfaceForm, int off, int len if (segs == null) { return null; } + String metadata = metadatas[morphId]; int offset = 0; Morpheme[] morphemes = new Morpheme[segs.length]; for (int i = 0; i < segs.length; i++) { - morphemes[i] = new Morpheme(POS.Tag.NNG, new String(surfaceForm, off + offset, segs[i])); + morphemes[i] = + new Morpheme(POS.Tag.NNG, new String(surfaceForm, off + offset, segs[i]), metadata); offset += segs[i]; } return morphemes; diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java 
b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java new file mode 100644 index 000000000000..2fe20cd59b7a --- /dev/null +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java @@ -0,0 +1,22 @@ +package org.apache.lucene.analysis.ko.tokenattributes; + +import org.apache.lucene.analysis.ko.Token; +import org.apache.lucene.util.Attribute; + +/** + * Attribute for Korean token metadata. + * + *

<p>This attribute provides access to additional metadata associated with Korean tokens, + * particularly from user dictionaries and compound word morphemes. + * + * <p>
Note: in some cases this value may not be applicable, and will be null. + * + * @lucene.experimental + */ +public interface MetadataAttribute extends Attribute { + /** Get the metadata string of the token. */ + String getMetadata(); + + /** Set the current token. */ + void setToken(Token token); +} diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java new file mode 100644 index 000000000000..b2886821cbd6 --- /dev/null +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java @@ -0,0 +1,43 @@ +package org.apache.lucene.analysis.ko.tokenattributes; + +import org.apache.lucene.analysis.ko.Token; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; + +/** + * Attribute implementation for Korean token metadata. + * + * @lucene.experimental + */ +public class MetadataAttributeImpl extends AttributeImpl implements MetadataAttribute { + private Token token; + + @Override + public String getMetadata() { + if (this.token != null) { + return this.token.getMetadata(); + } + return null; + } + + @Override + public void setToken(Token token) { + this.token = token; + } + + @Override + public void clear() { + this.token = null; + } + + @Override + public void copyTo(AttributeImpl target) { + final MetadataAttribute t = (MetadataAttribute) target; + t.setToken(this.token); + } + + @Override + public void reflectWith(AttributeReflector reflector) { + reflector.reflect(MetadataAttribute.class, "metadata", getMetadata()); + } +} diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java index 6ce60a53e3f3..484295b64aa3 100644 --- 
a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java +++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java @@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode; import org.apache.lucene.analysis.ko.dict.UserDictionary; +import org.apache.lucene.analysis.ko.tokenattributes.MetadataAttribute; import org.apache.lucene.analysis.ko.tokenattributes.PartOfSpeechAttribute; import org.apache.lucene.analysis.ko.tokenattributes.ReadingAttribute; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; @@ -576,6 +577,27 @@ public void testDuplicate() throws IOException { } } + public void testMetadataAttribute() throws IOException { + assertMetadata(analyzer, "자바", "컴퓨터 언어"); + assertMetadata(analyzer, "java", "컴퓨터 언어"); + assertMetadata(analyzer, "엘라스틱서치", "검색 엔진"); + + assertMetadata(analyzerDecompoundKeep, "엘라스틱서치", "검색 엔진"); + } + + private void assertMetadata(Analyzer analyzer, String input, String metadata) throws IOException { + try (TokenStream ts = analyzer.tokenStream("ignored", input)) { + final MetadataAttribute metadataAtt = ts.addAttribute(MetadataAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + assertNotNull(metadataAtt.getMetadata()); + assertEquals(metadata, metadataAtt.getMetadata()); + } + assertFalse(ts.incrementToken()); + ts.end(); + } + } + private void assertReadings(Analyzer analyzer, String input, String... 
readings) throws IOException { try (TokenStream ts = analyzer.tokenStream("ignored", input)) { diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt index d8503230735c..c76f5e0bd2a6 100644 --- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt +++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt @@ -8,3 +8,7 @@ C샤프 날씨 21세기대한민국 세기 +# With Metadata +자바 >> 컴퓨터 언어 +java >> 컴퓨터 언어 +엘라스틱서치 엘라스틱 서치 >> 검색 엔진 From 81ce2c8751feb3b1901afad1b33031d5427486f0 Mon Sep 17 00:00:00 2001 From: twosom Date: Sun, 20 Jul 2025 15:46:04 +0900 Subject: [PATCH 2/4] chore : add CHANGES.txt --- lucene/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e625456a70c4..1fa0bc223c67 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -37,6 +37,8 @@ New Features * GITHUB#14792: Introduced OffHeapQuantizedFloatVectorValues class to access float vectors when only quantized byte vectors are available in the index. (Pulkit Gupta) +* GITHUB#14969: Add metadata support to Nori Korean analyzer tokens, allowing users to attach additional information to dictionary words. 
(twosom) + Improvements --------------------- From f516e53fa662080c2bd6eba61fa9461543421537 Mon Sep 17 00:00:00 2001 From: twosom Date: Sun, 20 Jul 2025 15:52:26 +0900 Subject: [PATCH 3/4] chore : spotless apply --- .../nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java index f9dc5ff5d7d2..0e24dc53d8d4 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java @@ -18,7 +18,6 @@ import java.io.IOException; import java.util.EnumMap; - import org.apache.lucene.analysis.ko.dict.CharacterDefinition; import org.apache.lucene.analysis.ko.dict.KoMorphData; import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary; From 66ed9058f6fa91e06ab7bf22bcac3f7862ef1767 Mon Sep 17 00:00:00 2001 From: twosom Date: Sun, 20 Jul 2025 16:02:18 +0900 Subject: [PATCH 4/4] chore : add license header --- .../ko/tokenattributes/MetadataAttribute.java | 16 ++++++++++++++++ .../tokenattributes/MetadataAttributeImpl.java | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java index 2fe20cd59b7a..c4797124b4df 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.lucene.analysis.ko.tokenattributes; import org.apache.lucene.analysis.ko.Token; diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java index b2886821cbd6..2d92b6ccf1f9 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.lucene.analysis.ko.tokenattributes; import org.apache.lucene.analysis.ko.Token;