From 16cc34e810b391b80a34c599019d220f16e672ca Mon Sep 17 00:00:00 2001
From: twosom <two_somang@icloud.com>
Date: Sun, 20 Jul 2025 15:32:14 +0900
Subject: [PATCH 1/4] feat(nori): add metadata support to Korean tokenizer

---
 .../lucene/analysis/ko/DecompoundToken.java   | 15 ++++++-
 .../lucene/analysis/ko/DictionaryToken.java   | 18 ++++++++
 .../lucene/analysis/ko/KoreanTokenizer.java   |  3 ++
 .../org/apache/lucene/analysis/ko/Token.java  |  2 +
 .../apache/lucene/analysis/ko/Viterbi.java    | 15 ++++++-
 .../lucene/analysis/ko/dict/KoMorphData.java  |  6 ++-
 .../analysis/ko/dict/UserDictionary.java      | 13 +++++-
 .../analysis/ko/dict/UserMorphData.java       | 12 +++++-
 .../ko/tokenattributes/MetadataAttribute.java | 22 ++++++++++
 .../MetadataAttributeImpl.java                | 43 +++++++++++++++++++
 .../analysis/ko/TestKoreanTokenizer.java      | 22 ++++++++++
 .../apache/lucene/analysis/ko/userdict.txt    |  4 ++
 12 files changed, 168 insertions(+), 7 deletions(-)
 create mode 100644 lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java
 create mode 100644 lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java

diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java
index bdd30ee28128..d58cec2dd7e6 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java
@@ -22,6 +22,7 @@
 /** A token that was generated from a compound. */
 public class DecompoundToken extends Token {
   private final POS.Tag posTag;
+  private final String metadata;
 
   /**
    * Creates a new DecompoundToken
@@ -31,11 +32,18 @@ public class DecompoundToken extends Token {
    * @param startOffset The start offset of the token in the analyzed text.
    * @param endOffset The end offset of the token in the analyzed text.
    * @param type The type of this token.
+   * @param metadata The metadata of this token.
    */
   public DecompoundToken(
-      POS.Tag posTag, String surfaceForm, int startOffset, int endOffset, TokenType type) {
+      POS.Tag posTag,
+      String surfaceForm,
+      int startOffset,
+      int endOffset,
+      TokenType type,
+      String metadata) {
     super(surfaceForm.toCharArray(), 0, surfaceForm.length(), startOffset, endOffset, type);
     this.posTag = posTag;
+    this.metadata = metadata;
   }
 
   @Override
@@ -77,4 +85,9 @@ public String getReading() {
   public KoMorphData.Morpheme[] getMorphemes() {
     return null;
   }
+
+  @Override
+  public String getMetadata() {
+    return metadata;
+  }
 }
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java
index 3d04515f2f44..5ac4be6fe213 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java
@@ -23,6 +23,7 @@
 public class DictionaryToken extends Token {
   private final int wordId;
   private final KoMorphData morphAtts;
+  private String metadata = null;
 
   public DictionaryToken(
       TokenType type,
@@ -108,4 +109,21 @@ public String getReading() {
   public KoMorphData.Morpheme[] getMorphemes() {
     return morphAtts.getMorphemes(wordId, getSurfaceForm(), getOffset(), getLength());
   }
+
+  @Override
+  public String getMetadata() {
+    return this.metadata;
+  }
+
+  public void setMetadata(String metadata) {
+    this.metadata = metadata;
+  }
+
+  public int getWordId() {
+    return wordId;
+  }
+
+  public KoMorphData getMorphAtts() {
+    return morphAtts;
+  }
 }
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java
index 15bdb3422ad7..3fcac7c28d12 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java
@@ -25,6 +25,7 @@
 import org.apache.lucene.analysis.ko.dict.TokenInfoFST;
 import org.apache.lucene.analysis.ko.dict.UnknownDictionary;
 import org.apache.lucene.analysis.ko.dict.UserDictionary;
+import org.apache.lucene.analysis.ko.tokenattributes.MetadataAttribute;
 import org.apache.lucene.analysis.ko.tokenattributes.PartOfSpeechAttribute;
 import org.apache.lucene.analysis.ko.tokenattributes.ReadingAttribute;
 import org.apache.lucene.analysis.morph.GraphvizFormatter;
@@ -77,6 +78,7 @@ public enum DecompoundMode {
   private final Viterbi viterbi;
 
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final MetadataAttribute metadataAtt = addAttribute(MetadataAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   private final PositionIncrementAttribute posIncAtt =
       addAttribute(PositionIncrementAttribute.class);
@@ -233,6 +235,7 @@ public boolean incrementToken() throws IOException {
     // System.out.println("off=" + token.getOffset() + " len=" + length + " vs " +
     // token.getSurfaceForm().length);
     termAtt.copyBuffer(token.getSurfaceForm(), token.getOffset(), length);
+    metadataAtt.setToken(token);
     offsetAtt.setOffset(correctOffset(token.getStartOffset()), correctOffset(token.getEndOffset()));
     posAtt.setToken(token);
     readingAtt.setToken(token);
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java
index 480d4abf4101..7fd530c977b1 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java
@@ -44,4 +44,6 @@ protected Token(
    * token.
    */
   public abstract KoMorphData.Morpheme[] getMorphemes();
+
+  public abstract String getMetadata();
 }
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java
index 8ba9cb36979f..f9dc5ff5d7d2 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java
@@ -18,11 +18,13 @@
 
 import java.io.IOException;
 import java.util.EnumMap;
+
 import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
 import org.apache.lucene.analysis.ko.dict.KoMorphData;
 import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary;
 import org.apache.lucene.analysis.ko.dict.UnknownDictionary;
 import org.apache.lucene.analysis.ko.dict.UserDictionary;
+import org.apache.lucene.analysis.ko.dict.UserMorphData;
 import org.apache.lucene.analysis.morph.ConnectionCosts;
 import org.apache.lucene.analysis.morph.Dictionary;
 import org.apache.lucene.analysis.morph.GraphvizFormatter;
@@ -248,6 +250,10 @@ protected void backtrace(Position endPosData, int fromIDX) {
         if (token.getPOSType() == POS.Type.MORPHEME
             || mode == KoreanTokenizer.DecompoundMode.NONE) {
           if (shouldFilterToken(token) == false) {
+            if (token.getMorphAtts() instanceof UserMorphData userMorphData
+                && userMorphData.metadatas != null) {
+              token.setMetadata(userMorphData.metadatas[token.getWordId()]);
+            }
             pending.add(token);
             if (VERBOSE) {
               System.out.println("    add token=" + pending.get(pending.size() - 1));
@@ -264,9 +270,11 @@ protected void backtrace(Position endPosData, int fromIDX) {
             int endOffset = backWordPos + length;
             int posLen = 0;
             // decompose the compound
+            String metadata = null;
             for (int i = morphemes.length - 1; i >= 0; i--) {
               final KoMorphData.Morpheme morpheme = morphemes[i];
               final Token compoundToken;
+              metadata = morpheme.metadata();
               if (token.getPOSType() == POS.Type.COMPOUND) {
                 assert endOffset - morpheme.surfaceForm().length() >= 0;
                 compoundToken =
@@ -275,7 +283,8 @@ protected void backtrace(Position endPosData, int fromIDX) {
                         morpheme.surfaceForm(),
                         endOffset - morpheme.surfaceForm().length(),
                         endOffset,
-                        backType);
+                        backType,
+                        metadata);
               } else {
                 compoundToken =
                     new DecompoundToken(
@@ -283,7 +292,8 @@ protected void backtrace(Position endPosData, int fromIDX) {
                         morpheme.surfaceForm(),
                         token.getStartOffset(),
                         token.getEndOffset(),
-                        backType);
+                        backType,
+                        metadata);
               }
               if (i == 0 && mode == KoreanTokenizer.DecompoundMode.MIXED) {
                 compoundToken.setPositionIncrement(0);
@@ -297,6 +307,7 @@ protected void backtrace(Position endPosData, int fromIDX) {
             }
             if (mode == KoreanTokenizer.DecompoundMode.MIXED) {
               token.setPositionLength(Math.max(1, posLen));
+              token.setMetadata(metadata);
               pending.add(token);
               if (VERBOSE) {
                 System.out.println("    add token=" + pending.get(pending.size() - 1));
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java
index 2b2c53a4d6db..530dda3c0636 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java
@@ -22,7 +22,11 @@
 /** Represents Korean morphological information. */
 public interface KoMorphData extends MorphData {
   /** A morpheme extracted from a compound token. */
-  record Morpheme(POS.Tag posTag, String surfaceForm) {}
+  record Morpheme(POS.Tag posTag, String surfaceForm, String metadata) {
+    public Morpheme(POS.Tag posTag, String surfaceForm) {
+      this(posTag, surfaceForm, null);
+    }
+  }
 
   /**
    * Get the {@link org.apache.lucene.analysis.ko.POS.Type} of specified word (morpheme, compound,
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
index 00767e0f8dd6..9c7d893abe55 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
@@ -34,6 +34,7 @@
  * (세종시 세종 시).
  */
 public final class UserDictionary implements Dictionary<UserMorphData> {
+  public static final String METADATA_SEPARATOR = " >> ";
   // text -> wordID
   private final TokenInfoFST fst;
 
@@ -82,10 +83,19 @@ private UserDictionary(List<String> entries) throws IOException {
 
     String lastToken = null;
     List<int[]> _segmentations = new ArrayList<>(entries.size());
+    List<String> _metadatas = new ArrayList<>(entries.size());
     short[] rightIds = new short[entries.size()];
     long ord = 0;
     int entryIndex = 0;
     for (String entry : entries) {
+      // Cut the optional " >> metadata" suffix at its first occurrence (literal, not regex).
+      final int sep = entry.indexOf(METADATA_SEPARATOR);
+      final String metadata = sep < 0 ? null : entry.substring(sep + METADATA_SEPARATOR.length());
+      entry = sep < 0 ? entry : entry.substring(0, sep);
+      // Repeated tokens are presumably skipped by the lastToken check below; give them no slot.
+      if (entry.split("\\s+")[0].equals(lastToken) == false) {
+        _metadatas.add(metadata);
+      }
       String[] splits = entry.split("\\s+");
       String token = splits[0];
       if (token.equals(lastToken)) {
@@ -138,7 +148,8 @@ private UserDictionary(List<String> entries) throws IOException {
     this.fst =
         new TokenInfoFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
     int[][] segmentations = _segmentations.toArray(new int[_segmentations.size()][]);
-    this.morphAtts = new UserMorphData(segmentations, rightIds);
+    String[] metadatas = _metadatas.toArray(String[]::new);
+    this.morphAtts = new UserMorphData(segmentations, rightIds, metadatas);
   }
 
   public TokenInfoFST getFST() {
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java
index 2056bd0829c9..6783f0f9d123 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java
@@ -19,7 +19,7 @@
 import org.apache.lucene.analysis.ko.POS;
 
 /** Morphological information for user dictionary. */
-final class UserMorphData implements KoMorphData {
+public final class UserMorphData implements KoMorphData {
   private static final int WORD_COST = -100000;
 
   // NNG left
@@ -28,12 +28,18 @@ final class UserMorphData implements KoMorphData {
   // length, length... indexed by compound ID or null for simple noun
   private final int[][] segmentations;
   private final short[] rightIds;
+  public String[] metadatas;
 
   UserMorphData(int[][] segmentations, short[] rightIds) {
     this.segmentations = segmentations;
     this.rightIds = rightIds;
   }
 
+  UserMorphData(int[][] segmentations, short[] rightIds, String[] metadatas) {
+    this(segmentations, rightIds);
+    this.metadatas = metadatas;
+  }
+
   @Override
   public int getLeftId(int morphId) {
     return LEFT_ID;
@@ -79,10 +85,12 @@ public Morpheme[] getMorphemes(int morphId, char[] surfaceForm, int off, int len
     if (segs == null) {
       return null;
     }
+    String metadata = metadatas == null ? null : metadatas[morphId];
     int offset = 0;
     Morpheme[] morphemes = new Morpheme[segs.length];
     for (int i = 0; i < segs.length; i++) {
-      morphemes[i] = new Morpheme(POS.Tag.NNG, new String(surfaceForm, off + offset, segs[i]));
+      morphemes[i] =
+          new Morpheme(POS.Tag.NNG, new String(surfaceForm, off + offset, segs[i]), metadata);
       offset += segs[i];
     }
     return morphemes;
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java
new file mode 100644
index 000000000000..2fe20cd59b7a
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java
@@ -0,0 +1,22 @@
+package org.apache.lucene.analysis.ko.tokenattributes;
+
+import org.apache.lucene.analysis.ko.Token;
+import org.apache.lucene.util.Attribute;
+
+/**
+ * Attribute for Korean token metadata.
+ *
+ * <p>This attribute provides access to additional metadata associated with Korean tokens,
+ * particularly from user dictionaries and compound word morphemes.
+ *
+ * <p>Note: in some cases this value may not be applicable, and will be {@code null}.
+ *
+ * @lucene.experimental
+ */
+public interface MetadataAttribute extends Attribute {
+  /** Returns the metadata string of the current token, or {@code null} if none is available. */
+  String getMetadata();
+
+  /** Sets the token whose metadata this attribute exposes. */
+  void setToken(Token token);
+}
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java
new file mode 100644
index 000000000000..b2886821cbd6
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java
@@ -0,0 +1,43 @@
+package org.apache.lucene.analysis.ko.tokenattributes;
+
+import org.apache.lucene.analysis.ko.Token;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+
+/**
+ * Default {@link MetadataAttribute} implementation backed by the current {@link Token}.
+ *
+ * @lucene.experimental
+ */
+public class MetadataAttributeImpl extends AttributeImpl implements MetadataAttribute {
+  // Token whose metadata is exposed; null until setToken is called and again after clear().
+  private Token token;
+
+  /** Returns the metadata of the current token, or null when no token is set. */
+  @Override
+  public String getMetadata() {
+    return token == null ? null : token.getMetadata();
+  }
+
+  @Override
+  public void setToken(Token token) {
+    this.token = token;
+  }
+
+  /** Drops the reference to the current token. */
+  @Override
+  public void clear() {
+    token = null;
+  }
+
+  /** Hands the current token reference over to {@code target}. */
+  @Override
+  public void copyTo(AttributeImpl target) {
+    ((MetadataAttribute) target).setToken(token);
+  }
+
+  @Override
+  public void reflectWith(AttributeReflector reflector) {
+    reflector.reflect(MetadataAttribute.class, "metadata", getMetadata());
+  }
+}
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java
index 6ce60a53e3f3..484295b64aa3 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java
@@ -28,6 +28,7 @@
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode;
 import org.apache.lucene.analysis.ko.dict.UserDictionary;
+import org.apache.lucene.analysis.ko.tokenattributes.MetadataAttribute;
 import org.apache.lucene.analysis.ko.tokenattributes.PartOfSpeechAttribute;
 import org.apache.lucene.analysis.ko.tokenattributes.ReadingAttribute;
 import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
@@ -576,6 +577,27 @@ public void testDuplicate() throws IOException {
     }
   }
 
+  public void testMetadataAttribute() throws IOException {
+    assertMetadata(analyzer, "자바", "컴퓨터 언어");
+    assertMetadata(analyzer, "java", "컴퓨터 언어");
+    assertMetadata(analyzer, "엘라스틱서치", "검색 엔진");
+
+    assertMetadata(analyzerDecompoundKeep, "엘라스틱서치", "검색 엔진");
+  }
+
+  private void assertMetadata(Analyzer analyzer, String input, String metadata) throws IOException {
+    try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
+      final MetadataAttribute metadataAtt = ts.addAttribute(MetadataAttribute.class);
+      ts.reset();
+      // Require at least one token so the per-token check below cannot pass vacuously.
+      assertTrue(ts.incrementToken());
+      do {
+        assertEquals(metadata, metadataAtt.getMetadata());
+      } while (ts.incrementToken());
+      ts.end();
+    }
+  }
+
   private void assertReadings(Analyzer analyzer, String input, String... readings)
       throws IOException {
     try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt
index d8503230735c..c76f5e0bd2a6 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt
@@ -8,3 +8,7 @@ C샤프
 날씨
 21세기대한민국
 세기
+# With Metadata
+자바 >> 컴퓨터 언어
+java >> 컴퓨터 언어
+엘라스틱서치 엘라스틱 서치 >> 검색 엔진

From 81ce2c8751feb3b1901afad1b33031d5427486f0 Mon Sep 17 00:00:00 2001
From: twosom <two_somang@icloud.com>
Date: Sun, 20 Jul 2025 15:46:04 +0900
Subject: [PATCH 2/4] chore : add CHANGES.txt

---
 lucene/CHANGES.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e625456a70c4..1fa0bc223c67 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -37,6 +37,8 @@ New Features
 
 * GITHUB#14792: Introduced OffHeapQuantizedFloatVectorValues class to access float vectors when only quantized byte vectors are available in the index. (Pulkit Gupta)
 
+* GITHUB#14969: Add metadata support to Nori Korean analyzer tokens, allowing users to attach additional information to dictionary words. (twosom)
+
 Improvements
 ---------------------
 

From f516e53fa662080c2bd6eba61fa9461543421537 Mon Sep 17 00:00:00 2001
From: twosom <two_somang@icloud.com>
Date: Sun, 20 Jul 2025 15:52:26 +0900
Subject: [PATCH 3/4] chore : spotless apply

---
 .../nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java     | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java
index f9dc5ff5d7d2..0e24dc53d8d4 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java
@@ -18,7 +18,6 @@
 
 import java.io.IOException;
 import java.util.EnumMap;
-
 import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
 import org.apache.lucene.analysis.ko.dict.KoMorphData;
 import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary;

From 66ed9058f6fa91e06ab7bf22bcac3f7862ef1767 Mon Sep 17 00:00:00 2001
From: twosom <two_somang@icloud.com>
Date: Sun, 20 Jul 2025 16:02:18 +0900
Subject: [PATCH 4/4] chore : add license header

---
 .../ko/tokenattributes/MetadataAttribute.java    | 16 ++++++++++++++++
 .../tokenattributes/MetadataAttributeImpl.java   | 16 ++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java
index 2fe20cd59b7a..c4797124b4df 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttribute.java
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.lucene.analysis.ko.tokenattributes;
 
 import org.apache.lucene.analysis.ko.Token;
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java
index b2886821cbd6..2d92b6ccf1f9 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/MetadataAttributeImpl.java
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.lucene.analysis.ko.tokenattributes;
 
 import org.apache.lucene.analysis.ko.Token;