sphinx-doc · cglukas · Sep 13, 2025 · Sep 14, 2025 · Sep 15, 2025 · Sep 15, 2025
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -83,6 +83,7 @@ Contributors
 * Lars Hupfeldt Nielsen - OpenSSL FIPS mode md5 bug fix
 * Louis Maddox -- better docstrings
 * Łukasz Langa -- partial support for autodoc
+* Lukas Wieg -- JavaScript search improvement
 * Marco Buttu -- doctest extension (pyversion option)
 * Mark Ostroth -- semantic HTML contributions
 * Martin Hans -- autodoc improvements

diff --git a/CHANGES.rst b/CHANGES.rst
@@ -130,6 +130,7 @@ Bugs fixed
   directly defined in certain cases, depending on autodoc processing
   order.
   Patch by Jeremy Maitin-Shepard.
+* #13892: HTML search: fix word exclusion in the search by prefixing words with "-".
 
 
 Testing

diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js
@@ -171,15 +171,28 @@ const _orderResultsByScoreThenName = (a, b) => {
  * Default splitQuery function. Can be overridden in ``sphinx.search`` with a
  * custom function per language.
  *
- * The regular expression works by splitting the string on consecutive characters
- * that are not Unicode letters, numbers, underscores, or emoji characters.
- * This is the same as ``\W+`` in Python, preserving the surrogate pair area.
+ * The `consecutiveLetters` regular expression works by matching consecutive characters
+ * that are Unicode letters, numbers, underscores, or emoji characters.
+ *
+ * The `searchWords` regular expression works by matching a word like structure
+ * that matches the `consecutiveLetters` with or without a leading hyphen '-' which is
+ * used to exclude search terms later on.
  */
 if (typeof splitQuery === "undefined") {
-  var splitQuery = (query) =>
-    query
-      .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu)
-      .filter((term) => term); // remove remaining empty strings
+  var splitQuery = (query) => {
+    const consecutiveLetters =
+      /[\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu;
+    const searchWords = new RegExp(
+      `(${consecutiveLetters.source})|\\s(-${consecutiveLetters.source})`,
+      "gu",
+    );
+    return Array.from(
+      query
+        .matchAll(searchWords)
+        .map((results) => results[1] ?? results[2]) // select one of the possible groups (e.g. "word" or "-word").
+        .filter((term) => term), // remove remaining empty strings.
+    );
+  };
-  var splitQuery = (query) => {
-    const consecutiveLetters =
-      /[\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu;
-    const searchWords = new RegExp(
-      `(${consecutiveLetters.source})|\\s(-${consecutiveLetters.source})`,
-      "gu",
-    );
-    return Array.from(
-      query
-        .matchAll(searchWords)
-        .map((results) => results[1] ?? results[2]) // select one of the possible groups (e.g. "word" or "-word").
-        .filter((term) => term), // remove remaining empty strings.
-    );
-  };
+  var splitQuery = (query) =>
+    query
+      .split(/(?<!\s)[-]|[^\p{Letter}\p{Number}\-_\p{Emoji_Presentation}]+/gu)
+      .filter((term) => term); // remove remaining empty strings
-  var splitQuery = (query) => {
-    const consecutiveLetters =
-      /[\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu;
-    const searchWords = new RegExp(
-      `(${consecutiveLetters.source})|\\s(-${consecutiveLetters.source})`,
-      "gu",
-    );
-    return Array.from(
-      query
-        .matchAll(searchWords)
-        .map((results) => results[1] ?? results[2]) // select one of the possible groups (e.g. "word" or "-word").
-        .filter((term) => term), // remove remaining empty strings.
-    );
-  };
+  var splitQuery = (query) =>
+    query
+      .split(/(?<!\s)[-]|[^\p{Letter}\p{Number}\-_\p{Emoji_Presentation}]+/gu)
+      .filter((term) => term); // remove remaining empty strings
 }
 
 /**
@@ -627,15 +640,18 @@ const Search = {
 
       // ensure that none of the excluded terms is in the search result
       if (
-        [...excludedTerms].some(
-          (term) =>
-            terms[term] === file
-            || titleTerms[term] === file
-            || (terms[term] || []).includes(file)
-            || (titleTerms[term] || []).includes(file),
-        )
+        [...excludedTerms].some((excludedTerm) => {
+          // Both mappings will contain either a single integer or a list of integers.
+          // Converting them to lists makes the comparison more readable.
+          let excludedTermFiles = [].concat(terms[excludedTerm]);
+          let excludedTitleFiles = [].concat(titleTerms[excludedTerm]);
+          return (
+            excludedTermFiles.includes(file)
+            || excludedTitleFiles.includes(file)
+          );
+        })
-        [...excludedTerms].some((excludedTerm) => {
-          // Both mappings will contain either a single integer or a list of integers.
-          // Converting them to lists makes the comparison more readable.
-          let excludedTermFiles = [].concat(terms[excludedTerm]);
-          let excludedTitleFiles = [].concat(titleTerms[excludedTerm]);
-          return (
-            excludedTermFiles.includes(file)
-            || excludedTitleFiles.includes(file)
-          );
-        })
+        [...excludedTerms].some(
+          (term) =>
+            terms[term] === file
+            || titleTerms[term] === file
+            || (terms[term] || []).includes(file)
+            || (titleTerms[term] || []).includes(file),
+        )
-        [...excludedTerms].some((excludedTerm) => {
-          // Both mappings will contain either a single integer or a list of integers.
-          // Converting them to lists makes the comparison more readable.
-          let excludedTermFiles = [].concat(terms[excludedTerm]);
-          let excludedTitleFiles = [].concat(titleTerms[excludedTerm]);
-          return (
-            excludedTermFiles.includes(file)
-            || excludedTitleFiles.includes(file)
-          );
-        })
+        [...excludedTerms].some(
+          (term) =>
+            terms[term] === file
+            || titleTerms[term] === file
+            || (terms[term] || []).includes(file)
+            || (titleTerms[term] || []).includes(file),
+        )
       )
-        break;
+        continue;
 
       // select one (max) score for the file.
       const score = Math.max(...wordList.map((w) => scoreMap.get(file).get(w)));

diff --git a/tests/js/fixtures/search_exclusion/searchindex.js b/tests/js/fixtures/search_exclusion/searchindex.js
diff --git a/tests/js/roots/search_exclusion/conf.py b/tests/js/roots/search_exclusion/conf.py
diff --git a/tests/js/roots/search_exclusion/excluded.rst b/tests/js/roots/search_exclusion/excluded.rst
@@ -0,0 +1,4 @@
+Excluded Page
+=============
+
+This is a page with the special word penguin.
diff --git a/tests/js/roots/search_exclusion/index.rst b/tests/js/roots/search_exclusion/index.rst
@@ -0,0 +1,12 @@
+Main Page
+=========
+
+This is the main page of the ``search_exclusion`` test project.
+
+This document is used as a test fixture to check that search results can be
+filtered in the query by specifying excluded terms.
+
+A term which starts with a hypen will be used as excluded term.
+
+Include a second page which can be excluded in the search:
+:index:`excluded`
diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js
@@ -66,6 +66,33 @@ describe("Basic html theme search", function () {
       expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits);
     });
 
+    it("should find results when excluded terms are used", function () {
+      // This fixture already existed and has the right data to make this test work.
+      // Replace with another matching fixture if necessary.
+      eval(loadFixture("search_exclusion/searchindex.js"));
+
+      // It's important that the searchterm is included in multiple pages while the
+      // excluded term is not included in all pages.
+      // In this case the ``for`` is included in the two existing pages while the ``ask``
+      // is only included in one page.
+      [_searchQuery, searchterms, excluded, ..._remainingItems] =
+        Search._parseQuery("page -penguin");
+
+      // prettier-ignore
+      hits = [[
+        'index',
+        'Main Page',
+        '',
+        null,
+        15,
+        'index.rst',
+        'text'
+      ]];
+
+      expect(excluded).toEqual(new Set(["penguin"]));
+      expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits);
+    });
+
     it('should partially-match "sphinx" when in title index', function () {
       eval(loadFixture("partial/searchindex.js"));
 
@@ -295,6 +322,16 @@ describe("splitQuery regression tests", () => {
     expect(parts).toEqual(["Pin", "Code"]);
   });
 
+  it("can keep underscores in words", () => {
+    const parts = splitQuery("python_function");
+    expect(parts).toEqual(["python_function"]);
+  });
+
+  it("can maintain negated search words", () => {
+    const parts = splitQuery("Pin -Code");
+    expect(parts).toEqual(["Pin", "-Code"]);
+  });
+
   it("can split Chinese characters", () => {
     const parts = splitQuery("Hello from 中国 上海");
     expect(parts).toEqual(["Hello", "from", "中国", "上海"]);
-Original file line number
+Diff line change
@@ Expand Up / @@ -130,6 +130,7 @@ Bugs fixed @@
       directly defined in certain cases, depending on autodoc processing
       order.
       Patch by Jeremy Maitin-Shepard.
+    * #13892: HTML search: fix word exclusion in the search by prefixing words with "-".
     Testing
@@ Expand Down @@