From 7d5e47e97f828bf5522e89a0cc26ce354bfc1efd Mon Sep 17 00:00:00 2001
From: adrian adewunmi <adrian.a.adewunmi@googlemail.com>
Date: Sun, 17 May 2026 10:18:26 +0100
Subject: [PATCH 1/6] test(summarisation): verify summaries copy source
 sentences verbatim

---
 apps/insights/nlp/summarisation.py        |  2 +-
 apps/insights/tests/test_summarisation.py | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/apps/insights/nlp/summarisation.py b/apps/insights/nlp/summarisation.py
index c9022db..b34b563 100644
--- a/apps/insights/nlp/summarisation.py
+++ b/apps/insights/nlp/summarisation.py
@@ -62,6 +62,6 @@ def summarise_text(text: str | None, max_sentences: int = 2) -> str:
         :max_sentences
     ]
 
-    # Preserve source order after ranking so the summary reads naturally.
+    # Preserve source order and copy sentences verbatim so summaries stay grounded.
     selected_in_source_order = sorted(selected, key=lambda item: item[1])
     return " ".join(sentence for _score, _index, sentence in selected_in_source_order)
diff --git a/apps/insights/tests/test_summarisation.py b/apps/insights/tests/test_summarisation.py
index 0e6c753..1d03d29 100644
--- a/apps/insights/tests/test_summarisation.py
+++ b/apps/insights/tests/test_summarisation.py
@@ -6,6 +6,7 @@
 
 from apps.insights.nlp import summarisation
 from apps.insights.nlp.summarisation import LOW_INFORMATION_SUMMARY, summarise_text
+from apps.insights.nlp.text_processing import split_sentences
 
 
 def test_summarise_text_selects_high_signal_source_sentence() -> None:
@@ -37,6 +38,27 @@ def test_summarise_text_preserves_source_order_after_scoring() -> None:
     )
 
 
+def test_summarise_text_uses_only_user_note_sentences() -> None:
+    """Every summary sentence should be copied from the user's own notes."""
+    text = (
+        "Photosynthesis photosynthesis uses chlorophyll to convert light into glucose. "
+        "Cell respiration releases stored energy during revision. "
+        "Photosynthesis depends on carbon dioxide and water."
+    )
+
+    result = summarise_text(text, max_sentences=2)
+
+    assert result == (
+        "Photosynthesis photosynthesis uses chlorophyll to convert light into glucose. "
+        "Photosynthesis depends on carbon dioxide and water."
+    )
+    source_sentences = split_sentences(text)
+    summary_sentences = split_sentences(result)
+    assert summary_sentences
+    assert all(sentence in source_sentences for sentence in summary_sentences)
+    assert "mitochondria" not in result
+
+
 def test_summarise_text_handles_empty_input() -> None:
     """Empty input should return the low-information summary."""
     assert summarise_text("") == LOW_INFORMATION_SUMMARY

From d2b2c127551f9b5dca2c966c629e74300d79b42f Mon Sep 17 00:00:00 2001
From: adrian adewunmi <adrian.a.adewunmi@googlemail.com>
Date: Sun, 17 May 2026 10:22:26 +0100
Subject: [PATCH 2/6] feat(confidence): implement rule-based confidence scoring
 for study insights

---
 apps/insights/nlp/confidence.py | 70 +++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 apps/insights/nlp/confidence.py

diff --git a/apps/insights/nlp/confidence.py b/apps/insights/nlp/confidence.py
new file mode 100644
index 0000000..7208f13
--- /dev/null
+++ b/apps/insights/nlp/confidence.py
@@ -0,0 +1,70 @@
+"""Rule-based confidence scoring for deterministic study insights."""
+
+from __future__ import annotations
+
+from apps.insights.nlp.text_processing import meaningful_tokens
+
+
+def score_confidence(text: str | None, keywords: list[str], summary: str) -> int:
+    """Score confidence for a generated insight.
+
+    The score is a transparent quality heuristic. It reflects whether the
+    source text contains enough meaningful content to support a useful
+    extractive summary and keyword set.
+
+    Args:
+        text: Raw source text.
+        keywords: Extracted keyword list.
+        summary: Generated summary text.
+
+    Returns:
+        Integer confidence score from 0 to 100.
+    """
+    tokens = meaningful_tokens(text)
+
+    if not tokens:
+        return 0
+
+    score = 20
+
+    if len(tokens) >= 20:
+        score += 25
+    elif len(tokens) >= 10:
+        score += 15
+    else:
+        score += 5
+
+    if len(set(tokens)) >= 10:
+        score += 15
+    elif len(set(tokens)) >= 5:
+        score += 8
+
+    if len(keywords) >= 5:
+        score += 20
+    elif len(keywords) >= 3:
+        score += 12
+    elif keywords:
+        score += 5
+
+    if summary and "not enough study note content" not in summary.lower():
+        score += 20
+
+    return max(0, min(score, 100))
+
+
+def confidence_label(score: int) -> str:
+    """Return a user-facing confidence label.
+
+    Args:
+        score: Confidence score from 0 to 100.
+
+    Returns:
+        One of ``Low``, ``Medium``, or ``High``.
+    """
+    if score >= 75:
+        return "High"
+
+    if score >= 45:
+        return "Medium"
+
+    return "Low"
\ No newline at end of file

From 80071c32c668a1e282237ad7d1685c7c992a8b95 Mon Sep 17 00:00:00 2001
From: adrian adewunmi <adrian.a.adewunmi@googlemail.com>
Date: Sun, 17 May 2026 10:24:19 +0100
Subject: [PATCH 3/6] test(confidence): add unit tests for confidence
 scoring

---
 apps/insights/tests/test_confidence.py | 48 ++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 apps/insights/tests/test_confidence.py

diff --git a/apps/insights/tests/test_confidence.py b/apps/insights/tests/test_confidence.py
new file mode 100644
index 0000000..aa521f9
--- /dev/null
+++ b/apps/insights/tests/test_confidence.py
@@ -0,0 +1,48 @@
+"""Tests for deterministic confidence scoring."""
+
+from __future__ import annotations
+
+from apps.insights.nlp.confidence import confidence_label, score_confidence
+
+
+def test_score_confidence_returns_zero_for_empty_text() -> None:
+    """No note content should produce zero confidence."""
+    result = score_confidence("", [], "There is not enough content.")
+
+    assert result == 0
+
+
+def test_score_confidence_stays_within_bounds() -> None:
+    """Confidence should always be a percentage-style bounded value."""
+    text = " ".join(["django testing database workflow"] * 20)
+
+    result = score_confidence(
+        text,
+        ["django", "testing", "database", "workflow"],
+        "Django testing database workflow.",
+    )
+
+    assert 0 <= result <= 100
+
+
+def test_score_confidence_increases_for_richer_content() -> None:
+    """Richer text with keywords and summary should score higher."""
+    weak = score_confidence("Django.", ["django"], "Django.")
+    strong = score_confidence(
+        (
+            "Django testing confirms reliable session workflows. "
+            "Database-backed notes improve review quality. "
+            "Pytest verifies permissions and persistence behaviour."
+        ),
+        ["django", "testing", "database", "pytest", "permissions"],
+        "Django testing confirms reliable session workflows.",
+    )
+
+    assert strong > weak
+
+
+def test_confidence_label_maps_score_to_user_facing_label() -> None:
+    """Confidence labels should be simple and predictable."""
+    assert confidence_label(20) == "Low"
+    assert confidence_label(60) == "Medium"
+    assert confidence_label(90) == "High"
\ No newline at end of file

From 246728ff03c65d7981d885d448222728f874d158 Mon Sep 17 00:00:00 2001
From: adrian adewunmi <adrian.a.adewunmi@googlemail.com>
Date: Sun, 17 May 2026 10:30:34 +0100
Subject: [PATCH 4/6] test(confidence): add test comparing fallback and
 extractive summary scores

---
 apps/insights/tests/test_confidence.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/apps/insights/tests/test_confidence.py b/apps/insights/tests/test_confidence.py
index aa521f9..8bebf5a 100644
--- a/apps/insights/tests/test_confidence.py
+++ b/apps/insights/tests/test_confidence.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 from apps.insights.nlp.confidence import confidence_label, score_confidence
+from apps.insights.nlp.summarisation import LOW_INFORMATION_SUMMARY
 
 
 def test_score_confidence_returns_zero_for_empty_text() -> None:
@@ -41,8 +42,26 @@ def test_score_confidence_increases_for_richer_content() -> None:
     assert strong > weak
 
 
+def test_score_confidence_does_not_reward_low_information_summary() -> None:
+    """Fallback summary text should not be treated as a usable summary."""
+    text = (
+        "Django testing confirms reliable session workflows. "
+        "Database-backed notes improve review quality."
+    )
+    keywords = ["django", "testing", "database"]
+
+    with_fallback_summary = score_confidence(text, keywords, LOW_INFORMATION_SUMMARY)
+    with_extract_summary = score_confidence(
+        text,
+        keywords,
+        "Django testing confirms reliable session workflows.",
+    )
+
+    assert with_extract_summary > with_fallback_summary
+
+
 def test_confidence_label_maps_score_to_user_facing_label() -> None:
     """Confidence labels should be simple and predictable."""
     assert confidence_label(20) == "Low"
     assert confidence_label(60) == "Medium"
-    assert confidence_label(90) == "High"
\ No newline at end of file
+    assert confidence_label(90) == "High"

From 27b6972cf12eb4a3575da6f2d58121fe8272b262 Mon Sep 17 00:00:00 2001
From: adrian adewunmi <adrian.a.adewunmi@googlemail.com>
Date: Sun, 17 May 2026 10:35:33 +0100
Subject: [PATCH 5/6] docs(confidence): clarify scoring description, update
 documentation, and add repeatability test

---
 apps/insights/nlp/confidence.py        |  8 ++++----
 apps/insights/tests/test_confidence.py | 14 ++++++++++++++
 docs/ai-nlp-contract.md                |  4 ++--
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/apps/insights/nlp/confidence.py b/apps/insights/nlp/confidence.py
index 7208f13..3de9416 100644
--- a/apps/insights/nlp/confidence.py
+++ b/apps/insights/nlp/confidence.py
@@ -8,9 +8,9 @@
 def score_confidence(text: str | None, keywords: list[str], summary: str) -> int:
     """Score confidence for a generated insight.
 
-    The score is a transparent quality heuristic. It reflects whether the
-    source text contains enough meaningful content to support a useful
-    extractive summary and keyword set.
+    The score is a transparent quality heuristic, not a probability or model
+    intelligence signal. It reflects whether the source text contains enough
+    meaningful content to support a useful extractive summary and keyword set.
 
     Args:
         text: Raw source text.
@@ -67,4 +67,4 @@ def confidence_label(score: int) -> str:
     if score >= 45:
         return "Medium"
 
-    return "Low"
\ No newline at end of file
+    return "Low"
diff --git a/apps/insights/tests/test_confidence.py b/apps/insights/tests/test_confidence.py
index 8bebf5a..4eb366e 100644
--- a/apps/insights/tests/test_confidence.py
+++ b/apps/insights/tests/test_confidence.py
@@ -26,6 +26,20 @@ def test_score_confidence_stays_within_bounds() -> None:
     assert 0 <= result <= 100
 
 
+def test_score_confidence_is_repeatable_for_same_inputs() -> None:
+    """The same inputs should always produce the same heuristic score."""
+    text = (
+        "Django testing confirms reliable session workflows. "
+        "Database-backed notes improve review quality."
+    )
+    keywords = ["django", "testing", "database"]
+    summary = "Django testing confirms reliable session workflows."
+
+    scores = [score_confidence(text, keywords, summary) for _ in range(5)]
+
+    assert len(set(scores)) == 1
+
+
 def test_score_confidence_increases_for_richer_content() -> None:
     """Richer text with keywords and summary should score higher."""
     weak = score_confidence("Django.", ["django"], "Django.")
diff --git a/docs/ai-nlp-contract.md b/docs/ai-nlp-contract.md
index 73aaf87..0c92f36 100644
--- a/docs/ai-nlp-contract.md
+++ b/docs/ai-nlp-contract.md
@@ -123,8 +123,8 @@ Confidence labels:
 - `Medium` for scores from 45 to 74
 - `High` for scores from 75 to 100
 
-The confidence score is not a probability and does not claim factual
-correctness. It is a quality signal for the generated insight.
+The confidence score is not a probability, an intelligence score, or a claim
+of factual correctness. It is a quality signal for the generated insight.
 
 ## Explanation
 

From 79419703ddb61ce9bfa4fb8dff4b253ca4cd7b1f Mon Sep 17 00:00:00 2001
From: adrian adewunmi <adrian.a.adewunmi@googlemail.com>
Date: Sun, 17 May 2026 10:55:11 +0100
Subject: [PATCH 6/6] test(confidence): add test for moderate unique term
 variety in confidence scoring

---
 apps/insights/tests/test_confidence.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/apps/insights/tests/test_confidence.py b/apps/insights/tests/test_confidence.py
index 4eb366e..ed57adf 100644
--- a/apps/insights/tests/test_confidence.py
+++ b/apps/insights/tests/test_confidence.py
@@ -40,6 +40,15 @@ def test_score_confidence_is_repeatable_for_same_inputs() -> None:
     assert len(set(scores)) == 1
 
 
+def test_score_confidence_rewards_moderate_unique_term_variety() -> None:
+    """Five to nine unique meaningful terms should receive the middle bonus."""
+    text = "alpha beta gamma delta epsilon alpha beta gamma delta epsilon"
+
+    result = score_confidence(text, [], "")
+
+    assert result == 43
+
+
 def test_score_confidence_increases_for_richer_content() -> None:
     """Richer text with keywords and summary should score higher."""
     weak = score_confidence("Django.", ["django"], "Django.")