AAdewunmi · AAdewunmi · May 17, 2026 · May 17, 2026 · May 17, 2026 · May 17, 2026
diff --git a/apps/insights/nlp/confidence.py b/apps/insights/nlp/confidence.py
@@ -0,0 +1,70 @@
+"""Rule-based confidence scoring for deterministic study insights."""
+
+from __future__ import annotations
+
+from apps.insights.nlp.text_processing import meaningful_tokens
+
+
+def score_confidence(text: str | None, keywords: list[str], summary: str) -> int:
+    """Score confidence for a generated insight.
+
+    The score is a transparent quality heuristic, not a probability or model
+    intelligence signal. It reflects whether the source text contains enough
+    meaningful content to support a useful extractive summary and keyword set.
+
+    Args:
+        text: Raw source text.
+        keywords: Extracted keyword list.
+        summary: Generated summary text.
+
+    Returns:
+        Integer confidence score from 0 to 100.
+    """
+    tokens = meaningful_tokens(text)
+
+    if not tokens:
+        return 0
+
+    score = 20
+
+    if len(tokens) >= 20:
+        score += 25
+    elif len(tokens) >= 10:
+        score += 15
+    else:
+        score += 5
+
+    if len(set(tokens)) >= 10:
+        score += 15
+    elif len(set(tokens)) >= 5:
+        score += 8
+
+    if len(keywords) >= 5:
+        score += 20
+    elif len(keywords) >= 3:
+        score += 12
+    elif keywords:
+        score += 5
+
+    if summary and "not enough study note content" not in summary.lower():
+        score += 20
+
+    return max(0, min(score, 100))
+
+
+def confidence_label(score: int) -> str:
+    """Return a user-facing confidence label.
+
+    Args:
+        score: Confidence score from 0 to 100.
+
+    Returns:
+        One of ``Low``, ``Medium``, or ``High``.
+    """
+    if score >= 75:
+        return "High"
+
+    if score >= 45:
+        return "Medium"
+
+    return "Low"
diff --git a/apps/insights/nlp/summarisation.py b/apps/insights/nlp/summarisation.py
@@ -62,6 +62,6 @@ def summarise_text(text: str | None, max_sentences: int = 2) -> str:
         :max_sentences
     ]
 
-    # Preserve source order after ranking so the summary reads naturally.
+    # Preserve source order and copy sentences verbatim so summaries stay grounded.
     selected_in_source_order = sorted(selected, key=lambda item: item[1])
     return " ".join(sentence for _score, _index, sentence in selected_in_source_order)
diff --git a/apps/insights/tests/test_confidence.py b/apps/insights/tests/test_confidence.py
@@ -0,0 +1,90 @@
+"""Tests for deterministic confidence scoring."""
+
+from __future__ import annotations
+
+from apps.insights.nlp.confidence import confidence_label, score_confidence
+from apps.insights.nlp.summarisation import LOW_INFORMATION_SUMMARY
+
+
+def test_score_confidence_returns_zero_for_empty_text() -> None:
+    """No note content should produce zero confidence."""
+    result = score_confidence("", [], "There is not enough content.")
+
+    assert result == 0
+
+
+def test_score_confidence_stays_within_bounds() -> None:
+    """Confidence should always be a percentage-style bounded value."""
+    text = " ".join(["django testing database workflow"] * 20)
+
+    result = score_confidence(
+        text,
+        ["django", "testing", "database", "workflow"],
+        "Django testing database workflow.",
+    )
+
+    assert 0 <= result <= 100
+
+
+def test_score_confidence_is_repeatable_for_same_inputs() -> None:
+    """The same inputs should always produce the same heuristic score."""
+    text = (
+        "Django testing confirms reliable session workflows. "
+        "Database-backed notes improve review quality."
+    )
+    keywords = ["django", "testing", "database"]
+    summary = "Django testing confirms reliable session workflows."
+
+    scores = [score_confidence(text, keywords, summary) for _ in range(5)]
+
+    assert len(set(scores)) == 1
+
+
+def test_score_confidence_rewards_moderate_unique_term_variety() -> None:
+    """Five to nine unique meaningful terms should receive the middle bonus."""
+    text = "alpha beta gamma delta epsilon alpha beta gamma delta epsilon"
+
+    result = score_confidence(text, [], "")
+
+    assert result == 43
+
+
+def test_score_confidence_increases_for_richer_content() -> None:
+    """Richer text with keywords and summary should score higher."""
+    weak = score_confidence("Django.", ["django"], "Django.")
+    strong = score_confidence(
+        (
+            "Django testing confirms reliable session workflows. "
+            "Database-backed notes improve review quality. "
+            "Pytest verifies permissions and persistence behaviour."
+        ),
+        ["django", "testing", "database", "pytest", "permissions"],
+        "Django testing confirms reliable session workflows.",
+    )
+
+    assert strong > weak
+
+
+def test_score_confidence_does_not_reward_low_information_summary() -> None:
+    """Fallback summary text should not be treated as a usable summary."""
+    text = (
+        "Django testing confirms reliable session workflows. "
+        "Database-backed notes improve review quality."
+    )
+    keywords = ["django", "testing", "database"]
+
+    with_fallback_summary = score_confidence(text, keywords, LOW_INFORMATION_SUMMARY)
+    with_extract_summary = score_confidence(
+        text,
+        keywords,
+        "Django testing confirms reliable session workflows.",
+    )
+
+    assert with_extract_summary > with_fallback_summary
+
+
+def test_confidence_label_maps_score_to_user_facing_label() -> None:
+    """Confidence labels should be simple and predictable."""
+    assert confidence_label(20) == "Low"
+    assert confidence_label(60) == "Medium"
+    assert confidence_label(90) == "High"
diff --git a/apps/insights/tests/test_summarisation.py b/apps/insights/tests/test_summarisation.py
@@ -6,6 +6,7 @@
 
 from apps.insights.nlp import summarisation
 from apps.insights.nlp.summarisation import LOW_INFORMATION_SUMMARY, summarise_text
+from apps.insights.nlp.text_processing import split_sentences
 
 
 def test_summarise_text_selects_high_signal_source_sentence() -> None:
@@ -37,6 +38,27 @@ def test_summarise_text_preserves_source_order_after_scoring() -> None:
     )
 
 
+def test_summarise_text_uses_only_user_note_sentences() -> None:
+    """Every summary sentence should be copied from the user's own notes."""
+    text = (
+        "Photosynthesis photosynthesis uses chlorophyll to convert light into glucose. "
+        "Cell respiration releases stored energy during revision. "
+        "Photosynthesis depends on carbon dioxide and water."
+    )
+
+    result = summarise_text(text, max_sentences=2)
+
+    assert result == (
+        "Photosynthesis photosynthesis uses chlorophyll to convert light into glucose. "
+        "Photosynthesis depends on carbon dioxide and water."
+    )
+    source_sentences = split_sentences(text)
+    summary_sentences = split_sentences(result)
+    assert summary_sentences
+    assert all(sentence in source_sentences for sentence in summary_sentences)
+    assert "mitochondria" not in result
+
+
 def test_summarise_text_handles_empty_input() -> None:
     """Empty input should return the low-information summary."""
     assert summarise_text("") == LOW_INFORMATION_SUMMARY

diff --git a/docs/ai-nlp-contract.md b/docs/ai-nlp-contract.md
@@ -123,8 +123,8 @@ Confidence labels:
 - `Medium` for scores from 45 to 74
 - `High` for scores from 75 to 100
 
-The confidence score is not a probability and does not claim factual
-correctness. It is a quality signal for the generated insight.
+The confidence score is not a probability, an intelligence score, or a claim
+of factual correctness. It is a quality signal for the generated insight.
 
 ## Explanation