From 7d5e47e97f828bf5522e89a0cc26ce354bfc1efd Mon Sep 17 00:00:00 2001 From: adrian adewunmi Date: Sun, 17 May 2026 10:18:26 +0100 Subject: [PATCH 1/6] feat(summarisation): improve summary grounding by preserving source sentences verbatim --- apps/insights/nlp/summarisation.py | 2 +- apps/insights/tests/test_summarisation.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/apps/insights/nlp/summarisation.py b/apps/insights/nlp/summarisation.py index c9022db..b34b563 100644 --- a/apps/insights/nlp/summarisation.py +++ b/apps/insights/nlp/summarisation.py @@ -62,6 +62,6 @@ def summarise_text(text: str | None, max_sentences: int = 2) -> str: :max_sentences ] - # Preserve source order after ranking so the summary reads naturally. + # Preserve source order and copy sentences verbatim so summaries stay grounded. selected_in_source_order = sorted(selected, key=lambda item: item[1]) return " ".join(sentence for _score, _index, sentence in selected_in_source_order) diff --git a/apps/insights/tests/test_summarisation.py b/apps/insights/tests/test_summarisation.py index 0e6c753..1d03d29 100644 --- a/apps/insights/tests/test_summarisation.py +++ b/apps/insights/tests/test_summarisation.py @@ -6,6 +6,7 @@ from apps.insights.nlp import summarisation from apps.insights.nlp.summarisation import LOW_INFORMATION_SUMMARY, summarise_text +from apps.insights.nlp.text_processing import split_sentences def test_summarise_text_selects_high_signal_source_sentence() -> None: @@ -37,6 +38,27 @@ def test_summarise_text_preserves_source_order_after_scoring() -> None: ) +def test_summarise_text_uses_only_user_note_sentences() -> None: + """Every summary sentence should be copied from the user's own notes.""" + text = ( + "Photosynthesis photosynthesis uses chlorophyll to convert light into glucose. " + "Cell respiration releases stored energy during revision. " + "Photosynthesis depends on carbon dioxide and water." + ) + + result = summarise_text(text, max_sentences=2) + + assert result == ( + "Photosynthesis photosynthesis uses chlorophyll to convert light into glucose. " + "Photosynthesis depends on carbon dioxide and water." + ) + source_sentences = split_sentences(text) + summary_sentences = split_sentences(result) + assert summary_sentences + assert all(sentence in source_sentences for sentence in summary_sentences) + assert "mitochondria" not in result + + def test_summarise_text_handles_empty_input() -> None: """Empty input should return the low-information summary.""" assert summarise_text("") == LOW_INFORMATION_SUMMARY From d2b2c127551f9b5dca2c966c629e74300d79b42f Mon Sep 17 00:00:00 2001 From: adrian adewunmi Date: Sun, 17 May 2026 10:22:26 +0100 Subject: [PATCH 2/6] feat(confidence): implement rule-based confidence scoring for study insights --- apps/insights/nlp/confidence.py | 70 +++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 apps/insights/nlp/confidence.py diff --git a/apps/insights/nlp/confidence.py b/apps/insights/nlp/confidence.py new file mode 100644 index 0000000..7208f13 --- /dev/null +++ b/apps/insights/nlp/confidence.py @@ -0,0 +1,70 @@ +"""Rule-based confidence scoring for deterministic study insights.""" + +from __future__ import annotations + +from apps.insights.nlp.text_processing import meaningful_tokens + + +def score_confidence(text: str | None, keywords: list[str], summary: str) -> int: + """Score confidence for a generated insight. + + The score is a transparent quality heuristic. It reflects whether the + source text contains enough meaningful content to support a useful + extractive summary and keyword set. + + Args: + text: Raw source text. + keywords: Extracted keyword list. + summary: Generated summary text. + + Returns: + Integer confidence score from 0 to 100. + """ + tokens = meaningful_tokens(text) + + if not tokens: + return 0 + + score = 20 + + if len(tokens) >= 20: + score += 25 + elif len(tokens) >= 10: + score += 15 + else: + score += 5 + + if len(set(tokens)) >= 10: + score += 15 + elif len(set(tokens)) >= 5: + score += 8 + + if len(keywords) >= 5: + score += 20 + elif len(keywords) >= 3: + score += 12 + elif keywords: + score += 5 + + if summary and "not enough study note content" not in summary.lower(): + score += 20 + + return max(0, min(score, 100)) + + +def confidence_label(score: int) -> str: + """Return a user-facing confidence label. + + Args: + score: Confidence score from 0 to 100. + + Returns: + One of ``Low``, ``Medium``, or ``High``. + """ + if score >= 75: + return "High" + + if score >= 45: + return "Medium" + + return "Low" \ No newline at end of file From 80071c32c668a1e282237ad7d1685c7c992a8b95 Mon Sep 17 00:00:00 2001 From: adrian adewunmi Date: Sun, 17 May 2026 10:24:19 +0100 Subject: [PATCH 3/6] feat(tests): add unit tests for confidence scoring functionality --- apps/insights/tests/test_confidence.py | 48 ++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 apps/insights/tests/test_confidence.py diff --git a/apps/insights/tests/test_confidence.py b/apps/insights/tests/test_confidence.py new file mode 100644 index 0000000..aa521f9 --- /dev/null +++ b/apps/insights/tests/test_confidence.py @@ -0,0 +1,48 @@ +"""Tests for deterministic confidence scoring.""" + +from __future__ import annotations + +from apps.insights.nlp.confidence import confidence_label, score_confidence + + +def test_score_confidence_returns_zero_for_empty_text() -> None: + """No note content should produce zero confidence.""" + result = score_confidence("", [], "There is not enough content.") + + assert result == 0 + + +def test_score_confidence_stays_within_bounds() -> None: + """Confidence should always be a percentage-style bounded value.""" + text = " ".join(["django testing database workflow"] * 20) + + result = score_confidence( + text, + ["django", "testing", "database", "workflow"], + "Django testing database workflow.", + ) + + assert 0 <= result <= 100 + + +def test_score_confidence_increases_for_richer_content() -> None: + """Richer text with keywords and summary should score higher.""" + weak = score_confidence("Django.", ["django"], "Django.") + strong = score_confidence( + ( + "Django testing confirms reliable session workflows. " + "Database-backed notes improve review quality. " + "Pytest verifies permissions and persistence behaviour." + ), + ["django", "testing", "database", "pytest", "permissions"], + "Django testing confirms reliable session workflows.", + ) + + assert strong > weak + + +def test_confidence_label_maps_score_to_user_facing_label() -> None: + """Confidence labels should be simple and predictable.""" + assert confidence_label(20) == "Low" + assert confidence_label(60) == "Medium" + assert confidence_label(90) == "High" \ No newline at end of file From 246728ff03c65d7981d885d448222728f874d158 Mon Sep 17 00:00:00 2001 From: adrian adewunmi Date: Sun, 17 May 2026 10:30:34 +0100 Subject: [PATCH 4/6] feat(tests): add test for confidence scoring with extractive summary comparison --- apps/insights/tests/test_confidence.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/apps/insights/tests/test_confidence.py b/apps/insights/tests/test_confidence.py index aa521f9..8bebf5a 100644 --- a/apps/insights/tests/test_confidence.py +++ b/apps/insights/tests/test_confidence.py @@ -3,6 +3,7 @@ from __future__ import annotations from apps.insights.nlp.confidence import confidence_label, score_confidence +from apps.insights.nlp.summarisation import LOW_INFORMATION_SUMMARY def test_score_confidence_returns_zero_for_empty_text() -> None: @@ -41,8 +42,26 @@ def test_score_confidence_increases_for_richer_content() -> None: assert strong > weak +def test_score_confidence_does_not_reward_low_information_summary() -> None: + """Fallback summary text should not be treated as a usable summary.""" + text = ( + "Django testing confirms reliable session workflows. " + "Database-backed notes improve review quality." + ) + keywords = ["django", "testing", "database"] + + with_fallback_summary = score_confidence(text, keywords, LOW_INFORMATION_SUMMARY) + with_extract_summary = score_confidence( + text, + keywords, + "Django testing confirms reliable session workflows.", + ) + + assert with_extract_summary > with_fallback_summary + + def test_confidence_label_maps_score_to_user_facing_label() -> None: """Confidence labels should be simple and predictable.""" assert confidence_label(20) == "Low" assert confidence_label(60) == "Medium" - assert confidence_label(90) == "High" \ No newline at end of file + assert confidence_label(90) == "High" From 27b6972cf12eb4a3575da6f2d58121fe8272b262 Mon Sep 17 00:00:00 2001 From: adrian adewunmi Date: Sun, 17 May 2026 10:35:33 +0100 Subject: [PATCH 5/6] feat(confidence): clarify confidence scoring description and update documentation --- apps/insights/nlp/confidence.py | 8 ++++---- apps/insights/tests/test_confidence.py | 14 ++++++++++++++ docs/ai-nlp-contract.md | 4 ++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/apps/insights/nlp/confidence.py b/apps/insights/nlp/confidence.py index 7208f13..3de9416 100644 --- a/apps/insights/nlp/confidence.py +++ b/apps/insights/nlp/confidence.py @@ -8,9 +8,9 @@ def score_confidence(text: str | None, keywords: list[str], summary: str) -> int: """Score confidence for a generated insight. - The score is a transparent quality heuristic. It reflects whether the - source text contains enough meaningful content to support a useful - extractive summary and keyword set. + The score is a transparent quality heuristic, not a probability or model + intelligence signal. It reflects whether the source text contains enough + meaningful content to support a useful extractive summary and keyword set. Args: text: Raw source text. @@ -67,4 +67,4 @@ def confidence_label(score: int) -> str: if score >= 45: return "Medium" - return "Low" \ No newline at end of file + return "Low" diff --git a/apps/insights/tests/test_confidence.py b/apps/insights/tests/test_confidence.py index 8bebf5a..4eb366e 100644 --- a/apps/insights/tests/test_confidence.py +++ b/apps/insights/tests/test_confidence.py @@ -26,6 +26,20 @@ def test_score_confidence_stays_within_bounds() -> None: assert 0 <= result <= 100 +def test_score_confidence_is_repeatable_for_same_inputs() -> None: + """The same inputs should always produce the same heuristic score.""" + text = ( + "Django testing confirms reliable session workflows. " + "Database-backed notes improve review quality." + ) + keywords = ["django", "testing", "database"] + summary = "Django testing confirms reliable session workflows." + + scores = [score_confidence(text, keywords, summary) for _ in range(5)] + + assert len(set(scores)) == 1 + + def test_score_confidence_increases_for_richer_content() -> None: """Richer text with keywords and summary should score higher.""" weak = score_confidence("Django.", ["django"], "Django.") diff --git a/docs/ai-nlp-contract.md b/docs/ai-nlp-contract.md index 73aaf87..0c92f36 100644 --- a/docs/ai-nlp-contract.md +++ b/docs/ai-nlp-contract.md @@ -123,8 +123,8 @@ Confidence labels: - `Medium` for scores from 45 to 74 - `High` for scores from 75 to 100 -The confidence score is not a probability and does not claim factual -correctness. It is a quality signal for the generated insight. +The confidence score is not a probability, an intelligence score, or a claim +of factual correctness. It is a quality signal for the generated insight. ## Explanation From 79419703ddb61ce9bfa4fb8dff4b253ca4cd7b1f Mon Sep 17 00:00:00 2001 From: adrian adewunmi Date: Sun, 17 May 2026 10:55:11 +0100 Subject: [PATCH 6/6] feat(tests): add test for moderate unique term variety in confidence scoring --- apps/insights/tests/test_confidence.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/apps/insights/tests/test_confidence.py b/apps/insights/tests/test_confidence.py index 4eb366e..ed57adf 100644 --- a/apps/insights/tests/test_confidence.py +++ b/apps/insights/tests/test_confidence.py @@ -40,6 +40,15 @@ def test_score_confidence_is_repeatable_for_same_inputs() -> None: assert len(set(scores)) == 1 +def test_score_confidence_rewards_moderate_unique_term_variety() -> None: + """Five to nine unique meaningful terms should receive the middle bonus.""" + text = "alpha beta gamma delta epsilon alpha beta gamma delta epsilon" + + result = score_confidence(text, [], "") + + assert result == 43 + + def test_score_confidence_increases_for_richer_content() -> None: """Richer text with keywords and summary should score higher.""" weak = score_confidence("Django.", ["django"], "Django.")