Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions apps/insights/nlp/confidence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Rule-based confidence scoring for deterministic study insights."""

from __future__ import annotations

from apps.insights.nlp.text_processing import meaningful_tokens


def score_confidence(text: str | None, keywords: list[str], summary: str) -> int:
    """Score confidence for a generated insight.

    The score is a transparent quality heuristic, not a probability or model
    intelligence signal. It reflects whether the source text contains enough
    meaningful content to support a useful extractive summary and keyword set.

    Points are additive on top of a base of 20 awarded for any meaningful
    content:

    * token volume: +25 for 20 or more tokens, +15 for 10 or more, else +5
    * unique-token variety: +15 for 10 or more distinct tokens, +8 for 5 or more
    * keyword count: +20 for 5 or more, +12 for 3 or more, +5 for at least one
    * usable summary: +20 when the summary is non-blank and is not the
      low-information fallback message

    Args:
        text: Raw source text.
        keywords: Extracted keyword list. A missing (``None``) list is treated
            as empty.
        summary: Generated summary text. Blank or whitespace-only summaries
            earn no bonus.

    Returns:
        Integer confidence score from 0 to 100.
    """
    tokens = meaningful_tokens(text)

    # Without any meaningful token there is nothing to be confident about.
    if not tokens:
        return 0

    token_count = len(tokens)
    unique_count = len(set(tokens))
    keyword_count = len(keywords) if keywords else 0

    score = 20

    if token_count >= 20:
        score += 25
    elif token_count >= 10:
        score += 15
    else:
        score += 5

    if unique_count >= 10:
        score += 15
    elif unique_count >= 5:
        score += 8

    if keyword_count >= 5:
        score += 20
    elif keyword_count >= 3:
        score += 12
    elif keyword_count:
        score += 5

    # Strip first so a whitespace-only summary is not rewarded as usable.
    normalised_summary = summary.strip().lower() if summary else ""
    if normalised_summary and "not enough study note content" not in normalised_summary:
        score += 20

    # The bonuses currently sum to at most 100; the clamp guards future tweaks.
    return max(0, min(score, 100))


def confidence_label(score: int) -> str:
    """Translate a numeric confidence score into its display label.

    Args:
        score: Confidence score from 0 to 100.

    Returns:
        ``High`` for scores of 75 and above, ``Medium`` for scores of 45 and
        above, otherwise ``Low``.
    """
    # Ordered from the highest floor down so the first match wins.
    bands = ((75, "High"), (45, "Medium"))

    for floor, label in bands:
        if score >= floor:
            return label

    return "Low"
2 changes: 1 addition & 1 deletion apps/insights/nlp/summarisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,6 @@ def summarise_text(text: str | None, max_sentences: int = 2) -> str:
:max_sentences
]

# Preserve source order after ranking so the summary reads naturally.
# Preserve source order and copy sentences verbatim so summaries stay grounded.
selected_in_source_order = sorted(selected, key=lambda item: item[1])
return " ".join(sentence for _score, _index, sentence in selected_in_source_order)
90 changes: 90 additions & 0 deletions apps/insights/tests/test_confidence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""Tests for deterministic confidence scoring."""

from __future__ import annotations

from apps.insights.nlp.confidence import confidence_label, score_confidence
from apps.insights.nlp.summarisation import LOW_INFORMATION_SUMMARY


def test_score_confidence_returns_zero_for_empty_text() -> None:
    """An empty note must score zero even when a summary string is supplied."""
    assert score_confidence("", [], "There is not enough content.") == 0


def test_score_confidence_stays_within_bounds() -> None:
    """A long, repetitive note must still produce a score inside 0..100."""
    repeated_note = " ".join(["django testing database workflow"] * 20)
    keyword_list = ["django", "testing", "database", "workflow"]
    summary_text = "Django testing database workflow."

    score = score_confidence(repeated_note, keyword_list, summary_text)

    assert 0 <= score <= 100


def test_score_confidence_is_repeatable_for_same_inputs() -> None:
    """Scoring identical inputs repeatedly must never change the result."""
    note = (
        "Django testing confirms reliable session workflows. "
        "Database-backed notes improve review quality."
    )
    keyword_list = ["django", "testing", "database"]
    summary_text = "Django testing confirms reliable session workflows."

    # Five calls in total: one baseline plus four comparisons against it.
    baseline = score_confidence(note, keyword_list, summary_text)
    repeats = [score_confidence(note, keyword_list, summary_text) for _ in range(4)]

    assert all(repeat == baseline for repeat in repeats)


def test_score_confidence_rewards_moderate_unique_term_variety() -> None:
    """A note with five distinct terms repeated twice earns the middle variety bonus."""
    note = "alpha beta gamma delta epsilon alpha beta gamma delta epsilon"

    # With no keywords and no summary, 43 is only reachable as
    # 20 (base) + 15 (ten or more tokens) + 8 (five to nine unique terms).
    assert score_confidence(note, [], "") == 43


def test_score_confidence_increases_for_richer_content() -> None:
    """A multi-sentence note with more keywords must outscore a one-word note."""
    rich_note = (
        "Django testing confirms reliable session workflows. "
        "Database-backed notes improve review quality. "
        "Pytest verifies permissions and persistence behaviour."
    )
    rich_keywords = ["django", "testing", "database", "pytest", "permissions"]
    rich_summary = "Django testing confirms reliable session workflows."

    sparse_score = score_confidence("Django.", ["django"], "Django.")
    rich_score = score_confidence(rich_note, rich_keywords, rich_summary)

    assert rich_score > sparse_score


def test_score_confidence_does_not_reward_low_information_summary() -> None:
    """The fallback summary must score lower than a real extractive summary."""
    note = (
        "Django testing confirms reliable session workflows. "
        "Database-backed notes improve review quality."
    )
    keyword_list = ["django", "testing", "database"]
    extract_summary = "Django testing confirms reliable session workflows."

    # Only the summary argument differs between the two calls.
    fallback_score = score_confidence(note, keyword_list, LOW_INFORMATION_SUMMARY)
    extract_score = score_confidence(note, keyword_list, extract_summary)

    assert extract_score > fallback_score


def test_confidence_label_maps_score_to_user_facing_label() -> None:
    """Confidence labels should be simple and predictable.

    Covers a representative score inside each band as well as the exact
    band edges (44/45 and 74/75) and the extremes of the 0-100 range, so an
    off-by-one in a threshold comparison is caught.
    """
    # Representative mid-band values.
    assert confidence_label(20) == "Low"
    assert confidence_label(60) == "Medium"
    assert confidence_label(90) == "High"

    # Low/Medium boundary: 45 is the first Medium score.
    assert confidence_label(44) == "Low"
    assert confidence_label(45) == "Medium"

    # Medium/High boundary: 75 is the first High score.
    assert confidence_label(74) == "Medium"
    assert confidence_label(75) == "High"

    # Extremes of the documented score range.
    assert confidence_label(0) == "Low"
    assert confidence_label(100) == "High"
22 changes: 22 additions & 0 deletions apps/insights/tests/test_summarisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from apps.insights.nlp import summarisation
from apps.insights.nlp.summarisation import LOW_INFORMATION_SUMMARY, summarise_text
from apps.insights.nlp.text_processing import split_sentences


def test_summarise_text_selects_high_signal_source_sentence() -> None:
Expand Down Expand Up @@ -37,6 +38,27 @@ def test_summarise_text_preserves_source_order_after_scoring() -> None:
)


def test_summarise_text_uses_only_user_note_sentences() -> None:
    """The summary must consist solely of sentences present in the source notes."""
    notes = (
        "Photosynthesis photosynthesis uses chlorophyll to convert light into glucose. "
        "Cell respiration releases stored energy during revision. "
        "Photosynthesis depends on carbon dioxide and water."
    )
    expected_summary = (
        "Photosynthesis photosynthesis uses chlorophyll to convert light into glucose. "
        "Photosynthesis depends on carbon dioxide and water."
    )

    summary = summarise_text(notes, max_sentences=2)

    assert summary == expected_summary

    allowed_sentences = split_sentences(notes)
    produced_sentences = split_sentences(summary)
    assert produced_sentences

    # Any sentence not found in the notes would have been invented.
    invented = [
        sentence for sentence in produced_sentences if sentence not in allowed_sentences
    ]
    assert not invented

    # A term that never appears in the notes must not appear in the summary.
    assert "mitochondria" not in summary


def test_summarise_text_handles_empty_input() -> None:
"""Empty input should return the low-information summary."""
assert summarise_text("") == LOW_INFORMATION_SUMMARY
Expand Down
4 changes: 2 additions & 2 deletions docs/ai-nlp-contract.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ Confidence labels:
- `Medium` for scores from 45 to 74
- `High` for scores from 75 to 100

The confidence score is not a probability and does not claim factual
correctness. It is a quality signal for the generated insight.
The confidence score is not a probability, an intelligence score, or a claim
of factual correctness. It is a quality signal for the generated insight.

## Explanation

Expand Down
Loading