Skip to content
18 changes: 18 additions & 0 deletions apps/insights/migrations/0002_alter_studyinsight_keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.2.13 on 2026-05-18 09:57

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``StudyInsight.keywords`` to a JSONField with ``blank=True``.

    The field keeps ``list`` as its default callable.
    """

    # Must run after the initial insights schema exists.
    dependencies = [("insights", "0001_initial")]

    operations = [
        migrations.AlterField(
            model_name="studyinsight",
            name="keywords",
            field=models.JSONField(default=list, blank=True),
        ),
    ]
2 changes: 1 addition & 1 deletion apps/insights/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class StudyInsight(models.Model):
related_name="insights",
)
summary = models.TextField()
keywords = models.JSONField(default=list)
keywords = models.JSONField(default=list, blank=True)
confidence = models.PositiveSmallIntegerField(
validators=[MinValueValidator(0), MaxValueValidator(100)]
)
Expand Down
45 changes: 45 additions & 0 deletions apps/insights/selectors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Read selectors for study insights."""

from __future__ import annotations

from django.db.models import QuerySet

from apps.insights.models import StudyInsight


def get_user_insights(user: object) -> QuerySet[StudyInsight]:
    """Fetch every insight whose session is owned by ``user``.

    Args:
        user: Authenticated user whose sessions scope the lookup.

    Returns:
        QuerySet of matching insights, newest first (ties broken by
        descending id), with the session and its owner joined in.
    """
    owned_insights = StudyInsight.objects.filter(session__owner=user)
    with_relations = owned_insights.select_related("session", "session__owner")
    return with_relations.order_by("-created_at", "-id")


def get_latest_session_insight(
    *,
    session: object,
    user: object,
) -> StudyInsight | None:
    """Fetch the most recent insight for ``session``, scoped to its owner.

    Args:
        session: Study session instance to look up insights for.
        user: Authenticated user; the session must be owned by this user
            for any insight to match.

    Returns:
        The newest matching insight (ties broken by descending id), or
        ``None`` when nothing matches.
    """
    scoped = StudyInsight.objects.filter(session=session, session__owner=user)
    with_relations = scoped.select_related("session", "session__owner")
    newest_first = with_relations.order_by("-created_at", "-id")
    return newest_first.first()
119 changes: 119 additions & 0 deletions apps/insights/services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""Application services for generating persisted study insights."""

from __future__ import annotations

from dataclasses import dataclass

from django.core.exceptions import FieldError, PermissionDenied
from django.db import transaction

from apps.insights.models import StudyInsight
from apps.insights.nlp.confidence import score_confidence
from apps.insights.nlp.explanations import build_explanation
from apps.insights.nlp.keyword_extraction import extract_keywords
from apps.insights.nlp.summarisation import summarise_text
from apps.insights.nlp.text_processing import source_text_hash
from apps.sessions.models import StudyNote


@dataclass(frozen=True)
class InsightGenerationResult:
    """Immutable outcome of an insight generation request.

    Attributes:
        insight: The persisted insight that was created or reused.
        created: ``True`` when a new insight was created, ``False`` when an
            existing one was returned.
    """

    insight: StudyInsight
    created: bool


def get_session_note_text(session: object) -> str:
    """Join the content of a session's notes into one block of text.

    Args:
        session: Study session instance. Its ``notes`` attribute is used
            when present; otherwise notes are looked up by session.

    Returns:
        Stripped, non-empty note contents separated by blank lines, in
        deterministic order.
    """
    related_notes = getattr(session, "notes", None)
    queryset = (
        StudyNote.objects.filter(session=session)
        if related_notes is None
        else related_notes.all()
    )

    try:
        ordered_notes = queryset.order_by("created_at", "id")
    except FieldError:
        # StudyNote in older Sprint 2 builds may lack created_at; id alone
        # still gives a stable order.
        ordered_notes = queryset.order_by("id")

    stripped_contents = [
        raw.strip()
        for raw in (getattr(note, "content", "") for note in ordered_notes)
        if raw
    ]
    # Whitespace-only notes strip down to "" and are dropped here.
    return "\n\n".join(chunk for chunk in stripped_contents if chunk)


def analyse_note_text(text: str) -> dict[str, object]:
    """Run each NLP step over the note text and collect the results.

    Keywords and summary are derived from the text first; the confidence
    score is computed from the text, keywords and summary; the explanation
    is built from the text, keywords and confidence.

    Args:
        text: Combined note content.

    Returns:
        Dictionary with the keys ``summary``, ``keywords``, ``confidence``,
        ``explanation`` and ``source_hash``.
    """
    found_keywords = extract_keywords(text)
    short_summary = summarise_text(text)
    confidence_score = score_confidence(text, found_keywords, short_summary)
    rationale = build_explanation(text, found_keywords, confidence_score)
    text_digest = source_text_hash(text)

    return dict(
        summary=short_summary,
        keywords=found_keywords,
        confidence=confidence_score,
        explanation=rationale,
        source_hash=text_digest,
    )


@transaction.atomic
def generate_insight_for_session(
    *,
    session: object,
    requested_by: object,
) -> InsightGenerationResult:
    """Create an insight for a session, or return the one already stored.

    Insights are looked up by session and the hash of the session's note
    text, so unchanged notes reuse the stored insight instead of adding a
    new one. The whole operation runs inside a database transaction.

    Args:
        session: Study session to analyse.
        requested_by: Authenticated user requesting the insight.

    Returns:
        InsightGenerationResult holding the insight and whether it was
        newly created.

    Raises:
        PermissionDenied: If the requesting user does not own the session.
    """
    if getattr(session, "owner", None) != requested_by:
        raise PermissionDenied("You can only generate insights for your own sessions.")

    analysis = analyse_note_text(get_session_note_text(session))

    # Only these fields are written when a new row has to be created; the
    # hash is part of the lookup itself.
    persisted_fields = ("summary", "keywords", "confidence", "explanation")

    record, was_created = StudyInsight.objects.get_or_create(
        session=session,
        source_hash=analysis["source_hash"],
        defaults={field: analysis[field] for field in persisted_fields},
    )
    return InsightGenerationResult(insight=record, created=was_created)
103 changes: 103 additions & 0 deletions apps/insights/tests/test_architecture_boundaries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""Architecture boundary tests for the insights workflow."""

from __future__ import annotations

import ast
from pathlib import Path

# This module lives at apps/insights/tests/, so the fourth ancestor of the
# resolved file path (parents[3]) is the directory that contains ``apps/``.
PROJECT_ROOT = Path(__file__).resolve().parents[3]
# Directory tree that the boundary check scans for ``views.py`` modules.
APPS_ROOT = PROJECT_ROOT / "apps"

# Dotted path of the package that view modules must not import from.
FORBIDDEN_NLP_MODULE_PREFIX = "apps.insights.nlp"
# Helper names that views must not import with ``from ... import``, whatever
# module they are imported from.
FORBIDDEN_NLP_IMPORT_NAMES = {
    "LOW_INFORMATION_SUMMARY",
    "build_explanation",
    "confidence_label",
    "extract_keywords",
    "meaningful_tokens",
    "normalise_text",
    "score_confidence",
    "source_text_hash",
    "split_paragraphs",
    "split_sentences",
    "summarise_text",
}


def _is_nlp_module(dotted_name: str) -> bool:
    """Return True when ``dotted_name`` is the NLP package or one of its submodules."""
    return dotted_name == FORBIDDEN_NLP_MODULE_PREFIX or dotted_name.startswith(
        f"{FORBIDDEN_NLP_MODULE_PREFIX}."
    )


def _absolute_import_from_module(
    node: ast.ImportFrom,
    package_parts: tuple[str, ...],
) -> str:
    """Return the absolute dotted module targeted by a ``from ... import`` node."""
    module = node.module or ""
    if not node.level:
        return module

    # Level 1 is the importing file's own package; each extra level climbs
    # one parent package.
    keep = max(len(package_parts) - (node.level - 1), 0)
    return ".".join([*package_parts[:keep], *([module] if module else [])])


def find_view_nlp_import_violations(
    *,
    project_root: Path,
    apps_root: Path,
) -> list[str]:
    """Return view imports that cross the insights NLP boundary.

    Every ``views.py`` under ``apps_root`` is parsed (not executed) and its
    import statements are checked for:

    * ``import apps.insights.nlp[.x]``
    * ``from apps.insights.nlp[.x] import ...``, including relative forms
      such as ``from .nlp import ...`` that resolve to the same package
    * ``from apps.insights import nlp`` / ``from . import nlp``, which pull
      the package in by name
    * any ``from ... import <name>`` where ``<name>`` is a known NLP helper

    Args:
        project_root: Directory that dotted module paths and reported file
            paths are relative to.
        apps_root: Directory searched recursively for ``views.py`` files.

    Returns:
        One ``"<relative path>:<line> imports ..."`` message per violation,
        with files visited in sorted path order.

    Raises:
        ValueError: If a violating view file is not located under
            ``project_root``.
    """
    violations: list[str] = []

    for view_path in sorted(apps_root.glob("**/views.py")):
        tree = ast.parse(view_path.read_text(encoding="utf-8"), filename=str(view_path))

        try:
            package_parts = view_path.parent.relative_to(project_root).parts
        except ValueError:
            # Relative imports cannot be resolved for files outside the
            # project root; absolute imports are still checked.
            package_parts = ()

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                violations.extend(
                    f"{view_path.relative_to(project_root)}:{node.lineno} imports "
                    f"{alias.name}"
                    for alias in node.names
                    if _is_nlp_module(alias.name)
                )
            elif isinstance(node, ast.ImportFrom):
                module = _absolute_import_from_module(node, package_parts)

                if _is_nlp_module(module):
                    violations.append(
                        f"{view_path.relative_to(project_root)}:{node.lineno} imports "
                        f"from {module}"
                    )
                elif module:
                    # ``from apps.insights import nlp`` names a parent package
                    # but still binds the forbidden package itself.
                    violations.extend(
                        f"{view_path.relative_to(project_root)}:{node.lineno} imports "
                        f"{module}.{alias.name}"
                        for alias in node.names
                        if _is_nlp_module(f"{module}.{alias.name}")
                    )

                violations.extend(
                    f"{view_path.relative_to(project_root)}:{node.lineno} imports "
                    f"NLP helper {alias.name}"
                    for alias in node.names
                    if alias.name in FORBIDDEN_NLP_IMPORT_NAMES
                )

    return violations


def test_detects_views_that_import_insights_nlp_helpers(tmp_path: Path) -> None:
    """A view importing NLP internals is reported with its file, line and import."""
    fake_apps_root = tmp_path / "apps"
    fake_view = fake_apps_root / "example" / "views.py"
    fake_view.parent.mkdir(parents=True)

    offending_lines = (
        "import apps.insights.nlp.summarisation",
        "from apps.insights.nlp.keyword_extraction import extract_keywords",
    )
    fake_view.write_text("\n".join(offending_lines), encoding="utf-8")

    reported = find_view_nlp_import_violations(
        project_root=tmp_path,
        apps_root=fake_apps_root,
    )

    # The ``from`` import is reported twice: once for the module it targets
    # and once for the helper name it binds.
    expected = [
        "apps/example/views.py:1 imports apps.insights.nlp.summarisation",
        "apps/example/views.py:2 imports from apps.insights.nlp.keyword_extraction",
        "apps/example/views.py:2 imports NLP helper extract_keywords",
    ]
    assert reported == expected


def test_views_do_not_import_insights_nlp_helpers() -> None:
    """No real ``views.py`` in the project may import insights NLP internals."""
    violations = find_view_nlp_import_violations(
        project_root=PROJECT_ROOT,
        apps_root=APPS_ROOT,
    )
    assert violations == []
Loading
Loading