diff --git a/apps/insights/migrations/0002_alter_studyinsight_keywords.py b/apps/insights/migrations/0002_alter_studyinsight_keywords.py
new file mode 100644
index 0000000..80371ae
--- /dev/null
+++ b/apps/insights/migrations/0002_alter_studyinsight_keywords.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.2.13 on 2026-05-18 09:57
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("insights", "0001_initial"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="studyinsight",
+            name="keywords",
+            field=models.JSONField(blank=True, default=list),
+        ),
+    ]
diff --git a/apps/insights/models.py b/apps/insights/models.py
index 64cf64e..21c93e7 100644
--- a/apps/insights/models.py
+++ b/apps/insights/models.py
@@ -23,7 +23,7 @@ class StudyInsight(models.Model):
         related_name="insights",
     )
     summary = models.TextField()
-    keywords = models.JSONField(default=list)
+    keywords = models.JSONField(default=list, blank=True)
     confidence = models.PositiveSmallIntegerField(
         validators=[MinValueValidator(0), MaxValueValidator(100)]
     )
diff --git a/apps/insights/selectors.py b/apps/insights/selectors.py
new file mode 100644
index 0000000..09da06d
--- /dev/null
+++ b/apps/insights/selectors.py
@@ -0,0 +1,45 @@
+"""Read selectors for study insights."""
+
+from __future__ import annotations
+
+from django.db.models import QuerySet
+
+from apps.insights.models import StudyInsight
+
+
+def get_user_insights(user: object) -> QuerySet[StudyInsight]:
+    """Return insights attached to sessions owned by a user.
+
+    Args:
+        user: Authenticated user.
+
+    Returns:
+        QuerySet of user-scoped insights.
+    """
+    return (
+        StudyInsight.objects.filter(session__owner=user)
+        .select_related("session", "session__owner")
+        .order_by("-created_at", "-id")
+    )
+
+
+def get_latest_session_insight(
+    *,
+    session: object,
+    user: object,
+) -> StudyInsight | None:
+    """Return the latest insight for a user-owned session.
+
+    Args:
+        session: Study session instance.
+        user: Authenticated user.
+
+    Returns:
+        Latest matching insight or ``None``.
+    """
+    return (
+        StudyInsight.objects.filter(session=session, session__owner=user)
+        .select_related("session", "session__owner")
+        .order_by("-created_at", "-id")
+        .first()
+    )
diff --git a/apps/insights/services.py b/apps/insights/services.py
new file mode 100644
index 0000000..9fa069b
--- /dev/null
+++ b/apps/insights/services.py
@@ -0,0 +1,120 @@
+"""Application services for generating persisted study insights."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from django.core.exceptions import FieldError, PermissionDenied
+from django.db import transaction
+
+from apps.insights.models import StudyInsight
+from apps.insights.nlp.confidence import score_confidence
+from apps.insights.nlp.explanations import build_explanation
+from apps.insights.nlp.keyword_extraction import extract_keywords
+from apps.insights.nlp.summarisation import summarise_text
+from apps.insights.nlp.text_processing import source_text_hash
+from apps.sessions.models import StudyNote
+
+
+@dataclass(frozen=True)
+class InsightGenerationResult:
+    """Return value for insight generation."""
+
+    insight: StudyInsight
+    created: bool
+
+
+def get_session_note_text(session: object) -> str:
+    """Return combined note text for a study session.
+
+    Args:
+        session: Study session instance.
+
+    Returns:
+        Combined note content in deterministic order.
+    """
+    notes_manager = getattr(session, "notes", None)
+
+    if notes_manager is not None:
+        notes = notes_manager.all()
+    else:
+        notes = StudyNote.objects.filter(session=session)
+
+    try:
+        notes = notes.order_by("created_at", "id")
+    except FieldError:
+        # Older Sprint 2 builds may not include created_at on StudyNote.
+        notes = notes.order_by("id")
+
+    content_values = []
+    for note in notes:
+        content = getattr(note, "content", "")
+        if content:
+            content_values.append(content.strip())
+
+    return "\n\n".join(value for value in content_values if value)
+
+
+def analyse_note_text(text: str) -> dict[str, object]:
+    """Run the deterministic NLP pipeline over source note text.
+
+    Args:
+        text: Combined note content.
+
+    Returns:
+        Dictionary containing summary, keywords, confidence, explanation,
+        and source hash.
+    """
+    keywords = extract_keywords(text)
+    summary = summarise_text(text)
+    confidence = score_confidence(text, keywords, summary)
+    explanation = build_explanation(text, keywords, confidence)
+
+    return {
+        "summary": summary,
+        "keywords": keywords,
+        "confidence": confidence,
+        "explanation": explanation,
+        "source_hash": source_text_hash(text),
+    }
+
+
+@transaction.atomic
+def generate_insight_for_session(
+    *,
+    session: object,
+    requested_by: object,
+) -> InsightGenerationResult:
+    """Generate or reuse a persisted insight for a study session.
+
+    Args:
+        session: Study session to analyse.
+        requested_by: Authenticated user requesting the insight.
+
+    Returns:
+        InsightGenerationResult with the insight and creation flag.
+
+    Raises:
+        PermissionDenied: If the session has no owner or belongs to another user.
+    """
+    session_owner = getattr(session, "owner", None)
+
+    # Fail closed: an ownerless session must never match a missing requester.
+    if session_owner is None or session_owner != requested_by:
+        raise PermissionDenied("You can only generate insights for your own sessions.")
+
+    note_text = get_session_note_text(session)
+    payload = analyse_note_text(note_text)
+
+    insight, created = StudyInsight.objects.get_or_create(
+        session=session,
+        source_hash=payload["source_hash"],
+        defaults={
+            "summary": payload["summary"],
+            "keywords": payload["keywords"],
+            "confidence": payload["confidence"],
+            "explanation": payload["explanation"],
+        },
+    )
+
+    return InsightGenerationResult(insight=insight, created=created)
diff --git a/apps/insights/tests/test_architecture_boundaries.py b/apps/insights/tests/test_architecture_boundaries.py
new file mode 100644
index 0000000..0290f7f
--- /dev/null
+++ b/apps/insights/tests/test_architecture_boundaries.py
@@ -0,0 +1,103 @@
+"""Architecture boundary tests for the insights workflow."""
+
+from __future__ import annotations
+
+import ast
+from pathlib import Path
+
+PROJECT_ROOT = Path(__file__).resolve().parents[3]
+APPS_ROOT = PROJECT_ROOT / "apps"
+
+FORBIDDEN_NLP_MODULE_PREFIX = "apps.insights.nlp"
+FORBIDDEN_NLP_IMPORT_NAMES = {
+    "LOW_INFORMATION_SUMMARY",
+    "build_explanation",
+    "confidence_label",
+    "extract_keywords",
+    "meaningful_tokens",
+    "normalise_text",
+    "score_confidence",
+    "source_text_hash",
+    "split_paragraphs",
+    "split_sentences",
+    "summarise_text",
+}
+
+
+def find_view_nlp_import_violations(
+    *,
+    project_root: Path,
+    apps_root: Path,
+) -> list[str]:
+    """Return view imports that cross the insights NLP boundary."""
+    violations: list[str] = []
+
+    for view_path in sorted(apps_root.glob("**/views.py")):
+        tree = ast.parse(view_path.read_text(encoding="utf-8"), filename=str(view_path))
+
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Import):
+                violations.extend(
+                    f"{view_path.relative_to(project_root)}:{node.lineno} imports "
+                    f"{alias.name}"
+                    for alias in node.names
+                    if alias.name == FORBIDDEN_NLP_MODULE_PREFIX
+                    or alias.name.startswith(f"{FORBIDDEN_NLP_MODULE_PREFIX}.")
+                )
+
+            if isinstance(node, ast.ImportFrom):
+                module = node.module or ""
+
+                if module == FORBIDDEN_NLP_MODULE_PREFIX or module.startswith(
+                    f"{FORBIDDEN_NLP_MODULE_PREFIX}."
+                ):
+                    violations.append(
+                        f"{view_path.relative_to(project_root)}:{node.lineno} imports "
+                        f"from {module}"
+                    )
+
+                violations.extend(
+                    f"{view_path.relative_to(project_root)}:{node.lineno} imports "
+                    f"NLP helper {alias.name}"
+                    for alias in node.names
+                    if alias.name in FORBIDDEN_NLP_IMPORT_NAMES
+                )
+
+    return violations
+
+
+def test_detects_views_that_import_insights_nlp_helpers(tmp_path: Path) -> None:
+    """Boundary violations should report the view file and forbidden import."""
+    view_path = tmp_path / "apps" / "example" / "views.py"
+    view_path.parent.mkdir(parents=True)
+    view_path.write_text(
+        "\n".join(
+            [
+                "import apps.insights.nlp.summarisation",
+                "from apps.insights.nlp.keyword_extraction import extract_keywords",
+            ]
+        ),
+        encoding="utf-8",
+    )
+
+    violations = find_view_nlp_import_violations(
+        project_root=tmp_path,
+        apps_root=tmp_path / "apps",
+    )
+
+    assert violations == [
+        "apps/example/views.py:1 imports apps.insights.nlp.summarisation",
+        "apps/example/views.py:2 imports from apps.insights.nlp.keyword_extraction",
+        "apps/example/views.py:2 imports NLP helper extract_keywords",
+    ]
+
+
+def test_views_do_not_import_insights_nlp_helpers() -> None:
+    """Views should call insight services instead of importing NLP internals."""
+    assert (
+        find_view_nlp_import_violations(
+            project_root=PROJECT_ROOT,
+            apps_root=APPS_ROOT,
+        )
+        == []
+    )
diff --git a/apps/insights/tests/test_insight_generation.py b/apps/insights/tests/test_insight_generation.py
new file mode 100644
index 0000000..7d3ba15
--- /dev/null
+++ b/apps/insights/tests/test_insight_generation.py
@@ -0,0 +1,201 @@
+"""Integration tests for insight generation and persistence."""
+
+from __future__ import annotations
+
+import pytest
+from django.core.exceptions import FieldError, PermissionDenied
+
+from apps.insights.models import StudyInsight
+from apps.insights.selectors import get_latest_session_insight, get_user_insights
+from apps.insights.services import generate_insight_for_session, get_session_note_text
+from apps.sessions.factories import StudyNoteFactory, StudySessionFactory
+from apps.users.factories import CustomUserFactory
+
+pytestmark = pytest.mark.django_db
+
+
+class LegacyNoteCollection:
+    """Minimal note collection that simulates an older note schema."""
+
+    def __init__(self) -> None:
+        self.notes = [
+            type("LegacyNote", (), {"content": " First note. "})(),
+            type("LegacyNote", (), {"content": ""})(),
+            type("LegacyNote", (), {"content": "Second note."})(),
+        ]
+
+    def order_by(self, *fields: str) -> LegacyNoteCollection:
+        """Raise for created_at ordering and allow id-only fallback ordering."""
+        if fields == ("created_at", "id"):
+            raise FieldError("created_at is not available")
+
+        assert fields == ("id",)
+        return self
+
+    def __iter__(self):
+        """Return note instances for service aggregation."""
+        return iter(self.notes)
+
+
+def test_generate_insight_persists_analysis_result() -> None:
+    """Generating an insight should persist summary, keywords, and confidence."""
+    session = StudySessionFactory()
+    StudyNoteFactory(
+        session=session,
+        content=(
+            "Django testing improves confidence. "
+            "Django views, forms, and database tests protect the workflow."
+        ),
+    )
+
+    result = generate_insight_for_session(
+        session=session,
+        requested_by=session.owner,
+    )
+
+    assert result.created is True
+    assert StudyInsight.objects.count() == 1
+    assert result.insight.session == session
+    assert result.insight.session.owner == session.owner
+    assert result.insight.summary
+    assert "django" in result.insight.keywords
+    assert result.insight.confidence > 0
+    assert result.insight.explanation
+
+
+def test_get_session_note_text_falls_back_for_legacy_note_queries(monkeypatch) -> None:
+    """Note text aggregation should handle sessions without a notes manager."""
+    note_collection = LegacyNoteCollection()
+
+    class LegacyStudyNote:
+        """Minimal StudyNote replacement for fallback query coverage."""
+
+        class objects:
+            """Manager replacement used by get_session_note_text."""
+
+            @staticmethod
+            def filter(*, session: object) -> LegacyNoteCollection:
+                assert session == legacy_session
+                return note_collection
+
+    legacy_session = type("LegacySession", (), {})()
+    monkeypatch.setattr("apps.insights.services.StudyNote", LegacyStudyNote)
+
+    assert get_session_note_text(legacy_session) == "First note.\n\nSecond note."
+
+
+def test_generate_insight_uses_source_hash() -> None:
+    """Persisted insights should include a stable source hash."""
+    session = StudySessionFactory()
+    StudyNoteFactory(session=session, content="PostgreSQL persistence matters.")
+
+    result = generate_insight_for_session(
+        session=session,
+        requested_by=session.owner,
+    )
+
+    assert len(result.insight.source_hash) == 64
+
+
+def test_generate_insight_reuses_existing_result_for_unchanged_notes() -> None:
+    """Running generation twice against unchanged notes should be idempotent."""
+    session = StudySessionFactory()
+    StudyNoteFactory(session=session, content="Testing testing database workflow.")
+
+    first = generate_insight_for_session(
+        session=session,
+        requested_by=session.owner,
+    )
+    second = generate_insight_for_session(
+        session=session,
+        requested_by=session.owner,
+    )
+
+    assert first.created is True
+    assert second.created is False
+    assert first.insight.pk == second.insight.pk
+    assert StudyInsight.objects.count() == 1
+
+
+def test_generate_insight_creates_new_result_when_notes_change() -> None:
+    """Changing source notes should produce a new source hash and insight."""
+    session = StudySessionFactory()
+    StudyNoteFactory(session=session, content="Initial Django notes.")
+
+    first = generate_insight_for_session(
+        session=session,
+        requested_by=session.owner,
+    )
+
+    StudyNoteFactory(session=session, content="Additional pytest database notes.")
+
+    second = generate_insight_for_session(
+        session=session,
+        requested_by=session.owner,
+    )
+
+    assert first.insight.source_hash != second.insight.source_hash
+    assert StudyInsight.objects.count() == 2
+
+
+def test_generate_insight_handles_session_without_notes() -> None:
+    """A session without notes should still create an honest low-confidence result."""
+    session = StudySessionFactory()
+
+    result = generate_insight_for_session(
+        session=session,
+        requested_by=session.owner,
+    )
+
+    assert result.insight.confidence == 0
+    assert "not enough study note content" in result.insight.summary.lower()
+
+
+def test_generate_insight_rejects_sessions_owned_by_other_users() -> None:
+    """Users should only generate insights for their own study sessions."""
+    session = StudySessionFactory()
+    other_user = CustomUserFactory()
+
+    with pytest.raises(PermissionDenied):
+        generate_insight_for_session(
+            session=session,
+            requested_by=other_user,
+        )
+
+    assert StudyInsight.objects.count() == 0
+
+
+def test_insight_selectors_scope_results_through_session_owner() -> None:
+    """Insight reads should inherit ownership from the parent study session."""
+    user = CustomUserFactory()
+    other_user = CustomUserFactory()
+    user_session = StudySessionFactory(owner=user)
+    other_session = StudySessionFactory(owner=other_user)
+    StudyNoteFactory(session=user_session, content="Django selectors protect owners.")
+    StudyNoteFactory(session=other_session, content="Private notes belong elsewhere.")
+
+    user_result = generate_insight_for_session(
+        session=user_session,
+        requested_by=user,
+    )
+    other_result = generate_insight_for_session(
+        session=other_session,
+        requested_by=other_user,
+    )
+
+    assert list(get_user_insights(user)) == [user_result.insight]
+    assert (
+        get_latest_session_insight(
+            session=user_session,
+            user=user,
+        )
+        == user_result.insight
+    )
+    assert (
+        get_latest_session_insight(
+            session=other_session,
+            user=user,
+        )
+        is None
+    )
+    assert other_result.insight not in get_user_insights(user)
diff --git a/apps/insights/tests/test_models.py b/apps/insights/tests/test_models.py
index e2c7c64..60d0714 100644
--- a/apps/insights/tests/test_models.py
+++ b/apps/insights/tests/test_models.py
@@ -54,6 +54,15 @@ def test_study_insight_rejects_invalid_keyword_shape() -> None:
         insight.full_clean()
 
 
+def test_study_insight_allows_empty_keyword_list() -> None:
+    """Low-information insights may have no detected keywords."""
+    insight = StudyInsightFactory(keywords=[])
+
+    # save() skips field validation, so blank=True is only exercised by full_clean().
+    insight.full_clean()
+    assert insight.keywords == []
+
+
 def test_study_insight_rejects_non_string_keywords() -> None:
     """Keyword lists cannot contain non-string values."""
     insight = StudyInsightFactory.build(keywords=["django", 3])