-
Notifications
You must be signed in to change notification settings - Fork 3
Automatic categorization (labelling) of reports based on asking LLM questions. #196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
1371f40
a5e8611
9ab56e0
34c1c41
7125830
578f9e7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,47 @@ | ||||||
| from django.contrib import admin | ||||||
|
|
||||||
| from .models import LabelBackfillJob, LabelChoice, LabelGroup, LabelQuestion, ReportLabel | ||||||
|
|
||||||
|
|
||||||
class LabelChoiceInline(admin.TabularInline):
    """Tabular inline for editing ``LabelChoice`` rows on a parent admin page."""

    # No blank extra rows are rendered by default.
    extra = 0
    model = LabelChoice
|
|
||||||
|
|
||||||
@admin.register(LabelGroup)
class LabelGroupAdmin(admin.ModelAdmin):
    """Admin list view for label groups, sorted by ``order`` and then ``name``."""

    ordering = ("order", "name")
    search_fields = ("name",)
    list_filter = ("is_active",)
    list_display = (
        "name",
        "is_active",
        "order",
    )
|
|
||||||
|
|
||||||
@admin.register(LabelQuestion)
class LabelQuestionAdmin(admin.ModelAdmin):
    """Admin view for label questions; answer choices are edited inline."""

    # Choices belonging to a question are managed on the question's own page.
    inlines = (LabelChoiceInline,)

    # Questions are sorted by their group's order first, then by their own.
    ordering = ("group__order", "order", "label")
    search_fields = ("label", "question")
    list_filter = ("group", "is_active")
    list_display = (
        "label",
        "question",
        "group",
        "is_active",
        "order",
    )
|
|
||||||
|
|
||||||
@admin.register(ReportLabel)
class ReportLabelAdmin(admin.ModelAdmin):
    """Admin list view for ``ReportLabel`` rows, newest first."""

    list_display = ("report", "question", "choice", "confidence", "verified", "created_at")
    list_filter = ("verified", "question__group")
    # Search through ``question__label``: the earlier ``question__name`` lookup
    # referenced a field that LabelQuestion does not appear to define (its admin
    # and form only use ``label`` and ``question``), which makes the admin
    # search fail with a FieldError.
    search_fields = ("report__document_id", "question__label", "choice__label")
    ordering = ("-created_at",)
|
|
||||||
|
|
||||||
@admin.register(LabelBackfillJob)
class LabelBackfillJobAdmin(admin.ModelAdmin):
    """Admin list view for backfill jobs, newest first, filterable by status."""

    ordering = ("-created_at",)
    list_filter = ("status",)
    # Shows progress (processed vs. total reports) next to the job's group and status.
    list_display = ("id", "label_group", "status", "processed_reports", "total_reports", "created_at")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| from django.apps import AppConfig | ||
|
|
||
|
|
||
class LabelsConfig(AppConfig):
    """Django application configuration for the ``radis.labels`` app."""

    name = "radis.labels"

    def ready(self) -> None:
        """Register the menu entry and hook the app into report create/update events."""
        register_app()

        # These imports are intentionally local to ready(), as in the original code.
        from radis.reports.site import (
            ReportsCreatedHandler,
            ReportsUpdatedHandler,
            register_reports_created_handler,
            register_reports_updated_handler,
        )

        # Imported only for its side effects (presumably connects signal receivers).
        from . import signals  # noqa: F401
        from .site import handle_reports_created, handle_reports_updated

        created_handler = ReportsCreatedHandler(name="Labels", handle=handle_reports_created)
        register_reports_created_handler(created_handler)

        updated_handler = ReportsUpdatedHandler(name="Labels", handle=handle_reports_updated)
        register_reports_updated_handler(updated_handler)
|
|
||
|
|
||
def register_app() -> None:
    """Add the "Auto Labels" entry, pointing at ``label_group_list``, to the main menu."""
    from adit_radis_shared.common.site import MainMenuItem, register_main_menu_item

    menu_item = MainMenuItem(url_name="label_group_list", label="Auto Labels")
    register_main_menu_item(menu_item)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
# Default answer choices: "yes", "no" and an "unknown" fallback.
# Each entry is expanded into a dict with the keys ``value``, ``label``,
# ``is_unknown`` and ``order`` (1-based, in the order listed here).
DEFAULT_LABEL_CHOICES = [
    {"value": value, "label": label, "is_unknown": is_unknown, "order": order}
    for order, (value, label, is_unknown) in enumerate(
        (
            ("yes", "Yes", False),
            ("no", "No", False),
            ("cannot_decide", "Cannot decide", True),
        ),
        start=1,
    )
]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| from crispy_forms.helper import FormHelper | ||
| from crispy_forms.layout import Column, Layout, Row | ||
| from django import forms | ||
|
|
||
| from .models import LabelGroup, LabelQuestion | ||
|
|
||
|
|
||
class LabelGroupForm(forms.ModelForm):
    """Model form for creating and editing a ``LabelGroup``."""

    class Meta:
        model = LabelGroup
        fields = ["name", "description", "is_active", "order"]

    def __init__(self, *args, **kwargs):
        """Build the form and attach a crispy-forms helper describing its layout."""
        super().__init__(*args, **kwargs)

        helper = FormHelper()
        # The helper does not render the <form> tag itself.
        helper.form_tag = False

        text_column = Column("name", "description")
        settings_column = Column("is_active", "order", css_class="col-3")
        helper.layout = Layout(Row(text_column, settings_column))

        self.helper = helper
|
|
||
|
|
||
class LabelQuestionForm(forms.ModelForm):
    """Model form for a ``LabelQuestion`` that belongs to a given label group.

    The owning group is passed through the optional ``group`` keyword argument
    and is used to enforce case-insensitive label uniqueness within that group.
    """

    class Meta:
        model = LabelQuestion
        fields = ["label", "question", "is_active", "order"]

    def __init__(self, *args, **kwargs):
        """Pop the optional ``group`` kwarg, then configure fields and layout."""
        # ``group`` is not a ModelForm argument, so remove it before calling super().
        self.group = kwargs.pop("group", None)
        super().__init__(*args, **kwargs)

        question_field = self.fields["question"]
        question_field.required = False
        question_field.help_text = "Optional. If left empty, the label is used."

        helper = FormHelper()
        # The helper does not render the <form> tag itself.
        helper.form_tag = False
        settings_row = Row(Column("is_active"), Column("order", css_class="col-3"))
        helper.layout = Layout("label", "question", settings_row)
        self.helper = helper

    def clean_label(self):
        """Reject a label that another question in the same group already uses.

        The comparison is case-insensitive and ignores the instance being
        edited. The check is skipped when the label is empty or no group was
        supplied to the form.

        Raises:
            forms.ValidationError: If a duplicate label exists in the group.
        """
        label = self.cleaned_data.get("label", "")
        if label and self.group:
            duplicates = LabelQuestion.objects.filter(group=self.group, label__iexact=label)
            if self.instance and self.instance.pk:
                # When editing, the question must not collide with itself.
                duplicates = duplicates.exclude(pk=self.instance.pk)
            if duplicates.exists():
                raise forms.ValidationError(
                    "A question with this label already exists in this group."
                )
        return label
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from itertools import batched | ||
|
|
||
| from django.core.management.base import BaseCommand, CommandError | ||
| from django.utils import timezone | ||
|
|
||
| from radis.reports.models import Report | ||
|
|
||
| from ...models import LabelBackfillJob, LabelGroup | ||
| from ...tasks import process_label_group | ||
|
|
||
|
|
||
class Command(BaseCommand):
    """Enqueue labeling tasks for reports that already exist.

    For every selected label group a ``LabelBackfillJob`` is created and the
    report IDs are deferred to ``process_label_group`` in batches.
    """

    help = "Enqueue labeling tasks for existing reports."

    def add_arguments(self, parser):
        """Declare the ``--group``, ``--batch-size`` and ``--limit`` options."""
        parser.add_argument(
            "--group",
            dest="group",
            help="Label group name or ID. If omitted, all active groups are used.",
        )
        parser.add_argument(
            "--batch-size",
            dest="batch_size",
            type=int,
            default=None,
            help="Override the task batch size.",
        )
        parser.add_argument(
            "--limit",
            dest="limit",
            type=int,
            default=None,
            help="Limit the number of reports to enqueue.",
        )

    def handle(self, *args, **options):
        """Create one backfill job per group and defer its reports in batches.

        Raises:
            CommandError: If ``--batch-size`` (or the configured default) is
                smaller than 1, if ``--limit`` is negative, or if the requested
                group cannot be resolved.
        """
        group_value = options.get("group")
        batch_size = options.get("batch_size")
        limit = options.get("limit")

        # Validate the numeric options before touching the database, so bad input
        # never leaves a half-created backfill job behind.
        if batch_size is not None and batch_size < 1:
            raise CommandError("--batch-size must be a positive integer.")
        if limit is not None and limit < 0:
            raise CommandError("--limit must not be negative.")

        if group_value:
            groups = [self._get_group(group_value)]
        else:
            groups = list(LabelGroup.objects.filter(is_active=True))

        if not groups:
            self.stdout.write(self.style.WARNING("No active label groups found."))
            return

        report_ids = Report.objects.order_by("id").values_list("id", flat=True)
        # A limit of 0 (like no limit at all) means "all reports".
        if limit:
            report_ids = report_ids[:limit]
        report_ids = list(report_ids)

        if not report_ids:
            self.stdout.write(self.style.WARNING("No reports found."))
            return

        if batch_size is None:
            from django.conf import settings

            batch_size = settings.LABELING_TASK_BATCH_SIZE
            # itertools.batched() raises ValueError for n < 1; fail with a clear
            # message before any job is created.
            if batch_size < 1:
                raise CommandError("LABELING_TASK_BATCH_SIZE must be a positive integer.")

        for group in groups:
            backfill_job = LabelBackfillJob.objects.create(
                label_group=group,
                status=LabelBackfillJob.Status.IN_PROGRESS,
                started_at=timezone.now(),
                total_reports=len(report_ids),
            )

            for report_batch in batched(report_ids, batch_size):
                process_label_group.defer(
                    label_group_id=group.id,
                    report_ids=list(report_batch),
                    backfill_job_id=backfill_job.id,
                )

            self.stdout.write(
                self.style.SUCCESS(
                    f"Enqueued labeling for {len(report_ids)} reports "
                    f"in group '{group.name}' (backfill job #{backfill_job.id})."
                )
            )

    def _get_group(self, value: str) -> LabelGroup:
        """Resolve ``value`` to a label group, first by numeric ID, then by name.

        Raises:
            CommandError: If no group matches, or if several groups share the name.
        """
        group = None

        # isdecimal() only accepts characters int() can parse, whereas isdigit()
        # also accepts e.g. superscript digits that make int() raise ValueError.
        if value.isdecimal():
            group = LabelGroup.objects.filter(id=int(value)).first()

        if group is None:
            # Also reached for numeric values without a matching ID, so that a
            # group whose name consists only of digits can still be selected.
            matches = list(LabelGroup.objects.filter(name=value)[:2])
            if len(matches) > 1:
                raise CommandError(
                    f"Multiple label groups named '{value}' exist. Use the numeric ID."
                )
            if matches:
                group = matches[0]

        if group is None:
            raise CommandError(f"Label group '{value}' not found.")

        return group
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `search_fields` for `ReportLabelAdmin` includes `question__name`, but the `LabelQuestion` model does not have a `name` field. This will raise a `FieldError` when using the search functionality in the Django admin for Report Labels. You should probably search on the `label` or `question` field of the `LabelQuestion` model instead.