Store the commit full-text search vector in a real column and search pushes
through it.

* tests: populate Commit.search_vector before exercising the push search API.
* migration 0041: swap the expression GIN index for a SearchVectorField column
  with a GIN index, and backfill the column for existing rows.
* models: declare Commit.search_vector and index it.
* push API: return pushes that have a commit matching the search vector, or
  whose own author/revision contains the search term (newest 200 only).

Review fixes folded into this patch:

* push API: SearchQuery is built with config="english" again, so the query is
  parsed with the same configuration used to build the stored vectors.
* migration 0041: the backfill RunPython gets a no-op reverse, so the migration
  can be unapplied.

The "index" lines are kept from the original patch; the post-image hashes of
the migration and of push.py no longer describe the edited content.

NOTE(review): nothing in this patch fills search_vector for commits created
after the backfill - confirm that ingestion populates it elsewhere.
NOTE(review): "pushes" is now rebuilt from Push.objects and sliced, whereas the
removed code filtered the existing queryset - confirm that no earlier filter is
lost and that no later .filter() runs on the sliced queryset.

diff --git a/tests/webapp/api/test_push_api.py b/tests/webapp/api/test_push_api.py
index 3ebc4594f0f..856a68febed 100644
--- a/tests/webapp/api/test_push_api.py
+++ b/tests/webapp/api/test_push_api.py
@@ -1,6 +1,7 @@
 import datetime
 
 import pytest
+from django.contrib.postgres.search import SearchVector
 from django.urls import reverse
 
 from tests.conftest import IS_WINDOWS
@@ -407,16 +408,20 @@ def test_push_search(client, test_repository):
     Commit.objects.create(
         push=push1, revision="1234abcd", author="kaz ", comments="Initial commit"
     )
+
     Commit.objects.create(
         push=push2, revision="2234abcd", author="foo ", comments="Bug 12345567 - fix"
     )
+
     Commit.objects.create(
         push=push3,
         revision="3234abcd",
         author="quxzan .com",
         comments="Bug 12345567 - Feature added",
     )
-
+    Commit.objects.update(
+        search_vector=SearchVector("revision", "author", "comments", config="english")
+    )
     # Test search by comments
     resp = client.get(
         reverse("push-list", kwargs={"project": test_repository.name}) + "?search=bug"
@@ -424,6 +429,7 @@ def test_push_search(client, test_repository):
     assert resp.status_code == 200
 
     results = resp.json()["results"]
+
     assert len(results) == 2
     assert set([result["id"] for result in results]) == set([3, 2])
 
@@ -444,7 +450,7 @@ def test_push_search(client, test_repository):
     assert resp.status_code == 200
 
     results = resp.json()["results"]
-    assert len(results) == 1
+    assert len(results) == 2
     assert results[0]["id"] == push2.id
 
     # Test search by revision
diff --git a/treeherder/model/migrations/0041_update_search_vector.py b/treeherder/model/migrations/0041_update_search_vector.py
new file mode 100644
index 00000000000..b499fe2a6f6
--- /dev/null
+++ b/treeherder/model/migrations/0041_update_search_vector.py
@@ -0,0 +1,41 @@
+# Generated by Django 5.1.5 on 2025-03-06 14:49
+
+import django.contrib.postgres.indexes
+import django.contrib.postgres.search
+from django.contrib.postgres.search import SearchVector
+from django.db import migrations
+from django.db.models.functions import Substr
+
+
+def update_search_vector(apps, schema_editor):
+    """Backfill Commit.search_vector for rows that predate the new column."""
+    Commit = apps.get_model("model", "Commit")
+    Commit.objects.update(
+        search_vector=SearchVector("revision", "author", Substr("comments", 1, 100000), config="english")
+    )
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("model", "0040_alter_textlogerror_unique_together_and_more"),
+    ]
+
+    operations = [
+        migrations.RemoveIndex(
+            model_name="commit",
+            name="search_vector_idx",
+        ),
+        migrations.AddField(
+            model_name="commit",
+            name="search_vector",
+            field=django.contrib.postgres.search.SearchVectorField(blank=True, null=True),
+        ),
+        migrations.AddIndex(
+            model_name="commit",
+            index=django.contrib.postgres.indexes.GinIndex(
+                fields=["search_vector"], name="search_vector_idx"
+            ),
+        ),
+        # Reverse is a no-op: unapplying AddField drops the backfilled column anyway.
+        migrations.RunPython(update_search_vector, migrations.RunPython.noop),
+    ]
diff --git a/treeherder/model/models.py b/treeherder/model/models.py
index 117750eedfe..96a00a6b8c6 100644
--- a/treeherder/model/models.py
+++ b/treeherder/model/models.py
@@ -9,13 +9,12 @@
 from django.conf import settings
 from django.contrib.auth.models import User
 from django.contrib.postgres.indexes import GinIndex
-from django.contrib.postgres.search import SearchVector, TrigramSimilarity
+from django.contrib.postgres.search import SearchVectorField, TrigramSimilarity
 from django.core.cache import cache
 from django.core.exceptions import ObjectDoesNotExist
 from django.core.validators import MinLengthValidator
 from django.db import models, transaction
 from django.db.models import Count, Max, Min, Q, Subquery
-from django.db.models.functions import Substr
 from django.db.utils import ProgrammingError
 from django.forms import model_to_dict
 from django.utils import timezone
@@ -187,15 +186,13 @@ class Commit(models.Model):
     revision = models.CharField(max_length=40, db_index=True)
     author = models.CharField(max_length=150)
     comments = models.TextField()
+    search_vector = SearchVectorField(null=True, blank=True)
 
     class Meta:
         db_table = "commit"
         unique_together = ("push", "revision")
         indexes = [
-            GinIndex(
-                SearchVector("revision", "author", Substr("comments", 1, 100000), config="english"),
-                name="search_vector_idx",
-            ),
+            GinIndex(fields=["search_vector"], name="search_vector_idx"),
         ]
 
     def __str__(self):
diff --git a/treeherder/webapp/api/push.py b/treeherder/webapp/api/push.py
index 7d03f7d91bd..f773bbc7e73 100644
--- a/treeherder/webapp/api/push.py
+++ b/treeherder/webapp/api/push.py
@@ -3,8 +3,8 @@
 
 import newrelic.agent
 from cache_memoize import cache_memoize
-from django.contrib.postgres.search import SearchQuery, SearchVector
-from django.db.models.functions import Substr
+from django.contrib.postgres.search import SearchQuery
+from django.db.models import Exists, OuterRef, Q
 from rest_framework import viewsets
 from rest_framework.decorators import action
 from rest_framework.response import Response
@@ -74,22 +74,23 @@ def list(self, request, project):
         search_param = filter_params.get("search")
         if search_param:
             repository = Repository.objects.get(name=project)
-            filtered_commits = (
-                Commit.objects.annotate(
-                    search=SearchVector(
-                        "revision", "author", Substr("comments", 1, 100000), config="english"
-                    )
-                )
+            # Subquery to check if a commit exists with the search term
+            commit_exists_subquery = Commit.objects.filter(
+                push_id=OuterRef("id"), search_vector=SearchQuery(search_param, config="english")
+            ).values("id")
+            pushes = (
+                Push.objects.annotate(has_matching_commit=Exists(commit_exists_subquery))
                 .filter(
-                    search=SearchQuery(search_param, config="english"),
-                    push__repository=repository,
+                    Q(repository=repository)
+                    & (
+                        Q(has_matching_commit=True)
+                        | Q(author__icontains=search_param)
+                        | Q(revision__icontains=search_param)
+                    )
                 )
-                .values_list("push_id", flat=True)
-                # Get most recent results and limit result to 200
-                .order_by("-push__time")
-                .distinct()[:200]
-            )
-            pushes = pushes.filter(id__in=filtered_commits)
+                .distinct()
+                .order_by("-time")[:200]
+            )  # Get most recent results and limit result to 200
         for param, value in meta.items():
             if param == "fromchange":
                 revision_field = "revision__startswith" if len(value) < 40 else "revision"