Skip to content

Commit 91c9106

Browse files
Add searchable field column to handle full text search (#8544)
* Create migration for removing the previously applied search vector, add a search vector column and update existing fields
* Fix tests for search fields
* Squash migrations
* Remove replace from migration, print statement from test and add Substr to comments
* Retrigger GitHub Actions
* Rebase and update migration
* Update query to search both push and commit models
* Update test script
* Remove debug print statement from push API

Co-authored-by: Sebastian Hengst <[email protected]>
1 parent 685b0c4 commit 91c9106

File tree

4 files changed

+67
-24
lines changed

4 files changed

+67
-24
lines changed

tests/webapp/api/test_push_api.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import datetime
22

33
import pytest
4+
from django.contrib.postgres.search import SearchVector
45
from django.urls import reverse
56

67
from tests.conftest import IS_WINDOWS
@@ -405,23 +406,28 @@ def test_push_search(client, test_repository):
405406
Commit.objects.create(
406407
push=push1, revision="1234abcd", author="kaz <[email protected]>", comments="Initial commit"
407408
)
409+
408410
Commit.objects.create(
409411
push=push2, revision="2234abcd", author="foo <[email protected]>", comments="Bug 12345567 - fix"
410412
)
413+
411414
Commit.objects.create(
412415
push=push3,
413416
revision="3234abcd",
414417
author="quxzan <qux@bar>.com",
415418
comments="Bug 12345567 - Feature added",
416419
)
417-
420+
Commit.objects.update(
421+
search_vector=SearchVector("revision", "author", "comments", config="english")
422+
)
418423
# Test search by comments
419424
resp = client.get(
420425
reverse("push-list", kwargs={"project": test_repository.name}) + "?search=bug"
421426
)
422427
assert resp.status_code == 200
423428

424429
results = resp.json()["results"]
430+
425431
assert len(results) == 2
426432
assert set([result["id"] for result in results]) == set([3, 2])
427433

@@ -442,7 +448,7 @@ def test_push_search(client, test_repository):
442448
assert resp.status_code == 200
443449

444450
results = resp.json()["results"]
445-
assert len(results) == 1
451+
assert len(results) == 2
446452
assert results[0]["id"] == push2.id
447453

448454
# Test search by revision
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Generated by Django 5.1.5 on 2025-03-06 14:49
2+
3+
import django.contrib.postgres.indexes
4+
import django.contrib.postgres.search
5+
from django.contrib.postgres.search import SearchVector
6+
from django.db import migrations
7+
from django.db.models.functions import Substr
8+
9+
10+
def update_search_vector(apps, schema_editor):
    """Populate ``search_vector`` on every existing ``Commit`` row.

    The vector is built with the "english" text-search configuration from
    ``revision``, ``author`` and the first 100000 characters of ``comments``.

    Args:
        apps: Migration app registry used to look up the ``Commit`` model.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    commit_model = apps.get_model("model", "Commit")
    # Only the leading 100000 characters of ``comments`` are indexed
    # (presumably to keep the vector within PostgreSQL's size limits -- TODO confirm).
    truncated_comments = Substr("comments", 1, 100000)
    vector = SearchVector("revision", "author", truncated_comments, config="english")
    commit_model.objects.update(search_vector=vector)
15+
16+
17+
class Migration(migrations.Migration):
    """Move the ``commit`` full-text search index onto a stored vector column.

    Operations, in order:
      1. Drop the existing ``search_vector_idx`` index.
      2. Add the nullable ``search_vector`` column to ``commit``.
      3. Recreate ``search_vector_idx`` as a GIN index on that column.
      4. Backfill the column for existing rows via ``update_search_vector``.
    """

    dependencies = [
        ("model", "0040_alter_textlogerror_unique_together_and_more"),
    ]

    operations = [
        migrations.RemoveIndex(
            model_name="commit",
            name="search_vector_idx",
        ),
        migrations.AddField(
            model_name="commit",
            name="search_vector",
            field=django.contrib.postgres.search.SearchVectorField(blank=True, null=True),
        ),
        migrations.AddIndex(
            model_name="commit",
            index=django.contrib.postgres.indexes.GinIndex(
                fields=["search_vector"], name="search_vector_idx"
            ),
        ),
        # The backfill needs no undo step: reversing AddField drops the column
        # and its data. Supplying a no-op reverse keeps the migration
        # reversible instead of raising IrreversibleError on unapply.
        migrations.RunPython(update_search_vector, reverse_code=migrations.RunPython.noop),
    ]

treeherder/model/models.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,12 @@
99
from django.conf import settings
1010
from django.contrib.auth.models import User
1111
from django.contrib.postgres.indexes import GinIndex
12-
from django.contrib.postgres.search import SearchVector, TrigramSimilarity
12+
from django.contrib.postgres.search import SearchVectorField, TrigramSimilarity
1313
from django.core.cache import cache
1414
from django.core.exceptions import ObjectDoesNotExist
1515
from django.core.validators import MinLengthValidator
1616
from django.db import models, transaction
1717
from django.db.models import Count, Max, Min, Q, Subquery
18-
from django.db.models.functions import Substr
1918
from django.db.utils import ProgrammingError
2019
from django.forms import model_to_dict
2120
from django.utils import timezone
@@ -187,15 +186,13 @@ class Commit(models.Model):
187186
revision = models.CharField(max_length=40, db_index=True)
188187
author = models.CharField(max_length=150)
189188
comments = models.TextField()
189+
search_vector = SearchVectorField(null=True, blank=True)
190190

191191
class Meta:
192192
db_table = "commit"
193193
unique_together = ("push", "revision")
194194
indexes = [
195-
GinIndex(
196-
SearchVector("revision", "author", Substr("comments", 1, 100000), config="english"),
197-
name="search_vector_idx",
198-
),
195+
GinIndex(fields=["search_vector"], name="search_vector_idx"),
199196
]
200197

201198
def __str__(self):

treeherder/webapp/api/push.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33

44
import newrelic.agent
55
from cache_memoize import cache_memoize
6-
from django.contrib.postgres.search import SearchQuery, SearchVector
7-
from django.db.models.functions import Substr
6+
from django.contrib.postgres.search import SearchQuery
7+
from django.db.models import Exists, OuterRef, Q
88
from rest_framework import viewsets
99
from rest_framework.decorators import action
1010
from rest_framework.response import Response
@@ -74,22 +74,23 @@ def list(self, request, project):
7474
search_param = filter_params.get("search")
7575
if search_param:
7676
repository = Repository.objects.get(name=project)
77-
filtered_commits = (
78-
Commit.objects.annotate(
79-
search=SearchVector(
80-
"revision", "author", Substr("comments", 1, 100000), config="english"
81-
)
82-
)
77+
# Subquery to check if a commit exists with the search term
78+
commit_exists_subquery = Commit.objects.filter(
79+
push_id=OuterRef("id"), search_vector=SearchQuery(search_param)
80+
).values("id")
81+
pushes = (
82+
Push.objects.annotate(has_matching_commit=Exists(commit_exists_subquery))
8383
.filter(
84-
search=SearchQuery(search_param, config="english"),
85-
push__repository=repository,
84+
Q(repository=repository)
85+
& (
86+
Q(has_matching_commit=True)
87+
| Q(author__icontains=search_param)
88+
| Q(revision__icontains=search_param)
89+
)
8690
)
87-
.values_list("push_id", flat=True)
88-
# Get most recent results and limit result to 200
89-
.order_by("-push__time")
90-
.distinct()[:200]
91-
)
92-
pushes = pushes.filter(id__in=filtered_commits)
91+
.distinct()
92+
.order_by("-time")[:200]
93+
) # Get most recent results and limit result to 200
9394
for param, value in meta.items():
9495
if param == "fromchange":
9596
revision_field = "revision__startswith" if len(value) < 40 else "revision"

0 commit comments

Comments
 (0)