51 changes: 44 additions & 7 deletions src/sentry/preprod/api/endpoints/builds.py
@@ -28,24 +28,30 @@
from sentry.preprod.models import PreprodArtifact

ERR_FEATURE_REQUIRED = "Feature {} is not enabled for the organization."
ERR_BAD_KEY = "Key {} is unknown."

search_config = SearchConfig.create_from(
SearchConfig(),
# Text keys we allow operators to be used on
# text_operator_keys={"app_id"},
# Keys that support numeric comparisons
# numeric_keys={"state", "pr_number"},
numeric_keys={"download_count", "build_number", "download_size", "install_size"},
# Keys that support date filtering
# date_keys={"date_built", "date_added"},
# Key mappings for user-friendly names
key_mappings={
"app_id": ["app_id", "package_name", "bundle_id"],
"app_id": ["package_name", "bundle_id"],
Contributor:

Do we not need to map app_id, since it's already app_id?

Contributor (Author):

Yep!

},
# Allowed search keys
allowed_keys={
"app_id",
"package_name",
"bundle_id",
"download_count",
"build_version",
"build_number",
"download_size",
"install_size",
},
# Enable boolean operators
# allow_boolean=True,
@@ -56,17 +62,27 @@
)


def get_field_type(key: str) -> str | None:
match key:
case "download_size":
return "byte"
case "install_size":
return "byte"
case _:
return None


def apply_filters(
queryset: BaseQuerySet[PreprodArtifact], filters: Sequence[QueryToken]
) -> BaseQuerySet[PreprodArtifact]:
for token in filters:
# Skip operators and other non-filter types
if isinstance(token, str): # Handles "AND", "OR" literals
raise InvalidSearchQuery(f"Boolean operators are not supported: {token}")
if isinstance(token, AggregateFilter):
raise InvalidSearchQuery("Aggregate filters are not supported")
if isinstance(token, ParenExpression):
raise InvalidSearchQuery("Parenthetical expressions are not supported")
if isinstance(token, AggregateFilter):
raise InvalidSearchQuery("Aggregate filters are not supported")

assert isinstance(token, SearchFilter)

@@ -80,9 +96,17 @@ def apply_filters(
# since allow_boolean is not set in SearchConfig.
d = {}
if token.is_in_filter:
d[f"{token.key.name}__in"] = token.value.value
d[f"{name}__in"] = token.value.value
elif token.operator == ">":
d[f"{name}__gt"] = token.value.value
elif token.operator == "<":
d[f"{name}__lt"] = token.value.value
elif token.operator == ">=":
d[f"{name}__gte"] = token.value.value
elif token.operator == "<=":
d[f"{name}__lte"] = token.value.value
else:
d[token.key.name] = token.value.value
d[name] = token.value.value

q = Q(**d)
if token.is_negation:
@@ -135,9 +159,14 @@ def get(self, request: Request, organization: Organization) -> Response:
if end:
queryset = queryset.filter(date_added__lte=end)

queryset = queryset.annotate_download_count() # type: ignore[attr-defined]
queryset = queryset.annotate_main_size_metrics()

query = request.GET.get("query", "").strip()
try:
search_filters = parse_search_query(query, config=search_config)
search_filters = parse_search_query(
query, config=search_config, get_field_type=get_field_type
)
queryset = apply_filters(queryset, search_filters)
except InvalidSearchQuery as e:
# CodeQL complains about str(e) below but ~all handlers
@@ -163,6 +192,12 @@ def get(self, request: Request, organization: Organization, key: str) -> Respons
status=403,
)

if key not in search_config.allowed_keys:
return Response(
{"detail": ERR_BAD_KEY.format(key)},
status=400,
)

Contributor:
Tag values endpoint allows annotation-based keys that fail

Medium Severity

The BuildTagKeyValuesEndpoint validates keys against search_config.allowed_keys, which now includes download_count, download_size, and install_size. These are annotations created by annotate_download_count() and annotate_main_size_metrics(), but this endpoint doesn't add those annotations before querying. When users request tag values for these keys, the query will fail because queryset.values(db_key) references fields that don't exist on the model without the annotations.


Contributor (Author):
fixed
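For context, a minimal sketch of the failure mode described above, assuming standard Django behavior; the model import path comes from this PR, but the exact error text is illustrative only:

from django.core.exceptions import FieldError

from sentry.preprod.models import PreprodArtifact

try:
    # Without annotate_download_count(), "download_count" is neither a model
    # column nor an annotation, so the ORM cannot resolve it into a field.
    list(PreprodArtifact.objects.values("download_count"))
except FieldError:
    # e.g. "Cannot resolve keyword 'download_count' into field. Choices are: ..."
    pass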

match key:
case "bundle_id":
db_key = "app_id"
@@ -214,6 +249,8 @@ def row_to_tag_value(row: dict[str, Any]) -> dict[str, Any]:

queryset = queryset.values(db_key)
queryset = queryset.exclude(**{f"{db_key}__isnull": True})
queryset = queryset.annotate_download_count()
queryset = queryset.annotate_main_size_metrics()
Bug: The annotate_main_size_metrics() call occurs after the queryset is grouped with .values(db_key), causing the Subquery with OuterRef("pk") to fail as the primary key is unavailable.
Severity: HIGH

🔍 Detailed Analysis

In BuildTagKeyValuesEndpoint.get(), the queryset is grouped by a specific key using .values(db_key) before annotate_main_size_metrics() is called. This annotation method uses a Subquery with OuterRef("pk") to fetch size metrics. However, after the grouping operation, the primary key of individual artifacts is no longer present in the queryset's context. As a result, when the API is queried for install_size or download_size tag values, the subquery will fail to resolve, leading to NULL or incorrect data being returned for those fields.

💡 Suggested Fix

Move the call to annotate_main_size_metrics() to before the queryset is grouped by .values(db_key). Alternatively, refactor the annotation to use aggregate functions that are compatible with grouped querysets, similar to how annotate_download_count() is implemented.

Location: src/sentry/preprod/api/endpoints/builds.py#L253
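For reference, a rough sketch of the ordering the suggested fix describes, reusing names from the diff above; this is illustrative only, and whether the grouped query still resolves the size subqueries as intended would need to be verified against the real queryset:

from django.db.models import Count, Max, Min

# Annotate while the per-row primary key is still in scope, then group.
queryset = queryset.annotate_download_count()
queryset = queryset.annotate_main_size_metrics()  # Subquery(OuterRef("pk")) can resolve here
queryset = queryset.values(db_key)                # group by the requested key afterwards
queryset = queryset.exclude(**{f"{db_key}__isnull": True})
queryset = queryset.annotate(
    count=Count("*"), first_seen=Min("date_added"), last_seen=Max("date_added")
)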


queryset = queryset.annotate(
count=Count("*"), first_seen=Min("date_added"), last_seen=Max("date_added")
)
16 changes: 15 additions & 1 deletion src/sentry/preprod/models.py
@@ -7,7 +7,7 @@

import sentry_sdk
from django.db import models
from django.db.models import IntegerField, Sum
from django.db.models import IntegerField, OuterRef, Subquery, Sum
from django.db.models.functions import Coalesce

from sentry.backup.scopes import RelocationScope
@@ -35,6 +35,20 @@ def annotate_download_count(self) -> Self:
)
)

def annotate_main_size_metrics(self) -> Self:
# Import here to avoid circular import since PreprodArtifactSizeMetrics
# is defined later in this file
from sentry.preprod.models import PreprodArtifactSizeMetrics

main_metrics = PreprodArtifactSizeMetrics.objects.filter(
preprod_artifact=OuterRef("pk"),
metrics_artifact_type=PreprodArtifactSizeMetrics.MetricsArtifactType.MAIN_ARTIFACT,
)
return self.annotate(
install_size=Subquery(main_metrics.values("max_install_size")[:1]),
download_size=Subquery(main_metrics.values("max_download_size")[:1]),
)



class PreprodArtifactModelManager(BaseManager["PreprodArtifact"]):
def get_queryset(self) -> PreprodArtifactQuerySet:
84 changes: 84 additions & 0 deletions tests/sentry/preprod/api/endpoints/test_builds.py
@@ -254,6 +254,90 @@ def test_query_bundle_id_is_app_id_alias(self) -> None:
assert len(response.json()) == 1
assert response.json()[0]["app_info"]["app_id"] == "foo"

@with_feature("organizations:preprod-frontend-routes")
Contributor:

There are only tests for download count, not the other routes, but I assume they all work the same?

Contributor (Author):

I added one for install size, which tests the size bit. It's true it might be good to test each of the fields at least once; I'll fix that in a follow-up.
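A hypothetical sketch of one such follow-up test, mirroring the install-size test added below; the max_download_size keyword on the size-metrics factory is an assumption and would need to match the real helper:

@with_feature("organizations:preprod-frontend-routes")
def test_query_download_size(self) -> None:
    small_artifact = self.create_preprod_artifact(app_id="small.app")
    # Assumes the factory accepts max_download_size, like max_install_size below.
    self.create_preprod_artifact_size_metrics(small_artifact, max_download_size=500000)

    large_artifact = self.create_preprod_artifact(app_id="large.app")
    self.create_preprod_artifact_size_metrics(large_artifact, max_download_size=3000000)

    response = self._request({"query": "download_size:>1000000"})
    assert response.status_code == 200
    data = response.json()
    assert len(data) == 1
    assert data[0]["app_info"]["app_id"] == "large.app"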

def test_download_count_for_installable_artifact(self) -> None:
# Create an installable artifact (has both installable_app_file_id and build_number)
artifact = self.create_preprod_artifact(
installable_app_file_id=12345,
build_number=100,
)
# Create InstallablePreprodArtifact records with download counts
self.create_installable_preprod_artifact(artifact, download_count=5)
self.create_installable_preprod_artifact(artifact, download_count=10)

response = self._request({})
assert response.status_code == 200
data = response.json()
assert len(data) == 1
# Download count should be the sum of all InstallablePreprodArtifact records
assert data[0]["distribution_info"]["download_count"] == 15
assert data[0]["distribution_info"]["is_installable"] is True

@with_feature("organizations:preprod-frontend-routes")
def test_download_count_zero_for_non_installable_artifact(self) -> None:
# Create a non-installable artifact (no installable_app_file_id)
self.create_preprod_artifact()

response = self._request({})
assert response.status_code == 200
data = response.json()
assert len(data) == 1
assert data[0]["distribution_info"]["download_count"] == 0
assert data[0]["distribution_info"]["is_installable"] is False

@with_feature("organizations:preprod-frontend-routes")
def test_download_count_multiple_artifacts(self) -> None:
# Create multiple installable artifacts with different download counts
artifact1 = self.create_preprod_artifact(
app_id="com.app.one",
installable_app_file_id=11111,
build_number=1,
)
self.create_installable_preprod_artifact(artifact1, download_count=100)

artifact2 = self.create_preprod_artifact(
app_id="com.app.two",
installable_app_file_id=22222,
build_number=2,
)
self.create_installable_preprod_artifact(artifact2, download_count=50)
self.create_installable_preprod_artifact(artifact2, download_count=25)

response = self._request({})
assert response.status_code == 200
data = response.json()
assert len(data) == 2

# Results are ordered by date_added descending, so artifact2 comes first
app_two = next(b for b in data if b["app_info"]["app_id"] == "com.app.two")
app_one = next(b for b in data if b["app_info"]["app_id"] == "com.app.one")

assert app_one["distribution_info"]["download_count"] == 100
assert app_two["distribution_info"]["download_count"] == 75

@with_feature("organizations:preprod-frontend-routes")
def test_query_install_size(self) -> None:
# Create artifacts with different install sizes via size metrics
small_artifact = self.create_preprod_artifact(app_id="small.app")
self.create_preprod_artifact_size_metrics(small_artifact, max_install_size=1000000) # 1 MB

large_artifact = self.create_preprod_artifact(app_id="large.app")
self.create_preprod_artifact_size_metrics(large_artifact, max_install_size=5000000) # 5 MB

# Filter for artifacts with install_size > 2 MB
response = self._request({"query": "install_size:>2000000"})
assert response.status_code == 200
data = response.json()
assert len(data) == 1
assert data[0]["app_info"]["app_id"] == "large.app"

# Filter for artifacts with install_size < 2 MB
response = self._request({"query": "install_size:<2000000"})
assert response.status_code == 200
data = response.json()
assert len(data) == 1
assert data[0]["app_info"]["app_id"] == "small.app"


class BuildTagKeyValuesEndpointTest(APITestCase):
