src/sentry/eventstream/kafka/backend.py (50 changes: 22 additions & 28 deletions)
@@ -6,7 +6,8 @@
 from datetime import datetime
 from typing import TYPE_CHECKING, Any
 
-from arroyo.backends.kafka import build_kafka_producer_configuration
+from arroyo.backends.kafka import KafkaPayload
+from arroyo.types import Topic as ArroyoTopic
 from confluent_kafka import KafkaError
 from confluent_kafka import Message as KafkaMessage
 from confluent_kafka import Producer
@@ -20,8 +21,8 @@
 from sentry.eventstream.types import EventStreamEventType
 from sentry.killswitches import killswitch_matches_context
 from sentry.utils import json
-from sentry.utils.confluent_producer import get_confluent_producer
-from sentry.utils.kafka_config import get_kafka_producer_cluster_options, get_topic_definition
+from sentry.utils.arroyo_producer import get_arroyo_producer
+from sentry.utils.kafka_config import get_topic_definition
 
 EAP_ITEMS_CODEC: Codec[TraceItem] = get_topic_codec(Topic.SNUBA_ITEMS)

@@ -45,12 +46,9 @@ def get_transactions_topic(self, project_id: int) -> Topic:
 
     def get_producer(self, topic: Topic) -> Producer:
         if topic not in self.__producers:
-            cluster_name = get_topic_definition(topic)["cluster"]
-            cluster_options = get_kafka_producer_cluster_options(cluster_name)
-            cluster_options["client.id"] = "sentry.eventstream.kafka"
-            # XXX(markus): We should use `sentry.utils.arroyo_producer.get_arroyo_producer`.
-            self.__producers[topic] = get_confluent_producer(
-                build_kafka_producer_configuration(default_config=cluster_options)
+            self.__producers[topic] = get_arroyo_producer(
+                name="sentry.eventstream.kafka",
+                topic=topic,
             )
 
         return self.__producers[topic]
@@ -189,29 +187,25 @@ def _send(
 
         producer = self.get_producer(topic)
 
-        # Polling the producer is required to ensure callbacks are fired. This
-        # means that the latency between a message being delivered (or failing
-        # to be delivered) and the corresponding callback being fired is
-        # roughly the same as the duration of time that passes between publish
-        # calls. If this ends up being too high, the publisher should be moved
-        # into a background thread that can poll more frequently without
-        # interfering with request handling. (This `poll` does not act as
-        # a heartbeat for the purposes of any sort of session expiration.)
-        # Note that this call to poll() is *only* dealing with earlier
-        # asynchronous produce() calls from the same process.
-        producer.poll(0.0)
-
         assert isinstance(extra_data, tuple)
 
         real_topic = get_topic_definition(topic)["real_topic_name"]
 
         try:
             producer.produce(
-                topic=real_topic,
-                key=str(project_id).encode("utf-8") if not skip_semantic_partitioning else None,
-                value=json.dumps((self.EVENT_PROTOCOL_VERSION, _type) + extra_data),
-                on_delivery=self.delivery_callback,
-                headers=[(k, v.encode("utf-8")) for k, v in headers.items()],
+                ArroyoTopic(real_topic),
+                payload=KafkaPayload(
+                    key=str(project_id).encode("utf-8") if not skip_semantic_partitioning else None,
+                    value=json.dumps((self.EVENT_PROTOCOL_VERSION, _type) + extra_data).encode(
+                        "utf-8"
+                    ),
+                    headers=[(k, v.encode("utf-8")) for k, v in headers.items()],
+                ),
+            ).add_done_callback(
+                lambda future: self.delivery_callback(
+                    future.exception() if future.exception() is not None else None,
+                    future.result().message,
+                )
             )
         except Exception as error:
             logger.exception("Could not publish message: %s", error)
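
For context, the comment block removed above documents confluent-kafka's delivery-callback contract: callbacks fire only from `poll()` or `flush()`, never from `produce()` itself, which is why the old code polled on every publish. A minimal standalone sketch of that pattern (the broker address, topic, and payload are placeholders, not Sentry code):

```python
from confluent_kafka import KafkaError, Message, Producer

producer = Producer({"bootstrap.servers": "localhost:9092"})  # placeholder config

def on_delivery(error: KafkaError | None, message: Message) -> None:
    # Invoked from poll()/flush(), never from produce() itself.
    if error is not None:
        print(f"delivery failed: {error}")

# produce() only enqueues the message in a local buffer ...
producer.produce("events", value=b"payload", key=b"1", on_delivery=on_delivery)
# ... and a later poll() serves callbacks for earlier produce() calls.
producer.poll(0)
```

The arroyo-based replacement drops this polling obligation by exposing delivery as a `concurrent.futures.Future` instead.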
Member: Regarding the `if not asynchronous` handling, I don't see a `flush()` method on the `KafkaProducer`. I think we want to call `produce_future.result()` to block until delivery completes instead.
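
A sketch of the reviewer's suggestion for the `if not asynchronous` branch, assuming arroyo's `produce()` returns a `concurrent.futures.Future` as used in the diff above (the helper name and timeout are illustrative):

```python
from concurrent.futures import Future

from arroyo.backends.kafka import KafkaPayload
from arroyo.types import BrokerValue
from arroyo.types import Topic as ArroyoTopic

def produce_blocking(producer, topic_name: str, payload: KafkaPayload) -> None:
    # produce() enqueues the message and returns a Future immediately ...
    future: Future[BrokerValue[KafkaPayload]] = producer.produce(
        ArroyoTopic(topic_name), payload
    )
    # ... and result() blocks until the broker acknowledges delivery,
    # re-raising any delivery error, so no flush() is needed.
    future.result(timeout=5.0)
```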

@@ -229,8 +223,8 @@ def _send_item(self, trace_item: TraceItem) -> None:
         real_topic = get_topic_definition(Topic.SNUBA_ITEMS)["real_topic_name"]
         try:
             producer.produce(
-                topic=real_topic,
-                value=EAP_ITEMS_CODEC.encode(trace_item),
+                ArroyoTopic(real_topic),
+                KafkaPayload(key=None, value=EAP_ITEMS_CODEC.encode(trace_item), headers=[]),
             )
         except Exception as error:
             logger.exception("Could not publish trace items: %s", error)