-
-
Notifications
You must be signed in to change notification settings - Fork 4.6k
fix(eventstream): Use an arroyo producer for eventstream #104763
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
evanh
wants to merge
9
commits into
master
Choose a base branch
from
evanh/fix/evenstream-kafka-producer
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
f945618
fix(eventstream): Use an arroyo producer for eventstream
evanh da4f2e5
just use arroyo producer
evanh 6ed0cab
fix some usage bugs
evanh 043f1e1
fix tests/bugs
evanh 3529da8
fixes
evanh f857748
fix type bug
evanh ded3d09
Merge branch 'master' into evanh/fix/evenstream-kafka-producer
evanh 05d896e
type fixing
evanh 7889ef8
fix future
evanh File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,13 +3,14 @@ | |
| import logging | ||
| import time | ||
| from collections.abc import Mapping, MutableMapping, Sequence | ||
| from concurrent.futures import Future | ||
| from datetime import datetime | ||
| from typing import TYPE_CHECKING, Any | ||
| from typing import TYPE_CHECKING, Any, cast | ||
|
|
||
| from arroyo.backends.kafka import build_kafka_producer_configuration | ||
| from arroyo.backends.kafka import KafkaPayload, KafkaProducer | ||
| from arroyo.types import BrokerValue | ||
| from arroyo.types import Topic as ArroyoTopic | ||
| from confluent_kafka import KafkaError | ||
| from confluent_kafka import Message as KafkaMessage | ||
| from confluent_kafka import Producer | ||
| from sentry_kafka_schemas.codecs import Codec | ||
| from sentry_protos.snuba.v1.trace_item_pb2 import TraceItem | ||
|
|
||
|
|
@@ -20,8 +21,8 @@ | |
| from sentry.eventstream.types import EventStreamEventType | ||
| from sentry.killswitches import killswitch_matches_context | ||
| from sentry.utils import json | ||
| from sentry.utils.confluent_producer import get_confluent_producer | ||
| from sentry.utils.kafka_config import get_kafka_producer_cluster_options, get_topic_definition | ||
| from sentry.utils.arroyo_producer import get_arroyo_producer | ||
| from sentry.utils.kafka_config import get_topic_definition | ||
|
|
||
| EAP_ITEMS_CODEC: Codec[TraceItem] = get_topic_codec(Topic.SNUBA_ITEMS) | ||
|
|
||
|
|
@@ -37,30 +38,31 @@ def __init__(self, **options: Any) -> None: | |
| self.topic = Topic.EVENTS | ||
| self.transactions_topic = Topic.TRANSACTIONS | ||
| self.issue_platform_topic = Topic.EVENTSTREAM_GENERIC | ||
| self.__producers: MutableMapping[Topic, Producer] = {} | ||
| self.__producers: MutableMapping[Topic, KafkaProducer] = {} | ||
| self.error_last_logged_time: int | None = None | ||
|
|
||
| def get_transactions_topic(self, project_id: int) -> Topic: | ||
| return self.transactions_topic | ||
|
|
||
| def get_producer(self, topic: Topic) -> Producer: | ||
| def get_producer(self, topic: Topic) -> KafkaProducer: | ||
| if topic not in self.__producers: | ||
| cluster_name = get_topic_definition(topic)["cluster"] | ||
| cluster_options = get_kafka_producer_cluster_options(cluster_name) | ||
| cluster_options["client.id"] = "sentry.eventstream.kafka" | ||
| # XXX(markus): We should use `sentry.utils.arroyo_producer.get_arroyo_producer`. | ||
| self.__producers[topic] = get_confluent_producer( | ||
| build_kafka_producer_configuration(default_config=cluster_options) | ||
| self.__producers[topic] = get_arroyo_producer( | ||
| name="sentry.eventstream.kafka", | ||
| topic=topic, | ||
| use_simple_futures=False, | ||
| ) | ||
evanh marked this conversation as resolved.
Show resolved
Hide resolved
evanh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| return self.__producers[topic] | ||
evanh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| def delivery_callback(self, error: KafkaError | None, message: KafkaMessage) -> None: | ||
| def delivery_callback(self, error: KafkaError | None) -> None: | ||
| now = int(time.time()) | ||
| if error is not None: | ||
| if self.error_last_logged_time is None or now > self.error_last_logged_time + 60: | ||
| self.error_last_logged_time = now | ||
| logger.error("Could not publish message (error: %s): %r", error, message) | ||
| logger.error( | ||
| "Could not publish message (error: %s)", | ||
| error, | ||
| ) | ||
|
|
||
| def _get_headers_for_insert( | ||
| self, | ||
|
|
@@ -189,48 +191,41 @@ def _send( | |
|
|
||
| producer = self.get_producer(topic) | ||
|
|
||
| # Polling the producer is required to ensure callbacks are fired. This | ||
| # means that the latency between a message being delivered (or failing | ||
| # to be delivered) and the corresponding callback being fired is | ||
| # roughly the same as the duration of time that passes between publish | ||
| # calls. If this ends up being too high, the publisher should be moved | ||
| # into a background thread that can poll more frequently without | ||
| # interfering with request handling. (This `poll` does not act as | ||
| # a heartbeat for the purposes of any sort of session expiration.) | ||
| # Note that this call to poll() is *only* dealing with earlier | ||
| # asynchronous produce() calls from the same process. | ||
| producer.poll(0.0) | ||
|
|
||
| assert isinstance(extra_data, tuple) | ||
|
|
||
| real_topic = get_topic_definition(topic)["real_topic_name"] | ||
|
|
||
| try: | ||
| producer.produce( | ||
| topic=real_topic, | ||
| key=str(project_id).encode("utf-8") if not skip_semantic_partitioning else None, | ||
| value=json.dumps((self.EVENT_PROTOCOL_VERSION, _type) + extra_data), | ||
| on_delivery=self.delivery_callback, | ||
| headers=[(k, v.encode("utf-8")) for k, v in headers.items()], | ||
| produce_future = producer.produce( | ||
| destination=ArroyoTopic(real_topic), | ||
| payload=KafkaPayload( | ||
| key=str(project_id).encode("utf-8") if not skip_semantic_partitioning else None, | ||
| value=json.dumps((self.EVENT_PROTOCOL_VERSION, _type) + extra_data).encode( | ||
| "utf-8" | ||
| ), | ||
| headers=[(k, v.encode("utf-8")) for k, v in headers.items()], | ||
| ), | ||
| ) | ||
| # Since use_simple_futures=False, we know this is a Future | ||
| cast(Future[BrokerValue[KafkaPayload]], produce_future).add_done_callback( | ||
| lambda future: self.delivery_callback(future.exception()) | ||
| ) | ||
evanh marked this conversation as resolved.
Show resolved
Hide resolved
evanh marked this conversation as resolved.
Show resolved
Hide resolved
evanh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| except Exception as error: | ||
| logger.exception("Could not publish message: %s", error) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wrt the |
||
| return | ||
evanh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if not asynchronous: | ||
| # flush() is a convenience method that calls poll() until len() is zero | ||
| producer.flush() | ||
|
|
||
| def requires_post_process_forwarder(self) -> bool: | ||
| return True | ||
|
|
||
| def _send_item(self, trace_item: TraceItem) -> None: | ||
| producer = self.get_producer(Topic.SNUBA_ITEMS) | ||
| real_topic = get_topic_definition(Topic.SNUBA_ITEMS)["real_topic_name"] | ||
| try: | ||
| producer.produce( | ||
| topic=real_topic, | ||
| value=EAP_ITEMS_CODEC.encode(trace_item), | ||
| _ = producer.produce( | ||
| destination=ArroyoTopic(real_topic), | ||
| payload=KafkaPayload( | ||
| key=None, value=EAP_ITEMS_CODEC.encode(trace_item), headers=[] | ||
| ), | ||
| ) | ||
| except Exception as error: | ||
| logger.exception("Could not publish trace items: %s", error) | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.