Skip to content

Add async task background worker #4591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: srothh/worker-class-hierarchy
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 132 additions & 0 deletions sentry_sdk/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from abc import ABC, abstractmethod
import os
import threading
import asyncio

from time import sleep, time
from sentry_sdk._queue import Queue, FullError
Expand Down Expand Up @@ -186,3 +187,134 @@ def _target(self) -> None:
finally:
self._queue.task_done()
sleep(0)


class AsyncWorker(Worker):
    """Background worker that runs submitted callbacks on an asyncio event loop."""

    def __init__(self, queue_size: int = DEFAULT_QUEUE_SIZE) -> None:
        # The queue is created lazily in start(), once an event loop exists.
        self._queue: Optional[asyncio.Queue[Any]] = None
        self._queue_size = queue_size
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._task: Optional[asyncio.Task[None]] = None
        # The event loop must stay within one process; remember the owning pid.
        self._task_for_pid: Optional[int] = None
        # Strong references to in-flight callback tasks, so they are neither
        # garbage-collected mid-run nor unreachable when kill() must cancel them.
        self._active_tasks: set[asyncio.Task[None]] = set()

@property
def is_alive(self) -> bool:
if self._task_for_pid != os.getpid():
return False
if not self._task or not self._loop:
return False
return self._loop.is_running() and not self._task.done()

def kill(self) -> None:
if self._task:
if self._queue is not None:
try:
self._queue.put_nowait(_TERMINATOR)
except asyncio.QueueFull:
logger.debug("async worker queue full, kill failed")
# Also cancel any active callback tasks
# Avoid modifying the set while cancelling tasks
tasks_to_cancel = set(self._active_tasks)
for task in tasks_to_cancel:
task.cancel()
self._active_tasks.clear()
self._loop = None
self._task = None
self._task_for_pid = None

def start(self) -> None:
if not self.is_alive:
try:
self._loop = asyncio.get_running_loop()
if self._queue is None:
self._queue = asyncio.Queue(maxsize=self._queue_size)
self._task = self._loop.create_task(self._target())
self._task_for_pid = os.getpid()
except RuntimeError:
# There is no event loop running
logger.warning("No event loop running, async worker not started")
self._loop = None
self._task = None
self._task_for_pid = None

def full(self) -> bool:
if self._queue is None:
return True
return self._queue.full()

def _ensure_task(self) -> None:
if not self.is_alive:
self.start()

    async def _wait_flush(self, timeout: float, callback: Optional[Any] = None) -> None:
        """Wait up to ``timeout`` seconds for the queue to drain.

        After a short initial wait, invokes ``callback(pending, timeout)`` (if
        given) with the number of items still pending, then waits out the rest
        of the timeout. Items still pending after that are dropped (logged).
        """
        # Nothing to flush when the worker's loop is gone or never started.
        if not self._loop or not self._loop.is_running() or self._queue is None:
            return

        initial_timeout = min(0.1, timeout)

        # Timeout on the join
        try:
            await asyncio.wait_for(self._queue.join(), timeout=initial_timeout)
        except asyncio.TimeoutError:
            # Work still in flight: count queued items plus running tasks and
            # give the caller a chance to report before the long wait.
            pending = self._queue.qsize() + len(self._active_tasks)
            logger.debug("%d event(s) pending on flush", pending)
            if callback is not None:
                callback(pending, timeout)

        # NOTE(review): this second wait also runs when the first join already
        # succeeded; it then returns immediately because the queue is drained.
        try:
            remaining_timeout = timeout - initial_timeout
            await asyncio.wait_for(self._queue.join(), timeout=remaining_timeout)
        except asyncio.TimeoutError:
            pending = self._queue.qsize() + len(self._active_tasks)
            logger.error("flush timed out, dropped %s events", pending)

def flush(self, timeout: float, callback: Optional[Any] = None) -> Optional[asyncio.Task[None]]: # type: ignore[override]
if self.is_alive and timeout > 0.0 and self._loop and self._loop.is_running():
return self._loop.create_task(self._wait_flush(timeout, callback))
return None

def submit(self, callback: Callable[[], Any]) -> bool:
self._ensure_task()
if self._queue is None:
return False
try:
self._queue.put_nowait(callback)
return True
except asyncio.QueueFull:
return False

async def _target(self) -> None:
if self._queue is None:
return
while True:
callback = await self._queue.get()
if callback is _TERMINATOR:
self._queue.task_done()
break
# Firing tasks instead of awaiting them allows for concurrent requests
task = asyncio.create_task(self._process_callback(callback))
# Create a strong reference to the task so it can be cancelled on kill
# and does not get garbage collected while running
self._active_tasks.add(task)
task.add_done_callback(self._on_task_complete)
# Yield to let the event loop run other tasks
await asyncio.sleep(0)

async def _process_callback(self, callback: Callable[[], Any]) -> None:
# Callback is an async coroutine, need to await it
await callback()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: AsyncWorker Awaits Sync Callbacks

The AsyncWorker's _process_callback method incorrectly awaits the callback. This violates the Worker interface, which expects synchronous callbacks (as accepted by the submit method), causing runtime TypeErrors when synchronous functions are submitted.

Locations (1)
Fix in Cursor Fix in Web

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed previously


def _on_task_complete(self, task: asyncio.Task[None]) -> None:
try:
task.result()
except Exception:
logger.error("Failed processing job", exc_info=True)
finally:
# Mark the task as done and remove it from the active tasks set
# This happens only after the task has completed
if self._queue is not None:
self._queue.task_done()
self._active_tasks.discard(task)
Loading