From d329517d9af202c3353afcc5e7b5912d0911b3f5 Mon Sep 17 00:00:00 2001
From: hoangtrann <thhoang.tr@gmail.com>
Date: Sat, 22 Nov 2025 06:05:08 +0700
Subject: [PATCH 1/2] [IMP] queue_job: requeue orphaned jobs

---
 queue_job/jobrunner/runner.py            | 37 ++++++++++++++++++++++++
 queue_job/tests/test_requeue_dead_job.py | 31 ++++++++++++++++++++
 2 files changed, 68 insertions(+)

diff --git a/queue_job/jobrunner/runner.py b/queue_job/jobrunner/runner.py
index a0db6751db..ccdbeaec11 100644
--- a/queue_job/jobrunner/runner.py
+++ b/queue_job/jobrunner/runner.py
@@ -386,6 +386,35 @@ def _query_requeue_dead_jobs(self):
             RETURNING uuid
             """
 
+    def _query_requeue_orphaned_jobs(self):
+        """Query to requeue jobs stuck in 'enqueued' state without a lock.
+
+        This handles the edge case where the runner marks a job as 'enqueued'
+        but the HTTP request to start the job never reaches the Odoo server
+        (e.g., due to server shutdown/crash between setting enqueued and
+        the controller receiving the request). These jobs have no lock record
+        because set_started() was never called, so they are invisible to
+        _query_requeue_dead_jobs().
+        """
+        return """
+            UPDATE
+                queue_job
+            SET
+                state='pending'
+            WHERE
+                state = 'enqueued'
+                AND date_enqueued < (now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
+                AND NOT EXISTS (
+                    SELECT
+                        1
+                    FROM
+                        queue_job_lock
+                    WHERE
+                        queue_job_id = queue_job.id
+                )
+            RETURNING uuid
+            """
+
     def requeue_dead_jobs(self):
         """
         Set started and enqueued jobs but not locked to pending
@@ -414,6 +443,14 @@ def requeue_dead_jobs(self):
             for (uuid,) in cr.fetchall():
                 _logger.warning("Re-queued dead job with uuid: %s", uuid)
 
+            # Requeue orphaned jobs (enqueued but never started, no lock)
+            query = self._query_requeue_orphaned_jobs()
+            cr.execute(query)
+            for (uuid,) in cr.fetchall():
+                _logger.warning(
+                    "Re-queued orphaned job (enqueued without lock) with uuid: %s", uuid
+                )
+
 
 class QueueJobRunner:
     def __init__(
diff --git a/queue_job/tests/test_requeue_dead_job.py b/queue_job/tests/test_requeue_dead_job.py
index c6c82a2f4d..5c6ca47e52 100644
--- a/queue_job/tests/test_requeue_dead_job.py
+++ b/queue_job/tests/test_requeue_dead_job.py
@@ -131,3 +131,34 @@ def test_requeue_dead_jobs(self):
         # because we committed the cursor, the savepoint of the test method is
         # gone, and this would break TransactionCase cleanups
         self.cr.execute("SAVEPOINT test_%d" % self._savepoint_id)
+
+    def test_requeue_orphaned_jobs(self):
+        uuid = "test_enqueued_job"
+        queue_job = self.create_dummy_job(uuid)
+        job_obj = Job.load(self.env, queue_job.uuid)
+
+        # Only enqueued job, don't set it to started to simulate the scenario
+        # that system shutdown before job is starting
+        job_obj.set_enqueued()
+        job_obj.date_enqueued = datetime.now() - timedelta(minutes=1)
+        job_obj.store()
+
+        # job ins't actually picked up by the first requeue attempt
+        query = Database(self.env.cr.dbname)._query_requeue_dead_jobs()
+        self.env.cr.execute(query)
+        uuids_requeued = self.env.cr.fetchall()
+        self.assertFalse(uuids_requeued)
+
+        # job is picked up by the 2nd requeue attempt
+        query = Database(self.env.cr.dbname)._query_requeue_orphaned_jobs()
+        self.env.cr.execute(query)
+        uuids_requeued = self.env.cr.fetchall()
+        self.assertTrue(queue_job.uuid in j[0] for j in uuids_requeued)
+
+        # clean up
+        queue_job.unlink()
+        self.env.cr.commit()  # pylint: disable=E8102
+
+        # because we committed the cursor, the savepoint of the test method is
+        # gone, and this would break TransactionCase cleanups
+        self.cr.execute("SAVEPOINT test_%d" % self._savepoint_id)

From c7324dbf91311d84f4636b91e0917211c152531c Mon Sep 17 00:00:00 2001
From: hoangtrann <thhoang.tr@gmail.com>
Date: Wed, 31 Dec 2025 19:27:16 +0700
Subject: [PATCH 2/2] [IMP] queue_job: query orphaned dead job not exist in
 lock table

---
 queue_job/jobrunner/runner.py            | 78 ++++++++----------------
 queue_job/tests/test_requeue_dead_job.py |  8 +--
 2 files changed, 26 insertions(+), 60 deletions(-)

diff --git a/queue_job/jobrunner/runner.py b/queue_job/jobrunner/runner.py
index ccdbeaec11..4cf063e818 100644
--- a/queue_job/jobrunner/runner.py
+++ b/queue_job/jobrunner/runner.py
@@ -365,52 +365,26 @@ def _query_requeue_dead_jobs(self):
                         ELSE exc_info
                     END)
             WHERE
-                id in (
-                    SELECT
-                        queue_job_id
-                    FROM
-                        queue_job_lock
-                    WHERE
-                        queue_job_id in (
-                            SELECT
-                                id
-                            FROM
-                                queue_job
-                            WHERE
-                                state IN ('enqueued','started')
-                                AND date_enqueued <
-                                (now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
-                        )
-                    FOR UPDATE SKIP LOCKED
-                )
-            RETURNING uuid
-            """
-
-    def _query_requeue_orphaned_jobs(self):
-        """Query to requeue jobs stuck in 'enqueued' state without a lock.
-
-        This handles the edge case where the runner marks a job as 'enqueued'
-        but the HTTP request to start the job never reaches the Odoo server
-        (e.g., due to server shutdown/crash between setting enqueued and
-        the controller receiving the request). These jobs have no lock record
-        because set_started() was never called, so they are invisible to
-        _query_requeue_dead_jobs().
-        """
-        return """
-            UPDATE
-                queue_job
-            SET
-                state='pending'
-            WHERE
-                state = 'enqueued'
+                state IN ('enqueued','started')
                 AND date_enqueued < (now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
-                AND NOT EXISTS (
-                    SELECT
-                        1
-                    FROM
-                        queue_job_lock
-                    WHERE
-                        queue_job_id = queue_job.id
+                AND (
+                    id in (
+                        SELECT
+                            queue_job_id
+                        FROM
+                            queue_job_lock
+                        WHERE
+                            queue_job_lock.queue_job_id = queue_job.id
+                        FOR UPDATE SKIP LOCKED
+                    )
+                    OR NOT EXISTS (
+                        SELECT
+                            1
+                        FROM
+                            queue_job_lock
+                        WHERE
+                            queue_job_lock.queue_job_id = queue_job.id
+                    )
                 )
             RETURNING uuid
             """
@@ -433,6 +407,12 @@ def requeue_dead_jobs(self):
         However, when the Odoo server crashes or is otherwise force-stopped,
         running jobs are interrupted while the runner has no chance to know
         they have been aborted.
+
+        This also handles orphaned jobs (enqueued but never started, no lock).
+        This edge case occurs when the runner marks a job as 'enqueued'
+        but the HTTP request to start the job never reaches the Odoo server
+        (e.g., due to server shutdown/crash between setting enqueued and
+        the controller receiving the request).
         """
 
         with closing(self.conn.cursor()) as cr:
@@ -443,14 +423,6 @@ def requeue_dead_jobs(self):
             for (uuid,) in cr.fetchall():
                 _logger.warning("Re-queued dead job with uuid: %s", uuid)
 
-            # Requeue orphaned jobs (enqueued but never started, no lock)
-            query = self._query_requeue_orphaned_jobs()
-            cr.execute(query)
-            for (uuid,) in cr.fetchall():
-                _logger.warning(
-                    "Re-queued orphaned job (enqueued without lock) with uuid: %s", uuid
-                )
-
 
 class QueueJobRunner:
     def __init__(
diff --git a/queue_job/tests/test_requeue_dead_job.py b/queue_job/tests/test_requeue_dead_job.py
index 5c6ca47e52..180e1294eb 100644
--- a/queue_job/tests/test_requeue_dead_job.py
+++ b/queue_job/tests/test_requeue_dead_job.py
@@ -143,16 +143,10 @@ def test_requeue_orphaned_jobs(self):
         job_obj.date_enqueued = datetime.now() - timedelta(minutes=1)
         job_obj.store()
 
-        # job ins't actually picked up by the first requeue attempt
+        # job is now picked up by the requeue query (which includes orphaned jobs)
         query = Database(self.env.cr.dbname)._query_requeue_dead_jobs()
         self.env.cr.execute(query)
         uuids_requeued = self.env.cr.fetchall()
-        self.assertFalse(uuids_requeued)
-
-        # job is picked up by the 2nd requeue attempt
-        query = Database(self.env.cr.dbname)._query_requeue_orphaned_jobs()
-        self.env.cr.execute(query)
-        uuids_requeued = self.env.cr.fetchall()
         self.assertTrue(queue_job.uuid in j[0] for j in uuids_requeued)
 
         # clean up