
diff --git a/queue_job_cron_jobrunner/README.rst b/queue_job_cron_jobrunner/README.rst
index 4c3826de48..5e059fc05a 100644
--- a/queue_job_cron_jobrunner/README.rst
+++ b/queue_job_cron_jobrunner/README.rst
@@ -78,13 +78,6 @@ Parallel execution of jobs can be achieved by leveraging multiple ``ir.cron`` records
 * Duplicate the ``queue_job_cron`` cron record as many times as needed, until
   you have as much records as cron workers.

-Known issues / Roadmap
-======================
-
-* Support channel capacity and priority. (See ``_acquire_one_job``)
-* Gracefully handle CronWorker CPU timeouts. (See ``_job_runner``)
-* Commit transaction after job state updated to started. (See ``_process``)
-
 Bug Tracker
 ===========
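
The README recipe above relies on duplicating the runner cron. As a hedged
illustration (not part of this patch), the duplication step could be scripted
from an Odoo shell; the XML ID ``queue_job_cron_jobrunner.queue_job_cron`` is
an assumption here::

    # Assumed XML ID of the runner cron shipped by this module.
    cron = env.ref("queue_job_cron_jobrunner.queue_job_cron")
    # One copy per extra worker; parallelism also requires starting Odoo
    # with enough cron threads, e.g. --max-cron-threads=2.
    cron.copy({"name": "%s (2)" % cron.name})
    env.cr.commit()  # make the new cron record visible to the cron workers
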
diff --git a/queue_job_cron_jobrunner/models/queue_job.py b/queue_job_cron_jobrunner/models/queue_job.py
index f6a4c5f36f..b1abba9a55 100644
--- a/queue_job_cron_jobrunner/models/queue_job.py
+++ b/queue_job_cron_jobrunner/models/queue_job.py
@@ -4,14 +4,16 @@
 import logging
 import traceback
-from datetime import datetime
+from datetime import datetime, timedelta
 from io import StringIO

+import psutil
 from psycopg2 import OperationalError

 from odoo import _, api, fields, models, tools
 from odoo.service.model import PG_CONCURRENCY_ERRORS_TO_RETRY

+from odoo.addons.base.models.ir_cron import _intervalTypes
 from odoo.addons.queue_job.controllers.main import PG_RETRY
 from odoo.addons.queue_job.exception import (
     FailedJobError,
@@ -19,6 +21,7 @@
     RetryableJobError,
 )
 from odoo.addons.queue_job.job import Job
+from odoo.addons.queue_job.jobrunner import QueueJobRunner

 _logger = logging.getLogger(__name__)

@@ -27,14 +30,12 @@ class QueueJob(models.Model):
     _inherit = "queue.job"

     @api.model
-    def _acquire_one_job(self):
+    def _acquire_one_job(self, commit=False):
         """Acquire the next job to be run.

         :returns: queue.job record (locked for update)
         """
-        # TODO: This method should respect channel priority and capacity,
-        #       rather than just fetching them by creation date.
-        self.flush()
+        runner = QueueJobRunner.from_environ_or_config()
         self.env.cr.execute(
             """
             SELECT id
@@ -42,32 +43,73 @@ def _acquire_one_job(self):
             FROM queue_job
             WHERE state = 'pending'
             AND (eta IS NULL OR eta <= (now() AT TIME ZONE 'UTC'))
             ORDER BY priority, date_created
-            LIMIT 1 FOR NO KEY UPDATE SKIP LOCKED
+            FOR NO KEY UPDATE
             """
         )
-        row = self.env.cr.fetchone()
-        return self.browse(row and row[0])
+        rows = self.env.cr.fetchall()
+
+        channels = {}
+        for queue_job in self.search([("state", "=", "started")]):
+            if not queue_job.channel:
+                continue
+            channels[queue_job.channel] = channels.get(queue_job.channel, 0) + 1
+        channels_without_capacity = set()
+        for channel_str, running in channels.items():
+            channel = runner.channel_manager.get_channel_by_name(
+                channel_str, autocreate=True
+            )
+            if channel.capacity and channel.capacity <= running:
+                channels_without_capacity.add(channel_str)
+        channels_without_capacity.discard(
+            "root"
+        )  # root must be disabled to avoid normal jobrunner
+        _logger.info(
+            "_acquire_one_job channels_without_capacity %s",
+            channels_without_capacity,
+        )
+
+        result = self.browse()
+        for row in rows:
+            queue_job = self.browse(row[0])
+            if queue_job.channel and queue_job.channel in channels_without_capacity:
+                continue
+            job = Job._load_from_db_record(queue_job)
+            job.set_started()
+            job.store()
+            _logger.info(
+                "_acquire_one_job queue.job %s[channel=%s,uuid=%s] started",
+                row[0],
+                job.channel,
+                job.uuid,
+            )
+            result = queue_job
+            break
+        self.flush()
+        if commit:  # pragma: no cover
+            self.env.cr.commit()  # pylint: disable=invalid-commit
+        return result

     def _process(self, commit=False):
         """Process the job"""
         self.ensure_one()
         job = Job._load_from_db_record(self)

-        # Set it as started
-        job.set_started()
-        job.store()
-        _logger.debug("%s started", job.uuid)
-        # TODO: Commit the state change so that the state can be read from the UI
-        #       while the job is processing. However, doing this will release the
-        #       lock on the db, so we need to find another way.
-        # if commit:
-        #     self.flush()
-        #     self.env.cr.commit()
-
         # Actual processing
         try:
             try:
                 with self.env.cr.savepoint():
+                    _logger.info(
+                        "perform %s[channel=%s,uuid=%s]",
+                        self.id,
+                        self.channel,
+                        self.uuid,
+                    )
                     job.perform()
+                    _logger.info(
+                        "performed %s[channel=%s,uuid=%s]",
+                        self.id,
+                        self.channel,
+                        self.uuid,
+                    )
                     job.set_done()
                     job.store()
             except OperationalError as err:
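
The hunk above replaces the old single-row ``SKIP LOCKED`` fetch with a
capacity-aware scan over all pending jobs. A minimal standalone sketch of that
gating logic (the helper name and its inputs are illustrative, not from the
patch)::

    def channels_without_capacity(running_by_channel, capacity_by_channel):
        """Return channels whose running-job count has reached capacity."""
        blocked = set()
        for channel, running in running_by_channel.items():
            capacity = capacity_by_channel.get(channel)
            if capacity and running >= capacity:
                blocked.add(channel)
        # "root" is never blocked: the regular jobrunner must be disabled
        # when using this module, so root capacity does not apply here.
        blocked.discard("root")
        return blocked

    # With 2 jobs already running on a channel capped at 2, it is skipped:
    assert channels_without_capacity(
        {"root.mail": 2, "root": 5}, {"root.mail": 2}
    ) == {"root.mail"}
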
@@ -87,13 +129,18 @@
                 msg = _("Job interrupted and set to Done: nothing to do.")
                 job.set_done(msg)
                 job.store()
+                _logger.info(
+                    "interrupted %s[channel=%s,uuid=%s]", self.id, self.channel, self.uuid
+                )

             except RetryableJobError as err:
                 # delay the job later, requeue
                 job.postpone(result=str(err), seconds=5)
                 job.set_pending(reset_retry=False)
                 job.store()
-                _logger.debug("%s postponed", job)
+                _logger.info(
+                    "postponed %s[channel=%s,uuid=%s]", self.id, self.channel, self.uuid
+                )

             except (FailedJobError, Exception):
                 with StringIO() as buff:
@@ -101,6 +148,9 @@
                     traceback.print_exc(file=buff)
                     _logger.error(buff.getvalue())
                     job.set_failed(exc_info=buff.getvalue())
                     job.store()
+                    _logger.info(
+                        "failed %s[channel=%s,uuid=%s]", self.id, self.channel, self.uuid
+                    )

         if commit:  # pragma: no cover
             self.env["base"].flush()
@@ -113,18 +163,71 @@ def _process(self, commit=False):
     @api.model
     def _job_runner(self, commit=True):
         """Short-lived job runner, triggered by async crons"""
-        job = self._acquire_one_job()
+        self._release_started_jobs(commit=commit)
+        job = self._acquire_one_job(commit=commit)
+
         while job:
             job._process(commit=commit)
-            job = self._acquire_one_job()
-            # TODO: If limit_time_real_cron is reached before all the jobs are done,
-            #       the worker will be killed abruptly.
-            #       Ideally, find a way to know if we're close to reaching this limit,
-            #       stop processing, and trigger a new execution to continue.
-            #
-            # if job and limit_time_real_cron_reached_or_about_to_reach:
-            #     self._cron_trigger()
-            #     break
+
+            if self._stop_processing():
+                _logger.info(
+                    "Stopping queue job processing in this ir.cron call; "
+                    "remaining jobs will wait for the next ir.cron call.",
+                )
+                return
+
+            job = self._acquire_one_job(commit=commit)
+
+    @api.model
+    def _stop_processing(self):
+        """Compute whether the next ir.cron call is about to trigger;
+        if so, stop processing queue jobs in this call.
+
+        The goal is to mitigate the case where, with a long backlog of
+        queue jobs to process, the cron thread gets killed by odoo.sh or
+        by Odoo's limit_time_real_cron limit.
+
+        We suggest setting the ir.cron interval lower than limit_time_real_cron.
+        """
+        # Neither the current cursor nor a new one can see a fresh nextcall,
+        # since it is committed by Odoo only at the end of the cron; we assume
+        # all runner crons are running, so nextcall is their start date.
+        next_calls = [
+            cron.nextcall + _intervalTypes[cron.interval_type](cron.interval_number)
+            for cron in self.env["ir.cron"]
+            .sudo()
+            .search([("queue_job_runner", "=", True)])
+        ]
+        if not next_calls:
+            _logger.info("Stopping queue job processing, no nextcall found.")
+            return True
+
+        next_cron_job_runner_trigger_date = min(next_calls)
+
+        stop_processing_threshold_seconds = int(
+            self.env["ir.config_parameter"]
+            .sudo()
+            .get_param(
+                "queue_job_cron_jobrunner.stop_processing_threshold_seconds",
+                "0",
+            )
+        )
+        end_process_queue_job_date = next_cron_job_runner_trigger_date - timedelta(
+            seconds=stop_processing_threshold_seconds
+        )
+        now = fields.Datetime.now()
+        _logger.debug(
+            "now: %s - estimated cron nextcall: %s - "
+            "threshold: %ss - "
+            "stop processing new jobs after %s",
+            now,
+            next_cron_job_runner_trigger_date,
+            stop_processing_threshold_seconds,
+            end_process_queue_job_date,
+        )
+        if now >= end_process_queue_job_date:
+            return True
+        return False

     @api.model
     def _cron_trigger(self, at=None):
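
To make the new cut-off concrete, here is a hedged worked example with assumed
values (a runner cron started at 10:00:00 with a 2-minute interval and a
30-second threshold)::

    from datetime import datetime, timedelta

    next_trigger = datetime(2023, 1, 1, 10, 2, 0)  # estimated next cron call
    threshold = timedelta(seconds=30)  # stop_processing_threshold_seconds
    cutoff = next_trigger - threshold  # 10:01:30

    # At 10:01:45 the runner is past the cut-off: it stops acquiring jobs
    # and the backlog waits for the next ir.cron call.
    assert datetime(2023, 1, 1, 10, 1, 45) >= cutoff
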
+ """ + # In the current cursor (nor a new cursor) we can't see fresh nextcall which: + # is committed by Odoo at the end of the cron so we assume all crons are running + # so nextcall is the current started date + next_calls = [ + cron.nextcall + _intervalTypes[cron.interval_type](cron.interval_number) + for cron in self.env["ir.cron"] + .sudo() + .search([("queue_job_runner", "=", True)]) + ] + if not next_calls: + _logger.info("Stopping queue job processing, no nextcall found.") + return True + + next_cron_job_runner_trigger_date = min(next_calls) + + stop_processing_threshold_seconds = int( + self.env["ir.config_parameter"] + .sudo() + .get_param( + "queue_job_cron_jobrunner.stop_processing_threshold_seconds", + "0", + ) + ) + end_process_queue_job_date = next_cron_job_runner_trigger_date - timedelta( + seconds=stop_processing_threshold_seconds + ) + now = fields.Datetime.now() + _logger.debug( + "now: %s - estimated cron nextcall: %s - " + "Threshold: %ss" + "stop processing new job after %s", + now, + next_cron_job_runner_trigger_date, + stop_processing_threshold_seconds, + end_process_queue_job_date, + ) + if now >= end_process_queue_job_date: + return True + return False @api.model def _cron_trigger(self, at=None): @@ -166,6 +269,24 @@ def _ensure_cron_trigger(self): if delayed_etas: self._cron_trigger(at=list(delayed_etas)) + @api.model + def _release_started_jobs(self, commit=False): + pids = [x.pid for x in psutil.process_iter()] + for record in self.search( + [("state", "=", "started"), ("worker_pid", "not in", pids)] + ): + job = Job._load_from_db_record(record) + job.set_pending() + job.store() + _logger.info( + "release started job %s[channel=%s,uuid=%s]", + record.id, + record.channel, + record.uuid, + ) + if commit: # pragma: no cover + self.env.cr.commit() # pylint: disable=invalid-commit + @api.model_create_multi def create(self, vals_list): # When jobs are created, also create the cron trigger diff --git a/queue_job_cron_jobrunner/readme/ROADMAP.rst b/queue_job_cron_jobrunner/readme/ROADMAP.rst deleted file mode 100644 index b82e199202..0000000000 --- a/queue_job_cron_jobrunner/readme/ROADMAP.rst +++ /dev/null @@ -1,3 +0,0 @@ -* Support channel capacity and priority. (See ``_acquire_one_job``) -* Gracefully handle CronWorker CPU timeouts. (See ``_job_runner``) -* Commit transaction after job state updated to started. (See ``_process``) diff --git a/queue_job_cron_jobrunner/static/description/index.html b/queue_job_cron_jobrunner/static/description/index.html index 6e6be54f46..e3c595db46 100644 --- a/queue_job_cron_jobrunner/static/description/index.html +++ b/queue_job_cron_jobrunner/static/description/index.html @@ -392,12 +392,11 @@
diff --git a/queue_job_cron_jobrunner/readme/ROADMAP.rst b/queue_job_cron_jobrunner/readme/ROADMAP.rst
deleted file mode 100644
index b82e199202..0000000000
--- a/queue_job_cron_jobrunner/readme/ROADMAP.rst
+++ /dev/null
@@ -1,3 +0,0 @@
-* Support channel capacity and priority. (See ``_acquire_one_job``)
-* Gracefully handle CronWorker CPU timeouts. (See ``_job_runner``)
-* Commit transaction after job state updated to started. (See ``_process``)
diff --git a/queue_job_cron_jobrunner/static/description/index.html b/queue_job_cron_jobrunner/static/description/index.html
index 6e6be54f46..e3c595db46 100644
--- a/queue_job_cron_jobrunner/static/description/index.html
+++ b/queue_job_cron_jobrunner/static/description/index.html
@@ -392,12 +392,11 @@
 [generated HTML mirroring the README change: the "Known issues / Roadmap"
 section is removed from the rendered page; surrounding context is the
 "Bug Tracker" section: "Bugs are tracked on GitHub Issues. In case of trouble,
 please check there if your issue has already been reported. If you spotted it
 first, help us to smash it by providing a detailed and welcomed feedback."]
@@ -440,15 +431,15 @@
 [generated HTML, "Maintainers" section context: "Do not contact contributors
 directly about support or help with technical issues." /
 "This module is maintained by the OCA."]
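
For completeness, the stop-processing threshold introduced above is read from
``ir.config_parameter`` and defaults to ``"0"`` (no safety margin). A hedged
example of enabling a 30-second margin, e.g. from an Odoo shell::

    env["ir.config_parameter"].sudo().set_param(
        "queue_job_cron_jobrunner.stop_processing_threshold_seconds", "30"
    )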