|
| 1 | +import json |
| 2 | +import logging |
1 | 3 | import os |
2 | | -import gc |
3 | 4 | import time |
4 | | -import logging |
5 | | -from threading import Thread |
| 5 | +from threading import Lock, Thread |
6 | 6 |
|
7 | 7 | from database import DBSession |
| 8 | +from faas_scheduler.models import ScriptLog |
8 | 9 | from faas_scheduler.utils import ( |
9 | | - check_and_set_tasks_timeout, |
10 | | - delete_log_after_days, |
11 | | - delete_statistics_after_days, |
12 | | - basic_log, |
| 10 | + add_script, |
| 11 | + run_script, |
| 12 | + get_script_file, |
| 13 | + hook_update_script |
13 | 14 | ) |
14 | 15 |
|
15 | | -basic_log("scheduler.log") |
| 16 | +logger = logging.getLogger(__name__) |
| 17 | + |
16 | 18 |
|
17 | | -SUB_PROCESS_TIMEOUT = int( |
18 | | - os.environ.get("PYTHON_PROCESS_TIMEOUT", 60 * 15) |
19 | | -) # 15 minutes |
| 19 | +class ScriptQueue: |
20 | 20 |
|
21 | | -logger = logging.getLogger(__name__) |
| 21 | + def __init__(self): |
| 22 | + self.q = [] # a list of ScriptLog |
| 23 | + self.script_logs_dict = {} # a dict of {id: ScriptLog} |
| 24 | + self.lock = Lock() |
| 25 | + self.running_count = {} |
| 26 | + # a dict of |
| 27 | + # { |
| 28 | + # "<team>": 0, |
| 29 | + # "<team>_<dtable_uuid>": 0, |
| 30 | + # "<team>_<dtable_uuid>_<script_name>": 0 |
| 31 | + # } |
| 32 | + try: |
| 33 | + run_limit_per_team = int(os.environ.get('RUN_LIMIT_PER_TEAM', 0)) |
| 34 | + except (TypeError, ValueError): |
| 35 | + run_limit_per_team = 0 |
| 36 | + try: |
| 37 | + run_limit_per_base = int(os.environ.get('RUN_LIMIT_PER_BASE', 0)) |
| 38 | + except (TypeError, ValueError): |
| 39 | + run_limit_per_base = 0 |
| 40 | + try: |
| 41 | + run_limit_per_script = int(os.environ.get('RUN_LIMIT_PER_SCRIPT', 0)) |
| 42 | + except (TypeError, ValueError): |
| 43 | + run_limit_per_script = 0 |
| 44 | + self.config = { |
| 45 | + 'run_limit_per_team': run_limit_per_team, |
| 46 | + 'run_limit_per_base': run_limit_per_base, |
| 47 | + 'run_limit_per_script': run_limit_per_script |
| 48 | + } |
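| | + # a limit of 0 (the default) disables the corresponding check below |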
| 49 | + |
| 50 | + def can_run_script(self, script_log: ScriptLog): |
| 51 | + if script_log.org_id != -1: |
| 52 | + running_team_key = f'{script_log.org_id}' |
| 53 | + else: |
| 54 | + running_team_key = f'{script_log.owner}' |
| 55 | + running_base_key = f'{running_team_key}_{script_log.dtable_uuid}' |
| 56 | + running_script_key = f'{running_base_key}_{script_log.script_name}' |
| 57 | + |
| 58 | + if self.config['run_limit_per_team'] > 0 and self.config['run_limit_per_team'] <= self.running_count.get(running_team_key, 0): |
| 59 | + return False |
| 60 | + if self.config['run_limit_per_base'] > 0 and self.config['run_limit_per_base'] <= self.running_count.get(running_base_key, 0): |
| 61 | + return False |
| 62 | + if self.config['run_limit_per_script'] > 0 and self.config['run_limit_per_script'] <= self.running_count.get(running_script_key, 0): |
| 63 | + return False |
| 64 | + |
| 65 | + return True |
| 66 | + |
| 67 | + def add_script_log(self, script_log: ScriptLog): |
| 68 | + with self.lock: |
| 69 | + self.q.append(script_log) |
| 70 | + self.script_logs_dict[script_log.id] = script_log |
| 71 | + |
| 72 | + def get(self): |
| 73 | + """get the first script log from self.q that is allowed to run |
22 | 74 |
|
| 75 | + Return: an instance of ScriptLog or None |
| 76 | + """ |
| 77 | + with self.lock: |
| 78 | + return_task = None |
23 | 79 |
|
24 | | -class FAASTaskTimeoutSetter(Thread): |
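| | + # scan the queue in FIFO order, skipping entries whose |
| | + # team/base/script is currently at its configured limit |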
| 80 | + index = 0 |
| 81 | + while index < len(self.q): |
| 82 | + script_log = self.q[index] |
| 83 | + if self.can_run_script(script_log): |
| 84 | + return_task = script_log |
| 85 | + self.q.pop(index) |
| | + # count the selected script as running so the limits apply |
| | + if script_log.org_id != -1: |
| | + running_team_key = f'{script_log.org_id}' |
| | + else: |
| | + running_team_key = f'{script_log.owner}' |
| | + running_base_key = f'{running_team_key}_{script_log.dtable_uuid}' |
| | + running_script_key = f'{running_base_key}_{script_log.script_name}' |
| | + for key in (running_team_key, running_base_key, running_script_key): |
| | + self.running_count[key] = self.running_count.get(key, 0) + 1 |
| 86 | + break |
| 87 | + index += 1 |
| 88 | + |
| 89 | + return return_task |
| 90 | + |
| 91 | + def script_done_callback(self, script_log: ScriptLog): |
| 92 | + with self.lock: |
| 93 | + if script_log.org_id != -1: |
| 94 | + running_team_key = f'{script_log.org_id}' |
| 95 | + else: |
| 96 | + running_team_key = f'{script_log.owner}' |
| 97 | + running_base_key = f'{running_team_key}_{script_log.dtable_uuid}' |
| 98 | + running_script_key = f'{running_base_key}_{script_log.script_name}' |
| 99 | + if running_team_key in self.running_count: |
| 100 | + self.running_count[running_team_key] -= 1 |
| 101 | + if running_base_key in self.running_count: |
| 102 | + self.running_count[running_base_key] -= 1 |
| 103 | + if running_script_key in self.running_count: |
| 104 | + self.running_count[running_script_key] -= 1 |
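| | + # mirrors the increments made in get() when the script was dequeued |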
| 105 | + |
| 106 | + |
| 107 | +class Scheduler: |
25 | 108 |
|
26 | 109 | def __init__(self): |
27 | | - super(FAASTaskTimeoutSetter, self).__init__() |
28 | | - self.interval = 60 * 30 # every half an hour |
29 | | - |
30 | | - def run(self): |
31 | | - if SUB_PROCESS_TIMEOUT and isinstance(SUB_PROCESS_TIMEOUT, int): |
32 | | - while True: |
33 | | - logger.info("Start automatic cleanup ...") |
34 | | - db_session = DBSession() |
35 | | - try: |
36 | | - check_and_set_tasks_timeout(db_session) |
37 | | - except Exception as e: |
38 | | - logger.exception("task cleaner error: %s", e) |
39 | | - finally: |
40 | | - db_session.close() |
41 | | - |
42 | | - # python garbage collection |
43 | | - logger.info("gc.collect: %s", str(gc.collect())) |
44 | | - |
45 | | - # remove old script_logs and statistics |
46 | | - delete_log_after_days(db_session) |
47 | | - delete_statistics_after_days(db_session) |
48 | | - |
49 | | - # sleep |
50 | | - logger.info("Sleep for %d seconds ...", self.interval) |
51 | | - time.sleep(self.interval) |
52 | | - |
53 | | - |
54 | | -if __name__ == "__main__": |
55 | | - task_timeout_setter = FAASTaskTimeoutSetter() |
56 | | - task_timeout_setter.start() |
| 110 | + self.script_queue = ScriptQueue() |
| 111 | + |
| 112 | + def add_script_log( |
| 113 | + self, |
| 114 | + dtable_uuid, |
| 115 | + org_id, |
| 116 | + owner, |
| 117 | + script_name, |
| 118 | + context_data, |
| 119 | + operate_from |
| 120 | + ): |
| 121 | + script_log = add_script( |
| 122 | + DBSession(), |
| 123 | + dtable_uuid, |
| 124 | + owner, |
| 125 | + org_id, |
| 126 | + script_name, |
| 127 | + context_data, |
| 128 | + operate_from |
| 129 | + ) |
| 130 | + self.script_queue.add_script_log(script_log) |
| 131 | + return script_log |
| 132 | + |
| 133 | + def schedule(self): |
| 134 | + while True: |
| 135 | + script_log = self.script_queue.get() |
| 136 | + if not script_log: |
| 137 | + time.sleep(0.5) |
| | + continue |
| 138 | + try: |
| 139 | + script_file_info = get_script_file(script_log.dtable_uuid, script_log.script_name) |
| 140 | + run_script( |
| 141 | + script_log.script_id, |
| 142 | + script_log.dtable_uuid, |
| 143 | + script_log.script_name, |
| 144 | + script_file_info['script_url'], |
| 145 | + script_file_info['temp_api_token'], |
| 146 | + json.loads(script_log.context_data) |
| 147 | + ) |
| 148 | + except Exception as e: |
| 149 | + logger.exception('run script: %s error: %s', script_log, e) |
| 150 | + |
| 151 | + def script_done_callback( |
| 152 | + self, |
| 153 | + script_id, |
| 154 | + success, |
| 155 | + return_code, |
| 156 | + output, |
| 157 | + spend_time |
| 158 | + ): |
| 159 | + script_log = self.script_queue.script_logs_dict.pop(script_id, None) |
| 160 | + hook_update_script( |
| 161 | + DBSession(), |
| 162 | + script_id, |
| 163 | + success, |
| 164 | + return_code, |
| 165 | + output, |
| 166 | + spend_time |
| 167 | + ) |
| 168 | + if not script_log: # not counted in memory, only update db record |
| 169 | + return |
| | + self.script_queue.script_done_callback(script_log) |
| 170 | + |
| 171 | + def load_pending_script_logs(self): |
| 172 | + """load pending script logs, should be called only when the server starts |
| 173 | + """ |
| 174 | + script_logs = DBSession().query(ScriptLog).filter_by(state=ScriptLog.PENDING).order_by(ScriptLog.id) |
| 175 | + for script_log in script_logs: |
| 176 | + self.script_queue.add_script_log(script_log) |
| 177 | + |
| 178 | + def start(self): |
| 179 | + self.load_pending_script_logs() |
| 180 | + Thread(target=self.schedule, daemon=True).start() |
| 181 | + |
| 182 | + |
| 183 | +scheduler = Scheduler() |
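| | + |
| | +# usage sketch (assumption: the web server imports this module and wires it up, |
| | +# which happens outside this diff): |
| | +# scheduler.start() # load pending logs, run the schedule loop in a daemon thread |
| | +# scheduler.add_script_log(dtable_uuid, org_id, owner, script_name, context_data, operate_from) |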