Skip to content

Commit d4ba3b4

Browse files
committed
BF: CS-2266: Scheduler emits ORT_update_project_usage for every project each cycle (missing prj_where filter) → PROJECT_MOD event storm
1 parent f6e25d9 commit d4ba3b4

3 files changed

Lines changed: 63 additions & 0 deletions

File tree

source/clients/qevent/ocs_qevent.cc

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ qevent_options *Global_qevent_options;
8181
static void qevent_show_usage();
8282
static void qevent_testsuite_mode(sge_evc_class_t *evc);
8383
static void qevent_subscribe_mode(sge_evc_class_t *evc);
84+
static void qevent_monitor_all_mode(sge_evc_class_t *evc);
8485
static const char* qevent_get_event_name(int event);
8586
static void qevent_trigger_scripts(int qevent_event, qevent_options *option_struct, lListElem *event);
8687
static void qevent_start_trigger_script(int qevent_event, const char* script_file, lListElem *event);
@@ -326,11 +327,13 @@ static void qevent_show_usage() {
326327

327328
fprintf(stdout,"qevent [-h|-help] -ts|-testsuite\n");
328329
fprintf(stdout,"qevent [-h|-help] -sm|-subscribe\n");
330+
fprintf(stdout,"qevent [-h|-help] -am|-all\n");
329331
fprintf(stdout,"qevent [-h|-help] -trigger EVENT SCRIPT [ -trigger EVENT SCRIPT, ... ]\n\n");
330332

331333
fprintf(stdout," -h, -help show usage\n");
332334
fprintf(stdout," -ts, -testsuite run in testsuite mode\n");
333335
fprintf(stdout," -sm, -subscribe run in subscribe mode\n");
336+
fprintf(stdout," -am, -all subscribe to and print all events\n");
334337
fprintf(stdout," -trigger EVENT SCRIPT start SCRIPT (executable) when EVENT occurs\n");
335338
fprintf(stdout,"\n");
336339
fprintf(stdout,"SCRIPT - path to a executable shell script\n");
@@ -351,6 +354,7 @@ static void qevent_parse_command_line([[maybe_unused]] int argc, char **argv, qe
351354
option_struct->help_option = 0;
352355
option_struct->testsuite_option = 0;
353356
option_struct->subscribe_option = 0;
357+
option_struct->monitor_all_option = 0;
354358
option_struct->trigger_option_count =0;
355359

356360
while (*(++argv)) {
@@ -366,6 +370,10 @@ static void qevent_parse_command_line([[maybe_unused]] int argc, char **argv, qe
366370
option_struct->subscribe_option = 1;
367371
continue;
368372
}
373+
if (!strcmp("-am", *argv) || !strcmp("-all", *argv)) {
374+
option_struct->monitor_all_option = 1;
375+
continue;
376+
}
369377
if (!strcmp("-trigger", *argv)) {
370378
int ok = 0;
371379
if (option_struct->trigger_option_count >= MAX_TRIGGER_SCRIPTS ) {
@@ -512,6 +520,13 @@ int main(int argc, char *argv[])
512520
sge_exit(0);
513521
}
514522

523+
/* check for monitor-all option */
524+
if (enabled_options.monitor_all_option) {
525+
qevent_monitor_all_mode(evc);
526+
sge_dstring_free(enabled_options.error_message);
527+
sge_exit(0);
528+
}
529+
515530
if (enabled_options.trigger_option_count > 0) {
516531
lCondition *where =nullptr;
517532
lEnumeration *what = nullptr;
@@ -747,3 +762,42 @@ static void qevent_subscribe_mode(sge_evc_class_t *evc)
747762
DRETURN_VOID;
748763
}
749764

765+
/** @brief Subscribe to and print every event (diagnostic / testsuite mode).
766+
*
767+
* Subscribes to all event types (SGE_TYPE_ALL) and prints a one-line
768+
* description of every received event via print_event(), e.g.
769+
* "<n>. EVENT MOD PROJECT <name>" for a sgeE_PROJECT_MOD event. The function
770+
* blocks and keeps processing events until the client is shut down.
771+
*
772+
* Unlike the testsuite mode (-ts) and subscribe mode (-sm), which only
773+
* register for JOB and JATASK events, this mode makes every event type
774+
* observable. It is used by the testsuite to watch event types that are
775+
* otherwise invisible through qevent (e.g. PROJECT_MOD, see CS-2266).
776+
*
777+
* @param evc the enrolled event client to subscribe with and run
778+
*/
779+
static void qevent_monitor_all_mode(sge_evc_class_t *evc)
780+
{
781+
DENTER(TOP_LAYER);
782+
783+
// Set up the mirror interface for this event client, then subscribe
// SGE_TYPE_ALL with print_event passed as the callback argument.
// The return values of both calls are not checked here.
sge_mirror_initialize(evc, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);
784+
sge_mirror_subscribe(evc, SGE_TYPE_ALL, print_event, nullptr, nullptr, nullptr, nullptr);
785+
786+
// Deliver events quickly and flush PROJECT_MOD immediately, so that an
787+
// observer (e.g. the testsuite) sees them within a scheduler interval
788+
// instead of waiting for the default event delivery interval.
789+
// NOTE(review): the units of the numeric arguments below (2 and 1) are
// presumably seconds - confirm against the event client API.
evc->ec_set_edtime(evc, 2);
790+
evc->ec_set_flush(evc, sgeE_PROJECT_MOD, true, 1);
791+
792+
// Event loop: keep processing events until shut_me_down is set.
while (!shut_me_down) {
793+
sge_mirror_error error = sge_mirror_process_events(evc);
794+
// Only SGE_EM_TIMEOUT is handled; any other result simply falls through
// to the next loop iteration.
if (error == SGE_EM_TIMEOUT && !shut_me_down) {
795+
// Back off for 10 seconds before the next attempt.
// NOTE(review): this delay is much longer than the delivery/flush
// intervals configured above - confirm it does not defeat the intent
// of seeing events quickly after a timeout.
sleep(10);
796+
continue;
797+
}
798+
}
799+
800+
// Loop left because shutdown was requested: shut the mirror down again.
sge_mirror_shutdown(evc);
801+
DRETURN_VOID;
802+
}
803+

source/clients/qevent/ocs_qevent.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ typedef struct qevent_options {
4343
int help_option;
4444
int testsuite_option;
4545
int subscribe_option;
46+
int monitor_all_option;
4647
int trigger_option_count;
4748
int trigger_option_events[MAX_TRIGGER_SCRIPTS];
4849
const char* trigger_option_scripts[MAX_TRIGGER_SCRIPTS];

source/libs/sched/sgeee.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3478,6 +3478,14 @@ sge_build_sgeee_orders(scheduler_all_data_t *lists, lList *running_jobs, lList *
34783478
* build update project usage order
34793479
*-----------------------------------------------------------------*/
34803480
if (lists->project_list) {
3481+
/* Only select projects whose usage actually changed since the last
3482+
* order generation - symmetric to user_where above. Without this
3483+
* filter every project is selected on every scheduler run, producing
3484+
* a redundant ORT_update_project_usage order and a PROJECT_MOD event
3485+
* per project per cycle even on a completely idle cluster (CS-2266).
3486+
* PR_usage_seqno is stamped (like UU_usage_seqno) only when usage is
3487+
* booked, so the empty-list guard below then skips quiet cycles. */
3488+
prj_where = lWhere("%T(%I > %u)", PR_Type, PR_usage_seqno, last_seqno);
34813489
norders = lGetNumberOfElem(order_list);
34823490
if ((up_list = lSelect("", lists->project_list, prj_where, prj_usage_what))) {
34833491
if (lGetNumberOfElem(up_list)>0) {

0 commit comments

Comments
 (0)