diff --git a/CMakeLists.txt b/CMakeLists.txt index e285767f2..34f65d052 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,7 @@ if (NOT CMAKE_BUILD_TYPE) "Debug" CACHE STRING "select build type between Debug and Release" FORCE) endif () +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DENABLE_DEBUG_CHECKS") # 3RD_PARTY OPTIONS option(WITH_OS_3RDPARTY "Use 3rdparty libraries provided by OS packages" OFF) diff --git a/source/daemons/qmaster/sge_c_gdi.cc b/source/daemons/qmaster/sge_c_gdi.cc index 9fc6d322d..d519b4d3c 100644 --- a/source/daemons/qmaster/sge_c_gdi.cc +++ b/source/daemons/qmaster/sge_c_gdi.cc @@ -562,7 +562,7 @@ sge_c_gdi_get_in_worker(gdi_object_t *ao, sge_gdi_packet_class_t *packet, sge_gd /* * DIRTY HACK: The "ok" message should be removed from the answer list - * 05/21/2007 qualitiy was ANSWER_QUALITY_INFO but this results in "ok" + * 05/21/2007 quality was ANSWER_QUALITY_INFO but this results in "ok" * messages on qconf side */ snprintf(SGE_EVENT, SGE_EVENT_SIZE, SFNMAX, MSG_GDI_OKNL); diff --git a/source/daemons/qmaster/sge_thread_reader.cc b/source/daemons/qmaster/sge_thread_reader.cc index a9fc0fd72..bfdf7ee2e 100644 --- a/source/daemons/qmaster/sge_thread_reader.cc +++ b/source/daemons/qmaster/sge_thread_reader.cc @@ -137,6 +137,9 @@ sge_reader_main(void *arg) { component_set_thread_id(thread_id); DPRINTF(SFN "(%d) started\n", thread_name, thread_id); + // this thread will use the READER data store + ocs::DataStore::select_active_ds(ocs::DataStore::Id::READER); + // init monitoring cl_thread_func_startup(thread_config); sge_monitor_init(p_monitor, thread_config->thread_name, GDI_EXT, RT_WARNING, RT_ERROR); @@ -199,6 +202,13 @@ sge_reader_main(void *arg) { is_only_read_request = false; } +#if defined (ENABLE_DEBUG_CHECKS) + if (!is_only_read_request) { + CRITICAL("reader thread tries to execute write request"); + abort(); + } +#endif + /* * acquire the correct lock */ diff --git a/source/libs/gdi/sge_gdi_packet.cc 
b/source/libs/gdi/sge_gdi_packet.cc index 3b4539acd..29f46038a 100644 --- a/source/libs/gdi/sge_gdi_packet.cc +++ b/source/libs/gdi/sge_gdi_packet.cc @@ -290,7 +290,7 @@ sge_gdi_task_create(sge_gdi_packet_class_t *packet, lList **answer_list, u_long3 * NOTES * MT-NOTE: sge_gdi_task_free() is MT safe as long as the structure * passed to this function is not accessed by more than one -* thread simultaniously. +* thread simultaneously. * * SEE ALSO * gdi/request_internal/sge_gdi_task_create() diff --git a/source/libs/gdi/sge_gdi_packet_internal.h b/source/libs/gdi/sge_gdi_packet_internal.h index d36d2e848..b4d0caa04 100644 --- a/source/libs/gdi/sge_gdi_packet_internal.h +++ b/source/libs/gdi/sge_gdi_packet_internal.h @@ -37,6 +37,8 @@ #include "gdi/sge_gdi_packet_type.h" #include "uti/sge_tq.h" +#include "sge_gdi_packet_type.h" + extern sge_tq_queue_t *GlobalRequestQueue; extern sge_tq_queue_t *ReaderRequestQueue; extern sge_tq_queue_t *ReaderWaitingRequestQueue; diff --git a/source/libs/gdi/sge_gdi_packet_pb_cull.cc b/source/libs/gdi/sge_gdi_packet_pb_cull.cc index f4a0706ca..47ab3b6c2 100644 --- a/source/libs/gdi/sge_gdi_packet_pb_cull.cc +++ b/source/libs/gdi/sge_gdi_packet_pb_cull.cc @@ -480,6 +480,11 @@ sge_gdi_packet_pack_task(sge_gdi_packet_class_t *packet, sge_gdi_task_class_t *t DRETURN(ret); error_with_mapping: ret = sge_gdi_map_pack_errors(pack_ret, answer_list); + if (task->do_select_pack_simultaneous) { + // data_list references a master list + // avoid it being freed when the packet/task gets freed + task->data_list = nullptr; + } DRETURN(ret); } diff --git a/source/libs/sgeobj/ocs_DataStore.cc b/source/libs/sgeobj/ocs_DataStore.cc index f89a5fec8..147db4874 100644 --- a/source/libs/sgeobj/ocs_DataStore.cc +++ b/source/libs/sgeobj/ocs_DataStore.cc @@ -20,6 +20,7 @@ #include +#include "uti/sge_log.h" #include "uti/sge_rmon_macros.h" #include "basis_types.h" @@ -118,10 +119,35 @@ namespace ocs { * @return pointer to the master list. will never be nullptr. 
*/ lList ** - DataStore::get_master_list_rw(sge_object_type type) { + DataStore::get_master_list_rw(sge_object_type type, bool for_read) { DENTER(DATA_STORE_LAYER); GET_SPECIFIC(obj_thread_local_t, obj_state, obj_state_init, obj_state_key); +#if defined (ENABLE_DEBUG_CHECKS) + auto ds_id = obj_state->ds_id; + const char *thread_name = component_get_thread_name(); + if (thread_name != nullptr) { + if (strcmp(thread_name, "worker") == 0 && ds_id != DataStore::Id::GLOBAL) { + CRITICAL("Worker thread is trying to access data store %d for list %d", ds_id, type); + abort(); + } + + if (strcmp(thread_name, "reader") == 0) { + if (ds_id != DataStore::Id::READER) { + CRITICAL("Reader thread is trying to access data store %d for list %d", ds_id, type); + abort(); + } + // @todo enable once CS-825 is fixed +#if 0 + if (!for_read) { + CRITICAL("Reader thread is trying to get master list with write access"); + abort(); + } +#endif + } + } +#endif + lList **ret; ret = &(obj_thread_shared.data_store[obj_state->ds_id].master_list[type]); #ifdef OBSERVE diff --git a/source/libs/sgeobj/ocs_DataStore.h b/source/libs/sgeobj/ocs_DataStore.h index efd0e18db..6e7ecca24 100644 --- a/source/libs/sgeobj/ocs_DataStore.h +++ b/source/libs/sgeobj/ocs_DataStore.h @@ -41,7 +41,7 @@ namespace ocs { select_active_ds(ocs::DataStore::Id ds_id); static lList ** - get_master_list_rw(sge_object_type type); + get_master_list_rw(sge_object_type type, bool for_read = false); /** * Returns a master list (RO-access) from the currently active data store of the active threads @@ -50,7 +50,7 @@ namespace ocs { */ static inline const lList ** get_master_list(sge_object_type type) { - return const_cast<const lList **>(ocs::DataStore::get_master_list_rw(type)); + return const_cast<const lList **>(ocs::DataStore::get_master_list_rw(type, true)); } static void