@@ -68,6 +68,7 @@ opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX};
6868 * infrastructure that manages its structure (e.g., OpenPMIx). If we setup this
6969 * session directory structure, then we shall cleanup after ourselves.
7070 */
71+ static bool destroy_top_session_dir = false;
7172static bool destroy_job_session_dir = false;
7273static bool destroy_proc_session_dir = false;
7374
@@ -983,22 +984,25 @@ int ompi_rte_finalize(void)
983984{
984985
985986 /* cleanup the session directory we created */
987+ if (NULL != opal_process_info .top_session_dir && destroy_top_session_dir ) {
988+ opal_os_dirpath_destroy (opal_process_info .top_session_dir ,
989+ true, check_file );
990+ free (opal_process_info .top_session_dir );
991+ opal_process_info .top_session_dir = NULL ;
992+ destroy_top_session_dir = false;
993+ }
994+
986995 if (NULL != opal_process_info .job_session_dir && destroy_job_session_dir ) {
987996 opal_os_dirpath_destroy (opal_process_info .job_session_dir ,
988- false , check_file );
997+ true , check_file );
989998 free (opal_process_info .job_session_dir );
990999 opal_process_info .job_session_dir = NULL ;
9911000 destroy_job_session_dir = false;
9921001 }
9931002
994- if (NULL != opal_process_info .top_session_dir ) {
995- free (opal_process_info .top_session_dir );
996- opal_process_info .top_session_dir = NULL ;
997- }
998-
9991003 if (NULL != opal_process_info .proc_session_dir && destroy_proc_session_dir ) {
10001004 opal_os_dirpath_destroy (opal_process_info .proc_session_dir ,
1001- false , check_file );
1005+ true , check_file );
10021006 free (opal_process_info .proc_session_dir );
10031007 opal_process_info .proc_session_dir = NULL ;
10041008 destroy_proc_session_dir = false;
@@ -1165,27 +1169,45 @@ void ompi_rte_wait_for_debugger(void)
11651169
11661170static int _setup_top_session_dir (char * * sdir )
11671171{
1172+ /*
1173+ * Use a session directory structure similar to prrte (create only one
1174+ * directory for the top session) so that it can be cleaned up correctly
1175+ * when terminated.
1176+ */
11681177 char * tmpdir ;
1178+ int rc ;
1179+ uid_t uid = geteuid ();
1180+ pid_t pid = getpid ();
11691181
11701182 if ( NULL == (tmpdir = getenv ("TMPDIR" )) )
11711183 if ( NULL == (tmpdir = getenv ("TEMP" )) )
11721184 if ( NULL == (tmpdir = getenv ("TMP" )) )
11731185 tmpdir = "/tmp" ;
11741186
1175- * sdir = strdup (tmpdir );
1187+ if (0 > opal_asprintf (sdir , "%s/%s.%s.%lu.%lu" ,
1188+ tmpdir , "ompi" ,
1189+ opal_process_info .nodename ,
1190+ (unsigned long )pid , (unsigned long ) uid )) {
1191+ opal_process_info .top_session_dir = NULL ;
1192+ return OPAL_ERR_OUT_OF_RESOURCE ;
1193+ }
1194+ rc = opal_os_dirpath_create (opal_process_info .top_session_dir , 0755 );
1195+ if (OPAL_SUCCESS != rc ) {
1196+ // could not create top session dir
1197+ free (opal_process_info .top_session_dir );
1198+ opal_process_info .top_session_dir = NULL ;
1199+ return rc ;
1200+ }
1201+ destroy_top_session_dir = true;
11761202 return OPAL_SUCCESS ;
11771203}
11781204
11791205static int _setup_job_session_dir (char * * sdir )
11801206{
11811207 int rc ;
1182- /* get the effective uid */
1183- uid_t uid = geteuid ();
11841208
1185- if (0 > opal_asprintf (sdir , "%s/ompi.%s.%lu/jf.0/ %u" ,
1209+ if (0 > opal_asprintf (sdir , "%s/%u" ,
11861210 opal_process_info .top_session_dir ,
1187- opal_process_info .nodename ,
1188- (unsigned long )uid ,
11891211 opal_process_info .my_name .jobid )) {
11901212 opal_process_info .job_session_dir = NULL ;
11911213 return OPAL_ERR_OUT_OF_RESOURCE ;
0 commit comments