1
1
import os
2
2
from glob import glob
3
3
import logging
4
+ import random
5
+ import time
6
+
7
+ from parsl .dataflow .errors import RundirCreateError
4
8
5
9
logger = logging.getLogger(__name__)


def make_rundir(path: str, *, max_tries: int = 3) -> str:
    """Create the next numbered run directory underneath ``path``.

    Creates a rundir with the following hierarchy:

        path/
          |---- 000
          |---- 001
          |---- ...
          |---- NNN   <- one directory per call

    ``path`` itself is created if it does not exist yet. The new directory
    is named with the highest existing numeric index plus one (zero padded
    to at least three digits), starting from ``000``.

    Between listing the existing directories and creating the new one,
    something else may create the same directory. In that case the
    creation is retried after a randomised, growing backoff.

    Args:
        path: Directory that holds the numbered run directories.
        max_tries: Maximum number of creation attempts before giving up.

    Returns:
        The absolute path of the newly created run directory.

    Raises:
        FileExistsError: If the chosen directory already existed on each
            of the ``max_tries`` attempts.
    """
    # Random component so that callers colliding with each other do not
    # all retry at the same moment.
    backoff_time_s = 1 + random.random()

    os.makedirs(path, exist_ok=True)

    # try_count is 1-based for human readability
    try_count = 1
    while True:

        # Python 3.10 introduces root_dir argument to glob which in future
        # can be used to simplify this code, something like:
        # prev_rundirs = glob("[0-9]*[0-9]", root_dir=path)
        full_prev_rundirs = glob(os.path.join(path, "[0-9]*[0-9]"))
        prev_rundirs = [os.path.basename(d) for d in full_prev_rundirs]

        # The glob only constrains the first and last character, so names
        # such as "1x2" also match; skip anything that is not purely
        # numeric instead of failing in int().
        prev_indices = [int(d) for d in prev_rundirs if d.isdecimal()]

        next_index = max(prev_indices, default=-1) + 1

        current_rundir = os.path.join(path, '{0:03}'.format(next_index))

        try:
            os.makedirs(current_rundir)
        except FileExistsError:
            logger.warning("Could not create rundir %s on try %s", current_rundir, try_count)

            if try_count >= max_tries:
                raise

            # logging uses %-style lazy arguments, so the placeholder must
            # be %s rather than {}.
            logger.debug("Backing off %ss", backoff_time_s)
            time.sleep(backoff_time_s)
            backoff_time_s *= 2 + random.random()
            try_count += 1
        else:
            logger.debug("rundir created: %s", current_rundir)
            return os.path.abspath(current_rundir)

    # this should never be reached - the above loop should have either returned
    # or raised an exception on the last try
    raise RundirCreateError()
0 commit comments