11
2+ import sys
23import os .path
3- import uuid
4+ import subprocess
45import datetime
5- import sys
6+ import uuid
67
7- from . import common
8-
9-
# Kubernetes batch/v1 Job manifest, filled in with str.format().
# Placeholders: {kind}, {name}, {image}, {command}, {bucket}, {mountpoint}.
# The container mounts {bucket} at {mountpoint} with gcsfuse in its postStart
# hook and unmounts it with fusermount in preStop.
# NOTE(review): privileged + SYS_ADMIN is presumably what the FUSE mount
# needs -- confirm before tightening.
template = """
apiVersion: batch/v1
kind: Job
metadata:
  name: mesc-{kind}-{name}
  labels:
    jobgroup: microesc-{kind}
spec:
  template:
    metadata:
      name: microesc-{kind}
      labels:
        jobgroup: microesc-{kind}
    spec:
      containers:
      - name: jobrunner
        image: {image}
        command: {command}
        securityContext:
          privileged: true
          capabilities:
            add:
              - SYS_ADMIN
        lifecycle:
          postStart:
            exec:
              command: ["gcsfuse", "-o", "nonempty", "--implicit-dirs", {bucket}, {mountpoint}]
          preStop:
            exec:
              command: ["fusermount", "-u", {mountpoint}]
        resources:
          requests:
            cpu: "1.3"
      restartPolicy: Never
"""
45-
46-
def array_str(a):
    """Render the items of *a* as a bracketed list of double-quoted strings.

    Each item is converted with ``str()`` and quoted, e.g.
    ``['a', 'b']`` becomes ``'[ "a", "b" ]'``. Embedded double quotes and
    backslashes are escaped so the quoted items stay well-formed.

    Args:
        a: Iterable of values to render.

    Returns:
        The rendered list as a single string.
    """
    # Local import: json is not among the file's top-level imports.
    import json

    # json.dumps yields the same '"text"' form as manual quoting for plain
    # strings, but also escapes '"' and '\\' inside the value.
    quoted = [json.dumps(str(p), ensure_ascii=False) for p in a]
    return '[ {} ]'.format(', '.join(quoted))
50-
def render_job(image, script, args, mountpoint, bucket):
    """Render the Job manifest text for one run of ``<script>.py``.

    The container command is ``python3 <script>.py`` followed by a
    ``--key value`` pair for every entry in *args*.

    Args:
        image: Container image reference substituted into the manifest.
        script: Script name without the ``.py`` suffix; also used as the
            job ``kind`` in the manifest.
        args: Mapping of command-line options. Must contain ``'name'``,
            which is also used in the job name.
        mountpoint: Path substituted into the gcsfuse/fusermount hooks.
        bucket: Bucket name substituted into the gcsfuse hook.

    Returns:
        The formatted manifest as a string.

    Raises:
        KeyError: If *args* has no ``'name'`` entry.
    """
    cmd = ["python3", "{}.py".format(script)]
    for key, value in args.items():
        cmd += ['--{}'.format(key), str(value)]

    params = dict(
        image=image,
        kind=script,
        name=args['name'],
        command=array_str(cmd),
        bucket=bucket,
        mountpoint=mountpoint,
    )
    return template.format(**params)
67-
def generate_train_jobs(settings, jobs_dir, image, experiment, out_dir, mountpoint, bucket):
    """Write one training Job manifest per fold into *jobs_dir*.

    Files are named ``train-<fold>.yaml`` for folds 0 through 8. Each job is
    named ``<experiment>-<timestamp>-<4 uuid chars>-fold<fold>``.

    Args:
        settings: Not used by this function.
        jobs_dir: Existing directory that receives the manifest files.
        image: Container image reference passed to ``render_job``.
        experiment: Experiment name; forwarded as ``--experiment`` and used
            as the job name prefix.
        out_dir: Forwarded to the training script as ``--models``.
        mountpoint: Passed to ``render_job``.
        bucket: Passed to ``render_job``.
    """
    stamp = datetime.datetime.now().strftime('%Y%m%d-%H%M')
    # Short random suffix so repeated runs within the same minute get
    # distinct names.
    suffix = str(uuid.uuid4())[0:4]
    name = "-".join([experiment, stamp, suffix])

    folds = list(range(0, 9))

    for fold in folds:
        args = {
            'experiment': experiment,
            'models': out_dir,
            'fold': fold,
            'name': name + '-fold{}'.format(fold),
        }

        manifest = render_job(image, 'train', args, mountpoint, bucket)

        job_filename = "train-{}.yaml".format(fold)
        out_path = os.path.join(jobs_dir, job_filename)
        with open(out_path, 'w') as out:
            out.write(manifest)


# These two imports sat between the lines of the function above in the
# diff; they are kept here so nothing is dropped.
import pandas
import numpy
11+ from microesc import common
12+
def arglist(options):
    """Convert an options mapping into ``--key=value`` argument strings.

    Args:
        options: Mapping of option name to value; values are formatted with
            ``str.format``.

    Returns:
        A list with one ``'--key=value'`` string per entry, in the
        mapping's iteration order.
    """
    return ["--{}={}".format(k, v) for k, v in options.items()]
16+
def command_for_job(options):
    """Build the argv list that runs ``train.py`` with the given options.

    Args:
        options: Mapping of option name to value, rendered by ``arglist``.

    Returns:
        ``['python3', 'train.py']`` followed by one ``--key=value`` string
        per option.
    """
    return ['python3', 'train.py'] + arglist(options)
23+
def generate_train_jobs(experiments, settings_path, folds, overrides):
    """Build one training command per (experiment, fold) combination.

    Option precedence, lowest to highest: the generated ``name``, ``fold``
    and ``settings`` values, then the experiment row's own columns, then
    *overrides*.

    Args:
        experiments: Object with ``iterrows()`` yielding ``(index, row)``
            pairs where ``row.items()`` gives option name/value pairs
            (``main`` passes a pandas DataFrame).
        settings_path: Value passed to every job as ``--settings``.
        folds: Iterable of fold identifiers to generate jobs for.
        overrides: Mapping of options applied last to every job.

    Returns:
        A flat list of commands, each a list of argv strings, ordered by
        experiment and then by fold.
    """
    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M')
    # Short random suffix so repeated runs within the same minute get
    # distinct names.
    unique = str(uuid.uuid4())[0:4]

    def name(experiment, fold):
        base = "-".join([experiment, timestamp, unique])
        return base + '-fold{}'.format(fold)

    def commands_for(exname, experiment):
        # Collect a command for every fold. Returning from inside or right
        # after the loop would yield only one fold per experiment.
        commands = []
        for fold in folds:
            options = {
                'name': name(exname, fold),
                'fold': fold,
                'settings': settings_path,
            }
            # overrides per experiment
            options.update(experiment.items())
            # global overrides take precedence over everything else
            options.update(overrides.items())
            commands.append(command_for_job(options))
        return commands

    # FIXME: better job name
    jobs = []
    for idx, ex in experiments.iterrows():
        jobs.extend(commands_for(str(idx), ex))
    return jobs
9055
9156def parse (args ):
9257
9358 import argparse
9459
9560 parser = argparse .ArgumentParser (description = 'Generate jobs' )
9661
97- common .add_arguments (parser )
62+ # common.add_arguments(parser)
9863
9964 a = parser .add_argument
10065
66+ a ('--models' , default = 'models.csv' ,
67+ help = '%(default)s' )
68+ a ('--settings' , default = 'experiments/ldcnn20k60.yaml' ,
69+ help = '%(default)s' )
70+
10171
10272 a ('--jobs' , dest = 'jobs_dir' , default = './data/jobs' ,
10373 help = '%(default)s' )
10474
105- a ('--bucket' , type = str , default = 'jonnor-micro-esc' ,
106- help = 'GCS bucket to write to. Default: %(default)s' )
107-
108- a ('--image' , type = str , default = 'gcr.io/masterthesis-231919/base:21' ,
109- help = 'Docker image to use' )
75+ a ('--check' , action = 'store_true' ,
76+ help = 'Only run a pre-flight check' )
11077
11178 parsed = parser .parse_args (args )
11279
@@ -115,16 +82,19 @@ def parse(args):
def main():
    """Print one training command per line for the experiments in the models CSV.

    Reads the experiments table from ``args.models`` and the settings from
    ``args.settings``. With ``--check`` only fold 1 is generated and the
    ``train_samples`` / ``val_samples`` options are overridden with
    ``settings['batch']``.
    """
    args = parse(sys.argv[1:])

    models = pandas.read_csv(args.models)
    settings = common.load_settings_path(args.settings)

    overrides = {}
    folds = list(range(0, 9))
    if args.check:
        # Pre-flight check: a single fold with a reduced sample count.
        # NOTE(review): settings['batch'] is presumably the batch size, so
        # this would be one batch each -- confirm.
        folds = (1,)
        overrides['train_samples'] = settings['batch'] * 1
        overrides['val_samples'] = settings['batch'] * 1

    cmds = generate_train_jobs(models, args.settings, folds, overrides)

    # One command per line, with no leading space on continuation lines.
    print('\n'.join(" ".join(cmd) for cmd in cmds))
13098
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
0 commit comments