Skip to content

Commit bd44b50

Browse files
committed
Update job generator
1 parent 2c307c9 commit bd44b50

File tree

7 files changed

+195
-174
lines changed

7 files changed

+195
-174
lines changed

README.md

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -35,42 +35,3 @@ Evaluate the resulting models
3535

3636
python3 test.py
3737

38-
39-
## Run experiments using Docker, Kubernetes and Google Cloud
40-
41-
Create project in Google Cloud
42-
43-
Install locally
44-
45-
Docker
46-
google-cloud-sdk
47-
kubectl
48-
49-
Create Kubernetes cluster
50-
51-
gcloud container clusters create cluster --scopes storage-full --machine-type n1-highcpu-2 --num-nodes 10 \
52-
--create-subnetwork name=my-subnet-0 \
53-
--enable-ip-alias \
54-
--enable-private-nodes \
55-
--master-ipv4-cidr 172.16.0.0/28 \
56-
--no-enable-basic-auth \
57-
--no-issue-client-certificate \
58-
--no-enable-master-authorized-networks
59-
60-
gcloud container clusters get-credentials cluster
61-
kubectl get nodes
62-
63-
Build Docker images and push to Google Container Registry (gcr.io)
64-
65-
export PROJECT_ID="$(gcloud config get-value project -q)"
66-
docker build -t gcr.io/${PROJECT_ID}/base:15 -f Dockerfile .
67-
docker push gcr.io/${PROJECT_ID}/base
68-
69-
Generate Kubernetes jobs and start them
70-
71-
python3 microesc/jobs.py experiments/sbcnn16k30.yaml
72-
kubectl create -f data/jobs/
73-
74-
Delete jobs
75-
76-
kubectl delete jobs `kubectl get jobs -o custom-columns=:.metadata.name`

experiments/ldcnn20k60.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,11 @@ fmax: 11025
55
n_fft: 1024
66
hop_length: 512
77
augmentations: 12
8+
augment: 1
89
frames: 31
910
batch: 400
1011
epochs: 50
1112
train_samples: 30000
1213
val_samples: 5000
13-
augment: 1
1414
voting: 'mean'
1515
voting_overlap: 0.5
16-
pool: '3x2'
17-
kernel: '5x5'

microesc/common.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@ def add_arguments(parser):
3030

3131
def load_experiment(folder, name):
    """Load the settings of the experiment called `name` found in `folder`.

    The settings file is looked up as `<folder>/<name>.yaml` and read with
    `load_settings_path`.
    """
    filename = name + '.yaml'
    return load_settings_path(os.path.join(folder, filename))
3334

35+
def load_settings_path(path):
    """Read the YAML settings file at `path` and return the parsed content.

    `yaml.safe_load` is used on purpose: `yaml.load` without an explicit
    `Loader` can construct arbitrary Python objects, has been deprecated
    since PyYAML 5.1 and raises a TypeError from PyYAML 6.0 on.
    """
    with open(path, 'r') as config_file:
        settings = yaml.safe_load(config_file)

    return settings

microesc/jobs.py

Lines changed: 69 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,79 @@
11

2+
import sys
23
import os.path
3-
import uuid
4+
import subprocess
45
import datetime
5-
import sys
6+
import uuid
67

7-
from . import common
8-
9-
10-
template = """
11-
apiVersion: batch/v1
12-
kind: Job
13-
metadata:
14-
name: mesc-{kind}-{name}
15-
labels:
16-
jobgroup: microesc-{kind}
17-
spec:
18-
template:
19-
metadata:
20-
name: microesc-{kind}
21-
labels:
22-
jobgroup: microesc-{kind}
23-
spec:
24-
containers:
25-
- name: jobrunner
26-
image: {image}
27-
command: {command}
28-
securityContext:
29-
privileged: true
30-
capabilities:
31-
add:
32-
- SYS_ADMIN
33-
lifecycle:
34-
postStart:
35-
exec:
36-
command: ["gcsfuse", "-o", "nonempty", "--implicit-dirs", {bucket}, {mountpoint}]
37-
preStop:
38-
exec:
39-
command: ["fusermount", "-u", {mountpoint}]
40-
resources:
41-
requests:
42-
cpu: "1.3"
43-
restartPolicy: Never
44-
"""
45-
46-
47-
def array_str(a):
48-
m = ', '.join([ '"{}"'.format(p) for p in a ])
49-
return '[ {} ]'.format(m)
50-
51-
def render_job(image, script, args, mountpoint, bucket):
52-
cmd = ["python3", "{}.py".format(script) ]
53-
54-
for k, v in args.items():
55-
cmd += [ '--{}'.format(k), str(v) ]
56-
57-
p = dict(
58-
image=image,
59-
kind=script,
60-
name=args['name'],
61-
command=array_str(cmd),
62-
bucket=bucket,
63-
mountpoint=mountpoint,
64-
)
65-
s = template.format(**p)
66-
return s
67-
68-
def generate_train_jobs(settings, jobs_dir, image, experiment, out_dir, mountpoint, bucket):
69-
70-
t = datetime.datetime.now().strftime('%Y%m%d-%H%M')
71-
u = str(uuid.uuid4())[0:4]
72-
name = "-".join([experiment, t, u])
8+
import pandas
9+
import numpy
7310

74-
folds = list(range(0, 9))
75-
76-
for fold in folds:
77-
args = {
78-
'experiment': experiment,
79-
'models': out_dir,
80-
'fold': fold,
81-
'name': name+'-fold{}'.format(fold),
82-
}
83-
84-
s = render_job(image, 'train', args, mountpoint, bucket)
85-
86-
job_filename = "train-{}.yaml".format(fold)
87-
out_path = os.path.join(jobs_dir, job_filename)
88-
with open(out_path, 'w') as out:
89-
out.write(s)
11+
from microesc import common
12+
13+
def arglist(options):
    """Format every option as a '--key=value' command-line argument.

    The arguments come out in the iteration order of `options`.
    """
    return ["--{}={}".format(k, v) for k, v in options.items()]


def command_for_job(options):
    """Return the argument list that runs train.py with the given options."""
    return ['python3', 'train.py'] + arglist(options)


def generate_train_jobs(experiments, settings_path, folds, overrides):
    """Build one training command for every (experiment, fold) combination.

    experiments: table of experiments; it is only accessed through
        `iterrows()`, and the `items()` of each row become options of the
        job (main() passes a pandas DataFrame).
    settings_path: value passed on as the 'settings' option of every job.
    folds: iterable of fold identifiers; each experiment gets one job per fold.
    overrides: mapping of options applied last, so they win over both the
        defaults and the per-experiment values.

    Returns a flat list of commands (each a list of strings), ordered by
    experiment and then by fold. The list is empty when there are no
    experiments or no folds.
    """
    # Computed once, so all jobs generated by one call share the same suffix
    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M')
    unique = str(uuid.uuid4())[0:4]

    # folds is walked once per experiment, so it must survive re-iteration
    folds = list(folds)

    def job_name(exname, fold):
        prefix = "-".join([exname, timestamp, unique])
        return prefix + '-fold{}'.format(fold)

    def job(exname, experiment, fold):
        options = {
            'name': job_name(exname, fold),
            'fold': fold,
            'settings': settings_path,
        }
        # overrides per experiment
        options.update(experiment.items())
        # caller-supplied overrides take precedence over everything else
        options.update(overrides)
        return command_for_job(options)

    # FIXME: better job name
    jobs = [
        job(str(idx), ex, fold)
        for idx, ex in experiments.iterrows()
        for fold in folds
    ]
    return jobs
9055

9156
def parse(args):
9257

9358
import argparse
9459

9560
parser = argparse.ArgumentParser(description='Generate jobs')
9661

97-
common.add_arguments(parser)
62+
#common.add_arguments(parser)
9863

9964
a = parser.add_argument
10065

66+
a('--models', default='models.csv',
67+
help='%(default)s')
68+
a('--settings', default='experiments/ldcnn20k60.yaml',
69+
help='%(default)s')
70+
10171

10272
a('--jobs', dest='jobs_dir', default='./data/jobs',
10373
help='%(default)s')
10474

105-
a('--bucket', type=str, default='jonnor-micro-esc',
106-
help='GCS bucket to write to. Default: %(default)s')
107-
108-
a('--image', type=str, default='gcr.io/masterthesis-231919/base:21',
109-
help='Docker image to use')
75+
a('--check', action='store_true',
76+
help='Only run a pre-flight check')
11077

11178
parsed = parser.parse_args(args)
11279

@@ -115,16 +82,19 @@ def parse(args):
11582
def main():
    """Print the training commands for all models, one command per line.

    Reads the model table from the CSV given by --models and the settings
    from the YAML file given by --settings, then prints what
    generate_train_jobs() returns. With --check only fold 1 is used and the
    train/validation sample counts are overridden with the 'batch' setting.
    """
    # Local import, in the same style as the argparse import inside parse()
    import shlex

    args = parse(sys.argv[1:])

    models = pandas.read_csv(args.models)
    settings = common.load_settings_path(args.settings)

    overrides = {}
    folds = list(range(0, 9))
    if args.check:
        # Pre-flight check: a single fold and one batch worth of samples
        folds = (1,)
        overrides['train_samples'] = settings['batch']*1
        overrides['val_samples'] = settings['batch']*1

    cmds = generate_train_jobs(models, args.settings, folds, overrides)

    # Quote every argument so that values containing spaces or shell
    # metacharacters still form a valid command line; arguments that need
    # no quoting are printed unchanged.
    # NOTE(review): assumes the printed lines are consumed by a shell - confirm.
    print('\n'.join(" ".join(shlex.quote(arg) for arg in cmd) for cmd in cmds))
13098

99+
if __name__ == '__main__':
100+
main()

models.csv

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,7 @@
1-
id,nick,conv_block,kernel_size,downsample,downsample_type,filters, (ram_use,flash_use,maccs) (val_acc_avg, val_acc_std, test_acc_avg, test_acc_std) (inference_time)
2-
0,SB-CNN,conv,5x5,3x2,maxpool,24
3-
1,Stride,conv,5x5,2x2,stride,
4-
2,DepthwiseSep,dw,5x5,2x2,stride,
5-
3,MobileNet,dw_pw,5x5,2x2,stride,
6-
4,MobileNetV2,pw_dw_pw,5x5,2x2,stride,
7-
8-
5x9x
9-
10-
# SpatiallySeparable. pw_sdw_pw
11-
12-
Find out effect of better convolutional blocks on accuracy vs inference time.
13-
(and striding)
14-
(wide versus deep)
15-
(different voting overlaps)
16-
17-
Stride in Keras/Tensorflow must be uniform.
18-
19-
first all with 5x5 kernel, 2 intermediate blocks.
20-
Then can try 3x3 kernel, 3 intermediate blocks
21-
22-
Use same learning rate for all.
23-
24-
Adjust number of convolutions to make MACC approximately equal within groups.
25-
Ref Google paper keyword spotting. tstride/fstride?
26-
27-
Should have a preflight check. Runs all models, in parallel, 1 epoch, 1 fold, 1/10 the samples.
28-
29-
Then can test reporting tools based on that.
30-
31-
Plot training curves together.
32-
1+
conv_block,conv_size,downsample_size,downsample_type,filters
2+
conv,5x5,3x2,maxpool,24
3+
conv,5x5,2x2,stride,24
4+
dw,5x5,2x2,stride,24
5+
dw_pw,5x5,2x2,stride,24
6+
pw_dw_pw,5x5,2x2,stride,24
337

report/report.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,22 @@ The SB-CNN model was used as a base, with 30 mels bands. ST FP-SENSING1 function
715715
# Methods
716716

717717

718+
## Blabla
719+
<!---
720+
Find out effect of better convolutional blocks on accuracy vs inference time.
721+
(and striding)
722+
(wide versus deep)
723+
(different voting overlaps)
724+
-->
725+
726+
Stride in Keras/Tensorflow must be uniform.
727+
728+
First, all with 5x5 kernel, 2 intermediate blocks.
729+
Then can try 3x3 kernel, 3 intermediate blocks
730+
731+
Adjust number of convolutions to make MACC approximately equal within groups.
732+
Ref Google paper keyword spotting. tstride/fstride?
733+
718734

719735

720736
## Model pipeline

0 commit comments

Comments
 (0)