# Copyright (c) 2021 Graphcore Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import sys

from tensorflow.python import ipu

from fastspeech2 import FastSpeech2Config


def _str_to_bool(value):
    if isinstance(value, bool) or value is None:
        return value
    if value.lower() in {'false', 'f', '0', 'no', 'n'}:
        return False
    elif value.lower() in {'true', 't', '1', 'yes', 'y'}:
        return True
    raise argparse.ArgumentTypeError(f'{value} is not a valid boolean value')
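
# Illustrative behaviour of _str_to_bool (a minimal sketch, not part of the
# original module):
#
#   _str_to_bool('Yes')    # -> True
#   _str_to_bool('0')      # -> False
#   _str_to_bool(None)     # -> None (passed through)
#   _str_to_bool('maybe')  # raises argparse.ArgumentTypeError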


def add_common_arguments(parser, required=True):
    group = parser.add_argument_group('Common options')

    # Training options
    group.add_argument('--train', action='store_true',
                       help="Whether to run the training loop. Default is False.")
    group.add_argument('--eval', action='store_true',
                       help="""
                       Whether to run the evaluation loop. Default is False.
                       If both `--train` and `--eval` are set, evaluation runs after training.
                       """)
    group.add_argument('--batch-size', type=int,
                       help="Batch size for the training graph.")
    group.add_argument('--base-learning-rate', type=float, default=2e-5,
                       help="Base learning rate exponent (2**N): blr = lr / batch-size.")
    group.add_argument('--loss-scaling', type=float, default=1,
                       help="Loss scaling factor.")
    group.add_argument('--optimizer', type=str, default="adamw", choices=['adam', 'adamw'],
                       help="Optimizer.")
    group.add_argument('--beta1', type=float, default=0.9,
                       help="Adam/AdamW beta1 coefficient.")
    group.add_argument('--beta2', type=float, default=0.999,
                       help="Adam/AdamW beta2 coefficient.")
    group.add_argument('--weight-decay-rate', type=float, default=0.0,
                       help="Weight decay to use during optimisation.")
    group.add_argument('--decay-steps', type=int, default=1000,
                       help="Learning rate schedule decay steps.")
    group.add_argument('--decay-rate', type=float, default=0.9,
                       help="Learning rate schedule decay rate. Only used for the exponential schedule.")
    group.add_argument('--epsilon', type=float, default=1e-4,
                       help="Optimiser epsilon value.")
    group.add_argument('--lr-schedule', default='exponential', choices=["exponential", "cosine", "polynomial"],
                       help="Learning rate schedule function.")
    group.add_argument('--warmup', default=0.1, type=float,
                       help="Learning rate schedule warm-up period, in epochs (float) or number of steps (integer).")
    group.add_argument('--seed', default=None,
                       help="Seed for randomizing training.")
    group.add_argument('--wandb', type=_str_to_bool, default=False,
                       help="Enable logging and experiment tracking with Weights & Biases.")
    group.add_argument('--wandb-name', type=str, default=None,
                       help="Override the Weights & Biases run name.")
    group.add_argument('--init-checkpoint', type=str, default=None,
                       help='Initialise a new training session from this checkpoint.')
    group.add_argument('--epochs', default=None, type=int,
                       help='Number of epochs to train for.')
    group.add_argument('--epochs-per-save', default=5, type=int,
                       help='Number of epochs between model saves.')
    group.add_argument('--steps-per-epoch', default=None, type=int,
                       help='Total number of steps to perform per epoch.')

    # FastSpeech2 options
    group.add_argument('--vocab-size', type=int,
                       help="Vocabulary size of `FastSpeech2`.")
    # Encoder
    group.add_argument('--encoder-hidden-size', type=int,
                       help="Size of the encoder layers and the pooler layer.")
    group.add_argument('--encoder-num-hidden-layers', type=int,
                       help="Number of hidden layers in the encoder.")
    group.add_argument('--encoder-num-attention-heads', type=int,
                       help="Number of attention heads for each attention layer in the encoder.")
    group.add_argument('--encoder-intermediate-size', type=int,
                       help='The size of the "intermediate" (i.e. feed-forward) layer in the encoder.')
    group.add_argument('--encoder-intermediate-kernel-size', type=int,
                       help='The convolution kernel size of the "intermediate" (i.e. feed-forward) layer in the encoder.')
    group.add_argument('--encoder-hidden-act', type=str,
                       help="The non-linear activation function (function or string) in the encoder and pooler.")
    group.add_argument('--output-attentions', type=_str_to_bool, default=False,
                       help="Whether to output the attentions.")
    group.add_argument('--output-hidden-states', type=_str_to_bool, default=False,
                       help="Whether to output the hidden states.")
    group.add_argument('--layer-norm-eps', type=float, default=1e-4,
                       help="Layer normalization epsilon value.")
    # Decoder
    group.add_argument('--decoder-hidden-size', type=int,
                       help="Size of the decoder layers and the pooler layer.")
    group.add_argument('--decoder-num-hidden-layers', type=int,
                       help="Number of hidden layers in the decoder.")
    group.add_argument('--decoder-num-attention-heads', type=int,
                       help="Number of attention heads for each attention layer in the decoder.")
    group.add_argument('--decoder-intermediate-size', type=int,
                       help='The size of the "intermediate" (i.e. feed-forward) layer in the decoder.')
    group.add_argument('--decoder-intermediate-kernel-size', type=int,
                       help='The convolution kernel size of the "intermediate" (i.e. feed-forward) layer in the decoder.')
    group.add_argument('--decoder-hidden-act', type=str,
                       help="The non-linear activation function (function or string) in the decoder and pooler.")
    # Duration predictor (only for FastSpeech)
    group.add_argument('--duration-predictor-num-conv-layers', type=int, default=2,
                       help="Number of convolution layers in the duration predictor.")
    group.add_argument('--duration-predictor-kernel-size', type=int, default=3,
                       help="The convolution kernel size of the duration predictor.")
    group.add_argument('--duration-predictor-dropout-probs', type=float, default=0.5,
                       help="The dropout probability for the duration predictor.")
    group.add_argument('--duration-predictor-filters', type=int, default=256,
                       help="Number of filters in the duration predictor.")
    # Variance predictor and postnet
    group.add_argument('--variant-predictor-num-conv-layers', type=int, default=2,
                       help="Number of convolution layers in the variant predictor.")
    group.add_argument('--variant-predictor-kernel-size', type=int, default=3,
                       help="The convolution kernel size of the variant predictor.")
    group.add_argument('--variant-predictor-dropout-rate', type=float, default=0.5,
                       help="The dropout probability for the variant predictor.")
    group.add_argument('--variant-predictor-filter', type=int, default=256,
                       help="Number of filters in the variant predictor.")
    group.add_argument('--use-postnet', type=_str_to_bool, default=True,
                       help="Whether to use a postnet after the decoder.")
    group.add_argument('--postnet-num-conv-layers', type=int,
                       help="Number of convolution layers in the postnet.")
    group.add_argument('--postnet-conv-kernel-size', type=int,
                       help="The convolution kernel size of the postnet.")
    group.add_argument('--postnet-dropout-rate', type=float,
                       help="The dropout probability for the postnet.")
    group.add_argument('--postnet-conv-filters', type=int,
                       help="Number of convolution filters in the postnet.")
    group.add_argument('--hidden-dropout-prob', type=float, default=0.2,
                       help="The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.")
    group.add_argument('--attention-probs-dropout-prob', type=float, default=0.1,
                       help="The dropout ratio for the attention probabilities.")
    group.add_argument('--max-position-embeddings', type=int,
                       help="The maximum sequence length that this model might ever be used with. Typically set this to something large just in case (e.g. 512, 1024 or 2048).")
    group.add_argument('--initializer-range', type=float, default=0.02,
                       help="The stdev of the truncated normal initializer for initializing all weight matrices.")
    group.add_argument('--max-seq-length', type=int, default=135,
                       help='The maximum sequence length.')
    group.add_argument('--max-wave-length', type=int, default=1000,
                       help='The maximum wave length.')
    group.add_argument('--num-mels', type=int, default=80,
                       help='The number of mel-spectrogram dimensions.')

    # IPU options
    pipeline_schedule_options = [
        p.name for p in ipu.pipelining_ops.PipelineSchedule]
    schedulers_available = ['Clustering',
                            'PostOrder', 'LookAhead', 'ShortestPath']
    recomputation_modes_available = [
        p.name for p in ipu.pipelining_ops.RecomputationMode]
    group.add_argument('--gradient-accumulation-count', type=int, default=None,
                       help="Number of gradients to accumulate in the pipeline. Must also set --shards > 1.")
    group.add_argument('--pipeline-schedule', type=str, default="Interleaved", choices=pipeline_schedule_options,
                       help="Pipelining scheduler.")
    group.add_argument('--replicas', type=int, default=1,
                       help="Replicate the graph over N workers to increase the effective batch size to batch-size * N.")
    group.add_argument('--precision', type=str, default="16", choices=["16", "32"],
                       help="Precision of ops (weights/activations/gradients) data types: 16 or 32.")
    group.add_argument('--available-memory-proportion', type=str, default="0.23",
                       help="Proportion of IPU memory available to matmul operations. A list can be used to specify the value for each IPU.")
    group.add_argument('--variable-offloading', type=_str_to_bool, default=True,
                       help="Enable offloading of training variables into remote memory.")
    group.add_argument('--stochastic-rounding', type=_str_to_bool, default=True,
                       help="Enable stochastic rounding. Set to False when running evaluation.")
group.add_argument("--xla-recompute", default=True, action="store_true",
help="Recompute activations during backward pass")
    group.add_argument('--fp-exceptions', default=False, action="store_true",
                       help="Enable floating-point exceptions.")
    group.add_argument('--partials-type', type=str, default="half", choices=["half", "float"],
                       help="Floating-point precision of data in matmul and convolution operations.")
    group.add_argument('--scheduler', type=str, default='Clustering', choices=schedulers_available,
                       help="Forces the compiler to use a specific scheduler when ordering the instructions.")
    group.add_argument('--recomputation-mode', type=str,
                       default="RecomputeAndBackpropagateInterleaved", choices=recomputation_modes_available)
    group.add_argument('--reduction-type', type=str, choices=['sum', 'mean'], default='mean',
                       help='The reduction type applied to the pipeline: summation or mean.')
    group.add_argument('--weight-norm-clip', type=float, default=0.,
                       help='The value at which to clip the weight norm; a value of 0 means no weight clipping.')

    # Dataset options
    group.add_argument('--data-path', type=str, required=False,
                       help="Path to the training/validation dataset TFRecord file.")
    group.add_argument('--parallel-io-threads', type=int, default=4,
                       help="Number of CPU threads used for data prefetch.")
group.add_argument('--generated-data', action="store_true", default=False,
help="Generates synthetic-data on the host and then use it for training.")

    # Add logging-specific arguments
    group_log = parser.add_argument_group('Logging')
    group_log.add_argument('--log-dir', type=str,
                           help="Log and weights save directory.")
    group_log.add_argument('--steps-per-logs', type=int, default=1,
                           help="Number of steps between logs (if the number of epochs is specified).")
    return parser
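
# Quick sanity check (illustrative only, not part of the original module):
# the common parser can be exercised on its own, e.g.
#
#   p = add_common_arguments(argparse.ArgumentParser())
#   args = p.parse_args(['--batch-size', '2', '--wandb', 'yes'])
#   assert args.batch_size == 2 and args.wandb is True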


def create_command_line_parser():
    parser = argparse.ArgumentParser(description="""FastSpeech2 on Graphcore's IPU""",
                                     add_help=False,
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    group = parser.add_argument_group("""Main options""")
    group.add_argument('--help', action='store_true',
                       default=False, help="Display help.")
    group.add_argument('--config', type=str,
                       help='FastSpeech2 configuration file in JSON format.')
    return parser


def create_all_options_parser():
    parser = create_command_line_parser()
    parser = add_common_arguments(parser)
    return parser


def make_global_options(task_specific_parsers=[]):
    # Parse command-line arguments
    command_line_parser = create_command_line_parser()
    all_options_parser = create_all_options_parser()
    for task_parser in task_specific_parsers:
        all_options_parser = task_parser(all_options_parser)
    known_command_line_args, unknown_command_line_args = command_line_parser.parse_known_args()
    if known_command_line_args.help or known_command_line_args.config is None:
        all_options_parser.print_help()
        sys.exit(os.EX_OK)
    # Parse the options specified in the configuration file
    config_file_path = known_command_line_args.config
    opts_from_config_file = FastSpeech2Config.from_json_file(config_file_path)
    # Build the global options structure from the default options
    current_options = vars(all_options_parser.parse_args())
    unknown_options = [
        opt for opt in opts_from_config_file.keys() if opt not in current_options.keys()]
    if unknown_options:
        logging.error(f"Unknown options: {unknown_options}")
        sys.exit(os.EX_USAGE)
    # Overwrite the global options with those specified in the config file
    current_options.update(opts_from_config_file)
    options_namespace = argparse.Namespace(**current_options)
    # Overwrite with command-line arguments
    all_options_namespace = all_options_parser.parse_args(
        unknown_command_line_args, options_namespace)
    logging.info(
        f"Overwriting configuration parameters: {', '.join(unknown_command_line_args)}")
    # argparse.Namespace -> dict
    opts = vars(all_options_namespace)
    return opts
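

# Illustrative usage (a minimal sketch, not part of the original module). A
# task-specific parser is any callable that takes the combined parser and
# returns it with extra arguments registered; `--my-task-option` below is
# hypothetical.
#
#   def add_my_task_arguments(parser):
#       group = parser.add_argument_group('My task options')
#       group.add_argument('--my-task-option', type=_str_to_bool, default=False,
#                          help="Hypothetical task-specific flag.")
#       return parser
#
#   opts = make_global_options(task_specific_parsers=[add_my_task_arguments])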