# db_r18_synthtext.yaml
# Forked from mindspore-lab/mindocr.
# Global run settings.
system:
  mode: 0  # 0 for graph mode, 1 for pynative mode in MindSpore
  distribute: False
  amp_level: 'O0'
  seed: 42
  log_interval: 100
  val_while_train: False
  drop_overflow_update: False
# Model of type `det`, described as three components: backbone, neck and head.
# Each component carries its own `name` (and, for neck/head, `bias`), so the
# nesting below is required to keep those keys from colliding.
model:
  type: det
  transform: null
  backbone:
    name: det_resnet18
    pretrained: True
  neck:
    name: DBFPN
    out_channels: 256
    bias: False
  head:
    name: DBHead
    k: 50
    bias: False
    adaptive: True
# Loss settings (DBLoss).
loss:
  name: DBLoss
  eps: 1.0e-6
  l1_scale: 10
  bce_scale: 5
  bce_replace: bceloss
# Learning-rate schedule settings.
# Note: the nested `scheduler` key (the schedule name) intentionally shares its
# name with this section; it must stay indented under it.
scheduler:
  scheduler: polynomial_decay
  lr: 0.007
  num_epochs: 2
  decay_rate: 0.9
  warmup_epochs: 0
# Optimizer settings (SGD).
optimizer:
  opt: SGD
  filter_bias_and_bn: True
  momentum: 0.9
  weight_decay: 5.0e-4
# only used for mixed precision training
loss_scaler:
  type: dynamic
  loss_scale: 512
  scale_factor: 2
  scale_window: 1000
# Training settings: checkpoint directory, the SynthText dataset with its
# transform pipeline, and the data loader.
train:
  ckpt_save_dir: 'ckpts/dbnet/SynthText'
  dataset_sink_mode: True
  dataset:
    type: SynthTextDataset
    dataset_root: /data/ocr_datasets
    data_dir: SynthText
    label_file: SynthText/gt_processed.mat
    sample_ratio: 1.0
    # Each list entry is a single-key mapping: transform name -> its options.
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: False
      - RandomColorAdjust:
          brightness: 0.1255  # 32.0 / 255
          saturation: 0.5
      - RandomHorizontalFlip:
          p: 0.5
      - RandomRotate:
          degrees: [-10, 10]
          expand_canvas: False
          p: 1.0
      - RandomScale:
          scale_range: [0.5, 3.0]
          p: 1.0
      - RandomCropWithBBox:
          max_tries: 10
          min_crop_ratio: 0.1
          crop_size: [640, 640]
          p: 1.0
      - ValidatePolygons: null  # no options set
      - ShrinkBinaryMap:
          min_text_size: 8
          shrink_ratio: 0.4
      - BorderMap:
          shrink_ratio: 0.4
          thresh_min: 0.3
          thresh_max: 0.7
      - NormalizeImage:
          bgr_to_rgb: False
          is_hwc: True
          mean: imagenet
          std: imagenet
      - ToCHWImage: null  # no options set
    # the order of the dataloader list, matching the network input and the
    # input labels for the loss function, and optional data for debug/visualize
    output_columns: ['image', 'binary_map', 'mask', 'thresh_map', 'thresh_mask']
    # output_columns: ['image']  # for debug op performance
    net_input_column_index: [0]  # input indices for network forward func in output_columns
    label_column_index: [1, 2, 3, 4]  # input indices marked as label
  loader:
    shuffle: True
    batch_size: 16
    drop_remainder: True
    num_workers: 8