# configs/det/dbnet/db_r18_synthtext.yaml

# Global runtime settings for the training job.
system:
  mode: 0  # 0 for graph mode, 1 for pynative mode in MindSpore
  distribute: false
  # NOTE(review): 'O0' presumably means no mixed precision - confirm.
  amp_level: 'O0'
  seed: 42
  log_interval: 100
  val_while_train: false
  drop_overflow_update: false

# Network definition: a detection model assembled from a backbone, a neck and
# a head, each selected by `name`.
model:
  type: det
  transform: null
  backbone:
    name: det_resnet18
    pretrained: true
  neck:
    name: DBFPN
    out_channels: 256
    bias: false
  head:
    name: DBHead
    # NOTE(review): k is presumably the steepness factor of the differentiable
    # binarization step - confirm against the DBHead implementation.
    k: 50
    bias: false
    adaptive: true

# Loss configuration: `name` selects the loss, the remaining keys are its
# hyper-parameters.
# NOTE(review): l1_scale / bce_scale look like per-term weights of the combined
# loss - confirm against the DBLoss implementation.
loss:
  name: DBLoss
  # Keep the explicit ".0" and exponent sign: some YAML 1.1 loaders read a
  # shorter form such as 1e-6 as a string instead of a float.
  eps: 1.0e-6
  l1_scale: 10
  bce_scale: 5
  bce_replace: bceloss

# Learning-rate schedule: polynomial decay starting from `lr`, over
# `num_epochs` epochs, with zero warm-up epochs configured.
scheduler:
  scheduler: polynomial_decay
  lr: 0.007
  num_epochs: 2
  # NOTE(review): presumably the exponent of the polynomial decay - confirm.
  decay_rate: 0.9
  warmup_epochs: 0

# Optimizer settings: SGD with momentum and weight decay.
optimizer:
  opt: SGD
  # NOTE(review): presumably excludes bias and batch-norm parameters from
  # weight decay - confirm against the optimizer factory.
  filter_bias_and_bn: true
  momentum: 0.9
  # Written with an explicit ".0" and exponent sign so that YAML 1.1 loaders
  # resolve it as a float.
  weight_decay: 5.0e-4

# Loss-scaling settings; only used for mixed precision training.
# NOTE(review): system.amp_level is 'O0' in this file, so verify whether this
# section has any effect for this configuration.
loss_scaler:
  type: dynamic
  loss_scale: 512
  # NOTE(review): with type `dynamic`, scale_factor / scale_window presumably
  # control how and how often the scale is adjusted - confirm.
  scale_factor: 2
  scale_window: 1000

# Training-time settings: checkpoint directory, dataset definition with its
# transform pipeline, and data-loader options.
train:
  ckpt_save_dir: 'ckpts/dbnet/SynthText'
  dataset_sink_mode: true
  dataset:
    type: SynthTextDataset
    dataset_root: /data/ocr_datasets
    data_dir: SynthText
    label_file: SynthText/gt_processed.mat
    sample_ratio: 1.0
    # Each pipeline entry is a single-key mapping: the transform name mapped
    # to its options, or to null when the transform takes no options here.
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: false
      - RandomColorAdjust:
          brightness: 0.1255  # 32.0 / 255
          saturation: 0.5
      - RandomHorizontalFlip:
          p: 0.5
      - RandomRotate:
          degrees: [-10, 10]
          expand_canvas: false
          p: 1.0
      - RandomScale:
          scale_range: [0.5, 3.0]
          p: 1.0
      - RandomCropWithBBox:
          max_tries: 10
          min_crop_ratio: 0.1
          crop_size: [640, 640]
          p: 1.0
      - ValidatePolygons: null
      - ShrinkBinaryMap:
          min_text_size: 8
          shrink_ratio: 0.4
      - BorderMap:
          shrink_ratio: 0.4
          thresh_min: 0.3
          thresh_max: 0.7
      - NormalizeImage:
          bgr_to_rgb: false
          is_hwc: true
          mean: imagenet
          std: imagenet
      - ToCHWImage: null
    # Order of the dataloader columns. It must match the network input and the
    # label inputs of the loss function; extra columns may be added for
    # debugging or visualization.
    output_columns: ['image', 'binary_map', 'mask', 'thresh_map', 'thresh_mask']
    # output_columns: ['image']  # for debug op performance
    net_input_column_index: [0]  # input indices for network forward func in output_columns
    label_column_index: [1, 2, 3, 4]  # input indices marked as label

  loader:
    shuffle: true
    batch_size: 16
    drop_remainder: true
    num_workers: 8