# rare_resnet34.yaml — forked from mindspore-lab/mindocr
# (144 lines, 132 loc, 3.83 KB at time of fork)
# Global runtime settings for MindSpore execution.
system:
  mode: 0  # 0 for graph mode, 1 for pynative mode in MindSpore
  distribute: True
  amp_level: "O2"
  seed: 42
  log_interval: 100
  val_while_train: True
  drop_overflow_update: False
# Shared values, anchored here and aliased throughout the file.
common:
  # null -> use the built-in default character dictionary (presumably the
  # 36-char lowercase alphanumeric set given num_classes = 36 + 2 — TODO confirm)
  character_dict_path: &character_dict_path null
  num_classes: &num_classes 38  # num_chars_in_dict + 2
  max_text_len: &max_text_len 25
  infer_mode: &infer_mode False
  use_space_char: &use_space_char False
  batch_size: &batch_size 512
# RARE architecture: ResNet34 backbone -> BiRNN encoder neck -> attention head.
model:
  type: rec
  transform: null
  backbone:
    name: rec_resnet34
    pretrained: False
  neck:
    name: RNNEncoder
    hidden_size: 256
  head:
    name: AttentionHead
    hidden_size: 256
    out_channels: *num_classes
    batch_max_length: *max_text_len
# Decodes attention-head logits back to text strings.
postprocess:
  name: RecAttnLabelDecode
  character_dict_path: *character_dict_path
  use_space_char: *use_space_char
# Recognition accuracy metric used for validation-during-training.
metric:
  name: RecMetric
  main_indicator: acc
  character_dict_path: *character_dict_path
  ignore_space: True
  print_flag: False
# Attention-based sequence loss matching the AttentionHead output.
loss:
  name: AttentionLoss
# LR schedule: 1 warmup epoch, then cosine decay over the remaining 29.
scheduler:
  scheduler: warmup_cosine_decay
  min_lr: 0.0
  lr: 0.0005
  num_epochs: 30
  warmup_epochs: 1
  decay_epochs: 29
# AdamW; bias and BN parameters are excluded from weight decay.
optimizer:
  opt: adamw
  filter_bias_and_bn: True
  weight_decay: 0.05
# Dynamic loss scaling for O2 mixed-precision training.
loss_scaler:
  type: dynamic
  loss_scale: 512
  scale_factor: 2.0
  scale_window: 1000
# Training dataset (LMDB) and its preprocessing pipeline.
train:
  ckpt_save_dir: "./tmp_rec"
  dataset_sink_mode: False
  dataset:
    type: LMDBDataset
    dataset_root: path/to/data_lmdb_release/
    data_dir: training/
    label_file: null
    sample_ratio: 1.0
    shuffle: True
    transform_pipeline:
      - DecodeImage:
          img_mode: BGR
          to_float32: False
      - RecAttnLabelEncode:
          max_text_len: *max_text_len
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          lower: True
      # different from paddle (paddle converts image from HWC to CHW and
      # rescales to [-1, 1] after resize)
      - RecResizeImg:
          image_shape: [32, 100]  # H, W
          infer_mode: *infer_mode
          character_dict_path: *character_dict_path
          padding: False  # aspect ratio will be preserved if true.
      # different from paddle (paddle wrongly normalizes the BGR image with RGB
      # mean/std from ImageNet for det, and simply rescales to [-1, 1] in rec)
      - NormalizeImage:
          bgr_to_rgb: True
          is_hwc: True
          mean: [127.0, 127.0, 127.0]
          std: [127.0, 127.0, 127.0]
      - ToCHWImage:
    output_columns: ["image", "text_seq"]
    net_input_column_index: [0, 1]  # input indices for network forward func in output_columns
    label_column_index: [1]  # input indices marked as label
  loader:
    shuffle: True  # TODO: tbc
    batch_size: *batch_size
    drop_remainder: True
    max_rowsize: 12
    num_workers: 1
# Evaluation dataset (LMDB) and its preprocessing pipeline.
eval:
  ckpt_load_path: "./tmp_rec/best.ckpt"
  dataset_sink_mode: False
  dataset:
    type: LMDBDataset
    dataset_root: path/to/data_lmdb_release/
    data_dir: validation/
    label_file: null
    sample_ratio: 1.0
    shuffle: False
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: False
      - RecAttnLabelEncode:
          max_text_len: *max_text_len
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          lower: True
      - RecResizeNormForInfer:
          target_height: 32
          target_width: 100
          keep_ratio: False
          padding: False
          norm_before_pad: False
      - ToCHWImage:
    # the order of the dataloader list, matching the network input and the input
    # labels for the loss function, and optional data for debug/visualize
    output_columns: ["image", "text_padded", "text_length"]
    net_input_column_index: [0]  # input indices for network forward func in output_columns
    label_column_index: [1, 2]  # input indices marked as label
  loader:
    shuffle: False
    batch_size: *batch_size  # was a hardcoded 512; alias keeps it in sync with common
    drop_remainder: False
    max_rowsize: 12
    num_workers: 1