# main.yaml (forked from baudm/parseq): top-level Hydra training configuration

defaults:
  - _self_
  - model: parseq
  - charset: 94_full
  - dataset: real
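
# Composition note: with `_self_` listed first, the model/charset/dataset
# groups override values defined in this file; that is how the mandatory
# `???` fields below are expected to be filled in. A hedged sketch of what a
# charset group file might look like (hypothetical path and content):
#   # configs/charset/36_lowercase.yaml
#   model:
#     charset_train: "0123456789abcdefghijklmnopqrstuvwxyz"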

model:
  _convert_: all
  img_size: [ 32, 128 ]  # [ height, width ]
  max_label_length: 25
  # The ordering in charset_train matters. It determines the token IDs assigned to each character.
  charset_train: ???
  # For charset_test, ordering doesn't matter.
  charset_test: "0123456789abcdefghijklmnopqrstuvwxyz"
  batch_size: 384
  weight_decay: 0.0
  warmup_pct: 0.075  # equivalent to 1.5 epochs of warm-up
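
# Any of the model values can be overridden at launch, e.g. (a hedged example;
# train.py is assumed to be the repository's Hydra entry point):
#   ./train.py model.batch_size=192 model.max_label_length=30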

data:
  _target_: strhub.data.module.SceneTextDataModule
  root_dir: data
  train_dir: ???
  batch_size: ${model.batch_size}
  img_size: ${model.img_size}
  charset_train: ${model.charset_train}
  charset_test: ${model.charset_test}
  max_label_length: ${model.max_label_length}
  remove_whitespace: true
  normalize_unicode: true
  augment: true
  num_workers: 2
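
# The ${model.*} interpolations keep the data module in sync with the model
# config, leaving only train_dir mandatory here. A hedged sketch of how the
# dataset group might supply it (hypothetical content):
#   # configs/dataset/real.yaml
#   data:
#     train_dir: real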

trainer:
  _target_: pytorch_lightning.Trainer
  _convert_: all
  val_check_interval: 1000
  #max_steps: 169680 # 20 epochs x 8484 steps (for batch size = 384, real data)
  max_epochs: 20
  gradient_clip_val: 20
  accelerator: gpu
  devices: 2
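
# Sanity check on the commented-out max_steps above: 20 epochs x 8484
# steps/epoch = 169,680 steps, and 8484 steps x 384 samples/step is roughly
# 3.26M training samples per epoch. Given as an int, val_check_interval runs
# validation every 1000 training batches.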

ckpt_path: null  # checkpoint to resume training from
pretrained: null  # pretrained weights to initialize the model with
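
# A hedged resume sketch (the checkpoint path is hypothetical):
#   ./train.py ckpt_path=outputs/parseq/<run>/checkpoints/last.ckpt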

hydra:
  output_subdir: config
  run:
    dir: outputs/${model.name}/${now:%Y-%m-%d}_${now:%H-%M-%S}
  sweep:
    dir: multirun/${model.name}/${now:%Y-%m-%d}_${now:%H-%M-%S}
    subdir: ${hydra.job.override_dirname}
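
# Example launches (hedged; the overrides are illustrative). A single run
# writes to outputs/<model.name>/<date>_<time>/, with the composed config
# saved under its `config` subdirectory (output_subdir):
#   ./train.py trainer.devices=1 trainer.val_check_interval=500
# A multirun sweep writes one subdirectory per override combination under
# multirun/<model.name>/<date>_<time>/:
#   ./train.py -m model.batch_size=256,384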