hparams_config.yml

pytorch:
  # Generic training settings
  max_epochs: 40
  batch_size: 32
  loader_workers: 0  # https://github.com/giacomorebecchi/food-pricing/issues/45
  lazy_dataset: False
  shuffle_train_dataset: True
  num_sanity_val_steps: 0
  accumulate_grad_batches: null
  accelerator: auto
  devices: 1
  # Size of initial and intermediate embeddings
  img_dim: 224
  embedding_dim: 300
  language_feature_dim: 512
  vision_feature_dim: 512
  # Size of the layer after the concatenation 
  fusion_output_dim: 512
  # Dropout
  dropout_p: 0.4
  # Callbacks
  verbose: True
  early_stop_patience: 40
  backup_n_epochs: 5
  # Optimization settings
  optimizer_name: adamw
  optimizer_lr: 0.00005
  optimizer_weight_decay: 0.1
  lr_scheduler_factor: 0.2
  lr_scheduler_patience: 2
  lr_scheduler_min_lr: 0.0000001
  # Encoder optimization settings
  encoder_optimizer_lr: 0.00001
  encoder_optimizer_weight_decay: 0.1
  # Storage of test predictions
  store_submission_frame: True
  # Unfreezing of encoders parameters
  n_epochs_unfreeze_language_module: 10
  n_epochs_unfreeze_vision_module: 10
  n_epochs_unfreeze_dual_module: 10

xgb:
  # Data transformation in embeddings
  load_data: True  # could be True if sure the cached arrays are correct
  loader_workers: 0  # https://github.com/giacomorebecchi/food-pricing/issues/45
  shuffle: False
  # Limit combination of hyperparameters tested
  max_iter: 1000 # intentionally very high
  # XGB trainer hyperparameters
  num_round: 100
  early_stopping_rounds: 10
  # XGB hyperparameters
  booster: gbtree
  tree_method: hist
  colsample_bytree: [0.5, 0.6, 0.7, 0.8, 0.9, 1]  # 6
  objective: reg:squarederror
  eta: [0.01, 0.05, 0.1, 0.2, 0.3]  # 5
  max_depth: [4, 5, 6, 7, 8]  # 5
  subsample: [0.8, 0.9, 1]  # 3
  # Storage of test predictions
  store_submission_frame: True