Skip to content

Commit

Permalink
[Feature] Add extra dataloader settings in configs. (#264)
Browse files Browse the repository at this point in the history
* [Feature] support to set validation samples per gpu independently

* set default to be cfg.data.samples_per_gpu

* modify the tools/test.py

* using 'train_dataloader', 'val_dataloader', 'test_dataloader' for specific settings

* test 'evaluation' branch
  • Loading branch information
fangyixiao18 authored Apr 13, 2022
1 parent cad039b commit b859828
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 29 deletions.
58 changes: 34 additions & 24 deletions mmselfsup/apis/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,21 +90,30 @@ def train_model(model,
f'{cfg.data.imgs_per_gpu} in this experiments')
cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

data_loaders = [
build_dataloader(
ds,
samples_per_gpu=cfg.data.samples_per_gpu,
workers_per_gpu=cfg.data.workers_per_gpu,
# `num_gpus` will be ignored if distributed
num_gpus=len(cfg.gpu_ids),
dist=distributed,
replace=getattr(cfg.data, 'sampling_replace', False),
seed=cfg.seed,
drop_last=getattr(cfg.data, 'drop_last', False),
prefetch=cfg.prefetch,
persistent_workers=cfg.persistent_workers,
img_norm_cfg=cfg.img_norm_cfg) for ds in dataset
]
# The default loader config
loader_cfg = dict(
# `num_gpus` will be ignored if distributed
num_gpus=len(cfg.gpu_ids),
dist=distributed,
replace=getattr(cfg.data, 'sampling_replace', False),
drop_last=getattr(cfg.data, 'drop_last', False),
prefetch=getattr(cfg, 'prefetch', False),
seed=cfg.get('seed'),
persistent_workers=cfg.persistent_workers,
img_norm_cfg=cfg.img_norm_cfg)

# The overall dataloader settings
loader_cfg.update({
k: v
for k, v in cfg.data.items() if k not in [
'train', 'val', 'test', 'train_dataloader', 'val_dataloader',
'test_dataloader'
]
})
# The specific train dataloader settings
train_loader_cfg = {**loader_cfg, **cfg.data.get('train_dataloader', {})}

data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset]

# put model on gpus
if distributed:
Expand Down Expand Up @@ -173,15 +182,16 @@ def train_model(model,
# register evaluation hook
if cfg.get('evaluation', None):
val_dataset = build_dataset(cfg.data.val)
val_dataloader = build_dataloader(
val_dataset,
samples_per_gpu=cfg.data.samples_per_gpu,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=distributed,
shuffle=False,
prefetch=cfg.data.val.prefetch,
drop_last=getattr(cfg.data, 'drop_last', False),
img_norm_cfg=cfg.get('img_norm_cfg', dict()))

# The specific validation dataloader settings
val_loader_cfg = {
**loader_cfg,
'shuffle': False,  # Do not shuffle by default
'drop_last': False,
**cfg.data.get('val_dataloader', {}),
}
val_dataloader = build_dataloader(val_dataset, **val_loader_cfg)

eval_cfg = cfg.get('evaluation', {})
eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
eval_hook = DistEvalHook if distributed else EvalHook
Expand Down
6 changes: 6 additions & 0 deletions tests/test_apis/test_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ def test_train_model():
cfg.data.samples_per_gpu = 1
cfg.data.workers_per_gpu = 2

cfg.data.val.data_source.data_prefix = 'tests/data/'
cfg.data.val.data_source.ann_file = 'tests/data/data_list.txt'

# Specify the optimizer
cfg.optimizer = dict(
type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)
Expand Down Expand Up @@ -83,6 +86,9 @@ def test_train_model():
# Build the dataset
datasets = [ExampleDataset()]

# evaluation
cfg.evaluation = dict(interval=10, topk=(1, 5))

# Start pre-train
train_model(
model,
Expand Down
28 changes: 23 additions & 5 deletions tools/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,30 @@ def main():
'Automatically set "samples_per_gpu"="imgs_per_gpu"='
f'{cfg.data.imgs_per_gpu} in this experiments')
cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu
data_loader = build_dataloader(
dataset,
samples_per_gpu=cfg.data.samples_per_gpu,
workers_per_gpu=cfg.data.workers_per_gpu,

# The default loader config
loader_cfg = dict(
# `num_gpus` will be ignored if distributed
num_gpus=len(cfg.gpu_ids),
dist=distributed,
shuffle=False)
prefetch=getattr(cfg, 'prefetch', False),
img_norm_cfg=cfg.img_norm_cfg)

# The overall dataloader settings
loader_cfg.update({
k: v
for k, v in cfg.data.items() if k not in [
'train', 'val', 'test', 'train_dataloader', 'val_dataloader',
'test_dataloader'
]
})
# The specific test dataloader settings
test_loader_cfg = {
**loader_cfg,
'shuffle': False,  # Do not shuffle by default
**cfg.data.get('test_dataloader', {}),
}
data_loader = build_dataloader(dataset, **test_loader_cfg)

# build the model and load checkpoint
model = build_algorithm(cfg.model)
Expand Down

0 comments on commit b859828

Please sign in to comment.