forked from dqshuai/MetaFormer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtraining_setup.txt
115 lines (85 loc) · 4.81 KB
/
training_setup.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
-=-= Google Cloud Compute Engine Specs and OS Image =-=-
Python 3.7.12
NVIDIA CUDA 11.3
NCCL 2.14.3
PyTorch 1.13.1+ CUDA 11.7
OS: Debian 10
CPU: Intel(R) Xeon(R) CPU @ 2.00GHz (4 vCores)
RAM: 15GB
GPU: NVIDIA Tesla T4, 16GB Memory
Storage: 300GB Balanced Disk (SSD)
-=-= GCP Compute Engine VM Instance Setup =-=-
nvidia-smi
nvcc -V
python3 --version
python3
import torch; torch.__version__; torch.cuda.is_available(); torch.cuda.nccl.version(); torch.cuda.device_count(); torch.cuda.current_device(); torch.cuda.device(0); torch.cuda.get_device_name(0);
git clone https://github.com/orumetsu/MetaFormerBSL.git
cd ./MetaFormerBSL
mkdir ./output
mkdir ./datasets
mkdir ./datasets/inaturalist2018
mkdir ./pretrained_model
cd ./pretrained_model
pip3 install gdown
gdown 11gCk_IuSN7krdkOUSWSM4xlf8GGknmxc
cd ../datasets/inaturalist2018
gdown 1GDU4Am1yAtkujl_c3MZnjTV-B_Z8RUF8
wget --no-check-certificate https://ml-inat-competition-datasets.s3.amazonaws.com/2018/train_val2018.tar.gz
md5sum train_val2018.tar.gz ---> b1c6952ce38f31868cc50ea72d066cc3
wget --no-check-certificate https://ml-inat-competition-datasets.s3.amazonaws.com/2018/train2018.json.tar.gz
md5sum train2018.json.tar.gz ---> bfa29d89d629cbf04d826a720c0a68b0
wget --no-check-certificate https://ml-inat-competition-datasets.s3.amazonaws.com/2018/val2018.json.tar.gz
md5sum val2018.json.tar.gz ---> f2ed8bfe3e9901cdefceb4e53cd3775d
wget --no-check-certificate https://ml-inat-competition-datasets.s3.amazonaws.com/2018/inat2018_locations.zip
md5sum inat2018_locations.zip ---> 1704763abc47b75820aa5a3d93c6c0f3
wget --no-check-certificate https://ml-inat-competition-datasets.s3.amazonaws.com/2018/categories.json.tar.gz
tar -xf categories.json.tar.gz
tar -xf train2018.json.tar.gz
tar -xf val2018.json.tar.gz
tar -xf train_val2018.tar.gz
sudo apt-get install unzip
unzip inat2018_locations.zip
rm categories.json.tar.gz inat2018_locations.zip train2018.json.tar.gz train_val2018.tar.gz val2018.json.tar.gz
mv ./inat2018_locations/train2018_locations.json ./train2018_locations.json
mv ./inat2018_locations/val2018_locations.json ./val2018_locations.json
rmdir ./inat2018_locations
cd ../..
pip3 install timm==0.4.5
pip3 install opencv-python==4.7.0.72
pip3 install tensorboard==2.11.2
pip3 install termcolor==2.3.0
pip3 install yacs==0.1.8
pip3 install -U openmim
mim install mmcv==2.0.0
--- SOFTMAX LOSS ---
torchrun --standalone --nnodes=1 --nproc_per_node=1 main.py --cfg ./configs/MetaFG_meta_0_224.yaml --dataset inaturalist2018 --pretrain ./pretrained_model/metafg_0_inat21_384.pth --opts DATA.IMG_SIZE 224 --epochs 150 --warmup-epochs 10 --num-workers 4 --batch-size 128 --accumulation-steps 4 --tag MetaFormer_0-iNat2018-Softmax_Loss
--- BALANCED SOFTMAX LOSS ---
torchrun --standalone --nnodes=1 --nproc_per_node=1 main.py --cfg ./configs/MetaFG_meta_0_224.yaml --dataset inaturalist2018 --pretrain ./pretrained_model/metafg_0_inat21_384.pth --opts DATA.IMG_SIZE 224 --epochs 150 --warmup-epochs 10 --num-workers 4 --batch-size 128 --accumulation-steps 4 --tag MetaFormer_0-iNat2018-Balanced_Softmax_Loss --use-balanced-softmax-loss
--- Evaluate model ---
torchrun --standalone --nnodes=1 --nproc_per_node=1 main.py --cfg ./configs/MetaFG_meta_0_224.yaml --dataset inaturalist2018 --eval --resume "./output/MetaFG_meta_0/MetaFormer_0-iNat2018-Softmax_Loss/ckpt_epoch_133.pth" --num-workers 4 --batch-size 8
torchrun --standalone --nnodes=1 --nproc_per_node=1 main.py --cfg ./configs/MetaFG_meta_0_224.yaml --dataset inaturalist2018 --eval --resume "./output/MetaFG_meta_0/MetaFormer_0-iNat2018-Balanced_Softmax_Loss/ckpt_epoch_140.pth" --num-workers 4 --batch-size 8
--- Local Directory ---
cd "/mnt/e/Rivan/Tugas Akhir S1/MetaFormerBSL"
--- USE THIS IF ON LINUX TERMINAL ---
$ tmux
$ make <something big>
......
Connection fails for some reason
Reconect
$ tmux ls
0: 1 windows (created Tue Aug 23 12:39:52 2011) [103x30]
$ tmux attach -t 0
Back in the tmux session
--- tmux stuff ---
Scroll Mode: Press Ctrl-b then [
Quit Scroll Mode: Press q
Quit Window: Press Ctrl-b then d
--- Output Path ---
/home/rivan_arif/MetaFormerBSL/output/MetaFG_meta_0/MetaFormer_0-iNat2018-Balanced_Softmax_Loss/xxxxxx.xxx
/home/rivan_arif/MetaFormerBSL/output/MetaFG_meta_0/MetaFormer_0-iNat2018-Softmax_Loss/xxxxxx.xxx
--- GCLOUD CLI SETUP ---
[Block Project SSH Keys]
gcloud init [then just follow instructions]
gcloud compute scp metaformerbsl-1:/home/rivan_arif/MetaFormerBSL/output/MetaFG_meta_0/MetaFormer_0-iNat2018-Balanced_Softmax_Loss/log_rank_0.txt "E:\Rivan\Tugas Akhir S1\MetaFormerBSL\output\MetaFG_meta_0\MetaFormer_0-iNat2018-Balanced_Softmax_Loss" --tunnel-through-iap
gcloud compute scp metaformerbsl-1:/home/rivan_arif/MetaFormerBSL/output/MetaFG_meta_0/MetaFormer_0-iNat2018-Softmax_Loss/log_rank_0.txt "E:\Rivan\Tugas Akhir S1\MetaFormerBSL\output\MetaFG_meta_0\MetaFormer_0-iNat2018-Softmax_Loss" --tunnel-through-iap