-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathpre_process.py
49 lines (40 loc) · 1.33 KB
/
pre_process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import pickle
import cv2 as cv
import mxnet as mx
from mxnet import recordio
from tqdm import tqdm
from config import path_imgidx, path_imgrec, IMG_DIR, pickle_file
from utils import ensure_folder
if __name__ == "__main__":
    # Dump every image stored in the indexed RecordIO file to IMG_DIR as
    # '<i>.jpg' and pickle a list of {'img': file name, 'label': class id}
    # entries describing the extracted images.
    ensure_folder(IMG_DIR)
    imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')

    samples = []
    class_ids = set()

    # Upper bound on the number of records probed. The loop is expected to
    # stop earlier, at the first record that cannot be read or decoded.
    # (Original author's note: records span 1 ~ 5179510 -- dataset specific.)
    max_records = 10000000

    try:
        for i in tqdm(range(max_records)):
            # Record i + 1 is saved as '<i>.jpg'; index 0 is never read
            # (presumably a metadata header -- TODO confirm).
            header, s = recordio.unpack(imgrec.read_idx(i + 1))
            img = mx.image.imdecode(s).asnumpy()
            # The decoded image is treated as RGB and converted to BGR
            # before being handed to OpenCV for writing.
            img = cv.cvtColor(img, cv.COLOR_RGB2BGR)
            label = int(header.label)

            filename = '{}.jpg'.format(i)
            # cv.imwrite signals failure through its return value; treat a
            # failed write like any other extraction error so the pickled
            # index never lists an image that is missing on disk.
            if not cv.imwrite(os.path.join(IMG_DIR, filename), img):
                raise IOError('failed to write image: ' + filename)

            # Register the sample only after its image is safely on disk.
            class_ids.add(label)
            samples.append({'img': filename, 'label': label})
    except Exception as err:
        # Any failure (missing index, undecodable record, failed write) ends
        # the extraction; it is reported and the samples collected so far are
        # still saved below. KeyboardInterrupt is not an Exception subclass,
        # so Ctrl-C still propagates.
        print(err)
    finally:
        # Release the underlying record/index file handles.
        imgrec.close()

    with open(pickle_file, 'wb') as file:
        pickle.dump(samples, file)

    print('num_samples: ' + str(len(samples)))

    class_ids = list(class_ids)
    print(len(class_ids))
    # max() raises ValueError on an empty sequence; skip it when nothing
    # was extracted.
    if class_ids:
        print(max(class_ids))