-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgenerateHashes.py
78 lines (68 loc) · 2.99 KB
/
generateHashes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import glob
import time
import base64
import datetime
import subprocess
import multiprocessing
# Root folder scanned recursively for images to hash. This placeholder value
# MUST be edited before running; the __main__ block refuses to start while the
# default is still in place.
inputFolder = r'C:\images\to\be\hashed'
def generateHash(outputFolder, appName, libName, imageFile):
    """Hash one image via the external jPhotoDNA tool and record the result.

    Runs ``<outputFolder>/<appName> <outputFolder>/<libName> <imageFile>``,
    parses the tool's ``fileName|n1,n2,...`` stdout line, packs the decimal
    components into big-endian bytes, and appends ``fileName,<base64>`` to a
    per-worker text file (one file per pool process, so concurrent workers
    never interleave writes).
    """
    workerId = multiprocessing.current_process().name
    result = subprocess.run([os.path.join(outputFolder, appName), os.path.join(outputFolder, libName), imageFile], stdout = subprocess.PIPE)
    # Decode and split the tool output once (the original decoded/split twice).
    output = result.stdout.decode('utf-8')
    fileName, _, hashString = output.partition('|')
    hashParts = hashString.replace('\r\n', '').replace('\n', '').split(',')
    # Size each component's buffer by its BIT length, not its decimal digit
    # count. The original used (len(digits)+7)//8, which only works for
    # values <= 255 (e.g. 256 has 3 digits -> 1 byte -> OverflowError).
    # For the 0-255 range the output is byte-identical to the original.
    pieces = []
    for part in hashParts:
        value = int(part)
        pieces.append(value.to_bytes(max(1, (value.bit_length() + 7) // 8), 'big'))
    hashBytes = b''.join(pieces)
    with open(os.path.join(outputFolder, workerId + '.txt'), 'a', encoding = 'utf8') as outputFile:
        outputFile.write(fileName + ',' + base64.b64encode(hashBytes).decode('utf-8') + '\n')
if __name__ == '__main__':
    outputFolder = os.getcwd()
    # Select the platform-specific helper binary and native PhotoDNA library.
    if sys.platform == "win32":
        appName = 'jPhotoDNA.exe'
        libName = 'PhotoDNAx64.dll'
    elif sys.platform == "darwin":
        appName = 'jPhotoDNA.app/Contents/MacOS/jPhotoDNA'
        libName = 'PhotoDNAx64.so'
    else:
        print("Linux is not supported.")
        quit()
    # Refuse to run while the placeholder input path is still in place.
    if (inputFolder == r'C:\images\to\be\hashed'):
        print('Please update the input folder path on row 13.')
        quit()
    startTime = time.time()
    print('Generating hashes for all images under ' + inputFolder)
    p = multiprocessing.Pool()
    # Pool() with no args spawns cpu_count() workers; report that instead of
    # reading the private Pool._processes attribute.
    workerCount = multiprocessing.cpu_count()
    print('Starting processing using ' + str(workerCount) + ' threads.')
    # Collect candidate images recursively (jpg/jpeg, png, gif, bmp).
    images = []
    for pattern in ('*.jp*g', '*.png', '*.gif', '*.bmp'):
        images.extend(glob.glob(os.path.join(inputFolder, '**', pattern), recursive = True))
    imageCount = len(images)
    # Keep the AsyncResult handles so worker exceptions can be surfaced;
    # the original discarded them, silently swallowing any failure.
    asyncResults = [p.apply_async(generateHash, [outputFolder, appName, libName, f]) for f in images]
    p.close()
    p.join()
    for r in asyncResults:
        try:
            r.get()
        except Exception as e:
            # Report but keep going: one bad image must not abort the merge.
            print('Hashing failed for one image: ' + str(e))
    # Merge every per-worker output file. Match by pattern rather than
    # hard-coding 'SpawnPoolWorker-<n>': fork-based pools name their
    # workers 'ForkPoolWorker-<n>', which the original would never find.
    allHashes = []
    for workerFile in sorted(glob.glob(os.path.join(outputFolder, '*PoolWorker-*.txt'))):
        with open(workerFile, 'r', encoding = 'utf8') as inputFile:
            allHashes.extend(inputFile.read().splitlines())
        os.remove(workerFile)
    with open(os.path.join(outputFolder, 'hashes.csv'), 'w', encoding = 'utf8', errors = 'ignore') as outputFile:
        for line in allHashes:
            outputFile.write(line + '\n')
    print('Results saved into ' + os.path.join(outputFolder, 'hashes.csv'))
    print('Generated hashes for ' + f'{imageCount:,}' + ' images in ' + str(int(round((time.time() - startTime)))) + ' seconds.')