Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
UngeheurenUngeziefer committed Jun 15, 2020
0 parents commit acea765
Showing 1 changed file with 40 additions and 0 deletions.
40 changes: 40 additions & 0 deletions Duplicate_cleaner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import hashlib
from hashlib import md5
from scipy import *
import matplotlib.pyplot as plt
import time
import numpy as np
import os

def file_hash(filepath, chunk_size=65536):
    """Return the MD5 hex digest of the file at *filepath*.

    The file is read in binary mode in fixed-size chunks so that large
    files are never loaded into memory all at once.

    MD5 is used here only as a content fingerprint for spotting identical
    files; it is not suitable for security purposes.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration.

    Returns:
        The digest as a 32-character lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = md5()
    with open(filepath, 'rb') as f:
        # iter() with a sentinel keeps calling f.read until it returns b''
        # (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# Work inside the downloads folder so the bare file names returned by
# os.listdir() can be opened directly.
os.chdir(r'C:\Users\sewer\MyPython\Pepe_project\downloads')

# Take a single snapshot of the directory. Every index stored below refers to
# a position in this list, so the scan and the preview must share it.
files_list = os.listdir()

# duplicates: (index_of_duplicate, index_of_first_file_with_same_content)
# hash_keys:  MD5 hex digest -> index of the first file seen with that digest
duplicates = []
hash_keys = dict()
for index, filename in enumerate(files_list):
    # Sub-directories (and anything else that is not a regular file) are
    # skipped; only regular files are hashed.
    if not os.path.isfile(filename):
        continue
    filehash = file_hash(filename)
    if filehash not in hash_keys:
        hash_keys[filehash] = index
    else:
        duplicates.append((index, hash_keys[filehash]))

# NOTE(review): the original carried the remark "405 and 4" here --
# presumably one duplicate pair observed during a run; confirm or drop.

# Preview at most the first 30 duplicate pairs side by side: the first-seen
# file on the left, its duplicate on the right. Tick marks are removed because
# the axes are showing images, not data.
for duplicate_index, original_index in duplicates[:30]:
    plt.subplot(121)
    plt.imshow(plt.imread(files_list[original_index]))
    plt.title(str(original_index))
    plt.xticks([])
    plt.yticks([])

    plt.subplot(122)
    plt.imshow(plt.imread(files_list[duplicate_index]))
    plt.title(str(duplicate_index) + ' duplicate')
    plt.xticks([])
    plt.yticks([])

    # Show one pair per figure window.
    plt.show()

0 comments on commit acea765

Please sign in to comment.