-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranscript.py
64 lines (51 loc) · 1.84 KB
/
transcript.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import glob, os
import hashlib
import io
import logging
logger = logging.getLogger(__name__)
def md5sum(src):
    """Return the hexadecimal MD5 digest of the file at ``src``.

    The file is opened in binary mode and hashed in fixed-size chunks, so
    memory use does not grow with the size of the file.

    Args:
        src: Path of the file to hash.

    Returns:
        The MD5 digest of the file contents as a hex string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 65536
    md5 = hashlib.md5()
    with io.open(src, mode="rb") as fd:
        # iter() with a sentinel stops as soon as read() returns b"" (EOF).
        for chunk in iter(lambda: fd.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()
def check_for_files(file_path):
    """Tell whether a glob pattern matches at least one regular file.

    Args:
        file_path: Glob pattern (or plain path) passed to ``glob.glob``.

    Returns:
        True if any match is a regular file, otherwise False. Matches that
        are directories do not count.
    """
    # any() always yields a real bool, so the "no match" case is an explicit
    # False instead of an implicit None.
    return any(os.path.isfile(match) for match in glob.glob(file_path))
def remove_all_new_transcript():
    """Delete every regular ``*.pdf`` file in the current working directory.

    Entries that match the pattern but are not regular files (for example a
    directory named ``something.pdf``) are left in place.
    """
    # filter() is lazy, so each entry is checked right before it is removed.
    for pdf_path in filter(os.path.isfile, glob.glob("*.pdf")):
        os.remove(pdf_path)
# True when two md5 are different
def is_different_transcript(old_transcript_md5, min_size=102400):
    """Report whether the newly downloaded transcript differs from the old one.

    Only the first ``*.pdf`` returned by ``glob`` in the current working
    directory is examined.

    Args:
        old_transcript_md5: MD5 hex digest of the previous transcript, or
            None when there is none to compare against.
        min_size: Size in bytes that the new PDF must exceed before it is
            hashed. Defaults to 102400 (100 KiB). A file at or below this
            size is treated as an unusable download.

    Returns:
        True when a large-enough PDF exists and its digest differs from
        ``old_transcript_md5``. False when no PDF is present, when the PDF
        is too small, or when the digests are equal. In the last two cases
        ``remove_all_new_transcript()`` is called before returning.
    """
    files = glob.glob("*.pdf")
    if not files:
        logger.error('Transcript download failed')
        return False

    new_file = files[0]
    new_size = os.path.getsize(new_file)
    if new_size <= min_size:
        # The file was never hashed, so reporting "Same MD5" here would be
        # misleading; say what actually happened instead.
        logger.warning(
            'New transcript %s is too small (%d bytes, need more than %d)',
            new_file, new_size, min_size,
        )
        remove_all_new_transcript()
        return False

    new_transcript_md5 = md5sum(new_file)
    logger.info('New transcript fileName: %s', new_file)
    logger.info('Hash: %s', new_transcript_md5)

    if new_transcript_md5 == old_transcript_md5:
        logger.info('Same MD5')
        remove_all_new_transcript()
        return False

    logger.info('old_transcript_md5: %s', old_transcript_md5)
    logger.info('new_transcript_md5: %s', new_transcript_md5)
    return True
def get_old_transcript_md5():
    """Return the MD5 digest of the stored old transcript, if there is one.

    The old transcript is the first ``*.pdf`` returned by ``glob`` inside the
    ``old_transcript`` directory that sits next to this module.

    Side effect: the process working directory is changed to this module's
    directory before returning.

    Returns:
        The MD5 hex digest of the old transcript, or None when the
        ``old_transcript`` directory contains no PDF.

    Raises:
        OSError: If the ``old_transcript`` directory does not exist or the
            PDF cannot be read.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    # os.path.join picks the right separator on every platform; the literal
    # '\old_transcript' relied on an invalid escape and only worked on Windows.
    os.chdir(os.path.join(module_dir, 'old_transcript'))
    transcript_md5 = None
    try:
        files = glob.glob("*.pdf")
        if files:
            file = files[0]
            transcript_md5 = md5sum(file)
            logger.info('Old transcript fileName: %s', file)
            logger.info('Hash: %s', transcript_md5)
    finally:
        # Leave old_transcript even when hashing fails, so later relative
        # globs do not run in the wrong directory.
        os.chdir(module_dir)
    return transcript_md5