forked from mdh30/OCR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_text.py
34 lines (27 loc) · 857 Bytes
/
extract_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from PIL import Image
import pytesseract
import re
import time
import urllib
import requests
from credentials import api_key
# Main function to call
def img_text(image_path="processed_file.png", lang="en-es", timeout=30):
    """Run OCR on an image and translate the recognised text.

    The image is read with Tesseract (via ``pytesseract``) and the resulting
    text is sent to the Yandex Translate v1.5 XML endpoint. Both the OCR
    timing and the two resulting strings are printed to stdout.

    Args:
        image_path: Path of the image to read. Defaults to
            ``"processed_file.png"``.
        lang: Value sent as the ``lang`` request parameter. Defaults to
            ``"en-es"``.
        timeout: Seconds to wait on the HTTP request before ``requests``
            raises a timeout exception instead of blocking forever.

    Returns:
        A ``(text, data)`` tuple where ``text`` is the raw OCR output and
        ``data`` is the response body with markup removed and XML entities
        decoded.
    """
    # Local import: only this function needs it, and it keeps the block
    # self-contained without touching the file-level imports.
    import html

    start_time = time.time()
    # NOTE(review): nothing runs between the two clock reads, so this first
    # figure is always ~0; kept so the printed output stays unchanged.
    print("--- %s seconds prepair---" % (time.time() - start_time))

    start_time = time.time()
    # This assigns a module-level attribute on pytesseract, so the setting
    # persists for every later pytesseract call in the process.
    pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'
    # Context manager guarantees the underlying image file is closed even if
    # the OCR call raises.
    with Image.open(image_path) as image:
        text = pytesseract.image_to_string(image)
    print("--- %s seconds read---" % (time.time() - start_time))

    url = "https://translate.yandex.net/api/v1.5/tr/translate"
    params = {
        'key': api_key(),
        'text': text,
        'lang': lang,
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url=url, params=params, timeout=timeout)

    # Remove every <...> tag, then drop the first remaining character
    # (presumably the newline that follows the XML declaration -- TODO confirm
    # against a live response).
    stripped = re.sub(r'<[^>]+>', '', response.text)[1:]
    # Entities must be decoded *after* tag removal; otherwise a decoded "<"
    # in the translated text could be mistaken for markup.
    data = html.unescape(stripped)

    print(text)
    print(data)
    return text, data