-
Notifications
You must be signed in to change notification settings - Fork 6.5k
/
Copy pathdoctext.py
125 lines (99 loc) · 3.75 KB
/
doctext.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Outlines document text given an image.
Example:
python doctext.py resources/text_menu.jpg
"""
# [START vision_document_text_tutorial]
# [START vision_document_text_tutorial_imports]
import argparse
from enum import Enum
import io
from google.cloud import vision
from PIL import Image, ImageDraw
# [END vision_document_text_tutorial_imports]
class FeatureType(Enum):
    """Levels of detected document text that can be selected for outlining."""

    # Members are numbered 1 through 5 in declaration order.
    PAGE = 1
    BLOCK = 2
    PARA = 3  # paragraph level
    WORD = 4
    SYMBOL = 5
def draw_boxes(image, bounds, color):
    """Outline every bounding polygon in ``bounds`` on ``image``.

    Args:
        image: The image handed to ``ImageDraw.Draw``; the outlines are drawn
            onto it.
        bounds: An iterable of objects exposing a ``vertices`` sequence whose
            first four entries each carry ``x`` and ``y`` attributes.
        color: The outline colour passed through to ``polygon``.

    Returns:
        The same ``image`` object that was passed in.
    """
    pen = ImageDraw.Draw(image)
    for box in bounds:
        # Flatten the first four corners into [x0, y0, x1, y1, x2, y2, x3, y3].
        outline = []
        for corner in range(4):
            vertex = box.vertices[corner]
            outline.append(vertex.x)
            outline.append(vertex.y)
        # Positional arguments after the coordinates: fill (None leaves the
        # polygon unfilled), then the outline colour.
        pen.polygon(outline, None, color)
    return image
# [START vision_document_text_tutorial_detect_bounds]
def get_document_bounds(image_file, feature):
    """Collect the bounding boxes of one feature level found in an image.

    The image is read from disk, sent through ``document_text_detection`` and
    the resulting ``full_text_annotation`` is walked page by page.

    Args:
        image_file: Path of the image file to analyse.
        feature: The ``FeatureType`` member selecting which level of the
            annotation (block, paragraph, word or symbol) to collect.

    Returns:
        A list of ``bounding_box`` values for the requested level, in the
        order they are encountered while walking the annotation. No branch
        handles ``FeatureType.PAGE``, so that value yields an empty list.
    """
    client = vision.ImageAnnotatorClient()

    # Read the raw bytes; the handle gets its own name so the path parameter
    # is not rebound.
    with io.open(image_file, "rb") as handle:
        content = handle.read()

    request_image = vision.Image(content=content)
    response = client.document_text_detection(image=request_image)
    document = response.full_text_annotation

    # Walk pages -> blocks -> paragraphs -> words -> symbols and keep only
    # the boxes of the level that was asked for.
    boxes = []
    for page in document.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    if feature == FeatureType.SYMBOL:
                        boxes.extend(
                            symbol.bounding_box for symbol in word.symbols
                        )
                    if feature == FeatureType.WORD:
                        boxes.append(word.bounding_box)
                if feature == FeatureType.PARA:
                    boxes.append(paragraph.bounding_box)
            if feature == FeatureType.BLOCK:
                boxes.append(block.bounding_box)

    # `boxes` now holds the coordinates of the selected bounding boxes.
    return boxes
# [END vision_document_text_tutorial_detect_bounds]
def render_doc_text(filein, fileout):
    """Outline detected blocks, paragraphs and words on an image.

    Blocks are drawn in blue, then paragraphs in red, then words in yellow,
    so the finer-grained outlines are painted on top of the coarser ones.

    Args:
        filein: Path of the image to annotate.
        fileout: Destination path for the annotated image. Any falsy value
            (``0``, ``None`` or ``""``) displays the image instead of saving
            it.
    """
    # Drawing order matters: later entries are painted over earlier ones.
    overlays = (
        (FeatureType.BLOCK, "blue"),
        (FeatureType.PARA, "red"),
        (FeatureType.WORD, "yellow"),
    )

    # The context manager closes the image's file handle even if detection,
    # drawing or saving raises.
    with Image.open(filein) as image:
        for feature, color in overlays:
            bounds = get_document_bounds(filein, feature)
            draw_boxes(image, bounds, color)

        # Truthiness instead of `!= 0`: a caller passing None or "" for
        # "no output file" gets the viewer rather than a failing save().
        if fileout:
            image.save(fileout)
        else:
            image.show()
if __name__ == "__main__":
    # [START vision_document_text_tutorial_run_application]
    # Command line: one required input image plus an optional output path.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "detect_file",
        help="The image for text detection.",
    )
    # When -out_file is omitted the default 0 is forwarded unchanged to
    # render_doc_text.
    cli.add_argument(
        "-out_file",
        help="Optional output file",
        default=0,
    )
    options = cli.parse_args()
    render_doc_text(options.detect_file, options.out_file)
    # [END vision_document_text_tutorial_run_application]
# [END vision_document_text_tutorial]