Skip to content

Commit

Permalink
Allow rendering word boxes for debug
Browse files: browse the repository at this point in the history
  • Loading branch information
jbarlow83 committed Nov 10, 2023
1 parent 8b5a8ed commit e21cae1
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 6 deletions.
18 changes: 13 additions & 5 deletions ocrmypdf_easyocr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,13 @@ def initialize(plugin_manager: pluggy.PluginManager):

@hookimpl
def add_options(parser):
easyocr_options = parser.add_argument_group(
"EasyOCR", "Advanced control of EasyOCR"
)
easyocr_options = parser.add_argument_group("EasyOCR", "EasyOCR options")
easyocr_options.add_argument("--easyocr-no-gpu", action="store_false", dest="gpu")
easyocr_options.add_argument(
"--easyocr-debug-suppress-images",
action="store_true",
dest="easyocr_debug_suppress_images",
)


class EasyOCREngine(OcrEngine):
Expand Down Expand Up @@ -155,8 +158,13 @@ def generate_pdf(input_file, output_pdf, output_text, options):
text = " ".join([result.text for result in results])
output_text.write_text(text)

# easyocr_to_pdf(input_file, 1.0, results, output_pdf)
easyocr_to_pikepdf(input_file, 1.0, results, output_pdf)
easyocr_to_pikepdf(
input_file,
1.0,
results,
output_pdf,
boxes=options.easyocr_debug_suppress_images,
)


@hookimpl
Expand Down
3 changes: 2 additions & 1 deletion ocrmypdf_easyocr/_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ def easyocr_to_pikepdf(
image_scale: float,
results: Iterable[EasyOCRResult],
output_pdf: Path,
boxes: bool,
):
"""Convert EasyOCR results to a PDF with text annotations (no images).
Expand Down Expand Up @@ -302,7 +303,7 @@ def easyocr_to_pikepdf(
Font=Dictionary({"/f-0-0": register_glyphlessfont(pdf)})
)

cs = generate_text_content_stream(results, scale, height, boxes=False)
cs = generate_text_content_stream(results, scale, height, boxes=boxes)
pdf.pages[0].Contents = pdf.make_stream(unparse_content_stream(cs))

pdf.save(output_pdf)
Expand Down

0 comments on commit e21cae1

Please sign in to comment.