Skip to content

Commit

Permalink
Update __init__.py
Browse files Browse the repository at this point in the history
Redirect stdout to stderr to be compliant with ocrmypdf; otherwise we break the usage of ocrmypdf with piped stdin/stdout.
  • Loading branch information
rakurtz authored Aug 30, 2024
1 parent 051ed1b commit ca5ffa3
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion ocrmypdf_easyocr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import logging
import multiprocessing.managers
import os
import sys
import contextlib
import threading
import traceback
from pathlib import Path
Expand Down Expand Up @@ -111,7 +113,12 @@ def _ocr_process(q: multiprocessing.Queue[Task], options):
if reader is None:
use_gpu = options.gpu
languages = [ISO_639_3_2[lang] for lang in options.languages]
reader = easyocr.Reader(languages, use_gpu)

# Redirect stdout to stderr during Reader initialization to stay compliant with ocrmypdf;
# otherwise the progress bar shown while loading the model into RAM interferes with PDF output piped to stdout.
with contextlib.redirect_stdout(sys.stderr):
reader = easyocr.Reader(languages, use_gpu)

output_dict["output"] = reader.readtext(
gray, batch_size=options.easyocr_batch_size
)
Expand Down

0 comments on commit ca5ffa3

Please sign in to comment.