From a9916e5ec190720807225fe44482b1c49b1240ef Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Fri, 30 Aug 2024 13:02:35 -0700 Subject: [PATCH] Use celery multiprocessing if available Closes #9 --- ocrmypdf_easyocr/__init__.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ocrmypdf_easyocr/__init__.py b/ocrmypdf_easyocr/__init__.py index 2c6db6c..6f6bc9c 100644 --- a/ocrmypdf_easyocr/__init__.py +++ b/ocrmypdf_easyocr/__init__.py @@ -5,11 +5,10 @@ from __future__ import annotations +import contextlib import logging -import multiprocessing.managers import os import sys -import contextlib import threading import traceback from pathlib import Path @@ -27,6 +26,12 @@ from ocrmypdf_easyocr._easyocr import tidy_easyocr_result from ocrmypdf_easyocr._pdf import easyocr_to_pikepdf +try: + # Use Celery's multiprocessing if available + import billiard as multiprocessing +except ImportError: + import multiprocessing.managers + log = logging.getLogger(__name__) ISO_639_3_2: dict[str, str] = { @@ -113,12 +118,12 @@ def _ocr_process(q: multiprocessing.Queue[Task], options): if reader is None: use_gpu = options.gpu languages = [ISO_639_3_2[lang] for lang in options.languages] - + # Redirect stdout to stderr during Reader initialization to be compliant with ocrmypdf - # otherwise piping a pdf output to stdout gets interfered with the progress bar of loading the model to ram + # otherwise piping a pdf output to stdout gets interfered with the progress bar of loading the model to ram with contextlib.redirect_stdout(sys.stderr): reader = easyocr.Reader(languages, use_gpu) - + output_dict["output"] = reader.readtext( gray, batch_size=options.easyocr_batch_size )