Skip to content

Commit

Permalink
Use celery multiprocessing if available
Browse files Browse the repository at this point in the history
Closes #9
  • Loading branch information
jbarlow83 committed Aug 30, 2024
1 parent f3d5f7a commit a9916e5
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions ocrmypdf_easyocr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@

from __future__ import annotations

import contextlib
import logging
import multiprocessing.managers
import os
import sys
import contextlib
import threading
import traceback
from pathlib import Path
Expand All @@ -27,6 +26,12 @@
from ocrmypdf_easyocr._easyocr import tidy_easyocr_result
from ocrmypdf_easyocr._pdf import easyocr_to_pikepdf

try:
# Prefer billiard (Celery's fork of multiprocessing) when it is installed; otherwise fall back to the standard library
import billiard as multiprocessing
except ImportError:
import multiprocessing.managers

log = logging.getLogger(__name__)

ISO_639_3_2: dict[str, str] = {
Expand Down Expand Up @@ -113,12 +118,12 @@ def _ocr_process(q: multiprocessing.Queue[Task], options):
if reader is None:
use_gpu = options.gpu
languages = [ISO_639_3_2[lang] for lang in options.languages]

# Redirect stdout to stderr during Reader initialization to stay compliant with ocrmypdf.
# Otherwise, when the PDF output is piped to stdout, the progress bar that easyocr prints
# while loading the model into RAM would be mixed into the PDF data.
with contextlib.redirect_stdout(sys.stderr):
reader = easyocr.Reader(languages, use_gpu)

output_dict["output"] = reader.readtext(
gray, batch_size=options.easyocr_batch_size
)
Expand Down

0 comments on commit a9916e5

Please sign in to comment.