Summary of this patch (text before the first "diff --git" line is ignored by patch tools):
  * adds a GitHub Actions workflow that installs the system packages and runs pytest
    on ubuntu-latest with Python 3.11 and 3.12;
  * corrects the EasyOCREngine class docstring (it said Tesseract);
  * adds a "test" optional-dependency group containing pytest;
  * adds tests/conftest.py fixtures, a sample PDF, and a basic test that checks the
    output PDF's /Creator mentions EasyOCR.

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..d60525c
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,83 @@
+name: Test EasyOCR
+
+on:
+  push:
+    branches:
+      - main
+      - ci
+      - release/*
+      - feature/*
+    tags:
+      - v*
+    paths-ignore:
+      - README*
+  pull_request:
+
+jobs:
+  test_linux:
+    name: Test ${{ matrix.os }} with Python ${{ matrix.python }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        include:
+          - os: ubuntu-latest
+            python: "3.11"
+            tesseract5: true
+          - os: ubuntu-latest
+            python: "3.12"
+            tesseract5: true
+    env:
+      OS: ${{ matrix.os }}
+      PYTHON: ${{ matrix.python }}
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags
+
+      - uses: actions/setup-python@v4
+        name: Setup Python
+        with:
+          python-version: ${{ matrix.python }}
+          cache: "pip"
+
+      - name: Install Tesseract 5
+        if: matrix.tesseract5
+        run: |
+          sudo add-apt-repository -y ppa:alex-p/tesseract-ocr-devel
+
+      - name: Install common packages
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+            curl \
+            ghostscript \
+            img2pdf \
+            libexempi8 \
+            libffi-dev \
+            libsm6 libxext6 libxrender-dev \
+            pngquant \
+            poppler-utils \
+            tesseract-ocr \
+            tesseract-ocr-deu \
+            tesseract-ocr-eng \
+            tesseract-ocr-osd \
+            unpaper \
+            zlib1g
+
+      - name: Install Python packages
+        run: |
+          python -m pip install --upgrade pip wheel
+          python -m pip install --prefer-binary .[test]
+
+      - name: Report versions
+        run: |
+          tesseract --version
+          gs --version
+          pngquant --version
+          unpaper --version
+          img2pdf --version
+
+      - name: Test
+        run: |
+          python -m pytest tests/
diff --git a/ocrmypdf_easyocr/__init__.py b/ocrmypdf_easyocr/__init__.py
index fb83b73..ab1d6b2 100644
--- a/ocrmypdf_easyocr/__init__.py
+++ b/ocrmypdf_easyocr/__init__.py
@@ -104,7 +104,7 @@ def add_options(parser):
 
 
 class EasyOCREngine(OcrEngine):
-    """Implements OCR with Tesseract."""
+    """Implements OCR with EasyOCR."""
 
     @staticmethod
     def version():
diff --git a/pyproject.toml b/pyproject.toml
index 903a6a8..2b29d7a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,9 @@ ocrmypdf = "ocrmypdf_easyocr"
 
 [project.optional-dependencies]
 dev = ["ruff>=0.0.272"]
+test = [
+    "pytest>=6.2.5",
+]
 
 [build-system]
 requires = ["setuptools"]
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..f829efb
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: 2023 James R. Barlow
+# SPDX-License-Identifier: MIT
+
+from pathlib import Path
+
+import pytest
+
+TESTS_ROOT = Path(__file__).parent.resolve()
+PROJECT_ROOT = TESTS_ROOT
+
+
+@pytest.fixture(scope="session")
+def resources() -> Path:
+    return Path(TESTS_ROOT) / "resources"
+
+
+@pytest.fixture(scope="function")
+def outdir(tmp_path) -> Path:
+    return tmp_path
+
+
+@pytest.fixture(scope="function")
+def outpdf(tmp_path) -> Path:
+    return tmp_path / "out.pdf"
diff --git a/tests/resources/jbig2.pdf b/tests/resources/jbig2.pdf
new file mode 100644
index 0000000..d1a62d4
Binary files /dev/null and b/tests/resources/jbig2.pdf differ
diff --git a/tests/test_basic.py b/tests/test_basic.py
new file mode 100644
index 0000000..98557c9
--- /dev/null
+++ b/tests/test_basic.py
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: 2023 James R. Barlow
+# SPDX-License-Identifier: MIT
+
+import ocrmypdf
+import pikepdf
+import pytest
+
+import ocrmypdf_easyocr
+
+
+def test_easyocr(resources, outpdf):
+    ocrmypdf.ocr(resources / "jbig2.pdf", outpdf)
+    assert outpdf.exists()
+
+    with pikepdf.open(outpdf) as pdf:
+        assert "EasyOCR" in str(pdf.docinfo["/Creator"])