Skip to content

Commit

Permalink
将onnx模型和字体打包进docker镜像
Browse files Browse the repository at this point in the history
  • Loading branch information
awwaawwa committed Dec 18, 2024
1 parent 08199fd commit b557a12
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 7 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.venv
.github
docs
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/
.vscode
.DS_Store
uv.lock
10 changes: 6 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.12
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim

WORKDIR /app

Expand All @@ -7,9 +7,11 @@ COPY . .
EXPOSE 7860

ENV PYTHONUNBUFFERED=1
ADD "https://github.com/satbyy/go-noto-universal/releases/download/v7.0/GoNotoKurrent-Regular.ttf" /app
RUN apt-get update && \
apt-get install --no-install-recommends -y libgl1 && \
rm -rf /var/lib/apt/lists/*

RUN apt-get update && apt-get install -y libgl1

RUN pip install .
RUN uv pip install --system --no-cache . && uv run pdf2zh/warmup.py

CMD ["pdf2zh", "-i"]
8 changes: 7 additions & 1 deletion pdf2zh/doclayout.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os.path

import cv2
import huggingface_hub.utils
import numpy as np
import ast
import onnx
Expand Down Expand Up @@ -82,7 +83,12 @@ def from_pretrained(repo_id: str, filename: str):
model_dir = snapshot_download(repo_mapping[repo_id])
pth = os.path.join(model_dir, filename)
else:
pth = hf_hub_download(repo_id=repo_id, filename=filename, etag_timeout=1)
try:
pth = hf_hub_download(repo_id=repo_id, filename=filename, etag_timeout=1, local_files_only=True)
print("Using local DocLayout-YOLO-DocStructBench-onnx file", pth)
except huggingface_hub.utils.LocalEntryNotFoundError:
print("Downloading DocLayout-YOLO-DocStructBench-onnx from Huggingface Hub...")
pth = hf_hub_download(repo_id=repo_id, filename=filename, etag_timeout=1)
return OnnxModel(pth)

@property
Expand Down
5 changes: 4 additions & 1 deletion pdf2zh/high_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,10 @@ def translate_stream(
font_list.append((resfont, None))
elif lang_out.lower() in noto_list: # noto
resfont = "noto"
ttf_path = os.path.join(tempfile.gettempdir(), "GoNotoKurrent-Regular.ttf")
# docker
ttf_path = '/app/GoNotoKurrent-Regular.ttf'
if not os.path.exists(ttf_path):
ttf_path = os.path.join(tempfile.gettempdir(), "GoNotoKurrent-Regular.ttf")
if not os.path.exists(ttf_path):
print("Downloading Noto font...")
urllib.request.urlretrieve(
Expand Down
10 changes: 10 additions & 0 deletions pdf2zh/warmup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from doclayout import DocLayoutModel

def warmup():
    """Load the layout model once, printing a message before and after.

    Calls ``DocLayoutModel.load_available()`` and discards the result; the
    function itself returns ``None``.
    """
    print('Warming up the model...')
    DocLayoutModel.load_available()
    print('Warm-up completed!')


# Run the warm-up only when this file is executed as a script, not on import.
if __name__ == "__main__":
    warmup()

0 comments on commit b557a12

Please sign in to comment.