Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
nickscamara committed Jan 6, 2025
2 parents 64d73f9 + f0b106d commit 24447bb
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 13 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ RUN python3.10 download_models.py
#serverless
# CMD ["sh", "-c", "ls && python3.10 serverless.py"]

#download paddleocr model
RUN sh download_model.sh

CMD ["python3.10", "-m", "app.serverless"]
Expand Down
24 changes: 24 additions & 0 deletions app/serverless.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,28 @@ def setup():
# Warm up the conversion process
convert_to_markdown(sample_pdf_bytes, sample_dir, filename)


# def get_model(
# self,
# ocr: bool,
# show_log: bool,
# lang=None,
# layout_model=None,
# formula_enable=None,
# table_enable=None,
# ):
#layout_model: doclayout_yolo, apply_formula: True, apply_ocr: True, apply_table: False, table_model: rapid_table, lang: None
def init_model():
    """Load both model variants up front, before the serverless worker starts.

    Requests a model from ``ModelSingleton`` twice: first with the first
    argument ``False`` (the "txt" model), then with it ``True`` (the "ocr"
    model). Going by the ``get_model`` signature noted in this file, the two
    positional arguments are ``ocr`` and ``show_log``; ``show_log`` is
    ``False`` for both calls.

    Progress is reported on stdout via ``print``.

    Returns:
        int: always ``0``.
    """
    # The import is kept inside the function, as in the original code.
    from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton

    model_manager = ModelSingleton()
    print("About to init model")

    # The returned models are deliberately not bound to names: nothing here
    # uses them (presumably the singleton keeps them cached -- TODO confirm),
    # so no unused-variable lint suppression is needed.
    model_manager.get_model(False, False)
    print('txt_model init final')

    model_manager.get_model(True, False)
    print('ocr_model init final')

    return 0


def handler(event):
try:
# Extract base64 encoded file and filename from the event
Expand Down Expand Up @@ -87,4 +109,6 @@ def handler(event):
# Call setup to initiate and warm up resources
# setup()

init_model()

runpod.serverless.start({"handler": handler})
23 changes: 10 additions & 13 deletions download_model.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,26 @@ download_and_extract() {

echo "Downloading ${model_name} from ${url}" | tee -a "$LOG_FILE"

# **Create the destination directory before downloading**
# Create the destination directory before downloading
mkdir -p "$(dirname "${destination_path}")"

# **Download the file directly to destination_path without appending .tar**
# Download the file directly to destination_path without appending .tar
if ! curl -L -o "${destination_path}" "${url}" 2>&1 | tee -a "$LOG_FILE"; then
echo "Failed to download ${model_name} from ${url}" | tee -a "$LOG_FILE"
exit 1
fi

echo "Extracting ${model_name}" | tee -a "$LOG_FILE"

# **Extract the downloaded tar file**
# Extract the downloaded tar file
if ! tar -xvf "${destination_path}" -C "$(dirname "${destination_path}")" 2>&1 | tee -a "$LOG_FILE"; then
echo "Failed to extract ${model_name}" | tee -a "$LOG_FILE"
exit 1
fi

echo "Removing archive for ${model_name}" | tee -a "$LOG_FILE"

# **Remove the downloaded tar file**
# Remove the downloaded tar file
if ! rm "${destination_path}" | tee -a "$LOG_FILE"; then
echo "Failed to remove archive for ${model_name}" | tee -a "$LOG_FILE"
exit 1
Expand All @@ -43,19 +43,16 @@ download_and_extract() {
echo "----------------------------------------" | tee -a "$LOG_FILE"
}

# Download PP Detect Model to /root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/
# Corrected download paths without double directories
download_and_extract "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar" \
"/root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.tar" \
"/root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer.tar" \
"PP Detect Model"
download_and_extract "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar" \
"/root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer.tar" \
"PP Class Model"

# Download PP Rec Model to /root/.paddleocr/whl/rec/ch_ppocr_mobile_v2.0_rec_infer/
download_and_extract "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_infer.tar" \
"/root/.paddleocr/whl/rec/ch_ppocr_mobile_v2.0_rec_infer/ch_ppocr_mobile_v2.0_rec_infer.tar" \
"/root/.paddleocr/whl/rec/ch/ch_ppocr_mobile_v2.0_rec_infer.tar" \
"PP Rec Model"

# **New Addition**: Download PP Rec Model to /root/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/
download_and_extract "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_infer.tar" \
"/root/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/ch_PP-OCRv4_rec_infer.tar" \
"PP Rec Model Additional Path"

echo "All model downloads completed at $(date)" | tee -a "$LOG_FILE"

0 comments on commit 24447bb

Please sign in to comment.