ocr.py
import streamlit as st
import numpy as np
import cv2
import pytesseract
from PIL import Image
import tempfile
import os
import uuid
import fitz
from azure.storage.blob import BlobServiceClient
import subprocess
import threading
# import openai # Import OpenAI SDK
import ollama
# Configuration and Credentials
CONNECTION_STRING = "BlobEndpoint=https://jhjfsdb.blob.core.windows.net/;QueueEndpoint=https://jhjfsdb.queue.core.windows.net/;FileEndpoint=https://jhjfsdb.file.core.windows.net/;TableEndpoint=https://jhjfsdb.table.core.windows.net/;SharedAccessSignature=sv=2022-11-02&ss=bfqt&srt=sco&sp=rwdlacupiytfx&se=2024-11-30T07:56:49Z&st=2024-11-29T23:56:49Z&spr=https,http&sig=6l9g7%2BATRsEjE3WuomwYYxBw3zyplw2W%2ByOtZ9HRWaM%3D"
CONTAINER_NAME = "store"
# OPENAI_API_KEY = "your-openai-api-key" # Your OpenAI API key
# openai.api_key = OPENAI_API_KEY
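# NOTE: in a deployed app the connection string would normally be read from the
# environment rather than hardcoded. A minimal sketch (the variable name below is
# an assumption, not something the app defines):
#   CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING", CONNECTION_STRING)
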
class ImageProcessor:
    @staticmethod
    def preprocess_image(image_path):
        """Preprocess image for better OCR accuracy."""
        image = cv2.imread(image_path)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, processed = cv2.threshold(gray, 130, 255, cv2.THRESH_BINARY)
        return processed

    @staticmethod
    def extract_text(image_path):
        """Extract text from preprocessed image using Tesseract."""
        image = ImageProcessor.preprocess_image(image_path)
        text = pytesseract.image_to_string(image, lang="ara+eng+fra")
        return text

    @staticmethod
    def extract_images_from_pdf(pdf_path):
        """Extract images from PDF."""
        doc = fitz.open(pdf_path)
        images = []
        for page_number in range(len(doc)):
            pix = doc[page_number].get_pixmap(dpi=300)
            image_array = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
            if image_array.shape[2] == 4:
                image_array = cv2.cvtColor(image_array, cv2.COLOR_RGBA2RGB)
            images.append(image_array)
        return images

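# NOTE: the fixed threshold of 130 in preprocess_image assumes fairly even lighting;
# for unevenly lit scans, Otsu's method is a common alternative (a sketch only, not
# wired into the pipeline above):
#   _, processed = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
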
class AzureStorageManager:
    def __init__(self, connection_string, container_name):
        """Initialize Azure Blob Storage client."""
        self.blob_service_client = BlobServiceClient.from_connection_string(connection_string)
        self.container_client = self.blob_service_client.get_container_client(container_name)

    def upload_file(self, file_path):
        """Upload file to Azure Blob Storage."""
        try:
            blob_client = self.container_client.get_blob_client(os.path.basename(file_path))
            with open(file_path, "rb") as data:
                blob_client.upload_blob(data, overwrite=True)
            return True
        except Exception as e:
            st.error(f"Azure Upload Error: {e}")
            return False

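# NOTE: if the UI ever needs a direct link to the uploaded file, the azure-storage-blob
# client exposes one as blob_client.url inside upload_file; it is not used in the
# current flow.
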
# class OpenAITextCorrector:
#     @staticmethod
#     def correct_text(ocr_text):
#         """Use OpenAI API to correct and structure OCR text."""
#         prompt = f"""
#         You are a professional medical prescription assistant.
#         Extract and structure medical prescription information precisely from the following text:
#         {ocr_text}
#         Provide a clean, structured output with:
#         - Doctor's Name
#         - Patient Name
#         - Date of Prescription
#         - Medicines:
#             * Name
#             * Dosage
#             * Instructions
#         """
#         try:
#             response = openai.Completion.create(
#                 model="gpt-4",  # You can also use "text-davinci-003" as a cheaper option
#                 prompt=prompt,
#                 max_tokens=1000,
#                 temperature=0.5
#             )
#             corrected_text = response.choices[0].text.strip()
#             return corrected_text
#         except Exception as e:
#             st.error(f"OpenAI API Error: {e}")
#             return ""

class LLaMaTextCorrector:
    @staticmethod
    def correct_text(ocr_text):
        """Use LLaMa to correct and structure OCR text."""
        chat_history = [
            {"role": "system", "content": """You are a professional medical prescription assistant.
            Always answer, no matter the quality of the input text.
            Structure your answer clearly and provide all the requested details.
            Never reply with refusals such as: I can't help with that. Is there anything else I can help you with?"""},
            {"role": "user", "content": f"""
            Answer no matter what, even if the text is unclear.
            Analyze the following text carefully and provide the following details in a structured format:
            - Doctor's Name
            - Patient's Name
            - Date of Prescription
            - Medicines:
                * Name
                * Dosage
                * Instructions
            Here is the text to analyze:
            {ocr_text}
            """}
        ]
        try:
            answer = ollama.chat(model="llama3.2", messages=chat_history, stream=True)
            corrected_text = ""
            for token_dict in answer:
                token = token_dict["message"]["content"]
                corrected_text += token
            return corrected_text
        except Exception as e:
            st.error(f"LLaMa Error: {e}")
            return ""

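# NOTE: correct_text streams tokens and concatenates them; assuming the same ollama
# Python client, a non-streaming variant would look like:
#   response = ollama.chat(model="llama3.2", messages=chat_history)
#   corrected_text = response["message"]["content"]
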
def main():
    # Streamlit UI Configuration
    st.set_page_config(page_title="Se7ty Healthcare App", page_icon="🩺")
    st.image("images.jpg", width=150, caption="Se7ty Healthcare App", use_column_width=False)
    st.markdown("<h1>Se7ty Healthcare App</h1>", unsafe_allow_html=True)
    st.markdown("<p>Easily process and save your ordonnance (prescription).</p>", unsafe_allow_html=True)

    # File Upload
    uploaded_file = st.file_uploader("Upload an Image or PDF", type=["jpg", "jpeg", "png", "pdf"])
    use_camera = st.checkbox("Take a Photo with Your Camera")
    image_to_process = None

    if uploaded_file:
        file_type = uploaded_file.type
        if file_type == "application/pdf":
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
                temp_pdf.write(uploaded_file.read())
            # Read the PDF only after the temp file is closed so the bytes are flushed to disk.
            pdf_images = ImageProcessor.extract_images_from_pdf(temp_pdf.name)
            if pdf_images:
                image_to_process = pdf_images[0]
                st.image(image_to_process, caption="First Page of Uploaded PDF", use_column_width=True)
        else:
            # Convert to RGB so PNGs with an alpha channel do not break the BGR conversion below.
            image_to_process = np.array(Image.open(uploaded_file).convert("RGB"))
            st.image(image_to_process, caption="Uploaded Ordonnance", use_column_width=True)
    elif use_camera:
        picture = st.camera_input("Capture your Ordonnance")
        if picture:
            image_to_process = np.array(Image.open(picture).convert("RGB"))
            st.image(image_to_process, caption="Captured Ordonnance", use_column_width=True)

    if image_to_process is not None:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp:
            temp_image_path = temp.name
        cv2.imwrite(temp_image_path, cv2.cvtColor(image_to_process, cv2.COLOR_RGB2BGR))

        preprocessed_image = ImageProcessor.preprocess_image(temp_image_path)
        st.image(preprocessed_image, caption="Preprocessed Ordonnance", channels="GRAY")

        # Text Extraction
        recognized_text = ImageProcessor.extract_text(temp_image_path)
        st.text_area("Extracted Ordonnance Text", recognized_text, height=200)

        # Correct Text using the local LLaMa model
        # corrected_text = OpenAITextCorrector.correct_text(recognized_text)
        corrected_text = LLaMaTextCorrector.correct_text(recognized_text)
        st.text_area("Structured Prescription Details", corrected_text, height=300)

        # Save Corrected Text (ensure the output directory exists first)
        os.makedirs("data", exist_ok=True)
        random_file_name = f"data/corrected_ordonnance_{uuid.uuid4().hex}.txt"
        with open(random_file_name, "w", encoding="utf-8") as file:
            file.write(corrected_text)

        # Azure Storage Upload
        azure_manager = AzureStorageManager(CONNECTION_STRING, CONTAINER_NAME)
        if azure_manager.upload_file(random_file_name):
            st.success(f"Prescription details saved and uploaded to Azure: {random_file_name}")

        # Clean up temporary files
        os.remove(temp_image_path)
        os.remove(random_file_name)


if __name__ == "__main__":
    main()