-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbio_medical_ner_parser.py
100 lines (74 loc) · 3.54 KB
/
bio_medical_ner_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import datetime
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from dotenv import load_dotenv
load_dotenv('.env')
class BioMedicalParser:
    """Extract biomedical entities from free text with an OpenAI chat model.

    On construction the instance creates a ``ChatOpenAI`` client, a
    ``StructuredOutputParser`` for the five entity keys (``symptoms``,
    ``duration``, ``disease``, ``medication``, ``medical_test``) and the
    chat prompt template.  ``extract_biomedical_entities`` is the entry
    point for callers.
    """

    # Up to and including this date the dated model name is used; afterwards
    # the undated one.  NOTE(review): presumably this tracks the retirement
    # of the "-0301" snapshot -- confirm against the provider's schedule.
    _MODEL_SWITCH_DATE = datetime.date(2024, 6, 12)

    # (name, description) for every entity the model is asked to return.
    # The names are the keys of the dictionary returned to callers.
    _ENTITY_SCHEMAS = (
        ("symptoms", "extract symptoms from text"),
        ("duration", "extract duration from text"),
        ("disease", "extract disease from text"),
        ("medication", "Extract medication from text"),
        ("medical_test", "Extract medical test from text"),
    )

    # Prompt sent to the model.  ``{text}`` and ``{format_instructions}`` are
    # filled in by ``format_messages``.  Every entity label here matches the
    # JSON key requested further down and the schema names above.
    _NER_TEMPLATE = (
        "Ignore all the previous instruction. Now For the following text, "
        "extract the following information:\n"
        "symptoms: extract the symptoms from the text\n"
        "output them as Python list, if not available return []\n"
        "duration: extract the duration from the text\n"
        "output them as Python list, if not available return []\n"
        "disease: extract diseases from the text\n"
        "output them as Python list, if not available return []\n"
        "medication: extract medication from the text\n"
        "output them as Python list and if not available return []\n"
        "medical_test: extract medical test from the text\n"
        "output them as Python list and if not available return []\n"
        "Format the output as JSON with the following keys:\n"
        "symptoms\n"
        "duration\n"
        "disease\n"
        "medication\n"
        "medical_test\n"
        "text: {text}\n"
        "{format_instructions}\n"
    )

    def __init__(self):
        """Create the chat model, then the output parser and prompt."""
        self.load_model()
        self.parser()

    @staticmethod
    def _select_model(today):
        """Return the model name to use on the given ``datetime.date``."""
        if today > BioMedicalParser._MODEL_SWITCH_DATE:
            return "gpt-3.5-turbo"
        return "gpt-3.5-turbo-0301"

    def load_model(self):
        """Pick the model name for today's date and build the chat client.

        Sets ``self.llm_model`` (the model name) and ``self.chat`` (a
        ``ChatOpenAI`` instance created with temperature 0.0).
        """
        self.llm_model = self._select_model(datetime.datetime.now().date())
        self.chat = ChatOpenAI(temperature=0.0, model=self.llm_model)

    def parser(self):
        """Build the structured output parser and the prompt template.

        Sets ``self.output_parser``, ``self.format_instructions`` and
        ``self.prompt``.
        """
        response_schemas = [
            ResponseSchema(name=name, description=description)
            for name, description in self._ENTITY_SCHEMAS
        ]
        self.output_parser = StructuredOutputParser.from_response_schemas(
            response_schemas
        )
        self.format_instructions = self.output_parser.get_format_instructions()
        self.prompt = ChatPromptTemplate.from_template(
            template=self._NER_TEMPLATE
        )

    def extract_biomedical_entities(self, input_text):
        """Return the entities found in ``input_text`` as a dictionary.

        Args:
            input_text: The free text to analyse.

        Returns:
            The dictionary produced by the output parser from the model's
            reply.  For empty, whitespace-only or ``None`` input, a
            dictionary mapping every entity key to an empty list is returned
            without contacting the model.
        """
        # Nothing can be extracted from blank text, so skip the API call and
        # return the same "nothing found" shape the prompt asks the model for.
        if not input_text or not input_text.strip():
            return {name: [] for name, _ in self._ENTITY_SCHEMAS}

        messages = self.format_messages(input_text)
        response = self.chat(messages)
        return self.output_parser.parse(response.content)

    def format_messages(self, text):
        """Render the prompt template for ``text`` into chat messages."""
        return self.prompt.format_messages(
            text=text,
            format_instructions=self.format_instructions,
        )
if __name__ == "__main__":
    # Manual smoke run: build the parser, extract entities from one sample
    # sentence and print the resulting dictionary.
    sample_text = " i have fever and headache for 2 days"
    print(BioMedicalParser().extract_biomedical_entities(sample_text))