-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
137 lines (113 loc) · 4.35 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Third-party dependencies: Groq (LLM chat API), PyPDF2 (PDF text
# extraction), python-docx (.docx text extraction).
from groq import Groq
import PyPDF2
import docx
import os
#import openai credentials from os
# NOTE(review): the comment above says "openai", but the client below is
# Groq; the key is read from the GROQ_API_KEY environment variable
# (None if unset — the API call will then fail at request time).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"),)
#file_path to the input file
"""Please note that pdf, docx files can now be used"""
# Path of the document to summarize; .txt, .pdf and .docx are supported.
file_path="user_input.txt"
#extract text from text file
def extract_text_from_txt_file(file_path):
    """Return the full contents of the plain-text file at *file_path*.

    Fix: the file is now opened with an explicit UTF-8 encoding instead of
    the platform default, so non-ASCII input does not raise
    UnicodeDecodeError on narrow-encoding platforms (e.g. Windows cp1252).
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()
#extract text from pdf file
def extract_text_from_pdf_file(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*.

    Fix: ``page.extract_text()`` can return None for pages with no
    extractable text (e.g. scanned images); the original ``text += ...``
    then raised TypeError. Falling back to '' skips such pages.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text() or ''
    return text
#extract text from docx file
def extract_text_from_docx_file(file_path):
    """Return the text of all paragraphs in the .docx file at *file_path*.

    Fix: paragraphs were previously concatenated with no separator, so the
    last word of one paragraph fused with the first word of the next —
    garbling the text fed to the summarizer. Paragraphs are now joined
    with newlines.
    """
    doc = docx.Document(file_path)
    return '\n'.join(paragraph.text for paragraph in doc.paragraphs)
#Detect the file format of input file
def extract_text(file_path):
    """Extract text from *file_path*, choosing the extractor by extension.

    Supported extensions are .txt, .pdf and .docx; anything else prints
    'Unsupported file format' and yields an empty string.
    """
    if file_path.endswith('.txt'):
        return extract_text_from_txt_file(file_path)
    if file_path.endswith('.pdf'):
        return extract_text_from_pdf_file(file_path)
    if file_path.endswith('.docx'):
        return extract_text_from_docx_file(file_path)
    print('Unsupported file format')
    return ''
# Raw text of the input document ('' if the format was unsupported).
text = extract_text(file_path)
#print text given by user
"""print(text)"""
#Step 1 (make small chunks of text of 1000 words approx)
def text_chunker(text, chunk_size=1000):
    """Summarize *text* chunk by chunk and return the joined summaries.

    The text is split on whitespace into pieces of about *chunk_size*
    words; each piece is summarized via ``process_text`` and the stripped
    responses are joined with single spaces.

    Parameters:
        text: the text to summarize.
        chunk_size: approximate words per chunk (default 1000, matching
            the previously hard-coded value — backward compatible).

    Returns:
        The concatenated per-chunk summaries as one string.
    """
    words = text.split()
    processed_text = ""
    for i in range(0, len(words), chunk_size):
        chunk = " ".join(words[i:i + chunk_size])
        response = process_text(chunk)
        processed_text += response.strip() + " "
    return processed_text.strip()
#Step 2 (Text Processing)
def process_text(text):
    """Summarize one chunk of text via the Groq chat API.

    Sends a single user message to the llama3-8b-8192 model and returns
    the stripped completion text.

    Fixes: the chunk previously ran directly into the follow-up
    instruction with no separator (the model saw "...end of chunkThe
    summary should..."), and "should be long than 15-20 lines" was
    ungrammatical; both are corrected in the prompt below.
    """
    chat_completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {
                "role": "user",
                "content": "Please summarize the following text in a precise and accurate manner:\n\n"
                + text +
                "\n\nThe summary should be no longer than 15-20 lines and should capture the essence of the context and main ideas without losing important details."
            }
        ]
    )
    print("Analyzing input file.....")
    return chat_completion.choices[0].message.content.strip()
#calling functions of step 1 & 2
# First pass: one Groq summary per ~1000-word chunk, joined into one string.
Chunked_Summary=text_chunker(text)
#Step 3 (file creating and storing summary)
def file_create_write(file_path, summary, summary_file_name):
    """Write *summary* into *summary_file_name* inside a folder named
    after *file_path* with its extension stripped (folder created if
    missing, reused otherwise).

    Fix: the output file is now written with explicit UTF-8 encoding so
    non-ASCII characters in the model output cannot raise
    UnicodeEncodeError on narrow-default-encoding platforms.
    """
    # Folder name = input path without its extension (e.g. "doc.txt" -> "doc").
    folder_name = os.path.splitext(file_path)[0]
    os.makedirs(folder_name, exist_ok=True)
    out_path = os.path.join(folder_name, summary_file_name)
    with open(out_path, 'w', encoding='utf-8') as out_file:
        out_file.write(summary)
# Persist the first-pass chunk summaries next to the input file.
file_create_write(file_path,Chunked_Summary,'Chunked_Summary.txt')
#Step 4 finally summarizing the chunked summary
# Second pass: summarize the joined chunk summaries down to one summary.
Final_Summary=text_chunker(Chunked_Summary)
#file creation and storing summary
# NOTE(review): extension here is '.text' while every other output uses
# '.txt' — presumably unintentional; left as-is to preserve behavior.
file_create_write(file_path,Final_Summary,'Conclusion_Summary.text')
#Step 5: Key_Notes Generation
def process_key_Notes(text):
    """Generate numbered key notes for one chunk of text via the Groq API.

    Returns the stripped completion from the llama3-8b-8192 model.

    Fixes: the chunk previously ran directly into the follow-up
    instruction ("...end of chunkMake sure..."), and "in order." fused
    with "At the end" — separators are added in the prompt below.
    """
    chat_completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{
            'role': 'user',
            'content': "Generate keynotes from the following text:\n\n"
            + text +
            "\n\nMake sure to accurately capture the meaning and represent it in numbered points in order."
            + " At the end, include a heading titled 'Bare Essentials' that summarizes the core concepts from the text."
        }])
    print("analyzing input file.....")
    return chat_completion.choices[0].message.content.strip()
def generate_keynotes(text):
    """Generate key notes for *text* chunk by chunk and join the results.

    Splits the text into ~1000-word chunks, runs each through
    ``process_key_Notes``, and returns the stripped responses joined with
    single spaces.

    Fixes (brings this in line with the sibling ``text_chunker``):
    - chunks were assembled with ``"".join(...)``, which glued every word
      together with no spaces and fed the model unreadable text; use
      ``" ".join`` like ``text_chunker`` does;
    - per-chunk responses were concatenated with no separator, fusing the
      last and first words of adjacent responses;
    - removed the stray trailing comma in the ``process_key_Notes`` call.
    """
    words = text.split()
    processed_text = ""
    for i in range(0, len(words), 1000):
        chunk = " ".join(words[i:i + 1000])
        response = process_key_Notes(chunk)
        processed_text += response.strip() + " "
    return processed_text.strip()
#generating keynotes chunk and processing
# Key notes are derived from the final summary, not the raw input text.
key_notes=generate_keynotes(Final_Summary)
file_create_write(file_path,key_notes,'Key_Notes.txt')
#All steps completed
# NOTE(review): outputs actually land in a folder named after the input
# file (see file_create_write), not the current directory itself.
print("The summary and Keynotes are generated and saved to the current directory.")