-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsummarize.py
34 lines (26 loc) · 1.08 KB
/
summarize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from transformers import pipeline
from configparser import ConfigParser
import os
import dbConnect
def summarize(dbCursor, dbConnection, *, summarizer=None, max_chars=1000,
              max_length=130, min_length=30):
    """Generate and store a summary for every article that lacks one.

    Selects the rows of ``articles`` whose ``summary`` is the empty string,
    summarizes the leading part of each body and writes the result back to
    the same row.

    Args:
        dbCursor: Cursor used for the SELECT and the UPDATE statements; it
            must provide ``execute`` and ``fetchall``. The UPDATE uses the
            ``%s`` parameter placeholder.
        dbConnection: Connection whose ``commit()`` is called after each
            stored summary.
        summarizer: Optional callable invoked as
            ``summarizer(text, max_length=..., min_length=..., do_sample=False)``
            and returning a sequence whose first item has a
            ``"summary_text"`` key. When ``None`` (the default) a
            ``pipeline("summarization")`` is created the first time it is
            needed.
        max_chars: Number of leading characters of the body handed to the
            summarizer.
        max_length: Forwarded to the summarizer as ``max_length``.
        min_length: Forwarded to the summarizer as ``min_length``.

    Returns:
        None.
    """
    # NOTE(review): a row whose summary is NULL does not satisfy "= ''" in
    # SQL and would never be picked up - confirm the column is NOT NULL.
    dbCursor.execute("SELECT id,body FROM articles WHERE summary = ''")
    articles = dbCursor.fetchall()

    update_sql = "UPDATE articles SET summary = %s WHERE id = %s"
    for article_id, body in articles:
        if not body:
            # A missing body cannot be sliced and an empty one gives the
            # model nothing to work with; leave the row untouched.
            print(f"Skipping article id {article_id}: empty body")
            continue

        if summarizer is None:
            # Built lazily: constructing the pipeline is expensive, so it is
            # skipped entirely when there is nothing to summarize.
            summarizer = pipeline("summarization")

        print(f"Summarizing article id {article_id}")
        result = summarizer(
            body[:max_chars],
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
        )
        dbCursor.execute(update_sql, (result[0]["summary_text"], article_id))
        # Commit per article so finished summaries survive a failure on a
        # later article.
        dbConnection.commit()
def main():
    """Read ``config.ini`` next to this file and summarize pending articles.

    The ``database`` section of the configuration is handed to
    ``dbConnect.getDbConnection`` and the resulting cursor/connection pair
    is passed to ``summarize``.

    Raises:
        FileNotFoundError: If ``config.ini`` cannot be read.
        KeyError: If the configuration has no ``database`` section.
    """
    config = ConfigParser()
    configFile = os.path.join(os.path.dirname(__file__), 'config.ini')
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it actually parsed, so an empty result means the
    # configuration is missing.
    if not config.read(configFile):
        raise FileNotFoundError(
            f"Could not read configuration file: {configFile}"
        )

    dbCursor, dbConnection = dbConnect.getDbConnection(config["database"])
    try:
        summarize(dbCursor, dbConnection)
    finally:
        # Release the database resources even if summarization fails.
        # NOTE(review): assumes both objects expose close(), as DB-API
        # cursors and connections do - confirm against dbConnect.
        dbCursor.close()
        dbConnection.close()
# Script entry point: run the summarization job only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()