# website_extraction_Email.py
#
# Helpers that extract the visible text and the links from a web page and
# send a plain-text e-mail through Gmail's SMTP server.
import sys, random, time
import urllib2
import smtplib, os
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.MIMEImage import MIMEImage
from email.Utils import COMMASPACE, formatdate
from email import Encoders
from bs4 import BeautifulSoup
def textExtraction():
    """Download a web page and save its text to ExtractedText.txt.

    The page is fetched with urllib2, every <script> and <style> element is
    removed, and the remaining text is written, UTF-8 encoded, to
    "ExtractedText.txt" (relative path), replacing any existing file.

    Returns:
        None
    """
    # Placeholder URL: replace with the address of the page to process.
    website = urllib2.urlopen("Insert website here")
    try:
        websiteHtml = website.read()
    finally:
        # Release the connection even if the read fails part-way.
        website.close()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(websiteHtml, "html.parser")

    # Script and style elements hold code, not page text; remove them
    # before collecting the text.
    for element in soup(["script", "style"]):
        element.extract()
    text = soup.get_text()

    # The context manager closes the file even if the write raises.
    with open("ExtractedText.txt", "w+") as textFile:
        textFile.write(text.encode('utf8'))
def Email():
    """Send a plain-text e-mail through Gmail's SMTP server.

    The account, recipients, subject and body are the placeholder values
    assigned below. Prints 'successfully sent the mail' on success, or a
    failure message that includes the reason when the SMTP conversation or
    the network connection fails.

    Returns:
        None
    """
    # NOTE: placeholders only; avoid committing real credentials here.
    gmail_user = "user@gmail.com"
    gmail_pwd = "user_password"
    FROM = 'user@gmail.com'
    TO = ['recepient@email provider.com']  # must be a list
    SUBJECT = "Insert subject here"
    TEXT = "insert message here"

    # Headers, one blank line, then the body. The text must start directly
    # with the "From" header name: a leading backslash would become part of
    # the header name and corrupt it.
    message = "From: %s\nTo: %s\nSubject: %s\n\n%s\n" % (
        FROM, ", ".join(TO), SUBJECT, TEXT)

    try:
        # 587 is the STARTTLS port; 465 would need smtplib.SMTP_SSL instead.
        server = smtplib.SMTP("smtp.gmail.com", 587)
        try:
            server.ehlo()
            server.starttls()
            server.login(gmail_user, gmail_pwd)
            server.sendmail(FROM, TO, message)
        finally:
            # Always release the socket, including after a failed login/send.
            server.close()
    except (smtplib.SMTPException, IOError) as err:
        # Socket-level errors are IOError subclasses; report the reason
        # instead of hiding it. Other exception types propagate.
        print("failed to send mail: %s" % err)
    else:
        print('successfully sent the mail')
def LinkFinder():
    """Return the href value of every <a> element on a web page.

    The page is fetched with urllib2 and parsed with BeautifulSoup.

    Returns:
        list: href attribute values in document order. Anchors that have no
        href attribute are skipped, so the list contains no None entries.
    """
    # Placeholder URL: replace with the address of the page to scan.
    website = urllib2.urlopen("Insert website address here")
    try:
        websiteHtml = website.read()
    finally:
        # Release the connection even if the read fails part-way.
        website.close()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(websiteHtml, "html.parser")

    # href=True matches only anchors that actually carry an href attribute.
    return [link.get('href') for link in soup.find_all('a', href=True)]