-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathCrawler.py
140 lines (124 loc) · 4.3 KB
/
Crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/python
# Desenvolvido por Adriel Freud!
# Contato: businessc0rp2k17@gmail.com
# FB: http://www.facebook.com/xrn401
# =>DebutySecTeamSecurity<=
# coding: utf-8
# USAGE: Crawler.py -u http://site.com/
# NOTE: do not forget the 'http://' or 'https://' scheme in the URL
import re
import argparse
from bs4 import BeautifulSoup
from time import sleep
import requests
import socket
import json
import sys
import time, datetime
import urllib.request as urllib2
# Start-up timestamp and its YYYY-MM-DD-HH-MM-SS rendering. Neither name is
# referenced again in the code visible in this file.
ts = time.time()
dt = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d-%H-%M-%S')
# "[HH:MM:SS] - " prefix prepended to result lines. It is computed once here,
# so every line printed during a run carries the same start-up time.
today = datetime.datetime.today()
t = today.strftime("[%H:%M:%S] - ")
# ASCII-art banner, preceded by an ANSI colour escape (\033[1;36m).
# NOTE(review): this is a non-raw triple-quoted string, so the backslashes in
# the art go through escape processing (a backslash directly before a newline
# is a line continuation) -- verify the banner renders as intended.
menu = """\033[1;36m
____ _ __ __ _
/ ___|_ __ __ ___ _| | ___ _ __ \ \ / /__| |__
| | | '__/ _` \ \ /\ / / |/ _ \ '__| \ \ /\ / / _ \ '_ \
| |___| | | (_| |\ V V /| | __/ | \ V V / __/ |_) |
\____|_| \__,_| \_/\_/ |_|\___|_| \_/\_/ \___|_.__/
Powered by Adriel Freud\n"""
# Command-line interface: -u/--url, -p/--proxy, -c/--cookie, -d/--data.
# All four options are optional strings; an option that is not given is None.
parse = argparse.ArgumentParser(description="For Get Informations of WebSite")
parse.add_argument("-u", "--url", help="Url for get Informations! ")
parse.add_argument("-p", "--proxy", help="Set a proxy! ")
parse.add_argument("-c", "--cookie", help="Set a Cookie ")
parse.add_argument("-d", "--data", help="Set a forms ")
# Arguments are parsed at module level, i.e. as soon as this file is executed.
args = parse.parse_args()
# Request headers (a browser-like User-Agent) passed as headers= to the
# requests calls made by the functions in this file.
header = {'user-agent': 'Mozilla/5.0 (X11; Linux i686; rv:43.0) Gecko/10100101 Firefox/43.0 Iceweasel/43.0.4'}
def printar_detalhes(url):
IP = socket.gethostbyname(url.strip('https://'))
req = requests.get('http://ip-api.com/json/'+IP, headers=header)
Geo = json.loads(req.text)
print('\nIP: %s\n'%Geo['query'])
print('Country: %s\n'%Geo['country'])
print('Country code: %s\n'%Geo['countryCode'])
print('Region: %s\n'%Geo['regionName'])
print('Region code: %s\n'%Geo['region'])
print('City: %s\n'%Geo['city'])
print('Zip Code: %s\n'%Geo['zip'])
print('Latitude: %s\n'%Geo['lat'])
print('Longitude: %s\n'%Geo['lon'])
print('Timezone: %s\n'%Geo['timezone'])
print('ISP: %s\n'%Geo['isp'])
print('Organization: %s\n'%Geo['org'])
print('AS number/name: %s\n'%Geo['as'])
def email_extrator(url):
    """Download *url* and print every e-mail-like string found in the page.

    A section banner is printed first. When the page contains no match the
    process is terminated with exit status 0.
    """
    print("\n\033[1;36m<==================== Emails! ====================>")
    page_source = requests.get(url, headers=header).text
    found = re.findall(r"\"?([-a-zA-Z0-9.`?{}]+@\w+\.\w+)\"?", page_source)

    # Nothing found: stop the program, exactly as the success path would at
    # the end of a run.
    if not found:
        exit(0)

    for address in found:
        line = '[==>] Email: %s' % address
        print('\n\033[31m' + t + line)
def whois(url):
    """Print the raw WHOIS record(s) that whois.com shows for *url*.

    *url* is interpolated verbatim into ``https://www.whois.com/whois/<url>``.
    When the response status is 200, a blank line is printed followed by the
    text of every ``<pre class="df-raw">`` element, each under an info banner.
    Any other status prints nothing.

    NOTE(review): the script entry point passes the full URL including its
    scheme -- confirm whois.com accepts that form rather than a bare domain.
    """
    lookup_url = 'https://www.whois.com/whois/{0}'.format(url)
    response = requests.get(lookup_url, headers=header)
    if response.status_code != 200:
        return

    print("")
    soup = BeautifulSoup(response.text, "html.parser")
    for raw_record in soup.find_all("pre", {"class": "df-raw"}):
        print('\033[1;36m<==================== info ==================>\n\n%s' % raw_record.get_text())
def capture(url):
    """Fetch *url* and print every absolute http(s) link found in an href.

    Links are extracted with a regular expression over the raw HTML, so no
    DOM parse is needed. Each link is printed with the start-up time prefix.

    If the response status is not 200, a failure message is printed and,
    after a three-second pause, the process exits with status 1.
    """
    req = requests.get(url, headers=header)
    if req.status_code != 200:
        print("\n\033[31m[!]Request Failed, Exiting Program...\n ")
        sleep(3)
        # sys.exit rather than the exit() helper, which is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit(1)

    html = req.text
    print("\n[*]Request Succefully!\n")
    # Only absolute links whose href value starts with http:// or https://
    # are matched; relative links are not reported.
    urls = re.findall('(?<=href=["\'])https?://.+?(?=["\'])', html)
    print("\033[1;36m<==================== Links ====================>\n\n")
    for u in urls:
        print("\033[31m" + t + "[==>] Links: %s" % u)
def grabbining(url, proxy, cookie, form):
    """Send one GET or POST request to *url* and print the response body.

    The HTTP method is chosen interactively: answering "p" (any case) sends a
    POST, any other answer sends a GET.

    Args:
        url: Target URL.
        proxy: Proxy URL used for both http and https traffic, or None/empty
            for no explicit proxy.
        cookie: Raw cookie string (e.g. ``"name=value; other=x"``) sent
            verbatim as the ``Cookie`` request header, or None/empty.
        form: Form data. Sent in the request body for POST and as the query
            string for GET. May be None.

    Exits the process with status 1 when the response status is not 200.
    """
    esc = str(input("GET/POST | [p][g] | \\[G]: "))

    req_headers = dict(header)
    if cookie:
        # Passing cookies={'Cookie': cookie} would create a cookie literally
        # named "Cookie" whose value is the whole string; sending the string
        # as the Cookie header delivers the cookies the user actually typed.
        req_headers['Cookie'] = cookie
    proxies = {'http': proxy, 'https': proxy} if proxy else None

    if esc.lower() == "p":
        if isinstance(form, str):
            # A raw string body gets no automatic Content-Type from requests,
            # so declare it as URL-encoded form data.
            req_headers.setdefault('Content-Type', 'application/x-www-form-urlencoded')
        # POST form data belongs in the body (data=), not the query string.
        req = requests.post(url, headers=req_headers, proxies=proxies, data=form)
    else:
        req = requests.get(url, headers=req_headers, proxies=proxies, params=form)

    if req.status_code != 200:
        print("\n\033[31m[!]Request Failed, Exiting Program...\n ")
        sys.exit(1)

    html = req.text
    print("\n[*]Request Succefully!\n")
    print("\033[1;36m<==================== Information ====================>\n\n")
    # NOTE(review): printing the encoded bytes shows a b'...' literal with
    # escaped newlines on Python 3; kept as-is to avoid console encoding
    # errors -- confirm whether plain text output is wanted instead.
    print(html.encode('utf-8'))
# Script entry: requires -u/--url; otherwise show the banner and usage help.
if args.url:
    print(menu)
    # Any of -p/-c/-d switches to single-request mode. The values come from
    # the parsed arguments rather than fixed sys.argv positions, so the flags
    # may be given in any order and any subset of them may be omitted.
    if any(opt is not None for opt in (args.proxy, args.cookie, args.data)):
        grabbining(args.url, args.proxy, args.cookie, args.data)
    else:
        # Print the response headers, closing the connection afterwards.
        with urllib2.urlopen(args.url) as response:
            print(response.info())
        capture(args.url)
        try:
            printar_detalhes(args.url)
        except Exception as exc:
            # Geolocation is best-effort: report the problem and carry on
            # with the remaining steps instead of hiding it silently.
            print("\n[!] Could not get IP/geolocation details: %s" % exc)
        whois(args.url)
        email_extrator(args.url)
else:
    print(menu)
    parse.print_help()