-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdf_link_extractor.py
48 lines (39 loc) · 1.57 KB
/
pdf_link_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#Autor >> AlfonzCS
import sys

import pikepdf  # pip3 install pikepdf
def ClownLogo():
    """Print the ASCII-art banner line by line, each line in a random colour.

    Every line of the banner is written to standard output wrapped in a bold
    ANSI colour escape picked at random, followed by a reset sequence, with a
    0.05 second pause after each line so the banner appears progressively.

    Returns:
        None.
    """
    # Kept as function-scope imports, as in the original; colorama is a
    # third-party package (pip3 install colorama).
    from colorama import init
    import random
    import sys
    import time

    # NOTE(review): colorama's init() is presumably here so the ANSI escapes
    # below also render on Windows consoles -- confirm against colorama docs.
    init()

    reset = "\x1b[0m"
    # ANSI foreground colour codes substituted into the "\x1b[1;<code>m" escape.
    colors = [36, 32, 34, 35, 31, 37]

    # Raw string: the art contains backslashes ("\/", "\|", "\_") that are
    # invalid escape sequences in a normal literal and trigger a SyntaxWarning
    # on recent Python versions. The resulting text is identical.
    banner = r"""
____ ____ ______ ___ __ __ __
/ __ \/ __ \/ ____/ / (_)___ / /__ ___ _ __/ /__________ ______/ /_____ _____
/ /_/ / / / / /_ / / / __ \/ //_/ / _ \| |/_/ __/ ___/ __ `/ ___/ __/ __ \/ ___/
/ ____/ /_/ / __/ / / / / / / ,< / __/> </ /_/ / / /_/ / /__/ /_/ /_/ / /
/_/ /_____/_/ /_/_/_/ /_/_/|_| \___/_/|_|\__/_/ \__,_/\___/\__/\____/_/
CS! : PDF Link extractor extrae todos los link de los arcivos pdf seleccionados.
"""
    for line in banner.split("\n"):
        sys.stdout.write("\x1b[1;%dm%s%s\n" % (random.choice(colors), line, reset))
        time.sleep(0.05)
# Script entry: show the banner, then print every link URI found in the PDF
# whose path is given as the first command-line argument.
ClownLogo()

# `sys` must come from the module-level import: ClownLogo() only imports it
# into its own local scope, so without that import `sys.argv` raises NameError
# here and the script can never reach the PDF.
if len(sys.argv) < 2:
    # No PDF path was supplied on the command line.
    print('[x] Error')
else:
    urls = []
    try:
        # The context manager closes the underlying file even if extraction
        # fails part-way through.
        with pikepdf.Pdf.open(sys.argv[1]) as pdf_file:
            # Iterate over PDF pages.
            for page in pdf_file.pages:
                annotations = page.get("/Annots")
                if annotations is None:
                    # Page has no annotations at all; nothing to extract.
                    continue
                for annotation in annotations:
                    action = annotation.get("/A")
                    if action is None:
                        # Annotation without an action (e.g. not a link).
                        continue
                    uri = action.get("/URI")
                    if uri is not None:
                        # Store a plain string so the value does not depend on
                        # the PDF object staying open.
                        uri = str(uri)
                        print("[+] URL Found:", uri)
                        urls.append(uri)
    except (OSError, pikepdf.PdfError):
        # Missing/unreadable file, or a file pikepdf cannot parse as a PDF.
        print('[-] PDF no encontrado')
    else:
        print("[*] Total URLs extracted:", len(urls))