-
Notifications
You must be signed in to change notification settings - Fork 0
/
product.py
332 lines (253 loc) · 16.6 KB
/
product.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
import tkinter as tk
from tkinter import ttk
from tkinterdnd2 import DND_FILES, TkinterDnD
from PyPDF2 import PdfReader
from PIL import Image, ImageTk
import re
import sys
import os
# import io
from functions.drop_and_display import drop_and_display
from functions.encode_pdf import encode_pdf
from functions.find_js import find_js_blocks
from functions.find_bombs import find_bombs
from functions.find_loops import find_loops
def _pdf_name_regex(name):
    """Return a regex fragment matching the PDF name object ``/<name>``.

    Every character may appear either literally or as ``#`` followed by its
    decimal ordinal, e.g. ``/(?:J|#74)(?:S|#83)`` for ``b"JS"``.

    NOTE(review): the PDF specification defines name escapes as two-digit
    *hexadecimal* codes (``#4A`` for ``J``).  The decimal form is kept because
    it is what this tool has always matched -- confirm against the output of
    ``encode_pdf`` before changing it.
    """
    return b"/" + b"".join(b"(?:%c|#%d)" % (byte, byte) for byte in name)


# From the ``endobj`` closing the previous object up to (not across) the next
# ``endobj``: replacing such a span with ``endobj`` deletes exactly one object.
_AFTER_PREVIOUS_OBJECT = rb"endobj(?:(?!endobj).)*?"

# Object containing a /JS entry.  The tempered ``(?!endobj)`` keeps the
# "rest of the entry" part inside the object; an untempered ``[^/]*`` also
# matches newlines and would swallow the following object as well.
_JS_OBJECT = re.compile(
    _AFTER_PREVIOUS_OBJECT
    + _pdf_name_regex(b"JS")
    + rb"(?:(?!endobj)[^/])*\n.*?endobj",
    re.DOTALL,
)

# Object containing a /FontMatrix entry.
_FONTMATRIX_OBJECT = re.compile(
    _AFTER_PREVIOUS_OBJECT + _pdf_name_regex(b"FontMatrix") + rb".*?endobj",
    re.DOTALL,
)

# Leftover name objects that are blanked out after the objects are removed.
# Order matters only in that it mirrors the original processing order.
_NAME_STRIPPERS = tuple(
    re.compile(_pdf_name_regex(name) + rb"\s*", re.IGNORECASE)
    for name in (b"JS", b"JavaScript", b"FontMatrix")
)

_OBJECT_HEADER = rb"\d+\s+\d+\s+obj"
_DECODE_FILTER = (
    b"(?:"
    + b"|".join(
        _pdf_name_regex(name)
        for name in (b"FlateDecode", b"ASCIIHexDecode", b"DCTDecode")
    )
    + b")"
)

# Whole object (header through ``endobj``) in which two decode-filter names
# directly follow each other, i.e. a chained filter list.
_BOMB_OBJECT = re.compile(
    _OBJECT_HEADER
    + rb"(?:(?!" + _OBJECT_HEADER + rb").)*?"
    + rb"(?=" + _DECODE_FILTER + _DECODE_FILTER + rb")"
    + rb".*?endobj",
    re.DOTALL,
)


class ProductApp:
    """Result window of "PDF Shield" for a single PDF file.

    Constructing the object builds the window, lets ``drop_and_display``
    populate it for ``path`` and then blocks in the Tk main loop until the
    window is closed.

    Args:
        path: Path of the file to inspect.  Anything not ending in ``.pdf``
            only produces an "invalid file format" message.
    """

    _SHOW_SUFFIX = "-show.pdf"
    _REMOVED_SUFFIX = "-removed.pdf"

    def __init__(self, path):
        self.path = path
        self.root = root = TkinterDnD.Tk()

        # Scrollable area: a canvas with a vertical scrollbar hosting one frame
        # that contains every other widget.
        content_frame = ttk.Frame(root)
        content_frame.pack(fill="both", expand=True)
        canvas = tk.Canvas(content_frame, scrollregion=(0, 0, 0, 600))
        canvas.pack(side="left", fill="both", expand=True)
        scrollbar = ttk.Scrollbar(
            content_frame, orient="vertical", command=canvas.yview
        )
        scrollbar.pack(side="right", fill="y")
        canvas.config(yscrollcommand=scrollbar.set)

        def on_mouse_wheel(event):
            # One scroll unit per 120 units of wheel delta.
            canvas.yview_scroll(int(-1 * (event.delta / 120)), "units")

        canvas.bind_all("<MouseWheel>", on_mouse_wheel)

        target_frame = ttk.Frame(canvas)
        canvas.create_window((0, 0), window=target_frame, anchor="nw")

        # Finding labels are created but deliberately not packed here; they
        # are handed to drop_and_display, which presumably shows the relevant
        # ones.
        js_label = tk.Label(
            target_frame, text="JavaScript code detected", fg="red"
        )
        infinite_loop_label = tk.Label(
            target_frame,
            text="Infinite Loop create by objects detected",
            fg="red",
        )
        deflate_bomb_label = tk.Label(
            target_frame, text="Possible to have Deflate Bomb", fg="red"
        )
        broken_label = tk.Label(
            target_frame,
            text="Broken PDF (Highly not recommended to open this)",
            fg="red",
        )
        no_dos_label = tk.Label(
            target_frame, text="No Dos attack methods detected"
        )

        root.title("PDF Shield")
        root.geometry("300x450")

        file_label = tk.Label(target_frame, text="Drag and drop a PDF file here")
        file_label.pack(pady=10)
        cover_label = tk.Label(target_frame)
        cover_label.pack(pady=10, padx=10)

        # Action buttons are likewise only created; drop_and_display receives
        # them and decides about their placement.
        remove_js_button = tk.Button(
            target_frame, text="Remove JS", command=self._remove_js
        )
        show_js_button = tk.Button(
            target_frame, text="Show JS", command=self._show_js_code
        )
        show_loop_button = tk.Button(
            target_frame,
            text="Show Infinite object Loop",
            command=self._show_loop,
        )
        show_bomb_button = tk.Button(
            target_frame, text="Show Deflate Bomb", command=self._show_bomb
        )
        remove_bomb_button = tk.Button(
            target_frame, text="Remove Deflate Bomb", command=self._remove_bomb
        )
        remove_all_button = tk.Button(
            target_frame, text="Remove All", command=self._remove_all
        )
        exit_button = tk.Button(target_frame, text="Exit", command=root.destroy)

        if path.endswith(".pdf"):
            drop_and_display(
                path, cover_label, file_label, js_label, infinite_loop_label,
                deflate_bomb_label, no_dos_label, broken_label,
                remove_js_button, show_js_button, show_loop_button,
                remove_bomb_button, show_bomb_button, remove_all_button,
                exit_button,
            )
        else:
            tk.Label(
                root,
                text="Invalid file format. Please drag and drop a PDF file.",
            ).pack(anchor="center")

        try:
            root.iconbitmap("shield_icon.ico")
        except tk.TclError:
            # The icon is cosmetic: a missing file (the path is relative to
            # the working directory) or a platform that rejects .ico files
            # must not prevent the window from opening.
            pass
        root.resizable(False, False)
        root.mainloop()

    # ------------------------------------------------------------------
    # Pure byte-level sanitizers
    # ------------------------------------------------------------------
    @staticmethod
    def _strip_javascript(pdf_bytes):
        """Return ``pdf_bytes`` without JavaScript / FontMatrix content.

        First every object containing a ``/JS`` or ``/FontMatrix`` entry is
        cut out (the span from the previous ``endobj`` to the object's own
        ``endobj`` collapses to a single ``endobj``).  Afterwards any
        remaining ``/JS``, ``/JavaScript`` and ``/FontMatrix`` names are
        deleted, case-insensitively, together with trailing whitespace.
        """
        # All spans are located on the untouched input, then removed in turn.
        doomed = _JS_OBJECT.findall(pdf_bytes)
        doomed += _FONTMATRIX_OBJECT.findall(pdf_bytes)
        cleaned = pdf_bytes
        for span in doomed:
            cleaned = cleaned.replace(span, b"endobj")
        for stripper in _NAME_STRIPPERS:
            cleaned = stripper.sub(b"", cleaned)
        return cleaned

    @staticmethod
    def _strip_bombs(pdf_bytes):
        """Return ``pdf_bytes`` without objects that chain decode filters.

        An object is removed entirely when two of ``/FlateDecode``,
        ``/ASCIIHexDecode`` and ``/DCTDecode`` appear back to back in it.
        Input without such objects is returned unchanged.
        """
        cleaned = pdf_bytes
        for span in _BOMB_OBJECT.findall(pdf_bytes):
            cleaned = cleaned.replace(span, b"")
        return cleaned

    @staticmethod
    def _removed_path(show_path):
        """Derive the ``-removed.pdf`` output path from a ``-show.pdf`` path.

        Only the trailing suffix is swapped, so directory names that happen
        to contain ``-show.pdf`` stay intact.  If the suffix is absent the
        extension is replaced instead, which guarantees that the result
        differs from ``show_path`` (that file is deleted by the callers).
        """
        if show_path.endswith(ProductApp._SHOW_SUFFIX):
            stem = show_path[: -len(ProductApp._SHOW_SUFFIX)]
        else:
            stem, _extension = os.path.splitext(show_path)
        return stem + ProductApp._REMOVED_SUFFIX

    # ------------------------------------------------------------------
    # Shared plumbing for the button callbacks
    # ------------------------------------------------------------------
    def _show_with(self, viewer):
        """Run ``viewer(encoded_path, root)`` on a temporary encoded copy."""
        encoded_path = encode_pdf(self.path)
        try:
            viewer(encoded_path, self.root)
        finally:
            # The encoded copy is only scratch data; drop it even on errors.
            os.remove(encoded_path)

    def _sanitize(self, transform):
        """Write ``transform(encoded bytes)`` to the ``-removed.pdf`` file.

        The output is always written, also when ``transform`` changed
        nothing, so the path announced to the user always exists.
        """
        encoded_path = encode_pdf(self.path)
        try:
            with open(encoded_path, "rb") as encoded_file:
                pdf_bytes = encoded_file.read()
        finally:
            os.remove(encoded_path)

        output_path = self._removed_path(encoded_path)
        with open(output_path, "wb") as output_file:
            output_file.write(transform(pdf_bytes))

        message = f"Modified PDF saved as {output_path}"
        print(message)
        tk.Label(tk.Toplevel(self.root), text=message).pack()

    # ------------------------------------------------------------------
    # Button callbacks
    # ------------------------------------------------------------------
    def _show_js_code(self):
        """Display the JavaScript blocks found in the PDF."""
        self._show_with(find_js_blocks)

    def _show_bomb(self):
        """Display the suspected decompression-bomb objects of the PDF."""
        self._show_with(find_bombs)

    def _show_loop(self):
        """Display the object loops found in the PDF."""
        self._show_with(find_loops)

    def _remove_js(self):
        """Save a copy of the PDF with JavaScript content removed."""
        self._sanitize(self._strip_javascript)

    def _remove_bomb(self):
        """Save a copy of the PDF with chained-filter objects removed."""
        self._sanitize(self._strip_bombs)

    def _remove_all(self):
        """Save a copy with both bomb objects and JavaScript removed.

        Uses the same JavaScript clean-up as "Remove JS", including the
        FontMatrix handling.
        """
        def strip_everything(pdf_bytes):
            without_bombs = self._strip_bombs(pdf_bytes)
            if without_bombs == pdf_bytes:
                print("No bomb block found in the PDF.")
            return self._strip_javascript(without_bombs)

        self._sanitize(strip_everything)