From 4116c552cc2bc245bfd7a27a81974e8ec54396dc Mon Sep 17 00:00:00 2001
From: wasbt <36480372+wasbt@users.noreply.github.com>
Date: Fri, 10 May 2024 13:38:23 +0100
Subject: [PATCH] Update pdf_annot_urls.py

The updated signature extends the analysis of PDF link annotations to
identify potential security threats in PDF files:

1. Blacklisted IP detection: hosts referenced by a link are resolved and
   looked up on known DNS blacklist (DNSBL) servers.
2. Comprehensive URL analysis: every URL of an annotation is parsed and
   analysed individually, including URLs embedded in query-string
   parameters.
3. Efficient multithreading: the DNSBL zones are queried concurrently.
4. Configurable parameters: the list of DNSBL servers and the list of
   malicious TLDs can be customised.
---
Review notes (ignored by git am):
- is_blacklisted() now waits for the DNSBL answers instead of returning
  True as soon as a lookup thread is started, and never returns a truthy
  error string.
- Host names are taken from urlparse().hostname so ports and credentials
  no longer break the TLD and DNSBL checks.
- Plain web links now reach the severity 2 branch (found_domain_only was
  never set).
- The original copyright notice is kept next to the new one.
- Removed lines were re-indented from a flattened copy of the patch;
  verify them against the pre-image. The post-image blob id on the
  "index" line predates this revision.

 modules/signatures/all/pdf_annot_urls.py | 284 ++++++++++++++++++++---
 1 file changed, 248 insertions(+), 36 deletions(-)

diff --git a/modules/signatures/all/pdf_annot_urls.py b/modules/signatures/all/pdf_annot_urls.py
index 740e0370..5144206b 100644
--- a/modules/signatures/all/pdf_annot_urls.py
+++ b/modules/signatures/all/pdf_annot_urls.py
@@ -1,4 +1,5 @@
 # Copyright (C) 2015 Optiv, Inc. (brad.spengler@optiv.com)
+# Copyright (C) 2024 Wassime BATTA
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -15,37 +16,248 @@

-from lib.cuckoo.common.abstracts import Signature
-
-
-class PDF_Annot_URLs(Signature):
-    name = "pdf_annot_urls"
-    description = "The PDF contains a Link Annotation to a compressed archive or executable file"
-    severity = 3
-    categories = ["static"]
-    authors = ["Optiv"]
-    minimum = "1.3"
-
-    filter_analysistypes = set(["file"])
-
-    def run(self):
-        found_URLs = False
-        if "static" in self.results and "pdf" in self.results["static"]:
-            if "PDF" in self.results["target"]["file"].get("type", ""):
-                if "Annot_URLs" in self.results["static"]["pdf"]:
-                    for entry in self.results["static"]["pdf"]["Annot_URLs"]:
-                        entrylower = entry.lower()
-                        if entrylower.endswith(
-                            (".zip", ".exe", ".msi", ".bat", ".scr", ".rar", ".com")
-                        ) and not entrylower.startswith(
-                            "mailto:"
-                        ):  # skip mailto: as it can't add attachments
-                            skip = False
-                            # skip triggering on http:// and https:// links that don't have anything after the domain name
-                            # so http://foo.com will be skipped, but http://foo.com/malware.com will not be
-                            if entrylower.startswith("http://") and not entrylower.find("/", 8):
-                                skip = True
-                            elif entrylower.startswith("https://") and not entrylower.find("/", 9):
-                                skip = True
-                            if skip:
-                                self.data.append({"url": entry})
-                                found_URLs = True
-        return found_URLs
+import ipaddress
+import logging
+import os
+import socket
+from concurrent.futures import ThreadPoolExecutor
+from urllib.parse import parse_qs, urlparse
+
+from lib.cuckoo.common.abstracts import CUCKOO_ROOT, Signature
+
+log = logging.getLogger(__name__)
+
+# Link targets ending in one of these extensions point at an archive,
+# executable or script.
+SUSPICIOUS_EXTENSIONS = (
+    ".exe",
+    ".zip",
+    ".rar",
+    ".bat",
+    ".cmd",
+    ".js",
+    ".jse",
+    ".vbs",
+    ".vbe",
+    ".ps1",
+    ".psm1",
+    ".sh",
+)
+
+# Upper bound on concurrent DNSBL lookups for a single host.
+MAX_DNSBL_WORKERS = 16
+
+# DNSBL zones answer "listed" with an address inside 127.0.0.0/8. Spamhaus
+# reserves 127.255.255.0/24 for error codes (for example, queries sent through
+# a public resolver), which must not be counted as a listing.
+DNSBL_LISTED_NET = ipaddress.ip_network("127.0.0.0/8")
+DNSBL_ERROR_NET = ipaddress.ip_network("127.255.255.0/24")
+
+
+def _parse_url(url):
+    """Return ``urlparse(url)``, or None when the URL is malformed."""
+    try:
+        return urlparse(url)
+    except ValueError:
+        # urlparse rejects e.g. unbalanced IPv6 brackets in the authority.
+        return None
+
+
+def _dnsbl_lookup(query):
+    """Return True when the DNSBL *query* name resolves to a listing."""
+    try:
+        answer = ipaddress.ip_address(socket.gethostbyname(query))
+    except (OSError, ValueError):
+        # NXDOMAIN (not listed), resolver failure or unusable answer.
+        return False
+    return answer in DNSBL_LISTED_NET and answer not in DNSBL_ERROR_NET
+
+
+def is_blacklisted(target, dnsbl_servers):
+    """Check whether *target* is listed on any of *dnsbl_servers*.
+
+    All zones are queried concurrently and every lookup is awaited before
+    the result is decided.
+
+    @param target: host name or IPv4 address taken from a URL.
+    @param dnsbl_servers: DNSBL zone names to query.
+    @return: ``(True, server)`` for the first zone (in list order) that
+        lists the target, ``(False, None)`` when no zone lists it or the
+        target cannot be resolved to a public IPv4 address.
+    """
+    try:
+        address = ipaddress.ip_address(socket.gethostbyname(target))
+    except (OSError, ValueError):
+        return False, None
+    if not address.is_global or not dnsbl_servers:
+        # Non-public addresses are never listed and an empty zone list has
+        # nothing to query.
+        return False, None
+
+    reversed_ip = ".".join(reversed(str(address).split(".")))
+    queries = [f"{reversed_ip}.{server}" for server in dnsbl_servers]
+    workers = min(MAX_DNSBL_WORKERS, len(queries))
+    with ThreadPoolExecutor(max_workers=workers) as pool:
+        answers = list(pool.map(_dnsbl_lookup, queries))
+    for server, listed in zip(dnsbl_servers, answers):
+        if listed:
+            return True, server
+    return False, None
+
+
+def extract_domains(url):
+    """Collect the host names referenced by *url*.
+
+    Besides the URL's own host, hosts of URLs embedded in query-string
+    values (redirector-style links) are included. Ports and credentials
+    are stripped.
+
+    @param url: URL string.
+    @return: set of host names; empty when the URL is malformed.
+    """
+    domains = set()
+    parsed_url = _parse_url(url)
+    if parsed_url is None:
+        return domains
+    if parsed_url.hostname:
+        domains.add(parsed_url.hostname)
+    for param_values in parse_qs(parsed_url.query).values():
+        for value in param_values:
+            param_url = _parse_url(value)
+            if param_url is not None and param_url.hostname:
+                domains.add(param_url.hostname)
+    return domains
+
+
+class PDF_Annot_URLs_Checker(Signature):
+    name = "pdf_annot_urls_checker"
+    description = "The PDF contains a Link Annotation"
+    severity = 2  # Default severity
+    categories = ["static"]
+    authors = ["Wassime BATTA"]
+    minimum = "0.5"
+
+    filter_analysistypes = set(["file", "static"])
+
+    malicious_tlds_file = os.path.join(CUCKOO_ROOT, "data/malicioustlds.txt")
+
+    dnsbl_servers = [
+        "zen.spamhaus.org",
+        "dnsbl.sorbs.net",
+        "bl.spamcop.net",
+        "cbl.abuseat.org",
+        "b.barracudacentral.org",
+        "dnsbl-1.uceprotect.net",
+        "dnsbl-2.uceprotect.net",
+        "dnsbl-3.uceprotect.net",
+        "dnsbl.dronebl.org",
+        "noptr.spamrats.com",
+        "multi.surbl.org",
+        "psbl.surriel.com",
+        "dnsbl.invaluement.com",
+        "dyna.spamrats.com",
+        "spam.spamrats.com",
+        "dul.dnsbl.sorbs.net",
+        "dynip.rothen.com",
+        "spamsources.fabel.dk",
+        "truncate.gbudb.net",
+        "db.wpbl.info",
+        "dnsbl.zapbl.net",
+        "combined.rbl.msrbl.net",
+        "tor.dan.me.uk",
+        "relays.nether.net",
+        "rbl.efnetrbl.org",
+        "bl.kundenserver.de",
+        "rbl.interserver.net",
+        "rbl.rbldns.ru",
+        "all.rbl.jp",
+        "sbl.spamhaus.org",
+        "xbl.spamhaus.org",
+        "pbl.spamhaus.org",
+        "dnsbl-4.uceprotect.net",
+        "dnsbl-5.uceprotect.net",
+        "dnsbl-6.uceprotect.net",
+        "spamrbl.imp.ch",
+        "bogons.cymru.com",
+        "rbl.realtimeblacklist.com",
+        "http.dnsbl.sorbs.net",
+    ]
+
+    def __init__(self, *args, **kwargs):
+        super(PDF_Annot_URLs_Checker, self).__init__(*args, **kwargs)
+        self.malicious_tlds = self.load_malicious_tlds()
+
+    def load_malicious_tlds(self):
+        """Load the TLD list, one ``.tld`` entry per line.
+
+        @return: set of lower-cased TLD suffixes; empty when the list file
+            cannot be read, so a missing file does not break the signature.
+        """
+        malicious_tlds = set()
+        try:
+            with open(self.malicious_tlds_file, "r") as f:
+                for line in f:
+                    line = line.strip().lower()
+                    if line.startswith("."):
+                        malicious_tlds.add(line)
+        except OSError as e:
+            log.warning("Unable to read %s: %s", self.malicious_tlds_file, e)
+        return malicious_tlds
+
+    def run(self):
+        """Inspect every link annotation URL of the analysed PDF.
+
+        @return: True when a link is suspicious (severity 6) or at least
+            one plain web link is present (severity 2), False otherwise.
+        """
+        found_malicious_extension = False
+        found_malicious_domain = False
+        found_domain_only = False
+        found_blacklist_ip = False
+
+        file_info = self.results.get("target", {}).get("file", {})
+        if "PDF" not in (file_info.get("type") or ""):
+            return False
+        annot_urls = (file_info.get("pdf") or {}).get("Annot_URLs") or []
+
+        malicious_tlds = tuple(self.malicious_tlds)
+        checked_hosts = set()
+        for entry in annot_urls:
+            entry_lower = entry.lower()
+            self.data.append({"url": entry})
+            if entry_lower.startswith("mailto:"):
+                continue
+
+            parsed = None
+            if entry_lower.startswith(("http://", "https://")):
+                parsed = _parse_url(entry_lower)
+
+            # A bare "http://host.sh" ends in a TLD, not in a file extension.
+            bare_host = parsed is not None and not (parsed.path or parsed.query or parsed.fragment)
+            if entry_lower.endswith(SUSPICIOUS_EXTENSIONS) and not bare_host:
+                found_malicious_extension = True
+
+            if parsed is None or not parsed.hostname:
+                continue
+            if parsed.hostname.endswith(malicious_tlds):
+                found_malicious_domain = True
+                continue
+
+            # Plain web link: report it with low severity and check every
+            # host it references (each one only once) against the DNSBLs.
+            found_domain_only = True
+            for target in sorted(extract_domains(entry_lower) - checked_hosts):
+                checked_hosts.add(target)
+                blacklisted, server = is_blacklisted(target, self.dnsbl_servers)
+                if blacklisted:
+                    found_blacklist_ip = True
+                    message = f"The domain or IP address {target} is blacklisted on the following server: {server} "
+                    self.data.append({"blacklisted": message})
+
+        if found_malicious_domain or found_malicious_extension or found_blacklist_ip:
+            self.severity = 6
+            self.description = "The PDF contains a Malicious Link Annotation"
+            return True
+        if found_domain_only:
+            self.severity = 2
+            self.description = "The PDF contains a Link Annotation"
+            return True
+        return False