diff --git a/clone.py b/clone.py index c7b1e0fc..27fc5b11 100644 --- a/clone.py +++ b/clone.py @@ -34,7 +34,7 @@ class Cloner(object): def __init__(self, root, max_depth, css_validate): self.visited_urls = [] - self.root, self.error_page = self.add_scheme(root) + self.root, self.error_page = self.add_scheme(root) self.max_depth = max_depth self.moved_root = None if len(self.root.host) < 4: @@ -42,7 +42,7 @@ def __init__(self, root, max_depth, css_validate): self.target_path = '/opt/snare/pages/{}'.format(self.root.host) if not os.path.exists(self.target_path): - os.mkdir(self.target_path) + os.mkdir(self.target_path) self.css_validate = css_validate self.new_urls = Queue() self.meta = {} @@ -152,7 +152,7 @@ async def get_body(self, session): response = await session.get(current_url, headers={'Accept': 'text/html'}) content_type = response.content_type data = await response.read() - + except (aiohttp.ClientError, asyncio.TimeoutError) as client_error: self.logger.error(client_error) else: @@ -165,8 +165,8 @@ async def get_body(self, session): data = str(soup).encode() with open(os.path.join(self.target_path, hash_name), 'wb') as index_fh: index_fh.write(data) - if content_type == 'text/css': - css = cssutils.parseString(data, validate=self.css_validate) + if content_type == 'text/css': + css = cssutils.parseString(data, validate=self.css_validate) for carved_url in cssutils.getUrls(css): if carved_url.startswith('data'): continue @@ -191,7 +191,7 @@ async def run(self): session = aiohttp.ClientSession() try: await self.new_urls.put((self.root, 0)) - await self.new_urls.put((self.error_page,0)) + await self.new_urls.put((self.error_page, 0)) await self.get_body(session) except KeyboardInterrupt: raise @@ -199,7 +199,8 @@ async def run(self): with open(os.path.join(self.target_path, 'meta.json'), 'w') as mj: json.dump(self.meta, mj) await session.close() - + + def str_to_bool(v): if v.lower() == 'true': return True @@ -208,6 +209,7 @@ def str_to_bool(v): else: raise argparse.ArgumentTypeError('Boolean value expected') + def main(): if os.getuid() != 0: print('Clone has to be run as root!') @@ -226,7 +228,7 @@ def main(): if args.log_path: log_err = args.log_path + "clone.err" else: - log_err = "/opt/snare/clone.err" + log_err = "/opt/snare/clone.err" logger.Logger.create_clone_logger(log_err, __package__) print("Error logs will be stored in {}\n".format(log_err)) try: @@ -244,6 +246,6 @@ def main(): / / / / / / / // |/ / __/ / /_/ / / /___ / /____ / /_/ // /| / /___/ _, _/ /_____//______//_____//_/ |_/_____/_/ |_| - + """) main() diff --git a/docs/source/cloner.rst b/docs/source/cloner.rst index 3011f6bc..91d38f2f 100644 --- a/docs/source/cloner.rst +++ b/docs/source/cloner.rst @@ -12,3 +12,4 @@ Description * **target** -- url of website to be cloned * **max--depth** -- maximum depth of the web-pages desired to be cloned (optional), default: full depth of the site +* **css--validate** -- set whether css validation is required (optional), default: None diff --git a/logger.py b/logger.py index e0b146e1..4f877893 100644 --- a/logger.py +++ b/logger.py @@ -1,6 +1,7 @@ import logging import logging.handlers + class LevelFilter(logging.Filter): """Filters (lets through) all messages with level < LEVEL""" @@ -8,24 +9,26 @@ def __init__(self, level): self.level = level def filter(self, record): - return record.levelno < self.level # "<" instead of "<=": since logger.setLevel is inclusive, this should be exclusive + return record.levelno < self.level + # "<" instead of "<=": since logger.setLevel is inclusive, this should be exclusive + class Logger: - + @staticmethod def create_logger(debug_filename, err_filename, logger_name): logger = logging.getLogger(logger_name) logger.setLevel(logging.DEBUG) logger.propagate = False formatter = logging.Formatter( - fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') - + fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + # ERROR log to 'snare.err' error_log_handler = logging.handlers.RotatingFileHandler(err_filename, encoding='utf-8') error_log_handler.setLevel(logging.ERROR) error_log_handler.setFormatter(formatter) logger.addHandler(error_log_handler) - + # DEBUG log to 'snare.log' debug_log_handler = logging.handlers.RotatingFileHandler(debug_filename, encoding='utf-8') debug_log_handler.setLevel(logging.DEBUG) @@ -33,17 +36,16 @@ def create_logger(debug_filename, err_filename, logger_name): max_level_filter = LevelFilter(logging.ERROR) debug_log_handler.addFilter(max_level_filter) logger.addHandler(debug_log_handler) - + return logger - + @staticmethod def create_clone_logger(err_filename, logger_name): logger = logging.getLogger(logger_name) formatter = logging.Formatter( - fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') # ERROR log to 'clone.err' error_log_handler = logging.handlers.RotatingFileHandler(err_filename, encoding='utf-8') error_log_handler.setLevel(logging.ERROR) error_log_handler.setFormatter(formatter) logger.addHandler(error_log_handler) - \ No newline at end of file diff --git a/snare.py b/snare.py index 2894a53e..57f509f5 100644 --- a/snare.py +++ b/snare.py @@ -55,7 +55,7 @@ def __init__(self, meta, run_args, debug=False, keep_alive=75, **kwargs): self.dir = '/opt/snare/pages/{}'.format(run_args.page_dir) self.meta = meta - + self.logger = logging.getLogger(__name__) self.sroute = StaticRoute( @@ -223,9 +223,11 @@ async def parse_tanner_response(self, requested_name, detection): content = None status_code = 200 headers = {} - p = re.compile('/+') # Creating a regex object for the pattern of multiple contiguous forward slashes - requested_name = p.sub('/', requested_name) # Substituting all occurrences of the pattern with single forward slash - + # Creating a regex object for the pattern of multiple contiguous forward slashes + p = re.compile('/+') + # Substituting all occurrences of the pattern with single forward slash + requested_name = p.sub('/', requested_name) + if detection['type'] == 1: query_start = requested_name.find('?') if query_start != -1: @@ -235,7 +237,7 @@ async def parse_tanner_response(self, requested_name, detection): requested_name = self.run_args.index_page try: if requested_name[-1] == '/': - requested_name = requested_name[:-1] + requested_name = requested_name[:-1] requested_name = unquote(requested_name) file_name = self.meta[requested_name]['hash'] content_type = self.meta[requested_name]['content_type'] @@ -248,7 +250,7 @@ async def parse_tanner_response(self, requested_name, detection): with open(path, 'rb') as fh: content = fh.read() content = await self.handle_html_content(content) - + else: path = os.path.join(self.dir, file_name) if os.path.isfile(path): @@ -464,9 +466,9 @@ async def check_tanner(): base_page_path = '/opt/snare/pages/' config = configparser.ConfigParser() config.read(os.path.join(base_path, args.config)) - + log_debug = args.log_dir + "snare.log" - log_err = args.log_dir + "snare.err" + log_err = args.log_dir + "snare.err" logger.Logger.create_logger(log_debug, log_err, __package__) if args.list_pages: