Update doc for css validation #123

Merged: 9 commits, May 3, 2018
Changes from all commits
20 changes: 11 additions & 9 deletions clone.py
@@ -34,15 +34,15 @@
class Cloner(object):
def __init__(self, root, max_depth, css_validate):
self.visited_urls = []
self.root, self.error_page = self.add_scheme(root)
self.max_depth = max_depth
self.moved_root = None
if len(self.root.host) < 4:
sys.exit('invalid target {}'.format(self.root.host))
self.target_path = '/opt/snare/pages/{}'.format(self.root.host)

if not os.path.exists(self.target_path):
os.mkdir(self.target_path)
self.css_validate = css_validate
self.new_urls = Queue()
self.meta = {}
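For context on the constructor: add_scheme turns the raw target into a URL object whose host drives both the sanity check and the on-disk path. A minimal sketch of that flow, assuming yarl-style URLs (yarl ships with aiohttp, which clone.py already uses):

    from yarl import URL

    root = URL('http://example.com')
    print(root.host)                                # 'example.com', passes the len(host) < 4 check
    print('/opt/snare/pages/{}'.format(root.host))  # /opt/snare/pages/example.com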
@@ -152,7 +152,7 @@ async def get_body(self, session):
response = await session.get(current_url, headers={'Accept': 'text/html'})
content_type = response.content_type
data = await response.read()

except (aiohttp.ClientError, asyncio.TimeoutError) as client_error:
self.logger.error(client_error)
else:
@@ -165,8 +165,8 @@ async def get_body(self, session):
data = str(soup).encode()
with open(os.path.join(self.target_path, hash_name), 'wb') as index_fh:
index_fh.write(data)
if content_type == 'text/css':
css = cssutils.parseString(data, validate=self.css_validate)
for carved_url in cssutils.getUrls(css):
if carved_url.startswith('data'):
continue
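This hunk is where the new css_validate flag lands: cssutils.parseString accepts a validate argument, and cssutils.getUrls yields every url() reference in the parsed sheet. A standalone sketch with invented sample CSS:

    import cssutils

    data = b"body { background: url('/img/bg.png'); } .i { cursor: url(data:image/png;base64,AA==), auto; }"
    css = cssutils.parseString(data, validate=False)  # validate=True makes cssutils log invalid properties
    for carved_url in cssutils.getUrls(css):
        if carved_url.startswith('data'):  # data: URIs carry their payload inline, nothing to fetch
            continue
        print(carved_url)  # -> /img/bg.png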
@@ -191,15 +191,16 @@ async def run(self):
session = aiohttp.ClientSession()
try:
await self.new_urls.put((self.root, 0))
-await self.new_urls.put((self.error_page,0))
+await self.new_urls.put((self.error_page, 0))
await self.get_body(session)
except KeyboardInterrupt:
raise
finally:
with open(os.path.join(self.target_path, 'meta.json'), 'w') as mj:
json.dump(self.meta, mj)
await session.close()
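The meta.json written in the finally block is the lookup table that snare.py consults at serve time (parse_tanner_response below reads meta[requested_name]['hash'] and ['content_type']). A hypothetical entry, hash invented for illustration:

    # rough shape of meta.json after a clone
    meta = {
        '/index.html': {'hash': 'd98b6b5a0a82c8a7...', 'content_type': 'text/html'}
    }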



def str_to_bool(v):
if v.lower() == 'true':
return True
@@ -208,6 +209,7 @@ def str_to_bool(v):
else:
raise argparse.ArgumentTypeError('Boolean value expected')
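str_to_bool is an argparse type converter. Wiring it up looks roughly like this; the flag spelling mirrors the css--validate option documented below but is an assumption here, and the 'false' branch is hidden in the hunk above, so it is assumed too:

    import argparse

    def str_to_bool(v):  # condensed from the diff; the hidden middle presumably handles 'false'
        if v.lower() == 'true':
            return True
        if v.lower() == 'false':
            return False
        raise argparse.ArgumentTypeError('Boolean value expected')

    parser = argparse.ArgumentParser()
    parser.add_argument('--css-validate', type=str_to_bool, default=None)
    print(parser.parse_args(['--css-validate', 'True']).css_validate)  # True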


def main():
if os.getuid() != 0:
print('Clone has to be run as root!')
@@ -226,7 +228,7 @@ def main():
if args.log_path:
log_err = args.log_path + "clone.err"
else:
log_err = "/opt/snare/clone.err"
logger.Logger.create_clone_logger(log_err, __package__)
print("Error logs will be stored in {}\n".format(log_err))
try:
@@ -244,6 +246,6 @@ def main():
/ / / / / / / // |/ / __/ / /_/ /
/ /___ / /____ / /_/ // /| / /___/ _, _/
/_____//______//_____//_/ |_/_____/_/ |_|

""")
main()
1 change: 1 addition & 0 deletions docs/source/cloner.rst
@@ -12,3 +12,4 @@ Description

* **target** -- url of website to be cloned
* **max--depth** -- maximum depth of the web-pages desired to be cloned (optional), default: full depth of the site
* **css--validate** -- set whether css validation is required (optional), default: None
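With the new option documented, an invocation with validation enabled would look something like this (flag spellings inferred from the option list above; sudo because main() exits unless run as root):

    sudo python3 clone.py --target http://example.com --max-depth 2 --css-validate true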
20 changes: 11 additions & 9 deletions logger.py
@@ -1,49 +1,51 @@
import logging
import logging.handlers


class LevelFilter(logging.Filter):
"""Filters (lets through) all messages with level < LEVEL"""

def __init__(self, level):
self.level = level

def filter(self, record):
-return record.levelno < self.level # "<" instead of "<=": since logger.setLevel is inclusive, this should be exclusive
+return record.levelno < self.level
+# "<" instead of "<=": since logger.setLevel is inclusive, this should be exclusive


class Logger:

@staticmethod
def create_logger(debug_filename, err_filename, logger_name):
logger = logging.getLogger(logger_name)
logger.setLevel(logging.DEBUG)
logger.propagate = False
formatter = logging.Formatter(
fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

# ERROR log to 'snare.err'
error_log_handler = logging.handlers.RotatingFileHandler(err_filename, encoding='utf-8')
error_log_handler.setLevel(logging.ERROR)
error_log_handler.setFormatter(formatter)
logger.addHandler(error_log_handler)

# DEBUG log to 'snare.log'
debug_log_handler = logging.handlers.RotatingFileHandler(debug_filename, encoding='utf-8')
debug_log_handler.setLevel(logging.DEBUG)
debug_log_handler.setFormatter(formatter)
max_level_filter = LevelFilter(logging.ERROR)
debug_log_handler.addFilter(max_level_filter)
logger.addHandler(debug_log_handler)

return logger

@staticmethod
def create_clone_logger(err_filename, logger_name):
logger = logging.getLogger(logger_name)
formatter = logging.Formatter(
fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# ERROR log to 'clone.err'
error_log_handler = logging.handlers.RotatingFileHandler(err_filename, encoding='utf-8')
error_log_handler.setLevel(logging.ERROR)
error_log_handler.setFormatter(formatter)
logger.addHandler(error_log_handler)
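Taken together, the two handlers in create_logger split output by severity: ERROR and above goes to the err file, and LevelFilter keeps everything below ERROR in the debug file only. A minimal usage sketch, assuming logger.py is importable and the paths are writable:

    from logger import Logger

    log = Logger.create_logger('/tmp/snare.log', '/tmp/snare.err', 'demo')
    log.debug('lands in snare.log only')   # below ERROR, passes LevelFilter
    log.error('lands in snare.err only')   # ERROR, filtered out of the debug handler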

18 changes: 10 additions & 8 deletions snare.py
@@ -55,7 +55,7 @@ def __init__(self, meta, run_args, debug=False, keep_alive=75, **kwargs):
self.dir = '/opt/snare/pages/{}'.format(run_args.page_dir)

self.meta = meta

self.logger = logging.getLogger(__name__)

self.sroute = StaticRoute(
@@ -223,9 +223,11 @@ async def parse_tanner_response(self, requested_name, detection):
content = None
status_code = 200
headers = {}
-p = re.compile('/+') # Creating a regex object for the pattern of multiple contiguous forward slashes
-requested_name = p.sub('/', requested_name) # Substituting all occurrences of the pattern with single forward slash
+# Creating a regex object for the pattern of multiple contiguous forward slashes
+p = re.compile('/+')
+# Substituting all occurrences of the pattern with single forward slash
+requested_name = p.sub('/', requested_name)

if detection['type'] == 1:
query_start = requested_name.find('?')
if query_start != -1:
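The slash-collapsing step above normalizes paths before the meta lookup, so requests like //index.html and /index.html resolve to the same entry. In isolation:

    import re

    p = re.compile('/+')                       # one or more consecutive forward slashes
    print(p.sub('/', '//cats///index.html'))   # -> /cats/index.html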
@@ -235,7 +237,7 @@ async def parse_tanner_response(self, requested_name, detection):
requested_name = self.run_args.index_page
try:
if requested_name[-1] == '/':
requested_name = requested_name[:-1]
requested_name = unquote(requested_name)
file_name = self.meta[requested_name]['hash']
content_type = self.meta[requested_name]['content_type']
@@ -248,7 +250,7 @@ async def parse_tanner_response(self, requested_name, detection):
with open(path, 'rb') as fh:
content = fh.read()
content = await self.handle_html_content(content)

else:
path = os.path.join(self.dir, file_name)
if os.path.isfile(path):
@@ -464,9 +466,9 @@ async def check_tanner():
base_page_path = '/opt/snare/pages/'
config = configparser.ConfigParser()
config.read(os.path.join(base_path, args.config))

log_debug = args.log_dir + "snare.log"
log_err = args.log_dir + "snare.err"
logger.Logger.create_logger(log_debug, log_err, __package__)

if args.list_pages:
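One note on the log paths here and in clone.py's main(): they are built by plain string concatenation, so the configured directory must end with a slash. A quick illustration, with os.path.join as the slash-agnostic alternative:

    import os

    log_dir = '/opt/snare/'                          # trailing slash required for concatenation
    print(log_dir + 'snare.err')                     # /opt/snare/snare.err
    print(os.path.join('/opt/snare', 'snare.err'))   # same path without the trailing-slash requirement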