Skip to content

Commit

Permalink
Merge pull request #1308 from yogeshojha/1176-bug-error-creating-eyew…
Browse files Browse the repository at this point in the history
…itness-screenshots-with-a-non-standard-url

Added validators to validate URLs; fixes #1176
  • Loading branch information
yogeshojha authored Jul 14, 2024
2 parents 77afd9f + 9f4b741 commit 519547a
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
3 changes: 2 additions & 1 deletion web/reNgine/common_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from dashboard.models import *
from startScan.models import *
from targetApp.models import *
from reNgine.utilities import is_valid_url


logger = get_task_logger(__name__)
Expand Down Expand Up @@ -334,7 +335,7 @@ def get_http_urls(
endpoints = [e for e in endpoints if e.is_alive]

# Grab only http_url from endpoint objects
endpoints = [e.http_url for e in endpoints]
endpoints = [e.http_url for e in endpoints if is_valid_url(e.http_url)]
if ignore_files: # ignore all files
extensions_path = f'{RENGINE_HOME}/fixtures/extensions.txt'
with open(extensions_path, 'r') as f:
Expand Down
28 changes: 28 additions & 0 deletions web/reNgine/utilities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import validators

from celery._state import get_current_task
from celery.utils.log import ColorFormatter
Expand Down Expand Up @@ -86,3 +87,30 @@ def replace_nulls(obj):
return {key: replace_nulls(value) for key, value in obj.items()}
else:
return obj


def is_valid_url(url, validate_only_http_scheme=True):
    """
    Validate a URL/endpoint.

    Args:
        url (str): The URL to validate.
        validate_only_http_scheme (bool): If True, only URLs that start
            with ``http://`` or ``https://`` are considered valid.

    Returns:
        bool: True if the URL is valid, False otherwise. Empty, missing
            and non-string values are reported as invalid instead of
            raising.
    """
    # Nothing to validate: None, '' and any non-string value (bytes, int, ...)
    # are rejected up front. The type check keeps the substring test below
    # from raising TypeError on truthy non-string input.
    if not url or not isinstance(url, str):
        return False

    # URLs containing a space are not valid URLs
    if ' ' in url:
        return False

    # Delegate the structural check to the validators package; only the
    # truthiness of its result is used.
    if not validators.url(url):
        return False

    # A structurally valid URL may still use a scheme (e.g. ftp://) that the
    # caller does not want, so optionally restrict to HTTP/HTTPS.
    if validate_only_http_scheme:
        return url.startswith(('http://', 'https://'))
    return True

0 comments on commit 519547a

Please sign in to comment.