Skip to content
This repository has been archived by the owner on Nov 14, 2023. It is now read-only.

Commit

Permalink
Added validation for URLs which used as remote data source (cvat-ai#4387
Browse files Browse the repository at this point in the history
)
  • Loading branch information
Andrey Zhavoronkov authored and mikhail-treskin committed Jun 22, 2022
1 parent f92db10 commit 99d3b9b
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Security
- Updated ELK to 6.8.23 which uses log4j 2.17.1 (<https://github.com/openvinotoolkit/cvat/pull/4206>)
- Added validation for URLs which used as remote data source (<https://github.com/openvinotoolkit/cvat/pull/4387>)

## \[1.7.0] - 2021-11-15

Expand Down
43 changes: 43 additions & 0 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import itertools
import os
import sys
from rest_framework.serializers import ValidationError
import rq
import re
import shutil
Expand All @@ -14,6 +15,8 @@
from urllib import parse as urlparse
from urllib import request as urlrequest
import requests
import ipaddress
import dns.resolver
import django_rq

from django.conf import settings
Expand Down Expand Up @@ -203,13 +206,53 @@ def _validate_manifest(manifests, root_dir):
raise Exception('Invalid manifest was uploaded')
return None

def _validate_url(url):
def _validate_ip_address(ip_address):
if not ip_address.is_global:
raise ValidationError('Non public IP address \'{}\' is provided!'.format(ip_address))

ALLOWED_SCHEMES = ['http', 'https']

parsed_url = urlparse.urlparse(url)

if parsed_url.scheme not in ALLOWED_SCHEMES:
raise ValueError('Unsupported URL sheme: {}. Only http and https are supported'.format(parsed_url.scheme))

try:
ip_address = ipaddress.ip_address(parsed_url.hostname)
_validate_ip_address(ip_address)
except ValueError as _:
ip_v4_records = None
ip_v6_records = None
try:
ip_v4_records = dns.resolver.query(parsed_url.hostname, 'A')
for record in ip_v4_records:
_validate_ip_address(ipaddress.ip_address(record.to_text()))
except ValidationError:
raise
except Exception as e:
slogger.glob.info('Cannot get A record for domain \'{}\': {}'.format(parsed_url.hostname, e))

try:
ip_v6_records = dns.resolver.query(parsed_url.hostname, 'AAAA')
for record in ip_v6_records:
_validate_ip_address(ipaddress.ip_address(record.to_text()))
except ValidationError:
raise
except Exception as e:
slogger.glob.info('Cannot get AAAA record for domain \'{}\': {}'.format(parsed_url.hostname, e))

if not ip_v4_records and not ip_v6_records:
raise ValidationError('Cannot resolve IP address for domain \'{}\''.format(parsed_url.hostname))

def _download_data(urls, upload_dir):
job = rq.get_current_job()
local_files = {}
for url in urls:
name = os.path.basename(urlrequest.url2pathname(urlparse.urlparse(url).path))
if name in local_files:
raise Exception("filename collision: {}".format(name))
_validate_url(url)
slogger.glob.info("Downloading: {}".format(url))
job.meta['status'] = '{} is being downloaded..'.format(url)
job.save_meta()
Expand Down
1 change: 1 addition & 0 deletions cvat/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ datumaro==0.2.0 --no-binary=datumaro
urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability
natsort==8.0.0
mistune>=2.0.1 # not directly required, pinned by Snyk to avoid a vulnerability
dnspython==2.2.0

0 comments on commit 99d3b9b

Please sign in to comment.