Skip to content

Commit

Permalink
Create env var for scancode.io file scan timeout #593
Browse files Browse the repository at this point in the history
    * The environment variable SCANCODEIO_SCAN_FILE_TIMEOUT can be set to control the maximum time, in seconds, allowed for scanning a single file of a codebase (defaults to 120 seconds)

Signed-off-by: Jono Yang <jyang@nexb.com>
  • Loading branch information
JonoYang committed Feb 1, 2023
1 parent 4fe368f commit 0a7c248
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 7 deletions.
11 changes: 7 additions & 4 deletions scancodeio/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@
# 37 bytes gives you a string of 50 characters
secret_key = secrets.token_urlsafe(37)
with open(ENV_FILE, "w") as f:
f.write(f"SECRET_KEY=\"{secret_key}\"\n")
f.write("SCANCODEIO_DB_ENGINE=\"django.db.backends.sqlite3\"\n")
f.write(f"SCANCODEIO_DB_NAME=\"{sqlite_db_location}\"\n")
f.write(f"SCANCODEIO_WORKSPACE_LOCATION=\"{workspace_location}\"\n")
f.write(f'SECRET_KEY="{secret_key}"\n')
f.write('SCANCODEIO_DB_ENGINE="django.db.backends.sqlite3"\n')
f.write(f'SCANCODEIO_DB_NAME="{sqlite_db_location}"\n')
f.write(f'SCANCODEIO_WORKSPACE_LOCATION="{workspace_location}"\n')

if not Path(ENV_FILE).exists():
ENV_FILE = ROOT_DIR(".env")
Expand Down Expand Up @@ -99,6 +99,9 @@
# Default limit for "most common" entries in QuerySets.
SCANCODEIO_MOST_COMMON_LIMIT = env.int("SCANCODEIO_MOST_COMMON_LIMIT", default=7)

# Maximum time, in seconds, allowed for scanning a single file.
# Defaults to 120 seconds (2 minutes).
SCANCODEIO_SCAN_FILE_TIMEOUT = env.int("SCANCODEIO_SCAN_FILE_TIMEOUT", default=120)

# Application definition

INSTALLED_APPS = (
Expand Down
18 changes: 15 additions & 3 deletions scanpipe/pipes/scancode.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,9 @@ def get_resource_info(location):
return file_info


def _scan_resource(location, scanners, with_threading=True):
def _scan_resource(
location, scanners, timeout=scancode_cli.DEFAULT_TIMEOUT, with_threading=True
):
"""
Wraps the scancode-toolkit `scan_resource` method to support a timeout on direct
scanner function calls.
Expand All @@ -189,6 +191,7 @@ def _scan_resource(location, scanners, with_threading=True):
_, _, errors, _, results, _ = scancode_cli.scan_resource(
location_rid,
scanners,
timeout=timeout,
with_threading=with_threading,
)
return results, errors
Expand All @@ -201,13 +204,22 @@ def scan_file(location, with_threading=True):
Returns a dictionary of scan `results` and a list of `errors`.
"""
timeout = getattr(
settings, "SCANCODEIO_SCAN_FILE_TIMEOUT", scancode_cli.DEFAULT_TIMEOUT
)
print("timeout: " + str(timeout))
scanners = [
Scanner("copyrights", scancode_api.get_copyrights),
Scanner("licenses", partial(scancode_api.get_licenses, include_text=True)),
Scanner("emails", scancode_api.get_emails),
Scanner("urls", scancode_api.get_urls),
]
return _scan_resource(location, scanners, with_threading)
return _scan_resource(
location=location,
scanners=scanners,
timeout=timeout,
with_threading=with_threading,
)


def scan_for_package_data(location, with_threading=True):
Expand Down Expand Up @@ -423,7 +435,7 @@ def run_scancode(location, output_file, options, raise_on_error=False):
options_from_settings = getattr(settings, "SCANCODE_TOOLKIT_CLI_OPTIONS", [])
max_workers = get_max_workers(keep_available=1)

app_dir = os.environ.get('APPDIR')
app_dir = os.environ.get("APPDIR")
if app_dir:
# We are in an AppImage, and should look for scancode at usr/bin/scancode
scancode_executable_path = str(Path(app_dir) / "usr/bin/scancode")
Expand Down

0 comments on commit 0a7c248

Please sign in to comment.