diff --git a/unblob/cli.py b/unblob/cli.py index 4af43a8a57..15fd16496e 100644 --- a/unblob/cli.py +++ b/unblob/cli.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import sys from pathlib import Path -from typing import List, Optional, Tuple +from typing import Iterable, List, Optional, Tuple import click from structlog import get_logger @@ -16,6 +16,7 @@ from .processing import ( DEFAULT_DEPTH, DEFAULT_PROCESS_NUM, + DEFAULT_SKIP_MAGIC, ExtractionConfig, process_file, ) @@ -116,6 +117,16 @@ def __init__( help="Load plugins from the provided path.", show_default=True, ) +@click.option( + "-S", + "--skip-magic", + "skip_magic", + type=click.STRING, + default=DEFAULT_SKIP_MAGIC, + help="Skip processing files with given magic prefix", + show_default=True, + multiple=True, +) @click.option( "-p", "--process-num", @@ -146,6 +157,7 @@ def cli( extract_root: Path, depth: int, entropy_depth: int, + skip_magic: Iterable[str], process_num: int, keep_extracted_chunks: bool, verbose: int, @@ -164,6 +176,7 @@ def cli( max_depth=depth, entropy_depth=entropy_depth, entropy_plot=bool(verbose >= 3), + skip_magic=skip_magic, process_num=process_num, handlers=handlers, keep_extracted_chunks=keep_extracted_chunks, diff --git a/unblob/processing.py b/unblob/processing.py index a52a0b0704..87be90ae51 100644 --- a/unblob/processing.py +++ b/unblob/processing.py @@ -3,9 +3,10 @@ import statistics from operator import attrgetter from pathlib import Path -from typing import List +from typing import Iterable, List import attr +import magic import plotext as plt from structlog import get_logger @@ -32,6 +33,7 @@ DEFAULT_DEPTH = 10 DEFAULT_PROCESS_NUM = multiprocessing.cpu_count() +DEFAULT_SKIP_MAGIC = ("ELF", "JPEG", "GIF", "PNG") @attr.define(kw_only=True) @@ -40,6 +42,7 @@ class ExtractionConfig: entropy_depth: int entropy_plot: bool = False max_depth: int = DEFAULT_DEPTH + skip_magic: Iterable[str] = DEFAULT_SKIP_MAGIC process_num: int = DEFAULT_PROCESS_NUM keep_extracted_chunks: bool = False handlers: Handlers = BUILTIN_HANDLERS @@ -146,6 +149,18 @@ def __init__( def process(self): logger.debug("Processing file", path=self.task.path, size=self.size) + detect = magic.detect_from_filename(self.task.path) + skip = any( + [detect.name.startswith(pattern) for pattern in self.config.skip_magic] + ) + if skip: + logger.debug( + "Skip file from processing based on file-magic", + magic=detect.name, + path=self.task.path, + ) + return + with self.task.path.open("rb") as file: all_chunks = search_chunks_by_priority( self.task.path, file, self.size, self.config.handlers, self.result