Skip to content

Commit

Permalink
Skip files from being processed based on their magic
Browse files Browse the repository at this point in the history
  • Loading branch information
Marton ILLES committed Mar 26, 2022
1 parent 3fae4be commit adbb4e6
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
13 changes: 13 additions & 0 deletions unblob/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .processing import (
DEFAULT_DEPTH,
DEFAULT_PROCESS_NUM,
DEFAULT_SKIP_MAGIC,
ExtractionConfig,
process_file,
)
Expand Down Expand Up @@ -116,6 +117,16 @@ def __init__(
help="Load plugins from the provided path.",
show_default=True,
)
@click.option(
"-S",
"--skip-magic",
"skip_magic",
type=click.STRING,
default=DEFAULT_SKIP_MAGIC,
help="Skip processing files with given magic prefix",
show_default=True,
multiple=True,
)
@click.option(
"-p",
"--process-num",
Expand Down Expand Up @@ -146,6 +157,7 @@ def cli(
extract_root: Path,
depth: int,
entropy_depth: int,
skip_magic: List[str],
process_num: int,
keep_extracted_chunks: bool,
verbose: int,
Expand All @@ -164,6 +176,7 @@ def cli(
max_depth=depth,
entropy_depth=entropy_depth,
entropy_plot=bool(verbose >= 3),
skip_magic=skip_magic,
process_num=process_num,
handlers=handlers,
keep_extracted_chunks=keep_extracted_chunks,
Expand Down
15 changes: 15 additions & 0 deletions unblob/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import List

import attr
import magic
import plotext as plt
from structlog import get_logger

Expand All @@ -32,6 +33,7 @@

# Default for ExtractionConfig.max_depth (the CLI passes its `depth` value there).
DEFAULT_DEPTH = 10
# Default for ExtractionConfig.process_num: the CPU count reported by
# multiprocessing.cpu_count() at import time.
DEFAULT_PROCESS_NUM = multiprocessing.cpu_count()
# Default for ExtractionConfig.skip_magic / the --skip-magic CLI option.
# Each entry is a prefix: a file whose detected file-magic name starts with
# any of these strings is skipped instead of being processed.
DEFAULT_SKIP_MAGIC = ["ELF", "JPEG", "GIF", "PNG"]


@attr.define(kw_only=True)
Expand All @@ -40,6 +42,7 @@ class ExtractionConfig:
entropy_depth: int
entropy_plot: bool = False
max_depth: int = DEFAULT_DEPTH
skip_magic: List[str] = DEFAULT_SKIP_MAGIC
process_num: int = DEFAULT_PROCESS_NUM
keep_extracted_chunks: bool = False
handlers: Handlers = BUILTIN_HANDLERS
Expand Down Expand Up @@ -146,6 +149,18 @@ def __init__(
def process(self):
logger.debug("Processing file", path=self.task.path, size=self.size)

detect = magic.detect_from_filename(self.task.path)
skip = any(
[detect.name.startswith(pattern) for pattern in self.config.skip_magic]
)
if skip:
logger.debug(
"Skip file from processing based on file-magic",
magic=detect.name,
path=self.task.path,
)
return

with self.task.path.open("rb") as file:
all_chunks = search_chunks_by_priority(
self.task.path, file, self.size, self.config.handlers, self.result
Expand Down

0 comments on commit adbb4e6

Please sign in to comment.