diff --git a/CHANGELOG.md b/CHANGELOG.md index 76eb630ad..a0c4717d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,19 +3,22 @@ ## master (unreleased) ### New Features + +- add warning for dynamic .NET samples #1864 @v1bh475u - add lint for detecting duplicate features in capa-rules #2250 @v1bh475u - add span-of-calls scope to match features against a across a sliding window of API calls within a thread @williballenthin #2532 - add lint to catch rules that depend on other rules with impossible scope @williballenthin #2124 ### Breaking Changes +- remove `is_file_limitation_rule` method from `capa.rules.Rule` - add span-of-calls scope to rule format - capabilities functions return dataclasses instead of tuples ### New Rules (3) -- data-manipulation/encryption/rsa/encrypt-data-using-rsa-via-embedded-library Ana06 -- data-manipulation/encryption/use-bigint-function Ana06 +- data-manipulation/encryption/rsa/encrypt-data-using-rsa-via-embedded-library @Ana06 +- data-manipulation/encryption/use-bigint-function @Ana06 - nursery/dynamic-add-veh wballenthin@google.com - diff --git a/capa/capabilities/common.py b/capa/capabilities/common.py index 9acbd68f5..9a6e13b05 100644 --- a/capa/capabilities/common.py +++ b/capa/capabilities/common.py @@ -19,7 +19,7 @@ from typing import Optional from dataclasses import dataclass -from capa.rules import Scope, RuleSet +from capa.rules import Rule, Scope, RuleSet from capa.engine import FeatureSet, MatchResults from capa.features.address import NO_ADDRESS from capa.render.result_document import LibraryFunction, StaticFeatureCounts, DynamicFeatureCounts @@ -58,28 +58,6 @@ def find_file_capabilities( return FileCapabilities(features, matches, len(file_features)) -def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalone=True) -> bool: - file_limitation_rules = list(filter(lambda r: r.is_file_limitation_rule(), rules.rules.values())) - - for file_limitation_rule in file_limitation_rules: - if
file_limitation_rule.name not in capabilities: - continue - - logger.warning("-" * 80) - for line in file_limitation_rule.meta.get("description", "").split("\n"): - logger.warning(" %s", line) - logger.warning(" Identified via rule: %s", file_limitation_rule.name) - if is_standalone: - logger.warning(" ") - logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.") - logger.warning("-" * 80) - - # bail on first file limitation - return True - - return False - - @dataclass class Capabilities: matches: MatchResults @@ -100,3 +78,40 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro return find_dynamic_capabilities(ruleset, extractor, disable_progress=disable_progress, **kwargs) raise ValueError(f"unexpected extractor type: {extractor.__class__.__name__}") + + +def has_limitation(rules: list, capabilities: Capabilities | FileCapabilities, is_standalone: bool) -> bool: + + for rule in rules: + if rule.name not in capabilities.matches: + continue + logger.warning("-" * 80) + for line in rule.meta.get("description", "").split("\n"): + logger.warning(" %s", line) + logger.warning(" Identified via rule: %s", rule.name) + if is_standalone: + logger.warning(" ") + logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.") + logger.warning("-" * 80) + + # bail on first file limitation + return True + return False + + +def is_static_limitation_rule(r: Rule) -> bool: + return r.meta.get("namespace", "") == "internal/limitation/static" + + +def has_static_limitation(rules: RuleSet, capabilities: Capabilities | FileCapabilities, is_standalone=True) -> bool: + file_limitation_rules = list(filter(lambda r: is_static_limitation_rule(r), rules.rules.values())) + return has_limitation(file_limitation_rules, capabilities, is_standalone) + + +def is_dynamic_limitation_rule(r: Rule) -> bool: + return r.meta.get("namespace", "") == "internal/limitation/dynamic" + + +def 
has_dynamic_limitation(rules: RuleSet, capabilities: Capabilities | FileCapabilities, is_standalone=True) -> bool: + dynamic_limitation_rules = list(filter(lambda r: is_dynamic_limitation_rule(r), rules.rules.values())) + return has_limitation(dynamic_limitation_rules, capabilities, is_standalone) diff --git a/capa/ghidra/capa_explorer.py b/capa/ghidra/capa_explorer.py index b07b34ba8..a5c135573 100644 --- a/capa/ghidra/capa_explorer.py +++ b/capa/ghidra/capa_explorer.py @@ -247,7 +247,7 @@ def get_capabilities(): capabilities = capa.capabilities.common.find_capabilities(rules, extractor, True) - if capa.capabilities.common.has_file_limitation(rules, capabilities.matches, is_standalone=False): + if capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=False): popup("capa explorer encountered warnings during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821 logger.info("capa encountered warnings during analysis") diff --git a/capa/ghidra/capa_ghidra.py b/capa/ghidra/capa_ghidra.py index 8d095347d..462aa1b64 100644 --- a/capa/ghidra/capa_ghidra.py +++ b/capa/ghidra/capa_ghidra.py @@ -87,7 +87,7 @@ def run_headless(): meta.analysis.library_functions = capabilities.library_functions meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches) - if capa.capabilities.common.has_file_limitation(rules, capabilities.matches, is_standalone=True): + if capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=True): logger.info("capa encountered warnings during analysis") if args.json: @@ -137,7 +137,7 @@ def run_ui(): meta.analysis.library_functions = capabilities.library_functions meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches) - if capa.capabilities.common.has_file_limitation(rules, capabilities.matches, is_standalone=False): + if capa.capabilities.common.has_static_limitation(rules, 
capabilities, is_standalone=False): logger.info("capa encountered warnings during analysis") if verbose == "vverbose": diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index d82f76db5..36d104c89 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -820,7 +820,7 @@ def slot_progress_feature_extraction(text): capa.ida.helpers.inform_user_ida_ui("capa encountered file type warnings during analysis") - if capa.capabilities.common.has_file_limitation(ruleset, capabilities.matches, is_standalone=False): + if capa.capabilities.common.has_static_limitation(ruleset, capabilities, is_standalone=False): capa.ida.helpers.inform_user_ida_ui("capa encountered file limitation warnings during analysis") except Exception as e: logger.exception("Failed to check for file limitations (error: %s)", e) diff --git a/capa/main.py b/capa/main.py index f962e5708..34ea5ad1a 100644 --- a/capa/main.py +++ b/capa/main.py @@ -99,7 +99,13 @@ FORMAT_BINJA_DB, FORMAT_BINEXPORT2, ) -from capa.capabilities.common import Capabilities, find_capabilities, has_file_limitation, find_file_capabilities +from capa.capabilities.common import ( + Capabilities, + find_capabilities, + has_static_limitation, + find_file_capabilities, + has_dynamic_limitation, +) from capa.features.extractors.base_extractor import ( ProcessFilter, FunctionFilter, @@ -747,11 +753,12 @@ def get_file_extractors_from_cli(args, input_format: str) -> list[FeatureExtract raise ShouldExitError(E_INVALID_FILE_TYPE) from e -def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]) -> bool: +def find_static_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]) -> bool: """ args: args: The parsed command line arguments from `install_common_args`. + Only file-scoped feature extractors like pefile are used. Dynamic feature extractors can handle packed samples and do not need to be considered here. 
raises: @@ -770,7 +777,7 @@ def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: list[F # file limitations that rely on non-file scope won't be detected here. # nor on FunctionName features, because pefile doesn't support this. - found_file_limitation = has_file_limitation(rules, pure_file_capabilities.matches) + found_file_limitation = has_static_limitation(rules, pure_file_capabilities) if found_file_limitation: # bail if capa encountered file limitation e.g. a packed binary # do show the output in verbose mode, though. @@ -780,6 +787,31 @@ def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: list[F return found_file_limitation +def find_dynamic_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]) -> bool: + """ + Does the dynamic analysis describe some trace that we may not support well? + For example, .NET samples detonated in a sandbox, which may rely on different API patterns than we currently describe in our rules. + + args: + args: The parsed command line arguments from `install_common_args`. + + raises: + ShouldExitError: if the program is invoked incorrectly and should exit. + """ + found_dynamic_limitation = False + for file_extractor in file_extractors: + pure_dynamic_capabilities = find_file_capabilities(rules, file_extractor, {}) + found_dynamic_limitation = has_dynamic_limitation(rules, pure_dynamic_capabilities) + + if found_dynamic_limitation: + # bail if capa encountered a dynamic limitation, e.g. a .NET sample is detected + # do show the output in verbose mode, though.
+ if not (args.verbose or args.vverbose or args.json): + logger.debug("file limitation short circuit, won't analyze fully.") + raise ShouldExitError(E_FILE_LIMITATION) + return found_dynamic_limitation + + def get_signatures_from_cli(args, input_format: str, backend: str) -> list[Path]: if backend != BACKEND_VIV: logger.debug("skipping library code matching: only supported by the vivisect backend") @@ -964,11 +996,13 @@ def main(argv: Optional[list[str]] = None): ensure_input_exists_from_cli(args) input_format = get_input_format_from_cli(args) rules = get_rules_from_cli(args) - found_file_limitation = False + found_limitation = False + file_extractors = get_file_extractors_from_cli(args, input_format) if input_format in STATIC_FORMATS: # only static extractors have file limitations - file_extractors = get_file_extractors_from_cli(args, input_format) - found_file_limitation = find_file_limitations_from_cli(args, rules, file_extractors) + found_limitation = find_static_limitations_from_cli(args, rules, file_extractors) + if input_format in DYNAMIC_FORMATS: + found_limitation = find_dynamic_limitations_from_cli(args, rules, file_extractors) except ShouldExitError as e: return e.status_code @@ -1002,8 +1036,9 @@ def main(argv: Optional[list[str]] = None): ) meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches) - if isinstance(extractor, StaticFeatureExtractor) and found_file_limitation: + if found_limitation: # bail if capa's static feature extractor encountered file limitation e.g. a packed binary + # or capa's dynamic feature extractor encountered some limitation e.g. a dotnet sample # do show the output in verbose mode, though. 
if not (args.verbose or args.vverbose or args.json): return E_FILE_LIMITATION @@ -1056,7 +1091,7 @@ def ida_main(): meta.analysis.feature_counts = capabilities.feature_counts meta.analysis.library_functions = capabilities.library_functions - if has_file_limitation(rules, capabilities.matches, is_standalone=False): + if has_static_limitation(rules, capabilities, is_standalone=False): capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis") colorama.init(strip=True) @@ -1094,7 +1129,7 @@ def ghidra_main(): meta.analysis.feature_counts = capabilities.feature_counts meta.analysis.library_functions = capabilities.library_functions - if has_file_limitation(rules, capabilities.matches, is_standalone=False): + if has_static_limitation(rules, capabilities, is_standalone=False): logger.info("capa encountered warnings during analysis") print(capa.render.default.render(meta, rules, capabilities.matches)) diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 7c1025256..87e12582e 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -966,9 +966,6 @@ def _extract_subscope_rules_rec(self, statement): for child in statement.get_children(): yield from self._extract_subscope_rules_rec(child) - def is_file_limitation_rule(self) -> bool: - return self.meta.get("namespace", "") == "internal/limitation/file" - def is_subscope_rule(self): return bool(self.meta.get("capa/subscope-rule", False)) diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index 5ae5c5d08..d2c736838 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -160,7 +160,7 @@ def main(argv=None): meta = capa.loader.collect_metadata(argv, args.input_file, input_format, os_, args.rules, extractor, capabilities) meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches) - if capa.capabilities.common.has_file_limitation(rules, capabilities.matches): + if 
capa.capabilities.common.has_static_limitation(rules, capabilities): # bail if capa encountered file limitation e.g. a packed binary # do show the output in verbose mode, though. if not (args.verbose or args.vverbose or args.json):