From c099b3d0939d8b7812e60f1d5adc4bcebfae3ee2 Mon Sep 17 00:00:00 2001
From: David Korczynski
Date: Thu, 2 Jan 2025 10:22:32 -0800
Subject: [PATCH] core: avoid needing to specify language for analysis

Signed-off-by: David Korczynski
---
 src/fuzz_introspector/commands.py           |  3 ++
 src/fuzz_introspector/frontends/oss_fuzz.py | 39 +++-------------
 src/fuzz_introspector/utils.py              | 52 +++++++++++++++++++++
 3 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/src/fuzz_introspector/commands.py b/src/fuzz_introspector/commands.py
index 9278b0033..9b82c017c 100644
--- a/src/fuzz_introspector/commands.py
+++ b/src/fuzz_introspector/commands.py
@@ -46,6 +46,9 @@ def correlate_binaries_to_logs(binaries_dir: str) -> int:
 
 def end_to_end(args) -> int:
     """Runs both frontend and backend."""
+    if not args.language:
+        args.language = utils.detect_language(args.target_dir)
+
     oss_fuzz.analyse_folder(args.language, args.target_dir,
                             'LLVMFuzzerTestOneInput')
 
diff --git a/src/fuzz_introspector/frontends/oss_fuzz.py b/src/fuzz_introspector/frontends/oss_fuzz.py
index 1994c0854..cca89f04b 100644
--- a/src/fuzz_introspector/frontends/oss_fuzz.py
+++ b/src/fuzz_introspector/frontends/oss_fuzz.py
@@ -190,40 +190,13 @@ def process_jvm_project(target_dir, entrypoint, out):
             f.write(f'Call tree\n{calltree}')
 
 
-def process_rust_project(target_dir, out):
-    """Process a project in Rust based language"""
-    # Extract rust source files
-    logger.info('Going Rust route')
-    source_files = []
-    source_files = frontend_rust.capture_source_files_in_tree(target_dir)
-
-    # Process tree sitter for rust source files
-    logger.info('Found %d files to include in analysis', len(source_files))
-    logger.info('Loading tree-sitter trees')
-    source_codes = frontend_rust.load_treesitter_trees(source_files)
-
-    # Create and dump project
-    logger.info('Creating base project.')
-    project = frontend_rust.Project(source_codes)
-
-    # Process calltree and method data
-    for harness in project.get_source_codes_with_harnesses():
-        harness_name = harness.source_file.split('/')[-1].split('.')[0]
-
-        # Method data
-        logger.info(f'Dump methods for {harness_name}')
-        target = os.path.join(out, f'fuzzerLogFile-{harness_name}.data.yaml')
-        project.dump_module_logic(target, harness_name)
-
-        # Calltree
-        logger.info(f'Extracting calltree for {harness_name}')
-        calltree = project.extract_calltree(harness.source_file, harness)
-        target = os.path.join(out, f'fuzzerLogFile-{harness_name}.data')
-        with open(target, 'w', encoding='utf-8') as f:
-            f.write(f'Call tree\n{calltree}')
-
+def analyse_folder(language: str = '',
+                   directory: str = '',
+                   entrypoint: str = '',
+                   out='',
+                   module_only=False):
+    """Runs a full frontend analysis on a given directory"""
 
-def analyse_folder(language, directory, entrypoint, out='', module_only=False):
     if language == 'c':
         process_c_project(directory, entrypoint, out, module_only)
     if language.lower() in ['cpp', 'c++']:
diff --git a/src/fuzz_introspector/utils.py b/src/fuzz_introspector/utils.py
index 9e6295e28..0bf707559 100644
--- a/src/fuzz_introspector/utils.py
+++ b/src/fuzz_introspector/utils.py
@@ -21,6 +21,7 @@
 import re
 import shutil
 import yaml
+import pathlib
 
 from bs4 import BeautifulSoup
 
@@ -564,3 +565,54 @@ def locate_rust_fuzz_item(funcname: str, item_list: List[str]) -> str:
             break
 
     return ''
+
+
+def detect_language(directory: str) -> str:
+    """Given a folder finds the likely programming language of the project.
+
+    Walks `directory` recursively and counts, per language, the files whose
+    extension is associated with that language. Directories located under
+    one of the `paths_to_avoid` prefixes are skipped.
+
+    Args:
+        directory: Root folder of the project to inspect.
+
+    Returns:
+        The key of the language with the most matching files ('c', 'cpp',
+        'jvm' or 'rust'), or '' when no file has a known extension.
+    """
+    # '.h' is listed for both C and C++, so a header contributes to both
+    # counts and the remaining source files decide between the two.
+    language_extensions = {
+        'c': ['.c', '.h'],
+        'cpp': ['.cpp', '.cc', '.c++', '.h', '.hpp'],
+        'jvm': ['.java'],
+        'rust': ['.rs']
+    }
+    paths_to_avoid = ('/src/aflplusplus', '/src/honggfuzz', '/src/libfuzzer',
+                      '/src/fuzztest')
+
+    # Pre-seed every language so the dict order is the declaration order
+    # above rather than the order in which os.walk happens to find files.
+    language_counts = {language: 0 for language in language_extensions}
+
+    for dirpath, dirnames, filenames in os.walk(directory):
+        if dirpath.startswith(paths_to_avoid):
+            # Prune in place so os.walk does not descend into the subtree.
+            dirnames[:] = []
+            continue
+        for filename in filenames:
+            suffix = pathlib.Path(filename).suffix
+            for language, extensions in language_extensions.items():
+                if suffix in extensions:
+                    language_counts[language] += 1
+
+    max_lang = ''
+    max_count = 0
+    for language, count in language_counts.items():
+        # On equal counts the language declared later wins; languages without
+        # any matching file are never selected.
+        if count and count >= max_count:
+            max_count = count
+            max_lang = language
+    return max_lang