Skip to content

Commit

Permalink
core: avoid needing to specify language for analysis
Browse files Browse the repository at this point in the history
Signed-off-by: David Korczynski <david@adalogics.com>
  • Loading branch information
DavidKorczynski committed Jan 2, 2025
1 parent 4bd93b3 commit c099b3d
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 33 deletions.
3 changes: 3 additions & 0 deletions src/fuzz_introspector/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def correlate_binaries_to_logs(binaries_dir: str) -> int:

def end_to_end(args) -> int:
"""Runs both frontend and backend."""
if not args.language:
args.language = utils.detect_language(args.target_dir)

oss_fuzz.analyse_folder(args.language, args.target_dir,
'LLVMFuzzerTestOneInput')

Expand Down
39 changes: 6 additions & 33 deletions src/fuzz_introspector/frontends/oss_fuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,40 +190,13 @@ def process_jvm_project(target_dir, entrypoint, out):
f.write(f'Call tree\n{calltree}')


def process_rust_project(target_dir, out):
    """Run the Rust frontend on ``target_dir`` and write results into ``out``.

    For every harness source reported by the frontend project, two files are
    written to ``out``: ``fuzzerLogFile-<harness>.data.yaml`` (produced by
    ``dump_module_logic``) and ``fuzzerLogFile-<harness>.data`` (the extracted
    call tree prefixed with a ``Call tree`` header line).
    """
    logger.info('Going Rust route')

    # Collect the Rust sources located under the target directory.
    rust_files = frontend_rust.capture_source_files_in_tree(target_dir)
    logger.info('Found %d files to include in analysis', len(rust_files))

    # Parse the collected sources with the frontend's tree-sitter loader.
    logger.info('Loading tree-sitter trees')
    parsed_sources = frontend_rust.load_treesitter_trees(rust_files)

    # Build the project object that the per-harness steps operate on.
    logger.info('Creating base project.')
    project = frontend_rust.Project(parsed_sources)

    for harness in project.get_source_codes_with_harnesses():
        # Harness name: last '/'-separated path component, cut at its
        # first dot.
        file_name = harness.source_file.split('/')[-1]
        harness_name = file_name.split('.')[0]
        output_prefix = os.path.join(out, f'fuzzerLogFile-{harness_name}')

        # Method data goes to the YAML file.
        logger.info(f'Dump methods for {harness_name}')
        project.dump_module_logic(f'{output_prefix}.data.yaml', harness_name)

        # The call tree goes to the plain .data file.
        logger.info(f'Extracting calltree for {harness_name}')
        calltree = project.extract_calltree(harness.source_file, harness)
        with open(f'{output_prefix}.data', 'w', encoding='utf-8') as handle:
            handle.write(f'Call tree\n{calltree}')

def analyse_folder(language: str = '',
directory: str = '',
entrypoint: str = '',
out='',
module_only=False):
"""Runs a full frontend analysis on a given directory"""

def analyse_folder(language, directory, entrypoint, out='', module_only=False):
if language == 'c':
process_c_project(directory, entrypoint, out, module_only)
if language.lower() in ['cpp', 'c++']:
Expand Down
33 changes: 33 additions & 0 deletions src/fuzz_introspector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import re
import shutil
import yaml
import pathlib

from bs4 import BeautifulSoup

Expand Down Expand Up @@ -564,3 +565,35 @@ def locate_rust_fuzz_item(funcname: str, item_list: List[str]) -> str:
break

return ''


def detect_language(directory) -> str:
    """Given a folder finds the likely programming language of the project.

    Walks ``directory`` recursively and counts, per language, the files whose
    extension belongs to that language. A file may count towards more than
    one language (``.h`` counts for both ``c`` and ``cpp``).

    Args:
        directory: Root folder to scan.

    Returns:
        One of ``'c'``, ``'cpp'``, ``'jvm'`` or ``'rust'`` for the language
        with the most matching files, or ``''`` when no file matches. On a
        tie the language declared last in the extension table wins, so the
        result does not depend on the order in which the filesystem lists
        files.
    """
    language_extensions = {
        'c': ['.c', '.h'],
        'cpp': ['.cpp', '.cc', '.c++', '.h', '.hpp'],
        'jvm': ['.java'],
        'rust': ['.rs']
    }
    # A tuple so it can be handed directly to str.startswith.
    paths_to_avoid = ('/src/aflplusplus', '/src/honggfuzz', '/src/libfuzzer',
                      '/src/fuzztest')

    # Seed every language up front: iteration order is then the declaration
    # order above rather than the order files happen to be discovered in.
    language_counts = {language: 0 for language in language_extensions}

    for dirpath, dirnames, filenames in os.walk(directory):
        if dirpath.startswith(paths_to_avoid):
            # Every descendant shares the excluded prefix, so stop os.walk
            # from descending instead of visiting and skipping each one.
            dirnames.clear()
            continue
        for filename in filenames:
            suffix = pathlib.Path(filename).suffix
            for language, extensions in language_extensions.items():
                if suffix in extensions:
                    language_counts[language] += 1

    max_lang = ''
    max_count = 0
    for language, count in language_counts.items():
        # `>=` keeps the original "later entry wins a tie" rule; the
        # `count` guard keeps '' as the answer when nothing matched.
        if count and count >= max_count:
            max_count = count
            max_lang = language
    return max_lang

0 comments on commit c099b3d

Please sign in to comment.