Skip to content

Commit

Permalink
Merge branch 'main' into generalise-source-code-file
Browse files Browse the repository at this point in the history
Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
  • Loading branch information
arthurscchan authored Jan 17, 2025
2 parents eafe6ae + 71b4ee5 commit daf47b6
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 100 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,6 @@ jobs:
yapf -d -r ./tools/web-fuzzing-introspection/app/webapp/
yapf -d ./tools/web-fuzzing-introspection/app/*.py
yapf -d -r ./tools/web-fuzzing-introspection/app/static/assets/db
- name: pylint
run: |
cd src && pylint --recursive=y fuzz_introspector main.py || true
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ matplotlib==3.7.5
PyYAML==6.0
soupsieve==2.2.1
yapf==0.32.0
pylint==3.0.0
flake8
pep8
mypy
Expand Down
24 changes: 17 additions & 7 deletions src/fuzz_introspector/analyses/optimal_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
"""Analysis for identifying optimal targets"""

import os
import copy
import json
import logging
Expand Down Expand Up @@ -166,10 +167,13 @@ def analysis_func(self,

# Create section for how the state of the project will be if
# the optimal target functions are hit.
html_string += self.get_consequential_section(new_profile, conclusions,
html_string += self.get_consequential_section(new_profile,
conclusions,
tables,
table_of_contents,
coverage_url, basefolder)
coverage_url,
basefolder,
out_dir=out_dir)

logger.info(f" - Completed analysis {self.get_name()}")
html_string += "</div>" # .collapsible
Expand Down Expand Up @@ -379,10 +383,14 @@ def create_top_summary_info(
return html_string

def get_consequential_section(
self, new_profile: project_profile.MergedProjectProfile,
conclusions: List[html_helpers.HTMLConclusion], tables: List[str],
self,
new_profile: project_profile.MergedProjectProfile,
conclusions: List[html_helpers.HTMLConclusion],
tables: List[str],
table_of_contents: html_helpers.HtmlTableOfContents,
coverage_url: str, basefolder: str) -> str:
coverage_url: str,
basefolder: str,
out_dir: str = '') -> str:
"""Create section showing state of project if optimal targets are hit"""
html_string = (
"<p>Implementing fuzzers that target the above functions "
Expand All @@ -406,8 +414,10 @@ def get_consequential_section(

# Write all functions to the .js file
if self.dump_files:
with open(constants.OPTIMAL_TARGETS_ALL_FUNCTIONS,
'w') as func_file:
with open(
os.path.join(out_dir,
constants.OPTIMAL_TARGETS_ALL_FUNCTIONS),
'w') as func_file:
func_file.write("var analysis_1_data = ")
func_file.write(json.dumps(all_functions_json))
return html_string
62 changes: 37 additions & 25 deletions src/fuzz_introspector/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
sys.setrecursionlimit(10000)

logger = logging.getLogger(name=__name__)
LOG_FMT = '%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s'
LOG_FMT = ('%(asctime)s.%(msecs)03d %(levelname)s '
'%(module)s - %(funcName)s: %(message)s')


def get_cmdline_parser() -> argparse.ArgumentParser:
"""Parse the commandline"""
parser = argparse.ArgumentParser()

subparsers = parser.add_subparsers(dest='command')
Expand All @@ -40,23 +42,39 @@ def get_cmdline_parser() -> argparse.ArgumentParser:
default="c-cpp",
help="Language of project")

full_parser = subparsers.add_parser('full', help='End to end run')
full_parser.add_argument('--target_dir')
full_parser.add_argument('--language')
full_parser.add_argument('--out-dir', default='')
full_parser.add_argument('--name', default='no-name')
full_parser.add_argument('--coverage_url', default='')
# 'full' sub-command: analyse a source folder and emit the HTML report.
full_parser = subparsers.add_parser(
    'full', help='Analyse folder and generate HTML report and analyses.')
full_parser.add_argument('--target-dir',
                         type=str,
                         help='Directory holding source to analyse.',
                         required=True)
# Only identifiers listed in constants.LANGUAGES_SUPPORTED are accepted.
full_parser.add_argument(
    '--language',
    type=str,
    help='Programming language of the source code to analyse.',
    choices=constants.LANGUAGES_SUPPORTED)
full_parser.add_argument('--out-dir',
                         default='',
                         type=str,
                         help='Folder to store analysis results.')
full_parser.add_argument('--name',
                         default='no-name',
                         type=str,
                         help='Name of the report.')
full_parser.add_argument('--coverage-url',
                         default='',
                         type=str,
                         help='Base coverage URL.')

# Report generation command
report_parser = subparsers.add_parser(
"report",
help="generate fuzz-introspector HTML report",
)
report_parser.add_argument("--target_dir",
report_parser.add_argument("--target-dir",
type=str,
help="Directory where the data files are",
required=True)
report_parser.add_argument("--coverage_url",
report_parser.add_argument("--coverage-url",
type=str,
help="URL with coverage information",
default="/covreport/linux")
Expand All @@ -76,7 +94,7 @@ def get_cmdline_parser() -> argparse.ArgumentParser:
action='store_true',
default=False,
help="Enables all analyses")
report_parser.add_argument("--correlation_file",
report_parser.add_argument("--correlation-file",
type=str,
default="",
help="File with correlation data")
Expand All @@ -99,7 +117,7 @@ def get_cmdline_parser() -> argparse.ArgumentParser:
"correlate",
help="correlate executable files to fuzzer introspector logs")
correlate_parser.add_argument(
"--binaries_dir",
"--binaries-dir",
type=str,
required=True,
help="Directory with binaries to scan for Fuzz introspector tags")
Expand All @@ -120,20 +138,13 @@ def get_cmdline_parser() -> argparse.ArgumentParser:


def set_logging_level() -> None:
if os.environ.get("FUZZ_LOGLEVEL"):
level = os.environ.get("FUZZ_LOGLEVEL")
if level == "debug":
logging.basicConfig(
level=logging.DEBUG,
format=LOG_FMT,
datefmt='%Y-%m-%d %H:%M:%S',
)
else:
logging.basicConfig(
level=logging.INFO,
format=LOG_FMT,
datefmt='%Y-%m-%d %H:%M:%S',
)
"""Sets logging level."""
if os.environ.get('FUZZ_LOGLEVEL', 'info') == 'debug':
logging.basicConfig(
level=logging.DEBUG,
format=LOG_FMT,
datefmt='%Y-%m-%d %H:%M:%S',
)
else:
logging.basicConfig(
level=logging.INFO,
Expand All @@ -144,6 +155,7 @@ def set_logging_level() -> None:


def main() -> int:
"""Main CLI entrypoint."""
set_logging_level()

parser = get_cmdline_parser()
Expand Down
2 changes: 1 addition & 1 deletion src/fuzz_introspector/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def end_to_end(args) -> int:
else:
out_dir = os.getcwd()

if args.language == 'jvm':
if args.language == constants.LANGUAGES.JAVA:
entrypoint = 'fuzzerTestOneInput'
else:
entrypoint = 'LLVMFuzzerTestOneInput'
Expand Down
22 changes: 22 additions & 0 deletions src/fuzz_introspector/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,28 @@

SAVED_SOURCE_FOLDER = 'source-code'


class LANGUAGES:
    """Canonical string identifiers for the languages that can be analysed.

    These values are what the ``--language`` command line option accepts and
    what the frontend dispatch code compares against.
    """
    C = 'c'
    CPP = 'c++'
    JAVA = 'jvm'
    GO = 'go'
    RUST = 'rust'


# Every language identifier offered as a ``--language`` choice on the CLI.
LANGUAGES_SUPPORTED = [
    LANGUAGES.C, LANGUAGES.CPP, LANGUAGES.JAVA, LANGUAGES.GO, LANGUAGES.RUST
]

# File suffixes attributed to each language. Used both when collecting the
# source files to analyse and when guessing a project's language from the
# files found in a directory.
LANGUAGE_EXTENSIONS = {
    LANGUAGES.C: ['.c', '.h'],
    # '.inl' (inline implementation files) was part of the C++ list in the
    # extension map this constant replaces; keep it so those sources are
    # still collected.
    LANGUAGES.CPP: [
        '.c', '.cpp', '.cc', '.c++', '.cxx', '.h', '.hpp', '.hh', '.hxx',
        '.inl'
    ],
    LANGUAGES.JAVA: ['.java'],
    LANGUAGES.RUST: ['.rs'],
    LANGUAGES.GO: ['.go', '.cgo'],
}

# Holds data about all functions in javascript, to ease loading of static
# website.
ALL_FUNCTION_JS = "all_functions.js"
Expand Down
1 change: 0 additions & 1 deletion src/fuzz_introspector/frontends/frontend_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from fuzz_introspector.frontends.datatypes import Project, SourceCodeFile

logger = logging.getLogger(name=__name__)
LOG_FMT = '%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s'


class CppSourceCodeFile(SourceCodeFile):
Expand Down
1 change: 0 additions & 1 deletion src/fuzz_introspector/frontends/frontend_rust.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from fuzz_introspector.frontends.datatypes import Project, SourceCodeFile

logger = logging.getLogger(name=__name__)
LOG_FMT = '%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s'


class RustSourceCodeFile(SourceCodeFile):
Expand Down
67 changes: 10 additions & 57 deletions src/fuzz_introspector/frontends/oss_fuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
################################################################################

import os
import argparse
import pathlib
import logging

Expand All @@ -26,55 +25,22 @@
from fuzz_introspector.frontends import frontend_rust
from fuzz_introspector.frontends.datatypes import Project

from fuzz_introspector import constants

logger = logging.getLogger(name=__name__)
LOG_FMT = '%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s'

LANGUAGE_EXTENSION_MAP = {
'c': ['.c', '.h'],
'c++':
['.c', '.cpp', '.cc', '.c++', '.cxx', '.h', '.hpp', '.hh', '.hxx', '.inl'],
'cpp':
['.c', '.cpp', '.cc', '.c++', '.cxx', '.h', '.hpp', '.hh', '.hxx', '.inl'],
'go': ['.go', '.cgo'],
'jvm': ['.java'],
'rust': ['.rs'],
}

EXCLUDE_DIRECTORIES = [
'node_modules', 'aflplusplus', 'honggfuzz', 'inspector', 'libfuzzer',
'fuzztest', 'target', 'build'
]


def setup_logging():
"""Initializes logging"""
logging.basicConfig(
level=logging.INFO,
format=LOG_FMT,
datefmt='%Y-%m-%d %H:%M:%S',
)


def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser()

parser.add_argument('--target-dir',
help='Directory of which do analysis',
required=True)
parser.add_argument('--entrypoint', help='Entrypoint for the calltree')
parser.add_argument('--language',
help='Language of target project',
required=True)

return parser.parse_args()


def capture_source_files_in_tree(directory_tree: str,
language: str) -> list[str]:
"""Captures source code files in a given directory."""
language_files = []
language_extensions = LANGUAGE_EXTENSION_MAP.get(language.lower(), [])
language_extensions = constants.LANGUAGE_EXTENSIONS.get(
language.lower(), [])

for dirpath, _, filenames in os.walk(directory_tree):
# Skip some non project directories
Expand Down Expand Up @@ -161,7 +127,7 @@ def analyse_folder(language: str = '',
source_files = capture_source_files_in_tree(directory, language)
logger.info('Found %d files to include in analysis', len(source_files))

if language == 'c':
if language == constants.LANGUAGES.C:
project = process_c_project(directory,
entrypoint,
out,
Expand All @@ -170,29 +136,29 @@ def analyse_folder(language: str = '',
dump_output=dump_output)
else:
# Process for different language
if language.lower() in ['cpp', 'c++']:
if language == constants.LANGUAGES.CPP:
logger.info('Going C++ route')
logger.info('Loading tree-sitter trees')
if not entrypoint:
entrypoint = 'LLVMFuzzerTestOneInput'
project = frontend_cpp.load_treesitter_trees(source_files)
elif language == 'go':
elif language == constants.LANGUAGES.GO:
logger.info('Going Go route')
logger.info('Loading tree-sitter trees and create base project')
project = frontend_go.load_treesitter_trees(source_files)
elif language == 'jvm':
elif language == constants.LANGUAGES.JAVA:
logger.info('Going JVM route')
logger.info('Loading tree-sitter trees and create base project')
if not entrypoint:
entrypoint = 'fuzzerTestOneInput'
project = frontend_jvm.load_treesitter_trees(
source_files, entrypoint)
elif language == 'rust':
elif language == constants.LANGUAGES.RUST:
logger.info('Going Rust route')
logger.info('Loading tree-sitter trees and create base project')
project = frontend_rust.load_treesitter_trees(source_files)
else:
logger.error('Unsupported language: %s' % language)
logger.error('Unsupported language: %s', language)
return Project([])

# Process calltree and method data
Expand Down Expand Up @@ -225,16 +191,3 @@ def analyse_folder(language: str = '',
f.write(f'Call tree\n{calltree}')

return project


def main():
"""Main"""

setup_logging()
args = parse_args()

analyse_folder(args.language, args.target_dir, args.entrypoint)


if __name__ == "__main__":
main()
10 changes: 2 additions & 8 deletions src/fuzz_introspector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,23 +575,17 @@ def locate_rust_fuzz_item(funcname: str, item_list: List[str]) -> str:

def detect_language(directory) -> str:
"""Given a folder finds the likely programming language of the project"""
language_extensions = {
'c': ['.c', '.h'],
'cpp': ['.cpp', '.cc', '.c++', '.h', '.hpp'],
'jvm': ['.java'],
'rust': ['.rs']
}

paths_to_avoid = [
'/src/aflplusplus', '/src/honggfuzz', '/src/libfuzzer', '/src/fuzztest'
]

language_counts: Dict[str, int] = {}

for dirpath, _, filenames in os.walk(directory):
if any([x for x in paths_to_avoid if dirpath.startswith(x)]):
continue
for filename in filenames:
for language, extensions in language_extensions.items():
for language, extensions in constants.LANGUAGE_EXTENSIONS.items():
if pathlib.Path(filename).suffix in extensions:
curr_count = language_counts.get(language, 0)
language_counts[language] = curr_count + 1
Expand Down

0 comments on commit daf47b6

Please sign in to comment.