From f2f532968102852b40261d00e8fe7ee178621fa0 Mon Sep 17 00:00:00 2001
From: Arthur Chan
Date: Tue, 21 Jan 2025 18:23:03 +0000
Subject: [PATCH] Analyser: Add far reach low coverage analyser

Signed-off-by: Arthur Chan
---
 src/fuzz_introspector/analyses/__init__.py    |   2 +
 .../far_reach_low_coverage_analyser.py        | 209 ++++++++++++++++++
 src/fuzz_introspector/analysis.py             |   3 +
 src/fuzz_introspector/cli.py                  |  48 ++++-
 src/fuzz_introspector/commands.py             |  18 ++
 5 files changed, 276 insertions(+), 4 deletions(-)
 create mode 100644 src/fuzz_introspector/analyses/far_reach_low_coverage_analyser.py

diff --git a/src/fuzz_introspector/analyses/__init__.py b/src/fuzz_introspector/analyses/__init__.py
index 164164ccc..1cd83b8c0 100644
--- a/src/fuzz_introspector/analyses/__init__.py
+++ b/src/fuzz_introspector/analyses/__init__.py
@@ -25,6 +25,7 @@
 from fuzz_introspector.analyses import sinks_analyser
 from fuzz_introspector.analyses import annotated_cfg
 from fuzz_introspector.analyses import source_code_line_analyser
+from fuzz_introspector.analyses import far_reach_low_coverage_analyser
 
 # All optional analyses.
 # Ordering here is important as top analysis will be shown first in the report
@@ -45,4 +46,5 @@
 # directly from CLI without the need to generate HTML reports
 standalone_analyses: list[type[analysis.AnalysisInterface]] = [
     source_code_line_analyser.SourceCodeLineAnalyser,
+    far_reach_low_coverage_analyser.FarReachLowCoverageAnalyser,
 ]
diff --git a/src/fuzz_introspector/analyses/far_reach_low_coverage_analyser.py b/src/fuzz_introspector/analyses/far_reach_low_coverage_analyser.py
new file mode 100644
index 000000000..f48985ad7
--- /dev/null
+++ b/src/fuzz_introspector/analyses/far_reach_low_coverage_analyser.py
@@ -0,0 +1,209 @@
+# Copyright 2025 Fuzz Introspector Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Analysis plugin for introspection of the functions which are far
+reached and with low coverage."""
+import os
+import json
+import logging
+
+from typing import (Any, List, Dict)
+
+from fuzz_introspector import (analysis, html_helpers)
+
+from fuzz_introspector.datatypes import (project_profile, fuzzer_profile,
+                                         function_profile)
+
+logger = logging.getLogger(name=__name__)
+
+
+class FarReachLowCoverageAnalyser(analysis.AnalysisInterface):
+    """Locate for the functions which are far reached and with
+    low coverage."""
+
+    name: str = 'FarReachLowCoverageAnalyser'
+
+    def __init__(self):
+        self.json_results: Dict[str, Any] = {}
+        self.json_string_result = ''
+
+    @classmethod
+    def get_name(cls):
+        """Return the analyser identifying name for processing.
+
+        :return: The identifying name of this analyser
+        :rtype: str
+        """
+        return cls.name
+
+    def get_json_string_result(self) -> str:
+        """Return the stored json string result.
+
+        :return: The json string result processed and stored
+            by this analyser
+        :rtype: str
+        """
+        if self.json_string_result:
+            return self.json_string_result
+
+        return json.dumps(self.json_results)
+
+    def set_json_string_result(self, string):
+        """Store the result of this analyser as json string result
+        for further processing in a later time.
+
+        :param string: A json string variable storing the
+            processing result of the analyser for future use
+        :type string: str
+        """
+        self.json_string_result = string
+
+    def set_flags(self, exclude_static_functions: bool,
+                  only_referenced_functions: bool,
+                  only_header_functions: bool):
+        """Configure the flags from the CLI."""
+        self.exclude_static_functions = exclude_static_functions
+        self.only_referenced_functions = only_referenced_functions
+        self.only_header_functions = only_header_functions
+
+    def set_max_functions(self, max_functions: int):
+        """Configure the max functions to return from CLI."""
+        self.max_functions = max_functions
+
+    def set_introspection_project(
+            self, introspection_project: analysis.IntrospectionProject):
+        """Configure the introspection project wrapper for retrieving
+        debug data."""
+        self.introspection_project = introspection_project
+
+    def analysis_func(self,
+                      table_of_contents: html_helpers.HtmlTableOfContents,
+                      tables: List[str],
+                      proj_profile: project_profile.MergedProjectProfile,
+                      profiles: List[fuzzer_profile.FuzzerProfile],
+                      basefolder: str, coverage_url: str,
+                      conclusions: List[html_helpers.HTMLConclusion],
+                      out_dir: str) -> str:
+        """Collect far-reaching, low-coverage functions into result.json.
+
+        Candidates come from _get_functions_of_interest (hit percentage
+        below 20.0, most complex first). They are then filtered by the
+        configured flags and capped at max_functions, so set_flags() and
+        set_max_functions() must be called before this method. Parameters
+        not listed below are not used by this analyser.
+
+        :param proj_profile: Merged project profile providing all functions,
+            all constructors and the per-function hit percentage
+        :param out_dir: Directory in which result.json is written
+        :return: Always an empty string, no HTML is produced
+        :rtype: str
+        """
+        logger.info(' - Running analysis %s', self.get_name())
+        logger.info(
+            ' - Settings: exclude_static_functions: %s, '
+            'only_referenced_functions: %s, '
+            'only_header_functions: %s, '
+            'max_functions: %d', self.exclude_static_functions,
+            self.only_referenced_functions, self.only_header_functions,
+            self.max_functions)
+
+        result_list: List[Dict[str, Any]] = []
+
+        # Get all functions from the profiles (builtin list, not typing.List)
+        all_functions = list(proj_profile.all_functions.values())
+        all_functions.extend(proj_profile.all_constructors.values())
+
+        # Get cross reference function dict
+        if self.only_referenced_functions:
+            xref_dict = self._get_cross_reference_dict(all_functions)
+        else:
+            xref_dict = {}
+
+        # Get interesting functions sorted by complexity and runtime coverage
+        filtered_functions = self._get_functions_of_interest(
+            all_functions, proj_profile)
+
+        # Process the final result list of functions according to the
+        # configured flags
+        for function in filtered_functions:
+            # Stop as soon as max_functions results have been collected
+            if len(result_list) >= self.max_functions:
+                break
+
+            # Check for only_referenced_functions flag
+            if (self.only_referenced_functions and
+                    function.function_name not in xref_dict):
+                continue
+
+            # Check for only_header_functions
+            # TODO No Debug information from the new frontend yet.
+            # Handle this later
+
+            # Check for exclude_static_functions flag
+            # TODO No Debug information from the new frontend yet.
+            # Handle this later
+
+            result_list.append(function.to_dict())
+
+        self.json_results['functions'] = result_list
+        result_json_path = os.path.join(out_dir, 'result.json')
+        logger.info('Found %d function candidates.', len(result_list))
+        logger.info('Dumping result to %s', result_json_path)
+        with open(result_json_path, 'w') as f:
+            json.dump(self.json_results, f)
+
+        return ''
+
+    def _get_cross_reference_dict(
+        self,
+        functions: List[function_profile.FunctionProfile]
+    ) -> Dict[str, int]:
+        """Internal helper function to build up a function cross reference
+        dict."""
+        func_xrefs: Dict[str, int] = {}
+
+        for function in functions:
+            for dst, src_list in function.callsite.items():
+                func_xrefs_count = func_xrefs.get(dst, 0)
+                func_xrefs_count += len(src_list)
+                func_xrefs[dst] = func_xrefs_count
+
+        return func_xrefs
+
+    def _get_functions_of_interest(
+        self,
+        functions: List[function_profile.FunctionProfile],
+        proj_profile: project_profile.MergedProjectProfile,
+    ) -> List[function_profile.FunctionProfile]:
+        """Internal helper function to get a sorted functions of interest."""
+        filtered_functions = []
+
+        for function in functions:
+            # Skipping non-related jvm methods and methods from enum classes
+            # is_accessible is True by default, i.e. for non jvm projects
+            if (not function.is_accessible or function.is_jvm_library
+                    or function.is_enum):
+                continue
+
+            coverage = proj_profile.get_func_hit_percentage(
+                function.function_name)
+
+            if coverage < 20.0:
+                filtered_functions.append(function)
+
+        # Sort the filtered functions
+        filtered_functions.sort(key=lambda x: (
+            -x.cyclomatic_complexity,
+            proj_profile.get_func_hit_percentage(x.function_name)))
+
+        return filtered_functions
diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py
index e3f9f15ea..0d91d6c07 100644
--- a/src/fuzz_introspector/analysis.py
+++ b/src/fuzz_introspector/analysis.py
@@ -209,6 +209,9 @@ def analysis_func(self,
                             be shown at the top of the report page.
         :type conclusions: List[html_helpers.HTMLConclusion]
 
+        :param out_dir: Output directory of analysis report.
+        :type out_dir: str
+
         :rtype: str
         :returns: A string that corresponds to HTML that can be embedded in the
                   html report.
diff --git a/src/fuzz_introspector/cli.py b/src/fuzz_introspector/cli.py
index 7d2f3cdeb..e2ebcb63d 100644
--- a/src/fuzz_introspector/cli.py
+++ b/src/fuzz_introspector/cli.py
@@ -143,10 +143,11 @@ def get_cmdline_parser() -> argparse.ArgumentParser:
         'analyse',
         help='Standlone analyser commands to run on the target project.')
 
-    analyser_parser = analyse_parser.add_subparsers(
-        dest='analyser',
-        required=True,
-        help='Available analyser: SourceCodeLineAnalyser')
+    analyser_parser = analyse_parser.add_subparsers(dest='analyser',
+                                                    required=True,
+                                                    help="""
+        Available analyser:
+        SourceCodeLineAnalyser FarReachLowCoverageAnalyser""")
 
     source_code_line_analyser_parser = analyser_parser.add_parser(
         'SourceCodeLineAnalyser',
@@ -178,6 +179,45 @@ def get_cmdline_parser() -> argparse.ArgumentParser:
         type=str,
         help='Folder to store analysis results.')
 
+    far_reach_low_coverage_analyser_parser = analyser_parser.add_parser(
+        'FarReachLowCoverageAnalyser',
+        help=('Provide interesting functions in the project that '
+              'are good targets for fuzzing with low runtime coverage.'))
+
+    far_reach_low_coverage_analyser_parser.add_argument(
+        '--exclude-static-functions',
+        action='store_true',
+        help='Excluding static functions in the analysing result.')
+    far_reach_low_coverage_analyser_parser.add_argument(
+        '--only-referenced-functions',
+        action='store_true',
+        help='Excluding non-referenced functions in the analysing result.')
+    far_reach_low_coverage_analyser_parser.add_argument(
+        '--only-header-functions',
+        action='store_true',
+        help=('Excluding functions without header declaration in the '
+              'analysing result.'))
+    far_reach_low_coverage_analyser_parser.add_argument(
+        '--max-functions',
+        default=30,
+        type=int,
+        help='The max number of functions returned by this analysis.')
+    far_reach_low_coverage_analyser_parser.add_argument(
+        '--target-dir',
+        type=str,
+        help='Directory holding source to analyse.',
+        required=True)
+    far_reach_low_coverage_analyser_parser.add_argument(
+        '--language',
+        type=str,
+        help='Programming language of the source code to analyse.',
+        choices=constants.LANGUAGES_SUPPORTED)
+    far_reach_low_coverage_analyser_parser.add_argument(
+        '--out-dir',
+        default='',
+        type=str,
+        help='Folder to store analysis results.')
+
     return parser
 
 
diff --git a/src/fuzz_introspector/commands.py b/src/fuzz_introspector/commands.py
index 88240cdf4..253e855fc 100644
--- a/src/fuzz_introspector/commands.py
+++ b/src/fuzz_introspector/commands.py
@@ -215,6 +215,24 @@ def analyse(args) -> int:
                                       introspection_proj.proj_profile,
                                       introspection_proj.profiles, '', '', [],
                                       out_dir)
+    elif target_analyser.get_name() == 'FarReachLowCoverageAnalyser':
+        exclude_static_functions = args.exclude_static_functions
+        only_referenced_functions = args.only_referenced_functions
+        only_header_functions = args.only_header_functions
+        max_functions = args.max_functions
+
+        introspection_proj.load_debug_report(out_dir)
+
+        target_analyser.set_flags(exclude_static_functions,
+                                  only_referenced_functions,
+                                  only_header_functions)
+        target_analyser.set_max_functions(max_functions)
+        target_analyser.set_introspection_project(introspection_proj)
+
+        target_analyser.analysis_func(html_helpers.HtmlTableOfContents(), [],
+                                      introspection_proj.proj_profile,
+                                      introspection_proj.profiles, '', '', [],
+                                      out_dir)
 
     # TODO Add more analyser for standalone run
 