From 291da9ffe83fcbb78267f2cb739d2ccb80ba7540 Mon Sep 17 00:00:00 2001
From: Valentinas Bakaitis
Date: Tue, 10 Sep 2024 13:29:25 +1200
Subject: [PATCH 1/2] Fixes issue #90 - produce a separate SARIF result for each finding location

---
 mobsfscan/formatters/sarif.py | 86 ++++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 32 deletions(-)

diff --git a/mobsfscan/formatters/sarif.py b/mobsfscan/formatters/sarif.py
index 59b2cd4..0be7495 100644
--- a/mobsfscan/formatters/sarif.py
+++ b/mobsfscan/formatters/sarif.py
@@ -76,14 +76,15 @@ def add_results(path, scan_results, run):
     rule_indices = {}
 
     for rule_id, issue_dict in res.items():
-        result = create_result(path, rule_id, issue_dict, rules, rule_indices)
-        run.results.append(result)
+        results = create_rule_results(path, rule_id, issue_dict, rules, rule_indices)
+        run.results += results
 
     if len(rules) > 0:
         run.tool.driver.rules = list(rules.values())
 
 
-def create_result(path, rule_id, issue_dict, rules, rule_indices):
+def create_rule_results(path, rule_id, issue_dict, rules, rule_indices):
+    rule_results = []
     if rule_id in rules:
         rule = rules[rule_id]
         rule_index = rule_indices[rule_id]
@@ -105,21 +106,41 @@ def create_result(path, rule_id, issue_dict, rules, rule_indices):
         rules[rule_id] = rule
         rule_indices[rule_id] = rule_index
 
-    locations = []
-    for item in issue_dict.get('files', []):
-        physical_location = om.PhysicalLocation(
-            artifact_location=om.ArtifactLocation(
-                uri=to_uri(item['file_path'])),
-        )
-        physical_location.region = om.Region(
-            start_line=item['match_lines'][0],
-            end_line=item['match_lines'][1],
-            start_column=item['match_position'][0],
-            end_column=item['match_position'][1],
-            snippet=om.ArtifactContent(text=item['match_string']),
-        )
-        locations.append(om.Location(physical_location=physical_location))
-    if not locations:
+    files = issue_dict.get('files', [])
+
+    # If there are locations, iterate over them and create
+    # a separate result for each location.
+    if files:
+        for item in files:
+            locations = []
+            physical_location = om.PhysicalLocation(
+                artifact_location=om.ArtifactLocation(
+                    uri=to_uri(item['file_path'])),
+            )
+            physical_location.region = om.Region(
+                start_line=item['match_lines'][0],
+                end_line=item['match_lines'][1],
+                start_column=item['match_position'][0],
+                end_column=item['match_position'][1],
+                snippet=om.ArtifactContent(text=item['match_string']),
+            )
+            locations.append(om.Location(physical_location=physical_location))
+            rule_results.append(om.Result(
+                rule_id=rule.id,
+                rule_index=rule_index,
+                message=om.Message(text=issue_dict['metadata']['description']),
+                level=level_from_severity(issue_dict['metadata']['severity']),
+                locations=locations,
+                properties={
+                    'owasp-mobile': issue_dict['metadata']['owasp-mobile'],
+                    'masvs': issue_dict['metadata']['masvs'],
+                    'cwe': issue_dict['metadata']['cwe'],
+                    'reference': issue_dict['metadata']['reference'],
+                },
+            ))
+    # If there are no locations, create only a single result.
+    else:
+        locations = []
         artifact = om.PhysicalLocation(
             artifact_location=om.ArtifactLocation(
                 uri=path[0]),
@@ -132,20 +153,21 @@ def create_result(path, rule_id, issue_dict, rules, rule_indices):
             snippet=om.ArtifactContent(text='Missing Best Practice'),
         )
         locations.append(om.Location(physical_location=artifact))
-
-    return om.Result(
-        rule_id=rule.id,
-        rule_index=rule_index,
-        message=om.Message(text=issue_dict['metadata']['description']),
-        level=level_from_severity(issue_dict['metadata']['severity']),
-        locations=locations,
-        properties={
-            'owasp-mobile': issue_dict['metadata']['owasp-mobile'],
-            'masvs': issue_dict['metadata']['masvs'],
-            'cwe': issue_dict['metadata']['cwe'],
-            'reference': issue_dict['metadata']['reference'],
-        },
-    )
+        rule_results.append(om.Result(
+            rule_id=rule.id,
+            rule_index=rule_index,
+            message=om.Message(text=issue_dict['metadata']['description']),
+            level=level_from_severity(issue_dict['metadata']['severity']),
+            locations=locations,
+            properties={
+                'owasp-mobile': issue_dict['metadata']['owasp-mobile'],
+                'masvs': issue_dict['metadata']['masvs'],
+                'cwe': issue_dict['metadata']['cwe'],
+                'reference': issue_dict['metadata']['reference'],
+            },
+        ))
+
+    return rule_results
 
 
 def sarif_output(outfile, scan_results, mobsfscan_version, path):

From 3d3fca46da11a05f71d57425f38d18e360c263d3 Mon Sep 17 00:00:00 2001
From: Valentinas Bakaitis
Date: Wed, 11 Sep 2024 12:03:24 +1200
Subject: [PATCH 2/2] Deduplicate findings before returning them

---
 mobsfscan/mobsfscan.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/mobsfscan/mobsfscan.py b/mobsfscan/mobsfscan.py
index e4f677d..c924fbb 100644
--- a/mobsfscan/mobsfscan.py
+++ b/mobsfscan/mobsfscan.py
@@ -127,6 +127,20 @@ def format_output(self, results) -> dict:
         self.post_ignore_rules()
         self.post_ignore_rules_by_severity()
         self.post_ignore_files()
+        self.deduplicate_files()
+
+    def deduplicate_files(self):
+        for _, details in self.result['results'].items():
+            files = details.get('files')
+            # Some results don't have any files, so check before continuing.
+            if files:
+                # "file" here is the dict with file_path, match_lines, etc.
+                # For each file we build a tuple of its sorted items,
+                # then use those tuples as keys and the dicts as values.
+                # Duplicate keys collapse, so each unique file is kept once.
+                # Finally, take the dict's values to get back a list.
+                unique_files = list({tuple(sorted(f.items())): f for f in files}.values())
+                details['files'] = unique_files
 
     def format_semgrep(self, sgrep_output):
         """Format semgrep output."""
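
Note: a minimal, standalone sketch of the de-duplication idiom used in deduplicate_files() above (plain Python, no mobsfscan imports; the sample findings below are made up for illustration). Each finding dict becomes a hashable key by sorting its items into a tuple; because dict keys are unique, duplicate findings collapse while the first-seen order is preserved. This assumes the per-file values are themselves hashable (e.g. tuples rather than lists for match_lines), which the expression in the patch also requires.

    # Hypothetical findings for one rule; the second entry duplicates the first.
    files = [
        {'file_path': 'app/a.java', 'match_lines': (10, 10), 'match_string': 'x'},
        {'file_path': 'app/a.java', 'match_lines': (10, 10), 'match_string': 'x'},
        {'file_path': 'app/b.java', 'match_lines': (3, 4), 'match_string': 'y'},
    ]

    # Same expression as in the patch: sorted item-tuples act as dedup keys.
    unique_files = list({tuple(sorted(f.items())): f for f in files}.values())

    assert unique_files == [
        {'file_path': 'app/a.java', 'match_lines': (10, 10), 'match_string': 'x'},
        {'file_path': 'app/b.java', 'match_lines': (3, 4), 'match_string': 'y'},
    ]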