Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix SARIF and duplication issues #91

Merged
merged 4 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 54 additions & 32 deletions mobsfscan/formatters/sarif.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,15 @@ def add_results(path, scan_results, run):
rule_indices = {}

for rule_id, issue_dict in res.items():
result = create_result(path, rule_id, issue_dict, rules, rule_indices)
run.results.append(result)
results = create_rule_results(path, rule_id, issue_dict, rules, rule_indices)
run.results += results

if len(rules) > 0:
run.tool.driver.rules = list(rules.values())


def create_result(path, rule_id, issue_dict, rules, rule_indices):
def create_rule_results(path, rule_id, issue_dict, rules, rule_indices):
rule_results = []
if rule_id in rules:
rule = rules[rule_id]
rule_index = rule_indices[rule_id]
Expand All @@ -105,21 +106,41 @@ def create_result(path, rule_id, issue_dict, rules, rule_indices):
rules[rule_id] = rule
rule_indices[rule_id] = rule_index

locations = []
for item in issue_dict.get('files', []):
physical_location = om.PhysicalLocation(
artifact_location=om.ArtifactLocation(
uri=to_uri(item['file_path'])),
)
physical_location.region = om.Region(
start_line=item['match_lines'][0],
end_line=item['match_lines'][1],
start_column=item['match_position'][0],
end_column=item['match_position'][1],
snippet=om.ArtifactContent(text=item['match_string']),
)
locations.append(om.Location(physical_location=physical_location))
if not locations:
files = issue_dict.get('files', [])

# if there are locations - we iterate over them and create
# a separate result for each location
if files:
for item in files:
locations = []
physical_location = om.PhysicalLocation(
artifact_location=om.ArtifactLocation(
uri=to_uri(item['file_path'])),
)
physical_location.region = om.Region(
start_line=item['match_lines'][0],
end_line=item['match_lines'][1],
start_column=item['match_position'][0],
end_column=item['match_position'][1],
snippet=om.ArtifactContent(text=item['match_string']),
)
locations.append(om.Location(physical_location=physical_location))
rule_results.append(om.Result(
rule_id=rule.id,
rule_index=rule_index,
message=om.Message(text=issue_dict['metadata']['description']),
level=level_from_severity(issue_dict['metadata']['severity']),
locations=locations,
properties={
'owasp-mobile': issue_dict['metadata']['owasp-mobile'],
'masvs': issue_dict['metadata']['masvs'],
'cwe': issue_dict['metadata']['cwe'],
'reference': issue_dict['metadata']['reference'],
},
))
# if there are no locations - only create a single result
else:
locations = []
artifact = om.PhysicalLocation(
artifact_location=om.ArtifactLocation(
uri=path[0]),
Expand All @@ -132,20 +153,21 @@ def create_result(path, rule_id, issue_dict, rules, rule_indices):
snippet=om.ArtifactContent(text='Missing Best Practice'),
)
locations.append(om.Location(physical_location=artifact))

return om.Result(
rule_id=rule.id,
rule_index=rule_index,
message=om.Message(text=issue_dict['metadata']['description']),
level=level_from_severity(issue_dict['metadata']['severity']),
locations=locations,
properties={
'owasp-mobile': issue_dict['metadata']['owasp-mobile'],
'masvs': issue_dict['metadata']['masvs'],
'cwe': issue_dict['metadata']['cwe'],
'reference': issue_dict['metadata']['reference'],
},
)
rule_results.append(om.Result(
rule_id=rule.id,
rule_index=rule_index,
message=om.Message(text=issue_dict['metadata']['description']),
level=level_from_severity(issue_dict['metadata']['severity']),
locations=locations,
properties={
'owasp-mobile': issue_dict['metadata']['owasp-mobile'],
'masvs': issue_dict['metadata']['masvs'],
'cwe': issue_dict['metadata']['cwe'],
'reference': issue_dict['metadata']['reference'],
},
))

return rule_results


def sarif_output(outfile, scan_results, mobsfscan_version, path):
Expand Down
14 changes: 14 additions & 0 deletions mobsfscan/mobsfscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,20 @@ def format_output(self, results) -> dict:
self.post_ignore_rules()
self.post_ignore_rules_by_severity()
self.post_ignore_files()
self.deduplicate_files()

def deduplicate_files(self):
    """Remove duplicate file entries from every result, in place.

    Each entry in ``self.result['results']`` may carry a ``files`` list of
    dicts (file_path, match_lines, match_position, match_string, ...).
    Duplicate dicts are dropped while the first-seen order is preserved.
    """
    for details in self.result['results'].values():
        files = details.get('files')
        # some results don't have any files, so skip those
        if not files:
            continue
        seen = set()
        unique_files = []
        for entry in files:
            # Build a hashable, order-insensitive fingerprint of the dict.
            # repr() is used for the values because a raw
            # tuple(sorted(entry.items())) is unhashable whenever a value
            # is itself a list (e.g. match_lines) and requires all values
            # to be mutually orderable.
            fingerprint = tuple(sorted((k, repr(v)) for k, v in entry.items()))
            if fingerprint not in seen:
                seen.add(fingerprint)
                unique_files.append(entry)
        details['files'] = unique_files

def format_semgrep(self, sgrep_output):
"""Format semgrep output."""
Expand Down