Skip to content

Commit

Permalink
chore: include relative imports
Browse files Browse the repository at this point in the history
Signed-off-by: Ben Selwyn-Smith <benselwynsmith@googlemail.com>
  • Loading branch information
benmss committed Jul 11, 2024
1 parent 739826e commit 50b45ce
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,8 @@ def extract_from_ast(self, source_content: str) -> set[str]:
imports.add(alias.name)
elif isinstance(node, ast.ImportFrom):
module = node.module
if node.level != 0 or not module:
# Relative imports are ignored.
continue
imports.add(module)
if module:
imports.add("." * node.level + module)

return imports

Expand All @@ -171,14 +169,24 @@ def extract_from_lines(self, source_content: str) -> set[str]:
set[str]
The list of imports.
"""
pattern_import = r"(?:import\s+)(\w+(?:(?:\.{1}|(?:\s*,\s*))\w+)*)(?:(?:\s|#).*)?"
alias_pattern = r"\s+as\s+\w+(?:\.{0,1}\w+)*"
# Pattern for module aliases.

module_name = r"\w+(?:\.{0,1}\w+"
# <module_name> as described under pattern_import.

pattern_import = (
r"(?:import\s+)(" + module_name + r")*(?:" + alias_pattern + r")?"
r"(?:(?:\s*,\s*)(?:" + module_name + r")*(?:" + alias_pattern + r")?))*)(?:(?:\s|#).*)?"
)
# Allows for a standard import statement.
# E.g.: import <module_name(s)> <other_text>
# Where <module_name(s)> consists of one or more <module_name>.
# Where <module_name> consists of one or more words (a-z or 0-9 or underscore) separated by periods.
# Where <module_name> consists of one or more words (letters, digits or underscores, as matched by \w)
# separated by periods, with an optional alias.
# Where <other_text> allows any character(s) either after a single space or a hash (#).

pattern_from_import = r"(?:from\s+)(\w+(?:\.{1}\w+)*)(?:\s+import\s+\w+).*"
pattern_from_import = r"(?:from\s+)([.]*\w+(?:\.{1}\w+)*)(?:\s+import\s+\w+).*"
# Allows for a from import statement.
# E.g.: from <module_name> import <other_text>
# Where <module_name> is as above. (Note only a single module can be placed here.)
Expand All @@ -198,10 +206,12 @@ def extract_from_lines(self, source_content: str) -> set[str]:
continue

if match.group(1):
# Standard import, handle commas if present.
# Standard import, handle commas and aliases if present.
splits = match.group(1).split(",")
for split in splits:
split = split.strip()
# Remove aliases
split = re.sub(alias_pattern, "", split)
if split:
imports.add(split)
elif match.group(2):
Expand Down
10 changes: 8 additions & 2 deletions tests/malware_analyzer/pypi/test_suspicious_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,12 @@ def sample_code_() -> str:
import re
import os, sys
import requests as r
import logging as l,pathlib as p
import logging as l,pathlib as p# this is a comment
from packageurl import PackageURL
from macaron.output_reporter.results import Record, Report, SCMStatus
from ..r import Requests as req
from ..test import test as t1
from .... import nothing
from .test.test2.test3 import test as t1000
def test():
pass
Expand All @@ -91,16 +93,20 @@ def sample_code_imports_() -> list[str]:
"pathlib",
"packageurl",
"macaron.output_reporter.results",
"..test",
".test.test2.test3",
]


def test_ast_import_extraction(sample_code: str, sample_code_imports: list[str]) -> None:
    """Check the imports that the AST-based extractor finds in the sample code."""
    analyzer = SuspiciousSetupAnalyzer()
    found = analyzer.extract_from_ast(sample_code)

    # The extractor must report exactly as many imports as are expected.
    assert len(found) == len(sample_code_imports)

    # At least one of the expected imports must be among those reported.
    # NOTE(review): this only proves a non-empty overlap; consider asserting
    # full set equality once confirmed against the fixture contents.
    assert not found.isdisjoint(sample_code_imports)


def test_re_import_extraction(sample_code: str, sample_code_imports: list[str]) -> None:
    """Check the imports that the regular-expression extractor finds in the sample code."""
    analyzer = SuspiciousSetupAnalyzer()
    found = analyzer.extract_from_lines(sample_code)

    # The extractor must report exactly as many imports as are expected.
    assert len(found) == len(sample_code_imports)

    # At least one of the expected imports must be among those reported.
    # NOTE(review): this only proves a non-empty overlap; consider asserting
    # full set equality once confirmed against the fixture contents.
    assert not found.isdisjoint(sample_code_imports)

0 comments on commit 50b45ce

Please sign in to comment.