Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

python: use raw strings for regex #105990

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/version-check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


def get_version_from_tag(tag):
m = re.match("llvmorg-([0-9]+)\.([0-9]+)\.([0-9]+)(-rc[0-9]+)?$", tag)
m = re.match(r"llvmorg-([0-9]+)\.([0-9]+)\.([0-9]+)(-rc[0-9]+)?$", tag)
if m:
if m.lastindex == 4:
# We have an rc tag.
Expand Down
4 changes: 2 additions & 2 deletions clang/docs/tools/dump_ast_matchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def extract_result_types(comment):


def strip_doxygen(comment):
"""Returns the given comment without \-escaped words."""
r"""Returns the given comment without \-escaped words."""
# If there is only a doxygen keyword in the line, delete the whole line.
comment = re.sub(r"^\\[^\s]+\n", r"", comment, flags=re.M)

Expand Down Expand Up @@ -236,7 +236,7 @@ def act_on_decl(declaration, comment, allowed_types):

# Parse the various matcher definition macros.
m = re.match(
""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
r""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
\s*([^\s,]+\s*),
\s*(?:[^\s,]+\s*),
\s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Analysis/check-analyzer-fixit.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def run_test_once(args, extra_args):
# themselves. We need to keep the comments to preserve line numbers while
# avoiding empty lines which could potentially trigger formatting-related
# checks.
cleaned_test = re.sub("// *CHECK-[A-Z0-9\-]*:[^\r\n]*", "//", input_text)
cleaned_test = re.sub(r"// *CHECK-[A-Z0-9\-]*:[^\r\n]*", "//", input_text)
write_file(temp_file_name, cleaned_test)

original_file_name = temp_file_name + ".orig"
Expand Down
4 changes: 2 additions & 2 deletions compiler-rt/lib/asan/scripts/asan_symbolize.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def symbolize(self, addr, binary, offset):
# * For C functions atos omits parentheses and argument types.
# * For C++ functions the function name (i.e., `foo` above) may contain
# templates which may contain parentheses.
match = re.match("^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line)
match = re.match(r"^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line)
logging.debug("atos_line: %s", atos_line)
if match:
function_name = match.group(1)
Expand Down Expand Up @@ -541,7 +541,7 @@ def process_line_posix(self, line):
# names in the regex because it could be an
# Objective-C or C++ demangled name.
stack_trace_line_format = (
"^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
r"^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
)
match = re.match(stack_trace_line_format, line)
if not match:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def get_address_object(address_name: str, offset: int = 0):


def _search_line_for_cmd_start(line: str, start: int, valid_commands: dict) -> int:
"""Scan `line` for a string matching any key in `valid_commands`.
r"""Scan `line` for a string matching any key in `valid_commands`.

Start searching from `start`.
Commands escaped with `\` (E.g. `\DexLabel('a')`) are ignored.
Expand Down Expand Up @@ -543,7 +543,7 @@ def test_parse_share_line(self):
def test_parse_escaped(self):
"""Escaped commands are ignored."""

lines = ['words \MockCmd("IGNORED") words words words\n']
lines = [r'words \MockCmd("IGNORED") words words words\n']

values = self._find_all_mock_values_in_lines(lines)

Expand Down
6 changes: 3 additions & 3 deletions cross-project-tests/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def can_target_host():
xcode_lldb_vers = subprocess.check_output(["xcrun", "lldb", "--version"]).decode(
"utf-8"
)
match = re.search("lldb-(\d+)", xcode_lldb_vers)
match = re.search(r"lldb-(\d+)", xcode_lldb_vers)
if match:
apple_lldb_vers = int(match.group(1))
if apple_lldb_vers < 1000:
Expand All @@ -247,7 +247,7 @@ def get_gdb_version_string():
if len(gdb_vers_lines) < 1:
print("Unkown GDB version format (too few lines)", file=sys.stderr)
return None
match = re.search("GNU gdb \(.*?\) ((\d|\.)+)", gdb_vers_lines[0].strip())
match = re.search(r"GNU gdb \(.*?\) ((\d|\.)+)", gdb_vers_lines[0].strip())
if match is None:
print(f"Unkown GDB version format: {gdb_vers_lines[0]}", file=sys.stderr)
return None
Expand All @@ -261,7 +261,7 @@ def get_clang_default_dwarf_version_string(triple):
# Get the flags passed by the driver and look for -dwarf-version.
cmd = f'{llvm_config.use_llvm_tool("clang")} -g -xc -c - -v -### --target={triple}'
stderr = subprocess.run(cmd.split(), stderr=subprocess.PIPE).stderr.decode()
match = re.search("-dwarf-version=(\d+)", stderr)
match = re.search(r"-dwarf-version=(\d+)", stderr)
if match is None:
print("Cannot determine default dwarf version", file=sys.stderr)
return None
Expand Down
2 changes: 1 addition & 1 deletion libcxx/utils/synchronize_csv_status_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def sync_csv(rows: List[Tuple], from_github: List[PaperInfo]) -> List[Tuple]:
results.append(gh.for_printing())
continue
elif paper.status != gh.status:
print(f"We found a CSV row and a Github issue with different statuses:\nrow: {row}\Github issue: {gh}")
print(rf"We found a CSV row and a Github issue with different statuses:\nrow: {row}\Github issue: {gh}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a regex -- this change looks wrong.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See https://docs.python.org/3/reference/lexical_analysis.html#escape-sequences. The \n and \G are both interpreted as escape sequences, which is why the change is needed.

Copy link
Member

@thesamesam thesamesam Aug 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe @negril is right.

Note that https://docs.python.org/3/whatsnew/3.12.html#other-language-changes doesn't say it's exclusive to regex:

A backslash-character pair that is not a valid escape sequence now generates a SyntaxWarning, instead of DeprecationWarning. For example, re.compile("\d+\.\d+") now emits a SyntaxWarning ("\d" is an invalid escape sequence, use raw strings for regular expression: re.compile(r"\d+\.\d+")). In a future Python version, SyntaxError will eventually be raised, instead of SyntaxWarning. (Contributed by Victor Stinner in gh-98401.)

EDIT: In fact, looking at python/cpython#98401, general format strings for print are used as an example.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't look intended to me though. The \G should probably have been a \nG.

Copy link
Author

@negril negril Aug 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can fix that if you want me to.

Copy link
Member

@ldionne ldionne Aug 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yeah that's the issue. The \G is unintended. I'll fix this right now.

b2dd840

results.append(row)

return results
Expand Down
2 changes: 1 addition & 1 deletion lld/test/MachO/tools/validate-unwind-info.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def main():
hex = "[a-f\d]"
hex = r"[a-f\d]"
hex8 = hex + "{8}"

parser = argparse.ArgumentParser(description=__doc__)
Expand Down
2 changes: 1 addition & 1 deletion lld/utils/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __str__(self):
def getBenchmarks():
ret = []
for i in glob.glob("*/response*.txt"):
m = re.match("response-(.*)\.txt", os.path.basename(i))
m = re.match(r"response-(.*)\.txt", os.path.basename(i))
variant = m.groups()[0] if m else None
ret.append(Bench(os.path.dirname(i), variant))
return ret
Expand Down
8 changes: 4 additions & 4 deletions lldb/examples/python/crashlog.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ class DarwinImage(symbolication.Image):
except:
dsymForUUIDBinary = ""

dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*")
dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*")

def __init__(
self, text_addr_lo, text_addr_hi, identifier, version, uuid, path, verbose
Expand Down Expand Up @@ -501,7 +501,7 @@ def find_image_with_identifier(self, identifier):
for image in self.images:
if image.identifier == identifier:
return image
regex_text = "^.*\.%s$" % (re.escape(identifier))
regex_text = r"^.*\.%s$" % (re.escape(identifier))
regex = re.compile(regex_text)
for image in self.images:
if regex.match(image.identifier):
Expand Down Expand Up @@ -925,7 +925,7 @@ def get(cls):
version = r"(?:" + super().version + r"\s+)?"
address = r"(0x[0-9a-fA-F]{4,})" # 4 digits or more

symbol = """
symbol = r"""
(?:
[ ]+
(?P<symbol>.+)
Expand Down Expand Up @@ -1095,7 +1095,7 @@ def parse_normal(self, line):
self.crashlog.process_identifier = line[11:].strip()
elif line.startswith("Version:"):
version_string = line[8:].strip()
matched_pair = re.search("(.+)\((.+)\)", version_string)
matched_pair = re.search(r"(.+)\((.+)\)", version_string)
if matched_pair:
self.crashlog.process_version = matched_pair.group(1)
self.crashlog.process_compatability_version = matched_pair.group(2)
Expand Down
2 changes: 1 addition & 1 deletion lldb/examples/python/delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def parse_log_file(file, options):
print("# Log file: '%s'" % file)
print("#----------------------------------------------------------------------")

timestamp_regex = re.compile("(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
timestamp_regex = re.compile(r"(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")

base_time = 0.0
last_time = 0.0
Expand Down
6 changes: 3 additions & 3 deletions lldb/examples/python/gdbremote.py
Original file line number Diff line number Diff line change
Expand Up @@ -1537,13 +1537,13 @@ def parse_gdb_log(file, options):
a long time during a preset set of debugger commands."""

tricky_commands = ["qRegisterInfo"]
timestamp_regex = re.compile("(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
timestamp_regex = re.compile(r"(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
packet_name_regex = re.compile("([A-Za-z_]+)[^a-z]")
packet_transmit_name_regex = re.compile(
"(?P<direction>send|read) packet: (?P<packet>.*)"
)
packet_contents_name_regex = re.compile("\$([^#]*)#[0-9a-fA-F]{2}")
packet_checksum_regex = re.compile(".*#[0-9a-fA-F]{2}$")
packet_contents_name_regex = re.compile(r"\$([^#]*)#[0-9a-fA-F]{2}")
packet_checksum_regex = re.compile(r".*#[0-9a-fA-F]{2}$")
packet_names_regex_str = "(" + "|".join(gdb_remote_commands.keys()) + ")(.*)"
packet_names_regex = re.compile(packet_names_regex_str)

Expand Down
6 changes: 3 additions & 3 deletions lldb/examples/python/jump.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def parse_linespec(linespec, frame, result):
)

if not matched:
mo = re.match("^\+([0-9]+)$", linespec)
mo = re.match(r"^\+([0-9]+)$", linespec)
if mo is not None:
matched = True
# print "Matched +<count>"
Expand All @@ -54,7 +54,7 @@ def parse_linespec(linespec, frame, result):
)

if not matched:
mo = re.match("^\-([0-9]+)$", linespec)
mo = re.match(r"^\-([0-9]+)$", linespec)
if mo is not None:
matched = True
# print "Matched -<count>"
Expand All @@ -79,7 +79,7 @@ def parse_linespec(linespec, frame, result):
breakpoint = target.BreakpointCreateByLocation(file_name, line_number)

if not matched:
mo = re.match("\*((0x)?([0-9a-f]+))$", linespec)
mo = re.match(r"\*((0x)?([0-9a-f]+))$", linespec)
if mo is not None:
matched = True
# print "Matched <address-expression>"
Expand Down
2 changes: 1 addition & 1 deletion lldb/examples/python/performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def __init__(self, pid):

def Measure(self):
output = subprocess.getoutput(self.command).split("\n")[-1]
values = re.split("[-+\s]+", output)
values = re.split(r"[-+\s]+", output)
for idx, stat in enumerate(values):
multiplier = 1
if stat:
Expand Down
6 changes: 3 additions & 3 deletions lldb/examples/python/symbolication.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,9 @@ class Section:
"""Class that represents an load address range"""

sect_info_regex = re.compile("(?P<name>[^=]+)=(?P<range>.*)")
addr_regex = re.compile("^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$")
addr_regex = re.compile(r"^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$")
range_regex = re.compile(
"^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$"
r"^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$"
)

def __init__(self, start_addr=None, end_addr=None, name=None):
Expand Down Expand Up @@ -557,7 +557,7 @@ def find_images_with_identifier(self, identifier):
if image.identifier == identifier:
images.append(image)
if len(images) == 0:
regex_text = "^.*\.%s$" % (re.escape(identifier))
regex_text = r"^.*\.%s$" % (re.escape(identifier))
regex = re.compile(regex_text)
for image in self.images:
if regex.match(image.identifier):
Expand Down
2 changes: 1 addition & 1 deletion lldb/packages/Python/lldbsuite/test/lldbpexpect.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,4 @@ def cursor_forward_escape_seq(self, chars_to_move):
Returns the escape sequence to move the cursor forward/right
by a certain amount of characters.
"""
return b"\x1b\[" + str(chars_to_move).encode("utf-8") + b"C"
return rb"\x1b\[" + str(chars_to_move).encode("utf-8") + b"C"
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def timeout_to_seconds(timeout):


class ProcessHelper(object):
"""Provides an interface for accessing process-related functionality.
r"""Provides an interface for accessing process-related functionality.

This class provides a factory method that gives the caller a
platform-specific implementation instance of the class.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ def test_backticks_in_alias(self):
interp = self.dbg.GetCommandInterpreter()
result = lldb.SBCommandReturnObject()
interp.HandleCommand(
"command alias _test-argv-cmd expression -Z \`argc\` -- argv", result
r"command alias _test-argv-cmd expression -Z \`argc\` -- argv", result
)
self.assertCommandReturn(result, "Made the alias")
interp.HandleCommand("_test-argv-cmd", result)
self.assertCommandReturn(result, "The alias worked")

# Now try a harder case where we create this using an alias:
interp.HandleCommand(
"command alias _test-argv-parray-cmd parray \`argc\` argv", result
r"command alias _test-argv-parray-cmd parray \`argc\` argv", result
)
self.assertCommandReturn(result, "Made the alias")
interp.HandleCommand("_test-argv-parray-cmd", result)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test(self):
alloc0 = re.search("^.*IRMemoryMap::Malloc.+?0xdead0000.*$", log, re.MULTILINE)
# Malloc adds additional bytes to allocation size, hence 10007
alloc1 = re.search(
"^.*IRMemoryMap::Malloc\s*?\(10007.+?0xdead1000.*$", log, re.MULTILINE
r"^.*IRMemoryMap::Malloc\s*?\(10007.+?0xdead1000.*$", log, re.MULTILINE
)
self.assertTrue(alloc0, "Couldn't find an allocation at a given address.")
self.assertTrue(
Expand Down
2 changes: 1 addition & 1 deletion lldb/test/API/commands/expression/test/TestExprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def build_and_run(self):
def test_floating_point_expr_commands(self):
self.build_and_run()

self.expect("expression 2.234f", patterns=["\(float\) \$.* = 2\.234"])
self.expect("expression 2.234f", patterns=[r"\(float\) \$.* = 2\.234"])
# (float) $2 = 2.234

def test_many_expr_commands(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_gui(self):
self.child.expect_exact("Threads")

# The main thread should be expanded.
self.child.expect("#\d+: main")
self.child.expect(r"#\d+: main")

# Quit the GUI
self.child.send(escape_key)
Expand Down
6 changes: 3 additions & 3 deletions lldb/test/API/commands/help/TestHelp.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,13 +349,13 @@ def test_help_show_tags(self):
self.expect(
"help memory read",
patterns=[
"--show-tags\n\s+Include memory tags in output "
"\(does not apply to binary output\)."
r"--show-tags\n\s+Include memory tags in output "
r"\(does not apply to binary output\)."
],
)
self.expect(
"help memory find",
patterns=["--show-tags\n\s+Include memory tags in output."],
patterns=[r"--show-tags\n\s+Include memory tags in output."],
)

@no_debug_info_test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def test(self):

self.runCmd("process kill")

self.runCmd("process launch -X true -w %s -- foo\ bar" % (self.getBuildDir()))
self.runCmd(r"process launch -X true -w %s -- foo\ bar" % (self.getBuildDir()))

process = self.process()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ def test_unavailable_registers(self):
"register read --all",
patterns=[
"(?sm)^general purpose registers:\n"
"^\s+rdx = 0x5555555555555555\n"
r"^\s+rdx = 0x5555555555555555\n"
".*"
"^3 registers were unavailable.\n"
"\n"
"^supplementary registers:\n"
"^\s+edx = 0x55555555\n"
r"^\s+edx = 0x55555555\n"
".*"
"^12 registers were unavailable."
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -630,14 +630,14 @@ def test_register_read_fields(self):
# N/Z/C/V bits will always be present, so check only for those.
self.expect(
"register read cpsr",
patterns=["= \(N = [0|1], Z = [0|1], C = [0|1], V = [0|1]"],
patterns=[r"= \(N = [0|1], Z = [0|1], C = [0|1], V = [0|1]"],
)
self.expect(
"register read fpsr", patterns=["= \(QC = [0|1], IDC = [0|1], IXC = [0|1]"]
"register read fpsr", patterns=[r"= \(QC = [0|1], IDC = [0|1], IXC = [0|1]"]
)
# AHP/DN/FZ always present, others may vary.
self.expect(
"register read fpcr", patterns=["= \(AHP = [0|1], DN = [0|1], FZ = [0|1]"]
"register read fpcr", patterns=[r"= \(AHP = [0|1], DN = [0|1], FZ = [0|1]"]
)

# Should get enumerator descriptions for RMode.
Expand Down
Loading
Loading