Skip to content

Commit

Permalink
Merge pull request #6078 from MetRonnie/cylc-lint
Browse files Browse the repository at this point in the history
`cylc lint` S007: fix regex catastrophic backtracking
  • Loading branch information
wxtim authored Apr 22, 2024
2 parents e621d9c + 588ca29 commit 950f66c
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 116 deletions.
1 change: 1 addition & 0 deletions changes.d/6078.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed bug where `cylc lint` could hang when checking `inherit` settings in `flow.cylc`.
86 changes: 33 additions & 53 deletions cylc/flow/scripts/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,38 @@ def check_indentation(line: str) -> bool:
return bool(len(match[0]) % 4 != 0)


INHERIT_REGEX = re.compile(r'\s*inherit\s*=\s*(.*)')
FAM_NAME_IGNORE_REGEX = re.compile(
# Stuff we want to ignore when checking for lowercase in family names
r'''
# comments
(?<!{)\#.*
# or Cylc parameters
| <[^>]+>
# or Jinja2
| {{.*?}} | {%.*?%} | {\#.*?\#}
# or EmPy
| (@[\[{\(]).*([\]\}\)])
''',
re.X
)
LOWERCASE_REGEX = re.compile(r'[a-z]')


def check_lowercase_family_names(line: str) -> bool:
"""Check for lowercase in family names."""
match = INHERIT_REGEX.match(line)
if not match:
return False
# Replace stuff we want to ignore with a neutral char (tilde will do):
content = FAM_NAME_IGNORE_REGEX.sub('~', match.group(1))
return any(
LOWERCASE_REGEX.search(i)
for i in content.split(',')
if i.strip(' \'"') not in {'None', 'none', 'root'}
)


FUNCTION = 'function'

STYLE_GUIDE = (
Expand Down Expand Up @@ -351,62 +383,10 @@ def check_indentation(line: str) -> bool:
'url': STYLE_GUIDE + 'trailing-whitespace',
FUNCTION: re.compile(r'[ \t]$').findall
},
# Look for families both from inherit=FAMILY and FAMILY:trigger-all/any.
# Do not match inherit lines with `None` at the start.
"S007": {
'short': 'Family name contains lowercase characters.',
'url': STYLE_GUIDE + 'task-naming-conventions',
FUNCTION: re.compile(
r'''
# match all inherit statements
^\s*inherit\s*=
# filtering out those which match only valid family names
(?!
\s*
# none, None and root are valid family names
# and `inherit =` or `inherit = # x` are valid too
(['"]?(none|None|root|\#.*|$)['"]?|
(
# as are families named with capital letters
[A-Z0-9_-]+
# and optional quotes
| [\'\"]
# which may include Cylc parameters
| (<[^>]+>)
# or Jinja2
| ({[{%].*[%}]})
# or EmPy
| (@[\[{\(]).*([\]\}\)])
)+
)
# this can be a comma separated list
(
\s*,\s*
# none, None and root are valid family names
(['"]?(none|None|root)['"]?|
(
# as are families named with capital letters
[A-Z0-9_-]+
# and optional quotes
| [\'\"]
# which may include Cylc parameters
| (<[^>]+>)
# or Jinja2
| ({[{%].*[%}]})
# or EmPy
| (@[\[{\(]).*([\]\}\)])
)+
)
)*
# allow trailing commas and whitespace
\s*,?\s*
# allow trailing comments
(\#.*)?
$
)
''',
re.X
).findall,
FUNCTION: check_lowercase_family_names,
},
"S008": {
'short': JINJA2_FOUND_WITHOUT_SHEBANG,
Expand Down
131 changes: 68 additions & 63 deletions tests/unit/scripts/test_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from cylc.flow.scripts.lint import (
MANUAL_DEPRECATIONS,
check_lowercase_family_names,
get_cylc_files,
get_pyproject_toml,
get_reference_rst,
Expand Down Expand Up @@ -240,77 +241,81 @@ def test_check_cylc_file_line_no():


@pytest.mark.parametrize(
'inherit_line',
(
param(item, id=str(ind))
for ind, item in enumerate([
# lowercase family names are not permitted
'inherit = g',
'inherit = foo, b, a, r',
'inherit = FOO, bar',
'inherit = None, bar',
'inherit = A, b, C',
'inherit = "A", "b"',
"inherit = 'A', 'b'",
# parameters, jinja2 and empy should be ignored
# but any lowercase chars before or after should not
'inherit = a<x>z',
'inherit = a{{ x }}z',
'inherit = a@( x )z',
])
)
'line',
[
# lowercase family names are not permitted
'inherit = g',
'inherit = FOO, bar',
'inherit = None, bar',
'inherit = A, b, C',
'inherit = "A", "b"',
"inherit = 'A', 'b'",
'inherit = FOO_BAr',
# whitespace & trailing commas
' inherit = a , ',
# parameters, jinja2 and empy should be ignored
# but any lowercase chars before or after should not
'inherit = A<x>z',
'inherit = A{{ x }}z',
'inherit = N{# #}one',
'inherit = A@( x )z',
]
)
def test_inherit_lowercase_matches(inherit_line):
lint = lint_text(inherit_line, ['style'])
assert any('S007' in msg for msg in lint.messages)
def test_check_lowercase_family_names__true(line):
assert check_lowercase_family_names(line) is True


@pytest.mark.parametrize(
'inherit_line',
(
param(item, id=str(ind))
for ind, item in enumerate([
# undefined values are ok
'inherit =',
'inherit = ',
# none, None and root are ok
'inherit = none',
'inherit = None',
'inherit = root',
# trailing commas and whitespace are ok
'inherit = None,',
'inherit = None, ',
'inherit = None , ',
# uppercase family names are ok
'inherit = None, FOO, BAR',
'inherit = FOO',
'inherit = BAZ',
'inherit = root',
'inherit = FOO_BAR_0',
# parameters should be ignored
'inherit = A<a>Z',
'inherit = <a=1, b-1, c+1>',
# jinja2 should be ignored
'line',
[
# undefined values are ok
'inherit =',
'inherit = ',
# none, None and root are ok
'inherit = none',
'inherit = None',
'inherit = root',
# whitespace & trailing commas
'inherit = None,',
'inherit = None, ',
' inherit = None , ',
# uppercase family names are ok
'inherit = None, FOO, BAR',
'inherit = FOO',
'inherit = FOO_BAR_0',
# parameters should be ignored
'inherit = A<a>Z',
'inherit = <a=1, b-1, c+1>',
# jinja2 should be ignored
param(
'inherit = A{{ a }}Z, {% for x in range(5) %}'
'A{{ x }}, {% endfor %}',
# empy should be ignored
'inherit = A@( a )Z',
# trailing comments should be ignored
'inherit = A, B # no, comment',
'inherit = # a',
# quotes are ok
'inherit = "A", "B"',
"inherit = 'A', 'B'",
'inherit = "None", B',
'inherit = <a = 1, b - 1>',
# one really awkward, but valid example
id='jinja2-long'
),
# empy should be ignored
'inherit = A@( a )Z',
# trailing comments should be ignored
'inherit = A, B # no, comment',
'inherit = # a',
# quotes are ok
'inherit = "A", "B"',
"inherit = 'A', 'B'",
'inherit = "None", B',
'inherit = <a = 1, b - 1>',
# one really awkward, but valid example
param(
'inherit = none, FOO_BAR_0, "<a - 1>", A<a>Z, A{{a}}Z, A@(a)Z',
])
)
id='awkward'
),
]
)
def test_inherit_lowercase_not_match_none(inherit_line):
lint = lint_text(inherit_line, ['style'])
assert not any('S007' in msg for msg in lint.messages)
def test_check_lowercase_family_names__false(line):
assert check_lowercase_family_names(line) is False


def test_inherit_lowercase_matches():
lint = lint_text('inherit = a', ['style'])
assert any('S007' in msg for msg in lint.messages)


@pytest.mark.parametrize(
Expand Down

0 comments on commit 950f66c

Please sign in to comment.