From 584d0331c8a73785af8a78fa601c540f5fe7bea2 Mon Sep 17 00:00:00 2001 From: cobalt <61329810+cobaltt7@users.noreply.github.com> Date: Fri, 17 Jan 2025 00:09:22 -0600 Subject: [PATCH] fix: Don't remove parenthesis around long dictionary values (#4377) --- CHANGES.md | 3 + src/black/linegen.py | 77 ++++--- src/black/trans.py | 18 +- tests/data/cases/preview_long_dict_values.py | 218 +++++++++++++++++-- tests/data/cases/preview_long_strings.py | 40 +++- 5 files changed, 290 insertions(+), 66 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 51374fbe7f8..416aabcdf13 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -24,6 +24,8 @@ (#4498) - Remove parentheses around sole list items (#4312) - Collapse multiple empty lines after an import into one (#4489) +- Prevent `string_processing` and `wrap_long_dict_values_in_parens` from removing + parentheses around long dictionary values (#4377) ### Configuration @@ -43,6 +45,7 @@ ### Performance + - Speed up the `is_fstring_start` function in Black's tokenizer (#4541) ### Output diff --git a/src/black/linegen.py b/src/black/linegen.py index ca8b30665c2..90ca5da8587 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -973,29 +973,7 @@ def _maybe_split_omitting_optional_parens( try: # The RHSResult Omitting Optional Parens. rhs_oop = _first_right_hand_split(line, omit=omit) - is_split_right_after_equal = ( - len(rhs.head.leaves) >= 2 and rhs.head.leaves[-2].type == token.EQUAL - ) - rhs_head_contains_brackets = any( - leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1] - ) - # the -1 is for the ending optional paren - rhs_head_short_enough = is_line_short_enough( - rhs.head, mode=replace(mode, line_length=mode.line_length - 1) - ) - rhs_head_explode_blocked_by_magic_trailing_comma = ( - rhs.head.magic_trailing_comma is None - ) - if ( - not ( - is_split_right_after_equal - and rhs_head_contains_brackets - and rhs_head_short_enough - and rhs_head_explode_blocked_by_magic_trailing_comma - ) - # the omit optional parens split is preferred by some other reason - or _prefer_split_rhs_oop_over_rhs(rhs_oop, rhs, mode) - ): + if _prefer_split_rhs_oop_over_rhs(rhs_oop, rhs, mode): yield from _maybe_split_omitting_optional_parens( rhs_oop, line, mode, features=features, omit=omit ) @@ -1006,8 +984,15 @@ def _maybe_split_omitting_optional_parens( if line.is_chained_assignment: pass - elif not can_be_split(rhs.body) and not is_line_short_enough( - rhs.body, mode=mode + elif ( + not can_be_split(rhs.body) + and not is_line_short_enough(rhs.body, mode=mode) + and not ( + Preview.wrap_long_dict_values_in_parens + and rhs.opening_bracket.parent + and rhs.opening_bracket.parent.parent + and rhs.opening_bracket.parent.parent.type == syms.dictsetmaker + ) ): raise CannotSplit( "Splitting failed, body is still too long and can't be split." @@ -1038,6 +1023,44 @@ def _prefer_split_rhs_oop_over_rhs( Returns whether we should prefer the result from a split omitting optional parens (rhs_oop) over the original (rhs). """ + # contains unsplittable type ignore + if ( + rhs_oop.head.contains_unsplittable_type_ignore() + or rhs_oop.body.contains_unsplittable_type_ignore() + or rhs_oop.tail.contains_unsplittable_type_ignore() + ): + return True + + # Retain optional parens around dictionary values + if ( + Preview.wrap_long_dict_values_in_parens + and rhs.opening_bracket.parent + and rhs.opening_bracket.parent.parent + and rhs.opening_bracket.parent.parent.type == syms.dictsetmaker + and rhs.body.bracket_tracker.delimiters + ): + # Unless the split is inside the key + return any(leaf.type == token.COLON for leaf in rhs_oop.tail.leaves) + + # the split is right after `=` + if not (len(rhs.head.leaves) >= 2 and rhs.head.leaves[-2].type == token.EQUAL): + return True + + # the left side of assignment contains brackets + if not any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1]): + return True + + # the left side of assignment is short enough (the -1 is for the ending optional + # paren) + if not is_line_short_enough( + rhs.head, mode=replace(mode, line_length=mode.line_length - 1) + ): + return True + + # the left side of assignment won't explode further because of magic trailing comma + if rhs.head.magic_trailing_comma is not None: + return True + # If we have multiple targets, we prefer more `=`s on the head vs pushing them to # the body rhs_head_equal_count = [leaf.type for leaf in rhs.head.leaves].count(token.EQUAL) @@ -1065,10 +1088,6 @@ def _prefer_split_rhs_oop_over_rhs( # the first line is short enough and is_line_short_enough(rhs_oop.head, mode=mode) ) - # contains unsplittable type ignore - or rhs_oop.head.contains_unsplittable_type_ignore() - or rhs_oop.body.contains_unsplittable_type_ignore() - or rhs_oop.tail.contains_unsplittable_type_ignore() ) diff --git a/src/black/trans.py b/src/black/trans.py index 7435b64b03d..b885bc65ace 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -856,7 +856,7 @@ def _validate_msg(line: Line, string_idx: int) -> TResult[None]: ): return TErr( "StringMerger does NOT merge f-strings with different quote types" - "and internal quotes." + " and internal quotes." ) if id(leaf) in line.comments: @@ -887,6 +887,7 @@ class StringParenStripper(StringTransformer): The line contains a string which is surrounded by parentheses and: - The target string is NOT the only argument to a function call. - The target string is NOT a "pointless" string. + - The target string is NOT a dictionary value. - If the target string contains a PERCENT, the brackets are not preceded or followed by an operator with higher precedence than PERCENT. @@ -934,11 +935,14 @@ def do_match(self, line: Line) -> TMatchResult: ): continue - # That LPAR should NOT be preceded by a function name or a closing - # bracket (which could be a function which returns a function or a - # list/dictionary that contains a function)... + # That LPAR should NOT be preceded by a colon (which could be a + # dictionary value), function name, or a closing bracket (which + # could be a function returning a function or a list/dictionary + # containing a function)... if is_valid_index(idx - 2) and ( - LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS + LL[idx - 2].type == token.COLON + or LL[idx - 2].type == token.NAME + or LL[idx - 2].type in CLOSING_BRACKETS ): continue @@ -2264,12 +2268,12 @@ def do_transform( elif right_leaves and right_leaves[-1].type == token.RPAR: # Special case for lambda expressions as dict's value, e.g.: # my_dict = { - # "key": lambda x: f"formatted: {x}, + # "key": lambda x: f"formatted: {x}", # } # After wrapping the dict's value with parentheses, the string is # followed by a RPAR but its opening bracket is lambda's, not # the string's: - # "key": (lambda x: f"formatted: {x}), + # "key": (lambda x: f"formatted: {x}"), opening_bracket = right_leaves[-1].opening_bracket if opening_bracket is not None and opening_bracket in left_leaves: index = left_leaves.index(opening_bracket) diff --git a/tests/data/cases/preview_long_dict_values.py b/tests/data/cases/preview_long_dict_values.py index a19210605f6..7c65d67496d 100644 --- a/tests/data/cases/preview_long_dict_values.py +++ b/tests/data/cases/preview_long_dict_values.py @@ -1,4 +1,25 @@ # flags: --unstable +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": ( + "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx" + ) +} +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": ( + "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx" + ), +} +x = { + "foo": bar, + "foo": bar, + "foo": ( + xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx + ), +} +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx" +} + my_dict = { "something_something": r"Lorem ipsum dolor sit amet, an sed convenire eloquentiam \t" @@ -6,23 +27,90 @@ r"signiferumque, duo ea vocibus consetetur scriptorem. Facer \t", } +# Function calls as keys +tasks = { + get_key_name( + foo, + bar, + baz, + ): src, + loop.run_in_executor(): src, + loop.run_in_executor(xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx): src, + loop.run_in_executor( + xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxx + ): src, + loop.run_in_executor(): ( + xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx + ), +} + +# Dictionary comprehensions +tasks = { + key_name: ( + xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx + ) + for src in sources +} +tasks = {key_name: foobar for src in sources} +tasks = { + get_key_name( + src, + ): "foo" + for src in sources +} +tasks = { + get_key_name( + foo, + bar, + baz, + ): src + for src in sources +} +tasks = { + get_key_name(): ( + xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx + ) + for src in sources +} +tasks = {get_key_name(): foobar for src in sources} + + +# Delimiters inside the value +def foo(): + def bar(): + x = { + common.models.DateTimeField: datetime(2020, 1, 31, tzinfo=utc) + timedelta( + days=i + ), + } + x = { + common.models.DateTimeField: ( + datetime(2020, 1, 31, tzinfo=utc) + timedelta(days=i) + ), + } + x = { + "foobar": (123 + 456), + } + x = { + "foobar": (123) + 456, + } + + my_dict = { "a key in my dict": a_very_long_variable * and_a_very_long_function_call() / 100000.0 } - my_dict = { "a key in my dict": a_very_long_variable * and_a_very_long_function_call() * and_another_long_func() / 100000.0 } - my_dict = { "a key in my dict": MyClass.some_attribute.first_call().second_call().third_call(some_args="some value") } { - 'xxxxxx': + "xxxxxx": xxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxx( xxxxxxxxxxxxxx={ - 'x': + "x": xxxxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxx( xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=( xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx @@ -30,8 +118,8 @@ xxxxxxxxxxxxx=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx .xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx( xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx={ - 'x': x.xx, - 'x': x.x, + "x": x.xx, + "x": x.x, })))) }), } @@ -58,7 +146,26 @@ def func(): # output - +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": ( + "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx" + ) +} +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": ( + "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx" + ), +} +x = { + "foo": bar, + "foo": bar, + "foo": ( + xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx + ), +} +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx" +} my_dict = { "something_something": ( @@ -68,12 +175,80 @@ def func(): ), } +# Function calls as keys +tasks = { + get_key_name( + foo, + bar, + baz, + ): src, + loop.run_in_executor(): src, + loop.run_in_executor(xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx): src, + loop.run_in_executor( + xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxx + ): src, + loop.run_in_executor(): ( + xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx + ), +} + +# Dictionary comprehensions +tasks = { + key_name: ( + xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx + ) + for src in sources +} +tasks = {key_name: foobar for src in sources} +tasks = { + get_key_name( + src, + ): "foo" + for src in sources +} +tasks = { + get_key_name( + foo, + bar, + baz, + ): src + for src in sources +} +tasks = { + get_key_name(): ( + xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx + ) + for src in sources +} +tasks = {get_key_name(): foobar for src in sources} + + +# Delimiters inside the value +def foo(): + def bar(): + x = { + common.models.DateTimeField: ( + datetime(2020, 1, 31, tzinfo=utc) + timedelta(days=i) + ), + } + x = { + common.models.DateTimeField: ( + datetime(2020, 1, 31, tzinfo=utc) + timedelta(days=i) + ), + } + x = { + "foobar": 123 + 456, + } + x = { + "foobar": (123) + 456, + } + + my_dict = { "a key in my dict": ( a_very_long_variable * and_a_very_long_function_call() / 100000.0 ) } - my_dict = { "a key in my dict": ( a_very_long_variable @@ -82,7 +257,6 @@ def func(): / 100000.0 ) } - my_dict = { "a key in my dict": ( MyClass.some_attribute.first_call() @@ -113,17 +287,15 @@ def func(): class Random: def func(): - random_service.status.active_states.inactive = ( - make_new_top_level_state_from_dict({ - "topLevelBase": { - "secondaryBase": { - "timestamp": 1234, - "latitude": 1, - "longitude": 2, - "actionTimestamp": ( - Timestamp(seconds=1530584000, nanos=0).ToJsonString() - ), - } - }, - }) - ) + random_service.status.active_states.inactive = make_new_top_level_state_from_dict({ + "topLevelBase": { + "secondaryBase": { + "timestamp": 1234, + "latitude": 1, + "longitude": 2, + "actionTimestamp": ( + Timestamp(seconds=1530584000, nanos=0).ToJsonString() + ), + } + }, + }) diff --git a/tests/data/cases/preview_long_strings.py b/tests/data/cases/preview_long_strings.py index ba48c19d542..cf1d12b6e3e 100644 --- a/tests/data/cases/preview_long_strings.py +++ b/tests/data/cases/preview_long_strings.py @@ -279,7 +279,7 @@ def foo(): "........................................................................... \\N{LAO KO LA}" ) -msg = lambda x: f"this is a very very very long lambda value {x} that doesn't fit on a single line" +msg = lambda x: f"this is a very very very very long lambda value {x} that doesn't fit on a single line" dict_with_lambda_values = { "join": lambda j: ( @@ -329,6 +329,20 @@ def foo(): log.info(f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share=} {desc['status']} {desc['exposure_max']}""") +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": ( + "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx" + ) +} +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx", +} +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": ( + "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx" + ) +} + # output @@ -842,11 +856,9 @@ def foo(): " \\N{LAO KO LA}" ) -msg = ( - lambda x: ( - f"this is a very very very long lambda value {x} that doesn't fit on a single" - " line" - ) +msg = lambda x: ( + f"this is a very very very very long lambda value {x} that doesn't fit on a" + " single line" ) dict_with_lambda_values = { @@ -925,4 +937,18 @@ def foo(): log.info( f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share=} {desc['status']} {desc['exposure_max']}""" -) \ No newline at end of file +) + +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": ( + "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx" + ) +} +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": ( + "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx" + ), +} +x = { + "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx" +}