Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: simplify OrderedDict arguments in lexer #598

Merged
merged 1 commit into from
Apr 13, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 37 additions & 82 deletions google/cloud/bigquery/magics/line_arg_parser/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,90 +49,45 @@
# the value of an option other than "--params", we do not really care about its
# structure, and thus do not want to use any of the "Python tokens" for pattern matching.
#
# Since token definition order is important, an OrderedDict is needed with tightly
# controlled member definitions (i.e. passed as a sequence, and *not* via kwargs).
# Token definition order is important, thus an OrderedDict is used. In addition, PEP 468
# guarantees us that the order of kwargs is preserved in Python 3.6+.
Comment on lines +52 to +53
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note - we still need to use OrderedDict instead of a regular dict, because the regular dict's key insertion order should be considered an implementation detail of CPython 3.6 (that behavior was only promoted to part of the language spec in Python 3.7).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah! I didn't realize kwargs became ordered a version earlier :-)

token_types = OrderedDict(
[
(
"state_parse_pos_args",
OrderedDict(
[
(
"GOTO_PARSE_NON_PARAMS_OPTIONS",
r"(?P<GOTO_PARSE_NON_PARAMS_OPTIONS>(?=--))", # double dash - starting the options list
),
(
"DEST_VAR",
r"(?P<DEST_VAR>[^\d\W]\w*)", # essentially a Python ID
),
]
),
),
(
"state_parse_non_params_options",
OrderedDict(
[
(
"GOTO_PARSE_PARAMS_OPTION",
r"(?P<GOTO_PARSE_PARAMS_OPTION>(?=--params(?:\s|=|--|$)))", # the --params option
),
("OPTION_SPEC", r"(?P<OPTION_SPEC>--\w+)"),
("OPTION_EQ", r"(?P<OPTION_EQ>=)"),
("OPT_VAL", r"(?P<OPT_VAL>\S+?(?=\s|--|$))"),
]
),
),
(
"state_parse_params_option",
OrderedDict(
[
(
"PY_STRING",
r"(?P<PY_STRING>(?:{})|(?:{}))".format(
r"'(?:[^'\\]|\.)*'",
r'"(?:[^"\\]|\.)*"', # single and double quoted strings
),
),
("PARAMS_OPT_SPEC", r"(?P<PARAMS_OPT_SPEC>--params(?=\s|=|--|$))"),
("PARAMS_OPT_EQ", r"(?P<PARAMS_OPT_EQ>=)"),
(
"GOTO_PARSE_NON_PARAMS_OPTIONS",
r"(?P<GOTO_PARSE_NON_PARAMS_OPTIONS>(?=--\w+))", # found another option spec
),
("PY_BOOL", r"(?P<PY_BOOL>True|False)"),
("DOLLAR_PY_ID", r"(?P<DOLLAR_PY_ID>\$[^\d\W]\w*)"),
(
"PY_NUMBER",
r"(?P<PY_NUMBER>-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)",
),
("SQUOTE", r"(?P<SQUOTE>')"),
("DQUOTE", r'(?P<DQUOTE>")'),
("COLON", r"(?P<COLON>:)"),
("COMMA", r"(?P<COMMA>,)"),
("LCURL", r"(?P<LCURL>\{)"),
("RCURL", r"(?P<RCURL>})"),
("LSQUARE", r"(?P<LSQUARE>\[)"),
("RSQUARE", r"(?P<RSQUARE>])"),
("LPAREN", r"(?P<LPAREN>\()"),
("RPAREN", r"(?P<RPAREN>\))"),
]
),
),
(
"common",
OrderedDict(
[
("WS", r"(?P<WS>\s+)"),
("EOL", r"(?P<EOL>$)"),
(
# anything not a whitespace or matched by something else
"UNKNOWN",
r"(?P<UNKNOWN>\S+)",
),
]
),
state_parse_pos_args=OrderedDict(
GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P<GOTO_PARSE_NON_PARAMS_OPTIONS>(?=--))", # double dash - starting the options list
DEST_VAR=r"(?P<DEST_VAR>[^\d\W]\w*)", # essentially a Python ID
),
state_parse_non_params_options=OrderedDict(
GOTO_PARSE_PARAMS_OPTION=r"(?P<GOTO_PARSE_PARAMS_OPTION>(?=--params(?:\s|=|--|$)))", # the --params option
OPTION_SPEC=r"(?P<OPTION_SPEC>--\w+)",
OPTION_EQ=r"(?P<OPTION_EQ>=)",
OPT_VAL=r"(?P<OPT_VAL>\S+?(?=\s|--|$))",
),
state_parse_params_option=OrderedDict(
PY_STRING=r"(?P<PY_STRING>(?:{})|(?:{}))".format( # single and double quoted strings
r"'(?:[^'\\]|\.)*'", r'"(?:[^"\\]|\.)*"'
),
]
PARAMS_OPT_SPEC=r"(?P<PARAMS_OPT_SPEC>--params(?=\s|=|--|$))",
PARAMS_OPT_EQ=r"(?P<PARAMS_OPT_EQ>=)",
GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P<GOTO_PARSE_NON_PARAMS_OPTIONS>(?=--\w+))", # found another option spec
PY_BOOL=r"(?P<PY_BOOL>True|False)",
DOLLAR_PY_ID=r"(?P<DOLLAR_PY_ID>\$[^\d\W]\w*)",
PY_NUMBER=r"(?P<PY_NUMBER>-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)",
SQUOTE=r"(?P<SQUOTE>')",
DQUOTE=r'(?P<DQUOTE>")',
COLON=r"(?P<COLON>:)",
COMMA=r"(?P<COMMA>,)",
LCURL=r"(?P<LCURL>\{)",
RCURL=r"(?P<RCURL>})",
LSQUARE=r"(?P<LSQUARE>\[)",
RSQUARE=r"(?P<RSQUARE>])",
LPAREN=r"(?P<LPAREN>\()",
RPAREN=r"(?P<RPAREN>\))",
),
common=OrderedDict(
WS=r"(?P<WS>\s+)",
EOL=r"(?P<EOL>$)",
UNKNOWN=r"(?P<UNKNOWN>\S+)", # anything not a whitespace or matched by something else
),
)


Expand Down