Skip to content

Commit

Permalink
prototype multi-line strings in version development
Browse files Browse the repository at this point in the history
  • Loading branch information
mlin authored Nov 24, 2022
1 parent 4def324 commit f11fa41
Show file tree
Hide file tree
Showing 7 changed files with 220 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ unit_tests:
python3 tests/no_docker_services.py

integration_tests:
prove -v tests/{check,eval,runner,zip}.t
prove -v tests/{check,eval,runner,zip,multi_line_strings}.t
python3 tests/no_docker_services.py

skylab_bulk_rna:
Expand Down
21 changes: 14 additions & 7 deletions WDL/Expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,12 +360,12 @@ class String(Base):
"""
:type: List[Union[str,WDL.Expr.Placeholder]]
The parts list begins and ends with matching single- or double- quote marks. Between these is
a sequence of literal strings and/or interleaved placeholder expressions. Escape sequences in
the literals will NOT have been decoded (although the parser will have checked they're valid).
Strings arising from task commands leave escape sequences to be interpreted by the shell in the
task container. Other string literals have their escape sequences interpreted upon evaluation
to string values.
The parts list begins and ends with the original delimiters (quote marks, braces, or triple
angle brackets). Between these is a sequence of literal strings and/or interleaved placeholder
expressions. Escape sequences in the literals will NOT have been decoded (although the parser
will have checked they're valid). Strings arising from task commands leave escape sequences to
be interpreted by the shell in the task container. Other string literals have their escape
sequences interpreted upon evaluation to string values.
"""

command: bool
Expand Down Expand Up @@ -423,7 +423,14 @@ def _eval(self, env: Env.Bindings[Value.Base], stdlib: StdLib.Base) -> Value.Str
else:
assert False
# concatenate the stringified parts and trim the surrounding quotes
return Value.String("".join(ans)[1:-1])
# TODO: make command repr include delimiters for consistency
if self.command:
return Value.String("".join(ans))
delim = self.parts[0]
assert delim in ("'", '"', "{", "<<<")
delim2 = self.parts[-1]
assert delim2 in ("'", '"', "}", ">>>") and len(delim) == len(delim2)
return Value.String("".join(ans)[len(delim) : -len(delim)])

@property
def literal(self) -> Optional[Value.Base]:
Expand Down
10 changes: 7 additions & 3 deletions WDL/_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@
| FLOAT -> float
| SIGNED_FLOAT -> float
?string: string1 | string2
?string: string1 | string2 | multistring
STRING_INNER1: ("\\'"|/[^']/)
ESCAPED_STRING1: "'" STRING_INNER1* "'"
Expand All @@ -445,12 +445,13 @@
_EITHER_DELIM.2: "~{" | "${"
// string (single-quoted)
STRING1_CHAR: "\\'" | /[^'~$]/ | /\$(?=[^{])/ | /\~(?=[^{])/
_DOUBLE_BACKSLASH.2: "\\\\"
STRING1_CHAR: _DOUBLE_BACKSLASH | "\\'" | /[^'~$]/ | /\$(?=[^{])/ | /\~(?=[^{])/
STRING1_FRAGMENT: STRING1_CHAR+
string1: /'/ (STRING1_FRAGMENT? _EITHER_DELIM expr "}")* STRING1_FRAGMENT? /'/ -> string
// string (double-quoted)
STRING2_CHAR: "\\\"" | /[^"~$]/ | /\$(?=[^{])/ | /~(?=[^{])/
STRING2_CHAR: _DOUBLE_BACKSLASH | "\\\"" | /[^"~$]/ | /\$(?=[^{])/ | /~(?=[^{])/
STRING2_FRAGMENT: STRING2_CHAR+
string2: /"/ (STRING2_FRAGMENT? _EITHER_DELIM expr "}")* STRING2_FRAGMENT? /"/ -> string
Expand All @@ -462,6 +463,9 @@
COMMAND2_FRAGMENT: COMMAND2_CHAR+
command2: "<<<" (COMMAND2_FRAGMENT? "~{" placeholder "}")* COMMAND2_FRAGMENT? ">>>" -> command
// multi-line string (very similar to command2, but processed slightly differently)
multistring: /<<</ (COMMAND2_FRAGMENT? "~{" expr "}")* COMMAND2_FRAGMENT? />>>/ -> string
CNAME: /[a-zA-Z][a-zA-Z0-9_]*/
%import common.INT
Expand Down
59 changes: 57 additions & 2 deletions WDL/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,65 @@ def string(self, meta, items) -> Expr.Base:
# ...but preserve originals in AST.
parts.append(item.value)
assert len(parts) >= 2
assert parts[0] in ['"', "'"]
assert parts[-1] in ['"', "'"]
assert parts[0] in ['"', "'", "<<<"], parts[0]
assert parts[-1] in ['"', "'", ">>>"], parts[-1]
if parts[0] == "<<<":
self._preprocess_multistring(meta, parts)
return Expr.String(self._sp(meta), parts)

def _preprocess_multistring(self, meta, parts):
    """
    Normalize, in place, the parts list of a ``<<< ... >>>`` multi-line string.

    ``parts[0]`` and ``parts[-1]`` are the delimiters and are never modified. The elements
    between them are ``str`` literal fragments interleaved with non-``str`` placeholder
    nodes; only the ``str`` fragments are rewritten. ``meta`` is not used here.

    Processing order:
      1. Remove escaped newlines (a newline preceded by an odd number of backslashes),
         together with the escaping backslash and any spaces/tabs that begin the next line.
      2. Trim spaces/tabs (and then one newline, if present) from the left of the first
         fragment and from the right of the last fragment.
      3. Find the smallest leading-whitespace width among the remaining non-blank lines.
      4. Remove that many leading characters from every line that starts in a literal
         fragment.

    Returns None; ``parts`` is mutated.
    """
    # 1. Escaped newlines. An index loop is required because handling line j may also
    # rewrite line j+1, and that rewritten text must be what the next iteration sees.
    for i in range(1, len(parts) - 1):
        part = parts[i]
        if not isinstance(part, str):
            continue
        lines = part.split("\n")
        # The final element is not followed by a newline, so it is never examined here.
        for j in range(len(lines) - 1):
            line = lines[j]
            trailing_backslashes = len(line) - len(line.rstrip("\\"))
            if trailing_backslashes % 2 == 1:
                # Odd count: the last backslash escapes the newline. Drop both, and the
                # indentation of the continuation line.
                lines[j] = line[:-1]
                lines[j + 1] = lines[j + 1].lstrip(" \t")
            else:
                # Even count: backslashes pair up among themselves; keep the newline.
                lines[j] = line + "\n"
        parts[i] = "".join(lines)

    # 2. Trim the outer edges. When a single fragment is both first and last, both trims
    # apply to it.
    if len(parts) > 2 and isinstance(parts[1], str):
        parts[1] = parts[1].lstrip(" \t")
        if parts[1] and parts[1][0] == "\n":
            parts[1] = parts[1][1:]
    if len(parts) > 2 and isinstance(parts[-2], str):
        parts[-2] = parts[-2].rstrip(" \t")
        if parts[-2] and parts[-2][-1] == "\n":
            parts[-2] = parts[-2][:-1]

    # 3. Common indentation. A pseudo-string with a dummy "~{}" standing in for each
    # placeholder is simpler than tracking how newlines intersperse with placeholders.
    # Whitespace-only lines are ignored.
    pseudo = "".join(part if isinstance(part, str) else "~{}" for part in parts[1:-1])
    indents = [len(line) - len(line.lstrip()) for line in pseudo.split("\n") if line.strip()]
    common_ws = min(indents, default=0)
    if common_ws == 0:
        return

    # 4. Dedent. Text that follows a placeholder on the same line is not at the start of a
    # line and must be left alone, hence the at_new_line bookkeeping.
    at_new_line = True
    for i in range(1, len(parts) - 1):
        part = parts[i]
        if not isinstance(part, str):
            at_new_line = False
            continue
        lines = part.split("\n")
        for j, line in enumerate(lines):
            if at_new_line:
                assert not line[:common_ws].strip(), "non-whitespace within common indent"
                lines[j] = line[common_ws:]
            # Every element after the first follows a newline.
            at_new_line = True
        parts[i] = "\n".join(lines)
        at_new_line = parts[i].endswith("\n")

def string_literal(self, meta, items):
assert len(items) == 1
assert items[0].value.startswith('"') or items[0].value.startswith("'")
Expand Down
24 changes: 24 additions & 0 deletions tests/multi_line_strings.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
# bash-tap tests for WDL multi-line strings (runs multi_line_strings.wdl)
set -o pipefail

# Work from the parent of this script's directory. Quote "$0" so a checkout path
# containing whitespace still resolves, and stop if the directory cannot be entered.
cd "$(dirname "$0")/.." || exit 1
SOURCE_DIR="$(pwd)"

BASH_TAP_ROOT="tests/bash-tap"
source tests/bash-tap/bash-tap-bootstrap

export PYTHONPATH="$SOURCE_DIR:$PYTHONPATH"
# Expanded unquoted below on purpose, so it word-splits into the command and its arguments.
miniwdl="python3 -m WDL"

plan tests 1

if [[ -z $TMPDIR ]]; then
    TMPDIR=/tmp
fi
# Run inside a fresh scratch directory.
DN=$(mktemp -d "${TMPDIR}/miniwdl_tests_XXXXXX")
DN=$(realpath "$DN")
cd "$DN" || exit 1

# The single planned test: the workflow run must exit with status 0.
$miniwdl run "$SOURCE_DIR/tests/multi_line_strings.wdl" --verbose
is $? "0"
107 changes: 107 additions & 0 deletions tests/multi_line_strings.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
version development

# Test cases for multi-line string literals (openwdl PR# 414)
# The integration test suite executes this WDL (via multi_line_strings.t) which you can also run
# standalone in your miniwdl repo clone:
# python3 -m WDL run tests/multi_line_strings.wdl --dir /tmp --verbose
workflow test_multi_line_strings {
String speed = "quick"
String color = "brown"
String verb = "jumps"

String spaces = " "
String name = "Henry"
String company = "Acme"

# Test cases: pairs with a multi-line string and an escaped single-line string; the two should
# equal each other.
Array[Pair[String, String]] examples = [
(<<<
The ~{speed} ~{color}
fox ~{verb} over \
the lazy dog.>>>,
"The quick brown\n fox jumps over the lazy dog."),
(<<<hello world>>>, "hello world"),
(<<< hello world >>>, "hello world"),
(<<<
hello world>>>, "hello world"),
(<<<
hello world
>>>, "hello world"),
(<<<
hello \
world \
>>>, "hello world"),
(<<<
hello world
>>>, "\nhello world\n"),
(<<<
hello \
world
>>>, "hello world"),
(<<<
this is a
multi-line string
>>>, "this is a\n\n multi-line string"),
(<<<
this is a
multi-line string
>>>, "this is a\n\nmulti-line string\n"),
(<<<
this is a \
string that \
contains no newlines
>>>, "this is a string that contains no newlines"),
(<<<
multi-line string \
with 'single' and "double" quotes
>>>, "multi-line string with 'single' and \"double\" quotes"),
(<<<
~{spaces}Hello ~{name},
~{spaces}Welcome to ~{company}!
>>>, " Hello Henry,\n Welcome to Acme!"),
(<<<
\x20 Forced
indentation
>>>, " Forced\n indentation"),
(<<<abc\
>>>, "abc"),
(<<<abc\\
>>>, "abc\\"),
(<<<abc\\>>>, 'abc\\'),
(<<<abc\\
def>>>, "abc\\\ndef"),
(<<<abc\\\
def>>>, "abc\\def")
]

scatter (ex in examples) {
if (ex.left != ex.right) {
call fail { input: lhs = ex.left, rhs = ex.right }
}
}

output {
Int cases_ok = length(examples)
}
}

# Helper task called by the workflow for an example pair whose two sides differ.
# Its command writes both strings (as JSON, via write_json) to standard error, separated by
# "!=", and then exits with status 1.
task fail {
input {
String lhs
String rhs
}

command {
>&2 echo "$(cat ~{write_json(lhs)}) != $(cat ~{write_json(rhs)})"
exit 1
}

output {}
}
10 changes: 10 additions & 0 deletions tests/test_0eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,16 @@ def test_short_circuit(self):
("true || 1/0 == 1", "true"),
)

def test_multi_line_strings(self):
    # Only a couple of cases live here, chiefly for code coverage. The bulk of the
    # multi-line string cases are in tests/multi_line_strings.wdl (run by the integration
    # suite), where they can be written without double-escaping for both Python and WDL.
    bindings = cons_env(("color", WDL.Value.String("brown")))
    cases = [
        ("<<< \n \\\n >>>", '""', "development"),
        (
            "<<<\n quick ~{color}\n fox\n >>>",
            json.dumps(" quick brown\nfox"),
            bindings,
            "development",
        ),
    ]
    self._test_tuples(*cases)

def cons_env(*bindings):
b = WDL.Env.Bindings()
for (x,y) in bindings:
Expand Down

0 comments on commit f11fa41

Please sign in to comment.