From c7c19a230a205a33b43179af52b81705d6acb218 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 29 Jun 2023 21:51:04 +0200 Subject: [PATCH] Almost finished refactoring the container detection. Need to test edge cases though. --- nf_core/download.py | 239 ++++++++++++++++++++++++++++---------------- 1 file changed, 154 insertions(+), 85 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 5114281d0d..80f6bbb76f 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -684,13 +684,33 @@ def find_container_images(self, workflow_directory): # Find any config variables that look like a container for k, v in self.nf_config.items(): if k.startswith("process.") and k.endswith(".container"): - # Append tuples, needed for consistency with the container_value_defs - # because both will run through rectify_raw_container_matches() - config_findings.append(v.strip('"').strip("'")) + """ + Can be plain string / Docker URI or DSL2 syntax + + Since raw parsing is done by Nextflow, single quotes will be (partially) escaped in DSL2. + Use cleaning regex on DSL2. Same as for modules, except that (?(?(?:.(?!(?[\'\"]) matches the literal word "container" followed by a whitespace and a quote character. - The quote character is captured into the quote group \1. + container\s+[\s{}$=]* matches the literal word "container" followed by whitespace, brackets, equal or variable names. + (?P[\'\"]) The quote character is captured into the quote group \1. The pattern (?:.(?!\1))*.? is used to match any character (.) not followed by the closing quote character (?!\1). This capture happens greedy *, but we add a .? to ensure that we don't match the whole file until the last occurrence of the closing quote character, but rather stop at the first occurrence. \1 inserts the matched quote character into the regex, either " or '. + It may be followed by whitespace or closing bracket [\s}]* re.DOTALL is used to account for the string to be spread out across multiple lines. """ container_regex = re.compile( - r"container\s+(?P[\'\"])(?P(?:.(?!\1))*.?)\1", + r"container\s+[\s{}=$]*(?P[\'\"])(?P(?:.(?!\1))*.?)\1[\s}]*", re.DOTALL, ) - module_container = re.findall(container_regex, search_space) - - # Not sure if there will ever be multiple container definitions per module, but beware DSL3. - for _, container_value in module_container: - """ - Now isolate all quoted strings from the container definition above. - We also need to account for escape sequences before the quotes this time. Yeah! - - [^\"\'] makes sure that the outermost quote character is not matched. - (?P(?(?:.(?!(?(?(?:.(?!(?(?(?:.(?!(?(?(?:.(?!(?