diff --git a/CHANGELOG.md b/CHANGELOG.md index b8cc8f1d1b..d5bd030639 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ - Use `nfcore/gitpod:dev` container in the dev branch ([#2196](https://github.com/nf-core/tools/pull/2196)) - Replace requests_mock with responses in test mocks ([#2165](https://github.com/nf-core/tools/pull/2165)). - Add warning when installing a module from an `org_path` that exists in multiple remotes in `modules.json` ([#2228](https://github.com/nf-core/tools/pull/2228) [#2239](https://github.com/nf-core/tools/pull/2239)). +- Add the possibility to translate refgenie asset aliases to the ones used in a pipeline with an alias_translations.yaml file ([#2242](https://github.com/nf-core/tools/pull/2242)). ## [v2.7.2 - Mercury Eagle Patch](https://github.com/nf-core/tools/releases/tag/2.7.2) - [2022-12-19] diff --git a/nf_core/refgenie.py b/nf_core/refgenie.py index a10e4fecdf..b666844699 100644 --- a/nf_core/refgenie.py +++ b/nf_core/refgenie.py @@ -2,6 +2,7 @@ Update a nextflow.config file with refgenie genomes """ +import json import logging import os import re @@ -10,6 +11,7 @@ import rich import rich.traceback +import yaml import nf_core.utils @@ -45,6 +47,7 @@ def _print_nf_config(rgc): """ abg = rgc.list_assets_by_genome() genomes_str = "" + alias_translations = _get_alias_translation_file(rgc) for genome, asset_list in abg.items(): genomes_str += f" '{genome}' {{\n" for asset in asset_list: @@ -54,6 +57,10 @@ def _print_nf_config(rgc): except Exception: log.warning(f"{genome}/{asset} is incomplete, ignoring...") else: + # Translate an alias name to the alias used in the pipeline + if asset in alias_translations.keys(): + log.info(f"Translating refgenie asset alias {asset} to {alias_translations[asset]}.") + asset = alias_translations[asset] genomes_str += f' {asset.ljust(20, " ")} = "{pth}"\n' genomes_str += " }\n" @@ -100,6 +107,38 @@ def _update_nextflow_home_config(refgenie_genomes_config_file, nxf_home): log.info(f"Created new nextflow config file: {nxf_home_config}") +def _get_alias_translation_file(rgc): + """ + Read a file containing alias translations. + + Alias translation file should be located in the same folder as the refgenie `genome_config.yaml` file, + the path is set to $REFGENIE environment variable by `refgenie init`. + Alias translation file should be named `alias_translations.yaml` + + Input file contains the name of refgenie server aliases as keys and the name of the respective nf-core pipeline aliases as values. + Such as: + ensembl_gtf: gtf + star_index: star + """ + translations = {} + + if "REFGENIE" in os.environ: + refgenie_genomes_config_path = os.environ.get("REFGENIE") + refgenie_genomes_config_directory = Path(refgenie_genomes_config_path).parents[0] + elif "genome_folder" in rgc: + refgenie_genomes_config_directory = Path(rgc["genome_folder"]) + else: + return translations + + try: + with open(refgenie_genomes_config_directory / "alias_translations.yaml") as yaml_file: + translations = yaml.load(yaml_file, Loader=yaml.Loader) + except FileNotFoundError: + pass + + return translations + + def update_config(rgc): """ Update the genomes.config file after a local refgenie database has been updated diff --git a/tests/test_refgenie.py b/tests/test_refgenie.py index 1ff2683416..73fbcb863f 100644 --- a/tests/test_refgenie.py +++ b/tests/test_refgenie.py @@ -7,6 +7,8 @@ import tempfile import unittest +import yaml + class TestRefgenie(unittest.TestCase): """Class for refgenie tests""" @@ -19,6 +21,7 @@ def setUp(self): self.NXF_HOME = os.path.join(self.tmp_dir, ".nextflow") self.NXF_REFGENIE_PATH = os.path.join(self.NXF_HOME, "nf-core", "refgenie_genomes.config") self.REFGENIE = os.path.join(self.tmp_dir, "genomes_config.yaml") + self.translation_file = os.path.join(self.tmp_dir, "alias_translations.yaml") # Set NXF_HOME environment variable # avoids adding includeConfig statement to config file outside the current tmpdir try: @@ -37,6 +40,10 @@ def setUp(self): with open(self.REFGENIE, "a") as fh: fh.write(f"nextflow_config: {os.path.join(self.NXF_REFGENIE_PATH)}\n") + # Add an alias translation to YAML file + with open(self.translation_file, "a") as fh: + fh.write("ensembl_gtf: gtf\n") + def tearDown(self) -> None: # Remove the tempdir again os.system(f"rm -rf {self.tmp_dir}") @@ -53,3 +60,13 @@ def test_update_refgenie_genomes_config(self): out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT) assert "Updated nf-core genomes config" in str(out) + + def test_asset_alias_translation(self): + """Test that asset aliases are translated correctly""" + # Populate the config with a genome + cmd = f"refgenie pull hg38/ensembl_gtf -c {self.REFGENIE}" + subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT) + cmd = f"cat {self.NXF_REFGENIE_PATH}" + out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT) + assert " gtf = " in str(out) + assert " ensembl_gtf = " not in str(out)