Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add consistency checks between installed modules and modules.json #1200

Merged
merged 7 commits into from
Jul 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

### Modules

* Added consistency checks between installed modules and `modules.json` ([#1199](https://github.com/nf-core/tools/issues/1199))

## [v2.0.1 - Palladium Platypus Junior](https://github.com/nf-core/tools/releases/tag/2.0.1) - [2021-07-13]

### Template
Expand Down
7 changes: 6 additions & 1 deletion nf_core/modules/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ def install(self, module):
log.error("You cannot install a module in a clone of nf-core/modules")
return False
# Check whether pipelines is valid
self.has_valid_directory()
if not self.has_valid_directory():
return False

# Verify that 'modules.json' is consistent with the installed modules
self.modules_json_up_to_date()

if not self.update_all:
# Get the available modules
try:
Expand Down
17 changes: 11 additions & 6 deletions nf_core/modules/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,6 @@ def pattern_msg(keywords):

# No pipeline given - show all remote
if self.remote:
log.info(
f"Modules available from {self.modules_repo.name} ({self.modules_repo.branch})"
f"{pattern_msg(keywords)}:\n"
)

# Get the list of available modules
try:
Expand All @@ -68,15 +64,16 @@ def pattern_msg(keywords):

# We have a pipeline - list what's installed
else:
log.info(f"Modules installed in '{self.dir}'{pattern_msg(keywords)}:\n")

# Check whether pipelines is valid
try:
self.has_valid_directory()
except UserWarning as e:
log.error(e)
return ""

# Verify that 'modules.json' is consistent with the installed modules
self.modules_json_up_to_date()

# Get installed modules
self.get_pipeline_modules()

Expand Down Expand Up @@ -120,4 +117,12 @@ def pattern_msg(keywords):

if print_json:
return json.dumps(modules, sort_keys=True, indent=4)

if self.remote:
log.info(
f"Modules available from {self.modules_repo.name} ({self.modules_repo.branch})"
f"{pattern_msg(keywords)}:\n"
)
else:
log.info(f"Modules installed in '{self.dir}'{pattern_msg(keywords)}:\n")
return table
63 changes: 39 additions & 24 deletions nf_core/modules/module_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,57 +154,72 @@ def create_modules_json(pipeline_dir):
file_progress = progress_bar.add_task(
"Creating 'modules.json' file", total=sum(map(len, repo_module_names.values())), test_name="module.json"
)
for repo_name, module_names in repo_module_names.items():
for repo_name, module_names in sorted(repo_module_names.items()):
try:
modules_repo = ModulesRepo(repo=repo_name)
except LookupError as e:
raise UserWarning(e)

repo_path = os.path.join(modules_dir, repo_name)
modules_json["repos"][repo_name] = dict()
for module_name in module_names:
for module_name in sorted(module_names):
module_path = os.path.join(repo_path, module_name)
progress_bar.update(file_progress, advance=1, test_name=f"{repo_name}/{module_name}")
try:
# Find the correct commit SHA for the local files.
# We iterate over the commit log pages until we either
# find a matching commit or we reach the end of the commits
correct_commit_sha = None
commit_page_nbr = 1
while correct_commit_sha is None:

commit_shas = [
commit["git_sha"]
for commit in get_module_git_log(
module_name, modules_repo=modules_repo, page_nbr=commit_page_nbr
)
]
correct_commit_sha = find_correct_commit_sha(
module_name, module_path, modules_repo, commit_shas
)
commit_page_nbr += 1

modules_json["repos"][repo_name][module_name] = {"git_sha": correct_commit_sha}
except (UserWarning, LookupError) as e:
correct_commit_sha = find_correct_commit_sha(module_name, module_path, modules_repo)

except (LookupError, UserWarning) as e:
log.warn(
f"Could not fetch 'git_sha' for module: '{module_name}'. Please try to install a newer version of this module. ({e})"
)
continue
modules_json["repos"][repo_name][module_name] = {"git_sha": correct_commit_sha}

modules_json_path = os.path.join(pipeline_dir, "modules.json")
with open(modules_json_path, "w") as fh:
json.dump(modules_json, fh, indent=4)


def find_correct_commit_sha(module_name, module_path, modules_repo, commit_shas):
def find_correct_commit_sha(module_name, module_path, modules_repo):
"""
Returns the SHA for the latest commit where the local files are identical to the remote files
Args:
module_name (str): Name of module
module_path (str): Path to module in local repo
module_repo (str): Remote repo for module
commit_shas ([ str ]): List of commit SHAs for module, sorted in descending order
Returns:
commit_sha (str): The latest commit SHA where local files are identical to remote files
"""
try:
# Find the correct commit SHA for the local files.
# We iterate over the commit log pages until we either
# find a matching commit or we reach the end of the commits
correct_commit_sha = None
commit_page_nbr = 1
while correct_commit_sha is None:
commit_shas = [
commit["git_sha"]
for commit in get_module_git_log(module_name, modules_repo=modules_repo, page_nbr=commit_page_nbr)
]
correct_commit_sha = iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas)
commit_page_nbr += 1
return correct_commit_sha
except (UserWarning, LookupError) as e:
raise


def iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas):
"""
Iterates through a list of commits for a module and checks if the local file contents match the remote
Args:
module_name (str): Name of module
module_path (str): Path to module in local repo
module_repo (str): Remote repo for module
commit_shas ([ str ]): List of commit SHAs for module, sorted in descending order
Returns:
commit_sha (str): The latest commit SHA from 'commit_shas' where local files
are identical to remote files
"""

files_to_check = ["main.nf", "functions.nf", "meta.yml"]
local_file_contents = [None, None, None]
Expand Down
129 changes: 129 additions & 0 deletions nf_core/modules/modules_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
import os
import glob
import shutil
import copy
import json
import logging
import yaml

import nf_core.modules.module_utils
import nf_core.utils
from nf_core.modules.modules_repo import ModulesRepo

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -106,6 +108,130 @@ def has_modules_file(self):
except UserWarning as e:
raise

def modules_json_up_to_date(self):
    """
    Reconcile the installed modules with the entries in 'modules.json'.

    The check works in both directions:

    * A module that has an entry in 'modules.json' but is missing from the
      directory is downloaded again at the recorded 'git_sha'. If the remote
      repository cannot be reached (``LookupError``) or the entry has no
      'git_sha', the entry is removed from 'modules.json' instead.
    * A module that is installed but has no entry in 'modules.json' gets a new
      entry whose 'git_sha' is recovered by searching the commit log of the
      remote. Modules for which no SHA can be determined are reported and left
      without an entry.

    The reconciled content is always written back with ``dump_modules_json``.

    Returns:
        None
    """
    mod_json = self.load_modules_json()
    # 'mod_json' is used as a scratch copy (entries are popped from it below);
    # 'fresh_mod_json' is the version that gets corrected and written back.
    fresh_mod_json = copy.deepcopy(mod_json)
    self.get_pipeline_modules()
    missing_from_modules_json = {}

    # Remove every installed module from the scratch copy. Whatever is left
    # afterwards is listed in 'modules.json' but absent from the directory.
    for repo, modules in self.module_names.items():
        if repo not in mod_json["repos"]:
            # Copy so that later bookkeeping can never alias self.module_names
            missing_from_modules_json[repo] = list(modules)
            continue
        for module in modules:
            if module in mod_json["repos"][repo]:
                mod_json["repos"][repo].pop(module)
            else:
                missing_from_modules_json.setdefault(repo, []).append(module)
        if len(mod_json["repos"][repo]) == 0:
            mod_json["repos"].pop(repo)

    # If there are any modules left in 'modules.json' after all installed are removed,
    # we try to reinstall them
    if len(mod_json["repos"]) > 0:
        missing_but_in_mod_json = [
            f"'{repo}/{module}'" for repo, modules in mod_json["repos"].items() for module in modules
        ]
        log.info(
            f"Reinstalling modules found in 'modules.json' but missing from directory: {', '.join(missing_but_in_mod_json)}"
        )

        remove_from_mod_json = {}
        for repo, modules in mod_json["repos"].items():
            try:
                modules_repo = ModulesRepo(repo=repo)
                modules_repo.get_modules_file_tree()
                install_folder = [modules_repo.owner, modules_repo.repo]
            except LookupError:
                # The remote is unusable: none of this repo's modules can be reinstalled
                remove_from_mod_json[repo] = list(modules.keys())
                continue

            for module, entry in modules.items():
                sha = entry.get("git_sha")
                if sha is None:
                    # Without a recorded SHA we do not know which version to fetch
                    remove_from_mod_json.setdefault(repo, []).append(module)
                    continue
                module_dir = os.path.join(self.dir, "modules", *install_folder, module)
                # NOTE(review): the result of the download is not inspected here, so a
                # failed download keeps its 'modules.json' entry - confirm whether
                # 'download_module_file' reports failure through its return value.
                self.download_module_file(module, sha, modules_repo, install_folder, module_dir)

        # If the reinstall fails, we remove those entries in 'modules.json'
        if sum(map(len, remove_from_mod_json.values())) > 0:
            uninstallable_mods = [
                f"'{repo}/{module}'" for repo, modules in remove_from_mod_json.items() for module in modules
            ]
            if len(uninstallable_mods) == 1:
                log.info(f"Was unable to reinstall {uninstallable_mods[0]}. Removing 'modules.json' entry")
            else:
                log.info(
                    f"Was unable to reinstall some modules. Removing 'modules.json' entries: {', '.join(uninstallable_mods)}"
                )

            for repo, modules in remove_from_mod_json.items():
                for module in modules:
                    fresh_mod_json["repos"][repo].pop(module)
                if len(fresh_mod_json["repos"][repo]) == 0:
                    fresh_mod_json["repos"].pop(repo)

    # If some modules didn't have an entry in the 'modules.json' file
    # we try to determine the SHA from the commit log of the remote
    if sum(map(len, missing_from_modules_json.values())) > 0:
        format_missing = [
            f"'{repo}/{module}'" for repo, modules in missing_from_modules_json.items() for module in modules
        ]
        if len(format_missing) == 1:
            log.info(f"Recomputing commit SHA for module {format_missing[0]} which was missing from 'modules.json'")
        else:
            log.info(
                f"Recomputing commit SHAs for modules which were missing from 'modules.json': {', '.join(format_missing)}"
            )

        failed_to_find_commit_sha = []
        for repo, modules in missing_from_modules_json.items():
            try:
                modules_repo = ModulesRepo(repo=repo)
            except LookupError:
                # An unreachable remote must not abort the whole check: report every
                # module of this repo as unresolved and carry on with the next repo.
                failed_to_find_commit_sha.extend(f"'{repo}/{module}'" for module in modules)
                continue

            repo_path = os.path.join(self.dir, "modules", repo)
            for module in modules:
                module_path = os.path.join(repo_path, module)
                try:
                    correct_commit_sha = nf_core.modules.module_utils.find_correct_commit_sha(
                        module, module_path, modules_repo
                    )
                except (LookupError, UserWarning):
                    failed_to_find_commit_sha.append(f"'{repo}/{module}'")
                    continue
                fresh_mod_json["repos"].setdefault(repo, {})[module] = {"git_sha": correct_commit_sha}

        if len(failed_to_find_commit_sha) > 0:
            only_one = len(failed_to_find_commit_sha) == 1
            plural = "" if only_one else "s"
            # The collected names are already quoted, so they are joined as they are
            log.info(
                f"Could not determine 'git_sha' for module{plural}: {', '.join(failed_to_find_commit_sha)}."
                f"\nPlease try to install a newer version of {'this' if only_one else 'these'} module{plural}."
            )

    self.dump_modules_json(fresh_mod_json)

def clear_module_dir(self, module_name, module_dir):
"""Removes all files in the module directory"""
try:
Expand Down Expand Up @@ -160,6 +286,9 @@ def update_modules_json(self, modules_json, repo_name, module_name, module_versi

def dump_modules_json(self, modules_json):
    """
    Write the given 'modules.json' content to the pipeline directory.

    The "repos" section of ``modules_json`` is replaced (on the passed-in
    dictionary itself) by its sorted counterpart before the file
    ``<self.dir>/modules.json`` is written with an indent of 4.

    Args:
        modules_json (dict): Content of 'modules.json'; must contain a "repos" key
    """
    target_path = os.path.join(self.dir, "modules.json")

    # Sort the repo entries so the file on disk has a stable ordering
    sorted_repos = nf_core.utils.sort_dictionary(modules_json["repos"])
    modules_json["repos"] = sorted_repos

    with open(target_path, "w") as handle:
        json.dump(modules_json, handle, indent=4)

Expand Down
2 changes: 1 addition & 1 deletion nf_core/modules/modules_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def get_module_file_urls(self, module, commit=""):
results[f["path"]] = f["url"]
if commit != "":
for path in results:
results[path] = f"https://api.github.com/repos/nf-core/modules/contents/{path}?ref={commit}"
results[path] = f"https://api.github.com/repos/{self.name}/contents/{path}?ref={commit}"
return results

def download_gh_file(self, dl_filename, api_url):
Expand Down
11 changes: 11 additions & 0 deletions nf_core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,3 +773,14 @@ def load_tools_config(dir="."):
return {}

return tools_config


def sort_dictionary(d):
    """
    Return a copy of a nested dictionary with its keys sorted recursively.

    Keys are ordered with ``sorted`` at every level. Values that are
    dictionaries are sorted the same way; all other values (including lists)
    are carried over unchanged. The input dictionary is not modified.

    Args:
        d (dict): Dictionary to sort; the keys at each level must be mutually comparable

    Returns:
        dict: New dictionary whose insertion order follows the sorted keys
    """
    return {
        key: sort_dictionary(d[key]) if isinstance(d[key], dict) else d[key]
        for key in sorted(d)
    }