Skip to content

Commit

Permalink
add migrate script (closes #5)
Browse files Browse the repository at this point in the history
  • Loading branch information
dmeliza committed May 8, 2018
1 parent 61e4700 commit 2965a62
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 1 deletion.
1 change: 1 addition & 0 deletions nbank/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
"registry": "%(registry_url)s",
"policy": {
"auto_identifiers": false,
"auto_id_type": null,
"keep_extensions": true,
"allow_directories": false,
"require_hash": true,
Expand Down
141 changes: 141 additions & 0 deletions nbank/migrate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# -*- coding: utf-8 -*-
# -*- mode: python -*-
""" import catalog from old nbank (<0.7.0) into registry
This script performs the following checks:
- catalog is from correct version of nbank
- archive contains the resources referenced in the catalog
- if supplied, hash matches hash of file
"""

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import logging

from nbank import __version__
from nbank import core

# Logger for the 'nbank' package namespace; main() attaches a handler and sets its level.
log = logging.getLogger("nbank")


def check_catalog(catalog):
    """Check that a parsed document is a version 1.0 neurobank catalog.

    Args:
        catalog: the parsed JSON document; expected to be a mapping.

    Returns:
        None if the document passes every check.

    Raises:
        ValueError: if the document is not a mapping, its 'namespace' field is
            not 'neurobank.catalog', its 'version' field is not '1.0', or its
            'resources' field is missing or is not a list or tuple.
    """
    # A JSON file whose top level is an array, string, number or null has no
    # .get(); without this guard it would fail with AttributeError instead of
    # the ValueError that callers are told to expect.
    if not hasattr(catalog, "get"):
        raise ValueError("document is not a JSON object")
    if catalog.get("namespace") != "neurobank.catalog":
        raise ValueError("document does not have 'namespace' field set to 'neurobank.catalog'")
    if catalog.get("version") != "1.0":
        raise ValueError("catalog version is not equal to '1.0'")
    if "resources" not in catalog:
        raise ValueError("document is missing 'resources' field")
    if not isinstance(catalog["resources"], (list, tuple)):
        raise ValueError("'resources' field is not a list or tuple")


def register_resources(catalog, archive_path, dtype=None, hash=False, auth=None, **metadata):
    """Add resources from catalog (if found in archive_path) to the registry.

    This is a generator: no work is done (including the domain check) until it
    is iterated. Entries without an 'id' field, and entries whose id cannot be
    located in the archive by ``find_resource``, are skipped.

    Args:
        catalog: catalog document; ``catalog["resources"]`` is iterated and each
            entry is treated as a dict. Entries are copied, not modified.
        archive_path: path of the archive; made absolute before use.
        dtype: datatype passed through to ``add_resource``.
        hash: if true, hash each file with ``util.hash`` even when the archive
            policy does not set 'require_hash'.
        auth: credentials passed through to ``add_resource``.
        **metadata: extra fields merged over each entry's own fields; values
            given here take precedence over values from the catalog.

    Yields:
        dict with keys "source" (path of the resource in the archive) and "id"
        (the "name" field of the result returned by ``add_resource``).

    Raises:
        RuntimeError: if ``find_domain_by_path`` returns None for the archive.
    """
    import os
    from nbank import util
    from nbank.archive import get_config, find_resource
    from nbank.registry import add_resource, find_domain_by_path, full_url
    archive_path = os.path.abspath(archive_path)
    cfg = get_config(archive_path)
    log.info("archive: %s", archive_path)
    registry_url = cfg["registry"]
    log.info(" registry: %s", registry_url)

    # check that domain exists for this path
    domain = find_domain_by_path(registry_url, archive_path)
    log.info(" domain name: %s", domain)
    if domain is None:
        raise RuntimeError("archive '%s' not in registry. make sure to run nbank init before migrating" % archive_path)

    for res in catalog["resources"]:
        # Work on a copy so the caller's catalog is left intact; popping 'id'
        # from the original entry would make a second pass skip every resource.
        fields = dict(res)
        resource_id = fields.pop("id", None)
        if resource_id is None:
            continue
        log.info("processing resource '%s':", resource_id)
        resource_path = find_resource(archive_path, resource_id)
        if resource_path is None:
            log.info(" does not exist; skipping")
            continue
        log.info(" path: %s", resource_path)
        if hash or cfg['policy']['require_hash']:
            sha1 = util.hash(resource_path)
            log.info(" sha1: %s", sha1)
        else:
            sha1 = None
        # merge metadata from catalog and arguments (arguments win):
        fields.update(**metadata)
        result = add_resource(registry_url, resource_id, dtype, domain, sha1, auth, **fields)
        registry_id = full_url(registry_url, result["name"])
        log.info(" registered as %s", registry_id)
        yield {"source": resource_path, "id": result["name"]}


def main(argv=None):
    """Command-line entry point: import an old (<0.7.0) nbank catalog into the registry.

    Parses the command line, attaches a stream handler to the 'nbank' logger,
    loads and validates the JSON catalog, then registers each resource found in
    the archive directory. With ``-j`` each registered resource is written to
    stdout as one JSON object per line.

    Args:
        argv: argument list to parse; None makes argparse use ``sys.argv[1:]``.

    Returns:
        None. A missing catalog file, invalid JSON, a failed catalog check and
        HTTP 400 responses are logged as errors rather than raised.

    Raises:
        requests.exceptions.HTTPError: for HTTP errors other than status 400.
    """
    import datetime
    import argparse
    import sys
    import json
    import requests as rq
    from nbank.script import userpwd, ParseKeyVal

    p = argparse.ArgumentParser(description="import catalog from old nbank (<0.7.0) into registry")
    p.add_argument('-v','--version', action="version",
                   version="%(prog)s " + __version__)
    # NOTE(review): registry_url is parsed but not read below; register_resources
    # takes the registry URL from the archive's own configuration.
    p.add_argument('-r', dest='registry_url', help="URL of the registry service. "
                   "Default is to use the environment variable '%s'" % core.env_registry,
                   default=core.default_registry())
    p.add_argument('-a', dest='auth', help="username:password to authenticate with registry. "
                   "If not supplied, will attempt to use .netrc file",
                   type=userpwd, default=None)
    p.add_argument('--debug', help="show verbose log messages", action="store_true")

    p.add_argument('-d','--dtype', help="specify the datatype for the deposited resources")
    p.add_argument('-H','--hash', action="store_true",
                   help="calculate a SHA1 hash of each file and store in the registry")
    p.add_argument('-k', help="specify metadata field (use multiple -k for multiple values)",
                   action=ParseKeyVal, default=dict(), metavar="KEY=VALUE", dest='metadata')
    p.add_argument('-j', "--json-out", action="store_true",
                   help="output each deposited file to stdout as line-deliminated JSON")
    p.add_argument('directory', help="path of the archive where the files are stored. "
                   "This location needs to have been added as a domain to the registry (with nbank init) "
                   "before running this script.")
    p.add_argument('catalog', help='the JSON catalog to import')

    args = p.parse_args(argv)

    # Send log records to the console as bare messages; --debug lowers the
    # threshold on both the logger and the handler.
    ch = logging.StreamHandler()
    formatter = logging.Formatter("%(message)s")
    loglevel = logging.DEBUG if args.debug else logging.INFO
    log.setLevel(loglevel)
    ch.setLevel(loglevel)
    ch.setFormatter(formatter)
    log.addHandler(ch)

    log.debug("version: %s", __version__)
    log.debug("run time: %s", datetime.datetime.now())

    try:
        log.debug("checking catalog: %s", args.catalog)
        # Plain 'r' instead of 'rU': the 'U' flag was removed in Python 3.11,
        # where it raises ValueError, and universal newlines are irrelevant to
        # JSON parsing. The with-block also closes the file deterministically.
        with open(args.catalog, 'r') as fp:
            catalog = json.load(fp)
        check_catalog(catalog)
        for res in register_resources(catalog, args.directory, args.dtype, args.hash, args.auth, **args.metadata):
            if args.json_out:
                json.dump(res, fp=sys.stdout)
                sys.stdout.write("\n")
    except (FileNotFoundError, json.JSONDecodeError, ValueError) as e:
        log.error(" error: %s", e)
    except rq.exceptions.HTTPError as e:
        # A 400 response is reported field by field; the body is presumably a
        # JSON object mapping field names to lists of messages - TODO confirm
        # against the registry API.
        if e.response.status_code == 400:
            data = e.response.json()
            for k, v in data.items():
                for vv in v:
                    log.error(" error: %s - %s", k, vv)
        else:
            # bare raise keeps the original traceback intact
            raise
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@

packages=find_packages(exclude=["*test*"]),

entry_points={'console_scripts': ['nbank = nbank.script:main'] },
entry_points={'console_scripts': ['nbank = nbank.script:main',
'nbank-migrate = nbank.migrate:main'] },

install_requires=["requests>2.18"],
test_suite='nose.collector'
Expand Down

0 comments on commit 2965a62

Please sign in to comment.