diff --git a/docs/aliases.md b/docs/aliases.md new file mode 100644 index 00000000..90d414ef --- /dev/null +++ b/docs/aliases.md @@ -0,0 +1,18 @@ +# Aliases for Indexd records + +The alias feature in `indexd` allows a client to associate an alias with a document and then retrieve the document by that alias. + +There are currently two implementations of an alias system in the codebase, one of which is deprecated. The original alias system (the `/alias` endpoint) was deprecated +because it aliases records by hash instead of by GUID (see https://github.com/uc-cdis/indexd/issues/173). It was replaced by the new alias system (the `/index/{GUID}/aliases` endpoint) +in November 2019. + +## How the current alias system works (`/index/{GUID}/aliases` endpoint) + +The current alias system allows the client to associate an alias (a text string) +with a document's GUID. (In the indexd codebase, GUIDs are also referred to as `did`s.) An alias cannot be associated with more than one GUID. Once a client has associated an alias with a record, the record can be retrieved by the alias on the root endpoint (`/{alias}`). + +**Aliases do not carry over to new versions of a resource**. When a new version of a resource is created with `POST /index/{GUID}`, the new version has a different GUID than +the old version. Aliases are associated with GUIDs, and the old version's aliases do not carry over to the new version's GUID. It is the client's responsibility to migrate aliases +to new versions of a resource if this is the behavior they want. + +> NOTE: The current alias system is implemented in `indexd/index/blueprint.py` and uses the `index_record_alias` table. Confusingly, the current alias system is **not** implemented in `indexd/alias` and does **not** use the `alias_record` table -- these are from the deprecated original alias system. 
\ No newline at end of file diff --git a/indexd/dos/blueprint.py b/indexd/dos/blueprint.py index 90d74320..0f79b930 100644 --- a/indexd/dos/blueprint.py +++ b/indexd/dos/blueprint.py @@ -23,7 +23,8 @@ def get_dos_record(record): """ try: ret = blueprint.index_driver.get(record) - ret["alias"] = blueprint.index_driver.get_aliases_for_did(record) + # record may be a baseID or a DID / GUID. If record is a baseID, ret["did"] is the latest GUID for that record. + ret["alias"] = blueprint.index_driver.get_aliases_for_did(ret["did"]) except IndexNoRecordFound: try: ret = blueprint.index_driver.get_by_alias(record) diff --git a/indexd/index/blueprint.py b/indexd/index/blueprint.py index c8944f3a..7edee9de 100644 --- a/indexd/index/blueprint.py +++ b/indexd/index/blueprint.py @@ -13,12 +13,17 @@ from .schema import PUT_RECORD_SCHEMA from .schema import POST_RECORD_SCHEMA +from .schema import RECORD_ALIAS_SCHEMA from .errors import NoRecordFound from .errors import MultipleRecordsFound from .errors import RevisionMismatch from .errors import UnhealthyCheck +from cdislogging import get_logger + +logger = get_logger("indexd/index blueprint", log_level="info") + blueprint = flask.Blueprint("index", __name__) blueprint.config = dict() @@ -375,6 +380,82 @@ def add_index_record_version(record): return flask.jsonify(ret), 200 +@blueprint.route("/index/<path:record>/aliases", methods=["GET"]) +def get_aliases(record): + """ + Get all aliases associated with this DID / GUID + """ + # error handling done in driver + aliases = blueprint.index_driver.get_aliases_for_did(record) + + aliases_payload = {"aliases": [{"value": alias} for alias in aliases]} + return flask.jsonify(aliases_payload), 200 + + +@blueprint.route("/index/<path:record>/aliases", methods=["POST"]) +def append_aliases(record): + """ + Append one or more aliases to aliases already associated with this + DID / GUID, if any. 
+ """ + # we set force=True so that if MIME type of request is not application/JSON, + # get_json will still throw a UserError. + aliases_json = flask.request.get_json(force=True) + try: + jsonschema.validate(aliases_json, RECORD_ALIAS_SCHEMA) + except jsonschema.ValidationError as err: + logger.warning(f"Bad request body:\n{err}") + raise UserError(err) + + aliases = [item["value"] for item in aliases_json["aliases"]] + + # authorization and error handling done in driver + blueprint.index_driver.append_aliases_for_did(aliases, record) + + aliases = blueprint.index_driver.get_aliases_for_did(record) + aliases_payload = {"aliases": [{"value": alias} for alias in aliases]} + return flask.jsonify(aliases_payload), 200 + + +@blueprint.route("/index/<path:record>/aliases", methods=["PUT"]) +def replace_aliases(record): + """ + Replace all aliases associated with this DID / GUID + """ + # we set force=True so that if MIME type of request is not application/JSON, + # get_json will still throw a UserError. 
+ aliases_json = flask.request.get_json(force=True) + try: + jsonschema.validate(aliases_json, RECORD_ALIAS_SCHEMA) + except jsonschema.ValidationError as err: + logger.warning(f"Bad request body:\n{err}") + raise UserError(err) + + aliases = [item["value"] for item in aliases_json["aliases"]] + + # authorization and error handling done in driver + blueprint.index_driver.replace_aliases_for_did(aliases, record) + + aliases_payload = {"aliases": [{"value": alias} for alias in aliases]} + return flask.jsonify(aliases_payload), 200 + + +@blueprint.route("/index/<path:record>/aliases", methods=["DELETE"]) +def delete_all_aliases(record): + # authorization and error handling done in driver + blueprint.index_driver.delete_all_aliases_for_did(record) + + return flask.jsonify("Aliases deleted successfully"), 200 + + +@blueprint.route("/index/<path:record>/aliases/<path:alias>", methods=["DELETE"]) +def delete_one_alias(record, alias): + # authorization and error handling done in driver + blueprint.index_driver.delete_one_alias_for_did(alias, record) + + return flask.jsonify("Aliases deleted successfully"), 200 + + @blueprint.route("/index/<path:record>/versions", methods=["GET"]) def get_all_index_record_versions(record): """ diff --git a/indexd/index/drivers/alchemy.py b/indexd/index/drivers/alchemy.py index 31d99afa..83e64c51 100644 --- a/indexd/index/drivers/alchemy.py +++ b/indexd/index/drivers/alchemy.py @@ -23,7 +23,7 @@ from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound from indexd import auth -from indexd.errors import UserError +from indexd.errors import UserError, AuthError from indexd.index.driver import IndexDriverABC from indexd.index.errors import ( MultipleRecordsFound, @@ -145,7 +145,7 @@ class IndexRecordAlias(Base): __tablename__ = "index_record_alias" did = Column(String, ForeignKey("index_record.did"), primary_key=True) - name = Column(String, primary_key=True) + name = Column(String, primary_key=True, unique=True) __table_args__ = ( Index("index_record_alias_idx", "did"), @@ -256,6 
+256,14 @@ def create_urls_metadata(urls_metadata, record, session): session.add(IndexRecordUrlMetadata(url=url, key=k, value=v, did=record.did)) +def get_record_if_exists(did, session): + """ + Searches for a record with this did and returns it. + If no record found, returns None. + """ + return session.query(IndexRecord).filter(IndexRecord.did == did).first() + + class SQLAlchemyIndexDriver(IndexDriverABC): """ SQLAlchemy implementation of index driver. @@ -764,9 +772,167 @@ def get_aliases_for_did(self, did): Gets the aliases for a did """ with self.session as session: + self.logger.info(f"Trying to get all aliases for did {did}...") + + index_record = get_record_if_exists(did, session) + if index_record is None: + self.logger.warn(f"No record found for did {did}") + raise NoRecordFound(did) + query = session.query(IndexRecordAlias).filter(IndexRecordAlias.did == did) return [i.name for i in query] + def append_aliases_for_did(self, aliases, did): + """ + Append one or more aliases to aliases already associated with one DID / GUID. + """ + with self.session as session: + self.logger.info( + f"Trying to append new aliases {aliases} to aliases for did {did}..." 
+ ) + + index_record = get_record_if_exists(did, session) + if index_record is None: + self.logger.warn(f"No record found for did {did}") + raise NoRecordFound(did) + + # authorization + try: + resources = [u.resource for u in index_record.authz] + auth.authorize("update", resources) + except AuthError as err: + self.logger.warn( + f"Auth error while appending aliases to did {did}: User not authorized to update one or more of these resources: {resources}" + ) + raise err + + # add new aliases + index_record_aliases = [ + IndexRecordAlias(did=did, name=alias) for alias in aliases + ] + try: + session.add_all(index_record_aliases) + session.commit() + except IntegrityError as err: + # One or more aliases in request were non-unique + self.logger.warn( + f"One or more aliases in request already associated with this or another GUID: {aliases}" + ) + raise UserError( + f"One or more aliases in request already associated with this or another GUID: {aliases}" + ) + + def replace_aliases_for_did(self, aliases, did): + """ + Replace all aliases for one DID / GUID with new aliases. + """ + with self.session as session: + self.logger.info( + f"Trying to replace aliases for did {did} with new aliases {aliases}..." 
+ ) + + index_record = get_record_if_exists(did, session) + if index_record is None: + self.logger.warn(f"No record found for did {did}") + raise NoRecordFound(did) + + # authorization + try: + resources = [u.resource for u in index_record.authz] + auth.authorize("update", resources) + except AuthError as err: + self.logger.warn( + f"Auth error while replacing aliases for did {did}: User not authorized to update one or more of these resources: {resources}" + ) + raise err + + try: + # delete this GUID's aliases + session.query(IndexRecordAlias).filter( + IndexRecordAlias.did == did + ).delete(synchronize_session="evaluate") + # add new aliases + index_record_aliases = [ + IndexRecordAlias(did=did, name=alias) for alias in aliases + ] + session.add_all(index_record_aliases) + session.commit() + self.logger.info( + f"Replaced aliases for did {did} with new aliases {aliases}" + ) + except IntegrityError: + # One or more aliases in request were non-unique + self.logger.warn( + f"One or more aliases in request already associated with another GUID: {aliases}" + ) + raise UserError( + f"One or more aliases in request already associated with another GUID: {aliases}" + ) + + def delete_all_aliases_for_did(self, did): + """ + Delete all of this DID / GUID's aliases. 
+ """ + with self.session as session: + self.logger.info(f"Trying to delete all aliases for did {did}...") + + index_record = get_record_if_exists(did, session) + if index_record is None: + self.logger.warn(f"No record found for did {did}") + raise NoRecordFound(did) + + # authorization + try: + resources = [u.resource for u in index_record.authz] + auth.authorize("delete", resources) + except AuthError as err: + self.logger.warn( + f"Auth error while deleting all aliases for did {did}: User not authorized to delete one or more of these resources: {resources}" + ) + raise err + + # delete all aliases + session.query(IndexRecordAlias).filter(IndexRecordAlias.did == did).delete( + synchronize_session="evaluate" + ) + + self.logger.info(f"Deleted all aliases for did {did}.") + + def delete_one_alias_for_did(self, alias, did): + """ + Delete one of this DID / GUID's aliases. + """ + with self.session as session: + self.logger.info(f"Trying to delete alias {alias} for did {did}...") + + index_record = get_record_if_exists(did, session) + if index_record is None: + self.logger.warn(f"No record found for did {did}") + raise NoRecordFound(did) + + # authorization + try: + resources = [u.resource for u in index_record.authz] + auth.authorize("delete", resources) + except AuthError as err: + self.logger.warn( + f"Auth error deleting alias {alias} for did {did}: User not authorized to delete one or more of these resources: {resources}" + ) + raise err + + # delete just this alias + num_rows_deleted = ( + session.query(IndexRecordAlias) + .filter(IndexRecordAlias.did == did, IndexRecordAlias.name == alias) + .delete(synchronize_session="evaluate") + ) + + if num_rows_deleted == 0: + self.logger.warn(f"No alias {alias} found for did {did}") + raise NoRecordFound(alias) + + self.logger.info(f"Deleted alias {alias} for did {did}.") + def get(self, did): """ Gets a record given the record id or baseid. 
@@ -1255,6 +1421,12 @@ def migrate_12(session, **kwargs): ) +def migrate_13(session, **kwargs): + session.execute( + "ALTER TABLE {} ADD UNIQUE ( name )".format(IndexRecordAlias.__tablename__) + ) + + # ordered schema migration functions that the index should correspond to # CURRENT_SCHEMA_VERSION - 1 when it's written SCHEMA_MIGRATION_FUNCTIONS = [ @@ -1270,5 +1442,6 @@ def migrate_12(session, **kwargs): migrate_10, migrate_11, migrate_12, + migrate_13, ] CURRENT_SCHEMA_VERSION = len(SCHEMA_MIGRATION_FUNCTIONS) diff --git a/indexd/index/schema.py b/indexd/index/schema.py index b86cd9ed..d4072634 100644 --- a/indexd/index/schema.py +++ b/indexd/index/schema.py @@ -84,3 +84,18 @@ "urls_metadata": {"type": "object"}, }, } + +# "aliases" and each item's "value" are required: the alias endpoints read both keys unconditionally after validation. +RECORD_ALIAS_SCHEMA = { + "$schema": "http://json-schema.org/schema#", + "type": "object", + "additionalProperties": False, + "required": ["aliases"], + "description": "Aliases that can be used in place of an Index record's DID", + "properties": { + "aliases": { + "type": "array", + "items": {"type": "object", "required": ["value"], "properties": {"value": {"type": "string"}}}, + } + }, +} diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml index e0f1b9f3..79f76b62 100644 --- a/openapis/swagger.yaml +++ b/openapis/swagger.yaml @@ -388,6 +388,155 @@ paths: description: Record is deleted successfully security: - basic_auth: [] + '/index/{GUID}/aliases': + get: + tags: + - index + summary: Get all aliases associated with this GUID + operationId: getAliases + produces: + - application/json + parameters: + - name: GUID + in: path + description: The GUID to query. + required: true + type: string + responses: + '200': + description: Successful operation. Returns 0 or more aliases associated with this GUID. + schema: + $ref: '#/definitions/AliasInfo' + '404': + description: GUID not found. 
+ schema: + $ref: '#/definitions/ErrorResponse' + security: [] + post: + tags: + - index + summary: Append new aliases to this GUID + description: Adds new aliases to the aliases already associated with this GUID. The new aliases must be globally unique. + operationId: addNewAliases + produces: + - application/json + parameters: + - name: GUID + in: path + description: The GUID to associate the new aliases with. + required: true + type: string + - in: body + name: body + description: The new aliases to associate with this GUID. The new aliases must be globally unique. + required: true + schema: + $ref: '#/definitions/AliasInfo' + responses: + '200': + description: successful operation + schema: + $ref: '#/definitions/AliasInfo' + '400': + description: 'Invalid request: One or more aliases are non-unique: one or more of the aliases are already associated with this or another GUID.' + schema: + $ref: '#/definitions/ErrorResponse' + '404': + description: GUID not found. + schema: + $ref: '#/definitions/ErrorResponse' + security: + - basic_auth: [] + put: + tags: + - index + summary: Replace all aliases for this GUID + description: Replaces all aliases associated with this GUID with new aliases. The new aliases can be the same as the aliases already associated with this GUID, but must otherwise be globally unique. + operationId: updateAliases + consumes: + - application/json + produces: + - application/json + parameters: + - name: GUID + in: path + description: The GUID to associate the new aliases with. + required: true + type: string + - in: body + name: body + description: The new aliases to associate with this GUID. The new aliases can be the same as the aliases already associated with this GUID, but must otherwise be globally unique. 
+ required: true + schema: + $ref: '#/definitions/AliasInfo' + responses: + '200': + description: successful operation + schema: + $ref: '#/definitions/AliasInfo' + '400': + description: 'Invalid request: One or more aliases are non-unique: one or more of the aliases are already associated with another GUID.' + schema: + $ref: '#/definitions/ErrorResponse' + '404': + description: GUID not found. + schema: + $ref: '#/definitions/ErrorResponse' + security: + - basic_auth: [] + delete: + tags: + - index + summary: Delete all aliases for this GUID + description: '' + operationId: deleteAllAliases + produces: + - application/json + parameters: + - name: GUID + in: path + description: The GUID to delete these aliases from. + required: true + type: string + responses: + '200': + description: Aliases were deleted successfully. + '404': + description: 'GUID not found. No aliases were deleted.' + schema: + $ref: '#/definitions/ErrorResponse' + security: + - basic_auth: [] + '/index/{GUID}/aliases/{ALIAS}': + delete: + tags: + - index + summary: Delete one alias for this GUID + description: '' + operationId: deleteOneAlias + produces: + - application/json + parameters: + - name: GUID + in: path + description: The GUID to delete this alias from. + required: true + type: string + - name: ALIAS + in: path + description: The alias to delete from this GUID. The alias should be URL-encoded. + required: true + type: string + responses: + '200': + description: Alias was deleted successfully. + '404': + description: 'GUID not found. No aliases were deleted.' 
+ schema: + $ref: '#/definitions/ErrorResponse' + security: + - basic_auth: [] + '/index/{GUID}/latest': get: tags: @@ -441,6 +590,7 @@ paths: security: [] '/alias/': get: + deprecated: true tags: - alias summary: list aliases with pagination @@ -497,6 +647,7 @@ paths: $ref: '#/definitions/HashInfo' '/alias/{ALIASSTRING}': put: + deprecated: true tags: - alias summary: Create or update an alias @@ -534,6 +685,7 @@ paths: security: - basic_auth: [] get: + deprecated: true tags: - alias summary: Fetch an alias @@ -558,6 +710,7 @@ paths: description: Invalid input security: [] delete: + deprecated: true tags: - alias summary: Delete an alias @@ -864,6 +1017,17 @@ securityDefinitions: basic_auth: type: basic definitions: + AliasInfo: + type: object + properties: + aliases: + type: array + items: + type: object + properties: + value: + type: string + description: The identifier that acts as an alias for this GUID. HashInfo: type: object properties: diff --git a/tests/test_aliases_endpoints.py b/tests/test_aliases_endpoints.py new file mode 100644 index 00000000..cf318f24 --- /dev/null +++ b/tests/test_aliases_endpoints.py @@ -0,0 +1,650 @@ +import pytest +import string +import json +import urllib.parse + +from indexd import get_app + +# Test fixtures and helper functions +# ============================================= +def url_encode(str): + return urllib.parse.quote(str, safe="") + + +def get_endpoint(guid): + return "/index/{}/aliases".format(guid) + + +def to_payload(aliases): + """ + Boxes a list of aliases into a JSON payload object expected + by the server. + """ + return {"aliases": [{"value": alias} for alias in aliases]} + + +def payload_to_list(alias_payload): + """ + Unboxes a JSON payload object expected by the server into + a list of alias names. + """ + return [record["value"] for record in alias_payload["aliases"]] + + +def create_record(client, user): + """ + Creates a record in indexd and returns that record's GUID. 
+ """ + document = { + "form": "object", + "size": 123, + "urls": ["s3://endpointurl/bucket/key"], + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, + "metadata": {"project_id": "bpa-UChicago"}, + } + res = client.post("/index/", json=document, headers=user) + assert res.status_code == 200 + # The GUID is the "did" (Document IDentifier) returned from a successful + # POST request. + guid = res.get_json()["did"] + + return guid + + +@pytest.fixture(scope="function") +def guid(client, user): + """ + Creates a record in indexd and returns that record's GUID. + """ + return create_record(client, user) + + +@pytest.fixture(scope="function") +def aliases(client, user, guid): + """ + Associates aliases with a GUID in indexd and returns the new aliases. + """ + aliases = [ + ".?=G}k@Up3LIlv+p96yaI06,t@?j=ejk[%+", + 'Fa"uW< A"/\'hELmTjH%r%6@Tp^HaB^', + "{j'8D6d5fc]5#[*9n%|G9\"hZ?z3:wX", + '"U7XsT+EXD|1?@$ywDV"ce