# Source file: annotator/oa_renderer.py
#
# NOTE(review): the patch this module was taken from also carries a one-line
# hunk in annotator/annotation.py, right before ``Annotation.search_raw``;
# that hunk is not reproduced here.
"""Render annotation dicts as Open Annotation (OA) JSON-LD structures."""
import logging

log = logging.getLogger(__name__)

try:
    from collections import OrderedDict
except ImportError:
    try:
        from ordereddict import OrderedDict
    except ImportError:
        # Logger.warn() is only a deprecated alias of Logger.warning().
        log.warning("No OrderedDict available, JSON-LD content will be unordered. "
                    "Use Python>=2.7 or install ordereddict module to fix.")
        OrderedDict = dict


class OARenderer(object):
    """Turn an annotation mapping into its OA JSON-LD representation."""

    def __init__(self, jsonld_baserurl=None):
        """Store the optional JSON-LD base URL.

        :param jsonld_baserurl: value emitted as ``@base`` in the rendered
            ``@context``; with ``None`` (the default) no ``@base`` entry is
            added. The parameter name is misspelled, but it is kept as-is
            so that existing keyword callers keep working.
        """
        self.jsonld_baseurl = jsonld_baserurl

    def render(self, annotation):
        """Return the JSON-LD formatted RDF representation of the annotation.

        :param annotation: mapping holding the annotation fields. ``'id'``
            is required; ``'text'``, ``'tags'``, ``'uri'``, ``'ranges'``,
            ``'user'``, ``'created'`` and ``'updated'`` are optional.
        :returns: an ``OrderedDict`` (a plain ``dict`` when no ordered
            implementation could be imported) with ``@context`` first.
        :raises KeyError: if ``annotation`` has no ``'id'`` entry.
        """
        # Built afresh on every call, so appending '@base' below never leaks
        # into later renderings.
        context = [
            "http://www.w3.org/ns/oa-context-20130208.json",
            {'annotator': 'http://annotatorjs.org/ns/'}
        ]

        if self.jsonld_baseurl is not None:
            context.append({'@base': self.jsonld_baseurl})

        # Bodies and motivations are both derived from the text and the tags,
        # so extract those once and share them.
        textual_bodies = get_textual_bodies(annotation)
        tags = get_tags(annotation)

        # The JSON-LD spec recommends to put @context at the top of the
        # document, so we'll be nice and use an ordered dictionary.
        out = OrderedDict()
        out['@context'] = context
        out['@id'] = annotation['id']
        out['@type'] = 'oa:Annotation'
        out['hasBody'] = has_body(textual_bodies, tags)
        out['hasTarget'] = has_target(annotation)
        out['annotatedBy'] = annotated_by(annotation)
        out['annotatedAt'] = annotated_at(annotation)
        out['serializedBy'] = serialized_by()
        out['serializedAt'] = serialized_at(annotation)
        out['motivatedBy'] = motivated_by(textual_bodies, tags)
        return out


def has_body(textual_bodies, tags):
    """Return all annotation bodies: the text comment, then each tag.

    A new list is returned; neither argument is modified.
    """
    return list(textual_bodies) + list(tags)


def get_textual_bodies(annotation):
    """Return a list with a single text body, or an empty list."""
    text = annotation.get('text')
    if not text:
        # Note that we treat an empty text as not having text at all.
        return []
    return [{
        '@type': ['dctypes:Text', 'cnt:ContentAsText'],
        'dc:format': 'text/plain',
        'cnt:chars': text,
    }]


def get_tags(annotation):
    """Return a list of oa:Tag items, one per entry in ``'tags'``.

    A missing ``'tags'`` key and a present-but-empty value (``None`` or an
    empty list) both produce an empty list.
    """
    return [
        {
            '@type': ['oa:Tag', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': tag,
        }
        for tag in annotation.get('tags') or []
    ]


def motivated_by(textual_bodies, tags):
    """Return the motivations for the annotation.

    Currently any combination of commenting and/or tagging, in that order.
    """
    motivations = []
    if textual_bodies:
        motivations.append('oa:commenting')
    if tags:
        motivations.append('oa:tagging')
    return motivations


def has_target(annotation):
    """Return the targets of the annotation.

    Yields one ``oa:SpecificResource`` with a selector for each range of the
    page content that was selected or, when there are no ranges, the url of
    the page itself. A missing or empty ``'uri'`` yields no targets at all.

    :raises KeyError: if a range lacks ``'start'``, ``'end'``,
        ``'startOffset'`` or ``'endOffset'``.
    """
    uri = annotation.get('uri')
    if not uri:
        # Same convention as for text and user: empty counts as absent.
        return []

    ranges = annotation.get('ranges')
    if not ranges:
        # The annotation targets the page as a whole.
        return [uri]

    return [
        {
            '@type': 'oa:SpecificResource',
            'hasSource': uri,
            'hasSelector': {
                '@type': 'annotator:TextRangeSelector',
                'annotator:startContainer': range_selector['start'],
                'annotator:endContainer': range_selector['end'],
                'annotator:startOffset': range_selector['startOffset'],
                'annotator:endOffset': range_selector['endOffset'],
            },
        }
        for range_selector in ranges
    ]


def annotated_by(annotation):
    """Return the user that created the annotation, or ``{}`` if unknown."""
    user = annotation.get('user')
    if not user:
        return {}
    return {
        '@type': 'foaf:Agent',  # It could be either a person or a bot
        'foaf:name': user,
    }


def annotated_at(annotation):
    """Return the annotation's creation date, or ``None`` when not set."""
    return annotation.get('created') or None


def serialized_by():
    """Return the description of the software used for serializing."""
    # NOTE(review): PROV-O appears to spell this class prov:SoftwareAgent;
    # the value is left untouched because consumers may match on it. Confirm.
    return {
        '@id': 'annotator:annotator-store',
        '@type': 'prov:Software-agent',
        'foaf:name': 'annotator-store',
        'foaf:homepage': {'@id': 'http://annotatorjs.org'},
    }  # todo: add version number


def serialized_at(annotation):
    """Return the last time the serialization changed, or ``None``."""
    # Following the spec[1], we do not use the current time, but the last
    # time the annotation graph has been updated.
    # [1]: https://hypothes.is/a/R6uHQyVTQYqBc4-1V9X56Q
    return annotation.get('updated') or None
# Source file: tests/test_oa_renderer.py
"""Tests for the Open Annotation JSON-LD output of ``OARenderer``."""
import copy
# NOTE(review): no nose.tools name is used by the tests visible here; the
# wildcard import is retained untouched.
from nose.tools import *

from . import TestCase
from annotator.oa_renderer import OARenderer

# Input fixture: an annotation carrying every field the renderer reads.
annotation = {
    'created': '2015-03-07T09:48:34.891753+00:00',
    'id': 'test-annotation-id-1',
    'ranges': [{
        'type': 'RangeSelector',
        'startOffset': 0,
        'endOffset': 30,
        'end': '/div[1]/div[5]/div[1]/div[5]/div[1]/div[2]',
        'start': '/div[1]/div[5]/div[1]/div[5]/div[1]/div[1]'
    }],
    # Adjacent string literals are concatenated without any separator.
    'text': 'From childhood\'s hour I have not been'
            'As others were-I have not seen',
    'tags': ['Edgar Allan Poe', 'Alone', 'Poem'],
    'updated': '2015-03-07T09:49:34.891769+00:00',
    'uri': 'http://www.poetryfoundation.org/poem/175776',
    'user': 'nameless.raven'
}

# Expected rendering of ``annotation`` when no JSON-LD base URL is set.
oa_rendered_annotation = {
    '@context': [
        "http://www.w3.org/ns/oa-context-20130208.json",
        {'annotator': 'http://annotatorjs.org/ns/'}
    ],
    '@id': annotation['id'],
    '@type': 'oa:Annotation',
    'hasBody': [
        {
            '@type': ['dctypes:Text', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': annotation['text']
        },
        {
            '@type': ['oa:Tag', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': annotation['tags'][0]
        },
        {
            '@type': ['oa:Tag', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': annotation['tags'][1]
        },
        {
            '@type': ['oa:Tag', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': annotation['tags'][2]
        }
    ],
    'hasTarget': [
        {
            '@type': 'oa:SpecificResource',
            'hasSource': annotation['uri'],
            'hasSelector': {
                '@type': 'annotator:TextRangeSelector',
                'annotator:startContainer': annotation['ranges'][0]['start'],
                'annotator:endContainer': annotation['ranges'][0]['end'],
                'annotator:startOffset': annotation['ranges'][0]['startOffset'],
                'annotator:endOffset': annotation['ranges'][0]['endOffset']
            }
        }
    ],
    'annotatedBy': {
        '@type': 'foaf:Agent',
        'foaf:name': annotation['user']
    },
    'annotatedAt': annotation['created'],
    'serializedBy': {
        '@id': 'annotator:annotator-store',
        '@type': 'prov:Software-agent',
        'foaf:name': 'annotator-store',
        'foaf:homepage': {'@id': 'http://annotatorjs.org'},
    },
    'serializedAt': annotation['updated'],
    'motivatedBy': ['oa:commenting', 'oa:tagging']
}


def _annotation_without(*keys):
    """Return a deep copy of the ``annotation`` fixture minus ``keys``."""
    stripped = copy.deepcopy(annotation)
    for key in keys:
        del stripped[key]
    return stripped


class TestOARenderer(TestCase):
    """Check each top-level property produced by ``OARenderer.render``.

    Lengths are compared with ``==``: ``is`` tests object identity, which
    for integers is an interpreter implementation detail.
    """

    def setup(self):
        super(TestOARenderer, self).setup()
        self.renderer = OARenderer()

    def teardown(self):
        super(TestOARenderer, self).teardown()

    def test_context_without_jsonld_baseurl(self):
        rendered = self.renderer.render(annotation)

        assert '@context' in rendered
        context = rendered['@context']
        exp_context = oa_rendered_annotation['@context']
        assert len(context) == 2
        assert context[0] == exp_context[0]
        assert context[1] == exp_context[1]

    def test_context_with_jsonld_baseurl(self):
        jsonld_baseurl = 'http://jsonld_baseurl.com'
        renderer = OARenderer(jsonld_baseurl)
        rendered = renderer.render(annotation)

        assert '@context' in rendered
        context = rendered['@context']
        assert len(context) == 3
        assert '@base' in context[2]
        assert context[2]['@base'] == jsonld_baseurl

    def test_id(self):
        rendered = self.renderer.render(annotation)
        assert '@id' in rendered
        assert rendered['@id'] == oa_rendered_annotation['@id']

    def test_type(self):
        rendered = self.renderer.render(annotation)
        assert '@type' in rendered
        assert rendered['@type'] == oa_rendered_annotation['@type']

    def test_has_body(self):
        rendered = self.renderer.render(annotation)

        assert 'hasBody' in rendered
        hasBody = rendered['hasBody']
        assert len(hasBody) == 4

        assert hasBody[0] == oa_rendered_annotation['hasBody'][0]
        assert hasBody[1] == oa_rendered_annotation['hasBody'][1]
        assert hasBody[2] == oa_rendered_annotation['hasBody'][2]
        assert hasBody[3] == oa_rendered_annotation['hasBody'][3]

        assert 'motivatedBy' in rendered
        assert len(rendered['motivatedBy']) == 2
        assert rendered['motivatedBy'][0] == 'oa:commenting'
        assert rendered['motivatedBy'][1] == 'oa:tagging'

    def test_has_body_without_tags(self):
        rendered = self.renderer.render(_annotation_without('tags'))

        assert 'hasBody' in rendered
        hasBody = rendered['hasBody']
        assert len(hasBody) == 1
        assert hasBody[0] == oa_rendered_annotation['hasBody'][0]

        assert 'motivatedBy' in rendered
        assert len(rendered['motivatedBy']) == 1
        assert rendered['motivatedBy'][0] == 'oa:commenting'

    def test_has_body_without_text(self):
        rendered = self.renderer.render(_annotation_without('text'))

        assert 'hasBody' in rendered
        hasBody = rendered['hasBody']
        assert len(hasBody) == 3
        assert hasBody[0] == oa_rendered_annotation['hasBody'][1]
        assert hasBody[1] == oa_rendered_annotation['hasBody'][2]
        assert hasBody[2] == oa_rendered_annotation['hasBody'][3]

        assert 'motivatedBy' in rendered
        assert len(rendered['motivatedBy']) == 1
        assert rendered['motivatedBy'][0] == 'oa:tagging'

    def test_has_body_empty(self):
        rendered = self.renderer.render(_annotation_without('text', 'tags'))

        assert 'hasBody' in rendered
        hasBody = rendered['hasBody']
        assert len(hasBody) == 0

        assert 'motivatedBy' in rendered
        assert len(rendered['motivatedBy']) == 0

    def test_has_target(self):
        rendered = self.renderer.render(annotation)

        assert 'hasTarget' in rendered
        hasTarget = rendered['hasTarget']
        assert len(hasTarget) == 1
        assert hasTarget[0] == oa_rendered_annotation['hasTarget'][0]

        assert 'hasSelector' in hasTarget[0]
        hasSelector = hasTarget[0]['hasSelector']
        oa_selector = oa_rendered_annotation['hasTarget'][0]['hasSelector']
        assert hasSelector == oa_selector

    def test_has_target_without_ranges(self):
        rendered = self.renderer.render(_annotation_without('ranges'))

        assert 'hasTarget' in rendered
        hasTarget = rendered['hasTarget']
        assert len(hasTarget) == 1
        assert hasTarget[0] == annotation['uri']

    def test_has_target_without_uri(self):
        rendered = self.renderer.render(_annotation_without('uri'))

        assert 'hasTarget' in rendered
        hasTarget = rendered['hasTarget']
        assert len(hasTarget) == 0

    def test_annotated_by(self):
        rendered = self.renderer.render(annotation)

        assert 'annotatedBy' in rendered
        assert rendered['annotatedBy'] == oa_rendered_annotation['annotatedBy']

    def test_annotated_by_without_user(self):
        rendered = self.renderer.render(_annotation_without('user'))

        assert 'annotatedBy' in rendered
        assert rendered['annotatedBy'] == {}

    def test_annotated_at(self):
        rendered = self.renderer.render(annotation)

        assert 'annotatedAt' in rendered
        assert rendered['annotatedAt'] == oa_rendered_annotation['annotatedAt']

    def test_serialized_at(self):
        rendered = self.renderer.render(annotation)

        assert 'serializedAt' in rendered
        assert rendered['serializedAt'] == oa_rendered_annotation['serializedAt']