diff --git a/annotator/annotation.py b/annotator/annotation.py
index 31e09af..ab004bf 100644
--- a/annotator/annotation.py
+++ b/annotator/annotation.py
@@ -64,6 +64,7 @@ def save(self, *args, **kwargs):
 
         super(Annotation, self).save(*args, **kwargs)
 
+
     @classmethod
     def search_raw(cls, query=None, params=None, user=None,
                    authorization_enabled=None, **kwargs):
diff --git a/annotator/openannotation.py b/annotator/openannotation.py
new file mode 100644
index 0000000..41dfe52
--- /dev/null
+++ b/annotator/openannotation.py
@@ -0,0 +1,166 @@
+import logging
+log = logging.getLogger(__name__)
+
+# Import OrderedDict if available
+try:
+    from collections import OrderedDict
+except ImportError:
+    try:
+        from ordereddict import OrderedDict
+    except ImportError:
+        log.warning("No OrderedDict available, JSON-LD content will be unordered. "
+                    "Use Python>=2.7 or install ordereddict module to fix.")
+        OrderedDict = dict
+
+from annotator.annotation import Annotation
+
+class OAAnnotation(Annotation):
+    """A helper class to represent an annotation according to the Open
+    Annotation Data Model: http://www.openannotation.org/spec/core/core.html
+
+    Currently it only generates JSON-LD.
+    """
+
+    # The ID of the annotation will be relative to the base URL, if it is set.
+    jsonld_baseurl = None
+
+    @property
+    def jsonld(self):
+        """The JSON-LD formatted RDF representation of the annotation."""
+
+        context = [
+            "http://www.w3.org/ns/oa-context-20130208.json",
+            {'annotator': 'http://annotatorjs.org/ns/'}
+        ]
+
+        if self.jsonld_baseurl is not None:
+            context.append({'@base': self.jsonld_baseurl})
+
+        # The JSON-LD spec recommends to put @context at the top of the
+        # document, so we'll be nice and use an ordered dictionary.
+        annotation = OrderedDict()
+        annotation['@context'] = context
+        annotation['@id'] = self['id']
+        annotation['@type'] = 'oa:Annotation'
+        annotation['hasBody'] = self.has_body
+        annotation['hasTarget'] = self.has_target
+        annotation['annotatedBy'] = self.annotated_by
+        annotation['annotatedAt'] = self.annotated_at
+        annotation['serializedBy'] = self.serialized_by
+        annotation['serializedAt'] = self.serialized_at
+        annotation['motivatedBy'] = self.motivated_by
+        return annotation
+
+    @property
+    def has_body(self):
+        """All annotation bodies: the text comment followed by each tag."""
+        bodies = []
+        bodies += self.textual_bodies
+        bodies += self.tags
+        return bodies
+
+    @property
+    def textual_bodies(self):
+        """A list with a single text body, or an empty list if there is no text."""
+        if not self.get('text'):
+            # Note that we treat an empty text as not having text at all.
+            return []
+        body = {
+            '@type': ['dctypes:Text', 'cnt:ContentAsText'],
+            'dc:format': 'text/plain',
+            'cnt:chars': self['text'],
+        }
+        return [body]
+
+    @property
+    def tags(self):
+        """A list of oa:Tag items, one per entry in the 'tags' field."""
+        if 'tags' not in self:
+            return []
+        return [
+            {
+                '@type': ['oa:Tag', 'cnt:ContentAsText'],
+                'dc:format': 'text/plain',
+                'cnt:chars': tag,
+            }
+            for tag in self['tags']
+        ]
+
+    @property
+    def motivated_by(self):
+        """Motivations for the annotation.
+
+        Currently any combination of commenting and/or tagging.
+        """
+        motivations = []
+        if self.textual_bodies:
+            motivations.append('oa:commenting')
+        if self.tags:
+            motivations.append('oa:tagging')
+        return motivations
+
+    @property
+    def has_target(self):
+        """The targets of the annotation.
+
+        Returns a selector for each range of the page content that was
+        selected, or if a range is absent the url of the page itself.
+        """
+        targets = []
+        if 'uri' not in self:
+            return targets
+        if self.get('ranges'):
+            # Build the selector for each quote
+            for range_selector in self['ranges']:
+                selector = {
+                    '@type': 'annotator:TextRangeSelector',
+                    'annotator:startContainer': range_selector['start'],
+                    'annotator:endContainer': range_selector['end'],
+                    'annotator:startOffset': range_selector['startOffset'],
+                    'annotator:endOffset': range_selector['endOffset'],
+                }
+                target = {
+                    '@type': 'oa:SpecificResource',
+                    'hasSource': self['uri'],
+                    'hasSelector': selector,
+                }
+                targets.append(target)
+        else:
+            # The annotation targets the page as a whole
+            targets.append(self['uri'])
+        return targets
+
+    @property
+    def annotated_by(self):
+        """The user that created the annotation (an empty list if not set)."""
+        if not self.get('user'):
+            return []
+        return {
+            '@type': 'foaf:Agent',  # It could be either a person or a bot
+            'foaf:name': self['user'],
+        }
+
+    @property
+    def annotated_at(self):
+        """The annotation's creation date (None if 'created' is not set)."""
+        if self.get('created'):
+            return self['created']
+
+    @property
+    def serialized_by(self):
+        """The software used for serializing."""
+        return {
+            '@id': 'annotator:annotator-store',
+            '@type': 'prov:Software-agent',
+            'foaf:name': 'annotator-store',
+            'foaf:homepage': {'@id': 'http://annotatorjs.org'},
+        }  # TODO: add version number
+
+    @property
+    def serialized_at(self):
+        """The last time the serialization changed (None if 'updated' is not set)."""
+        # Following the spec[1], we do not use the current time, but the last
+        # time the annotation graph has been updated.
+        # [1]: https://hypothes.is/a/R6uHQyVTQYqBc4-1V9X56Q
+        if self.get('updated'):
+            return self['updated']
diff --git a/annotator/store.py b/annotator/store.py
index 8bcb938..9d48e6b 100644
--- a/annotator/store.py
+++ b/annotator/store.py
@@ -1,16 +1,20 @@
 """
-This module implements a Flask-based JSON API to talk with the annotation store via the
-Annotation model.
+This module implements a Flask-based JSON API to talk with the annotation store
+via the Annotation model.
 It defines these routes:
   * Root
-  * Index
+  * Index (OA)
   * Create
-  * Read
+  * Read (OA)
   * Update
   * Delete
-  * Search
+  * Search (OA)
   * Raw ElasticSearch search
 See their descriptions in `root`'s definition for more detail.
+
+Routes marked with OA (the read-only endpoints) will render the annotations in
+JSON-LD following the Open Annotation Data Model if the user agent prefers this
+(by accepting application/ld+json).
 """
 from __future__ import absolute_import
 
@@ -25,6 +29,7 @@
 
 from annotator.atoi import atoi
 from annotator.annotation import Annotation
+from annotator.openannotation import OAAnnotation
 
 store = Blueprint('store', __name__)
 
@@ -39,6 +44,31 @@ def jsonify(obj, *args, **kwargs):
     return Response(res, mimetype='application/json', *args, **kwargs)
 
 
+#
+# Define renderers that can be used for presenting the annotation. Note that we
+# currently only use JSON-based types. The renderer returns not a string but a
+# jsonifiable object.
+#
+def render_jsonld(annotation):
+    """Returns a JSON-LD RDF representation of the annotation"""
+    oa_annotation = OAAnnotation(annotation)
+    oa_annotation.jsonld_baseurl = url_for('.read_annotation',
+                                           id='', _external=True)
+    return oa_annotation.jsonld
+
+renderers = {
+    'application/ld+json': render_jsonld,
+    'application/json': lambda annotation: annotation,
+}
+types_by_preference = ['application/json', 'application/ld+json']
+
+def render(annotation, content_type=None):
+    """Return the annotation in the given or negotiated content_type"""
+    if content_type is None:
+        content_type = preferred_content_type(types_by_preference)
+    return renderers[content_type](annotation)
+
+
 @store.before_request
 def before_request():
     if not hasattr(g, 'annotation_class'):
@@ -144,7 +174,9 @@ def index():
         user = None
 
     annotations = g.annotation_class.search(user=user)
-    return jsonify(annotations)
+
+    return jsonify(list(map(render, annotations)))
+
 
 # CREATE
 @store.route('/annotations', methods=['POST'])
@@ -190,7 +222,8 @@ def read_annotation(id):
     if failure:
         return failure
 
-    return jsonify(annotation)
+
+    return jsonify(render(annotation))
 
 
 # UPDATE
@@ -282,7 +315,7 @@ def search_annotations():
     total = g.annotation_class.count(**kwargs)
 
     return jsonify({'total': total,
-                    'rows': results})
+                    'rows': list(map(render, results))})
 
 
 # RAW ES SEARCH
@@ -418,3 +451,18 @@ def _update_query_raw(qo, params, k, v):
 
     elif k == 'search_type':
         params[k] = v
+
+def preferred_content_type(possible_types):
+    """Tells which content (MIME) type is preferred by the user agent.
+
+    In case of ties (or absence of an Accept header) items earlier in the
+    sequence are chosen.
+
+    Arguments:
+    possible_types -- Sequence of content types, in order of preference.
+    """
+    default = possible_types[0]
+    best_type = request.accept_mimetypes.best_match(
+        possible_types,
+        default)
+    return best_type
diff --git a/tests/test_openannotation.py b/tests/test_openannotation.py
new file mode 100644
index 0000000..2bd749f
--- /dev/null
+++ b/tests/test_openannotation.py
@@ -0,0 +1,98 @@
+import re
+
+from annotator.annotation import Annotation
+from annotator.openannotation import OAAnnotation
+from annotator.elasticsearch import _add_created, _add_updated
+
+class TestOpenAnnotation(object):
+
+    def _make_annotation(self):
+        annotation_fields = {
+            'id': '1234',
+            'text': 'blablabla',
+            'uri': 'http://localhost:4000/dev.html',
+            'ranges': [
+                {
+                    'start': '/ul[1]/li[1]',
+                    'end': '/ul[1]/li[1]',
+                    'startOffset': 0,
+                    'endOffset': 26
+                }
+            ],
+            'user': 'alice',
+            'quote': 'Lorem ipsum dolor sit amet',
+            'consumer': 'mockconsumer',
+            'permissions': {
+                'read': [],
+                'admin': [],
+                'update': [],
+                'delete': []
+            }
+        }
+        annotation = OAAnnotation(annotation_fields)
+        _add_created(annotation)
+        _add_updated(annotation)
+        return annotation
+
+    def test_basics(self):
+        ann = self._make_annotation()
+
+        # Get the JSON-LD (as a dictionary)
+        ann_ld = ann.jsonld
+
+        # Check the values of some basic fields
+        ldid = ann_ld['@id']
+        assert ldid == '1234', "Incorrect annotation @id: {0}!={1}".format(ldid, '1234')
+        assert ann_ld['@type'] == 'oa:Annotation'
+        assert ann_ld['hasBody'] == [{
+            "cnt:chars": "blablabla",
+            "@type": [
+                "dctypes:Text",
+                "cnt:ContentAsText"
+            ],
+            "dc:format": "text/plain"
+        }], "Incorrect hasBody: {0}".format(ann_ld['hasBody'])
+
+        assert ann_ld['hasTarget'] == [{
+            "hasSource": "http://localhost:4000/dev.html",
+            "hasSelector": {
+                "annotator:endContainer": "/ul[1]/li[1]",
+                "annotator:startOffset": 0,
+                "annotator:startContainer": "/ul[1]/li[1]",
+                "@type": "annotator:TextRangeSelector",
+                "annotator:endOffset": 26
+            },
+            "@type": "oa:SpecificResource"
+        }], "Incorrect hasTarget: {0}".format(ann_ld['hasTarget'])
+
+        assert ann_ld['annotatedBy'] == {
+            '@type': 'foaf:Agent',
+            'foaf:name': 'alice',
+        }, "Incorrect annotatedBy: {0}".format(ann_ld['annotatedBy'])
+
+        date_str = r"nnnn-nn-nnTnn:nn:nn(\.nnnnnn)?([+-]nn.nn|Z)"
+        date_regex = re.compile(date_str.replace("n", r"\d"))
+        assert date_regex.match(ann_ld['annotatedAt']), "Incorrect annotatedAt: {0}".format(ann_ld['annotatedAt'])
+        assert date_regex.match(ann_ld['serializedAt']), "Incorrect serializedAt: {0}".format(ann_ld['serializedAt'])
+
+
+def assemble_context(context_value):
+    """Merge the inline parts of a JSON-LD @context value into one dict.
+
+    A dict is returned as is; for a list, its dict items are merged in order
+    and other items (such as URLs of external contexts) are skipped.
+    """
+    if isinstance(context_value, dict):
+        return context_value
+    elif isinstance(context_value, list):
+        # Merge all context parts
+        context = {}
+        for context_piece in context_value:
+            if isinstance(context_piece, dict):
+                context.update(context_piece)
+        return context
+    elif isinstance(context_value, str):
+        # XXX: we do not retrieve an externally defined context
+        raise NotImplementedError
+    else:
+        raise AssertionError("@context should be dict, list, or str")
diff --git a/tests/test_store.py b/tests/test_store.py
index a541ff1..a47db9d 100644
--- a/tests/test_store.py
+++ b/tests/test_store.py
@@ -1,7 +1,9 @@
 from . import TestCase
 from .helpers import MockUser
+import functools
 from nose.tools import *
 from mock import patch
+import re
 
 from flask import json, g
 from six.moves import xrange
@@ -9,6 +11,7 @@
 from annotator import auth, es
 from annotator.annotation import Annotation
 
+from .test_openannotation import assemble_context
 
 class TestStore(TestCase):
     def setup(self):
@@ -341,6 +344,75 @@ def test_search_offset(self):
         assert_equal(len(res['rows']), 20)
         assert_equal(res['rows'][0], first)
 
+    def test_mimetypes(self):
+        """Test if correct responses are returned for given Accept headers.
+
+        Tests each content-negotiating endpoint with several accept-header
+        values.
+        """
+        kwargs = dict(text=u"Foo", id='123')
+        self._create_annotation(**kwargs)
+        accept_headers = {
+            'no_accept': None,
+            'pref_jsonld': 'application/ld+json,application/json;q=0.9',
+            'pref_json': 'application/json,application/ld+json;q=0.9',
+            'pref_either': 'application/ld+json,application/json',
+            'eat_all': '*/*',
+        }
+
+        endpoints = {
+            'read': {'url': '/api/annotations/123',
+                     'get_ann': lambda res: res},
+            'search': {'url': '/api/search',
+                       'get_ann': lambda res: res['rows'][0]},
+            'index': {'url': '/api/annotations',
+                      'get_ann': lambda res: res[0]},
+        }
+
+        def returns_ld(endpoint, preference):
+            accept_header = accept_headers[preference]
+            headers = dict(self.headers, Accept=accept_header)
+            response = self.cli.get(endpoint['url'], headers=headers)
+            annotation = endpoint['get_ann'](json.loads(response.data))
+            return '@id' in annotation
+
+        for action, endpoint in endpoints.items():
+            is_ld = functools.partial(returns_ld, endpoint)
+            # Currently, we only want JSON-LD if we explicitly ask for it.
+            assert is_ld('pref_jsonld'), "Expected JSON-LD response from %s" % action
+            assert not is_ld('pref_json'), "Expected plain JSON response from %s" % action
+            assert not is_ld('pref_either'), "Expected plain JSON response from %s" % action
+            assert not is_ld('eat_all'), "Plain JSON should be default (for %s)" % action
+            assert not is_ld('no_accept'), "Plain JSON should be default (for %s)" % action
+
+    def test_jsonld_base(self):
+        """Test if the JSON-LD representation contains the correct @base"""
+        # Create an annotation
+        annotation_fields = {
+            "text": "blablabla",
+            "uri": "http://localhost:4000/dev.html",
+            "user": "alice",
+            "quote": "Lorem ipsum dolor sit amet",
+            "consumer": "mockconsumer",
+        }
+        ann_orig = self._create_annotation(**annotation_fields)
+        ann_id = ann_orig['id']
+
+        # Fetch this annotation in JSON-LD format
+        headers = dict(self.headers, Accept='application/ld+json')
+        res = self.cli.get('/api/annotations/{0}'.format(ann_id),
+                           headers=headers)
+        ann_ld = json.loads(res.data)
+
+        assert '@context' in ann_ld, "Expected a @context in JSON-LD"
+        context = assemble_context(ann_ld['@context'])
+
+        # Check @base value (note it will be different for a real deployment)
+        base = context.get('@base')
+        assert base is not None, "Annotation should have a @base in @context"
+        assert base == 'http://localhost/api/annotations/', 'Base incorrect, found @base: "{0}"'.format(base)
+
+
     def _get_search_results(self, qs=''):
         res = self.cli.get('/api/search?{qs}'.format(qs=qs), headers=self.headers)
         return json.loads(res.data)